commit 9ece019c9d7e2a2e70d56a9bbd8f9d06b41d86c5
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Fri Jun 11 14:24:15 2021 -0500

    use ICU

diff --git a/configure.ac b/configure.ac
index 5b60c37..b200861 100644
--- a/configure.ac
+++ b/configure.ac
@@ -38,13 +38,19 @@ PKG_CHECK_MODULES([LIBXML], [libxml-2.0 >= required_libxml_version])
 AC_SUBST(LIBXML_CFLAGS)
 AC_SUBST(LIBXML_LIBS)
 
+PKG_CHECK_MODULES([ICU], [icu-i18n, icu-io, icu-uc])
+
+AC_SUBST(ICU_CFLAGS)
+AC_SUBST(ICU_LIBS)
+
 # Checks for libraries.
 AC_CHECK_LIB(xml2, xmlReaderForFile)
 
-AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, fgetwc_unlocked, fputwc_unlocked, fgetws_unlocked, fputws_unlocked])
+AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked])
+AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
 
-CPPFLAGS="$CPPFLAGS $CFLAGS $LTTOOLBOX_CFLAGS $APERTIUM_CFLAGS $LIBXML_CFLAGS"
-LIBS="$LIBS $LTTOOLBOX_LIBS $APERTIUM_LIBS $LIBXML_LIBS"
+CPPFLAGS="$CPPFLAGS $CFLAGS $LTTOOLBOX_CFLAGS $APERTIUM_CFLAGS $LIBXML_CFLAGS $ICU_CFLAGS"
+LIBS="$LIBS $LTTOOLBOX_LIBS $APERTIUM_LIBS $LIBXML_LIBS $ICU_LIBS"
 
 # Checks for highest supported C++ standard
 AC_LANG(C++)
diff --git a/src/lsx_comp.cc b/src/lsx_comp.cc
index 4f9f5a6..905395e 100644
--- a/src/lsx_comp.cc
+++ b/src/lsx_comp.cc
@@ -2,6 +2,7 @@
 #include <cerrno>
 #include <iostream>
 #include <stdlib.h>
+#include <cstring>
 
 #include <lsx_compiler.h>
 #include <lttoolbox/lt_locale.h>
@@ -29,7 +30,7 @@ int main (int argc, char** argv)
 
   Compiler c;
 
-  wstring dir;
+  UString dir;
 
   if(strcmp(argv[1], "lr") == 0)
   {
diff --git a/src/lsx_compiler.cc b/src/lsx_compiler.cc
index 1e5b4c8..069c890 100644
--- a/src/lsx_compiler.cc
+++ b/src/lsx_compiler.cc
@@ -29,14 +29,15 @@ using namespace std;
 
 // Removed static globals copied from lttoolbox's compiler.cc. Same namespace, same mangling, bad result.
 
-wstring const Compiler::COMPILER_ANYTAG_ELEM        = L"t";
-wstring const Compiler::COMPILER_ANYCHAR_ELEM       = L"w";
-wstring const Compiler::COMPILER_WB_ELEM            = L"j";
+UString const Compiler::COMPILER_ANYTAG_ELEM        = "t"_u;
+UString const Compiler::COMPILER_ANYCHAR_ELEM       = "w"_u;
+UString const Compiler::COMPILER_WB_ELEM            = "j"_u;
 
 Compiler::Compiler() :
 reader(0),
 verbose(false),
-first_element(false)
+first_element(false),
+any_tag(0), any_char(0), word_boundary(0)  // placeholders; parse() assigns the real alphabet codes
 {
 }
 
@@ -45,19 +46,22 @@ Compiler::~Compiler()
 }
 
 void
-Compiler::parse(string const &fichero, wstring const &dir)
+Compiler::parse(string const &fichero, UString const &dir)
 {
     direction = dir;
     reader = xmlReaderForFile(fichero.c_str(), NULL, 0);
     if(reader == NULL)
     {
-        wcerr << "Error: Cannot open '" << fichero.c_str() << "'." << endl;
+        cerr << "Error: Cannot open '" << fichero.c_str() << "'." << endl;
         exit(EXIT_FAILURE);
     }
 
-    alphabet.includeSymbol(L"<ANY_TAG>");
-    alphabet.includeSymbol(L"<ANY_CHAR>");
-    alphabet.includeSymbol(L"<$>");
+    alphabet.includeSymbol(Transducer::ANY_TAG_SYMBOL);
+    alphabet.includeSymbol(Transducer::ANY_CHAR_SYMBOL);
+    alphabet.includeSymbol(Transducer::LSX_BOUNDARY_SYMBOL);
+    any_tag       = alphabet(Transducer::ANY_TAG_SYMBOL);
+    any_char      = alphabet(Transducer::ANY_CHAR_SYMBOL);
+    word_boundary = alphabet(Transducer::LSX_BOUNDARY_SYMBOL);
 
     int ret = xmlTextReaderRead(reader);
     while(ret == 1)
@@ -69,7 +73,7 @@ Compiler::parse(string const &fichero, wstring const &dir)
 
     if(ret != 0)
     {
-        wcerr << L"Error: Parse error at the end of input." << endl;
+        cerr << "Error: Parse error at the end of input." << endl;
     }
 
     xmlFreeTextReader(reader);
@@ -77,19 +81,16 @@ Compiler::parse(string const &fichero, wstring const &dir)
 
 
     // Minimize transducers and ensure that all paths end with <$>
-    int end_trans = alphabet(alphabet(L"<$>"), alphabet(L"<$>"));
-    for(map<wstring, Transducer, Ltstr>::iterator it = sections.begin(),
-        limit = sections.end();
-        it != limit; it++)
-    {
-        (it->second).minimize();
+    int end_trans = alphabet(word_boundary, word_boundary);
+    for (auto& it : sections) {
+        it.second.minimize();
         // any paths which did not already end with <$> now will
         // having 2 finals isn't a problem because -separable only checks
         // for finals when it reads $, and you can't have 2 of those in a row
-        for(auto fin : (it->second).getFinals())
+        for(auto fin : it.second.getFinals())
         {
-          int end_state = (it->second).insertSingleTransduction(end_trans, fin.first);
-          (it->second).setFinal(end_state);
+          int end_state = it.second.insertSingleTransduction(end_trans, fin.first);
+          it.second.setFinal(end_state);
         }
     }
 }
@@ -105,8 +106,7 @@ Compiler::procAlphabet()
         int ret = xmlTextReaderRead(reader);
         if(ret == 1)
         {
-            xmlChar const *valor = xmlTextReaderConstValue(reader);
-            letters = XMLParseUtil::towstring(valor);
+            letters = XMLParseUtil::readValue(reader);  // assign the member; declaring a local here would shadow it and leave the member empty for write()
             bool espai = true;
             for(unsigned int i = 0; i < letters.length(); i++)
             {
@@ -118,13 +118,13 @@ Compiler::procAlphabet()
             }
             if(espai == true)  // libxml2 returns '\n' for <alphabet></alphabet>, should be empty
             {
-                letters = L"";
+              letters.clear();
             }
         }
         else
         {
-            wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-            wcerr << L"): Missing alphabet symbols." << endl;
+            cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+            cerr << "): Missing alphabet symbols." << endl;
             exit(EXIT_FAILURE);
         }
     }
@@ -133,7 +133,11 @@ Compiler::procAlphabet()
 void
 Compiler::procSDef()
 {
-    alphabet.includeSymbol(L"<"+attrib(COMPILER_N_ATTR)+L">");
+  UString s;  // built up as '<' + value of the COMPILER_N_ATTR attribute + '>'
+  s += '<';
+  s.append(attrib(COMPILER_N_ATTR));  // attribute is read from the element the reader is positioned on
+  s += '>';
+  alphabet.includeSymbol(s);  // register the bracketed name with the alphabet
 }
 
 void
@@ -151,15 +155,15 @@ Compiler::procParDef()
         {
             paradigms[current_paradigm].minimize();
             paradigms[current_paradigm].joinFinals();
-            current_paradigm = L"";
+            current_paradigm.clear();
         }
     }
 }
 
 int
-Compiler::matchTransduction(list<int> const &pi, list<int> const &pd, int estado, Transducer &t)
+Compiler::matchTransduction(vector<int> const &pi, vector<int> const &pd, int estado, Transducer &t)
 {
-    list<int>::const_iterator izqda, dcha, limizqda, limdcha;
+    vector<int>::const_iterator izqda, dcha, limizqda, limdcha;
 
     if(direction == COMPILER_RESTRICTION_LR_VAL)
     {
@@ -183,8 +187,6 @@ Compiler::matchTransduction(list<int> const &pi, list<int> const &pd, int estado
     }
     else
     {
-        int rsymbol = 0;
-
         while(true)
         {
             int etiqueta;
@@ -202,33 +204,31 @@ Compiler::matchTransduction(list<int> const &pi, list<int> const &pd, int estado
             else if(dcha == limdcha)
             {
                 etiqueta = alphabet(*izqda, 0);
-                rsymbol = 0;
                 izqda++;
             }
             else
             {
                 etiqueta = alphabet(*izqda, *dcha);
-                rsymbol = *dcha;
                 izqda++;
                 dcha++;
             }
 
-            if(etiqueta == alphabet(0, alphabet(L"<ANY_TAG>")) ||
-               etiqueta == alphabet(0, alphabet(L"<ANY_CHAR>"))
+            if(etiqueta == alphabet(0, any_tag) ||
+               etiqueta == alphabet(0, any_char)
               )
             {
               // rl compilation of a badly written rule
               // having an epsilon with wildcard output will produce
               // garbage output -- see https://github.com/apertium/apertium-separable/issues/8
-              wcerr << L"Warning: Cannot insert <t/> from empty input. Ignoring. (You probably want to specify exact tags when deleting a word.)" << endl;
+              cerr << "Warning: Cannot insert <t/> from empty input. Ignoring. (You probably want to specify exact tags when deleting a word.)" << endl;
               continue;
             }
 
             int nuevo_estado = t.insertSingleTransduction(etiqueta, estado);
-            if(etiqueta == alphabet(alphabet(L"<ANY_TAG>"),alphabet(L"<ANY_TAG>"))
-               || etiqueta == alphabet(alphabet(L"<ANY_CHAR>"),alphabet(L"<ANY_CHAR>"))
-               || etiqueta == alphabet(alphabet(L"<ANY_TAG>"), 0)
-               || etiqueta == alphabet(alphabet(L"<ANY_CHAR>"), 0)
+            if(etiqueta == alphabet(any_tag, any_tag)
+               || etiqueta == alphabet(any_char, any_char)
+               || etiqueta == alphabet(any_tag, 0)
+               || etiqueta == alphabet(any_char, 0)
               )
             {
                 t.linkStates(nuevo_estado, estado, 0);
@@ -242,12 +242,12 @@ Compiler::matchTransduction(list<int> const &pi, list<int> const &pd, int estado
 
 
 void
-Compiler::requireEmptyError(wstring const &name)
+Compiler::requireEmptyError(UString const &name)
 {
     if(!xmlTextReaderIsEmptyElement(reader))
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Non-empty element '<" << name << L">' should be empty." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Non-empty element '<" << name << ">' should be empty." << endl;
         exit(EXIT_FAILURE);
     }
 }
@@ -255,139 +255,137 @@ Compiler::requireEmptyError(wstring const &name)
 bool
 Compiler::allBlanks()
 {
-    bool flag = true;
-    wstring text = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-
-    for(unsigned int i = 0, limit = text.size(); i < limit; i++)
-    {
-        flag = flag && iswspace(text[i]);
-    }
-
-    return flag;
+  vector<int32_t> text;  // filled from the value of the node the reader is positioned on
+  XMLParseUtil::readValueInto32(reader, text);  // NOTE(review): presumably one 32-bit code point per element -- confirm against lttoolbox
+  for (auto& it : text) {
+    if (!u_isspace(it)) {  // whitespace test on a single element
+      return false;  // stop at the first non-blank element
+    }
+  }
+  return true;  // every element was blank; also the result when text is empty
 }
 
 void
-Compiler::readString(list<int> &result, wstring const &name)
+Compiler::readString(vector<int> &result, UString const &name)
 {
-    if(name == L"#text")
+    if(name == "#text"_u)
     {
-        wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-        for(unsigned int i = 0, limit = value.size(); i < limit; i++)
-        {
-            result.push_back(static_cast<int>(value[i]));
-        }
+      XMLParseUtil::readValueInto32(reader, result);
     }
     else if(name == COMPILER_BLANK_ELEM)
     {
         requireEmptyError(name);
-        result.push_back(static_cast<int>(L' '));
+        result.push_back(static_cast<int>(' '));
     }
     else if(name == COMPILER_POSTGENERATOR_ELEM)
     {
         requireEmptyError(name);
-        result.push_back(static_cast<int>(L'~'));
+        result.push_back(static_cast<int>('~'));
     }
     else if(name == COMPILER_GROUP_ELEM)
     {
         int tipo=xmlTextReaderNodeType(reader);
         if(tipo != XML_READER_TYPE_END_ELEMENT)
         {
-            result.push_back(static_cast<int>(L'#'));
+            result.push_back(static_cast<int>('#'));
         }
     }
     else if(name == COMPILER_S_ELEM)
     {
         requireEmptyError(name);
-        wstring symbol = L"<" + attrib(COMPILER_N_ATTR) + L">";
+        UString symbol;
+        symbol += '<';
+        symbol.append(attrib(COMPILER_N_ATTR));
+        symbol += '>';
 
         if(!alphabet.isSymbolDefined(symbol))
         {
-            wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-            wcerr << L"): Undefined symbol '" << symbol << L"'." << endl;
+            cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+            cerr << "): Undefined symbol '" << symbol << "'." << endl;
             exit(EXIT_FAILURE);
         }
         result.push_back(alphabet(symbol));
     }
     else if(name == COMPILER_ANYTAG_ELEM)
     {
-        result.push_back(alphabet(L"<ANY_TAG>"));
+        result.push_back(any_tag);
     }
     else if(name == COMPILER_ANYCHAR_ELEM)
     {
-        result.push_back(alphabet(L"<ANY_CHAR>"));
+        result.push_back(any_char);
     }
     else if(name == COMPILER_WB_ELEM)
     {
         requireEmptyError(name);
-        result.push_back(alphabet(L"<$>"));
+        result.push_back(word_boundary);
     }
 
     else
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Invalid specification of element '<" << name;
-        wcerr << L">' in this context." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid specification of element '<" << name;
+        cerr << ">' in this context." << endl;
         exit(EXIT_FAILURE);
     }
 }
 
 void
-Compiler::skipBlanks(wstring &name)
+Compiler::skipBlanks(UString &name)
 {
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
-        if(name != L"#comment")
+        if(name != "#comment"_u)
         {
             if(!allBlanks())
             {
-                wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-                wcerr << L"): Invalid construction." << endl;
+                cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+                cerr << "): Invalid construction." << endl;
                 exit(EXIT_FAILURE);
             }
         }
 
         xmlTextReaderRead(reader);
-        name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+        name = XMLParseUtil::readName(reader);
     }
 }
 
 void
-Compiler::skip(wstring &name, wstring const &elem)
+Compiler::skip(UString &name, UString const &elem)
 {
     skip(name, elem, true);
 }
 
 void
-Compiler::skip(wstring &name, wstring const &elem, bool open)
+Compiler::skip(UString &name, UString const &elem, bool open)
 {
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
-    wstring slash;
+    name = XMLParseUtil::readName(reader);
+    UString slash;
 
     if(!open)
     {
-        slash = L"/";
+        slash = "/"_u;
     }
 
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
-        if(name != L"#comment")
+        if(name != "#comment"_u)
         {
             if(!allBlanks())
             {
-                wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-                wcerr << L"): Invalid construction." << endl;
+                cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+                cerr << "): Invalid construction." << endl;
                 exit(EXIT_FAILURE);
             }
         }
         xmlTextReaderRead(reader);
-        name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+        name = XMLParseUtil::readName(reader);
     }
 
     if(name != elem)
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Expected '<" << slash << elem << L">'." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Expected '<" << slash << elem << ">'." << endl;
         exit(EXIT_FAILURE);
     }
 }
@@ -395,16 +393,16 @@ Compiler::skip(wstring &name, wstring const &elem, bool open)
 EntryToken
 Compiler::procIdentity()
 {
-    list<int> both_sides;
+    vector<int> both_sides;
 
     if(!xmlTextReaderIsEmptyElement(reader))
     {
-        wstring name = L"";
+        UString name;
 
         while(true)
         {
             xmlTextReaderRead(reader);
-            name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+            name = XMLParseUtil::readName(reader);
             if(name == COMPILER_IDENTITY_ELEM)
             {
                 break;
@@ -413,10 +411,10 @@ Compiler::procIdentity()
         }
     }
 
-    if(verbose && first_element && (both_sides.front() == (int)L' '))
+    if(verbose && first_element && !both_sides.empty() && (both_sides.front() == (int)' '))  // an empty element leaves both_sides empty, and front() on an empty vector is undefined
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Entry begins with space." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Entry begins with space." << endl;
     }
     first_element = false;
     EntryToken e;
@@ -427,18 +425,18 @@ Compiler::procIdentity()
 EntryToken
 Compiler::procTransduction()
 {
-    list<int> lhs, rhs;
-    wstring name;
+    vector<int> lhs, rhs;
+    UString name;
 
     skip(name, COMPILER_LEFT_ELEM);
 
     if(!xmlTextReaderIsEmptyElement(reader))
     {
-        name = L"";
+      name.clear();
         while(true)
         {
             xmlTextReaderRead(reader);
-            name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+            name = XMLParseUtil::readName(reader);
             if(name == COMPILER_LEFT_ELEM)
             {
                 break;
@@ -447,10 +445,10 @@ Compiler::procTransduction()
         }
     }
 
-    if(verbose && first_element && (lhs.front() == (int)L' '))
+    if(verbose && first_element && !lhs.empty() && (lhs.front() == (int)' '))  // an empty left element leaves lhs empty, and front() on an empty vector is undefined
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Entry begins with space." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Entry begins with space." << endl;
     }
     first_element = false;
 
@@ -458,11 +456,11 @@ Compiler::procTransduction()
 
     if(!xmlTextReaderIsEmptyElement(reader))
     {
-        name = L"";
+      name.clear();
         while(true)
         {
             xmlTextReaderRead(reader);
-            name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+            name = XMLParseUtil::readName(reader);
             if(name == COMPILER_RIGHT_ELEM)
             {
                 break;
@@ -479,8 +477,8 @@ Compiler::procTransduction()
     return e;
 }
 
-wstring
-Compiler::attrib(wstring const &name)
+UString
+Compiler::attrib(UString const &name)
 {
     return XMLParseUtil::attrib(reader, name);
 }
@@ -489,20 +487,20 @@ EntryToken
 Compiler::procPar()
 {
     EntryToken e;
-    wstring nomparadigma = attrib(COMPILER_N_ATTR);
+    UString nomparadigma = attrib(COMPILER_N_ATTR);
     first_element = false;
 
-    if(current_paradigm != L"" && nomparadigma == current_paradigm)
+    if(!current_paradigm.empty() && nomparadigma == current_paradigm)
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." <<endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Paradigm refers to itself '" << nomparadigma << "'." <<endl;
         exit(EXIT_FAILURE);
     }
 
     if(paradigms.find(nomparadigma) == paradigms.end())
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Undefined paradigm '" << nomparadigma << L"'." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Undefined paradigm '" << nomparadigma << "'." << endl;
         exit(EXIT_FAILURE);
     }
     e.setParadigm(nomparadigma);
@@ -512,8 +510,7 @@ Compiler::procPar()
 void
 Compiler::insertEntryTokens(vector<EntryToken> const &elements)
 {
-    if(current_paradigm != L"")
-    {
+  if(!current_paradigm.empty()) {
         // compilation of paradigms
         Transducer &t = paradigms[current_paradigm];
         int e = t.getInitial();
@@ -537,8 +534,8 @@ Compiler::insertEntryTokens(vector<EntryToken> const &elements)
             }
             else
             {
-                wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-                wcerr << L"): Invalid entry token." << endl;
+                cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+                cerr << "): Invalid entry token." << endl;
                 exit(EXIT_FAILURE);
             }
         }
@@ -608,15 +605,14 @@ Compiler::insertEntryTokens(vector<EntryToken> const &elements)
 
 
 void
-Compiler::requireAttribute(wstring const &value, wstring const &attrname,
-                           wstring const &elemname)
+Compiler::requireAttribute(UString const &value, UString const &attrname,
+                           UString const &elemname)
 {
-    if(value == L"")
-    {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): '<" << elemname;
-        wcerr << L"' element must specify non-void '";
-        wcerr << attrname << L"' attribute." << endl;
+  if(value.empty()) {
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): '<" << elemname;
+        cerr << "' element must specify non-void '";
+        cerr << attrname << "' attribute." << endl;
         exit(EXIT_FAILURE);
     }
 }
@@ -629,46 +625,46 @@ Compiler::procSection()
 
     if(tipo != XML_READER_TYPE_END_ELEMENT)
     {
-        wstring const &id = attrib(COMPILER_ID_ATTR);
-        wstring const &type = attrib(COMPILER_TYPE_ATTR);
+        UString const &id = attrib(COMPILER_ID_ATTR);
+        UString const &type = attrib(COMPILER_TYPE_ATTR);
         requireAttribute(id, COMPILER_ID_ATTR, COMPILER_SECTION_ELEM);
         requireAttribute(type, COMPILER_TYPE_ATTR, COMPILER_SECTION_ELEM);
 
         current_section = id;
-        current_section += L"@";
+        current_section += '@';
         current_section.append(type);
     }
     else
     {
-        current_section = L"";
+      current_section.clear();
     }
 }
 
 void
 Compiler::procEntry()
 {
-    wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
-    wstring ignore = this->attrib(COMPILER_IGNORE_ATTR);
-    wstring altval = this->attrib(COMPILER_ALT_ATTR);
-    wstring varval = this->attrib(COMPILER_V_ATTR);
-    wstring varl   = this->attrib(COMPILER_VL_ATTR);
-    wstring varr   = this->attrib(COMPILER_VR_ATTR);
-
-    //���if entry is masked by a restriction of direction or an ignore mark
-    if((atributo != L"" && atributo != direction)
+    UString atributo=this->attrib(COMPILER_RESTRICTION_ATTR);
+    UString ignore = this->attrib(COMPILER_IGNORE_ATTR);
+    UString altval = this->attrib(COMPILER_ALT_ATTR);
+    UString varval = this->attrib(COMPILER_V_ATTR);
+    UString varl   = this->attrib(COMPILER_VL_ATTR);
+    UString varr   = this->attrib(COMPILER_VR_ATTR);
+
+    // if entry is masked by a restriction of direction or an ignore mark
+    if((!atributo.empty() && atributo != direction)
        || ignore == COMPILER_IGNORE_YES_VAL
-       || (altval != L"" && altval != alt)
-       || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
-       || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
-       || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
+       || (!altval.empty() && altval != alt)
+       || (direction == COMPILER_RESTRICTION_RL_VAL && !varval.empty() && varval != variant)
+       || (direction == COMPILER_RESTRICTION_RL_VAL && !varl.empty() && varl != variant_left)
+       || (direction == COMPILER_RESTRICTION_LR_VAL && !varr.empty() && varr != variant_right))
     {
         // parse to the end of the entry
-        wstring name = L"";
+        UString name;
 
         while(name != COMPILER_ENTRY_ELEM)
         {
             xmlTextReaderRead(reader);
-            name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+            name = XMLParseUtil::readName(reader);
         }
 
         return;
@@ -681,14 +677,14 @@ Compiler::procEntry()
         int ret = xmlTextReaderRead(reader);
         if(ret != 1)
         {
-            wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-            wcerr << L"): Parse error." << endl;
+            cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+            cerr << "): Parse error." << endl;
             exit(EXIT_FAILURE);
         }
-        wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+        UString name = XMLParseUtil::readName(reader);
         skipBlanks(name);
 
-        if(current_paradigm == L"" && verbose)
+        if(current_paradigm.empty() && verbose)
         {
             first_element = true;
         }
@@ -712,12 +708,12 @@ Compiler::procEntry()
 
             // detecci���n del uso de paradigmas no definidos
 
-            wstring const &p = elements.rbegin()->paradigmName();
+            UString const &p = elements.rbegin()->paradigmName();
 
             if(paradigms.find(p) == paradigms.end())
             {
-                wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-                wcerr << L"): Undefined paradigm '" << p << L"'." <<endl;
+                cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+                cerr << "): Undefined paradigm '" << p << "'." <<endl;
                 exit(EXIT_FAILURE);
             }
             // descartar entradas con paradigms vac���os (por las direciones,
@@ -727,7 +723,7 @@ Compiler::procEntry()
                 while(name != COMPILER_ENTRY_ELEM || tipo != XML_READER_TYPE_END_ELEMENT)
                 {
                     xmlTextReaderRead(reader);
-                    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+                    name = XMLParseUtil::readName(reader);
                     tipo = xmlTextReaderNodeType(reader);
                 }
                 return;
@@ -736,8 +732,8 @@ Compiler::procEntry()
         else if(name == COMPILER_ENTRY_ELEM && tipo == XML_READER_TYPE_END_ELEMENT)
         {
             /* INSERTING FINAL <$> HERE */
-            // list<int> wb;
-            // wb.push_back(alphabet(L"<$>"));
+            // vector<int> wb;
+            // wb.push_back(word_boundary);
             // EntryToken e;
             // e.setSingleTransduction(wb, wb);
             // elements.push_back(e);
@@ -748,9 +744,9 @@ Compiler::procEntry()
         }
         else
         {
-            wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-            wcerr << L"): Invalid inclusion of '<" << name << L">' into '<" << COMPILER_ENTRY_ELEM;
-            wcerr << L">'." << endl;
+            cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+            cerr << "): Invalid inclusion of '<" << name << ">' into '<" << COMPILER_ENTRY_ELEM;
+            cerr << ">'." << endl;
             exit(EXIT_FAILURE);
         }
 
@@ -760,12 +756,11 @@ Compiler::procEntry()
 void
 Compiler::procNode()
 {
-    xmlChar const *xnombre = xmlTextReaderConstName(reader);
-    wstring nombre = XMLParseUtil::towstring(xnombre);
+    UString nombre = XMLParseUtil::readName(reader);
 
-    // HACER: optimizar el orden de ejecuci���n de esta ristra de "ifs"
+    // HACER: optimizar el orden de ejecución de esta ristra de "ifs"
 
-    if(nombre == L"#text")
+    if(nombre == "#text"_u)
     {
         /* ignorar */
     }
@@ -801,14 +796,14 @@ Compiler::procNode()
     {
         procSection();
     }
-    else if(nombre == L"#comment")
+    else if(nombre == "#comment"_u)
     {
         /* ignorar */
     }
     else
     {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Invalid node '<" << nombre << L">'." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid node '<" << nombre << ">'." << endl;
         exit(EXIT_FAILURE);
     }
 }
@@ -818,7 +813,7 @@ Compiler::procRegexp()
 {
     EntryToken et;
     xmlTextReaderRead(reader);
-    wstring re = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
+    UString re = XMLParseUtil::readValue(reader);
     et.setRegexp(re);
     xmlTextReaderRead(reader);
     return et;
@@ -828,7 +823,7 @@ void
 Compiler::write(FILE *output)
 {
     // letters
-    Compression::wstring_write(letters, output);
+    Compression::string_write(letters, output);
 
     // symbols
     alphabet.write(output);
@@ -836,16 +831,11 @@ Compiler::write(FILE *output)
     // transducers
     Compression::multibyte_write(sections.size(), output);
 
-    int conta=0;
-    for(map<wstring, Transducer, Ltstr>::iterator it = sections.begin(),
-        limit = sections.end();
-        it != limit; it++)
-    {
-        conta++;
-        wcout << it->first << " " << it->second.size();
-        wcout << " " << it->second.numberOfTransitions() << endl;
-        Compression::wstring_write(it->first, output);
-        it->second.write(output);
+    for (auto& it : sections) {
+        cout << it.first << " " << it.second.size();
+        cout << " " << it.second.numberOfTransitions() << endl;
+        Compression::string_write(it.first, output);
+        it.second.write(output);
     }
 }
 
diff --git a/src/lsx_compiler.h b/src/lsx_compiler.h
index 971c6fb..10a5aa2 100644
--- a/src/lsx_compiler.h
+++ b/src/lsx_compiler.h
@@ -20,12 +20,12 @@
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/regexp_compiler.h>
 #include <lttoolbox/entry_token.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/transducer.h>
 
 #include <map>
 #include <string>
 #include <set>
+#include <cstdint>
 #include <libxml/xmlreader.h>
 
 using namespace std;
@@ -44,43 +44,43 @@ private:
     /**
      * The alt value
      */
-    wstring alt;
+    UString alt;
 
     /**
      * The variant value (monodix)
      */
-    wstring variant;
+    UString variant;
 
     /**
      * The variant value (left side of bidix)
      */
-    wstring variant_left;
+    UString variant_left;
 
     /**
      * The variant value (right side of bidix)
      */
-    wstring variant_right;
+    UString variant_right;
 
     /**
      * The paradigm being compiled
      */
-    wstring current_paradigm;
+    UString current_paradigm;
 
     /**
      * The dictionary section being compiled
      */
-    wstring current_section;
+    UString current_section;
 
     /**
      * The direction of the compilation, 'lr' (left-to-right) or 'rl'
      * (right-to-left)
      */
-    wstring direction;
+    UString direction;
 
     /**
      * List of characters to be considered alphabetic
      */
-    wstring letters;
+    UString letters;
 
     /**
      * Set verbose mode: warnings which may or may not be correct
@@ -97,30 +97,37 @@ private:
      */
     Alphabet alphabet;
 
+    /**
+     * Special symbols
+     */
+    int32_t any_tag;
+    int32_t any_char;
+    int32_t word_boundary;
+
     /**
      * List of named transducers-paradigms
      */
-    map<wstring, Transducer, Ltstr> paradigms;
+    map<UString, Transducer> paradigms;
 
     /**
      * List of named dictionary sections
      */
-    map<wstring, Transducer, Ltstr> sections;
+    map<UString, Transducer> sections;
 
     /**
      * List of named prefix copy of a paradigm
      */
-    map<wstring, map<wstring, int, Ltstr>, Ltstr> prefix_paradigms;
+    map<UString, map<UString, int>> prefix_paradigms;
 
     /**
      * List of named suffix copy of a paradigm
      */
-    map<wstring, map<wstring, int, Ltstr>, Ltstr> suffix_paradigms;
+    map<UString, map<UString, int>> suffix_paradigms;
 
     /**
      * List of named endings of a suffix copy of a paradgim
      */
-    map<wstring, map<wstring, int, Ltstr>, Ltstr> postsuffix_paradigms;
+    map<UString, map<UString, int>> postsuffix_paradigms;
 
 
     /*
@@ -175,7 +182,7 @@ private:
      * @param name the name of the attribute
      * @return the value of the attribute
      */
-    wstring attrib(wstring const &name);
+    UString attrib(UString const &name);
 
     /**
      * Construct symbol pairs by align left side of both parts and insert
@@ -186,7 +193,7 @@ private:
      * @param t the transducer
      * @return the last state of the inserted transduction
      */
-    int matchTransduction(list<int> const &lp, list<int> const &rp,
+    int matchTransduction(vector<int> const &lp, vector<int> const &rp,
                           int state, Transducer &t);
     /**
      * Parse the &lt;p&lt; element
@@ -217,7 +224,7 @@ private:
      * @param name the name of the node
      * @param elem the name of the expected node
      */
-    void skip(wstring &name, wstring const &elem);
+    void skip(UString &name, UString const &elem);
 
     /**
      * Skip all document #text nodes before "elem"
@@ -225,22 +232,22 @@ private:
      * @param elem the name of the expected node
      * @param open true for open element, false for closed
      */
-    void skip(wstring &name, wstring const &elem, bool open);
+    void skip(UString &name, UString const &elem, bool open);
 
     /**
      * Skip all blank #text nodes before "name"
      * @param name the name of the node
      */
-    void skipBlanks(wstring &name);
+    void skipBlanks(UString &name);
 
 
-    void readString(list<int> &result, wstring const &name);
+    void readString(vector<int> &result, UString const &name);
 
     /**
      * Force an element to be empty, and check for it
      * @param name the element
      */
-    void requireEmptyError(wstring const &name);
+    void requireEmptyError(UString const &name);
 
     /**
      * Force an attribute to be specified, amd check for it
@@ -249,8 +256,8 @@ private:
      * @param elemname the parent of the attribute
      */
 
-    void requireAttribute(wstring const &value, wstring const &attrname,
-                          wstring const &elemname);
+    void requireAttribute(UString const &value, UString const &attrname,
+                          UString const &elemname);
     /**
      * True if all the elements in the current node are blanks
      * @return true if all are blanks
@@ -263,42 +270,42 @@ public:
      * Constants to represent the element and the attributes of
      * dictionaries
      */
-    static wstring const COMPILER_DICTIONARY_ELEM;
-    static wstring const COMPILER_ALPHABET_ELEM;
-    static wstring const COMPILER_SDEFS_ELEM;
-    static wstring const COMPILER_SDEF_ELEM;
-    static wstring const COMPILER_N_ATTR;
-    static wstring const COMPILER_PARDEFS_ELEM;
-    static wstring const COMPILER_PARDEF_ELEM;
-    static wstring const COMPILER_PAR_ELEM;
-    static wstring const COMPILER_ENTRY_ELEM;
-    static wstring const COMPILER_RESTRICTION_ATTR;
-    static wstring const COMPILER_RESTRICTION_LR_VAL;
-    static wstring const COMPILER_RESTRICTION_RL_VAL;
-    static wstring const COMPILER_PAIR_ELEM;
-    static wstring const COMPILER_LEFT_ELEM;
-    static wstring const COMPILER_RIGHT_ELEM;
-    static wstring const COMPILER_S_ELEM;
-    static wstring const COMPILER_REGEXP_ELEM;
-    static wstring const COMPILER_SECTION_ELEM;
-    static wstring const COMPILER_ID_ATTR;
-    static wstring const COMPILER_TYPE_ATTR;
-    static wstring const COMPILER_IDENTITY_ELEM;
-    static wstring const COMPILER_JOIN_ELEM;
-    static wstring const COMPILER_BLANK_ELEM;
-    static wstring const COMPILER_POSTGENERATOR_ELEM;
-    static wstring const COMPILER_GROUP_ELEM;
-    static wstring const COMPILER_LEMMA_ATTR;
-    static wstring const COMPILER_IGNORE_ATTR;
-    static wstring const COMPILER_IGNORE_YES_VAL;
-    static wstring const COMPILER_ALT_ATTR;
-    static wstring const COMPILER_V_ATTR;
-    static wstring const COMPILER_VL_ATTR;
-    static wstring const COMPILER_VR_ATTR;
-
-    static wstring const COMPILER_ANYTAG_ELEM;
-    static wstring const COMPILER_ANYCHAR_ELEM;
-    static wstring const COMPILER_WB_ELEM;
+    static UString const COMPILER_DICTIONARY_ELEM;
+    static UString const COMPILER_ALPHABET_ELEM;
+    static UString const COMPILER_SDEFS_ELEM;
+    static UString const COMPILER_SDEF_ELEM;
+    static UString const COMPILER_N_ATTR;
+    static UString const COMPILER_PARDEFS_ELEM;
+    static UString const COMPILER_PARDEF_ELEM;
+    static UString const COMPILER_PAR_ELEM;
+    static UString const COMPILER_ENTRY_ELEM;
+    static UString const COMPILER_RESTRICTION_ATTR;
+    static UString const COMPILER_RESTRICTION_LR_VAL;
+    static UString const COMPILER_RESTRICTION_RL_VAL;
+    static UString const COMPILER_PAIR_ELEM;
+    static UString const COMPILER_LEFT_ELEM;
+    static UString const COMPILER_RIGHT_ELEM;
+    static UString const COMPILER_S_ELEM;
+    static UString const COMPILER_REGEXP_ELEM;
+    static UString const COMPILER_SECTION_ELEM;
+    static UString const COMPILER_ID_ATTR;
+    static UString const COMPILER_TYPE_ATTR;
+    static UString const COMPILER_IDENTITY_ELEM;
+    static UString const COMPILER_JOIN_ELEM;
+    static UString const COMPILER_BLANK_ELEM;
+    static UString const COMPILER_POSTGENERATOR_ELEM;
+    static UString const COMPILER_GROUP_ELEM;
+    static UString const COMPILER_LEMMA_ATTR;
+    static UString const COMPILER_IGNORE_ATTR;
+    static UString const COMPILER_IGNORE_YES_VAL;
+    static UString const COMPILER_ALT_ATTR;
+    static UString const COMPILER_V_ATTR;
+    static UString const COMPILER_VL_ATTR;
+    static UString const COMPILER_VR_ATTR;
+
+    static UString const COMPILER_ANYTAG_ELEM;
+    static UString const COMPILER_ANYCHAR_ELEM;
+    static UString const COMPILER_WB_ELEM;
 
 
     /**
@@ -316,7 +323,7 @@ public:
      * @param fichero file
      * @param dir direction
      */
-    void parse(string const &fichero, wstring const &dir);
+    void parse(string const &fichero, UString const &dir);
 
     //  auto getAlt();
     //  auto getInt();
diff --git a/src/lsx_proc.cc b/src/lsx_proc.cc
index fbaedba..eaf327c 100644
--- a/src/lsx_proc.cc
+++ b/src/lsx_proc.cc
@@ -1,6 +1,7 @@
 #include <lttoolbox/lt_locale.h>
 #include <iostream>
 #include <getopt.h>
+#include <libgen.h>
 
 #include "lsx_processor.h"
 
@@ -27,8 +28,8 @@ int main (int argc, char** argv)
   LtLocale::tryToSetLocale();
   
   LSXProcessor fstp;
-  FILE* input = stdin;
-  FILE* output = stdout;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
 
 #if HAVE_GETOPT_LONG
   static struct option long_options[]=
@@ -71,22 +72,20 @@ int main (int argc, char** argv)
   }
   FILE* fst = fopen(argv[optind], "rb");
   if(!fst) {
-    wcerr << "Error: Cannot open file '" << argv[optind] << "' for reading." << endl;
+    cerr << "Error: Cannot open file '" << argv[optind] << "' for reading." << endl;
     exit(EXIT_FAILURE);
   }
   fstp.load(fst);
 
   if (optind <= (argc - 2)) {
-    input = fopen(argv[optind+1], "rb");
-    if (input == NULL || ferror(input)) {
-      wcerr << "Error: Cannot open file '" << argv[optind+1] << "' for reading." << endl;
-      exit(EXIT_FAILURE);
-    }
+    input.open_or_exit(argv[optind+1]);
   }
   if (optind <= (argc - 3)) {
-    output = fopen(argv[optind+2], "wb");
-    if (output == NULL || ferror(output)) {
-      wcerr << "Error: Cannot open file '" << argv[optind+2] << "' for writing." << endl;
+    output = u_fopen(argv[optind+2], "w", NULL, NULL);
+    if (output == NULL) {
+      cerr << "Error: Cannot open file '" << argv[optind+2] << "' for writing." << endl;
+      // Exit like the other open failures instead of continuing with a NULL output.
+      exit(EXIT_FAILURE);
     }
   }
   
diff --git a/src/lsx_processor.cc b/src/lsx_processor.cc
index 7f68dc5..043cf2c 100644
--- a/src/lsx_processor.cc
+++ b/src/lsx_processor.cc
@@ -1,20 +1,21 @@
 #include "lsx_processor.h"
 
 #include <lttoolbox/compression.h>
+#include <cstring>
 
 LSXProcessor::LSXProcessor()
 {
-  escaped_chars.insert(L'[');
-  escaped_chars.insert(L']');
-  escaped_chars.insert(L'{');
-  escaped_chars.insert(L'}');
-  escaped_chars.insert(L'^');
-  escaped_chars.insert(L'$');
-  escaped_chars.insert(L'/');
-  escaped_chars.insert(L'\\');
-  escaped_chars.insert(L'@');
-  escaped_chars.insert(L'<');
-  escaped_chars.insert(L'>');
+  escaped_chars.insert('[');
+  escaped_chars.insert(']');
+  escaped_chars.insert('{');
+  escaped_chars.insert('}');
+  escaped_chars.insert('^');
+  escaped_chars.insert('$');
+  escaped_chars.insert('/');
+  escaped_chars.insert('\\');
+  escaped_chars.insert('@');
+  escaped_chars.insert('<');
+  escaped_chars.insert('>');
 
   null_flush = false;
   dictionary_case = false;
@@ -52,12 +53,12 @@ LSXProcessor::load(FILE *input)
 
   // symbols
   alphabet.read(input);
-  word_boundary = alphabet(L"<$>");
-  any_char = alphabet(L"<ANY_CHAR>");
-  any_tag = alphabet(L"<ANY_TAG>");
+  word_boundary = alphabet("<$>"_u);
+  any_char = alphabet("<ANY_CHAR>"_u);
+  any_tag = alphabet("<ANY_TAG>"_u);
 
   len = Compression::multibyte_read(input);
-  Compression::wstring_read(input); // name
+  Compression::string_read(input); // name
   // there should only be 1 transducer in the file
   // so ignore any subsequent ones
   trans.read(input, alphabet);
@@ -67,65 +68,65 @@ LSXProcessor::load(FILE *input)
 }
 
 void
-LSXProcessor::readNextLU(FILE* input)
+LSXProcessor::readNextLU(InputFile& input)
 {
-  vector<wstring> parts = vector<wstring>(3);
+  vector<UString> parts = vector<UString>(3);
   int loc = 0; // 0 = blank, 1 = bound blank, 2 = LU
   bool box = false; // are we in a [ ] blank
-  while(!feof(input))
+  while(!input.eof())
   {
-    wchar_t c = fgetwc_unlocked(input);
-    if ((unsigned int)c == WEOF) {
+    UChar32 c = input.get();
+    if ((unsigned int)c == U_EOF) {
         break;
     }
-    if(null_flush && c == L'\0')
+    if(null_flush && c == '\0')
     {
       at_end = true;
       at_null = true;
       break;
     }
-    else if(c == L'\\')
+    else if(c == '\\')
     {
       parts[loc] += c;
-      c = fgetwc_unlocked(input);
+      c = input.get();
       parts[loc] += c;
     }
     else if(loc == 0 && box)
     {
-      if(c == L']')
+      if(c == ']')
       {
         box = false;
       }
       parts[loc] += c;
     }
-    else if(loc == 0 && c == L'[')
+    else if(loc == 0 && c == '[')
     {
-      c = fgetwc_unlocked(input);
-      if(c == L'[')
+      c = input.get();
+      if(c == '[')
       {
         loc = 1;
       }
       else
       {
-        parts[loc] += L'[';
+        parts[loc] += '[';
         parts[loc] += c;
-        if(c != L']')
+        if(c != ']')
         {
           box = true;
         }
-        if(c == L'\\')
+        if(c == '\\')
         {
-          parts[loc] += fgetwc_unlocked(input);
+          parts[loc] += input.get();
         }
       }
     }
-    else if(loc == 1 && c == L']')
+    else if(loc == 1 && c == ']')
     {
-      c = fgetwc_unlocked(input);
-      if(c == L']')
+      c = input.get();
+      if(c == ']')
       {
-        c = fgetwc_unlocked(input);
-        if(c == L'^')
+        c = input.get();
+        if(c == '^')
         {
           loc = 2;
         }
@@ -134,25 +135,25 @@ LSXProcessor::readNextLU(FILE* input)
           // this situation is invalid
           // but I like making parsers harder to break than required
           // by the standard
-          parts[loc] += L"]]";
+          parts[loc] += "]]"_u;
           parts[loc] += c;
         }
       }
       else
       {
-        parts[loc] += L']';
+        parts[loc] += ']';
         parts[loc] += c;
-        if(c == L'\\')
+        if(c == '\\')
         {
-          parts[loc] += fgetwc_unlocked(input);
+          parts[loc] += input.get();
         }
       }
     }
-    else if(loc == 0 && c == L'^')
+    else if(loc == 0 && c == '^')
     {
       loc = 2;
     }
-    else if(loc == 2 && c == L'$')
+    else if(loc == 2 && c == '$')
     {
       break;
     }
@@ -161,7 +162,7 @@ LSXProcessor::readNextLU(FILE* input)
       parts[loc] += c;
     }
   }
-  if(feof(input))
+  if(input.eof())
   {
     at_end = true;
   }
@@ -171,7 +172,7 @@ LSXProcessor::readNextLU(FILE* input)
 }
 
 void
-LSXProcessor::processWord(FILE* input, FILE* output)
+LSXProcessor::processWord(InputFile& input, UFILE* output)
 {
   if(lu_queue.size() == 0)
   {
@@ -180,14 +181,14 @@ LSXProcessor::processWord(FILE* input, FILE* output)
   if(at_end && lu_queue.size() == 1 && lu_queue.back().size() == 0)
   {
     // we're at the final blank, no more work to do
-    fputws_unlocked(blank_queue.back().c_str(), output);
+    write(blank_queue.back(), output);
     blank_queue.pop_front();
     bound_blank_queue.pop_front();
     lu_queue.pop_front();
     return;
   }
   size_t last_final = 0;
-  wstring last_final_out;
+  UString last_final_out;
   State s;
   s.init(trans.getInitial());
   size_t idx = 0;
@@ -203,7 +204,7 @@ LSXProcessor::processWord(FILE* input, FILE* output)
       }
       readNextLU(input);
     }
-    wstring lu = lu_queue[idx];
+    UString lu = lu_queue[idx];
     if(lu.size() == 0)
     {
       break;
@@ -214,22 +215,22 @@ LSXProcessor::processWord(FILE* input, FILE* output)
     }
     for(size_t i = 0; i < lu.size(); i++)
     {
-      if(lu[i] == L'<')
+      if(lu[i] == '<')
       {
         size_t j = i+1;
         for(; j < lu.size(); j++)
         {
-          if(lu[j] == L'\\')
+          if(lu[j] == '\\')
           {
             j++;
           }
-          else if(lu[j] == L'>')
+          else if(lu[j] == '>')
           {
             j++;
             break;
           }
         }
-        wstring tag = lu.substr(i, j-i);
+        UString tag = lu.substr(i, j-i);
         i = j-1;
         if(!alphabet.isSymbolDefined(tag))
         {
@@ -239,7 +240,7 @@ LSXProcessor::processWord(FILE* input, FILE* output)
       }
       else
       {
-        if(lu[i] == L'\\')
+        if(lu[i] == '\\')
         {
           i++;
         }
@@ -258,28 +259,24 @@ LSXProcessor::processWord(FILE* input, FILE* output)
   }
   if(last_final == 0)
   {
-    fputws_unlocked(blank_queue.front().c_str(), output);
+    write(blank_queue.front(), output);
     blank_queue.pop_front();
-    if(bound_blank_queue.front().size() > 0)
+    if(!bound_blank_queue.front().empty())
     {
-      fputws_unlocked(L"[[", output);
-      fputws_unlocked(bound_blank_queue.front().c_str(), output);
-      fputws_unlocked(L"]]", output);
+      u_fprintf(output, "[[%S]]", bound_blank_queue.front().c_str());
     }
     bound_blank_queue.pop_front();
-    fputwc_unlocked(L'^', output);
-    fputws_unlocked(lu_queue.front().c_str(), output);
-    fputwc_unlocked(L'$', output);
+    u_fprintf(output, "^%S$", lu_queue.front().c_str());
     lu_queue.pop_front();
     return;
   }
-  vector<wstring> out_lus;
+  vector<UString> out_lus;
   size_t pos = 0;
-  while(pos != wstring::npos && pos != last_final_out.size())
+  while(pos != UString::npos && pos != last_final_out.size())
   {
     size_t start = pos;
-    pos = last_final_out.find(L"<$>", start);
-    if(pos == wstring::npos)
+    pos = last_final_out.find("<$>"_u, start);
+    if(pos == UString::npos)
     {
       out_lus.push_back(last_final_out.substr(start));
     }
@@ -290,26 +287,26 @@ LSXProcessor::processWord(FILE* input, FILE* output)
     }
   }
   
-  wstring wblank;
+  UString wblank;
   for(size_t i = 0; i < last_final; i++)
   {
     if(!bound_blank_queue[i].empty())
     {
       if(wblank.empty())
       {
-        wblank += L"[[";
+        wblank += "[["_u;
       }
       else
       {
-        wblank += L"; ";
+        wblank += "; "_u;
       }
       
-      wblank += bound_blank_queue[i].c_str();
+      wblank += bound_blank_queue[i];
     }
   }
   if(!wblank.empty())
   {
-    wblank += L"]]";
+    wblank += "]]"_u;
   }
   
   size_t i = 0;
@@ -317,22 +314,22 @@ LSXProcessor::processWord(FILE* input, FILE* output)
   {
     if(i < last_final)
     {
-      fputws_unlocked(blank_queue[i].c_str(), output);
+      write(blank_queue[i], output);
     }
     else
     {
-      fputwc_unlocked(L' ', output);
+      u_fputc(' ', output);
     }
-    fputws_unlocked(wblank.c_str(), output);
-    fputwc_unlocked(L'^', output);
-    fputws_unlocked(out_lus[i].c_str(), output);
-    fputwc_unlocked(L'$', output);
+    write(wblank, output);
+    u_fputc('^', output);
+    write(out_lus[i], output);
+    u_fputc('$', output);
   }
   for(; i < last_final; i++)
   {
-    if(blank_queue[i] != L" ")
+    if(blank_queue[i] != " "_u)
     {
-      fputws_unlocked(blank_queue[i].c_str(), output);
+      write(blank_queue[i], output);
     }
   }
   blank_queue.erase(blank_queue.begin(), blank_queue.begin()+last_final);
@@ -341,7 +338,7 @@ LSXProcessor::processWord(FILE* input, FILE* output)
 }
 
 void
-LSXProcessor::process(FILE* input, FILE* output)
+LSXProcessor::process(InputFile& input, UFILE* output)
 {
   while(true)
   {
@@ -351,12 +348,8 @@ LSXProcessor::process(FILE* input, FILE* output)
     }
     if(at_null)
     {
-      fputwc_unlocked(L'\0', output);
-      int code = fflush(output);
-      if(code != 0)
-      {
-        wcerr << L"Could not flush output " << errno << endl;
-      }
+      u_fputc('\0', output);
+      u_fflush(output);
       at_end = false;
       at_null = false;
     }
diff --git a/src/lsx_processor.h b/src/lsx_processor.h
index 90d5a47..264a5dd 100644
--- a/src/lsx_processor.h
+++ b/src/lsx_processor.h
@@ -2,10 +2,11 @@
 #define _LSX_PROCESSOR_H_
 
 #include <lttoolbox/alphabet.h>
-#include <lttoolbox/ltstr.h>
+#include <lttoolbox/input_file.h>
 #include <lttoolbox/my_stdio.h>
 #include <lttoolbox/state.h>
 #include <lttoolbox/trans_exe.h>
+#include <unicode/ustdio.h>
 #include <deque>
 
 class LSXProcessor
@@ -13,8 +14,8 @@ class LSXProcessor
 private:
   TransExe trans;
   State initial_state;
-  set<wchar_t> escaped_chars;
-  set<wchar_t> alphabetic_chars;
+  set<UChar32> escaped_chars;
+  set<UChar32> alphabetic_chars;
   map<Node *, double> all_finals;
   Alphabet alphabet;
   bool null_flush;
@@ -22,12 +23,12 @@ private:
   bool at_end;
   bool at_null;
 
-  deque<wstring> blank_queue;
-  deque<wstring> bound_blank_queue;
-  deque<wstring> lu_queue;
+  deque<UString> blank_queue;
+  deque<UString> bound_blank_queue;
+  deque<UString> lu_queue;
 
-  void readNextLU(FILE* input);
-  void processWord(FILE* input, FILE* output);
+  void readNextLU(InputFile& input);
+  void processWord(InputFile& input, UFILE* output);
 
   int word_boundary;
   int any_char;
@@ -35,7 +36,7 @@ private:
 public:
   LSXProcessor();
   void load(FILE* input);
-  void process(FILE* input, FILE* output);
+  void process(InputFile& input, UFILE* output);
   void setNullFlush(bool val)
   {
     null_flush = val;