commit 81de69848b1b7f0278fa72e78c9d0ac9e3239f1f
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Wed Jun 30 08:49:36 2021 -0500

    ICU stuff (#115)
    
    ICU changes (closes #81)
    - replace all instances of `std::wstring` with `UString` (= `std::basic_string<UChar>`)
    - create `InputFile` wrapper to handle UTF-8 streams with nulls
    
    efficiency, readability, and code style changes
    - eliminate `Ltstr` and `string_to_wostream`
    - simplify Makefile
    - make transducer symbols `int32_t` rather than `int`
    - make common symbols static attributes of `Transducer`
    - extract some other string constants
    - prefer `std::vector` to `std::list`
    - prefer `.clear()` and `.empty()` to `= ""` and `== ""`
    - prefer range-for loops
    - remove old lsx code
    - have `regex_compiler` iterate over the input string rather than modifying it
    - lift a static computation out of a loop in `Transducer::determinize()`
    - move constant initializers to class header
    
    helper function and dependency changes
    - move `StringUtils` here from apertium
    - depend on external utfcpp rather than bundling it
    - make `XMLParseUtil` functions more specific to their typical use cases
    - add `xml_walk_util.h` for cleanly iterating over children of `xmlNode*`

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index b38525d..f2716f4 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -10,7 +10,7 @@ jobs:
     - name: dependencies
       run: |
            sudo apt-get -qy update
-           sudo apt-get -qfy install --no-install-recommends build-essential automake autotools-dev pkg-config libxml2-dev libxml2-utils python3-dev python3-setuptools swig
+           sudo apt-get -qfy install --no-install-recommends build-essential automake autotools-dev pkg-config libutfcpp-dev libxml2-dev libxml2-utils python3-dev python3-setuptools swig
     - name: autoreconf
       run: autoreconf -fvi
     - name: configure
diff --git a/.gitignore b/.gitignore
index 6972eaf..7f5b72f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -73,6 +73,7 @@
 /lttoolbox/lt-expand
 /python/Makefile
 /python/Makefile.in
+/python/lttoolbox.i
 /python/lttoolbox_wrap.cpp
 /python/lttoolbox.py
 /python/setup.py
@@ -80,3 +81,4 @@
 *.egg-info/
 *.egg
 **/.mypy_cache/
+*~
diff --git a/.travis.yml b/.travis.yml
index 312faa8..894f00e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,14 @@ compiler:
   - clang
   - gcc
 
+addons:
+  homebrew:  # Homebrew packages for the non-Linux (brew) builds
+    packages:
+      - icu4c
+      - utf8cpp
+
 before_install:
-  - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install -y swig; else brew install swig; fi
+  - if [ $TRAVIS_OS_NAME = linux ]; then sudo apt-get install -y swig libutfcpp-dev; else brew install swig utf8cpp; fi
 script:
   - $CXX --version
   - autoreconf -fvi
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09755dd..3f42928 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,7 +102,6 @@ if(WIN32)
 	add_definitions(-D_SECURE_SCL=0 -D_ITERATOR_DEBUG_LEVEL=0 -D_CRT_SECURE_NO_DEPRECATE -DWIN32_LEAN_AND_MEAN -DVC_EXTRALEAN -DNOMINMAX)
 	add_definitions(-DSTDC_HEADERS -DREGEX_MALLOC)
 	include_directories("lttoolbox/win32")
-	include_directories("utf8")
 else()
 	add_definitions(-D_POSIX_C_SOURCE=200112 -D_GNU_SOURCE)
 endif()
@@ -110,7 +109,7 @@ endif()
 # Unlocked I/O functions
 include(CheckSymbolExists)
 set(CMAKE_REQUIRED_DEFINITIONS -D_POSIX_C_SOURCE=200112 -D_GNU_SOURCE)
-foreach(func fread_unlocked fwrite_unlocked fgetc_unlocked fputc_unlocked fputs_unlocked fgetwc_unlocked fputwc_unlocked fputws_unlocked)
+foreach(func fread_unlocked fwrite_unlocked fgetc_unlocked fputc_unlocked fputs_unlocked)
 	string(TOUPPER ${func} _uc)
 	CHECK_SYMBOL_EXISTS(${func} "stdio.h" HAVE_DECL_${_uc})
 	if(HAVE_DECL_${_uc})
diff --git a/Makefile.am b/Makefile.am
index e07e620..13a7779 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,7 +10,7 @@ endif
 pkgconfigdir = $(libdir)/pkgconfig
 pkgconfig_DATA = lttoolbox.pc
 
-EXTRA_DIST=autogen.sh utf8 tests
+EXTRA_DIST=autogen.sh tests
 
 # TODO: the below will use python3 if you run it on Arch Linux with no python2 installed
 test: tests/run_tests.py
diff --git a/README b/README
index 32fe0dc..54dbd6c 100644
--- a/README
+++ b/README
@@ -51,6 +51,8 @@ Requirements:
 * g++ >= 2.95
 * GNU make
 * libxml2 >= 2.6.17
+* ICU
+* utfcpp
 
 Building & installing:
 
diff --git a/configure.ac b/configure.ac
index 8e5dee4..05fce75 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,8 +1,8 @@
 AC_PREREQ(2.52)
 
 m4_define([PKG_VERSION_MAJOR], [3])
-m4_define([PKG_VERSION_MINOR], [5])
-m4_define([PKG_VERSION_PATCH], [3])
+m4_define([PKG_VERSION_MINOR], [6])
+m4_define([PKG_VERSION_PATCH], [0])
 
 AC_INIT([lttoolbox], [PKG_VERSION_MAJOR.PKG_VERSION_MINOR.PKG_VERSION_PATCH], [apertium-stuff@lists.sourceforge.net], [lttoolbox], [https://wiki.apertium.org/wiki/Lttoolbox])
 
@@ -38,29 +38,8 @@ AC_ARG_ENABLE(profile,
               [CXXFLAGS="-pg -g -Wall"; CFLAGS="-pg -g -Wall"; LDFLAGS="-pg"])
 
 
-PKG_CHECK_MODULES(LTTOOLBOX, [libxml-2.0 >= 2.6.17])
-
-# Check for wide strings
-AC_DEFUN([AC_CXX_WSTRING],[
-  AC_CACHE_CHECK(whether the compiler supports wide strings,
-  ac_cv_cxx_wstring,
-  [AC_LANG_SAVE
-   AC_LANG_CPLUSPLUS
-   AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <string>]],[[
-std::wstring test = L"test";
-   ]])],
-   [ac_cv_cxx_wstring=yes], [ac_cv_cxx_wstring=no])
-   AC_LANG_RESTORE
-  ])
-])
-
-AC_CXX_WSTRING
-
-if test "$ac_cv_cxx_wstring" = no
-then
-  AC_MSG_ERROR([Missing wide string support])
-fi
-
+PKG_CHECK_MODULES(LIBXML, [libxml-2.0 >= 2.6.17])
+PKG_CHECK_MODULES(ICU, [icu-i18n, icu-io, icu-uc])
 
 # Checks for libraries.
 AC_CHECK_LIB(xml2, xmlReaderForFile)
@@ -68,6 +47,7 @@ AC_CHECK_LIB(xml2, xmlReaderForFile)
 # Checks for header files.
 AC_HEADER_STDC
 AC_CHECK_HEADERS([stdlib.h string.h unistd.h stddef.h])
+AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
 
 # Checks for typedefs, structures, and compiler characteristics.
 AC_HEADER_STDBOOL
@@ -78,7 +58,7 @@ AC_TYPE_SIZE_T
 AC_FUNC_ERROR_AT_LINE
 
 AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, \
-fputc_unlocked, fputs_unlocked, fgetwc_unlocked, fputwc_unlocked, fputws_unlocked, ungetwc_unlocked])
+fputc_unlocked, fputs_unlocked])
 
 AC_CHECK_FUNCS([setlocale strdup getopt_long])
 
diff --git a/lttoolbox/CMakeLists.txt b/lttoolbox/CMakeLists.txt
index 8a8aa0e..8b25032 100644
--- a/lttoolbox/CMakeLists.txt
+++ b/lttoolbox/CMakeLists.txt
@@ -57,7 +57,6 @@ if(WIN32)
 		win32/regex.c
 		win32/regex.h
 		win32/unistd.h
-		${CMAKE_SOURCE_DIR}/utf8/utf8_fwrap.h
 		${LIBLTTOOLBOX_SOURCES}
 		)
 	if(NOT VCPKG_TOOLCHAIN)
diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am
index fb44eeb..2fd56b0 100644
--- a/lttoolbox/Makefile.am
+++ b/lttoolbox/Makefile.am
@@ -1,15 +1,15 @@
 
 h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h  \
-            deserialiser.h entry_token.h expander.h fst_processor.h lt_locale.h \
-            ltstr.h match_exe.h match_node.h match_state.h my_stdio.h node.h \
-            pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h \
-            transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h \
-            string_to_wostream.h
+            deserialiser.h entry_token.h expander.h fst_processor.h input_file.h lt_locale.h \
+            match_exe.h match_node.h match_state.h my_stdio.h node.h \
+            pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h \
+            transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \
+            ustring.h
 cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \
-             expander.cc fst_processor.cc lt_locale.cc match_exe.cc \
+             expander.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \
              match_node.cc match_state.cc node.cc pattern_list.cc \
-             regexp_compiler.cc sorted_vector.cc state.cc transducer.cc \
-             trans_exe.cc xml_parse_util.cc tmx_compiler.cc
+             regexp_compiler.cc sorted_vector.cc state.cc string_utils.cc transducer.cc \
+             trans_exe.cc xml_parse_util.cc xml_walk_util.cc tmx_compiler.cc ustring.cc
 
 library_includedir = $(includedir)/$(PACKAGE_NAME)-$(VERSION_API)/$(PACKAGE_NAME)
 library_include_HEADERS = $(h_sources)
@@ -27,33 +27,16 @@ lttoolboxlib = $(prefix)/lib
 
 lttoolbox_DATA = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd
 
-lt_print_SOURCES = lt_print.cc
-lt_print_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_print_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
+LDADD = liblttoolbox$(VERSION_MAJOR).la
+AM_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LIBXML_LIBS) $(ICU_LIBS)
 
+lt_print_SOURCES = lt_print.cc
 lt_trim_SOURCES = lt_trim.cc
-lt_trim_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_trim_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
-
 lt_comp_SOURCES = lt_comp.cc
-lt_comp_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_comp_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
-
 lt_proc_SOURCES = lt_proc.cc
-lt_proc_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_proc_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
-
 lt_expand_SOURCES = lt_expand.cc
-lt_expand_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_expand_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
-
 lt_tmxcomp_SOURCES = lt_tmxcomp.cc
-lt_tmxcomp_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_tmxcomp_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
-
 lt_tmxproc_SOURCES = lt_tmxproc.cc
-lt_tmxproc_LDADD = liblttoolbox$(VERSION_MAJOR).la
-lt_tmxproc_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
 
 #lt-validate-dictionary: Makefile.am validate-header.sh
 #	@echo "Creating lt-validate-dictionary script"
@@ -67,10 +50,7 @@ lt_tmxproc_LDFLAGS = -llttoolbox$(VERSION_MAJOR) $(LTTOOLBOX_LIBS)
 
 man_MANS = lt-comp.1 lt-expand.1 lt-proc.1 lt-tmxcomp.1 lt-tmxproc.1 lt-print.1 lt-trim.1
 
-INCLUDES = -I$(top_srcdir) $(LTTOOLBOX_CFLAGS)
-if WINDOWS
-  INCLUDES += -I$(top_srcdir)/utf8
-endif
+INCLUDES = -I$(top_srcdir) $(LIBXML_CFLAGS) $(ICU_CFLAGS)
 CLEANFILES = *~
 
 EXTRA_DIST = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd $(man_MANS)
diff --git a/lttoolbox/alphabet.cc b/lttoolbox/alphabet.cc
index 6a47095..a313814 100644
--- a/lttoolbox/alphabet.cc
+++ b/lttoolbox/alphabet.cc
@@ -23,19 +23,16 @@
 #include <cctype>
 #include <cstdlib>
 #include <set>
-#include <cwchar>
-#include <cwctype>
 
-#if defined(_WIN32) && !defined(_MSC_VER)
-#include <utf8_fwrap.h>
-#endif
+#include <unicode/uchar.h>
 
 using namespace std;
+using namespace icu;
 
 Alphabet::Alphabet()
 {
-  spair[pair<int, int>(0,0)] = 0;
-  spairinv.push_back(pair<int, int>(0,0));
+  spair[pair<int32_t, int32_t>(0,0)] = 0;
+  spairinv.push_back(pair<int32_t, int32_t>(0,0));
 }
 
 Alphabet::~Alphabet()
@@ -74,23 +71,23 @@ Alphabet::copy(Alphabet const &a)
 }
 
 void
-Alphabet::includeSymbol(wstring const &s)
+Alphabet::includeSymbol(UString const &s)
 {
   if(slexic.find(s) == slexic.end())
   {
-    int slexic_size = slexic.size();
+    int32_t slexic_size = slexic.size();
     slexic[s] = -(slexic_size+1);
     slexicinv.push_back(s);
   }
 }
 
-int
-Alphabet::operator()(int const c1, int const c2)
+int32_t
+Alphabet::operator()(int32_t const c1, int32_t const c2)
 {
   auto tmp = make_pair(c1, c2);
   if(spair.find(tmp) == spair.end())
   {
-    int spair_size = spair.size();
+    int32_t spair_size = spair.size();
     spair[tmp] = spair_size;
     spairinv.push_back(tmp);
   }
@@ -98,14 +95,14 @@ Alphabet::operator()(int const c1, int const c2)
   return spair[tmp];
 }
 
-int
-Alphabet::operator()(wstring const &s)
+int32_t
+Alphabet::operator()(UString const &s)
 {
   return slexic[s];
 }
 
-int
-Alphabet::operator()(wstring const &s) const
+int32_t
+Alphabet::operator()(UString const &s) const
 {
   auto it = slexic.find(s);
   if (it == slexic.end()) {
@@ -115,12 +112,12 @@ Alphabet::operator()(wstring const &s) const
 }
 
 bool
-Alphabet::isSymbolDefined(wstring const &s)
+Alphabet::isSymbolDefined(UString const &s)
 {
   return slexic.find(s) != slexic.end();
 }
 
-int
+int32_t
 Alphabet::size() const
 {
   return slexic.size();
@@ -131,16 +128,16 @@ Alphabet::write(FILE *output)
 {
   // First, we write the taglist
   Compression::multibyte_write(slexicinv.size(), output);  // taglist size
-  for(unsigned int i = 0, limit = slexicinv.size(); i < limit; i++)
+  for(size_t i = 0, limit = slexicinv.size(); i < limit; i++)
   {
-    Compression::wstring_write(slexicinv[i].substr(1, slexicinv[i].size()-2), output);
+    Compression::string_write(slexicinv[i].substr(1, slexicinv[i].size()-2), output);
   }
 
   // Then we write the list of pairs
   // All numbers are biased + slexicinv.size() to be positive or zero
-  unsigned int bias = slexicinv.size();
+  size_t bias = slexicinv.size();
   Compression::multibyte_write(spairinv.size(), output);
-  for(unsigned int i = 0, limit = spairinv.size(); i != limit; i++)
+  for(size_t i = 0, limit = spairinv.size(); i != limit; i++)
   {
     Compression::multibyte_write(spairinv[i].first + bias, output);
     Compression::multibyte_write(spairinv[i].second + bias, output);
@@ -155,26 +152,28 @@ Alphabet::read(FILE *input)
   a_new.spair.clear();
 
   // Reading of taglist
-  int tam = Compression::multibyte_read(input);
-  map<int, string> tmp;
+  int32_t tam = Compression::multibyte_read(input);
+  map<int32_t, string> tmp;
   while(tam > 0)
   {
     tam--;
-    wstring mytag = L"<" + Compression::wstring_read(input) + L">";
+    UString mytag = "<"_u;
+    mytag += Compression::string_read(input);
+    mytag += ">"_u;
     a_new.slexicinv.push_back(mytag);
     a_new.slexic[mytag]= -a_new.slexicinv.size(); // ToDo: This does not turn the result negative due to unsigned semantics
   }
 
   // Reading of pairlist
-  unsigned int bias = a_new.slexicinv.size();
+  size_t bias = a_new.slexicinv.size();
   tam = Compression::multibyte_read(input);
   while(tam > 0)
   {
     tam--;
-    int first = Compression::multibyte_read(input);
-    int second = Compression::multibyte_read(input);
-    pair<int, int> tmp(first - bias, second - bias);
-    int spair_size = a_new.spair.size();
+    int32_t first = Compression::multibyte_read(input);
+    int32_t second = Compression::multibyte_read(input);
+    pair<int32_t, int32_t> tmp(first - bias, second - bias);
+    int32_t spair_size = a_new.spair.size();
     a_new.spair[tmp] = spair_size;
     a_new.spairinv.push_back(tmp);
   }
@@ -185,8 +184,8 @@ Alphabet::read(FILE *input)
 void
 Alphabet::serialise(std::ostream &serialised) const
 {
-  Serialiser<const vector<wstring> >::serialise(slexicinv, serialised);
-  Serialiser<vector<pair<int, int> > >::serialise(spairinv, serialised);
+  Serialiser<const vector<UString> >::serialise(slexicinv, serialised);
+  Serialiser<vector<pair<int32_t, int32_t> > >::serialise(spairinv, serialised);
 }
 
 void
@@ -196,31 +195,32 @@ Alphabet::deserialise(std::istream &serialised)
   slexic.clear();
   spairinv.clear();
   spair.clear();
-  slexicinv = Deserialiser<vector<wstring> >::deserialise(serialised);
+  slexicinv = Deserialiser<vector<UString> >::deserialise(serialised);
   for (size_t i = 0; i < slexicinv.size(); i++) {
     slexic[slexicinv[i]] = -i - 1; // ToDo: This does not turn the result negative due to unsigned semantics
   }
-  spairinv = Deserialiser<vector<pair<int, int> > >::deserialise(serialised);
+  spairinv = Deserialiser<vector<pair<int32_t, int32_t> > >::deserialise(serialised);
   for (size_t i = 0; i < slexicinv.size(); i++) {
     spair[spairinv[i]] = i;
   }
 }
 
 void
-Alphabet::writeSymbol(int const symbol, FILE *output) const
+Alphabet::writeSymbol(int32_t const symbol, UFILE *output) const
 {
   if(symbol < 0)
   {
-    fputws_unlocked(slexicinv[-symbol-1].c_str(), output);
+    // write() has a name conflict
+    u_fprintf(output, "%S", slexicinv[-symbol-1].c_str());
   }
   else
   {
-    fputwc_unlocked(static_cast<wchar_t>(symbol), output);
+    u_fputc(static_cast<UChar32>(symbol), output);
   }
 }
 
 void
-Alphabet::getSymbol(wstring &result, int const symbol, bool uppercase) const
+Alphabet::getSymbol(UString &result, int32_t const symbol, bool uppercase) const
 {
   if(symbol == 0)
   {
@@ -231,7 +231,7 @@ Alphabet::getSymbol(wstring &result, int const symbol, bool uppercase) const
   {
     if(symbol >= 0)
     {
-      result += static_cast<wchar_t>(symbol);
+      result += static_cast<UChar32>(symbol);
     }
     else
     {
@@ -240,7 +240,7 @@ Alphabet::getSymbol(wstring &result, int const symbol, bool uppercase) const
   }
   else if(symbol >= 0)
   {
-    result += static_cast<wchar_t>(towupper(static_cast<wint_t>(symbol)));
+    result += u_toupper(static_cast<UChar32>(symbol));
   }
   else
   {
@@ -249,20 +249,20 @@ Alphabet::getSymbol(wstring &result, int const symbol, bool uppercase) const
 }
 
 bool
-Alphabet::isTag(int const symbol) const
+Alphabet::isTag(int32_t const symbol) const
 {
   return symbol < 0;
 }
 
-pair<int, int> const &
-Alphabet::decode(int const code) const
+pair<int32_t, int32_t> const &
+Alphabet::decode(int32_t const code) const
 {
   return spairinv[code];
 }
 
-set<int>
-Alphabet::symbolsWhereLeftIs(wchar_t l) const {
-  set<int> eps;
+set<int32_t>
+Alphabet::symbolsWhereLeftIs(UChar32 l) const {
+  set<int32_t> eps;
   for(const auto& sp: spair) {  // [(l, r) : tag]
     if(sp.first.first == l) {
       eps.insert(sp.second);
@@ -271,17 +271,17 @@ Alphabet::symbolsWhereLeftIs(wchar_t l) const {
   return eps;
 }
 
-void Alphabet::setSymbol(int symbol, wstring newSymbolString) {
+void Alphabet::setSymbol(int32_t symbol, UString newSymbolString) {
   //Should be a special character!
   if (symbol < 0) slexicinv[-symbol-1] = newSymbolString;
 }
 
 void
-Alphabet::createLoopbackSymbols(set<int> &symbols, Alphabet &basis, Side s, bool nonTagsToo)
+Alphabet::createLoopbackSymbols(set<int32_t> &symbols, Alphabet &basis, Side s, bool nonTagsToo)
 {
-  // Non-tag letters get the same int in spairinv across alphabets,
+  // Non-tag letters get the same int32_t in spairinv across alphabets,
   // but tags may differ, so do those separately afterwards.
-  set<int> tags;
+  set<int32_t> tags;
   for(auto& it : basis.spairinv)
   {
     if(s == left) {
diff --git a/lttoolbox/alphabet.h b/lttoolbox/alphabet.h
index 3218334..8c6dec2 100644
--- a/lttoolbox/alphabet.h
+++ b/lttoolbox/alphabet.h
@@ -22,10 +22,11 @@
 #include <map>
 #include <set>
 #include <vector>
-
-#include <lttoolbox/ltstr.h>
+#include <cstdint>
+#include "ustring.h"
 
 using namespace std;
+using namespace icu;
 
 /**
  * Alphabet class.
@@ -38,27 +39,27 @@ private:
    * Symbol-identifier relationship. Only contains <tags>.
    * @see slexicinv
    */
-  map<wstring, int, Ltstr> slexic;
+  map<UString, int32_t> slexic;
 
   /**
    * Identifier-symbol relationship. Only contains <tags>.
    * @see slexic
    */
-  vector<wstring> slexicinv;
+  vector<UString> slexicinv;
 
 
   /**
    * Map from symbol-pairs to symbols; tags get negative numbers,
-   * other characters are wchar_t's casted to ints.
+   * other characters are UChar32 values cast to int32_t.
    * @see spairinv
    */
-  map<pair<int,int>, int> spair;
+  map<pair<int32_t, int32_t>, int32_t> spair;
 
   /**
    * All symbol-pairs (both <tags> and letters).
    * @see spair
    */
-  vector<pair<int, int> > spairinv;
+  vector<pair<int32_t, int32_t> > spairinv;
 
 
   void copy(Alphabet const &a);
@@ -89,7 +90,7 @@ public:
   /**
    * Include a symbol into the alphabet.
    */
-  void includeSymbol(wstring const &s);
+  void includeSymbol(UString const &s);
 
   /**
    * Get an unique code for every symbol pair.  This flavour is for
@@ -98,8 +99,8 @@ public:
    * @param c2 right symbol.
    * @return code for (c1, c2).
    */
-  int operator()(int const c1, int const c2);
-  int operator()(wstring const &s) const;
+  int32_t operator()(int32_t const c1, int32_t const c2);
+  int32_t operator()(UString const &s) const;
 
   /**
    * Gets the individual symbol identifier. Assumes it already exists!
@@ -107,20 +108,20 @@ public:
    * @param s symbol to be identified.
    * @return symbol identifier.
    */
-  int operator()(wstring const &s);
+  int32_t operator()(UString const &s);
 
   /**
    * Check wether the symbol is defined in the alphabet.
    * @param s symbol
    * @return true if defined
    */
-  bool isSymbolDefined(wstring const &s);
+  bool isSymbolDefined(UString const &s);
 
   /**
    * Returns the size of the alphabet (number of symbols).
    * @return number of symbols.
    */
-  int size() const;
+  int32_t size() const;
 
   /**
    * Write method.
@@ -142,7 +143,7 @@ public:
    * @param symbol symbol code.
    * @param output output stream.
    */
-  void writeSymbol(int const symbol, FILE *output) const;
+  void writeSymbol(int32_t const symbol, UFILE *output) const;
 
   /**
    * Concat a symbol in the string that is passed by reference.
@@ -150,7 +151,7 @@ public:
    * @param symbol code of the symbol
    * @param uppercase true if we want an uppercase symbol
    */
-  void getSymbol(wstring &result, int const symbol,
+  void getSymbol(UString &result, int32_t const symbol,
 		 bool uppercase = false) const;
 
   /**
@@ -158,27 +159,27 @@ public:
    * @param symbol the code of the symbol
    * @return true if the symbol is a tag
    */
-  bool isTag(int const symbol) const;
+  bool isTag(int32_t const symbol) const;
 
   /**
    * Sets an already existing symbol to represent a new value.
    * @param symbol the code of the symbol to set
    * @param newSymbolString the new string for this symbol
    */
-  void setSymbol(int symbol, wstring newSymbolString);
+  void setSymbol(int32_t symbol, UString newSymbolString);
 
   /**
    * Note: both the symbol int and int-pair are specific to this alphabet instance.
-   * @see operator() to go from general wstrings to alphabet-specific ints.
+   * @see operator() to go from general strings to alphabet-specific ints.
    * @param code a symbol
    * @return the pair which code represents in this alphabet
    */
-  pair<int, int> const & decode(int const code) const;
+  pair<int32_t, int32_t> const & decode(int32_t const code) const;
 
   /**
    * Get all symbols where the left-hand side of the symbol-pair is l.
    */
-  set<int> symbolsWhereLeftIs(wchar_t l) const;
+  set<int32_t> symbolsWhereLeftIs(UChar32 l) const;
 
   enum Side
   {
@@ -195,7 +196,7 @@ public:
    * @param s whether to loopback on the left or right side of the symbol-pair
    * @param nonTagsToo by default only tags are included, but if this is true we include all symbols
    */
-  void createLoopbackSymbols(set<int> &symbols, Alphabet &basis, Side s = right, bool nonTagsToo = false);
+  void createLoopbackSymbols(set<int32_t> &symbols, Alphabet &basis, Side s = right, bool nonTagsToo = false);
 };
 
 #endif
diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc
index a511f5a..eaa0dd8 100644
--- a/lttoolbox/att_compiler.cc
+++ b/lttoolbox/att_compiler.cc
@@ -19,21 +19,24 @@
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/transducer.h>
 #include <lttoolbox/compression.h>
-#include <lttoolbox/string_to_wostream.h>
+#include <lttoolbox/string_utils.h>
 #include <algorithm>
 #include <stack>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <utf8.h>
+#include <unicode/utf16.h>
 
 using namespace std;
+using namespace icu;
 
 AttCompiler::AttCompiler() :
 starting_state(0),
 default_weight(0.0000)
-{
-}
+{}
 
 AttCompiler::~AttCompiler()
-{
-}
+{}
 
 void
 AttCompiler::clear()
@@ -52,21 +55,24 @@ AttCompiler::clear()
  *       for conversion?
  */
 void
-AttCompiler::convert_hfst(wstring& symbol)
+AttCompiler::convert_hfst(UString& symbol)
 {
-  if (symbol == L"@0@" || symbol == L"ε")
-  {
-    symbol = L"";
-  }
-  else if (symbol == L"@_SPACE_@")
-  {
-    symbol = L" ";
+  if (symbol == Transducer::HFST_EPSILON_SYMBOL_SHORT ||
+      symbol == Transducer::HFST_EPSILON_SYMBOL_LONG ||
+      symbol == Transducer::LTTB_EPSILON_SYMBOL) {
+    symbol.clear();
+  } else if (symbol == Transducer::HFST_SPACE_SYMBOL) {
+    symbol = " "_u;
+  } else if (symbol == Transducer::HFST_TAB_SYMBOL) {
+    symbol = "\t"_u;
   }
 }
 
 bool
-AttCompiler::is_word_punct(wchar_t symbol)
+AttCompiler::is_word_punct(UChar32 symbol)
 {
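+  // this version isn't quite right (u_charType() returns a single UCharCategory value rather than a bit mask, so U_GET_GC_MASK() with U_GC_M_MASK would be needed), but something like it should be possible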
+  //return u_charType(symbol) & (U_NON_SPACING_MARK | U_ENCLOSING_MARK | U_COMBINING_SPACING_MARK);
   // https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges
   if((symbol >= 0x0300 && symbol <= 0x036F) // Combining Diacritics
   || (symbol >= 0x1AB0 && symbol <= 0x1AFF) // ... Extended
@@ -90,115 +96,108 @@ AttCompiler::is_word_punct(wchar_t symbol)
  *         only) character otherwise.
  */
 int
-AttCompiler::symbol_code(const wstring& symbol)
+AttCompiler::symbol_code(const UString& symbol)
 {
-  if (symbol.length() > 1) {
+  if (u_strHasMoreChar32Than(symbol.c_str(), -1, 1)) {
     alphabet.includeSymbol(symbol);
     return alphabet(symbol);
-  } else if (symbol == L"") {
+  } else if (symbol.empty()) {
     return 0;
-  } else if ((iswpunct(symbol[0]) || iswspace(symbol[0])) && !is_word_punct(symbol[0])) {
-    return symbol[0];
   } else {
-    letters.insert(symbol[0]);
-    if(iswlower(symbol[0]))
-    {
-      letters.insert(towupper(symbol[0]));
-    }
-    else if(iswupper(symbol[0]))
-    {
-      letters.insert(towlower(symbol[0]));
+    UChar32 c;
+    U16_GET(symbol, 0, 0, symbol.size(), c);
+    if ((u_ispunct(c) || u_isspace(c)) && !is_word_punct(c)) {
+      return c;
+    } else {
+      letters.insert(c);
+      if(u_islower(c)) {
+        letters.insert(u_toupper(c));
+      } else if(u_isupper(c)) {
+        letters.insert(u_tolower(c));
+      }
+      return c;
     }
-    return symbol[0];
   }
 }
 
-bool
-AttCompiler::has_multiple_fsts(string const &file_name)
-{
-  wifstream infile(file_name.c_str());  // TODO: error checking
-  wstring line;
-
-  while(getline(infile, line)){
-    if (line.find('-') == 0)
-      return true;
-  }
-
-  return false;
-}
-
 void
-AttCompiler::parse(string const &file_name, wstring const &dir)
+AttCompiler::parse(string const &file_name, bool read_rl)
 {
   clear();
 
-  wifstream infile(file_name.c_str());  // TODO: error checking
-  vector<wstring> tokens;
-  wstring line;
+  UFILE* infile = u_fopen(file_name.c_str(), "r", NULL, NULL);
+  if (infile == NULL) {
+    cerr << "Error: unable to open '" << file_name << "' for reading." << endl;
+  }
+  vector<UString> tokens;
   bool first_line_in_fst = true;       // First line -- see below
-  int state_id_offset = 0;
+  bool multiple_transducers = false;
+  int state_id_offset = 1;
   int largest_seen_state_id = 0;
+  int line_number = 0;
 
-  if (has_multiple_fsts(file_name)){
-    wcerr << "Warning: Multiple fsts in '" << file_name << "' will be disjuncted." << endl;
-
-    // Set the starting state to 0 (Epsilon transtions will be added later)
-    starting_state = 0;
-    state_id_offset = 1;
-  }
-
-  while (getline(infile, line))
+  while (!u_feof(infile))
   {
+    line_number++;
     tokens.clear();
+    tokens.push_back(""_u);
+    do {
+      UChar c = u_fgetc(infile);
+      if (c == '\n') {
+        break;
+      } else if (c == '\t') {
+        tokens.push_back(""_u);
+      } else {
+        tokens.back() += c;
+      }
+    } while (!u_feof(infile));
+
     int from, to;
-    wstring upper, lower;
+    UString upper, lower;
     double weight;
 
-    if (line.length() == 0 && first_line_in_fst)
+    if (tokens[0].length() == 0 && first_line_in_fst)
     {
-      wcerr << "Error: empty file '" << file_name << "'." << endl;
+      cerr << "Error: empty file '" << file_name << "'." << endl;
       exit(EXIT_FAILURE);
     }
-    if (first_line_in_fst && line.find(L"\t") == wstring::npos)
+    if (first_line_in_fst && tokens.size() == 1)
     {
-      wcerr << "Error: invalid format '" << file_name << "'." << endl;
+      cerr << "Error: invalid format in file '" << file_name << "' on line " << line_number << "." << endl;
       exit(EXIT_FAILURE);
     }
 
     /* Empty line. */
-    if (line.length() == 0)
+    if (tokens.size() == 1 && tokens[0].length() == 0)
     {
       continue;
     }
-    split(line, L'\t', tokens);
 
     if (tokens[0].find('-') == 0)
     {
+      if (state_id_offset == 1) {
+        // this is the first split we've seen
+        cerr << "Warning: Multiple fsts in '" << file_name << "' will be disjuncted." << endl;
+        multiple_transducers = true;
+      }
       // Update the offset for the new FST
       state_id_offset = largest_seen_state_id + 1;
       first_line_in_fst = true;
       continue;
     }
 
-    from = stoi(tokens[0]) + state_id_offset;
+    from = StringUtils::stoi(tokens[0]) + state_id_offset;
     largest_seen_state_id = max(largest_seen_state_id, from);
 
     AttNode* source = get_node(from);
     /* First line: the initial state is of both types. */
     if (first_line_in_fst)
     {
-      // If the file has a single FST - No need for state id mapping
-      if (state_id_offset == 0)
-        starting_state = from;
-      else{
-        AttNode * starting_node = get_node(starting_state);
-
-        // Add an Epsilon transition from the new starting state
-        starting_node->transductions.push_back(
-          Transduction(from, L"", L"",
-            alphabet(symbol_code(L""), symbol_code(L"")),
-            default_weight));
-      }
+      AttNode * starting_node = get_node(starting_state);
+
+      // Add an Epsilon transition from the new starting state
+      starting_node->transductions.push_back(
+                     Transduction(from, ""_u, ""_u, 0, default_weight));
       first_line_in_fst = false;
     }
 
@@ -207,7 +206,7 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
     {
       if (tokens.size() > 1)
       {
-        weight = stod(tokens[1]);
+        weight = StringUtils::stod(tokens[1]);
       }
       else
       {
@@ -217,9 +216,9 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
     }
     else
     {
-      to = stoi(tokens[1]) + state_id_offset;
+      to = StringUtils::stoi(tokens[1]) + state_id_offset;
       largest_seen_state_id = max(largest_seen_state_id, to);
-      if(dir == L"RL")
+      if(read_rl)
       {
         upper = tokens[3];
         lower = tokens[2];
@@ -234,7 +233,7 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
       int tag = alphabet(symbol_code(upper), symbol_code(lower));
       if(tokens.size() > 4)
       {
-        weight = stod(tokens[4]);
+        weight = StringUtils::stod(tokens[4]);
       }
       else
       {
@@ -247,12 +246,19 @@ AttCompiler::parse(string const &file_name, wstring const &dir)
     }
   }
 
+  if (!multiple_transducers) {
+    starting_state = 1;
+    // if we aren't disjuncting multiple transducers
+    // then we have an extra epsilon transduction at the beginning
+    // so skip it
+  }
+
   /* Classify the nodes of the graph. */
   classify_forwards();
   set<int> path;
   classify_backwards(starting_state, path);
 
-  infile.close();
+  u_fclose(infile);
 }
 
 /** Extracts the sub-transducer made of states of type @p type. */
@@ -268,27 +274,27 @@ AttCompiler::extract_transducer(TransducerType type)
   _extract_transducer(type, starting_state, transducer, corr, visited);
 
   /* The final states. */
-  bool noFinals = true;
+  //bool noFinals = true;
   for (auto& f : finals)
   {
     if (corr.find(f.first) != corr.end())
     {
       transducer.setFinal(corr[f.first], f.second);
-      noFinals = false;
+      //noFinals = false;
     }
   }
 
 /*
   if(noFinals)
   {
-    wcerr << L"No final states (" << type << ")" << endl;
-    wcerr << L"  were:" << endl;
-    wcerr << L"\t" ;
+    cerr << "No final states (" << type << ")" << endl;
+    cerr << "  were:" << endl;
+    cerr << "\t" ;
     for (auto& f : finals)
     {
-      wcerr << f.first << L" ";
+      cerr << f.first << " ";
     }
-    wcerr << endl;
+    cerr << endl;
   }
 */
   return transducer;
@@ -353,11 +359,12 @@ AttCompiler::_extract_transducer(TransducerType type, int from,
 void
 AttCompiler::classify_single_transition(Transduction& t)
 {
-  if (t.upper.length() == 1) {
-    if (letters.find(t.upper[0]) != letters.end()) {
+  int32_t sym = alphabet.decode(t.tag).first;
+  if (sym > 0) {
+    if (letters.find(sym) != letters.end()) {
       t.type |= WORD;
     }
-    if (iswpunct(t.upper[0])) {
+    if (u_ispunct(sym)) {
       t.type |= PUNCT;
     }
   }
@@ -380,10 +387,10 @@ AttCompiler::classify_forwards()
     for(auto& t1 : n1->transductions) {
       AttNode* n2 = get_node(t1.to);
       for(auto& t2 : n2->transductions) {
-	t2.type |= t1.type;
+        t2.type |= t1.type;
       }
       if(done.find(t1.to) == done.end()) {
-	todo.push(t1.to);
+        todo.push(t1.to);
       }
     }
     done.insert(next);
@@ -400,7 +407,7 @@ TransducerType
 AttCompiler::classify_backwards(int state, set<int>& path)
 {
   if(finals.find(state) != finals.end()) {
-    wcerr << L"ERROR: Transducer contains epsilon transition to a final state. Aborting." << endl;
+    cerr << "ERROR: Transducer contains epsilon transition to a final state. Aborting." << endl;
     exit(EXIT_FAILURE);
   }
   AttNode* node = get_node(state);
@@ -409,7 +416,7 @@ AttCompiler::classify_backwards(int state, set<int>& path)
     if(t1.type != UNDECIDED) {
       type |= t1.type;
     } else if(path.find(t1.to) != path.end()) {
-      wcerr << L"ERROR: Transducer contains initial epsilon loop. Aborting." << endl;
+      cerr << "ERROR: Transducer contains initial epsilon loop. Aborting." << endl;
       exit(EXIT_FAILURE);
     } else {
       path.insert(t1.to);
@@ -429,14 +436,14 @@ void
 AttCompiler::write(FILE *output)
 {
 //  FILE* output = fopen(file_name, "wb");
-  fwrite(HEADER_LTTOOLBOX, 1, 4, output);
+  fwrite_unlocked(HEADER_LTTOOLBOX, 1, 4, output);
   uint64_t features = 0;
   write_le(output, features);
 
   Transducer punct_fst = extract_transducer(PUNCT);
 
   /* Non-multichar symbols. */
-  Compression::wstring_write(wstring(letters.begin(), letters.end()), output);
+  Compression::string_write(UString(letters.begin(), letters.end()), output);
   /* Multichar symbols. */
   alphabet.write(output);
   /* And now the FST. */
@@ -448,17 +455,17 @@ AttCompiler::write(FILE *output)
   {
     Compression::multibyte_write(2, output);
   }
-  Compression::wstring_write(L"main@standard", output);
+  Compression::string_write("main@standard"_u, output);
   Transducer word_fst = extract_transducer(WORD);
   word_fst.write(output);
-  wcout << L"main@standard" << " " << word_fst.size();
-  wcout << " " << word_fst.numberOfTransitions() << endl;
-  Compression::wstring_write(L"final@inconditional", output);
+  cout << "main@standard" << " " << word_fst.size();
+  cout << " " << word_fst.numberOfTransitions() << endl;
+  Compression::string_write("final@inconditional"_u, output);
   if(punct_fst.numberOfTransitions() != 0)
   {
     punct_fst.write(output);
-    wcout << L"final@inconditional" << " " << punct_fst.size();
-    wcout << " " << punct_fst.numberOfTransitions() << endl;
+    cout << "final@inconditional" << " " << punct_fst.size();
+    cout << " " << punct_fst.numberOfTransitions() << endl;
   }
 //  fclose(output);
 }
diff --git a/lttoolbox/att_compiler.h b/lttoolbox/att_compiler.h
index 126ca56..557eb55 100644
--- a/lttoolbox/att_compiler.h
+++ b/lttoolbox/att_compiler.h
@@ -19,11 +19,11 @@
 
 #include <string>
 #include <fstream>
-#include <sstream>
 #include <map>
 #include <set>
 #include <vector>
 
+#include <lttoolbox/ustring.h>
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/transducer.h>
 #include <lttoolbox/compression.h>
@@ -36,25 +36,11 @@
 #define BOTH      3
 
 using namespace std;
+using namespace icu;
 
 /** Bitmask; 1 = WORD, 2 = PUNCT, 3 = BOTH. */
 typedef unsigned int TransducerType;
 
-namespace
-{
-  /** Splits a string into fields. */
-  inline vector<wstring>& split(const wstring& s, wchar_t delim, vector<wstring> &out)
-  {
-      wistringstream ss(s);
-      wstring item;
-      while (getline(ss, item, delim))
-      {
-        out.push_back(item);
-      }
-      return out;
-  }
-};
-
 /**
  * Converts transducers from AT&T text format to lt binary format.
  *
@@ -90,8 +76,9 @@ public:
   /**
    * Reads the AT&T format file @p file_name. The transducer and the alphabet
    * are both cleared before reading the new file.
+   * If read_rl = true then the second tape is used as the input
    */
-  void parse(string const &file_name, wstring const &dir);
+  void parse(string const &file_name, bool read_rl);
 
   /** Writes the transducer to @p file_name in lt binary format. */
 
@@ -113,20 +100,20 @@ private:
 
   Alphabet alphabet;
   /** All non-multicharacter symbols. */
-  set<wchar_t> letters;
+  set<UChar> letters;
 
   /** Used in AttNode. */
   struct Transduction
   {
     int            to;
-    wstring        upper;
-    wstring        lower;
+    UString        upper;
+    UString        lower;
     int            tag;
     double         weight;
     TransducerType type;
 
-    Transduction(int to, wstring upper, wstring lower, int tag, double weight,
-                 TransducerType type=UNDECIDED) :
+    Transduction(int to, UString upper, UString lower, int tag,
+                 double weight, TransducerType type=UNDECIDED) :
       to(to), upper(upper), lower(lower), tag(tag), weight(weight), type(type) {}
   };
 
@@ -170,7 +157,7 @@ private:
    * Returns true for combining diacritics and modifier letters
    *
    */
-  bool is_word_punct(wchar_t symbol);
+  bool is_word_punct(UChar32 symbol);
 
   /**
    * Determines initial type of single transition
@@ -186,7 +173,7 @@ private:
    * @todo Are there other special symbols? If so, add them, and maybe use a map
    *       for conversion?
    */
-  void convert_hfst(wstring& symbol);
+  void convert_hfst(UString& symbol);
 
   /**
    * Returns the code of the symbol in the alphabet. Run after convert_hfst has
@@ -197,12 +184,7 @@ private:
    * @return the code of the symbol, if @p symbol is multichar; its first (and
    *         only) character otherwise.
    */
-  int symbol_code(const wstring& symbol);
-
-  /**
-   * Finds whether an at&t file contains multiple FSTs or not
-  */
-  bool has_multiple_fsts(string const &file_name);
+  int symbol_code(const UString& symbol);
 };
 
 #endif /* _MYATT_COMPILER_ */
diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h
index 9a1397f..5d19417 100644
--- a/lttoolbox/buffer.h
+++ b/lttoolbox/buffer.h
@@ -75,8 +75,8 @@ public:
     {
       if(buf_size == 0)
       {
-	wcerr << "Error: Cannot create empty buffer." << endl;
-	exit(EXIT_FAILURE);
+        cerr << "Error: Cannot create empty buffer." << endl;
+        exit(EXIT_FAILURE);
       }
       buf = new T[buf_size];
       size = buf_size;
@@ -115,8 +115,8 @@ public:
     {
       if(&b != this)
       {
-	destroy();
-	copy(b);
+        destroy();
+        copy(b);
       }
       return *this;
     }
@@ -130,7 +130,7 @@ public:
     {
       if(lastpos == size)
       {
-	lastpos = 0;
+        lastpos = 0;
       }
       buf[lastpos++] = value;
       currentpos = lastpos;
@@ -147,7 +147,7 @@ public:
     {
       if(lastpos == size)
       {
-	lastpos = 0;
+        lastpos = 0;
       }
       currentpos = lastpos;
       return buf[lastpos -1];
@@ -162,15 +162,15 @@ public:
     {
       if(currentpos != lastpos)
       {
-	if(currentpos == size)
-	{
-	  currentpos = 0;
-	}
-	return buf[currentpos++];
+        if(currentpos == size)
+          {
+            currentpos = 0;
+          }
+        return buf[currentpos++];
       }
       else
       {
-	return last();
+        return last();
       }
     }
 
@@ -182,11 +182,11 @@ public:
     {
       if(lastpos != 0)
       {
-	return buf[lastpos-1];
+        return buf[lastpos-1];
       }
       else
       {
-	return buf[size-1];
+        return buf[size-1];
       }
     }
 
@@ -218,11 +218,11 @@ public:
     {
       if(prevpos <= currentpos)
       {
-	return currentpos - prevpos;
+        return currentpos - prevpos;
       }
       else
       {
-	return currentpos + size - prevpos;
+        return currentpos + size - prevpos;
       }
     }
 
@@ -236,11 +236,11 @@ public:
     {
       if(postpos >= currentpos)
       {
-	return postpos - currentpos;
+        return postpos - currentpos;
       }
       else
       {
-	return postpos + size - currentpos;
+        return postpos + size - currentpos;
       }
     }
 
diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc
index 00a6287..d2ab234 100644
--- a/lttoolbox/compiler.cc
+++ b/lttoolbox/compiler.cc
@@ -19,7 +19,7 @@
 #include <lttoolbox/entry_token.h>
 #include <lttoolbox/lt_locale.h>
 #include <lttoolbox/xml_parse_util.h>
-#include <lttoolbox/string_to_wostream.h>
+#include <lttoolbox/string_utils.h>
 
 #include <string>
 #include <cstdlib>
@@ -28,41 +28,47 @@
 
 using namespace std;
 
-wstring const Compiler::COMPILER_DICTIONARY_ELEM    = L"dictionary";
-wstring const Compiler::COMPILER_ALPHABET_ELEM      = L"alphabet";
-wstring const Compiler::COMPILER_SDEFS_ELEM         = L"sdefs";
-wstring const Compiler::COMPILER_SDEF_ELEM          = L"sdef";
-wstring const Compiler::COMPILER_N_ATTR             = L"n";
-wstring const Compiler::COMPILER_PARDEFS_ELEM       = L"pardefs";
-wstring const Compiler::COMPILER_PARDEF_ELEM        = L"pardef";
-wstring const Compiler::COMPILER_PAR_ELEM           = L"par";
-wstring const Compiler::COMPILER_ENTRY_ELEM         = L"e";
-wstring const Compiler::COMPILER_RESTRICTION_ATTR   = L"r";
-wstring const Compiler::COMPILER_RESTRICTION_LR_VAL = L"LR";
-wstring const Compiler::COMPILER_RESTRICTION_RL_VAL = L"RL";
-wstring const Compiler::COMPILER_PAIR_ELEM          = L"p";
-wstring const Compiler::COMPILER_LEFT_ELEM          = L"l";
-wstring const Compiler::COMPILER_RIGHT_ELEM         = L"r";
-wstring const Compiler::COMPILER_S_ELEM             = L"s";
-wstring const Compiler::COMPILER_M_ELEM             = L"m";
-wstring const Compiler::COMPILER_REGEXP_ELEM        = L"re";
-wstring const Compiler::COMPILER_SECTION_ELEM       = L"section";
-wstring const Compiler::COMPILER_ID_ATTR            = L"id";
-wstring const Compiler::COMPILER_TYPE_ATTR          = L"type";
-wstring const Compiler::COMPILER_IDENTITY_ELEM      = L"i";
-wstring const Compiler::COMPILER_IDENTITYGROUP_ELEM = L"ig";
-wstring const Compiler::COMPILER_JOIN_ELEM          = L"j";
-wstring const Compiler::COMPILER_BLANK_ELEM         = L"b";
-wstring const Compiler::COMPILER_POSTGENERATOR_ELEM = L"a";
-wstring const Compiler::COMPILER_GROUP_ELEM         = L"g";
-wstring const Compiler::COMPILER_LEMMA_ATTR         = L"lm";
-wstring const Compiler::COMPILER_IGNORE_ATTR        = L"i";
-wstring const Compiler::COMPILER_IGNORE_YES_VAL     = L"yes";
-wstring const Compiler::COMPILER_ALT_ATTR           = L"alt";
-wstring const Compiler::COMPILER_V_ATTR             = L"v";
-wstring const Compiler::COMPILER_VL_ATTR            = L"vl";
-wstring const Compiler::COMPILER_VR_ATTR            = L"vr";
-wstring const Compiler::COMPILER_WEIGHT_ATTR        = L"w";
+UString const Compiler::COMPILER_DICTIONARY_ELEM    = "dictionary"_u;
+UString const Compiler::COMPILER_ALPHABET_ELEM      = "alphabet"_u;
+UString const Compiler::COMPILER_SDEFS_ELEM         = "sdefs"_u;
+UString const Compiler::COMPILER_SDEF_ELEM          = "sdef"_u;
+UString const Compiler::COMPILER_N_ATTR             = "n"_u;
+UString const Compiler::COMPILER_PARDEFS_ELEM       = "pardefs"_u;
+UString const Compiler::COMPILER_PARDEF_ELEM        = "pardef"_u;
+UString const Compiler::COMPILER_PAR_ELEM           = "par"_u;
+UString const Compiler::COMPILER_ENTRY_ELEM         = "e"_u;
+UString const Compiler::COMPILER_RESTRICTION_ATTR   = "r"_u;
+UString const Compiler::COMPILER_RESTRICTION_LR_VAL = "LR"_u;
+UString const Compiler::COMPILER_RESTRICTION_RL_VAL = "RL"_u;
+UString const Compiler::COMPILER_PAIR_ELEM          = "p"_u;
+UString const Compiler::COMPILER_LEFT_ELEM          = "l"_u;
+UString const Compiler::COMPILER_RIGHT_ELEM         = "r"_u;
+UString const Compiler::COMPILER_S_ELEM             = "s"_u;
+UString const Compiler::COMPILER_M_ELEM             = "m"_u;
+UString const Compiler::COMPILER_REGEXP_ELEM        = "re"_u;
+UString const Compiler::COMPILER_SECTION_ELEM       = "section"_u;
+UString const Compiler::COMPILER_ID_ATTR            = "id"_u;
+UString const Compiler::COMPILER_TYPE_ATTR          = "type"_u;
+UString const Compiler::COMPILER_IDENTITY_ELEM      = "i"_u;
+UString const Compiler::COMPILER_IDENTITYGROUP_ELEM = "ig"_u;
+UString const Compiler::COMPILER_JOIN_ELEM          = "j"_u;
+UString const Compiler::COMPILER_BLANK_ELEM         = "b"_u;
+UString const Compiler::COMPILER_POSTGENERATOR_ELEM = "a"_u;
+UString const Compiler::COMPILER_GROUP_ELEM         = "g"_u;
+UString const Compiler::COMPILER_LEMMA_ATTR         = "lm"_u;
+UString const Compiler::COMPILER_IGNORE_ATTR        = "i"_u;
+UString const Compiler::COMPILER_IGNORE_YES_VAL     = "yes"_u;
+UString const Compiler::COMPILER_ALT_ATTR           = "alt"_u;
+UString const Compiler::COMPILER_V_ATTR             = "v"_u;
+UString const Compiler::COMPILER_VL_ATTR            = "vl"_u;
+UString const Compiler::COMPILER_VR_ATTR            = "vr"_u;
+UString const Compiler::COMPILER_WEIGHT_ATTR        = "w"_u;
+UString const Compiler::COMPILER_TEXT_NODE          = "#text"_u;
+UString const Compiler::COMPILER_COMMENT_NODE       = "#comment"_u;
+UString const Compiler::COMPILER_ACX_ANALYSIS_ELEM  = "analysis-chars"_u;
+UString const Compiler::COMPILER_ACX_CHAR_ELEM      = "char"_u;
+UString const Compiler::COMPILER_ACX_EQUIV_CHAR_ELEM= "equiv-char"_u;
+UString const Compiler::COMPILER_ACX_VALUE_ATTR     = "value"_u;
 
 Compiler::Compiler() :
 reader(0),
@@ -78,14 +84,14 @@ Compiler::~Compiler()
 }
 
 void
-Compiler::parseACX(string const &file, wstring const &dir)
+Compiler::parseACX(string const &file, UString const &dir)
 {
   if(dir == COMPILER_RESTRICTION_LR_VAL)
   {
     reader = xmlReaderForFile(file.c_str(), NULL, 0);
     if(reader == NULL)
     {
-      wcerr << "Error: cannot open '" << file << "'." << endl;
+      cerr << "Error: cannot open '" << file << "'." << endl;
       exit(EXIT_FAILURE);
     }
     int ret = xmlTextReaderRead(reader);
@@ -98,13 +104,13 @@ Compiler::parseACX(string const &file, wstring const &dir)
 }
 
 void
-Compiler::parse(string const &file, wstring const &dir)
+Compiler::parse(string const &file, UString const &dir)
 {
   direction = dir;
   reader = xmlReaderForFile(file.c_str(), NULL, 0);
   if(reader == NULL)
   {
-    wcerr << "Error: Cannot open '" << file << "'." << endl;
+    cerr << "Error: Cannot open '" << file << "'." << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -117,7 +123,7 @@ Compiler::parse(string const &file, wstring const &dir)
 
   if(ret != 0)
   {
-    wcerr << L"Error: Parse error at the end of input." << endl;
+    cerr << "Error: Parse error at the end of input." << endl;
   }
 
   xmlFreeTextReader(reader);
@@ -136,22 +142,22 @@ Compiler::parse(string const &file, wstring const &dir)
 }
 
 bool
-Compiler::valid(wstring const& dir) const
+Compiler::valid(UString const& dir) const
 {
-  const wstring side = dir == COMPILER_RESTRICTION_RL_VAL ? L"right" : L"left";
+  const char* side = dir == COMPILER_RESTRICTION_RL_VAL ? "right" : "left";
   const set<int> epsilonSymbols = alphabet.symbolsWhereLeftIs(0);
-  const set<int> spaceSymbols = alphabet.symbolsWhereLeftIs(L' ');
+  const set<int> spaceSymbols = alphabet.symbolsWhereLeftIs(' ');
   for (auto &section : sections) {
     auto &fst = section.second;
     auto finals = fst.getFinals();
     auto initial = fst.getInitial();
     for(const auto i : fst.closure(initial, epsilonSymbols)) {
       if (finals.count(i)) {
-        wcerr << L"Error: Invalid dictionary (hint: the " << side << " side of an entry is empty)" << endl;
+        cerr << "Error: Invalid dictionary (hint: the " << side << " side of an entry is empty)" << endl;
         return false;
       }
       if(fst.closure(i, spaceSymbols).size() > 1) { // >1 since closure always includes self
-        wcerr << L"Error: Invalid dictionary (hint: entry on the " << side << " beginning with whitespace)" << endl;
+        cerr << "Error: Invalid dictionary (hint: entry on the " << side << " beginning with whitespace)" << endl;
         return false;
       }
     }
@@ -169,12 +175,11 @@ Compiler::procAlphabet()
     int ret = xmlTextReaderRead(reader);
     if(ret == 1)
     {
-      xmlChar const *value = xmlTextReaderConstValue(reader);
-      letters = XMLParseUtil::towstring(value);
+      letters = XMLParseUtil::readValue(reader);
       bool space = true;
       for(unsigned int i = 0; i < letters.length(); i++)
       {
-        if(!isspace(letters.at(i)))
+        if(!u_isspace(letters.at(i)))
         {
           space = false;
           break;
@@ -182,13 +187,13 @@ Compiler::procAlphabet()
       }
       if(space == true)  // libxml2 returns '\n' for <alphabet></alphabet>, should be empty
       {
-        letters = L"";
+        letters.clear();
       }
     }
     else
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Missing alphabet symbols." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Missing alphabet symbols." << endl;
       exit(EXIT_FAILURE);
     }
   }
@@ -197,7 +202,7 @@ Compiler::procAlphabet()
 void
 Compiler::procSDef()
 {
-  alphabet.includeSymbol(L"<"+attrib(COMPILER_N_ATTR)+L">");
+  alphabet.includeSymbol("<"_u + attrib(COMPILER_N_ATTR) + ">"_u);
 }
 
 void
@@ -215,18 +220,18 @@ Compiler::procParDef()
     {
       paradigms[current_paradigm].minimize();
       paradigms[current_paradigm].joinFinals();
-      current_paradigm = L"";
+      current_paradigm.clear();
     }
   }
 }
 
 int
-Compiler::matchTransduction(list<int> const &pi,
-                           list<int> const &pd,
+Compiler::matchTransduction(vector<int> const &pi,
+                           vector<int> const &pd,
                            int state, Transducer &t,
                            double const &entry_weight)
 {
-  list<int>::const_iterator left, right, limleft, limright;
+  vector<int>::const_iterator left, right, limleft, limright;
 
   if(direction == COMPILER_RESTRICTION_LR_VAL)
   {
@@ -313,12 +318,12 @@ Compiler::matchTransduction(list<int> const &pi,
 
 
 void
-Compiler::requireEmptyError(wstring const &name)
+Compiler::requireEmptyError(UString const &name)
 {
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Non-empty element '<" << name << L">' should be empty." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Non-empty element '<" << name << ">' should be empty." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -327,67 +332,63 @@ bool
 Compiler::allBlanks()
 {
   bool flag = true;
-  wstring text = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
+  UString text = XMLParseUtil::readValue(reader);
 
   for(auto c : text)
   {
-    flag = flag && iswspace(c);
+    flag = flag && u_isspace(c);
   }
 
   return flag;
 }
 
 void
-Compiler::readString(list<int> &result, wstring const &name)
+Compiler::readString(vector<int> &result, UString const &name)
 {
-  if(name == L"#text")
+  if(name == COMPILER_TEXT_NODE)
   {
-    wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-    for(unsigned int i = 0, limit = value.size(); i < limit; i++)
-    {
-      result.push_back(static_cast<int>(value[i]));
-    }
+    XMLParseUtil::readValueInto32(reader, result);
   }
   else if(name == COMPILER_M_ELEM)
   {
     requireEmptyError(name);
     if(keep_boundaries)
     {
-      result.push_back(static_cast<int>(L'>'));
+      result.push_back(static_cast<int>('>'));
     }
   }
   else if(name == COMPILER_BLANK_ELEM)
   {
     requireEmptyError(name);
-    result.push_back(static_cast<int>(L' '));
+    result.push_back(static_cast<int>(' '));
   }
   else if(name == COMPILER_JOIN_ELEM)
   {
     requireEmptyError(name);
-    result.push_back(static_cast<int>(L'+'));
+    result.push_back(static_cast<int>('+'));
   }
   else if(name == COMPILER_POSTGENERATOR_ELEM)
   {
     requireEmptyError(name);
-    result.push_back(static_cast<int>(L'~'));
+    result.push_back(static_cast<int>('~'));
   }
   else if(name == COMPILER_GROUP_ELEM)
   {
     int type=xmlTextReaderNodeType(reader);
     if(type != XML_READER_TYPE_END_ELEMENT)
     {
-      result.push_back(static_cast<int>(L'#'));
+      result.push_back(static_cast<int>('#'));
     }
   }
   else if(name == COMPILER_S_ELEM)
   {
     requireEmptyError(name);
-    wstring symbol = L"<" + attrib(COMPILER_N_ATTR) + L">";
+    UString symbol = "<"_u + attrib(COMPILER_N_ATTR) + ">"_u;
 
     if(!alphabet.isSymbolDefined(symbol))
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Undefined symbol '" << symbol << L"'." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Undefined symbol '" << symbol << "'." << endl;
       exit(EXIT_FAILURE);
     }
 
@@ -395,88 +396,87 @@ Compiler::readString(list<int> &result, wstring const &name)
   }
   else
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid specification of element '<" << name;
-    wcerr << L">' in this context." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid specification of element '<" << name;
+    cerr << ">' in this context." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
 void
-Compiler::skipBlanks(wstring &name)
+Compiler::skipBlanks(UString &name)
 {
-  while(name == L"#text" || name == L"#comment")
+  while(name == COMPILER_TEXT_NODE || name == COMPILER_COMMENT_NODE)
   {
-    if(name != L"#comment")
+    if(name != COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Invalid construction." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid construction." << endl;
         exit(EXIT_FAILURE);
       }
     }
 
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 }
 
 void
-Compiler::skip(wstring &name, wstring const &elem)
+Compiler::skip(UString &name, UString const &elem)
 {
   skip(name, elem, true);
 }
 
 void
-Compiler::skip(wstring &name, wstring const &elem, bool open)
+Compiler::skip(UString &name, UString const &elem, bool open)
 {
   xmlTextReaderRead(reader);
-  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
-  wstring slash;
+  name = XMLParseUtil::readName(reader);
+  UString slash;
 
   if(!open)
   {
-    slash = L"/";
+    slash = "/"_u;
   }
 
-  while(name == L"#text" || name == L"#comment")
+  while(name == COMPILER_TEXT_NODE || name == COMPILER_COMMENT_NODE)
   {
-    if(name != L"#comment")
+    if(name != COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Invalid construction." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid construction." << endl;
         exit(EXIT_FAILURE);
       }
     }
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 
   if(name != elem)
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Expected '<" << slash << elem << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Expected '<" << slash << elem << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
 EntryToken
-Compiler::procIdentity(wstring const &wsweight, bool ig)
+Compiler::procIdentity(double const entry_weight, bool ig)
 {
-  list<int> both_sides;
-  double entry_weight = stod(wsweight);
+  vector<int> both_sides;
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wstring name = L"";
+    UString name;
 
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == COMPILER_IDENTITY_ELEM || name == COMPILER_IDENTITYGROUP_ELEM)
       {
         break;
@@ -485,17 +485,17 @@ Compiler::procIdentity(wstring const &wsweight, bool ig)
     }
   }
 
-  if(verbose && first_element && (both_sides.front() == (int)L' '))
+  if(verbose && first_element && (both_sides.front() == (int)' '))
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Entry begins with space." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Entry begins with space." << endl;
   }
   first_element = false;
   EntryToken e;
   if(ig)
   {
-    list<int> right;
-    right.push_back(static_cast<int>(L'#'));
+    vector<int> right;
+    right.push_back(static_cast<int>('#'));
     right.insert(right.end(), both_sides.begin(), both_sides.end());
     e.setSingleTransduction(both_sides, right, entry_weight);
   }
@@ -507,21 +507,20 @@ Compiler::procIdentity(wstring const &wsweight, bool ig)
 }
 
 EntryToken
-Compiler::procTransduction(wstring const &wsweight)
+Compiler::procTransduction(double const entry_weight)
 {
-  list<int> lhs, rhs;
-  double entry_weight = stod(wsweight);
-  wstring name;
+  vector<int> lhs, rhs;
+  UString name;
 
   skip(name, COMPILER_LEFT_ELEM);
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    name = L"";
+    name.clear();
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == COMPILER_LEFT_ELEM)
       {
         break;
@@ -530,10 +529,10 @@ Compiler::procTransduction(wstring const &wsweight)
     }
   }
 
-  if(verbose && first_element && (lhs.front() == (int)L' '))
+  if(verbose && first_element && (lhs.front() == (int)' '))
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Entry begins with space." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Entry begins with space." << endl;
   }
   first_element = false;
 
@@ -541,11 +540,11 @@ Compiler::procTransduction(wstring const &wsweight)
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    name = L"";
+    name.clear();
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == COMPILER_RIGHT_ELEM)
       {
         break;
@@ -561,8 +560,8 @@ Compiler::procTransduction(wstring const &wsweight)
   return e;
 }
 
-wstring
-Compiler::attrib(wstring const &name)
+UString
+Compiler::attrib(UString const &name)
 {
   return XMLParseUtil::attrib(reader, name);
 }
@@ -571,20 +570,20 @@ EntryToken
 Compiler::procPar()
 {
   EntryToken e;
-  wstring paradigm_name = attrib(COMPILER_N_ATTR);
+  UString paradigm_name = attrib(COMPILER_N_ATTR);
   first_element = false;
 
-  if(current_paradigm != L"" && paradigm_name == current_paradigm)
+  if(!current_paradigm.empty() && paradigm_name == current_paradigm)
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Paradigm refers to itself '" << paradigm_name << L"'." <<endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Paradigm refers to itself '" << paradigm_name << "'." <<endl;
     exit(EXIT_FAILURE);
   }
 
   if(paradigms.find(paradigm_name) == paradigms.end())
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Undefined paradigm '" << paradigm_name << L"'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Undefined paradigm '" << paradigm_name << "'." << endl;
     exit(EXIT_FAILURE);
   }
   e.setParadigm(paradigm_name);
@@ -594,7 +593,7 @@ Compiler::procPar()
 void
 Compiler::insertEntryTokens(vector<EntryToken> const &elements)
 {
-  if(current_paradigm != L"")
+  if(!current_paradigm.empty())
   {
     // compilation of paradigms
     Transducer &t = paradigms[current_paradigm];
@@ -620,8 +619,8 @@ Compiler::insertEntryTokens(vector<EntryToken> const &elements)
       }
       else
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Invalid entry token." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid entry token." << endl;
         exit(EXIT_FAILURE);
       }
     }
@@ -691,15 +690,15 @@ Compiler::insertEntryTokens(vector<EntryToken> const &elements)
 
 
 void
-Compiler::requireAttribute(wstring const &value, wstring const &attrname,
-                           wstring const &elemname)
+Compiler::requireAttribute(UString const &value, UString const &attrname,
+                           UString const &elemname)
 {
-  if(value == L"")
+  if(value.empty())
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): '<" << elemname;
-    wcerr << L"' element must specify non-void '";
-    wcerr << attrname << L"' attribute." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): '<" << elemname;
+    cerr << "' element must specify non-void '";
+    cerr << attrname << "' attribute." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -712,55 +711,56 @@ Compiler::procSection()
 
   if(type != XML_READER_TYPE_END_ELEMENT)
   {
-    wstring const &id = attrib(COMPILER_ID_ATTR);
-    wstring const &type = attrib(COMPILER_TYPE_ATTR);
+    UString const &id = attrib(COMPILER_ID_ATTR);
+    UString const &type = attrib(COMPILER_TYPE_ATTR);
     requireAttribute(id, COMPILER_ID_ATTR, COMPILER_SECTION_ELEM);
     requireAttribute(type, COMPILER_TYPE_ATTR, COMPILER_SECTION_ELEM);
 
     current_section = id;
-    current_section += L"@";
+    current_section += '@';
     current_section.append(type);
   }
   else
   {
-    current_section = L"";
+    current_section.clear();
   }
 }
 
 void
 Compiler::procEntry()
 {
-  wstring attribute = this->attrib(COMPILER_RESTRICTION_ATTR);
-  wstring ignore    = this->attrib(COMPILER_IGNORE_ATTR);
-  wstring altval    = this->attrib(COMPILER_ALT_ATTR);
-  wstring varval    = this->attrib(COMPILER_V_ATTR);
-  wstring varl      = this->attrib(COMPILER_VL_ATTR);
-  wstring varr      = this->attrib(COMPILER_VR_ATTR);
-  wstring wsweight  = this->attrib(COMPILER_WEIGHT_ATTR);
+  UString attribute = this->attrib(COMPILER_RESTRICTION_ATTR);
+  UString ignore    = this->attrib(COMPILER_IGNORE_ATTR);
+  UString altval    = this->attrib(COMPILER_ALT_ATTR);
+  UString varval    = this->attrib(COMPILER_V_ATTR);
+  UString varl      = this->attrib(COMPILER_VL_ATTR);
+  UString varr      = this->attrib(COMPILER_VR_ATTR);
+  UString wsweight  = this->attrib(COMPILER_WEIGHT_ATTR);
 
   // if entry is masked by a restriction of direction or an ignore mark
-  if((attribute != L"" && attribute != direction)
+  if((!attribute.empty() && attribute != direction)
    || ignore == COMPILER_IGNORE_YES_VAL
-   || (altval != L"" && altval != alt)
-   || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant)
-   || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left)
-   || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right))
+   || (!altval.empty() && altval != alt)
+   || (direction == COMPILER_RESTRICTION_RL_VAL && !varval.empty() && varval != variant)
+   || (direction == COMPILER_RESTRICTION_RL_VAL && !varl.empty() && varl != variant_left)
+   || (direction == COMPILER_RESTRICTION_LR_VAL && !varr.empty() && varr != variant_right))
   {
     // parse to the end of the entry
-    wstring name = L"";
+    UString name;
 
     while(name != COMPILER_ENTRY_ELEM)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
     }
 
     return;
   }
 
-  if(wsweight == L"")
+  double weight = 0.0;
+  if(!wsweight.empty())
   {
-    wsweight = L"0.0000";
+    weight = StringUtils::stod(wsweight);
   }
 
   vector<EntryToken> elements;
@@ -770,14 +770,14 @@ Compiler::procEntry()
     int ret = xmlTextReaderRead(reader);
     if(ret != 1)
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Parse error." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Parse error." << endl;
       exit(EXIT_FAILURE);
     }
-    wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    UString name = XMLParseUtil::readName(reader);
     skipBlanks(name);
 
-    if(current_paradigm == L"" && verbose)
+    if(current_paradigm.empty() && verbose)
     {
       first_element = true;
     }
@@ -785,15 +785,15 @@ Compiler::procEntry()
     int type = xmlTextReaderNodeType(reader);
     if(name == COMPILER_PAIR_ELEM)
     {
-      elements.push_back(procTransduction(wsweight));
+      elements.push_back(procTransduction(weight));
     }
     else if(name == COMPILER_IDENTITY_ELEM)
     {
-      elements.push_back(procIdentity(wsweight, false));
+      elements.push_back(procIdentity(weight, false));
     }
     else if(name == COMPILER_IDENTITYGROUP_ELEM)
     {
-      elements.push_back(procIdentity(wsweight, true));
+      elements.push_back(procIdentity(weight, true));
     }
     else if(name == COMPILER_REGEXP_ELEM)
     {
@@ -805,12 +805,12 @@ Compiler::procEntry()
 
       // detection of the use of undefined paradigms
 
-      wstring const &p = elements.rbegin()->paradigmName();
+      UString const &p = elements.rbegin()->paradigmName();
 
       if(paradigms.find(p) == paradigms.end())
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Undefined paradigm '" << p << L"'." <<endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Undefined paradigm '" << p << "'." <<endl;
         exit(EXIT_FAILURE);
       }
       // discard entries with empty paradigms (by the directions, normally)
@@ -819,7 +819,7 @@ Compiler::procEntry()
         while(name != COMPILER_ENTRY_ELEM || type != XML_READER_TYPE_END_ELEMENT)
         {
           xmlTextReaderRead(reader);
-          name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+          name = XMLParseUtil::readName(reader);
           type = xmlTextReaderNodeType(reader);
         }
         return;
@@ -831,14 +831,14 @@ Compiler::procEntry()
       insertEntryTokens(elements);
       return;
     }
-    else if(name == L"#text" && allBlanks())
+    else if(name == COMPILER_TEXT_NODE && allBlanks())
     {
     }
     else
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Invalid inclusion of '<" << name << L">' into '<" << COMPILER_ENTRY_ELEM;
-      wcerr << L">'." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Invalid inclusion of '<" << name << ">' into '<" << COMPILER_ENTRY_ELEM;
+      cerr << ">'." << endl;
       exit(EXIT_FAILURE);
     }
   }
@@ -847,32 +847,31 @@ Compiler::procEntry()
 void
 Compiler::procNodeACX()
 {
-  xmlChar  const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
-  if(name == L"#text")
+  UString name = XMLParseUtil::readName(reader);
+  if(name == COMPILER_TEXT_NODE)
   {
     /* ignore */
   }
-  else if(name == L"analysis-chars")
+  else if(name == COMPILER_ACX_ANALYSIS_ELEM)
   {
     /* ignore */
   }
-  else if(name == L"char")
+  else if(name == COMPILER_ACX_CHAR_ELEM)
   {
-    acx_current_char = static_cast<int>(attrib(L"value")[0]);
+    acx_current_char = static_cast<int>(attrib(COMPILER_ACX_VALUE_ATTR)[0]);
   }
-  else if(name == L"equiv-char")
+  else if(name == COMPILER_ACX_EQUIV_CHAR_ELEM)
   {
-    acx_map[acx_current_char].insert(static_cast<int>(attrib(L"value")[0]));
+    acx_map[acx_current_char].insert(static_cast<int>(attrib(COMPILER_ACX_VALUE_ATTR)[0]));
   }
-  else if(name == L"#comment")
+  else if(name == COMPILER_COMMENT_NODE)
   {
     /* ignore */
   }
   else
   {
-    wcerr << L"Error in ACX file (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error in ACX file (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -880,12 +879,11 @@ Compiler::procNodeACX()
 void
 Compiler::procNode()
 {
-  xmlChar const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
+  UString name = XMLParseUtil::readName(reader);
 
   // TODO: optimize the execution order of the string "ifs"
 
-  if(name == L"#text")
+  if(name == COMPILER_TEXT_NODE)
   {
     /* ignore */
   }
@@ -921,14 +919,14 @@ Compiler::procNode()
   {
     procSection();
   }
-  else if(name== L"#comment")
+  else if(name== COMPILER_COMMENT_NODE)
   {
     /* ignore */
   }
   else
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -938,8 +936,7 @@ Compiler::procRegexp()
 {
   EntryToken et;
   xmlTextReaderRead(reader);
-  wstring re = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-  et.setRegexp(re);
+  et.readRegexp(reader);
   xmlTextReaderRead(reader);
   return et;
 }
@@ -947,12 +944,12 @@ Compiler::procRegexp()
 void
 Compiler::write(FILE *output)
 {
-  fwrite(HEADER_LTTOOLBOX, 1, 4, output);
+  fwrite_unlocked(HEADER_LTTOOLBOX, 1, 4, output);
   uint64_t features = 0;
   write_le(output, features);
 
   // letters
-  Compression::wstring_write(letters, output);
+  Compression::string_write(letters, output);
 
   // symbols
   alphabet.write(output);
@@ -964,35 +961,35 @@ Compiler::write(FILE *output)
   for(auto& it : sections)
   {
     count++;
-    wcout << it.first << " " << it.second.size();
-    wcout << " " << it.second.numberOfTransitions() << endl;
-    Compression::wstring_write(it.first, output);
+    cout << it.first << " " << it.second.size();
+    cout << " " << it.second.numberOfTransitions() << endl;
+    Compression::string_write(it.first, output);
     it.second.write(output);
   }
 }
 
 void
-Compiler::setAltValue(string const &a)
+Compiler::setAltValue(UString const &a)
 {
-  alt = XMLParseUtil::stows(a);
+  alt = a;
 }
 
 void
-Compiler::setVariantValue(string const &v)
+Compiler::setVariantValue(UString const &v)
 {
-  variant = XMLParseUtil::stows(v);
+  variant = v;
 }
 
 void
-Compiler::setVariantLeftValue(string const &v)
+Compiler::setVariantLeftValue(UString const &v)
 {
-  variant_left = XMLParseUtil::stows(v);
+  variant_left = v;
 }
 
 void
-Compiler::setVariantRightValue(string const &v)
+Compiler::setVariantRightValue(UString const &v)
 {
-  variant_right = XMLParseUtil::stows(v);
+  variant_right = v;
 }
 
 void
diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h
index acd8b7a..ad18f69 100644
--- a/lttoolbox/compiler.h
+++ b/lttoolbox/compiler.h
@@ -20,8 +20,8 @@
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/regexp_compiler.h>
 #include <lttoolbox/entry_token.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/transducer.h>
+#include <lttoolbox/ustring.h>
 
 #include <map>
 #include <string>
@@ -54,43 +54,43 @@ private:
   /**
    * The alt value
    */
-  wstring alt;
+  UString alt;
 
   /**
    * The variant value (monodix)
    */
-  wstring variant;
+  UString variant;
 
   /**
    * The variant value (left side of bidix)
    */
-  wstring variant_left;
+  UString variant_left;
 
   /**
    * The variant value (right side of bidix)
    */
-  wstring variant_right;
+  UString variant_right;
 
   /**
    * The paradigm being compiled
    */
-  wstring current_paradigm;
+  UString current_paradigm;
 
   /**
    * The dictionary section being compiled
    */
-  wstring current_section;
+  UString current_section;
 
   /**
    * The direction of the compilation, 'lr' (left-to-right) or 'rl'
    * (right-to-left)
    */
-  wstring direction;
+  UString direction;
 
   /**
    * List of characters to be considered alphabetic
    */
-  wstring letters;
+  UString letters;
 
   /**
    * Set verbose mode: warnings which may or may not be correct
@@ -121,27 +121,27 @@ private:
   /**
    * List of named transducers-paradigms
    */
-  map<wstring, Transducer, Ltstr> paradigms;
+  map<UString, Transducer> paradigms;
 
   /**
    * List of named dictionary sections
    */
-  map<wstring, Transducer, Ltstr> sections;
+  map<UString, Transducer> sections;
 
   /**
    * List of named prefix copy of a paradigm
    */
-  map<wstring, map<wstring, int, Ltstr>, Ltstr> prefix_paradigms;
+  map<UString, map<UString, int> > prefix_paradigms;
 
   /**
    * List of named suffix copy of a paradigm
    */
-  map<wstring, map<wstring, int, Ltstr>, Ltstr> suffix_paradigms;
+  map<UString, map<UString, int> > suffix_paradigms;
 
   /**
    * List of named endings of a suffix copy of a paradgim
    */
-  map<wstring, map<wstring, int, Ltstr>, Ltstr> postsuffix_paradigms;
+  map<UString, map<UString, int> > postsuffix_paradigms;
 
   /**
    * Mapping of aliases of characters specified in ACX files
@@ -205,7 +205,7 @@ private:
    * @param name the name of the attribute
    * @return the value of the attribute
    */
-  wstring attrib(wstring const &name);
+  UString attrib(UString const &name);
 
   /**
    * Construct symbol pairs by align left side of both parts and insert
@@ -216,19 +216,19 @@ private:
    * @param t the transducer
    * @return the last state of the inserted transduction
    */
-  int matchTransduction(list<int> const &lp, list<int> const &rp,
+  int matchTransduction(vector<int> const &lp, vector<int> const &rp,
                         int state, Transducer &t, double const &entry_weight);
   /**
    * Parse the &lt;p&gt; element
    * @return a list of tokens from the dictionary's entry
    */
-  EntryToken procTransduction(wstring const &wsweight);
+  EntryToken procTransduction(double const entry_weight);
 
   /**
    * Parse the &lt;i&gt; element
    * @return a list of tokens from the dictionary's entry
    */
-  EntryToken procIdentity(wstring const &wsweight, bool ig = false);
+  EntryToken procIdentity(double const entry_weight, bool ig = false);
 
   /**
    * Parse the &lt;par&gt; element
@@ -247,7 +247,7 @@ private:
    * @param name the name of the node
    * @param elem the name of the expected node
    */
-  void skip(wstring &name, wstring const &elem);
+  void skip(UString &name, UString const &elem);
 
   /**
    * Skip all document #text nodes before "elem"
@@ -255,22 +255,22 @@ private:
    * @param elem the name of the expected node
    * @param open true for open element, false for closed
    */
-  void skip(wstring &name, wstring const &elem, bool open);
+  void skip(UString &name, UString const &elem, bool open);
 
   /**
    * Skip all blank #text nodes before "name"
    * @param name the name of the node
    */
-  void skipBlanks(wstring &name);
+  void skipBlanks(UString &name);
 
 
-  void readString(list<int> &result, wstring const &name);
+  void readString(vector<int> &result, UString const &name);
 
   /**
    * Force an element to be empty, and check for it
    * @param name the element
    */
-  void requireEmptyError(wstring const &name);
+  void requireEmptyError(UString const &name);
 
   /**
    * Force an attribute to be specified, amd check for it
@@ -278,8 +278,8 @@ private:
    * @param attrname the name of the attribute
    * @param elemname the parent of the attribute
    */
-  void requireAttribute(wstring const &value, wstring const &attrname,
-                        wstring const &elemname);
+  void requireAttribute(UString const &value, UString const &attrname,
+                        UString const &elemname);
 
   /**
    * True if all the elements in the current node are blanks
@@ -287,7 +287,7 @@ private:
    */
   bool allBlanks();
 
-  bool valid(wstring const& dir) const;
+  bool valid(UString const& dir) const;
 
 public:
 
@@ -295,41 +295,47 @@ public:
    * Constants to represent the element and the attributes of
    * dictionaries
    */
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_DICTIONARY_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_ALPHABET_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_SDEFS_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_SDEF_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_N_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_PARDEFS_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_PARDEF_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_PAR_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_ENTRY_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_RESTRICTION_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_RESTRICTION_LR_VAL;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_RESTRICTION_RL_VAL;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_PAIR_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_LEFT_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_RIGHT_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_S_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_M_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_REGEXP_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_SECTION_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_ID_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_TYPE_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_IDENTITY_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_IDENTITYGROUP_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_JOIN_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_BLANK_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_POSTGENERATOR_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_GROUP_ELEM;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_LEMMA_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_IGNORE_YES_VAL;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_ALT_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_V_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_VL_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_VR_ATTR;
-  LTTOOLBOX_IMPORTS static wstring const COMPILER_WEIGHT_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_DICTIONARY_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ALPHABET_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_SDEFS_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_SDEF_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_N_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_PARDEFS_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_PARDEF_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_PAR_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ENTRY_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_RESTRICTION_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_RESTRICTION_LR_VAL;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_RESTRICTION_RL_VAL;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_PAIR_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_LEFT_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_RIGHT_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_S_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_M_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_REGEXP_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_SECTION_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ID_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_TYPE_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_IDENTITY_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_IDENTITYGROUP_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_JOIN_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_BLANK_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_POSTGENERATOR_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_GROUP_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_LEMMA_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_IGNORE_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_IGNORE_YES_VAL;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ALT_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_V_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_VL_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_VR_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_WEIGHT_ATTR;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_TEXT_NODE;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_COMMENT_NODE;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ACX_ANALYSIS_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ACX_CHAR_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ACX_EQUIV_CHAR_ELEM;
+  LTTOOLBOX_IMPORTS static UString const COMPILER_ACX_VALUE_ATTR;
 
   /**
    * Constructor
@@ -344,12 +350,12 @@ public:
   /**
    * Compile dictionary to letter transducers
    */
-  void parse(string const &file, wstring const &dir);
+  void parse(string const &file, UString const &dir);
 
   /**
    * Read ACX file
    */
-  void parseACX(string const &file, wstring const &dir);
+  void parseACX(string const &file, UString const &dir);
 
 
   /**
@@ -372,25 +378,25 @@ public:
    * Set the alt value to use in compilation
    * @param a the value
    */
-  void setAltValue(string const &a);
+  void setAltValue(UString const &a);
 
   /**
    * Set the variant value to use in compilation
    * @param v the value
    */
-  void setVariantValue(string const &v);
+  void setVariantValue(UString const &v);
 
   /**
    * Set the variant_left value to use in compilation
    * @param v the value
    */
-  void setVariantLeftValue(string const &v);
+  void setVariantLeftValue(UString const &v);
 
   /**
    * Set the variant_right value to use in compilation
    * @param v the value
    */
-  void setVariantRightValue(string const &v);
+  void setVariantRightValue(UString const &v);
 };
 
 
diff --git a/lttoolbox/compression.cc b/lttoolbox/compression.cc
index 0ba78b5..42c4d2b 100644
--- a/lttoolbox/compression.cc
+++ b/lttoolbox/compression.cc
@@ -21,13 +21,15 @@
 #include <cmath>
 #include <limits>
 #include <iostream>
+#include <utf8.h>
+#include <vector>
 
 void
 Compression::writeByte(unsigned char byte, FILE *output)
 {
   if(fwrite_unlocked(&byte, 1, 1, output) != 1)
   {
-    wcerr << L"I/O Error writing" << endl;
+    cerr << "I/O Error writing" << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -39,7 +41,7 @@ Compression::readByte(FILE *input)
   if(fread_unlocked(&value, 1, 1, input) != 1)
   {
 //    Not uncomment this code since
-//    wcerr << L"I/O Error reading" << endl;
+//    cerr << "I/O Error reading" << endl;
 //    exit(EXIT_FAILURE);
   }
 
@@ -86,7 +88,7 @@ Compression::multibyte_write(unsigned int value, FILE *output)
   }
   else
   {
-    wcerr << L"Out of range: " << value << endl;
+    cerr << "Out of range: " << value << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -133,7 +135,7 @@ Compression::multibyte_write(unsigned int value, ostream &output)
   }
   else
   {
-    wcerr << "Out of range: " << value << endl;
+    cerr << "Out of range: " << value << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -254,48 +256,26 @@ Compression::multibyte_read(istream &input)
 
 
 void
-Compression::wstring_write(wstring const &str, FILE *output)
+Compression::string_write(UString const &str, FILE *output)
 {
-  Compression::multibyte_write(str.size(), output);
-  for(auto c : str)
+  vector<int32_t> vec;
+  ustring_to_vec32(str, vec);
+  Compression::multibyte_write(vec.size(), output);
+  for(auto c : vec)
   {
-    Compression::multibyte_write(static_cast<int>(c), output);
+    Compression::multibyte_write(c, output);
   }
 }
 
-wstring
-Compression::wstring_read(FILE *input)
-{
-  wstring retval = L"";
-
-  for(unsigned int i = 0, limit = Compression::multibyte_read(input);
-      i != limit; i++)
-  {
-    retval += static_cast<wchar_t>(Compression::multibyte_read(input));
-  }
-
-  return retval;
-}
-
-void
-Compression::string_write(string const &str, FILE *output)
-{
-  Compression::multibyte_write(str.size(), output);
-  for(auto c : str)
-  {
-    Compression::multibyte_write(static_cast<int>(c), output);
-  }
-}
-
-string
+UString
 Compression::string_read(FILE *input)
 {
-  string retval = "";
+  UString retval;
+  unsigned int limit = Compression::multibyte_read(input);
+  retval.reserve(limit);
 
-  for(unsigned int i = 0, limit = Compression::multibyte_read(input);
-      i != limit; i++)
-  {
-    retval += static_cast<char>(Compression::multibyte_read(input));
+  for(unsigned int i = 0; i != limit; i++) {
+    retval += static_cast<UChar32>(Compression::multibyte_read(input));
   }
 
   return retval;
diff --git a/lttoolbox/compression.h b/lttoolbox/compression.h
index 8af6cf9..8b5a2b1 100644
--- a/lttoolbox/compression.h
+++ b/lttoolbox/compression.h
@@ -19,9 +19,9 @@
 
 #include <cstdio>
 #include <cstdint>
-#include <string>
 #include <iostream>
 #include <stdexcept>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
@@ -42,7 +42,7 @@ enum TD_FEATURES : uint64_t {
 
 
 inline auto write_u64(FILE *out, uint64_t value) {
-  auto rv = fwrite(reinterpret_cast<const char*>(&value), 1, sizeof(value), out);
+  auto rv = fwrite_unlocked(reinterpret_cast<const char*>(&value), 1, sizeof(value), out);
   if (rv != sizeof(value)) {
     throw std::runtime_error("Failed to write uint64_t");
   }
@@ -77,7 +77,7 @@ inline auto write_le(Stream& out, uint64_t value) {
 
 inline auto read_u64(FILE *in) {
   uint64_t value = 0;
-  if (fread(reinterpret_cast<char*>(&value), 1, sizeof(value), in) != sizeof(value)) {
+  if (fread_unlocked(reinterpret_cast<char*>(&value), 1, sizeof(value), in) != sizeof(value)) {
     throw std::runtime_error("Failed to read uint64_t");
   }
   return value;
@@ -174,23 +174,6 @@ public:
    */
   static unsigned int multibyte_read(istream &is);
 
-  /**
-   * This method allows to write a wide string to an output stream
-   * using its UCSencoding as integer.
-   * @see wstring_read()
-   * @param str the string to write.
-   * @param output the output stream.
-   */
-  static void wstring_write(wstring const &str, FILE *output);
-
-  /**
-   * This method reads a wide string from the input stream.
-   * @see wstring_write()
-   * @param input the input stream.
-   * @return the wide string read.
-   */
-  static wstring wstring_read(FILE *input);
-
   /**
    * This method allows to write a plain string to an output stream
    * using its UCSencoding as integer.
@@ -198,7 +181,7 @@ public:
    * @param str the string to write.
    * @param output the output stream.
    */
-  static void string_write(string const &str, FILE *output);
+  static void string_write(UString const &str, FILE *output);
 
   /**
    * This method reads a plain string from the input stream.
@@ -206,7 +189,7 @@ public:
    * @param input the input stream.
    * @return the string read.
    */
-  static string string_read(FILE *input);
+  static UString string_read(FILE *input);
 
   /**
    * Encodes a double value and writes it into the output stream
diff --git a/lttoolbox/deserialiser.h b/lttoolbox/deserialiser.h
index 4697640..b5ae2f2 100644
--- a/lttoolbox/deserialiser.h
+++ b/lttoolbox/deserialiser.h
@@ -33,6 +33,8 @@
 #include <type_traits>
 #include <iterator>
 
+#include <unicode/uchar.h>
+
 template <typename DeserialisedType> class Deserialiser;
 
 template <typename value_type>
@@ -76,12 +78,12 @@ public:
   inline static uint32_t deserialise(std::istream &Stream_);
 };
 
-template <> class Deserialiser<wchar_t> {
+template <> class Deserialiser<char> {
 public:
-  inline static wchar_t deserialise(std::istream &Stream_);
+  inline static char deserialise(std::istream &Stream_);
 };
 
-template <> class Deserialiser<char> {
+template <> class Deserialiser<UChar> {
 public:
   inline static char deserialise(std::istream &Stream_);
 };
@@ -168,14 +170,14 @@ uint32_t Deserialiser<uint32_t>::deserialise(std::istream &Stream_) {
   return int_deserialise<uint64_t>(Stream_);
 }
 
-wchar_t Deserialiser<wchar_t>::deserialise(std::istream &Stream_) {
-  return int_deserialise<uint32_t>(Stream_);
-}
-
 char Deserialiser<char>::deserialise(std::istream &Stream_) {
   return int_deserialise<uint8_t>(Stream_);
 }
 
+char Deserialiser<UChar>::deserialise(std::istream &Stream_) {
+  return int_deserialise<uint16_t>(Stream_);
+}
+
 double Deserialiser<double>::deserialise(std::istream &Stream_) {
   union {
     uint64_t i;
diff --git a/lttoolbox/entry_token.cc b/lttoolbox/entry_token.cc
index f03bca5..f9401ab 100644
--- a/lttoolbox/entry_token.cc
+++ b/lttoolbox/entry_token.cc
@@ -61,14 +61,14 @@ EntryToken::destroy()
 }
 
 void
-EntryToken::setParadigm(wstring const &np)
+EntryToken::setParadigm(UString const &np)
 {
   parName = np;
   type = paradigm;
 }
 
 void
-EntryToken::setSingleTransduction(list<int> const &pi, list<int> const &pd, double const ew)
+EntryToken::setSingleTransduction(vector<int> const &pi, vector<int> const &pd, double const ew)
 {
   weight = ew;
   leftSide = pi;
@@ -77,9 +77,17 @@ EntryToken::setSingleTransduction(list<int> const &pi, list<int> const &pd, doub
 }
 
 void
-EntryToken::setRegexp(wstring const &r)
+EntryToken::setRegexp(UString const &r)
 {
-  myregexp = r;
+  myregexp.clear();
+  ustring_to_vec32(r, myregexp);
+  type = regexp;
+}
+
+void
+EntryToken::readRegexp(xmlTextReaderPtr reader)
+{
+  XMLParseUtil::readValueInto32(reader, myregexp);
   type = regexp;
 }
 
@@ -101,25 +109,25 @@ EntryToken::isRegexp() const
   return type == regexp;
 }
 
-wstring const &
+UString const &
 EntryToken::paradigmName() const
 {
   return parName;
 }
 
-list<int> const &
+vector<int> const &
 EntryToken::left() const
 {
   return leftSide;
 }
 
-list<int> const &
+vector<int> const &
 EntryToken::right() const
 {
   return rightSide;
 }
 
-wstring const &
+vector<int32_t> const &
 EntryToken::regExp() const
 {
   return myregexp;
diff --git a/lttoolbox/entry_token.h b/lttoolbox/entry_token.h
index 6b2886c..0b4b43a 100644
--- a/lttoolbox/entry_token.h
+++ b/lttoolbox/entry_token.h
@@ -18,8 +18,10 @@
 #define _ENTRYTOKEN_
 
 
-#include <list>
-#include <string>
+#include <vector>
+#include <lttoolbox/ustring.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <cstdint>
 
 using namespace std;
 
@@ -42,7 +44,7 @@ private:
   /**
    * Name of the paradigm (if it is of 'paradigm' 'type')
    */
-  wstring parName;
+  UString parName;
 
   /**
    * Weight value for the entry (default_weight if unspecified)
@@ -52,17 +54,17 @@ private:
   /**
    * Left side of transduction (if 'single_transduction')
    */
-  list<int> leftSide;
+  vector<int> leftSide;
 
   /**
    * Right side of transduction (if 'single_transduction')
    */
-  list<int> rightSide;
+  vector<int> rightSide;
 
   /**
    * Regular expression (if 'regexp')
    */
-  wstring myregexp;
+  vector<int32_t> myregexp;
 
   /**
    * copy method
@@ -99,7 +101,7 @@ public:
    * Sets the name of the paradigm.
    * @param np the paradigm name
    */
-  void setParadigm(wstring const &np);
+  void setParadigm(UString const &np);
 
   /**
    * Set both parts of a single transduction.
@@ -107,13 +109,19 @@ public:
    * @param pd right part
    * @param ew entry weight
    */
-  void setSingleTransduction(list<int> const &pi, list<int> const &pd, double const ew = 0);
+  void setSingleTransduction(vector<int> const &pi, vector<int> const &pd, double const ew = 0);
 
   /**
    * Set regular expression.
    * @param r the regular expression specification.
    */
-  void setRegexp(wstring const &r);
+  void setRegexp(UString const &r);
+
+  /**
+   * More efficient version of setRegexp()
+   * @param reader the current xml parser state
+   */
+  void readRegexp(xmlTextReaderPtr reader);
 
   /**
    * eTest EntryToken to detect if is a paradigm.
@@ -137,25 +145,25 @@ public:
    * Retrieve the name of the paradigm.
    * @return the name of the paradigm.
    */
-  wstring const & paradigmName() const;
+  UString const & paradigmName() const;
 
   /**
    * Retrieve the left part of the paradigm.
    * @return the left part of the paradigm.
    */
-  list<int> const & left() const;
+  vector<int> const & left() const;
 
   /**
    * Retrieve the right part of the paradigm.
    * @return the right part of the paradigm.
    */
-  list<int> const & right() const;
+  vector<int> const & right() const;
 
   /**
    * Retrieve the regular expression specification.
    * @return the regular expression specification.
    */
-  wstring const & regExp() const;
+  vector<int32_t> const & regExp() const;
 
   /**
    * Retrieve the weight value of the entry.
diff --git a/lttoolbox/expander.cc b/lttoolbox/expander.cc
index 8592331..1baf12a 100644
--- a/lttoolbox/expander.cc
+++ b/lttoolbox/expander.cc
@@ -25,9 +25,6 @@
 #include <iostream>
 #include <libxml/encoding.h>
 
-#if defined(_WIN32) && !defined(_MSC_VER)
-#include <utf8_fwrap.h>
-#endif
 
 using namespace std;
 
@@ -42,12 +39,12 @@ Expander::~Expander()
 }
 
 void
-Expander::expand(string const &file, FILE *output)
+Expander::expand(string const &file, UFILE* output)
 {
   reader = xmlReaderForFile(file.c_str(), NULL, 0);
   if(reader == NULL)
   {
-    wcerr << "Error: Cannot open '" << file << "'." << endl;
+    cerr << "Error: Cannot open '" << file << "'." << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -60,7 +57,7 @@ Expander::expand(string const &file, FILE *output)
 
   if(ret != 0)
   {
-    wcerr << L"Error: Parse error at the end of input." << endl;
+    cerr << "Error: Parse error at the end of input." << endl;
   }
 
   xmlFreeTextReader(reader);
@@ -78,17 +75,17 @@ Expander::procParDef()
   }
   else
   {
-    current_paradigm = L"";
+    current_paradigm.clear();
   }
 }
 
 void
-Expander::requireEmptyError(wstring const &name)
+Expander::requireEmptyError(UString const &name)
 {
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Non-empty element '<" << name << L">' should be empty." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Non-empty element '<" << name << ">' should be empty." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -97,7 +94,7 @@ bool
 Expander::allBlanks()
 {
   bool flag = true;
-  wstring text = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
+  UString text = XMLParseUtil::readValue(reader);
 
   for(auto c : text)
   {
@@ -108,16 +105,16 @@ Expander::allBlanks()
 }
 
 void
-Expander::readString(wstring &result, wstring const &name)
+Expander::readString(UString &result, UString const &name)
 {
-  if(name == L"#text")
+  if(name == Compiler::COMPILER_TEXT_NODE)
   {
-    wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-    wstring escaped = L"^$/<>{}\\*@#+~:";
+    UString value = XMLParseUtil::readValue(reader);
+    UString escaped = "^$/<>{}\\*@#+~:"_u;
     for(size_t i = value.size()-1; i > 0; i--)
     {
-      if(escaped.find(value[i]) != wstring::npos) {
-        value.insert(value.begin()+i, L'\\');
+      if(escaped.find(value[i]) != UString::npos) {
+        value.insert(value.begin()+i, '\\');
       }
     }
     result.append(value);
@@ -125,105 +122,105 @@ Expander::readString(wstring &result, wstring const &name)
   else if(name == Compiler::COMPILER_BLANK_ELEM)
   {
     requireEmptyError(name);
-    result += L' ';
+    result += ' ';
   }
   else if(name == Compiler::COMPILER_M_ELEM)
   {
     requireEmptyError(name);
     if(keep_boundaries)
     {
-      result += L'>';
+      result += '>';
     }
   }
   else if(name == Compiler::COMPILER_JOIN_ELEM)
   {
     requireEmptyError(name);
-    result += L'+';
+    result += '+';
   }
   else if(name == Compiler::COMPILER_POSTGENERATOR_ELEM)
   {
     requireEmptyError(name);
-    result += L'~';
+    result += '~';
   }
   else if(name == Compiler::COMPILER_GROUP_ELEM)
   {
     int type=xmlTextReaderNodeType(reader);
     if(type != XML_READER_TYPE_END_ELEMENT)
     {
-      result += L'#';
+      result += '#';
     }
   }
   else if(name == Compiler::COMPILER_S_ELEM)
   {
     requireEmptyError(name);
-    result += L'<';
+    result += '<';
     result.append(attrib(Compiler::COMPILER_N_ATTR));
-    result += L'>';
+    result += '>';
   }
   else
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid specification of element '<" << name;
-    wcerr << L">' in this context." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid specification of element '<" << name;
+    cerr << ">' in this context." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
 void
-Expander::skipBlanks(wstring &name)
+Expander::skipBlanks(UString &name)
 {
-  if(name == L"#text")
+  if(name == Compiler::COMPILER_TEXT_NODE)
   {
     if(!allBlanks())
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Invalid construction." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Invalid construction." << endl;
       exit(EXIT_FAILURE);
     }
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 }
 
 void
-Expander::skip(wstring &name, wstring const &elem)
+Expander::skip(UString &name, UString const &elem)
 {
   xmlTextReaderRead(reader);
-  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+  name = XMLParseUtil::readName(reader);
 
-  if(name == L"#text")
+  if(name == Compiler::COMPILER_TEXT_NODE)
   {
     if(!allBlanks())
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Invalid construction." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Invalid construction." << endl;
       exit(EXIT_FAILURE);
     }
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 
   if(name != elem)
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Expected '<" << elem << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Expected '<" << elem << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-wstring
+UString
 Expander::procIdentity()
 {
-  wstring both_sides = L"";
+  UString both_sides;
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wstring name = L"";
+    UString name;
 
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == Compiler::COMPILER_IDENTITY_ELEM)
       {
         break;
@@ -234,21 +231,21 @@ Expander::procIdentity()
   return both_sides;
 }
 
-pair<wstring, wstring>
+pair<UString, UString>
 Expander::procIdentityGroup()
 {
-  wstring lhs = L"";
-  wstring rhs = L"#";
-  wstring both_sides = L"";
+  UString lhs;
+  UString rhs = "#"_u;
+  UString both_sides;
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wstring name = L"";
+    UString name;
 
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == Compiler::COMPILER_IDENTITYGROUP_ELEM)
       {
         break;
@@ -259,25 +256,25 @@ Expander::procIdentityGroup()
   lhs += both_sides;
   rhs += both_sides;
 
-  pair<wstring, wstring> e(lhs, rhs);
+  pair<UString, UString> e(lhs, rhs);
   return e;
 }
 
-pair<wstring, wstring>
+pair<UString, UString>
 Expander::procTransduction()
 {
-  wstring lhs = L"", rhs = L"";
-  wstring name = L"";
+  UString lhs, rhs;
+  UString name;
 
   skip(name, Compiler::COMPILER_LEFT_ELEM);
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    name = L"";
+    name.clear();
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == Compiler::COMPILER_LEFT_ELEM)
       {
         break;
@@ -290,11 +287,11 @@ Expander::procTransduction()
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    name = L"";
+    name.clear();
     while(true)
     {
       xmlTextReaderRead(reader);
-      name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      name = XMLParseUtil::readName(reader);
       if(name == Compiler::COMPILER_RIGHT_ELEM)
       {
         break;
@@ -305,67 +302,67 @@ Expander::procTransduction()
 
   skip(name, Compiler::COMPILER_PAIR_ELEM);
 
-  pair<wstring, wstring> e(lhs, rhs);
+  pair<UString, UString> e(lhs, rhs);
   return e;
 }
 
-wstring
-Expander::attrib(wstring const &name)
+UString
+Expander::attrib(UString const &name)
 {
   return XMLParseUtil::attrib(reader, name);
 }
 
-wstring
+UString
 Expander::procPar()
 {
   EntryToken e;
-  wstring paradigm_name = attrib(Compiler::COMPILER_N_ATTR);
+  UString paradigm_name = attrib(Compiler::COMPILER_N_ATTR);
   return paradigm_name;
 }
 
 void
-Expander::requireAttribute(wstring const &value, wstring const &attrname,
-                           wstring const &elemname)
+Expander::requireAttribute(UString const &value, UString const &attrname,
+                           UString const &elemname)
 {
-  if(value == L"")
+  if(value.empty())
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): '<" << elemname;
-    wcerr << L"' element must specify non-void '";
-    wcerr<< attrname << L"' attribute." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): '<" << elemname;
+    cerr << "' element must specify non-void '";
+    cerr<< attrname << "' attribute." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
 void
-Expander::procEntry(FILE *output)
+Expander::procEntry(UFILE* output)
 {
-  wstring attribute = this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
-  wstring entrname  = this->attrib(Compiler::COMPILER_LEMMA_ATTR);
-  wstring altval    = this->attrib(Compiler::COMPILER_ALT_ATTR);
-  wstring varval    = this->attrib(Compiler::COMPILER_V_ATTR);
-  wstring varl      = this->attrib(Compiler::COMPILER_VL_ATTR);
-  wstring varr      = this->attrib(Compiler::COMPILER_VR_ATTR);
-  wstring wsweight  = this->attrib(Compiler::COMPILER_WEIGHT_ATTR);
-
-  wstring myname = L"";
-  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == L"yes"
-   || (altval != L"" && altval != alt)
-   || (varval != L"" && varval != variant && attribute == Compiler::COMPILER_RESTRICTION_RL_VAL)
-   || ((varl != L"" && varl != variant_left) && (varr != L"" && varr != variant_right))
-   || (varl != L"" && varl != variant_left && attribute == Compiler::COMPILER_RESTRICTION_RL_VAL)
-   || (varr != L"" && varr != variant_right && attribute == Compiler::COMPILER_RESTRICTION_LR_VAL))
+  UString attribute = this->attrib(Compiler::COMPILER_RESTRICTION_ATTR);
+  UString entrname  = this->attrib(Compiler::COMPILER_LEMMA_ATTR);
+  UString altval    = this->attrib(Compiler::COMPILER_ALT_ATTR);
+  UString varval    = this->attrib(Compiler::COMPILER_V_ATTR);
+  UString varl      = this->attrib(Compiler::COMPILER_VL_ATTR);
+  UString varr      = this->attrib(Compiler::COMPILER_VR_ATTR);
+  UString wsweight  = this->attrib(Compiler::COMPILER_WEIGHT_ATTR);
+
+  UString myname;
+  if(this->attrib(Compiler::COMPILER_IGNORE_ATTR) == Compiler::COMPILER_IGNORE_YES_VAL
+   || (!altval.empty() && altval != alt)
+   || (!varval.empty() && varval != variant && attribute == Compiler::COMPILER_RESTRICTION_RL_VAL)
+   || ((!varl.empty() && varl != variant_left) && (!varr.empty() && varr != variant_right))
+   || (!varl.empty() && varl != variant_left && attribute == Compiler::COMPILER_RESTRICTION_RL_VAL)
+   || (!varr.empty() && varr != variant_right && attribute == Compiler::COMPILER_RESTRICTION_LR_VAL))
   {
     do
     {
       int ret = xmlTextReaderRead(reader);
       if(ret != 1)
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Parse error." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Parse error." << endl;
         exit(EXIT_FAILURE);
       }
-      myname = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+      myname = XMLParseUtil::readName(reader);
     }
     while(myname != Compiler::COMPILER_ENTRY_ELEM);
     return;
@@ -373,19 +370,19 @@ Expander::procEntry(FILE *output)
 
   EntList items, items_lr, items_rl;
   if(attribute == Compiler::COMPILER_RESTRICTION_LR_VAL
-   || (varval != L"" && varval != variant && attribute != Compiler::COMPILER_RESTRICTION_RL_VAL)
-   || (varl != L"" && varl != variant_left))
+   || (!varval.empty() && varval != variant && attribute != Compiler::COMPILER_RESTRICTION_RL_VAL)
+   || (!varl.empty() && varl != variant_left))
   {
-    items_lr.push_back(make_pair(L"", L""));
+    items_lr.push_back(make_pair(""_u, ""_u));
   }
   else if(attribute == Compiler::COMPILER_RESTRICTION_RL_VAL
-        || (varr != L"" && varr != variant_right))
+        || (!varr.empty() && varr != variant_right))
   {
-    items_rl.push_back(make_pair(L"", L""));
+    items_rl.push_back(make_pair(""_u, ""_u));
   }
   else
   {
-    items.push_back(make_pair(L"", L""));
+    items.push_back(make_pair(""_u, ""_u));
   }
 
   while(true)
@@ -393,53 +390,53 @@ Expander::procEntry(FILE *output)
     int ret = xmlTextReaderRead(reader);
     if(ret != 1)
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Parse error." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Parse error." << endl;
       exit(EXIT_FAILURE);
     }
-    wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    UString name = XMLParseUtil::readName(reader);
     skipBlanks(name);
 
     int type = xmlTextReaderNodeType(reader);
     if(name == Compiler::COMPILER_PAIR_ELEM)
     {
-      pair<wstring, wstring> p = procTransduction();
+      pair<UString, UString> p = procTransduction();
       append(items, p);
       append(items_lr, p);
       append(items_rl, p);
     }
     else if(name == Compiler::COMPILER_IDENTITY_ELEM)
     {
-      wstring val = procIdentity();
+      UString val = procIdentity();
       append(items, val);
       append(items_lr, val);
       append(items_rl, val);
     }
     else if(name == Compiler::COMPILER_IDENTITYGROUP_ELEM)
     {
-      pair<wstring, wstring> p = procIdentityGroup();
+      pair<UString, UString> p = procIdentityGroup();
       append(items, p);
       append(items_lr, p);
       append(items_rl, p);
     }
     else if(name == Compiler::COMPILER_REGEXP_ELEM)
     {
-      wstring val = L"__REGEXP__" + procRegexp();
+      UString val = "__REGEXP__"_u + procRegexp();
       append(items, val);
       append(items_lr, val);
       append(items_rl, val);
     }
     else if(name == Compiler::COMPILER_PAR_ELEM)
     {
-      wstring p = procPar();
+      UString p = procPar();
       // detection of the use of undefined paradigms
 
       if(paradigm.find(p) == paradigm.end() &&
          paradigm_lr.find(p) == paradigm_lr.end() &&
          paradigm_rl.find(p) == paradigm_rl.end())
       {
-        wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << L"): Undefined paradigm '" << p << L"'." <<endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Undefined paradigm '" << p << "'." <<endl;
         exit(EXIT_FAILURE);
       }
 
@@ -491,32 +488,19 @@ Expander::procEntry(FILE *output)
     }
     else if(name == Compiler::COMPILER_ENTRY_ELEM && type == XML_READER_TYPE_END_ELEMENT)
     {
-      if(current_paradigm == L"")
+      if(current_paradigm.empty())
       {
         for(auto& it : items)
         {
-          fputws_unlocked(it.first.c_str(), output);
-          fputwc_unlocked(L':', output);
-          fputws_unlocked(it.second.c_str(), output);
-          fputwc_unlocked(L'\n', output);
+          u_fprintf(output, "%S:%S\n", it.first.c_str(), it.second.c_str());
         }
         for(auto& it : items_lr)
         {
-          fputws_unlocked(it.first.c_str(), output);
-          fputwc_unlocked(L':', output);
-          fputwc_unlocked(L'>', output);
-          fputwc_unlocked(L':', output);
-          fputws_unlocked(it.second.c_str(), output);
-          fputwc_unlocked(L'\n', output);
+          u_fprintf(output, "%S:>:%S\n", it.first.c_str(), it.second.c_str());
         }
         for(auto& it : items_rl)
         {
-          fputws_unlocked(it.first.c_str(), output);
-          fputwc_unlocked(L':', output);
-          fputwc_unlocked(L'<', output);
-          fputwc_unlocked(L':', output);
-          fputws_unlocked(it.second.c_str(), output);
-          fputwc_unlocked(L'\n', output);
+          u_fprintf(output, "%S:<:%S\n", it.first.c_str(), it.second.c_str());
         }
       }
       else
@@ -531,31 +515,30 @@ Expander::procEntry(FILE *output)
 
       return;
     }
-    else if(name == L"#text" && allBlanks())
+    else if(name == Compiler::COMPILER_TEXT_NODE && allBlanks())
     {
     }
-    else if(name == L"#comment")
+    else if(name == Compiler::COMPILER_COMMENT_NODE)
     {
     }
     else
     {
-      wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-      wcerr << L"): Invalid inclusion of '<" << name << L">' into '<" << Compiler::COMPILER_ENTRY_ELEM;
-      wcerr << L">'." << endl;
+      cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+      cerr << "): Invalid inclusion of '<" << name << ">' into '<" << Compiler::COMPILER_ENTRY_ELEM;
+      cerr << ">'." << endl;
       exit(EXIT_FAILURE);
     }
   }
 }
 
 void
-Expander::procNode(FILE *output)
+Expander::procNode(UFILE *output)
 {
-  xmlChar const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
+  UString name = XMLParseUtil::readName(reader);
 
   // DO: optimize the execution order of this string "ifs"
 
-  if(name == L"#text")
+  if(name == Compiler::COMPILER_TEXT_NODE)
   {
     /* ignorar */
   }
@@ -591,23 +574,23 @@ Expander::procNode(FILE *output)
   {
     /* ignorar */
   }
-  else if(name == L"#comment")
+  else if(name == Compiler::COMPILER_COMMENT_NODE)
   {
     /* ignorar */
   }
   else
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-wstring
+UString
 Expander::procRegexp()
 {
   xmlTextReaderRead(reader);
-  wstring re = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
+  UString re = XMLParseUtil::readValue(reader);
   xmlTextReaderRead(reader);
   return re;
 }
@@ -622,7 +605,7 @@ Expander::append(EntList &result,
   {
     for(auto& it2 : endings)
     {
-      temp.push_back(pair<wstring, wstring>(it.first + it2.first,
+      temp.push_back(pair<UString, UString>(it.first + it2.first,
                           it.second + it2.second));
     }
   }
@@ -631,7 +614,7 @@ Expander::append(EntList &result,
 }
 
 void
-Expander::append(EntList &result, wstring const &endings)
+Expander::append(EntList &result, UString const &endings)
 {
   for(auto& it : result)
   {
@@ -642,7 +625,7 @@ Expander::append(EntList &result, wstring const &endings)
 
 void
 Expander::append(EntList &result,
-                 pair<wstring, wstring> const &endings)
+                 pair<UString, UString> const &endings)
 {
   for(auto& it : result)
   {
@@ -652,27 +635,27 @@ Expander::append(EntList &result,
 }
 
 void
-Expander::setAltValue(string const &a)
+Expander::setAltValue(UString const &a)
 {
-  alt = XMLParseUtil::stows(a);
+  alt = a;
 }
 
 void
-Expander::setVariantValue(string const &v)
+Expander::setVariantValue(UString const &v)
 {
-  variant = XMLParseUtil::stows(v);
+  variant = v;
 }
 
 void
-Expander::setVariantLeftValue(string const &v)
+Expander::setVariantLeftValue(UString const &v)
 {
-  variant_left = XMLParseUtil::stows(v);
+  variant_left = v;
 }
 
 void
-Expander::setVariantRightValue(string const &v)
+Expander::setVariantRightValue(UString const &v)
 {
-  variant_right = XMLParseUtil::stows(v);
+  variant_right = v;
 }
 
 void
@@ -680,4 +663,3 @@ Expander::setKeepBoundaries(bool keep)
 {
   keep_boundaries = keep;
 }
-
diff --git a/lttoolbox/expander.h b/lttoolbox/expander.h
index 74da7e2..3d2c6df 100644
--- a/lttoolbox/expander.h
+++ b/lttoolbox/expander.h
@@ -17,8 +17,7 @@
 #ifndef _EXPANDER_
 #define _EXPANDER_
 
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/string_to_wostream.h>
+#include <lttoolbox/ustring.h>
 
 #include <list>
 #include <map>
@@ -27,7 +26,7 @@
 
 using namespace std;
 
-typedef list<pair<wstring, wstring> > EntList;
+typedef list<pair<UString, UString> > EntList;
 
 /**
  * An expander of dictionaries
@@ -43,33 +42,33 @@ private:
   /**
    * The alt value
    */
-  wstring alt;
+  UString alt;
 
   /**
    * The variant value (monodix)
    */
-  wstring variant;
+  UString variant;
 
   /**
    * The variant value (left side of bidix)
    */
-  wstring variant_left;
+  UString variant_left;
 
   /**
    * The variant value (right side of bidix)
    */
-  wstring variant_right;
+  UString variant_right;
 
   /**
    * The paradigm being compiled
    */
-  wstring current_paradigm;
+  UString current_paradigm;
 
   /**
    * The direction of the compilation, 'lr' (left-to-right) or 'rl'
    * (right-to-left)
    */
-  wstring direction;
+  UString direction;
 
   /**
    * Do we print boundaries or not?
@@ -79,16 +78,16 @@ private:
   /**
    * Paradigms
    */
-  map<wstring, EntList, Ltstr> paradigm;
+  map<UString, EntList> paradigm;
 
-  map<wstring, EntList, Ltstr> paradigm_lr;
+  map<UString, EntList> paradigm_lr;
 
-  map<wstring, EntList, Ltstr> paradigm_rl;
+  map<UString, EntList> paradigm_rl;
 
   /**
    * Method to parse an XML Node
    */
-  void procNode(FILE *output);
+  void procNode(UFILE* output);
 
   /**
    * Parse the &lt;pardef&gt; element
@@ -98,67 +97,67 @@ private:
   /**
    * Parse the &lt;e&gt; element
    */
-  void procEntry(FILE *output);
+  void procEntry(UFILE* output);
 
   /**
    * Parse the &lt;re&gt; element
    * @return the string representing the regular expression
    */
-  wstring procRegexp();
+  UString procRegexp();
 
   /**
    * Gets an attribute value with their name and the current context
    * @param name the name of the attribute
    * @return the value of the attribute
    */
-  wstring attrib(wstring const &name);
+  UString attrib(UString const &name);
 
   /**
    * Parse the &lt;p&gt; element
    * @return a pair of strings, left part and right part of a transduction
    */
-  pair<wstring, wstring> procTransduction();
+  pair<UString, UString> procTransduction();
 
   /**
    * Parse the &lt;i&gt; element
    * @return a string from the dictionary's entry
    */
-  wstring procIdentity();
+  UString procIdentity();
 
   /**
    * Parse the &lt;ig&gt; element
    * @return a pair of strings, whose right part begins with '#'
    * but are otherwise identical
    */
-  pair<wstring, wstring> procIdentityGroup();
+  pair<UString, UString> procIdentityGroup();
 
   /**
    * Parse the &lt;par&gt; element
    * @return the name of the paradigm
    */
-  wstring procPar();
+  UString procPar();
 
   /**
    * Skip all document #text nodes before "elem"
    * @param name the name of the node
    * @param elem the name of the expected node
    */
-  void skip(wstring &name, wstring const &elem);
+  void skip(UString &name, UString const &elem);
 
   /**
    * Skip all blank #text nodes before "name"
    * @param name the name of the node
    */
-  void skipBlanks(wstring &name);
+  void skipBlanks(UString &name);
 
 
-  void readString(wstring &result, wstring const &name);
+  void readString(UString &result, UString const &name);
 
   /**
    * Force an element to be empty, and check for it
    * @param name the element
    */
-  void requireEmptyError(wstring const &name);
+  void requireEmptyError(UString const &name);
 
   /**
    * Force an attribute to be specified, amd check for it
@@ -166,8 +165,8 @@ private:
    * @param attrname the name of the attribute
    * @param elemname the parent of the attribute
    */
-  void requireAttribute(wstring const &value, wstring const &attrname,
-                        wstring const &elemname);
+  void requireAttribute(UString const &value, UString const &attrname,
+                        UString const &elemname);
 
   /**
    * True if all the elements in the current node are blanks
@@ -181,8 +180,8 @@ private:
    *               this method, the result of concatenations.
    * @param endings the endings to be appended.
    */
-  static void append(list<pair<wstring, wstring> > &result,
-                     list<pair<wstring, wstring> > const &endings);
+  static void append(list<pair<UString, UString> > &result,
+                     list<pair<UString, UString> > const &endings);
 
   /**
    * Append a list of endings to a list of current transductions.
@@ -190,8 +189,8 @@ private:
    *               this method, the result of concatenations.
    * @param endings the endings to be appended.
    */
-  static void append(list<pair<wstring, wstring> > &result,
-                     wstring const &endings);
+  static void append(list<pair<UString, UString> > &result,
+                     UString const &endings);
 
   /**
    * Append a list of endings to a list of current transductions.
@@ -199,8 +198,8 @@ private:
    *               this method, the result of concatenations.
    * @param endings the endings to be appended.
    */
-  static void append(list<pair<wstring, wstring> > &result,
-                     pair<wstring, wstring> const &endings);
+  static void append(list<pair<UString, UString> > &result,
+                     pair<UString, UString> const &endings);
 
 public:
   /**
@@ -216,31 +215,31 @@ public:
   /**
    * Compile dictionary to letter transducers
    */
-  void expand(string const &file, FILE *output);
+  void expand(string const &file, UFILE* output);
 
   /**
    * Set the alt value to use in compilation
    * @param a the value
    */
-   void setAltValue(string const &a);
+   void setAltValue(UString const &a);
 
   /**
    * Set the variant value to use in expansion
    * @param v the value
    */
-   void setVariantValue(string const &v);
+   void setVariantValue(UString const &v);
 
   /**
    * Set the variant_left value to use in expansion
    * @param v the value
    */
-   void setVariantLeftValue(string const &v);
+   void setVariantLeftValue(UString const &v);
 
   /**
    * Set the variant_right value to use in expansion
    * @param v the value
    */
-   void setVariantRightValue(string const &v);
+   void setVariantRightValue(UString const &v);
 
   /**
    * Set if we are going to keep morpheme boundaries
diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc
index f4ec2a1..732acc8 100644
--- a/lttoolbox/fst_processor.cc
+++ b/lttoolbox/fst_processor.cc
@@ -22,49 +22,37 @@
 #include <iostream>
 #include <cerrno>
 #include <climits>
-#include <cwctype>
 
-#if defined(_WIN32) && !defined(_MSC_VER)
-#include <utf8_fwrap.h>
-#endif
 
 using namespace std;
 
 
-FSTProcessor::FSTProcessor() :
-default_weight(0.0000),
-outOfWord(false),
-isLastBlankTM(false)
+UString const FSTProcessor::XML_TEXT_NODE           = "#text"_u;
+UString const FSTProcessor::XML_COMMENT_NODE        = "#comment"_u;
+UString const FSTProcessor::XML_IGNORED_CHARS_ELEM  = "ignored-chars"_u;
+UString const FSTProcessor::XML_RESTORE_CHAR_ELEM   = "restore-char"_u;
+UString const FSTProcessor::XML_RESTORE_CHARS_ELEM  = "restore-chars"_u;
+UString const FSTProcessor::XML_VALUE_ATTR          = "value"_u;
+UString const FSTProcessor::XML_CHAR_ELEM           = "char"_u;
+UString const FSTProcessor::WBLANK_START            = "[["_u;
+UString const FSTProcessor::WBLANK_END              = "]]"_u;
+UString const FSTProcessor::WBLANK_FINAL            = "[[/]]"_u;
+
+
+FSTProcessor::FSTProcessor()
 {
   // escaped_chars chars
-  escaped_chars.insert(L'[');
-  escaped_chars.insert(L']');
-  escaped_chars.insert(L'{');
-  escaped_chars.insert(L'}');
-  escaped_chars.insert(L'^');
-  escaped_chars.insert(L'$');
-  escaped_chars.insert(L'/');
-  escaped_chars.insert(L'\\');
-  escaped_chars.insert(L'@');
-  escaped_chars.insert(L'<');
-  escaped_chars.insert(L'>');
-
-  caseSensitive = false;
-  dictionaryCase = false;
-  do_decomposition = false;
-  nullFlush = false;
-  nullFlushGeneration = false;
-  useIgnoredChars = false;
-  useDefaultIgnoredChars = true;
-  useRestoreChars = false;
-  displayWeightsMode = false;
-  showControlSymbols = false;
-  biltransSurfaceForms = false;
-  maxAnalyses = INT_MAX;
-  maxWeightClasses = INT_MAX;
-  compoundOnlyLSymbol = 0;
-  compoundRSymbol = 0;
-  compound_max_elements = 4;
+  escaped_chars.insert('[');
+  escaped_chars.insert(']');
+  escaped_chars.insert('{');
+  escaped_chars.insert('}');
+  escaped_chars.insert('^');
+  escaped_chars.insert('$');
+  escaped_chars.insert('/');
+  escaped_chars.insert('\\');
+  escaped_chars.insert('@');
+  escaped_chars.insert('<');
+  escaped_chars.insert('>');
 
   if(useDefaultIgnoredChars)
   {
@@ -126,28 +114,27 @@ FSTProcessor::parseRCX(string const &file)
 void
 FSTProcessor::procNodeICX()
 {
-  xmlChar  const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
-  if(name == L"#text")
+  UString name = XMLParseUtil::readName(reader);
+  if(name == XML_TEXT_NODE)
   {
     /* ignore */
   }
-  else if(name == L"ignored-chars")
+  else if(name == XML_IGNORED_CHARS_ELEM)
   {
     /* ignore */
   }
-  else if(name == L"char")
+  else if(name == XML_CHAR_ELEM)
   {
-    ignored_chars.insert(static_cast<int>(XMLParseUtil::attrib(reader, L"value")[0]));
+    ignored_chars.insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]));
   }
-  else if(name == L"#comment")
+  else if(name == XML_COMMENT_NODE)
   {
     /* ignore */
   }
   else
   {
-    wcerr << L"Error in ICX file (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error in ICX file (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -161,140 +148,54 @@ FSTProcessor::initDefaultIgnoredCharacters()
 void
 FSTProcessor::procNodeRCX()
 {
-  xmlChar  const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
-  if(name == L"#text")
+  UString name = XMLParseUtil::readName(reader);
+  if(name == XML_TEXT_NODE)
   {
     /* ignore */
   }
-  else if(name == L"restore-chars")
+  else if(name == XML_RESTORE_CHARS_ELEM)
   {
     /* ignore */
   }
-  else if(name == L"char")
+  else if(name == XML_CHAR_ELEM)
   {
-    rcx_current_char = static_cast<int>(XMLParseUtil::attrib(reader, L"value")[0]);
+    rcx_current_char = static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]);
   }
-  else if(name == L"restore-char")
+  else if(name == XML_RESTORE_CHAR_ELEM)
   {
-    rcx_map[rcx_current_char].insert(static_cast<int>(XMLParseUtil::attrib(reader, L"value")[0]));
+    rcx_map[rcx_current_char].insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]));
   }
-  else if(name == L"#comment")
+  else if(name == XML_COMMENT_NODE)
   {
     /* ignore */
   }
   else
   {
-    wcerr << L"Error in RCX file (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error in RCX file (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-wchar_t
-FSTProcessor::readEscaped(FILE *input)
-{
-  if(feof(input))
-  {
-    streamError();
-  }
-
-  wchar_t val = static_cast<wchar_t>(fgetwc_unlocked(input));
-
-  if(feof(input))
-  {
-    streamError();
-  }
-
-  return val;
-}
-
-wstring
-FSTProcessor::readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2)
-{
-  wstring result = L"";
-  result += delim1;
-  wchar_t c = delim1;
-
-  while(!feof(input) && c != delim2)
-  {
-    c = static_cast<wchar_t>(fgetwc_unlocked(input));
-    result += c;
-    if(c != L'\\')
-    {
-      continue;
-    }
-    else
-    {
-      result += static_cast<wchar_t>(readEscaped(input));
-    }
-  }
-
-  if(c != delim2)
-  {
-    streamError();
-  }
-
-  return result;
-}
-
-wstring
-FSTProcessor::readWblank(FILE *input)
-{
-  wstring result = L"";
-  result += L"[[";
-  wchar_t c = 0;
-
-  while(!feof(input))
-  {
-    c = static_cast<wchar_t>(fgetwc_unlocked(input));
-    result += c;
-
-    if(c == L'\\')
-    {
-      result += static_cast<wchar_t>(readEscaped(input));
-    }
-    else if(c == L']')
-    {
-      c = static_cast<wchar_t>(fgetwc_unlocked(input));
-      result += c;
-
-      if(c == L']')
-      {
-        break;
-      }
-    }
-  }
-
-  if(c != L']')
-  {
-    streamError();
-  }
-
-  return result;
-}
-
 bool
-FSTProcessor::wblankPostGen(FILE *input, FILE *output)
+FSTProcessor::wblankPostGen(InputFile& input, UFILE *output)
 {
-  wstring result = L"";
-  result += L"[[";
-  wchar_t c = 0;
+  UString result = WBLANK_START;
+  UChar32 c = 0;
   bool in_content = false;
 
-  while(!feof(input))
+  while(!input.eof())
   {
-    c = static_cast<wchar_t>(fgetwc_unlocked(input));
-
-    if(in_content && c == L'~')
+    c = input.get();
+    if(in_content && c == '~')
     {
-      if(result[result.size()-1] == L']') {
+      if(result[result.size()-1] == ']') {
         // We just saw the end of a wblank, may want to merge
         wblankqueue.push(result);
       }
       else {
         // wake-up-mark happened some characters into the wblanked word
-        fputws(result.c_str(), output);
+        write(result, output);
       }
       return true;
     }
@@ -303,21 +204,22 @@ FSTProcessor::wblankPostGen(FILE *input, FILE *output)
       result += c;
     }
 
-    if(c == L'\\')
+    if(c == '\\')
     {
-      result += static_cast<wchar_t>(readEscaped(input));
+      if (input.eof()) streamError();
+      result += input.get();
     }
-    else if(c == L']')
+    else if(c == ']')
     {
-      c = static_cast<wchar_t>(fgetwc_unlocked(input));
+      c = input.get();
       result += c;
 
-      if(c == L']')
+      if(c == ']')
       {
         int resultlen = result.size();
         if(result[resultlen-5] == '[' && result[resultlen-4] == '[' && result[resultlen-3] == '/') //ending blank [[/]]
         {
-          fputws(result.c_str(), output);
+          write(result, output);
           break;
         }
         else
@@ -328,7 +230,7 @@ FSTProcessor::wblankPostGen(FILE *input, FILE *output)
     }
   }
 
-  if(c != L']')
+  if(c != ']')
   {
     streamError();
   }
@@ -337,63 +239,65 @@ FSTProcessor::wblankPostGen(FILE *input, FILE *output)
 }
 
 int
-FSTProcessor::readAnalysis(FILE *input)
+FSTProcessor::readAnalysis(InputFile& input)
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wchar_t val = static_cast<wchar_t>(fgetwc_unlocked(input));
-  int altval = 0;
-  if(feof(input))
+  UChar32 val = input.get();
+  int32_t altval = 0;
+  if(input.eof())
   {
     input_buffer.add(0);        // so it's treated like the NUL byte
     return 0;
+  } else if(val == U_EOF) {
+    val = 0;
   }
 
   if((useIgnoredChars || useDefaultIgnoredChars) && ignored_chars.find(val) != ignored_chars.end())
   {
     input_buffer.add(val);
-    val = static_cast<wchar_t>(fgetwc_unlocked(input));
+    val = input.get();
   }
 
   if(escaped_chars.find(val) != escaped_chars.end())
   {
     switch(val)
     {
-      case L'<':
-        altval = static_cast<int>(alphabet(readFullBlock(input, L'<', L'>')));
+      case '<':
+        altval = alphabet(input.readBlock('<', '>'));
         input_buffer.add(altval);
         return altval;
 
-      case L'[':
-        val = static_cast<wchar_t>(fgetwc_unlocked(input));
+      case '[':
+        val = input.get();
 
-        if(val == L'[')
+        if(val == '[')
         {
-          blankqueue.push(readWblank(input));
+          blankqueue.push(input.finishWBlank());
         }
         else
         {
-          ungetwc_unlocked(val, input);
-          blankqueue.push(readFullBlock(input, L'[', L']'));
+          input.unget(val);
+          blankqueue.push(input.readBlock('[', ']'));
         }
 
-        input_buffer.add(static_cast<int>(L' '));
-        return static_cast<int>(L' ');
+        input_buffer.add(static_cast<int32_t>(' '));
+        return static_cast<int32_t>(' ');
 
-      case L'\\':
-        val = static_cast<wchar_t>(fgetwc_unlocked(input));
-        input_buffer.add(static_cast<int>(val));
+      case '\\':
+        val = input.get();
+        input_buffer.add(static_cast<int32_t>(val));
         return val;
 
       default:
         streamError();
     }
   }
-  if(val == L' ') {
-    blankqueue.push(L" ");
+  if(val == ' ') {
+    blankqueue.push(" "_u);
   }
 
   input_buffer.add(val);
@@ -401,7 +305,7 @@ FSTProcessor::readAnalysis(FILE *input)
 }
 
 int
-FSTProcessor::readTMAnalysis(FILE *input)
+FSTProcessor::readTMAnalysis(InputFile& input)
 {
   isLastBlankTM = false;
   if(!input_buffer.isEmpty())
@@ -409,64 +313,64 @@ FSTProcessor::readTMAnalysis(FILE *input)
     return input_buffer.next();
   }
 
-  wchar_t val = static_cast<wchar_t>(fgetwc_unlocked(input));
-  int altval = 0;
-  if(feof(input))
+  UChar32 val = input.get();
+  int32_t altval = 0;
+  if(input.eof())
   {
     return 0;
   }
 
-  if(escaped_chars.find(val) != escaped_chars.end() || iswdigit(val))
+  if(escaped_chars.find(val) != escaped_chars.end() || u_isdigit(val))
   {
     switch(val)
     {
-      case L'<':
-        altval = static_cast<int>(alphabet(readFullBlock(input, L'<', L'>')));
+      case '<':
+        altval = alphabet(input.readBlock('<', '>'));
         input_buffer.add(altval);
         return altval;
 
-      case L'[':
-        val = static_cast<wchar_t>(fgetwc_unlocked(input));
+      case '[':
+        val = input.get();
 
-        if(val == L'[')
+        if(val == '[')
         {
-          blankqueue.push(readWblank(input));
+          blankqueue.push(input.finishWBlank());
         }
         else
         {
-          ungetwc_unlocked(val, input);
-          blankqueue.push(readFullBlock(input, L'[', L']'));
+          input.unget(val);
+          blankqueue.push(input.readBlock('[', ']'));
         }
 
-        input_buffer.add(static_cast<int>(L' '));
+        input_buffer.add(static_cast<int32_t>(' '));
         isLastBlankTM = true;
-        return static_cast<int>(L' ');
+        return static_cast<int32_t>(' ');
 
-      case L'\\':
-        val = static_cast<wchar_t>(fgetwc_unlocked(input));
-        input_buffer.add(static_cast<int>(val));
+      case '\\':
+        val = input.get();
+        input_buffer.add(static_cast<int32_t>(val));
         return val;
-      case L'0':
-      case L'1':
-      case L'2':
-      case L'3':
-      case L'4':
-      case L'5':
-      case L'6':
-      case L'7':
-      case L'8':
-      case L'9':
-        {
-          wstring ws = L"";
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+        {
+          UString ws;
           do
           {
             ws += val;
-            val = static_cast<wchar_t>(fgetwc_unlocked(input));
-          } while(iswdigit(val));
-          ungetwc_unlocked(val, input);
-          input_buffer.add(alphabet(L"<n>"));
+            val = input.get();
+          } while(u_isdigit(val));
+          input.unget(val);
+          input_buffer.add(alphabet("<n>"_u));
           numbers.push_back(ws);
-          return alphabet(L"<n>");
+          return alphabet("<n>"_u);
         }
         break;
 
@@ -480,61 +384,61 @@ FSTProcessor::readTMAnalysis(FILE *input)
 }
 
 int
-FSTProcessor::readPostgeneration(FILE *input, FILE *output)
+FSTProcessor::readPostgeneration(InputFile& input, UFILE *output)
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wchar_t val = static_cast<wchar_t>(fgetwc_unlocked(input));
-  int altval = 0;
+  UChar32 val = input.get();
+  int32_t altval = 0;
   is_wblank = false;
-  if(feof(input))
+  if(input.eof())
   {
     return 0;
   }
 
   switch(val)
   {
-    case L'<':
-      altval = static_cast<int>(alphabet(readFullBlock(input, L'<', L'>')));
+    case '<':
+      altval = alphabet(input.readBlock('<', '>'));
       input_buffer.add(altval);
       return altval;
 
-    case L'[':
-      val = static_cast<wchar_t>(fgetwc_unlocked(input));
+    case '[':
+      val = input.get();
 
-      if(val == L'[')
+      if(val == '[')
       {
         if(collect_wblanks)
         {
-          wblankqueue.push(readWblank(input));
+          wblankqueue.push(input.finishWBlank());
           is_wblank = true;
-          return static_cast<int>(L' ');
+          return static_cast<int32_t>(' ');
         }
         else if(wblankPostGen(input, output))
         {
-          return static_cast<int>(L'~');
+          return static_cast<int32_t>('~');
         }
         else
         {
           is_wblank = true;
-          return static_cast<int>(L' ');
+          return static_cast<int32_t>(' ');
         }
       }
       else
       {
-        ungetwc_unlocked(val, input);
-        blankqueue.push(readFullBlock(input, L'[', L']'));
+        input.unget(val);
+        blankqueue.push(input.readBlock('[', ']'));
 
-        input_buffer.add(static_cast<int>(L' '));
-        return static_cast<int>(L' ');
+        input_buffer.add(static_cast<int32_t>(' '));
+        return static_cast<int32_t>(' ');
       }
 
-    case L'\\':
-      val = static_cast<wchar_t>(fgetwc_unlocked(input));
-      input_buffer.add(static_cast<int>(val));
+    case '\\':
+      val = input.get();
+      input_buffer.add(static_cast<int32_t>(val));
       return val;
 
     default:
@@ -544,33 +448,33 @@ FSTProcessor::readPostgeneration(FILE *input, FILE *output)
 }
 
 void
-FSTProcessor::skipUntil(FILE *input, FILE *output, wint_t const character)
+FSTProcessor::skipUntil(InputFile& input, UFILE *output, UChar32 const character)
 {
   while(true)
   {
-    wint_t val = fgetwc_unlocked(input);
-    if(feof(input))
+    UChar32 val = input.get();
+    if(input.eof())
     {
       return;
     }
 
     switch(val)
     {
-      case L'\\':
-        val = fgetwc_unlocked(input);
-        if(feof(input))
+      case '\\':
+        val = input.get();
+        if(input.eof())
         {
           return;
         }
-        fputwc_unlocked(L'\\', output);
-        fputwc_unlocked(val, output);
+        u_fputc('\\', output);
+        u_fputc(val, output);
         break;
 
-      case L'\0':
-        fputwc_unlocked(val, output);
+      case '\0':
+        u_fputc(val, output);
         if(nullFlushGeneration)
         {
-          fflush(output);
+          u_fflush(output);
         }
         break;
 
@@ -581,7 +485,7 @@ FSTProcessor::skipUntil(FILE *input, FILE *output, wint_t const character)
         }
         else
         {
-          fputwc_unlocked(val, output);
+          u_fputc(val, output);
         }
         break;
     }
@@ -589,47 +493,47 @@ FSTProcessor::skipUntil(FILE *input, FILE *output, wint_t const character)
 }
 
 int
-FSTProcessor::readGeneration(FILE *input, FILE *output)
+FSTProcessor::readGeneration(InputFile& input, UFILE *output)
 {
-  wint_t val = fgetwc_unlocked(input);
+  UChar32 val = input.get();
 
-  if(feof(input))
+  if(input.eof())
   {
     return 0x7fffffff;
   }
 
   if(outOfWord)
   {
-    if(val == L'^')
+    if(val == '^')
     {
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      val = input.get();
+      if(input.eof())
       {
         return 0x7fffffff;
       }
     }
-    else if(val == L'\\')
+    else if(val == '\\')
     {
-      fputwc_unlocked(val, output);
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val, output);
+      val = input.get();
+      if(input.eof())
       {
         return 0x7fffffff;
       }
-      fputwc_unlocked(val,output);
-      skipUntil(input, output, L'^');
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val,output);
+      skipUntil(input, output, '^');
+      val = input.get();
+      if(input.eof())
       {
         return 0x7fffffff;
       }
     }
     else
     {
-      fputwc_unlocked(val, output);
-      skipUntil(input, output, L'^');
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val, output);
+      skipUntil(input, output, '^');
+      val = input.get();
+      if(input.eof())
       {
         return 0x7fffffff;
       }
@@ -637,129 +541,106 @@ FSTProcessor::readGeneration(FILE *input, FILE *output)
     outOfWord = false;
   }
 
-  if(val == L'\\')
+  if(val == '\\')
   {
-    val = fgetwc_unlocked(input);
-    return static_cast<int>(val);
+    val = input.get();
+    return static_cast<int32_t>(val);
   }
-  else if(val == L'$')
+  else if(val == '$')
   {
     outOfWord = true;
-    return static_cast<int>(L'$');
+    return static_cast<int32_t>('$');
   }
-  else if(val == L'<')
+  else if(val == '<')
   {
-    wstring cad = L"";
-    cad += static_cast<wchar_t>(val);
-
-    while((val = fgetwc_unlocked(input)) != L'>')
-    {
-      if(feof(input))
-      {
-        streamError();
-      }
-      cad += static_cast<wchar_t>(val);
-    }
-    cad += static_cast<wchar_t>(val);
-
-    return alphabet(cad);
+    return alphabet(input.readBlock('<', '>'));
   }
-  else if(val == L'[')
+  else if(val == '[')
   {
-    val = fgetwc_unlocked(input);
-    if(val == L'[')
+    val = input.get();
+    if(val == '[')
     {
-      fputws_unlocked(readWblank(input).c_str(), output);
+      write(input.finishWBlank(), output);
     }
     else
     {
-      ungetwc_unlocked(val, input);
-      fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
+      input.unget(val);
+      write(input.readBlock('[', ']'), output);
     }
 
     return readGeneration(input, output);
   }
   else
   {
-    return static_cast<int>(val);
+    return static_cast<int32_t>(val);
   }
 
   return 0x7fffffff;
 }
 
-pair<wstring, int>
-FSTProcessor::readBilingual(FILE *input, FILE *output)
+pair<UString, int>
+FSTProcessor::readBilingual(InputFile& input, UFILE *output)
 {
-  wint_t val = fgetwc_unlocked(input);
-  wstring symbol = L"";
+  UChar32 val = input.get();
+  UString symbol;
 
-  if(feof(input))
+  if(input.eof())
   {
-    return pair<wstring, int>(symbol, 0x7fffffff);
+    return pair<UString, int>(symbol, 0x7fffffff);
   }
 
   if(outOfWord)
   {
-    if(val == L'^')
+    if(val == '^')
     {
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      val = input.get();
+      if(input.eof())
       {
-        return pair<wstring, int>(symbol, 0x7fffffff);
+        return pair<UString, int>(symbol, 0x7fffffff);
       }
     }
-    else if(val == L'\\')
+    else if(val == '\\')
     {
-      fputwc_unlocked(val, output);
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val, output);
+      val = input.get();
+      if(input.eof())
       {
-        return pair<wstring, int>(symbol, 0x7fffffff);
+        return pair<UString, int>(symbol, 0x7fffffff);
       }
-      fputwc_unlocked(val,output);
-      skipUntil(input, output, L'^');
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val,output);
+      skipUntil(input, output, '^');
+      val = input.get();
+      if(input.eof())
       {
-        return pair<wstring, int>(symbol, 0x7fffffff);
+        return pair<UString, int>(symbol, 0x7fffffff);
       }
     }
     else
     {
-      fputwc_unlocked(val, output);
-      skipUntil(input, output, L'^');
-      val = fgetwc_unlocked(input);
-      if(feof(input))
+      u_fputc(val, output);
+      skipUntil(input, output, '^');
+      val = input.get();
+      if(input.eof())
       {
-        return pair<wstring, int>(symbol, 0x7fffffff);
+        return pair<UString, int>(symbol, 0x7fffffff);
       }
     }
     outOfWord = false;
   }
 
-  if(val == L'\\')
+  if(val == '\\')
   {
-    val = fgetwc_unlocked(input);
-    return pair<wstring, int>(symbol, val);
+    val = input.get();
+    return pair<UString, int>(symbol, val);
   }
-  else if(val == L'$')
+  else if(val == '$')
   {
     outOfWord = true;
-    return pair<wstring, int>(symbol, static_cast<int>(L'$'));
+    return pair<UString, int>(symbol, static_cast<int32_t>('$'));
   }
-  else if(val == L'<')
+  else if(val == '<')
   {
-    wstring cad = L"";
-    cad += static_cast<wchar_t>(val);
-    while((val = fgetwc_unlocked(input)) != L'>')
-    {
-      if(feof(input))
-      {
-        streamError();
-      }
-      cad += static_cast<wchar_t>(val);
-    }
-    cad += static_cast<wchar_t>(val);
+    UString cad = input.readBlock('<', '>');
 
     int res = alphabet(cad);
 
@@ -767,66 +648,66 @@ FSTProcessor::readBilingual(FILE *input, FILE *output)
     {
       symbol = cad;
     }
-    return pair<wstring, int>(symbol, res);
+    return pair<UString, int>(symbol, res);
   }
-  else if(val == L'[')
+  else if(val == '[')
   {
-    val = fgetwc_unlocked(input);
-    if(val == L'[')
+    val = input.get();
+    if(val == '[')
     {
-      fputws_unlocked(readWblank(input).c_str(), output);
+      write(input.finishWBlank(), output);
     }
     else
     {
-      ungetwc_unlocked(val, input);
-      fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output);
+      input.unget(val);
+      write(input.readBlock('[', ']'), output);
     }
 
     return readBilingual(input, output);
   }
 
-  return pair<wstring, int>(symbol, val);
+  return pair<UString, int>(symbol, val);
 }
 
 void
-FSTProcessor::flushBlanks(FILE *output)
+FSTProcessor::flushBlanks(UFILE *output)
 {
   for(size_t i = blankqueue.size(); i > 0; i--)
   {
-    fputws_unlocked(blankqueue.front().c_str(), output);
+    write(blankqueue.front(), output);
     blankqueue.pop();
   }
 }
 
 void
-FSTProcessor::flushWblanks(FILE *output)
+FSTProcessor::flushWblanks(UFILE *output)
 {
   while(wblankqueue.size() > 0)
   {
-    fputws_unlocked(wblankqueue.front().c_str(), output);
+    write(wblankqueue.front(), output);
     wblankqueue.pop();
   }
 }
 
-wstring
+UString
 FSTProcessor::combineWblanks()
 {
-  wstring final_wblank;
-  wstring last_wblank = L"";
+  UString final_wblank;
+  UString last_wblank;
   bool seen_wblank = false;
 
   while(wblankqueue.size() > 0)
   {
-    if(wblankqueue.front().compare(L"[[/]]") == 0)
+    if(wblankqueue.front().compare(WBLANK_FINAL) == 0)
     {
       if(seen_wblank) {
         if(final_wblank.empty())
         {
-          final_wblank += L"[[";
+          final_wblank += WBLANK_START;
         }
         else if(final_wblank.size() > 2)
         {
-          final_wblank += L"; ";
+          final_wblank += "; "_u;
         }
 
         final_wblank += last_wblank.substr(2,last_wblank.size()-4); //add wblank without brackets [[..]]
@@ -851,7 +732,7 @@ FSTProcessor::combineWblanks()
 
   if(!final_wblank.empty())
   {
-    final_wblank += L"]]";
+    final_wblank += WBLANK_END;
     need_end_wblank = true;
   }
   return final_wblank;
@@ -860,18 +741,15 @@ FSTProcessor::combineWblanks()
 void
 FSTProcessor::calcInitial()
 {
-  for(map<wstring, TransExe, Ltstr>::iterator it = transducers.begin(),
-                                             limit = transducers.end();
-      it != limit; it++)
-  {
-    root.addTransition(0, 0, it->second.getInitial(), default_weight);
+  for(auto& it : transducers) {
+    root.addTransition(0, 0, it.second.getInitial(), default_weight);
   }
 
   initial_state.init(&root);
 }
 
 bool
-FSTProcessor::endsWith(wstring const &str, wstring const &suffix)
+FSTProcessor::endsWith(UString const &str, UString const &suffix)
 {
   if(str.size() < suffix.size())
   {
@@ -886,64 +764,61 @@ FSTProcessor::endsWith(wstring const &str, wstring const &suffix)
 void
 FSTProcessor::classifyFinals()
 {
-  for(map<wstring, TransExe, Ltstr>::iterator it = transducers.begin(),
-                                             limit = transducers.end();
-      it != limit; it++)
-  {
-    if(endsWith(it->first, L"@inconditional"))
+  for(auto& it : transducers) {
+    if(endsWith(it.first, "@inconditional"_u))
     {
-      inconditional.insert(it->second.getFinals().begin(),
-                           it->second.getFinals().end());
+      inconditional.insert(it.second.getFinals().begin(),
+                           it.second.getFinals().end());
     }
-    else if(endsWith(it->first, L"@standard"))
+    else if(endsWith(it.first, "@standard"_u))
     {
-      standard.insert(it->second.getFinals().begin(),
-                      it->second.getFinals().end());
+      standard.insert(it.second.getFinals().begin(),
+                      it.second.getFinals().end());
     }
-    else if(endsWith(it->first, L"@postblank"))
+    else if(endsWith(it.first, "@postblank"_u))
     {
-      postblank.insert(it->second.getFinals().begin(),
-                       it->second.getFinals().end());
+      postblank.insert(it.second.getFinals().begin(),
+                       it.second.getFinals().end());
     }
-    else if(endsWith(it->first, L"@preblank"))
+    else if(endsWith(it.first, "@preblank"_u))
     {
-      preblank.insert(it->second.getFinals().begin(),
-                      it->second.getFinals().end());
+      preblank.insert(it.second.getFinals().begin(),
+                      it.second.getFinals().end());
     }
     else
     {
-      wcerr << L"Error: Unsupported transducer type for '";
-      wcerr << it->first << L"'." << endl;
+      cerr << "Error: Unsupported transducer type for '";
+      cerr << it.first << "'." << endl;
       exit(EXIT_FAILURE);
     }
   }
 }
 
 void
-FSTProcessor::writeEscaped(wstring const &str, FILE *output)
+FSTProcessor::writeEscaped(UString const &str, UFILE *output)
 {
   for(unsigned int i = 0, limit = str.size(); i < limit; i++)
   {
     if(escaped_chars.find(str[i]) != escaped_chars.end())
     {
-      fputwc_unlocked(L'\\', output);
+      u_fputc('\\', output);
     }
-    fputwc_unlocked(str[i], output);
+    u_fputc(str[i], output);
   }
 }
 
 size_t
-FSTProcessor::writeEscapedPopBlanks(wstring const &str, FILE *output)
+FSTProcessor::writeEscapedPopBlanks(UString const &str, UFILE *output)
 {
   size_t postpop = 0;
   for (unsigned int i = 0, limit = str.size(); i < limit; i++)
   {
     if (escaped_chars.find(str[i]) != escaped_chars.end()) {
-      fputwc_unlocked(L'\\', output);
+      u_fputc('\\', output);
     }
-    fputwc_unlocked(str[i], output);
-    if (str[i] == L' ') {
-      if (blankqueue.front() == L" ") {
+    u_fputc(str[i], output);
+    if (str[i] == ' ') {
+      if (blankqueue.front() == " "_u) {
         blankqueue.pop();
       } else {
         postpop++;
@@ -954,71 +829,67 @@ FSTProcessor::writeEscapedPopBlanks(wstring const &str, FILE *output)
 }
 
 void
-FSTProcessor::writeEscapedWithTags(wstring const &str, FILE *output)
+FSTProcessor::writeEscapedWithTags(UString const &str, UFILE *output)
 {
   for(unsigned int i = 0, limit = str.size(); i < limit; i++)
   {
-    if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
+    if(str[i] == '<' && i >=1 && str[i-1] != '\\')
     {
-      fputws_unlocked(str.substr(i).c_str(), output);
+      write(str.substr(i), output);
       return;
     }
 
     if(escaped_chars.find(str[i]) != escaped_chars.end())
     {
-      fputwc_unlocked(L'\\', output);
+      u_fputc('\\', output);
     }
-    fputwc_unlocked(str[i], output);
+    u_fputc(str[i], output);
   }
 }
 
 
 
 void
-FSTProcessor::printWord(wstring const &sf, wstring const &lf, FILE *output)
+FSTProcessor::printWord(UString const &sf, UString const &lf, UFILE *output)
 {
-  fputwc_unlocked(L'^', output);
+  u_fputc('^', output);
   writeEscaped(sf, output);
-  fputws_unlocked(lf.c_str(), output);
-  fputwc_unlocked(L'$', output);
+  write(lf, output);
+  u_fputc('$', output);
 }
 
 void
-FSTProcessor::printWordPopBlank(wstring const &sf, wstring const &lf, FILE *output)
+FSTProcessor::printWordPopBlank(UString const &sf, UString const &lf, UFILE *output)
 {
-  fputwc_unlocked(L'^', output);
+  u_fputc('^', output);
   size_t postpop = writeEscapedPopBlanks(sf, output);
-  fputws_unlocked(lf.c_str(), output);
-  fputwc_unlocked(L'$', output);
+  u_fprintf(output, "%S$", lf.c_str());
   while (postpop-- && blankqueue.size() > 0)
   {
-    fputws(blankqueue.front().c_str(), output);
+    write(blankqueue.front(), output);
     blankqueue.pop();
   }
 }
 
 void
-FSTProcessor::printWordBilingual(wstring const &sf, wstring const &lf, FILE *output)
+FSTProcessor::printWordBilingual(UString const &sf, UString const &lf, UFILE *output)
 {
-  fputwc_unlocked(L'^', output);
-  fputws_unlocked(sf.c_str(), output);
-  fputws_unlocked(lf.c_str(), output);
-  fputwc_unlocked(L'$', output);
+  u_fprintf(output, "^%S%S$", sf.c_str(), lf.c_str());
 }
 
 void
-FSTProcessor::printUnknownWord(wstring const &sf, FILE *output)
+FSTProcessor::printUnknownWord(UString const &sf, UFILE *output)
 {
-  fputwc_unlocked(L'^', output);
+  u_fputc('^', output);
   writeEscaped(sf, output);
-  fputwc_unlocked(L'/', output);
-  fputwc_unlocked(L'*', output);
+  u_fputc('/', output);
+  u_fputc('*', output);
   writeEscaped(sf, output);
-  fputwc_unlocked(L'$', output);
+  u_fputc('$', output);
 }
 
 unsigned int
-FSTProcessor::lastBlank(wstring const &str)
+FSTProcessor::lastBlank(UString const &str)
 {
   for(int i = static_cast<int>(str.size())-1; i >= 0; i--)
   {
@@ -1032,7 +903,7 @@ FSTProcessor::lastBlank(wstring const &str)
 }
 
 void
-FSTProcessor::printSpace(wchar_t const val, FILE *output)
+FSTProcessor::printSpace(UChar const val, UFILE *output)
 {
   if(blankqueue.size() > 0)
   {
@@ -1040,20 +911,20 @@ FSTProcessor::printSpace(wchar_t const val, FILE *output)
   }
   else
   {
-    fputwc_unlocked(val, output);
+    u_fputc(val, output);
   }
 }
 
 bool
-FSTProcessor::isEscaped(wchar_t const c) const
+FSTProcessor::isEscaped(UChar32 const c) const
 {
   return escaped_chars.find(c) != escaped_chars.end();
 }
 
 bool
-FSTProcessor::isAlphabetic(wchar_t const c) const
+FSTProcessor::isAlphabetic(UChar32 const c) const
 {
-  return (bool)std::iswalnum(c) || alphabetic_chars.find(c) != alphabetic_chars.end();
+  return u_isalnum(c) || alphabetic_chars.find(c) != alphabetic_chars.end();
 }
 
 void
@@ -1062,7 +933,7 @@ FSTProcessor::load(FILE *input)
   fpos_t pos;
   if (fgetpos(input, &pos) == 0) {
       char header[4]{};
-      fread(header, 1, 4, input);
+      fread_unlocked(header, 1, 4, input);
       if (strncmp(header, HEADER_LTTOOLBOX, 4) == 0) {
           auto features = read_le<uint64_t>(input);
           if (features >= LTF_UNKNOWN) {
@@ -1079,7 +950,7 @@ FSTProcessor::load(FILE *input)
   int len = Compression::multibyte_read(input);
   while(len > 0)
   {
-    alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
+    alphabetic_chars.insert(static_cast<UChar32>(Compression::multibyte_read(input)));
     len--;
   }
 
@@ -1090,278 +961,12 @@ FSTProcessor::load(FILE *input)
 
   while(len > 0)
   {
-    int len2 = Compression::multibyte_read(input);
-    wstring name = L"";
-    while(len2 > 0)
-    {
-      name += static_cast<wchar_t>(Compression::multibyte_read(input));
-      len2--;
-    }
+    UString name = Compression::string_read(input);
     transducers[name].read(input, alphabet);
     len--;
   }
 }
 
-void
-FSTProcessor::lsx_wrapper_null_flush(FILE *input, FILE *output)
-{
-  setNullFlush(false);
-  //nullFlushGeneration = true;
-
-  while(!feof(input))
-  {
-    lsx(input, output);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
-  }
-}
-
-void
-FSTProcessor::lsx(FILE *input, FILE *output)
-{
-  if(getNullFlush())
-  {
-    lsx_wrapper_null_flush(input, output);
-  }
-
-  vector<State> new_states, alive_states;
-  wstring blank, out, in, alt_out, alt_in;
-  bool outOfWord = true;
-  bool finalFound = false;
-  bool plus_thing = false;
-
-  alive_states.push_back(initial_state);
-
-  int val = -1;
-
-  while(!feof(input) && val != 0)
-  {
-    val = fgetwc_unlocked(input);
-
-    if(val == L'+' && isEscaped(val) && !outOfWord)
-    {
-      val = L'$';
-      plus_thing = true;
-    }
-
-    if((val == L'^' && isEscaped(val) && outOfWord) || feof(input) || val == 0)
-    {
-      blankqueue.push(blank);
-
-      if(alive_states.size() == 0)
-      {
-        if(blankqueue.size() > 0)
-        {
-          fputws(blankqueue.front().c_str(), output);
-          fflush(output);
-          blankqueue.pop();
-        }
-
-        alive_states.push_back(initial_state);
-
-        alt_in = L"";
-        for(int i=0; i < (int) in.size(); i++) // FIXME indexing
-        {
-          alt_in += in[i];
-          if(in[i] == L'$' && in[i+1] == L'^' && blankqueue.size() > 0)
-          {
-            // in.insert(i+1, blankqueue.front().c_str());
-            alt_in += blankqueue.front().c_str();
-            blankqueue.pop();
-          }
-        }
-        in = alt_in;
-        fputws(in.c_str(), output);
-        fflush(output);
-        in = L"";
-        finalFound = false;
-      }
-      else if(finalFound && alive_states.size() == 1)
-      {
-        finalFound = false;
-      }
-
-      blank = L"";
-      in += val;
-      outOfWord = false;
-      continue;
-    }
-
-    // wcerr << L"\n[!] " << (wchar_t)val << L" ||| " << outOfWord << endl;
-
-    if(outOfWord)
-    {
-      blank += val;
-      continue;
-    }
-
-    if((val == 0 || feof(input) || val == L'$') && !outOfWord) // && isEscaped(val)
-    {
-      new_states.clear();
-      for(vector<State>::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
-      {
-        State s = *it;
-        //wcerr << endl << L"[0] FEOF | $ | " << s.size() << L" | " << s.isFinal(all_finals) << endl;
-        s.step(alphabet(L"<$>"));
-        //wcerr << endl << L"[1] FEOF | $ | " << s.size() << L" | " << s.isFinal(all_finals) << endl;
-        if(s.size() > 0)
-        {
-          new_states.push_back(s);
-        }
-
-        /*if(s.isFinal(all_finals))
-        {
-          out += s.filterFinals(all_finals, alphabet, escaped_chars, displayWeightsMode, maxAnalyses, maxWeightClasses);
-          new_states.push_back(*initial_state);
-        }*/
-
-        if(s.isFinal(all_finals))
-        {
-          new_states.clear();
-          new_states.push_back(initial_state);
-          out = s.filterFinals(all_finals, alphabet, escaped_chars, displayWeightsMode, maxAnalyses, maxWeightClasses);
-
-          alt_out = L"";
-          for (int i=0; i < (int) out.size(); i++)
-          {
-            wchar_t c = out.at(i);
-            if(c == L'/')
-            {
-              alt_out += L'^';
-            }
-            else if(out[i-1] == L'<' && c == L'$' && out[i+1] == L'>') // indexing
-            {
-              alt_out += c;
-              alt_out += L'^';
-            }
-            else if(!(c == L'<' && out[i+1] == L'$' && out[i+2] == L'>') && !(out[i-2] == L'<' && out[i-1] == L'$' && c == L'>'))
-            {
-              alt_out += c;
-            }
-          }
-          out = alt_out;
-
-
-          if(out[out.length()-1] == L'^')
-          {
-            out = out.substr(0, out.length()-1); // extra ^ at the end
-            if(plus_thing)
-            {
-              out[out.size()-1] = L'+';
-              plus_thing = false;
-            }
-          }
-          else // take# out ... of
-          {
-            for(int i=out.length()-1; i>=0; i--) // indexing
-            {
-              if(out.at(i) == L'$')
-              {
-                out.insert(i+1, L" ");
-                break;
-              }
-            }
-            out += L'$';
-          }
-
-          if(blankqueue.size() > 0)
-          {
-            fputws(blankqueue.front().c_str(), output);
-            blankqueue.pop();
-          }
-
-          alt_out = L"";
-          for(int i=0; i < (int) out.size(); i++) // indexing
-          {
-            if((out.at(i) == L'$') && blankqueue.size() > 0)
-            {
-              alt_out += out.at(i);
-              alt_out += blankqueue.front().c_str();
-              blankqueue.pop();
-            }
-            else if((out.at(i) == L'$') && blankqueue.size() == 0 && i != (int) out.size()-1)
-            {
-              alt_out += out.at(i);
-              alt_out += L' ';
-            }
-            else if(out.at(i) == L' ' && blankqueue.size() > 0)
-            {
-              alt_out += blankqueue.front().c_str();
-              blankqueue.pop();
-            }
-            else
-            {
-              alt_out += out.at(i);
-            }
-          }
-          out = alt_out;
-
-          fputws(out.c_str(), output);
-          flushBlanks(output);
-          finalFound = true;
-          out = L"";
-          in = L"";
-        }
-      }
-
-      alive_states.swap(new_states);
-      outOfWord = true;
-
-      if(!finalFound)
-      {
-        in += val; //do not remove
-      }
-      continue;
-    }
-
-    if(!outOfWord) // && (!(feof(input) || val == L'$')))
-    {
-      if(val == L'<') // tag
-      {
-        wstring tag = readFullBlock(input, L'<', L'>');
-        in += tag;
-        if(!alphabet.isSymbolDefined(tag))
-        {
-          alphabet.includeSymbol(tag);
-        }
-        val = static_cast<int>(alphabet(tag));
-      }
-      else
-      {
-        in += (wchar_t) val;
-      }
-
-      new_states.clear();
-      for(vector<State>::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
-      {
-        State s = *it;
-        if(val < 0)
-        {
-          s.step_override(val, alphabet(L"<ANY_TAG>"), val);
-        }
-        else if(val > 0)
-        {
-          int val_lowercase = towlower(val);
-          s.step_override(val_lowercase, alphabet(L"<ANY_CHAR>"), val); // FIXME deal with cases! in step_override
-        }
-
-        if(s.size() > 0)
-        {
-          new_states.push_back(s);
-        }
-
-      }
-      alive_states.swap(new_states);
-    }
-  }
-
-  flushBlanks(output);
-}
-
 void
 FSTProcessor::initAnalysis()
 {
@@ -1378,12 +983,9 @@ FSTProcessor::initTMAnalysis()
 {
   calcInitial();
 
-  for(map<wstring, TransExe, Ltstr>::iterator it = transducers.begin(),
-                                             limit = transducers.end();
-      it != limit; it++)
-  {
-    all_finals.insert(it->second.getFinals().begin(),
-                      it->second.getFinals().end());
+  for(auto& it : transducers) {
+    all_finals.insert(it.second.getFinals().begin(),
+                      it.second.getFinals().end());
   }
 }
 
@@ -1392,12 +994,9 @@ FSTProcessor::initGeneration()
 {
   setIgnoredChars(false);
   calcInitial();
-  for(map<wstring, TransExe, Ltstr>::iterator it = transducers.begin(),
-                                             limit = transducers.end();
-      it != limit; it++)
-  {
-    all_finals.insert(it->second.getFinals().begin(),
-                      it->second.getFinals().end());
+  for(auto& it : transducers) {
+    all_finals.insert(it.second.getFinals().begin(),
+                      it.second.getFinals().end());
   }
 }
 
@@ -1414,8 +1013,8 @@ FSTProcessor::initBiltrans()
 }
 
 
-wstring
-FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupper)
+UString
+FSTProcessor::compoundAnalysis(UString input_word, bool uppercase, bool firstupper)
 {
   const int MAX_COMBINATIONS = 32767;
 
@@ -1423,16 +1022,16 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
 
   for(unsigned int i=0; i<input_word.size(); i++)
   {
-    wchar_t val=input_word.at(i);
+    UChar val=input_word[i];
 
     current_state.step_case(val, caseSensitive);
 
     if(current_state.size() > MAX_COMBINATIONS)
     {
-      wcerr << L"Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << L"'" << endl;
-      wcerr << L"         gave up at char " << i << L" '" << val << L"'." << endl;
+      cerr << "Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << "'" << endl;
+      cerr << "         gave up at char " << i << " '" << val << "'." << endl;
 
-      wstring nullString = L"";
+      UString nullString;
       return  nullString;
     }
 
@@ -1443,13 +1042,13 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
 
     if(current_state.size()==0)
     {
-      wstring nullString = L"";
+      UString nullString;
       return nullString;
     }
   }
 
   current_state.pruneCompounds(compoundRSymbol, '+', compound_max_elements);
-  wstring result = current_state.filterFinals(all_finals, alphabet, escaped_chars, displayWeightsMode, maxAnalyses, maxWeightClasses, uppercase, firstupper);
+  UString result = current_state.filterFinals(all_finals, alphabet, escaped_chars, displayWeightsMode, maxAnalyses, maxWeightClasses, uppercase, firstupper);
 
   return result;
 }
@@ -1459,30 +1058,30 @@ FSTProcessor::compoundAnalysis(wstring input_word, bool uppercase, bool firstupp
 void
 FSTProcessor::initDecompositionSymbols()
 {
-  if((compoundOnlyLSymbol=alphabet(L"<:co:only-L>")) == 0
-     && (compoundOnlyLSymbol=alphabet(L"<:compound:only-L>")) == 0
-     && (compoundOnlyLSymbol=alphabet(L"<@co:only-L>")) == 0
-     && (compoundOnlyLSymbol=alphabet(L"<@compound:only-L>")) == 0
-     && (compoundOnlyLSymbol=alphabet(L"<compound-only-L>")) == 0)
+  if((compoundOnlyLSymbol=alphabet("<:co:only-L>"_u)) == 0
+     && (compoundOnlyLSymbol=alphabet("<:compound:only-L>"_u)) == 0
+     && (compoundOnlyLSymbol=alphabet("<@co:only-L>"_u)) == 0
+     && (compoundOnlyLSymbol=alphabet("<@compound:only-L>"_u)) == 0
+     && (compoundOnlyLSymbol=alphabet("<compound-only-L>"_u)) == 0)
   {
-    wcerr << L"Warning: Decomposition symbol <:compound:only-L> not found" << endl;
+    cerr << "Warning: Decomposition symbol <:compound:only-L> not found" << endl;
   }
   else if(!showControlSymbols)
   {
-    alphabet.setSymbol(compoundOnlyLSymbol, L"");
+    alphabet.setSymbol(compoundOnlyLSymbol, ""_u);
   }
 
-  if((compoundRSymbol=alphabet(L"<:co:R>")) == 0
-     && (compoundRSymbol=alphabet(L"<:compound:R>")) == 0
-     && (compoundRSymbol=alphabet(L"<@co:R>")) == 0
-     && (compoundRSymbol=alphabet(L"<@compound:R>")) == 0
-     && (compoundRSymbol=alphabet(L"<compound-R>")) == 0)
+  if((compoundRSymbol=alphabet("<:co:R>"_u)) == 0
+     && (compoundRSymbol=alphabet("<:compound:R>"_u)) == 0
+     && (compoundRSymbol=alphabet("<@co:R>"_u)) == 0
+     && (compoundRSymbol=alphabet("<@compound:R>"_u)) == 0
+     && (compoundRSymbol=alphabet("<compound-R>"_u)) == 0)
   {
-    wcerr << L"Warning: Decomposition symbol <:compound:R> not found" << endl;
+    cerr << "Warning: Decomposition symbol <:compound:R> not found" << endl;
   }
   else if(!showControlSymbols)
   {
-    alphabet.setSymbol(compoundRSymbol, L"");
+    alphabet.setSymbol(compoundRSymbol, ""_u);
   }
 }
 
@@ -1496,7 +1095,7 @@ FSTProcessor::initDecomposition()
 }
 
 void
-FSTProcessor::analysis(FILE *input, FILE *output)
+FSTProcessor::analysis(InputFile& input, UFILE *output)
 {
   if(getNullFlush())
   {
@@ -1507,13 +1106,13 @@ FSTProcessor::analysis(FILE *input, FILE *output)
   bool last_postblank = false;
   bool last_preblank = false;
   State current_state = initial_state;
-  wstring lf = L"";   //lexical form
-  wstring sf = L"";   //surface form
+  UString lf;   //lexical form
+  UString sf;   //surface form
   int last = 0;
   bool firstupper = false, uppercase = false;
   map<int, set<int> >::iterator rcx_map_ptr;
 
-  wchar_t val;
+  UChar32 val;
   do
   {
     val = readAnalysis(input);
@@ -1524,8 +1123,8 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       {
         if(!dictionaryCase)
         {
-          firstupper = iswupper(sf[0]);
-          uppercase = firstupper && iswupper(sf[sf.size()-1]);
+          firstupper = u_isupper(sf[0]);
+          uppercase = firstupper && u_isupper(sf[sf.size()-1]);
         }
 
         if(do_decomposition && compoundOnlyLSymbol != 0)
@@ -1543,8 +1142,8 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       {
         if(!dictionaryCase)
         {
-          firstupper = iswupper(sf[0]);
-          uppercase = firstupper && iswupper(sf[sf.size()-1]);
+          firstupper = u_isupper(sf[0]);
+          uppercase = firstupper && u_isupper(sf[sf.size()-1]);
         }
 
         if(do_decomposition && compoundOnlyLSymbol != 0)
@@ -1562,8 +1161,8 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       {
         if(!dictionaryCase)
         {
-          firstupper = iswupper(sf[0]);
-          uppercase = firstupper && iswupper(sf[sf.size()-1]);
+          firstupper = u_isupper(sf[0]);
+          uppercase = firstupper && u_isupper(sf[sf.size()-1]);
         }
 
         if(do_decomposition && compoundOnlyLSymbol != 0)
@@ -1581,8 +1180,8 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       {
         if(!dictionaryCase)
         {
-          firstupper = iswupper(sf[0]);
-          uppercase = firstupper && iswupper(sf[sf.size()-1]);
+          firstupper = u_isupper(sf[0]);
+          uppercase = firstupper && u_isupper(sf[sf.size()-1]);
         }
 
         if(do_decomposition && compoundOnlyLSymbol != 0)
@@ -1599,9 +1198,9 @@ FSTProcessor::analysis(FILE *input, FILE *output)
         last = input_buffer.getPos();
       }
     }
-    else if(sf == L"" && iswspace(val))
+    else if(sf.empty() && u_isspace(val))
     {
-      lf = L"/*";
+      lf = "/*"_u;
       lf.append(sf);
       last_postblank = false;
       last_preblank = false;
@@ -1613,11 +1212,11 @@ FSTProcessor::analysis(FILE *input, FILE *output)
     {
       rcx_map_ptr = rcx_map.find(val);
       set<int> tmpset = rcx_map_ptr->second;
-      if(!iswupper(val) || caseSensitive)
+      if(!u_isupper(val) || caseSensitive)
       {
         current_state.step(val, tmpset);
       }
-      else if(rcx_map.find(towlower(val)) != rcx_map.end())
+      else if(rcx_map.find(u_tolower(val)) != rcx_map.end())
       {
         rcx_map_ptr = rcx_map.find(tolower(val));
         tmpset.insert(tolower(val));
@@ -1632,14 +1231,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
     }
     else
     {
-      if(!iswupper(val) || caseSensitive)
-      {
-        current_state.step(val);
-      }
-      else
-      {
-        current_state.step(val, towlower(val));
-      }
+      current_state.step_case(val, caseSensitive);
     }
 
     if(current_state.size() != 0)
@@ -1651,29 +1243,29 @@ FSTProcessor::analysis(FILE *input, FILE *output)
     }
     else
     {
-      if(!isAlphabetic(val) && sf == L"")
+      if(!isAlphabetic(val) && sf.empty())
       {
-        if(iswspace(val))
+        if(u_isspace(val))
         {
           if (blankqueue.size() > 0)
           {
-            fputws_unlocked(blankqueue.front().c_str(), output);
+            write(blankqueue.front(), output);
             blankqueue.pop();
           }
           else
           {
-            fputwc_unlocked(val, output);
+            u_fputc(val, output);
           }
         }
         else
         {
           if(isEscaped(val))
           {
-            fputwc_unlocked(L'\\', output);
+            u_fputc('\\', output);
           }
           if(val)
           {
-            fputwc_unlocked(val, output);
+            u_fputc(val, output);
           }
         }
       }
@@ -1681,13 +1273,13 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       {
         printWordPopBlank(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
                           lf, output);
-        fputwc_unlocked(L' ', output);
+        u_fputc(' ', output);
         input_buffer.setPos(last);
         input_buffer.back(1);
       }
       else if(last_preblank)
       {
-        fputwc_unlocked(L' ', output);
+        u_fputc(' ', output);
         printWordPopBlank(sf.substr(0, sf.size()-input_buffer.diffPrevPos(last)),
                           lf, output);
         input_buffer.setPos(last);
@@ -1702,7 +1294,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       }
       else if(isAlphabetic(val) &&
               ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
-               lf == L""))
+               lf.empty()))
       {
         do
         {
@@ -1712,7 +1304,7 @@ FSTProcessor::analysis(FILE *input, FILE *output)
 
         unsigned int limit = firstNotAlpha(sf);
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int>(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int>(UString::npos)?size:limit);
         if(limit == 0)
         {
           input_buffer.back(sf.size());
@@ -1721,18 +1313,18 @@ FSTProcessor::analysis(FILE *input, FILE *output)
         else
         {
           input_buffer.back(1+(size-limit));
-          wstring unknown_word = sf.substr(0, limit);
+          UString unknown_word = sf.substr(0, limit);
           if(do_decomposition)
           {
             if(!dictionaryCase)
             {
-              firstupper = iswupper(sf[0]);
-              uppercase = firstupper && iswupper(sf[sf.size()-1]);
+              firstupper = u_isupper(sf[0]);
+              uppercase = firstupper && u_isupper(sf[sf.size()-1]);
             }
 
-            wstring compound = L"";
+            UString compound;
             compound = compoundAnalysis(unknown_word, uppercase, firstupper);
-            if(compound != L"")
+            if(!compound.empty())
             {
               printWord(unknown_word, compound, output);
             }
@@ -1747,11 +1339,11 @@ FSTProcessor::analysis(FILE *input, FILE *output)
           }
         }
       }
-      else if(lf == L"")
+      else if(lf.empty())
       {
         unsigned int limit = firstNotAlpha(sf);
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int >(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int >(UString::npos)?size:limit);
         if(limit == 0)
         {
           input_buffer.back(sf.size());
@@ -1760,18 +1352,18 @@ FSTProcessor::analysis(FILE *input, FILE *output)
         else
         {
           input_buffer.back(1+(size-limit));
-          wstring unknown_word = sf.substr(0, limit);
+          UString unknown_word = sf.substr(0, limit);
           if(do_decomposition)
           {
             if(!dictionaryCase)
             {
-              firstupper = iswupper(sf[0]);
-              uppercase = firstupper && iswupper(sf[sf.size()-1]);
+              firstupper = u_isupper(sf[0]);
+              uppercase = firstupper && u_isupper(sf[sf.size()-1]);
             }
 
-            wstring compound = L"";
+            UString compound;
             compound = compoundAnalysis(unknown_word, uppercase, firstupper);
-            if(compound != L"")
+            if(!compound.empty())
             {
               printWord(unknown_word, compound, output);
             }
@@ -1801,8 +1393,8 @@ FSTProcessor::analysis(FILE *input, FILE *output)
       }
 
       current_state = initial_state;
-      lf = L"";
-      sf = L"";
+      lf.clear();
+      sf.clear();
       last_incond = false;
       last_postblank = false;
       last_preblank = false;
@@ -1815,102 +1407,82 @@ FSTProcessor::analysis(FILE *input, FILE *output)
 }
 
 void
-FSTProcessor::analysis_wrapper_null_flush(FILE *input, FILE *output)
+FSTProcessor::analysis_wrapper_null_flush(InputFile& input, UFILE *output)
 {
   setNullFlush(false);
-  while(!feof(input))
+  while(!input.eof())
   {
     analysis(input, output);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
 void
-FSTProcessor::generation_wrapper_null_flush(FILE *input, FILE *output,
+FSTProcessor::generation_wrapper_null_flush(InputFile& input, UFILE *output,
                                             GenerationMode mode)
 {
   setNullFlush(false);
   nullFlushGeneration = true;
 
-  while(!feof(input))
+  while(!input.eof())
   {
     generation(input, output, mode);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
 void
-FSTProcessor::postgeneration_wrapper_null_flush(FILE *input, FILE *output)
+FSTProcessor::postgeneration_wrapper_null_flush(InputFile& input, UFILE *output)
 {
   setNullFlush(false);
-  while(!feof(input))
+  while(!input.eof())
   {
     postgeneration(input, output);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
 void
-FSTProcessor::intergeneration_wrapper_null_flush(FILE *input, FILE *output)
+FSTProcessor::intergeneration_wrapper_null_flush(InputFile& input, UFILE *output)
 {
   setNullFlush(false);
-  while (!feof(input))
+  while (!input.eof())
   {
     intergeneration(input, output);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if (code != 0)
-    {
-      wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
 void
-FSTProcessor::transliteration_wrapper_null_flush(FILE *input, FILE *output)
+FSTProcessor::transliteration_wrapper_null_flush(InputFile& input, UFILE *output)
 {
   setNullFlush(false);
-  while(!feof(input))
+  while(!input.eof())
   {
     transliteration(input, output);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
 void
-FSTProcessor::tm_analysis(FILE *input, FILE *output)
+FSTProcessor::tm_analysis(InputFile& input, UFILE *output)
 {
   State current_state = initial_state;
-  wstring lf = L"";     //lexical form
-  wstring sf = L"";     //surface form
+  UString lf;     //lexical form
+  UString sf;     //surface form
   int last = 0;
 
-  while(wchar_t val = readTMAnalysis(input))
+  while(int32_t val = readTMAnalysis(input))
   {
     // test for final states
     if(current_state.isFinal(all_finals))
     {
-      if(iswpunct(val))
+      if(u_ispunct(val))
       {
         lf = current_state.filterFinalsTM(all_finals, alphabet,
                                           escaped_chars,
@@ -1919,20 +1491,13 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
         numbers.clear();
       }
     }
-    else if(sf == L"" && iswspace(val))
+    else if(sf.empty() && u_isspace(val))
     {
       lf.append(sf);
       last = input_buffer.getPos();
     }
 
-    if(!iswupper(val))
-    {
-      current_state.step(val);
-    }
-    else
-    {
-      current_state.step(val, towlower(val));
-    }
+    current_state.step_case(val, false);
 
     if(current_state.size() != 0)
     {
@@ -1940,7 +1505,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
       {
         sf.append(numbers[numbers.size()-1]);
       }
-      else if(isLastBlankTM && val == L' ')
+      else if(isLastBlankTM && val == ' ')
       {
         sf.append(blankqueue.back());
       }
@@ -1951,9 +1516,9 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
     }
     else
     {
-      if((iswspace(val) || iswpunct(val)) && sf == L"")
+      if((u_isspace(val) || u_ispunct(val)) && sf.empty())
       {
-        if(iswspace(val))
+        if(u_isspace(val))
         {
           printSpace(val, output);
         }
@@ -1961,14 +1526,14 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
         {
           if(isEscaped(val))
           {
-            fputwc_unlocked(L'\\', output);
+            u_fputc('\\', output);
           }
-          fputwc_unlocked(val, output);
+          u_fputc(val, output);
         }
       }
-      else if(!iswspace(val) && !iswpunct(val) &&
+      else if(!u_isspace(val) && !u_ispunct(val) &&
               ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
-               lf == L""))
+               lf.empty()))
       {
 
         do
@@ -1977,7 +1542,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
           {
             sf.append(numbers[numbers.size()-1]);
           }
-          else if(isLastBlankTM && val == L' ')
+          else if(isLastBlankTM && val == ' ')
           {
             sf.append(blankqueue.back());
           }
@@ -1986,16 +1551,16 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
             alphabet.getSymbol(sf, val);
           }
         }
-        while((val = readTMAnalysis(input)) && !iswspace(val) && !iswpunct(val));
+        while((val = readTMAnalysis(input)) && !u_isspace(val) && !u_ispunct(val));
 
         if(val == 0)
         {
-          fputws_unlocked(sf.c_str(), output);
+          write(sf, output);
           return;
         }
 
         input_buffer.back(1);
-        fputws_unlocked(sf.c_str(), output);
+        write(sf, output);
 
         while(blankqueue.size() > 0)
         {
@@ -2007,22 +1572,22 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
         }
 
 /*
-        unsigned int limit = sf.find(L' ');
+        unsigned int limit = sf.find(' ');
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int>(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int>(UString::npos)?size:limit);
         input_buffer.back(1+(size-limit));
-        fputws_unlocked(sf.substr(0, limit).c_str(), output);
+        write(sf.substr(0, limit), output);
 */      }
-      else if(lf == L"")
+      else if(lf.empty())
       {
-/*        unsigned int limit = sf.find(L' ');
+/*        unsigned int limit = sf.find(' ');
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int >(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int >(UString::npos)?size:limit);
         input_buffer.back(1+(size-limit));
-        fputws_unlocked(sf.substr(0, limit).c_str(), output);
+        write(sf.substr(0, limit), output);
 */
         input_buffer.back(1);
-        fputws_unlocked(sf.c_str(), output);
+        write(sf, output);
 
         while(blankqueue.size() > 0)
         {
@@ -2036,16 +1601,14 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
       }
       else
       {
-        fputwc_unlocked(L'[', output);
-        fputws_unlocked(lf.c_str(), output);
-        fputwc_unlocked(L']', output);
+        u_fprintf(output, "[%S]", lf.c_str());
         input_buffer.setPos(last);
         input_buffer.back(1);
       }
 
       current_state = initial_state;
-      lf = L"";
-      sf = L"";
+      lf.clear();
+      sf.clear();
     }
   }
 
@@ -2055,7 +1618,7 @@ FSTProcessor::tm_analysis(FILE *input, FILE *output)
 
 
 void
-FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
+FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode)
 {
   if(getNullFlush())
   {
@@ -2063,24 +1626,24 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
   }
 
   State current_state = initial_state;
-  wstring sf = L"";
+  UString sf;
 
   outOfWord = false;
 
-  skipUntil(input, output, L'^');
+  skipUntil(input, output, '^');
   int val;
 
   while((val = readGeneration(input, output)) != 0x7fffffff)
   {
-    if(sf == L"" && val == L'=')
+    if(sf.empty() && val == '=')
     {
-      fputwc(L'=', output);
+      u_fputc('=', output);
       val = readGeneration(input, output);
     }
 
-    if(val == L'$' && outOfWord)
+    if(val == '$' && outOfWord)
     {
-      if(sf[0] == L'*' || sf[0] == L'%')
+      if(sf[0] == '*' || sf[0] == '%')
       {
         if(mode != gm_clean && mode != gm_tagged_nm)
         {
@@ -2092,14 +1655,14 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
         }
         else if(mode == gm_tagged_nm)
         {
-          fputwc_unlocked(L'^', output);
+          u_fputc('^', output);
           writeEscaped(removeTags(sf.substr(1)), output);
-          fputwc_unlocked(L'/', output);
+          u_fputc('/', output);
           writeEscapedWithTags(sf, output);
-          fputwc_unlocked(L'$', output);
+          u_fputc('$', output);
         }
       }
-      else if(sf[0] == L'@')
+      else if(sf[0] == '@')
       {
         if(mode == gm_all)
         {
@@ -2119,11 +1682,11 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
         }
         else if(mode == gm_tagged_nm)
         {
-          fputwc_unlocked(L'^', output);
+          u_fputc('^', output);
           writeEscaped(removeTags(sf.substr(1)), output);
-          fputwc_unlocked(L'/', output);
+          u_fputc('/', output);
           writeEscapedWithTags(sf, output);
-          fputwc_unlocked(L'$', output);
+          u_fputc('$', output);
         }
       }
       else if(current_state.isFinal(all_finals))
@@ -2131,24 +1694,24 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
         bool firstupper = false, uppercase = false;
         if(!dictionaryCase)
         {
-          uppercase = sf.size() > 1 && iswupper(sf[1]);
-          firstupper= iswupper(sf[0]);
+          uppercase = sf.size() > 1 && u_isupper(sf[1]);
+          firstupper= u_isupper(sf[0]);
         }
 
         if(mode == gm_tagged || mode == gm_tagged_nm)
         {
-          fputwc_unlocked(L'^', output);
+          u_fputc('^', output);
         }
 
-        fputws_unlocked(current_state.filterFinals(all_finals, alphabet,
-                                                   escaped_chars,
-                                                   displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                                   uppercase, firstupper).substr(1).c_str(), output);
+        write(current_state.filterFinals(all_finals, alphabet,
+                                         escaped_chars,
+                                         displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                         uppercase, firstupper).substr(1), output);
         if(mode == gm_tagged || mode == gm_tagged_nm)
         {
-          fputwc_unlocked(L'/', output);
+          u_fputc('/', output);
           writeEscapedWithTags(sf, output);
-          fputwc_unlocked(L'$', output);
+          u_fputc('$', output);
         }
 
       }
@@ -2156,7 +1719,7 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
       {
         if(mode == gm_all)
         {
-          fputwc_unlocked(L'#', output);
+          u_fputc('#', output);
           writeEscaped(sf, output);
         }
         else if(mode == gm_clean)
@@ -2165,36 +1728,36 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
         }
         else if(mode == gm_unknown)
         {
-          if(sf != L"")
+          if(!sf.empty())
           {
-            fputwc_unlocked(L'#', output);
+            u_fputc('#', output);
             writeEscaped(removeTags(sf), output);
           }
         }
         else if(mode == gm_tagged)
         {
-          fputwc_unlocked(L'#', output);
+          u_fputc('#', output);
           writeEscaped(removeTags(sf), output);
         }
         else if(mode == gm_tagged_nm)
         {
-          fputwc_unlocked(L'^', output);
+          u_fputc('^', output);
           writeEscaped(removeTags(sf), output);
-          fputwc_unlocked(L'/', output);
-          fputwc_unlocked(L'#', output);
+          u_fputc('/', output);
+          u_fputc('#', output);
           writeEscapedWithTags(sf, output);
-          fputwc_unlocked(L'$', output);
+          u_fputc('$', output);
         }
       }
 
       current_state = initial_state;
-      sf = L"";
+      sf.clear();
     }
-    else if(iswspace(val) && sf.size() == 0)
+    else if(u_isspace(val) && sf.size() == 0)
     {
       // do nothing
     }
-    else if(sf.size() > 0 && (sf[0] == L'*' || sf[0] == L'%' ))
+    else if(sf.size() > 0 && (sf[0] == '*' || sf[0] == '%' ))
     {
       alphabet.getSymbol(sf, val);
     }
@@ -2203,15 +1766,15 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
       alphabet.getSymbol(sf,val);
       if(current_state.size() > 0)
       {
-        if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+        if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
         {
           if(mode == gm_carefulcase)
           {
-            current_state.step_careful(val, towlower(val));
+            current_state.step_careful(val, u_tolower(val));
           }
           else
           {
-            current_state.step(val, towlower(val));
+            current_state.step(val, u_tolower(val));
           }
         }
         else
@@ -2224,7 +1787,7 @@ FSTProcessor::generation(FILE *input, FILE *output, GenerationMode mode)
 }
 
 void
-FSTProcessor::postgeneration(FILE *input, FILE *output)
+FSTProcessor::postgeneration(InputFile& input, UFILE *output)
 {
   if(getNullFlush())
   {
@@ -2235,14 +1798,14 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
   collect_wblanks = false;
   need_end_wblank = false;
   State current_state = initial_state;
-  wstring lf = L"";
-  wstring sf = L"";
+  UString lf;
+  UString sf;
   int last = 0;
-  set<wchar_t> empty_escaped_chars;
+  set<UChar32> empty_escaped_chars;
 
-  while(wchar_t val = readPostgeneration(input, output))
+  while(UChar val = readPostgeneration(input, output))
   {
-    if(val == L'~')
+    if(val == '~')
     {
       skip_mode = false;
       collect_wblanks = true;
@@ -2254,11 +1817,11 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
     }
     else if(skip_mode)
     {
-      if(iswspace(val))
+      if(u_isspace(val))
       {
         if(need_end_wblank)
         {
-          fputws_unlocked(L"[[/]]", output);
+          write(WBLANK_FINAL, output);
           need_end_wblank = false;
         }
 
@@ -2273,13 +1836,13 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
 
         if(isEscaped(val))
         {
-          fputwc_unlocked(L'\\', output);
+          u_fputc('\\', output);
         }
-        fputwc_unlocked(val, output);
+        u_fputc(val, output);
 
         if(need_end_wblank)
         {
-          fputws_unlocked(L"[[/]]", output);
+          write(WBLANK_FINAL, output);
           need_end_wblank = false;
         }
       }
@@ -2294,8 +1857,8 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
       // test for final states
       if(current_state.isFinal(all_finals))
       {
-        bool firstupper = iswupper(sf[1]);
-        bool uppercase = sf.size() > 1 && firstupper && iswupper(sf[2]);
+        bool firstupper = u_isupper(sf[1]);
+        bool uppercase = sf.size() > 1 && firstupper && u_isupper(sf[2]);
         lf = current_state.filterFinals(all_finals, alphabet,
                                         empty_escaped_chars,
                                         displayWeightsMode, maxAnalyses, maxWeightClasses,
@@ -2303,7 +1866,7 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
 
         // case of the beggining of the next word
 
-        wstring mybuf = L"";
+        UString mybuf;
         for(size_t i = sf.size(); i > 0; --i)
         {
           if(!isalpha(sf[i-1]))
@@ -2318,8 +1881,8 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
 
         if(mybuf.size() > 0)
         {
-          bool myfirstupper = iswupper(mybuf[0]);
-          bool myuppercase = mybuf.size() > 1 && iswupper(mybuf[1]);
+          bool myfirstupper = u_isupper(mybuf[0]);
+          bool myuppercase = mybuf.size() > 1 && u_isupper(mybuf[1]);
 
           for(size_t i = lf.size(); i > 0; --i)
           {
@@ -2327,11 +1890,11 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
             {
               if(myfirstupper && i != lf.size())
               {
-                lf[i] = towupper(lf[i]);
+                lf[i] = u_toupper(lf[i]);
               }
               else
               {
-                lf[i] = towlower(lf[i]);
+                lf[i] = u_tolower(lf[i]);
               }
               break;
             }
@@ -2339,11 +1902,11 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
             {
               if(myuppercase)
               {
-                lf[i-1] = towupper(lf[i-1]);
+                lf[i-1] = u_toupper(lf[i-1]);
               }
               else
               {
-                lf[i-1] = towlower(lf[i-1]);
+                lf[i-1] = u_tolower(lf[i-1]);
               }
             }
           }
@@ -2352,14 +1915,7 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
         last = input_buffer.getPos();
       }
 
-      if(!iswupper(val) || caseSensitive)
-      {
-        current_state.step(val);
-      }
-      else
-      {
-        current_state.step(val, towlower(val));
-      }
+      current_state.step_case(val, caseSensitive);
 
       if(current_state.size() != 0)
       {
@@ -2367,51 +1923,51 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
       }
       else
       {
-        wstring final_wblank = combineWblanks();
-        fputws_unlocked(final_wblank.c_str(), output);
+        UString final_wblank = combineWblanks();
+        write(final_wblank, output);
 
-        if(lf == L"")
+        if(lf.empty())
         {
           unsigned int mark = sf.size();
           unsigned int space_index = sf.size();
-          
+
           for(unsigned int i = 1, limit = sf.size(); i < limit; i++)
           {
-            if(sf[i] == L'~')
+            if(sf[i] == '~')
             {
               mark = i;
               break;
             }
-            else if(sf[i] == L' ')
+            else if(sf[i] == ' ')
             {
               space_index = i;
             }
           }
-          
+
           if(space_index != sf.size())
           {
-            fputws_unlocked(sf.substr(1, space_index-1).c_str(), output);
-            
+            write(sf.substr(1, space_index-1), output);
+
             if(need_end_wblank)
             {
-              fputws_unlocked(L"[[/]]", output);
+              write(WBLANK_FINAL, output);
               need_end_wblank = false;
-              fputwc_unlocked(sf[space_index], output);
+              u_fputc(sf[space_index], output);
               flushWblanks(output);
             }
             else
             {
-              fputwc_unlocked(sf[space_index], output);
+              u_fputc(sf[space_index], output);
             }
-            
-            fputws_unlocked(sf.substr(space_index+1, mark-space_index-1).c_str(), output);
+
+            write(sf.substr(space_index+1, mark-space_index-1), output);
           }
           else
           {
             flushWblanks(output);
-            fputws_unlocked(sf.substr(1, mark-1).c_str(), output);
+            write(sf.substr(1, mark-1), output);
           }
-          
+
           if(mark == sf.size())
           {
             input_buffer.back(1);
@@ -2423,11 +1979,11 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
         }
         else
         {
-          fputws_unlocked(lf.substr(1,lf.size()-3).c_str(), output);
+          write(lf.substr(1,lf.size()-3), output);
           input_buffer.setPos(last);
           input_buffer.back(2);
           val = lf[lf.size()-2];
-          if(iswspace(val))
+          if(u_isspace(val))
           {
             printSpace(val, output);
           }
@@ -2435,15 +1991,15 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
           {
             if(isEscaped(val))
             {
-              fputwc_unlocked(L'\\', output);
+              u_fputc('\\', output);
             }
-            fputwc_unlocked(val, output);
+            u_fputc(val, output);
           }
         }
 
         current_state = initial_state;
-        lf = L"";
-        sf = L"";
+        lf.clear();
+        sf.clear();
         skip_mode = true;
         collect_wblanks = false;
       }
@@ -2455,7 +2011,7 @@ FSTProcessor::postgeneration(FILE *input, FILE *output)
 }
 
 void
-FSTProcessor::intergeneration(FILE *input, FILE *output)
+FSTProcessor::intergeneration(InputFile& input, UFILE *output)
 {
   if (getNullFlush())
   {
@@ -2464,35 +2020,35 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
 
   bool skip_mode = true;
   State current_state = initial_state;
-  wstring target = L"";
-  wstring source = L"";
+  UString target;
+  UString source;
   int last = 0;
-  set<wchar_t> empty_escaped_chars;
+  set<UChar32> empty_escaped_chars;
 
   while (true)
   {
-    wchar_t val = readPostgeneration(input, output);
+    UChar val = readPostgeneration(input, output);
 
-    if (val == L'~')
+    if (val == '~')
     {
       skip_mode = false;
     }
 
     if (skip_mode)
     {
-      if (iswspace(val))
+      if (u_isspace(val))
       {
         printSpace(val, output);
       }
       else
       {
-        if(val != L'\0')
+        if(val != '\0')
         {
           if (isEscaped(val))
           {
-            fputwc_unlocked(L'\\', output);
+            u_fputc('\\', output);
           }
-          fputwc_unlocked(val, output);
+          u_fputc(val, output);
         }
       }
     }
@@ -2501,8 +2057,8 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
       // test for final states
       if (current_state.isFinal(all_finals))
       {
-        bool firstupper = iswupper(source[1]);
-        bool uppercase = source.size() > 1 && firstupper && iswupper(source[2]);
+        bool firstupper = u_isupper(source[1]);
+        bool uppercase = source.size() > 1 && firstupper && u_isupper(source[2]);
         target = current_state.filterFinals(all_finals, alphabet,
                                         empty_escaped_chars,
                                         displayWeightsMode, maxAnalyses, maxWeightClasses,
@@ -2511,39 +2067,32 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
         last = input_buffer.getPos();
       }
 
-      if (val != L'\0')
+      if (val != '\0')
       {
-        if (!iswupper(val) || caseSensitive)
-        {
-          current_state.step(val);
-        }
-        else
-        {
-          current_state.step(val, towlower(val));
-        }
+        current_state.step_case(val, caseSensitive);
       }
 
-      if (val != L'\0' && current_state.size() != 0)
+      if (val != '\0' && current_state.size() != 0)
       {
         alphabet.getSymbol(source, val);
       }
       else
       {
-        if (target == L"") // no match
+        if (target.empty()) // no match
         {
-          if (val == L'\0')
+          if (val == '\0')
           {
             // flush source
-            fputws_unlocked(source.c_str(), output);
+            write(source, output);
           }
           else
           {
-            fputwc_unlocked(source[0], output);
+            u_fputc(source[0], output);
 
             unsigned int mark, limit;
-            for (mark = 1, limit = source.size(); mark < limit && source[mark] != L'~' ; mark++)
+            for (mark = 1, limit = source.size(); mark < limit && source[mark] != '~' ; mark++)
             {
-              fputwc_unlocked(source[mark], output);
+              u_fputc(source[mark], output);
             }
 
             if (mark != source.size())
@@ -2552,20 +2101,20 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
               input_buffer.back(back);
             }
 
-            if (val == L'~')
+            if (val == '~')
             {
               input_buffer.back(1);
             } else {
-               fputwc_unlocked(val, output);
+               u_fputc(val, output);
             }
           }
         }
         else
         {
           for(unsigned int i=1; i<target.size(); i++) {
-            wchar_t c = target[i];
+            UChar c = target[i];
 
-            if (iswspace(c))
+            if (u_isspace(c))
             {
               printSpace(c, output);
             }
@@ -2573,13 +2122,13 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
             {
               if (isEscaped(c))
               {
-                fputwc_unlocked(L'\\', output);
+                u_fputc('\\', output);
               }
-              fputwc_unlocked(c, output);
+              u_fputc(c, output);
             }
           }
 
-          if (val != L'\0')
+          if (val != '\0')
           {
             input_buffer.setPos(last);
             input_buffer.back(1);
@@ -2587,13 +2136,13 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
         }
 
         current_state = initial_state;
-        target = L"";
-        source = L"";
+        target.clear();
+        source.clear();
         skip_mode = true;
       }
     }
 
-    if (val == L'\0')
+    if (val == '\0')
     {
       break;
     }
@@ -2604,7 +2153,7 @@ FSTProcessor::intergeneration(FILE *input, FILE *output)
 }
 
 void
-FSTProcessor::transliteration(FILE *input, FILE *output)
+FSTProcessor::transliteration(InputFile& input, UFILE *output)
 {
   if(getNullFlush())
   {
@@ -2612,27 +2161,27 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
   }
 
   State current_state = initial_state;
-  wstring lf = L"";
-  wstring sf = L"";
+  UString lf;
+  UString sf;
   int last = 0;
 
-  while(wchar_t val = readPostgeneration(input, output))
+  while(UChar val = readPostgeneration(input, output))
   {
-    if(iswpunct(val) || iswspace(val))
+    if(u_ispunct(val) || u_isspace(val))
     {
-      bool firstupper = iswupper(sf[1]);
-      bool uppercase = sf.size() > 1 && firstupper && iswupper(sf[2]);
+      bool firstupper = u_isupper(sf[1]);
+      bool uppercase = sf.size() > 1 && firstupper && u_isupper(sf[2]);
       lf = current_state.filterFinals(all_finals, alphabet, escaped_chars,
                                       displayWeightsMode, maxAnalyses, maxWeightClasses,
                                       uppercase, firstupper, 0);
       if(!lf.empty())
       {
-        fputws_unlocked(lf.substr(1).c_str(), output);
+        write(lf.substr(1), output);
         current_state = initial_state;
-        lf = L"";
-        sf = L"";
+        lf.clear();
+        sf.clear();
       }
-      if(iswspace(val))
+      if(u_isspace(val))
       {
         printSpace(val, output);
       }
@@ -2640,17 +2189,17 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
       {
         if(isEscaped(val))
         {
-          fputwc_unlocked(L'\\', output);
+          u_fputc('\\', output);
         }
-        fputwc_unlocked(val, output);
+        u_fputc(val, output);
       }
     }
     else
     {
       if(current_state.isFinal(all_finals))
       {
-        bool firstupper = iswupper(sf[1]);
-        bool uppercase = sf.size() > 1 && firstupper && iswupper(sf[2]);
+        bool firstupper = u_isupper(sf[1]);
+        bool uppercase = sf.size() > 1 && firstupper && u_isupper(sf[2]);
         lf = current_state.filterFinals(all_finals, alphabet, escaped_chars,
                                         displayWeightsMode, maxAnalyses, maxWeightClasses,
                                         uppercase, firstupper, 0);
@@ -2666,14 +2215,14 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
       {
         if(!lf.empty())
         {
-          fputws_unlocked(lf.substr(1).c_str(), output);
+          write(lf.substr(1), output);
           input_buffer.setPos(last);
           input_buffer.back(1);
           val = lf[lf.size()-1];
         }
         else
         {
-          if(iswspace(val))
+          if(u_isspace(val))
           {
             printSpace(val, output);
           }
@@ -2681,14 +2230,14 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
           {
             if(isEscaped(val))
             {
-              fputwc_unlocked(L'\\', output);
+              u_fputc('\\', output);
             }
-            fputwc_unlocked(val, output);
+            u_fputc(val, output);
           }
         }
         current_state = initial_state;
-        lf = L"";
-        sf = L"";
+        lf.clear();
+        sf.clear();
       }
     }
   }
@@ -2696,14 +2245,14 @@ FSTProcessor::transliteration(FILE *input, FILE *output)
   flushBlanks(output);
 }
 
-wstring
-FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
+UString
+FSTProcessor::biltransfull(UString const &input_word, bool with_delim)
 {
   State current_state = initial_state;
-  wstring result = L"";
+  UString result;
   unsigned int start_point = 1;
   unsigned int end_point = input_word.size()-2;
-  wstring queue = L"";
+  UString queue;
   bool mark = false;
 
   if(with_delim == false)
@@ -2712,37 +2261,37 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
     end_point = input_word.size()-1;
   }
 
-  if(input_word[start_point] == L'*')
+  if(input_word[start_point] == '*')
   {
     return input_word;
   }
 
-  if(input_word[start_point] == L'=')
+  if(input_word[start_point] == '=')
   {
     start_point++;
     mark = true;
   }
 
-  bool firstupper = iswupper(input_word[start_point]);
-  bool uppercase = firstupper && iswupper(input_word[start_point+1]);
+  bool firstupper = u_isupper(input_word[start_point]);
+  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
 
   for(unsigned int i = start_point; i <= end_point; i++)
   {
     int val;
-    wstring symbol = L"";
+    UString symbol;
 
-    if(input_word[i] == L'\\')
+    if(input_word[i] == '\\')
     {
       i++;
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
-    else if(input_word[i] == L'<')
+    else if(input_word[i] == '<')
     {
-      symbol = L'<';
+      symbol = '<';
       for(unsigned int j = i + 1; j <= end_point; j++)
       {
         symbol += input_word[j];
-        if(input_word[j] == L'>')
+        if(input_word[j] == '>')
         {
           i = j;
           break;
@@ -2752,13 +2301,13 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
     }
     else
     {
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
     if(current_state.size() != 0)
     {
-      if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+      if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
       {
-        current_state.step(val, towlower(val));
+        current_state.step(val, u_tolower(val));
       }
       else
       {
@@ -2767,37 +2316,22 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = L"^="+result.substr(1);
-        }
-        else
-        {
-          result[0] = L'^';
-        }
+      result.clear();
+      if(with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = L"=" + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if(mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
     {
-      if(symbol != L"" && result != L"")
+      if(!symbol.empty() && !result.empty())
       {
         queue.append(symbol);
       }
@@ -2806,11 +2340,11 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
         // word is not present
         if(with_delim)
         {
-          result = L"^@" + input_word.substr(1);
+          result = "^@"_u + input_word.substr(1);
         }
         else
         {
-          result = L"@" + input_word;
+          result = "@"_u + input_word;
         }
         return result;
       }
@@ -2819,23 +2353,23 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
 
   if(start_point < (end_point - 3))
   {
-    return L"^$";
+    return "^$"_u;
   }
   // attach unmatched queue automatically
 
-  if(queue != L"")
+  if(!queue.empty())
   {
-    wstring result_with_queue = L"";
+    UString result_with_queue;
     for(unsigned int i = 0, limit = result.size(); i != limit; i++)
     {
       switch(result[i])
       {
-        case L'\\':
-          result_with_queue += L'\\';
+        case '\\':
+          result_with_queue += '\\';
           i++;
           break;
 
-        case L'/':
+        case '/':
           result_with_queue.append(queue);
           break;
 
@@ -2848,7 +2382,7 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
 
     if(with_delim)
     {
-      result_with_queue += L'$';
+      result_with_queue += '$';
     }
     return result_with_queue;
   }
@@ -2856,7 +2390,7 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
   {
     if(with_delim)
     {
-      result += L'$';
+      result += '$';
     }
     return result;
   }
@@ -2864,14 +2398,14 @@ FSTProcessor::biltransfull(wstring const &input_word, bool with_delim)
 
 
 
-wstring
-FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
+UString
+FSTProcessor::biltrans(UString const &input_word, bool with_delim)
 {
   State current_state = initial_state;
-  wstring result = L"";
+  UString result;
   unsigned int start_point = 1;
   unsigned int end_point = input_word.size()-2;
-  wstring queue = L"";
+  UString queue;
   bool mark = false;
 
   if(with_delim == false)
@@ -2880,37 +2414,37 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
     end_point = input_word.size()-1;
   }
 
-  if(input_word[start_point] == L'*')
+  if(input_word[start_point] == '*')
   {
     return input_word;
   }
 
-  if(input_word[start_point] == L'=')
+  if(input_word[start_point] == '=')
   {
     start_point++;
     mark = true;
   }
 
-  bool firstupper = iswupper(input_word[start_point]);
-  bool uppercase = firstupper && iswupper(input_word[start_point+1]);
+  bool firstupper = u_isupper(input_word[start_point]);
+  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
 
   for(unsigned int i = start_point; i <= end_point; i++)
   {
     int val;
-    wstring symbol = L"";
+    UString symbol;
 
-    if(input_word[i] == L'\\')
+    if(input_word[i] == '\\')
     {
       i++;
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
-    else if(input_word[i] == L'<')
+    else if(input_word[i] == '<')
     {
-      symbol = L'<';
+      symbol = '<';
       for(unsigned int j = i + 1; j <= end_point; j++)
       {
         symbol += input_word[j];
-        if(input_word[j] == L'>')
+        if(input_word[j] == '>')
         {
           i = j;
           break;
@@ -2920,13 +2454,13 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
     }
     else
     {
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
     if(current_state.size() != 0)
     {
-      if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+      if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
       {
-        current_state.step(val, towlower(val));
+        current_state.step(val, u_tolower(val));
       }
       else
       {
@@ -2935,37 +2469,22 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = L"^="+result.substr(1);
-        }
-        else
-        {
-          result[0] = L'^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = L"=" + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
     {
-      if(symbol != L"" && result != L"")
+      if(!symbol.empty() && !result.empty())
       {
         queue.append(symbol);
       }
@@ -2974,11 +2493,11 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
         // word is not present
         if(with_delim)
         {
-          result = L"^@" + input_word.substr(1);
+          result = "^@"_u + input_word.substr(1);
         }
         else
         {
-          result = L"@" + input_word;
+          result = "@"_u + input_word;
         }
         return result;
       }
@@ -2987,19 +2506,19 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
 
   // attach unmatched queue automatically
 
-  if(queue != L"")
+  if(!queue.empty())
   {
-    wstring result_with_queue = L"";
+    UString result_with_queue;
     for(unsigned int i = 0, limit = result.size(); i != limit; i++)
     {
       switch(result[i])
       {
-        case L'\\':
-          result_with_queue += L'\\';
+        case '\\':
+          result_with_queue += '\\';
           i++;
           break;
 
-        case L'/':
+        case '/':
           result_with_queue.append(queue);
           break;
 
@@ -3012,7 +2531,7 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
 
     if(with_delim)
     {
-      result_with_queue += L'$';
+      result_with_queue += '$';
     }
     return result_with_queue;
   }
@@ -3020,54 +2539,53 @@ FSTProcessor::biltrans(wstring const &input_word, bool with_delim)
   {
     if(with_delim)
     {
-      result += L'$';
+      result += '$';
     }
     return result;
   }
 }
 
 void
-FSTProcessor::bilingual_wrapper_null_flush(FILE *input, FILE *output, GenerationMode mode)
+FSTProcessor::bilingual_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode)
 {
   setNullFlush(false);
   nullFlushGeneration = true;
 
-  while(!feof(input))
+  while(!input.eof())
   {
     bilingual(input, output, mode);
-    fputwc_unlocked(L'\0', output);
-    int code = fflush(output);
-    if(code != 0)
-    {
-        wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', output);
+    u_fflush(output);
   }
 }
 
-wstring
-FSTProcessor::compose(wstring const &lexforms, wstring const &queue) const
+UString
+FSTProcessor::compose(UString const &lexforms, UString const &queue) const
 {
-  wstring result = L"";
+  UString result;
+  result.reserve(lexforms.size() + 2 * queue.size());
+  result += '/';
 
   for(unsigned int i = 1; i< lexforms.size(); i++)
   {
-    if(lexforms[i] == L'\\')
+    if(lexforms[i] == '\\')
     {
-      result += L'\\';
+      result += '\\';
       i++;
     }
-    else if(lexforms[i] == L'/')
+    else if(lexforms[i] == '/')
     {
       result.append(queue);
     }
     result += lexforms[i];
   }
 
-  return L"/" + result + queue;
+  result += queue;
+  return result;
 }
 
 void
-FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
+FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode)
 {
   if(getNullFlush())
   {
@@ -3075,20 +2593,20 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
   }
 
   State current_state = initial_state;
-  wstring sf = L"";                   // source language analysis
-  wstring queue = L"";                // symbols to be added to each target
-  wstring result = L"";               // result of looking up analysis in bidix
+  UString sf;                   // source language analysis
+  UString queue;                // symbols to be added to each target
+  UString result;               // result of looking up analysis in bidix
 
   outOfWord = false;
 
-  skipUntil(input, output, L'^');
-  pair<wstring,int> tr;           // readBilingual return value, containing:
+  skipUntil(input, output, '^');
+  pair<UString,int> tr;           // readBilingual return value, containing:
   int val;                        // the alphabet value of current symbol, and
-  wstring symbol = L"";           // the current symbol as a string
+  UString symbol;           // the current symbol as a string
   bool seentags = false;          // have we seen any tags at all in the analysis?
 
   bool seensurface = false;
-  wstring surface = L"";
+  UString surface;
 
   while(true)                   // ie. while(val != 0x7fffffff)
   {
@@ -3096,17 +2614,17 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
     symbol = tr.first;
     val = tr.second;
 
-    //fwprintf(stderr, L"> %ls : %lc : %d\n", tr.first.c_str(), tr.second, tr.second);
+    //fprintf(stderr, "> %ls : %lc : %d\n", tr.first.c_str(), tr.second, tr.second);
     if(biltransSurfaceForms && !seensurface && !outOfWord)
     {
-      while(val != L'/' && val != 0x7fffffff)
+      while(val != '/' && val != 0x7fffffff)
       {
         surface = surface + symbol;
         alphabet.getSymbol(surface, val);
         tr = readBilingual(input, output);
         symbol = tr.first;
         val = tr.second;
-        //fwprintf(stderr, L" == %ls : %lc : %d => %ls\n", symbol.c_str(), val, val, surface.c_str());
+        //fprintf(stderr, " == %ls : %lc : %d => %ls\n", symbol.c_str(), val, val, surface.c_str());
       }
       seensurface = true;
       tr = readBilingual(input, output);
@@ -3119,12 +2637,12 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
       break;
     }
 
-    if(val == L'$' && outOfWord)
+    if(val == '$' && outOfWord)
     {
       if(!seentags)        // if no tags: only return complete matches
       {
-        bool uppercase = sf.size() > 1 && iswupper(sf[1]);
-        bool firstupper= iswupper(sf[0]);
+        bool uppercase = sf.size() > 1 && u_isupper(sf[1]);
+        bool firstupper= u_isupper(sf[0]);
 
         result = current_state.filterFinals(all_finals, alphabet,
                                             escaped_chars,
@@ -3132,16 +2650,16 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
                                             uppercase, firstupper, 0);
       }
 
-      if(sf[0] == L'*')
+      if(sf[0] == '*')
       {
         if (mode == gm_clean) {
-          printWordBilingual(sf, L"/" + sf.substr(1), output);
+          printWordBilingual(sf, "/"_u + sf.substr(1), output);
         }
         else {
-          printWordBilingual(sf, L"/" + sf, output);
+          printWordBilingual(sf, "/"_u + sf, output);
         }
       }
-      else if(result != L"")
+      else if(!result.empty())
       {
         printWordBilingual(sf, compose(result, queue), output);
       }
@@ -3149,30 +2667,30 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
       { //xxx
         if(biltransSurfaceForms)
         {
-          printWordBilingual(surface, L"/@"+surface, output);
+          printWordBilingual(surface, "/@"_u + surface, output);
         }
         else
         {
-          printWordBilingual(sf, L"/@"+sf, output);
+          printWordBilingual(sf, "/@"_u + sf, output);
         }
       }
       seensurface = false;
-      surface = L"";
-      queue = L"";
-      result = L"";
+      surface.clear();
+      queue.clear();
+      result.clear();
       current_state = initial_state;
-      sf = L"";
+      sf.clear();
       seentags = false;
     }
-    else if(iswspace(val) && sf.size() == 0)
+    else if(u_isspace(val) && sf.size() == 0)
     {
       // do nothing
     }
-    else if(sf.size() > 0 && sf[0] == L'*')
+    else if(sf.size() > 0 && sf[0] == '*')
     {
       if(escaped_chars.find(val) != escaped_chars.end())
       {
-        sf += L'\\';
+        sf += '\\';
       }
       alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
       if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
@@ -3184,7 +2702,7 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
     {
       if(escaped_chars.find(val) != escaped_chars.end())
       {
-        sf += L'\\';
+        sf += '\\';
       }
       alphabet.getSymbol(sf, val); // add symbol to sf iff alphabetic
       if(val == 0)  // non-alphabetic, possibly unknown tag; add to sf
@@ -3197,9 +2715,9 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
       }
       if(current_state.size() != 0)
       {
-        if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+        if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
         {
-          current_state.step(val, towlower(val));
+          current_state.step(val, u_tolower(val));
         }
         else
         {
@@ -3208,16 +2726,16 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
       }
       if(current_state.isFinal(all_finals))
       {
-        bool uppercase = sf.size() > 1 && iswupper(sf[1]);
-        bool firstupper= iswupper(sf[0]);
+        bool uppercase = sf.size() > 1 && u_isupper(sf[1]);
+        bool firstupper= u_isupper(sf[0]);
 
-        queue = L""; // the intervening tags were matched
+        queue.clear(); // the intervening tags were matched
         result = current_state.filterFinals(all_finals, alphabet,
                                             escaped_chars,
                                             displayWeightsMode, maxAnalyses, maxWeightClasses,
                                             uppercase, firstupper, 0);
       }
-      else if(result != L"")
+      else if(!result.empty())
       {
         // We already have a result, but there is still more to read
         // of the analysis; following tags are not consumed, but
@@ -3234,21 +2752,21 @@ FSTProcessor::bilingual(FILE *input, FILE *output, GenerationMode mode)
         else if(current_state.size() == 0)
         {
           // There are no more alive transductions and the current symbol is not a tag -- unknown word!
-          result = L"";
+          result.clear();
         }
       }
     }
   }
 }
 
-pair<wstring, int>
-FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
+pair<UString, int>
+FSTProcessor::biltransWithQueue(UString const &input_word, bool with_delim)
 {
   State current_state = initial_state;
-  wstring result = L"";
+  UString result;
   unsigned int start_point = 1;
   unsigned int end_point = input_word.size()-2;
-  wstring queue = L"";
+  UString queue;
   bool mark = false;
   bool seentags = false;  // have we seen any tags at all in the analysis?
 
@@ -3258,38 +2776,38 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
     end_point = input_word.size()-1;
   }
 
-  if(input_word[start_point] == L'*')
+  if(input_word[start_point] == '*')
   {
-    return pair<wstring, int>(input_word, 0);
+    return pair<UString, int>(input_word, 0);
   }
 
-  if(input_word[start_point] == L'=')
+  if(input_word[start_point] == '=')
   {
     start_point++;
     mark = true;
   }
 
-  bool firstupper = iswupper(input_word[start_point]);
-  bool uppercase = firstupper && iswupper(input_word[start_point+1]);
+  bool firstupper = u_isupper(input_word[start_point]);
+  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
 
   for(unsigned int i = start_point; i <= end_point; i++)
   {
     int val = 0;
-    wstring symbol = L"";
+    UString symbol;
 
-    if(input_word[i] == L'\\')
+    if(input_word[i] == '\\')
     {
       i++;
       val = input_word[i];
     }
-    else if(input_word[i] == L'<')
+    else if(input_word[i] == '<')
     {
       seentags = true;
-      symbol = L'<';
+      symbol = '<';
       for(unsigned int j = i + 1; j <= end_point; j++)
       {
         symbol += input_word[j];
-        if(input_word[j] == L'>')
+        if(input_word[j] == '>')
         {
           i = j;
           break;
@@ -3303,9 +2821,9 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
     }
     if(current_state.size() != 0)
     {
-      if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+      if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
       {
-        current_state.step(val, towlower(val));
+        current_state.step(val, u_tolower(val));
       }
       else
       {
@@ -3314,37 +2832,22 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = L"^=" + result.substr(1);
-        }
-        else
-        {
-          result[0] = L'^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = L"=" + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
     {
-      if(symbol != L"" && result != L"")
+      if(!symbol.empty() && !result.empty())
       {
         queue.append(symbol);
       }
@@ -3353,52 +2856,51 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
         // word is not present
         if(with_delim)
         {
-          result = L"^@" + input_word.substr(1);
+          result = "^@"_u + input_word.substr(1);
         }
         else
         {
-          result = L"@" + input_word;
+          result = "@"_u + input_word;
         }
-        return pair<wstring, int>(result, 0);
+        return pair<UString, int>(result, 0);
       }
     }
   }
 
   if (!seentags
-      && L"" == current_state.filterFinals(all_finals, alphabet,
-                                           escaped_chars,
-                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                           uppercase, firstupper, 0))
+      && current_state.filterFinals(all_finals, alphabet, escaped_chars,
+                                    displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                    uppercase, firstupper, 0).empty())
   {
     // word is not present
     if(with_delim)
     {
-      result = L"^@" + input_word.substr(1);
+      result = "^@"_u + input_word.substr(1);
     }
     else
     {
-      result = L"@" + input_word;
+      result = "@"_u + input_word;
     }
-    return pair<wstring, int>(result, 0);
+    return pair<UString, int>(result, 0);
   }
 
 
 
   // attach unmatched queue automatically
 
-  if(queue != L"")
+  if(!queue.empty())
   {
-    wstring result_with_queue = L"";
+    UString result_with_queue;
     for(unsigned int i = 0, limit = result.size(); i != limit; i++)
     {
       switch(result[i])
       {
-        case L'\\':
-          result_with_queue += L'\\';
+        case '\\':
+          result_with_queue += '\\';
           i++;
           break;
 
-        case L'/':
+        case '/':
           result_with_queue.append(queue);
           break;
 
@@ -3411,25 +2913,25 @@ FSTProcessor::biltransWithQueue(wstring const &input_word, bool with_delim)
 
     if(with_delim)
     {
-      result_with_queue += L'$';
+      result_with_queue += '$';
     }
-    return pair<wstring, int>(result_with_queue, queue.size());
+    return pair<UString, int>(result_with_queue, queue.size());
   }
   else
   {
     if(with_delim)
     {
-      result += L'$';
+      result += '$';
     }
-    return pair<wstring, int>(result, 0);
+    return pair<UString, int>(result, 0);
   }
 }
 
-wstring
-FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
+UString
+FSTProcessor::biltransWithoutQueue(UString const &input_word, bool with_delim)
 {
   State current_state = initial_state;
-  wstring result = L"";
+  UString result;
   unsigned int start_point = 1;
   unsigned int end_point = input_word.size()-2;
   bool mark = false;
@@ -3440,37 +2942,37 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
     end_point = input_word.size()-1;
   }
 
-  if(input_word[start_point] == L'*')
+  if(input_word[start_point] == '*')
   {
     return input_word;
   }
 
-  if(input_word[start_point] == L'=')
+  if(input_word[start_point] == '=')
   {
     start_point++;
     mark = true;
   }
 
-  bool firstupper = iswupper(input_word[start_point]);
-  bool uppercase = firstupper && iswupper(input_word[start_point+1]);
+  bool firstupper = u_isupper(input_word[start_point]);
+  bool uppercase = firstupper && u_isupper(input_word[start_point+1]);
 
   for(unsigned int i = start_point; i <= end_point; i++)
   {
     int val;
-    wstring symbol = L"";
+    UString symbol;
 
-    if(input_word[i] == L'\\')
+    if(input_word[i] == '\\')
     {
       i++;
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
-    else if(input_word[i] == L'<')
+    else if(input_word[i] == '<')
     {
-      symbol = L'<';
+      symbol = '<';
       for(unsigned int j = i + 1; j <= end_point; j++)
       {
         symbol += input_word[j];
-        if(input_word[j] == L'>')
+        if(input_word[j] == '>')
         {
           i = j;
           break;
@@ -3480,13 +2982,13 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
     }
     else
     {
-      val = static_cast<int>(input_word[i]);
+      val = static_cast<int32_t>(input_word[i]);
     }
     if(current_state.size() != 0)
     {
-      if(!alphabet.isTag(val) && iswupper(val) && !caseSensitive)
+      if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive)
       {
-        current_state.step(val, towlower(val));
+        current_state.step(val, u_tolower(val));
       }
       else
       {
@@ -3495,46 +2997,31 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = L"^=" + result.substr(1);
-        }
-        else
-        {
-          result[0] = L'^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = L"=" + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
     {
-      if(symbol == L"")
+      if(symbol.empty())
       {
         // word is not present
         if(with_delim)
         {
-          result = L"^@" + input_word.substr(1);
+          result = "^@"_u + input_word.substr(1);
         }
         else
         {
-          result = L"@" + input_word;
+          result = "@"_u + input_word;
         }
         return result;
       }
@@ -3543,7 +3030,7 @@ FSTProcessor::biltransWithoutQueue(wstring const &input_word, bool with_delim)
 
   if(with_delim)
   {
-    result += L'$';
+    result += '$';
   }
   return result;
 }
@@ -3554,16 +3041,16 @@ FSTProcessor::valid() const
 {
   if(initial_state.isFinal(all_finals))
   {
-    wcerr << L"Error: Invalid dictionary (hint: the left side of an entry is empty)" << endl;
+    cerr << "Error: Invalid dictionary (hint: the left side of an entry is empty)" << endl;
     return false;
   }
   else
   {
     State s = initial_state;
-    s.step(L' ');
+    s.step(' ');
     if(s.size() != 0)
     {
-      wcerr << L"Error: Invalid dictionary (hint: entry beginning with whitespace)" << endl;
+      cerr << "Error: Invalid dictionary (hint: entry beginning with whitespace)" << endl;
       return false;
     }
   }
@@ -3572,45 +3059,45 @@ FSTProcessor::valid() const
 }
 
 int
-FSTProcessor::readSAO(FILE *input)
+FSTProcessor::readSAO(InputFile& input)
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wchar_t val = static_cast<wchar_t>(fgetwc_unlocked(input));
-  if(feof(input))
+  UChar32 val = input.get();
+  if(input.eof())
   {
     return 0;
   }
 
   if(escaped_chars.find(val) != escaped_chars.end())
   {
-    if(val == L'<')
+    if(val == '<')
     {
-      wstring str = readFullBlock(input, L'<', L'>');
-      if(str.substr(0, 9) == L"<![CDATA[")
+      UString str = input.readBlock('<', '>');
+      if(str.substr(0, 9) == "<![CDATA["_u)
       {
-        while(str.substr(str.size()-3) != L"]]>")
+        while(str.substr(str.size()-3) != "]]>"_u)
         {
-          str.append(readFullBlock(input, L'<', L'>').substr(1));
+          str.append(input.readBlock('<', '>').substr(1));
         }
         blankqueue.push(str);
-        input_buffer.add(static_cast<int>(L' '));
-        return static_cast<int>(L' ');
+        input_buffer.add(static_cast<int32_t>(' '));
+        return static_cast<int32_t>(' ');
       }
       else
       {
         streamError();
       }
     }
-    else if (val == L'\\') {
-      val = static_cast<wchar_t>(fgetwc_unlocked(input));
+    else if (val == '\\') {
+      val = input.get();
       if(isEscaped(val))
       {
         input_buffer.add(val);
-        return static_cast<int>(val);
+        return static_cast<int32_t>(val);
       }
       else
         streamError();
@@ -3621,47 +3108,47 @@ FSTProcessor::readSAO(FILE *input)
     }
   }
 
-  input_buffer.add(val);
-  return static_cast<int>(val);
+  input_buffer.add(static_cast<int32_t>(val));
+  return static_cast<int32_t>(val);
 }
 
 void
-FSTProcessor::printSAOWord(wstring const &lf, FILE *output)
+FSTProcessor::printSAOWord(UString const &lf, UFILE *output)
 {
   for(unsigned int i = 1, limit = lf.size(); i != limit; i++)
   {
-    if(lf[i] == L'/')
+    if(lf[i] == '/')
     {
       break;
     }
-    fputwc_unlocked(lf[i], output);
+    u_fputc(lf[i], output);
   }
 }
 
 void
-FSTProcessor::SAO(FILE *input, FILE *output)
+FSTProcessor::SAO(InputFile& input, UFILE *output)
 {
   bool last_incond = false;
   bool last_postblank = false;
   State current_state = initial_state;
-  wstring lf = L"";
-  wstring sf = L"";
+  UString lf;
+  UString sf;
   int last = 0;
 
   escaped_chars.clear();
-  escaped_chars.insert(static_cast<wchar_t>(L'\\'));
-  escaped_chars.insert(static_cast<wchar_t>(L'<'));
-  escaped_chars.insert(static_cast<wchar_t>(L'>'));
+  escaped_chars.insert('\\');
+  escaped_chars.insert('<');
+  escaped_chars.insert('>');
 
-  while(wchar_t val = readSAO(input))
+  while(UChar32 val = readSAO(input))
   {
     // test for final states
     if(current_state.isFinal(all_finals))
     {
       if(current_state.isFinal(inconditional))
       {
-        bool firstupper = iswupper(sf[0]);
-        bool uppercase = firstupper && iswupper(sf[sf.size()-1]);
+        bool firstupper = u_isupper(sf[0]);
+        bool uppercase = firstupper && u_isupper(sf[sf.size()-1]);
 
         lf = current_state.filterFinalsSAO(all_finals, alphabet,
                                         escaped_chars,
@@ -3671,8 +3158,8 @@ FSTProcessor::SAO(FILE *input, FILE *output)
       }
       else if(current_state.isFinal(postblank))
       {
-        bool firstupper = iswupper(sf[0]);
-        bool uppercase = firstupper && iswupper(sf[sf.size()-1]);
+        bool firstupper = u_isupper(sf[0]);
+        bool uppercase = firstupper && u_isupper(sf[sf.size()-1]);
 
         lf = current_state.filterFinalsSAO(all_finals, alphabet,
                                         escaped_chars,
@@ -3682,8 +3169,8 @@ FSTProcessor::SAO(FILE *input, FILE *output)
       }
       else if(!isAlphabetic(val))
       {
-        bool firstupper = iswupper(sf[0]);
-        bool uppercase = firstupper && iswupper(sf[sf.size()-1]);
+        bool firstupper = u_isupper(sf[0]);
+        bool uppercase = firstupper && u_isupper(sf[sf.size()-1]);
 
         lf = current_state.filterFinalsSAO(all_finals, alphabet,
                                         escaped_chars,
@@ -3693,23 +3180,16 @@ FSTProcessor::SAO(FILE *input, FILE *output)
         last = input_buffer.getPos();
       }
     }
-    else if(sf == L"" && iswspace(val))
+    else if(sf.empty() && u_isspace(val))
     {
-      lf = L"/*";
+      lf = "/*"_u;
       lf.append(sf);
       last_postblank = false;
       last_incond = false;
       last = input_buffer.getPos();
     }
 
-    if(!iswupper(val) || caseSensitive)
-    {
-      current_state.step(val);
-    }
-    else
-    {
-      current_state.step(val, towlower(val));
-    }
+    current_state.step_case(val, caseSensitive);
 
     if(current_state.size() != 0)
     {
@@ -3717,9 +3197,9 @@ FSTProcessor::SAO(FILE *input, FILE *output)
     }
     else
     {
-      if(!isAlphabetic(val) && sf == L"")
+      if(!isAlphabetic(val) && sf.empty())
       {
-        if(iswspace(val))
+        if(u_isspace(val))
         {
           printSpace(val, output);
         }
@@ -3727,9 +3207,9 @@ FSTProcessor::SAO(FILE *input, FILE *output)
         {
           if(isEscaped(val))
           {
-            fputwc_unlocked(L'\\', output);
+            u_fputc('\\', output);
           }
-          fputwc_unlocked(val, output);
+          u_fputc(val, output);
         }
       }
       else if(last_incond)
@@ -3741,13 +3221,13 @@ FSTProcessor::SAO(FILE *input, FILE *output)
       else if(last_postblank)
       {
         printSAOWord(lf, output);
-        fputwc_unlocked(L' ', output);
+        u_fputc(' ', output);
         input_buffer.setPos(last);
         input_buffer.back(1);
       }
       else if(isAlphabetic(val) &&
               ((sf.size()-input_buffer.diffPrevPos(last)) > lastBlank(sf) ||
-               lf == L""))
+               lf.empty()))
       {
         do
         {
@@ -3757,21 +3237,17 @@ FSTProcessor::SAO(FILE *input, FILE *output)
 
         unsigned int limit = firstNotAlpha(sf);
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int>(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int>(UString::npos)?size:limit);
         input_buffer.back(1+(size-limit));
-        fputws_unlocked(L"<d>", output);
-        fputws_unlocked(sf.c_str(), output);
-        fputws_unlocked(L"</d>", output);
+        u_fprintf(output, "<d>%S</d>", sf.c_str());
       }
-      else if(lf == L"")
+      else if(lf.empty())
       {
         unsigned int limit = firstNotAlpha(sf);
         unsigned int size = sf.size();
-        limit = (limit == static_cast<unsigned int>(wstring::npos)?size:limit);
+        limit = (limit == static_cast<unsigned int>(UString::npos)?size:limit);
         input_buffer.back(1+(size-limit));
-        fputws_unlocked(L"<d>", output);
-        fputws_unlocked(sf.c_str(), output);
-        fputws_unlocked(L"</d>", output);
+        u_fprintf(output, "<d>%S</d>", sf.c_str());
       }
       else
       {
@@ -3781,8 +3257,8 @@ FSTProcessor::SAO(FILE *input, FILE *output)
       }
 
       current_state = initial_state;
-      lf = L"";
-      sf = L"";
+      lf.clear();
+      sf.clear();
       last_incond = false;
       last_postblank = false;
     }
@@ -3792,12 +3268,12 @@ FSTProcessor::SAO(FILE *input, FILE *output)
   flushBlanks(output);
 }
 
-wstring
-FSTProcessor::removeTags(wstring const &str)
+UString
+FSTProcessor::removeTags(UString const &str)
 {
   for(unsigned int i = 0; i < str.size(); i++)
   {
-    if(str[i] == L'<' && i >=1 && str[i-1] != L'\\')
+    if(str[i] == '<' && i >=1 && str[i-1] != '\\')
     {
       return str.substr(0, i);
     }
@@ -3880,7 +3356,7 @@ FSTProcessor::getNullFlush()
 }
 
 size_t
-FSTProcessor::firstNotAlpha(wstring const &sf)
+FSTProcessor::firstNotAlpha(UString const &sf)
 {
   for(size_t i = 0, limit = sf.size(); i < limit; i++)
   {
@@ -3890,5 +3366,5 @@ FSTProcessor::firstNotAlpha(wstring const &sf)
     }
   }
 
-  return wstring::npos;
+  return UString::npos;
 }
diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h
index 628356d..32263ac 100644
--- a/lttoolbox/fst_processor.h
+++ b/lttoolbox/fst_processor.h
@@ -18,19 +18,20 @@
 #ifndef _FSTPROCESSOR_
 #define _FSTPROCESSOR_
 
+#include <lttoolbox/ustring.h>
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/buffer.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/my_stdio.h>
 #include <lttoolbox/state.h>
 #include <lttoolbox/trans_exe.h>
+#include <lttoolbox/input_file.h>
 #include <libxml/xmlreader.h>
 
-#include <cwchar>
 #include <map>
 #include <queue>
 #include <set>
 #include <string>
+#include <cstdint>
 
 using namespace std;
 
@@ -56,7 +57,7 @@ private:
   /**
    * Transducers in FSTP
    */
-  map<wstring, TransExe, Ltstr> transducers;
+  map<UString, TransExe> transducers;
 
   /**
    * Current state of lexical analysis
@@ -71,7 +72,7 @@ private:
   /**
    * Default value of weight unless specified
    */
-  double default_weight;
+  double default_weight = 0.0000;
 
   /**
    * The final states of inconditional sections in the dictionaries
@@ -101,27 +102,27 @@ private:
   /**
    * Queue of blanks, used in reading methods
    */
-  queue<wstring> blankqueue;
+  queue<UString> blankqueue;
 
   /**
    * Queue of wordbound blanks, used in reading methods
    */
-  queue<wstring> wblankqueue;
+  queue<UString> wblankqueue;
 
   /**
    * Set of characters being considered alphabetics
    */
-  set<wchar_t> alphabetic_chars;
+  set<UChar32> alphabetic_chars;
 
   /**
    * Set of characters to escape with a backslash
    */
-  set<wchar_t> escaped_chars;
+  set<UChar32> escaped_chars;
 
   /**
    * Set of characters to ignore
    */
-  set<wchar_t> ignored_chars;
+  set<UChar32> ignored_chars;
 
   /**
    * Mapping of characters for simplistic diacritic restoration specified in RCX files
@@ -141,7 +142,7 @@ private:
   /**
    * Input buffer
    */
-  Buffer<int> input_buffer;
+  Buffer<int32_t> input_buffer;
 
   /**
    * Begin of the transducer
@@ -151,86 +152,86 @@ private:
   /**
    * true if the position of input stream is out of a word
    */
-  bool outOfWord;
+  bool outOfWord = false;
 
   /**
    * true if we're automatically removing surface forms.
    */
-  bool biltransSurfaceForms;
+  bool biltransSurfaceForms = false;
 
 
   /**
    * if true, makes always difference between uppercase and lowercase
    * characters
    */
-  bool caseSensitive;
+  bool caseSensitive = false;
 
   /**
    * if true, uses the dictionary case, discarding surface case
    * information
    */
-  bool dictionaryCase;
+  bool dictionaryCase = false;
 
   /**
    * if true, flush the output when the null character is found
    */
-  bool nullFlush;
+  bool nullFlush = false;
 
   /**
    * nullFlush property for the skipUntil function
    */
-  bool nullFlushGeneration;
+  bool nullFlushGeneration = false;
 
   /**
    * if true, ignore the provided set of characters
    */
-  bool useIgnoredChars;
+  bool useIgnoredChars = false;
 
   /**
    * if true, attempt simplistic diacritic restoration
    */
-  bool useRestoreChars;
+  bool useRestoreChars = false;
 
   /**
    * if true, skips loading the default set of ignored characters
    */
-  bool useDefaultIgnoredChars;
+  bool useDefaultIgnoredChars = true;
 
   /**
    * if true, displays the final weights (if any)
    */
-  bool displayWeightsMode;
+  bool displayWeightsMode = false;
 
   /**
    * try analysing unknown words as compounds
    */
-  bool do_decomposition;
+  bool do_decomposition = false;
 
   /**
    * Symbol of CompoundOnlyL
    */
-  int compoundOnlyLSymbol;
+  int compoundOnlyLSymbol = 0;
 
   /**
    * Symbol of CompoundR
    */
-  int compoundRSymbol;
+  int compoundRSymbol = 0;
 
   /**
    * Show or not the controls symbols (as compoundRSymbol)
    */
-   bool showControlSymbols;
+  bool showControlSymbols = false;
 
   /**
    * Max compound elements
    * Hard coded for now, but there might come a switch one day
    */
-  int compound_max_elements;
+  int compound_max_elements = 4;
 
   /**
    * Output no more than 'N' number of weighted analyses
    */
-  int maxAnalyses;
+  int maxAnalyses = INT_MAX;
 
   /**
    * True if a wblank block ([[..]]xyz[[/]]) was just read
@@ -250,62 +251,41 @@ private:
   /**
    * Output no more than 'N' best weight classes
    */
-  int maxWeightClasses;
+  int maxWeightClasses = INT_MAX;
 
   /**
    * Prints an error of input stream and exits
    */
   void streamError();
 
-  /**
-   * Reads a character that is defined in the set of escaped_chars
-   * @param input the stream to read from
-   * @return code of the character
-   */
-  wchar_t readEscaped(FILE *input);
-
-  /**
-   * Reads a block from the stream input, enclosed by delim1 and delim2
-   * @param input the stream being read
-   * @param delim1 the delimiter of the beginning of the sequence
-   * @param delim1 the delimiter of the end of the sequence
-   */
-  wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2);
-
-  /**
-   * Reads a wordbound blank from the stream input
-   * @param input the stream being read
-   */
-  wstring readWblank(FILE *input);
-
   /**
    * Reads a wordbound blank (opening blank to closing blank) from the stream input -> [[...]]xyz[[/]]
    * @param input the stream being read
    * @param output the stream to write on
    * @return true if the word enclosed by the wordbound blank has a ~ for postgeneration activation
    */
-  bool wblankPostGen(FILE *input, FILE *output);
+  bool wblankPostGen(InputFile& input, UFILE *output);
 
   /**
    * Returns true if the character code is identified as alphabetic
    * @param c the code provided by the user
    * @return true if it's alphabetic
    */
-  bool isAlphabetic(wchar_t const c) const;
+  bool isAlphabetic(UChar32 const c) const;
 
   /**
    * Tests if a character is in the set of escaped_chars
    * @param c the character code provided by the user
    * @return true if it is in the set
    */
-  bool isEscaped(wchar_t const c) const;
+  bool isEscaped(UChar32 const c) const;
 
   /**
    * Read text from stream (analysis version)
    * @param input the stream to read
    * @return the next symbol in the stream
    */
-  int readAnalysis(FILE *input);
+  int readAnalysis(InputFile& input);
 
   /**
    * Read text from stream (decomposition version)
@@ -313,7 +293,7 @@ private:
    * @param output the stream to write on
    * @return the next symbol in the stream
    */
-  int readDecomposition(FILE *input, FILE *output);
+  int readDecomposition(InputFile& input, UFILE *output);
 
   /**
    * Read text from stream (postgeneration version)
@@ -321,7 +301,7 @@ private:
    * @param output the stream to write on
    * @return the next symbol in the stream
    */
-  int readPostgeneration(FILE *input, FILE *output);
+  int readPostgeneration(InputFile& input, UFILE *output);
 
   /**
    * Read text from stream (generation version)
@@ -329,7 +309,7 @@ private:
    * @param output the stream being written to
    * @return the next symbol in the stream
    */
-  int readGeneration(FILE *input, FILE *output);
+  int readGeneration(InputFile& input, UFILE *output);
 
   /**
    * Read text from stream (biltrans version)
@@ -337,26 +317,26 @@ private:
    * @param output the stream to write on
    * @return the queue of 0-symbols, and the next symbol in the stream
    */
-  pair<wstring, int> readBilingual(FILE *input, FILE *output);
+  pair<UString, int> readBilingual(InputFile& input, UFILE *output);
 
   /**
    * Read text from stream (SAO version)
    * @param input the stream to read
    * @return the next symbol in the stream
    */
-  int readSAO(FILE *input);
+  int readSAO(InputFile& input);
 
   /**
    * Flush all the blanks remaining in the current process
    * @param output stream to write blanks
    */
-  void flushBlanks(FILE *output);
+  void flushBlanks(UFILE *output);
 
   /**
    * Flush all the wordbound blanks remaining in the current process
    * @param output stream to write blanks
    */
-  void flushWblanks(FILE *output);
+  void flushWblanks(UFILE *output);
 
   /**
    * Combine wordbound blanks in the queue and return them.
@@ -370,7 +350,7 @@ private:
    *
    * @return final wblank string
   */
-  wstring combineWblanks();
+  UString combineWblanks();
 
   /**
    * Calculate the initial state of parsing
@@ -387,7 +367,7 @@ private:
    * @param str the string to write, escaping characters
    * @param output the stream to write in
    */
-  void writeEscaped(wstring const &str, FILE *output);
+  void writeEscaped(UString const &str, UFILE *output);
 
   /**
    * Write a string to an output stream.
@@ -398,7 +378,7 @@ private:
    * @param output the stream to write in
    * @return how many blanks to pop and print after printing lu
    */
-  size_t writeEscapedPopBlanks(wstring const &str, FILE *output);
+  size_t writeEscapedPopBlanks(UString const &str, UFILE *output);
 
   /**
    * Write a string to an output stream, escaping all escapable characters
@@ -406,7 +386,7 @@ private:
    * @param str the string to write, escaping characters
    * @param output the stream to write in
    */
-  void writeEscapedWithTags(wstring const &str, FILE *output);
+  void writeEscapedWithTags(UString const &str, UFILE *output);
 
 
   /**
@@ -415,7 +395,7 @@ private:
    * @param the searched suffix
    * @returns true if 'str' has the suffix 'suffix'
    */
-  static bool endsWith(wstring const &str, wstring const &suffix);
+  static bool endsWith(UString const &str, UString const &suffix);
 
   /**
    * Prints a word
@@ -423,7 +403,7 @@ private:
    * @param lf lexical form of the word
    * @param output stream where the word is written
    */
-  void printWord(wstring const &sf, wstring const &lf, FILE *output);
+  void printWord(UString const &sf, UString const &lf, UFILE *output);
 
   /**
    * Prints a word.
@@ -433,7 +413,7 @@ private:
    * @param lf lexical form of the word
    * @param output stream where the word is written
    */
-  void printWordPopBlank(wstring const &sf, wstring const &lf, FILE *output);
+  void printWordPopBlank(UString const &sf, UString const &lf, UFILE *output);
 
   /**
    * Prints a word (Bilingual version)
@@ -441,7 +421,7 @@ private:
    * @param lf lexical form of the word
    * @param output stream where the word is written
    */
-  void printWordBilingual(wstring const &sf, wstring const &lf, FILE *output);
+  void printWordBilingual(UString const &sf, UString const &lf, UFILE *output);
 
 
   /**
@@ -449,21 +429,21 @@ private:
    * @param lf lexical form
    * @param output stream where the word is written
    */
-  void printSAOWord(wstring const &lf, FILE *output);
+  void printSAOWord(UString const &lf, UFILE *output);
 
   /**
    * Prints an unknown word
    * @param sf surface form of the word
    * @param output stream where the word is written
    */
-  void printUnknownWord(wstring const &sf, FILE *output);
+  void printUnknownWord(UString const &sf, UFILE *output);
 
   void initDecompositionSymbols();
 
-  vector<wstring> numbers;
-  int readTMAnalysis(FILE *input);
+  vector<UString> numbers;
+  int readTMAnalysis(InputFile& input);
 
-  unsigned int lastBlank(wstring const &str);
+  unsigned int lastBlank(UString const &str);
 
   /**
    * Print one blankqueue item if there is one, or a given "space" value.
@@ -471,32 +451,46 @@ private:
    * @param val the space character to use if no blank queue
    * @param output stream where the word is written
    */
-  void printSpace(wchar_t const val, FILE *output);
+  void printSpace(UChar const val, UFILE *output);
 
-  void skipUntil(FILE *input, FILE *output, wint_t const character);
-  static wstring removeTags(wstring const &str);
-  wstring compoundAnalysis(wstring str, bool uppercase, bool firstupper);
-  size_t firstNotAlpha(wstring const &sf);
+  void skipUntil(InputFile& input, UFILE *output, UChar32 const character);
+  static UString removeTags(UString const &str);
+  UString compoundAnalysis(UString str, bool uppercase, bool firstupper);
+  size_t firstNotAlpha(UString const &sf);
 
-  void analysis_wrapper_null_flush(FILE *input, FILE *output);
-  void lsx_wrapper_null_flush(FILE *input, FILE *output);
-  void bilingual_wrapper_null_flush(FILE *input, FILE *output, GenerationMode mode = gm_unknown);
-  void generation_wrapper_null_flush(FILE *input, FILE *output,
+  void analysis_wrapper_null_flush(InputFile& input, UFILE *output);
+  void bilingual_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown);
+  void generation_wrapper_null_flush(InputFile& input, UFILE *output,
                                      GenerationMode mode);
-  void postgeneration_wrapper_null_flush(FILE *input, FILE *output);
-  void intergeneration_wrapper_null_flush(FILE *input, FILE *output);
-  void transliteration_wrapper_null_flush(FILE *input, FILE *output);
+  void postgeneration_wrapper_null_flush(InputFile& input, UFILE *output);
+  void intergeneration_wrapper_null_flush(InputFile& input, UFILE *output);
+  void transliteration_wrapper_null_flush(InputFile& input, UFILE *output);
 
-  wstring compose(wstring const &lexforms, wstring const &queue) const;
+  UString compose(UString const &lexforms, UString const &queue) const;
 
   void procNodeICX();
   void procNodeRCX();
   void initDefaultIgnoredCharacters();
 
-  bool isLastBlankTM;
+  bool isLastBlankTM = false;
 
   xmlTextReaderPtr reader;
 public:
+
+  /*
+   * String constants
+   */
+  static UString const XML_TEXT_NODE;
+  static UString const XML_COMMENT_NODE;
+  static UString const XML_IGNORED_CHARS_ELEM;
+  static UString const XML_RESTORE_CHAR_ELEM;
+  static UString const XML_RESTORE_CHARS_ELEM;
+  static UString const XML_VALUE_ATTR;
+  static UString const XML_CHAR_ELEM;
+  static UString const WBLANK_START;
+  static UString const WBLANK_END;
+  static UString const WBLANK_FINAL;
+
   FSTProcessor();
 
   void initAnalysis();
@@ -507,25 +501,23 @@ public:
   void initBiltrans();
   void initDecomposition();
 
-  void analysis(FILE *input = stdin, FILE *output = stdout);
-  void tm_analysis(FILE *input = stdin, FILE *output = stdout);
-  void generation(FILE *input = stdin, FILE *output = stdout, GenerationMode mode = gm_unknown);
-  void postgeneration(FILE *input = stdin, FILE *output = stdout);
-  void intergeneration(FILE *input = stdin, FILE *output = stdout);
-  void transliteration(FILE *input = stdin, FILE *output = stdout);
-  wstring biltrans(wstring const &input_word, bool with_delim = true);
-  wstring biltransfull(wstring const &input_word, bool with_delim = true);
-  void bilingual(FILE *input = stdin, FILE *output = stdout, GenerationMode mode = gm_unknown);
-  pair<wstring, int> biltransWithQueue(wstring const &input_word, bool with_delim = true);
-  wstring biltransWithoutQueue(wstring const &input_word, bool with_delim = true);
-  void SAO(FILE *input = stdin, FILE *output = stdout);
+  void analysis(InputFile& input, UFILE *output);
+  void tm_analysis(InputFile& input, UFILE *output);
+  void generation(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown);
+  void postgeneration(InputFile& input, UFILE *output);
+  void intergeneration(InputFile& input, UFILE *output);
+  void transliteration(InputFile& input, UFILE *output);
+  UString biltrans(UString const &input_word, bool with_delim = true);
+  UString biltransfull(UString const &input_word, bool with_delim = true);
+  void bilingual(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown);
+  pair<UString, int> biltransWithQueue(UString const &input_word, bool with_delim = true);
+  UString biltransWithoutQueue(UString const &input_word, bool with_delim = true);
+  void SAO(InputFile& input, UFILE *output);
   void parseICX(string const &file);
   void parseRCX(string const &file);
 
   void load(FILE *input);
 
-  void lsx(FILE *input, FILE *output);
-
   bool valid() const;
 
   void setCaseSensitiveMode(bool const value);
diff --git a/lttoolbox/input_file.cc b/lttoolbox/input_file.cc
new file mode 100644
index 0000000..307c8c9
--- /dev/null
+++ b/lttoolbox/input_file.cc
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2021 Apertium
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <lttoolbox/input_file.h>
+#include <utf8.h>
+#include <stdexcept>
+#include <unicode/ustdio.h>
+#include <cstring>
+#include <iostream>
+
+// A new InputFile reads from stdin and starts with nothing buffered.
+InputFile::InputFile() : infile(stdin), buffer_size(0)
+{}
+
+InputFile::~InputFile()
+{
+  close(); // release the stream this object still holds, if any
+}
+
+// Switch to reading `fname`, or stdin when fname is null. Whatever stream
+// was set before is released through close() first. Returns false when
+// fopen() fails, in which case infile is left null.
+bool
+InputFile::open(const char* fname)
+{
+  close();
+  FILE* stream = (fname == nullptr) ? stdin : fopen(fname, "rb");
+  infile = stream;
+  return stream != nullptr;
+}
+
+// Like open(), but reports the failure on stderr and terminates the process.
+void
+InputFile::open_or_exit(const char* fname)
+{
+  if (open(fname)) return;
+  std::cerr << "Error: Unable to open '" << fname << "' for reading." << std::endl;
+  exit(EXIT_FAILURE);
+}
+
+// Release the current stream; stdin is only forgotten, never fclose()d.
+void
+InputFile::close()
+{
+  if (infile != nullptr && infile != stdin) {
+    fclose(infile);
+  }
+  infile = nullptr;
+  buffer_size = 0; // lookahead from the old stream must not leak into the next one
+}
+
+void
+InputFile::wrap(FILE* newinfile)
+{
+  close();            // let go of the previous stream first
+  infile = newinfile; // adopted: a later close() will fclose() it unless it is stdin
+}
+
+// Decode the next UTF-8 sequence from infile into ubuffer[0]; does nothing if
+// a code point is already buffered. End of input (or no open stream) is
+// buffered as U_EOF and a NUL byte as '\0'. Throws std::runtime_error when
+// the stream ends in the middle of a multi-byte sequence.
+void
+InputFile::internal_read()
+{
+  if (buffer_size) {
+    return;
+  }
+  if (infile == nullptr || feof(infile)) {
+    ubuffer[buffer_size++] = U_EOF;
+    return;
+  }
+  // Compare fgetc's int result, not a char: a char mistakes byte 0xFF for EOF
+  // where char is signed and never equals EOF where char is unsigned.
+  const int lead = fgetc_unlocked(infile);
+  if (lead == EOF || lead == '\0') {
+    ubuffer[buffer_size++] = (lead == EOF) ? U_EOF : '\0';
+    return;
+  }
+  cbuffer[0] = static_cast<char>(lead);
+  // The high bits of the lead byte give the number of continuation bytes.
+  size_t extra = 0;
+  const char* short_read = "";
+  if ((lead & 0xF0) == 0xF0) {
+    extra = 3; short_read = "Could not read 3 expected bytes from stream";
+  } else if ((lead & 0xE0) == 0xE0) {
+    extra = 2; short_read = "Could not read 2 expected bytes from stream";
+  } else if ((lead & 0xC0) == 0xC0) {
+    extra = 1; short_read = "Could not read 1 expected byte from stream";
+  }
+  if (extra != 0 && fread_unlocked(cbuffer+1, 1, extra, infile) != extra) {
+    throw std::runtime_error(short_read);
+  }
+  memset(ubuffer, 0, sizeof(ubuffer)); // whole array: UChar32 entries are wider than UChar
+  utf8::utf8to32(cbuffer, cbuffer+1+extra, ubuffer);
+  buffer_size = 1;
+}
+
+// Pop and return the next code point. internal_read() refills the buffer
+// when it is empty and returns immediately when it is not.
+UChar32
+InputFile::get()
+{
+  internal_read();
+  return ubuffer[--buffer_size];
+}
+
+// Return the next code point without consuming it; it stays on top of the
+// buffer for the following get().
+UChar32
+InputFile::peek()
+{
+  internal_read();
+  return ubuffer[buffer_size-1];
+}
+
+void
+InputFile::unget(UChar32 c)
+{ // push c back for the next get()/peek(); throws instead of writing past the end of ubuffer
+  if (static_cast<size_t>(buffer_size) >= sizeof(ubuffer)/sizeof(ubuffer[0])) throw std::runtime_error("InputFile::unget: pushback buffer is full");
+  ubuffer[buffer_size++] = c;
+}
+
+bool
+InputFile::eof()
+{ // no open stream counts as end of input; code points still held in ubuffer are not considered
+  return !infile || feof(infile) != 0;
+}
+
+// Seek back to the start of the stream; exits the process if fseek() fails.
+void
+InputFile::rewind()
+{
+  if (infile != nullptr && std::fseek(infile, 0, SEEK_SET) != 0) {
+    std::cerr << "Error: Unable to rewind file" << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  buffer_size = 0; // drop lookahead decoded before the seek so get() restarts at the first code point
+}
+
+// Returns start..end inclusive; start has already been read by the caller.
+// A backslash keeps the code point after it. NUL or end of input stops the
+// scan early and is not appended (get() reports end of input as U_EOF).
+UString
+InputFile::readBlock(const UChar32 start, const UChar32 end)
+{
+  UString ret;
+  ret += start;
+  for (UChar32 c = 0; c != end && !eof(); ) {
+    c = get();
+    if (c == '\0' || c == U_EOF) break;
+    ret += c;
+    if (c == '\\') {
+      ret += get();
+    }
+  }
+  return ret;
+}
+
+// Reads the rest of a [[...]] block whose opening "[[" has already been
+// consumed and returns it with the brackets. A backslash keeps the code point
+// after it; NUL or end of input (U_EOF from get()) ends the scan unappended.
+UString
+InputFile::finishWBlank()
+{
+  UString ret;
+  ret += '[';
+  ret += '[';
+  while (!eof()) {
+    const UChar32 c = get();
+    if (c == '\0' || c == U_EOF) break;
+    ret += c;
+    if (c == '\\') {
+      ret += get();
+    } else if (c == ']' && peek() == ']') {
+      ret += get();
+      break;
+    }
+  }
+  return ret;
+}
diff --git a/lttoolbox/input_file.h b/lttoolbox/input_file.h
new file mode 100644
index 0000000..de031c8
--- /dev/null
+++ b/lttoolbox/input_file.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2021 Apertium
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LT_INPUT_FILE_H_
+#define _LT_INPUT_FILE_H_
+
+#include <cstdio>
+#include <unicode/uchar.h>
+#include <lttoolbox/ustring.h>
+
+class InputFile
+{
+private:
+  FILE* infile;         // current stream; close() fclose()s it unless it is stdin
+  UChar32 ubuffer[3];   // decoded code points not yet handed out, used as a stack
+  char cbuffer[4];      // raw bytes of the UTF-8 sequence being decoded
+  int buffer_size;      // number of live entries in ubuffer
+  void internal_read(); // fills ubuffer from infile when it is empty
+public:
+  InputFile();
+  ~InputFile();
+  // the destructor closes infile, so a copy would close the same FILE* twice
+  InputFile(const InputFile&) = delete;
+  InputFile& operator=(const InputFile&) = delete;
+  bool open(const char* fname = nullptr);         // nullptr means stdin; false if fopen fails
+  void open_or_exit(const char* fname = nullptr); // like open(), but exits on failure
+  void close();
+  void wrap(FILE* newinfile);                     // takes over an already-open stream
+  UChar32 get();                                  // consume the next code point
+  UChar32 peek();                                 // look at the next code point without consuming it
+  void unget(UChar32 c);                          // push c back onto ubuffer
+  bool eof();
+  void rewind();
+  // readBlock: assumes start has already been read; returns the string from start to end inclusive, respecting backslash escapes
+  UString readBlock(const UChar32 start, const UChar32 end);
+  // finishWBlank: assumes [[ has already been read, reads to ]]; returns the entire string, including brackets
+  UString finishWBlank();
+};
+
+#endif
diff --git a/lttoolbox/lt_comp.cc b/lttoolbox/lt_comp.cc
index 9d05c21..0202343 100644
--- a/lttoolbox/lt_comp.cc
+++ b/lttoolbox/lt_comp.cc
@@ -17,7 +17,6 @@
 #include <lttoolbox/compiler.h>
 #include <lttoolbox/att_compiler.h>
 #include <lttoolbox/lt_locale.h>
-#include <lttoolbox/string_to_wostream.h>
 
 #include <cstdlib>
 #include <iostream>
@@ -103,21 +102,21 @@ int main(int argc, char *argv[])
     switch (cnt)
     {
       case 'a':
-        c.setAltValue(optarg);
+        c.setAltValue(to_ustring(optarg));
         break;
 
       case 'v':
-        c.setVariantValue(optarg);
+        c.setVariantValue(to_ustring(optarg));
         break;
 
       case 'l':
         vl = optarg;
-        c.setVariantLeftValue(vl);
+        c.setVariantLeftValue(to_ustring(optarg));
         break;
 
       case 'r':
         vr = optarg;
-        c.setVariantRightValue(vr);
+        c.setVariantRightValue(to_ustring(optarg));
         break;
 
       case 'm':
@@ -176,7 +175,7 @@ int main(int argc, char *argv[])
   }
   else
   {
-    wcerr << "Error: Cannot not open file '" << infile << "'." << endl << endl;
+    cerr << "Error: Cannot not open file '" << infile << "'." << endl << endl;
     exit(EXIT_FAILURE);
   }
   initGenericErrorDefaultFunc(NULL);
@@ -192,7 +191,7 @@ int main(int argc, char *argv[])
     if(ttype == 'a')
     {
       LtLocale::tryToSetLocale();
-      a.parse(infile, Compiler::COMPILER_RESTRICTION_LR_VAL);
+      a.parse(infile, false);
     }
     else
     {
@@ -214,7 +213,7 @@ int main(int argc, char *argv[])
     if(ttype == 'a')
     {
       LtLocale::tryToSetLocale();
-      a.parse(infile, Compiler::COMPILER_RESTRICTION_RL_VAL);
+      a.parse(infile, true);
     }
     else
     {
@@ -230,7 +229,7 @@ int main(int argc, char *argv[])
   FILE *output = fopen(outfile.c_str(), "wb");
   if(!output)
   {
-    wcerr << "Error: Cannot open file '" << outfile << "'." << endl;
+    cerr << "Error: Cannot open file '" << outfile << "'." << endl;
     exit(EXIT_FAILURE);
   }
   if(ttype == 'a')
diff --git a/lttoolbox/lt_expand.cc b/lttoolbox/lt_expand.cc
index 283f209..3d9facc 100644
--- a/lttoolbox/lt_expand.cc
+++ b/lttoolbox/lt_expand.cc
@@ -55,7 +55,8 @@ void endProgram(char *name)
 
 int main(int argc, char *argv[])
 {
-  FILE *input = NULL, *output = NULL;
+  FILE* input = NULL;
+  UFILE* output = NULL;
   Expander e;
   e.setKeepBoundaries(false);
 
@@ -86,15 +87,15 @@ int main(int argc, char *argv[])
     switch (cnt)
     {
       case 'a':
-        e.setAltValue(optarg);
+        e.setAltValue(to_ustring(optarg));
         break;
 
       case 'v':
-        e.setVariantValue(optarg);
+        e.setVariantValue(to_ustring(optarg));
         break;
 
       case 'l':
-        e.setVariantLeftValue(optarg);
+        e.setVariantLeftValue(to_ustring(optarg));
         break;
 
       case 'm':
@@ -102,7 +103,7 @@ int main(int argc, char *argv[])
         break;
 
       case 'r':
-        e.setVariantRightValue(optarg);
+        e.setVariantRightValue(to_ustring(optarg));
         break;
 
       case 'h':
@@ -122,11 +123,11 @@ int main(int argc, char *argv[])
       input = fopen(infile.c_str(), "rb");
       if(input == NULL)
       {
-        wcerr << "Error: Cannot open file '" << infile << "'." << endl;
+        cerr << "Error: Cannot open file '" << infile << "'." << endl;
         exit(EXIT_FAILURE);
       }
       fclose(input);
-      output = stdout;
+      output = u_finit(stdout, NULL, NULL);
       break;
 
     case 3:
@@ -134,16 +135,16 @@ int main(int argc, char *argv[])
       input = fopen(infile.c_str(), "rb");
       if(input == NULL)
       {
-        wcerr << "Error: Cannot open file '" << infile << "'." << endl;
+        cerr << "Error: Cannot open file '" << infile << "'." << endl;
         exit(EXIT_FAILURE);
       }
       fclose(input);
 
       outfile = argv[argc-1];
-      output = fopen(argv[argc-1], "wb");
+      output = u_fopen(argv[argc-1], "wb", NULL, NULL);
       if(output == NULL)
       {
-        wcerr << "Error: Cannot open file '" << outfile << "'." << endl;
+        cerr << "Error: Cannot open file '" << outfile << "'." << endl;
         exit(EXIT_FAILURE);
       }
       break;
@@ -158,7 +159,7 @@ int main(int argc, char *argv[])
 #endif
 
   e.expand(infile, output);
-  fclose(output);
+  u_fclose(output);
 
   return EXIT_SUCCESS;
 }
diff --git a/lttoolbox/lt_locale.cc b/lttoolbox/lt_locale.cc
index 64cb71e..10378b3 100644
--- a/lttoolbox/lt_locale.cc
+++ b/lttoolbox/lt_locale.cc
@@ -41,7 +41,7 @@ LtLocale::tryToSetLocale()
     return;
   }
 
-  wcerr << "Warning: unsupported locale, fallback to \"C\"" << endl;
+  cerr << "Warning: unsupported locale, fallback to \"C\"" << endl;
 
   setlocale(LC_ALL, "C");
 #endif
diff --git a/lttoolbox/lt_print.cc b/lttoolbox/lt_print.cc
index c138d56..8139e02 100644
--- a/lttoolbox/lt_print.cc
+++ b/lttoolbox/lt_print.cc
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <libgen.h>
 #include <string>
+#include <cstring>
 #include <getopt.h>
 
 #ifdef _MSC_VER
@@ -50,7 +51,7 @@ int main(int argc, char *argv[])
 {
   bool hfst = false;
   FILE* input = NULL;
-  FILE* output = stdout;
+  UFILE* output = u_finit(stdout, NULL, NULL);
 
   LtLocale::tryToSetLocale();
 
@@ -118,7 +119,7 @@ int main(int argc, char *argv[])
 
   if(outfile != "")
   {
-    output = fopen(outfile.c_str(), "wb");
+    output = u_fopen(outfile.c_str(), "wb", NULL, NULL);
     if(!output)
     {
       cerr << "Error: Cannot open file '" << outfile << "' for writing." << endl;
@@ -127,14 +128,14 @@ int main(int argc, char *argv[])
   }
 
   Alphabet alphabet;
-  set<wchar_t> alphabetic_chars;
+  set<UChar> alphabetic_chars;
 
-  map<wstring, Transducer> transducers;
+  map<UString, Transducer> transducers;
 
   fpos_t pos;
   if (fgetpos(input, &pos) == 0) {
       char header[4]{};
-      fread(header, 1, 4, input);
+      fread_unlocked(header, 1, 4, input);
       if (strncmp(header, HEADER_LTTOOLBOX, 4) == 0) {
           auto features = read_le<uint64_t>(input);
           if (features >= LTF_UNKNOWN) {
@@ -151,7 +152,7 @@ int main(int argc, char *argv[])
   int len = Compression::multibyte_read(input);
   while(len > 0)
   {
-    alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(input)));
+    alphabetic_chars.insert(static_cast<UChar32>(Compression::multibyte_read(input)));
     len--;
   }
 
@@ -162,13 +163,7 @@ int main(int argc, char *argv[])
 
   while(len > 0)
   {
-    int len2 = Compression::multibyte_read(input);
-    wstring name = L"";
-    while(len2 > 0)
-    {
-      name += static_cast<wchar_t>(Compression::multibyte_read(input));
-      len2--;
-    }
+    UString name = Compression::string_read(input);
     transducers[name].read(input);
 
     len--;
@@ -176,23 +171,20 @@ int main(int argc, char *argv[])
 
   /////////////////////
 
-  map<wstring, Transducer>::iterator penum = transducers.end();
+  map<UString, Transducer>::iterator penum = transducers.end();
   penum--;
-  for(map<wstring, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
+  for(map<UString, Transducer>::iterator it = transducers.begin(); it != transducers.end(); it++)
   {
     it->second.joinFinals();
     it->second.show(alphabet, output, 0, hfst);
     if(it != penum)
     {
-      fwprintf(output, L"--\n", it->first.c_str()); // ToDo: Was %ls meant to go somewhere here?
+      u_fprintf(output, "--\n");
     }
   }
 
   fclose(input);
-  if(output != stdout)
-  {
-    fclose(output);
-  }
+  u_fclose(output);
 
   return 0;
 }
diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc
index 7ff4c8b..9be941d 100644
--- a/lttoolbox/lt_proc.cc
+++ b/lttoolbox/lt_proc.cc
@@ -28,9 +28,6 @@
 #include <fcntl.h>
 #endif
 
-#if defined(_WIN32) && !defined(_MSC_VER)
-#include <utf8_fwrap.h>
-#endif
 
 using namespace std;
 
@@ -183,7 +180,7 @@ int main(int argc, char *argv[])
       maxAnalyses = atoi(optarg);
       if (maxAnalyses < 1)
       {
-        wcerr << "Invalid or no argument for analyses count" << endl;
+        cerr << "Invalid or no argument for analyses count" << endl;
         exit(EXIT_FAILURE);
       }
       fstp.setMaxAnalysesValue(maxAnalyses);
@@ -193,7 +190,7 @@ int main(int argc, char *argv[])
       maxWeightClasses = atoi(optarg);
       if (maxWeightClasses < 1)
       {
-        wcerr << "Invalid or no argument for weight class count" << endl;
+        cerr << "Invalid or no argument for weight class count" << endl;
         exit(EXIT_FAILURE);
       }
       fstp.setMaxWeightClassesValue(maxWeightClasses);
@@ -252,7 +249,8 @@ int main(int argc, char *argv[])
     }
   }
 
-  FILE *input = stdin, *output = stdout;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
   LtLocale::tryToSetLocale();
 
   if(optind == (argc - 3))
@@ -260,21 +258,19 @@ int main(int argc, char *argv[])
     FILE *in = fopen(argv[optind], "rb");
     if(in == NULL || ferror(in))
     {
-      wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
+      cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
       exit(EXIT_FAILURE);
     }
 
-    input = fopen(argv[optind+1], "rb");
-    if(input == NULL || ferror(input))
-    {
-      wcerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl;
+    if (!input.open(argv[optind+1])) {
+      cerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl;
       exit(EXIT_FAILURE);
     }
 
-    output= fopen(argv[optind+2], "wb");
-    if(output == NULL || ferror(output))
+    output = u_fopen(argv[optind+2], "wb", NULL, NULL);
+    if(output == NULL)
     {
-      wcerr << "Error: Cannot open file '" << argv[optind+2] << "'." << endl << endl;
+      cerr << "Error: Cannot open file '" << argv[optind+2] << "'." << endl << endl;
       exit(EXIT_FAILURE);
     }
 
@@ -286,14 +282,12 @@ int main(int argc, char *argv[])
     FILE *in = fopen(argv[optind], "rb");
     if(in == NULL || ferror(in))
     {
-      wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
+      cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
       exit(EXIT_FAILURE);
     }
 
-    input = fopen(argv[optind+1], "rb");
-    if(input == NULL || ferror(input))
-    {
-      wcerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl;
+    if (!input.open(argv[optind+1])) {
+      cerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl;
       exit(EXIT_FAILURE);
     }
 
@@ -305,7 +299,7 @@ int main(int argc, char *argv[])
     FILE *in = fopen(argv[optind], "rb");
     if(in == NULL || ferror(in))
     {
-      wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
+      cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl;
       exit(EXIT_FAILURE);
      }
     fstp.load(in);
@@ -414,15 +408,14 @@ int main(int argc, char *argv[])
   }
   catch (exception& e)
   {
-    wcerr << e.what();
+    cerr << e.what();
     if (fstp.getNullFlush()) {
-      fputwc_unlocked(L'\0', output);
+      u_fputc('\0', output);
     }
 
     exit(1);
   }
 
-  fclose(input);
-  fclose(output);
+  u_fclose(output);
   return EXIT_SUCCESS;
 }
diff --git a/lttoolbox/lt_tmxcomp.cc b/lttoolbox/lt_tmxcomp.cc
index ab7df4b..32ab99f 100644
--- a/lttoolbox/lt_tmxcomp.cc
+++ b/lttoolbox/lt_tmxcomp.cc
@@ -82,25 +82,11 @@ int main(int argc, char *argv[])
     switch(c_t)
     {
       case 'o':
-        {
-          wchar_t *param = new wchar_t[strlen(optarg)+1];
-          if((size_t) -1 != mbstowcs(param, optarg, strlen(optarg)))
-          {
-            c.setOriginLanguageCode(param);
-          }
-          delete[] param;
-        }
+        c.setOriginLanguageCode(to_ustring(optarg));
         break;
 
       case 'm':
-        {
-          wchar_t *param = new wchar_t[strlen(optarg)+1];
-          if((size_t) -1 != mbstowcs(param, optarg, strlen(optarg)))
-          {
-            c.setMetaLanguageCode(param);
-          }
-          delete[] param;
-        }
+        c.setMetaLanguageCode(to_ustring(optarg));
         break;
 
       default:
@@ -109,27 +95,20 @@ int main(int argc, char *argv[])
     }
   }
 
-  string opc = argv[argc-3];
-  wchar_t* lo = new wchar_t[opc.size()+1];
-  wchar_t* lm = new wchar_t[opc.size()+1];
+  UString opc = to_ustring(argv[argc-3]);
+  UString lo = opc.substr(0, opc.find('-'));
+  UString lm = opc.substr(opc.find('-')+1);
 
-  if(((size_t) -1 == mbstowcs(lo, opc.substr(0, opc.find('-')).c_str(), opc.size()))||
-     ((size_t) -1 == mbstowcs(lm, opc.substr(opc.find('-')+1).c_str(), opc.size())))
-  {
-    delete[] lo;
-    delete[] lm;
+  if(lo.empty() || lm.empty()) {
     endProgram(argv[0]);
   }
 
-
   c.parse(argv[argc-2], lo, lm);
-  delete[] lo;
-  delete[] lm;
 
   FILE *output = fopen(argv[argc-1], "wb");
   if(!output)
   {
-    wcerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
+    cerr << "Error: Cannot open file '" << argv[2] << "'." << endl;
     exit(EXIT_FAILURE);
   }
   c.write(output);
diff --git a/lttoolbox/lt_tmxproc.cc b/lttoolbox/lt_tmxproc.cc
index c90aca9..0abee7f 100644
--- a/lttoolbox/lt_tmxproc.cc
+++ b/lttoolbox/lt_tmxproc.cc
@@ -43,7 +43,8 @@ void checkValidity(FSTProcessor const &fstp)
 
 int main(int argc, char *argv[])
 {
-  FILE *input = stdin, *output = stdout;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
   LtLocale::tryToSetLocale();
   FSTProcessor fstp;
   FILE *aux;
@@ -51,16 +52,14 @@ int main(int argc, char *argv[])
   switch(argc)
   {
     case 4:
-      output = fopen(argv[3], "wb");
+      output = u_fopen(argv[3], "wb", NULL, NULL);
       if(!output)
       {
         endProgram(argv[0]);
       }
       // follow
     case 3:
-      input = fopen(argv[2], "rb");
-      if(!input)
-      {
+      if (!input.open(argv[2])) {
         endProgram(argv[0]);
       }
       // follow
@@ -82,7 +81,6 @@ int main(int argc, char *argv[])
   checkValidity(fstp);
   fstp.tm_analysis(input, output);
 
-  fclose(input);
-  fclose(output);
+  u_fclose(output);
   return EXIT_SUCCESS;
 }
diff --git a/lttoolbox/lt_trim.cc b/lttoolbox/lt_trim.cc
index 837794f..f685752 100644
--- a/lttoolbox/lt_trim.cc
+++ b/lttoolbox/lt_trim.cc
@@ -24,6 +24,7 @@
 #include <iostream>
 #include <libgen.h>
 #include <string>
+#include <cstring>
 
 void endProgram(char *name)
 {
@@ -35,18 +36,17 @@ void endProgram(char *name)
   exit(EXIT_FAILURE);
 }
 
-std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> >
+std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> >
 read_fst(FILE *bin_file)
 {
   Alphabet new_alphabet;
-  wstring letters = L"";
 
-  std::map<wstring, Transducer> transducers;
+  std::map<UString, Transducer> transducers;
 
   fpos_t pos;
   if (fgetpos(bin_file, &pos) == 0) {
       char header[4]{};
-      fread(header, 1, 4, bin_file);
+      fread_unlocked(header, 1, 4, bin_file);
       if (strncmp(header, HEADER_LTTOOLBOX, 4) == 0) {
           auto features = read_le<uint64_t>(bin_file);
           if (features >= LTF_UNKNOWN) {
@@ -60,47 +60,36 @@ read_fst(FILE *bin_file)
   }
 
   // letters
-  int len = Compression::multibyte_read(bin_file);
-  while(len > 0)
-  {
-    letters.push_back(static_cast<wchar_t>(Compression::multibyte_read(bin_file)));
-    len--;
-  }
+  UString letters = Compression::string_read(bin_file);
 
   // symbols
   new_alphabet.read(bin_file);
 
-  len = Compression::multibyte_read(bin_file);
+  int len = Compression::multibyte_read(bin_file);
 
   while(len > 0)
   {
-    int len2 = Compression::multibyte_read(bin_file);
-    wstring name = L"";
-    while(len2 > 0)
-    {
-      name += static_cast<wchar_t>(Compression::multibyte_read(bin_file));
-      len2--;
-    }
+    UString name = Compression::string_read(bin_file);
     transducers[name].read(bin_file);
 
     len--;
   }
 
-  std::pair<Alphabet, wstring> alph_letters;
+  std::pair<Alphabet, UString> alph_letters;
   alph_letters.first = new_alphabet;
   alph_letters.second = letters;
-  return std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> > (alph_letters, transducers);
+  return std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> > (alph_letters, transducers);
 }
 
-std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> >
+std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> >
 trim(FILE *file_mono, FILE *file_bi)
 {
-  std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> > alph_trans_mono = read_fst(file_mono);
+  std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> > alph_trans_mono = read_fst(file_mono);
   Alphabet alph_mono = alph_trans_mono.first.first;
-  std::map<wstring, Transducer> trans_mono = alph_trans_mono.second;
-  std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> > alph_trans_bi = read_fst(file_bi);
+  std::map<UString, Transducer> trans_mono = alph_trans_mono.second;
+  std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> > alph_trans_bi = read_fst(file_bi);
   Alphabet alph_bi = alph_trans_bi.first.first;
-  std::map<wstring, Transducer> trans_bi = alph_trans_bi.second;
+  std::map<UString, Transducer> trans_bi = alph_trans_bi.second;
 
   // The prefix transducer is the union of all transducers from bidix,
   // with a ".*" appended
@@ -111,7 +100,7 @@ trim(FILE *file_mono, FILE *file_bi)
   set<int> loopback_symbols;    // ints refer to alph_prefix
   alph_prefix.createLoopbackSymbols(loopback_symbols, alph_mono, Alphabet::right);
 
-  for(std::map<wstring, Transducer>::iterator it = trans_bi.begin(); it != trans_bi.end(); it++)
+  for(std::map<UString, Transducer>::iterator it = trans_bi.begin(); it != trans_bi.end(); it++)
   {
     Transducer union_tmp = it->second;
     if(union_transducer.isEmpty())
@@ -130,21 +119,21 @@ trim(FILE *file_mono, FILE *file_bi)
   Transducer moved_transducer = prefix_transducer.moveLemqsLast(alph_prefix);
 
 
-  for(std::map<wstring, Transducer>::iterator it = trans_mono.begin(); it != trans_mono.end(); it++)
+  for(std::map<UString, Transducer>::iterator it = trans_mono.begin(); it != trans_mono.end(); it++)
   {
     Transducer trimmed = it->second.intersect(moved_transducer,
                                               alph_mono,
                                               alph_prefix);
 
-    wcout << it->first << " " << it->second.size();
-    wcout << " " << it->second.numberOfTransitions() << endl;
+    cout << it->first << " " << it->second.size();
+    cout << " " << it->second.numberOfTransitions() << endl;
     if(it->second.numberOfTransitions() == 0)
     {
-      wcerr << L"Warning: empty section! Skipping it ..."<<endl;
+      cerr << "Warning: empty section! Skipping it ..."<<endl;
       trans_mono[it->first].clear();
     }
     else if(trimmed.hasNoFinals()) {
-      wcerr << L"Warning: section had no final state after trimming! Skipping it ..."<<endl;
+      cerr << "Warning: section had no final state after trimming! Skipping it ..."<<endl;
       trans_mono[it->first].clear();
     }
     else {
@@ -170,25 +159,24 @@ int main(int argc, char *argv[])
   FILE *analyser = fopen(argv[1], "rb");
   if(!analyser)
   {
-    wcerr << "Error: Cannot open file '" << argv[1] << "'." << endl << endl;
+    cerr << "Error: Cannot open file '" << argv[1] << "'." << endl << endl;
     exit(EXIT_FAILURE);
   }
   FILE *bidix = fopen(argv[2], "rb");
   if(!bidix)
   {
-    wcerr << "Error: Cannot open file '" << argv[2] << "'." << endl << endl;
+    cerr << "Error: Cannot open file '" << argv[2] << "'." << endl << endl;
     exit(EXIT_FAILURE);
   }
 
-  std::pair<std::pair<Alphabet, wstring>, std::map<wstring, Transducer> > trimmed = trim(analyser, bidix);
+  std::pair<std::pair<Alphabet, UString>, std::map<UString, Transducer> > trimmed = trim(analyser, bidix);
   Alphabet alph_t = trimmed.first.first;
-  wstring letters = trimmed.first.second;
-  std::map<wstring, Transducer> trans_t = trimmed.second;
+  UString letters = trimmed.first.second;
+  std::map<UString, Transducer> trans_t = trimmed.second;
 
   int n_transducers = 0;
-  for(std::map<wstring, Transducer>::iterator it = trans_t.begin(); it != trans_t.end(); it++)
-  {
-    if(!(it->second.isEmpty()))
+  for(auto& it : trans_t) {
+    if(!(it.second.isEmpty()))
     {
       n_transducers++;
     }
@@ -196,9 +184,9 @@ int main(int argc, char *argv[])
 
   if(n_transducers == 0)
   {
-    wcerr << L"Error: Trimming gave empty transducer!" << endl;
-    wcerr << L"Hint: There are no words in bilingual dictionary that match "
-      L"words in both monolingual dictionaries?" << endl;
+    cerr << "Error: Trimming gave empty transducer!" << endl;
+    cerr << "Hint: There are no words in bilingual dictionary that match "
+      "words in both monolingual dictionaries?" << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -206,24 +194,23 @@ int main(int argc, char *argv[])
   FILE *output = fopen(argv[3], "wb");
   if(!output)
   {
-    wcerr << "Error: Cannot open file '" << argv[3] << "'." << endl << endl;
+    cerr << "Error: Cannot open file '" << argv[3] << "'." << endl << endl;
     exit(EXIT_FAILURE);
   }
 
   // letters
-  Compression::wstring_write(letters, output);
+  Compression::string_write(letters, output);
 
   // symbols
   alph_t.write(output);
 
   // transducers
   Compression::multibyte_write(n_transducers, output);
-  for(std::map<wstring, Transducer>::iterator it = trans_t.begin(); it != trans_t.end(); it++)
-  {
-    if(!(it->second.isEmpty()))
+  for(auto& it : trans_t) {
+    if(!(it.second.isEmpty()))
     {
-      Compression::wstring_write(it->first, output);
-      it->second.write(output);
+      Compression::string_write(it.first, output);
+      it.second.write(output);
     }
   }
 
diff --git a/lttoolbox/ltstr.h b/lttoolbox/ltstr.h
deleted file mode 100644
index 9e5abb6..0000000
--- a/lttoolbox/ltstr.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#ifndef _Ltstr_
-#define _Ltstr_
-
-#include <string>
-#include <cwchar>
-#include <cstring>
-
-using namespace std;
-
-struct Ltstr
-{
-  bool operator()(string const &s1, string const &s2) const
-  {
-    return strcmp(s1.c_str(), s2.c_str()) < 0;
-  }
-
-  bool operator()(wchar_t const *s1, wchar_t const *s2) const
-  {
-    return wcscmp(s1, s2) < 0;
-  }
-
-  bool operator()(char const *s1, char const *s2) const
-  {
-    return strcmp(s1, s2) < 0;
-  }
-
-  bool operator()(wstring const &s1, wstring const &s2) const
-  {
-    return wcscmp(s1.c_str(), s2.c_str()) < 0;
-  }
-};
-
-#endif
diff --git a/lttoolbox/my_stdio.h b/lttoolbox/my_stdio.h
index 6cf2083..a446278 100644
--- a/lttoolbox/my_stdio.h
+++ b/lttoolbox/my_stdio.h
@@ -46,20 +46,4 @@
 #define fread_unlocked fread
 #endif
 
-#if !HAVE_DECL_FGETWC_UNLOCKED
-#define fgetwc_unlocked fgetwc
-#endif
-
-#if !HAVE_DECL_FPUTWC_UNLOCKED
-#define fputwc_unlocked fputwc
-#endif
-
-#if !HAVE_DECL_FPUTWS_UNLOCKED
-#define fputws_unlocked fputws
-#endif
-
-#if !HAVE_DECL_UNGETWC_UNLOCKED
-#define ungetwc_unlocked ungetwc
-#endif
-
 #endif
diff --git a/lttoolbox/pattern_list.cc b/lttoolbox/pattern_list.cc
index ed1f056..810ecff 100644
--- a/lttoolbox/pattern_list.cc
+++ b/lttoolbox/pattern_list.cc
@@ -22,9 +22,9 @@
 #include <cstdlib>
 #include <iostream>
 
-wstring const PatternList::ANY_CHAR  = L"<ANY_CHAR>";
-wstring const PatternList::ANY_TAG   = L"<ANY_TAG>";
-wstring const PatternList::QUEUE = L"<QUEUE>";
+UString const PatternList::ANY_CHAR  = "<ANY_CHAR>"_u;
+UString const PatternList::ANY_TAG   = "<ANY_TAG>"_u;
+UString const PatternList::QUEUE     = "<QUEUE>"_u;
 
 void
 PatternList::copy(PatternList const &o)
@@ -80,7 +80,7 @@ PatternList::beginSequence()
 {
   if(sequence)
   {
-    wcerr << L"Error: opening an unended sequence" << endl;
+    cerr << "Error: opening an unended sequence" << endl;
     exit(EXIT_FAILURE);
   }
   sequence = true;
@@ -92,7 +92,7 @@ PatternList::endSequence()
 {
   if(!sequence)
   {
-    wcerr << L"Error: ending an unopened sequence" << endl;
+    cerr << "Error: ending an unopened sequence" << endl;
     exit(EXIT_FAILURE);
   }
   sequence = false;
@@ -107,10 +107,10 @@ PatternList::endSequence()
 }
 
 void
-PatternList::insertOutOfSequence(wstring const &lemma, wstring const &tags,
+PatternList::insertOutOfSequence(UString const &lemma, UString const &tags,
                                  vector<int> &result)
 {
-  if(lemma == L"")
+  if(lemma.empty())
   {
     result.push_back(alphabet(ANY_CHAR));
   }
@@ -118,17 +118,17 @@ PatternList::insertOutOfSequence(wstring const &lemma, wstring const &tags,
   {
     for(unsigned int i = 0, limit = lemma.size(); i < limit; i++)
     {
-      if(lemma[i] == L'*')
+      if(lemma[i] == '*')
       {
         result.push_back(alphabet(ANY_CHAR));
       }
       else
       {
-        result.push_back(int((wchar_t) lemma[i]));
+        result.push_back(static_cast<int32_t>(lemma[i]));
       }
     }
   }
-  if(tags == L"")
+  if(tags.empty())
   {
     result.push_back(alphabet(ANY_TAG));
   }
@@ -136,9 +136,9 @@ PatternList::insertOutOfSequence(wstring const &lemma, wstring const &tags,
   {
     for(unsigned int i = 0, limit = tagCount(tags); i < limit; i++)
     {
-      wstring tag = L"<" + tagAt(tags, i) + L">";
+      UString tag = "<"_u + tagAt(tags, i) + ">"_u;
 
-      if(tag == L"<*>")
+      if(tag == "<*>"_u)
       {
         result.push_back(alphabet(ANY_TAG));
       }
@@ -152,8 +152,8 @@ PatternList::insertOutOfSequence(wstring const &lemma, wstring const &tags,
 }
 
 void
-PatternList::insertIntoSequence(int const id, wstring const &lemma,
-				wstring const &tags)
+PatternList::insertIntoSequence(int const id, UString const &lemma,
+				UString const &tags)
 {
   sequence_id = id;
 
@@ -169,14 +169,14 @@ PatternList::insertIntoSequence(int const id, wstring const &lemma,
     list<vector<int> >::iterator limit = sequence_data.end();
     for(; it != limit; it++)
     {
-      it->push_back(L'+');
+      it->push_back('+');
       insertOutOfSequence(lemma, tags, *it);
     }
   }
 }
 
 void
-PatternList::insert(int const id, wstring const &lemma, wstring const &tags)
+PatternList::insert(int const id, UString const &lemma, UString const &tags)
 {
   if(!sequence)
   {
@@ -196,7 +196,7 @@ PatternList::insert(int const id, int const otherid)
 {
   if(!sequence)
   {
-    wcerr << L"Error: using labels outside of a sequence" << endl;
+    cerr << "Error: using labels outside of a sequence" << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -221,7 +221,7 @@ PatternList::insert(int const id, int const otherid)
           p.first != p.second; p.first++)
       {
         vector<int> temp = *it;
-        temp.push_back(L'+');
+        temp.push_back('+');
         temp.insert(temp.end(), (p.first->second).begin(),
                     (p.first->second).end());
         new_sequence_data.push_back(temp);
@@ -233,7 +233,7 @@ PatternList::insert(int const id, int const otherid)
 }
 
 int
-PatternList::tagCount(wstring const &tags)
+PatternList::tagCount(UString const &tags)
 {
   int count = 0;
 
@@ -243,7 +243,7 @@ PatternList::tagCount(wstring const &tags)
     {
       count++;
     }
-    else if(tags[i] == L'.')
+    else if(tags[i] == '.')
     {
       count++;
     }
@@ -252,8 +252,8 @@ PatternList::tagCount(wstring const &tags)
   return count;
 }
 
-wstring
-PatternList::tagAt(wstring const &tags, int const index)
+UString
+PatternList::tagAt(UString const &tags, int const index)
 {
   int start = 0;
   int end = 0;
@@ -261,7 +261,7 @@ PatternList::tagAt(wstring const &tags, int const index)
 
   for(unsigned int i = 0, limit = tags.size(); i < limit; i++)
   {
-    if(tags[i] == L'.')
+    if(tags[i] == '.')
     {
       count++;
       if(end == 0)
@@ -282,7 +282,7 @@ PatternList::tagAt(wstring const &tags, int const index)
 
   if(index > count)
   {
-    return L"";
+    return ""_u;
   }
   if(end != 0)
   {
@@ -331,9 +331,9 @@ PatternList::buildTransducer()
 
         // optional queue
         prevstate = state;
-        state = transducer.insertSingleTransduction(static_cast<int>(L'_'), state, default_weight);
-        transducer.linkStates(prevstate, state, static_cast<int>(L' '), default_weight);
-        transducer.linkStates(prevstate, state, static_cast<int>(L'#'), default_weight);
+        state = transducer.insertSingleTransduction(static_cast<int>('_'), state, default_weight);
+        transducer.linkStates(prevstate, state, static_cast<int>(' '), default_weight);
+        transducer.linkStates(prevstate, state, static_cast<int>('#'), default_weight);
         transducer.linkStates(state, state, alphabet(ANY_CHAR), default_weight);
       }
       else
@@ -366,10 +366,10 @@ void
 PatternList::write(FILE *output)
 {
   alphabet.write(output);
-  wstring const tagger_name = L"tagger";
+  UString const tagger_name = "tagger"_u;
 
   Compression::multibyte_write(1, output);
-  Compression::wstring_write(tagger_name, output);
+  Compression::string_write(tagger_name, output);
   transducer.write(output, alphabet.size());
 
   Compression::multibyte_write(final_type.size(), output);
@@ -391,7 +391,7 @@ PatternList::read(FILE *input)
   alphabet.read(input);
   if(Compression::multibyte_read(input) == 1)
   {
-    wstring mystr = Compression::wstring_read(input);
+    UString mystr = Compression::string_read(input);
     transducer.read(input, alphabet.size());
 
     int finalsize = Compression::multibyte_read(input);
diff --git a/lttoolbox/pattern_list.h b/lttoolbox/pattern_list.h
index 5dde942..1b88403 100644
--- a/lttoolbox/pattern_list.h
+++ b/lttoolbox/pattern_list.h
@@ -45,29 +45,29 @@ private:
 
   void copy(PatternList const &o);
   void destroy();
-  void insertOutOfSequence(wstring const &lemma, wstring const &tags,
+  void insertOutOfSequence(UString const &lemma, UString const &tags,
                            vector<int> &result);
-  void insertIntoSequence(int const id, wstring const &lemma,
-                          wstring const &tags);
+  void insertIntoSequence(int const id, UString const &lemma,
+                          UString const &tags);
 
-  static int tagCount(wstring const &tags);
-  static wstring tagAt(wstring const &tags, int const index);
+  static int tagCount(UString const &tags);
+  static UString tagAt(UString const &tags, int const index);
 
 public:
   /**
    * This symbol stands for any char
    */
-  static wstring const ANY_CHAR;
+  static UString const ANY_CHAR;
 
   /**
    * This symbol stands for any tag
    */
-  static wstring const ANY_TAG;
+  static UString const ANY_TAG;
 
   /**
    * This symbol marks a word queue
    */
-  static wstring const QUEUE;
+  static UString const QUEUE;
 
   /**
    * Constructor
@@ -106,7 +106,7 @@ public:
    * @param lemma
    * @param tags
    */
-  void insert(int const id, wstring const &lemma, wstring const &tags);
+  void insert(int const id, UString const &lemma, UString const &tags);
 
   /**
    * Insertion method
diff --git a/lttoolbox/regexp_compiler.cc b/lttoolbox/regexp_compiler.cc
index e94ee9f..96d98c8 100644
--- a/lttoolbox/regexp_compiler.cc
+++ b/lttoolbox/regexp_compiler.cc
@@ -21,9 +21,11 @@
 
 RegexpCompiler::RegexpCompiler() :
 token(0),
+index(0),
 alphabet(0),
 state(0),
 letter(0),
+postop(0),
 default_weight(0.0000)
 {
 }
@@ -74,17 +76,17 @@ RegexpCompiler::isReserved(int const t)
 {
   switch(t)
   {
-    case L'(':
-    case L')':
-    case L'[':
-    case L']':
-    case L'*':
-    case L'?':
-    case L'+':
-    case L'-':
-    case L'^':
-    case L'\\':
-    case L'|':
+    case '(':
+    case ')':
+    case '[':
+    case ']':
+    case '*':
+    case '?':
+    case '+':
+    case '-':
+    case '^':
+    case '\\':
+    case '|':
     case FIN_FICHERO:
       return true;
 
@@ -96,14 +98,14 @@ RegexpCompiler::isReserved(int const t)
 void
 RegexpCompiler::error()
 {
-  wcerr << L"Error parsing regexp" <<endl;
+  cerr << "Error parsing regexp" <<endl;
   exit(EXIT_FAILURE);
 }
 
 void
 RegexpCompiler::errorConsuming(int const t)
 {
-  wcerr << L"Error parsing regexp" << endl;
+  cerr << "Error parsing regexp" << endl;
   exit(EXIT_FAILURE);
 }
 
@@ -112,14 +114,14 @@ RegexpCompiler::consume(int const t)
 {
   if(token == t)
   {
-    input = input.substr(1);
-    if(input ==  L"")
+    index++;
+    if(index == input.size())
     {
       token = FIN_FICHERO;
     }
     else
     {
-      token = input[0];
+      token = input[index];
     }
   }
   else
@@ -129,10 +131,11 @@ RegexpCompiler::consume(int const t)
 }
 
 void
-RegexpCompiler::compile(wstring const &er)
+RegexpCompiler::compile(vector<int32_t> const &er)
 {
   input = er;
-  token = static_cast<int>(input[0]);
+  token = input[0];
+  index = 0;
   state = transducer.getInitial();
   S();
   transducer.setFinal(state, default_weight);
@@ -141,7 +144,7 @@ RegexpCompiler::compile(wstring const &er)
 void
 RegexpCompiler::S()
 {
-  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
+  if(token == '(' || token == '[' || !isReserved(token) || token == '\\')
   {
     RExpr();
     Cola();
@@ -155,7 +158,7 @@ RegexpCompiler::S()
 void
 RegexpCompiler::RExpr()
 {
-  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
+  if(token == '(' || token == '[' || !isReserved(token) || token == '\\')
   {
     Term();
     RExprp();
@@ -169,14 +172,14 @@ RegexpCompiler::RExpr()
 void
 RegexpCompiler::Cola()
 {
-  if(token == FIN_FICHERO || token == L')')
+  if(token == FIN_FICHERO || token == ')')
   {
   }
-  else if(token == L'|')
+  else if(token == '|')
   {
     int e = state;
     state = transducer.getInitial();
-    consume(L'|');
+    consume('|');
     RExpr();
     Cola();
 
@@ -192,7 +195,7 @@ RegexpCompiler::Cola()
 void
 RegexpCompiler::Term()
 {
-  if(!isReserved(token) || token == L'\\')
+  if(!isReserved(token) || token == '\\')
   {
     Transducer t;
     int e = t.getInitial();
@@ -200,53 +203,53 @@ RegexpCompiler::Term()
     e = t.insertNewSingleTransduction((*alphabet)(letter, letter), e, default_weight);
     t.setFinal(e, default_weight);
     Postop();
-    if(postop == L"*")
+    if(postop == '*')
     {
       t.zeroOrMore((*alphabet)(0, 0));
     }
-    else if(postop == L"+")
+    else if(postop == '+')
     {
       t.oneOrMore((*alphabet)(0, 0));
     }
-    else if(postop == L"?")
+    else if(postop == '?')
     {
       t.optional((*alphabet)(0, 0));
     }
 
-    postop = L"";
+    postop = 0;
     state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
   }
-  else if(token == L'(')
+  else if(token == '(')
   {
     Transducer t = transducer;
     int e = state;
     transducer.clear();
     state = transducer.getInitial();
-    consume(L'(');
+    consume('(');
     S();
-    consume(L')');
+    consume(')');
     transducer.setFinal(state, default_weight);
     Postop();
-    if(postop == L"*")
+    if(postop == '*')
     {
       transducer.zeroOrMore((*alphabet)(0, 0));
     }
-    else if(postop == L"+")
+    else if(postop == '+')
     {
       transducer.oneOrMore((*alphabet)(0, 0));
     }
-    else if(postop == L"?")
+    else if(postop == '?')
     {
       transducer.optional((*alphabet)(0, 0));
     }
 
-    postop = L"";
+    postop = 0;
     state = t.insertTransducer(e, transducer, (*alphabet)(0, 0));
     transducer = t;
   }
-  else if(token == L'[')
+  else if(token == '[')
   {
-    consume(L'[');
+    consume('[');
     Esp();
   }
   else
@@ -258,12 +261,12 @@ RegexpCompiler::Term()
 void
 RegexpCompiler::RExprp()
 {
-  if(token == L'(' || token == L'[' || !isReserved(token) || token == L'\\')
+  if(token == '(' || token == '[' || !isReserved(token) || token == '\\')
   {
     Term();
     RExprp();
   }
-  else if(token == L'|' || token == FIN_FICHERO || token == L')')
+  else if(token == '|' || token == FIN_FICHERO || token == ')')
   {
   }
   else
@@ -280,9 +283,9 @@ RegexpCompiler::Letra()
     letter = token;
     consume(token);
   }
-  else if(token == L'\\')
+  else if(token == '\\')
   {
-    consume(L'\\');
+    consume('\\');
     letter = token;
     Reservado();
   }
@@ -295,24 +298,24 @@ RegexpCompiler::Letra()
 void
 RegexpCompiler::Postop()
 {
-  if(token == L'*')
+  if(token == '*')
   {
-    consume(L'*');
-    postop = L"*";
+    consume('*');
+    postop = '*';
   }
-  else if(token == L'?')
+  else if(token == '?')
   {
-    consume(L'?');
-    postop = L"?";
+    consume('?');
+    postop = '?';
   }
-  else if(token == L'+')
+  else if(token == '+')
   {
-    consume(L'+');
-    postop = L"+";
+    consume('+');
+    postop = '+';
   }
-  else if(token == L'(' || token == L'[' || !isReserved(token) ||
-          token == L'\\' || token == L'|' ||  token == FIN_FICHERO ||
-          token == L')')
+  else if(token == '(' || token == '[' || !isReserved(token) ||
+          token == '\\' || token == '|' ||  token == FIN_FICHERO ||
+          token == ')')
   {
   }
   else
@@ -325,10 +328,10 @@ void
 RegexpCompiler::Esp()
 {
   Transducer t;
-  if(!isReserved(token) || token == L'\\' || token == L']')
+  if(!isReserved(token) || token == '\\' || token == ']')
   {
     Lista();
-    consume(L']');
+    consume(']');
     Postop();
 
     for(set<int>::iterator it = brackets.begin();
@@ -342,11 +345,11 @@ RegexpCompiler::Esp()
 
     t.joinFinals((*alphabet)(0, 0));
   }
-  else if(token == L'^')
+  else if(token == '^')
   {
-    consume(L'^');
+    consume('^');
     Lista();
-    consume(L']');
+    consume(']');
     Postop();
 
     for(int i = 0; i < 256 ;i++)
@@ -367,20 +370,20 @@ RegexpCompiler::Esp()
     error();
   }
 
-  if(postop == L"+")
+  if(postop == '+')
   {
     t.oneOrMore((*alphabet)(0, 0));
   }
-  else if(postop == L"*")
+  else if(postop == '*')
   {
     t.zeroOrMore((*alphabet)(0, 0));
   }
-  else if(postop == L"?")
+  else if(postop == '?')
   {
     t.optional((*alphabet)(0, 0));
   }
   brackets.clear();
-  postop = L"";
+  postop = 0;
 
   state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
 }
@@ -388,12 +391,12 @@ RegexpCompiler::Esp()
 void
 RegexpCompiler::Lista()
 {
-  if(!isReserved(token) || token == L'\\')
+  if(!isReserved(token) || token == '\\')
   {
     Elem();
     Lista();
   }
-  else if(token == L']')
+  else if(token == ']')
   {
   }
   else
@@ -418,7 +421,7 @@ RegexpCompiler::Reservado()
 void
 RegexpCompiler::Elem()
 {
-  if(!isReserved(token) || token == L'\\')
+  if(!isReserved(token) || token == '\\')
   {
     Letra();
     int rango1 = letter;
@@ -446,12 +449,12 @@ RegexpCompiler::Elem()
 void
 RegexpCompiler::ColaLetra()
 {
-  if(token == L'-')
+  if(token == '-')
   {
-    consume(L'-');
+    consume('-');
     Letra();
   }
-  else if(!isReserved(token) || token == L'\\' || token == L']')
+  else if(!isReserved(token) || token == '\\' || token == ']')
   {
   }
   else
@@ -478,5 +481,5 @@ RegexpCompiler::initialize(Alphabet *a)
   setAlphabet(a);
   transducer.clear();
   brackets.clear();
-  postop = L"";
+  postop = 0;
 }
diff --git a/lttoolbox/regexp_compiler.h b/lttoolbox/regexp_compiler.h
index dd11ca9..e9bdb30 100644
--- a/lttoolbox/regexp_compiler.h
+++ b/lttoolbox/regexp_compiler.h
@@ -17,10 +17,13 @@
 #ifndef _REGEXP_COMPILER_
 #define _REGEXP_COMPILER_
 
+#include <lttoolbox/ustring.h>
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/transducer.h>
 
 #include <set>
+#include <vector>
+#include <cstdint>
 
 using namespace std;
 
@@ -41,7 +44,12 @@ private:
   /**
    * Input string
    */
-  wstring input;
+  vector<int32_t> input;
+
+  /**
+   * Location in the input string
+   */
+  size_t index;
 
   /**
    * Alphabet to encode symbols
@@ -66,7 +74,7 @@ private:
   /**
    * Post-operator: '+', '?', '*'
    */
-  wstring postop;
+  UChar32 postop;
 
   /**
    * Default value of weight
@@ -200,7 +208,7 @@ public:
    * Function that parses a regular expression and produces a transducer
    * @param er the regular expression
    */
-  void compile(wstring const &er);
+  void compile(vector<int32_t> const &er);
 
   /**
    * Set the decoder of symbols
diff --git a/lttoolbox/serialiser.h b/lttoolbox/serialiser.h
index 01abb3e..bc04f2c 100644
--- a/lttoolbox/serialiser.h
+++ b/lttoolbox/serialiser.h
@@ -29,6 +29,7 @@
 #include <string>
 #include <utility>
 #include <vector>
+#include <unicode/uchar.h>
 
 namespace {
 template <typename SerialisedType>
@@ -96,15 +97,15 @@ public:
                                std::ostream &Output);
 };
 
-template <> class Serialiser<wchar_t> {
+template <> class Serialiser<char> {
 public:
-  inline static void serialise(const wchar_t &SerialisedType_,
+  inline static void serialise(const char &SerialisedType_,
                                std::ostream &Output);
 };
 
-template <> class Serialiser<char> {
+template <> class Serialiser<UChar> {
 public:
-  inline static void serialise(const char &SerialisedType_,
+  inline static void serialise(const UChar &SerialisedType_,
                                std::ostream &Output);
 };
 
@@ -213,16 +214,16 @@ void Serialiser<uint32_t>::serialise(const uint32_t &SerialisedType_,
   int_serialise((uint64_t)SerialisedType_, Output);
 }
 
-void Serialiser<wchar_t>::serialise(const wchar_t &SerialisedType_,
-                                    std::ostream &Output) {
-  int_serialise((uint32_t)SerialisedType_, Output);
-}
-
 void Serialiser<char>::serialise(const char &SerialisedType_,
                                  std::ostream &Output) {
   int_serialise((uint8_t)SerialisedType_, Output);
 }
 
+void Serialiser<UChar>::serialise(const UChar &SerialisedType_,
+                                  std::ostream &Output) {
+  int_serialise((uint16_t)SerialisedType_, Output);
+}
+
 void Serialiser<double>::serialise(const double &SerialisedType_,
                                    std::ostream &Output) {
   union {
diff --git a/lttoolbox/state.cc b/lttoolbox/state.cc
index 03abae9..facd537 100644
--- a/lttoolbox/state.cc
+++ b/lttoolbox/state.cc
@@ -17,7 +17,6 @@
 #include <lttoolbox/state.h>
 
 #include <cstring>
-#include <cwctype>
 #include <climits>
 #include <algorithm>
 
@@ -403,12 +402,12 @@ State::step(int const input, set<int> const alts)
 }
 
 void
-State::step_case(wchar_t val, wchar_t val2, bool caseSensitive)
+State::step_case(UChar32 val, UChar32 val2, bool caseSensitive)
 {
-  if (!iswupper(val) || caseSensitive) {
+  if (!u_isupper(val) || caseSensitive) {
     step(val, val2);
-  } else if(val != towlower(val)) {
-    step(val, towlower(val), val2);
+  } else if(val != u_tolower(val)) {
+    step(val, u_tolower(val), val2);
   } else {
     step(val, val2);
   }
@@ -416,12 +415,12 @@ State::step_case(wchar_t val, wchar_t val2, bool caseSensitive)
 
 
 void
-State::step_case(wchar_t val, bool caseSensitive)
+State::step_case(UChar32 val, bool caseSensitive)
 {
-  if (!iswupper(val) || caseSensitive) {
+  if (!u_isupper(val) || caseSensitive) {
     step(val);
   } else {
-    step(val, towlower(val));
+    step(val, u_tolower(val));
   }
 }
 
@@ -441,14 +440,14 @@ State::isFinal(map<Node *, double> const &finals) const
 }
 
 
-vector<pair< wstring, double >>
-State::NFinals(vector<pair<wstring, double>> lf, int maxAnalyses, int maxWeightClasses) const
+vector<pair< UString, double >>
+State::NFinals(vector<pair<UString, double>> lf, int maxAnalyses, int maxWeightClasses) const
 {
-  vector<pair<wstring, double>> result;
+  vector<pair<UString, double>> result;
 
-  sort(lf.begin(), lf.end(), sort_weights<wstring, double>());
+  sort(lf.begin(), lf.end(), sort_weights<UString, double>());
 
-  for(vector<pair<wstring, double> >::iterator it = lf.begin(); it != lf.end(); it++)
+  for(vector<pair<UString, double> >::iterator it = lf.begin(); it != lf.end(); it++)
   {
     double last_weight = 0.0000;
     if(maxAnalyses > 0 && maxWeightClasses > 0)
@@ -466,16 +465,16 @@ State::NFinals(vector<pair<wstring, double>> lf, int maxAnalyses, int maxWeightC
 }
 
 
-wstring
+UString
 State::filterFinals(map<Node *, double> const &finals,
                     Alphabet const &alphabet,
-                    set<wchar_t> const &escaped_chars,
+                    set<UChar32> const &escaped_chars,
                     bool display_weights, int max_analyses, int max_weight_classes,
                     bool uppercase, bool firstupper, int firstchar) const
 {
-  vector<pair< wstring, double >> response;
+  vector<pair< UString, double >> response;
 
-  wstring result = L"";
+  UString result;
   double cost = 0.0000;
 
   for(size_t i = 0, limit = state.size(); i != limit; i++)
@@ -491,21 +490,21 @@ State::filterFinals(map<Node *, double> const &finals,
         {
           if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end())
           {
-            result += L'\\';
+            result += '\\';
           }
           alphabet.getSymbol(result, ((*(state[i].sequence))[j]).first, uppercase);
           cost += ((*(state[i].sequence))[j]).second;
         }
         if(firstupper)
         {
-          if(result[first_char] == L'~')
+          if(result[first_char] == '~')
           {
             // skip post-generation mark
-            result[first_char+1] = towupper(result[first_char+1]);
+            result[first_char+1] = u_toupper(result[first_char+1]);
           }
           else
           {
-            result[first_char] = towupper(result[first_char]);
+            result[first_char] = u_toupper(result[first_char]);
           }
         }
       }
@@ -517,7 +516,7 @@ State::filterFinals(map<Node *, double> const &finals,
         {
           if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end())
           {
-            result += L'\\';
+            result += '\\';
           }
           alphabet.getSymbol(result, ((*(state[i].sequence))[j]).first);
           cost += ((*(state[i].sequence))[j]).second;
@@ -532,16 +531,17 @@ State::filterFinals(map<Node *, double> const &finals,
 
   response = NFinals(response, max_analyses, max_weight_classes);
 
-  result = L"";
-  for(vector<pair<wstring, double>>::iterator it = response.begin(); it != response.end(); it++)
+  result.clear();
+  for(vector<pair<UString, double>>::iterator it = response.begin(); it != response.end(); it++)
   {
-    result += L'/';
+    result += '/';
     result += it->first;
     if(display_weights)
     {
-      result += L"<W:";
-      result += to_wstring(it->second);
-      result += L">";
+      UChar temp[16]{};
+      // if anyone wants a weight of 10000, this will not be enough
+      u_sprintf(temp, "<W:%f>", it->second);
+      result += temp;
     }
   }
 
@@ -549,39 +549,39 @@ State::filterFinals(map<Node *, double> const &finals,
 }
 
 
-set<pair<wstring, vector<wstring> > >
+set<pair<UString, vector<UString> > >
 State::filterFinalsLRX(map<Node *, double> const &finals,
                        Alphabet const &alphabet,
-                       set<wchar_t> const &escaped_chars,
+                       set<UChar32> const &escaped_chars,
                        bool uppercase, bool firstupper, int firstchar) const
 {
-  set<pair<wstring, vector<wstring> > > results;
+  set<pair<UString, vector<UString> > > results;
 
-  vector<wstring> current_result;
-  wstring rule_id = L"";
+  vector<UString> current_result;
+  UString rule_id;
 
   for(size_t i = 0, limit = state.size(); i != limit; i++)
   {
     if(finals.find(state[i].where) != finals.end())
     {
       current_result.clear();
-      rule_id = L"";
-      wstring current_word = L"";
+      rule_id.clear();
+      UString current_word;
       for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
       {
         if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end())
         {
-          current_word += L'\\';
+          current_word += '\\';
         }
-        wstring sym = L"";
+        UString sym;
         alphabet.getSymbol(sym, ((*(state[i].sequence))[j]).first, uppercase);
-        if(sym == L"<$>")
+        if(sym == "<$>"_u)
         {
-          if(current_word != L"")
+          if(!current_word.empty())
           {
             current_result.push_back(current_word);
           }
-          current_word = L"";
+          current_word.clear();
         }
         else
         {
@@ -597,32 +597,34 @@ State::filterFinalsLRX(map<Node *, double> const &finals,
 }
 
 
-wstring
+UString
 State::filterFinalsSAO(map<Node *, double> const &finals,
                        Alphabet const &alphabet,
-                       set<wchar_t> const &escaped_chars,
+                       set<UChar32> const &escaped_chars,
                        bool uppercase, bool firstupper, int firstchar) const
 {
-  wstring result = L"";
-  wstring annot = L"";
+  UString result;
+  UString annot;
 
   for(size_t i = 0, limit = state.size(); i != limit; i++)
   {
     if(finals.find(state[i].where) != finals.end())
     {
-      result += L'/';
+      result += '/';
       unsigned int const first_char = result.size() + firstchar;
       for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
       {
         if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end())
         {
-          result += L'\\';
+          result += '\\';
         }
         if(alphabet.isTag(((*(state[i].sequence))[j]).first))
         {
-          annot = L"";
+          annot.clear();
           alphabet.getSymbol(annot, ((*(state[i].sequence))[j]).first);
-          result += L'&'+annot.substr(1,annot.length()-2)+L';';
+          result += '&';
+          result += annot.substr(1,annot.length()-2);
+          result += ';';
         }
         else
         {
@@ -631,14 +633,14 @@ State::filterFinalsSAO(map<Node *, double> const &finals,
       }
       if(firstupper)
       {
-        if(result[first_char] == L'~')
+        if(result[first_char] == '~')
         {
           // skip post-generation mark
-          result[first_char+1] = towupper(result[first_char+1]);
+          result[first_char+1] = u_toupper(result[first_char+1]);
         }
         else
         {
-          result[first_char] = towupper(result[first_char]);
+          result[first_char] = u_toupper(result[first_char]);
         }
       }
     }
@@ -647,24 +649,24 @@ State::filterFinalsSAO(map<Node *, double> const &finals,
   return result;
 }
 
-wstring
+UString
 State::filterFinalsTM(map<Node *, double> const &finals,
                       Alphabet const &alphabet,
-                      set<wchar_t> const &escaped_chars,
-                      queue<wstring> &blankqueue, vector<wstring> &numbers) const
+                      set<UChar32> const &escaped_chars,
+                      queue<UString> &blankqueue, vector<UString> &numbers) const
 {
-  wstring result = L"";
+  UString result;
 
   for(size_t i = 0, limit = state.size(); i != limit; i++)
   {
     if(finals.find(state[i].where) != finals.end())
     {
-      result += L'/';
+      result += '/';
       for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++)
       {
         if(escaped_chars.find((*(state[i].sequence))[j].first) != escaped_chars.end())
         {
-          result += L'\\';
+          result += '\\';
         }
         alphabet.getSymbol(result, (*(state[i].sequence))[j].first);
       }
@@ -672,15 +674,15 @@ State::filterFinalsTM(map<Node *, double> const &finals,
   }
 
 
-  wstring result2 = L"";
-  vector<wstring> fragment;
-  fragment.push_back(L"");
+  UString result2;
+  vector<UString> fragment;
+  fragment.push_back(""_u);
 
   for(unsigned int i = 0, limit = result.size(); i != limit ; i++)
   {
-    if(result[i] == L')')
+    if(result[i] == ')')
     {
-      fragment.push_back(L"");
+      fragment.push_back(""_u);
     }
     else
     {
@@ -692,9 +694,9 @@ State::filterFinalsTM(map<Node *, double> const &finals,
   {
     if(i != limit -1)
     {
-      if(fragment[i].size() >=2 && fragment[i].substr(fragment[i].size()-2) == L"(#")
+      if(fragment[i].size() >=2 && fragment[i].substr(fragment[i].size()-2) == "(#"_u)
       {
-        wstring whitespace = L" ";
+        UString whitespace = " "_u;
         if(blankqueue.size() != 0)
         {
           whitespace = blankqueue.front().substr(1);
@@ -709,15 +711,15 @@ State::filterFinalsTM(map<Node *, double> const &finals,
         bool substitute = false;
         for(int j = fragment[i].size() - 1; j >= 0; j--)
         {
-          if(fragment[i].size()-j > 3 && fragment[i][j] == L'\\' &&
-             fragment[i][j+1] == L'@' && fragment[i][j+2] == L'(')
+          if(fragment[i].size()-j > 3 && fragment[i][j] == '\\' &&
+             fragment[i][j+1] == '@' && fragment[i][j+2] == '(')
           {
             int num = 0;
             bool correct = true;
             for(unsigned int k = (unsigned int) j+3, limit2 = fragment[i].size();
                 k != limit2; k++)
             {
-              if(iswdigit(fragment[i][k]))
+              if(u_isdigit(fragment[i][k]))
               {
                 num = num * 10;
                 num += (int) fragment[i][k] - 48;
@@ -738,13 +740,13 @@ State::filterFinalsTM(map<Node *, double> const &finals,
         }
         if(substitute == false)
         {
-          fragment[i] += L')';
+          fragment[i] += ')';
         }
       }
     }
   }
 
-  result = L"";
+  result.clear();
 
   for(unsigned int i = 0, limit = fragment.size(); i != limit; i++)
   {
@@ -888,26 +890,28 @@ State::restartFinals(const map<Node *, double> &finals, int requiredSymbol, Stat
 
 
 
-wstring
+UString
 State::getReadableString(const Alphabet &a)
 {
-  wstring retval = L"[";
+  UString retval;
+  retval += '[';
 
   for(unsigned int i=0; i<state.size(); i++)
   {
     vector<pair<int, double>>* seq = state.at(i).sequence;
     if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++)
     {
-      wstring ws = L"";
+      UString ws;
       a.getSymbol(ws, (seq->at(j)).first);
       retval.append(ws);
     }
 
     if(i+1 < state.size())
     {
-      retval.append(L", ");
+      retval += ',';
+      retval += ' ';
     }
   }
-  retval.append(L"]");
+  retval += ']';
   return retval;
 }
diff --git a/lttoolbox/state.h b/lttoolbox/state.h
index a7840c7..31f0e42 100644
--- a/lttoolbox/state.h
+++ b/lttoolbox/state.h
@@ -30,6 +30,8 @@
 #include <lttoolbox/match_state.h>
 #include <lttoolbox/transducer.h>
 
+#include <lttoolbox/ustring.h>
+
 using namespace std;
 
 /**
@@ -188,9 +190,9 @@ public:
    */
   void step(int const input, set<int> const alts);
 
-  void step_case(wchar_t val, bool caseSensitive);
+  void step_case(UChar32 val, bool caseSensitive);
 
-  void step_case(wchar_t val, wchar_t val2, bool caseSensitive);
+  void step_case(UChar32 val, UChar32 val2, bool caseSensitive);
 
   void step_careful(int const input, int const alt);
 
@@ -236,7 +238,7 @@ public:
       }
   };
 
-  vector<pair< wstring, double >> NFinals(vector<pair<wstring, double>> lf,
+  vector<pair< UString, double >> NFinals(vector<pair<UString, double>> lf,
                                           int maxAnalyses,
                                           int maxWeightClasses) const;
 
@@ -252,9 +254,9 @@ public:
    * @param firstchar first character of the word
    * @return the result of the transduction
    */
-  wstring filterFinals(map<Node *, double> const &finals,
+  UString filterFinals(map<Node *, double> const &finals,
                        Alphabet const &a,
-                       set<wchar_t> const &escaped_chars,
+                       set<UChar32> const &escaped_chars,
                        bool display_weights = false,
                        int max_analyses = INT_MAX,
                        int max_weight_classes = INT_MAX,
@@ -273,9 +275,9 @@ public:
    * @param firstchar first character of the word
    * @return the result of the transduction
    */
-  wstring filterFinalsSAO(map<Node *, double> const &finals,
+  UString filterFinalsSAO(map<Node *, double> const &finals,
                           Alphabet const &a,
-                          set<wchar_t> const &escaped_chars,
+                          set<UChar32> const &escaped_chars,
                           bool uppercase = false,
                           bool firstupper = false,
                           int firstchar = 0) const;
@@ -293,9 +295,9 @@ public:
    * @return the result of the transduction
    */
 
-  set<pair<wstring, vector<wstring> > > filterFinalsLRX(map<Node *, double> const &finals,
+  set<pair<UString, vector<UString> > > filterFinalsLRX(map<Node *, double> const &finals,
                                                         Alphabet const &a,
-                                                        set<wchar_t> const &escaped_chars,
+                                                        set<UChar32> const &escaped_chars,
                                                         bool uppercase = false,
                                                         bool firstupper = false,
                                                         int firstchar = 0) const;
@@ -326,13 +328,13 @@ public:
   /**
    * Return the full states string (to allow debuging...) using a Java ArrayList.toString style
    */
-  wstring getReadableString(const Alphabet &a);
+  UString getReadableString(const Alphabet &a);
 
-  wstring filterFinalsTM(map<Node *, double> const &finals,
+  UString filterFinalsTM(map<Node *, double> const &finals,
                          Alphabet const &alphabet,
-                         set<wchar_t> const &escaped_chars,
-                         queue<wstring> &blanks,
-                         vector<wstring> &numbers) const;
+                         set<UChar32> const &escaped_chars,
+                         queue<UString> &blanks,
+                         vector<UString> &numbers) const;
 
 };
 
diff --git a/lttoolbox/string_to_wostream.h b/lttoolbox/string_to_wostream.h
deleted file mode 100644
index 4ffbb4b..0000000
--- a/lttoolbox/string_to_wostream.h
+++ /dev/null
@@ -1,13 +0,0 @@
-// Include string_utils.h instead if you're linking against apertium
-
-#ifndef __STRING_TO_WOSTREAM_H_
-#define __STRING_TO_WOSTREAM_H_
-
-#include <iterator>
-
-static std::wostream & operator<<(std::wostream & ostr, std::string const & str) {
-  ostr << str.c_str();
-  return ostr;
-}
-
-#endif
diff --git a/lttoolbox/string_utils.cc b/lttoolbox/string_utils.cc
new file mode 100644
index 0000000..411380d
--- /dev/null
+++ b/lttoolbox/string_utils.cc
@@ -0,0 +1,249 @@
+#include <lttoolbox/string_utils.h>
+
+#include <unicode/utf16.h>
+#include <unicode/uchar.h>
+#include <unicode/ustring.h>
+#include <iostream>
+
+UString
+StringUtils::trim(const UString& str)
+{
+  if (str.empty()) {
+    return str;
+  }
+  size_t begin = 0;
+  size_t end = str.size();
+  size_t i = 0;
+  UChar32 c;
+  while (begin < end) {
+    U16_GET(str.c_str(), begin, i, end, c);
+    if (!u_isspace(c)) {
+      begin = i;
+      break;
+    } else {
+      U16_FWD_1(str.c_str(), i, end);
+    }
+  }
+  i = str.size();
+  U16_BACK_1(str.c_str(), 0, i);
+  U16_GET(str.c_str(), 0, i, end, c);
+  if (!u_isspace(c)) {
+    if (begin == 0) {
+      return str;
+    } else {
+      return str.substr(begin);
+    }
+  }
+  while (end > begin) {
+    end = i;
+    U16_BACK_1(str.c_str(), 0, i);
+    U16_GET(str.c_str(), 0, i, str.size(), c);
+    if (!u_isspace(c)) {
+      break;
+    }
+  }
+  return str.substr(begin, end-begin);
+}
+
+std::vector<UString>
+StringUtils::split(const UString& str, const UString& delim)
+{
+  size_t pos = 0;
+  size_t new_pos;
+  std::vector<UString> result;
+  while (pos < str.size()) {
+    new_pos = str.find(delim, pos);
+    if (new_pos == UString::npos) {
+      new_pos = str.size();
+    }
+    if (new_pos > pos) {
+      // if we have a non-empty substring between this delimiter
+      // and the last one
+      result.push_back(str.substr(pos, new_pos-pos));
+    }
+    pos = new_pos + delim.size();
+  }
+  return result;
+}
+
+UString
+StringUtils::join(const std::vector<UString>& vec, const UString& delim)
+{
+  UString s;
+  for (auto& piece : vec) {
+    if (!s.empty()) {
+      s.append(delim);
+    }
+    s.append(piece);
+  }
+  return s;
+}
+
+UString
+StringUtils::substitute(const UString& str, const UString& olds, const UString& news)
+{
+  UString s = str;
+  size_t p = s.find(olds, 0);
+  while (p != UString::npos) {
+    s.replace(p, olds.length(), news);
+    p += news.length();
+    p = s.find(olds, p);
+  }
+  return s;
+}
+
+UString
+StringUtils::itoa(int n)
+{
+  UChar str[256];
+  u_snprintf(str, 256, "%d", n);
+  return str;
+}
+
+std::string
+StringUtils::itoa_string(int n)
+{
+  char str[256];
+  snprintf(str, 256, "%d", n);
+  return str;
+}
+
+UString
+StringUtils::ftoa(double f)
+{
+  UChar str[256];
+  u_snprintf(str, 256, "%f", f);
+  return str;
+}
+
+int
+StringUtils::stoi(const UString& str)
+{
+  int ret;
+  int c = u_sscanf(str.c_str(), "%d", &ret);
+  if (c != 1) {
+    throw std::invalid_argument("unable to parse int");
+  }
+  return ret;
+}
+
+double
+StringUtils::stod(const UString& str)
+{
+  double ret;
+  int c = u_sscanf(str.c_str(), "%lf", &ret);
+  if (c != 1) {
+    throw std::invalid_argument("unable to parse float");
+  }
+  return ret;
+}
+
+UString
+StringUtils::tolower(const UString& str)
+{
+  UChar buf[str.size()*2];
+  UErrorCode err = U_ZERO_ERROR;
+  u_strToLower(buf, str.size()*2, str.c_str(), str.size(), NULL, &err);
+  if (U_FAILURE(err)) {
+    std::cerr << "Error: unable to lowercase string '" << str << "'.\n";
+    std::cerr << "error code: " << u_errorName(err) << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  return buf;
+}
+
+UString
+StringUtils::toupper(const UString& str)
+{
+  UChar buf[str.size()*2];
+  UErrorCode err = U_ZERO_ERROR;
+  u_strToUpper(buf, str.size()*2, str.c_str(), str.size(), NULL, &err);
+  if (U_FAILURE(err)) {
+    std::cerr << "Error: unable to uppercase string '" << str << "'.\n";
+    std::cerr << "error code: " << u_errorName(err) << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  return buf;
+}
+
+UString
+StringUtils::totitle(const UString& str)
+{
+  UChar buf[str.size()*2];
+  UErrorCode err = U_ZERO_ERROR;
+  u_strToTitle(buf, str.size()*2, str.c_str(), str.size(), NULL, NULL, &err);
+  if (U_FAILURE(err)) {
+    std::cerr << "Error: unable to titlecase string '" << str << "'.\n";
+    std::cerr << "error code: " << u_errorName(err) << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  return buf;
+}
+
+UString
+StringUtils::getcase(const UString& str)
+{
+  UString ret = "aa"_u;
+  if (str.empty()) {
+    return ret;
+  }
+  size_t i = 0;
+  size_t l = str.size();
+  UChar32 c;
+  U16_NEXT(str.c_str(), i, l, c);
+  if (u_isupper(c)) {
+    ret[0] = 'A';
+    if (i < l) {
+      U16_BACK_1(str.c_str(), i, l); // decrements l
+      U16_GET(str.c_str(), 0, l, str.size(), c);
+      if (u_isupper(c)) {
+        ret[1] = 'A';
+      }
+    }
+  }
+  return ret;
+}
+
+UString
+StringUtils::copycase(const UString& source, const UString& target)
+{
+  if (source.empty() || target.empty()) {
+    return target;
+  }
+  size_t i = 0;
+  size_t l = source.size();
+  UChar32 c;
+  U16_NEXT(source.c_str(), i, l, c);
+  bool firstupper = u_isupper(c);
+  bool uppercase = false;
+  if (firstupper) {
+    if (i != l) {
+      U16_BACK_1(source.c_str(), i, l); // decrements l
+      U16_GET(source.c_str(), 0, l, source.size(), c);
+      uppercase = u_isupper(c);
+    }
+  }
+  if (firstupper) {
+    if (uppercase) {
+      return toupper(target);
+    } else {
+      return totitle(target);
+    }
+  } else {
+    return tolower(target);
+  }
+}
+
+bool
+StringUtils::caseequal(const UString& a, const UString& b)
+{
+  UErrorCode err = U_ZERO_ERROR;
+  int cmp = u_strCaseCompare(a.c_str(), -1, b.c_str(), -1, 0, &err);
+  if (U_FAILURE(err)) {
+    std::cerr << "Error: caseless string comparison failed on '";
+    std::cerr << a << "' and '" << b << "'" << std::endl;
+    std::cerr << "error code: " << u_errorName(err) << std::endl;
+    exit(EXIT_FAILURE);
+  }
+  return (cmp == 0);
+}
diff --git a/lttoolbox/string_utils.h b/lttoolbox/string_utils.h
new file mode 100644
index 0000000..79aeadf
--- /dev/null
+++ b/lttoolbox/string_utils.h
@@ -0,0 +1,38 @@
+#ifndef __LT_STRING_UTILS_H__
+#define __LT_STRING_UTILS_H__
+
+#include <lttoolbox/ustring.h>
+#include <vector>
+
+class StringUtils {
+public:
+  // delete leading and trailing whitespace
+  static UString trim(const UString& str);
+
+  // split string on delimiter
+  static std::vector<UString> split(const UString& str, const UString& delim);
+
+  // inverse of split
+  static UString join(const std::vector<UString>& vec, const UString& delim);
+
+  // replace each occurrence of olds with news
+  static UString substitute(const UString& str, const UString& olds, const UString& news);
+
+  static UString itoa(int n);
+  static std::string itoa_string(int n);
+  static UString ftoa(double f);
+  // these throw std::invalid_argument if parsing fails
+  static int stoi(const UString& str);
+  static double stod(const UString& str);
+
+  static UString tolower(const UString& str);
+  static UString toupper(const UString& str);
+  static UString totitle(const UString& str);
+
+  static UString getcase(const UString& str);
+  static UString copycase(const UString& source, const UString& target);
+
+  static bool caseequal(const UString& a, const UString& b);
+};
+
+#endif // __LT_STRING_UTILS_H__
diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc
index 39113ee..db0394b 100644
--- a/lttoolbox/tmx_compiler.cc
+++ b/lttoolbox/tmx_compiler.cc
@@ -19,37 +19,38 @@
 #include <lttoolbox/entry_token.h>
 #include <lttoolbox/lt_locale.h>
 #include <lttoolbox/xml_parse_util.h>
-#include <lttoolbox/string_to_wostream.h>
 
 #include <cstdlib>
 #include <iostream>
 #include <libxml/encoding.h>
 
-#ifdef _WIN32
-#define swprintf _snwprintf
-#endif
-
 using namespace std;
 
-wstring const TMXCompiler::TMX_COMPILER_TMX_ELEM     = L"tmx";
-wstring const TMXCompiler::TMX_COMPILER_HEADER_ELEM  = L"header";
-wstring const TMXCompiler::TMX_COMPILER_BODY_ELEM    = L"body";
-wstring const TMXCompiler::TMX_COMPILER_TU_ELEM      = L"tu";
-wstring const TMXCompiler::TMX_COMPILER_TUV_ELEM     = L"tuv";
-wstring const TMXCompiler::TMX_COMPILER_HI_ELEM      = L"hi";
-wstring const TMXCompiler::TMX_COMPILER_PH_ELEM      = L"ph";
-wstring const TMXCompiler::TMX_COMPILER_XMLLANG_ATTR = L"xml:lang";
-wstring const TMXCompiler::TMX_COMPILER_LANG_ATTR    = L"lang";
-wstring const TMXCompiler::TMX_COMPILER_SEG_ELEM     = L"seg";
-wstring const TMXCompiler::TMX_COMPILER_PROP_ELEM    = L"prop";
+UString const TMXCompiler::TMX_COMPILER_TMX_ELEM     = "tmx"_u;
+UString const TMXCompiler::TMX_COMPILER_HEADER_ELEM  = "header"_u;
+UString const TMXCompiler::TMX_COMPILER_BODY_ELEM    = "body"_u;
+UString const TMXCompiler::TMX_COMPILER_TU_ELEM      = "tu"_u;
+UString const TMXCompiler::TMX_COMPILER_TUV_ELEM     = "tuv"_u;
+UString const TMXCompiler::TMX_COMPILER_HI_ELEM      = "hi"_u;
+UString const TMXCompiler::TMX_COMPILER_PH_ELEM      = "ph"_u;
+UString const TMXCompiler::TMX_COMPILER_XMLLANG_ATTR = "xml:lang"_u;
+UString const TMXCompiler::TMX_COMPILER_LANG_ATTR    = "lang"_u;
+UString const TMXCompiler::TMX_COMPILER_SEG_ELEM     = "seg"_u;
+UString const TMXCompiler::TMX_COMPILER_PROP_ELEM    = "prop"_u;
+UString const TMXCompiler::TMX_COMPILER_TEXT_NODE    = "#text"_u;
+UString const TMXCompiler::TMX_COMPILER_COMMENT_NODE = "#comment"_u;
+UString const TMXCompiler::TMX_COMPILER_NUMBER_TAG   = "<n>"_u;
+UString const TMXCompiler::TMX_COMPILER_BLANK_TAG    = "<b>"_u;
 
 TMXCompiler::TMXCompiler() :
 reader(0),
 default_weight(0.0000)
 {
   LtLocale::tryToSetLocale();
-  alphabet.includeSymbol(L"<n>"); // -1 -> numbers
-  alphabet.includeSymbol(L"<b>"); // -2 -> blanks
+  alphabet.includeSymbol(TMX_COMPILER_NUMBER_TAG); // -1 -> numbers
+  alphabet.includeSymbol(TMX_COMPILER_BLANK_TAG); // -2 -> blanks
+  number_tag = alphabet(TMX_COMPILER_NUMBER_TAG);
+  blank_tag = alphabet(TMX_COMPILER_BLANK_TAG);
 }
 
 TMXCompiler::~TMXCompiler()
@@ -57,14 +58,14 @@ TMXCompiler::~TMXCompiler()
 }
 
 void
-TMXCompiler::parse(string const &file, wstring const &lo, wstring const &lm)
+TMXCompiler::parse(string const &file, UString const &lo, UString const &lm)
 {
   origin_language = lo;
   meta_language = lm;
   reader = xmlReaderForFile(file.c_str(), NULL, 0);
   if(reader == NULL)
   {
-    wcerr << "Error: Cannot open '" << file << "'." << endl;
+    cerr << "Error: Cannot open '" << file << "'." << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -77,7 +78,7 @@ TMXCompiler::parse(string const &file, wstring const &lo, wstring const &lm)
 
   if(ret != 0)
   {
-    wcerr << L"Error: Parse error at the end of input." << endl;
+    cerr << "Error: Parse error at the end of input." << endl;
   }
 
   xmlFreeTextReader(reader);
@@ -88,12 +89,12 @@ TMXCompiler::parse(string const &file, wstring const &lo, wstring const &lm)
 }
 
 void
-TMXCompiler::requireEmptyError(wstring const &name)
+TMXCompiler::requireEmptyError(UString const &name)
 {
   if(!xmlTextReaderIsEmptyElement(reader))
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Non-empty element '<" << name << L">' should be empty." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Non-empty element '<" << name << ">' should be empty." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -101,91 +102,91 @@ TMXCompiler::requireEmptyError(wstring const &name)
 bool
 TMXCompiler::allBlanks()
 {
-  bool flag = true;
-  wstring text = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
+  UString text = XMLParseUtil::readValue(reader);
 
   for(auto c : text)
   {
-    flag = flag && iswspace(c);
+    if (!u_isspace(c)) {
+      return false;
+    }
   }
-
-  return flag;
+  return true;
 }
 
 void
-TMXCompiler::skipBlanks(wstring &name)
+TMXCompiler::skipBlanks(UString &name)
 {
-  while(name == L"#text" || name == L"#comment")
+  while(name == TMX_COMPILER_TEXT_NODE || name == TMX_COMPILER_COMMENT_NODE)
   {
-    if(name != L"#comment")
+    if(name != TMX_COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
-        wcerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << "): Invalid construction." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid construction." << endl;
         exit(EXIT_FAILURE);
       }
     }
 
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 }
 
 void
-TMXCompiler::skip(wstring &name, wstring const &elem)
+TMXCompiler::skip(UString &name, UString const &elem)
 {
   xmlTextReaderRead(reader);
-  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+  name = XMLParseUtil::readName(reader);
 
-  while(name == L"#text" || name == L"#comment")
+  while(name == TMX_COMPILER_TEXT_NODE || name == TMX_COMPILER_COMMENT_NODE)
   {
-    if(name != L"#comment")
+    if(name != TMX_COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
-        wcerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
-        wcerr << "): Invalid construction." << endl;
+        cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+        cerr << "): Invalid construction." << endl;
         exit(EXIT_FAILURE);
       }
     }
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
   }
 
   if(name != elem)
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Expected '<" << elem << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Expected '<" << elem << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-wstring
-TMXCompiler::attrib(wstring const &name)
+UString
+TMXCompiler::attrib(UString const &name)
 {
   return XMLParseUtil::attrib(reader, name);
 }
 
 void
-TMXCompiler::requireAttribute(wstring const &value, wstring const &attrname,
-                           wstring const &elemname)
+TMXCompiler::requireAttribute(UString const &value, UString const &attrname,
+                           UString const &elemname)
 {
-  if(value == L"")
+  if(value.empty())
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): '<" << elemname;
-    wcerr << L"' element must specify non-void '";
-    wcerr << attrname << L"' attribute." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): '<" << elemname;
+    cerr << "' element must specify non-void '";
+    cerr << attrname << "' attribute." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-wstring
+UString
 TMXCompiler::getTag(size_t const &val) const
 {
-  wchar_t cad[32];
-  swprintf(cad, 32, L"<%d>", val);
+  UChar cad[32];
+  u_snprintf(cad, 32, "<%d>", val);
   return cad;
 }
 
@@ -197,7 +198,7 @@ TMXCompiler::insertTU(vector<int> const &origin, vector<int> const &meta)
     return;
   }
 
-  if(origin[0] == alphabet(L"<b>") || meta[0] == alphabet(L"<b>"))
+  if(origin[0] == blank_tag || meta[0] == blank_tag)
   {
     return;
   }
@@ -273,12 +274,10 @@ TMXCompiler::align_blanks(vector<int> &o, vector<int> &m)
   vector<unsigned int> puntos;
   vector<int> resultado_o, resultado_m;
 
-  int const symbol = alphabet(L"<b>");
-
   vector<vector<int> > so, sm;
 
-  split(o, so, symbol);
-  split(m, sm, symbol);
+  split(o, so, blank_tag);
+  split(m, sm, blank_tag);
 
   if(so.size() == sm.size())
   {
@@ -288,8 +287,8 @@ TMXCompiler::align_blanks(vector<int> &o, vector<int> &m)
       trim(sm[i]);
       if(sm.size() - 1 != i)
       {
-        sm[i].push_back(L'(');
-        sm[i].push_back(L'#');
+        sm[i].push_back('(');
+        sm[i].push_back('#');
       }
     /*
       while(so[i].size() < sm[i].size())
@@ -301,8 +300,8 @@ TMXCompiler::align_blanks(vector<int> &o, vector<int> &m)
         sm[i].push_back(0);
       }*/
     }
-    o = join(so, L' ');
-    m = join(sm, L')');
+    o = join(so, ' ');
+    m = join(sm, ')');
   }
   else
   {
@@ -315,19 +314,19 @@ TMXCompiler::align_blanks(vector<int> &o, vector<int> &m)
       trim(sm[i]);
       if(sm.size() - 1 != i)
       {
-        sm[i].push_back(L'(');
-        sm[i].push_back(L'#');
+        sm[i].push_back('(');
+        sm[i].push_back('#');
       }
     }
-    o = join(so, L' ');
-    m = join(sm, L')');
+    o = join(so, ' ');
+    m = join(sm, ')');
   }
 }
 
 void
 TMXCompiler::procTU()
 {
-  wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+  UString name = XMLParseUtil::readName(reader);
   int type = xmlTextReaderNodeType(reader);
   vector<int> origin;
   vector<int> meta;
@@ -337,9 +336,8 @@ TMXCompiler::procTU()
   {
     if(name == TMX_COMPILER_TUV_ELEM && type != XML_READER_TYPE_END_ELEMENT)
     {
-      wstring l = attrib(TMX_COMPILER_XMLLANG_ATTR);
-      if(l == L"")
-      {
+      UString l = attrib(TMX_COMPILER_XMLLANG_ATTR);
+      if(l.empty()) {
         l = attrib(TMX_COMPILER_LANG_ATTR);
       }
 
@@ -360,57 +358,43 @@ TMXCompiler::procTU()
       while(name != TMX_COMPILER_TUV_ELEM || type != XML_READER_TYPE_END_ELEMENT)
       {
         xmlTextReaderRead(reader);
-        name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+        name = XMLParseUtil::readName(reader);
         type = xmlTextReaderNodeType(reader);
 
-        if(name == L"#text")
+        if(name == TMX_COMPILER_TEXT_NODE)
         {
-          wstring l = XMLParseUtil::towstring(xmlTextReaderConstValue(reader));
-          for(size_t i = 0, limit = l.size(); i != limit; i++)
-          {
-            ref->push_back(l[i]);
-          }
+          XMLParseUtil::readValueInto32(reader, *ref);
         }
         else if(name == TMX_COMPILER_HI_ELEM || name == TMX_COMPILER_PH_ELEM)
         {
           if(type != XML_READER_TYPE_END_ELEMENT)
           {
-            ref->push_back(alphabet(L"<b>"));
+            ref->push_back(blank_tag);
           }
         }
       }
     }
     xmlTextReaderRead(reader);
-    name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+    name = XMLParseUtil::readName(reader);
     type = xmlTextReaderNodeType(reader);
   }
 
   trim(origin);
   trim(meta);
-//  wcout << L"DESPUES DE TRIM\n";
-//  printvector(origin);
-//  printvector(meta);
 
   align(origin, meta);
-//  wcout << L"DESPUES DE ALIGN\n";
-//  printvector(origin);
-//  printvector(meta);
   align_blanks(origin, meta);
-//  wcout << L"DESPUES DE ALIGNBLANKS\n";
-//  printvector(origin);
-//  printvector(meta);
   insertTU(origin, meta);
 }
 
 void
 TMXCompiler::procNode()
 {
-  xmlChar const *xname = xmlTextReaderConstName(reader);
-  wstring name = XMLParseUtil::towstring(xname);
+  UString name = XMLParseUtil::readName(reader);
 
   // HACER: optimizar el orden de ejecución de esta ristra de "ifs"
 
-  if(name == L"#text")
+  if(name == TMX_COMPILER_TEXT_NODE)
   {
     /* ignorar */
   }
@@ -434,14 +418,14 @@ TMXCompiler::procNode()
   {
     procTU();
   }
-  else if(name== L"#comment")
+  else if(name== TMX_COMPILER_COMMENT_NODE)
   {
     /* ignorar */
   }
   else
   {
-    wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader);
-    wcerr << L"): Invalid node '<" << name << L">'." << endl;
+    cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader);
+    cerr << "): Invalid node '<" << name << ">'." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -449,24 +433,24 @@ TMXCompiler::procNode()
 void
 TMXCompiler::write(FILE *output)
 {
-  fwrite(HEADER_LTTOOLBOX, 1, 4, output);
+  fwrite_unlocked(HEADER_LTTOOLBOX, 1, 4, output);
   uint64_t features = 0;
   write_le(output, features);
 
   // letters (empty to keep the file format)
-  Compression::wstring_write(L"", output);
+  Compression::multibyte_write(0, output);
 
   // symbols
   alphabet.write(output);
 
-  // transducers
+  // transducers (1, with empty name)
   Compression::multibyte_write(1, output); // keeping file format
-  Compression::wstring_write(L"", output); // keeping file format
+  Compression::multibyte_write(0, output); // keeping file format
   transducer.write(output);
 
-  wcout << origin_language << L"->" << meta_language << L" ";
-  wcout << transducer.size() << L" " << transducer.numberOfTransitions();
-  wcout << endl;
+  cout << origin_language << "->" << meta_language << " ";
+  cout << transducer.size() << " " << transducer.numberOfTransitions();
+  cout << endl;
 }
 
 void
@@ -474,7 +458,7 @@ TMXCompiler::trim(vector<int> &v) const
 {
   while(v.size() > 0)
   {
-    if(iswspace(v[v.size()-1]))
+    if(u_isspace(v[v.size()-1]))
     {
       v.pop_back();
     }
@@ -488,7 +472,7 @@ TMXCompiler::trim(vector<int> &v) const
   vector<int> aux;
   for(auto c : v)
   {
-    if(!iswspace(c) || !principio)
+    if(!u_isspace(c) || !principio)
     {
       principio = false;
       aux.push_back(c);
@@ -514,7 +498,7 @@ TMXCompiler::align(vector<int> &origin, vector<int> &meta)
       numbers_origin_start.push_back(i);
       numbers_origin_length.push_back(nl);
       i += nl-1;
-      modified_origin.push_back(alphabet(L"<n>"));
+      modified_origin.push_back(number_tag);
     }
     else
     {
@@ -536,16 +520,15 @@ TMXCompiler::align(vector<int> &origin, vector<int> &meta)
           if(vectorcmp(origin, numbers_origin_start[j],
                        meta, i, nl))
           {
-            modified_meta.push_back(L'@');
-            modified_meta.push_back(L'(');
-            wchar_t *valor = new wchar_t[8];
-            swprintf(valor, 8, L"%d", j+1);
-            for(int k = 0, limit3 = wcslen(valor); k != limit3; k++)
+            modified_meta.push_back('@');
+            modified_meta.push_back('(');
+            UChar valor[8]{};
+            int limit3 = u_snprintf(valor, 8, "%d", j+1);
+            for(int k = 0; k != limit3; k++)
             {
               modified_meta.push_back(valor[k]);
             }
-            delete[] valor;
-            modified_meta.push_back(L')');
+            modified_meta.push_back(')');
             i += nl-1;
             tocado = true;
             break;
@@ -582,7 +565,7 @@ TMXCompiler::numberLength(vector<int> &v, unsigned int const position) const
 {
   for(unsigned int i = position, limit = v.size(); i < limit; i++)
   {
-    if(!iswdigit(v[i]) && (v[i] != L'.' || i == position) && (v[i] != L',' || i == position))
+    if(!u_isdigit(v[i]) && (v[i] != '.' || i == position) && (v[i] != ',' || i == position))
     {
       if(i == position)
       {
@@ -593,7 +576,7 @@ TMXCompiler::numberLength(vector<int> &v, unsigned int const position) const
         while(i != position)
         {
           i--;
-          if(iswdigit(v[i]))
+          if(u_isdigit(v[i]))
           {
             return i - position + 1;
           }
@@ -607,7 +590,7 @@ TMXCompiler::numberLength(vector<int> &v, unsigned int const position) const
   while(i != position)
   {
     i--;
-    if(iswdigit(v[i]))
+    if(u_isdigit(v[i]))
     {
       return i - position + 1;
     }
@@ -634,34 +617,13 @@ TMXCompiler::vectorcmp(vector<int> const &orig, unsigned int const begin_orig,
 }
 
 void
-TMXCompiler::printvector(vector<int> const &v, wostream &os)
-{
-  for(unsigned int i = 0, limit = v.size(); i != limit; i++)
-  {
-    if(i != 0)
-    {
-      os << L" ";
-    }
-    if(v[i] > 31)
-    {
-      os << v[i] << L" ('" << wchar_t(v[i]) << L"')";
-    }
-    else
-    {
-      os << v[i];
-    }
-  }
-  os << endl;
-}
-
-void
-TMXCompiler::setOriginLanguageCode(wstring const &code)
+TMXCompiler::setOriginLanguageCode(UString const &code)
 {
   // nada
 }
 
 void
-TMXCompiler::setMetaLanguageCode(wstring const &code)
+TMXCompiler::setMetaLanguageCode(UString const &code)
 {
   // nada
 }
diff --git a/lttoolbox/tmx_compiler.h b/lttoolbox/tmx_compiler.h
index 53bb4b5..9cf9595 100644
--- a/lttoolbox/tmx_compiler.h
+++ b/lttoolbox/tmx_compiler.h
@@ -20,7 +20,6 @@
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/regexp_compiler.h>
 #include <lttoolbox/entry_token.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/transducer.h>
 
 #include <map>
@@ -60,22 +59,25 @@ private:
   /**
    * Origin language
    */
-  wstring origin_language;
+  UString origin_language;
 
   /**
    * Meta language
    */
-  wstring meta_language;
+  UString meta_language;
 
   /**
    * Origin language code in the TMX
    */
-  wstring origin_language_inner_code;
+  UString origin_language_inner_code;
 
   /**
    * Origin language code in the TMX
    */
-  wstring meta_language_inner_code;
+  UString meta_language_inner_code;
+
+  int32_t number_tag;
+  int32_t blank_tag;
 
 
   /**
@@ -100,26 +102,26 @@ private:
    * @param name the name of the attribute
    * @return the value of the attribute
    */
-  wstring attrib(wstring const &name);
+  UString attrib(UString const &name);
 
   /**
    * Skip all document #text nodes before "elem"
    * @param name the name of the node
    * @param elem the name of the expected node
    */
-  void skip(wstring &name, wstring const &elem);
+  void skip(UString &name, UString const &elem);
 
   /**
    * Skip all blank #text nodes before "name"
    * @param name the name of the node
    */
-  void skipBlanks(wstring &name);
+  void skipBlanks(UString &name);
 
   /**
    * Force an element to be empty, and check for it
    * @param name the element
    */
-  void requireEmptyError(wstring const &name);
+  void requireEmptyError(UString const &name);
 
   /**
    * Force an attribute to be specified, amd check for it
@@ -127,8 +129,8 @@ private:
    * @param attrname the name of the attribute
    * @param elemname the parent of the attribute
    */
-  void requireAttribute(wstring const &value, wstring const &attrname,
-                        wstring const &elemname);
+  void requireAttribute(UString const &value, UString const &attrname,
+                        UString const &elemname);
 
   /**
    * True if all the elements in the current node are blanks
@@ -136,7 +138,7 @@ private:
    */
   bool allBlanks();
 
-  wstring getTag(size_t const &val) const;
+  UString getTag(size_t const &val) const;
   void trim(vector<int> &v) const;
   void align(vector<int> &origin, vector<int> &meta);
   unsigned int numberLength(vector<int> &v, unsigned int const position) const;
@@ -147,25 +149,27 @@ private:
   void align_blanks(vector<int> &o, vector<int> &m);
   vector<int> join(vector<vector<int> > const &v, int const s) const;
 
-  static void printvector(vector<int> const &v, wostream &wos = std::wcout);  //eliminar este método
-
 public:
 
   /*
    * Constants to represent the element and the attributes of
    * translation memories in TMX format
    */
-  static wstring const TMX_COMPILER_TMX_ELEM;
-  static wstring const TMX_COMPILER_HEADER_ELEM;
-  static wstring const TMX_COMPILER_BODY_ELEM;
-  static wstring const TMX_COMPILER_TU_ELEM;
-  static wstring const TMX_COMPILER_TUV_ELEM;
-  static wstring const TMX_COMPILER_HI_ELEM;
-  static wstring const TMX_COMPILER_PH_ELEM;
-  static wstring const TMX_COMPILER_XMLLANG_ATTR;
-  static wstring const TMX_COMPILER_LANG_ATTR;
-  static wstring const TMX_COMPILER_SEG_ELEM;
-  static wstring const TMX_COMPILER_PROP_ELEM;
+  static UString const TMX_COMPILER_TMX_ELEM;
+  static UString const TMX_COMPILER_HEADER_ELEM;
+  static UString const TMX_COMPILER_BODY_ELEM;
+  static UString const TMX_COMPILER_TU_ELEM;
+  static UString const TMX_COMPILER_TUV_ELEM;
+  static UString const TMX_COMPILER_HI_ELEM;
+  static UString const TMX_COMPILER_PH_ELEM;
+  static UString const TMX_COMPILER_XMLLANG_ATTR;
+  static UString const TMX_COMPILER_LANG_ATTR;
+  static UString const TMX_COMPILER_SEG_ELEM;
+  static UString const TMX_COMPILER_PROP_ELEM;
+  static UString const TMX_COMPILER_TEXT_NODE;
+  static UString const TMX_COMPILER_COMMENT_NODE;
+  static UString const TMX_COMPILER_NUMBER_TAG;
+  static UString const TMX_COMPILER_BLANK_TAG;
 
 
   /**
@@ -181,7 +185,7 @@ public:
   /**
    * Compile dictionary to letter transducers
    */
-  void parse(string const &file, wstring const &lo, wstring const &lm);
+  void parse(string const &file, UString const &lo, UString const &lm);
 
   /**
    * Write the result of compilation
@@ -193,13 +197,13 @@ public:
    * Set origin language inner code
    * @param code the code of the origin language into the TMX file being compiled
    */
-  void setOriginLanguageCode(wstring const &code);
+  void setOriginLanguageCode(UString const &code);
 
   /**
    * Set meta language inner code
    * @param code the code of the meta language into the TMX file being compiled
    */
-  void setMetaLanguageCode(wstring const &code);
+  void setMetaLanguageCode(UString const &code);
 
 };
 
diff --git a/lttoolbox/trans_exe.cc b/lttoolbox/trans_exe.cc
index ce39ff6..4dcc5aa 100644
--- a/lttoolbox/trans_exe.cc
+++ b/lttoolbox/trans_exe.cc
@@ -18,6 +18,7 @@
 #include <lttoolbox/trans_exe.h>
 #include <lttoolbox/compression.h>
 #include <lttoolbox/my_stdio.h>
+#include <cstring>
 
 TransExe::TransExe():
 initial_id(0),
@@ -70,7 +71,7 @@ TransExe::read(FILE *input, Alphabet const &alphabet)
   fpos_t pos;
   if (fgetpos(input, &pos) == 0) {
       char header[4]{};
-      fread(header, 1, 4, input);
+      fread_unlocked(header, 1, 4, input);
       if (strncmp(header, HEADER_TRANSDUCER, 4) == 0) {
           auto features = read_le<uint64_t>(input);
           if (features >= TDF_UNKNOWN) {
diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc
index b37ae96..e9e6454 100644
--- a/lttoolbox/transducer.cc
+++ b/lttoolbox/transducer.cc
@@ -24,6 +24,24 @@
 #include <cstdlib>
 #include <iostream>
 #include <vector>
+#include <cstring>
+
+UString const Transducer::HFST_EPSILON_SYMBOL_SHORT   = "@0@"_u;
+UString const Transducer::HFST_EPSILON_SYMBOL_LONG    = "@_EPSILON_SYMBOL_@"_u;
+// could extend the ""_u helper to include u""_u
+// this is the only place that needs it
+UString const Transducer::LTTB_EPSILON_SYMBOL         = UString(1, (UChar)0x3B5);
+                                                   // = "ε"_u;
+UString const Transducer::HFST_SPACE_SYMBOL           = "@_SPACE_@"_u;
+UString const Transducer::HFST_TAB_SYMBOL             = "@_TAB_@"_u;
+UString const Transducer::GROUP_SYMBOL                = "#"_u;
+UString const Transducer::JOIN_SYMBOL                 = "+"_u;
+UString const Transducer::ANY_TAG_SYMBOL              = "<ANY_TAG>"_u;
+UString const Transducer::ANY_CHAR_SYMBOL             = "<ANY_CHAR>"_u;
+UString const Transducer::LSX_BOUNDARY_SYMBOL         = "<$>"_u;
+UString const Transducer::COMPOUND_ONLY_L_SYMBOL      = "<compound-only-L>"_u;
+UString const Transducer::COMPOUND_R_SYMBOL           = "<compound-R>"_u;
+
 
 int
 Transducer::newState()
@@ -170,8 +188,8 @@ Transducer::linkStates(int const source, int const target,
   }
   else
   {
-    wcerr << L"Error: Trying to link nonexistent states (" << source;
-    wcerr << L", " << target << L", " << tag << L")" << endl;
+    cerr << "Error: Trying to link nonexistent states (" << source;
+    cerr << ", " << target << ", " << tag << ")" << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -189,7 +207,7 @@ Transducer::setFinal(int const state, double const weight, bool value)
   int initial_copy = getInitial();
   if(state == initial_copy)
   {
-    wcerr << L"Setting initial state to final" << endl;
+    cerr << "Setting initial state to final" << endl;
   }
 */
   if(value)
@@ -261,7 +279,7 @@ Transducer::joinFinals(int const epsilon_tag)
   }
   else if(finals.size() == 0)
   {
-    wcerr << L"Error: empty set of final states" <<endl;
+    cerr << "Error: empty set of final states" <<endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -319,6 +337,11 @@ Transducer::determinize(int const epsilon_tag)
 
   int t = 0;
 
+  set<int> finals_state;
+  for(auto& it : finals) {
+    finals_state.insert(it.first);
+  }
+
   while(size_Q_prime != Q_prime.size())
   {
     size_Q_prime = Q_prime.size();
@@ -326,11 +349,6 @@ Transducer::determinize(int const epsilon_tag)
 
     for(auto& it : R[t])
     {
-      set<int> finals_state;
-      for(auto& it2 : finals)
-      {
-        finals_state.insert(it2.first);
-      }
       if(!isEmptyIntersection(Q_prime[it], finals_state))
       {
         double w = default_weight;
@@ -378,8 +396,8 @@ Transducer::determinize(int const epsilon_tag)
     t = (t+1)%2;
   }
 
-  transitions = transitions_prime;
-  finals = finals_prime;
+  transitions.swap(transitions_prime);
+  finals.swap(finals_prime);
   initial = initial_prime;
 }
 
@@ -517,7 +535,7 @@ bool Transducer::weighted() {
 void
 Transducer::write(FILE *output, int const decalage)
 {
-  fwrite(HEADER_TRANSDUCER, 1, 4, output);
+  fwrite_unlocked(HEADER_TRANSDUCER, 1, 4, output);
 
   bool write_weights = weighted();
 
@@ -578,7 +596,7 @@ Transducer::read(FILE *input, int const decalage)
   fpos_t pos;
   if (fgetpos(input, &pos) == 0) {
       char header[4]{};
-      fread(header, 1, 4, input);
+      fread_unlocked(header, 1, 4, input);
       if (strncmp(header, HEADER_TRANSDUCER, 4) == 0) {
           auto features = read_le<uint64_t>(input);
           if (features >= TDF_UNKNOWN) {
@@ -713,61 +731,58 @@ Transducer::reverse(int const epsilon_tag)
 }
 
 void
-Transducer::escapeSymbol(wstring& symbol, bool hfst) const
+Transducer::escapeSymbol(UString& symbol, bool hfst) const
 {
-  if(symbol == L"") // If it's an epsilon
+  if(symbol.empty()) // If it's an epsilon
   {
     if(hfst)
     {
-      symbol = L"@0@";
+      symbol = HFST_EPSILON_SYMBOL_SHORT;
     }
     else
     {
-      symbol = L"ε";
+      symbol = LTTB_EPSILON_SYMBOL;
     }
   }
-  else if(hfst && symbol == L" ")
+  else if(hfst && symbol == " "_u)
   {
-    symbol = L"@_SPACE_@";
+    symbol = HFST_SPACE_SYMBOL;
   }
-  else if(hfst && symbol == L"\t")
+  else if(hfst && symbol == "\t"_u)
   {
-    symbol = L"@_TAB_@";
+    symbol = HFST_TAB_SYMBOL;
   }
 }
 
 void
-Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag, bool hfst) const
+Transducer::show(Alphabet const &alphabet, UFILE *output, int const epsilon_tag, bool hfst) const
 {
   for(auto& it : transitions)
   {
     for(auto& it2 : it.second)
     {
       auto t = alphabet.decode(it2.first);
-      fwprintf(output, L"%d\t", it.first);
-      fwprintf(output, L"%d\t", it2.second.first);
-      wstring l = L"";
+      u_fprintf(output, "%d\t%d\t", it.first, it2.second.first);
+      UString l;
       alphabet.getSymbol(l, t.first);
       escapeSymbol(l, hfst);
-      fwprintf(output, L"%ls\t", l.c_str());
-      wstring r = L"";
+      u_fprintf(output, "%S\t", l.c_str());
+      UString r;
       alphabet.getSymbol(r, t.second);
       escapeSymbol(r, hfst);
-      fwprintf(output, L"%ls\t", r.c_str());
-      fwprintf(output, L"%f\t", it2.second.second);
-      fwprintf(output, L"\n");
+      u_fprintf(output, "%S\t", r.c_str());
+      u_fprintf(output, "%f\t\n", it2.second.second);
     }
   }
 
   for(auto& it3 : finals)
   {
-    fwprintf(output, L"%d\t", it3.first);
-    fwprintf(output, L"%f\n", it3.second);
+    u_fprintf(output, "%d\t%f\n", it3.first, it3.second);
   }
 }
 
 void
-Transducer::show(Alphabet const &alphabet, FILE *output, int const epsilon_tag) const
+Transducer::show(Alphabet const &alphabet, UFILE *output, int const epsilon_tag) const
 {
   return show(alphabet, output, epsilon_tag, false);
 }
@@ -789,7 +804,7 @@ Transducer::getStateSize(int const state)
 }
 
 bool
-Transducer::recognise(wstring pattern, Alphabet &a, FILE *err)
+Transducer::recognise(UString pattern, Alphabet &a, FILE *err)
 {
   bool accepted = false;
   set<int> states;
@@ -801,7 +816,7 @@ Transducer::recognise(wstring pattern, Alphabet &a, FILE *err)
   {
     set<int> new_state;        //Transducer::closure(int const state, int const epsilon_tag)
     // For each of the current alive states
-    //fwprintf(err, L"step: %ls %lc (%d)\n", pattern.c_str(), *it, sym);
+    //fprintf(err, "step: %ls %lc (%d)\n", pattern.c_str(), *it, sym);
     for(auto& it2 : states)
     {
       auto& p = transitions[it2];
@@ -811,19 +826,19 @@ Transducer::recognise(wstring pattern, Alphabet &a, FILE *err)
       {
 
         auto t = a.decode(it3.first);
-        wstring l = L"";
+        UString l;
         a.getSymbol(l, t.first);
-        //wstring r = L"";
+        //UString r;
         //a.getSymbol(r, t.second);
 
-        //fwprintf(err, L"  -> state: %d, trans: %ls:%ls, targ: %d\n", *it2, (l == L"") ?  L"ε" : l.c_str(),  (r == L"") ?  L"ε" : r.c_str(), it3->second);
-        //if(l.find(*it) != wstring::npos || l == L"" )
-        if(l.find(it) != wstring::npos)
+        //fprintf(err, "  -> state: %d, trans: %ls:%ls, targ: %d\n", *it2, (l.empty()) ?  "ε" : l.c_str(),  (r.empty()) ?  "ε" : r.c_str(), it3->second);
+        //if(l.find(*it) != UString::npos || l.empty() )
+        if(l.find(it) != UString::npos)
         {
           auto myclosure = closure(it3.second.first, 0);
-          //wcerr << L"Before closure alives: " <<new_state.size() << endl;
+          //cerr << "Before closure alives: " <<new_state.size() << endl;
           new_state.insert(myclosure.begin(), myclosure.end());
-          //wcerr << L"After closure alives: " <<new_state.size() << endl;
+          //cerr << "After closure alives: " <<new_state.size() << endl;
         }
       }
     }
@@ -984,10 +999,6 @@ Transducer
 Transducer::moveLemqsLast(Alphabet const &alphabet,
                           int const epsilon_tag)
 {
-  // TODO: These should be in file which is included by both
-  // fst_processor.cc and compiler.cc:
-  wstring COMPILER_GROUP_ELEM = L"#";
-
   Transducer new_t;
   typedef int SearchState;
   std::set<SearchState> seen;
@@ -1006,11 +1017,11 @@ Transducer::moveLemqsLast(Alphabet const &alphabet,
     {
       int label = trans_it.first,
        this_trg = trans_it.second.first;
-      wstring left = L"";
+      UString left;
       alphabet.getSymbol(left, alphabet.decode(label).first);
       int new_src = states_this_new[this_src];
 
-      if(left == COMPILER_GROUP_ELEM)
+      if(left == GROUP_SYMBOL)
       {
         Transducer tagsFirst = copyWithTagsFirst(this_trg, label, alphabet, epsilon_tag);
         new_t.finals.insert(make_pair(
@@ -1055,16 +1066,6 @@ Transducer::intersect(Transducer &trimmer,
    * The trimmer is typically a bidix passed through appendDotStar.
    */
 
-  // TODO: These should be in file which is included by both
-  // fst_processor.cc and compiler.cc:
-  wstring compoundOnlyLSymbol = L"<compound-only-L>";
-  wstring compoundRSymbol = L"<compound-R>";
-  wstring COMPILER_JOIN_ELEM = L"+";
-  wstring COMPILER_GROUP_ELEM = L"#";
-  wstring COMPILER_ANY_TAG = L"<ANY_TAG>";
-  wstring COMPILER_ANY_CHAR = L"<ANY_CHAR>";
-  wstring COMPILER_SEPARABLE_BOUNDARY = L"<$>";
-
   // When searching, we need to record (this, (trimmer, trimmer_pre_plus))
   typedef std::pair<int, std::pair<int, int > > SearchState;
   // first: currently searched state in this;
@@ -1095,7 +1096,7 @@ Transducer::intersect(Transducer &trimmer,
         trimmer_preplus_next = trimmer_preplus;
 
     if(states_this_trimmed.find(current) == states_this_trimmed.end()) {
-      wcerr <<L"Error: couldn't find "<<this_src<<L","<<trimmer_src<<L" in state map"<<endl;
+      cerr <<"Error: couldn't find "<<this_src<<","<<trimmer_src<<" in state map"<<endl;
       exit(EXIT_FAILURE);
     }
     int trimmed_src = states_this_trimmed[current];
@@ -1105,7 +1106,7 @@ Transducer::intersect(Transducer &trimmer,
       int trimmer_label = trimmer_trans_it.first,
           trimmer_trg   = trimmer_trans_it.second.first;
       double trimmer_wt = trimmer_trans_it.second.second;
-      wstring trimmer_left = L"";
+      UString trimmer_left;
       trimmer_a.getSymbol(trimmer_left, trimmer_a.decode(trimmer_label).first);
 
       if(trimmer_preplus == trimmer_src) {
@@ -1113,7 +1114,7 @@ Transducer::intersect(Transducer &trimmer,
         trimmer_preplus_next = trimmer_trg;
       }
 
-      if(trimmer_left == L"")
+      if(trimmer_left.empty())
       {
         next = make_pair(this_src, make_pair(trimmer_trg, trimmer_preplus_next));
         if(seen.find(next) == seen.end())
@@ -1136,10 +1137,10 @@ Transducer::intersect(Transducer &trimmer,
       int this_label = trans_it.first,
           this_trg   = trans_it.second.first;
       double this_wt = trans_it.second.second;
-      wstring this_right = L"";
+      UString this_right;
       this_a.getSymbol(this_right, this_a.decode(this_label).second);
 
-      if(this_right == COMPILER_JOIN_ELEM || this_right == COMPILER_SEPARABLE_BOUNDARY)
+      if(this_right == JOIN_SYMBOL || this_right == LSX_BOUNDARY_SYMBOL)
       {
         if(trimmer_preplus == trimmer_src) {
           // Keep the old preplus state if it was set; equal to current trimmer state means unset:
@@ -1160,14 +1161,14 @@ Transducer::intersect(Transducer &trimmer,
                            trimmed_trg, // toState
                            this_label, // symbol-pair, using this alphabet
                            this_wt); //weight of transduction
-        if(this_right == COMPILER_SEPARABLE_BOUNDARY && isFinal(this_trg))
+        if(this_right == LSX_BOUNDARY_SYMBOL && isFinal(this_trg))
         {
           trimmed.setFinal(trimmed_trg, default_weight);
         }
       }
-      else if ( this_right == compoundOnlyLSymbol
-                || this_right == compoundRSymbol
-                || this_right == L"" )
+      else if ( this_right == COMPOUND_ONLY_L_SYMBOL
+                || this_right == COMPOUND_R_SYMBOL
+                || this_right.empty() )
       {
         // Stay put in the trimmer FST
         int trimmer_trg = trimmer_src;
@@ -1197,7 +1198,7 @@ Transducer::intersect(Transducer &trimmer,
         // Loop through non-epsilon arcs from the live state of trimmer
 
         // If we see a hash/group, we may have to rewind our trimmer state first:
-        if(this_right == COMPILER_GROUP_ELEM && trimmer_preplus != trimmer_src)
+        if(this_right == GROUP_SYMBOL && trimmer_preplus != trimmer_src)
         {
           states_this_trimmed.insert(make_pair(make_pair(this_src, make_pair(trimmer_preplus,
                                                                              trimmer_preplus)),
@@ -1209,7 +1210,7 @@ Transducer::intersect(Transducer &trimmer,
         {
           int trimmer_label = trimmer_trans_it.first,
               trimmer_trg   = trimmer_trans_it.second.first;
-          wstring trimmer_left = L"";
+          UString trimmer_left;
           trimmer_a.getSymbol(trimmer_left, trimmer_a.decode(trimmer_label).first);
 
           if(trimmer_preplus == trimmer_src) {
@@ -1217,9 +1218,9 @@ Transducer::intersect(Transducer &trimmer,
             trimmer_preplus_next = trimmer_trg;
           }
 
-          if(trimmer_left != L"" && // we've already dealt with trimmer epsilons
+          if(!trimmer_left.empty() && // we've already dealt with trimmer epsilons
              (this_right == trimmer_left ||
-              (this_right == ((trimmer_left[0] == L'<') ? COMPILER_ANY_TAG : COMPILER_ANY_CHAR))))
+              (this_right == ((trimmer_left[0] == '<') ? ANY_TAG_SYMBOL : ANY_CHAR_SYMBOL))))
           {
             next = make_pair(this_trg, make_pair(trimmer_trg, trimmer_preplus_next));
             if(seen.find(next) == seen.end())
diff --git a/lttoolbox/transducer.h b/lttoolbox/transducer.h
index cbfe112..3dd91d4 100644
--- a/lttoolbox/transducer.h
+++ b/lttoolbox/transducer.h
@@ -87,10 +87,26 @@ private:
    * @param symbol the string to be escaped
    * @param hfst if true, use HFST-compatible escape sequences
    */
-  void escapeSymbol(wstring& symbol, bool hfst) const;
+  void escapeSymbol(UString& symbol, bool hfst) const;
 
 public:
 
+  /**
+   * String constants
+   */
+  static UString const HFST_EPSILON_SYMBOL_SHORT;
+  static UString const HFST_EPSILON_SYMBOL_LONG;
+  static UString const LTTB_EPSILON_SYMBOL;
+  static UString const HFST_SPACE_SYMBOL;
+  static UString const HFST_TAB_SYMBOL;
+  static UString const GROUP_SYMBOL;
+  static UString const JOIN_SYMBOL;
+  static UString const ANY_TAG_SYMBOL;
+  static UString const ANY_CHAR_SYMBOL;
+  static UString const LSX_BOUNDARY_SYMBOL;
+  static UString const COMPOUND_ONLY_L_SYMBOL;
+  static UString const COMPOUND_R_SYMBOL;
+
   /**
    * Constructor
    */
@@ -172,7 +188,7 @@ public:
    * @param a widestring of the pattern to be recognised
    * @return true if the pattern is recognised by the transducer
    */
-  bool recognise(wstring pattern, Alphabet &a, FILE *err = stderr);
+  bool recognise(UString pattern, Alphabet &a, FILE *err = stderr);
 
   /**
    * Set the state as a final or not, yes by default
@@ -232,8 +248,8 @@ public:
    * @param hfst if true, use HFST-compatible escape characters
    * @param epsilon_tag the tag to take as epsilon
    */
-  void show(Alphabet const &a, FILE *output = stdout, int const epsilon_tag = 0, bool hfst = false) const;
-  void show(Alphabet const &a, FILE *output = stdout, int const epsilon_tag = 0) const;
+  void show(Alphabet const &a, UFILE *output, int const epsilon_tag = 0, bool hfst = false) const;
+  void show(Alphabet const &a, UFILE *output, int const epsilon_tag = 0) const;
 
   /**
    * Determinize the transducer
diff --git a/lttoolbox/ustring.cc b/lttoolbox/ustring.cc
new file mode 100644
index 0000000..87056c2
--- /dev/null
+++ b/lttoolbox/ustring.cc
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2021 Apertium
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "ustring.h"
+
+#include <stdexcept>
+#include <utf8.h>
+#include <cstring>
+#include <iterator>
+#include <unicode/utf16.h>
+
+using namespace icu;
+
+/**
+ * Write a UTF-16 string to an ICU output stream.
+ * The explicit length keeps embedded U+0000 code units intact; a "%S"
+ * format would stop at the first one. (u_fputs() is avoided because it
+ * appends a newline.)
+ */
+void
+write(const UString& str, UFILE* output)
+{
+  u_file_write(str.data(), static_cast<int32_t>(str.size()), output);
+}
+
+/**
+ * Convert a NUL-terminated UTF-8 C string to UTF-16.
+ * A null pointer yields an empty string.
+ */
+UString
+to_ustring(const char* s)
+{
+  return to_ustring(reinterpret_cast<const uint8_t*>(s));
+}
+
+/**
+ * Convert a NUL-terminated UTF-8 byte string to UTF-16.
+ * A null pointer yields an empty string; malformed UTF-8 is reported by
+ * utf8::utf8to16() (checked utfcpp API) via an exception.
+ */
+UString
+to_ustring(const uint8_t* s)
+{
+  UString ret;
+  if (s == nullptr) {
+    return ret;
+  }
+  auto sz = strlen(reinterpret_cast<const char*>(s));
+  // UTF-16 never needs more code units than UTF-8 needs bytes
+  ret.reserve(sz);
+  utf8::utf8to16(s, s+sz, std::back_inserter(ret));
+  return ret;
+}
+
+/**
+ * Append the code points of a UTF-16 string to a UTF-32 vector,
+ * combining surrogate pairs into single code points.
+ */
+void
+ustring_to_vec32(const UString& str, std::vector<int32_t>& vec)
+{
+  if (str.empty()) {
+    return;
+  }
+
+  size_t i = 0;
+  const size_t len = str.size();
+  // upper bound: one code point per code unit
+  vec.reserve(vec.size() + len);
+  int32_t c;
+  while (i < len) {
+    U16_NEXT(str, i, len, c);
+    vec.push_back(c);
+  }
+}
diff --git a/lttoolbox/ustring.h b/lttoolbox/ustring.h
new file mode 100644
index 0000000..7068e28
--- /dev/null
+++ b/lttoolbox/ustring.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2021 Apertium
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LT_USTRING_H_
+#define _LT_USTRING_H_
+
+#include <unicode/ustdio.h>
+#include <string>
+#include <utf8.h>
+#include <vector>
+#include <cstdint>
+#include <iterator>
+#include <ostream>
+
+// UTF-16 string; UChar is ICU's 16-bit code unit type
+typedef std::basic_string<UChar> UString;
+
+// write a UTF-16 string to an ICU output stream (no newline is appended)
+void write(const UString& str, UFILE* output);
+
+// convert a NUL-terminated UTF-8 string to UTF-16
+UString to_ustring(const char* str);
+UString to_ustring(const uint8_t* str);
+
+// append UTF-16 string to UTF-32 vector of symbols
+void ustring_to_vec32(const UString& str, std::vector<int32_t>& vec);
+
+// print a single code unit as its hexadecimal value
+// the stream's formatting flags are restored afterwards so that
+// std::hex does not leak into later integer output
+inline std::ostream&
+operator<<(std::ostream& ostr, UChar c)
+{
+  const std::ios_base::fmtflags saved = ostr.flags();
+  ostr << std::hex << static_cast<uint16_t>(c);
+  ostr.flags(saved);
+  return ostr;
+}
+
+// print a UTF-16 string as UTF-8
+inline std::ostream&
+operator<<(std::ostream& ostr, const UString& str)
+{
+  std::string res;
+  utf8::utf16to8(str.begin(), str.end(), std::back_inserter(res));
+  ostr << res;
+  return ostr;
+}
+
+// literal suffix for ASCII-only literals: each byte becomes one code unit
+// (no UTF-8 decoding is performed)
+inline UString operator""_u(const char* str, std::size_t len) {
+  UString us(len, 0);
+  for (std::size_t i = 0; i < len; ++i) {
+    // go through unsigned char so that bytes >= 0x80 are not sign-extended
+    us[i] = static_cast<unsigned char>(str[i]);
+  }
+  return us;
+}
+
+// append a code point, encoded as a surrogate pair if outside the BMP
+inline void operator+=(UString& str, UChar32 c)
+{
+  if (c <= 0xFFFF) {
+    str += static_cast<UChar>(c);
+  } else {
+    str += static_cast<UChar>(0xD800 + ((c - 0x10000) >> 10));
+    str += static_cast<UChar>(0xDC00 + (c & 0x3FF));
+  }
+}
+
+#endif
diff --git a/lttoolbox/xml_parse_util.cc b/lttoolbox/xml_parse_util.cc
index 3149900..0f5e3c5 100644
--- a/lttoolbox/xml_parse_util.cc
+++ b/lttoolbox/xml_parse_util.cc
@@ -18,128 +18,74 @@
 
 #include <cstdlib>
 #include <iostream>
+#include <utf8.h>
 
 using namespace std;
 
-wstring
-XMLParseUtil::attrib(xmlTextReaderPtr reader, wstring const &name)
+UString
+XMLParseUtil::attrib(xmlTextReaderPtr reader, UString const &name)
 {
-  string mystr = "";
-  for(int i = 0, limit = name.size(); i != limit; i++)
-  {
-    mystr += static_cast<char>(name[i]);
-  }
-
-  xmlChar *attrname = xmlCharStrdup(mystr.c_str());
-  xmlChar *myattr = xmlTextReaderGetAttribute(reader, attrname);
-  wstring result = towstring(myattr);
-  xmlFree(myattr);
-  xmlFree(attrname);
-  return result;
+  return attrib(reader, name, ""_u);
 }
 
-wstring
-XMLParseUtil::attrib(xmlTextReaderPtr reader, wstring const &name, const wstring fallback)
+UString
+XMLParseUtil::attrib(xmlTextReaderPtr reader, UString const& name, const UString& fallback)
 {
-  string mystr = "";
-  for (int i = 0, limit = name.size(); i != limit; i++) {
-    mystr += static_cast<char>(name[i]);
-  }
-
-  xmlChar *attrname = xmlCharStrdup(mystr.c_str());
+  std::string temp;
+  temp.reserve(name.size());
+  utf8::utf16to8(name.begin(), name.end(), std::back_inserter(temp));
+  const xmlChar *attrname = reinterpret_cast<const xmlChar*>(temp.c_str());
   xmlChar *myattr = xmlTextReaderGetAttribute(reader, attrname);
-  wstring result = XMLParseUtil::towstring(myattr);
-  xmlFree(myattr);
-  xmlFree(attrname);
   if(myattr == NULL) {
+    xmlFree(myattr);
     return fallback;
-  }
-  else {
+  } else {
+    UString result = to_ustring(reinterpret_cast<char*>(myattr));
+    xmlFree(myattr);
     return result;
   }
 }
 
-
-string
-XMLParseUtil::latin1(xmlChar const *input)
+std::string
+XMLParseUtil::attrib_str(xmlTextReaderPtr reader, const UString& name)
 {
- if(input == NULL)
-  {
+  std::string temp;
+  temp.reserve(name.size());
+  utf8::utf16to8(name.begin(), name.end(), std::back_inserter(temp));
+  const xmlChar *attrname = reinterpret_cast<const xmlChar*>(temp.c_str());
+  xmlChar *myattr = xmlTextReaderGetAttribute(reader, attrname);
+  if(myattr == NULL) {
+    xmlFree(myattr);
     return "";
+  } else {
+    std::string result = reinterpret_cast<char*>(myattr);
+    xmlFree(myattr);
+    return result;
   }
-
-  int outputlen = xmlStrlen(input) + 1;
-  int inputlen = xmlStrlen(input);
-
-  unsigned char* output = new unsigned char[outputlen];
-
-  if(UTF8Toisolat1(output, &outputlen, input, &inputlen) != 0)
-  {
-  }
-
-  output[outputlen] = 0;
-  string result = reinterpret_cast<char *>(output);
-  delete[] output;
-  return result;
 }
 
-wstring
-XMLParseUtil::towstring(xmlChar const * input)
+UString
+XMLParseUtil::readName(xmlTextReaderPtr reader)
 {
-  wstring result = L"";
-
-  for(int i = 0, limit = xmlStrlen(input); i != limit; i++)
-  {
-    int val = 0;
-    if(((unsigned char) input[i] & 0x80) == 0x0)
-    {
-      val = static_cast<wchar_t>(input[i]);
-    }
-    else if(((unsigned char) input[i] & 0xE0) == 0xC0)
-    {
-      val = (input[i] & 0x1F) << 6;
-      i++;
-      val += input[i] & 0x7F;
-    }
-    else if(((unsigned char) input[i] & 0xF0) == 0xE0)
-    {
-      val = (input[i] & 0x0F) << 6;
-      i++;
-      val += input[i] & 0x7F;
-      val = val << 6;
-      i++;
-      val += input[i] & 0x7F;
-    }
-    else if(((unsigned char) input[i] & 0xF8) == 0xF0)
-    {
-      val = (input[i] & 0x07) << 6;
-      i++;
-      val += input[i] & 0x7F;
-      val = val << 6;
-      i++;
-      val += input[i] & 0x7F;
-      val = val << 6;
-      i++;
-      val += input[i] & 0x7F;
-    }
-    else
-    {
-      wcerr << L"UTF-8 invalid string" << endl;
-      exit(EXIT_FAILURE);
-    }
+  const xmlChar* name = xmlTextReaderConstName(reader);
+  if (name == NULL) return ""_u;
+  return to_ustring(reinterpret_cast<const char*>(name));
+}
 
-    result += static_cast<wchar_t>(val);
-  }
-  return result;
+UString
+XMLParseUtil::readValue(xmlTextReaderPtr reader)
+{
+  const xmlChar* val = xmlTextReaderConstValue(reader);
+  if (val == NULL) return ""_u;
+  return to_ustring(reinterpret_cast<const char*>(val));
 }
 
-wstring
-XMLParseUtil::stows(string const &str)
+void
+XMLParseUtil::readValueInto32(xmlTextReaderPtr reader, vector<int32_t>& vec)
 {
-  wchar_t* result = new wchar_t[str.size()+1];
-  size_t retval = mbstowcs(result, str.c_str(), str.size());
-  result[retval] = L'\0';
-  wstring result2 = result;
-  delete[] result;
-  return result2;
+  const xmlChar* val = xmlTextReaderConstValue(reader);
+  if (val == NULL) return;
+  auto sz = xmlStrlen(val);
+  vec.reserve(vec.size() + sz);
+  utf8::utf8to32(val, val+sz, std::back_inserter(vec));
 }
diff --git a/lttoolbox/xml_parse_util.h b/lttoolbox/xml_parse_util.h
index beca741..9409cdd 100644
--- a/lttoolbox/xml_parse_util.h
+++ b/lttoolbox/xml_parse_util.h
@@ -19,8 +19,10 @@
 
 #include <libxml/encoding.h>
 #include <libxml/xmlreader.h>
+#include <lttoolbox/ustring.h>
+#include <vector>
+#include <cstdint>
 #include <string>
-#include <cwchar>
 
 using namespace std;
 
@@ -29,14 +31,16 @@ class XMLParseUtil
 public:
 
   /* If attrib does not exist (or other error), returns an empty string: */
-  static wstring attrib(xmlTextReaderPtr reader, wstring const &name);
+  static UString attrib(xmlTextReaderPtr reader, UString const &name);
 
   /* If attrib does not exist (or other error), returns fallback: */
-  static wstring attrib(xmlTextReaderPtr reader, wstring const &name, const wstring fallback);
+  static UString attrib(xmlTextReaderPtr reader, UString const &name, const UString& fallback);
 
-  static string latin1(xmlChar const * input); // mark for deletion
-  static wstring towstring(xmlChar const * input);
-  static wstring stows(string const &str);
+  static string attrib_str(xmlTextReaderPtr reader, const UString& name);
+
+  static UString readName(xmlTextReaderPtr reader);
+  static UString readValue(xmlTextReaderPtr reader);
+  static void readValueInto32(xmlTextReaderPtr reader, vector<int32_t>& vec);
 };
 
 #endif
diff --git a/lttoolbox/xml_walk_util.cc b/lttoolbox/xml_walk_util.cc
new file mode 100644
index 0000000..8611556
--- /dev/null
+++ b/lttoolbox/xml_walk_util.cc
@@ -0,0 +1,80 @@
+#include <lttoolbox/xml_walk_util.h>
+
+// Position the iterator on the first element child of node_, skipping
+// text, comment and other non-element nodes. A null node_ produces an
+// iterator that is already at the end.
+children::children(xmlNode* node_)
+  : node(node_), cur(node_ ? node_->children : nullptr)
+{
+  while (cur && cur->type != XML_ELEMENT_NODE) {
+    cur = cur->next;
+  }
+}
+
+children::children(const children& it)
+  : node(it.node), cur(it.cur)
+{}
+
+children::~children()
+{} // we don't own the pointers, so we don't delete them
+
+// Advance to the next sibling that is an element node, or to the end
+// position (cur == nullptr) if there is none.
+children&
+children::operator++()
+{
+  if (node && cur) {
+    cur = cur->next;
+    while (cur && cur->type != XML_ELEMENT_NODE) {
+      cur = cur->next;
+    }
+  }
+  return *this;
+}
+
+// begin()/end() let a children object be used directly in a range-for
+children
+children::begin()
+{
+  return children(node);
+}
+
+children
+children::end()
+{
+  children ret(*this);
+  ret.cur = nullptr;
+  return ret;
+}
+
+bool
+children::operator!=(const children& other) const
+{
+  return !(*this == other);
+}
+
+bool
+children::operator==(const children& other) const
+{
+  return node == other.node && cur == other.cur;
+}
+
+// Return the value of attribute attr on node, converted to UTF-16, or an
+// empty string if the attribute is absent or carries no value node.
+UString
+getattr(xmlNode* node, const char* attr)
+{
+  if (node == nullptr || attr == nullptr) {
+    return ""_u;
+  }
+  for (xmlAttr* i = node->properties; i != NULL; i = i->next) {
+    if (!xmlStrcmp(i->name, reinterpret_cast<const xmlChar*>(attr))) {
+      // guard against a missing value node before dereferencing it
+      if (i->children == NULL || i->children->content == NULL) {
+        return ""_u;
+      }
+      return to_ustring(reinterpret_cast<const char*>(i->children->content));
+    }
+  }
+  return ""_u;
+}
diff --git a/lttoolbox/xml_walk_util.h b/lttoolbox/xml_walk_util.h
new file mode 100644
index 0000000..13ca6a4
--- /dev/null
+++ b/lttoolbox/xml_walk_util.h
@@ -0,0 +1,29 @@
+#ifndef _XML_WALK_UTIL_
+#define _XML_WALK_UTIL_
+
+#include <lttoolbox/ustring.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+class children // iterator/range over the element-node children of an xmlNode
+{
+private:
+  xmlNode* node; // parent whose children are walked (not owned)
+  xmlNode* cur;  // current element child; nullptr marks the end position
+public:
+  children(xmlNode* node); // starts at the first element child of node
+  children(const children& it);
+  ~children(); // nothing to release: the xmlNode pointers are not owned
+
+  children& operator++(); // advance to the next element sibling (or the end)
+  children begin(); // fresh iterator at the first element child
+  children end();   // iterator for the same parent with cur == nullptr
+  inline xmlNode* operator*() const { return cur; } // current node, nullptr at end
+  bool operator!=(const children& other) const;
+  bool operator==(const children& other) const; // same parent and same position
+};
+
+UString getattr(xmlNode* node, const char* attr); // attribute value, or empty string if not found
+
+#endif
diff --git a/python/lttoolbox.i.in b/python/lttoolbox.i.in
index d7362e1..f57fe92 100644
--- a/python/lttoolbox.i.in
+++ b/python/lttoolbox.i.in
@@ -54,8 +54,9 @@ public:
 
   void lt_proc(int argc, char **argv, char *input_path, char *output_path)
   {
-    FILE* input = fopen(input_path, "r");
-    FILE* output = fopen(output_path, "w");
+    InputFile input;
+    input.open(input_path);
+    UFILE* output = u_fopen(output_path, "w", NULL, NULL);
     int cmd = 0;
     int c = 0;
     optind = 1;
@@ -103,8 +104,7 @@ public:
         break;
       }
 
-    fclose(input);
-    fclose(output);
+    u_fclose(output);
   }
 };
 
diff --git a/tests/data/arabic-punct.att b/tests/data/arabic-punct.att
new file mode 100644
index 0000000..db8c154
--- /dev/null
+++ b/tests/data/arabic-punct.att
@@ -0,0 +1,9 @@
+0	1	،	،	0.000
+0	1	؛	؛	0.000
+0	1	؟	؟	0.000
+0	2	a	a	0.000
+0	2	b	b	0.000
+1	3	@0@	<punct>	0.000
+2	4	@0@	<n>	0.000
+3	0.000
+4	0.000
diff --git a/tests/data/non-bmp.att b/tests/data/non-bmp.att
new file mode 100644
index 0000000..1a1f661
--- /dev/null
+++ b/tests/data/non-bmp.att
@@ -0,0 +1,34 @@
+0	1	𐅀	𐅀	0.000
+0	1	𐅁	𐅁	0.000
+0	1	𐅂	𐅂	0.000
+0	1	𐅃	𐅃	0.000
+0	1	𐅄	𐅄	0.000
+0	1	𐅅	𐅅	0.000
+0	1	𐅆	𐅆	0.000
+0	1	𐅇	𐅇	0.000
+0	1	𐅈	𐅈	0.000
+0	1	𐅉	𐅉	0.000
+0	1	𐅊	𐅊	0.000
+0	1	𐅋	𐅋	0.000
+0	1	𐅌	𐅌	0.000
+0	1	𐅍	𐅍	0.000
+0	1	𐅎	𐅎	0.000
+0	1	𐅏	𐅏	0.000
+1	1	𐅀	𐅀	0.000
+1	1	𐅁	𐅁	0.000
+1	1	𐅂	𐅂	0.000
+1	1	𐅃	𐅃	0.000
+1	1	𐅄	𐅄	0.000
+1	1	𐅅	𐅅	0.000
+1	1	𐅆	𐅆	0.000
+1	1	𐅇	𐅇	0.000
+1	1	𐅈	𐅈	0.000
+1	1	𐅉	𐅉	0.000
+1	1	𐅊	𐅊	0.000
+1	1	𐅋	𐅋	0.000
+1	1	𐅌	𐅌	0.000
+1	1	𐅍	𐅍	0.000
+1	1	𐅎	𐅎	0.000
+1	1	𐅏	𐅏	0.000
+1	2	@0@	<num>	0.000
+2	0.000
diff --git a/tests/data/non-bmp.dix b/tests/data/non-bmp.dix
new file mode 100644
index 0000000..161a161
--- /dev/null
+++ b/tests/data/non-bmp.dix
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<dictionary>
+	<alphabet>𐅀𐅁𐅂𐅃𐅄𐅅𐅆𐅇𐅈𐅉𐅊𐅋𐅌𐅍𐅎𐅏</alphabet>
+	<sdefs>
+		<sdef n="num"/>
+	</sdefs>
+	<pardefs>
+		<pardef n="num">
+			<e><p><l></l><r><s n="num"/></r></p></e>
+		</pardef>
+	</pardefs>
+	<section id="main" type="standard">
+		<e><re>[𐅀𐅁𐅂𐅃𐅄𐅅𐅆𐅇𐅈𐅉𐅊𐅋𐅌𐅍𐅎𐅏]+</re><par n="num"/></e>
+	</section>
+</dictionary>
diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py
index fca9df9..2de472a 100644
--- a/tests/lt_proc/__init__.py
+++ b/tests/lt_proc/__init__.py
@@ -148,8 +148,8 @@ class PostgenerationBasicTest(ProcTest):
                         "El perro ~de el amigo.",
                         "abc ~les testword"]
     expectedOutputs = [ "xyz ejemplo u ho nombre.",
-                        "xyz se la pelota.", 
-                        "El perro del amigo.", 
+                        "xyz se la pelota.",
+                        "El perro del amigo.",
                         "abc le pe test testword"]
 
 class PostgenerationWordboundBlankTest(ProcTest):
@@ -228,5 +228,24 @@ class SpaceAtEOF(ProcTest):
     flushing = False
 
 
+class NonBMPDixTest(ProcTest):
+	procdix = "data/non-bmp.dix"
+	inputs = ['𐅁𐅃𐅅', '𐅂𐅄𐅆']
+	expectedOutputs = ['^𐅁𐅃𐅅/𐅁𐅃𐅅<num>$', '^𐅂𐅄𐅆/𐅂𐅄𐅆<num>$']
+
+
+class NonBMPATTTest(ProcTest):
+	procdix = "data/non-bmp.att"
+	inputs = ['𐅁𐅃𐅅', '𐅂𐅄𐅆']
+	expectedOutputs = ['^𐅁𐅃𐅅/𐅁𐅃𐅅<num>$', '^𐅂𐅄𐅆/𐅂𐅄𐅆<num>$']
+
+
+class NonBMPGeneratorTest(ProcTest):
+	procdix = "data/non-bmp.att"
+	inputs = ['^𐅁𐅃𐅅<num>$', '^𐅂𐅄𐅆<num>$']
+	expectedOutputs = ['𐅁𐅃𐅅', '𐅂𐅄𐅆']
+	procflags = ['-z', '-g']
+	procdir = "rl"
+
 # These fail on some systems:
 #from null_flush_invalid_stream_format import *
diff --git a/utf8/utf8.h b/utf8/utf8.h
deleted file mode 100644
index c2c85d6..0000000
--- a/utf8/utf8.h
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "utf8/checked.h"
-#include "utf8/unchecked.h"
-
-#if __cplusplus >= 201103L // C++ 11 or later
-#include "utf8/cpp11.h"
-#endif // C++ 11 or later
-
-#endif // header guard
diff --git a/utf8/utf8/checked.h b/utf8/utf8/checked.h
deleted file mode 100644
index c31861e..0000000
--- a/utf8/utf8/checked.h
+++ /dev/null
@@ -1,324 +0,0 @@
-// Copyright 2006-2016 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-#include <stdexcept>
-
-namespace utf8
-{
-    // Base for the exceptions that may be thrown from the library
-    class exception : public ::std::exception {
-    };
-
-    // Exceptions that may be thrown from the library functions.
-    class invalid_code_point : public exception {
-        uint32_t cp;
-    public:
-        invalid_code_point(uint32_t codepoint) : cp(codepoint) {}
-        virtual const char* what() const throw() { return "Invalid code point"; }
-        uint32_t code_point() const {return cp;}
-    };
-
-    class invalid_utf8 : public exception {
-        uint8_t u8;
-    public:
-        invalid_utf8 (uint8_t u) : u8(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-8"; }
-        uint8_t utf8_octet() const {return u8;}
-    };
-
-    class invalid_utf16 : public exception {
-        uint16_t u16;
-    public:
-        invalid_utf16 (uint16_t u) : u16(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-16"; }
-        uint16_t utf16_word() const {return u16;}
-    };
-
-    class not_enough_room : public exception {
-    public:
-        virtual const char* what() const throw() { return "Not enough space"; }
-    };
-
-    /// The library API - functions intended to be called by the users
-
-    template <typename octet_iterator>
-    octet_iterator append(uint32_t cp, octet_iterator result)
-    {
-        if (!utf8::internal::is_code_point_valid(cp))
-            throw invalid_code_point(cp);
-
-        if (cp < 0x80)                        // one octet
-            *(result++) = static_cast<uint8_t>(cp);
-        else if (cp < 0x800) {                // two octets
-            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else if (cp < 0x10000) {              // three octets
-            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else {                                // four octets
-            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
-            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
-    {
-        while (start != end) {
-            octet_iterator sequence_start = start;
-            internal::utf_error err_code = utf8::internal::validate_next(start, end);
-            switch (err_code) {
-                case internal::UTF8_OK :
-                    for (octet_iterator it = sequence_start; it != start; ++it)
-                        *out++ = *it;
-                    break;
-                case internal::NOT_ENOUGH_ROOM:
-                    out = utf8::append (replacement, out);
-                    start = end;
-                    break;
-                case internal::INVALID_LEAD:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    break;
-                case internal::INCOMPLETE_SEQUENCE:
-                case internal::OVERLONG_SEQUENCE:
-                case internal::INVALID_CODE_POINT:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    // just one replacement mark for the sequence
-                    while (start != end && utf8::internal::is_trail(*start))
-                        ++start;
-                    break;
-            }
-        }
-        return out;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
-    {
-        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
-        return utf8::replace_invalid(start, end, out, replacement_marker);
-    }
-
-    template <typename octet_iterator>
-    uint32_t next(octet_iterator& it, octet_iterator end)
-    {
-        uint32_t cp = 0;
-        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
-        switch (err_code) {
-            case internal::UTF8_OK :
-                break;
-            case internal::NOT_ENOUGH_ROOM :
-                throw not_enough_room();
-            case internal::INVALID_LEAD :
-            case internal::INCOMPLETE_SEQUENCE :
-            case internal::OVERLONG_SEQUENCE :
-                throw invalid_utf8(*it);
-            case internal::INVALID_CODE_POINT :
-                throw invalid_code_point(cp);
-        }
-        return cp;
-    }
-
-    template <typename octet_iterator>
-    uint32_t peek_next(octet_iterator it, octet_iterator end)
-    {
-        return utf8::next(it, end);
-    }
-
-    template <typename octet_iterator>
-    uint32_t prior(octet_iterator& it, octet_iterator start)
-    {
-        // can't do much if it == start
-        if (it == start)
-            throw not_enough_room();
-
-        octet_iterator end = it;
-        // Go back until we hit either a lead octet or start
-        while (utf8::internal::is_trail(*(--it)))
-            if (it == start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        return utf8::peek_next(it, end);
-    }
-
-    template <typename octet_iterator, typename distance_type>
-    void advance (octet_iterator& it, distance_type n, octet_iterator end)
-    {
-        const distance_type zero(0);
-        if (n < zero) {
-            // backward
-            for (distance_type i = n; i < zero; ++i)
-                utf8::prior(it, end);
-        } else {
-            // forward
-            for (distance_type i = zero; i < n; ++i)
-                utf8::next(it, end);
-        }
-    }
-
-    template <typename octet_iterator>
-    typename std::iterator_traits<octet_iterator>::difference_type
-    distance (octet_iterator first, octet_iterator last)
-    {
-        typename std::iterator_traits<octet_iterator>::difference_type dist;
-        for (dist = 0; first < last; ++dist)
-            utf8::next(first, last);
-        return dist;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-            if (utf8::internal::is_lead_surrogate(cp)) {
-                if (start != end) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
-                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                    else
-                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
-                }
-                else
-                    throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            }
-            // Lone trail surrogate
-            else if (utf8::internal::is_trail_surrogate(cp))
-                throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            result = utf8::append(cp, result);
-        }
-        return result;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-    {
-        while (start < end) {
-            uint32_t cp = utf8::next(start, end);
-            if (cp > 0xffff) { //make a surrogate pair
-                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-            }
-            else
-                *result++ = static_cast<uint16_t>(cp);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-    {
-        while (start != end)
-            result = utf8::append(*(start++), result);
-
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-    {
-        while (start < end)
-            (*result++) = utf8::next(start, end);
-
-        return result;
-    }
-
-    // The iterator class
-    template <typename octet_iterator>
-    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-      octet_iterator it;
-      octet_iterator range_start;
-      octet_iterator range_end;
-      public:
-      iterator () {}
-      explicit iterator (const octet_iterator& octet_it,
-                         const octet_iterator& rangestart,
-                         const octet_iterator& rangeend) :
-               it(octet_it), range_start(rangestart), range_end(rangeend)
-      {
-          if (it < range_start || it > range_end)
-              throw std::out_of_range("Invalid utf-8 iterator position");
-      }
-      // the default "big three" are OK
-      octet_iterator base () const { return it; }
-      uint32_t operator * () const
-      {
-          octet_iterator temp = it;
-          return utf8::next(temp, range_end);
-      }
-      bool operator == (const iterator& rhs) const
-      {
-          if (range_start != rhs.range_start || range_end != rhs.range_end)
-              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
-          return (it == rhs.it);
-      }
-      bool operator != (const iterator& rhs) const
-      {
-          return !(operator == (rhs));
-      }
-      iterator& operator ++ ()
-      {
-          utf8::next(it, range_end);
-          return *this;
-      }
-      iterator operator ++ (int)
-      {
-          iterator temp = *this;
-          utf8::next(it, range_end);
-          return temp;
-      }
-      iterator& operator -- ()
-      {
-          utf8::prior(it, range_start);
-          return *this;
-      }
-      iterator operator -- (int)
-      {
-          iterator temp = *this;
-          utf8::prior(it, range_start);
-          return temp;
-      }
-    }; // class iterator
-
-} // namespace utf8
-
-#endif //header guard
-
diff --git a/utf8/utf8/core.h b/utf8/utf8/core.h
deleted file mode 100644
index b1f1eff..0000000
--- a/utf8/utf8/core.h
+++ /dev/null
@@ -1,321 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include <iterator>
-
-namespace utf8
-{
-    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
-    // You may need to change them to match your system.
-    // These typedefs have the same names as ones from cstdint, or boost/cstdint
-    typedef unsigned char   uint8_t;
-    typedef unsigned short  uint16_t;
-    typedef unsigned int    uint32_t;
-
-// Helper code - not intended to be directly called by the library users. May be changed at any time
-namespace internal
-{
-    // Unicode constants
-    // Leading (high) surrogates: 0xd800 - 0xdbff
-    // Trailing (low) surrogates: 0xdc00 - 0xdfff
-    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
-    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
-    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
-    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
-    const uint16_t LEAD_OFFSET         = 0xd7c0u;       // LEAD_SURROGATE_MIN - (0x10000 >> 10)
-    const uint32_t SURROGATE_OFFSET    = 0xfca02400u;   // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
-
-    // Maximum valid value for a Unicode code point
-    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
-
-    template<typename octet_type>
-    inline uint8_t mask8(octet_type oc)
-    {
-        return static_cast<uint8_t>(0xff & oc);
-    }
-    template<typename u16_type>
-    inline uint16_t mask16(u16_type oc)
-    {
-        return static_cast<uint16_t>(0xffff & oc);
-    }
-    template<typename octet_type>
-    inline bool is_trail(octet_type oc)
-    {
-        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
-    }
-
-    template <typename u16>
-    inline bool is_lead_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_trail_surrogate(u16 cp)
-    {
-        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u32>
-    inline bool is_code_point_valid(u32 cp)
-    {
-        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
-    }
-
-    template <typename octet_iterator>
-    inline typename std::iterator_traits<octet_iterator>::difference_type
-    sequence_length(octet_iterator lead_it)
-    {
-        uint8_t lead = utf8::internal::mask8(*lead_it);
-        if (lead < 0x80)
-            return 1;
-        else if ((lead >> 5) == 0x6)
-            return 2;
-        else if ((lead >> 4) == 0xe)
-            return 3;
-        else if ((lead >> 3) == 0x1e)
-            return 4;
-        else
-            return 0;
-    }
-
-    template <typename octet_difference_type>
-    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
-    {
-        if (cp < 0x80) {
-            if (length != 1)
-                return true;
-        }
-        else if (cp < 0x800) {
-            if (length != 2)
-                return true;
-        }
-        else if (cp < 0x10000) {
-            if (length != 3)
-                return true;
-        }
-
-        return false;
-    }
-
-    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
-
-    /// Helper for get_sequence_x
-    template <typename octet_iterator>
-    utf_error increase_safely(octet_iterator& it, octet_iterator end)
-    {
-        if (++it == end)
-            return NOT_ENOUGH_ROOM;
-
-        if (!utf8::internal::is_trail(*it))
-            return INCOMPLETE_SEQUENCE;
-
-        return UTF8_OK;
-    }
-
-    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
-
-    /// get_sequence_x functions decode utf-8 sequences of the length x
-    template <typename octet_iterator>
-    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-           return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
-
-    template <typename octet_iterator>
-    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        // Save the original value of it so we can go back in case of failure
-        // Of course, it does not make much sense with i.e. stream iterators
-        octet_iterator original_it = it;
-
-        uint32_t cp = 0;
-        // Determine the sequence length based on the lead octet
-        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
-        const octet_difference_type length = utf8::internal::sequence_length(it);
-
-        // Get trail octets and calculate the code point
-        utf_error err = UTF8_OK;
-        switch (length) {
-            case 0:
-                return INVALID_LEAD;
-            case 1:
-                err = utf8::internal::get_sequence_1(it, end, cp);
-                break;
-            case 2:
-                err = utf8::internal::get_sequence_2(it, end, cp);
-            break;
-            case 3:
-                err = utf8::internal::get_sequence_3(it, end, cp);
-            break;
-            case 4:
-                err = utf8::internal::get_sequence_4(it, end, cp);
-            break;
-        }
-
-        if (err == UTF8_OK) {
-            // Decoding succeeded. Now, security checks...
-            if (utf8::internal::is_code_point_valid(cp)) {
-                if (!utf8::internal::is_overlong_sequence(cp, length)){
-                    // Passed! Return here.
-                    code_point = cp;
-                    ++it;
-                    return UTF8_OK;
-                }
-                else
-                    err = OVERLONG_SEQUENCE;
-            }
-            else
-                err = INVALID_CODE_POINT;
-        }
-
-        // Failure branch - restore the original value of the iterator
-        it = original_it;
-        return err;
-    }
-
-    template <typename octet_iterator>
-    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
-        uint32_t ignored;
-        return utf8::internal::validate_next(it, end, ignored);
-    }
-
-} // namespace internal
-
-    /// The library API - functions intended to be called by the users
-
-    // Byte order mark
-    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
-
-    template <typename octet_iterator>
-    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
-    {
-        octet_iterator result = start;
-        while (result != end) {
-            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
-            if (err_code != internal::UTF8_OK)
-                return result;
-        }
-        return result;
-    }
-
-    template <typename octet_iterator>
-    inline bool is_valid(octet_iterator start, octet_iterator end)
-    {
-        return (utf8::find_invalid(start, end) == end);
-    }
-
-    template <typename octet_iterator>
-    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
-    {
-        return (
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
-            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
-           );
-    }
-} // namespace utf8
-
-#endif // header guard
-
-
diff --git a/utf8/utf8/cpp11.h b/utf8/utf8/cpp11.h
deleted file mode 100644
index 77771ff..0000000
--- a/utf8/utf8/cpp11.h
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright 2018 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
-#define UTF8_FOR_CPP_a184c22c_d012_11e8_a8d5_f2801f1b9fd1
-
-#include "checked.h"
-#include <string>
-
-namespace utf8
-{
-
-    inline void append(char32_t cp, std::string& s)
-    {
-        append(uint32_t(cp), std::back_inserter(s));
-    }
-
-    inline std::string utf16to8(const std::u16string& s)
-    {
-        std::string result;
-        utf16to8(s.begin(), s.end(), std::back_inserter(result));
-        return result;
-    }
-
-    inline std::u16string utf8to16(const std::string& s)
-    {
-        std::u16string result;
-        utf8to16(s.begin(), s.end(), std::back_inserter(result));
-        return result;
-    }
-
-    inline std::string utf32to8(const std::u32string& s)
-    {
-        std::string result;
-        utf32to8(s.begin(), s.end(), std::back_inserter(result));
-        return result;
-    }
-
-    inline std::u32string utf8to32(const std::string& s)
-    {
-        std::u32string result;
-        utf8to32(s.begin(), s.end(), std::back_inserter(result));
-        return result;
-    }
-
-    inline std::size_t find_invalid(const std::string& s)
-    {
-        std::string::const_iterator invalid = find_invalid(s.begin(), s.end());
-        return (invalid == s.end()) ? std::string::npos : (invalid - s.begin());
-    }
-
-    inline bool is_valid(const std::string& s)
-    {
-        return is_valid(s.begin(), s.end());
-    }
-
-    inline std::string replace_invalid(const std::string& s, char32_t replacement)
-    {
-        std::string result;
-        replace_invalid(s.begin(), s.end(), std::back_inserter(result), replacement);
-        return result;
-    }
-
-    inline std::string replace_invalid(const std::string& s)
-    {
-        std::string result;
-        replace_invalid(s.begin(), s.end(), std::back_inserter(result));
-        return result;
-    }
-
-    inline bool starts_with_bom(const std::string& s)
-    {
-        return starts_with_bom(s.begin(), s.end());
-    }
-
-} // namespace utf8
-
-#endif // header guard
-
diff --git a/utf8/utf8/unchecked.h b/utf8/utf8/unchecked.h
deleted file mode 100644
index 5ca6eb7..0000000
--- a/utf8/utf8/unchecked.h
+++ /dev/null
@@ -1,269 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-
-namespace utf8
-{
-    namespace unchecked
-    {
-        template <typename octet_iterator>
-        octet_iterator append(uint32_t cp, octet_iterator result)
-        {
-            if (cp < 0x80)                        // one octet
-                *(result++) = static_cast<uint8_t>(cp);
-            else if (cp < 0x800) {                // two octets
-                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else if (cp < 0x10000) {              // three octets
-                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else {                                // four octets
-                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
-                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator, typename output_iterator>
-        output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
-        {
-            while (start != end) {
-                octet_iterator sequence_start = start;
-                internal::utf_error err_code = utf8::internal::validate_next(start, end);
-                switch (err_code) {
-                    case internal::UTF8_OK :
-                        for (octet_iterator it = sequence_start; it != start; ++it)
-                            *out++ = *it;
-                        break;
-                    case internal::NOT_ENOUGH_ROOM:
-                        out = utf8::unchecked::append (replacement, out);
-                        start = end;
-                        break;
-                    case internal::INVALID_LEAD:
-                        out = utf8::unchecked::append (replacement, out);
-                        ++start;
-                        break;
-                    case internal::INCOMPLETE_SEQUENCE:
-                    case internal::OVERLONG_SEQUENCE:
-                    case internal::INVALID_CODE_POINT:
-                        out = utf8::unchecked::append (replacement, out);
-                        ++start;
-                        // just one replacement mark for the sequence
-                        while (start != end && utf8::internal::is_trail(*start))
-                            ++start;
-                        break;
-                }
-            }
-            return out;
-        }
-
-        template <typename octet_iterator, typename output_iterator>
-        inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
-        {
-            static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
-            return utf8::unchecked::replace_invalid(start, end, out, replacement_marker);
-        }
-
-        template <typename octet_iterator>
-        uint32_t next(octet_iterator& it)
-        {
-            uint32_t cp = utf8::internal::mask8(*it);
-            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
-            switch (length) {
-                case 1:
-                    break;
-                case 2:
-                    it++;
-                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
-                    break;
-                case 3:
-                    ++it;
-                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-                case 4:
-                    ++it;
-                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-                    ++it;
-                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-            }
-            ++it;
-            return cp;
-        }
-
-        template <typename octet_iterator>
-        uint32_t peek_next(octet_iterator it)
-        {
-            return utf8::unchecked::next(it);
-        }
-
-        template <typename octet_iterator>
-        uint32_t prior(octet_iterator& it)
-        {
-            while (utf8::internal::is_trail(*(--it))) ;
-            octet_iterator temp = it;
-            return utf8::unchecked::next(temp);
-        }
-
-        template <typename octet_iterator, typename distance_type>
-        void advance (octet_iterator& it, distance_type n)
-        {
-            const distance_type zero(0);
-            if (n < zero) {
-                // backward
-                for (distance_type i = n; i < zero; ++i)
-                    utf8::unchecked::prior(it);
-            } else {
-                // forward
-                for (distance_type i = zero; i < n; ++i)
-                    utf8::unchecked::next(it);
-            }
-        }
-
-        template <typename octet_iterator>
-        typename std::iterator_traits<octet_iterator>::difference_type
-        distance (octet_iterator first, octet_iterator last)
-        {
-            typename std::iterator_traits<octet_iterator>::difference_type dist;
-            for (dist = 0; first < last; ++dist)
-                utf8::unchecked::next(first);
-            return dist;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-        {
-            while (start != end) {
-                uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-                if (utf8::internal::is_lead_surrogate(cp)) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                }
-                result = utf8::unchecked::append(cp, result);
-            }
-            return result;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-        {
-            while (start < end) {
-                uint32_t cp = utf8::unchecked::next(start);
-                if (cp > 0xffff) { //make a surrogate pair
-                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-                }
-                else
-                    *result++ = static_cast<uint16_t>(cp);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-        {
-            while (start != end)
-                result = utf8::unchecked::append(*(start++), result);
-
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-        {
-            while (start < end)
-                (*result++) = utf8::unchecked::next(start);
-
-            return result;
-        }
-
-        // The iterator class
-        template <typename octet_iterator>
-          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-            octet_iterator it;
-            public:
-            iterator () {}
-            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
-            // the default "big three" are OK
-            octet_iterator base () const { return it; }
-            uint32_t operator * () const
-            {
-                octet_iterator temp = it;
-                return utf8::unchecked::next(temp);
-            }
-            bool operator == (const iterator& rhs) const
-            {
-                return (it == rhs.it);
-            }
-            bool operator != (const iterator& rhs) const
-            {
-                return !(operator == (rhs));
-            }
-            iterator& operator ++ ()
-            {
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return *this;
-            }
-            iterator operator ++ (int)
-            {
-                iterator temp = *this;
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return temp;
-            }
-            iterator& operator -- ()
-            {
-                utf8::unchecked::prior(it);
-                return *this;
-            }
-            iterator operator -- (int)
-            {
-                iterator temp = *this;
-                utf8::unchecked::prior(it);
-                return temp;
-            }
-          }; // class iterator
-
-    } // namespace utf8::unchecked
-} // namespace utf8
-
-
-#endif // header guard
-
diff --git a/utf8/utf8_fwrap.h b/utf8/utf8_fwrap.h
deleted file mode 100644
index 5d41b6b..0000000
--- a/utf8/utf8_fwrap.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _UTF8_FWRAP_HPP
-#define _UTF8_FWRAP_HPP
-
-#include <utf8.h>
-#include <string>
-#include <iterator>
-#include <stdexcept>
-#include <cstdio>
-#include <cwchar>
-#include <stdint.h>
-
-#ifdef _WIN32
-	#define utf32to8 utf16to8
-#endif
-
-inline wint_t fgetwc_u8(FILE *in) {
-#ifdef _WIN32
-	struct _cps {
-		FILE *f = 0;
-		wchar_t c = 0;
-	};
-	static _cps cps[4];
-
-	for (auto& cp : cps) {
-		if (cp.f == in) {
-			cp.f = 0;
-			return cp.c;
-		}
-	}
-#endif
-
-	int32_t rv = 0;
-	int c = 0, i = 0;
-	char buf[4];
-	if ((c = fgetc_unlocked(in)) != EOF) {
-		buf[i++] = static_cast<char>(c);
-		if ((c & 0xF0) == 0xF0) {
-			if (fread_unlocked(buf+i, 1, 3, in) != 3) {
-				throw std::runtime_error("Could not read 3 expected bytes from stream");
-			}
-			i += 3;
-		}
-		else if ((c & 0xE0) == 0xE0) {
-			if (fread_unlocked(buf+i, 1, 2, in) != 2) {
-				throw std::runtime_error("Could not read 2 expected bytes from stream");
-			}
-			i += 2;
-		}
-		else if ((c & 0xC0) == 0xC0) {
-			if (fread_unlocked(buf+i, 1, 1, in) != 1) {
-				throw std::runtime_error("Could not read 1 expected byte from stream");
-			}
-			i += 1;
-		}
-	}
-	if (i == 0 && c == EOF) {
-		rv = WEOF;
-	}
-	else {
-#ifdef _WIN32
-		wchar_t u16[2] = {};
-		utf8::unchecked::utf8to16(buf, buf+i, u16);
-
-		if (u16[1]) {
-			for (auto& cp : cps) {
-				if (cp.f == 0) {
-					cp.f = in;
-					cp.c = u16[1];
-					return u16[0];
-				}
-			}
-			throw std::runtime_error("Not enough space to store UTF-16 high surrogate");
-		}
-		rv = u16[0];
-#else
-		utf8::unchecked::utf8to32(buf, buf+i, &rv);
-#endif
-	}
-	return static_cast<wint_t>(rv);
-}
-
-inline wint_t fputwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	if (fwrite_unlocked(buf, 1, e-buf, out) != static_cast<size_t>(e-buf)) {
-		return WEOF;
-	}
-
-	return wc;
-}
-
-inline int fputws_u8(const wchar_t* str, FILE *out) {
-	static std::string buf;
-	buf.clear();
-	size_t len = wcslen(str);
-	utf8::unchecked::utf32to8(str, str+len, std::back_inserter(buf));
-	if (fwrite_unlocked(&buf[0], 1, buf.size(), out) != buf.size()) {
-		return WEOF;
-	}
-
-	return 1;
-}
-
-inline wint_t ungetwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	for (char *b = buf ; b != e ; ++b) {
-		if (ungetc(*b, out) == EOF) {
-			return WEOF;
-		}
-	}
-
-	return wc;
-}
-
-#ifdef fgetwc_unlocked
-	#undef fgetwc_unlocked
-#endif
-#define fgetwc_unlocked fgetwc_u8
-
-#ifdef fputwc_unlocked
-	#undef fputwc_unlocked
-#endif
-#define fputwc_unlocked fputwc_u8
-
-#ifdef fputws_unlocked
-	#undef fputws_unlocked
-#endif
-#define fputws_unlocked fputws_u8
-
-#ifdef ungetwc
-	#undef ungetwc
-#endif
-#define ungetwc ungetwc_u8
-
-#ifdef _WIN32
-	#undef utf32to8
-#endif
-
-#endif