commit 09429754c11bc63786d2e12dbd1d2540de96b4e6
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Wed Jun 30 08:50:26 2021 -0500

    ICU Stuff (#127)
    
    ICU changes
    - replace all `std::wstring` and related types with `UString`
    - move input streams to `lttoolbox/input_file.h`
    - move output streams to `UFILE*`
    - switch transfer regular expressions from PCRE to ICU
    - use lttoolbox case functions
    
    efficiency, readability, and code style changes
    - copy `.editorconfig` file from lttoolbox
    - eliminate use of `void*` in transfer
    - prefer `.clear()` and `.empty()` over `= ""` and `== ""`
    - prefer range-for loops
    - simplify file-closing code in `tagger.cc`
    - remove unused `transfer_word_list`
    
    helper function, dependency, and code structure changes
    - unbundle utfcpp and rely on installed version
    - incorporate m4 changes suggested in #125
    - move `StringUtils` to lttoolbox
    - add transfer regex optimizer from apertium-recursive
    - add `transfer_base` to combine shared code from `transfer`, `interchunk`, and `postchunk`
    - use iterator from `lttoolbox/xml_walk_util.h` to simplify transfer code
    
    other
    - add `<reject-current-rule>` in interchunk and postchunk (closes #34)
    - reset transfer variables on null flush (closes #101)

diff --git a/.editorconfig b/.editorconfig
new file mode 100755
index 0000000..dd10a25
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,18 @@
+# https://editorconfig.org/
+root = yes
+
+[*]
+charset = utf-8
+end_of_line = lf
+indent_size = 4
+indent_style = tab
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+[**.cc]
+indent_size = 2
+indent_style = space
+
+[**.h]
+indent_size = 2
+indent_style = space
diff --git a/Makefile.am b/Makefile.am
index 444db77..dc6873f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -15,7 +15,7 @@ if HAVE_PYTHON_BINDINGS
 SUBDIRS += python
 endif
 
-EXTRA_DIST=autogen.sh README-MODES apertium.m4 utf8 tests
+EXTRA_DIST=autogen.sh README-MODES apertium.m4 tests
 
 install-data-local:
 	mkdir -p $(DESTDIR)$(modesdir)
diff --git a/apertium.m4 b/apertium.m4
index 46c8a6a..6b8012e 100644
--- a/apertium.m4
+++ b/apertium.m4
@@ -109,19 +109,25 @@ AC_DEFUN([AP_MKINCLUDE],
   cat >$srcdir/ap_include.am <<EOF
 
 modes/%.mode: modes.xml
-	apertium-validate-modes modes.xml
-	apertium-gen-modes modes.xml
+	apertium-validate-modes \@S|@<
+	apertium-gen-modes \@S|@<
 
 apertium_modesdir=\$(prefix)/share/apertium/modes/
-install-modes:
-	apertium-gen-modes -f modes.xml \$(prefix)/share/apertium/\$(BASENAME)
+install-modes: modes.xml
+	apertium-gen-modes -f \@S|@< \$(prefix)/share/apertium/\$(BASENAME)
 	\$(MKDIR_P) \$(DESTDIR)\$(apertium_modesdir)
-	modes=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' modes.xml | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\
+	modes=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' \@S|@< | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\
 		if test -n "\$\$modes"; then \\
 			\$(INSTALL_DATA) \$\$modes \$(DESTDIR)\$(apertium_modesdir); \\
 			rm \$\$modes; \\
 		fi
 
+uninstall-modes: modes.xml
+	files=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' \@S|@< | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\
+	if test -n "\$\$files"; then \\
+		dir=\$(DESTDIR)\$(apertium_modesdir); \$(am__uninstall_files_from_dir)
+	fi
+
 .deps/.d:
 	\$(MKDIR_P) .deps
 	touch \$[]@
diff --git a/apertium/Makefile.am b/apertium/Makefile.am
index 48223c4..b731e60 100644
--- a/apertium/Makefile.am
+++ b/apertium/Makefile.am
@@ -38,7 +38,6 @@ h_sources = a.h \
 	    stream_tagger.h \
 	    streamed_type.h \
 	    string_to_wostream.h \
-	    string_utils.h \
 	    shell_utils.h \
 	    tag.h \
 	    tagger.h \
@@ -63,19 +62,19 @@ h_sources = a.h \
 	    tmx_trail_postprocessors.h \
 	    tmx_translate.h \
 	    tmx_words.h \
+		transfer_base.h \
 	    transfer_data.h \
 	    transfer.h \
 	    transfer_instr.h \
 	    transfer_mult.h \
+		transfer_regex.h \
 	    transfer_token.h \
 	    transfer_word.h \
-	    transfer_word_list.h \
 	    trx_reader.h \
 	    tsx_reader.h \
 	    ttag.h \
 	    unigram_tagger.h \
 	    unlocked_cstdio.h \
-	    utf_converter.h \
 	    utils.h \
 	    xml_reader.h
 
@@ -109,7 +108,6 @@ cc_sources = a.cc \
 	     sentence_stream.cc \
 	     stream.cc \
 	     stream_tagger.cc \
-	     string_utils.cc \
 	     shell_utils.cc \
 	     tag.cc \
 	     tagger.cc \
@@ -130,16 +128,16 @@ cc_sources = a.cc \
 	     tmx_trail_postprocessors.cc \
 	     tmx_translate.cc \
 	     transfer.cc \
+		 transfer_base.cc \
 	     transfer_data.cc \
 	     transfer_instr.cc \
 	     transfer_mult.cc \
+		 transfer_regex.cc \
 	     transfer_token.cc \
 	     transfer_word.cc \
-	     transfer_word_list.cc \
 	     trx_reader.cc \
 	     tsx_reader.cc \
 	     unigram_tagger.cc \
-	     utf_converter.cc \
 	     xml_reader.cc
 
 library_includedir = $(includedir)/$(PACKAGE_NAME)-$(VERSION_API)/$(PACKAGE_NAME)
@@ -301,7 +299,7 @@ apertium_gen_modes_SOURCES = gen_modes.cc
 apertium_gen_modes_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES)
 
 if WINDOWS
-AM_CPPFLAGS = -I$(top_srcdir)/utf8 -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
 else
 AM_CPPFLAGS = -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
 endif
diff --git a/apertium/adapt_docx.cc b/apertium/adapt_docx.cc
index 4699a0c..91f4937 100644
--- a/apertium/adapt_docx.cc
+++ b/apertium/adapt_docx.cc
@@ -30,7 +30,7 @@
 #include <fcntl.h>
 #endif
 
-#include "utf8/utf8.h"
+#include <utf8.h>
 #include "unicode/uchar.h"
 
 using namespace std;
diff --git a/apertium/align.cc b/apertium/align.cc
index 4b814a5..68f0bd3 100644
--- a/apertium/align.cc
+++ b/apertium/align.cc
@@ -15,7 +15,7 @@
 
 #include "align.h"
 #include "linebreak.h"
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <iomanip>
 #include <ios>
@@ -33,7 +33,7 @@ void align::align_(
   for (std::vector<std::pair<std::string, std::string> >::const_iterator i_ =
            string_.begin();
        i_ != string_.end(); ++i_) {
-    std::wcerr << "  " << std::setw(width_) << std::left << i_->first
+    std::cerr << "  " << std::setw(width_) << std::left << i_->first
                << std::setw(0)
                << linebreak::linebreak_(i_->second, width_ + 2, width_ + 4)
                << '\n';
diff --git a/apertium/analysis.cc b/apertium/analysis.cc
index 5e4e241..b3394ba 100644
--- a/apertium/analysis.cc
+++ b/apertium/analysis.cc
@@ -16,14 +16,12 @@
 #include "analysis.h"
 
 #include "exception.h"
-#include "morpheme.h"
-
-#include <string>
-#include <vector>
 
 namespace Apertium {
-std::wostream &operator<<(std::wostream &Stream_, const Analysis &Analysis_) {
-  Stream_ << static_cast<std::wstring>(Analysis_);
+std::ostream &operator<<(std::ostream &Stream_, const Analysis &Analysis_) {
+  ::operator<<(Stream_, static_cast<UString>(Analysis_));
+  // Qualified call on purpose: inside namespace Apertium, the unqualified form
+  // `Stream_ << static_cast<UString>(Analysis_)` fails to find the global overload.
   return Stream_;
 }
 
@@ -35,21 +33,22 @@ bool operator<(const Analysis &a, const Analysis &b) {
   return a.TheMorphemes < b.TheMorphemes;
 }
 
-Analysis::operator std::wstring() const {
+Analysis::operator UString() const {
   if (TheMorphemes.empty())
     throw Exception::Analysis::TheMorphemes_empty(
         "can't convert Analysis comprising empty Morpheme std::vector to "
-        "std::wstring");
+        "UString");
 
   std::vector<Morpheme>::const_iterator Morpheme_ = TheMorphemes.begin();
-  std::wstring wstring_ = *Morpheme_;
+  UString UString_ = *Morpheme_;
   ++Morpheme_;
 
   // Call .end() each iteration to save memory.
   for (; Morpheme_ != TheMorphemes.end(); ++Morpheme_) {
-    wstring_ += L"+" + static_cast<std::wstring>(*Morpheme_);
+    UString_ += '+';
+    UString_ += static_cast<UString>(*Morpheme_);
   }
 
-  return wstring_;
+  return UString_;
 }
 }
diff --git a/apertium/analysis.h b/apertium/analysis.h
index f70a966..194bc60 100644
--- a/apertium/analysis.h
+++ b/apertium/analysis.h
@@ -21,15 +21,16 @@
 #include <ostream>
 #include <string>
 #include <vector>
+#include <lttoolbox/ustring.h>
 
 namespace Apertium {
 class Analysis {
 public:
-  friend std::wostream &operator<<(std::wostream &Stream_,
-                                   const Analysis &Analysis_);
+  friend std::ostream &operator<<(std::ostream &Stream_,
+                                  const Analysis &Analysis_);
   friend bool operator==(const Analysis &a, const Analysis &b);
   friend bool operator<(const Analysis &a, const Analysis &b);
-  operator std::wstring() const;
+  operator UString() const;
   std::vector<Morpheme> TheMorphemes;
 };
 }
diff --git a/apertium/apertium-multiple-translations.cc b/apertium/apertium-multiple-translations.cc
index ecdebab..f7a6443 100644
--- a/apertium/apertium-multiple-translations.cc
+++ b/apertium/apertium-multiple-translations.cc
@@ -61,30 +61,25 @@ int main(int argc, char *argv[])
     }
   }
 
-  FILE *input = stdin, *output = stdout;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
   if(argc >= 4)
   {
-    input = fopen(argv[3], "r");
-    if(!input)
-    {
+    if (!input.open(argv[3])) {
       cerr << "Error: can't open input file '" << argv[3] << "'." << endl;
       exit(EXIT_FAILURE);
     }
     if(argc == 5)
     {
-      output = fopen(argv[4], "w");
+      output = u_fopen(argv[4], "w", NULL, NULL);
       if(!output)
       {
-	cerr << "Error: can't open output file '";
-	cerr << argv[4] << "'." << endl;
-	exit(EXIT_FAILURE);
+        cerr << "Error: can't open output file '";
+        cerr << argv[4] << "'." << endl;
+        exit(EXIT_FAILURE);
       }
     }
   }
-#ifdef _MSC_VER
-  _setmode(_fileno(input), _O_U8TEXT);
-  _setmode(_fileno(output), _O_U8TEXT);
-#endif
 
   TransferMult t;
   t.read(argv[1], argv[2]);
diff --git a/apertium/apertium-postlatex-raw.l b/apertium/apertium-postlatex-raw.l
index a7eb161..05a4094 100644
--- a/apertium/apertium-postlatex-raw.l
+++ b/apertium/apertium-postlatex-raw.l
@@ -6,10 +6,7 @@
 
 #include <cstdlib>
 #include <iostream>
-#include <map>
 #include <string>
-#include <vector>
-#include <apertium/latex_accentsmap.h>
 
 extern "C" {
 #if !defined(__STDC__)
@@ -19,51 +16,13 @@ extern "C" {
 }
 
 #include <lttoolbox/lt_locale.h>
-#include <lttoolbox/ltstr.h>
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
 #include <apertium/unlocked_cstdio.h>
-#ifdef _WIN32
-#include <io.h>
-#include <fcntl.h>
-#endif
 
 using namespace std;
 
-AccentsMap accentsMap(true);
-wstring closesym = L"";
-string memconv = "";
-
-wstring convertir(string const &multibyte, int const length)
-{
-  memconv.append(multibyte.c_str(), length);
-  int tam = memconv.size();
-  wchar_t *retval = new wchar_t[tam+1];
-  size_t l = mbstowcs(retval, memconv.c_str(), tam);
-
-  if(l == ((size_t) -1))
-  {
-    delete[] retval;
-    if(memconv.size() >= 4)
-    {
-      wcerr << L"Warning: wrong encoding" << endl;
-    }
-    return L"";
-  }
-  else
-  {
-    memconv = "";
-    retval[l] = 0;
-    wstring ret = retval;
-    delete[] retval;
-    return ret;
-  }
-}
-
-
-
-
 %}
 
 
@@ -79,106 +38,107 @@ wstring convertir(string const &multibyte, int const length)
 
 
 &quot;	{
-	fputws(L"\"",yyout);
+	fputs_unlocked("\"",yyout);
 }
 &apos;	{
-	fputws(L"\'",yyout);
+	fputs_unlocked("\'",yyout);
 }
 &lt;	{
-	fputws(L"<",yyout);
+	fputs_unlocked("<",yyout);
 }
 &gt;	{
-	fputws(L">",yyout);
+	fputs_unlocked(">",yyout);
 }
 &amp;	{
-	fputws(L"\\&",yyout);
+	fputs_unlocked("\\&",yyout);
 }
 \<AMP\/\>	{
-	fputws(L"&",yyout);
+	fputs_unlocked("&",yyout);
 }
 
 \<LEFTESCAPEDBRACE\/\>	{
-        fputws(L"\\{", yyout);
+        fputs_unlocked("\\{", yyout);
 }
 
 \<RIGHTESCAPEDBRACE\/\>	{
-        fputws(L"\\}", yyout);
+        fputs_unlocked("\\}", yyout);
 }
 
 \<ESCAPEDPERCENT\/\>	{
-        fputws(L"\\%", yyout);
+        fputs_unlocked("\\%", yyout);
 }
 
 
 
 ¿	{
-	fputws(L"?`",yyout);
+	fputs_unlocked("?`",yyout);
 }
 
 ¡	{
-	fputws(L"!`",yyout);
+	fputs_unlocked("!`",yyout);
 }
 
 
 
 \<MATH_DOLLARS\>	{
 	BEGIN(mathenv);
-	fputws(L"$$",yyout);
+	fputs_unlocked("$$",yyout);
 }
 
 <mathenv>\<\/MATH_DOLLARS\>	{
-	fputws(L"$$",yyout);
+	fputs_unlocked("$$",yyout);
 	BEGIN(0);
 }
 
 
 \<MATH_DOLLAR\>	{
 	BEGIN(mathenv);
-	fputws(L"$",yyout);
+	fputs_unlocked("$",yyout);
 }
 
 <mathenv>\<\/MATH_DOLLAR\>	{
-	fputws(L"$",yyout);
+	fputs_unlocked("$",yyout);
 	BEGIN(0);
 }
 
 \<MATH_PAR\>	{
-	fputws(L"\\(",yyout);
+	fputs_unlocked("\\(",yyout);
 }
 
 \<\/MATH_PAR\>	{
-	fputws(L"\\)",yyout);
+	fputs_unlocked("\\)",yyout);
 }
 
 \<MATH_BRA\>	{
-	fputws(L"\\[",yyout);
+	fputs_unlocked("\\[",yyout);
 }
 
 \<\/MATH_BRA\>	{
-	fputws(L"\\]",yyout);
+	fputs_unlocked("\\]",yyout);
 }
 
 
 \<CONTENTS\>	{
-	fputws(L"{",yyout);
+	fputs_unlocked("{",yyout);
 }
 
 \<\/CONTENTS\>	{
-	fputws(L"}",yyout);
+	fputs_unlocked("}",yyout);
 }
 
 &NBSP;	{
-	fputws(L"~",yyout);
+	fputs_unlocked("~",yyout);
 }
 
 
 
 \<BR\/\>	{
-	fputws(L"\\\\",yyout);
+	fputs_unlocked("\\\\",yyout);
 }
 
 \<COMMENT\>[^\<]*	{
-	fputws((wstring(L"\%")+convertir(yytext+9,yyleng-9)).c_str(),yyout);
+	fputs_unlocked("\%", yyout);
+	fwrite(yytext+9, 1, yyleng-9, yyout);
 }
 
 \<\/COMMENT\>	{
@@ -186,14 +146,15 @@ wstring convertir(string const &multibyte, int const length)
 
 
 \<PARAM\>[^\<]*	{
-	fputws((wstring(L"[")+convertir(yytext+7,yyleng-7)).c_str(),yyout);
+	fputs_unlocked("[", yyout);
+	fwrite(yytext+7, 1, yyleng-7, yyout);
 }
 \<\/PARAM\>	{
-	fputws(L"]", yyout);
+	fputs_unlocked("]", yyout);
 }
 
 \<VERB\>	{
-        fputws(L"\\verb", yyout);
+        fputs_unlocked("\\verb", yyout);
 }
 
 \<\/VERB\>	{
@@ -202,40 +163,51 @@ wstring convertir(string const &multibyte, int const length)
 
 
 \<[a-zA-Z0-9]+\>	{
-	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-2)+wstring(L"}")).c_str(),yyout);
+	fputs_unlocked("\\begin{", yyout);
+	fwrite(yytext+1, 1, yyleng-2, yyout);
+	fputs_unlocked("}", yyout);
 }
 
 \<[a-zA-Z0-9]+_STAR\>	{
-	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-7)+wstring(L"*}")).c_str(),yyout);
+	fputs_unlocked("\\begin{", yyout);
+	fwrite(yytext+1, 1, yyleng-7, yyout);
+	fputs_unlocked("*}", yyout);
 }
 
 \<\/[a-zA-Z0-9]+\>	{
-	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-3)+wstring(L"}")).c_str(),yyout);
+	fputs_unlocked("\\end{", yyout);
+	fwrite(yytext+2, 1, yyleng-3, yyout);
+	fputs_unlocked("}", yyout);
 }
 
 \<\/[a-zA-Z0-9]+_STAR\>	{
-	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-8)+wstring(L"*}")).c_str(),yyout);
+	fputs_unlocked("\\end{", yyout);
+	fwrite(yytext+2, 1, yyleng-8, yyout);
+	fputs_unlocked("*}", yyout);
 }
 
 \<[a-zA-Z0-9]+\/\>	{
-	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-3)).c_str(),yyout);
+	fputs_unlocked("\\", yyout);
+	fwrite(yytext+1, 1, yyleng-3, yyout);
 }
 
 \<[a-zA-Z0-9]+_STAR\/\>	{
-	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-8)+wstring(L"*")).c_str(),yyout);
+  fputc_unlocked('\\', yyout);
+  fwrite(yytext+1, 1, yyleng-8, yyout);
+  fputc_unlocked('*', yyout);
 }
 
 \#	{
-        fputws(L"\\#", yyout);
+        fputs_unlocked("\\#", yyout);
 }
 
 
 (.|\n)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+	fwrite(yytext, 1, yyleng, yyout);
 }
 
 <mathenv>(.|\n)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+	fwrite(yytext, 1, yyleng, yyout);
 }
 
 
@@ -289,10 +261,6 @@ int main(int argc, char *argv[])
       break;
   }
 
-#ifdef _MSC_VER
-  _setmode(_fileno(yyin), _O_U8TEXT);
-  _setmode(_fileno(yyout), _O_U8TEXT);
-#endif
   // prevent warning message
   yy_push_state(1);
   yy_top_state();
diff --git a/apertium/apertium-postlatex.l b/apertium/apertium-postlatex.l
index c33673d..4bf1457 100644
--- a/apertium/apertium-postlatex.l
+++ b/apertium/apertium-postlatex.l
@@ -19,11 +19,11 @@ extern "C" {
 }
 
 #include <lttoolbox/lt_locale.h>
-#include <lttoolbox/ltstr.h>
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
 #include <apertium/unlocked_cstdio.h>
+#include <utf8.h>
 #ifdef _WIN32
 #include <io.h>
 #include <fcntl.h>
@@ -32,37 +32,6 @@ extern "C" {
 using namespace std;
 
 AccentsMap accentsMap(true);
-wstring closesym = L"";
-string memconv = "";
-
-wstring convertir(string const &multibyte, int const length)
-{
-  memconv.append(multibyte.c_str(), length);
-  int tam = memconv.size();
-  wchar_t *retval = new wchar_t[tam+1];
-  size_t l = mbstowcs(retval, memconv.c_str(), tam);
-
-  if(l == ((size_t) -1))
-  {
-    delete[] retval;
-    if(memconv.size() >= 4)
-    {
-      wcerr << L"Warning: wrong encoding" << endl;
-    }
-    return L"";
-  }
-  else
-  {
-    memconv = "";
-    retval[l] = 0;
-    wstring ret = retval;
-    delete[] retval;
-    return ret;
-  }
-}
-
-
-
 
 %}
 
@@ -79,104 +48,105 @@ wstring convertir(string const &multibyte, int const length)
 
 
 &quot;	{
-	fputws(L"\"",yyout);
+	fputs("\"",yyout);
 }
 &apos;	{
-	fputws(L"\'",yyout);
+	fputs("\'",yyout);
 }
 &lt;	{
-	fputws(L"<",yyout);
+	fputs("<",yyout);
 }
 &gt;	{
-	fputws(L">",yyout);
+	fputs(">",yyout);
 }
 &amp;	{
-	fputws(L"\\&",yyout);
+	fputs("\\&",yyout);
 }
 \<AMP\/\>	{
-	fputws(L"&",yyout);
+	fputs("&",yyout);
 }
 
 \<LEFTESCAPEDBRACE\/\>	{
-        fputws(L"\\{", yyout);
+        fputs("\\{", yyout);
 }
 
 \<RIGHTESCAPEDBRACE\/\>	{
-        fputws(L"\\}", yyout);
+        fputs("\\}", yyout);
 }
 
 \<ESCAPEDPERCENT\/\>	{
-        fputws(L"\\%", yyout);
+        fputs("\\%", yyout);
 }
 
 ¿	{
-	fputws(L"?`",yyout);
+	fputs("?`",yyout);
 }
 
 ¡	{
-	fputws(L"!`",yyout);
+	fputs("!`",yyout);
 }
 
 
 
 \<MATH_DOLLARS\>	{
 	BEGIN(mathenv);
-	fputws(L"$$",yyout);
+	fputs("$$",yyout);
 }
 
 <mathenv>\<\/MATH_DOLLARS\>	{
-	fputws(L"$$",yyout);
+	fputs("$$",yyout);
 	BEGIN(0);
 }
 
 
 \<MATH_DOLLAR\>	{
 	BEGIN(mathenv);
-	fputws(L"$",yyout);
+	fputs("$",yyout);
 }
 
 <mathenv>\<\/MATH_DOLLAR\>	{
-	fputws(L"$",yyout);
+	fputs("$",yyout);
 	BEGIN(0);
 }
 
 \<MATH_PAR\>	{
-	fputws(L"\\(",yyout);
+	fputs("\\(",yyout);
 }
 
 \<\/MATH_PAR\>	{
-	fputws(L"\\)",yyout);
+	fputs("\\)",yyout);
 }
 
 \<MATH_BRA\>	{
-	fputws(L"\\[",yyout);
+	fputs("\\[",yyout);
 }
 
 \<\/MATH_BRA\>	{
-	fputws(L"\\]",yyout);
+	fputs("\\]",yyout);
 }
 
 
 \<CONTENTS\>	{
-	fputws(L"{",yyout);
+	fputs("{",yyout);
 }
 
 \<\/CONTENTS\>	{
-	fputws(L"}",yyout);
+	fputs("}",yyout);
 }
 
 &NBSP;	{
-	fputws(L"~",yyout);
+	fputs("~",yyout);
 }
 
 
 
 \<BR\/\>	{
-	fputws(L"\\\\",yyout);
+	fputs("\\\\",yyout);
 }
 
 \<COMMENT\>[^\<]*	{
-	fputws((wstring(L"\%")+convertir(yytext+9,yyleng-9)).c_str(),yyout);
+    fputs("\%", yyout);
+    fwrite(yytext+9, 1, yyleng-9, yyout);
 }
 
 \<\/COMMENT\>	{
@@ -184,14 +154,15 @@ wstring convertir(string const &multibyte, int const length)
 
 
 \<PARAM\>[^\<]*	{
-	fputws((wstring(L"[")+convertir(yytext+7,yyleng-7)).c_str(),yyout);
+    fputc('[', yyout);
+    fwrite(yytext+7, 1, yyleng-7, yyout);
 }
 \<\/PARAM\>	{
-	fputws(L"]", yyout);
+	fputs("]", yyout);
 }
 
 \<VERB\>	{
-        fputws(L"\\verb", yyout);
+        fputs("\\verb", yyout);
 }
 
 \<\/VERB\>	{
@@ -201,99 +172,118 @@ wstring convertir(string const &multibyte, int const length)
 
 
 ł	{
-	fputws(L"\\l", yyout);
+	fputs("\\l", yyout);
 }
 
 
 œ	{
-	fputws(L"{\\oe}",yyout);
+	fputs("{\\oe}",yyout);
 }
 
 Œ	{
-	fputws(L"{\\OE}",yyout);
+	fputs("{\\OE}",yyout);
 }
 
 æ	{
-	fputws(L"{\\ae}",yyout);
+	fputs("{\\ae}",yyout);
 }
 
 Æ	{
-	fputws(L"{\\AE}",yyout);
+	fputs("{\\AE}",yyout);
 }
 
 å	{
-	fputws(L"{\\aa}",yyout);
+	fputs("{\\aa}",yyout);
 }
 
 Å	{
-	fputws(L"{\\AA}",yyout);
+	fputs("{\\AA}",yyout);
 }
 
 ø	{
-	fputws(L"{\\o}",yyout);
+	fputs("{\\o}",yyout);
 }
 
 Ø	{
-	fputws(L"{\\O}",yyout);
+	fputs("{\\O}",yyout);
 }
 
 ß	{
-	fputws(L"{\\ss}",yyout);
+	fputs("{\\ss}",yyout);
 }
 
 \<[a-zA-Z0-9]+\>	{
-	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-2)+wstring(L"}")).c_str(),yyout);
+    fputs("\\begin{", yyout);
+    fwrite(yytext+1, 1, yyleng-2, yyout);
+    fputc('}', yyout);
 }
 
 \<HASH_[0-9]+\/\>	{
-	fputws((wstring(L"\\#")+convertir(yytext+6,yyleng-8)).c_str(),yyout);
+    fputs("\\#", yyout);
+    fwrite(yytext+6, 1, yyleng-8, yyout);
 }
 
 \<HASH\/\>		{
-        fputws(L"\\#", yyout);
+        fputs("\\#", yyout);
 }
 
 \<[a-zA-Z0-9]+_STAR\>	{
-	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-7)+wstring(L"*}")).c_str(),yyout);
+    fputs("\\begin{", yyout);
+    fwrite(yytext+1, 1, yyleng-7, yyout);
+    fputs("*}", yyout);
 }
 
 \<\/[a-zA-Z0-9]+\>	{
-	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-3)+wstring(L"}")).c_str(),yyout);
+    fputs("\\end{", yyout);
+    fwrite(yytext+2, 1, yyleng-3, yyout);
+    fputc('}', yyout);
 }
 
 \<\/[a-zA-Z0-9]+_STAR\>	{
-	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-8)+wstring(L"*}")).c_str(),yyout);
+    fputs("\\end{", yyout);
+    fwrite(yytext+2, 1, yyleng-8, yyout);
+    fputs("*}", yyout);
 }
 
 \<[a-zA-Z0-9]+\/\>	{
-	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-3)).c_str(),yyout);
+    fputc('\\', yyout);
+    fwrite(yytext+1, 1, yyleng-3, yyout);
 }
 
 \<[a-zA-Z0-9]+_STAR\/\>	{
-	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-8)+wstring(L"*")).c_str(),yyout);
+    fputc('\\', yyout);
+    fwrite(yytext+1, 1, yyleng-8, yyout);
+    fputc('*', yyout);
 }
  /*NO ENTIENDO ESTA REGLA
  \#	{
-        fputws(L"\\#", yyout);
+        fputs("\\#", yyout);
  }*/
 
 
 [^A-Za-z\n]	{
-	wstring wt = convertir(yytext,yyleng);
-	wstring wa = accentsMap.get(wt);
-	if( wa == L"" )
-		fputws(wt.c_str(),yyout);
- 	else
-		fputws(wstring(L"\\"+wa.substr(0,1)+L"{"+wa.substr(1)+L"}").c_str(),yyout);
+    UString wt = to_ustring(yytext);
+	UString wa = accentsMap.get(wt);
+	if (wa.empty()) {
+		fputus(wt, yyout);
+ 	} else {
+        UString tmp;
+        tmp += '\\';
+        tmp += wa[0];
+        tmp += '{';
+        tmp += wa.substr(1);
+        tmp += '}';
+        fputus(tmp, yyout);
+    }
 }
 
 
 (.|\n)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+	fputs(yytext, yyout);
 }
 
 <mathenv>(.|\n)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+	fputs(yytext, yyout);
 }
 
 
@@ -323,7 +313,7 @@ int main(int argc, char *argv[])
     base++;
   }
 
- if((argc-base) > 4)
+  if((argc-base) > 4)
   {
     usage(argv[0]);
   }
@@ -347,10 +337,6 @@ int main(int argc, char *argv[])
       break;
   }
 
-#ifdef _MSC_VER
-  _setmode(_fileno(yyin), _O_U8TEXT);
-  _setmode(_fileno(yyout), _O_U8TEXT);
-#endif
   // prevent warning message
   yy_push_state(1);
   yy_top_state();
diff --git a/apertium/apertium-prelatex.l b/apertium/apertium-prelatex.l
index 2bf7243..13faec1 100644
--- a/apertium/apertium-prelatex.l
+++ b/apertium/apertium-prelatex.l
@@ -10,7 +10,8 @@
 #include <string>
 #include <vector>
 #include <apertium/latex_accentsmap.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
+#include <apertium/string_to_wostream.h>
 
 extern "C" {
 #if !defined(__STDC__)
@@ -20,7 +21,6 @@ extern "C" {
 }
 
 #include <lttoolbox/lt_locale.h>
-#include <lttoolbox/ltstr.h>
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
@@ -33,38 +33,10 @@ extern "C" {
 using namespace std;
 
 AccentsMap accentsMap(false);
-wstring closesym = L"";
-string memconv = "";
+UString closesym;
 //For german babel detection
 bool ngermanbabel = false;
 
-wstring convertir(string const &multibyte, int const length)
-{
-  memconv.append(multibyte.c_str(), length);
-  int tam = memconv.size();
-  wchar_t *retval = new wchar_t[tam+1];
-  size_t l = mbstowcs(retval, memconv.c_str(), tam);
-
-  if(l == ((size_t) -1))
-  {
-    delete[] retval;
-    if(memconv.size() >= 4)
-    {
-      wcerr << L"Warning: wrong encoding" << endl;
-    }
-    return L"";
-  }
-  else
-  {
-    memconv = "";
-    retval[l] = 0;
-    wstring ret = retval;
-    delete[] retval;
-    return ret;
-  }
-}
-
-
 
 
 %}
@@ -84,25 +56,25 @@ wstring convertir(string const &multibyte, int const length)
 
 
 \\t\{..\}	{ //This information is lost
-	fputws(convertir(yytext+3,yyleng-4).c_str(),yyout);
+    fwrite(yytext+3, 1, yyleng-4, yyout);
 }
 \\l	{
-	fputws(L"ł", yyout);
+	fputs("ł", yyout);
 }
 
 \"[oOaAuUsS]	{ //When usepackage[ngerman]{babel} is present (not checked).
-	if(!ngermanbabel)
-		fputws(convertir(yytext,yyleng).c_str(),yyout);
-	else {
+	if(!ngermanbabel) {
+        fputs(yytext, yyout);
+    } else {
 		switch(yytext[1]){
-			case 'o': fputws(L"ö", yyout); break;
-			case 'O': fputws(L"Ö", yyout); break;
-			case 'a': fputws(L"ä", yyout); break;
-			case 'A': fputws(L"Ä", yyout); break;
-			case 'u': fputws(L"ü", yyout); break;
-			case 'U': fputws(L"Ü", yyout); break;
-			case 's': fputws(L"ß", yyout); break;
-			case 'S': fputws(L"ß", yyout); break;
+			case 'o': fputs("ö", yyout); break;
+			case 'O': fputs("Ö", yyout); break;
+			case 'a': fputs("ä", yyout); break;
+			case 'A': fputs("Ä", yyout); break;
+			case 'u': fputs("ü", yyout); break;
+			case 'U': fputs("Ü", yyout); break;
+			case 's': fputs("ß", yyout); break;
+			case 'S': fputs("ß", yyout); break;
 		}
 	}
 }
@@ -113,288 +85,304 @@ wstring convertir(string const &multibyte, int const length)
 	switch(yytext[1]){
 	case '^':
 		if(yytext[4]=='i')
-			fputws(L"î", yyout);
+			fputs("î", yyout);
 		else
-			fputws(L"ĵ",yyout);
+			fputs("ĵ",yyout);
 		break;
 	case '\"':
 		if(yytext[4]=='i')
-			fputws(L"ï",yyout);
+			fputs("ï",yyout);
 		else
-			fputws(L"j",yyout); //should actually be j with umlaut
+			fputs("j",yyout); //should actually be j with umlaut
 		break;
 	case '\'':
 		if(yytext[4]=='i')
-			fputws(L"í",yyout);
+			fputs("í",yyout);
 		else
-			fputws(L"j",yyout); //should actually be j with accent
+			fputs("j",yyout); //should actually be j with accent
 		break;
 	case '`':
 		if(yytext[4]=='i')
-			fputws(L"ì",yyout);
+			fputs("ì",yyout);
 		else
-			fputws(L"k",yyout); //should actually be j with accent
+			fputs("k",yyout); //should actually be j with accent
 		break;
 	}
 }
 
 \{\\oe\}	{
-	fputws(L"œ",yyout);
+	fputs("œ",yyout);
 }
 
 \{\\OE\}	{
-	fputws(L"Œ",yyout);
+	fputs("Œ",yyout);
 }
 
 \{\\ae\}	{
-	fputws(L"æ",yyout);
+	fputs("æ",yyout);
 }
 
 \{\\AE\}	{
-	fputws(L"Æ",yyout);
+	fputs("Æ",yyout);
 }
 
 \{\\aa\}	{
-	fputws(L"å",yyout);
+	fputs("å",yyout);
 }
 
 \{\\AA\}	{
-	fputws(L"Å",yyout);
+	fputs("Å",yyout);
 }
 
 \{\\o\}	{
-	fputws(L"ø",yyout);
+	fputs("ø",yyout);
 }
 
 \{\\O\}	{
-	fputws(L"Ø",yyout);
+	fputs("Ø",yyout);
 }
 
 \{\\ss\}	{
-	fputws(L"ß",yyout);
+	fputs("ß",yyout);
 }
 
 \\#[0-9]+	{
-        fputws((wstring(L"<HASH_")+convertir(yytext+2,yyleng-2)+wstring(L"/>")).c_str(),yyout);
+        fputs("<HASH_", yyout);
+        fwrite(yytext+2, 1, yyleng-2, yyout);
+        fputs("/>", yyout);
 }
 
 \\#		{
-        fputws(L"<HASH/>", yyout);
+        fputs("<HASH/>", yyout);
 }
 
 \\[`'\^\"H~ck=b.druv]((\{.\})|(.))	{
-	wstring ws = convertir(yytext,yyleng).c_str();
-
-	wstring result = accentsMap.get(
-	  L""+ws.substr(1,1)+ (
-	    (yyleng==3)? ws.substr(2,1) : ws.substr(3,1)
-	  ));
-
-        if(result == L"")
-        {
-	  fputws((wstring(L"<")+convertir(yytext+1,yyleng)+wstring(L"/>")).c_str(),yyout);
-        }
-        else
-        {
-  	  fputws(result.c_str(), yyout);
-        }
+    UString ws = to_ustring(yytext);
+    UString key;
+    key += ws[1];
+    key += (yyleng == 3) ? ws[2] : ws[3];
+
+	UString result = accentsMap.get(key);
+    if (result.empty()) {
+      fputc('<', yyout);
+      fwrite(yytext+1, 1, yyleng-1, yyout);
+      fputs("/>", yyout);
+    } else {
+      fputus(result, yyout);
+    }
 }
 
 \\\\	{
-	fputws(L"<BR/>",yyout);
+	fputs("<BR/>",yyout);
 }
 
 \%.*	{
-	if(yytext[yyleng-1]=='\r')
-		fputws((wstring(L"<COMMENT>")+convertir(yytext+1,yyleng-2)+wstring(L"</COMMENT>\r")).c_str(),yyout);
-	else
-		fputws((wstring(L"<COMMENT>")+convertir(yytext+1,yyleng-1)+wstring(L"</COMMENT>")).c_str(),yyout);
+    fputs("<COMMENT>", yyout);
+	if (yytext[yyleng-1] == '\r') {
+      fwrite(yytext+1, 1, yyleng-2, yyout);
+      fputs("</COMMENT>\r", yyout);
+	} else {
+      fwrite(yytext+1, 1, yyleng-1, yyout);
+      fputs("</COMMENT>", yyout);
+    }
 }
 
 \\usepackage\[[^\]]*\]	{
-	wstring ws = convertir(yytext+12,yyleng-13);
-	fputws((wstring(L"<usepackage/><PARAM>")+ws+wstring(L"</PARAM>")).c_str(), yyout);
-	if(ws.find(L"ngerman") != wstring::npos)
-		ngermanbabel = true;
+  fputs("<usepackage/><PARAM>", yyout);
+  // NOTE: 12 and yyleng-13 are byte offsets but are applied to UTF-16 units below,
+  // so this is only correct while the bracketed text is pure ASCII (hopefully always).
+  UString ws = to_ustring(yytext).substr(12, yyleng-13);
+  fputus(ws, yyout);
+  fputs("</PARAM>", yyout);
+  if(ws.find("ngerman"_u) != UString::npos)
+    ngermanbabel = true;
 }
 
 \[[^\]]*\]	{
-	fputws((wstring(L"<PARAM>")+convertir(yytext+1,yyleng-2)+wstring(L"</PARAM>")).c_str(), yyout);
+  fputs("<PARAM>", yyout);
+  fwrite(yytext+1, 1, yyleng-2, yyout);
+  fputs("</PARAM>", yyout);
 }
 
 \\begin[^a-zA-Z0-9_]	{
 	BEGIN(readbrackets);
-	closesym = L"";
+	closesym = ""_u;
 }
 
 \\end[^a-zA-Z0-9_]	{
 	BEGIN(readbrackets);
-	closesym = L"/";
+	closesym = "/"_u;
 }
 
 
 
 <readbrackets>[ \n\r\t]*\{?[ \n\r\t]*	{
-	wstring ws = convertir(yytext,yyleng);
-	int i = ws.find(L'{'); //remove it
-	if(i>=0)
-		ws = ws.substr(0,i)+ws.substr(i+1);
-	fputws(ws.c_str(),yyout);
+  UString ws = to_ustring(yytext);
+  int i = ws.find('{');
+  if (i >= 0) {
+    ws = ws.substr(0, i) + ws.substr(i+1);
+  }
+  fputus(ws, yyout);
 }
 
 <readbrackets>[a-zA-Z0-9]+\*	{
-	fputws((wstring(L"<")+closesym+convertir(yytext,yyleng-1)+wstring(L"_STAR>")).c_str(),yyout);
+  fputc('<', yyout);
+  fputus(closesym, yyout);
+  fwrite(yytext+0, 1, yyleng-1, yyout);
+  fputs("_STAR>", yyout);
 }
 
 <readbrackets>[a-zA-Z0-9]+	{
-	fputws((wstring(L"<")+closesym+convertir(yytext,yyleng)+wstring(L">")).c_str(),yyout);
+  fputc('<', yyout);
+  fputus(closesym, yyout);
+  fputs(yytext, yyout);
+  fputc('>', yyout);
 }
 
 <readbrackets>[ \n\r\t]*\}[ \n\r\t]*	{
 	BEGIN(0);
-	wstring ws = convertir(yytext,yyleng);
-	int i = ws.find(L'}'); //remove it
-	if(i>=0)
-		ws = ws.substr(0,i)+ws.substr(i+1);
-	fputws(ws.c_str(),yyout);
+    UString ws = to_ustring(yytext);
+    int i = ws.find('}');
+    if (i >= 0) {
+      ws = ws.substr(0, i) + ws.substr(i+1);
+    }
+    fputus(ws, yyout);
 }
 
 
 \\[A-Za-z]+\*	{
-	fputws((wstring(L"<")+convertir(yytext+1,yyleng-2)+wstring(L"_STAR/>")).c_str(),yyout);
+  fputc('<', yyout);
+  fwrite(yytext+1, 1, yyleng-2, yyout);
+  fputs("_STAR/>", yyout);
 }
 
 \\[A-Za-z]+	{
-	fputws((wstring(L"<")+convertir(yytext+1,yyleng)+wstring(L"/>")).c_str(),yyout);
+  fputc('<', yyout);
+  fwrite(yytext+1, 1, yyleng-1, yyout);
+  fputs("/>", yyout);
 }
 
 \\\{	{
-        fputws(L"<LEFTESCAPEDBRACE/>", yyout);
+        fputs("<LEFTESCAPEDBRACE/>", yyout);
         }
 
 \\\{	{
-        fputws(L"<RIGHTESCAPEDBRACE/>", yyout);
+        fputs("<RIGHTESCAPEDBRACE/>", yyout);
         }
 
 \\\%	{
-        fputws(L"<ESCAPEDPERCENT/>", yyout);
+        fputs("<ESCAPEDPERCENT/>", yyout);
         }
 
 \{	{
-	fputws(L"<CONTENTS>",yyout);
+	fputs("<CONTENTS>",yyout);
 }
 
 \}	{
-	fputws((wstring(L"</CONTENTS>")).c_str(),yyout);
+	fputs("</CONTENTS>", yyout);
 }
 
 ~	{
-	fputws(L"&NBSP;",yyout);
+	fputs("&NBSP;",yyout);
 }
 
 \$\$	{
 	BEGIN(mathenv);
-	fputws(L"<MATH_DOLLARS>",yyout);
+	fputs("<MATH_DOLLARS>",yyout);
 }
 
 <mathenv>\$\$	{
-	fputws(L"</MATH_DOLLARS>",yyout);
+	fputs("</MATH_DOLLARS>",yyout);
 	BEGIN(0);
 }
 
 \$	{
 	BEGIN(mathenv);
-	fputws(L"<MATH_DOLLAR>",yyout);
+	fputs("<MATH_DOLLAR>",yyout);
 }
 
 <mathenv>\$	{
-	fputws(L"</MATH_DOLLAR>",yyout);
+	fputs("</MATH_DOLLAR>",yyout);
 	BEGIN(0);
 }
 
 \\verb[|][^|]+[|]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>", yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\verb[!][^!]+[!]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>",yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\verb[?][^?]+[?]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>",yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\verb[/][^/]+[/]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>",yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\verb[#][^#]+[#]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>",yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\verb[+][^+]+[+]	{
-        fputws(L"<VERB>",yyout);
-        wstring ws = convertir(yytext, yyleng);
-        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
-        fputws(L"</VERB>", yyout);
+        fputs("<VERB>",yyout);
+        fwrite(yytext+5, 1, yyleng-5, yyout);
+        fputs("</VERB>", yyout);
 }
 
 \\\(	{
-	fputws(L"<MATH_PAR>",yyout);
+	fputs("<MATH_PAR>",yyout);
 }
 
 \\\)	{
-	fputws(L"</MATH_PAR>",yyout);
+	fputs("</MATH_PAR>",yyout);
 }
 
 \\\[	{
-	fputws(L"<MATH_BRA>",yyout);
+	fputs("<MATH_BRA>",yyout);
 }
 
 \\\]	{
-	fputws(L"</MATH_BRA>",yyout);
+	fputs("</MATH_BRA>",yyout);
 }
 
 \?`	{
-	fputws(L"¿",yyout);
+	fputs("¿",yyout);
 }
 
 !`	{
-	fputws(L"¡",yyout);
+	fputs("¡",yyout);
 }
 
 \"	{
-	fputws(L"&quot;",yyout);
+	fputs("&quot;",yyout);
 }
 \'	{
-	fputws(L"&apos;",yyout);
+	fputs("&apos;",yyout);
 }
 \<	{
-	fputws(L"&lt;",yyout);
+	fputs("&lt;",yyout);
 }
 \>	{
-	fputws(L"&gt;",yyout);
+	fputs("&gt;",yyout);
 }
 \\\&	{
-	fputws(L"&amp;",yyout);
+	fputs("&amp;",yyout);
 }
 \&	{
-	fputws(L"<AMP/>",yyout);
+	fputs("<AMP/>",yyout);
 }
 
 
@@ -402,11 +390,11 @@ wstring convertir(string const &multibyte, int const length)
 
 
 (.|\n|\r)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+	fputs(yytext, yyout);
 }
 
 <mathenv>(.|\n)	{
-	fputws(convertir(yytext,yyleng).c_str(),yyout);
+  fputs(yytext, yyout);
 }
 
 
@@ -420,9 +408,9 @@ wstring convertir(string const &multibyte, int const length)
 void usage(string const &progname)
 {
 
-  wcerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl;
+  cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl;
 
-  wcerr << "LaTeX format preprocessor " << endl;
+  cerr << "LaTeX format preprocessor " << endl;
   exit(EXIT_SUCCESS);
 }
 
diff --git a/apertium/apertium_cleanstream.cc b/apertium/apertium_cleanstream.cc
index bd43a0b..38aa316 100644
--- a/apertium/apertium_cleanstream.cc
+++ b/apertium/apertium_cleanstream.cc
@@ -22,6 +22,9 @@
 #include <iostream>
 #include <cstdio>
 #include <list>
+#include <lttoolbox/ustring.h>
+#include <lttoolbox/lt_locale.h>
+#include <lttoolbox/input_file.h>
 
 #ifdef __MINGW32__
 #include <windows.h>
@@ -29,154 +32,73 @@
 
 using namespace std;
 
-#ifndef fputwc_unlocked
-#define fputwc_unlocked fputwc
-#endif
-
-#ifndef fputws_unlocked
-#define fputws_unlocked fputws
-#endif
-
-#ifndef fgetwc_unlocked
-#define fgetwc_unlocked getwc
-#endif
-
-
-void
-tryToSetLocale()
-{
-#if !defined(__CYGWIN__) && !defined (__MINGW32__)
-  if(setlocale(LC_CTYPE, "") != NULL)
-  {
-    return;
-  }
-
-  wcerr << "Warning: unsupported locale, fallback to \"C\"" << endl;
-
-  setlocale(LC_ALL, "C");
-#endif
-#ifdef __CYGWIN__
-  setlocale(LC_ALL, "C.UTF-8");
-#endif
-#ifdef __MINGW32__
-  //SetConsoleInputCP(65001);
-  SetConsoleOutputCP(65001);
-#endif
-}
-
-wstring
-readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2)
-{
-  wstring result = L"";
-  result += delim1;
-  wchar_t c = delim1;
-
-  while(!feof(input) && c != delim2)
-  {
-    c = static_cast<wchar_t>(fgetwc_unlocked(input));
-    result += c;
-    if(c != L'\\')
-    {
-      continue;
-    }
-    else
-    {
-      result += L'\\';
-      c = static_cast<wchar_t>(fgetwc(input));
-      result += c;
-    }
-  }
-
-  if(c != delim2)
-  {
-    wcerr << "Error: expected: " << delim2 << ", saw: " << c << endl;
-  }
-
-  return result;
-}
-
 int
 main (int argc, char** argv)
 {
-  wstring buf = L"";
-  wstring blanktmp = L"";
+  UString buf;
+  UString blanktmp;
   bool keepblank = false;
 
   bool spaced = true;
   bool intoken = false;
 
-  wchar_t ws = L' ';
+  UChar32 ws = ' ';
 
   for(int i=1; i<argc; i++) {
     if (strcmp(argv[i], "-n") == 0) {
       spaced = false;
-      ws = L'\n';
+      ws = '\n';
     }
     else if (strcmp(argv[i], "-b") == 0) {
       keepblank = true;
     }
   }
 
-  tryToSetLocale();
-
-  wint_t c;
-  while ((c = fgetwc(stdin)) != WEOF)
-  {
-    if (c == (wint_t) '^')
-    {
-      if (intoken)
-      {
-        wcerr << L"Error: unescaped '^': " << buf << "^" << endl;
-        buf += L"\\^";
-      }
-      else
-      {
+  LtLocale::tryToSetLocale();
+
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
+  UChar32 c;
+  while (!input.eof()) {  // keep reading until the input stream is exhausted
+    c = input.get();
+    if (c == '^') {
+      if (intoken) {
+        cerr << "Error: unescaped '^': " << buf << "^" << endl;
+        buf += "\\^"_u;
+      } else {
         intoken = true;
-        if (buf != L"" || ((buf == L"") && !spaced))
-        {
-          fputwc_unlocked(ws, stdout);
+        if (!buf.empty() || (buf.empty() && !spaced)) {
+          u_fputc(ws, output);
         }
-        buf = L"^";
+        buf = "^"_u;
       }
-    }
-    else if(c == (wint_t) '$')
-    {
-      if (intoken)
-      {
+    } else if(c == '$') {
+      if (intoken) {
         intoken = false;
         buf += c;
-        fputws_unlocked(buf.c_str(), stdout);
-        buf = L"";
+        write(buf, output);
+        buf.clear();
+      } else {
+        cerr << "Error: stray '$'" << endl;
       }
-      else
-      {
-        wcerr << "Error: stray '$'" << endl;
-      }
-    }
-    else if(c == (wint_t) '\\')
-    {
-      c = fgetwc_unlocked(stdin);
-      buf += L'\\';
+    } else if(c == '\\') {
+      c = input.get();
+      buf += '\\';
       buf += c;
-    }
-    else if(!intoken && c == (wint_t) '[')
-    {
-      fputwc_unlocked(ws, stdout);
-      blanktmp = readFullBlock(stdin, L'[', L']');
+    } else if(!intoken && c == '[') {
+      u_fputc(ws, output);
+      blanktmp = input.readBlock('[', ']');
       if(keepblank) {
-        fputws_unlocked(blanktmp.c_str(), stdout);
+        write(blanktmp, output);
       }
-      blanktmp = L"";
-    }
-    else
-    {
+      blanktmp.clear();
+    } else {
       buf += c;
     }
   }
 
   // If not in space mode, make sure there's a final newline
-  if (!spaced)
-  {
-    fputwc_unlocked(L'\n', stdout);
+  if (!spaced) {
+    u_fputc('\n', output);
   }
 }
diff --git a/apertium/apertium_filter_ambiguity.cc b/apertium/apertium_filter_ambiguity.cc
index e9e6b4f..c9c61c3 100644
--- a/apertium/apertium_filter_ambiguity.cc
+++ b/apertium/apertium_filter_ambiguity.cc
@@ -20,7 +20,8 @@
 #include <apertium/hmm.h>
 #include <apertium/tagger_data_hmm.h>
 #include <apertium/tagger_word.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/input_file.h>
 
 #include <cstdlib>
 #include <iostream>
@@ -36,49 +37,29 @@
 using namespace Apertium;
 using namespace std;
 
-FILE * open_file(char const *filename, char const *mode)
-{
-  FILE *retval;
-
-  struct stat var;
-  if(stat(filename, &var))
-  {
-    wcerr << "Can't stat '" << filename << "'" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  retval = fopen(filename, mode);
-
-  if(!retval)
-  {
-    wcerr << "Can't open '" << filename << "'" << endl;
-    exit(EXIT_FAILURE);
-  }
-#ifdef _MSC_VER
-  _setmode(_fileno(retval), _O_U8TEXT);
-#endif
-
-  return retval;
-}
-
 int main(int argc, char *argv[])
 {
   LtLocale::tryToSetLocale();
 
   if(argc < 2 || argc > 4)
   {
-    wcerr << "USAGE: " << basename(argv[0]) << " tsx_file [input [output]" << endl;
+    cerr << "USAGE: " << basename(argv[0]) << " tsx_file [input [output]]" << endl;
     exit(EXIT_FAILURE);
   }
 
-  FILE *input = stdin, *output = stdout;
+  char* input = NULL;
+  UFILE* output = u_finit(stdout, NULL, NULL);
   switch(argc)
   {
     case 4:
-      output = open_file(argv[3], "w");
+      output = u_fopen(argv[3], "w", NULL, NULL);
+      if (!output) {
+        cerr << "Error: Unable to open '" << argv[3] << "' for writing." << endl;
+        exit(EXIT_FAILURE);
+      }
       // no break
     case 3:
-      input = open_file(argv[2], "r");
+      input = argv[2];
       // no break
     case 2:
     default:
diff --git a/apertium/apertium_interchunk.cc b/apertium/apertium_interchunk.cc
index 73bc555..b6b6c8b 100644
--- a/apertium/apertium_interchunk.cc
+++ b/apertium/apertium_interchunk.cc
@@ -23,7 +23,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include "getopt_long.h"
 
 #ifdef _MSC_VER
@@ -31,19 +31,18 @@
 #include <fcntl.h>
 #endif
 
-using namespace Apertium;
 using namespace std;
 
 void message(char *progname)
 {
-  wcerr << "USAGE: " << basename(progname) << " [-tz] t2x preproc [input [output]]" << endl;
-  wcerr << "  t2x        t2x rules file" << endl;
-  wcerr << "  preproc    result of preprocess trules file" << endl;
-  wcerr << "  input      input file, standard input by default" << endl;
-  wcerr << "  output     output file, standard output by default" << endl;
-  wcerr << "OPTIONS" <<endl;
-  wcerr << "  -t         trace mode" << endl;
-  wcerr << "  -z         flush buffer on '\0'" << endl;
+  cerr << "USAGE: " << basename(progname) << " [-tz] t2x preproc [input [output]]" << endl;
+  cerr << "  t2x        t2x rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "OPTIONS" <<endl;
+  cerr << "  -t         trace mode" << endl;
+  cerr << "  -z         flush buffer on '\\0'" << endl;
 
   exit(EXIT_FAILURE);
 }
@@ -53,32 +52,28 @@ void testfile(string const &filename)
   struct stat mybuf;
   if(stat(filename.c_str(), &mybuf) == -1)
   {
-    wcerr << "Error: can't stat file '";
-    wcerr << filename << "'." << endl;
+    cerr << "Error: can't stat file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-FILE * open_input(string const &filename)
+void open_input(InputFile& input, const char* filename)
 {
-  FILE *input = fopen(filename.c_str(), "r");
-  if(!input)
-  {
-    wcerr << "Error: can't open input file '";
-    wcerr << filename.c_str() << "'." << endl;
+  if (!input.open(filename)) {
+    cerr << "Error: can't open input file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
-
-  return input;
 }
 
-FILE * open_output(string const &filename)
+UFILE * open_output(const char* filename)
 {
-  FILE *output = fopen(filename.c_str(), "w");
+  UFILE* output = u_fopen(filename, "w", NULL, NULL);
   if(!output)
   {
-    wcerr << "Error: can't open output file '";
-    wcerr << filename.c_str() << "'." << endl;
+    cerr << "Error: can't open output file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
   return output;
@@ -122,13 +117,15 @@ int main(int argc, char *argv[])
     }
   }
 
-  FILE *input = stdin, *output = stdout;
-  string f1, f2;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
+  const char* f1;
+  const char* f2;
   switch(argc - optind + 1)
   {
     case 5:
       output = open_output(argv[argc-1]);
-      input = open_input(argv[argc-2]);
+      open_input(input, argv[argc-2]);
       testfile(argv[argc-3]);
       testfile(argv[argc-4]);
       f1 = argv[argc-4];
@@ -136,7 +133,7 @@ int main(int argc, char *argv[])
       break;
 
     case 4:
-      input = open_input(argv[argc-1]);
+      open_input(input, argv[argc-1]);
       testfile(argv[argc-2]);
       testfile(argv[argc-3]);
       f1 = argv[argc-3];
@@ -155,11 +152,6 @@ int main(int argc, char *argv[])
       break;
   }
 
-#ifdef _MSC_VER
-  _setmode(_fileno(input), _O_U8TEXT);
-  _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
   i.read(f1, f2);
   i.interchunk(input, output);
   return EXIT_SUCCESS;
diff --git a/apertium/apertium_perceptron_trace.cc b/apertium/apertium_perceptron_trace.cc
index 3bc490e..f006a93 100644
--- a/apertium/apertium_perceptron_trace.cc
+++ b/apertium/apertium_perceptron_trace.cc
@@ -21,17 +21,17 @@ int perceptron_trace(int argc, char* argv[])
     std::ifstream tagger_model;
     try_open_fstream("MODEL", argv[2], tagger_model);
     pt.deserialise(tagger_model);
-    std::wcout << pt;
+    std::cout << pt;
   }
   else if (argc == 3 && std::string(argv[1]) == "mtx")
   {
     PerceptronSpec spec;
     MTXReader mtx_reader(spec);
     mtx_reader.read(argv[2]);
-    std::wcout << "== Macro definitions ==\n";
+    std::cout << "== Macro definitions ==\n";
     mtx_reader.printTmplDefns();
-    std::wcout << "== Spec ==\n";
-    std::wcout << spec;
+    std::cout << "== Spec ==\n";
+    std::cout << spec;
   }
   else if (argc == 5 && std::string(argv[1]) == "path")
   {
@@ -41,11 +41,11 @@ int perceptron_trace(int argc, char* argv[])
     PerceptronTagger pt(flags);
     pt.read_spec(argv[2]);
 
-    std::wifstream untagged_stream;
+    std::ifstream untagged_stream;
     try_open_fstream("UNTAGGED_CORPUS", argv[3], untagged_stream);
     Stream untagged(flags, untagged_stream, argv[3]);
 
-    std::wifstream tagged_stream;
+    std::ifstream tagged_stream;
     try_open_fstream("TAGGED_CORPUS", argv[4], tagged_stream);
     Stream tagged(flags, tagged_stream, argv[4]);
 
@@ -67,7 +67,7 @@ int perceptron_trace(int argc, char* argv[])
         {
           Optional<Analysis> saved_token = tagged_sent[token_idx];
           tagged_sent[token_idx] = lu.TheAnalyses[analy_idx];
-          std::wcout << L"LU:" << tagged_sent[token_idx] << std::endl ;
+          std::cout << "LU:" << tagged_sent[token_idx] << std::endl ;
           std::vector<Morpheme> &wordoids = lu.TheAnalyses[analy_idx].TheMorphemes;
           for (wrd_idx=0; wrd_idx<wordoids.size(); wrd_idx++)
           {
@@ -76,14 +76,14 @@ int perceptron_trace(int argc, char* argv[])
               tagged_sent, untagged_sent,
               token_idx, wrd_idx,
               feat_vec);
-            std::wcout << "Sentence " << sent_idx << " of " << tc.sentences.size() << "\t\t"
+            std::cout << "Sentence " << sent_idx << " of " << tc.sentences.size() << "\t\t"
                        << "Token " << token_idx << " of " << untagged_sent.size() << "\t\t"
                        << "Analysis " << analy_idx << " of " << lu.TheAnalyses.size() << "\t\t"
                        << "Wordoid " << wrd_idx << " of " << wordoids.size() << "\n";
-            std::wcout << "" << wordoids[wrd_idx] << "\n";
+            std::cout << "" << wordoids[wrd_idx] << "\n";
             FeatureVec fv(feat_vec);
-            std::wcout << fv;
-            std::wcout << "\n\n";
+            std::cout << fv;
+            std::cout << "\n\n";
           }
           tagged_sent[token_idx] = saved_token;
         }
@@ -92,13 +92,13 @@ int perceptron_trace(int argc, char* argv[])
   }
   else
   {
-    std::wcout << "Run with one of:\n";
-    std::wcout << argv[0] << " model <binary model file>\n";
-    std::wcout << "Output features and weights from a model file.\n";
-    std::wcout << argv[0] << " mtx <mtx file>\n";
-    std::wcout << "Output macros and features from an mtx file.\n";
-    std::wcout << argv[0] << " path <mtx file> <untagged> <tagged>\n";
-    std::wcout << "Trace a particular path through giving which features fire "
+    std::cout << "Run with one of:\n";
+    std::cout << argv[0] << " model <binary model file>\n";
+    std::cout << "Output features and weights from a model file.\n";
+    std::cout << argv[0] << " mtx <mtx file>\n";
+    std::cout << "Output macros and features from an mtx file.\n";
+    std::cout << argv[0] << " path <mtx file> <untagged> <tagged>\n";
+    std::cout << "Trace a particular path through giving which features fire "
                << "and the resulting score. Useful for interactively "
                << "designing feature sets.\n";
   }
diff --git a/apertium/apertium_postchunk.cc b/apertium/apertium_postchunk.cc
index bde462e..8c3cfe8 100644
--- a/apertium/apertium_postchunk.cc
+++ b/apertium/apertium_postchunk.cc
@@ -24,60 +24,46 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #ifdef _MSC_VER
 #include <io.h>
 #include <fcntl.h>
 #endif
 
-using namespace Apertium;
 using namespace std;
 
 void message(char *progname)
 {
-  wcerr << "USAGE: " << basename(progname) << " [-z] t3x preproc [input [output]]" << endl;
-  wcerr << "  t3x        t3x rules file" << endl;
-  wcerr << "  preproc    result of preprocess trules file" << endl;
-  wcerr << "  input      input file, standard input by default" << endl;
-  wcerr << "  output     output file, standard output by default" << endl;
-  wcerr << "OPTIONS" <<endl;
-  wcerr << "  -t         trace (show rule numbers and patterns matched)" << endl;
-  wcerr << "  -z         null-flushing output on '\0'" << endl;
+  cerr << "USAGE: " << basename(progname) << " [-z] t3x preproc [input [output]]" << endl;
+  cerr << "  t3x        t3x rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "OPTIONS" <<endl;
+  cerr << "  -t         trace (show rule numbers and patterns matched)" << endl;
+  cerr << "  -z         null-flushing output on '\\0'" << endl;
 
   exit(EXIT_FAILURE);
 }
 
-void testfile(string const &filename)
+void testfile(const char* filename)
 {
   struct stat mybuf;
-  if(stat(filename.c_str(), &mybuf) == -1)
+  if(stat(filename, &mybuf) == -1)
   {
-    wcerr << "Error: can't stat file '";
-    wcerr << filename << "'." << endl;
+    cerr << "Error: can't stat file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-FILE * open_input(string const &filename)
+UFILE * open_output(string const &filename)
 {
-  FILE *input = fopen(filename.c_str(), "r");
-  if(!input)
-  {
-    wcerr << "Error: can't open input file '";
-    wcerr << filename.c_str() << "'." << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  return input;
-}
-
-FILE * open_output(string const &filename)
-{
-  FILE *output = fopen(filename.c_str(), "w");
+  UFILE *output = u_fopen(filename.c_str(), "w", NULL, NULL);
   if(!output)
   {
-    wcerr << "Error: can't open output file '";
-    wcerr << filename.c_str() << "'." << endl;
+    cerr << "Error: can't open output file '";
+    cerr << filename.c_str() << "'." << endl;
     exit(EXIT_FAILURE);
   }
   return output;
@@ -121,13 +107,15 @@ int main(int argc, char *argv[])
     }
   }
 
-  FILE *input = stdin, *output = stdout;
-  string f1, f2;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
+  const char* f1;
+  const char* f2;
   switch(argc - optind + 1)
   {
     case 5:
       output = open_output(argv[argc-1]);
-      input = open_input(argv[argc-2]);
+      input.open_or_exit(argv[argc-2]);
       testfile(argv[argc-3]);
       testfile(argv[argc-4]);
       f1 = argv[argc-4];
@@ -135,7 +123,7 @@ int main(int argc, char *argv[])
       break;
 
     case 4:
-      input = open_input(argv[argc-1]);
+      input.open_or_exit(argv[argc-1]);
       testfile(argv[argc-2]);
       testfile(argv[argc-3]);
       f1 = argv[argc-3];
@@ -154,11 +142,6 @@ int main(int argc, char *argv[])
       break;
   }
 
-#ifdef _MSC_VER
-  _setmode(_fileno(input), _O_U8TEXT);
-  _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
   p.read(f1, f2);
   p.postchunk(input, output);
 
diff --git a/apertium/apertium_posttransfer.cc b/apertium/apertium_posttransfer.cc
index 3e0176a..45e2ad8 100644
--- a/apertium/apertium_posttransfer.cc
+++ b/apertium/apertium_posttransfer.cc
@@ -25,38 +25,38 @@
 #include <io.h>
 #include <fcntl.h>
 #endif
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <lttoolbox/lt_locale.h>
+#include <lttoolbox/input_file.h>
 
 
-using namespace Apertium;
 using namespace std;
 
 void usage(char *progname)
 {
-  wcerr << L"USAGE: " << basename(progname) << L" [input_file [output_file]]" << endl;
-  wcerr << L"  -z         null-flushing output on '\0'" << endl;
-  wcerr << L"  -h         shows this message" << endl;
+  cerr << "USAGE: " << basename(progname) << " [input_file [output_file]]" << endl;
+  cerr << "  -z         null-flushing output on '\\0'" << endl;
+  cerr << "  -h         shows this message" << endl;
   exit(EXIT_FAILURE);
 }
 
-void processStream(FILE *in, FILE *out, bool null_flush)
+void processStream(InputFile& in, UFILE* out, bool null_flush)
 {
   int prev = -1;
-  int c = fgetc(in);
+  UChar32 c = in.get();
   while (c != EOF)
   {
     if (!((c == ' ') && (prev == ' ')))
     {
-      putc(c, out);
+      u_fputc(c, out);
     }
     if (c == 0 && null_flush)
     {
-      fflush(out);
-      putc(c, out);
+      u_fflush(out);
+      u_fputc(c, out);
     }
     prev = c;
-    c = fgetc(in);
+    c = in.get();
   }
 }
 
@@ -98,44 +98,33 @@ int main(int argc, char *argv[])
     usage(argv[0]);
   }
 
-  FILE *input, *output;
+  InputFile input;
+  UFILE* output;
 
   if((argc-optind+1) == 1)
   {
-    input = stdin;
-    output = stdout;
+    output = u_finit(stdout, NULL, NULL);
   }
   else if ((argc-optind+1) == 2)
   {
-    input = fopen(argv[argc-1], "r");
-    if(!input)
-    {
+    if (!input.open(argv[argc-1])) {
       usage(argv[0]);
     }
-    output = stdout;
+    output = u_finit(stdout, NULL, NULL);
   }
   else
   {
-    input = fopen(argv[argc-2], "r");
-    output = fopen(argv[argc-1], "w");
-
-    if(!input || !output)
-    {
+    output = u_fopen(argv[argc-1], "w", NULL, NULL);
+    if (!output || !input.open(argv[argc-2])) {
       usage(argv[0]);
     }
   }
 
-  if(feof(input))
+  if(input.eof())
   {
-    wcerr << L"ERROR: Can't read file '" << argv[1] << L"'" << endl;
+    cerr << "ERROR: Can't read file '" << argv[1] << "'" << endl;
     exit(EXIT_FAILURE);
   }
 
   processStream(input, output, null_flush);
-
-#ifdef _MSC_VER
-    _setmode(_fileno(input), _O_U8TEXT);
-    _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
 }
diff --git a/apertium/apertium_pretransfer.cc b/apertium/apertium_pretransfer.cc
index 95c2c4c..f5c07f0 100644
--- a/apertium/apertium_pretransfer.cc
+++ b/apertium/apertium_pretransfer.cc
@@ -26,18 +26,18 @@
 #include <fcntl.h>
 #endif
 #include <apertium/pretransfer.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/lt_locale.h>
 
-using namespace Apertium;
 using namespace std;
 
 void usage(char *progname)
 {
-  wcerr << L"USAGE: " << basename(progname) << L" [input_file [output_file]]" << endl;
-  wcerr << L"  -n         assume no surface forms" << endl;
-  wcerr << L"  -e         treat ~ as compound separator" << endl;
-  wcerr << L"  -z         null-flushing output on '\0'" << endl;
-  wcerr << L"  -h         shows this message" << endl;
+  cerr << "USAGE: " << basename(progname) << " [input_file [output_file]]" << endl;
+  cerr << "  -n         assume no surface forms" << endl;
+  cerr << "  -e         treat ~ as compound separator" << endl;
+  cerr << "  -z         null-flushing output on '\\0'" << endl;
+  cerr << "  -h         shows this message" << endl;
   exit(EXIT_FAILURE);
 }
 
@@ -90,43 +90,35 @@ int main(int argc, char *argv[])
     usage(argv[0]);
   }
 
-  FILE *input, *output;
+  InputFile input;
+  UFILE* output;
 
   if((argc-optind+1) == 1)
   {
-    input = stdin;
-    output = stdout;
+    output = u_finit(stdout, NULL, NULL);
   }
   else if ((argc-optind+1) == 2)
   {
-    input = fopen(argv[argc-1], "r");
-    if(!input)
-    {
+    if(!input.open(argv[argc-1])) {
       usage(argv[0]);
     }
-    output = stdout;
+    output = u_finit(stdout, NULL, NULL);
   }
   else
   {
-    input = fopen(argv[argc-2], "r");
-    output = fopen(argv[argc-1], "w");
+    output = u_fopen(argv[argc-1], "w", NULL, NULL);
 
-    if(!input || !output)
+    if(!output || !input.open(argv[argc-2]))
     {
       usage(argv[0]);
     }
   }
 
-  if(feof(input))
+  if(input.eof())
   {
-    wcerr << L"ERROR: Can't read file '" << argv[1] << L"'" << endl;
+    cerr << "ERROR: Can't read file '" << argv[1] << "'" << endl;
     exit(EXIT_FAILURE);
   }
 
-#ifdef _MSC_VER
-    _setmode(_fileno(input), _O_U8TEXT);
-    _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
   processStream(input, output, null_flush, surface_forms, compound_sep);
 }
diff --git a/apertium/apertium_re.cc b/apertium/apertium_re.cc
index 7182614..01599b3 100644
--- a/apertium/apertium_re.cc
+++ b/apertium/apertium_re.cc
@@ -18,155 +18,114 @@
 #include <lttoolbox/compression.h>
 #include <iostream>
 #include <cstdlib>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
-using namespace Apertium;
 using namespace std;
+using namespace icu;
 
-std::string& pcre_version_endian() {
-  static std::string pve;
-  if (pve.empty()) {
-    pve = pcre_version();
-#ifdef WORDS_BIGENDIAN
-    pve += "-be";
-#else
-    pve += "-le";
-#endif
-  }
-  return pve;
-}
-
-ApertiumRE::ApertiumRE() :
-re(0)
-{
-  empty = true;
-}
+ApertiumRE::ApertiumRE() {}
 
 ApertiumRE::~ApertiumRE()
 {
-  if(!empty)
-  {
-    pcre_free(re);
+  if (re != nullptr) {
+    delete re;
   }
-  empty = true;
 }
 
 void
 ApertiumRE::read(FILE *input)
 {
   unsigned int size = Compression::multibyte_read(input);
-  re = static_cast<pcre *>(pcre_malloc(size));
-  if(size != fread(re, 1, size, input))
-  {
-    wcerr << L"Error reading regexp" << endl;
+  if (fseek(input, size, SEEK_CUR) != 0) {
+    cerr << "Error reading regexp" << endl;
     exit(EXIT_FAILURE);
   }
-
-  empty = false;
 }
 
 void
-ApertiumRE::compile(string const &str)
+ApertiumRE::compile(UString const &str)
 {
-  const char *error;
-  int erroroffset;
-  re = pcre_compile(str.c_str(), PCRE_DOTALL|PCRE_CASELESS|PCRE_EXTENDED|PCRE_UTF8,
-	            &error, &erroroffset, NULL);
-  if(re == NULL)
-  {
-    wcerr << L"Error: pcre_compile ";
-    wcerr << error << endl;
+  if (re != nullptr) {
+    delete re;
+  }
+  UnicodeString s = str.c_str();
+  UErrorCode err = U_ZERO_ERROR;
+  re = RegexPattern::compile(s, UREGEX_DOTALL|UREGEX_CASE_INSENSITIVE, err);
+  if(U_FAILURE(err)) {
+    cerr << "Error: unable to compile regular expression '" << str << "'." << endl;
+    cerr << "error code: " << u_errorName(err) << endl;
     exit(EXIT_FAILURE);
   }
-
-  empty = false;
 }
 
 void
 ApertiumRE::write(FILE *output) const
 {
-  if(empty)
-  {
-    wcerr << L"Error, cannot write empty regexp" << endl;
+  if (re == nullptr) {
+    cerr << "Error, cannot write empty regexp" << endl;
     exit(EXIT_FAILURE);
   }
+  // for backwards compatibility, write empty binary form
+  Compression::multibyte_write(0, output);
+}
 
-  size_t size;
-  int rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
-  if(rc < 0)
-  {
-    wcerr << L"Error calling pcre_fullinfo()\n" << endl;
-    exit(EXIT_FAILURE);
+UString
+ApertiumRE::match(UString const &str) const
+{
+  if(re == nullptr) {
+    return ""_u;
   }
 
-  Compression::multibyte_write(size, output);
+  UnicodeString s = str.c_str();
+  UErrorCode err = U_ZERO_ERROR;
+  LocalPointer<RegexMatcher> m(re->matcher(s, err)); // caller owns the matcher; released on scope exit
 
-  size_t rc2 = fwrite(re, 1, size, output);
-  if(rc2 != size)
-  {
-    wcerr << L"Error writing precompiled regex\n" << endl;
+  if (U_FAILURE(err)) {
+    cerr << "Error: Unable to apply regexp" << endl;
+    cerr << "error code: " << u_errorName(err) << endl;
     exit(EXIT_FAILURE);
   }
-}
 
-string
-ApertiumRE::match(string const &str) const
-{
-  if(empty)
-  {
-    return "";
+  if (!m->find()) {
+    return ""_u;
   }
 
-  int result[3];
-  int workspace[4096];
-//  int rc = pcre_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3);
-  int rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3, workspace, 4096);
-
-  if(rc < 0)
-  {
-    switch(rc)
-    {
-      case PCRE_ERROR_NOMATCH:
-	return "";
-
-      default:
-	wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
-	exit(EXIT_FAILURE);
-    }
+  UString ret = m->group(err).getTerminatedBuffer();
+  if (U_FAILURE(err)) {
+    cerr << "Error: Unable to extract substring from regexp match" << endl;
+    cerr << "error code: " << u_errorName(err) << endl;
+    exit(EXIT_FAILURE);
   }
 
-  return str.substr(result[0], result[1]-result[0]);
+  return ret;
 }
 
 // Return true if something was replaced and false otherwise
 bool
-ApertiumRE::replace(string &str, string const &value) const
+ApertiumRE::replace(UString &str, UString const &value) const
 {
-  if(empty)
-  {
+  if(re == nullptr) {
     return false;
   }
 
-  int result[3];
-  int workspace[4096];
-  // int rc = pcre_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3);
-  int rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3, workspace, 4096);
-  if(rc < 0)
-  {
-    switch(rc)
-    {
-      case PCRE_ERROR_NOMATCH:
-	return false;
-
-      default:
-	wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
-	exit(EXIT_FAILURE);
-    }
+  UnicodeString s = str.c_str();
+  UErrorCode err = U_ZERO_ERROR;
+  LocalPointer<RegexMatcher> m(re->matcher(s, err)); // caller owns the matcher; released on scope exit
+
+  if (U_FAILURE(err)) {
+    cerr << "Error: Unable to apply regexp" << endl;
+    cerr << "error code: " << u_errorName(err) << endl;
+    exit(EXIT_FAILURE);
   }
 
-  string res = str.substr(0, result[0]);
+  // do this manually rather than call m->replaceFirst()
+  // because we want to know that a match happened
+  if (!m->find()) {
+    return false;
+  }
+  UString res = str.substr(0, m->start(err));
   res.append(value);
-  res.append(str.substr(result[1]));
-  str = res;
+  res.append(str.substr(m->end(err)));
+  res.swap(str);
   return true;
 }
diff --git a/apertium/apertium_re.h b/apertium/apertium_re.h
index c9cb8c0..3cca42b 100644
--- a/apertium/apertium_re.h
+++ b/apertium/apertium_re.h
@@ -18,27 +18,24 @@
 #ifndef _APERTIUM_RE_
 #define _APERTIUM_RE_
 
-#include <pcre.h>
 #include <cstdio>
-#include <string>
+#include <unicode/regex.h>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
-std::string& pcre_version_endian();
-
 class ApertiumRE
 {
 private:
-  bool empty;
-  pcre *re;
+  icu::RegexPattern* re = nullptr;
 public:
   ApertiumRE();
   ~ApertiumRE();
   void read(FILE *);
   void write(FILE *) const;
-  string match(string const &str) const;
-  bool replace(string &str, string const &value) const;
-  void compile(string const &str);
+  UString match(UString const &str) const;
+  bool replace(UString &str, UString const &value) const;
+  void compile(UString const &str);
 };
 
 #endif
diff --git a/apertium/apertium_tagger.cc b/apertium/apertium_tagger.cc
index 8f6bb74..a384330 100644
--- a/apertium/apertium_tagger.cc
+++ b/apertium/apertium_tagger.cc
@@ -33,7 +33,7 @@ int main(int argc, char **argv) {
   try {
     Apertium::apertium_tagger(argc, argv);
   } catch (const Apertium::Exception::apertium_tagger::err_Exception &err_Exception_) {
-    std::wcerr << "Try 'apertium-tagger --help' for more information." << std::endl;
+    std::cerr << "Try 'apertium-tagger --help' for more information." << std::endl;
     return 1;
   } catch (...) {
     throw;
diff --git a/apertium/apertium_tagger_apply_new_rules.cc b/apertium/apertium_tagger_apply_new_rules.cc
index a8238e6..93974ac 100644
--- a/apertium/apertium_tagger_apply_new_rules.cc
+++ b/apertium/apertium_tagger_apply_new_rules.cc
@@ -26,7 +26,7 @@
 #include <apertium/hmm.h>
 #include <apertium/tagger_data_hmm.h>
 #include <apertium/tsx_reader.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 using namespace Apertium;
 
@@ -38,17 +38,17 @@ TTag eos; //End-of-sentence tag
 
 void check_file(FILE *f, const string& path) {
   if (!f) {
-    wcerr<<"Error: cannot open file '"<<path.c_str()<<"'\n";
+    cerr<<"Error: cannot open file '"<<path.c_str()<<"'\n";
     exit(EXIT_FAILURE);
   }
 }
 
 void help(char *name) {
-  wcerr<<"Forbid and enforce rules are applied to the given HMM parameters\n\n";
-  wcerr<<"USAGE:\n";
-  wcerr<<name<<" --filein filein.prob --fileout fileout.prob --tsxfile file.tsx\n\n";
+  cerr<<"Forbid and enforce rules are applied to the given HMM parameters\n\n";
+  cerr<<"USAGE:\n";
+  cerr<<name<<" --filein filein.prob --fileout fileout.prob --tsxfile file.tsx\n\n";
 
-  wcerr<<"ARGUMENTS: \n"
+  cerr<<"ARGUMENTS: \n"
       <<"   --filein|-i: To specify the file with the HMM parameter to process\n\n"
       <<"   --fileout|-o: To specify the file to which the HMM will be written\n\n"
       <<"   --tsxfile|-x: File containing the rules to apply\n\n"
@@ -63,10 +63,10 @@ int main(int argc, char* argv[]) {
   int c;
   int option_index=0;
 
-  wcerr<<"Command line: ";
+  cerr<<"Command line: ";
   for(int i=0; i<argc; i++)
-    wcerr<<argv[i]<<" ";
-  wcerr<<"\n";
+    cerr<<argv[i]<<" ";
+  cerr<<"\n";
 
   while (true) {
     static struct option long_options[] =
@@ -95,7 +95,7 @@ int main(int argc, char* argv[]) {
       filetsx=optarg;
       break;
     case 'v':
-      wcerr<<"LICENSE:\n\n"
+      cerr<<"LICENSE:\n\n"
 	  <<"   Copyright (C) 2006 Felipe Sánchez Martínez\n\n"
 	  <<"   This program is free software; you can redistribute it and/or\n"
 	  <<"   modify it under the terms of the GNU General Public License as\n"
@@ -119,19 +119,19 @@ int main(int argc, char* argv[]) {
 
   //Now we check the command line arguments
   if (filein=="") {
-    wcerr<<"Error: You did not provide an input file (.prob). Use --filein to do that\n";
+    cerr<<"Error: You did not provide an input file (.prob). Use --filein to do that\n";
     help(argv[0]);
     exit(EXIT_FAILURE);
   }
 
   if (fileout=="") {
-    wcerr<<"Error: You did not provide an output file (.prob). Use --fileout to do that\n";
+    cerr<<"Error: You did not provide an output file (.prob). Use --fileout to do that\n";
     help(argv[0]);
     exit(EXIT_FAILURE);
   }
 
   if (filetsx=="") {
-    wcerr<<"Error: You did not provide a tagger definition file (.tsx). Use --filetsx to do that\n";
+    cerr<<"Error: You did not provide a tagger definition file (.tsx). Use --tsxfile to do that\n";
     help(argv[0]);
     exit(EXIT_FAILURE);
   }
@@ -141,15 +141,15 @@ int main(int argc, char* argv[]) {
   fin=fopen(filein.c_str(), "rb");
   check_file(fin, filein);
 
-  wcerr<<"Reading apertium-tagger data from file '"<<filein<<"' ... "<<flush;
+  cerr<<"Reading apertium-tagger data from file '"<<filein<<"' ... "<<flush;
   tagger_data_hmm.read(fin);
   fclose(fin);
-  wcerr<<"done.\n";
+  cerr<<"done.\n";
 
-  wcerr<<"Reading apertium-tagger definition from file '"<<filetsx<<"' ... "<<flush;
+  cerr<<"Reading apertium-tagger definition from file '"<<filetsx<<"' ... "<<flush;
   TSXReader treader;
   treader.read(filetsx);
-  wcerr<<"done.\n";
+  cerr<<"done.\n";
 
   tagger_data_hmm.setForbidRules(treader.getTaggerData().getForbidRules());
   tagger_data_hmm.setEnforceRules(treader.getTaggerData().getEnforceRules());
@@ -160,8 +160,8 @@ int main(int argc, char* argv[]) {
 
   fout=fopen(fileout.c_str(), "wb");
   check_file(fout, fileout);
-  wcerr<<"Writing apertium-tagger data to file '"<<fileout<<"' ... "<<flush;
+  cerr<<"Writing apertium-tagger data to file '"<<fileout<<"' ... "<<flush;
   hmm.serialise(fout);
   fclose(fout);
-  wcerr<<"done.\n";
+  cerr<<"done.\n";
 }
diff --git a/apertium/apertium_tagger_readwords.cc b/apertium/apertium_tagger_readwords.cc
index 8317d6d..2e783cc 100644
--- a/apertium/apertium_tagger_readwords.cc
+++ b/apertium/apertium_tagger_readwords.cc
@@ -17,7 +17,6 @@
  */
 
 #include "getopt_long.h"
-#include <apertium/utf_converter.h>
 #include <apertium/file_morpho_stream.h>
 #include <apertium/tsx_reader.h>
 #include <apertium/tagger_data_hmm.h>
@@ -25,7 +24,7 @@
 #include <iostream>
 
 #include <cstdlib>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 
 using namespace std;
@@ -36,13 +35,13 @@ bool check_ambclasses;
 
 void check_file(FILE *f, const string& path) {
   if (!f) {
-    wcerr<<"Error: cannot open file '"<<path<<"'\n";
+    cerr<<"Error: cannot open file '"<<path<<"'\n";
     exit(EXIT_FAILURE);
   }
 }
 
-void readwords (FILE *is, int corpus_length) {
-  FileMorphoStream lexmorfo(is, true, &tagger_data_hmm);
+void readwords (int corpus_length) {
+  FileMorphoStream lexmorfo(NULL, true, &tagger_data_hmm);
   TaggerWord *word=NULL;
   int nwords=0;
 
@@ -50,15 +49,15 @@ void readwords (FILE *is, int corpus_length) {
   while(word) {
     nwords++;
 
-    cout<<UtfConverter::toUtf8(word->get_superficial_form())<<" "<<UtfConverter::toUtf8(word->get_string_tags())<<"\n";
+    cout << word->get_superficial_form() << " " << word->get_string_tags() << "\n";
 
     if (check_ambclasses) {
       int k=tagger_data_hmm.getOutput()[word->get_tags()];
 
       if ((k>=tagger_data_hmm.getM())||(k<0)) {
-	wcerr<<"Error: Ambiguity class number out of range: "<<k<<"\n";
-	wcerr<<"Word: "<<UtfConverter::toUtf8(word->get_superficial_form())<<"\n";
-	wcerr<<"Ambiguity class: "<<UtfConverter::toUtf8(word->get_string_tags())<<"\n";
+	cerr<<"Error: Ambiguity class number out of range: "<<k<<"\n";
+	cerr<<"Word: "<< word->get_superficial_form() << "\n";
+	cerr<<"Ambiguity class: "<< word->get_string_tags() << "\n";
       }
     }
 
@@ -69,15 +68,15 @@ void readwords (FILE *is, int corpus_length) {
 
     word=lexmorfo.get_next_word();
   }
-  wcerr<<nwords<<" were readed\n";
+  cerr<<nwords<<" words were read\n";
 }
 
 
 void help(char *name) {
-  wcerr<<"USAGE:\n";
-  wcerr<<name<<" {--tsxfile file.tsx | --probfile file.prob} [--clength <corpus_length>] < file.crp \n\n";
+  cerr<<"USAGE:\n";
+  cerr<<name<<" {--tsxfile file.tsx | --probfile file.prob} [--clength <corpus_length>] < file.crp \n\n";
 
-  wcerr<<"ARGUMENTS: \n"
+  cerr<<"ARGUMENTS: \n"
       <<"   --tsxfile|-x: Specify a tagger specification file\n"
       <<"   --probfile|-p: Specify a tagger parameter file\n"
       <<"   --clength|-l: Specify the length of the corpus to process\n";
@@ -92,12 +91,12 @@ int main(int argc, char* argv[]) {
   int c;
   int option_index=0;
 
-  wcerr<<"LOCALE: "<<setlocale(LC_ALL,"")<<"\n";
+  cerr<<"LOCALE: "<<setlocale(LC_ALL,"")<<"\n";
 
-  wcerr<<"Command line: ";
+  cerr<<"Command line: ";
   for(int i=0; i<argc; i++)
-    wcerr<<argv[i]<<" ";
-  wcerr<<"\n";
+    cerr<<argv[i]<<" ";
+  cerr<<"\n";
 
   while (true) {
     static struct option long_options[] =
@@ -118,7 +117,7 @@ int main(int argc, char* argv[]) {
     case 'l':
       corpus_length=atoi(optarg);
       if(corpus_length<=0) {
-	wcerr<<"Error: corpus length provided with --clength must be a positive integer\n";
+	cerr<<"Error: corpus length provided with --clength must be a positive integer\n";
 	help(argv[0]);
 	exit(EXIT_FAILURE);
       }
@@ -134,8 +133,8 @@ int main(int argc, char* argv[]) {
       exit(EXIT_SUCCESS);
       break;
     case 'v':
-      wcerr<<"apertium-tagger-readwords\n";
-      wcerr<<"LICENSE:\n\n"
+      cerr<<"apertium-tagger-readwords\n";
+      cerr<<"LICENSE:\n\n"
 	  <<"   Copyright (C) 2006 Felipe Sánchez Martínez\n\n"
 	  <<"   This program is free software; you can redistribute it and/or\n"
 	  <<"   modify it under the terms of the GNU General Public License as\n"
@@ -158,38 +157,38 @@ int main(int argc, char* argv[]) {
   }
 
   if((tsxfile=="") && (probfile=="")) {
-    wcerr<<"Error: You have provided neither a tagger specification file (.tsx) nor a tagger probability file (.prob). Use --tsxfile or --probfile to provide one of them\n";
+    cerr<<"Error: You have provided neither a tagger specification file (.tsx) nor a tagger probability file (.prob). Use --tsxfile or --probfile to provide one of them\n";
     help(argv[0]);
     exit(EXIT_FAILURE);
   }
 
   if((tsxfile!="") && (probfile!="")) {
-    wcerr<<"Error: You provided a tagger specification file and a tagger probability file. Only one of them can be provided, not both\n";
+    cerr<<"Error: You provided a tagger specification file and a tagger probability file. Only one of them can be provided, not both\n";
     help(argv[0]);
     exit(EXIT_FAILURE);
   }
 
   if (tsxfile!="") {
-    wcerr<<"Reading tagger specification from file '"<<tsxfile<<"' ..."<<flush;
+    cerr<<"Reading tagger specification from file '"<<tsxfile<<"' ..."<<flush;
     TSXReader treader;
     treader.read(tsxfile);
     tagger_data_hmm=treader.getTaggerData();
-    wcerr<<"done.\n";
+    cerr<<"done.\n";
     check_ambclasses=false;
   }
 
   if (probfile!="") {
-    wcerr<<"Reading tagger parameters from file '"<<probfile<<"' ..."<<flush;
+    cerr<<"Reading tagger parameters from file '"<<probfile<<"' ..."<<flush;
     FILE* fin=NULL;
     fin=fopen(probfile.c_str(), "r");
     check_file(fin, probfile);
     tagger_data_hmm.read(fin);
-    wcerr<<"done.\n";
+    cerr<<"done.\n";
     fclose(fin);
     check_ambclasses=true;
   }
 
   TaggerWord::setArrayTags(tagger_data_hmm.getArrayTags());
 
-  readwords(stdin, corpus_length);
+  readwords(corpus_length);
 }
diff --git a/apertium/apertium_tmxbuild.cc b/apertium/apertium_tmxbuild.cc
index 2b3c66f..a1c475f 100644
--- a/apertium/apertium_tmxbuild.cc
+++ b/apertium/apertium_tmxbuild.cc
@@ -24,36 +24,34 @@
 
 #include <apertium/apertium_config.h>
 #include <apertium/tmx_builder.h>
-#include <apertium/utf_converter.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include "apertium_config.h"
 #include <apertium/unlocked_cstdio.h>
 
-using namespace Apertium;
 using namespace std;
 
 void usage(char *progname)
 {
-  wcerr << L"USAGE: " << basename(progname) << L" [options] code1 code2 doc1 doc2 [output_file]" << endl;
-  wcerr << L"Options:" << endl;
-  wcerr << L"  -p percent    number 0 < n <= 1 to set margin of confidence of TU's " << endl;
-  wcerr << L"                (0.85 by default) in length terms" << endl;
-  wcerr << L"  -e edit       number 0 < n <= 1 to set margin of confidence of TU's " << endl;
-  wcerr << L"                (0.30 by default) in edit distance terms" << endl;
-  wcerr << L"  -l low-limit  ignore percent if the segment is less than lowlimit" <<endl;
-  wcerr << L"                (15 by default)" << endl;
-  wcerr << L"  -m max-edit   characters to be taken into account when aligning" << endl;
-  wcerr << L"                sentences (50 by default)" << endl;
-  wcerr << L"  -d diagonal   diagonal width for using edit distance, 10 by default" << endl;
-  wcerr << L"  -w window     window size of the edit distance with sentences" << endl;
-  wcerr << L"                (100 sentences by default)" << endl;
-  wcerr << L"  -s step       step for moving the window during the alingment" <<endl;
-  wcerr << L"                (75 sentences by default)" << endl;
-  wcerr << L"  -h help       display this help" << endl;
-  wcerr << L"Other parameters:" << endl;
-  wcerr << L"  code1, code2 codes of the languages (i.e. ISO-631 ones)" << endl;
-  wcerr << L"  doc1, doc2    unformatted docs to build the TMX file" << endl;
-  wcerr << L"  output_file   if not specified, the result will be printed to stdout" << endl;
+  cerr << "USAGE: " << basename(progname) << " [options] code1 code2 doc1 doc2 [output_file]" << endl;
+  cerr << "Options:" << endl;
+  cerr << "  -p percent    number 0 < n <= 1 to set margin of confidence of TU's " << endl;
+  cerr << "                (0.85 by default) in length terms" << endl;
+  cerr << "  -e edit       number 0 < n <= 1 to set margin of confidence of TU's " << endl;
+  cerr << "                (0.30 by default) in edit distance terms" << endl;
+  cerr << "  -l low-limit  ignore percent if the segment is less than lowlimit" <<endl;
+  cerr << "                (15 by default)" << endl;
+  cerr << "  -m max-edit   characters to be taken into account when aligning" << endl;
+  cerr << "                sentences (50 by default)" << endl;
+  cerr << "  -d diagonal   diagonal width for using edit distance, 10 by default" << endl;
+  cerr << "  -w window     window size of the edit distance with sentences" << endl;
+  cerr << "                (100 sentences by default)" << endl;
+  cerr << "  -s step       step for moving the window during the alignment" <<endl;
+  cerr << "                (75 sentences by default)" << endl;
+  cerr << "  -h help       display this help" << endl;
+  cerr << "Other parameters:" << endl;
+  cerr << "  code1, code2 codes of the languages (i.e. ISO-639 ones)" << endl;
+  cerr << "  doc1, doc2    unformatted docs to build the TMX file" << endl;
+  cerr << "  output_file   if not specified, the result will be printed to stdout" << endl;
 
   exit(EXIT_FAILURE);
 }
@@ -64,7 +62,7 @@ int main(int argc, char *argv[])
   LtLocale::tryToSetLocale();
   string output_file = "";
   string doc1 = "", doc2 = "";
-  string lang1 = "", lang2 = "";
+  UString lang1, lang2;
 
   double percent = 0.85;
   int low_limit = 15;
@@ -160,7 +158,7 @@ int main(int argc, char *argv[])
 
 
       default:
-        //wcerr<<L"Error: getopt() returned the char code '"<<c<<L"'\n";
+        //cerr<<"Error: getopt() returned the char code '"<<c<<"'\n";
         usage(argv[0]);
         break;
     }
@@ -174,8 +172,8 @@ int main(int argc, char *argv[])
     case 5:
       doc1 = argv[optind - 1 + 3];
       doc2 = argv[optind - 1 + 4];
-      lang1 = argv[optind - 1 + 1];
-      lang2 = argv[optind - 1 + 2];
+      lang1 = to_ustring(argv[optind - 1 + 1]);
+      lang2 = to_ustring(argv[optind - 1 + 2]);
       break;
 
     default:
@@ -183,10 +181,10 @@ int main(int argc, char *argv[])
       return EXIT_FAILURE;
   }
 
-  TMXBuilder tmxb(UtfConverter::fromUtf8(lang1), UtfConverter::fromUtf8(lang2));
+  TMXBuilder tmxb(lang1, lang2);
 //  if(!tmxb.check(doc1, doc2))
 //  {
-//    wcerr << L"Error: The two files are incompatible for building a TMX." << endl;
+//    cerr << "Error: The two files are incompatible for building a TMX." << endl;
 //    exit(EXIT_FAILURE);
 //  }
 
diff --git a/apertium/apertium_transfer.cc b/apertium/apertium_transfer.cc
index 8d0d512..c3f6d81 100644
--- a/apertium/apertium_transfer.cc
+++ b/apertium/apertium_transfer.cc
@@ -23,37 +23,36 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <unistd.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include "getopt_long.h"
 #ifdef _MSC_VER
 #include <io.h>
 #include <fcntl.h>
 #endif
 
-using namespace Apertium;
 using namespace std;
 
 void message(char *progname)
 {
-  wcerr << "USAGE: " << basename(progname) << " trules preproc biltrans [input [output]]" << endl;
-  wcerr << "       " << basename(progname) << " -b trules preproc [input [output]]" << endl;
-  wcerr << "       " << basename(progname) << " -n trules preproc [input [output]]" << endl;
-  wcerr << "       " << basename(progname) << " -x extended trules preproc biltrans [input [output]]" << endl;
-  wcerr << "       " << basename(progname) << " -c trules preproc biltrans [input [output]]" << endl;
-  wcerr << "       " << basename(progname) << " -t trules preproc biltrans [input [output]]" << endl;
-  wcerr << "  trules     transfer rules file" << endl;
-  wcerr << "  preproc    result of preprocess trules file" << endl;
-  wcerr << "  biltrans   bilingual letter transducer file" << endl;
-  wcerr << "  input      input file, standard input by default" << endl;
-  wcerr << "  output     output file, standard output by default" << endl;
-  wcerr << "  -b         input from lexical transfer" << endl;
-  wcerr << "  -n         don't use bilingual dictionary" << endl;
-  wcerr << "  -x bindix  extended mode with user dictionary" << endl;
-  wcerr << "  -c         case-sensitiveness while accessing bilingual dictionary" << endl;
-  wcerr << "  -t         trace (show rule numbers and patterns matched)" << endl;
-  wcerr << "  -T         trace, for apertium-transfer-tools (also sets -t)" << endl;
-  wcerr << "  -z         null-flushing output on '\0'" << endl;
-  wcerr << "  -h         shows this message" << endl;
+  cerr << "USAGE: " << basename(progname) << " trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -b trules preproc [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -n trules preproc [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -x extended trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -c trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -t trules preproc biltrans [input [output]]" << endl;
+  cerr << "  trules     transfer rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  biltrans   bilingual letter transducer file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "  -b         input from lexical transfer" << endl;
+  cerr << "  -n         don't use bilingual dictionary" << endl;
+  cerr << "  -x bindix  extended mode with user dictionary" << endl;
+  cerr << "  -c         case-sensitiveness while accessing bilingual dictionary" << endl;
+  cerr << "  -t         trace (show rule numbers and patterns matched)" << endl;
+  cerr << "  -T         trace, for apertium-transfer-tools (also sets -t)" << endl;
+  cerr << "  -z         null-flushing output on '\\0'" << endl;
+  cerr << "  -h         shows this message" << endl;
 
 
   exit(EXIT_FAILURE);
@@ -64,32 +63,27 @@ void testfile(string const &filename)
   struct stat mybuf;
   if(stat(filename.c_str(), &mybuf) == -1)
   {
-    wcerr << "Error: can't stat file '";
-    wcerr << filename << "'." << endl;
+    cerr << "Error: can't stat file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
 }
 
-FILE * open_input(string const &filename)
+void open_input(InputFile& input, const char* filename)
 {
-  FILE *input = fopen(filename.c_str(), "r");
-  if(!input)
-  {
-    wcerr << "Error: can't open input file '";
-    wcerr << filename.c_str() << "'." << endl;
+  if (!input.open(filename)) {
+    cerr << "Error: can't open input file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
-
-  return input;
 }
 
-FILE * open_output(string const &filename)
+UFILE* open_output(const char* filename)
 {
-  FILE *output = fopen(filename.c_str(), "w");
-  if(!output)
-  {
-    wcerr << "Error: can't open output file '";
-    wcerr << filename.c_str() << "'." << endl;
+  UFILE* output = u_fopen(filename, "w", NULL, NULL);
+  if(!output) {
+    cerr << "Error: can't open output file '";
+    cerr << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
   return output;
@@ -107,13 +101,13 @@ int main(int argc, char *argv[])
     static struct option long_options[] =
     {
       {"from-bilingual",      no_argument, 0, 'b'},
-      {"no-bilingual",      no_argument, 0, 'n'},
+      {"no-bilingual",        no_argument, 0, 'n'},
       {"extended",      required_argument, 0, 'x'},
-      {"case-sensitive", no_argument, 0, 'c'},
-      {"null-flush", no_argument, 0, 'z'},
-      {"trace", no_argument, 0, 't'},
-      {"trace_att", no_argument, 0, 'T'},
-      {"help", no_argument, 0, 'h'},
+      {"case-sensitive",      no_argument, 0, 'c'},
+      {"null-flush",          no_argument, 0, 'z'},
+      {"trace",               no_argument, 0, 't'},
+      {"trace_att",           no_argument, 0, 'T'},
+      {"help",                no_argument, 0, 'h'},
       {0, 0, 0, 0}
     };
 
@@ -160,13 +154,14 @@ int main(int argc, char *argv[])
     }
   }
 
-  FILE *input = stdin, *output = stdout;
+  InputFile input;
+  UFILE* output = u_finit(stdout, NULL, NULL);
 
   switch(argc - optind + 1)
   {
     case 6:
       output = open_output(argv[argc-1]);
-      input = open_input(argv[argc-2]);
+      open_input(input, argv[argc-2]);
       testfile(argv[argc-3]);
       testfile(argv[argc-4]);
       testfile(argv[argc-5]);
@@ -177,14 +172,14 @@ int main(int argc, char *argv[])
       if(t.getUseBilingual() == false || t.getPreBilingual() == true)
       {
         output = open_output(argv[argc-1]);
-        input = open_input(argv[argc-2]);
+        open_input(input, argv[argc-2]);
         testfile(argv[argc-3]);
         testfile(argv[argc-4]);
         t.read(argv[argc-4], argv[argc-3]);
       }
       else
       {
-        input = open_input(argv[argc-1]);
+        open_input(input, argv[argc-1]);
         testfile(argv[argc-2]);
         testfile(argv[argc-3]);
         testfile(argv[argc-4]);
@@ -195,7 +190,7 @@ int main(int argc, char *argv[])
     case 4:
       if(t.getUseBilingual() == false || t.getPreBilingual() == true)
       {
-        input = open_input(argv[argc-1]);
+        open_input(input, argv[argc-1]);
         testfile(argv[argc-2]);
         testfile(argv[argc-3]);
         t.read(argv[argc-3], argv[argc-2]);
@@ -226,11 +221,6 @@ int main(int argc, char *argv[])
       break;
   }
 
-#ifdef _MSC_VER
-  _setmode(_fileno(input), _O_U8TEXT);
-  _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
   t.transfer(input, output);
   return EXIT_SUCCESS;
 }
diff --git a/apertium/collection.cc b/apertium/collection.cc
index 995c14a..ab80607 100644
--- a/apertium/collection.cc
+++ b/apertium/collection.cc
@@ -16,7 +16,7 @@
  */
 #include <lttoolbox/compression.h>
 #include <apertium/collection.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/serialiser.h>
 #include <apertium/deserialiser.h>
 
diff --git a/apertium/constant_manager.cc b/apertium/constant_manager.cc
index c4ddb32..5302171 100644
--- a/apertium/constant_manager.cc
+++ b/apertium/constant_manager.cc
@@ -16,7 +16,7 @@
  */
 #include <apertium/constant_manager.h>
 #include <lttoolbox/compression.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/serialiser.h>
 #include <apertium/deserialiser.h>
 
@@ -57,13 +57,13 @@ ConstantManager::operator =(ConstantManager const &o)
   return *this;
 }
 void
-ConstantManager::setConstant(wstring const &constant, int const value)
+ConstantManager::setConstant(UString const &constant, int const value)
 {
   constants[constant] = value;
 }
 
 int
-ConstantManager::getConstant(wstring const &constant)
+ConstantManager::getConstant(UString const &constant)
 {
   return constants[constant];
 }
@@ -73,10 +73,10 @@ ConstantManager::write(FILE *output)
 {
   Compression::multibyte_write(constants.size(), output);
 
-  for(map<wstring, int>::const_iterator it = constants.begin(), limit = constants.end();
+  for(map<UString, int>::const_iterator it = constants.begin(), limit = constants.end();
       it != limit; it++)
   {
-    Compression::wstring_write(it->first, output);
+    Compression::string_write(it->first, output);
     Compression::multibyte_write(it->second, output);
   }
 }
@@ -88,7 +88,7 @@ ConstantManager::read(FILE *input)
   int size = Compression::multibyte_read(input);
   for(int i = 0; i != size; i++)
   {
-    wstring mystr = Compression::wstring_read(input);
+    UString mystr = Compression::string_read(input);
     constants[mystr] = Compression::multibyte_read(input);
   }
 }
@@ -96,11 +96,11 @@ ConstantManager::read(FILE *input)
 void
 ConstantManager::serialise(std::ostream &serialised) const
 {
-  Serialiser<map<wstring, int> >::serialise(constants, serialised);
+  Serialiser<map<UString, int> >::serialise(constants, serialised);
 }
 
 void
 ConstantManager::deserialise(std::istream &serialised)
 {
-  constants = Deserialiser<map<wstring, int> >::deserialise(serialised);
+  constants = Deserialiser<map<UString, int> >::deserialise(serialised);
 }
diff --git a/apertium/constant_manager.h b/apertium/constant_manager.h
index d8ed3f7..395edae 100644
--- a/apertium/constant_manager.h
+++ b/apertium/constant_manager.h
@@ -20,13 +20,14 @@
 #include <cstdio>
 #include <map>
 #include <string>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
 class ConstantManager
 {
 private:
-  map<wstring, int> constants;
+  map<UString, int> constants;
 
   void copy(ConstantManager const &o);
   void destroy();
@@ -36,8 +37,8 @@ public:
   ConstantManager(ConstantManager const &o);
   ConstantManager & operator =(ConstantManager const &o);
 
-  void setConstant(wstring const &constant, int const value);
-  int getConstant(wstring const &constant);
+  void setConstant(UString const &constant, int const value);
+  int getConstant(UString const &constant);
   void write(FILE *output);
   void read(FILE *input);
   void serialise(std::ostream &serialised) const;
diff --git a/apertium/deformat.xsl b/apertium/deformat.xsl
index fd93401..b3da49b 100644
--- a/apertium/deformat.xsl
+++ b/apertium/deformat.xsl
@@ -90,15 +90,12 @@
       </xsl:if>
       <xsl:value-of select="string('  printBuffer();&#xA;')"/>
 
-      <xsl:value-of select="string('  if (hasWrite_white) {&#xA;    fputws(L&quot; &quot;, yyout);&#xA;')"/>
+      <xsl:value-of select="string('  if (hasWrite_white) {&#xA;    fputs(&quot; &quot;, yyout);&#xA;')"/>
       <xsl:value-of select="string('    offset++;&#xA;    hasWrite_white = false;&#xA;  }&#xA;')"/>
 
       <xsl:value-of select="string('  current++;&#xA;  orders.push_back(current);&#xA;')"/>
       <xsl:value-of select="string('  last=&quot;open_tag&quot;;&#xA;  offsets.push_back(offset);&#xA;')"/>
-      <xsl:value-of select="string('  wchar_t* symbol = new wchar_t[strlen(yytext) + 1];&#xA;')"/>
-      <xsl:value-of select="string('  mbstowcs(symbol, yytext, strlen(yytext));&#xA;')"/>
-      <xsl:value-of select="string('  symbol[strlen(yytext)] = (char) 0;&#xA;')"/>
-      <xsl:value-of select="string('  tags.push_back(symbol);&#xA;  delete[] symbol;&#xA;}&#xA;')"/>
+      <xsl:value-of select="string('  tags.push_back(to_ustring(yytext));&#xA;}&#xA;')"/>
     </xsl:when>
     <xsl:when test="./@type = 'close'">
       <xsl:value-of select="./tag/@regexp"/>
@@ -164,50 +161,45 @@ extern "C" {
 }
 
 #include &lt;lttoolbox/lt_locale.h&gt;
-#include &lt;lttoolbox/ltstr.h&gt;
-#include &lt;apertium/string_to_wostream.h&gt;
+#include &lt;lttoolbox/ustring.h&gt;
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
-#include &lt;utf8/utf8.h&gt;
+#include &lt;utf8.h&gt;
 #include &lt;apertium/unlocked_cstdio.h&gt;
-#ifdef _WIN32
-#include &lt;io.h&gt;
-#include &lt;fcntl.h&gt;
-#define utf8to32 utf8to16
-#define utf32to8 utf16to8
-#endif
 
 using namespace std;
 
-wstring buffer;
+UString buffer;
 string symbuf;
 bool isDot, isEoh, hasWrite_dot, hasWrite_white;
 bool eosIncond;
 bool noDot;
 bool markEoh;
-FILE *formatfile;
+UFILE* formatfile;
 string last;
 int current;
 long int offset;
 
 
 vector&lt;long int&gt; offsets;
-vector&lt;wstring&gt; tags;
+vector&lt;UString&gt; tags;
 vector&lt;int&gt; orders;
 
 regex_t escape_chars;
 regex_t names_regexp;
 
-void bufferAppend(wstring &amp;buf, string const &amp;str)
+void bufferAppend(UString &amp;buf, string const &amp;str)
 {
-  symbuf.append(str);
-  if (utf8::is_valid(symbuf.begin(), symbuf.end())) {
-  	utf8::utf8to32(symbuf.begin(), symbuf.end(), std::back_inserter(buf));
-  	symbuf.clear();
-  }
+  buf += to_ustring(str.c_str());
 }
 
+void put(const UString&amp; str, FILE* f)
+{
+  string temp;
+  utf8::utf16to8(str.begin(), str.end(), std::back_inserter(temp));
+  fputs_unlocked(temp.c_str(), f);
+}
 
 void init_escape()
 {
@@ -218,7 +210,7 @@ void init_escape()
       <xsl:with-param name="replacement" select="string('\\')"/>
     </xsl:call-template>", REG_EXTENDED))
   {
-    wcerr &lt;&lt; "ERROR: Illegal regular expression for escape characters" &lt;&lt; endl;
+    cerr &lt;&lt; "ERROR: Illegal regular expression for escape characters" &lt;&lt; endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -232,7 +224,7 @@ void init_tagNames()
       <xsl:with-param name="replacement" select="string('\\')"/>
     </xsl:call-template>", REG_EXTENDED))
   {
-    wcerr &lt;&lt; "ERROR: Illegal regular expression for tag-names" &lt;&lt; endl;
+    cerr &lt;&lt; "ERROR: Illegal regular expression for tag-names" &lt;&lt; endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -254,20 +246,20 @@ string backslash(string const &amp;str)
 }
 
 
-wstring escape(string const &amp;str)
+UString escape(string const &amp;str)
 {
   regmatch_t pmatch;
 
   char const *mystring = str.c_str();
   int base = 0;
-  wstring result;
+  UString result;
 
   while(!regexec(&amp;escape_chars, mystring + base, 1, &amp;pmatch, 0))
   {
     bufferAppend(result, str.substr(base, pmatch.rm_so));
-    result += L'\\';
+    result += '\\';
     const char *mb = str.c_str() + base + pmatch.rm_so;
-    wchar_t micaracter = utf8::next(mb, mb+4);
+    UChar32 micaracter = utf8::next(mb, mb+4);
 
     result += micaracter;
     base += pmatch.rm_eo;
@@ -277,10 +269,10 @@ wstring escape(string const &amp;str)
   return result;
 }
 
-wstring escape(wstring const &amp;str)
+UString escape(UString const &amp;str)
 {
   string dest;
-  utf8::utf32to8(str.begin(), str.end(), std::back_inserter(dest));
+  utf8::utf16to8(str.begin(), str.end(), std::back_inserter(dest));
   return escape(dest);
 }
 
@@ -302,7 +294,7 @@ string get_tagName(string tag){
 <xsl:for-each select="./rules/replacement-rule">
   <xsl:variable name="varname"
 		select="concat(concat(string('S'),position()),string('_substitution'))"/>
-  <xsl:value-of select="string('map&lt;string, wstring, Ltstr&gt; S')"/>
+  <xsl:value-of select="string('map&lt;string, UString&gt; S')"/>
   <xsl:value-of select="position()"/>
   <xsl:value-of select="string('_substitution;&#xA;&#xA;void S')"/>
   <xsl:value-of select="position()"/>
@@ -313,9 +305,9 @@ string get_tagName(string tag){
     <xsl:value-of select="$varname"/>
     <xsl:value-of select="string('[&quot;')"/>
     <xsl:value-of select="./@source"/>
-    <xsl:value-of select="string('&quot;] = L&quot;')"/>
+    <xsl:value-of select="string('&quot;] = &quot;')"/>
     <xsl:value-of select="./@target"/>
-    <xsl:value-of select="string('&quot;;')"/>
+    <xsl:value-of select="string('&quot;_u;')"/>
   </xsl:for-each>
 
   <xsl:value-of select="string('&#xA;}&#xA;')"/>
@@ -328,7 +320,7 @@ int get_index(string end_tag){
 
   for (int i=tags.size()-1; i >= 0; i--) {
     new_end_tag.clear();
-    utf8::utf32to8(tags[i].begin(), tags[i].end(), std::back_inserter(new_end_tag));
+    utf8::utf16to8(tags[i].begin(), tags[i].end(), std::back_inserter(new_end_tag));
 
     if (get_tagName(end_tag) == get_tagName(new_end_tag))
       return i;
@@ -338,15 +330,8 @@ int get_index(string end_tag){
 }
 
 void print_emptyTags() {
-  wchar_t tag[250];
-
   for (size_t i=0; i &lt; tags.size(); i++) {
-    swprintf(tag, 250, L"&lt;format-tag offset=\"%d\" order= \"%d\"&gt;&lt;![CDATA[", offsets[i], orders[i]);
-    fputws(tag, formatfile);
-    fputws(tags[i].c_str(), formatfile);
-    fputwc(L']', formatfile);
-    swprintf(tag, 250, L"]&gt;&lt;/format-tag&gt;\n");
-    fputws(tag, formatfile);
+    u_fprintf(formatfile, "&lt;format-tag offset=\"%d\" order= \"%d\"&gt;&lt;![CDATA[%S]]&gt;&lt;/format-tag&gt;\n", offsets[i], orders[i], tags[i].c_str());
   }
 }
 </xsl:if>
@@ -355,14 +340,11 @@ void print_emptyTags() {
   <xsl:when test="$mode=string('matxin')">
 void printBuffer(int ind=-1, string end_tag="")
 {
-  wchar_t tag[250];
-  wstring etiketa;
-  wstring wend_tag;
+  UString etiketa;
+  UString wend_tag = to_ustring(end_tag.c_str());
   size_t pos;
   int num;
 
-  utf8::utf8to32(end_tag.begin(), end_tag.end(), std::back_inserter(wend_tag));
-
   if (ind != -1 &amp;&amp; ind == tags.size()-1 &amp;&amp;
       offsets[ind] == offset &amp;&amp; orders[ind] == current)
   {
@@ -372,7 +354,7 @@ void printBuffer(int ind=-1, string end_tag="")
     offsets.pop_back();
     orders.pop_back();
   }
-  else if (ind == -1 &amp;&amp; wend_tag != L"")
+  else if (ind == -1 &amp;&amp; !wend_tag.empty())
   {
     last = "buffer";
     buffer = buffer + wend_tag;
@@ -382,10 +364,9 @@ void printBuffer(int ind=-1, string end_tag="")
     // isEoh handling TODO matxin format
     if (hasWrite_dot &amp;&amp; isDot)
     {
-      swprintf(tag, 250, L"&lt;empty-tag offset=\"%d\"/&gt;\n", offset+1);
-      fputws(tag, formatfile);
+	  u_fprintf(formatfile, "&lt;empty-tag offset=\"%d\"/&gt;\n", offset+1);
 
-      fputws(L" .\n", yyout);
+      fputs(" .\n", yyout);
       offset += 2;
       hasWrite_dot = false;
     }
@@ -396,24 +377,22 @@ void printBuffer(int ind=-1, string end_tag="")
     {
       if (hasWrite_white)
       {
-        fputws(L" ", yyout);
+        fputs(" ", yyout);
         offset++;
         hasWrite_white = false;
       }
 
       current++;
 
-      swprintf(tag, 250, L"&lt;format-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
-      fputws(tag, formatfile);
-      while ((pos = buffer.find(L"]]&gt;")) != wstring::npos)
-        buffer.replace(pos, 3, L"\\]\\]\\&gt;");
-      fputws(buffer.c_str(), formatfile);
-      swprintf(tag, 250, L"]]&gt;&lt;/format-tag&gt;\n");
-      fputws(tag, formatfile);
+	  u_fprintf(formatfile, "&lt;format-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
+      while ((pos = buffer.find("]]&gt;"_u)) != UString::npos)
+        buffer.replace(pos, 3, "\\]\\]\\&gt;"_u);
+      write(buffer, formatfile);
+	  u_fprintf(formatfile, "]]&gt;&lt;/format-tag&gt;\n");
     }
     else
     {
-      fputws(buffer.c_str(), yyout);
+	  put(buffer, yyout);
       offset += buffer.size();
     }
 
@@ -422,30 +401,27 @@ void printBuffer(int ind=-1, string end_tag="")
     {
       if (hasWrite_white)
       {
-        fputws(L" ", yyout);
+	    fputc(' ', yyout);
         offset++;
         hasWrite_white = false;
       }
 
-      num = swprintf(tag, 250, L"&lt;open-close-tag&gt;\n");
-      swprintf(tag + num, 250 - num, L"&lt;open-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offsets[ind], orders[ind]);
-      fputws(tag, formatfile);
+      u_fprintf(formatfile, "&lt;open-close-tag&gt;\n");
+      u_fprintf(formatfile, "&lt;open-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offsets[ind], orders[ind]);
       etiketa = tags[ind];
-      while ((pos = etiketa.find(L"]]&gt;")) != wstring::npos)
-        etiketa.replace(pos, 3, L"\\]\\]\\&gt;");
-      fputws(etiketa.c_str(), formatfile);
+      while ((pos = etiketa.find("]]&gt;"_u)) != UString::npos)
+        etiketa.replace(pos, 3, "\\]\\]\\&gt;"_u);
+      write(etiketa, formatfile);
 
       current++;
 
-      num = swprintf(tag, 250, L"]]&gt;&lt;/open-tag&gt;\n");
-      swprintf(tag + num, 250 - num, L"&lt;close-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
-      fputws(tag, formatfile);
-      while ((pos = wend_tag.find(L"]]&gt;")) != wstring::npos)
-        wend_tag.replace(pos, 3, L"\\]\\]\\&gt;");
-      fputws(wend_tag.c_str(), formatfile);
-      num = swprintf(tag, 250, L"]]&gt;&lt;/close-tag&gt;\n");
-      swprintf(tag + num, 250 - num, L"&lt;/open-close-tag&gt;\n");
-      fputws(tag, formatfile);
+      u_fprintf(formatfile, "]]&gt;&lt;/open-tag&gt;\n");
+      u_fprintf(formatfile, "&lt;close-tag offset=\"%d\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
+      while ((pos = wend_tag.find("]]&gt;"_u)) != UString::npos)
+        wend_tag.replace(pos, 3, "\\]\\]\\&gt;"_u);
+      write(wend_tag, formatfile);
+      u_fprintf(formatfile, "]]&gt;&lt;/close-tag&gt;\n");
+      u_fprintf(formatfile, "&lt;/open-close-tag&gt;\n");
 
       tags.erase(tags.begin() + ind);
       offsets.erase(offsets.begin() + ind);
@@ -454,7 +430,7 @@ void printBuffer(int ind=-1, string end_tag="")
 
 
     last = "buffer";
-    buffer = L"";
+    buffer.clear();
   }
 
 }
@@ -467,11 +443,11 @@ void preDot()
   {
     if(noDot)
     {
-      fputws_unlocked(L"[]", yyout);
+      fputs_unlocked("[]", yyout);
     }
     else
     {
-      fputws_unlocked(L".[]", yyout);
+      fputs_unlocked(".[]", yyout);
     }
   }
 }
@@ -480,66 +456,64 @@ void printBuffer()
 {
   if(isEoh &amp;&amp; markEoh)
   {
-    fputws_unlocked(L"[]\x2761", yyout);
+	put(u"[]\u2761", yyout);
     isEoh = false;
   }
   if(isDot &amp;&amp; !eosIncond)
   {
     if(noDot)
     {
-      fputws_unlocked(L"[]", yyout);
+      fputs_unlocked("[]", yyout);
     }
     else
     {
-      fputws_unlocked(L".[]", yyout);
+      fputs_unlocked(".[]", yyout);
     }
     isDot = false;
   }
   if(buffer.size() &gt; <xsl:value-of select="/format/options/largeblocks/@size"/>)
   {
     string filename = tmpnam(NULL);
-    FILE *largeblock = fopen(filename.c_str(), "wb");
-    fputws_unlocked(buffer.c_str(), largeblock);
-    fclose(largeblock);
+    UFILE *largeblock = u_fopen(filename.c_str(), "wb", NULL, NULL);
+    write(buffer, largeblock);
+    u_fclose(largeblock);
     preDot();
-    fputwc_unlocked(L'[', yyout);
-    fputwc_unlocked(L'@', yyout);
-    std::wstring cad;
-    utf8::utf8to32(filename.begin(), filename.end(), std::back_inserter(cad));
-    fputws_unlocked(cad.c_str(), yyout);
-    fputwc_unlocked(L']', yyout);
+    fputc_unlocked('[', yyout);
+    fputc_unlocked('@', yyout);
+    fputs_unlocked(filename.c_str(), yyout);
+    fputc_unlocked(']', yyout);
   }
   else if(buffer.size() &gt; 1)
   {
     preDot();
-    fputwc_unlocked(L'[', yyout);
-    wstring const tmp = escape(buffer);
-    if(tmp[0] == L'@')
+    fputc_unlocked('[', yyout);
+    UString const tmp = escape(buffer);
+    if(tmp[0] == '@')
     {
-      fputwc_unlocked(L'\\', yyout);
+      fputc_unlocked('\\', yyout);
     }
-    fputws_unlocked(tmp.c_str(), yyout);
-    fputwc_unlocked(L']', yyout);
+	put(tmp, yyout);
+    fputc_unlocked(']', yyout);
   }
-  else if(buffer.size() == 1 &amp;&amp; buffer[0] != L' ')
+  else if(buffer.size() == 1 &amp;&amp; buffer[0] != ' ')
   {
     preDot();
-    fputwc_unlocked(L'[', yyout);
-    wstring const tmp = escape(buffer);
-    if(tmp[0] == L'@')
+    fputc_unlocked('[', yyout);
+    UString const tmp = escape(buffer);
+    if(tmp[0] == '@')
     {
-      fputwc_unlocked(L'\\', yyout);
+      fputc_unlocked('\\', yyout);
     }
-    fputws_unlocked(tmp.c_str(), yyout);
+    put(tmp, yyout);
 
-    fputwc_unlocked(L']', yyout);
+    fputc_unlocked(']', yyout);
   }
   else
   {
-    fputws_unlocked(buffer.c_str(), yyout);
+    put(buffer, yyout);
   }
 
-  buffer = L"";
+  buffer.clear();
 }
   </xsl:otherwise>
 </xsl:choose>
@@ -618,9 +592,9 @@ void printBuffer()
   <xsl:value-of select="$varname"/>
   <xsl:value-of select="string('.find(yytext) != ')"/>
   <xsl:value-of select="$varname"/>
-  <xsl:value-of select="string('.end())&#xA;  {&#xA;    printBuffer();&#xA;    fputws_unlocked(')"/>
+  <xsl:value-of select="string('.end())&#xA;  {&#xA;    printBuffer();&#xA;    put(')"/>
   <xsl:value-of select="$varname"/>
-  <xsl:value-of select="string('[yytext].c_str(), yyout);&#xA;    offset+=')"/>
+  <xsl:value-of select="string('[yytext], yyout);&#xA;    offset+=')"/>
   <xsl:value-of select="$varname"/>
   <xsl:value-of select="string('[yytext].size();&#xA;')"/>
   <xsl:value-of select="string('    hasWrite_dot = hasWrite_white = true;&#xA;  }&#xA;  else&#xA;  {&#xA;')"/>
@@ -637,12 +611,11 @@ void printBuffer()
 
 <xsl:value-of select="./options/escape-chars/@regexp"/>&#x9;{
   printBuffer();
-  fputwc_unlocked(L'\\', yyout);
+  fputc_unlocked('\\', yyout);
   offset++;
   const char *mb = yytext;
-  wchar_t symbol = utf8::next(mb, mb+4);
-
-  fputwc_unlocked(symbol, yyout);
+  UChar32 symbol = utf8::next(mb, mb+4);
+  put(U_IS_BMP(symbol) ? UString(1, symbol) : UString{U16_LEAD(symbol), U16_TRAIL(symbol)}, yyout);
   offset++;
   hasWrite_dot = hasWrite_white = true;
 
@@ -654,9 +627,9 @@ void printBuffer()
 
   if (utf8::is_valid(symbuf.begin(), symbuf.end())) {
     const char *mb = symbuf.c_str();
-    wchar_t symbol = utf8::next(mb, mb+4);
+    UChar32 symbol = utf8::next(mb, mb+4);
     symbuf.clear();
-    fputwc_unlocked(symbol, yyout);
+    put(U_IS_BMP(symbol) ? UString(1, symbol) : UString{U16_LEAD(symbol), U16_TRAIL(symbol)}, yyout);
     offset++;
     hasWrite_dot = hasWrite_white = true;
   }
@@ -677,20 +650,20 @@ void usage(string const &amp;progname)
 {
 <xsl:choose>
   <xsl:when test="$mode=string('matxin')">
-  wcerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " format_file [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " format_file [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
   </xsl:when>
   <xsl:otherwise>
-  wcerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [ -h | -o | -i | -n ] [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [ -h | -o | -i | -n ] [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
   </xsl:otherwise>
 </xsl:choose>
-  wcerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
+  cerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
   exit(EXIT_SUCCESS);
 }
 
 int main(int argc, char *argv[])
 {
   LtLocale::tryToSetLocale();
-  size_t base = 0;
+  int base = 0;
   eosIncond = false;
 
   if(argc &gt;= 2)
@@ -740,7 +713,7 @@ int main(int argc, char *argv[])
         usage(argv[0]);
       }
     case 2:
-      formatfile = fopen(argv[1+base], "wb");
+      formatfile = u_fopen(argv[1+base], "wb", NULL, NULL);
       if(!formatfile)
       {
         usage(argv[0]);
@@ -751,35 +724,23 @@ int main(int argc, char *argv[])
   }
   </xsl:when>
   <xsl:otherwise>
- if((argc-base) &gt; 4)
-  {
+  if((argc-base) &gt; 4) {
     usage(argv[0]);
   }
-
-  switch(argc-base)
-  {
-    case 3:
-      yyout = fopen(argv[2+base], "wb");
-      if(!yyout)
-      {
-        usage(argv[0]);
-      }
-    case 2:
-      yyin = fopen(argv[1+base], "rb");
-      if(!yyin)
-      {
-        usage(argv[0]);
-      }
-      break;
-    default:
-      break;
+  if ((argc - base) == 3) {
+    yyout = fopen(argv[2 + base], "wb");
+	if (!yyout) {
+	  usage(argv[0]);
+	}
+  }
+  if ((argc - base) >= 2) {
+    yyin = fopen(argv[1 + base], "rb");
+    if (!yyin) {
+      usage(argv[0]);
+    }
   }
   </xsl:otherwise>
 </xsl:choose>
-#ifdef _MSC_VER
-  _setmode(_fileno(yyin), _O_U8TEXT);
-  _setmode(_fileno(yyout), _O_U8TEXT);
-#endif
   // prevent warning message
   yy_push_state(1);
   yy_top_state();
@@ -792,8 +753,8 @@ int main(int argc, char *argv[])
 </xsl:for-each>
 
 <xsl:if test="$mode=string('matxin')">
-  fputws(L"&lt;?xml version=\&quot;1.0\&quot; encoding=\&quot;UTF-8\&quot; ?>\n", formatfile);
-  fputws(L"&lt;format&gt;\n", formatfile);
+  write("&lt;?xml version=\&quot;1.0\&quot; encoding=\&quot;UTF-8\&quot; ?>\n"_u, formatfile);
+  write("&lt;format&gt;\n"_u, formatfile);
 </xsl:if>
 
   last.clear();
@@ -807,7 +768,7 @@ int main(int argc, char *argv[])
 
 <xsl:if test="$mode=string('matxin')">
   print_emptyTags();
-  fputws(L"&lt;/format&gt;", formatfile);
+  write("&lt;/format&gt;"_u, formatfile);
   fclose(formatfile);
 </xsl:if>
   fclose(yyin);
diff --git a/apertium/deserialiser.h b/apertium/deserialiser.h
index ae40972..2f90ea2 100644
--- a/apertium/deserialiser.h
+++ b/apertium/deserialiser.h
@@ -90,13 +90,13 @@ i Deserialiser<i>::deserialise(std::istream &Stream_) {
 
 Lemma Deserialiser<Lemma>::deserialise(std::istream &Stream_) {
   Lemma StreamedType_;
-  StreamedType_.TheLemma = Deserialiser<std::wstring>::deserialise(Stream_);
+  StreamedType_.TheLemma = Deserialiser<UString>::deserialise(Stream_);
   return StreamedType_;
 }
 
 Morpheme Deserialiser<Morpheme>::deserialise(std::istream &Stream_) {
   Morpheme SerialisedType_;
-  SerialisedType_.TheLemma = Deserialiser<std::wstring>::deserialise(Stream_);
+  SerialisedType_.TheLemma = Deserialiser<UString>::deserialise(Stream_);
   SerialisedType_.TheTags =
       Deserialiser<std::vector<Tag> >::deserialise(Stream_);
   return SerialisedType_;
@@ -104,7 +104,7 @@ Morpheme Deserialiser<Morpheme>::deserialise(std::istream &Stream_) {
 
 Tag Deserialiser<Tag>::deserialise(std::istream &Stream_) {
   Tag SerialisedType_;
-  SerialisedType_.TheTag = Deserialiser<std::wstring>::deserialise(Stream_);
+  SerialisedType_.TheTag = Deserialiser<UString>::deserialise(Stream_);
   return SerialisedType_;
 }
 
diff --git a/apertium/exception.h b/apertium/exception.h
index 2bda473..3b97a76 100644
--- a/apertium/exception.h
+++ b/apertium/exception.h
@@ -27,9 +27,8 @@ namespace Exception {
     EXCEPTION_TYPE(const char *const what_) : ExceptionType(what_) {}          \
     EXCEPTION_TYPE(const std::string &what_) : ExceptionType(what_) {}         \
     EXCEPTION_TYPE(const std::stringstream &what_) : ExceptionType(what_) {}   \
-    EXCEPTION_TYPE(const wchar_t *const what_) : ExceptionType(what_) {}       \
-    EXCEPTION_TYPE(const std::wstring &what_) : ExceptionType(what_) {}        \
-    EXCEPTION_TYPE(const std::wstringstream &what_) : ExceptionType(what_) {}  \
+    EXCEPTION_TYPE(const UChar *const what_) : ExceptionType(what_) {}         \
+    EXCEPTION_TYPE(const UString &what_) : ExceptionType(what_) {}             \
     ~EXCEPTION_TYPE() throw() {}                                               \
   };
 
diff --git a/apertium/exception_type.cc b/apertium/exception_type.cc
index 0f32b45..7c1eec8 100644
--- a/apertium/exception_type.cc
+++ b/apertium/exception_type.cc
@@ -15,34 +15,35 @@
 
 #include "exception_type.h"
 
-#include "utf_converter.h"
 #include <sstream>
 #include <string>
 
 namespace Apertium {
+namespace {
+// Converts a UTF-16 message to UTF-8 once, at construction time, so that
+// what() can return a pointer into storage owned by the exception object.
+std::string toUtf8Message(const UString &message)
+{
+  std::string res;
+  utf8::utf16to8(message.begin(), message.end(), std::back_inserter(res));
+  return res;
+}
+}
+
 ExceptionType::ExceptionType(const char *const what_) : what_(what_) {}
 
 ExceptionType::ExceptionType(const std::string &what_) : what_(what_) {}
 
 ExceptionType::ExceptionType(const std::stringstream &what_)
     : what_(what_.str()) {}
 
-ExceptionType::ExceptionType(const wchar_t *const what_)
-{
-  this->what_ = UtfConverter::toUtf8(what_);
-}
+ExceptionType::ExceptionType(const UChar *const what_)
+  : what_(toUtf8Message(UString(what_))) {}
 
-ExceptionType::ExceptionType(const std::wstring &what_)
-{
-  this->what_ = UtfConverter::toUtf8(what_);
-}
-
-ExceptionType::ExceptionType(const std::wstringstream &what_)
-{
-  this->what_ = UtfConverter::toUtf8(what_.str());
-}
+ExceptionType::ExceptionType(const UString &what_)
+  : what_(toUtf8Message(what_)) {}
 
 ExceptionType::~ExceptionType() throw() {}
 
 const char *ExceptionType::what() const throw() { return what_.c_str(); }
 }
diff --git a/apertium/exception_type.h b/apertium/exception_type.h
index a780b75..9ee46ac 100644
--- a/apertium/exception_type.h
+++ b/apertium/exception_type.h
@@ -19,6 +19,7 @@
 #include <exception>
 #include <sstream>
 #include <string>
+#include <lttoolbox/ustring.h>
 
 namespace Apertium {
 class ExceptionType : public std::exception {
@@ -26,14 +27,14 @@ public:
   ExceptionType(const char *const what_);
   ExceptionType(const std::string &what_);
   ExceptionType(const std::stringstream &what_);
-  ExceptionType(const wchar_t *wchar_t_what_);
-  ExceptionType(const std::wstring &wchar_t_what_);
-  ExceptionType(const std::wstringstream &wchar_t_what_);
+  ExceptionType(const UChar *wchar_t_what_);
+  ExceptionType(const UString &wchar_t_what_);
   virtual ~ExceptionType() throw() = 0;
   const char *what() const throw();
 
 protected:
+  // UTF-8 encoded message; what() returns a pointer into this string.
   std::string what_;
 };
 }
 
diff --git a/apertium/feature_vec.cc b/apertium/feature_vec.cc
index fc95d10..1119134 100644
--- a/apertium/feature_vec.cc
+++ b/apertium/feature_vec.cc
@@ -68,7 +68,7 @@ operator<<(OStream & out, FeatureVec const &fv)
     out << std::dec << (int)(*(bc_it++))[0] << "; ";
     for (;bc_it != feat_it->first.end(); bc_it++)
     {
-      out << bc_it->c_str();
+      out << *bc_it;
       if (bc_it + 1 != feat_it->first.end())
       {
         out << ", ";
@@ -79,9 +79,6 @@ operator<<(OStream & out, FeatureVec const &fv)
   return out;
 }
 
-template std::wostream&
-operator<<(std::wostream& out, FeatureVec const &fv);
-
 template std::ostream&
 operator<<(std::ostream& out, FeatureVec const &fv);
 
diff --git a/apertium/feature_vec.h b/apertium/feature_vec.h
index a4dcd6a..0df6b69 100644
--- a/apertium/feature_vec.h
+++ b/apertium/feature_vec.h
@@ -6,6 +6,7 @@
 #include <string>
 #include <utility>
 #include <iostream>
+#include <lttoolbox/ustring.h>
 
 namespace Apertium {
 
diff --git a/apertium/file_morpho_stream.cc b/apertium/file_morpho_stream.cc
index 5040216..82d264f 100644
--- a/apertium/file_morpho_stream.cc
+++ b/apertium/file_morpho_stream.cc
@@ -21,34 +21,33 @@
  */
 
 #include <apertium/file_morpho_stream.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include "apertium_config.h"
 #include <apertium/unlocked_cstdio.h>
 
-using namespace Apertium;
-FileMorphoStream::FileMorphoStream(FILE *ftxt, bool d, TaggerData *t) :
+FileMorphoStream::FileMorphoStream(const char* ftxt, bool d, TaggerData *t) :
     ms() {
   foundEOF = false;
   debug=d;
   td = t;
   me = td->getPatternList().newMatchExe();
   alphabet = td->getPatternList().getAlphabet();
-  input = ftxt;
+  input.open(ftxt);
   ca_any_char = alphabet(PatternList::ANY_CHAR);
   ca_any_tag = alphabet(PatternList::ANY_TAG);
 
   ConstantManager &constants = td->getConstants();
-  ca_kignorar = constants.getConstant(L"kIGNORAR");
-  ca_kbarra = constants.getConstant(L"kBARRA");
-  ca_kdollar = constants.getConstant(L"kDOLLAR");
-  ca_kbegin = constants.getConstant(L"kBEGIN");
-  ca_kmot = constants.getConstant(L"kMOT");
-  ca_kmas = constants.getConstant(L"kMAS");
-  ca_kunknown = constants.getConstant(L"kUNKNOWN");
+  ca_kignorar = constants.getConstant("kIGNORAR"_u);
+  ca_kbarra = constants.getConstant("kBARRA"_u);
+  ca_kdollar = constants.getConstant("kDOLLAR"_u);
+  ca_kbegin = constants.getConstant("kBEGIN"_u);
+  ca_kmot = constants.getConstant("kMOT"_u);
+  ca_kmas = constants.getConstant("kMAS"_u);
+  ca_kunknown = constants.getConstant("kUNKNOWN"_u);
 
-  map<wstring, int, Ltstr> &tag_index = td->getTagIndex();
-  ca_tag_keof = tag_index[L"TAG_kEOF"];
-  ca_tag_kundef = tag_index[L"TAG_kUNDEF"];
+  map<UString, int> &tag_index = td->getTagIndex();
+  ca_tag_keof = tag_index["TAG_kEOF"_u];
+  ca_tag_kundef = tag_index["TAG_kUNDEF"_u];
 
   end_of_file = false;
   null_flush = false;
@@ -69,7 +68,7 @@ FileMorphoStream::get_next_word()
 
     if(word->isAmbiguous())
     {
-      vector<wstring> &ref = td->getDiscardRules();
+      vector<UString> &ref = td->getDiscardRules();
       for(unsigned int i = 0; i < ref.size(); i++)
       {
         word->discardOnAmbiguity(ref[i]);
@@ -79,7 +78,7 @@ FileMorphoStream::get_next_word()
     return word;
   }
 
-  if(feof(input))
+  if(input.eof())
   {
     return NULL;
   }
@@ -89,77 +88,68 @@ FileMorphoStream::get_next_word()
 
   while(true)
   {
-    int symbol = fgetwc_unlocked(input);
-    if(feof(input) || (null_flush && symbol == L'\0'))
+    UChar32 symbol = input.get();
+    if(input.eof() || (null_flush && symbol == '\0'))
     {
       end_of_file = true;
-      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
       return get_next_word();
     }
-    if(symbol == L'^')
+    if(symbol == '^')
     {
       readRestOfWord(ivwords);
       return get_next_word();
     }
     else
     {
-      wstring str = L"";
-      if(symbol == L'\\')
+      UString str = ""_u;
+      if(symbol == '\\')
       {
-        symbol = fgetwc_unlocked(input);
-        str += L'\\';
-        str += static_cast<wchar_t>(symbol);
-        symbol = L'\\';
+        symbol = input.get();
+        str += '\\';
+        str += symbol;
+        symbol = '\\';
       }
       else
       {
-        str += static_cast<wchar_t>(symbol);
+        str += symbol;
       }
 
-      while(symbol != L'^')
+      while(symbol != '^')
       {
-	symbol = fgetwc_unlocked(input);
-	if(feof(input) || (null_flush && symbol == L'\0'))
-	{
-	  end_of_file = true;
-	  vwords[ivwords]->add_ignored_string(str);
-          vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
-	  return get_next_word();
-	}
-	else if(symbol == L'\\')
-	{
-	  str += L'\\';
-          symbol = fgetwc_unlocked(input);
-	  if(feof(input) || (null_flush && symbol == L'\0'))
-	  {
-	    end_of_file = true;
-	    vwords[ivwords]->add_ignored_string(str);
-            vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
-	    return get_next_word();
-	  }
-	  str += static_cast<wchar_t>(symbol);
-	  symbol = L'\\';
-	}
-	else if(symbol == L'^')
-	{
-	  if(str.size() > 0)
-	  {
-	    vwords[ivwords]->add_ignored_string(str);
+        symbol = input.get();
+        if(input.eof() || (null_flush && symbol == '\0')) {
+          end_of_file = true;
+          vwords[ivwords]->add_ignored_string(str);
+          vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
+          return get_next_word();
+        } else if(symbol == '\\') {
+          str += '\\';
+          symbol = input.get();
+          if(input.eof() || (null_flush && symbol == '\0')) {
+            end_of_file = true;
+            vwords[ivwords]->add_ignored_string(str);
+            vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
+            return get_next_word();
           }
-	  readRestOfWord(ivwords);
-	  return get_next_word();
-	}
-        else
-	{
-	  str += static_cast<wchar_t>(symbol);
-	}
+          str += symbol;
+          symbol = '\\';
+        } else if(symbol == '^') {
+          if(str.size() > 0) {
+            vwords[ivwords]->add_ignored_string(str);
+          }
+          readRestOfWord(ivwords);
+          return get_next_word();
+        } else {
+          str += symbol;
+        }
       }
     }
   }
 }
 
 void
-FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
+FileMorphoStream::lrlmClassify(UString const &str, int &ivwords)
 {
   int floor = 0;
   int last_type = -1;
@@ -168,9 +158,9 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
   ms.init(me->getInitial());
   for(int i = 0, limit = str.size(); i != limit; i++)
   {
-    if(str[i] != L'<')
+    if(str[i] != '<')
     {
-      if(str[i] == L'+')
+      if(str[i] == '+')
       {
         int val = ms.classifyFinals(me->getFinals());
         if(val != -1)
@@ -179,18 +169,18 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
           last_type = val;
         }
       }
-      ms.step(towlower(str[i]), ca_any_char);
+      ms.step(u_tolower(str[i]), ca_any_char);
     }
     else
     {
-      wstring tag = L"";
+      UString tag;
       for(int j = i+1; j != limit; j++)
       {
-        if(str[j] == L'\\')
+        if(str[j] == '\\')
         {
  	  j++;
         }
-        else if(str[j] == L'>')
+        else if(str[j] == '>')
         {
  	  tag = str.substr(i, j-i+1);
 	  i = j;
@@ -216,7 +206,7 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
         vwords[ivwords]->add_tag(last_type,
                                  str.substr(floor, last_pos - floor + 1),
                                  td->getPreferRules());
-	if(str[last_pos+1] == L'+' && last_pos+1 < limit )
+	if(str[last_pos+1] == '+' && last_pos+1 < limit )
 	{
 	  floor = last_pos + 1;
 	  last_pos = floor + 1;
@@ -232,8 +222,8 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
       {
         if (debug)
         {
-	  wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
-          wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+	  cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"'\n";
+          cerr<<"         This is because of an incomplete tagset definition or a dictionary error\n";
 	}
         vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
 	return;
@@ -248,7 +238,7 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
 	  vwords[ivwords]->add_tag(last_type,
                                    str.substr(floor, last_pos - floor + 1),
                                    td->getPreferRules());
-          if(str[last_pos+1] == L'+' && last_pos+1 < limit )
+          if(str[last_pos+1] == '+' && last_pos+1 < limit )
           {
             floor = last_pos + 1;
 	    last_pos = floor;
@@ -264,8 +254,8 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
         {
           if (debug)
           {
-	    wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
-            wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+	    cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"'\n";
+            cerr<<"         This is because of an incomplete tagset definition or a dictionary error\n";
 	  }
           vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
 	  return;
@@ -280,8 +270,8 @@ FileMorphoStream::lrlmClassify(wstring const &str, int &ivwords)
     val = ca_tag_kundef;
     if (debug)
     {
-      wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
-      wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+      cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"'\n";
+      cerr<<"         This is because of an incomplete tagset definition or a dictionary error\n";
     }
 
   }
@@ -292,45 +282,45 @@ void
 FileMorphoStream::readRestOfWord(int &ivwords)
 {
   // first we have the superficial form
-  wstring  str = L"";
+  UString str;
 
   while(true)
   {
-    int symbol = fgetwc_unlocked(input);
-    if(feof(input) || (null_flush && symbol == L'\0'))
+    UChar32 symbol = input.get();
+    if(input.eof() || (null_flush && symbol == '\0'))
     {
       end_of_file = true;
       if(str.size() > 0)
       {
         vwords[ivwords]->add_ignored_string(str);
-        wcerr<<L"Warning (internal): kIGNORE was returned while reading a word\n";
-        wcerr<<L"Word being read: "<<vwords[ivwords]->get_superficial_form()<<L"\n";
-        wcerr<<L"Debug: "<< str <<L"\n";
+        cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
+        cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
+        cerr<<"Debug: "<< str <<"\n";
       }
-      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
       return;
     }
-    else if(symbol == L'\\')
+    else if(symbol == '\\')
     {
-      symbol = fgetwc_unlocked(input);
-      str += L'\\';
-      str += static_cast<wchar_t>(symbol);
+      symbol = input.get();
+      str += '\\';
+      str += symbol;
     }
-    else if(symbol == L'/')
+    else if(symbol == '/')
     {
       vwords[ivwords]->set_superficial_form(str);
-      str = L"";
+      str.clear();
       break;
     }
-    else if(symbol == L'$')
+    else if(symbol == '$')
     {
       vwords[ivwords]->set_superficial_form(str);
-      vwords[ivwords]->add_ignored_string(L"$");
+      vwords[ivwords]->add_ignored_string("$"_u);
       break;
     }
     else
     {
-      str += static_cast<wchar_t>(symbol);
+      str += symbol;
     }
   }
 
@@ -338,45 +328,45 @@ FileMorphoStream::readRestOfWord(int &ivwords)
 
   while(true)
   {
-    int symbol = fgetwc_unlocked(input);
-    if(feof(input) || (null_flush && symbol == L'\0'))
+    UChar32 symbol = input.get();
+    if(input.eof() || (null_flush && symbol == '\0'))
     {
       end_of_file = true;
       if(str.size() > 0)
       {
         vwords[ivwords]->add_ignored_string(str);
-        wcerr<<L"Warning (internal): kIGNORE was returned while reading a word\n";
-        wcerr<<L"Word being read: "<<vwords[ivwords]->get_superficial_form()<<L"\n";
-        wcerr<<L"Debug: "<< str <<L"\n";
+        cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
+        cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
+        cerr<<"Debug: "<< str <<"\n";
       }
-      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
       return;
     }
-    else if(symbol == L'\\')
+    else if(symbol == '\\')
     {
-      symbol = fgetwc_unlocked(input);
-      str += L'\\';
-      str += static_cast<wchar_t>(symbol);
-      symbol = L'\\';  // to prevent exiting with '\$'
+      symbol = input.get();
+      str += '\\';
+      str += symbol;
+      symbol = '\\';  // to prevent exiting with '\$'
     }
-    else if(symbol == L'/')
+    else if(symbol == '/')
     {
       lrlmClassify(str, ivwords);
-      str = L"";
+      str.clear();
       ivwords = 0;
       continue;
     }
-    else if(symbol == L'$')
+    else if(symbol == '$')
     {
-      if(str[0] != L'*')// do nothing with unknown words
+      if(str[0] != '*')// do nothing with unknown words
       {
-	lrlmClassify(str, ivwords);
+        lrlmClassify(str, ivwords);
       }
       return;
     }
     else
     {
-      str += static_cast<wchar_t>(symbol);
+      str += symbol;
     }
   }
 }
@@ -402,6 +392,6 @@ FileMorphoStream::setEndOfFile(bool eof)
 void
 FileMorphoStream::rewind()
 {
-  std::fseek(input, 0, SEEK_SET);
+  input.rewind();
   end_of_file = false;
 }
diff --git a/apertium/file_morpho_stream.h b/apertium/file_morpho_stream.h
index 3d40802..fdf8871 100644
--- a/apertium/file_morpho_stream.h
+++ b/apertium/file_morpho_stream.h
@@ -29,6 +29,7 @@
 #include <apertium/tagger_data.h>
 #include <apertium/tagger_word.h>
 #include <apertium/morpho_stream.h>
+#include <lttoolbox/input_file.h>
 
 #include <cstdio>
 #include <deque>
@@ -47,9 +48,9 @@ using namespace std;
 class FileMorphoStream : public MorphoStream {
 private:
   bool foundEOF;
-  wstring last_string_tag;
+  UString last_string_tag;
   bool debug;
-  FILE *input;
+  InputFile input;
   int ca_any_char;
   int ca_any_tag;
   int ca_kignorar;
@@ -74,13 +75,13 @@ private:
   bool end_of_file;
 
   void readRestOfWord(int &ivwords);
-  void lrlmClassify(wstring const &str, int &ivwords);
+  void lrlmClassify(UString const &str, int &ivwords);
 public:
 
    /** Constructor
-    *  @param is the input stream.
+    *  @param ftxt name of the input file, handed to InputFile::open().
     */
-   FileMorphoStream(FILE *ftxt, bool d, TaggerData *t);
+   FileMorphoStream(const char* ftxt, bool d, TaggerData *t);
 
    /**
     *  Destructor
diff --git a/apertium/file_tagger.cc b/apertium/file_tagger.cc
index cdce82c..f272a72 100644
--- a/apertium/file_tagger.cc
+++ b/apertium/file_tagger.cc
@@ -40,8 +40,8 @@ void FILE_Tagger::setNullFlush(const bool &NullFlush) {
   TheFlags.setNullFlush(NullFlush);
 }
 
-void FILE_Tagger::tagger(FILE *Input, FILE *Output) {
-  FileMorphoStream morpho_stream(Input, TheFlags.getDebug(), &get_tagger_data());
+void FILE_Tagger::tagger(const char* input_file, UFILE *Output) {
+  FileMorphoStream morpho_stream(input_file, TheFlags.getDebug(), &get_tagger_data());
 
   tagger(morpho_stream, Output);
 }
@@ -51,13 +51,13 @@ void FILE_Tagger::init_and_train(MorphoStream &lexmorfo, unsigned long count) {
   train(lexmorfo, count);
 }
 
-void FILE_Tagger::init_and_train(FILE *corpus, unsigned long count) {
-  init_probabilities_kupiec_(corpus);
-  train(corpus, count);
+void FILE_Tagger::init_and_train(const char* corpus_file, unsigned long count) {
+  init_probabilities_kupiec_(corpus_file);
+  train(corpus_file, count);
 }
 
-void FILE_Tagger::train(FILE *corpus, unsigned long count) {
-  FileMorphoStream lexmorfo(corpus, true, &get_tagger_data());
+void FILE_Tagger::train(const char* corpus_file, unsigned long count) {
+  FileMorphoStream lexmorfo(corpus_file, true, &get_tagger_data());
   train(lexmorfo, count);
 }
 
@@ -67,19 +67,20 @@ void FILE_Tagger::deserialise(string const &TaggerSpecificationFilename) {
   deserialise(TaggerSpecificationReader_.getTaggerData());
 }
 
-void FILE_Tagger::init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
-                                                       FILE *Corpus) {
-  FileMorphoStream stream_tagged(TaggedCorpus, true, &get_tagger_data());
-  FileMorphoStream stream_untagged(Corpus, true, &get_tagger_data());
+void FILE_Tagger::init_probabilities_from_tagged_text_(
+         const char* tagged_file, const char* untagged_file)
+{
+  FileMorphoStream stream_tagged(tagged_file, true, &get_tagger_data());
+  FileMorphoStream stream_untagged(untagged_file, true, &get_tagger_data());
   init_probabilities_from_tagged_text_(stream_tagged, stream_untagged);
 }
 
-void FILE_Tagger::init_probabilities_kupiec_(FILE *Corpus) {
-  FileMorphoStream lexmorfo(Corpus, true, &get_tagger_data());
+void FILE_Tagger::init_probabilities_kupiec_(const char* corpus_file) {
+  FileMorphoStream lexmorfo(corpus_file, true, &get_tagger_data());
   init_probabilities_kupiec_(lexmorfo);
 }
 
-void FILE_Tagger::read_dictionary(FILE *fdic) {
+void FILE_Tagger::read_dictionary(const char* fdic) {
   tagger_utils::scan_for_ambg_classes(fdic, get_tagger_data());
   tagger_utils::add_neccesary_ambg_classes(get_tagger_data());
   post_ambg_class_scan();
diff --git a/apertium/file_tagger.h b/apertium/file_tagger.h
index dc9a543..00496e9 100644
--- a/apertium/file_tagger.h
+++ b/apertium/file_tagger.h
@@ -19,6 +19,8 @@
 #include <apertium/tagger_flags.h>
 #include <apertium/tagger_data.h>
 #include <apertium/morpho_stream.h>
+#include <unicode/ustdio.h>
+#include <lttoolbox/input_file.h>
 
 #include <cstdio>
 #include <string>
@@ -34,29 +36,29 @@ public:
   void set_debug(const bool &Debug);
   void set_show_sf(const bool &ShowSuperficial);
   void setNullFlush(const bool &NullFlush);
-  virtual void tagger(FILE *Input, FILE *Output);
-  virtual void tagger(MorphoStream &morpho_stream, FILE *Output) = 0;
-  virtual std::vector<std::wstring> &getArrayTags() = 0;
+  virtual void tagger(const char* input_file, UFILE* Output);
+  virtual void tagger(MorphoStream &morpho_stream, UFILE* Output) = 0;
+  virtual std::vector<UString> &getArrayTags() = 0;
   void init_and_train(MorphoStream &lexmorfo, unsigned long Count);
-  void init_and_train(FILE *Corpus, unsigned long Count);
-  virtual void train(FILE *Corpus, unsigned long Count);
+  void init_and_train(const char* corpus_file, unsigned long Count);
+  virtual void train(const char* corpus_file, unsigned long Count);
   virtual void train(MorphoStream &lexmorpho, unsigned long count) = 0;
   virtual void train(MorphoStream &lexmorpho) = 0;
   virtual void serialise(FILE *Stream_) = 0;
   void deserialise(string const &TaggerSpecificationFilename);
   virtual void init_probabilities_from_tagged_text_(
-      FILE *TaggedCorpus, FILE *Corpus);
+      const char* tagged_file, const char* untagged_file);
   virtual void init_probabilities_from_tagged_text_(
       MorphoStream &stream_tagged,
       MorphoStream &stream_untagged) = 0;
-  virtual void init_probabilities_kupiec_(FILE *Corpus);
+  virtual void init_probabilities_kupiec_(const char* corpus_file);
   virtual void init_probabilities_kupiec_(MorphoStream &lexmorfo) = 0;
 
   /** It reads the expanded dictionary received as a parameter and calculates
    *  the set of ambiguity classes that the tagger will manage.
-   *  @param is the input stream with the expanded dictionary to read
+   *  @param is the filename of expanded dictionary to read (or NULL for stdin)
    */
-  void read_dictionary(FILE *is);
+  void read_dictionary(const char* is);
 
   virtual TaggerData& get_tagger_data() = 0;
 
diff --git a/apertium/gen_modes.cc b/apertium/gen_modes.cc
index 1dc7f8f..6dbb78c 100644
--- a/apertium/gen_modes.cc
+++ b/apertium/gen_modes.cc
@@ -21,8 +21,7 @@
 #include <cstdlib>
 #include <iostream>
 #include <fstream>
-#include "string_utils.h"
-#include "utf_converter.h"
+#include <lttoolbox/string_utils.h>
 #include <libgen.h>
 #include <getopt.h>
 #include <libxml/xmlreader.h>
@@ -32,7 +31,6 @@
 #include <string>
 #include <set>
 
-using namespace Apertium;
 using namespace std;
 
 void endProgram(char *name)
diff --git a/apertium/hmm.cc b/apertium/hmm.cc
index 9285217..3dd9414 100644
--- a/apertium/hmm.cc
+++ b/apertium/hmm.cc
@@ -30,7 +30,7 @@
 #include <unistd.h>
 #include <vector>
 #include <algorithm>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/file_morpho_stream.h>
 
 inline bool p_isnan(double v) {
@@ -58,10 +58,10 @@ TaggerData& HMM::get_tagger_data() {
 
 void HMM::deserialise(FILE *Serialised_FILE_Tagger) {
   tdhmm.read(Serialised_FILE_Tagger);
-  eos = (tdhmm.getTagIndex())[L"TAG_SENT"];
+  eos = (tdhmm.getTagIndex())["TAG_SENT"_u];
 }
 
-std::vector<std::wstring> &HMM::getArrayTags() {
+std::vector<UString> &HMM::getArrayTags() {
   return tdhmm.getArrayTags();
 }
 
@@ -69,7 +69,7 @@ void HMM::serialise(FILE *Stream_) { tdhmm.write(Stream_); }
 
 void HMM::deserialise(const TaggerData &Deserialised_FILE_Tagger) {
   tdhmm = TaggerDataHMM(Deserialised_FILE_Tagger);
-  eos = (tdhmm.getTagIndex())[L"TAG_SENT"];
+  eos = (tdhmm.getTagIndex())["TAG_SENT"_u];
 }
 
 void HMM::init_probabilities_from_tagged_text_(MorphoStream &stream_tagged,
@@ -99,7 +99,7 @@ HMM::HMM(TaggerFlags& Flags_) : FILE_Tagger(Flags_) {}
 HMM::HMM(TaggerDataHMM _tdhmm)
   : tdhmm(_tdhmm)
 {
-  eos = (tdhmm.getTagIndex())[L"TAG_SENT"];
+  eos = (tdhmm.getTagIndex())["TAG_SENT"_u];
 }
 
 HMM::HMM(TaggerDataHMM *tdhmm) : tdhmm(*tdhmm) {}
@@ -193,7 +193,7 @@ HMM::init_probabilities_kupiec(MorphoStream &lexmorfo)
   //We count for each ambiguity class the number of ocurrences
   word = lexmorfo.get_next_word();
   while((word)) {
-    if (++nw%10000==0) wcerr<<L'.'<<flush;
+    if (++nw%10000==0) cerr<<'.'<<flush;
 
     tags=word->get_tags();
 
@@ -265,7 +265,7 @@ HMM::init_probabilities_kupiec(MorphoStream &lexmorfo)
       }
     }
   }
-  wcerr<<L"\n";
+  cerr<<"\n";
 }
 
 
@@ -291,30 +291,30 @@ HMM::init_probabilities_from_tagged_text(MorphoStream &stream_tagged,
   word_tagged = stream_tagged.get_next_word();
   word_untagged = stream_untagged.get_next_word();
   while(word_tagged) {
-    wcerr<<*word_tagged;
-    wcerr<<L" -- "<<*word_untagged<<L"\n";
+    cerr<<*word_tagged;
+    cerr<<" -- "<<*word_untagged<<"\n";
 
     if (word_tagged->get_superficial_form()!=word_untagged->get_superficial_form()) {
-      wcerr<<L"\nTagged text (.tagged) and analyzed text (.untagged) streams are not aligned.\n";
-      wcerr<<L"Take a look at tagged text (.tagged).\n";
-      wcerr<<L"Perhaps this is caused by a multiword unit that is not a multiword unit in one of the two files.\n";
-      wcerr<<*word_tagged<<L" -- "<<*word_untagged<<L"\n";
+      cerr<<"\nTagged text (.tagged) and analyzed text (.untagged) streams are not aligned.\n";
+      cerr<<"Take a look at tagged text (.tagged).\n";
+      cerr<<"Perhaps this is caused by a multiword unit that is not a multiword unit in one of the two files.\n";
+      cerr<<*word_tagged<<" -- "<<*word_untagged<<"\n";
       exit(1);
     }
 
-    if (++nw%100==0) wcerr<<L'.'<<flush;
+    if (++nw%100==0) cerr<<'.'<<flush;
 
     tag2 = tag1;
 
     if (word_untagged==NULL) {
-      wcerr<<L"word_untagged==NULL\n";
+      cerr<<"word_untagged==NULL\n";
       exit(1);
     }
 
     if (word_tagged->get_tags().size()==0) // Unknown word
       tag1 = -1;
     else if (word_tagged->get_tags().size()>1) // Ambiguous word
-      wcerr<<L"Error in tagged text. An ambiguous word was found: "<<word_tagged->get_superficial_form()<<L"\n";
+      cerr<<"Error in tagged text. An ambiguous word was found: "<<word_tagged->get_superficial_form()<<"\n";
     else
       tag1 = *(word_tagged->get_tags()).begin();
 
@@ -368,7 +368,7 @@ HMM::init_probabilities_from_tagged_text(MorphoStream &stream_tagged,
     }
    }
 
-  wcerr<<L"\n";
+  cerr<<"\n";
 }
 
 void
@@ -416,15 +416,15 @@ void
 HMM::post_ambg_class_scan() {
   int N = (tdhmm.getTagIndex()).size();
   int M = (tdhmm.getOutput()).size();
-  wcerr << N << L" states and " << M <<L" ambiguity classes\n";
+  cerr << N << " states and " << M <<" ambiguity classes\n";
 
   tdhmm.setProbabilities(N, M);
 }
 
 void
-HMM::filter_ambiguity_classes(FILE *in, FILE *out) {
+HMM::filter_ambiguity_classes(const char* input_file, UFILE* out) {
   set<set<TTag> > ambiguity_classes;
-  FileMorphoStream morpho_stream(in, true, &tdhmm);
+  FileMorphoStream morpho_stream(input_file, true, &tdhmm);
 
   TaggerWord *word = morpho_stream.get_next_word();
 
@@ -434,7 +434,7 @@ HMM::filter_ambiguity_classes(FILE *in, FILE *out) {
       if(ambiguity_classes.find(tags) == ambiguity_classes.end()) {
 	    ambiguity_classes.insert(tags);
 	    word->outputOriginal(out);
-	    //wcerr<<word->get_string_tags()<<L"\n";
+	    //cerr<<word->get_string_tags()<<"\n";
       }
     }
     delete word;
@@ -474,12 +474,12 @@ HMM::train(MorphoStream &morpho_stream) {
 
   while (word) {
 
-    //wcerr<<L"Enter para continuar\n";
+    //cerr<<"Enter para continuar\n";
     //getchar();
 
-    if (++nw%10000==0) wcerr<<L'.'<<flush;
+    if (++nw%10000==0) cerr<<'.'<<flush;
 
-    //wcerr<<*word<<L"\n";
+    //cerr<<*word<<"\n";
 
     pretags = pending.back();
 
@@ -501,15 +501,15 @@ HMM::train(MorphoStream &morpho_stream) {
       i=*itag;
       for (jtag=pretags.begin(); jtag!=pretags.end(); jtag++) {
          j=*jtag;
-         //wcerr<<"previous alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n";
-	 //wcerr<<"alpha["<<len-1<<"]["<<j<<"]="<<alpha[len-1][j]<<"\n";
-         //wcerr<<"a["<<j<<"]["<<i<<"]="<<a[j][i]<<"\n";
-         //wcerr<<"b["<<i<<"]["<<k<<"]="<<b[i][k]<<"\n";
+         //cerr<<"previous alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n";
+	 //cerr<<"alpha["<<len-1<<"]["<<j<<"]="<<alpha[len-1][j]<<"\n";
+         //cerr<<"a["<<j<<"]["<<i<<"]="<<a[j][i]<<"\n";
+         //cerr<<"b["<<i<<"]["<<k<<"]="<<b[i][k]<<"\n";
 	 alpha[len][i] += alpha[len-1][j]*(tdhmm.getA())[j][i]*(tdhmm.getB())[i][k];
       }
       if (alpha[len][i]==0)
         alpha[len][i]=DBL_MIN;
-      //wcerr<<"alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n--------\n";
+      //cerr<<"alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n--------\n";
     }
 
     if (tags.size()>1) {
@@ -521,8 +521,8 @@ HMM::train(MorphoStream &morpho_stream) {
 
       prob = alpha[len][tag];
 
-      //wcerr<<"prob="<<prob<<"\n";
-      //wcerr<<"alpha["<<len<<"]["<<tag<<"]="<<alpha[len][tag]<<"\n";
+      //cerr<<"prob="<<prob<<"\n";
+      //cerr<<"alpha["<<len<<"]["<<tag<<"]="<<alpha[len][tag]<<"\n";
       loli -= log(prob);
 
       for (t=0; t<len; t++) {  // loop from T-1 to 0
@@ -541,13 +541,13 @@ HMM::train(MorphoStream &morpho_stream) {
 
 	       gamma[i] +=  alpha[len-t][i]*beta[t%2][i]/prob;
 	       if (p_isnan(gamma[i])) {
-	          wcerr<<L"NAN(3) gamma["<<i<<L"] = "<<gamma[i]<<L" alpha["<<len-t<<L"]["<<i<<L"]= "<<alpha[len-t][i]
-	               <<L" beta["<<t%2<<L"]["<<i<<L"] = "<<beta[t%2][i]<<L" prob = "<<prob<<L" previous gamma = "<<previous_value<<L"\n";
+	          cerr<<"NAN(3) gamma["<<i<<"] = "<<gamma[i]<<" alpha["<<len-t<<"]["<<i<<"]= "<<alpha[len-t][i]
+	               <<" beta["<<t%2<<"]["<<i<<"] = "<<beta[t%2][i]<<" prob = "<<prob<<" previous gamma = "<<previous_value<<"\n";
 	          exit(1);
 	       }
 	       if (p_isinf(gamma[i])) {
-	          wcerr<<L"INF(3) gamma["<<i<<L"] = "<<gamma[i]<<L" alpha["<<len-t<<L"]["<<i<<L"]= "<<alpha[len-t][i]
-	               <<L" beta["<<t%2<<L"]["<<i<<L"] = "<<beta[t%2][i]<<L" prob = "<<prob<<L" previous gamma = "<<previous_value<<L"\n";
+	          cerr<<"INF(3) gamma["<<i<<"] = "<<gamma[i]<<" alpha["<<len-t<<"]["<<i<<"]= "<<alpha[len-t][i]
+	               <<" beta["<<t%2<<"]["<<i<<"] = "<<beta[t%2][i]<<" prob = "<<prob<<" previous gamma = "<<previous_value<<"\n";
 	          exit(1);
 	       }
 	       if (gamma[i]==0) {
@@ -572,11 +572,11 @@ HMM::train(MorphoStream &morpho_stream) {
     word = morpho_stream.get_next_word();
   }
 
-  if ((pending.size()>1) || ((tag!=eos)&&(tag != (tdhmm.getTagIndex())[L"TAG_kEOF"]))) {
-    wcerr << L"Warning: The last tag is not the end-of-sentence-tag "
-          << L"but rather " << tdhmm.getArrayTags()[tag] << L". Line: " << nw
-	  << L". Pending: " << pending.size() << ". Tags: ";
-    wcerr << "\n";
+  if ((pending.size()>1) || ((tag!=eos)&&(tag != (tdhmm.getTagIndex())["TAG_kEOF"_u]))) {
+    cerr << "Warning: The last tag is not the end-of-sentence-tag "
+          << "but rather " << tdhmm.getArrayTags()[tag] << ". Line: " << nw
+	  << ". Pending: " << pending.size() << ". Tags: ";
+    cerr << "\n";
   }
 
   int N = tdhmm.getN();
@@ -597,24 +597,24 @@ HMM::train(MorphoStream &morpho_stream) {
       j = jt->first;
       if (xsi[i][j]>0) {
         if (gamma[i]==0) {
-          wcerr<<L"Warning: gamma["<<i<<L"]=0\n";
+          cerr<<"Warning: gamma["<<i<<"]=0\n";
           gamma[i]=DBL_MIN;
         }
 
         (tdhmm.getA())[i][j] = xsi[i][j]/gamma[i];
 
         if (p_isnan((tdhmm.getA())[i][j])) {
-          wcerr<<L"NAN\n";
-          wcerr <<L"Error: BW - NAN(1) a["<<i<<L"]["<<j<<L"]="<<(tdhmm.getA())[i][j]<<L"\txsi["<<i<<L"]["<<j<<L"]="<<xsi[i][j]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+          cerr<<"NAN\n";
+          cerr <<"Error: BW - NAN(1) a["<<i<<"]["<<j<<"]="<<(tdhmm.getA())[i][j]<<"\txsi["<<i<<"]["<<j<<"]="<<xsi[i][j]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
 	  exit(1);
         }
 	if (p_isinf((tdhmm.getA())[i][j])) {
-	  wcerr<<L"INF\n";
-          wcerr <<L"Error: BW - INF(1) a["<<i<<L"]["<<j<<L"]="<<(tdhmm.getA())[i][j]<<L"\txsi["<<i<<L"]["<<j<<L"]="<<xsi[i][j]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+	  cerr<<"INF\n";
+          cerr <<"Error: BW - INF(1) a["<<i<<"]["<<j<<"]="<<(tdhmm.getA())[i][j]<<"\txsi["<<i<<"]["<<j<<"]="<<xsi[i][j]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
           exit(1);
         }
 	if ((tdhmm.getA())[i][j]==0) {
-          //wcerr <<"Error: BW - ZERO(1) a["<<i<<"]["<<j<<"]="<<(tdhmm.getA())[i][j]<<"\txsi["<<i<<"]["<<j<<"]="<<xsi[i][j]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
+          //cerr <<"Error: BW - ZERO(1) a["<<i<<"]["<<j<<"]="<<(tdhmm.getA())[i][j]<<"\txsi["<<i<<"]["<<j<<"]="<<xsi[i][j]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
 	  //     exit(1);
         }
       }
@@ -629,15 +629,15 @@ HMM::train(MorphoStream &morpho_stream) {
         (tdhmm.getB())[i][k] = phi[i][k]/gamma[i];
 
 	if (p_isnan((tdhmm.getB())[i][k])) {
-          wcerr<<L"Error: BW - NAN(2) b["<<i<<L"]["<<k<<L"]="<<(tdhmm.getB())[i][k]<<L"\tphi["<<i<<L"]["<<k<<L"]="<<phi[i][k]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+          cerr<<"Error: BW - NAN(2) b["<<i<<"]["<<k<<"]="<<(tdhmm.getB())[i][k]<<"\tphi["<<i<<"]["<<k<<"]="<<phi[i][k]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
 	       exit(1);
         }
 	if (p_isinf((tdhmm.getB())[i][k])) {
-          wcerr<<L"Error: BW - INF(2) b["<<i<<L"]["<<k<<L"]="<<(tdhmm.getB())[i][k]<<L"\tphi["<<i<<L"]["<<k<<L"]="<<phi[i][k]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+          cerr<<"Error: BW - INF(2) b["<<i<<"]["<<k<<"]="<<(tdhmm.getB())[i][k]<<"\tphi["<<i<<"]["<<k<<"]="<<phi[i][k]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
 	       exit(1);
         }
 	if ((tdhmm.getB())[i][k]==0) {
-          //wcerr <<"Error: BW - ZERO(2) b["<<i<<"]["<<k<<"]="<<(tdhmm.getB())[i][k]<<"\tphi["<<i<<"]["<<k<<"]="<<phi[i][k]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
+          //cerr <<"Error: BW - ZERO(2) b["<<i<<"]["<<k<<"]="<<(tdhmm.getB())[i][k]<<"\tphi["<<i<<"]["<<k<<"]="<<phi[i][k]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
 	  //     exit(1);
         }
       }
@@ -666,11 +666,11 @@ HMM::train(MorphoStream &morpho_stream) {
     }
   }
 
-  wcerr<<L"Log="<<loli<<L"\n";
+  cerr<<"Log="<<loli<<"\n";
 }
 
 void
-HMM::tagger(MorphoStream &morpho_stream, FILE *Output) {
+HMM::tagger(MorphoStream &morpho_stream, UFILE* Output) {
   int i, j, k, nw;
   TaggerWord *word = NULL;
   TTag tag;
@@ -740,17 +740,17 @@ HMM::tagger(MorphoStream &morpho_stream, FILE *Output) {
 	loli -= log(prob);
       else {
         if (TheFlags.getDebug())
-	  wcerr<<L"Problem with word '"<<word->get_superficial_form()<<L"' "<<word->get_string_tags()<<L"\n";
+	  cerr<<"Problem with word '"<<word->get_superficial_form()<<"' "<<word->get_string_tags()<<"\n";
       }
       for (unsigned t=0; t<best[nwpend%2][tag].size(); t++) {
 	if (TheFlags.getFirst()) {
-	  wstring const &micad = wpend[t].get_all_chosen_tag_first(best[nwpend%2][tag][t], (tdhmm.getTagIndex())[L"TAG_kEOF"]);
-	  fputws_unlocked(micad.c_str(), Output);
+	  UString const &micad = wpend[t].get_all_chosen_tag_first(best[nwpend%2][tag][t], (tdhmm.getTagIndex())["TAG_kEOF"_u]);
+	  write(micad, Output);
 	} else {
 	  // print Output
 	  wpend[t].set_show_sf(TheFlags.getShowSuperficial());
-	  wstring const &micad = wpend[t].get_lexical_form(best[nwpend%2][tag][t], (tdhmm.getTagIndex())[L"TAG_kEOF"]);
-	  fputws_unlocked(micad.c_str(), Output);
+	  UString const &micad = wpend[t].get_lexical_form(best[nwpend%2][tag][t], (tdhmm.getTagIndex())["TAG_kEOF"_u]);
+	  write(micad, Output);
 	}
       }
 
@@ -765,23 +765,21 @@ HMM::tagger(MorphoStream &morpho_stream, FILE *Output) {
     {
       if(TheFlags.getNullFlush())
       {
-        fputwc_unlocked(L'\0', Output);
+        u_fputc('\0', Output);
         tags.clear();
         tags.insert(eos);
         alpha[0][eos] = 1;
       }
 
-      fflush(Output);
+      u_fflush(Output);
       morpho_stream.setEndOfFile(false);
     }
     word = morpho_stream.get_next_word();
   }
 
   if ((tags.size()>1)&&(TheFlags.getDebug())) {
-    wstring errors;
-    errors = L"The text to disambiguate has finished, but there are ambiguous words that has not been disambiguated.\n";
-    errors+= L"This message should never appears. If you are reading this ..... these are very bad news.\n";
-    wcerr<<L"Error: "<<errors;
+    cerr << "Error: The text to disambiguate has finished, but there are ambiguous words that has not been disambiguated.\n";
+    cerr << "This message should never appears. If you are reading this ..... these are very bad news.\n";
   }
 }
 
diff --git a/apertium/hmm.h b/apertium/hmm.h
index 7307e57..1031566 100644
--- a/apertium/hmm.h
+++ b/apertium/hmm.h
@@ -65,7 +65,7 @@ protected:
 public:
    TaggerData& get_tagger_data();
    void deserialise(FILE *Serialised_FILE_Tagger);
-   std::vector<std::wstring> &getArrayTags();
+   std::vector<UString> &getArrayTags();
    void serialise(FILE *Stream_);
    void deserialise(const TaggerData &Deserialised_FILE_Tagger);
    void init_probabilities_from_tagged_text_(MorphoStream &stream_tagged,
@@ -143,7 +143,7 @@ public:
     *  @param in the input stream with the untagged text to tag
     *  @param out the output stream with the tagged text
     */
-   void tagger(MorphoStream &morpho_stream, FILE *Output);
+   void tagger(MorphoStream &morpho_stream, UFILE* Output);
 
    /** Prints the A matrix.
     */
@@ -157,7 +157,7 @@ public:
     */
    void print_ambiguity_classes();
 
-   void filter_ambiguity_classes(FILE *in, FILE *out);
+   void filter_ambiguity_classes(const char* input_file, UFILE* output);
 };
 
 #endif
diff --git a/apertium/interchunk.cc b/apertium/interchunk.cc
index 019577d..e52c500 100644
--- a/apertium/interchunk.cc
+++ b/apertium/interchunk.cc
@@ -15,489 +15,197 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/interchunk.h>
-#include <apertium/trx_reader.h>
-#include <apertium/utf_converter.h>
-#include <lttoolbox/compression.h>
-#include <lttoolbox/xml_parse_util.h>
 
-#include <cctype>
-#include <cerrno>
+#include <lttoolbox/xml_walk_util.h>
+#include <lttoolbox/string_utils.h>
+
 #include <iostream>
-#include <stack>
-#include <apertium/string_utils.h>
-#include "apertium_config.h"
-#include <apertium/unlocked_cstdio.h>
 
-using namespace Apertium;
 using namespace std;
 
-void
-Interchunk::destroy()
-{
-  delete me;
-  me = NULL;
+Interchunk::Interchunk()
+  : word(0), last_lword(0), inword(false)
+{}
 
-  if(doc)
+bool
+Interchunk::checkIndex(xmlNode *element, int index, int limit)
+{
+  if(index >= limit)
   {
-    xmlFreeDoc(doc);
-    doc = NULL;
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl;
+    return false;
   }
-}
-
-Interchunk::Interchunk() :
-word(0),
-lword(0),
-last_lword(0),
-output(0),
-any_char(0),
-any_tag(0),
-nwords(0)
-{
-  me = NULL;
-  doc = NULL;
-  root_element = NULL;
-  lastrule = NULL;
-  inword = false;
-  null_flush = false;
-  internal_null_flush = false;
-  trace = false;
-  in_out = false;
-}
-
-Interchunk::~Interchunk()
-{
-  destroy();
-}
-
-void
-Interchunk::readData(FILE *in)
-{
-  alphabet.read(in);
-  any_char = alphabet(TRXReader::ANY_CHAR);
-  any_tag = alphabet(TRXReader::ANY_TAG);
-
-  Transducer t;
-  t.read(in, alphabet.size());
-
-  map<int, int> finals;
-
-  // finals
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  if(index < 0) {
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl;
+    return false;
+  }
+  if(word[index] == 0)
   {
-    int key = Compression::multibyte_read(in);
-    finals[key] = Compression::multibyte_read(in);
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl;
+    return false;
   }
+  return true;
+}
 
-  me = new MatchExe(t, finals);
-
-  // attr_items
-  bool recompile_attrs = Compression::string_read(in) != pcre_version_endian();
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    attr_items[cad_k].read(in);
-    wstring fallback = Compression::wstring_read(in);
-    if(recompile_attrs) {
-      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
+UString
+Interchunk::evalCachedString(xmlNode* element)
+{
+  TransferInstr& ti = evalStringCache[element];
+  switch (ti.getType()) {
+  case ti_clip_tl:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      if (ti.getContent() == "content"_u) {
+        UString wf = word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
+        return wf.substr(1, wf.length()-2); // trim { and }
+      } else {
+        return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
+      }
     }
-  }
+    break;
 
-  // variables
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
-  }
+  case ti_var:
+    return variables[ti.getContent()];
 
-  // macros
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    macros[cad_k] = Compression::multibyte_read(in);
-  }
+  case ti_lit_tag:
+  case ti_lit:
+    return ti.getContent();
 
-  // lists
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+  case ti_b:
+    if (!blank_queue.empty()) {
+      UString retblank = blank_queue.front();
+      if (in_out) {
+        blank_queue.pop();
+      }
+      return retblank;
+    } else {
+      return " "_u;
+    }
+    break;
 
-    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
-    {
-      wstring const cad_v = Compression::wstring_read(in);
-      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
-      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
+  case ti_get_case_from:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
+                                   evalString(ti.getPointer()));
     }
-  }
-}
+    break;
 
-void
-Interchunk::read(string const &transferfile, string const &datafile)
-{
-  readInterchunk(transferfile);
+  case ti_case_of_tl:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::getcase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
+    }
+    break;
 
-  // datafile
-  FILE *in = fopen(datafile.c_str(), "rb");
-  if(!in)
-  {
-    wcerr << "Error: Could not open file '" << datafile << "'." << endl;
-    exit(EXIT_FAILURE);
+  default:
+    return ""_u;
   }
-  readData(in);
-  fclose(in);
-
+  return ""_u;
 }
 
 void
-Interchunk::readInterchunk(string const &in)
+Interchunk::processClip(xmlNode* element)
 {
-  doc = xmlReadFile(in.c_str(), NULL, 0);
-
-  if(doc == NULL)
-  {
-    wcerr << "Error: Could not parse file '" << in << "'." << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  root_element = xmlDocGetRootElement(doc);
-
-  // search for macros & rules
-  for(xmlNode *i = root_element->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "section-def-macros"))
-      {
-        collectMacros(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "section-rules"))
-      {
-        collectRules(i);
-      }
+  int pos = 0;
+  UString part;
+  for (xmlAttr* i = element->properties; i != NULL; i = i->next) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
+      part = to_ustring((const char*) i->children->content);
+    } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
+      pos = atoi((const char*) i->children->content) - 1;
     }
   }
+  evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL);
 }
 
 void
-Interchunk::collectRules(xmlNode *localroot)
+Interchunk::processBlank(xmlNode* element)
 {
-  for(xmlNode *rule = localroot->children; rule != NULL; rule = rule->next)
-  {
-    if(rule->type == XML_ELEMENT_NODE)
-    {
-      size_t line = rule->line;
-      for(xmlNode *rulechild = rule->children; ; rulechild = rulechild->next)
-      {
-        if(rulechild->type == XML_ELEMENT_NODE && !xmlStrcmp(rulechild->name, (const xmlChar *) "action"))
-        {
-          rule_map.push_back(rulechild);
-          rule_lines.push_back(line);
-          break;
-        }
-      }
-    }
+  if (element->properties == NULL) {
+    evalStringCache[element] = TransferInstr(ti_b, " "_u, -1);
+  } else {
+    int pos = atoi((const char*) element->properties->children->content) - 1;
+    evalStringCache[element] = TransferInstr(ti_b, ""_u, pos);
   }
 }
 
 void
-Interchunk::collectMacros(xmlNode *localroot)
+Interchunk::processLuCount(xmlNode* element)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      macro_map.push_back(i);
-    }
-  }
+  cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+  exit(EXIT_FAILURE);
 }
 
-bool
-Interchunk::checkIndex(xmlNode *element, int index, int limit)
+UString
+Interchunk::processLu(xmlNode* element)
 {
-  if(index >= limit)
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index >= limit" << endl;
-    return false;
-  }
-  if(index < 0) {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index < 0" << endl;
-    return false;
-  }
-  if(word[index] == 0)
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": Null access at word[index]" << endl;
-    return false;
-  }
-  return true;
+  cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+  exit(EXIT_FAILURE);
+  return ""_u; // make the type checker happy
 }
 
-
-string
-Interchunk::evalString(xmlNode *element)
+UString
+Interchunk::processMlu(xmlNode* element)
 {
-  if (element == 0)
-  {
-    throw "Interchunk::evalString() was passed a NULL element";
-  }
-
-  map<xmlNode *, TransferInstr>::iterator it;
-  it = evalStringCache.find(element);
-  if(it != evalStringCache.end())
-  {
-    TransferInstr &ti = it->second;
-    switch(ti.getType())
-    {
-      case ti_clip_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(ti.getContent() == "content") // jacob's new 'part'
-          {
-            string wf = word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
-            return wf.substr(1, wf.length()-2); // trim away the { and }
-          }
-          else
-          {
-            return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
-          }
-        }
-        break;
-
-      case ti_var:
-        return variables[ti.getContent()];
-
-      case ti_lit_tag:
-      case ti_lit:
-        return ti.getContent();
-
-      case ti_b:
-        if(!blank_queue.empty())
-        {
-          string retblank = blank_queue.front();
-          
-          if(in_out)
-          {
-            blank_queue.pop();
-          }
-          
-          return retblank;
-        }
-        else
-        {
-          return " ";
-        }
-        break;
-
-      case ti_get_case_from:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
-                          evalString((xmlNode *) ti.getPointer()));
-        }
-        break;
-
-      case ti_case_of_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return caseOf(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
-        }
-        break;
-
-      default:
-        return "";
-    }
-    return "";
-  }
-
-  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *)i->children->content) - 1;
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit_tag,
-                                             tags((const char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit, ((const char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
-  {
-    if(element->properties == NULL)
-    {
-      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
-    }
-    else
-    {
-      int pos = atoi((const char *) element->properties->children->content) - 1;
-      evalStringCache[element] = TransferInstr(ti_b, "", pos);
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
-  {
-    int pos = atoi((const char *) element->properties->children->content) - 1;
-    xmlNode *param = NULL;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-	param = i;
-	break;
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
-  {
-    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content) - 1;
-      }
-    }
+  cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+  exit(EXIT_FAILURE);
+  return ""_u; // make the type checker happy
+}
 
-    evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
-  {
-    string value;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        value.append(evalString(i));
-      }
+void
+Interchunk::processCaseOf(xmlNode* element)
+{
+  int pos = 0;
+  UString part;
+  for (xmlAttr* i = element->properties; i != NULL; i = i->next) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
+      part = to_ustring((char*) i->children->content);
+    } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
+      pos = atoi((const char*) i->children->content) - 1;
     }
-    return value;
   }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "chunk"))
-  {
-    return processChunk(element);
-  }
-  else
-  {
-    wcerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  return evalString(element);
+  evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos);
 }
 
 void
 Interchunk::processOut(xmlNode *localroot)
 {
   in_out = true;
-  
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "chunk"))
-      {
-        fputws_unlocked(UtfConverter::fromUtf8(processChunk(i)).c_str(), output);
-      }
-      else // 'b'
-      {
-        fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(), output);
-      }
+
+  for (auto i : children(localroot)) {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) {
+      write(processChunk(i), output);
+    } else { // 'b'
+      write(evalString(i), output);
     }
   }
-  
+
   in_out = false;
 }
 
-string
+UString
 Interchunk::processChunk(xmlNode *localroot)
 {
-  string result;
-  result.append("^");
+  UString result;
+  result.append("^"_u);
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      result.append(evalString(i));
-    }
+  for (auto i : children(localroot)) {
+    result.append(evalString(i));
   }
 
-  result.append("$");
+  result.append("$"_u);
   return result;
 }
 
-void
-Interchunk::processInstruction(xmlNode *localroot)
-{
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "choose"))
-  {
-    processChoose(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "let"))
-  {
-    processLet(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "append"))
-  {
-    processAppend(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "out"))
-  {
-    processOut(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "call-macro"))
-  {
-    processCallMacro(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "modify-case"))
-  {
-    processModifyCase(localroot);
-  }
-}
-
 void
 Interchunk::processLet(xmlNode *localroot)
 {
   xmlNode *leftSide = NULL, *rightSide = NULL;
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
+  for (auto i : children(localroot)) {
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
     }
   }
 
@@ -516,7 +224,7 @@ Interchunk::processLet(xmlNode *localroot)
         bool match = word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide));
         if(!match && trace)
         {
-          wcerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+          cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
         }
       }
         return;
@@ -527,119 +235,90 @@ Interchunk::processLet(xmlNode *localroot)
   }
   if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
   {
-    string const val = (const char *) leftSide->properties->children->content;
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);
     variables[val] = evalString(rightSide);
     evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
   }
   else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
   {
     int pos = 0;
-    xmlChar *part = NULL;
+    UString part;
 
     for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
       if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
       {
-	part = i->children->content;
+        part = to_ustring((char*)i->children->content);
       }
       else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
       {
-	pos = atoi((const char *) i->children->content) - 1;
+        pos = atoi((const char *) i->children->content) - 1;
       }
     }
 
 
-    bool match = word[pos]->setChunkPart(attr_items[(const char *) part],
+    bool match = word[pos]->setChunkPart(attr_items[part],
 					 evalString(rightSide));
     if(!match && trace)
     {
-      wcerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+      cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
     }
     evalStringCache[leftSide] = TransferInstr(ti_clip_tl,
-					      (const char *) part,
+					      part,
 					      pos, NULL);
   }
 }
 
-void
-Interchunk::processAppend(xmlNode *localroot)
-{
-  string name;
-  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "n"))
-    {
-      name = (char *) i->children->content;
-      break;
-    }
-  }
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      variables[name].append(evalString(i));
-    }
-  }
-}
-
 void
 Interchunk::processModifyCase(xmlNode *localroot)
 {
   xmlNode *leftSide = NULL, *rightSide = NULL;
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
+  for (auto i : children(localroot)) {
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
     }
   }
 
   if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
   {
     int pos = 0;
-    xmlChar *part = NULL;
+    UString part;
 
     for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
       if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
       {
-	part = i->children->content;
+        part = to_ustring((char*)i->children->content);
       }
       else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
       {
-	pos = atoi((const char *) i->children->content) - 1;
+        pos = atoi((const char *) i->children->content) - 1;
       }
     }
 
-    string const result = copycase(evalString(rightSide),
-				   word[pos]->chunkPart(attr_items[(const char *) part]));
-    bool match = word[pos]->setChunkPart(attr_items[(const char *) part], result);
+    UString const result = StringUtils::copycase(evalString(rightSide),
+				   word[pos]->chunkPart(attr_items[part]));
+    bool match = word[pos]->setChunkPart(attr_items[part], result);
     if(!match && trace)
     {
-      wcerr << "apertium-interchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
+      cerr << "apertium-interchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
     }
   }
   else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
   {
-    string const val = (const char *) leftSide->properties->children->content;
-    variables[val] = copycase(evalString(rightSide), variables[val]);
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);
+    variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]);
   }
 }
 
 void
 Interchunk::processCallMacro(xmlNode *localroot)
 {
-  const char *n = (const char *) localroot->properties->children->content;
+  UString n = to_ustring((const char *) localroot->properties->children->content);
   int npar = 0;
 
   xmlNode *macro = macro_map[macros[n]];
@@ -656,16 +335,11 @@ Interchunk::processCallMacro(xmlNode *localroot)
   // ToDo: Is it at all valid if npar <= 0 ?
 
   InterchunkWord **myword = NULL;
+  int idx = 0;
   if(npar > 0)
   {
     myword = new InterchunkWord *[npar];
-  }
-
-  int idx = 0;
-  for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
+    for (auto i : children(localroot)) {
       int pos = atoi((const char *) i->properties->children->content)-1;
       myword[idx] = word[pos];
       idx++;
@@ -675,12 +349,8 @@ Interchunk::processCallMacro(xmlNode *localroot)
   swap(myword, word);
   swap(npar, lword);
 
-  for(xmlNode *i = macro->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      processInstruction(i);
-    }
+  for (auto i : children(macro)) {
+    processInstruction(i);
   }
 
   swap(myword, word);
@@ -689,741 +359,91 @@ Interchunk::processCallMacro(xmlNode *localroot)
   delete[] myword;
 }
 
-void
-Interchunk::processChoose(xmlNode *localroot)
+TransferToken &
+Interchunk::readToken(InputFile& in)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  if(!input_buffer.isEmpty())
   {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "when"))
-      {
-        bool picked_option = false;
+    return input_buffer.next();
+  }
 
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-	    if(!xmlStrcmp(j->name, (const xmlChar *) "test"))
-	    {
-	      if(!processTest(j))
-	      {
-		break;
-	      }
-	      else
-	      {
-	        picked_option = true;
-              }
-	    }
-	    else
-	    {
-	      processInstruction(j);
-	    }
-	  }
-	}
-        if(picked_option)
-        {
-          return;
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "otherwise"))
-      {
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-	    processInstruction(j);
-	  }
-	}
-      }
-    }
-  }
-}
-
-bool
-Interchunk::processLogical(xmlNode *localroot)
-{
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal"))
-  {
-    return processEqual(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with"))
-  {
-    return processBeginsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with-list"))
-  {
-    return processBeginsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with"))
-  {
-    return processEndsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with-list"))
-  {
-    return processEndsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "contains-substring"))
-  {
-    return processContainsSubstring(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "or"))
-  {
-    return processOr(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "and"))
-  {
-    return processAnd(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "not"))
-  {
-    return processNot(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "in"))
-  {
-    return processIn(localroot);
-  }
-
-  return false;
-}
-
-bool
-Interchunk::processIn(xmlNode *localroot)
-{
-  xmlNode *value = NULL;
-  xmlChar *idlist = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(value == NULL)
-      {
-	value = i;
-      }
-      else
-      {
-	idlist = i->properties->children->content;
-	break;
-      }
-    }
-  }
-
-  string sval = evalString(value);
-
-  if(localroot->properties != NULL)
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      set<string, Ltstr> &myset = listslow[(const char *) idlist];
-      if(myset.find(tolower(sval)) != myset.end())
-      {
-	return true;
-      }
-      else
-      {
-	return false;
-      }
-    }
-  }
-
-  set<string, Ltstr> &myset = lists[(const char *) idlist];
-  if(myset.find(sval) != myset.end())
-  {
-    return true;
-  }
-  else
-  {
-    return false;
-  }
-}
-
-bool
-Interchunk::processTest(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return processLogical(i);
-    }
-  }
-  return false;
-}
-
-bool
-Interchunk::processAnd(xmlNode *localroot)
-{
-  bool val = true;
-  for(xmlNode *i = localroot->children; val && i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val && processLogical(i);
-    }
-  }
-
-  return val;
-}
-
-bool
-Interchunk::processOr(xmlNode *localroot)
-{
-  bool val = false;
-  for(xmlNode *i = localroot->children; !val && i != NULL ; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val || processLogical(i);
-    }
-  }
-
-  return val;
-}
-
-bool
-Interchunk::processNot(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return !processLogical(i);
-    }
-  }
-  return false;
-}
-
-bool
-Interchunk::processEqual(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return evalString(first) == evalString(second);
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return tolower(evalString(first)) == tolower(evalString(second));
-    }
-    else
-    {
-      return evalString(first) == evalString(second);
-    }
-  }
-}
-
-bool
-Interchunk::beginsWith(string const &s1, string const &s2) const
-{
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = 0; i != limit; i++)
-  {
-    if(s1[i] != s2[i])
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-bool
-Interchunk::endsWith(string const &s1, string const &s2) const
-{
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = limit-1, j = constraint - 1; i >= 0; i--, j--)
-  {
-    if(s1[j] != s2[i])
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-
-bool
-Interchunk::processBeginsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return beginsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return beginsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return beginsWith(evalString(first), evalString(second));
-    }
-  }
-}
-
-bool
-Interchunk::processEndsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return endsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return endsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return endsWith(evalString(first), evalString(second));
-    }
-  }
-}
-
-bool
-Interchunk::processBeginsWithList(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
-
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
-  }
-  else
-  {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
-  }
-
-  for(; it != limit; it++)
-  {
-    if(beginsWith(needle, *it))
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool
-Interchunk::processEndsWithList(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
-
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
-  }
-  else
-  {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
-  }
-
-  for(; it != limit; it++)
-  {
-    if(endsWith(needle, *it))
-    {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool
-Interchunk::processContainsSubstring(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return evalString(first).find(evalString(second)) != string::npos;
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return tolower(evalString(first)).find(tolower(evalString(second))) != string::npos;
-    }
-    else
-    {
-      return evalString(first).find(evalString(second)) != string::npos;
-    }
-  }
-}
-
-string
-Interchunk::copycase(string const &source_word, string const &target_word)
-{
-  wstring result;
-  wstring const s_word = UtfConverter::fromUtf8(source_word);
-  wstring const t_word = UtfConverter::fromUtf8(target_word);
-
-  bool firstupper = iswupper(s_word[0]);
-  bool uppercase = firstupper && iswupper(s_word[s_word.size()-1]);
-  bool sizeone = s_word.size() == 1;
-
-  if(!uppercase || (sizeone && uppercase))
-  {
-    result = StringUtils::tolower(t_word);
-  }
-  else
-  {
-    result = StringUtils::toupper(t_word);
-  }
-
-  if(firstupper)
-  {
-    result[0] = towupper(result[0]);
-  }
-
-  return UtfConverter::toUtf8(result);
-}
-
-string
-Interchunk::caseOf(string const &str)
-{
-  wstring const s = UtfConverter::fromUtf8(str);
-
-  if(s.size() > 1)
-  {
-    if(!iswupper(s[0]))
-    {
-      return "aa";
-    }
-    else if(!iswupper(s[s.size()-1]))
-    {
-      return "Aa";
-    }
-    else
-    {
-      return "AA";
-    }
-  }
-  else if(s.size() == 1)
-  {
-    if(!iswupper(s[0]))
-    {
-      return "aa";
-    }
-    else
-    {
-      return "Aa";
-    }
-  }
-  else
-  {
-    return "aa";
-  }
-}
-
-string
-Interchunk::tolower(string const &str) const
-{
-  return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str)));
-}
-
-string
-Interchunk::tags(string const &str) const
-{
-  string result = "<";
-
-  for(unsigned int i = 0, limit = str.size(); i != limit; i++)
-  {
-    if(str[i] == '.')
-    {
-      result.append("><");
-    }
-    else
-    {
-      result += str[i];
-    }
-  }
-
-  result += '>';
-
-  return result;
-}
-
-void
-Interchunk::processRule(xmlNode *localroot)
-{
-  // localroot is suposed to be an 'action' tag
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      processInstruction(i);
-    }
-  }
-  
-  while(!blank_queue.empty()) //flush remaining blanks that are not spaces
-  {
-    if(blank_queue.front().compare(" ") != 0)
-    {
-      fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output);
-    }
-    blank_queue.pop();
-  }
-}
-
-TransferToken &
-Interchunk::readToken(FILE *in)
-{
-  if(!input_buffer.isEmpty())
-  {
-    return input_buffer.next();
-  }
-
-  wstring content;
+  UString content;
   while(true)
   {
-    int val = fgetwc_unlocked(in);
-    if(feof(in) || (internal_null_flush && val == 0))
+    int val = in.get();
+    if(in.eof() || (internal_null_flush && val == 0))
     {
       return input_buffer.add(TransferToken(content, tt_eof));
     }
-    if(val == L'\\')
+    if(val == '\\')
     {
-      content += L'\\';
-      content += wchar_t(fgetwc_unlocked(in));
+      content += '\\';
+      content += in.get();
     }
-    else if(val == L'[')
+    else if(val == '[')
     {
-      content += L'[';
+      content += '[';
       while(true)
       {
-	int val2 = fgetwc_unlocked(in);
-	if(val2 == L'\\')
-	{
-	  content += L'\\';
-	  content += wchar_t(fgetwc_unlocked(in));
-	}
-	else if(val2 == L']')
-	{
-	  content += L']';
-	  break;
-	}
-	else
-	{
-	  content += wchar_t(val2);
-	}
+        UChar32 val2 = in.get();
+        if(val2 == '\\') {
+          content += '\\';
+          content += in.get();
+        } else if(val2 == ']') {
+          content += ']';
+          break;
+        } else {
+          content += val2;
+        }
       }
     }
-    else if(inword && val == L'{')
-    {
-      content += L'{';
-      while(true)
-      {
-	int val2 = fgetwc_unlocked(in);
-	if(val2 == L'\\')
-	{
-	  content += L'\\';
-	  content += wchar_t(fgetwc_unlocked(in));
-	}
-	else if(val2 == L'}')
-	{
-	  wint_t val3 = wchar_t(fgetwc_unlocked(in));
-	  ungetwc(val3, in);
+    else if(inword && val == '{') {
+      content += '{';
+      while(true) {
+        UChar32 val2 = in.get();
+        if(val2 == '\\') {
+          content += '\\';
+          content += in.get();
+        } else if(val2 == '}') {
+          UChar32 val3 = in.peek();
 
-	  content += L'}';
-	  if(val3 == L'$')
-	  {
-	    break;
-	  }
-	}
-	else
-	{
-	  content += wchar_t(val2);
-	}
+          content += '}';
+          if(val3 == '$') {
+            break;
+          }
+        } else {
+          content += val2;
+        }
       }
     }
-    else if(inword && val == L'$')
+    else if(inword && val == '$')
     {
       inword = false;
       return input_buffer.add(TransferToken(content, tt_word));
     }
-    else if(val == L'^')
+    else if(val == '^')
     {
       inword = true;
       return input_buffer.add(TransferToken(content, tt_blank));
     }
     else
     {
-      content += wchar_t(val);
+      content += val;
     }
   }
 }
 
-bool
-Interchunk::getNullFlush(void)
-{
-  return null_flush;
-}
-
 void
-Interchunk::setNullFlush(bool null_flush)
-{
-  this->null_flush = null_flush;
-}
-
-void
-Interchunk::setTrace(bool trace)
-{
-  this->trace = trace;
-}
-
-void
-Interchunk::interchunk_wrapper_null_flush(FILE *in, FILE *out)
+Interchunk::interchunk_wrapper_null_flush(InputFile& in, UFILE* out)
 {
   null_flush = false;
   internal_null_flush = true;
 
-  while(!feof(in))
-  {
+  while(!in.eof()) {
     interchunk(in, out);
-    fputwc_unlocked(L'\0', out);
-    int code = fflush(out);
-    if(code != 0)
-    {
-      wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', out);
+    u_fflush(out);
+    variables = variable_defaults;
   }
   internal_null_flush = false;
   null_flush = true;
@@ -1431,7 +451,7 @@ Interchunk::interchunk_wrapper_null_flush(FILE *in, FILE *out)
 
 
 void
-Interchunk::interchunk(FILE *in, FILE *out)
+Interchunk::interchunk(InputFile& in, UFILE* out)
 {
   if(getNullFlush())
   {
@@ -1439,6 +459,9 @@ Interchunk::interchunk(FILE *in, FILE *out)
   }
 
   unsigned int last = input_buffer.getPos();
+  unsigned int prev_last = last;
+  int lastrule_id = -1;
+  set<int> banned_rules;
 
   output = out;
   ms.init(me->getInitial());
@@ -1449,52 +472,69 @@ Interchunk::interchunk(FILE *in, FILE *out)
     {
       if(lastrule != NULL)
       {
-	applyRule();
-	input_buffer.setPos(last);
+        int words_to_consume = applyRule();
+        if (words_to_consume == -1) {
+          banned_rules.clear();
+          input_buffer.setPos(last);
+        } else if (words_to_consume == 1) {
+          banned_rules.clear();
+          if (prev_last >= input_buffer.getSize()) {
+            input_buffer.setPos(0);
+          } else {
+            input_buffer.setPos(prev_last+1);
+          }
+          while (true) {
+            TransferToken& tt = input_buffer.next();
+            if (tt.getType() == tt_word) {
+              break;
+            }
+          }
+        } else {
+          banned_rules.insert(lastrule_id);
+          input_buffer.setPos(prev_last);
+          input_buffer.next();
+          last = input_buffer.getPos();
+        }
+        lastrule_id = -1;
       }
       else
       {
-	if(tmpword.size() != 0)
-	{
-          fputwc_unlocked(L'^', output);
-          fputws_unlocked(tmpword[0]->c_str(), output);
-          fputwc_unlocked(L'$', output);
-	  tmpword.clear();
-	  input_buffer.setPos(last);
-	  input_buffer.next();
-	  last = input_buffer.getPos();
-	  ms.init(me->getInitial());
-	}
-	else if(tmpblank.size() != 0)
-	{
-	  fputws_unlocked(tmpblank[0]->c_str(), output);
-	  tmpblank.clear();
-	  last = input_buffer.getPos();
-	  ms.init(me->getInitial());
-	}
+        if(tmpword.size() != 0) {
+          u_fprintf(output, "^%S$", tmpword[0]->c_str());
+          tmpword.clear();
+          input_buffer.setPos(last);
+          input_buffer.next();
+          prev_last = last;
+          banned_rules.clear();
+          last = input_buffer.getPos();
+          ms.init(me->getInitial());
+        }
+        else if(tmpblank.size() != 0) {
+          write(*tmpblank[0], output);
+          tmpblank.clear();
+          prev_last = last;
+          last = input_buffer.getPos();
+          ms.init(me->getInitial());
+        }
       }
     }
-    int val = ms.classifyFinals(me->getFinals());
+    int val = ms.classifyFinals(me->getFinals(), banned_rules);
     if(val != -1)
     {
       size_t lastrule_line = rule_lines[val-1];
       lastrule = rule_map[val-1];
       last = input_buffer.getPos();
-      
+      lastrule_id = val;
+
       last_lword = tmpword.size();
 
       if(trace)
       {
-        wcerr << endl << L"apertium-interchunk: Rule " << val << L" line " << lastrule_line << L" ";
-        for (unsigned int ind = 0; ind < tmpword.size(); ind++)
-        {
-          if (ind != 0)
-          {
-            wcerr << L" ";
-          }
-          fputws_unlocked(tmpword[ind]->c_str(), stderr);
+        cerr << endl << "apertium-interchunk: Rule " << val << " line " << lastrule_line;
+        for (auto& it : tmpword) {
+          cerr << " " << *it;
         }
-        wcerr << endl;
+        cerr << endl;
       }
     }
 
@@ -1502,38 +542,36 @@ Interchunk::interchunk(FILE *in, FILE *out)
 
     switch(current.getType())
     {
-      case tt_word:
-	applyWord(current.getContent());
-        tmpword.push_back(&current.getContent());
-	break;
+    case tt_word:
+      applyWord(current.getContent());
+      tmpword.push_back(&current.getContent());
+      break;
 
-      case tt_blank:
-	ms.step(L' ');
-	tmpblank.push_back(&current.getContent());
-	break;
+    case tt_blank:
+      ms.step(' ');
+      tmpblank.push_back(&current.getContent());
+      break;
 
-      case tt_eof:
-	if(tmpword.size() != 0)
-	{
-	  tmpblank.push_back(&current.getContent());
-	  ms.clear();
-	}
-	else
-	{
-	  fputws_unlocked(current.getContent().c_str(), output);
-	  tmpblank.clear();
-	  return;
-	}
-	break;
+    case tt_eof:
+      if(tmpword.size() != 0) {
+        tmpblank.push_back(&current.getContent());
+        ms.clear();
+      }
+      else {
+        write(current.getContent(), output);
+        tmpblank.clear();
+        return;
+      }
+      break;
 
-      default:
-	wcerr << "Error: Unknown input token." << endl;
-	return;
+    default:
+      cerr << "Error: Unknown input token." << endl;
+      return;
     }
   }
 }
 
-void
+int
 Interchunk::applyRule()
 {
   unsigned int limit = tmpword.size();
@@ -1549,15 +587,15 @@ Interchunk::applyRule()
     {
       if(int(blank_queue.size()) < last_lword - 1)
       {
-        string blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1]));
+        UString blank_to_add = UString(*tmpblank[i-1]);
         blank_queue.push(blank_to_add);
       }
     }
 
-    word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i]));
+    word[i] = new InterchunkWord(*tmpword[i]);
   }
 
-  processRule(lastrule);
+  int words_to_consume = processRule(lastrule);
   lastrule = NULL;
 
   if(word)
@@ -1573,25 +611,26 @@ Interchunk::applyRule()
   tmpword.clear();
   tmpblank.clear();
   ms.init(me->getInitial());
+  return words_to_consume;
 }
 
 void
-Interchunk::applyWord(wstring const &word_str)
+Interchunk::applyWord(UString const &word_str)
 {
-  ms.step(L'^');
+  ms.step('^');
   for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
   {
     switch(word_str[i])
     {
-      case L'\\':
+      case '\\':
         i++;
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
 
-      case L'<':
+      case '<':
 	for(unsigned int j = i+1; j != limit; j++)
 	{
-	  if(word_str[j] == L'>')
+	  if(word_str[j] == '>')
 	  {
 	    int symbol = alphabet(word_str.substr(i, j-i+1));
 	    if(symbol)
@@ -1608,14 +647,14 @@ Interchunk::applyWord(wstring const &word_str)
 	}
 	break;
 
-      case L'{':  // ignore the unmodifiable part of the chunk
-        ms.step(L'$');
+      case '{':  // ignore the unmodifiable part of the chunk
+        ms.step('$');
         return;
 
       default:
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
     }
   }
-  ms.step(L'$');
+  ms.step('$');
 }
diff --git a/apertium/interchunk.dtd b/apertium/interchunk.dtd
index 25e2f66..91af7ec 100644
--- a/apertium/interchunk.dtd
+++ b/apertium/interchunk.dtd
@@ -23,7 +23,7 @@
 
 <!ENTITY % condition "(and|or|not|equal|begins-with|begins-with-list|ends-with|ends-with-list|contains-substring|in)">
 <!ENTITY % container "(var|clip)">
-<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append)">
+<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append|reject-current-rule)">
 <!ENTITY % value "(b|clip|lit|lit-tag|var|get-case-from|case-of|concat|chunk)">
 <!ENTITY % stringvalue "(clip|lit|var|get-case-from|case-of)">
 
@@ -425,6 +425,16 @@ get-case-from -->
 <!ELEMENT concat (%value;)+>
 <!-- Concatenates a sequence of values -->
 
+<!ELEMENT reject-current-rule EMPTY>
+<!ATTLIST reject-current-rule shifting (yes|no) #IMPLIED>
+<!--
+      This instruction cancels the execution of the rule being processed.
+      If "shifting" is set to "yes" or is not specified, the matching process
+      consumes exactly one word of the input. If "shifting" is set to "no",
+      the rule is instead marked so that it is not considered again during
+      matching until the input buffer has advanced by at least one word.
+-->
+
 <!ELEMENT chunk (%value;)+>
 <!--
      Encloses a chunk
diff --git a/apertium/interchunk.h b/apertium/interchunk.h
index 6efbf45..7254b9e 100644
--- a/apertium/interchunk.h
+++ b/apertium/interchunk.h
@@ -17,119 +17,49 @@
 #ifndef _INTERCHUNK_
 #define _INTERCHUNK_
 
-#include <apertium/transfer_instr.h>
-#include <apertium/transfer_token.h>
-#include <apertium/interchunk_word.h>
-#include <apertium/apertium_re.h>
-#include <lttoolbox/alphabet.h>
-#include <lttoolbox/buffer.h>
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/match_exe.h>
-#include <lttoolbox/match_state.h>
+#include <apertium/transfer_base.h>
 
-#include <cstring>
-#include <cstdio>
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <map>
-#include <set>
-#include <vector>
-#include <queue>
+#include <apertium/interchunk_word.h>
+#include <lttoolbox/input_file.h>
 
 using namespace std;
 
-class Interchunk
+class Interchunk : public TransferBase
 {
 private:
-
-  Alphabet alphabet;
-  MatchExe *me;
-  MatchState ms;
-  map<string, ApertiumRE, Ltstr> attr_items;
-  map<string, string, Ltstr> variables;
-  map<string, int, Ltstr> macros;
-  map<string, set<string, Ltstr>, Ltstr> lists;
-  map<string, set<string, Ltstr>, Ltstr> listslow;
-  vector<xmlNode *> macro_map;
-  vector<xmlNode *> rule_map;
-  vector<size_t> rule_lines;
-  xmlDoc *doc;
-  xmlNode *root_element;
   InterchunkWord **word;
-  queue <string> blank_queue;
-  int lword;
   int last_lword;
-  Buffer<TransferToken> input_buffer;
-  vector<wstring *> tmpword;
-  vector<wstring *> tmpblank;
-
-  FILE *output;
-  int any_char;
-  int any_tag;
 
-  xmlNode *lastrule;
-  unsigned int nwords;
-
-  map<xmlNode *, TransferInstr> evalStringCache;
   bool inword;
-  bool null_flush;
-  bool internal_null_flush;
-  bool trace;
-  bool in_out;
-
-  void destroy();
-  void readData(FILE *input);
-  void readInterchunk(string const &input);
-  void collectMacros(xmlNode *localroot);
-  void collectRules(xmlNode *localroot);
-  string caseOf(string const &str);
-  string copycase(string const &source_word, string const &target_word);
 
   void processLet(xmlNode *localroot);
-  void processAppend(xmlNode *localroot);
   void processOut(xmlNode *localroot);
   void processCallMacro(xmlNode *localroot);
   void processModifyCase(xmlNode *localroot);
-  bool processLogical(xmlNode *localroot);
-  bool processTest(xmlNode *localroot);
-  bool processAnd(xmlNode *localroot);
-  bool processOr(xmlNode *localroot);
-  bool processEqual(xmlNode *localroot);
-  bool processBeginsWith(xmlNode *localroot);
-  bool processBeginsWithList(xmlNode *localroot);
-  bool processEndsWith(xmlNode *localroot);
-  bool processEndsWithList(xmlNode *localroot);
-  bool processContainsSubstring(xmlNode *localroot);
-  bool processNot(xmlNode *localroot);
-  bool processIn(xmlNode *localroot);
-  void processRule(xmlNode *localroot);
-  string evalString(xmlNode *localroot);
-  void processInstruction(xmlNode *localroot);
-  void processChoose(xmlNode *localroot);
-  string processChunk(xmlNode *localroot);
+  UString processChunk(xmlNode *localroot);
+  void processClip(xmlNode* localroot);
+  void processBlank(xmlNode* localroot);
+  void processCaseOf(xmlNode* localroot);
+
+  void processLuCount(xmlNode* localroot);
+  UString processLu(xmlNode* localroot);
+  UString processMlu(xmlNode* localroot);
+
+  UString evalCachedString(xmlNode* element);
 
-  bool beginsWith(string const &str1, string const &str2) const;
-  bool endsWith(string const &str1, string const &str2) const;
-  string tolower(string const &str) const;
-  string tags(string const &str) const;
-  string readWord(FILE *in);
-  string readBlank(FILE *in);
-  string readUntil(FILE *in, int const symbol) const;
-  void applyWord(wstring const &word_str);
-  void applyRule();
-  TransferToken & readToken(FILE *in);
+  UString readWord(InputFile& in);
+  UString readBlank(InputFile& in);
+  UString readUntil(InputFile& in, int const symbol) const;
+  void applyWord(UString const &word_str);
+  int applyRule();
+  TransferToken & readToken(InputFile& in);
   bool checkIndex(xmlNode *element, int index, int limit);
-  void interchunk_wrapper_null_flush(FILE *in, FILE *out);
+  void interchunk_wrapper_null_flush(InputFile& in, UFILE* out);
 
 public:
   Interchunk();
-  ~Interchunk();
 
-  void read(string const &transferfile, string const &datafile);
-  void interchunk(FILE *in, FILE *out);
-  bool getNullFlush(void);
-  void setNullFlush(bool null_flush);
-  void setTrace(bool trace);
+  void interchunk(InputFile& in, UFILE* out);
 };
 
 #endif
diff --git a/apertium/interchunk_word.cc b/apertium/interchunk_word.cc
index 28df1c5..14a27b3 100644
--- a/apertium/interchunk_word.cc
+++ b/apertium/interchunk_word.cc
@@ -17,9 +17,7 @@
 
 #include <apertium/interchunk_word.h>
 #include <iostream>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
+#include <lttoolbox/string_utils.h>
 
 void
 InterchunkWord::copy(InterchunkWord const &o)
@@ -36,7 +34,7 @@ InterchunkWord::InterchunkWord()
 {
 }
 
-InterchunkWord::InterchunkWord(string const &chunk)
+InterchunkWord::InterchunkWord(UString const &chunk)
 {
   init(chunk);
 }
@@ -63,7 +61,7 @@ InterchunkWord::operator =(InterchunkWord const &o)
 }
 
 void
-InterchunkWord::init(string const &chunk)
+InterchunkWord::init(UString const &chunk)
 {
   size_t b_end = 0;
   for(size_t i = 0; i < chunk.size(); i++)
@@ -86,7 +84,7 @@ InterchunkWord::init(string const &chunk)
       }
     }
   }
-  
+
   if(b_end > 0)
   {
     this->wblank = chunk.substr(0, b_end);
@@ -96,19 +94,19 @@ InterchunkWord::init(string const &chunk)
   {
     this->chunk = chunk;
   }
-  this->queue = "";
+  this->queue.clear();
 }
 
-string
+UString
 InterchunkWord::chunkPart(ApertiumRE const &part)
 {
-  string result = part.match(chunk);
+  UString result = part.match(chunk);
   if(result.size() == 0)
   {
     result = part.match(queue);
     if(result.size() != queue.size())
     {
-      return "";
+      return ""_u;
     }
     else
     {
@@ -125,14 +123,14 @@ InterchunkWord::chunkPart(ApertiumRE const &part)
   }
 }
 
-string
+UString
 InterchunkWord::getWblank()
 {
   return wblank;
 }
 
 bool
-InterchunkWord::setChunkPart(ApertiumRE const &part, string const &value)
+InterchunkWord::setChunkPart(ApertiumRE const &part, UString const &value)
 {
   return part.replace(chunk, value);
 }
diff --git a/apertium/interchunk_word.h b/apertium/interchunk_word.h
index 670bcca..bd0c5ed 100644
--- a/apertium/interchunk_word.h
+++ b/apertium/interchunk_word.h
@@ -21,6 +21,7 @@
 #include <apertium/apertium_re.h>
 #include <map>
 #include <string>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
@@ -33,17 +34,17 @@ private:
   /**
    * Target language chunk name and tags
    */
-  string chunk;
+  UString chunk;
 
   /**
    * Target language chunk content
    */
-  string queue;
+  UString queue;
   
   /**
    * Wordbound blank (for postchunk)
    */
-  string wblank;
+  UString wblank;
 
   /**
    * Copy method
@@ -76,7 +77,7 @@ public:
    * Parametric constructor calling init()
    * @param chunk the chunk
    */
-  InterchunkWord(string const &chunk);
+  InterchunkWord(UString const &chunk);
 
   /**
    * Assignment operator
@@ -89,20 +90,20 @@ public:
    * Sets a chunk
    * @param chunk the chunk
    */
-  void init(string const &chunk);
+  void init(UString const &chunk);
 
   /**
    * Reference a chunk part
    * @param part regular expression to match
    * @returns reference to the part of string matched
    */
-  string chunkPart(ApertiumRE const &part);
+  UString chunkPart(ApertiumRE const &part);
   
   /**
    * Reference the wordbound blank (for postchunk)
    * @returns reference to the wblank string
    */
-  string getWblank();
+  UString getWblank();
 
   /**
    * Sets a value for a chunk part
@@ -110,7 +111,7 @@ public:
    * @param value the new value for the given part
    * @returns whether part matched
    */
-  bool setChunkPart(ApertiumRE const &part, string const &value);
+  bool setChunkPart(ApertiumRE const &part, UString const &value);
 
 };
 
diff --git a/apertium/latex_accentsmap.cc b/apertium/latex_accentsmap.cc
index 4e8dd9a..b3d6c8f 100644
--- a/apertium/latex_accentsmap.cc
+++ b/apertium/latex_accentsmap.cc
@@ -42,68 +42,68 @@ void AccentsMap::init_acmap() {
 
 void AccentsMap::init_camap() {
 
-	map[L"à"] = L"`a"; // Grave accent
-	map[L"è"] = L"`e";
-	map[L"ì"] = L"`\\i";
-	map[L"ò"] = L"`o";
-	map[L"ù"] = L"`u";
-	map[L"ỳ"] = L"`y";
-	map[L"À"] = L"`A";
-	map[L"È"] = L"`E";
-	map[L"Ì"] = L"`I";
-	map[L"Ò"] = L"`O";
-	map[L"Ù"] = L"`U";
-	map[L"Ỳ"] = L"`Y";
-	map[L"á"] = L"'a"; // Acute accent
-	map[L"é"] = L"'e";
-	map[L"í"] = L"'\\i";
-	map[L"ó"] = L"'o";
-	map[L"ú"] = L"'u";
-	map[L"ý"] = L"'y";
-	map[L"Á"] = L"'A";
-	map[L"É"] = L"'E";
-	map[L"Í"] = L"'I";
-	map[L"Ó"] = L"'O";
-	map[L"Ú"] = L"'U";
-	map[L"Ý"] = L"'Y";
-	map[L"â"] = L"^a"; // Circumflex
-	map[L"ê"] = L"^e";
-	map[L"î"] = L"^\\i";
-	map[L"ô"] = L"^o";
-	map[L"û"] = L"^u";
-	map[L"ŷ"] = L"^y";
-	map[L"Â"] = L"^A";
-	map[L"Ê"] = L"^E";
-	map[L"Î"] = L"^I";
-	map[L"Ô"] = L"^O";
-	map[L"Û"] = L"^U";
-	map[L"Ŷ"] = L"^Y";
-	map[L"ä"] = L"\"a";    // Umlaut or dieresis
-	map[L"ë"] = L"\"e";
-	map[L"ï"] = L"\"\\i";
-	map[L"ö"] = L"\"o";
-	map[L"ü"] = L"\"u";
-	map[L"ÿ"] = L"\"y";
-	map[L"Ä"] = L"\"A";
-	map[L"Ë"] = L"\"E";
-	map[L"Ï"] = L"\"I";
-	map[L"Ö"] = L"\"O";
-	map[L"Ü"] = L"\"U";
-	map[L"Ÿ"] = L"\"Y";
-
-	map[L"ñ"] = L"~n";
-	map[L"Ñ"] = L"~N";
-
-	map[L"ç"] = L"cc";   // Cedilla
-	map[L"Ç"] = L"cC";
+	map["à"_u] = "`a"_u; // Grave accent
+	map["è"_u] = "`e"_u;
+	map["ì"_u] = "`\\i"_u;
+	map["ò"_u] = "`o"_u;
+	map["ù"_u] = "`u"_u;
+	map["ỳ"_u] = "`y"_u;
+	map["À"_u] = "`A"_u;
+	map["È"_u] = "`E"_u;
+	map["Ì"_u] = "`I"_u;
+	map["Ò"_u] = "`O"_u;
+	map["Ù"_u] = "`U"_u;
+	map["Ỳ"_u] = "`Y"_u;
+	map["á"_u] = "'a"_u; // Acute accent
+	map["é"_u] = "'e"_u;
+	map["í"_u] = "'\\i"_u;
+	map["ó"_u] = "'o"_u;
+	map["ú"_u] = "'u"_u;
+	map["ý"_u] = "'y"_u;
+	map["Á"_u] = "'A"_u;
+	map["É"_u] = "'E"_u;
+	map["Í"_u] = "'I"_u;
+	map["Ó"_u] = "'O"_u;
+	map["Ú"_u] = "'U"_u;
+	map["Ý"_u] = "'Y"_u;
+	map["â"_u] = "^a"_u; // Circumflex
+	map["ê"_u] = "^e"_u;
+	map["î"_u] = "^\\i"_u;
+	map["ô"_u] = "^o"_u;
+	map["û"_u] = "^u"_u;
+	map["ŷ"_u] = "^y"_u;
+	map["Â"_u] = "^A"_u;
+	map["Ê"_u] = "^E"_u;
+	map["Î"_u] = "^I"_u;
+	map["Ô"_u] = "^O"_u;
+	map["Û"_u] = "^U"_u;
+	map["Ŷ"_u] = "^Y"_u;
+	map["ä"_u] = "\"a"_u;    // Umlaut or dieresis
+	map["ë"_u] = "\"e"_u;
+	map["ï"_u] = "\"\\i"_u;
+	map["ö"_u] = "\"o"_u;
+	map["ü"_u] = "\"u"_u;
+	map["ÿ"_u] = "\"y"_u;
+	map["Ä"_u] = "\"A"_u;
+	map["Ë"_u] = "\"E"_u;
+	map["Ï"_u] = "\"I"_u;
+	map["Ö"_u] = "\"O"_u;
+	map["Ü"_u] = "\"U"_u;
+	map["Ÿ"_u] = "\"Y"_u;
+
+	map["ñ"_u] = "~n"_u;
+	map["Ñ"_u] = "~N"_u;
+
+	map["ç"_u] = "cc"_u;   // Cedilla
+	map["Ç"_u] = "cC"_u;
 
 
 }
 
-wstring AccentsMap::get(wstring input){
+UString AccentsMap::get(UString input){
 	it = map.find(input);
 	if(it == map.end())
-		return L"";
+		return ""_u;
 	else
 		return (*it).second;
 }
@@ -112,101 +112,104 @@ wstring AccentsMap::get(wstring input){
 void AccentsMap::init_locale(){
 	char *locale = setlocale(LC_ALL, "");
 	std::locale lollocale(locale);
-	wcout.imbue(lollocale);
+	cout.imbue(lollocale);
 }
 
 
 
 /*latexAccents = [
-	map[L"à"] = L"\\`a"; # Grave accent
-	map[L"è"] = L"\\`e";
-	map[L"ì"] = L"\\`\\i";
-	map[L"ò"] = L"\\`o";
-	map[L"ù"] = L"\\`u";
-	map[L"ỳ"] = L"\\`y";
-	map[L"À"] = L"\\`A";
-	map[L"È"] = L"\\`E";
-	map[L"Ì"] = L"\\`\\I";
-	map[L"Ò"] = L"\\`O";
-	map[L"Ù"] = L"\\`U";
-	map[L"Ỳ"] = L"\\`Y";
-	map[L"á"] = L"\\'a"; # Acute accent
-	map[L"é"] = L"\\'e";
-	map[L"í"] = L"\\'\\i";
-	map[L"ó"] = L"\\'o";
-	map[L"ú"] = L"\\'u";
-	map[L"ý"] = L"\\'y";
-	map[L"Á"] = L"\\'A";
-	map[L"É"] = L"\\'E";
-	map[L"Í"] = L"\\'\\I";
-	map[L"Ó"] = L"\\'O";
-	map[L"Ú"] = L"\\'U";
-	map[L"Ý"] = L"\\'Y";
-	map[L"â"] = L"\\^a"; # Circumflex
-	map[L"ê"] = L"\\^e";
-	map[L"î"] = L"\\^\\i";
-	map[L"ô"] = L"\\^o";
-	map[L"û"] = L"\\^u";
-	map[L"ŷ"] = L"\\^y";
-	map[L"Â"] = L"\\^A";
-	map[L"Ê"] = L"\\^E";
-	map[L"Î"] = L"\\^\\I";
-	map[L"Ô"] = L"\\^O";
-	map[L"Û"] = L"\\^U";
-	map[L"Ŷ"] = L"\\^Y";
-	map[L"ä"] = L"\\\"a";    # Umlaut or dieresis
-	map[L"ë"] = L"\\\"e";
-	map[L"ï"] = L"\\\"\\i";
-	map[L"ö"] = L"\\\"o";
-	map[L"ü"] = L"\\\"u";
-	map[L"ÿ"] = L"\\\"y";
-	map[L"Ä"] = L"\\\"A";
-	map[L"Ë"] = L"\\\"E";
-	map[L"Ï"] = L"\\\"\\I";
-	map[L"Ö"] = L"\\\"O";
-	map[L"Ü"] = L"\\\"U";
-	map[L"Ÿ"] = L"\\\"Y";
-	map[L"ç"] = L"\\c{c}";   # Cedilla
-	map[L"Ç"] = L"\\c{C}";
-	map[L"œ"] = L"{\\oe}";   # Ligatures
-	map[L"Œ"] = L"{\\OE}";
-	map[L"æ"] = L"{\\ae}";
-	map[L"Æ"] = L"{\\AE}";
-	map[L"å"] = L"{\\aa}";
-	map[L"Å"] = L"{\\AA}";
-	map[L"–"] = L"--";   # Dashes
-	map[L"—"] = L"---";
-	map[L"ø"] = L"{\\o}";    # Misc latin-1 letters
-	map[L"Ø"] = L"{\\O}";
-	map[L"ß"] = L"{\\ss}";
-	map[L"¡"] = L"{!`}";
-	map[L"¿"] = L"{?`}";
-	map[L"\\"] = L"\\\\";    # Characters that should be quoted
-	map[L"~"] = L"\\~";
-	map[L"&"] = L"\\&";
-	map[L"$"] = L"\\$";
-	map[L"{"] = L"\\{";
-	map[L"}"] = L"\\}";
-	map[L"%"] = L"\\%";
-	map[L"#"] = L"\\#";
-	map[L"_"] = L"\\_";
-	map[L"≥"] = L"$\\ge$";   # Math operators
-	map[L"≤"] = L"$\\le$";
-	map[L"≠"] = L"$\\neq$";
-	map[L"©"] = L"\copyright"; # Misc
-	map[L"ı"] = L"{\\i}";
-	map[L"µ"] = L"$\\mu$";
-	map[L"°"] = L"$\\deg$";
-	map[L"‘"] = L"`";    #Quotes
-	map[L"’"] = L"'";
-	map[L"“"] = L"``";
-	map[L"”"] = L"''";
-	map[L"‚"] = L",";
-	map[L"„"] = L",,";
+	map["à"_u] = "\\`a"_u; # Grave accent
+	map["è"_u] = "\\`e"_u;
+	map["ì"_u] = "\\`\\i"_u;
+	map["ò"_u] = "\\`o"_u;
+	map["ù"_u] = "\\`u"_u;
+	map["ỳ"_u] = "\\`y"_u;
+	map["À"_u] = "\\`A"_u;
+	map["È"_u] = "\\`E"_u;
+	map["Ì"_u] = "\\`\\I"_u;
+	map["Ò"_u] = "\\`O"_u;
+	map["Ù"_u] = "\\`U"_u;
+	map["Ỳ"_u] = "\\`Y"_u;
+	map["á"_u] = "\\'a"_u; # Acute accent
+	map["é"_u] = "\\'e"_u;
+	map["í"_u] = "\\'\\i"_u;
+	map["ó"_u] = "\\'o"_u;
+	map["ú"_u] = "\\'u"_u;
+	map["ý"_u] = "\\'y"_u;
+	map["Á"_u] = "\\'A"_u;
+	map["É"_u] = "\\'E"_u;
+	map["Í"_u] = "\\'\\I"_u;
+	map["Ó"_u] = "\\'O"_u;
+	map["Ú"_u] = "\\'U"_u;
+	map["Ý"_u] = "\\'Y"_u;
+	map["â"_u] = "\\^a"_u; # Circumflex
+	map["ê"_u] = "\\^e"_u;
+	map["î"_u] = "\\^\\i"_u;
+	map["ô"_u] = "\\^o"_u;
+	map["û"_u] = "\\^u"_u;
+	map["ŷ"_u] = "\\^y"_u;
+	map["Â"_u] = "\\^A"_u;
+	map["Ê"_u] = "\\^E"_u;
+	map["Î"_u] = "\\^\\I"_u;
+	map["Ô"_u] = "\\^O"_u;
+	map["Û"_u] = "\\^U"_u;
+	map["Ŷ"_u] = "\\^Y"_u;
+	map["ä"_u] = "\\\"a"_u;    # Umlaut or dieresis
+	map["ë"_u] = "\\\"e"_u;
+	map["ï"_u] = "\\\"\\i"_u;
+	map["ö"_u] = "\\\"o"_u;
+	map["ü"_u] = "\\\"u"_u;
+	map["ÿ"_u] = "\\\"y"_u;
+	map["Ä"_u] = "\\\"A"_u;
+	map["Ë"_u] = "\\\"E"_u;
+	map["Ï"_u] = "\\\"\\I"_u;
+	map["Ö"_u] = "\\\"O"_u;
+	map["Ü"_u] = "\\\"U"_u;
+	map["Ÿ"_u] = "\\\"Y"_u;
+	map["ç"_u] = "\\c{c}"_u;   # Cedilla
+	map["Ç"_u] = "\\c{C}"_u;
+	map["œ"_u] = "{\\oe}"_u;   # Ligatures
+	map["Œ"_u] = "{\\OE}"_u;
+	map["æ"_u] = "{\\ae}"_u;
+	map["Æ"_u] = "{\\AE}"_u;
+	map["å"_u] = "{\\aa}"_u;
+	map["Å"_u] = "{\\AA}"_u;
+	map["–"_u] = "--"_u;   # Dashes
+	map["—"_u] = "---"_u;
+	map["ø"_u] = "{\\o}"_u;    # Misc latin-1 letters
+	map["Ø"_u] = "{\\O}"_u;
+	map["ß"_u] = "{\\ss}"_u;
+	map["¡"_u] = "{!`}"_u;
+	map["¿"_u] = "{?`}"_u;
+	map["\\"_u] = "\\\\"_u;    # Characters that should be quoted
+	map["~"_u] = "\\~"_u;
+	map["&"_u] = "\\&"_u;
+	map["$"_u] = "\\$"_u;
+	map["{"_u] = "\\{"_u;
+	map["}"_u] = "\\}"_u;
+	map["%"_u] = "\\%"_u;
+	map["#"_u] = "\\#"_u;
+	map["_"_u] = "\\_"_u;
+	map["≥"_u] = "$\\ge$"_u;   # Math operators
+	map["≤"_u] = "$\\le$"_u;
+	map["≠"_u] = "$\\neq$"_u;
+	map["©"_u] = "\copyright"_u; # Misc
+	map["ı"_u] = "{\\i}"_u;
+	map["µ"_u] = "$\\mu$"_u;
+	map["°"_u] = "$\\deg$"_u;
+	map["‘"_u] = "`"_u;    #Quotes
+	map["’"_u] = "'"_u;
+	map["“"_u] = "``"_u;
+	map["”"_u] = "''"_u;
+	map["‚"_u] = ","_u;
+	map["„"_u] = ",,"_u;
 ]*/
 
 
-
-
-
-
+void fputus(const UString& s, FILE* out) // writes s to out, converted from UTF-16 to UTF-8
+{
+  string temp;
+  temp.reserve(s.size()*3); // capacity hint only: one UTF-16 code unit becomes at most 3 UTF-8 bytes
+  utf8::utf16to8(s.begin(), s.end(), std::back_inserter(temp));
+  fwrite(temp.data(), 1, temp.size(), out); // length-based write: an embedded U+0000 no longer truncates the output as fputs() did
+}
diff --git a/apertium/latex_accentsmap.h b/apertium/latex_accentsmap.h
index e93b6de..89ccafd 100644
--- a/apertium/latex_accentsmap.h
+++ b/apertium/latex_accentsmap.h
@@ -17,25 +17,15 @@
 
 #include <map>
 #include <iostream>
-#include <cwchar>
 #include <string>
 #include <cstring>
 #include <locale>
-#include <lttoolbox/ltstr.h>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
-/*struct Ltstr // Already in lttoolbox/ltstr.h
-{
-  bool operator()(wstring const &s1, wstring const &s2) const
-  {
-    return wcscmp(s1.c_str(), s2.c_str()) < 0;
-  }
-};
-*/
-
 class AccentsMap {
-	typedef std::map<wstring, wstring, Ltstr> acmap;
+	typedef std::map<UString, UString> acmap;
 	private:
 		acmap           map; // Accent to character
 		acmap::iterator it;  // Iterator for searching
@@ -50,6 +40,7 @@ class AccentsMap {
 		void init_locale();
 
 		// The getter for both directions depending on init.
-		wstring get(wstring input);
+		UString get(UString input);
 };
 
+void fputus(const UString& s, FILE* out);
diff --git a/apertium/lemma.cc b/apertium/lemma.cc
index 9bb55f8..629870c 100644
--- a/apertium/lemma.cc
+++ b/apertium/lemma.cc
@@ -37,7 +37,7 @@ Lemma::Lemma(const Analysis &Analysis_) : TheLemma() {
   if (Analysis_.TheMorphemes.front().TheLemma.empty())
     throw Exception::Morpheme::TheLemma_empty(
         "can't convert const Analysis & comprising Morpheme comprising empty "
-        "Lemma std::wstring to Lemma");
+        "Lemma UString to Lemma");
 
   TheLemma = Analysis_.TheMorphemes.front().TheLemma;
 }
@@ -46,7 +46,7 @@ Lemma::Lemma(const Morpheme &Morpheme_) : TheLemma() {
   if (Morpheme_.TheLemma.empty())
     throw Exception::Morpheme::TheLemma_empty("can't convert const Morpheme & "
                                               "comprising empty Lemma "
-                                              "std::wstring to Lemma");
+                                              "UString to Lemma");
 
   TheLemma = Morpheme_.TheLemma;
 }
diff --git a/apertium/lemma.h b/apertium/lemma.h
index 9655633..e056d74 100644
--- a/apertium/lemma.h
+++ b/apertium/lemma.h
@@ -29,7 +29,7 @@ public:
   Lemma();
   Lemma(const Analysis &Analysis_);
   Lemma(const Morpheme &Morpheme_);
-  std::wstring TheLemma;
+  UString TheLemma;
 };
 }
 
diff --git a/apertium/lexical_unit.h b/apertium/lexical_unit.h
index 8b7bf04..7599caa 100644
--- a/apertium/lexical_unit.h
+++ b/apertium/lexical_unit.h
@@ -24,7 +24,7 @@
 namespace Apertium {
 class LexicalUnit {
 public:
-  std::wstring TheSurfaceForm;
+  UString TheSurfaceForm;
   std::vector<Analysis> TheAnalyses;
 };
 }
diff --git a/apertium/lswpost.cc b/apertium/lswpost.cc
index c84870a..d93c9bd 100644
--- a/apertium/lswpost.cc
+++ b/apertium/lswpost.cc
@@ -40,7 +40,7 @@
 #include <unistd.h>
 #include <vector>
 #include <algorithm>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <cstdlib>
 
 using namespace std;
@@ -53,10 +53,10 @@ TaggerData& LSWPoST::get_tagger_data() {
 
 void LSWPoST::deserialise(FILE *Serialised_FILE_Tagger) {
   tdlsw.read(Serialised_FILE_Tagger);
-  eos = (tdlsw.getTagIndex())[L"TAG_SENT"];
+  eos = (tdlsw.getTagIndex())["TAG_SENT"_u];
 }
 
-std::vector<std::wstring> &LSWPoST::getArrayTags() {
+std::vector<UString> &LSWPoST::getArrayTags() {
   return tdlsw.getArrayTags();
 }
 
@@ -64,7 +64,7 @@ void LSWPoST::serialise(FILE *Stream_) { tdlsw.write(Stream_); }
 
 void LSWPoST::deserialise(const TaggerData &Deserialised_FILE_Tagger) {
   tdlsw = TaggerDataLSW(Deserialised_FILE_Tagger);
-  eos = (tdlsw.getTagIndex())[L"TAG_SENT"];
+  eos = (tdlsw.getTagIndex())["TAG_SENT"_u];
 }
 
 void LSWPoST::init_probabilities_from_tagged_text_(MorphoStream &, MorphoStream &) {
@@ -88,7 +88,7 @@ LSWPoST::LSWPoST(TaggerFlags& Flags_) : FILE_Tagger(Flags_) {}
 
 LSWPoST::LSWPoST(TaggerDataLSW t) {
   tdlsw = t;
-  eos = (tdlsw.getTagIndex())[L"TAG_SENT"];
+  eos = (tdlsw.getTagIndex())["TAG_SENT"_u];
 }
 
 LSWPoST::~LSWPoST() {}
@@ -112,7 +112,7 @@ LSWPoST::init_probabilities(MorphoStream &morpho_stream) {
   int num_valid_seq = 0;
 
   word = new TaggerWord();          // word for tags left
-  word->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  word->add_tag(eos, "sent"_u, tdlsw.getPreferRules());
   tags_left = word->get_tags();     // tags left
   if (tags_left.size()==0) { //This is an unknown word
     tags_left = tdlsw.getOpenClass();
@@ -138,7 +138,7 @@ LSWPoST::init_probabilities(MorphoStream &morpho_stream) {
   // count each element of the para matrix
   while (word != NULL) {
     if (++nw % 10000 == 0) {
-      wcerr << L'.' << flush;
+      cerr << '.' << flush;
     }
 
     tags_right = word->get_tags();       // tags right
@@ -184,7 +184,7 @@ LSWPoST::init_probabilities(MorphoStream &morpho_stream) {
     }
   }
 
-  wcerr << L"\n";
+  cerr << "\n";
 }
 
 bool LSWPoST::is_valid_seq(TTag left, TTag mid, TTag right) {
@@ -232,7 +232,7 @@ void
 LSWPoST::post_ambg_class_scan() {
   int N = (tdlsw.getTagIndex()).size();
   int M = (tdlsw.getOutput()).size();
-  wcerr << N << L" states and " << M <<L" ambiguity classes\n";
+  cerr << N << " states and " << M <<" ambiguity classes\n";
 
   // set up the probability matrix of tdlsw, the pointer to the TaggerDataLSW object
   tdlsw.setProbabilities(N);
@@ -249,7 +249,7 @@ LSWPoST::train(MorphoStream &morpho_stream) {
   vector<vector<vector<double> > > para_matrix_new(N, vector<vector<double> >(N, vector<double>(N, 0)));
 
   word = new TaggerWord();          // word for tags left
-  word->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  word->add_tag(eos, "sent"_u, tdlsw.getPreferRules());
   tags_left = word->get_tags();     // tags left
   if (tags_left.size()==0) { //This is an unknown word
     tags_left = tdlsw.getOpenClass();
@@ -273,7 +273,7 @@ LSWPoST::train(MorphoStream &morpho_stream) {
 
   while (word) {
     if (++nw % 10000 == 0) {
-      wcerr << L'.' << flush;
+      cerr << '.' << flush;
     }
 
     tags_right = word->get_tags();       // tags right
@@ -320,11 +320,11 @@ LSWPoST::train(MorphoStream &morpho_stream) {
 
 void
 LSWPoST::print_para_matrix() {
-  wcout << L"para matrix D\n----------------------------\n";
+  cout << "para matrix D\n----------------------------\n";
   for (int i = 0; i < tdlsw.getN(); ++i) {
     for (int j = 0; j < tdlsw.getN(); ++j) {
       for (int k = 0; k < tdlsw.getN(); ++k) {
-        wcout << L"D[" << i << L"][" << j << L"][" << k << L"] = "
+        cout << "D[" << i << "][" << j << "][" << k << "] = "
             << tdlsw.getD()[i][j][k] << "\n";
       }
     }
@@ -332,14 +332,14 @@ LSWPoST::print_para_matrix() {
 }
 
 void
-LSWPoST::tagger(MorphoStream &morpho_stream, FILE *Output) {
+LSWPoST::tagger(MorphoStream &morpho_stream, UFILE* Output) {
   TaggerWord *word_left = NULL, *word_mid = NULL, *word_right = NULL;
   set<TTag> tags_left, tags_mid, tags_right;
   set<TTag>::iterator iter_left, iter_mid, iter_right;
   morpho_stream.setNullFlush(TheFlags.getNullFlush());
 
   word_left = new TaggerWord();          // word left
-  word_left->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  word_left->add_tag(eos, "sent"_u, tdlsw.getPreferRules());
   word_left->set_show_sf(TheFlags.getShowSuperficial());
   tags_left = word_left->get_tags();          // tags left
 
@@ -357,7 +357,7 @@ LSWPoST::tagger(MorphoStream &morpho_stream, FILE *Output) {
   word_right = morpho_stream.get_next_word(); // word_right
   word_right->set_show_sf(TheFlags.getShowSuperficial());
 
-  wstring micad;
+  UString micad;
 
   while (word_right) {
     tags_right = word_right->get_tags();
@@ -380,13 +380,13 @@ LSWPoST::tagger(MorphoStream &morpho_stream, FILE *Output) {
       }
     }
 
-    micad = word_mid->get_lexical_form(tag_max, (tdlsw.getTagIndex())[L"TAG_kEOF"]);
-    fputws_unlocked(micad.c_str(), Output);
+    micad = word_mid->get_lexical_form(tag_max, (tdlsw.getTagIndex())["TAG_kEOF"_u]);
+    write(micad, Output);
     if (morpho_stream.getEndOfFile()) {
       if (TheFlags.getNullFlush()) {
-        fputwc_unlocked(L'\0', Output);
+        u_fputc('\0', Output);
       }
-      fflush(Output);
+      u_fflush(Output);
       morpho_stream.setEndOfFile(false);
     }
 
diff --git a/apertium/lswpost.h b/apertium/lswpost.h
index 65d485e..7065cb1 100644
--- a/apertium/lswpost.h
+++ b/apertium/lswpost.h
@@ -57,7 +57,7 @@ protected:
 public:
   TaggerData& get_tagger_data();
   void deserialise(FILE *Serialised_FILE_Tagger);
-  std::vector<std::wstring> &getArrayTags();
+  std::vector<UString> &getArrayTags();
   void serialise(FILE *Stream_);
   void deserialise(const TaggerData &Deserialised_FILE_Tagger);
   void init_probabilities_from_tagged_text_(MorphoStream &, MorphoStream &);
@@ -102,6 +102,6 @@ public:
 
    /** Do the tagging
     */
-   void tagger(MorphoStream &morpho_stream, FILE *Output);
+   void tagger(MorphoStream &morpho_stream, UFILE *Output);
 };
 #endif
diff --git a/apertium/morpheme.cc b/apertium/morpheme.cc
index c86c397..2de0d8b 100644
--- a/apertium/morpheme.cc
+++ b/apertium/morpheme.cc
@@ -29,35 +29,34 @@ bool operator<(const Morpheme &a, const Morpheme &b) {
   return a.TheTags < b.TheTags;
 }
 
-std::wostream& operator<<(std::wostream& out, const Morpheme &morph) {
-  out << morph.TheLemma;
-  const std::vector<Tag> &tags = morph.TheTags;
-  std::vector<Tag>::const_iterator it = tags.begin();
-  for (; it != tags.end(); it++) {
-    out << L"<" << it->TheTag << L">";
+std::ostream& operator<<(std::ostream& out, const Morpheme &morph) {
+  ::operator<<(out, morph.TheLemma);
+  for (auto& it : morph.TheTags) {
+    out << '<';
+    ::operator<<(out, it.TheTag);
+    out << '>';
   }
+  // namespace issue
   return out;
 }
 
-Morpheme::operator std::wstring() const {
+Morpheme::operator UString() const {
   if (TheTags.empty())
     throw Exception::Morpheme::TheTags_empty("can't convert Morpheme "
                                              "comprising empty Tag std::vector "
-                                             "to std::wstring");
+                                             "to UString");
 
   if (TheLemma.empty())
     throw Exception::Morpheme::TheLemma_empty("can't convert Morpheme "
                                               "comprising empty TheLemma "
-                                              "std::wstring to std::wstring");
+                                              "UString to UString");
 
-  std::wstring wstring_ = TheLemma;
+  UString ustring_ = TheLemma;
 
-  for (std::vector<Tag>::const_iterator Tag_ = TheTags.begin();
-       // Call .end() each iteration to save memory.
-       Tag_ != TheTags.end(); ++Tag_) {
-    wstring_ += static_cast<std::wstring>(*Tag_);
+  for (auto& Tag_ : TheTags) {
+    ustring_ += static_cast<UString>(Tag_);
   }
 
-  return wstring_;
+  return ustring_;
 }
 }
diff --git a/apertium/morpheme.h b/apertium/morpheme.h
index 88fdece..eb2c3d2 100644
--- a/apertium/morpheme.h
+++ b/apertium/morpheme.h
@@ -27,9 +27,9 @@ class Morpheme {
 public:
   friend bool operator==(const Morpheme &a, const Morpheme &b);
   friend bool operator<(const Morpheme &a, const Morpheme &b);
-  friend std::wostream& operator<<(std::wostream& out, const Morpheme &morph);
-  operator std::wstring() const;
-  std::wstring TheLemma;
+  friend std::ostream& operator<<(std::ostream& out, const Morpheme &morph);
+  operator UString() const;
+  UString TheLemma;
   std::vector<Tag> TheTags;
 };
 }
diff --git a/apertium/mtx_reader.cc b/apertium/mtx_reader.cc
index 076ac2f..ea7e597 100644
--- a/apertium/mtx_reader.cc
+++ b/apertium/mtx_reader.cc
@@ -17,7 +17,7 @@
 #include <apertium/mtx_reader.h>
 #include <lttoolbox/xml_parse_util.h>
 #include <lttoolbox/compression.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/utils.h>
 #include <apertium/tsx_reader.h>
 #include <apertium/perceptron_spec.h>
@@ -39,7 +39,7 @@ MTXReader::MTXReader(VM &spec) :
 size_t MTXReader::pushSetConst(std::string &val)
 {
   size_t set_idx = spec.set_consts.size();
-  stringstream val_ss(val);
+  std::stringstream val_ss(val);
   spec.set_consts.push_back(set<std::string>(
     istream_iterator<std::string>(val_ss),
     istream_iterator<std::string>()
@@ -83,7 +83,7 @@ void MTXReader::emitUInt(int val)
 
 void MTXReader::procCoarseTags()
 {
-  std::string tsx_fn = attrib("tag");
+  std::string tsx_fn((const char*) xmlTextReaderGetAttribute(reader, (const xmlChar*) "tag"));
   bool is_abs = ((tsx_fn.size() >= 1 && tsx_fn[0] == '/') ||
                  (tsx_fn.size() >= 2 && tsx_fn[1] == ':'));
   if (!is_abs) {
@@ -96,38 +96,38 @@ void MTXReader::procCoarseTags()
   tsx_reader.read(tsx_fn);
   spec.coarse_tags = Optional<TaggerDataPercepCoarseTags>(
       tsx_reader.getTaggerData());
-  stepPastSelfClosingTag(L"coarse-tags");
+  stepPastSelfClosingTag("coarse-tags"_u);
 }
 
 void MTXReader::procSetDef()
 {
-  std::wstring name = attrib(L"name");
+  std::string set_name = attrib_str("name"_u); // value of the name= attribute; deliberately NOT called 'name', which would shadow the reader member compared against tag names below
   stepToNextTag();
   size_t set_idx = spec.set_consts.size();
   spec.set_consts.push_back(VMSet());
   VMSet &vm_set = spec.set_consts.back();
   while (type != XML_READER_TYPE_END_ELEMENT) {
-    if (name == L"set-member") {
-      std::string tag = attrib("tag");
-      std::string str = attrib("str");
-      vm_set.insert(tag != "" ? tag : str);
+    if (name == "set-member"_u) { // reader member 'name' (same one procDefns dispatches on), not the set's own name
+      std::string tag = attrib_str("tag"_u);
+      std::string str = attrib_str("str"_u);
+      vm_set.insert(tag.empty() ? str : tag); // a non-empty tag= takes precedence over str=
     } else {
-      parseError(L"Expected set-member");
+      parseError("Expected set-member"_u);
     }
     stepToNextTag();
   }
-  set_names[name] = set_idx;
-  assert(name == L"def-set");
+  set_names[set_name] = set_idx; // register the new set under its declared name
+  assert(name == "def-set"_u); // the loop stopped on an end element; it is expected to be def-set's
   stepToNextTag();
 }
 
 void MTXReader::procStrDef()
 {
-  std::wstring name = attrib(L"name");
-  std::string tag = attrib("tag");
-  std::string str = attrib("str");
-  str_names[name] = pushStrConst(tag != "" ? tag : str);
-  stepPastSelfClosingTag(L"def-str");
+  std::string name = attrib_str("name"_u);
+  std::string tag = attrib_str("tag"_u);
+  std::string str = attrib_str("str"_u);
+  str_names[name] = pushStrConst(tag.empty() ? str : tag);
+  stepPastSelfClosingTag("def-str"_u);
 }
 
 void
@@ -135,19 +135,19 @@ MTXReader::procDefns()
 {
   stepToNextTag();
   while (type != XML_READER_TYPE_END_ELEMENT) {
-    if (name == L"def-set") {
+    if (name == "def-set"_u) {
       procSetDef();
-    } else if (name == L"def-str") {
+    } else if (name == "def-str"_u) {
       procStrDef();
-    } else if (name == L"def-macro") {
+    } else if (name == "def-macro"_u) {
       procDefMacro();
-    } else if (name == L"#text" || name == L"#comment") {
+    } else if (name == "#text"_u || name == "#comment"_u) {
       // skip
     } else {
       unexpectedTag();
     }
   }
-  assert(name == L"defns");
+  assert(name == "defns"_u);
   stepToNextTag();
 }
 
@@ -157,7 +157,7 @@ MTXReader::procGlobalPred()
   cur_feat = &spec.global_pred;
   stepToNextTag();
   procBoolExpr();
-  assert(name == L"global-pred" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "global-pred"_u && type == XML_READER_TYPE_END_ELEMENT);
   stepToNextTag();
 }
 
@@ -202,50 +202,50 @@ MTXReader::procIntExpr(bool allow_fail)
   /* Self-closing tags */
   if (!tryProcArg(INTEXPR, true)
       && !tryProcVar(VM::INTVAL)) {
-    if (name == L"sentlen") {
+    if (name == "sentlen"_u) {
       emitOpcode(VM::SENTLENTOK);
-      stepPastSelfClosingTag(L"sentlen");
-    } else if (name == L"pathlen") {
+      stepPastSelfClosingTag("sentlen"_u);
+    } else if (name == "pathlen"_u) {
       emitOpcode(VM::SENTLENWRD);
-      stepPastSelfClosingTag(L"pathlen");
-    } else if (name == L"tokaddr") {
+      stepPastSelfClosingTag("pathlen"_u);
+    } else if (name == "tokaddr"_u) {
       emitOpcode(VM::PUSHTOKADDR);
-      stepPastSelfClosingTag(L"tokaddr");
-    } else if (name == L"wrdidx") {
+      stepPastSelfClosingTag("tokaddr"_u);
+    } else if (name == "wrdidx"_u) {
       emitOpcode(VM::PUSHWRDADDR);
-      stepPastSelfClosingTag(L"wrdidx");
-    } else if (name == L"int") {
+      stepPastSelfClosingTag("wrdidx"_u);
+    } else if (name == "int"_u) {
       emitOpcode(VM::PUSHINT);
       getAndEmitInt();
-      stepPastSelfClosingTag(L"int");
+      stepPastSelfClosingTag("int"_u);
     /* Other tags */
-    } else if (name == L"add") {
+    } else if (name == "add"_u) {
       stepToNextTag();
       procIntExpr();
       procIntExpr();
-      assert(name == L"add" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "add"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::ADD);
       stepToNextTag();
-    } else if (name == L"toklen") {
+    } else if (name == "toklen"_u) {
       procIntExpr();
-      assert(name == L"toklen" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "toklen"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::TOKLENWRD);
       stepToNextTag();
-    } else if (name == L"strlen") {
+    } else if (name == "strlen"_u) {
       procStrExpr();
-      assert(name == L"strlen" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "strlen"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::STRLEN);
       stepToNextTag();
-    } else if (name == L"arrlen") {
+    } else if (name == "arrlen"_u) {
       procStrArrExpr();
-      assert(name == L"arrlen" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "arrlen"_u && type == XML_READER_TYPE_END_ELEMENT);
       procBinCompareOp(VM::ARRLEN);
       stepToNextTag();
     } else {
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected an integer expression.");
+      parseError("Expected an integer expression."_u);
     }
   }
   return true;
@@ -258,22 +258,22 @@ MTXReader::procStrArrExpr(bool allow_fail)
   if (!tryProcArg(STRARREXPR, true)
       && !tryProcVar(VM::STRARRVAL)
       && !tryProcSlice(&MTXReader::procStrArrExpr)) {
-    if (name == L"ex-tags") {
+    if (name == "ex-tags"_u) {
       stepToNextTag();
       procWordoidExpr();
       assert(type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::EXTAGS);
-    } else if (name == L"ex-ambgset") {
+    } else if (name == "ex-ambgset"_u) {
       stepToNextTag();
       procIntExpr();
       emitOpcode(VM::EXAMBGSET);
-    } else if (name == L"for-each") {
+    } else if (name == "for-each"_u) {
       procForEach(STREXPR);
     } else {
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected a string list expression.");
+      parseError("Expected a string list expression."_u);
     }
     stepToNextTag();
   }
@@ -282,13 +282,13 @@ MTXReader::procStrArrExpr(bool allow_fail)
 
 bool MTXReader::tryProcSubscript(bool (MTXReader::*proc_inner)(bool))
 {
-  if (name == L"subscript") {
-    int idx = getInt("idx");
+  if (name == "subscript"_u) {
+    int idx = getInt("idx"_u);
     stepToNextTag();
     (this->*proc_inner)(false);
     emitOpcode(VM::SUBSCRIPT);
     emitUInt(idx);
-    assert(name == L"subscript" && type == XML_READER_TYPE_END_ELEMENT);
+    assert(name == "subscript"_u && type == XML_READER_TYPE_END_ELEMENT);
     stepToNextTag();
     return true;
   }
@@ -297,24 +297,24 @@ bool MTXReader::tryProcSubscript(bool (MTXReader::*proc_inner)(bool))
 
 bool MTXReader::tryProcSlice(bool (MTXReader::*proc_inner)(bool))
 {
-  if (name == L"slice") {
+  if (name == "slice"_u) {
     stepToNextTag();
     (this->*proc_inner)(false);
     bool exists;
     emitOpcode(VM::SLICE);
-    int start_lit = getInt("start", exists);
+    int start_lit = getInt("start"_u, exists);
     if (exists) {
       emitInt(start_lit);
     } else {
       emitInt(0);
     }
-    int end_lit = getInt("end", exists);
+    int end_lit = getInt("end"_u, exists);
     if (exists) {
       emitInt(end_lit);
     } else {
       emitInt(0);
     }
-    assert(name == L"slice" && type == XML_READER_TYPE_END_ELEMENT);
+    assert(name == "slice"_u && type == XML_READER_TYPE_END_ELEMENT);
     stepToNextTag();
     return true;
   }
@@ -323,17 +323,17 @@ bool MTXReader::tryProcSlice(bool (MTXReader::*proc_inner)(bool))
 
 bool MTXReader::tryProcArg(ExprType expr_type, bool allow_fail)
 {
-  if (name == L"var") {
-    std::wstring var_name = attrib(L"name");
+  if (name == "var"_u) {
+    std::string var_name = attrib_str("name"_u);
     if (in_global_defn) {
       VarNVMap::const_iterator arg_name_it = template_arg_names.find(var_name);
       if (arg_name_it != template_arg_names.end()) {
         cur_replacements->push_back(make_pair(arg_name_it->second, expr_type));
-        stepPastSelfClosingTag(L"var");
+        stepPastSelfClosingTag("var"_u);
         return true;
       }
       if (!allow_fail) {
-        parseError(L"No such argument " + var_name);
+        parseError("No such argument " + var_name);
       }
     }
   }
@@ -342,31 +342,31 @@ bool MTXReader::tryProcArg(ExprType expr_type, bool allow_fail)
 
 bool MTXReader::tryProcVar(VM::StackValueType svt)
 {
-  if (name == L"var") {
-    std::wstring var_name = attrib(L"name");
+  if (name == "var"_u) {
+    std::string var_name = attrib_str("name"_u);
 
     VarNVMap::const_iterator slot_names_it = slot_names.find(var_name);
     if (slot_names_it != slot_names.end()) {
       if (slot_types[slot_names_it->second] != svt) {
-        parseError(L"Variable " + var_name + L" has the wrong type");
+        parseError("Variable " + var_name + " has the wrong type");
       }
       emitOpcode(VM::GETVAR);
       emitUInt(slot_names_it->second);
-      stepPastSelfClosingTag(L"var");
+      stepPastSelfClosingTag("var"_u);
       return true;
     }
 
-    parseError(L"Variable " + var_name + L" has not been set.");
-  } else if (!in_global_defn && name == L"macro") {
+    parseError("Variable " + var_name + " has not been set.");
+  } else if (!in_global_defn && name == "macro"_u) {
     // Get template data
-    std::wstring var_name = attrib(L"name");
+    std::string var_name = attrib_str("name"_u);
     VarNVMap::const_iterator template_name_it = template_slot_names.find(var_name);
     if (template_name_it == template_slot_names.end()) {
-      parseError(L"No such macro " + var_name);
+      parseError("No such macro " + var_name);
     }
     size_t templ_idx = template_name_it->second;
     if (template_slot_types[templ_idx] != svt) {
-      parseError(L"Macro " + var_name + L" returns the wrong type");
+      parseError("Macro " + var_name + " returns the wrong type");
     }
     std::pair<VM::FeatureDefn, TemplateReplacements> &templ_defn = template_defns[templ_idx];
     // Get arg values
@@ -417,7 +417,7 @@ bool MTXReader::tryProcVar(VM::StackValueType svt)
     emitOpcode(VM::GETGVAR);
     emitUInt(templ_instcia_it->second);
     // Step past end
-    assert(name == L"macro" && type == XML_READER_TYPE_END_ELEMENT);
+    assert(name == "macro"_u && type == XML_READER_TYPE_END_ELEMENT);
     stepToNextTag();
     return true;
   }
@@ -431,19 +431,19 @@ MTXReader::procStrExpr(bool allow_fail)
       && !tryProcVar(VM::STRVAL)
       && !tryProcSlice(&MTXReader::procStrExpr)
       && !tryProcSubscript(&MTXReader::procStrArrExpr)) {
-    if (name == L"ex-surf") {
+    if (name == "ex-surf"_u) {
       stepToNextTag();
       procIntExpr();
       emitOpcode(VM::EXTOKSURF);
-    } else if (name == L"ex-lemma") {
+    } else if (name == "ex-lemma"_u) {
       stepToNextTag();
       procWordoidExpr();
       emitOpcode(VM::EXWRDLEMMA);
-    } else if (name == L"ex-coarse") {
+    } else if (name == "ex-coarse"_u) {
       stepToNextTag();
       procWordoidExpr();
       emitOpcode(VM::EXWRDCOARSETAG);
-    } else if (name == L"join") {
+    } else if (name == "join"_u) {
       bool has_attr;
       size_t str_idx = getStrRef(has_attr);
       if (!has_attr) {
@@ -457,7 +457,7 @@ MTXReader::procStrExpr(bool allow_fail)
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected a string expression.");
+      parseError("Expected a string expression."_u);
     }
     assert(type == XML_READER_TYPE_END_ELEMENT);
     stepToNextTag();
@@ -470,95 +470,95 @@ MTXReader::procBoolExpr(bool allow_fail)
 {
   if (!tryProcArg(BEXPR, true)
       && !tryProcVar(VM::BVAL)) {
-    if (name == L"and") {
+    if (name == "and"_u) {
       stepToNextTag();
       procCommBoolOp(VM::AND);
-      assert(name == L"and" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "and"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"or") {
+    } else if (name == "or"_u) {
       stepToNextTag();
       procCommBoolOp(VM::OR);
-      assert(name == L"or" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "or"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"not") {
+    } else if (name == "not"_u) {
       stepToNextTag();
       procBoolExpr();
       emitOpcode(VM::NOT);
-      assert(name == L"not" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "not"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"eq") {
+    } else if (name == "eq"_u) {
       stepToNextTag();
       procBinCompareOp(VM::EQ);
-      assert(name == L"eq" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "eq"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"neq") {
+    } else if (name == "neq"_u) {
       stepToNextTag();
       procBinCompareOp(VM::NEQ);
-      assert(name == L"neq" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "neq"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"lt") {
+    } else if (name == "lt"_u) {
       stepToNextTag();
       procBinCompareOp(VM::LT);
-      assert(name == L"lt" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "lt"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"lte") {
+    } else if (name == "lte"_u) {
       stepToNextTag();
       procBinCompareOp(VM::LTE);
-      assert(name == L"lte" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "lte"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"gt") {
+    } else if (name == "gt"_u) {
       stepToNextTag();
       procBinCompareOp(VM::GT);
-      assert(name == L"gt" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "gt"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"gte") {
+    } else if (name == "gte"_u) {
       stepToNextTag();
       procBinCompareOp(VM::GTE);
-      assert(name == L"gte" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "gte"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"streq") {
+    } else if (name == "streq"_u) {
       size_t str_ref = getStrRef();
       stepToNextTag();
       procStrExpr();
       emitOpcode(VM::STREQ);
       emitUInt(str_ref);
-      assert(name == L"streq" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "streq"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"strin") {
+    } else if (name == "strin"_u) {
       size_t set_ref = getSetRef();
       stepToNextTag();
       procStrExpr();
       emitOpcode(VM::STRIN);
       emitUInt(set_ref);
-      assert(name == L"strin" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "strin"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
     /* Identical to strin?
-    } else if (name == L"sethas") {
+    } else if (name == "sethas"_u) {
       stepToNextTag();
       procStrExpr();
       emitSetImmOp(VM::SETHAS);
     */
-    } else if (name == L"sethasany") {
+    } else if (name == "sethasany"_u) {
       size_t set_ref = getSetRef();
       stepToNextTag();
       procStrArrExpr();
       emitOpcode(VM::SETHASANY);
       emitUInt(set_ref);
-      assert(name == L"sethasany" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "sethasany"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"sethasall") {
+    } else if (name == "sethasall"_u) {
       size_t set_ref = getSetRef();
       stepToNextTag();
       procStrArrExpr();
       emitOpcode(VM::SETHASALL);
       emitUInt(set_ref);
-      assert(name == L"sethasall" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "sethasall"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
     } else {
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected a boolean expression.");
+      parseError("Expected a boolean expression."_u);
     }
   }
   return true;
@@ -570,37 +570,37 @@ MTXReader::procAddrExpr()
   stepToTag();
   /* Self-closing tags */
   if (!tryProcArg(ADDREXPR)) {
-    if (name == L"wrdaddr") {
+    if (name == "wrdaddr"_u) {
       emitOpcode(VM::PUSHADDR);
-      stepPastSelfClosingTag(L"wrdaddr");
+      stepPastSelfClosingTag("wrdaddr"_u);
     /* Others */
-    } else if (name == L"addr-of-ints") {
+    } else if (name == "addr-of-ints"_u) {
       stepToNextTag();
       procIntExpr();
       procIntExpr();
-      assert(name == L"addr-of-ints" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "addr-of-ints"_u && type == XML_READER_TYPE_END_ELEMENT);
       stepToNextTag();
-    } else if (name == L"add") {
+    } else if (name == "add"_u) {
       stepToNextTag();
       procAddrExpr();
       procAddrExpr();
-      assert(name == L"add" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "add"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::ADD2);
       stepToNextTag();
-    } else if (name == L"adjust") {
+    } else if (name == "adjust"_u) {
       stepToNextTag();
       procAddrExpr();
-      assert(name == L"adjust" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "adjust"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::ADJADDR);
       stepToNextTag();
-    } else if (name == L"clamp") {
+    } else if (name == "clamp"_u) {
       stepToNextTag();
       procAddrExpr();
-      assert(name == L"clamp" && type == XML_READER_TYPE_END_ELEMENT);
+      assert(name == "clamp"_u && type == XML_READER_TYPE_END_ELEMENT);
       emitOpcode(VM::CLAMPADDR);
       stepToNextTag();
     } else {
-      parseError(L"Expected an address expression.");
+      parseError("Expected an address expression."_u);
     }
   }
 }
@@ -611,18 +611,18 @@ MTXReader::procWordoidArrExpr(bool allow_fail)
   if (!tryProcArg(WRDARREXPR, true)
       && !tryProcVar(VM::WRDARRVAL)
       && !tryProcSlice(&MTXReader::procWordoidArrExpr)) {
-    if (name == L"ex-wordoids") {
+    if (name == "ex-wordoids"_u) {
       stepToNextTag();
       procIntExpr();
       emitOpcode(VM::EXWRDARR);
-      assert(name == L"ex-wordoids" && type == XML_READER_TYPE_END_ELEMENT);
-    } else if (name == L"for-each") {
+      assert(name == "ex-wordoids"_u && type == XML_READER_TYPE_END_ELEMENT);
+    } else if (name == "for-each"_u) {
       procForEach(WRDEXPR);
     } else {
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected a wordoid array expression.");
+      parseError("Expected a wordoid array expression."_u);
     }
     stepToNextTag();
   }
@@ -636,7 +636,7 @@ MTXReader::procWordoidExpr(bool allow_fail)
   if (!tryProcArg(WRDEXPR, true)
       && !tryProcVar(VM::WRDVAL)
       && !tryProcSubscript(&MTXReader::procWordoidArrExpr)) {
-    if (name == L"ex-wordoid") {
+    if (name == "ex-wordoid"_u) {
       stepToNextTag();
       procAddrExpr();
       emitOpcode(VM::GETWRD);
@@ -644,7 +644,7 @@ MTXReader::procWordoidExpr(bool allow_fail)
       if (allow_fail) {
         return false;
       }
-      parseError(L"Expected a wordoid expression.");
+      parseError("Expected a wordoid expression."_u);
     }
     assert(type == XML_READER_TYPE_END_ELEMENT);
     stepToNextTag();
@@ -657,30 +657,30 @@ MTXReader::procPred()
 {
   stepToNextTag();
   procBoolExpr();
-  assert(name == L"pred" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "pred"_u && type == XML_READER_TYPE_END_ELEMENT);
   emitOpcode(VM::DIEIFFALSE);
   stepToNextTag();
 }
 
 size_t
 MTXReader::getConstRef(
-    const std::wstring &ref_attr,
-    const std::string &lit_attr,
-    const std::wstring &what,
+    const UString &ref_attr,
+    const UString &lit_attr,
+    const UString &what,
     VarNVMap &const_map,
     size_t (MTXReader::*push_new)(std::string&),
     bool& exists)
 {
-  std::wstring const_name = attrib(ref_attr);
+  std::string const_name = attrib_str(ref_attr);
   if (!const_name.empty()) {
     exists = true;
     VarNVMap::iterator sit = const_map.find(const_name);
     if (sit == const_map.end()) {
-      parseError(L"No " + what + L" named " + const_name);
+      parseError("No "_u + what + " named "_u + to_ustring(const_name.c_str()));
     }
     return sit->second;
   }
-  std::string const_lit = attrib(lit_attr);
+  std::string const_lit = attrib_str(lit_attr);
   if (!const_lit.empty()) {
     exists = true;
     return (this->*push_new)(const_lit);
@@ -692,7 +692,7 @@ MTXReader::getConstRef(
 size_t
 MTXReader::getSetRef(bool& exists)
 {
-  return getConstRef(L"name", "val", L"set", set_names, &MTXReader::pushSetConst, exists);
+  return getConstRef("name"_u, "val"_u, "set"_u, set_names, &MTXReader::pushSetConst, exists);
 }
 
 size_t
@@ -701,7 +701,7 @@ MTXReader::getSetRef()
   bool has_attr;
   size_t set_ref = getSetRef(has_attr);
   if (!has_attr) {
-    parseError(L"Set required");
+    parseError("Set required"_u);
   }
   return set_ref;
 }
@@ -709,7 +709,7 @@ MTXReader::getSetRef()
 size_t
 MTXReader::getStrRef(bool& exists)
 {
-  return getConstRef(L"name", "val", L"string", str_names, &MTXReader::pushStrConst, exists);
+  return getConstRef("name"_u, "val"_u, "string"_u, str_names, &MTXReader::pushStrConst, exists);
 }
 
 size_t
@@ -718,15 +718,15 @@ MTXReader::getStrRef()
   bool has_attr;
   size_t str_ref = getStrRef(has_attr);
   if (!has_attr) {
-    parseError(L"String required");
+    parseError("String required"_u);
   }
   return str_ref;
 }
 
 int
-MTXReader::getInt(std::string attr_name, bool& exists)
+MTXReader::getInt(const UString& attr_name, bool& exists)
 {
-  std::string int_lit = attrib(attr_name);
+  std::string int_lit = attrib_str(attr_name);
   if (!int_lit.empty()) {
     exists = true;
     int int_out;
@@ -741,16 +741,16 @@ MTXReader::getInt(std::string attr_name, bool& exists)
 int
 MTXReader::getInt(bool& exists)
 {
-  return getInt("val", exists);
+  return getInt("val"_u, exists);
 }
 
 int
-MTXReader::getInt(std::string attr_name)
+MTXReader::getInt(const UString& attr_name)
 {
   bool has_attr;
   int i = getInt(attr_name, has_attr);
   if (!has_attr) {
-    parseError(L"String required");
+    parseError("String required");
   }
   return i;
 }
@@ -758,18 +758,18 @@ MTXReader::getInt(std::string attr_name)
 int
 MTXReader::getInt()
 {
-  return getInt("val");
+  return getInt("val"_u);
 }
 
 template<typename GetT, typename EmitT>
 void
 MTXReader::emitAttr(
-    std::wstring what, GetT (MTXReader::*getter)(bool&), void (MTXReader::*emitter)(EmitT))
+    std::string what, GetT (MTXReader::*getter)(bool&), void (MTXReader::*emitter)(EmitT))
 {
   bool has_attr = false;
   GetT val = (this->*getter)(has_attr);
   if (!has_attr) {
-    parseError(what + L" required");
+    parseError(what + " required");
   }
   (this->*emitter)(val);
 }
@@ -777,19 +777,19 @@ MTXReader::emitAttr(
 void
 MTXReader::getAndEmitStrRef()
 {
-  emitAttr(L"String", &MTXReader::getStrRef, &MTXReader::emitUInt);
+  emitAttr("String", &MTXReader::getStrRef, &MTXReader::emitUInt);
 }
 
 void
 MTXReader::getAndEmitSetRef()
 {
-  emitAttr(L"Set", &MTXReader::getSetRef, &MTXReader::emitUInt);
+  emitAttr("Set", &MTXReader::getSetRef, &MTXReader::emitUInt);
 }
 
 void
 MTXReader::getAndEmitInt()
 {
-  emitAttr(L"Integer", &MTXReader::getInt, &MTXReader::emitInt);
+  emitAttr("Integer", &MTXReader::getInt, &MTXReader::emitInt);
 }
 
 void
@@ -797,7 +797,7 @@ MTXReader::procInst()
 {
   // XXX: There's no way to tell the difference between an empty and absent
   // attribute with the current lttoolbox xml code
-  std::string op = attrib("opcode");
+  std::string op = attrib_str("opcode"_u);
   std::transform(op.begin(), op.end(), op.begin(), ::toupper);
   emitOpcode(VM::opcode_values[op]);
   int val;
@@ -809,7 +809,7 @@ MTXReader::procInst()
   val = getInt(has_int_lit);
   int num_operands = has_set_ref + has_str_ref + has_int_lit;
   if (num_operands > 1) {
-    parseError(L"Opcodes can have at most one operand.");
+    parseError("Opcodes can have at most one operand."_u);
   } else if (num_operands == 1) {
     if (has_int_lit) {
       emitInt(val);
@@ -837,10 +837,10 @@ MTXReader::procOut()
     has_expr = true;
   }
   if (!has_expr) {
-    parseError(L"Expected a string, bool or int expression.");
+    parseError("Expected a string, bool or int expression."_u);
   }
   stepToTag();
-  assert(name == L"out" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "out"_u && type == XML_READER_TYPE_END_ELEMENT);
   stepToNextTag();
 }
 
@@ -850,21 +850,21 @@ MTXReader::procOutMany()
   stepToNextTag();
   procStrArrExpr();
   emitOpcode(VM::FCATSTRARR);
-  assert(name == L"out-many" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "out-many"_u && type == XML_READER_TYPE_END_ELEMENT);
   stepToNextTag();
 }
 
 void
 MTXReader::printTmplDefn(const TemplateDefn &tmpl_defn)
 {
-  PerceptronSpec::printFeature(std::wcerr, tmpl_defn.first);
+  PerceptronSpec::printFeature(std::cerr, tmpl_defn.first);
   if (tmpl_defn.second.size() > 0) {
-    std::wcerr << "Replacements:\n";
+    std::cerr << "Replacements:\n"_u;
     TemplateReplacements::const_iterator it = tmpl_defn.second.begin();
     for (; it != tmpl_defn.second.end(); it++) {
-      std::wcerr << "Index: " << it->first << " ";
+      std::cerr << "Index: "_u << it->first << " "_u;
       printTypeExpr(it->second);
-      std::wcerr << "\n";
+      std::cerr << "\n"_u;
     }
   }
 }
@@ -874,22 +874,22 @@ MTXReader::printStackValueType(VM::StackValueType svt)
 {
   switch (svt) {
     case VM::INTVAL:
-      std::wcerr << "INT";
+      std::cerr << "INT";
       break;
     case VM::BVAL:
-      std::wcerr << "BOOL";
+      std::cerr << "BOOL";
       break;
     case VM::STRVAL:
-      std::wcerr << "STR";
+      std::cerr << "STR";
       break;
     case VM::STRARRVAL:
-      std::wcerr << "STRARR";
+      std::cerr << "STRARR";
       break;
     case VM::WRDVAL:
-      std::wcerr << "WRD";
+      std::cerr << "WRD";
       break;
     case VM::WRDARRVAL:
-      std::wcerr << "WRDARR";
+      std::cerr << "WRDARR";
       break;
     default:
       throw 1;
@@ -901,29 +901,29 @@ MTXReader::printTypeExpr(ExprType expr_type)
 {
   switch (expr_type) {
     case VOIDEXPR:
-      std::wcerr << "VOID";
+      std::cerr << "VOID";
       break;
     case INTEXPR:
-      std::wcerr << "INT";
+      std::cerr << "INT";
       break;
     case BEXPR:
-      std::wcerr << "BOOL";
+      std::cerr << "BOOL";
       break;
     case STREXPR:
-      std::wcerr << "STR";
+      std::cerr << "STR";
       procStrExpr();
       break;
     case STRARREXPR:
-      std::wcerr << "STRARR";
+      std::cerr << "STRARR";
       break;
     case WRDEXPR:
-      std::wcerr << "WRD";
+      std::cerr << "WRD";
       break;
     case WRDARREXPR:
-      std::wcerr << "WRDARR";
+      std::cerr << "WRDARR";
       break;
     case ADDREXPR:
-      std::wcerr << "ADDR";
+      std::cerr << "ADDR";
       break;
     default:
       throw 1;
@@ -966,9 +966,9 @@ MTXReader::procTypeExpr(ExprType expr_type)
 void
 MTXReader::procForEach(ExprType expr_type)
 {
-  std::wstring var_name = attrib(L"as");
-  if (var_name == L"") {
-    parseError(L"'as' attribute required for for-each.");
+  std::string var_name = attrib_str("as"_u);
+  if (var_name.empty()) {
+    parseError("'as' attribute required for for-each."_u);
   }
   size_t slot_idx = slot_counter++;
   slot_names[var_name] = slot_idx;
@@ -983,7 +983,7 @@ MTXReader::procForEach(ExprType expr_type)
     has_expr = true;
   }
   if (!has_expr) {
-    parseError(L"Expected a string array or wordoid array expression.");
+    parseError("Expected a string array or wordoid array expression."_u);
   }
 
   emitOpcode(VM::FOREACHINIT);
@@ -1021,21 +1021,21 @@ bool
 MTXReader::procVoidExpr(bool allow_fail)
 {
   stepToTag();
-  if (name == L"pred") {
+  if (name == "pred"_u) {
     procPred();
-  } else if (name == L"out") {
+  } else if (name == "out"_u) {
     procOut();
-  } else if (name == L"out-many") {
+  } else if (name == "out-many"_u) {
     procOutMany();
-  } else if (name == L"for-each") {
+  } else if (name == "for-each"_u) {
     procForEach(VOIDEXPR);
-  } else if (name == L"inst") {
+  } else if (name == "inst"_u) {
     procInst();
   } else {
     if (allow_fail) {
       return false;
     }
-    parseError(L"Expected a void expression.");
+    parseError("Expected a void expression."_u);
   }
   return true;
 }
@@ -1049,20 +1049,20 @@ MTXReader::procDefMacro()
   cur_feat = &template_defns.back().first;
   cur_replacements = &template_defns.back().second;
 
-  std::wstring var_name = attrib(L"as");
-  if (var_name == L"") {
-    parseError(L"'as' attribute required for def-macro.");
+  std::string var_name = attrib_str("as"_u);
+  if (var_name.empty()) {
+    parseError("'as' attribute required for def-macro."_u);
   }
   template_slot_names[var_name] = template_slot_counter;
 
   template_arg_names.clear();
-  std::wstring args = attrib(L"args");
-  std::wistringstream args_ss(args);
+  std::string args = attrib_str("args"_u);
+  std::istringstream args_ss(args);
   size_t arg_i = 0;
   for (; !args_ss.eof(); arg_i++) {
-    wstring arg_name;
+    std::string arg_name;
     args_ss >> arg_name;
-    if (arg_name == L"") {
+    if (arg_name.empty()) {
       break;
     }
     template_arg_names[arg_name] = arg_i;
@@ -1095,9 +1095,9 @@ MTXReader::procDefMacro()
     has_expr = true;
   }
   if (!has_expr) {
-    parseError(L"Expected a non-void expression.");
+    parseError("Expected a non-void expression."_u);
   }
-  assert(name == L"def-macro" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "def-macro"_u && type == XML_READER_TYPE_END_ELEMENT);
   stepToNextTag();
 
   template_slot_counter++;
@@ -1114,7 +1114,7 @@ MTXReader::procFeat()
   while (type != XML_READER_TYPE_END_ELEMENT) {
     procVoidExpr();
   }
-  assert(name == L"feat");
+  assert(name == "feat"_u);
   stepToNextTag();
 }
 
@@ -1123,13 +1123,13 @@ MTXReader::procFeats()
 {
   stepToNextTag();
   while (type != XML_READER_TYPE_END_ELEMENT) {
-    if (name == L"feat") {
+    if (name == "feat"_u) {
       procFeat();
     } else {
       unexpectedTag();
     }
   }
-  assert(name == L"feats");
+  assert(name == "feats"_u);
   stepToNextTag();
 }
 
@@ -1138,7 +1138,7 @@ MTXReader::printTmplDefns()
 {
   std::vector<TemplateDefn>::const_iterator it = template_defns.begin();
   for (; it != template_defns.end(); it++) {
-    std::wcerr << " Macro " << it - template_defns.begin() << "\n";
+    std::cerr << " Macro " << it - template_defns.begin() << "\n";
     printTmplDefn(*it);
   }
 }
@@ -1151,30 +1151,30 @@ MTXReader::parse()
   if (type == XML_READER_TYPE_DOCUMENT_TYPE) {
     stepToNextTag();
   }
-  if (name != L"metatag") {
-    parseError(L"expected <metatag> tag");
+  if (name != "metatag"_u) {
+    parseError("expected <metatag> tag"_u);
   }
   stepToNextTag();
-  if (name == L"coarse-tags") {
+  if (name == "coarse-tags"_u) {
     procCoarseTags();
   }
-  if (name == L"beam-width") {
+  if (name == "beam-width"_u) {
     size_t val;
-    std::istringstream val_ss(attrib("val"));
+    std::istringstream val_ss(attrib_str("val"_u));
     val_ss >> val;
     spec.beam_width = val;
   } else {
     spec.beam_width = 4;
   }
-  if (name == L"defns") {
+  if (name == "defns"_u) {
     procDefns();
   }
-  if (name == L"global-pred") {
+  if (name == "global-pred"_u) {
     procGlobalPred();
   }
-  if (name == L"feats") {
+  if (name == "feats"_u) {
     procFeats();
   }
-  assert(name == L"metatag" && type == XML_READER_TYPE_END_ELEMENT);
+  assert(name == "metatag"_u && type == XML_READER_TYPE_END_ELEMENT);
 }
 }
diff --git a/apertium/mtx_reader.h b/apertium/mtx_reader.h
index 6a6d1af..c500a26 100644
--- a/apertium/mtx_reader.h
+++ b/apertium/mtx_reader.h
@@ -22,7 +22,6 @@
 #include <apertium/ttag.h>
 #include <apertium/xml_reader.h>
 #include <lttoolbox/pattern_list.h>
-#include <lttoolbox/ltstr.h>
 
 #include <libxml/xmlreader.h>
 #include <map>
@@ -44,7 +43,7 @@ class MTXReader : public XMLReader
   };
 
   typedef PerceptronSpec VM;
-  typedef std::map<std::wstring, size_t> VarNVMap;
+  typedef std::map<std::string, size_t> VarNVMap;
   typedef std::vector<std::pair<size_t, ExprType> > TemplateReplacements;
   typedef std::map<std::pair<size_t, std::vector<VM::FeatureDefn> >, size_t> InstanciationMap;
   typedef std::pair<VM::FeatureDefn, TemplateReplacements> TemplateDefn;
@@ -59,8 +58,8 @@ protected:
 private:
   size_t pushSetConst(std::string &val);
   size_t pushStrConst(std::string &val);
-  size_t getConstRef(const std::wstring &ref_attr, const std::string &lit_attr,
-                     const std::wstring &what, VarNVMap &const_map,
+  size_t getConstRef(const UString& ref_attr, const UString& lit_attr,
+                     const UString& what, VarNVMap &const_map,
                      size_t (MTXReader::*push_new)(std::string&), bool& exists);
   size_t getSetRef(bool& exists);
   size_t getSetRef();
@@ -71,9 +70,9 @@ private:
   void pokeBytecode(size_t addr, VM::Bytecode bc);
   void emitInt(int val);
   void emitUInt(int val);
-  int getInt(std::string attr_name, bool& exists);
+  int getInt(const UString& attr_name, bool& exists);
   int getInt(bool& exists);
-  int getInt(std::string attr_name);
+  int getInt(const UString& attr_name);
   int getInt();
 
   void procCoarseTags();
@@ -109,7 +108,7 @@ private:
   void procForEach(ExprType type);
   void procPred();
   template<typename GetT, typename EmitT> void emitAttr(
-      std::wstring what, GetT (MTXReader::*getter)(bool&),
+      std::string what, GetT (MTXReader::*getter)(bool&),
       void (MTXReader::*emitter)(EmitT));
   void getAndEmitStrRef();
   void getAndEmitSetRef();
diff --git a/apertium/perceptron_spec.cc b/apertium/perceptron_spec.cc
index 89a9d73..c785899 100644
--- a/apertium/perceptron_spec.cc
+++ b/apertium/perceptron_spec.cc
@@ -1,17 +1,18 @@
 #include <apertium/perceptron_spec.h>
-#include <apertium/utf_converter.h>
 #include <apertium/deserialiser.h>
 #include <apertium/serialiser.h>
 #include <lttoolbox/match_state.h>
 #include <iomanip>
+#include <lttoolbox/string_utils.h>
+#include <utf8.h>
 
 
 namespace Apertium {
 
-void PerceptronSpec::printFeature(std::wostream &out, const PerceptronSpec::FeatureDefn &feat_defn)
+void PerceptronSpec::printFeature(std::ostream &out, const PerceptronSpec::FeatureDefn &feat_defn)
 {
   ios::fmtflags orig_flags(out.flags());
-  out << std::hex << std::setw(2) << std::setfill(L'0');
+  out << std::hex << std::setw(2) << std::setfill('0');
   for (size_t j = 0; j < feat_defn.size(); j++) {
      out << +feat_defn[j]  << " ";
   }
@@ -27,8 +28,8 @@ void PerceptronSpec::printFeature(std::wostream &out, const PerceptronSpec::Feat
   out << "\n";
 }
 
-std::wostream &
-operator<<(std::wostream &out, PerceptronSpec const &ps) {
+std::ostream &
+operator<<(std::ostream &out, PerceptronSpec const &ps) {
   out << "= Global predicate =\n";
   PerceptronSpec::printFeature(out, ps.global_pred);
   out << "= Globals (" << ps.global_defns.size() << ") =\n";
@@ -51,12 +52,13 @@ const std::string PerceptronSpec::opcode_names[] = {
 #undef X
 
 const std::string PerceptronSpec::type_names[] = {
-  "integer", "boolean", "string", "string array", "wordoid", "wordoid array"
+     "integer", "boolean", "string", "string array",
+     "wordoid", "wordoid array"
 };
 
 static Morpheme make_sentinel_wordoid(
-    const std::wstring &lemma_str,
-    const std::wstring &tag_str) {
+    const UString &lemma_str,
+    const UString &tag_str) {
   Morpheme morpheme;
   morpheme.TheLemma = lemma_str;
   Tag tag;
@@ -66,17 +68,17 @@ static Morpheme make_sentinel_wordoid(
 }
 
 static std::vector<Morpheme> make_sentinel_wordoids(
-    const std::wstring &lemma_str,
-    const std::wstring &tag_str) {
+    const UString &lemma_str,
+    const UString &tag_str) {
   std::vector<Morpheme> morphemes;
   morphemes.push_back(make_sentinel_wordoid(lemma_str, tag_str));
   return morphemes;
 }
 
 static LexicalUnit make_sentinel_token(
-    const std::wstring &surf,
-    const std::wstring &lemma_str,
-    const std::wstring &tag_str) {
+    const UString &surf,
+    const UString &lemma_str,
+    const UString &tag_str) {
   Analysis analy;
   analy.TheMorphemes = make_sentinel_wordoids(lemma_str, tag_str);
   LexicalUnit lu;
@@ -92,9 +94,9 @@ PerceptronSpec::PerceptronSpec() {
       opcode_values[opcode_names[i]] = (Opcode)i;
     }
 
-    untagged_sentinel = make_sentinel_wordoids(L"!UNTAGGED!", L"!UT!");
-    token_wordoids_underflow = make_sentinel_token(L"!SURF_UNDERFLOW!", L"!TOK_UNDERFLOW!", L"!TUF!");
-    token_wordoids_overflow = make_sentinel_token(L"!SURF_OVERFLOW!", L"!TOK_OVERFLOW!", L"!TOF!");
+    untagged_sentinel = make_sentinel_wordoids("!UNTAGGED!"_u, "!UT!"_u);
+    token_wordoids_underflow = make_sentinel_token("!SURF_UNDERFLOW!"_u, "!TOK_UNDERFLOW!"_u, "!TUF!"_u);
+    token_wordoids_overflow = make_sentinel_token("!SURF_OVERFLOW!"_u, "!TOK_OVERFLOW!"_u, "!TOF!"_u);
 
     static_constructed = true;
   }
@@ -158,9 +160,11 @@ PerceptronSpec::coarsen(const Morpheme &wrd) const
 {
   std::map<const Morpheme, std::string>::const_iterator it = coarsen_cache.find(wrd);
   if (it == coarsen_cache.end()) {
-    std::string coarse_tag = UtfConverter::toUtf8(coarse_tags->coarsen(wrd));
-    coarsen_cache[wrd] = coarse_tag;
-    return coarse_tag;
+    UString coarse_tag = coarse_tags->coarsen(wrd);
+    std::string result;
+    utf8::utf16to8(coarse_tag.begin(), coarse_tag.end(), std::back_inserter(result));
+    coarsen_cache[wrd] = result;
+    return result;
   }
   return it->second;
 }
@@ -254,11 +258,6 @@ PerceptronSpec::Machine::Machine(
     token_idx(token_idx), wordoid_idx(wordoid_idx) {}
 
 
-static bool
-inRange(int lower, int upper, int x) {
-  return lower <= x && x < upper;
-}
-
 static int
 clamp(int lower, int upper, int x) {
   return std::min(std::max(x, lower), upper);
@@ -289,14 +288,14 @@ subscript(std::vector<T> vec, int idx) {
 void
 PerceptronSpec::Machine::traceMachineState()
 {
-  std::wcerr << "pc: " << bytecode_iter - feat.begin() << "\n";
-  std::wcerr << "peek: ";
-  std::wcerr << *bytecode_iter;
+  std::cerr << "pc: " << bytecode_iter - feat.begin() << "\n";
+  std::cerr << "peek: ";
+  std::cerr << *bytecode_iter;
   if (*bytecode_iter < num_opcodes) {
-    std::wcerr << " (" << opcode_names[*bytecode_iter].c_str() << ")";
+    std::cerr << " (" << opcode_names[*bytecode_iter].c_str() << ")";
   }
-  std::wcerr << "\n";
-  std::wcerr << "stack: " << stack << "\n";
+  std::cerr << "\n";
+  std::cerr << "stack: " << stack << "\n";
 }
 
 bool
@@ -367,12 +366,12 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
           .accumulator=StackValue(0)});
     } break;
     case FOREACH: {
-      //std::wcerr << "size: " << loop_stack.back().iterable.size()
+      //std::cerr << "size: " << loop_stack.back().iterable.size()
                  //<< " iteration: " << loop_stack.back().iteration << "\n";
-      //std::wcerr << "foreach pc: " << bytecode_iter - feat.begin() << "\n";
+      //std::cerr << "foreach pc: " << bytecode_iter - feat.begin() << "\n";
       size_t slot = get_uint_operand();
       size_t end_offset = get_uint_operand();
-      //std::wcerr << "after foreach pc: " << bytecode_iter - feat.begin() << "\n";
+      //std::cerr << "after foreach pc: " << bytecode_iter - feat.begin() << "\n";
       if (loop_stack.back().iteration == loop_stack.back().iterable.size()) {
         stack.push(loop_stack.back().accumulator);
         loop_stack.pop_back();
@@ -392,10 +391,10 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
         if (loop_state.iteration == 0) {
           if (stack.top().type == WRDVAL) {
             loop_state.accumulator = StackValue(std::vector<Morpheme>());
-            //std::wcerr << "Wordoid array size " << loop_state.iterable.size() << "\n";
+            //std::cerr << "Wordoid array size " << loop_state.iterable.size() << "\n";
           } else if (stack.top().type == STRVAL) {
             loop_state.accumulator = StackValue(std::vector<std::string>());
-            //std::wcerr << "String array size " << loop_state.iterable.size() << "\n";
+            //std::cerr << "String array size " << loop_state.iterable.size() << "\n";
           } else {
             throw 1;
           }
@@ -403,7 +402,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
         if (stack.top().type == WRDVAL) {
           loop_state.accumulator.wrdArr().push_back(stack.top().wrd());
         } else if (stack.top().type == STRVAL) {
-          //std::wcerr << "String array size " << loop_state.accumulator.size() << "\n";
+          //std::cerr << "String array size " << loop_state.accumulator.size() << "\n";
           loop_state.accumulator.strArr().push_back(stack.top().str());
         } else {
           throw 1;
@@ -416,7 +415,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
     } break;
     case GETGVAR: {
       int slot = get_uint_operand();
-      //std::wcerr << "GETGVAR " << slot << " " << spec.global_results[slot] << "\n";
+      //std::cerr << "GETGVAR " << slot << " " << spec.global_results[slot] << "\n";
       stack.push(spec.global_results[slot]);
     } break;
     case GETVAR: {
@@ -476,17 +475,21 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
       stack.push(clamp(0, (int)untagged.size() - 1, stack.pop_off().intVal()));
       break;
     case GETWRD: {
-      //std::wcerr << "GETWRD start\n";
+      //std::cerr << "GETWRD start\n";
       stack.push(get_wordoid(tagged));
-      //std::wcerr << "GETWRD done\n";
+      //std::cerr << "GETWRD done\n";
     } break;
     case EXTOKSURF: {
-      std::wstring surf = get_token(untagged).TheSurfaceForm;
-      stack.push(new std::string(UtfConverter::toUtf8(surf)));
+      UString surf = get_token(untagged).TheSurfaceForm;
+      std::string temp;
+      utf8::utf16to8(surf.begin(), surf.end(), std::back_inserter(temp));
+      stack.push(std::move(temp));
     } break;
     case EXWRDLEMMA: {
-      std::wstring lemma = stack.pop_off().wrd().TheLemma;
-      stack.push(new std::string(UtfConverter::toUtf8(lemma)));
+      UString lemma = stack.pop_off().wrd().TheLemma;
+      std::string temp;
+      utf8::utf16to8(lemma.begin(), lemma.end(), std::back_inserter(temp));
+      stack.push(std::move(temp));
     } break;
     case EXWRDCOARSETAG: {
       assert(spec.coarse_tags);
@@ -510,7 +513,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
           if (wrd_it == wrds.end()) {
             break;
           } else {
-            ambgset.back() += "+";
+            ambgset.back() += '+';
           }
         }
       }
@@ -519,11 +522,11 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
     case EXTAGS: {
       const std::vector<Tag> &tags = stack.top().wrd().TheTags;
       /*std::vector<Tag>::const_iterator it = tags.begin();
-      std::wcerr << "tags: ";
+      std::cerr << "tags: ";
       for (;it != tags.end(); it++) {
-        std::wcerr << &(*it) << " " << it->TheTag << ", ";
+        std::cerr << &(*it) << " " << it->TheTag << ", ";
       }
-      std::wcerr << "\n";*/
+      std::cerr << "\n";*/
       std::vector<std::string> *tags_str = new std::vector<std::string>;
       tags_str->resize(tags.size());
       transform(tags.begin(), tags.end(), tags_str->begin(), get_tag);
@@ -608,9 +611,11 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
     case HASANYSUBSTR: unimplemented_opcode("HASANYSUBSTR"); break;
     case CPYSTR: unimplemented_opcode("CPYSTR"); break;
     case LOWER: {
-      // XXX: Eek! Bad! No Unicode. ICU please.
-      std::string &str = stack.top().str();
-      std::transform(str.begin(), str.end(), str.begin(), ::tolower);
+      UString str = to_ustring(stack.pop_off().str().c_str());
+      UString low = StringUtils::tolower(str);
+      std::string tmp;
+      utf8::utf16to8(low.begin(), low.end(), std::back_inserter(tmp));
+      stack.push(tmp);
     } break;
     case SLICE: {
       int begin = get_int_operand();
@@ -641,16 +646,15 @@ PerceptronSpec::Machine::execCommonOp(Opcode op)
     } break;
     case JOIN: {
       const std::string &sep = get_str_operand();
-      std::stringstream ss;
       std::vector<std::string> str_arr = stack.pop_off().strArr();
-      std::vector<std::string>::const_iterator it;
-      for (it = str_arr.begin(); it != str_arr.end(); it++) {
-        ss << *it;
-        if (it + 1 != str_arr.end()) {
-          ss << sep;
+      std::string ss;
+      for (auto& it : str_arr) {
+        if (!ss.empty()) {
+          ss.append(sep);
         }
+        ss.append(it);
       }
-      stack.push(StackValue(ss.str()));
+      stack.push(StackValue(ss));
     } break;
     default:
       return false;
@@ -740,8 +744,8 @@ void
 PerceptronSpec::Machine::unimplemented_opcode(std::string opstr) {
   int bytecode_idx = bytecode_iter - feat.begin();
   std::stringstream msg;
-  msg << "Unimplemented opcode: " << opstr
-      << " at " << (is_feature ? "feature" : "global") << " #" << feat_idx << " address #" << bytecode_idx;
+  msg << "Unimplemented opcode: " << opstr;
+  msg << " at " << (is_feature ? "feature" : "global") << " #" << feat_idx << " address #" << bytecode_idx;
   throw Apertium::Exception::apertium_tagger::UnimplementedOpcode(msg);
 }
 
@@ -767,7 +771,9 @@ void PerceptronSpec::appendStr(UnaryFeatureVec::iterator begin,
 
 std::string
 PerceptronSpec::Machine::get_tag(const Tag &in) {
-  return UtfConverter::toUtf8(in.TheTag);
+  std::string result;
+  utf8::utf16to8(in.TheTag.begin(), in.TheTag.end(), std::back_inserter(result));
+  return result;
 }
 
 void PerceptronSpec::serialiseFeatDefn(
diff --git a/apertium/perceptron_spec.h b/apertium/perceptron_spec.h
index c577e39..d51a507 100644
--- a/apertium/perceptron_spec.h
+++ b/apertium/perceptron_spec.h
@@ -32,8 +32,8 @@ class PerceptronSpec
 {
 public:
   typedef std::vector<unsigned char> FeatureDefn;
-  static void printFeature(std::wostream &out, const PerceptronSpec::FeatureDefn &feat_defn);
-  friend std::wostream& operator<<(std::wostream &out, PerceptronSpec const &pt);
+  static void printFeature(std::ostream &out, const PerceptronSpec::FeatureDefn &feat_defn);
+  friend std::ostream& operator<<(std::ostream &out, PerceptronSpec const &pt);
   PerceptronSpec();
   #define OPCODES \
     /** Boolean and arithmetic */\
@@ -179,7 +179,7 @@ public:
   };
   class StackValue {
   public:
-    friend std::wostream& operator<<(std::wostream& out, StackValue const &val) {
+    friend std::ostream& operator<<(std::ostream& out, StackValue const &val) {
       switch (val.type) {
         case INTVAL:
           out << val.intVal();
@@ -188,14 +188,12 @@ public:
           out << val.boolVal();
           break;
         case STRVAL:
-          out << val.str().c_str();
+          out << val.str();
           break;
         case STRARRVAL: {
           out << "[";
-          std::vector<std::string> &str_arr = val.strArr();
-          std::vector<std::string>::const_iterator it = str_arr.begin();
-          for (; it != str_arr.end(); it++) {
-            out << it->c_str();
+          for (auto& it : val.strArr()) {
+            out << it;
           }
           out << "]";
         } break;
@@ -205,9 +203,8 @@ public:
         case WRDARRVAL: {
           out << "[";
           std::vector<Morpheme> &wrd_arr = val.wrdArr();
-          std::vector<Morpheme>::const_iterator it = wrd_arr.begin();
-          for (; it != wrd_arr.end(); it++) {
-            out << *it;
+          for (auto& it : wrd_arr) {
+            out << it;
           }
           out << "]";
         } break;
@@ -226,7 +223,7 @@ public:
     StackValue(const StackValue &other) {
       // C++11: Probably reference counting with shared_ptr would be better
       // than all this copying if it were available
-      //std::wcerr << "StackValue init\n";
+      //std::cerr << "StackValue init\n";
       type = other.type;
       switch (type) {
         case STRVAL:
@@ -248,7 +245,7 @@ public:
       }
     }
     StackValue& operator=(StackValue other) {
-      //std::wcerr << "StackValue assign\n";
+      //std::cerr << "StackValue assign\n";
       swap(*this, other);
       return *this;
     }
@@ -260,7 +257,7 @@ public:
       payload.bval = bval;
       type = BVAL;
     }
-    StackValue(const std::string &strval) {
+    StackValue(const std::string& strval) {
       payload.strval = new std::string(strval);
       type = STRVAL;
     }
@@ -269,32 +266,32 @@ public:
       type = STRARRVAL;
     }
     StackValue(const Morpheme &wordoid) {
-      /*std::wcerr << L"Before ";
+      /*std::cerr << "Before ";
       std::vector<Tag>::const_iterator it = wordoid.TheTags.begin();
       for (;it != wordoid.TheTags.end(); it++) {
-        std::wcerr << &(*it) << " ";
+        std::cerr << &(*it) << " ";
       }
-      std::wcerr << L"\n";
-      std::wcerr << L"Copy morpheme " << &wordoid;*/
+      std::cerr << "\n";
+      std::cerr << "Copy morpheme " << &wordoid;*/
       payload.wrdval = new Morpheme(wordoid);
-      /*std::wcerr << L" to " << payload.wrdval << "\n";
-      std::wcerr << L"After ";
+      /*std::cerr << " to " << payload.wrdval << "\n";
+      std::cerr << "After ";
       it = payload.wrdval->TheTags.begin();
       for (;it != payload.wrdval->TheTags.end(); it++) {
-        std::wcerr << &(*it) << " ";
+        std::cerr << &(*it) << " ";
       }
-      std::wcerr << L"\n";*/
+      std::cerr << "\n";*/
       type = WRDVAL;
     }
     StackValue(const std::vector<Morpheme> &wordoids) {
       payload.wrdarrval = new std::vector<Morpheme>(wordoids);
       type = WRDARRVAL;
     }
-    StackValue(std::string *strval) {
+    StackValue(std::string* strval) {
       payload.strval = strval;
       type = STRVAL;
     }
-    StackValue(std::vector<std::string> *strarrval) {
+    StackValue(std::vector<std::string>* strarrval) {
       payload.strarrval = strarrval;
       type = STRARRVAL;
     }
@@ -410,20 +407,20 @@ private:
       data.pop_back();
     }
     /*void push(StackValue val) {
-      std::wcerr << "before copy push\n";
+      std::cerr << "before copy push\n";
       data.push_back(val);
-      std::wcerr << "after copy push\n";
+      std::cerr << "after copy push\n";
     }*/
     void push(const StackValue &val) {
-      //std::wcerr << "before push\n";
+      //std::cerr << "before push\n";
       data.push_back(val);
-      //std::wcerr << "after push\n";
+      //std::cerr << "after push\n";
     }
     StackValue& top() {
       return data.back();
     }
     StackValue pop_off() {
-      //std::wcerr << L"Top value: " << top().payload.intval << "\n";
+      //std::cerr << "Top value: " << top().payload.intval << "\n";
       StackValue ret = top();
       pop();
       return ret;
diff --git a/apertium/perceptron_tagger.cc b/apertium/perceptron_tagger.cc
index e121c9b..01f800a 100644
--- a/apertium/perceptron_tagger.cc
+++ b/apertium/perceptron_tagger.cc
@@ -12,7 +12,7 @@ PerceptronTagger::PerceptronTagger(TaggerFlags flags) : StreamTagger(flags) {};
 
 PerceptronTagger::~PerceptronTagger() {};
 
-void PerceptronTagger::tag(Stream &in, std::wostream &out) {
+void PerceptronTagger::tag(Stream &in, std::ostream &out) {
   SentenceStream::SentenceTagger::tag(in, out, TheFlags.getSentSeg());
 }
 
@@ -20,8 +20,8 @@ void PerceptronTagger::read_spec(const std::string &filename) {
   MTXReader(spec).read(filename);
 }
 
-std::wostream &
-operator<<(std::wostream &out, PerceptronTagger const &pt) {
+std::ostream &
+operator<<(std::ostream &out, PerceptronTagger const &pt) {
   out << "== Spec ==\n";
   out << pt.spec;
   out << "== Weights " << pt.weights.size() << " ==\n";
@@ -73,9 +73,9 @@ PerceptronTagger::tagSentence(const Sentence &untagged_sent) {
                             token_idx, wordoid_idx, feat_vec_delta);
           if (TheFlags.getDebug()) {
             FeatureVec fv(feat_vec_delta);
-            std::wcerr << "Token " << token_idx << "\t\tWordoid " << wordoid_idx << "\n";
-            std::wcerr << fv;
-            std::wcerr << "Score: " << weights * feat_vec_delta << "\n";
+            std::cerr << "Token " << token_idx << "\t\tWordoid " << wordoid_idx << "\n";
+            std::cerr << fv;
+            std::cerr << "Score: " << weights * feat_vec_delta << "\n";
           }
           new_agenda_item.score += weights * feat_vec_delta;
         }
@@ -83,14 +83,14 @@ PerceptronTagger::tagSentence(const Sentence &untagged_sent) {
     }
     // Apply the beam
     if (TheFlags.getDebug()) {
-      std::wcerr << "-- Before beam: --\n" << new_agenda;
+      std::cerr << "-- Before beam: --\n" << new_agenda;
     }
     size_t new_agenda_size = std::min((size_t)spec.beam_width, new_agenda.size());
     agenda.resize(new_agenda_size);
     std::partial_sort_copy(new_agenda.begin(), new_agenda.end(),
                            agenda.begin(), agenda.end());
     if (TheFlags.getDebug()) {
-      std::wcerr << "-- After beam: --\n" << agenda;
+      std::cerr << "-- After beam: --\n" << agenda;
     }
   }
 
@@ -100,7 +100,7 @@ PerceptronTagger::tagSentence(const Sentence &untagged_sent) {
 
 void PerceptronTagger::outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output) {
+    std::ostream &output) {
   StreamTagger::outputLexicalUnit(lexical_unit, analysis, output);
 }
 
@@ -129,7 +129,7 @@ bool PerceptronTagger::trainSentence(
   std::vector<Morpheme>::const_iterator wordoid_it;
 
   for (size_t token_idx = 0; token_idx < sent_len; token_idx++) {
-    //std::wcerr << "Token idx: " << token_idx << "\n";
+    //std::cerr << "Token idx: " << token_idx << "\n";
     const TaggedToken &tagged_tok(tagged_sent[token_idx]);
     const StreamedType &untagged_tok(untagged_sent[token_idx]);
     correct_sentence.tagged.push_back(tagged_tok);
@@ -156,7 +156,7 @@ bool PerceptronTagger::trainSentence(
 
     bool correct_available = false;
     for (agenda_it = agenda.begin(); agenda_it != agenda.end(); agenda_it++) {
-      //std::wcerr << *agenda_it;
+      //std::cerr << *agenda_it;
       for (analys_it = analyses.begin(); analys_it != analyses.end(); analys_it++) {
         const std::vector<Morpheme> &wordoids = analys_it->TheMorphemes;
 
@@ -182,24 +182,24 @@ bool PerceptronTagger::trainSentence(
       if (TheFlags.getSkipErrors()) {
         return true;
       } else {
-        std::wstringstream what_;
-        what_ << L"Tagged analysis unavailable in untagged/ambigous input.\n";
-        what_ << L"Available:\n";
+        std::stringstream what_;
+        what_ << "Tagged analysis unavailable in untagged/ambigous input.\n";
+        what_ << "Available:\n";
         for (analys_it = analyses.begin(); analys_it != analyses.end(); analys_it++) {
-          what_ << *analys_it << L"\n";
+          what_ << *analys_it << "\n";
         }
-        what_ << L"Required: " << *tagged_tok << L"\n";
-        what_ << L"Rerun with --skip-on-error to skip this sentence.";
+        what_ << "Required: " << *tagged_tok << "\n";
+        what_ << "Rerun with --skip-on-error to skip this sentence.";
         throw Apertium::Exception::PerceptronTagger::CorrectAnalysisUnavailable(what_);
       }
     }
     // Apply the beam
-    //std::wcerr << "-- Before beam: --\n" << new_agenda;
+    //std::cerr << "-- Before beam: --\n" << new_agenda;
     size_t new_agenda_size = std::min((size_t)spec.beam_width, new_agenda.size());
     agenda.resize(new_agenda_size);
     std::partial_sort_copy(new_agenda.begin(), new_agenda.end(),
                            agenda.begin(), agenda.end());
-    //std::wcerr << "-- After beam: --\n" << agenda;
+    //std::cerr << "-- After beam: --\n" << agenda;
 
     // Early update "fallen off the beam"
     bool any_match = false;
@@ -211,29 +211,29 @@ bool PerceptronTagger::trainSentence(
       }
     }
     if (!any_match) {
-      /*std::wcerr << "Early update time!\n";
-      std::wcerr << "Before:\n" << weights << "\n";
-      std::wcerr << "Incorrect:\n" << agenda.front().vec << "\n";
-      std::wcerr << "Correct:\n" << correct_sentence.vec << "\n";*/
+      /*std::cerr << "Early update time!\n";
+      std::cerr << "Before:\n" << weights << "\n";
+      std::cerr << "Incorrect:\n" << agenda.front().vec << "\n";
+      std::cerr << "Correct:\n" << correct_sentence.vec << "\n";*/
       avg_weights -= agenda.front().vec;
       avg_weights += correct_sentence.vec;
       avg_weights.incIteration();
-      //std::wcerr << "After:\n" << weights << "\n";
+      //std::cerr << "After:\n" << weights << "\n";
       return false;
     }
   }
   // Normal update
-  /*std::wcerr << "Best match:\n" << agenda.front().tagged << "\n\n";
-  std::wcerr << "Correct:\n" << correct_sentence.tagged << "\n\n";*/
+  /*std::cerr << "Best match:\n" << agenda.front().tagged << "\n\n";
+  std::cerr << "Correct:\n" << correct_sentence.tagged << "\n\n";*/
   if (agenda.front().tagged != correct_sentence.tagged) {
-    /*std::wcerr << "Normal update time!\n";
-    std::wcerr << "Before:\n" << weights << "\n";
-    std::wcerr << "Incorrect:\n" << agenda.front().vec << "\n";
-    std::wcerr << "Correct:\n" << correct_sentence.vec << "\n";*/
+    /*std::cerr << "Normal update time!\n";
+    std::cerr << "Before:\n" << weights << "\n";
+    std::cerr << "Incorrect:\n" << agenda.front().vec << "\n";
+    std::cerr << "Correct:\n" << correct_sentence.vec << "\n";*/
     avg_weights -= agenda.front().vec;
     avg_weights += correct_sentence.vec;
     avg_weights.incIteration();
-    //std::wcerr << "After:\n" << weights << "\n";
+    //std::cerr << "After:\n" << weights << "\n";
   }
   return false;
 }
@@ -248,7 +248,7 @@ void PerceptronTagger::train(
   TrainingCorpus tc(tagged, untagged, TheFlags.getSkipErrors(), TheFlags.getSentSeg());
   size_t avail_skipped;
   for (int i = 0; i < iterations; i++) {
-    std::wcerr << "Iteration " << i + 1 << " of " << iterations << "\n";
+    std::cerr << "Iteration " << i + 1 << " of " << iterations << "\n";
     avail_skipped = 0;
     tc.shuffle();
     std::vector<TrainingSentence>::const_iterator si;
@@ -259,12 +259,12 @@ void PerceptronTagger::train(
   }
   avg_weights.average();
   if (avail_skipped) {
-    std::wcerr << "Skipped " << tc.skipped << " sentences due to token "
+    std::cerr << "Skipped " << tc.skipped << " sentences due to token "
                << "misalignment and " << avail_skipped << " sentences due to "
                << "tagged token being unavailable in untagged file out of "
                << tc.sentences.size() << " total sentences.\n";
   }
-  //std::wcerr << *this;
+  //std::cerr << *this;
 }
 
 void PerceptronTagger::serialise(std::ostream &serialised) const
@@ -289,8 +289,8 @@ PerceptronTagger::extendAgendaAll(
   }
 }
 
-std::wostream&
-operator<<(std::wostream &out, const TaggedSentence &tagged) {
+std::ostream&
+operator<<(std::ostream &out, const TaggedSentence &tagged) {
   TaggedSentence::const_iterator tsi;
   for (tsi = tagged.begin(); tsi != tagged.end(); tsi++) {
     if (*tsi) {
@@ -303,8 +303,8 @@ operator<<(std::wostream &out, const TaggedSentence &tagged) {
   return out;
 }
 
-std::wostream&
-operator<<(std::wostream &out, const PerceptronTagger::TrainingAgendaItem &tai) {
+std::ostream&
+operator<<(std::ostream &out, const PerceptronTagger::TrainingAgendaItem &tai) {
   out << "Score: " << tai.score << "\n";
   out << "Sentence: " << tai.tagged << "\n";
   out << "\n";
@@ -312,8 +312,8 @@ operator<<(std::wostream &out, const PerceptronTagger::TrainingAgendaItem &tai)
   return out;
 }
 
-std::wostream&
-operator<<(std::wostream &out, const std::vector<PerceptronTagger::TrainingAgendaItem> &agenda) {
+std::ostream&
+operator<<(std::ostream &out, const std::vector<PerceptronTagger::TrainingAgendaItem> &agenda) {
   std::vector<PerceptronTagger::TrainingAgendaItem>::const_iterator agenda_it;
   for (agenda_it = agenda.begin(); agenda_it != agenda.end(); agenda_it++) {
     out << *agenda_it;
@@ -322,15 +322,15 @@ operator<<(std::wostream &out, const std::vector<PerceptronTagger::TrainingAgend
   return out;
 }
 
-std::wostream&
-operator<<(std::wostream &out, const PerceptronTagger::AgendaItem &ai) {
+std::ostream&
+operator<<(std::ostream &out, const PerceptronTagger::AgendaItem &ai) {
   out << "Score: " << ai.score << "\n";
   out << "Sentence: " << ai.tagged << "\n";
   return out;
 }
 
-std::wostream&
-operator<<(std::wostream &out, const std::vector<PerceptronTagger::AgendaItem> &agenda) {
+std::ostream&
+operator<<(std::ostream &out, const std::vector<PerceptronTagger::AgendaItem> &agenda) {
   std::vector<PerceptronTagger::AgendaItem>::const_iterator agenda_it;
   for (agenda_it = agenda.begin(); agenda_it != agenda.end(); agenda_it++) {
     out << *agenda_it;
diff --git a/apertium/perceptron_tagger.h b/apertium/perceptron_tagger.h
index 9c7ba0f..73986d2 100644
--- a/apertium/perceptron_tagger.h
+++ b/apertium/perceptron_tagger.h
@@ -21,16 +21,16 @@ public:
   virtual void train(Stream &tagged, Stream &untagged, int iterations);
   // tagger
   virtual void deserialise(std::istream &serialised);
-  virtual void tag(Stream &input, std::wostream &output);
+  virtual void tag(Stream &input, std::ostream &output);
 
   void read_spec(const std::string &filename);
 
-  friend std::wostream& operator<<(std::wostream &out, PerceptronTagger const &pt);
+  friend std::ostream& operator<<(std::ostream &out, PerceptronTagger const &pt);
 protected:
   virtual TaggedSentence tagSentence(const Sentence &untagged);
   virtual void outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output);
+    std::ostream &output);
 private:
   bool trainSentence(
     const TrainingSentence &sentence,
@@ -52,18 +52,18 @@ private:
   };
   template <typename T> static void extendAgendaAll(
     std::vector<T> &agenda, Optional<Analysis> analy);
-  friend std::wostream& operator<<(std::wostream &out,
+  friend std::ostream& operator<<(std::ostream &out,
                                    const TrainingAgendaItem &tai);
-  friend std::wostream& operator<<(
-      std::wostream &out, const std::vector<TrainingAgendaItem> &agenda);
+  friend std::ostream& operator<<(
+      std::ostream &out, const std::vector<TrainingAgendaItem> &agenda);
   friend bool operator<(const AgendaItem &a, const AgendaItem &b);
-  friend std::wostream& operator<<(
-      std::wostream &out, const PerceptronTagger::AgendaItem &ai);
-  friend std::wostream& operator<<(
-      std::wostream &out, const std::vector<PerceptronTagger::AgendaItem> &agenda);
+  friend std::ostream& operator<<(
+      std::ostream &out, const PerceptronTagger::AgendaItem &ai);
+  friend std::ostream& operator<<(
+      std::ostream &out, const std::vector<PerceptronTagger::AgendaItem> &agenda);
 };
 
-std::wostream& operator<<(std::wostream &out, const TaggedSentence &tagged);
+std::ostream& operator<<(std::ostream &out, const TaggedSentence &tagged);
 }
 
 #endif
diff --git a/apertium/postchunk.cc b/apertium/postchunk.cc
index 91f55ad..b1b2c50 100644
--- a/apertium/postchunk.cc
+++ b/apertium/postchunk.cc
@@ -15,1731 +15,530 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/postchunk.h>
-#include <apertium/trx_reader.h>
-#include <apertium/utf_converter.h>
-#include <lttoolbox/compression.h>
-#include <lttoolbox/xml_parse_util.h>
 
-#include <cctype>
-#include <cerrno>
-#include <iostream>
-#include <stack>
-#include <apertium/string_utils.h>
-#include "apertium_config.h"
-#include <apertium/unlocked_cstdio.h>
-
-using namespace Apertium;
-using namespace std;
-
-void
-Postchunk::destroy()
-{
-  if(me)
-  {
-    delete me;
-    me = NULL;
-  }
-  if(doc)
-  {
-    xmlFreeDoc(doc);
-    doc = NULL;
-  }
-}
-
-Postchunk::Postchunk() :
-word(0),
-lword(0),
-output(0),
-any_char(0),
-any_tag(0),
-nwords(0)
-{
-  me = NULL;
-  doc = NULL;
-  root_element = NULL;
-  lastrule = NULL;
-  inword = false;
-  null_flush = false;
-  internal_null_flush = false;
-  trace = false;
-  in_lu = false;
-  in_out = false;
-  in_let_var = false;
-  in_wblank = false;
-}
-
-Postchunk::~Postchunk()
-{
-  destroy();
-}
-
-void
-Postchunk::readData(FILE *in)
-{
-  alphabet.read(in);
-  any_char = alphabet(TRXReader::ANY_CHAR);
-  any_tag = alphabet(TRXReader::ANY_TAG);
-
-  Transducer t;
-  t.read(in, alphabet.size());
-
-  map<int, int> finals;
-
-  // finals
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    int key = Compression::multibyte_read(in);
-    finals[key] = Compression::multibyte_read(in);
-  }
-
-  me = new MatchExe(t, finals);
-
-  // attr_items
-  bool recompile_attrs = Compression::string_read(in) != pcre_version_endian();
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    attr_items[cad_k].read(in);
-    wstring fallback = Compression::wstring_read(in);
-    if(recompile_attrs) {
-      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
-    }
-  }
-
-  // variables
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
-  }
-
-  // macros
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    macros[cad_k] = Compression::multibyte_read(in);
-  }
-
-  // lists
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-
-    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
-    {
-      wstring const cad_v = Compression::wstring_read(in);
-      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
-      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
-    }
-  }
-}
-
-void
-Postchunk::read(string const &transferfile, string const &datafile)
-{
-  readPostchunk(transferfile);
-
-  // datafile
-  FILE *in = fopen(datafile.c_str(), "rb");
-  if(!in)
-  {
-    wcerr << "Error: Could not open file '" << datafile << "'." << endl;
-    exit(EXIT_FAILURE);
-  }
-  readData(in);
-  fclose(in);
-
-}
-
-void
-Postchunk::readPostchunk(string const &in)
-{
-  doc = xmlReadFile(in.c_str(), NULL, 0);
-
-  if(doc == NULL)
-  {
-    wcerr << "Error: Could not parse file '" << in << "'." << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  root_element = xmlDocGetRootElement(doc);
-
-  // search for macros & rules
-  for(xmlNode *i = root_element->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "section-def-macros"))
-      {
-        collectMacros(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "section-rules"))
-      {
-        collectRules(i);
-      }
-    }
-  }
-}
-
-void
-Postchunk::collectRules(xmlNode *localroot)
-{
-  for(xmlNode *rule = localroot->children; rule != NULL; rule = rule->next)
-  {
-    if(rule->type == XML_ELEMENT_NODE)
-    {
-      size_t line = rule->line;
-      for(xmlNode *rulechild = rule->children; ; rulechild = rulechild->next)
-      {
-        if(rulechild->type == XML_ELEMENT_NODE && !xmlStrcmp(rulechild->name, (const xmlChar *) "action"))
-        {
-          rule_map.push_back(rulechild);
-          rule_lines.push_back(line);
-          break;
-        }
-      }
-    }
-  }
-}
-
-void
-Postchunk::collectMacros(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      macro_map.push_back(i);
-    }
-  }
-}
-
-bool
-Postchunk::checkIndex(xmlNode *element, int index, int limit)
-{
-  if(index > limit) // Note: Unlike transfer/interchunk, we allow index==limit!
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index > limit" << endl;
-    return false;
-  }
-  if(index < 0) {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index < 0" << endl;
-    return false;
-  }
-  if(word[index] == 0)
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": Null access at word[index]" << endl;
-    return false;
-  }
-  return true;
-}
-
-bool
-Postchunk::gettingLemmaFromWord(string attr)
-{
-    return (attr.compare("lem") == 0 || attr.compare("lemh") == 0 || attr.compare("whole") == 0);
-}
-
-string
-Postchunk::combineWblanks(string wblank_current, string wblank_to_add)
-{
-  if(wblank_current.empty() && wblank_to_add.empty())
-  {
-    return wblank_current;
-  }
-  else if(wblank_current.empty())
-  {
-    return wblank_to_add;
-  }
-  else if(wblank_to_add.empty())
-  {
-    return wblank_current;
-  }
-  
-  string new_out_wblank;
-  for(string::const_iterator it = wblank_current.begin(); it != wblank_current.end(); it++)
-  {
-    if(*it == '\\')
-    {
-      new_out_wblank += *it;
-      it++;
-      new_out_wblank += *it;
-    }
-    else if(*it == ']')
-    {
-      if(*(it+1) == ']')
-      {
-        new_out_wblank += ';';
-        break;
-      }
-    }
-    else
-    {
-      new_out_wblank += *it;
-    }
-  }
-  
-  for(string::const_iterator it = wblank_to_add.begin(); it != wblank_to_add.end(); it++)
-  {
-    if(*it == '\\')
-    {
-      new_out_wblank += *it;
-      it++;
-      new_out_wblank += *it;
-    }
-    else if(*it == '[')
-    {
-      if(*(it+1) == '[')
-      {
-        new_out_wblank += ' ';
-        it++;
-      }
-    }
-    else
-    {
-      new_out_wblank += *it;
-    }
-  }
-  
-  return new_out_wblank;
-}
-
-string
-Postchunk::evalString(xmlNode *element)
-{
-  map<xmlNode *, TransferInstr>::iterator it;
-  it = evalStringCache.find(element);
-  if(it != evalStringCache.end())
-  {
-    TransferInstr &ti = it->second;
-    switch(ti.getType())
-    {
-      case ti_clip_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(gettingLemmaFromWord(ti.getContent()) && lword > 1)
-          {
-            if(in_lu)
-            {
-              out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
-            }
-            else if(in_let_var)
-            {
-              var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
-            }
-          }
-          
-          return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
-        }
-        break;
-
-      case ti_lu_count:
-        return StringUtils::itoa_string(tmpword.size());
-
-      case ti_var:
-        if(lword > 1)
-        {
-        out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]);
-        }
-        
-        return variables[ti.getContent()];
-
-      case ti_lit_tag:
-      case ti_lit:
-        return ti.getContent();
-
-      case ti_b:
-        if(!blank_queue.empty())
-        {
-          string retblank = blank_queue.front();
-          if(in_out)
-          {
-            blank_queue.pop();
-          }
-          
-          return retblank;
-        }
-        else
-        {
-          return " ";
-        }
-        break;
-
-      case ti_get_case_from:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
-                          evalString((xmlNode *) ti.getPointer()));
-        }
-        break;
-
-      case ti_case_of_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return caseOf(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
-        }
-        break;
-
-      default:
-        return "";
-    }
-    return "";
-  }
-
-  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *)i->children->content);
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit_tag,
-                                             tags((const char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit, string((char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
-  {
-    if(element->properties == NULL)
-    {
-      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
-    }
-    else
-    {
-      int pos = atoi((const char *) element->properties->children->content) - 1;
-      evalStringCache[element] = TransferInstr(ti_b, "", pos);
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
-  {
-    int pos = atoi((const char *) element->properties->children->content);
-    xmlNode *param = NULL;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-	param = i;
-	break;
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
-  {
-    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu-count"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lu_count, "", 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content);
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
-  {
-    string value;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        value.append(evalString(i));
-      }
-    }
-    return value;
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu"))
-  {
-    in_lu = true;
-    out_wblank.clear();
-    
-    string myword;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-       if(i->type == XML_ELEMENT_NODE)
-       {
-         myword.append(evalString(i));
-       }
-    }
-    
-    in_lu = false;
-    
-    if(lword == 1)
-    {
-      out_wblank = word[1]->getWblank();
-    }
-
-    if(myword != "")
-    {
-      return out_wblank+"^"+myword+"$";
-    }
-    else
-    {
-      return "";
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "mlu"))
-  {
-    string value;
-
-    bool first_time = true;
-    out_wblank.clear();
-
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        in_lu = true;
-        
-        string myword;
-
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            myword.append(evalString(j));
-          }
-        }
-        
-        in_lu = false;
-
-	if(!first_time)
-	{
-	  if(myword != "" && myword[0] != '#')  //'+#' problem
-	  {
-	    value.append("+");
-          }
-	}
-	else
-	{
-	  if(myword != "")
-	  {
-	    first_time = false;
-          }
-	}
-
-	value.append(myword);
-      }
-    }
-    
-    if(lword == 1)
-    {
-      out_wblank = word[1]->getWblank();
-    }
-
-    if(value != "")
-    {
-      return out_wblank+"^"+value+"$";
-    }
-    else
-    {
-      return "";
-    }
-  }
-
-  else
-  {
-    wcerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  return evalString(element);
-}
-
-void
-Postchunk::processOut(xmlNode *localroot)
-{
-  in_out = true;
-  
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "lu"))
-      {
-        in_lu = true;
-        out_wblank.clear();
-        
-        string myword;
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            myword.append(evalString(j));
-          }
-        }
-        
-        in_lu = false;
-        
-        if(lword == 1)
-        {
-          out_wblank = word[1]->getWblank();
-        }
-        
-        if(myword != "")
-        {
-          fputws_unlocked(UtfConverter::fromUtf8(out_wblank).c_str(), output);
-          fputwc_unlocked(L'^', output);
-          fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
-          fputwc_unlocked(L'$', output);
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu"))
-      {
-        string myword;
-        bool first_time = true;
-        out_wblank.clear();
-        
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            in_lu = true;
-            
-            string mylocalword;
-            for(xmlNode *k = j->children; k != NULL; k = k->next)
-            {
-              if(k->type == XML_ELEMENT_NODE)
-              {
-                mylocalword.append(evalString(k));
-              }
-            }
-            
-            in_lu = false;
-
-            if(!first_time)
-            {
-              if(mylocalword != "")
-              {
-                myword += '+';
-              }
-            }
-            else
-            {
-              if(mylocalword != "")
-              {
-                first_time = false;
-              }
-            }
-            
-            myword.append(mylocalword);
-          }
-        }
-        
-        if(lword == 1)
-        {
-          out_wblank = word[1]->getWblank();
-        }
-
-        fputws_unlocked(UtfConverter::fromUtf8(out_wblank).c_str(), output);
-        fputwc_unlocked('^', output);
-        fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
-        fputwc_unlocked(L'$', output);
-      }
-      else // 'b'
-      {
-        fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(), output);
-      }
-    }
-  }
-  
-  in_out = false;
-}
-
-void
-Postchunk::processTags(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (xmlChar const *) "tag"))
-      {
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            fputws_unlocked(UtfConverter::fromUtf8(evalString(j)).c_str(), output);
-          }
-        }
-      }
-    }
-  }
-}
-
-void
-Postchunk::processInstruction(xmlNode *localroot)
-{
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "choose"))
-  {
-    processChoose(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "let"))
-  {
-    processLet(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "append"))
-  {
-    processAppend(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "out"))
-  {
-    processOut(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "call-macro"))
-  {
-    processCallMacro(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "modify-case"))
-  {
-    processModifyCase(localroot);
-  }
-}
-
-void
-Postchunk::processLet(xmlNode *localroot)
-{
-  xmlNode *leftSide = NULL, *rightSide = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
-    }
-  }
-
-  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
-  if(it != evalStringCache.end())
-  {
-    TransferInstr &ti = it->second;
-    switch(ti.getType())
-    {
-      case ti_var:
-        in_let_var = true;
-        var_val = ti.getContent();
-        var_out_wblank[var_val].clear();
-        
-        variables[ti.getContent()] = evalString(rightSide);
-        
-        in_let_var = false;
-        return;
-
-      case ti_clip_tl:
-      {
-        bool match = word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide));
-        if(!match && trace)
-        {
-          wcerr << "apertium-postchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-        }
-      }
-        return;
-
-      default:
-        return;
-    }
-  }
-  if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
-  {
-    in_let_var = true;
-    
-    string const val = (const char *) leftSide->properties->children->content;
-    
-    var_val = val;
-    var_out_wblank[var_val].clear();
-    
-    variables[val] = evalString(rightSide);
-    
-    in_let_var = false;
-    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
-  }
-  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content);
-      }
-    }
-
-
-    bool match = word[pos]->setChunkPart(attr_items[(const char *) part],
-					 evalString(rightSide));
-    if(!match && trace)
-    {
-      wcerr << "apertium-postchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-    }
-    evalStringCache[leftSide] = TransferInstr(ti_clip_tl, (const char *) part,
-					      pos, NULL);
-  }
-}
-
-void
-Postchunk::processAppend(xmlNode *localroot)
-{
-  string name;
-  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "n"))
-    {
-      name = (char *) i->children->content;
-      break;
-    }
-  }
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      in_let_var = true;
-      var_val = name;
-      variables[name].append(evalString(i));
-      in_let_var = false;
-    }
-  }
-}
-
-void
-Postchunk::processModifyCase(xmlNode *localroot)
-{
-  xmlNode *leftSide = NULL, *rightSide = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
-    }
-  }
-
-  if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL;
-
-    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content);
-      }
-    }
-
-    string const result = copycase(evalString(rightSide),
-				   word[pos]->chunkPart(attr_items[(const char *) part]));
-    bool match = word[pos]->setChunkPart(attr_items[(const char *) part], result);
-
-    if(!match && trace)
-    {
-      wcerr << "apertium-postchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
-    }
-  }
-  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
-  {
-    string const val = (const char *) leftSide->properties->children->content;
-    variables[val] = copycase(evalString(rightSide), variables[val]);
-  }
-}
-
-void
-Postchunk::processCallMacro(xmlNode *localroot)
-{
-  const char *n = (const char *) localroot->properties->children->content;
-  int npar = 0;
-
-  xmlNode *macro = macro_map[macros[n]];
-
-  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
-    {
-      npar = atoi((const char *) i->children->content);
-      break;
-    }
-  }
+#include <lttoolbox/xml_walk_util.h>
+#include <lttoolbox/string_utils.h>
 
-  if (npar <= 0)
-  {
-    throw "Postchunk::processCallMacro() assumes npar > 0, but got npar <= 0";
-  }
+#include <iostream>
 
-  InterchunkWord **myword = NULL;
-  if(npar > 0)
-  {
-    myword = new InterchunkWord *[npar+1];
-  }
+using namespace std;
 
-  myword[0] = word[0];
+Postchunk::Postchunk()
+  : word(0), in_wblank(false), inword(false)
+{}
 
-  bool indexesOK = true;
-  int idx = 1;
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+bool
+Postchunk::checkIndex(xmlNode *element, int index, int limit)
+{
+  if(index > limit) // Note: Unlike transfer/interchunk, we allow index==limit!
   {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      int pos = atoi((const char *) i->properties->children->content);
-      if(!checkIndex(localroot, pos, lword)) {
-        indexesOK = false;      // avoid segfaulting on empty chunks, e.g. ^x<x>{}$
-        pos = 1;
-      }
-      myword[idx] = word[pos];
-      idx++;
-    }
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index > limit" << endl;
+    return false;
   }
-
-  swap(myword, word);
-  swap(npar, lword);
-
-  if(indexesOK) {
-    for(xmlNode *i = macro->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        processInstruction(i);
-      }
-    }
+  if(index < 0) {
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl;
+    return false;
   }
-  else {
-    wcerr << "Warning: Not calling macro \"" << n << "\" from line " << localroot->line << " (empty word?)" << endl;
+  if(word[index] == 0)
+  {
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl;
+    return false;
   }
-
-  swap(myword, word);
-  swap(npar, lword);
-
-  delete[] myword;
+  return true;
 }
 
-void
-Postchunk::processChoose(xmlNode *localroot)
+UString
+Postchunk::evalCachedString(xmlNode* element)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "when"))
-      {
-        bool picked_option = false;
-
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-	    if(!xmlStrcmp(j->name, (const xmlChar *) "test"))
-	    {
-	      if(!processTest(j))
-	      {
-		break;
-	      }
-	      else
-	      {
-	        picked_option = true;
-              }
-	    }
-	    else
-	    {
-	      processInstruction(j);
-	    }
-	  }
-	}
-        if(picked_option)
-        {
-          return;
+  TransferInstr& ti = evalStringCache[element];
+  switch (ti.getType()) {
+  case ti_clip_tl:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      if (gettingLemmaFromWord(ti.getContent()) && lword > 1) {
+        if (in_lu) {
+          out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
+        } else if (in_let_var) {
+          var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val],
+                                                   word[ti.getPos()]->getWblank());
         }
       }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "otherwise"))
-      {
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-	    processInstruction(j);
-	  }
-	}
-      }
+      return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
     }
-  }
-}
+    break;
 
-bool
-Postchunk::processLogical(xmlNode *localroot)
-{
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal"))
-  {
-    return processEqual(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with"))
-  {
-    return processBeginsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with-list"))
-  {
-    return processBeginsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with"))
-  {
-    return processEndsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with-list"))
-  {
-    return processEndsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "contains-substring"))
-  {
-    return processContainsSubstring(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "or"))
-  {
-    return processOr(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "and"))
-  {
-    return processAnd(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "not"))
-  {
-    return processNot(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "in"))
-  {
-    return processIn(localroot);
-  }
+  case ti_lu_count:
+    return StringUtils::itoa(tmpword.size());
 
-  return false;
-}
+  case ti_var:
+    if (lword > 1) {
+      out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]);
+    }
+    return variables[ti.getContent()];
 
-bool
-Postchunk::processIn(xmlNode *localroot)
-{
-  xmlNode *value = NULL;
-  xmlChar *idlist = NULL;
+  case ti_lit_tag:
+  case ti_lit:
+    return ti.getContent();
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(value == NULL)
-      {
-	value = i;
-      }
-      else
-      {
-	idlist = i->properties->children->content;
-	break;
+  case ti_b:
+    if (!blank_queue.empty()) {
+      UString retblank = blank_queue.front();
+      if (in_out) {
+        blank_queue.pop();
       }
+      return retblank;
+    } else {
+      return " "_u;
     }
-  }
+    break;
 
-  string sval = evalString(value);
+  case ti_get_case_from:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
+                                   evalString(ti.getPointer()));
+    }
+    break;
 
-  if(localroot->properties != NULL)
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      set<string, Ltstr> &myset = listslow[(const char *) idlist];
-      if(myset.find(tolower(sval)) != myset.end())
-      {
-	return true;
-      }
-      else
-      {
-	return false;
-      }
+  case ti_case_of_tl:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::getcase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
     }
-  }
+    break;
 
-  set<string, Ltstr> &myset = lists[(const char *) idlist];
-  if(myset.find(sval) != myset.end())
-  {
-    return true;
-  }
-  else
-  {
-    return false;
+  default:
+    return ""_u;
   }
+  return ""_u;
 }
 
-bool
-Postchunk::processTest(xmlNode *localroot)
+void
+Postchunk::processClip(xmlNode* element)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return processLogical(i);
+  int pos = 0;
+  UString part;
+  for(xmlAttr* i = element->properties; i != NULL; i = i->next) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
+      part = to_ustring((const char*) i->children->content);
+    } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
+      pos = atoi((const char *)i->children->content);
     }
   }
-  return false;
+  evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL);
 }
 
-bool
-Postchunk::processAnd(xmlNode *localroot)
+void
+Postchunk::processBlank(xmlNode* element)
 {
-  bool val = true;
-  for(xmlNode *i = localroot->children; val && i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val && processLogical(i);
-    }
+  if (element->properties == NULL) {
+    evalStringCache[element] = TransferInstr(ti_b, " "_u, -1);
+  } else {
+    int pos = atoi((const char *) element->properties->children->content) - 1;
+    evalStringCache[element] = TransferInstr(ti_b, ""_u, pos);
   }
-
-  return val;
 }
 
-bool
-Postchunk::processOr(xmlNode *localroot)
+void
+Postchunk::processLuCount(xmlNode* element)
 {
-  bool val = false;
-  for(xmlNode *i = localroot->children; !val && i != NULL ; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val || processLogical(i);
-    }
-  }
-
-  return val;
+  evalStringCache[element] = TransferInstr(ti_lu_count, ""_u, 0);
 }
 
-bool
-Postchunk::processNot(xmlNode *localroot)
+void
+Postchunk::processCaseOf(xmlNode* element)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return !processLogical(i);
+  int pos = 0;
+  UString part;
+  for (xmlAttr* i = element->properties; i != NULL; i = i->next) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
+      part = to_ustring((const char*) i->children->content);
+    } else if(!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
+      pos = atoi((const char *) i->children->content);
     }
   }
-  return false;
+  evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos);
 }
 
-bool
-Postchunk::processEqual(xmlNode *localroot)
+UString
+Postchunk::processLu(xmlNode* element)
 {
-  xmlNode *first = NULL, *second = NULL;
+  in_lu = true;
+  out_wblank.clear();
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
+  UString myword;
+  for (auto i : children(element)) {
+    myword.append(evalString(i));
   }
+  in_lu = false;
 
-  if(localroot->properties == NULL)
-  {
-    return evalString(first) == evalString(second);
+  if (lword == 1) {
+    out_wblank = word[1]->getWblank();
   }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return tolower(evalString(first)) == tolower(evalString(second));
-    }
-    else
-    {
-      return evalString(first) == evalString(second);
-    }
-  }
-}
 
-bool
-Postchunk::beginsWith(string const &s1, string const &s2) const
-{
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = 0; i != limit; i++)
-  {
-    if(s1[i] != s2[i])
-    {
-      return false;
-    }
+  if (myword.empty()) {
+    return ""_u;
+  } else {
+    return out_wblank+"^"_u+myword+"$"_u;
   }
-
-  return true;
 }
 
-bool
-Postchunk::endsWith(string const &s1, string const &s2) const
+UString
+Postchunk::processMlu(xmlNode* element)
 {
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = limit-1, j = constraint - 1; i >= 0; i--, j--)
-  {
-    if(s1[j] != s2[i])
-    {
-      return false;
-    }
-  }
+  UString value;
 
-  return true;
-}
+  bool first_time = true;
+  out_wblank.clear();
+  in_lu = true;
 
+  for (auto i : children(element)) {
+    UString myword;
 
-bool
-Postchunk::processBeginsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
+    for (auto j : children(i)) {
+      myword.append(evalString(j));
+    }
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
+	if (!first_time) {
+      if(!myword.empty() && myword[0] != '#') {  //'+#' problem
+        value += '+';
       }
-      else
-      {
-	second = i;
-	break;
+    } else {
+      if (!myword.empty()) {
+        first_time = false;
       }
     }
-  }
 
-  if(localroot->properties == NULL)
-  {
-    return beginsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return beginsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return beginsWith(evalString(first), evalString(second));
-    }
+	value.append(myword);
   }
-}
 
-bool
-Postchunk::processEndsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
+  in_lu = false;
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
+  if (lword == 1) {
+    out_wblank = word[1]->getWblank();
   }
 
-  if(localroot->properties == NULL)
-  {
-    return endsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return endsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return endsWith(evalString(first), evalString(second));
-    }
+  if (value.empty()) {
+    return ""_u;
+  } else {
+    return out_wblank+"^"_u+value+"$"_u;
   }
 }
 
-bool
-Postchunk::processBeginsWithList(xmlNode *localroot)
+UString
+Postchunk::processChunk(xmlNode* element)
 {
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
+  cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+  exit(EXIT_FAILURE);
+  return ""_u; // make the type checker happy
+}
 
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
-  }
-  else
-  {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
-  }
+void
+Postchunk::processOut(xmlNode *localroot)
+{
+  in_out = true;
 
-  for(; it != limit; it++)
-  {
-    if(beginsWith(needle, *it))
-    {
-      return true;
+  for (auto i : children(localroot)) {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) {
+      write(processLu(i), output);
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) {
+      write(processMlu(i), output);
+    } else { // 'b'
+      write(evalString(i), output);
     }
   }
-  return false;
+
+  in_out = false;
 }
 
-bool
-Postchunk::processEndsWithList(xmlNode *localroot)
+void
+Postchunk::processTags(xmlNode *localroot)
 {
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
+  for (auto i : children(localroot)) {
+    if(!xmlStrcmp(i->name, (xmlChar const *) "tag")) {
+      for (auto j : children(i)) {
+        write(evalString(j), output);
       }
     }
   }
+}
 
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
+void
+Postchunk::processLet(xmlNode *localroot)
+{
+  xmlNode *leftSide = NULL, *rightSide = NULL;
 
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
-  }
-  else
-  {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
+  for (auto i : children(localroot)) {
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
+    }
   }
 
-  for(; it != limit; it++)
+  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
+  if(it != evalStringCache.end())
   {
-    if(endsWith(needle, *it))
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
     {
-      return true;
+      case ti_var:
+        in_let_var = true;
+        var_val = ti.getContent();
+        var_out_wblank[var_val].clear();
+
+        variables[ti.getContent()] = evalString(rightSide);
+
+        in_let_var = false;
+        return;
+
+      case ti_clip_tl:
+      {
+        bool match = word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide));
+        if(!match && trace)
+        {
+          cerr << "apertium-postchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+        }
+      }
+        return;
+
+      default:
+        return;
     }
   }
-  return false;
-}
+  if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
+  {
+    in_let_var = true;
 
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);
 
-bool
-Postchunk::processContainsSubstring(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
+    var_val = val;
+    var_out_wblank[var_val].clear();
+
+    variables[val] = evalString(rightSide);
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+    in_let_var = false;
+    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
+  }
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
   {
-    if(i->type == XML_ELEMENT_NODE)
+    int pos = 0;
+    UString part;
+
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
-      if(first == NULL)
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
       {
-        first = i;
+        part = to_ustring((const char*)i->children->content);
       }
-      else
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
       {
-	second = i;
-	break;
+	pos = atoi((const char *) i->children->content);
       }
     }
-  }
 
-  if(localroot->properties == NULL)
-  {
-    return evalString(first).find(evalString(second)) != string::npos;
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return tolower(evalString(first)).find(tolower(evalString(second))) != string::npos;
-    }
-    else
+
+    bool match = word[pos]->setChunkPart(attr_items[part],
+					 evalString(rightSide));
+    if(!match && trace)
     {
-      return evalString(first).find(evalString(second)) != string::npos;
+      cerr << "apertium-postchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
     }
+    evalStringCache[leftSide] = TransferInstr(ti_clip_tl, part, pos, NULL);
   }
 }
 
-string
-Postchunk::copycase(string const &source_word, string const &target_word)
+void
+Postchunk::processModifyCase(xmlNode *localroot)  // <modify-case>: stores StringUtils::copycase(<2nd child value>, <current value of 1st child>) back into the 1st child (a clip or a var)
 {
-  wstring result;
-  wstring const s_word = UtfConverter::fromUtf8(source_word);
-  wstring const t_word = UtfConverter::fromUtf8(target_word);
-
-  bool firstupper = iswupper(s_word[0]);
-  bool uppercase = firstupper && iswupper(s_word[s_word.size()-1]);
-  bool sizeone = s_word.size() == 1;
+  xmlNode *leftSide = NULL, *rightSide = NULL;
 
-  if(!uppercase || (sizeone && uppercase))
-  {
-    result = StringUtils::tolower(t_word);
-  }
-  else
-  {
-    result = StringUtils::toupper(t_word);
+  for (auto i : children(localroot)) {  // first child -> leftSide, second -> rightSide; any further children are ignored
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
+    }
   }
 
-  if(firstupper)
+  if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))  // NOTE(review): leftSide is dereferenced without a NULL check (it stays NULL if localroot has no children)
   {
-    result[0] = towupper(result[0]);
-  }
-
-  return UtfConverter::toUtf8(result);
-}
-
-string
-Postchunk::caseOf(string const &str)
-{
-  wstring const s = UtfConverter::fromUtf8(str);
+    int pos = 0;  // pos and part keep these defaults when the clip lacks the attribute
+    UString part;
 
-  if(s.size() > 1)
-  {
-    if(!iswupper(s[0]))
-    {
-      return "aa";
-    }
-    else if(!iswupper(s[s.size()-1]))
-    {
-      return "Aa";
-    }
-    else
-    {
-      return "AA";
-    }
-  }
-  else if(s.size() == 1)
-  {
-    if(!iswupper(s[0]))
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
-      return "aa";
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+        part = to_ustring((const char*)i->children->content);
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+        pos = atoi((const char *) i->children->content);
+      }
     }
-    else
+
+    UString const result = StringUtils::copycase(evalString(rightSide),
+				   word[pos]->chunkPart(attr_items[part]));
+    bool match = word[pos]->setChunkPart(attr_items[part], result);
+
+    if(!match && trace)  // setChunkPart() returned false; only reported when trace is on
     {
-      return "Aa";
+      cerr << "apertium-postchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
     }
   }
-  else
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
   {
-    return "aa";
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);  // value of the first attribute of <var>; presumably its name — TODO confirm
+    variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]);
   }
 }
 
-wstring
-Postchunk::caseOf(wstring const &str)
+void
+Postchunk::processCallMacro(xmlNode *localroot)  // <call-macro>: runs the macro's instructions with word[]/lword temporarily replaced by the selected argument words
 {
-  if(str.size() > 1)
+  UString n = to_ustring((const char *) localroot->properties->children->content);  // value of the first attribute; used below as the macro name
+  int npar = 0;
+
+  xmlNode *macro = macro_map[macros[n]];  // NOTE(review): no check here that n is a known macro
+
+  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
   {
-    if(!iswupper(str[0]))
-    {
-      return L"aa";
-    }
-    else if(!iswupper(str[str.size()-1]))
-    {
-      return L"Aa";
-    }
-    else
+    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
     {
-      return L"AA";
+      npar = atoi((const char *) i->children->content);
+      break;
     }
   }
-  else if(str.size() == 1)
+
+  if (npar <= 0)
   {
-    if(!iswupper(str[0]))
-    {
-      return L"aa";
-    }
-    else
-    {
-      return L"Aa";
-    }
+    throw "Postchunk::processCallMacro() assumes npar > 0, but got npar <= 0";
   }
-  else
+
+  InterchunkWord **myword = NULL;
+  if(npar > 0)  // always true at this point: npar <= 0 has already thrown above
   {
-    return L"aa";
+    myword = new InterchunkWord *[npar+1];  // slot 0 plus one slot per declared macro parameter
   }
-}
 
-string
-Postchunk::tolower(string const &str) const
-{
-  return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str)));
-}
-
-string
-Postchunk::tags(string const &str) const
-{
-  string result = "<";
+  myword[0] = word[0];  // slot 0 is passed through to the macro unchanged
 
-  for(unsigned int i = 0, limit = str.size(); i != limit; i++)
-  {
-    if(str[i] == '.')
-    {
-      result.append("><");
-    }
-    else
-    {
-      result += str[i];
+  bool indexesOK = true;
+  int idx = 1;
+  for (auto i : children(localroot)) {
+    int pos = atoi((const char *) i->properties->children->content);  // first attribute of each child, read as a word position
+    if(!checkIndex(localroot, pos, lword)) {
+      indexesOK = false;      // avoid segfaulting on empty chunks, e.g. ^x<x>{}$
+      pos = 1;  // NOTE(review): word[1] is still read below; confirm it exists when the chunk is empty
     }
+    myword[idx] = word[pos];  // NOTE(review): idx is not bounded by npar; more children than npar would write past myword
+    idx++;
   }
 
-  result += '>';
-
-  return result;
-}
+  swap(myword, word);  // from here on word/lword describe the macro's arguments
+  swap(npar, lword);
 
-void
-Postchunk::processRule(xmlNode *localroot)
-{
-  // localroot is suposed to be an 'action' tag
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
+  if(indexesOK) {
+    for (auto i : children(macro)) {
       processInstruction(i);
     }
   }
-  
-  while(!blank_queue.empty()) //flush remaining blanks that are not spaces
-  {
-    if(blank_queue.front().compare(" ") != 0)
-    {
-      fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output);
-    }
-    blank_queue.pop();
+  else {
+    cerr << "Warning: Not calling macro \"" << n << "\" from line " << localroot->line << " (empty word?)" << endl;
   }
+
+  swap(myword, word);  // restore the caller's word array and count
+  swap(npar, lword);
+
+  delete[] myword;  // frees only the pointer array, not the words it points to
 }
 
 TransferToken &
-Postchunk::readToken(FILE *in)
+Postchunk::readToken(InputFile& in)  // returns input_buffer.next() when the buffer is not empty; otherwise reads one token from in: blank (ends at '^'), word (ends at '$' while inword) or eof
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wstring content;
+  UString content;
   while(true)
   {
-    int val = fgetwc_unlocked(in);
-    if(feof(in) || (internal_null_flush && val == 0))
+    UChar32 val = in.get();
+    if(in.eof() || (internal_null_flush && val == 0))  // stop at eof, or at a NUL while internal_null_flush is set
     {
       return input_buffer.add(TransferToken(content, tt_eof));
     }
-    if(val == L'\\')
+    if(val == '\\')
     {
-      content += L'\\';
-      content += wchar_t(fgetwc_unlocked(in));
+      content += '\\';
+      content += in.get();  // the escaped character is kept together with its backslash
     }
-    else if(val == L'[')
+    else if(val == '[')
     {
-      content += L'[';
+      content += '[';
       while(true)
       {
-	int val2 = fgetwc_unlocked(in);
-	if(val2 == L'\\')
-	{
-	  content += L'\\';
-	  content += wchar_t(fgetwc_unlocked(in));
-	}
-	else if(val2 == L']')
-	{
-	  content += L']';
-	  break;
-	}
-	else
-	{
-	  content += wchar_t(val2);
-	}
+        UChar32 val2 = in.get();  // NOTE(review): no eof test inside this loop — confirm an unterminated '[' cannot loop forever
+        if(val2 == '\\') {
+          content += '\\';
+          content += in.get();
+        } else if(val2 == ']') {
+          content += ']';
+          break;
+        } else {
+          content += val2;
+        }
       }
     }
-    else if(inword && val == L'{')
-    {
-      content += L'{';
-      while(true)
-      {
-	int val2 = fgetwc_unlocked(in);
-	if(val2 == L'\\')
-	{
-	  content += L'\\';
-	  content += wchar_t(fgetwc_unlocked(in));
-	}
-	else if(val2 == L'}')
-	{
-	  int val3 = wchar_t(fgetwc_unlocked(in));
-	  ungetwc(val3, in);
-
-	  content += L'}';
-	  if(val3 == L'$')
-	  {
-	    break;
-	  }
-	}
-	else
-	{
-	  content += wchar_t(val2);
-	}
+    else if(inword && val == '{')  // inside a word, '{' starts a span copied verbatim up to a '}' that is followed by '$'
+    {
+      content += '{';
+      while(true) {
+        UChar32 val2 = in.get();
+        if(val2 == '\\') {
+          content += '\\';
+          content += in.get();
+        } else if(val2 == '}') {
+          UChar32 val3 = in.peek();  // look ahead only; the '$' itself is left for the outer loop
+          content += '}';
+          if(val3 == '$') {
+            break;
+          }
+        } else {
+          content += val2;
+        }
       }
     }
-    else if(inword && val == L'$')
+    else if(inword && val == '$')
     {
       inword = false;
       return input_buffer.add(TransferToken(content, tt_word));
     }
-    else if(val == L'^')
+    else if(val == '^')
     {
       inword = true;
       return input_buffer.add(TransferToken(content, tt_blank));
     }
     else
     {
-      content += wchar_t(val);
+      content += val;
     }
   }
 }
 
-bool
-Postchunk::getNullFlush(void)
-{
-  return null_flush;
-}
-
-void
-Postchunk::setNullFlush(bool null_flush)
-{
-  this->null_flush = null_flush;
-}
-
-void
-Postchunk::setTrace(bool trace)
-{
-  this->trace = trace;
-}
-
 void
-Postchunk::postchunk_wrapper_null_flush(FILE *in, FILE *out)
+Postchunk::postchunk_wrapper_null_flush(InputFile& in, UFILE* out)
 {
   null_flush = false;
   internal_null_flush = true;
 
-  while(!feof(in))
+  while(!in.eof())
   {
     postchunk(in, out);
-    fputwc_unlocked(L'\0', out);
-    int code = fflush(out);
-    if(code != 0)
-    {
-      wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', out);
+    u_fflush(out);
+    variables = variable_defaults;
   }
 
   internal_null_flush = false;
@@ -1747,7 +546,7 @@ Postchunk::postchunk_wrapper_null_flush(FILE *in, FILE *out)
 }
 
 void
-Postchunk::postchunk(FILE *in, FILE *out)
+Postchunk::postchunk(InputFile& in, UFILE* out)
 {
   if(getNullFlush())
   {
@@ -1755,6 +554,9 @@ Postchunk::postchunk(FILE *in, FILE *out)
   }
 
   unsigned int last = input_buffer.getPos();
+  unsigned int prev_last = last;
+  int lastrule_id = -1;
+  set<int> banned_rules;
 
   output = out;
   ms.init(me->getInitial());
@@ -1765,48 +567,67 @@ Postchunk::postchunk(FILE *in, FILE *out)
     {
       if(lastrule != NULL)
       {
-	applyRule();
-	input_buffer.setPos(last);
+        int words_to_consume = applyRule();
+        if (words_to_consume == -1) {
+          banned_rules.clear();
+          input_buffer.setPos(last);
+        } else if (words_to_consume == 1) {
+          banned_rules.clear();
+          if (prev_last >= input_buffer.getSize()) {
+            input_buffer.setPos(0);
+          } else {
+            input_buffer.setPos(prev_last+1);
+          }
+          while (true) {
+            TransferToken& tt = input_buffer.next();
+            if (tt.getType() == tt_word) {
+              break;
+            }
+          }
+        } else {
+          banned_rules.insert(lastrule_id);
+          input_buffer.setPos(prev_last);
+          input_buffer.next();
+          last = input_buffer.getPos();
+        }
+        lastrule_id = -1;
       }
       else
       {
-	if(tmpword.size() != 0)
-	{
-	  unchunk(*tmpword[0], output);
-	  tmpword.clear();
-	  input_buffer.setPos(last);
-	  input_buffer.next();
-	  last = input_buffer.getPos();
-	  ms.init(me->getInitial());
-	}
-	else if(tmpblank.size() != 0)
-	{
-	  fputws_unlocked(tmpblank[0]->c_str(), output);
-	  tmpblank.clear();
-	  last = input_buffer.getPos();
-	  ms.init(me->getInitial());
-	}
+        if(tmpword.size() != 0) {
+          unchunk(*tmpword[0], output);
+          tmpword.clear();
+          input_buffer.setPos(last);
+          input_buffer.next();
+          prev_last = last;
+          banned_rules.clear();
+          last = input_buffer.getPos();
+          ms.init(me->getInitial());
+        }
+        else if(tmpblank.size() != 0) {
+          write(*tmpblank[0], output);
+          tmpblank.clear();
+          prev_last = last;
+          last = input_buffer.getPos();
+          ms.init(me->getInitial());
+        }
       }
     }
-    int val = ms.classifyFinals(me->getFinals());
+    int val = ms.classifyFinals(me->getFinals(), banned_rules);
     if(val != -1)
     {
       size_t lastrule_line = rule_lines[val-1];
       lastrule = rule_map[val-1];
       last = input_buffer.getPos();
+      lastrule_id = val;
 
       if(trace)
       {
-        wcerr << endl << L"apertium-postchunk: Rule " << val << L" line " << lastrule_line << L" ";
-        for (unsigned int ind = 0; ind < tmpword.size(); ind++)
-        {
-          if (ind != 0)
-          {
-            wcerr << L" ";
-          }
-          fputws_unlocked(tmpword[ind]->c_str(), stderr);
+        cerr << endl << "apertium-postchunk: Rule " << val << " line " << lastrule_line;
+        for (auto& it : tmpword) {
+          cerr << " " << *it;
         }
-        wcerr << endl;
+        cerr << endl;
       }
     }
 
@@ -1814,59 +635,55 @@ Postchunk::postchunk(FILE *in, FILE *out)
 
     switch(current.getType())
     {
-      case tt_word:
-	applyWord(current.getContent());
-        tmpword.push_back(&current.getContent());
-	break;
+    case tt_word:
+      applyWord(current.getContent());
+      tmpword.push_back(&current.getContent());
+      break;
 
-      case tt_blank:
-	ms.step(L' ');
-	tmpblank.push_back(&current.getContent());
-	break;
+    case tt_blank:
+      ms.step(' ');
+      tmpblank.push_back(&current.getContent());
+      break;
 
-      case tt_eof:
-	if(tmpword.size() != 0)
-	{
-	  tmpblank.push_back(&current.getContent());
-	  ms.clear();
-	}
-	else
-	{
-	  fputws_unlocked(current.getContent().c_str(), output);
-	  return;
-	}
-	break;
+    case tt_eof:
+      if(tmpword.size() != 0) {
+        tmpblank.push_back(&current.getContent());
+        ms.clear();
+      }
+      else {
+        write(current.getContent(), output);
+        return;
+      }
+      break;
 
-      default:
-	wcerr << "Error: Unknown input token." << endl;
-	return;
+    default:
+      cerr << "Error: Unknown input token." << endl;
+      return;
     }
   }
 }
 
-void
+int
 Postchunk::applyRule()
 {
-  wstring const chunk = *tmpword[0];
+  UString const chunk = *tmpword[0];
   tmpword.clear();
   splitWordsAndBlanks(chunk, tmpword, tmpblank);
 
   word = new InterchunkWord *[tmpword.size()+1];
   lword = tmpword.size();
-  word[0] = new InterchunkWord(UtfConverter::toUtf8(wordzero(chunk)));
+  word[0] = new InterchunkWord(wordzero(chunk));
 
   for(unsigned int i = 1, limit = tmpword.size()+1; i != limit; i++)
   {
-    if(i != 1)
-    {
-      string blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1]));
-      blank_queue.push(blank_to_add);
+    if(i != 1) {
+      blank_queue.push(*tmpblank[i-1]);
     }
 
-    word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i-1]));
+    word[i] = new InterchunkWord(*tmpword[i-1]);
   }
 
-  processRule(lastrule);
+  int words_to_consume = processRule(lastrule);
   lastrule = NULL;
 
   if(word)
@@ -1890,22 +707,23 @@ Postchunk::applyRule()
   tmpword.clear();
   tmpblank.clear();
   ms.init(me->getInitial());
+  return words_to_consume;
 }
 
 void
-Postchunk::applyWord(wstring const &word_str)
+Postchunk::applyWord(UString const &word_str)
 {
-  ms.step(L'^');
+  ms.step('^');
   for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
   {
     switch(word_str[i])
     {
-      case L'\\':
+      case '\\':
         i++;
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
 
-      case L'<':
+      case '<':
 /*	for(unsigned int j = i+1; j != limit; j++)
 	{
 	  if(word_str[j] == '>')
@@ -1925,40 +743,41 @@ Postchunk::applyWord(wstring const &word_str)
 	}
 	break;*/
 
-      case L'{':  // ignore the unmodifiable part of the chunk
-        ms.step(L'$');
+      case '{':  // ignore the unmodifiable part of the chunk
+        ms.step('$');
         return;
 
       default:
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
     }
   }
-  ms.step(L'$');
+  ms.step('$');
 }
 
-vector<wstring>
-Postchunk::getVecTags(wstring const &chunk)
+vector<UString>
+Postchunk::getVecTags(UString const &chunk)
 {
-  vector<wstring> vectags;
+  vector<UString> vectags;
 
   for(int i = 0, limit = chunk.size(); i != limit; i++)
   {
-    if(chunk[i] == L'\\')
+    if(chunk[i] == '\\')
     {
       i++;
     }
-    else if(chunk[i] == L'<')
+    else if(chunk[i] == '<')
     {
-      wstring mytag;
+      UString mytag;
       do
       {
         mytag += chunk[i++];
       }
-      while(chunk[i] != L'>');
-      vectags.push_back(mytag + L'>');
+      while(chunk[i] != '>');
+      mytag += '>';
+      vectags.push_back(mytag);
     }
-    else if(chunk[i] == L'{')
+    else if(chunk[i] == '{')
     {
       break;
     }
@@ -1967,15 +786,15 @@ Postchunk::getVecTags(wstring const &chunk)
 }
 
 int
-Postchunk::beginChunk(wstring const &chunk)
+Postchunk::beginChunk(UString const &chunk)
 {
   for(int i = 0, limit = chunk.size(); i != limit; i++)
   {
-    if(chunk[i] == L'\\')
+    if(chunk[i] == '\\')
     {
       i++;
     }
-    else if(chunk[i] == L'{')
+    else if(chunk[i] == '{')
     {
       return i + 1;
     }
@@ -1984,222 +803,220 @@ Postchunk::beginChunk(wstring const &chunk)
 }
 
 int
-Postchunk::endChunk(wstring const &chunk)
+Postchunk::endChunk(UString const &chunk)  // returns chunk.size()-2; callers in this file use it as the exclusive upper bound when walking the chunk
 {
   return chunk.size()-2;
 }
 
-wstring
-Postchunk::wordzero(wstring const &chunk)
+UString
+Postchunk::wordzero(UString const &chunk)  // returns the part of chunk before its first unescaped '{', or an empty string if there is none
 {
   for(unsigned int i = 0, limit = chunk.size(); i != limit ;i++)
   {
-    if(chunk[i] == L'\\')
+    if(chunk[i] == '\\')  // a backslash escapes the next character, which is skipped
     {
       i++;
     }
-    else if(chunk[i] == L'{')
+    else if(chunk[i] == '{')
     {
       return chunk.substr(0, i);
     }
   }
 
-  return L"";
+  return ""_u;  // no unescaped '{' was found
 }
 
-wstring
-Postchunk::pseudolemma(wstring const &chunk)
+UString
+Postchunk::pseudolemma(UString const &chunk)  // returns the part of chunk before its first unescaped '<' or '{', or an empty string if there is none
 {
   for(unsigned int i = 0, limit = chunk.size(); i != limit ;i++)
   {
-    if(chunk[i] == L'\\')
+    if(chunk[i] == '\\')  // a backslash escapes the next character, which is skipped
     {
       i++;
     }
-    else if(chunk[i] == L'<' || chunk[i] == L'{')
+    else if(chunk[i] == '<' || chunk[i] == '{')
     {
       return chunk.substr(0, i);
     }
   }
 
-  return L"";
+  return ""_u;  // neither '<' nor '{' was found unescaped
 }
 
 void
-Postchunk::unchunk(wstring const &chunk, FILE *output)
+Postchunk::unchunk(UString const &chunk, UFILE* output)  // writes the span of chunk between beginChunk() and endChunk() to output, expanding numeric <N> tags from getVecTags() and upper-casing per the pseudolemma's case
 {
-  vector<wstring> vectags = getVecTags(chunk);
-  wstring case_info = caseOf(pseudolemma(chunk));
+  vector<UString> vectags = getVecTags(chunk);
+  UString case_info = StringUtils::getcase(pseudolemma(chunk));  // compared against "AA" and "Aa" below
   bool uppercase_all = false;
   bool uppercase_first = false;
 
-  if(case_info == L"AA")
+  if(case_info == "AA"_u)
   {
     uppercase_all = true;
   }
-  else if(case_info == L"Aa")
+  else if(case_info == "Aa"_u)
   {
     uppercase_first = true;
   }
 
   for(int i = beginChunk(chunk), limit = endChunk(chunk); i < limit; i++)
   {
-    if(chunk[i] == L'\\')
-    {
-      fputwc_unlocked(L'\\', output);
-      fputwc_unlocked(chunk[++i], output);
-    }
-    else if(chunk[i] == L'^')
-    {
-      fputwc_unlocked(L'^', output);
-      while(chunk[++i] != L'$')
+    if(chunk[i] == '\\') {
+      u_fputc('\\', output);
+      u_fputc(chunk[++i], output);
+    } else if(chunk[i] == '^') {  // a word: copied up to its '$', with tag expansion and case handling
+      u_fputc('^', output);
+      while(chunk[++i] != '$')
       {
-        if(chunk[i] == L'\\')
+        if(chunk[i] == '\\')
         {
-          fputwc_unlocked(L'\\', output);
-          fputwc_unlocked(chunk[++i], output);
+          u_fputc('\\', output);
+          u_fputc(chunk[++i], output);
         }
-        else if(chunk[i] == L'<')
+        else if(chunk[i] == '<')
         {
-          if(iswdigit(chunk[i+1]))
+          if(u_isdigit(chunk[i+1]))  // "<" followed by a digit is a numeric reference into vectags
           {
-            // replace tag
-            unsigned long value = wcstoul(chunk.c_str()+i+1,
-					  NULL, 0) - 1;
-            //atoi(chunk.c_str()+i+1)-1;
+            int j = ++i;  // j: index of the first digit
+            while (chunk[++i] != '>');  // advance i to the closing '>'
+            unsigned long value = StringUtils::stoi(chunk.substr(j, i-j)) - 1;  // references are 1-based; vectags is 0-based
             if(vectags.size() > value)
             {
-              fputws_unlocked(vectags[value].c_str(), output);
+              write(vectags[value], output);  // written only when the reference is in range; otherwise nothing is emitted
             }
-            while(chunk[++i] != L'>');
           }
           else
           {
-            fputwc_unlocked(L'<', output);
-	    while(chunk[++i] != L'>') fputwc_unlocked(chunk[i], output);
-            fputwc_unlocked(L'>', output);
+            u_fputc('<', output);
+            while(chunk[++i] != '>') u_fputc(chunk[i], output);
+            u_fputc('>', output);
           }
         }
         else
         {
           if(uppercase_all)
           {
-            fputwc_unlocked(towupper(chunk[i]), output);
+            // TODO
+            u_fputc(u_toupper(chunk[i]), output);
           }
           else if(uppercase_first)
           {
-	    if(iswalnum(chunk[i]))
-	    {
-	      fputwc_unlocked(towupper(chunk[i]), output);
-	      uppercase_first = false;
-	    }
-            else
-	    {
-	      fputwc_unlocked(chunk[i], output);
-	    }
+            if(u_isalnum(chunk[i])) {
+              // TODO
+              u_fputc(u_toupper(chunk[i]), output);
+              uppercase_first = false;  // only the first alphanumeric character of the whole chunk is upper-cased
+            } else {
+              u_fputc(chunk[i], output);
+            }
           }
           else
           {
-            fputwc_unlocked(chunk[i], output);
+            u_fputc(chunk[i], output);
           }
         }
       }
-      fputwc_unlocked(L'$', output);
+      u_fputc('$', output);
     }
-    else if(chunk[i] == L'[')
+    else if(chunk[i] == '[')  // a bracketed span is copied through unchanged, escapes included
     {
-      fputwc_unlocked(L'[', output);
-      while(chunk[++i] != L']')
+      u_fputc('[', output);
+      while(chunk[++i] != ']')
       {
-        if(chunk[i] == L'\\')
+        if(chunk[i] == '\\')
         {
-          fputwc_unlocked(L'\\', output);
-          fputwc_unlocked(chunk[++i], output);
+          u_fputc('\\', output);
+          u_fputc(chunk[++i], output);
         }
         else
         {
-          fputwc_unlocked(chunk[i], output);
+          u_fputc(chunk[i], output);
         }
       }
-      fputwc_unlocked(L']', output);
+      u_fputc(']', output);
     }
     else
     {
-      fputwc_unlocked(chunk[i], output);
+      u_fputc(chunk[i], output);
     }
   }
 }
 
 
 void
-Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
-                               vector<wstring *> &blanks)
+Postchunk::splitWordsAndBlanks(UString const &chunk, vector<UString *> &words,
+                               vector<UString *> &blanks)
 {
-  vector<wstring> vectags = getVecTags(chunk);
-  wstring case_info = caseOf(pseudolemma(chunk));
+  vector<UString> vectags = getVecTags(chunk);
+  UString case_info = StringUtils::getcase(pseudolemma(chunk));
   bool uppercase_all = false;
   bool uppercase_first = false;
   bool lastblank = true;
 
-  if(case_info == L"AA")
+  if(case_info == "AA"_u)
   {
     uppercase_all = true;
   }
-  else if(case_info == L"Aa")
+  else if(case_info == "Aa"_u)
   {
     uppercase_first = true;
   }
 
   for(int i = beginChunk(chunk), limit = endChunk(chunk); i < limit; i++)
   {
-    if(chunk[i] == L'^')
+    if(chunk[i] == '^')
     {
       if(!lastblank)
       {
-        blanks.push_back(new wstring(L""));
+        blanks.push_back(new UString(""_u));
       }
       lastblank = false;
-      wstring *myword = new wstring();
-      wstring &ref = *myword;
+      UString *myword = new UString();
+      UString &ref = *myword;
 
-      while(chunk[++i] != L'$')
+      while(chunk[++i] != '$')
       {
-        if(chunk[i] == L'\\')
+        if(chunk[i] == '\\')
         {
-          ref += L'\\';
+          ref += '\\';
           ref += chunk[++i];
         }
-        else if(chunk[i] == L'<')
+        else if(chunk[i] == '<')
         {
-          if(iswdigit(chunk[i+1]))
+          if(u_isdigit(chunk[i+1]))
           {
             // replace tag
-            unsigned long value = wcstoul(chunk.c_str()+i+1,
-                                          NULL, 0) - 1;
+            unsigned long value = StringUtils::stoi(chunk.c_str()+i+1) - 1;
+            // TODO: make sure this is equivalent to the old wcstoul() call below
+            //unsigned long value = wcstoul(chunk.c_str()+i+1,
+            //                              NULL, 0) - 1;
             if(vectags.size() > value)
             {
               ref.append(vectags[value]);
             }
-            while(chunk[++i] != L'>');
+            while(chunk[++i] != '>');
           }
           else
           {
-            ref += L'<';
-            while(chunk[++i] != L'>') ref += chunk[i];
-            ref += L'>';
+            ref += '<';
+            while(chunk[++i] != '>') ref += chunk[i];
+            ref += '>';
           }
         }
         else
         {
           if(uppercase_all)
           {
-            ref += towupper(chunk[i]);
+            // TODO
+            ref += u_toupper(chunk[i]);
           }
           else if(uppercase_first)
           {
-            if(iswalnum(chunk[i]))
+            if(u_isalnum(chunk[i]))
             {
-              ref += towupper(chunk[i]);
+              // TODO
+              ref += u_toupper(chunk[i]);
               uppercase_first = false;
             }
             else
@@ -2216,77 +1033,80 @@ Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
 
       words.push_back(myword);
     }
-    else if(chunk[i] == L'[')
+    else if(chunk[i] == '[')
     {
-      if(chunk[i+1] == L'[') //wordbound blank
+      if(chunk[i+1] == '[') //wordbound blank
       {
         if(!lastblank)
         {
-          blanks.push_back(new wstring(L""));
+          blanks.push_back(new UString(""_u));
         }
         lastblank = false;
-        wstring *myword = new wstring();
-        wstring &ref = *myword;
+        UString *myword = new UString();
+        UString &ref = *myword;
 
         while(true)
         {
-          if(chunk[i] == L'\\')
+          if(chunk[i] == '\\')
           {
-            ref += L'\\';
+            ref += '\\';
             ref += chunk[++i];
           }
-          else if(chunk[i] == L']' && chunk[i-1] == L']')
+          else if(chunk[i] == ']' && chunk[i-1] == ']')
           {
             ref += chunk[i];
-            i++; //i->"^"
+            i++; // i now points at '^'
             break;
           }
           else
           {
             ref += chunk[i];
           }
-          
+
           i++;
         }
-        
-        while(chunk[++i] != L'$')
+
+        while(chunk[++i] != '$')
         {
-          if(chunk[i] == L'\\')
+          if(chunk[i] == '\\')
           {
-            ref += L'\\';
+            ref += '\\';
             ref += chunk[++i];
           }
-          else if(chunk[i] == L'<')
+          else if(chunk[i] == '<')
           {
-            if(iswdigit(chunk[i+1]))
+            if(u_isdigit(chunk[i+1]))
             {
               // replace tag
-              unsigned long value = wcstoul(chunk.c_str()+i+1,
-                                            NULL, 0) - 1;
+              unsigned long value = StringUtils::stoi(chunk.c_str()+i+1) - 1;
+              //unsigned long value = wcstoul(chunk.c_str()+i+1,
+              //                              NULL, 0) - 1;
+              // TODO: make sure this is equivalent
               if(vectags.size() > value)
               {
                 ref.append(vectags[value]);
               }
-              while(chunk[++i] != L'>');
+              while(chunk[++i] != '>');
             }
             else
             {
-              ref += L'<';
-              while(chunk[++i] != L'>') ref += chunk[i];
-              ref += L'>';
+              ref += '<';
+              while(chunk[++i] != '>') ref += chunk[i];
+              ref += '>';
             }
           }
           else
           {
             if(uppercase_all)
             {
-              ref += towupper(chunk[i]);
+              // TODO
+              ref += u_toupper(chunk[i]);
             }
             else if(uppercase_first)
             {
-              if(iswalnum(chunk[i]))
+              if(u_isalnum(chunk[i])) // TODO
               {
-                ref += towupper(chunk[i]);
+                ref += u_toupper(chunk[i]); // TODO
                 uppercase_first = false;
               }
               else
@@ -2307,15 +1127,15 @@ Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
       {
         if (!(lastblank && blanks.back()))
         {
-          blanks.push_back(new wstring());
+          blanks.push_back(new UString());
         }
-        wstring &ref = *(blanks.back());
-        ref += L'[';
-        while(chunk[++i] != L']')
+        UString &ref = *(blanks.back());
+        ref += '[';
+        while(chunk[++i] != ']')
         {
-          if(chunk[i] == L'\\')
+          if(chunk[i] == '\\')
           {
-            ref += L'\\';
+            ref += '\\';
             ref += chunk[++i];
           }
           else
@@ -2332,13 +1152,13 @@ Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
     {
       if (!lastblank)
       {
-        wstring *myblank = new wstring(L"");
+        UString *myblank = new UString(""_u);
         blanks.push_back(myblank);
       }
-      wstring &ref = *(blanks.back());
-      if(chunk[i] == L'\\')
+      UString &ref = *(blanks.back());
+      if(chunk[i] == '\\')
       {
-        ref += L'\\';
+        ref += '\\';
         ref += chunk[++i];
       }
       else
@@ -2349,4 +1169,3 @@ Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
     }
   }
 }
-
diff --git a/apertium/postchunk.dtd b/apertium/postchunk.dtd
index ea61777..0e4513f 100644
--- a/apertium/postchunk.dtd
+++ b/apertium/postchunk.dtd
@@ -23,7 +23,7 @@
 
 <!ENTITY % condition "(and|or|not|equal|begins-with|begins-with-list|ends-with|ends-with-list|contains-substring|in)">
 <!ENTITY % container "(var|clip)">
-<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append)">
+<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append|reject-current-rule)">
 <!ENTITY % value "(b|clip|lit|lit-tag|var|get-case-from|case-of|concat|lu-count|lu|mlu)">
 <!ENTITY % stringvalue "(clip|lit|var|get-case-from|case-of|lu-count)">
 
@@ -414,6 +414,16 @@ get-case-from -->
 <!ELEMENT concat (%value;)+>
 <!-- Concatenates a sequence of values -->
 
+<!ELEMENT reject-current-rule EMPTY>
+<!ATTLIST reject-current-rule shifting (yes|no) #IMPLIED>
+<!--
+      This instruction cancels the execution of the rule being processed.
+      If "shifting" is set to "yes" or is not specified, the matching process
+      consumes exactly one word of the input. If "shifting" is set to "no",
+      the rule is instead marked so that it is not considered again in the
+      current matching until the input buffer advances by at least one word.
+-->
+
 <!ELEMENT mlu (lu+)>
 <!-- Encloses a multiword -->
 
diff --git a/apertium/postchunk.h b/apertium/postchunk.h
index 9f3a254..70044e0 100644
--- a/apertium/postchunk.h
+++ b/apertium/postchunk.h
@@ -17,136 +17,63 @@
 #ifndef _POSTCHUNK_
 #define _POSTCHUNK_
 
-#include <apertium/transfer_instr.h>
-#include <apertium/transfer_token.h>
-#include <apertium/interchunk_word.h>
-#include <apertium/apertium_re.h>
-#include <lttoolbox/alphabet.h>
-#include <lttoolbox/buffer.h>
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/match_exe.h>
-#include <lttoolbox/match_state.h>
+#include <apertium/transfer_base.h>
 
-#include <cstdio>
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <map>
-#include <set>
-#include <vector>
-#include <queue>
+#include <apertium/interchunk_word.h>
+#include <lttoolbox/input_file.h>
 
 using namespace std;
 
-class Postchunk
+class Postchunk : public TransferBase
 {
 private:
 
-  Alphabet alphabet;
-  MatchExe *me;
-  MatchState ms;
-  map<string, ApertiumRE, Ltstr> attr_items;
-  map<string, string, Ltstr> variables;
-  map<string, int, Ltstr> macros;
-  map<string, set<string, Ltstr>, Ltstr> lists;
-  map<string, set<string, Ltstr>, Ltstr> listslow;
-  vector<xmlNode *> macro_map;
-  vector<xmlNode *> rule_map;
-  vector<size_t> rule_lines;
-  xmlDoc *doc;
-  xmlNode *root_element;
   InterchunkWord **word;
-  queue <string> blank_queue;
-  int lword;
-  Buffer<TransferToken> input_buffer;
-  vector<wstring *> tmpword;
-  vector<wstring *> tmpblank;
-  
-  bool in_out;
+
   bool in_lu;
-  bool in_let_var;
-  string var_val;
   bool in_wblank;
-  string out_wblank;
-  map <string, string> var_out_wblank;
-
-  FILE *output;
-  int any_char;
-  int any_tag;
-
-  xmlNode *lastrule;
-  unsigned int nwords;
-
-  map<xmlNode *, TransferInstr> evalStringCache;
+  UString out_wblank;
+  map <UString, UString> var_out_wblank;
 
   bool inword;
-  bool null_flush;
-  bool internal_null_flush;
-  bool trace;
 
-  void destroy();
-  void readData(FILE *input);
-  void readPostchunk(string const &input);
-  void collectMacros(xmlNode *localroot);
-  void collectRules(xmlNode *localroot);
-  static string caseOf(string const &str);
-  static wstring caseOf(wstring const &str);
-  string copycase(string const &source_word, string const &target_word);
+  UString evalCachedString(xmlNode* element);
+  void processClip(xmlNode* element);
+  void processBlank(xmlNode* element);
+  void processLuCount(xmlNode* element);
+  void processCaseOf(xmlNode* element);
+  UString processLu(xmlNode* element);
+  UString processMlu(xmlNode* element);
+
+  UString processChunk(xmlNode* element);
 
   void processLet(xmlNode *localroot);
-  void processAppend(xmlNode *localroot);
   void processOut(xmlNode *localroot);
   void processCallMacro(xmlNode *localroot);
   void processModifyCase(xmlNode *localroot);
-  bool processLogical(xmlNode *localroot);
-  bool processTest(xmlNode *localroot);
-  bool processAnd(xmlNode *localroot);
-  bool processOr(xmlNode *localroot);
-  bool processEqual(xmlNode *localroot);
-  bool processBeginsWith(xmlNode *localroot);
-  bool processBeginsWithList(xmlNode *localroot);
-  bool processEndsWith(xmlNode *localroot);
-  bool processEndsWithList(xmlNode *localroot);
-  bool processContainsSubstring(xmlNode *localroot);
-  bool processNot(xmlNode *localroot);
-  bool processIn(xmlNode *localroot);
-  void processRule(xmlNode *localroot);
-  string evalString(xmlNode *localroot);
-  void processInstruction(xmlNode *localroot);
-  void processChoose(xmlNode *localroot);
   void processTags(xmlNode *localroot);
-  bool beginsWith(string const &str1, string const &str2) const;
-  bool endsWith(string const &str1, string const &str2) const;
-  string tolower(string const &str) const;
-  string tags(string const &str) const;
-  string readWord(FILE *in);
-  string readBlank(FILE *in);
-  string readUntil(FILE *in, int const symbol) const;
-  void applyWord(wstring const &word_str);
-  void applyRule();
-  TransferToken & readToken(FILE *in);
-  static void unchunk(wstring const &chunk, FILE *output);
-  static vector<wstring> getVecTags(wstring const &chunk);
-  static int beginChunk(wstring const &chunk);
-  static int endChunk(wstring const &chunk);
-  static void splitWordsAndBlanks(wstring const &chunk,
-				  vector<wstring *> &words,
-				  vector<wstring *> &blanks);
-  static wstring pseudolemma(wstring const &chunk);
-  static wstring wordzero(wstring const &chunk);
+  UString readWord(InputFile& in);
+  UString readBlank(InputFile& in);
+  UString readUntil(InputFile& in, int const symbol) const;
+  void applyWord(UString const &word_str);
+  int applyRule();
+  TransferToken & readToken(InputFile& in);
+  static void unchunk(UString const &chunk, UFILE *output);
+  static vector<UString> getVecTags(UString const &chunk);
+  static int beginChunk(UString const &chunk);
+  static int endChunk(UString const &chunk);
+  static void splitWordsAndBlanks(UString const &chunk,
+				  vector<UString *> &words,
+				  vector<UString *> &blanks);
+  static UString pseudolemma(UString const &chunk);
+  static UString wordzero(UString const &chunk);
   bool checkIndex(xmlNode *element, int index, int limit);
-  void postchunk_wrapper_null_flush(FILE *in, FILE *out);
-  bool gettingLemmaFromWord(string attr);
-  string combineWblanks(string wblank_current, string wblank_to_add);
+  void postchunk_wrapper_null_flush(InputFile& in, UFILE* out);
 
 public:
   Postchunk();
-  ~Postchunk();
 
-  void read(string const &transferfile, string const &datafile);
-  void postchunk(FILE *in, FILE *out);
-  bool getNullFlush(void);
-  void setNullFlush(bool null_flush);
-  void setTrace(bool trace);
+  void postchunk(InputFile& in, UFILE* out);
 };
 
 #endif
diff --git a/apertium/pretransfer.cc b/apertium/pretransfer.cc
index 764b3cf..6c8c778 100644
--- a/apertium/pretransfer.cc
+++ b/apertium/pretransfer.cc
@@ -3,74 +3,74 @@
 #include <lttoolbox/lt_locale.h>
 #include <apertium/apertium_config.h>
 #include <apertium/unlocked_cstdio.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <iostream>
 #include <string>
 
-wstring storeAndWriteWblank(FILE *input, FILE *output)
+UString storeAndWriteWblank(InputFile& input, UFILE* output)
 {
   int mychar;
-  wstring content = L"[[";
+  UString content = "[["_u;
 
   while(true)
   {
-    mychar = fgetwc_unlocked(input);
-    if(feof(input))
+    mychar = input.get();
+    if(input.eof())
     {
-      wcerr << L"ERROR: Unexpected EOF" << endl;
+      cerr << "ERROR: Unexpected EOF" << endl;
       exit(EXIT_FAILURE);
     }
-    
+
     content += mychar;
-    fputwc_unlocked(mychar, output);
-    
-    if(mychar == L'\\')
+    u_fputc(mychar, output);
+
+    if(mychar == '\\')
     {
-      mychar = fgetwc(input);
+      mychar = input.get();
       content += mychar;
-      fputwc(mychar, output);
+      u_fputc(mychar, output);
     }
-    else if(mychar == L']')
+    else if(mychar == ']')
     {
-      mychar = fgetwc(input);
-      
-      if(mychar == L']')
+      mychar = input.get();
+      if(mychar != ']' && !input.eof()) { input.unget(mychar); } // lone ']': re-read the lookahead on the next pass instead of dropping it
+      if(mychar == ']')
       {
         content += mychar;
-        fputwc(mychar, output);
+        u_fputc(mychar, output);
         break;
       }
     }
   }
-  
+
   return content;
 }
 
-void readAndWriteUntil(FILE *input, FILE *output, int const charcode)
+void readAndWriteUntil(InputFile& input, UFILE* output, int const charcode)
 {
   int mychar;
 
-  while((mychar = fgetwc_unlocked(input)) != charcode)
+  while((mychar = input.get()) != charcode)
   {
-    if(feof(input))
+    if(input.eof())
     {
-      wcerr << L"ERROR: Unexpected EOF" << endl;
+      cerr << "ERROR: Unexpected EOF" << endl;
       exit(EXIT_FAILURE);
     }
-    fputwc_unlocked(mychar, output);
-    if(mychar == L'\\')
+    u_fputc(mychar, output);
+    if(mychar == '\\')
     {
-      mychar = fgetwc(input);
-      fputwc(mychar, output);
+      mychar = input.get();
+      u_fputc(mychar, output);
     }
   }
 }
 
-void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep, wstring wblank = L"")
+void procWord(InputFile& input, UFILE* output, bool surface_forms, bool compound_sep, UString wblank = ""_u)
 {
   int mychar;
-  wstring buffer = L"";
+  UString buffer;
 
   bool buffer_mode = false;
   bool in_tag = false;
@@ -78,20 +78,20 @@ void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep,
 
   if(surface_forms)
   {
-    while((mychar = fgetwc_unlocked(input)) != L'/') ;
+    while((mychar = input.get()) != '/') ;
   }
 
-  while((mychar = fgetwc_unlocked(input)) != L'$')
+  while((mychar = input.get()) != '$')
   {
-    if(feof(input))
+    if(input.eof())
     {
-      wcerr << L"ERROR: Unexpected EOF" << endl;
+      cerr << "ERROR: Unexpected EOF" << endl;
       exit(EXIT_FAILURE);
     }
 
     switch(mychar)
     {
-    case L'<':
+    case '<':
       in_tag = true;
       if(!buffer_mode)
       {
@@ -99,11 +99,11 @@ void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep,
       }
       break;
 
-    case L'>':
+    case '>':
       in_tag = false;
       break;
 
-    case L'#':
+    case '#':
       if(buffer_mode)
       {
         buffer_mode = false;
@@ -114,106 +114,106 @@ void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep,
 
     if(buffer_mode)
     {
-      if((mychar != L'+' || (mychar == L'+' && in_tag == true)) &&
-         (mychar != L'~' || (mychar == L'~' && in_tag == true)))
+      if((mychar != '+' || (mychar == '+' && in_tag == true)) &&
+         (mychar != '~' || (mychar == '~' && in_tag == true)))
       {
-        buffer += static_cast<wchar_t>(mychar);
+        buffer += mychar;
       }
-      else if(in_tag == false && mychar == L'+')
+      else if(in_tag == false && mychar == '+')
       {
-        buffer.append(L"$ ");
+        buffer.append("$ "_u);
         buffer.append(wblank);
-        buffer.append(L"^");
+        buffer.append("^"_u);
       }
-      else if(in_tag == false && mychar == L'~' and compound_sep == true)
+      else if(in_tag == false && mychar == '~' and compound_sep == true)
       {
-        buffer.append(L"$");
+        buffer.append("$"_u);
         buffer.append(wblank);
-        buffer.append(L"^");
+        buffer.append("^"_u);
       }
     }
     else
     {
-      if(mychar == L'+' && queuing == true)
+      if(mychar == '+' && queuing == true)
       {
-        buffer.append(L"$ ");
+        buffer.append("$ "_u);
         buffer.append(wblank);
-        buffer.append(L"^");
+        buffer.append("^"_u);
         buffer_mode = true;
       }
       else
       {
-        fputwc_unlocked(mychar, output);
+        u_fputc(mychar, output);
       }
     }
 
   }
-  fputws_unlocked(buffer.c_str(), output);
+  write(buffer, output);
 }
 
-void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms, bool compound_sep)
+void processStream(InputFile& input, UFILE* output, bool null_flush, bool surface_forms, bool compound_sep)
 {
   while(true)
   {
-    int mychar = fgetwc_unlocked(input);
-    if(feof(input))
+    int mychar = input.get();
+    if(input.eof())
     {
       break;
     }
     switch(mychar)
     {
-      case L'[':
-        fputwc_unlocked(L'[', output);
-        mychar = fgetwc_unlocked(input);
-        
-        if(mychar == L'[')
+      case '[':
+        u_fputc('[', output);
+        mychar = input.get();
+
+        if(mychar == '[')
         {
-          fputwc_unlocked(L'[', output);
-          wstring wblank = storeAndWriteWblank(input, output);
-          mychar = fgetwc_unlocked(input);
-          
-          if(mychar == L'^')
+          u_fputc('[', output);
+          UString wblank = storeAndWriteWblank(input, output);
+          mychar = input.get();
+
+          if(mychar == '^')
           {
-            fputwc_unlocked(mychar, output);
+            u_fputc(mychar, output);
             procWord(input, output, surface_forms, compound_sep, wblank);
-            fputwc_unlocked(L'$', output);
+            u_fputc('$', output);
           }
           else
           {
-            wcerr << L"ERROR: Wordbound blank isn't immediately followed by the Lexical Unit." << endl;
+            cerr << "ERROR: Wordbound blank isn't immediately followed by the Lexical Unit." << endl;
             exit(EXIT_FAILURE);
           }
         }
         else
         {
-          ungetwc(mychar, input);
-          readAndWriteUntil(input, output, L']');
-          fputwc_unlocked(L']', output);
+          input.unget(mychar);
+          readAndWriteUntil(input, output, ']');
+          u_fputc(']', output);
         }
         break;
 
-      case L'\\':
-        fputwc_unlocked(mychar, output);
-        fputwc_unlocked(fgetwc_unlocked(input), output);
+      case '\\':
+        u_fputc(mychar, output);
+        u_fputc(input.get(), output);
         break;
 
-      case L'^':
-        fputwc_unlocked(mychar, output);
+      case '^':
+        u_fputc(mychar, output);
         procWord(input, output, surface_forms, compound_sep);
-        fputwc_unlocked(L'$', output);
+        u_fputc('$', output);
         break;
 
-      case L'\0':
-        fputwc_unlocked(mychar, output);
+      case '\0':
+        u_fputc(mychar, output);
 
         if(null_flush)
         {
-          fflush(output);
+          u_fflush(output);
         }
         break;
 
       default:
-        fputwc_unlocked(mychar, output);
+        u_fputc(mychar, output);
         break;
     }
   }
diff --git a/apertium/pretransfer.h b/apertium/pretransfer.h
index d664b13..ffc9737 100644
--- a/apertium/pretransfer.h
+++ b/apertium/pretransfer.h
@@ -16,12 +16,13 @@
 #ifndef PRETRANSFER_H
 #define PRETRANSFER_H
 
-#include <iostream>
-#include <lttoolbox/lt_locale.h>
+#include <unicode/ustdio.h>
+#include <lttoolbox/input_file.h>
+#include <lttoolbox/ustring.h>
 
-wstring storeAndWriteWblank(FILE *input, FILE *output);
-void readAndWriteUntil(FILE *input, FILE *output, int const charcode);
-void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep, wstring wblank);
-void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms, bool compound_sep);
+UString storeAndWriteWblank(InputFile& input, UFILE *output);
+void readAndWriteUntil(InputFile& input, UFILE *output, int const charcode);
+void procWord(InputFile& input, UFILE *output, bool surface_forms, bool compound_sep, UString wblank);
+void processStream(InputFile& input, UFILE *output, bool null_flush, bool surface_forms, bool compound_sep);
 
 #endif
diff --git a/apertium/reformat.xsl b/apertium/reformat.xsl
index 3316b42..f471823 100644
--- a/apertium/reformat.xsl
+++ b/apertium/reformat.xsl
@@ -26,7 +26,6 @@
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
-#include &lt;utf8/utf8.h&gt;
 #include &lt;apertium/unlocked_cstdio.h&gt;
 
 #include &lt;cstdlib&gt;
@@ -36,22 +35,13 @@
 #include &lt;string&gt;
 #include &lt;unistd.h&gt;
 #include &lt;lttoolbox/lt_locale.h&gt;
-#include &lt;lttoolbox/ltstr.h&gt;
-#include &lt;apertium/string_to_wostream.h&gt;
-#include &lt;wchar.h&gt;
-#ifdef _WIN32
-#include &lt;io.h&gt;
-#include &lt;fcntl.h&gt;
-#define utf8to32 utf8to16
-#define utf32to8 utf16to8
-#endif
 
 using namespace std;
 
 <xsl:for-each select="./rules/replacement-rule">
   <xsl:variable name="varname"
 		select="concat(concat(string('S'),position()),string('_substitution'))"/>
-  <xsl:value-of select="string('map&lt;wstring, wstring, Ltstr&gt; S')"/>
+  <xsl:value-of select="string('map&lt;string, string&gt; S')"/>
   <xsl:value-of select="position()"/>
   <xsl:value-of select="string('_substitution;&#xA;&#xA;void S')"/>
   <xsl:value-of select="position()"/>
@@ -61,9 +51,9 @@ using namespace std;
     <xsl:if test="./@prefer = string('yes')">
       <xsl:value-of select="string('&#xA;  ')"/>
       <xsl:value-of select="$varname"/>
-      <xsl:value-of select="string('[L&quot;')"/>
+      <xsl:value-of select="string('[&quot;')"/>
       <xsl:value-of select="./@target"/>
-      <xsl:value-of select="string('&quot;] = L&quot;')"/>
+      <xsl:value-of select="string('&quot;] = &quot;')"/>
       <xsl:value-of select="./@source"/>
       <xsl:value-of select="string('&quot;;')"/>
     </xsl:if>
@@ -72,19 +62,6 @@ using namespace std;
   <xsl:value-of select="string('&#xA;}&#xA;')"/>
 </xsl:for-each>
 
-string memconv;
-
-wstring convertir(char const *multibyte, int const length)
-{
-  std::wstring rv;
-  memconv.append(multibyte, length);
-  if (utf8::is_valid(memconv.begin(), memconv.end())) {
-  	utf8::utf8to32(memconv.begin(), memconv.end(), std::back_inserter(rv));
-  	memconv.clear();
-  }
-  return rv;
-}
-
 %}
 
 %option nounput
@@ -101,26 +78,22 @@ wstring convertir(char const *multibyte, int const length)
   string filename = yytext;
   filename = filename.substr(2, filename.size()-3);
   FILE *temp = fopen(filename.c_str(), "rb");
-  wint_t mychar;
-#ifdef _MSC_VER
-  _setmode(_fileno(temp), _O_U8TEXT);
-#endif
+  int mychar;
 
   if(!temp)
   {
-    wcerr &lt;&lt; "ERROR: File '" &lt;&lt; filename &lt;&lt;"' not found." &lt;&lt; endl;
+    cerr &lt;&lt; "ERROR: File '" &lt;&lt; filename &lt;&lt;"' not found." &lt;&lt; endl;
     exit(EXIT_FAILURE);
   }
-  while(static_cast&lt;int&gt;(mychar = fgetwc_unlocked(temp)) != EOF)
-  {
-    fputwc_unlocked(mychar, yyout);
+  while((mychar = fgetc(temp)) != EOF) {
+    fputc_unlocked(mychar, yyout);
   }
   fclose(temp);
   unlink(filename.c_str());
 }
 
 "[\\@"&#x9;{
-  fputwc_unlocked(L'@', yyout);
+  fputc_unlocked('@', yyout);
 }
 
 ".[]"&#x9;{
@@ -128,13 +101,13 @@ wstring convertir(char const *multibyte, int const length)
 }
 
 "\\"<xsl:value-of select="/format/options/escape-chars/@regexp"/>&#x9;{
-  fputws_unlocked(convertir(yytext+1, yyleng-1).c_str(), yyout);
+  fwrite(yytext+1, 1, yyleng-1, yyout);
 }
 
 
 
 .|\n&#x9;{
-  wstring yytext_conv = convertir(yytext, yyleng);
+  string yytext_conv = yytext;
 <xsl:for-each select="./rules/replacement-rule">
   <xsl:variable name="varname"
 		select="concat(concat(string('S'),position()),string('_substitution'))"/>
@@ -148,7 +121,7 @@ wstring convertir(char const *multibyte, int const length)
   <xsl:value-of select="string('.find(yytext_conv) != ')"/>
   <xsl:value-of select="$varname"/>
   <xsl:value-of select="string('.end())&#xA;  {&#xA;')"/>
-  <xsl:value-of select="string('    fputws_unlocked(')"/>
+  <xsl:value-of select="string('    fputs_unlocked(')"/>
   <xsl:value-of select="$varname"/>
   <xsl:value-of select="string('[yytext_conv].c_str(), yyout);')"/>
   <xsl:value-of select="string('&#xA;  }&#xA;')"/>
@@ -157,7 +130,7 @@ wstring convertir(char const *multibyte, int const length)
 <xsl:if test="not(count(./rules/replacement-rule)=0)">
   <xsl:value-of select="string('  else&#xA;  {&#xA;  ')"/>
 </xsl:if>
-<xsl:value-of select="string('  fputws_unlocked(yytext_conv.c_str(), yyout);&#xA;')"/>
+<xsl:value-of select="string('  fputs_unlocked(yytext, yyout);&#xA;')"/>
 <xsl:if test="not(count(./rules/replacement-rule)=0)">
   <xsl:value-of select="string('  }')"/>
 </xsl:if>
@@ -171,8 +144,8 @@ wstring convertir(char const *multibyte, int const length)
 
 void usage(string const &amp;progname)
 {
-  wcerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
-  wcerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  cerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
   exit(EXIT_SUCCESS);
 }
 
@@ -203,10 +176,6 @@ int main(int argc, char *argv[])
     default:
       break;
   }
-#ifdef _MSC_VER
-  _setmode(_fileno(yyin), _O_U8TEXT);
-  _setmode(_fileno(yyout), _O_U8TEXT);
-#endif
 
 <xsl:for-each select="./rules/replacement-rule">
   <xsl:value-of select="string('  S')"/>
diff --git a/apertium/sentence_stream.cc b/apertium/sentence_stream.cc
index c83450b..a90e56e 100644
--- a/apertium/sentence_stream.cc
+++ b/apertium/sentence_stream.cc
@@ -21,7 +21,7 @@ bool isSentenceEnd(StreamedType &token) {
     return false;
   }
   Tag &tag = *tags.begin();
-  if (tag.TheTag != L"sent") {
+  if (tag.TheTag != "sent"_u) {
     return false;
   }
   return true;
@@ -37,7 +37,7 @@ bool isSentenceEnd(StreamedType tok, Stream &in, bool sent_seg) {
 
 SentenceTagger::SentenceTagger() {}
 
-void SentenceTagger::tag(Stream &in, std::wostream &out, bool sent_seg) {
+void SentenceTagger::tag(Stream &in, std::ostream &out, bool sent_seg) {
   clearBuffers();
 
   while (true) {
@@ -67,7 +67,7 @@ void SentenceTagger::clearBuffers() const {
   flushes.clear();
 }
 
-void SentenceTagger::tagAndPutSentence(std::wostream &out) {
+void SentenceTagger::tagAndPutSentence(std::ostream &out) {
   TaggedSentence tagged_sent = tagSentence(lexical_sent);
   TaggedSentence::const_iterator ts_it = tagged_sent.begin();
 
@@ -103,17 +103,17 @@ TrainingCorpus::TrainingCorpus(Stream &tagged, Stream &untagged,
     untagged_line++;
     if (!tagged_token.TheLexicalUnit || !untagged_token.TheLexicalUnit) {
       if (tagged_token.TheLexicalUnit || untagged_token.TheLexicalUnit) {
-        std::wcerr << "Normal perm\n";
-        std::wcerr << "tagged: " << tagged_line << " " << (!!tagged_token.TheLexicalUnit) << "\n";
-        std::wcerr << "untagged: " << untagged_line << " " << (!!untagged_token.TheLexicalUnit) << "\n";
+        std::cerr << "Normal perm\n";
+        std::cerr << "tagged: " << tagged_line << " " << (!!tagged_token.TheLexicalUnit) << "\n";
+        std::cerr << "untagged: " << untagged_line << " " << (!!untagged_token.TheLexicalUnit) << "\n";
         prematureEnd();
       }
       break;
     }
-    //std::wcerr << tagged_token.TheLexicalUnit->TheSurfaceForm << " || " << untagged_token.TheLexicalUnit->TheSurfaceForm << "\n";
+    //std::cerr << tagged_token.TheLexicalUnit->TheSurfaceForm << " || " << untagged_token.TheLexicalUnit->TheSurfaceForm << "\n";
     if (untagged_token.TheLexicalUnit->TheSurfaceForm != tagged_token.TheLexicalUnit->TheSurfaceForm) {
       if (!skip_on_error) {
-        std::wstringstream what_;
+        std::stringstream what_;
         what_ << "Streams diverged at line " << tagged_line << "\n";
         what_ << "Untagged token: "
               << untagged_token.TheLexicalUnit->TheSurfaceForm << "\n";
@@ -127,18 +127,18 @@ TrainingCorpus::TrainingCorpus(Stream &tagged, Stream &untagged,
       training_sentence->first.clear();
       training_sentence->second.clear();
 
-      std::wcerr << "fast forward\n";
+      std::cerr << "fast forward\n";
       bool tagged_ended = contToEndOfSent(tagged, tagged_token, tagged_line);
       bool untagged_ended = contToEndOfSent(untagged, untagged_token, untagged_line);
       if (tagged_ended || untagged_ended) {
         if (!tagged_ended || !untagged_ended) {
-          std::wcerr << "fast forward prem\n";
+          std::cerr << "fast forward prem\n";
           prematureEnd();
         }
-        std::wcerr << "fast forward finish\n";
+        std::cerr << "fast forward finish\n";
         break;
       }
-      std::wcerr << "fast forwarded\n";
+      std::cerr << "fast forwarded\n";
       continue;
     }
     if (was_sentence_end) {
@@ -169,7 +169,7 @@ bool TrainingCorpus::contToEndOfSent(Stream &stream, StreamedType token,
     if (isSentenceEnd(token, stream, sent_seg)) {
       return false;
     }
-    std::wcerr << "Skip " << token.TheLexicalUnit->TheSurfaceForm << "\n";
+    std::cerr << "Skip " << token.TheLexicalUnit->TheSurfaceForm << "\n";
     token = stream.get();
     line++;
   }
diff --git a/apertium/sentence_stream.h b/apertium/sentence_stream.h
index aea298e..133f6ba 100644
--- a/apertium/sentence_stream.h
+++ b/apertium/sentence_stream.h
@@ -20,18 +20,18 @@ namespace SentenceStream {
   bool isSentenceEnd(Stream &in, bool sent_seg = false);
   class SentenceTagger {
   public:
-    void tag(Stream &in, std::wostream &out, bool sent_seg);
+    void tag(Stream &in, std::ostream &out, bool sent_seg);
     SentenceTagger();
   protected:
     virtual TaggedSentence tagSentence(const Sentence &untagged) = 0;
     virtual void outputLexicalUnit(
       const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-      std::wostream &output) = 0;
+      std::ostream &output) = 0;
   private:
     void clearBuffers() const;
-    void tagAndPutSentence(std::wostream &out);
+    void tagAndPutSentence(std::ostream &out);
     void putTaggedSent(
-      std::wostream &out, TaggedSentence &tagged_sent, Sentence &full_sent,
+      std::ostream &out, TaggedSentence &tagged_sent, Sentence &full_sent,
       std::vector<bool> &flushes) const;
     mutable Sentence full_sent;
     mutable Sentence lexical_sent;
diff --git a/apertium/shell_utils.cc b/apertium/shell_utils.cc
index cccfa5b..e9d040c 100644
--- a/apertium/shell_utils.cc
+++ b/apertium/shell_utils.cc
@@ -73,12 +73,14 @@ FILE *try_open_file(const char *metavar, const char *filename,
   return f;
 }
 
-FILE *try_open_file_utf8(const char *metavar, const char *filename,
+UFILE* try_open_file_utf8(const char *metavar, const char *filename,
                                        const char *flags) {
-  FILE *f = try_open_file(metavar, filename, flags);
-#ifdef _MSC_VER
-  _setmode(_fileno(f), _O_U8TEXT);
-#endif // _MSC_VER
+  UFILE* f = u_fopen(filename, flags, NULL, "UTF-8"); // explicit codepage: NULL would select ICU's default converter, which need not be UTF-8
+  if (f == NULL) {
+    std::stringstream what_;
+    what_ << "can't open " << metavar << " file \"" << filename << "\"";
+    throw Exception::Shell::FopenError(what_);
+  }
   return f;
 }
 
diff --git a/apertium/shell_utils.h b/apertium/shell_utils.h
index 11b7a36..f2ca314 100644
--- a/apertium/shell_utils.h
+++ b/apertium/shell_utils.h
@@ -3,6 +3,7 @@
 
 #include <fstream>
 #include <cstdio>
+#include <unicode/ustdio.h>
 
 namespace Apertium {
 namespace ShellUtils {
@@ -22,7 +23,7 @@ FILE*
 try_open_file(const char *metavar, const char *filename,
               const char *flags);
 
-FILE*
+UFILE*
 try_open_file_utf8(const char *metavar, const char *filename,
                    const char *flags);
 
diff --git a/apertium/stream.cc b/apertium/stream.cc
index 4d83a3e..3d8897b 100644
--- a/apertium/stream.cc
+++ b/apertium/stream.cc
@@ -25,23 +25,23 @@
 
 namespace Apertium {
 Stream::Stream(TaggerFlags &Flags_)
-    : TheLineNumber(1), TheCharacterStream(std::wcin), TheFilename(), TheLine(),
+    : TheLineNumber(1), TheCharacterStream(std::cin), TheFilename(), TheLine(),
       TheFlags(Flags_), private_flush_(false), ThePreviousCase() {}
 
 Stream::Stream(TaggerFlags &Flags_,
-               std::wifstream &CharacterStream_, const char *const Filename_)
+               std::ifstream &CharacterStream_, const char *const Filename_)
     : TheLineNumber(1), TheCharacterStream(CharacterStream_), TheFilename(Filename_),
       TheLine(), TheFlags(Flags_), private_flush_(false),
       ThePreviousCase() {}
 
 Stream::Stream(TaggerFlags &Flags_,
-               std::wifstream &CharacterStream_, const std::string &Filename_)
+               std::ifstream &CharacterStream_, const std::string &Filename_)
     : TheLineNumber(1), TheCharacterStream(CharacterStream_), TheFilename(Filename_),
       TheLine(), TheFlags(Flags_), private_flush_(false),
       ThePreviousCase() {}
 
 Stream::Stream(TaggerFlags &Flags_,
-               std::wifstream &CharacterStream_,
+               std::ifstream &CharacterStream_,
                const std::stringstream &Filename_)
     : TheLineNumber(1), TheCharacterStream(CharacterStream_), TheFilename(Filename_.str()),
       TheLine(), TheFlags(Flags_), private_flush_(false),
@@ -49,13 +49,13 @@ Stream::Stream(TaggerFlags &Flags_,
 
 StreamedType Stream::get() {
   StreamedType TheStreamedType;
-  std::wstring Lemma;
+  UString Lemma;
   private_flush_ = false;
 
   //TheCharacterStream.clear();
   if (!is_eof_throw_if_not_TheCharacterStream_good()) {
     while (true) {
-      const wchar_t Character_ = TheCharacterStream.get();
+      const UChar Character_ = TheCharacterStream.get();
 
       if (is_eof_throw_if_not_TheCharacterStream_good(TheStreamedType, Lemma,
                                                       Character_))
@@ -64,21 +64,21 @@ StreamedType Stream::get() {
       TheLine.push_back(Character_);
 
       switch (Character_) {
-      case L'\\': // <\>  92,  Hex 5c,  Octal 134
+      case '\\': // <\>  92,  Hex 5c,  Octal 134
         case_0x5c(TheStreamedType, Lemma, Character_);
         continue;
-      case L'[':
+      case '[':
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
-          case L']':
-          case L'$':
+          case '[':
+          case ']':
+          case '$':
             break;
           default:
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_ << L"' following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_ << "' following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '[' expected to follow '[', ']' or '$'";
+                    << "', '[' expected to follow '[', ']' or '$'";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
         }
@@ -86,43 +86,43 @@ StreamedType Stream::get() {
         push_back_Character(TheStreamedType, Lemma, Character_);
         ThePreviousCase = PreviousCaseType(Character_);
         continue;
-      case L']':
+      case ']':
         if (!ThePreviousCase) {
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_
-                  << L"', ']' expected to follow '['";
+          std::stringstream Message;
+          Message << "unexpected '" << Character_
+                  << "', ']' expected to follow '['";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         switch (ThePreviousCase->ThePreviousCase) {
-        case L'[':
-        case L']':
+        case '[':
+        case ']':
           push_back_Character(TheStreamedType, Lemma, Character_);
           ThePreviousCase = PreviousCaseType(Character_);
           continue;
         default:
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_ << L"' following '"
+          std::stringstream Message;
+          Message << "unexpected '" << Character_ << "' following '"
                   << ThePreviousCase->ThePreviousCase
-                  << L"', ']' expected to follow '[' or ']'";
+                  << "', ']' expected to follow '[' or ']'";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         std::abort();
-      case L'^':
+      case '^':
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
+          case '[':
             push_back_Character(TheStreamedType, Lemma, Character_);
             continue;
-          case L']':
-          case L'$':
+          case ']':
+          case '$':
             break;
           default:
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_ << L"' following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_ << "' following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '^' expected to follow '[', ']', or '$'";
+                    << "', '^' expected to follow '[', ']', or '$'";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
         }
@@ -130,39 +130,39 @@ StreamedType Stream::get() {
         TheStreamedType.TheLexicalUnit = LexicalUnit();
         ThePreviousCase = PreviousCaseType(Character_);
         continue;
-      case L'/':
+      case '/':
         if (!ThePreviousCase) {
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_
-                  << L"', '/' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '^' or '#' not immediately";
+          std::stringstream Message;
+          Message << "unexpected '" << Character_
+                  << "', '/' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '^' or '#' not immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         switch (ThePreviousCase->ThePreviousCase) {
-        case L'[':
+        case '[':
           push_back_Character(TheStreamedType, Lemma, Character_);
           continue;
-        case L'^':
+        case '^':
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '/' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '^' or '#' not immediately";
+                    << "', '/' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '^' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           ThePreviousCase = PreviousCaseType(Character_);
 
           {
-            const wchar_t Character_ = TheCharacterStream.get();
+            const UChar Character_ = TheCharacterStream.get();
 
             if (is_eof_throw_if_not_TheCharacterStream_good(
                     TheStreamedType, Lemma, Character_)) {
-              std::wstringstream Message;
-              Message << L"unexpected end-of-file following '"
+              std::stringstream Message;
+              Message << "unexpected end-of-file following '"
                       << ThePreviousCase->ThePreviousCase
                       << "', end-of-file expected to follow ']' or '$'";
               throw Exception::Stream::UnexpectedEndOfFile(
@@ -172,24 +172,24 @@ StreamedType Stream::get() {
             TheLine.push_back(Character_);
 
             switch (Character_) {
-            case L'\\':
+            case '\\':
               TheStreamedType.TheLexicalUnit->TheAnalyses.push_back(Analysis());
               TheStreamedType.TheLexicalUnit->TheAnalyses.back()
                   .TheMorphemes.push_back(Morpheme());
               case_0x5c(TheStreamedType, Lemma, Character_);
               continue;
-            case L'*':
+            case '*':
               ThePreviousCase = PreviousCaseType(Character_);
               continue;
-            case L'\n': {
-              std::wstringstream Message;
-              Message << L"unexpected newline following '"
+            case '\n': {
+              std::stringstream Message;
+              Message << "unexpected newline following '"
                       << ThePreviousCase->ThePreviousCase
                       << "', newline expected to follow '[', ']', or '$'";
               throw Exception::Stream::UnexpectedCharacter(
                   Message_what(Message));
             };
-            case L'<':
+            case '<':
               TheStreamedType.TheLexicalUnit->TheAnalyses.push_back(Analysis());
               TheStreamedType.TheLexicalUnit->TheAnalyses.back()
                 .TheMorphemes.push_back(Morpheme());
@@ -198,18 +198,18 @@ StreamedType Stream::get() {
                 .TheTags.push_back(Tag());
               ThePreviousCase = PreviousCaseType(Character_);
               continue;
-                
-            case L'[':
-            case L']':
-            case L'^':
-            case L'#':
-            case L'>':
-            case L'+':
-            case L'$': {
-              std::wstringstream Message;
-              Message << L"unexpected '" << Character_
-                      << L"' immediately following '"
-                      << ThePreviousCase->ThePreviousCase << L"', expected '*'";
+
+            case '[':
+            case ']':
+            case '^':
+            case '#':
+            case '>':
+            case '+':
+            case '$': {
+              std::stringstream Message;
+              Message << "unexpected '" << Character_
+                      << "' immediately following '"
+                      << ThePreviousCase->ThePreviousCase << "', expected '*'";
               throw Exception::Stream::UnexpectedPreviousCase(
                   Message_what(Message));
             }
@@ -223,37 +223,37 @@ StreamedType Stream::get() {
           }
 
           continue;
-        case L'>':
+        case '>':
           if (!ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' not immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' not immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '/' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '^' or '#' not immediately";
+                    << "', '/' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '^' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           break;
-        case L'#':
+        case '#':
 
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '/' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '^' or '#' not immediately";
+                    << "', '/' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '^' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           break;
         default:
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_ << L"' following '"
+          std::stringstream Message;
+          Message << "unexpected '" << Character_ << "' following '"
                   << ThePreviousCase->ThePreviousCase
-                  << L"', '/' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '^' or '#' not immediately";
+                  << "', '/' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '^' or '#' not immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
@@ -262,65 +262,65 @@ StreamedType Stream::get() {
             .TheMorphemes.push_back(Morpheme());
         ThePreviousCase = PreviousCaseType(Character_);
         continue;
-      case L'*':
+      case '*':
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
-          case L']':
-          case L'$':
+          case '[':
+          case ']':
+          case '$':
             break;
           default:
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_ << L"' following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_ << "' following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '*' expected to follow '[', ']', or '$' or to "
-                       L"follow '/' immediately";
+                    << "', '*' expected to follow '[', ']', or '$' or to "
+                       "follow '/' immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
         }
 
         push_back_Character(TheStreamedType, Lemma, Character_);
         continue;
-      case L'<':
+      case '<':
         if (!ThePreviousCase) {
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_
-                  << L"', '<' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '#', '/' or '+' not "
-                     L"immediately";
+          std::stringstream Message;
+          Message << "unexpected '" << Character_
+                  << "', '<' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '#', '/' or '+' not "
+                     "immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         switch (ThePreviousCase->ThePreviousCase) {
-        case L'[':
+        case '[':
           push_back_Character(TheStreamedType, Lemma, Character_);
           continue;
-        case L'/':
+        case '/':
           break;
-        case L'#':
-          //std::wcerr << L"[306] Character: " << Character_ << L"||| Lemma: " << Lemma << std::endl ;
-        case L'+':
+        case '#':
+          //std::cerr << "[306] Character: " << Character_ << "||| Lemma: " << Lemma << std::endl ;
+        case '+':
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '<' expected to follow '[', '/', '>'"
-                       L"immediately, or to follow '#' or '+' not "
-                       L"immediately";
+                    << "', '<' expected to follow '[', '/', '>'"
+                       "immediately, or to follow '#' or '+' not "
+                       "immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           break;
-        case L'>':
+        case '>':
           break;
         default:
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_ << L"' following '"
+          std::stringstream Message;
+          Message << "unexpected '" << Character_ << "' following '"
                   << ThePreviousCase->ThePreviousCase
-                  << L"', '<' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '#', '/' or '+' not "
-                     L"immediately";
+                  << "', '<' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '#', '/' or '+' not "
+                     "immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
@@ -329,137 +329,137 @@ StreamedType Stream::get() {
             .TheTags.push_back(Tag());
         ThePreviousCase = PreviousCaseType(Character_);
         continue;
-      case L'>':
+      case '>':
         if (!ThePreviousCase) {
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_
-                  << L"', '>' expected to follow '[' or to follow '<' not "
-                     L"immediately";
+          std::stringstream Message;
+          Message << "unexpected '" << Character_
+                  << "', '>' expected to follow '[' or to follow '<' not "
+                     "immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         switch (ThePreviousCase->ThePreviousCase) {
-        case L'[':
+        case '[':
           push_back_Character(TheStreamedType, Lemma, Character_);
           continue;
-        case L'<':
+        case '<':
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '>' expected to follow '[' or to follow '<' not "
-                       L"immediately";
+                    << "', '>' expected to follow '[' or to follow '<' not "
+                       "immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           ThePreviousCase = PreviousCaseType(Character_);
           continue;
         default:
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_ << L"' following '"
+          std::stringstream Message;
+          Message << "unexpected '" << Character_ << "' following '"
                   << ThePreviousCase->ThePreviousCase
-                  << L"', '>' expected to follow '[' or to follow '<' not "
-                     L"immediately";
+                  << "', '>' expected to follow '[' or to follow '<' not "
+                     "immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         std::abort();
-      case L'#':
-        //std::wcerr << L"[391] Character: " << Character_ << L"||| Lemma: " << Lemma << std::endl ;
+      case '#':
+        //std::cerr << "[391] Character: " << Character_ << "||| Lemma: " << Lemma << std::endl ;
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
-          case L']':
-          case L'^':
-          case L'$':
+          case '[':
+          case ']':
+          case '^':
+          case '$':
             push_back_Character(TheStreamedType, Lemma, Character_);
             continue;
-          case L'/':
+          case '/':
             if (ThePreviousCase->isPreviousCharacter) {
-              std::wstringstream Message;
-              Message << L"unexpected '" << Character_
-                      << L"' immediately following '"
+              std::stringstream Message;
+              Message << "unexpected '" << Character_
+                      << "' immediately following '"
                       << ThePreviousCase->ThePreviousCase
-                      << L"', '#' expected to follow '[', ']', or '$', to "
-                         L"follow '>' immediately, or to follow '/' not "
-                         L"immediately";
+                      << "', '#' expected to follow '[', ']', or '$', to "
+                         "follow '>' immediately, or to follow '/' not "
+                         "immediately";
               throw Exception::Stream::UnexpectedCase(Message_what(Message));
             }
 
             break;
-          case L'>':
+          case '>':
             if (!ThePreviousCase->isPreviousCharacter) {
-              std::wstringstream Message;
-              Message << L"unexpected '" << Character_
-                      << L"' not immediately following '"
+              std::stringstream Message;
+              Message << "unexpected '" << Character_
+                      << "' not immediately following '"
                       << ThePreviousCase->ThePreviousCase
-                      << L"', '#' expected to follow '[', ']', or '$', to "
-                         L"follow '>' immediately, or to follow '/' not "
-                         L"immediately";
+                      << "', '#' expected to follow '[', ']', or '$', to "
+                         "follow '>' immediately, or to follow '/' not "
+                         "immediately";
               throw Exception::Stream::UnexpectedCase(Message_what(Message));
             }
 
             break;
           default:
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_ << L"' following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_ << "' following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '#' expected to follow '[', ']', or '$', to follow "
-                       L"'>' immediately, or to follow '/' not immediately";
+                    << "', '#' expected to follow '[', ']', or '$', to follow "
+                       "'>' immediately, or to follow '/' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           ThePreviousCase = PreviousCaseType(Character_);
           push_back_Character(TheStreamedType, Lemma, Character_);
-          //std::wcerr << L"[440] Character: " << Character_ << L"||| Lemma: " << Lemma << std::endl ;
+          //std::cerr << "[440] Character: " << Character_ << "||| Lemma: " << Lemma << std::endl ;
           continue;
         }
 
         push_back_Character(TheStreamedType, Lemma, Character_);
         continue;
-      case L'+':
+      case '+':
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
-          case L']':
-          case L'^':
-          case L'/':
-          case L'$':
+          case '[':
+          case ']':
+          case '^':
+          case '/':
+          case '$':
             push_back_Character(TheStreamedType, Lemma, Character_);
             continue;
-          case L'>':
+          case '>':
             if (!ThePreviousCase->isPreviousCharacter) {
-              std::wstringstream Message;
-              Message << L"unexpected '" << Character_
-                      << L"' not immediately following '"
+              std::stringstream Message;
+              Message << "unexpected '" << Character_
+                      << "' not immediately following '"
                       << ThePreviousCase->ThePreviousCase
-                      << L"', '+' expected to follow '[', ']', '^', '/' or "
-                         L"'$', to follow '>' immediately, or to follow '#' "
-                         L"not immediately";
+                      << "', '+' expected to follow '[', ']', '^', '/' or "
+                         "'$', to follow '>' immediately, or to follow '#' "
+                         "not immediately";
               throw Exception::Stream::UnexpectedCase(Message_what(Message));
             }
 
             break;
-          case L'#':
+          case '#':
             if (ThePreviousCase->isPreviousCharacter) {
-              std::wstringstream Message;
-              Message << L"unexpected '" << Character_
-                      << L"' immediately following '"
+              std::stringstream Message;
+              Message << "unexpected '" << Character_
+                      << "' immediately following '"
                       << ThePreviousCase->ThePreviousCase
-                      << L"', '+' expected to follow '[', ']', or '$', to "
-                         L"follow '>' immediately, or to follow '#' not "
-                         L"immediately";
+                      << "', '+' expected to follow '[', ']', or '$', to "
+                         "follow '>' immediately, or to follow '#' not "
+                         "immediately";
               throw Exception::Stream::UnexpectedCase(Message_what(Message));
             }
 
             break;
           default: {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_ << L"' following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_ << "' following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '+' expected to follow '[', ']', or '$', to follow "
-                       L"'>' immediately, or to follow '#' not immediately";
+                    << "', '+' expected to follow '[', ']', or '$', to follow "
+                       "'>' immediately, or to follow '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
           }
@@ -472,87 +472,87 @@ StreamedType Stream::get() {
 
         push_back_Character(TheStreamedType, Lemma, Character_);
         continue;
-      case L'$':
+      case '$':
         if (!ThePreviousCase) {
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_
-                  << L"', '$' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '*' or '#' not immediately";
+          std::stringstream Message;
+          Message << "unexpected '" << Character_
+                  << "', '$' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '*' or '#' not immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         switch (ThePreviousCase->ThePreviousCase) {
-        case L'[':
+        case '[':
           push_back_Character(TheStreamedType, Lemma, Character_);
           continue;
-        case L'*':
+        case '*':
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '$' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '*' or '#' not immediately";
+                    << "', '$' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '*' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           if (TheFlags.getDebug()) {
             if (Lemma != TheStreamedType.TheLexicalUnit->TheSurfaceForm)
-              std::wcerr << L"unexpected lemma \"" << Lemma
-                         << L"\", expected \""
+              std::cerr << "unexpected lemma \"" << Lemma
+                         << "\", expected \""
                          << TheStreamedType.TheLexicalUnit->TheSurfaceForm
-                         << L"\"\n";
+                         << "\"\n";
           }
 
           ThePreviousCase = PreviousCaseType(Character_);
           return TheStreamedType;
-        case L'>':
+        case '>':
           if (!ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' not immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' not immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '$' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '*' or '#' not immediately";
+                    << "', '$' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '*' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           break;
-        case L'#':
+        case '#':
           if (ThePreviousCase->isPreviousCharacter) {
-            std::wstringstream Message;
-            Message << L"unexpected '" << Character_
-                    << L"' immediately following '"
+            std::stringstream Message;
+            Message << "unexpected '" << Character_
+                    << "' immediately following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', '$' expected to follow '[', to follow '>' "
-                       L"immediately, or to follow '*' or '#' not immediately";
+                    << "', '$' expected to follow '[', to follow '>' "
+                       "immediately, or to follow '*' or '#' not immediately";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
 
           break;
         default:
-          std::wstringstream Message;
-          Message << L"unexpected '" << Character_ << L"' following '"
+          std::stringstream Message;
+          Message << "unexpected '" << Character_ << "' following '"
                   << ThePreviousCase->ThePreviousCase
-                  << L"', '$' expected to follow '[', to follow '>' "
-                     L"immediately, or to follow '*' or '#' not immediately";
+                  << "', '$' expected to follow '[', to follow '>' "
+                     "immediately, or to follow '*' or '#' not immediately";
           throw Exception::Stream::UnexpectedCase(Message_what(Message));
         }
 
         ThePreviousCase = PreviousCaseType(Character_);
         return TheStreamedType;
-      case L'\n':
+      case '\n':
         if (ThePreviousCase) {
           switch (ThePreviousCase->ThePreviousCase) {
-          case L'[':
-          case L']':
-          case L'$':
+          case '[':
+          case ']':
+          case '$':
             break;
           default:
-            std::wstringstream Message;
-            Message << L"unexpected newline following '"
+            std::stringstream Message;
+            Message << "unexpected newline following '"
                     << ThePreviousCase->ThePreviousCase
-                    << L"', newline expected to follow '[', ']', or '$'";
+                    << "', newline expected to follow '[', ']', or '$'";
             throw Exception::Stream::UnexpectedCase(Message_what(Message));
           }
         }
@@ -572,14 +572,14 @@ StreamedType Stream::get() {
 
   if (ThePreviousCase) {
     switch (ThePreviousCase->ThePreviousCase) {
-    case L']':
-    case L'$':
+    case ']':
+    case '$':
       break;
     default:
-      std::wstringstream Message;
-      Message << L"unexpected end-of-file following '"
+      std::stringstream Message;
+      Message << "unexpected end-of-file following '"
               << ThePreviousCase->ThePreviousCase
-              << L"', end-of-file expected to follow ']' or '$'";
+              << "', end-of-file expected to follow ']' or '$'";
       throw Exception::Stream::UnexpectedEndOfFile(Message_what(Message));
     }
   }
@@ -604,38 +604,38 @@ bool Stream::peekIsBlank() {
   std::ios::iostate state = TheCharacterStream.rdstate();
   int pos = TheCharacterStream.tellg();
 
-  const wchar_t newline1 = TheCharacterStream.get();
-  const wchar_t newline2 = TheCharacterStream.get();
+  const UChar newline1 = TheCharacterStream.get();
+  const UChar newline2 = TheCharacterStream.get();
 
   TheCharacterStream.clear(state);
   TheCharacterStream.seekg(pos);
 
-  return newline1 == L'\n' && newline2 == L'\n';
+  return newline1 == '\n' && newline2 == '\n';
 }
 
 bool Stream::flush_() const { return private_flush_; }
 
 void Stream::outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output, TaggerFlags &flags) {
+    std::ostream &output, TaggerFlags &flags) {
   using namespace std::rel_ops;
-  output << L"^";
+  output << "^";
 
   if (lexical_unit.TheAnalyses.empty() || !analysis) {
     if (flags.getShowSuperficial())
-      output << lexical_unit.TheSurfaceForm << L"/";
+      output << lexical_unit.TheSurfaceForm << "/";
 
-    output << L"*" << lexical_unit.TheSurfaceForm << L"$";
+    output << "*" << lexical_unit.TheSurfaceForm << "$";
     return;
   }
 
   if (flags.getMark()) {
     if (lexical_unit.TheAnalyses.size() != 1)
-      output << L"=";
+      output << "=";
   }
 
   if (flags.getShowSuperficial())
-    output << lexical_unit.TheSurfaceForm << L"/";
+    output << lexical_unit.TheSurfaceForm << "/";
 
   output << *analysis;
 
@@ -645,14 +645,14 @@ void Stream::outputLexicalUnit(
          // Call .end() each iteration to save memory.
          other_analysis != lexical_unit.TheAnalyses.end(); ++other_analysis) {
       if (*other_analysis != *analysis)
-        output << L"/" << *other_analysis;
+        output << "/" << *other_analysis;
     }
   }
 
-  output << L"$";
+  output << "$";
 }
 
-Stream::PreviousCaseType::PreviousCaseType(const wchar_t &PreviousCase_)
+Stream::PreviousCaseType::PreviousCaseType(const UChar &PreviousCase_)
     : ThePreviousCase(PreviousCase_), isPreviousCharacter(true) {}
 
 bool Stream::is_eof_throw_if_not_TheCharacterStream_good() const {
@@ -660,12 +660,12 @@ bool Stream::is_eof_throw_if_not_TheCharacterStream_good() const {
     return true;
 
   if (!TheCharacterStream) {
-    std::wcerr << L"State bad " << TheCharacterStream.good() << " "
+    std::cerr << "State bad " << TheCharacterStream.good() << " "
                                 << TheCharacterStream.eof() << " "
                                 << TheCharacterStream.fail() << " "
                                 << TheCharacterStream.bad() << "\n";
-    std::wstringstream Message;
-    Message << L"can't get const wchar_t: TheCharacterStream not good";
+    std::stringstream Message;
+    Message << "can't get const UChar: TheCharacterStream not good";
     throw Exception::Stream::TheCharacterStream_not_good(
         Message_what(Message));
   }
@@ -673,28 +673,28 @@ bool Stream::is_eof_throw_if_not_TheCharacterStream_good() const {
   return false;
 }
 
-std::wstring Stream::Message_what(const std::wstringstream &Message) const {
-  std::wstringstream what_;
+UString Stream::Message_what(const std::stringstream &Message) const {
+  std::stringstream what_;
 
   if (TheFilename)
-    what_ << std::wstring(TheFilename->begin(), TheFilename->end()) << L": ";
+    what_ << UString(TheFilename->begin(), TheFilename->end()) << ": ";
 
-  what_ << TheLineNumber << L":" << TheLine.size() << L": " << Message.str()
-        << L'\n' << TheLine << L'\n' << std::wstring(TheLine.size() - 1, L' ')
-        << L'^';
-  return what_.str();
+  what_ << TheLineNumber << ":" << TheLine.size() << ": " << Message.str()
+        << '\n' << TheLine << '\n' << UString(TheLine.size() - 1, ' ')
+        << '^';
+  return to_ustring(what_.str().c_str());
 }
 
 bool
 Stream::is_eof_throw_if_not_TheCharacterStream_good(StreamedType &StreamedType_,
-                                                    std::wstring &Lemma,
-                                                    const wchar_t &Character_) {
+                                                    UString &Lemma,
+                                                    const UChar &Character_) {
   if (isTheCharacterStream_eof(StreamedType_, Lemma, Character_))
     return true;
 
   if (!TheCharacterStream) {
-    std::wstringstream Message;
-    Message << L"can't get const wchar_t: TheCharacterStream not good";
+    std::stringstream Message;
+    Message << "can't get const UChar: TheCharacterStream not good";
     throw Exception::Stream::TheCharacterStream_not_good(
         Message_what(Message));
   }
@@ -703,13 +703,13 @@ Stream::is_eof_throw_if_not_TheCharacterStream_good(StreamedType &StreamedType_,
 }
 
 bool Stream::isTheCharacterStream_eof(StreamedType &StreamedType_,
-                                      std::wstring &Lemma,
-                                      const wchar_t &Character_) {
+                                      UString &Lemma,
+                                      const UChar &Character_) {
   if (TheCharacterStream.eof())
     return true;
 
   if (TheFlags.getNullFlush()) {
-    if (Character_ == L'\0') {
+    if (Character_ == '\0') {
       push_back_Character(StreamedType_, Lemma, Character_);
       private_flush_ = true;
       return true;
@@ -720,55 +720,55 @@ bool Stream::isTheCharacterStream_eof(StreamedType &StreamedType_,
 }
 
 void Stream::push_back_Character(StreamedType &StreamedType_,
-                                 std::wstring &Lemma,
-                                 const wchar_t &Character_) {
+                                 UString &Lemma,
+                                 const UChar &Character_) {
   if (ThePreviousCase) {
     switch (ThePreviousCase->ThePreviousCase) {
-    case L'[':
+    case '[':
       StreamedType_.TheString += Character_;
       break;
-    case L']':
+    case ']':
       StreamedType_.TheString += Character_;
       break;
-    case L'^':
+    case '^':
       StreamedType_.TheLexicalUnit->TheSurfaceForm += Character_;
       break;
-    case L'/':
+    case '/':
       StreamedType_.TheLexicalUnit->TheAnalyses.back()
           .TheMorphemes.back()
           .TheLemma.push_back(Character_);
       break;
-    case L'*':
+    case '*':
       Lemma += Character_;
       break;
-    case L'<':
+    case '<':
       StreamedType_.TheLexicalUnit->TheAnalyses.back()
           .TheMorphemes.back()
           .TheTags.back()
           .TheTag += Character_;
       break;
-    case L'>':
+    case '>':
       StreamedType_.TheLexicalUnit->TheAnalyses.back()
           .TheMorphemes.back()
           .TheLemma.push_back(Character_);
       break;
-    case L'#':
+    case '#':
       StreamedType_.TheLexicalUnit->TheAnalyses.back()
           .TheMorphemes.back()
           .TheLemma.push_back(Character_);
       break;
-    case L'+':
+    case '+':
       StreamedType_.TheLexicalUnit->TheAnalyses.back()
           .TheMorphemes.back()
           .TheLemma.push_back(Character_);
       break;
-    case L'$':
+    case '$':
       StreamedType_.TheString += Character_;
       break;
     default:
-      std::wstringstream Message;
-      Message << L"unexpected previous reserved or special character '"
-              << ThePreviousCase->ThePreviousCase << L"'";
+      std::stringstream Message;
+      Message << "unexpected previous reserved or special character '"
+              << ThePreviousCase->ThePreviousCase << "'";
       throw Exception::Stream::UnexpectedPreviousCase(Message_what(Message));
     }
 
@@ -779,18 +779,18 @@ void Stream::push_back_Character(StreamedType &StreamedType_,
   StreamedType_.TheString += Character_;
 }
 
-void Stream::case_0x5c(StreamedType &StreamedType_, std::wstring &Lemma,
-                       const wchar_t &Character_) {
+void Stream::case_0x5c(StreamedType &StreamedType_, UString &Lemma,
+                       const UChar &Character_) {
   push_back_Character(StreamedType_, Lemma, Character_);
 
   {
-    const wchar_t Character_ = TheCharacterStream.get();
+    const UChar Character_ = TheCharacterStream.get();
 
     if (is_eof_throw_if_not_TheCharacterStream_good(StreamedType_, Lemma,
                                                     Character_)) {
-      std::wstringstream Message;
-      Message << L"unexpected end-of-file following '\\', end-of-file "
-                 L"expected to follow ']' or '$'";
+      std::stringstream Message;
+      Message << "unexpected end-of-file following '\\', end-of-file "
+                 "expected to follow ']' or '$'";
       throw Exception::Stream::UnexpectedEndOfFile(Message_what(Message));
     }
 
diff --git a/apertium/stream.h b/apertium/stream.h
index e1603ea..69c266a 100644
--- a/apertium/stream.h
+++ b/apertium/stream.h
@@ -30,11 +30,11 @@ namespace Apertium {
 class Stream {
 public:
   Stream(TaggerFlags &Flags_);
-  Stream(TaggerFlags &Flags_, std::wifstream &CharacterStream_,
+  Stream(TaggerFlags &Flags_, std::ifstream &CharacterStream_,
          const char *const Filename_);
-  Stream(TaggerFlags &Flags_, std::wifstream &CharacterStream_,
+  Stream(TaggerFlags &Flags_, std::ifstream &CharacterStream_,
          const std::string &Filename_);
-  Stream(TaggerFlags &Flags_, std::wifstream &CharacterStream_,
+  Stream(TaggerFlags &Flags_, std::ifstream &CharacterStream_,
          const std::stringstream &Filename_);
   StreamedType get();
   StreamedType peek();
@@ -43,30 +43,30 @@ public:
 
   static void outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output, TaggerFlags &flags);
+    std::ostream &output, TaggerFlags &flags);
 
   std::size_t TheLineNumber;
 private:
   class PreviousCaseType {
   public:
-    PreviousCaseType(const wchar_t &PreviousCase_);
-    wchar_t ThePreviousCase;
+    PreviousCaseType(const UChar &PreviousCase_);
+    UChar ThePreviousCase;
     bool isPreviousCharacter : 1;
   };
   bool is_eof_throw_if_not_TheCharacterStream_good() const;
-  std::wstring Message_what(const std::wstringstream &Message) const;
+  UString Message_what(const std::stringstream &Message) const;
   bool is_eof_throw_if_not_TheCharacterStream_good(StreamedType &StreamedType_,
-                                                   std::wstring &Lemma,
-                                                   const wchar_t &Character_);
+                                                   UString &Lemma,
+                                                   const UChar &Character_);
   bool isTheCharacterStream_eof(StreamedType &StreamedType_,
-                                std::wstring &Lemma, const wchar_t &Character_);
-  void push_back_Character(StreamedType &StreamedType_, std::wstring &Lemma,
-                           const wchar_t &Character_);
-  void case_0x5c(StreamedType &StreamedType_, std::wstring &Lemma,
-                 const wchar_t &Character_);
-  std::wistream &TheCharacterStream;
+                                UString &Lemma, const UChar &Character_);
+  void push_back_Character(StreamedType &StreamedType_, UString &Lemma,
+                           const UChar &Character_);
+  void case_0x5c(StreamedType &StreamedType_, UString &Lemma,
+                 const UChar &Character_);
+  std::istream &TheCharacterStream;
   Optional<std::string> TheFilename;
-  std::wstring TheLine;
+  UString TheLine;
   TaggerFlags &TheFlags;
   bool private_flush_ : 1;
   Optional<PreviousCaseType> ThePreviousCase;
diff --git a/apertium/stream_tagger.cc b/apertium/stream_tagger.cc
index 617588e..a00735c 100644
--- a/apertium/stream_tagger.cc
+++ b/apertium/stream_tagger.cc
@@ -9,7 +9,7 @@ StreamTagger::~StreamTagger() {}
 
 void StreamTagger::outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output) {
+    std::ostream &output) {
   Stream::outputLexicalUnit(lexical_unit, analysis, output, TheFlags);
 }
 }
diff --git a/apertium/stream_tagger.h b/apertium/stream_tagger.h
index 11e93ca..2d0e123 100644
--- a/apertium/stream_tagger.h
+++ b/apertium/stream_tagger.h
@@ -15,11 +15,11 @@ public:
   virtual ~StreamTagger();
   virtual void serialise(std::ostream &Serialised_basic_Tagger) const = 0;
   virtual void deserialise(std::istream &Serialised_basic_Tagger) = 0;
-  virtual void tag(Stream &Input, std::wostream &Output) = 0;
+  virtual void tag(Stream &Input, std::ostream &Output) = 0;
   virtual void train(Stream &TaggedCorpus) = 0;
   void outputLexicalUnit(
     const LexicalUnit &lexical_unit, const Optional<Analysis> analysis,
-    std::wostream &output);
+    std::ostream &output);
 };
 }
 
diff --git a/apertium/streamed_type.h b/apertium/streamed_type.h
index f73ec21..8a70b44 100644
--- a/apertium/streamed_type.h
+++ b/apertium/streamed_type.h
@@ -24,7 +24,7 @@
 namespace Apertium {
 class StreamedType {
 public:
-  std::wstring TheString;
+  UString TheString;
   Optional<LexicalUnit> TheLexicalUnit;
 };
 }
diff --git a/apertium/string_utils.cc b/apertium/string_utils.cc
deleted file mode 100644
index 8ae064b..0000000
--- a/apertium/string_utils.cc
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
- * author: Felipe Sánchez-Martínez
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-
-#include <apertium/string_utils.h>
-#include <lttoolbox/xml_parse_util.h>
-#include <iostream>
-#include <cstring>
-
-#ifdef _MSC_VER
-#define snprintf _snprintf
-#endif
-
-//Delete white spaces from the end and the begining of the string
-wstring
-StringUtils::trim(wstring const &str)
-{
-  if(str == L"")
-  {
-    return L"";
-  }
-
-  int begin = 0, end = str.size() - 1;
-
-  while(begin < end && iswspace(str[begin]))
-  {
-    begin++;
-  }
-
-  while(end > begin && iswspace(str[end]))
-  {
-    end--;
-  }
-
-  if(!iswspace(str[end]))
-  {
-    end++;
-  }
-
-  return str.substr(begin, end-begin);
-}
-
-vector<wstring>
-StringUtils::split_wstring(wstring const &input, wstring const &delimiter)
-{
-  unsigned pos;
-  int new_pos;
-  vector<wstring> result;
-  wstring s = L"";
-  pos=0;
-
-  while(pos<input.size())
-  {
-    new_pos=input.find(delimiter, pos);
-    if(new_pos<0)
-      new_pos=input.size();
-    s=input.substr(pos, new_pos-pos);
-    if (s.length()==0) {
-      wcerr<<L"Warning in StringUtils::split_wstring: After splitting there is an empty string\n";
-      wcerr<<L"Skipping this empty string\n";
-    } else
-      result.push_back(s);
-    pos=new_pos+delimiter.size();
-  }
-
-  return result;
-}
-
-wstring
-StringUtils::vector2wstring(vector<wstring> const &v)
-{
-  wstring s = L"";
-  for(unsigned i=0; i<v.size(); i++)
-  {
-    if (i>0)
-      s+=L' ';
-    s.append(v[i]);
-  }
-  return s;
-}
-
-wstring
-StringUtils::substitute(wstring const &source, wstring const &olds, wstring const &news) {
-  wstring s = source;
-
-  unsigned int p=s.find(olds , 0);
-  while (p!=static_cast<unsigned int>(wstring::npos))
-  {
-    s.replace(p, olds.length(), news);
-    p+=news.length();
-    p=s.find(olds,p);
-  }
-
-  return s;
-}
-
-wstring
-StringUtils::itoa(int n)
-{
-  return XMLParseUtil::stows(itoa_string(n));
-}
-
-string
-StringUtils::itoa_string(int n)
-{
-  char str[256];
-  snprintf(str, 256, "%d", n);
-  return str;
-}
-
-wstring
-StringUtils::ftoa(double f)
-{
-  char str[256];
-  sprintf(str, "%f",f);
-  return XMLParseUtil::stows(str);
-}
-
-wstring
-StringUtils::tolower(wstring const &s)
-{
-  wstring l=s;
-  for(unsigned i=0; i<s.length(); i++)
-  {
-    l[i] = (wchar_t) towlower(s[i]);
-  }
-  return l;
-}
-
-wstring
-StringUtils::toupper(wstring const &s) {
-  wstring l=s;
-  for(unsigned i=0; i<s.length(); i++)
-  {
-    l[i]  = (wchar_t) towupper(s[i]);
-  }
-
-  return l;
-}
-
-bool Apertium::operator==(string const &s1, string const &s2)
-{
-  return strcmp(s1.c_str(), s2.c_str()) == 0;
-}
-
-bool Apertium::operator==(string const &s1, char const *s2)
-{
-  return strcmp(s1.c_str(), s2) == 0;
-}
-
-bool Apertium::operator==(char const *s1, string const &s2)
-{
-  return strcmp(s1, s2.c_str()) == 0;
-}
-
-bool Apertium::operator!=(string const &s1, string const &s2)
-{
-  return strcmp(s1.c_str(), s2.c_str()) != 0;
-}
-
-bool Apertium::operator!=(string const &s1, char const *s2)
-{
-  return strcmp(s1.c_str(), s2) != 0;
-}
-
-bool Apertium::operator!=(char const *s1, string const &s2)
-{
-  return strcmp(s1, s2.c_str()) != 0;
-}
-
-#include "string_to_wostream.h"
diff --git a/apertium/string_utils.h b/apertium/string_utils.h
deleted file mode 100644
index 1e02123..0000000
--- a/apertium/string_utils.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
- * author: Felipe Sánchez-Martínez
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#ifndef __STRINGUTILS_H_
-#define __STRINGUTILS_H_
-
-#include <string>
-#include <cstring>
-#include <vector>
-
-using namespace std;
-
-namespace Apertium
-{
-  bool operator==(string const &s1, string const &s2);
-  bool operator==(string const &s1, char const *s2);
-  bool operator==(char const *s1, string const &s2);
-  bool operator!=(string const &s1, string const &s2);
-  bool operator!=(string const &s1, char const *s2);
-  bool operator!=(char const *s1, string const &s2);
-}
-
-class StringUtils {
-  public:
-
-  static wstring trim(wstring const &str);
-
-  static vector<wstring> split_wstring(wstring const &input, wstring const &delimiter);
-
-  static wstring vector2wstring(vector<wstring> const &v);
-
-  //Replace each ocurrence of the string 'olds' by the string 'news' in string 'source'
-  static wstring substitute(const wstring &source, const wstring &olds, const wstring &news);
-
-  static wstring itoa(int n);
-
-  static string itoa_string(int n);
-
-  static wstring ftoa(double f);
-
-  static wstring tolower(wstring const &s);
-
-  static wstring toupper(wstring const &s);
-};
-
-std::wostream & operator<< (std::wostream & ostr, std::string const & str);
-
-#endif
diff --git a/apertium/tag.cc b/apertium/tag.cc
index 0aba018..2983f94 100644
--- a/apertium/tag.cc
+++ b/apertium/tag.cc
@@ -24,11 +24,16 @@ bool operator==(const Tag &a, const Tag &b) { return a.TheTag == b.TheTag; }
 
 bool operator<(const Tag &a, const Tag &b) { return a.TheTag < b.TheTag; }
 
-Tag::operator std::wstring() const {
+Tag::operator UString() const {
   if (TheTag.empty())
     throw Exception::Tag::TheTags_empty("can't convert Tag comprising empty "
-                                        "TheTag std::wstring to std::wstring");
+                                        "TheTag UString to UString");
 
-  return L"<" + TheTag + L">";
+  UString ret;
+  ret.reserve(TheTag.size() + 2);
+  ret += '<';
+  ret.append(TheTag);
+  ret += '>';
+  return ret;
 }
 }
diff --git a/apertium/tag.h b/apertium/tag.h
index 147ea0d..62698e8 100644
--- a/apertium/tag.h
+++ b/apertium/tag.h
@@ -16,15 +16,15 @@
 #ifndef TAG_H
 #define TAG_H
 
-#include <string>
+#include <lttoolbox/ustring.h>
 
 namespace Apertium {
 class Tag {
 public:
   friend bool operator==(const Tag &a, const Tag &b);
   friend bool operator<(const Tag &a, const Tag &b);
-  operator std::wstring() const;
-  std::wstring TheTag;
+  operator UString() const;
+  UString TheTag;
 };
 }
 
diff --git a/apertium/tagger.cc b/apertium/tagger.cc
index a7f3cc0..6e8372c 100644
--- a/apertium/tagger.cc
+++ b/apertium/tagger.cc
@@ -285,7 +285,7 @@ apertium_tagger::apertium_tagger(int &argc, char **&argv)
       std::abort();
     }
   } catch (const ExceptionType &ExceptionType_) {
-    std::wcerr << "apertium-tagger: " << ExceptionType_.what() << std::endl;
+    std::cerr << "apertium-tagger: " << ExceptionType_.what() << std::endl;
     throw Exception::apertium_tagger::err_Exception("");
   }
 }
@@ -294,7 +294,7 @@ apertium_tagger::~apertium_tagger() {}
 
 void apertium_tagger::help() {
 
-  std::wcerr <<
+  std::cerr <<
 "Usage: apertium-tagger [OPTION]... -g SERIALISED_TAGGER                        \\\n"
 "                                      [INPUT                                   \\\n"
 "                                      [OUTPUT]]\n"
@@ -339,27 +339,27 @@ void apertium_tagger::help() {
   options_description_.push_back(std::make_pair("-p, --show-superficial", "with -g, output each lexical unit's surface form"));
   options_description_.push_back(std::make_pair("-z, --null-flush",       "with -g, flush the output after getting each null character"));
   align::align_(options_description_);
-  std::wcerr << '\n';
+  std::cerr << '\n';
   options_description_.clear();
   options_description_.push_back(std::make_pair("-u, --unigram=MODEL", "use unigram algorithm MODEL from <https://coltekin.net/cagri/papers/trmorph-tools.pdf>"));
   align::align_(options_description_);
-  std::wcerr << '\n';
+  std::cerr << '\n';
   options_description_.clear();
   options_description_.push_back(std::make_pair("-w, --sliding-window", "use the Light Sliding Window algorithm"));
   options_description_.push_back(std::make_pair("-x, --perceptron", "use the averaged perceptron algorithm"));
   options_description_.push_back(std::make_pair("-e, --skip-on-error", "with -xs, ignore certain types of errors with the training corpus"));
   align::align_(options_description_);
-  std::wcerr << '\n';
+  std::cerr << '\n';
   options_description_.clear();
   options_description_.push_back(std::make_pair("-g, --tagger", "disambiguate the input"));
   align::align_(options_description_);
-  std::wcerr << '\n';
+  std::cerr << '\n';
   options_description_.clear();
   options_description_.push_back(std::make_pair("-r, --retrain=ITERATIONS", "with -u: exit;\notherwise: retrain the tagger with ITERATIONS unsupervised iterations"));
   options_description_.push_back(std::make_pair("-s, --supervised=ITERATIONS", "with -u: train the tagger with a hand-tagged corpus;\nwith -w: exit;\notherwise: initialise the tagger with a hand-tagged corpus and retrain it with ITERATIONS unsupervised iterations"));
   options_description_.push_back(std::make_pair("-t, --train=ITERATIONS", "with -u: exit;\notherwise: train the tagger with ITERATIONS unsupervised iterations"));
   align::align_(options_description_);
-  std::wcerr << '\n';
+  std::cerr << '\n';
   options_description_.clear();
   options_description_.push_back(std::make_pair("-h, --help", "display this help and exit"));
   align::align_(options_description_);
@@ -550,24 +550,12 @@ void apertium_tagger::init_FILE_Tagger(FILE_Tagger &FILE_Tagger_, string const &
 MorphoStream* apertium_tagger::setup_untagged_morpho_stream(
     FILE_Tagger &FILE_Tagger_,
     char *DicFn, char *UntaggedFn,
-    FILE **Dictionary, FILE **UntaggedCorpus) {
-  if (*TheFunctionType != Retrain) {
-    *Dictionary = try_open_file_utf8("DICTIONARY", DicFn, "r");
-  }
+    UFILE* *UntaggedCorpus) {
   *UntaggedCorpus = try_open_file_utf8("UNTAGGED_CORPUS", UntaggedFn, "r");
 
-  FILE_Tagger_.read_dictionary(*Dictionary);
-
-  return new FileMorphoStream(*UntaggedCorpus, true, &FILE_Tagger_.get_tagger_data());
-}
+  FILE_Tagger_.read_dictionary(DicFn);
 
-void apertium_tagger::close_untagged_files(
-    char *DicFn, char *UntaggedFn,
-    FILE *Dictionary, FILE *UntaggedCorpus) {
-  if (*TheFunctionType == Supervised || *TheFunctionType == Train) {
-    try_close_file("DICTIONARY", DicFn, Dictionary);
-  }
-  try_close_file("UNTAGGED_CORPUS", UntaggedFn, UntaggedCorpus);
+  return new FileMorphoStream(UntaggedFn, true, &FILE_Tagger_.get_tagger_data());
 }
 
 /** Implementation of flags/subcommands */
@@ -591,20 +579,20 @@ void apertium_tagger::g_StreamTagger(StreamTagger &StreamTagger_) {
   }
   if (nonoptarg < 2) {
     Stream Input(TheFlags);
-    StreamTagger_.tag(Input, std::wcout);
+    StreamTagger_.tag(Input, std::cout);
     return;
   }
 
-  std::wifstream Input_stream;
+  std::ifstream Input_stream;
   try_open_fstream("INPUT", argv[optind + 1], Input_stream);
 
   if (nonoptarg < 3) {
     Stream Input(TheFlags, Input_stream, argv[optind + 1]);
-    StreamTagger_.tag(Input, std::wcout);
+    StreamTagger_.tag(Input, std::cout);
     return;
   }
 
-  std::wofstream Output_stream;
+  std::ofstream Output_stream;
   try_open_fstream("OUTPUT", argv[optind + 2], Input_stream);
 
   Stream Input(TheFlags, Input_stream, argv[optind + 1]);
@@ -628,12 +616,12 @@ void apertium_tagger::s_StreamTaggerTrainer(
     expect_file_arguments(nonoptarg, 2);
   }
 
-  std::wifstream TaggedCorpus_stream;
+  std::ifstream TaggedCorpus_stream;
   try_open_fstream("TAGGED_CORPUS", argv[optind + 1], TaggedCorpus_stream);
   Stream TaggedCorpus(TheFlags, TaggedCorpus_stream, argv[optind + 1]);
 
   if (*TheFunctionTypeType == Perceptron) {
-    std::wifstream UntaggedCorpus_stream;
+    std::ifstream UntaggedCorpus_stream;
     try_open_fstream("UNTAGGED_CORPUS", argv[optind + 2], UntaggedCorpus_stream);
     Stream UntaggedCorpus(TheFlags, UntaggedCorpus_stream, argv[optind + 2]);
 
@@ -661,21 +649,16 @@ void apertium_tagger::g_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
   try_close_file("SERIALISED_TAGGER", argv[optind], Serialised_FILE_Tagger);
   TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
   TaggerWord::generate_marks = TheFlags.getMark();
-  if (nonoptarg < 2)
-    FILE_Tagger_.tagger(stdin, stdout);
-  else {
-    FILE *Input = try_open_file("INPUT", argv[optind + 1], "r");
-
-    if (nonoptarg < 3)
-      FILE_Tagger_.tagger(Input, stdout);
-    else {
-      FILE *Output = try_open_file_utf8("OUTPUT", argv[optind + 2], "w");
-      FILE_Tagger_.tagger(Input, Output);
-      try_close_file("OUTPUT", argv[optind + 2], Output);
+  const char* infile = NULL;
+  UFILE* Output = u_finit(stdout, NULL, NULL);
+  if (nonoptarg >= 2) {
+    infile = argv[optind + 1];
+    if (nonoptarg >= 3) {
+      Output = try_open_file_utf8("OUTPUT", argv[optind + 2], "w");
     }
-
-    try_close_file("INPUT", argv[optind + 1], Input);
   }
+  FILE_Tagger_.tagger(infile, Output);
+  u_fclose(Output);
 }
 
 void apertium_tagger::r_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
@@ -697,17 +680,15 @@ void apertium_tagger::r_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
 
   TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
 
-  FILE *UntaggedCorpus;
+  UFILE* UntaggedCorpus;
   MorphoStream* ms = setup_untagged_morpho_stream(
     FILE_Tagger_,
     NULL, UntaggedFn,
-    NULL, &UntaggedCorpus);
+    &UntaggedCorpus);
 
   FILE_Tagger_.train(*ms, TheFunctionTypeOptionArgument);
   delete ms;
-  close_untagged_files(
-    NULL, UntaggedFn,
-    NULL, UntaggedCorpus);
+  u_fclose(UntaggedCorpus);
 
   Serialised_FILE_Tagger =
       try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
@@ -732,26 +713,20 @@ void apertium_tagger::s_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
       &TsxFn, &ProbFn);
   init_FILE_Tagger(FILE_Tagger_, TsxFn);
 
-  FILE *Dictionary, *UntaggedCorpus;
+  UFILE* UntaggedCorpus;
   MorphoStream* ms = setup_untagged_morpho_stream(
     FILE_Tagger_,
     DicFn, UntaggedFn,
-    &Dictionary, &UntaggedCorpus);
-  FILE *TaggedCorpus = try_open_file("TAGGED_CORPUS", TaggedFn, "r");
-  FileMorphoStream tms(TaggedCorpus, true, &FILE_Tagger_.get_tagger_data());
+    &UntaggedCorpus);
+  FileMorphoStream tms(TaggedFn, true, &FILE_Tagger_.get_tagger_data());
 
   FILE_Tagger_.init_probabilities_from_tagged_text_(tms, *ms);
-  try_close_file("TAGGED_CORPUS", TaggedFn, TaggedCorpus);
   delete ms;
-  close_untagged_files(
-    DicFn, UntaggedFn,
-    Dictionary, UntaggedCorpus);
+  u_fclose(UntaggedCorpus);
 
   if (do_unsup) {
-    FILE *Corpus = try_open_file_utf8("CORPUS", CrpFn, "r");
-    FILE_Tagger_.train(Corpus, TheFunctionTypeOptionArgument);
-    try_close_file("CORPUS", CrpFn, Corpus);
- }
+    FILE_Tagger_.train(CrpFn, TheFunctionTypeOptionArgument);
+  }
 
   FILE *Serialised_FILE_Tagger =
       try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
@@ -773,17 +748,15 @@ void apertium_tagger::t_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
       &TsxFn, &ProbFn);
   init_FILE_Tagger(FILE_Tagger_, TsxFn);
 
-  FILE *Dictionary, *UntaggedCorpus;
+  UFILE* UntaggedCorpus;
   MorphoStream* ms = setup_untagged_morpho_stream(
     FILE_Tagger_,
     DicFn, UntaggedFn,
-    &Dictionary, &UntaggedCorpus);
+    &UntaggedCorpus);
 
   FILE_Tagger_.init_and_train(*ms, TheFunctionTypeOptionArgument);
   delete ms;
-  close_untagged_files(
-    DicFn, UntaggedFn,
-    Dictionary, UntaggedCorpus);
+  u_fclose(UntaggedCorpus);
 
   FILE *Serialised_FILE_Tagger =
       try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
diff --git a/apertium/tagger.h b/apertium/tagger.h
index 1d00620..cf3017b 100644
--- a/apertium/tagger.h
+++ b/apertium/tagger.h
@@ -62,10 +62,7 @@ private:
   MorphoStream* setup_untagged_morpho_stream(
     FILE_Tagger &FILE_Tagger_,
     char *DicFn, char *UntaggedFn,
-    FILE **Dictionary, FILE **UntaggedCorpus);
-  void close_untagged_files(
-    char *DicFn, char *UntaggedFn,
-    FILE *Dictionary, FILE *UntaggedCorpus);
+    UFILE **UntaggedCorpus);
 
   void g_StreamTagger(StreamTagger &StreamTagger_);
   void s_StreamTaggerTrainer(StreamTagger &StreamTaggerTrainer_);
diff --git a/apertium/tagger_data.cc b/apertium/tagger_data.cc
index 759642d..0eb93ec 100644
--- a/apertium/tagger_data.cc
+++ b/apertium/tagger_data.cc
@@ -17,9 +17,7 @@
 #include <apertium/tagger_data.h>
 #include <lttoolbox/compression.h>
 #include <apertium/endian_double_util.h>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
+#include <lttoolbox/string_utils.h>
 
 void
 TaggerData::copy(TaggerData const &o)
@@ -94,38 +92,38 @@ TaggerData::setForbidRules(vector<TForbidRule> &fr)
   forbid_rules = fr;
 }
 
-map<wstring, TTag, Ltstr> &
+map<UString, TTag> &
 TaggerData::getTagIndex()
 {
   return tag_index;
 }
 
-const map<wstring, TTag, Ltstr> &
+const map<UString, TTag> &
 TaggerData::getTagIndex() const
 {
   return tag_index;
 }
 
 void
-TaggerData::setTagIndex(map<wstring, TTag, Ltstr> const &ti)
+TaggerData::setTagIndex(map<UString, TTag> const &ti)
 {
   tag_index = ti;
 }
 
-vector<wstring> &
+vector<UString> &
 TaggerData::getArrayTags()
 {
   return array_tags;
 }
 
-const vector<wstring> &
+const vector<UString> &
 TaggerData::getArrayTags() const
 {
   return array_tags;
 }
 
 void
-TaggerData::setArrayTags(vector<wstring> const &at)
+TaggerData::setArrayTags(vector<UString> const &at)
 {
   array_tags = at;
 }
@@ -148,38 +146,38 @@ TaggerData::setEnforceRules(vector<TEnforceAfterRule> const &tear)
   enforce_rules = tear;
 }
 
-vector<wstring> &
+vector<UString> &
 TaggerData::getPreferRules()
 {
   return prefer_rules;
 }
 
-const vector<wstring> &
+const vector<UString> &
 TaggerData::getPreferRules() const
 {
   return prefer_rules;
 }
 
 void
-TaggerData::setPreferRules(vector<wstring> const &pr)
+TaggerData::setPreferRules(vector<UString> const &pr)
 {
   prefer_rules = pr;
 }
 
-vector<wstring> &
+vector<UString> &
 TaggerData::getDiscardRules()
 {
   return discard;
 }
 
-const vector<wstring> &
+const vector<UString> &
 TaggerData::getDiscardRules() const
 {
   return discard;
 }
 
 void
-TaggerData::setDiscardRules(vector<wstring> const &v)
+TaggerData::setDiscardRules(vector<UString> const &v)
 {
   discard = v;
 }
@@ -233,7 +231,7 @@ TaggerData::setPatternList(PatternList const &pl)
 }
 
 void
-TaggerData::addDiscard(wstring const &tags)
+TaggerData::addDiscard(UString const &tags)
 {
   discard.push_back(tags);
 }
diff --git a/apertium/tagger_data.h b/apertium/tagger_data.h
index 2190c8d..2b7dd26 100644
--- a/apertium/tagger_data.h
+++ b/apertium/tagger_data.h
@@ -21,7 +21,6 @@
 #include <apertium/ttag.h>
 #include <apertium/collection.h>
 #include <lttoolbox/pattern_list.h>
-#include <lttoolbox/ltstr.h>
 
 #include <map>
 #include <set>
@@ -35,14 +34,14 @@ class TaggerData
 protected:
   set<TTag> open_class;
   vector<TForbidRule> forbid_rules;
-  map<wstring, TTag, Ltstr> tag_index;
-  vector<wstring> array_tags;
+  map<UString, TTag> tag_index;
+  vector<UString> array_tags;
   vector<TEnforceAfterRule> enforce_rules;
-  vector<wstring> prefer_rules;
+  vector<UString> prefer_rules;
   ConstantManager constants;
   Collection output;
   PatternList plist;
-  vector<wstring> discard;
+  vector<UString> discard;
 
   void copy(TaggerData const &o);
 public:
@@ -59,25 +58,25 @@ public:
   const vector<TForbidRule> & getForbidRules() const;
   void setForbidRules(vector<TForbidRule> &fr);
 
-  map<wstring, TTag, Ltstr> & getTagIndex();
-  const map<wstring, TTag, Ltstr> & getTagIndex() const;
-  void setTagIndex(map<wstring, TTag, Ltstr> const &ti);
+  map<UString, TTag> & getTagIndex();
+  const map<UString, TTag> & getTagIndex() const;
+  void setTagIndex(map<UString, TTag> const &ti);
 
-  vector<wstring> & getArrayTags();
-  const vector<wstring> & getArrayTags() const;
-  void setArrayTags(vector<wstring> const &at);
+  vector<UString> & getArrayTags();
+  const vector<UString> & getArrayTags() const;
+  void setArrayTags(vector<UString> const &at);
 
   vector<TEnforceAfterRule> & getEnforceRules();
   const vector<TEnforceAfterRule> & getEnforceRules() const;
   void setEnforceRules(vector<TEnforceAfterRule> const &tear);
 
-  vector<wstring> & getPreferRules();
-  const vector<wstring> & getPreferRules() const;
-  void setPreferRules(vector<wstring> const &pr);
+  vector<UString> & getPreferRules();
+  const vector<UString> & getPreferRules() const;
+  void setPreferRules(vector<UString> const &pr);
 
-  vector<wstring> & getDiscardRules();
-  const vector<wstring> & getDiscardRules() const;
-  void setDiscardRules(vector<wstring> const &dr);
+  vector<UString> & getDiscardRules();
+  const vector<UString> & getDiscardRules() const;
+  void setDiscardRules(vector<UString> const &dr);
 
   ConstantManager & getConstants();
   const ConstantManager & getConstants() const;
@@ -91,7 +90,7 @@ public:
   PatternList & getPatternList();
   const PatternList & getPatternList() const;
 
-  void addDiscard(wstring const &tags);
+  void addDiscard(UString const &tags);
 };
 
 #endif
diff --git a/apertium/tagger_data_hmm.cc b/apertium/tagger_data_hmm.cc
index 0b60776..759ac73 100644
--- a/apertium/tagger_data_hmm.cc
+++ b/apertium/tagger_data_hmm.cc
@@ -18,7 +18,7 @@
 #include <apertium/tagger_data_hmm.h>
 #include <lttoolbox/compression.h>
 #include <apertium/endian_double_util.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 using namespace Apertium;
 
@@ -191,13 +191,13 @@ TaggerDataHMM::read(FILE *in)
   // array_tags
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    array_tags.push_back(Compression::wstring_read(in));
+    array_tags.push_back(Compression::string_read(in));
   }
 
   // tag_index
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    wstring tmp = Compression::wstring_read(in);
+    UString tmp = Compression::string_read(in);
     tag_index[tmp] = Compression::multibyte_read(in);
   }
 
@@ -216,7 +216,7 @@ TaggerDataHMM::read(FILE *in)
   // prefer_rules
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    prefer_rules.push_back(Compression::wstring_read(in));
+    prefer_rules.push_back(Compression::string_read(in));
   }
 
   // constants
@@ -280,7 +280,7 @@ TaggerDataHMM::read(FILE *in)
 
   for(unsigned int i = 0; i < limit; i++)
   {
-    discard.push_back(Compression::wstring_read(in));
+    discard.push_back(Compression::string_read(in));
   }
 }
 
@@ -310,16 +310,14 @@ TaggerDataHMM::write(FILE *out)
   Compression::multibyte_write(array_tags.size(), out);
   for(unsigned int i = 0, limit = array_tags.size(); i != limit; i++)
   {
-    Compression::wstring_write(array_tags[i], out);
+    Compression::string_write(array_tags[i], out);
   }
 
   // tag_index
   Compression::multibyte_write(tag_index.size(), out);
-  for(map<wstring, int, Ltstr>::iterator it = tag_index.begin(), limit = tag_index.end();
-      it != limit; it++)
-  {
-    Compression::wstring_write(it->first, out);
-    Compression::multibyte_write(it->second, out);
+  for (auto& it : tag_index) {
+    Compression::string_write(it.first, out);
+    Compression::multibyte_write(it.second, out);
   }
 
   // enforce_rules
@@ -338,7 +336,7 @@ TaggerDataHMM::write(FILE *out)
   Compression::multibyte_write(prefer_rules.size(), out);
   for(unsigned int i = 0, limit = prefer_rules.size(); i != limit; i++)
   {
-    Compression::wstring_write(prefer_rules[i], out);
+    Compression::string_write(prefer_rules[i], out);
   }
 
   // constants
@@ -396,7 +394,7 @@ TaggerDataHMM::write(FILE *out)
     Compression::multibyte_write(discard.size(), out);
     for(unsigned int i = 0, limit = discard.size(); i != limit; i++)
     {
-      Compression::wstring_write(discard[i], out);
+      Compression::string_write(discard[i], out);
     }
   }
 }
diff --git a/apertium/tagger_data_lsw.cc b/apertium/tagger_data_lsw.cc
index 3a79aca..d8a521f 100644
--- a/apertium/tagger_data_lsw.cc
+++ b/apertium/tagger_data_lsw.cc
@@ -18,7 +18,7 @@
 #include <apertium/tagger_data_lsw.h>
 #include <lttoolbox/compression.h>
 #include <apertium/endian_double_util.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 using namespace Apertium;
 
@@ -136,13 +136,13 @@ TaggerDataLSW::read(FILE *in)
   // array_tags
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    array_tags.push_back(Compression::wstring_read(in));
+    array_tags.push_back(Compression::string_read(in));
   }
 
   // tag_index
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    wstring tmp = Compression::wstring_read(in);
+    UString tmp = Compression::string_read(in);
     tag_index[tmp] = Compression::multibyte_read(in);
   }
 
@@ -161,7 +161,7 @@ TaggerDataLSW::read(FILE *in)
   // prefer_rules
   for(int i = Compression::multibyte_read(in); i != 0; i--)
   {
-    prefer_rules.push_back(Compression::wstring_read(in));
+    prefer_rules.push_back(Compression::string_read(in));
   }
 
   // constants
@@ -212,7 +212,7 @@ TaggerDataLSW::read(FILE *in)
 
   for(unsigned int i = 0; i < limit; i++)
   {
-    discard.push_back(Compression::wstring_read(in));
+    discard.push_back(Compression::string_read(in));
   }
 }
 
@@ -242,16 +242,14 @@ TaggerDataLSW::write(FILE *out)
   Compression::multibyte_write(array_tags.size(), out);
   for(unsigned int i = 0, limit = array_tags.size(); i != limit; i++)
   {
-    Compression::wstring_write(array_tags[i], out);
+    Compression::string_write(array_tags[i], out);
   }
 
   // tag_index
   Compression::multibyte_write(tag_index.size(), out);
-  for(map<wstring, int, Ltstr>::iterator it = tag_index.begin(), limit = tag_index.end();
-      it != limit; it++)
-  {
-    Compression::wstring_write(it->first, out);
-    Compression::multibyte_write(it->second, out);
+  for(auto& it : tag_index) {
+    Compression::string_write(it.first, out);
+    Compression::multibyte_write(it.second, out);
   }
 
   // enforce_rules
@@ -270,7 +268,7 @@ TaggerDataLSW::write(FILE *out)
   Compression::multibyte_write(prefer_rules.size(), out);
   for(unsigned int i = 0, limit = prefer_rules.size(); i != limit; i++)
   {
-    Compression::wstring_write(prefer_rules[i], out);
+    Compression::string_write(prefer_rules[i], out);
   }
 
   // constants
@@ -317,7 +315,7 @@ TaggerDataLSW::write(FILE *out)
     Compression::multibyte_write(discard.size(), out);
     for(unsigned int i = 0, limit = discard.size(); i != limit; i++)
     {
-      Compression::wstring_write(discard[i], out);
+      Compression::string_write(discard[i], out);
     }
   }
 }
diff --git a/apertium/tagger_data_percep_coarse_tags.cc b/apertium/tagger_data_percep_coarse_tags.cc
index aa3080d..89ce084 100644
--- a/apertium/tagger_data_percep_coarse_tags.cc
+++ b/apertium/tagger_data_percep_coarse_tags.cc
@@ -27,8 +27,8 @@ TaggerDataPercepCoarseTags::~TaggerDataPercepCoarseTags() {}
 void TaggerDataPercepCoarseTags::serialise(std::ostream &serialised) const
 {
   Serialiser<set<TTag> >::serialise(open_class, serialised);
-  Serialiser<vector<wstring> >::serialise(array_tags, serialised);
-  Serialiser<map<wstring, TTag, Ltstr> >::serialise(tag_index, serialised);
+  Serialiser<vector<UString> >::serialise(array_tags, serialised);
+  Serialiser<map<UString, TTag> >::serialise(tag_index, serialised);
   constants.serialise(serialised);
   output.serialise(serialised);
   plist.serialise(serialised);
@@ -37,14 +37,14 @@ void TaggerDataPercepCoarseTags::serialise(std::ostream &serialised) const
 void TaggerDataPercepCoarseTags::deserialise(std::istream &serialised)
 {
   open_class = Deserialiser<set<TTag> >::deserialise(serialised);
-  array_tags = Deserialiser<vector<wstring> >::deserialise(serialised);
-  tag_index = Deserialiser<map<wstring, TTag, Ltstr> >::deserialise(serialised);
+  array_tags = Deserialiser<vector<UString> >::deserialise(serialised);
+  tag_index = Deserialiser<map<UString, TTag> >::deserialise(serialised);
   constants.deserialise(serialised);
   output.deserialise(serialised);
   plist.deserialise(serialised);
 }
 
-const wstring& TaggerDataPercepCoarseTags::coarsen(const Apertium::Morpheme &wrd) const
+const UString& TaggerDataPercepCoarseTags::coarsen(const Apertium::Morpheme &wrd) const
 {
   // Init fine -> coarse tags matching machinary
   MatchState ms;
@@ -52,16 +52,20 @@ const wstring& TaggerDataPercepCoarseTags::coarsen(const Apertium::Morpheme &wrd
   const Alphabet alphabet = plist.getAlphabet();
   int ca_any_char = alphabet(PatternList::ANY_CHAR);
   int ca_any_tag = alphabet(PatternList::ANY_TAG);
-  map<wstring, int, Ltstr>::const_iterator undef_it = tag_index.find(L"TAG_kUNDEF");
+  map<UString, int>::const_iterator undef_it = tag_index.find("TAG_kUNDEF"_u);
   int ca_tag_kundef = undef_it->second;
   // Input lemma
   ms.init(me->getInitial());
   for (size_t i = 0; i < wrd.TheLemma.size(); i++) {
-    ms.step(std::towlower(wrd.TheLemma[i]), ca_any_char);
+    ms.step(u_tolower(wrd.TheLemma[i]), ca_any_char);
   }
   // Input fine tags
   for (size_t i = 0; i < wrd.TheTags.size(); i++) {
-    int symbol = alphabet(L"<" + wrd.TheTags[i].TheTag + L">");
+    UString tag;
+    tag += '<';
+    tag.append(wrd.TheTags[i].TheTag);
+    tag += '>';
+    int symbol = alphabet(tag);
     if (symbol) {
       ms.step(symbol, ca_any_tag);
     }
diff --git a/apertium/tagger_data_percep_coarse_tags.h b/apertium/tagger_data_percep_coarse_tags.h
index 08317bf..6a44b0a 100644
--- a/apertium/tagger_data_percep_coarse_tags.h
+++ b/apertium/tagger_data_percep_coarse_tags.h
@@ -14,7 +14,7 @@ public:
   virtual ~TaggerDataPercepCoarseTags();
   void serialise(std::ostream &serialised) const;
   void deserialise(std::istream &serialised);
-  const wstring& coarsen(const Apertium::Morpheme &wrd) const;
+  const UString& coarsen(const Apertium::Morpheme &wrd) const;
 };
 
 #endif
diff --git a/apertium/tagger_utils.cc b/apertium/tagger_utils.cc
index 9892866..3da0415 100644
--- a/apertium/tagger_utils.cc
+++ b/apertium/tagger_utils.cc
@@ -22,30 +22,16 @@
 #include <sstream>
 #include <algorithm>
 #include <climits>
-#include <apertium/string_utils.h>
-#ifdef _MSC_VER
-#define wcstok wcstok_s
-#endif
-#ifdef __MINGW32__
-
-wchar_t *_wcstok(wchar_t *wcs, const wchar_t *delim, wchar_t **ptr) {
-  (void)ptr;
-  return wcstok(wcs, delim);
-}
-
-#define wcstok _wcstok
-#endif
+#include <lttoolbox/string_utils.h>
 
-using namespace Apertium;
 
-
-void tagger_utils::fatal_error (wstring const &s) {
-  wcerr<<L"Error: "<<s<<L"\n";
+void tagger_utils::fatal_error (UString const &s) {
+  cerr<<"Error: "<<s<<"\n";
   exit(1);
 }
 
 void tagger_utils::file_name_error (string const &s) {
-  wcerr << "Error: " << s << endl;
+  cerr << "Error: " << s << endl;
   exit(1);
 }
 
@@ -65,67 +51,51 @@ void tagger_utils::clear_array_vector(vector<TTag> v[], int l) {
     v[i].clear();
 }
 
-int tagger_utils::ntokens_multiword(wstring const &s)
+int tagger_utils::ntokens_multiword(UString const &s)
 {
-   wchar_t *news = new wchar_t[s.size()+1];
-   wcscpy(news, s.c_str());
-   news[s.size()] = 0;
-   wcerr << news << endl;
-
-   wchar_t const *delim = L"_";
-   wchar_t *ptr;
-   int n=0;
-
-   if (wcstok(news, delim, &ptr))
-     n++;
-   while (wcstok(NULL, delim, &ptr))
-     n++;
-
-   delete[] news;
-
-   return n;
+  vector<UString> tmp = StringUtils::split(s, "_"_u);
+  int n = 0;
+  for (auto& it : tmp) {
+    if (!it.empty()) {
+      n++;
+    }
+  }
+  return n;
 }
 
-int tagger_utils::nguiones_fs(wstring const & s) {
-   wchar_t *news = new wchar_t[s.size()+1];
-   wcscpy(news, s.c_str());
-   news[s.size()] = 0;
-   wcerr << news << endl;
-   wchar_t const *delim = L"-";
-   wchar_t *ptr;
-   int n=0;
-
-   if (wcstok(news, delim, &ptr))
-     n++;
-   while (wcstok(NULL, delim, &ptr))
-     n++;
-
-   delete[] news;
-
-   return n;
+int tagger_utils::nguiones_fs(UString const & s) {
+  vector<UString> tmp = StringUtils::split(s, "-"_u);
+  int n = 0;
+  for (auto& it : tmp) {
+    if (!it.empty()) {
+      n++;
+    }
+  }
+  return n;
 }
 
-wstring tagger_utils::trim(wstring s)
+UString tagger_utils::trim(UString s)
 {
-  if (s.length()==0)
-    return L"";
+  if (s.empty()) {
+    return ""_u;
+  }
 
   for (unsigned int i=0; i<(s.length()-1); i++) {
-    if ((s.at(i)==L' ')&&(s.at(i+1)==L' ')) {
+    if ((s.at(i)==' ')&&(s.at(i+1)==' ')) {
       s.erase(i,1);
       i--;
     }
   }
 
-  if ((s.length()>0)&&(s.at(s.length()-1)==L' '))
+  if ((s.length()>0)&&(s.at(s.length()-1)==' '))
     s.erase(s.length()-1,1);
-  if ((s.length()>0)&&(s.at(0)==L' '))
+  if ((s.length()>0)&&(s.at(0)==' '))
     s.erase(0,1);
 
   return s;
 }
 
-void tagger_utils::scan_for_ambg_classes(FILE *fdic, TaggerData &td) {
+void tagger_utils::scan_for_ambg_classes(const char* fdic, TaggerData &td) {
   Collection &output = td.getOutput();
   FileMorphoStream morpho_stream(fdic, true, &td);
   tagger_utils::scan_for_ambg_classes(output, morpho_stream);
@@ -142,7 +112,7 @@ void tagger_utils::scan_for_ambg_classes(Collection &output, MorphoStream &morph
 
   while (word) {
     if (++nw % 10000 == 0)
-      wcerr << L'.' << flush;
+      cerr << '.' << flush;
 
     tags = word->get_tags();
 
@@ -152,7 +122,7 @@ void tagger_utils::scan_for_ambg_classes(Collection &output, MorphoStream &morph
     delete word;
     word = morpho_stream.get_next_word();
   }
-  wcerr << L"\n";
+  cerr << "\n";
 }
 
 void
@@ -179,7 +149,6 @@ set<TTag> &
 tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
   set<TTag> &ret = td.getOpenClass();
   Collection &output = td.getOutput();
-  int ret_idx = output[ret];
 
   for (int k=0; k<output.size(); k++) {
     const set<TTag> &ambg_class = output[k];
@@ -188,7 +157,6 @@ tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
       continue;
     }
     if (includes(ambg_class.begin(), ambg_class.end(), c.begin(), c.end())) {
-      ret_idx = k;
       ret = ambg_class;
     }
   }
@@ -198,27 +166,30 @@ tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
 void
 tagger_utils::require_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, int nw) {
   if (td.getOutput().has_not(tags)) {
-    wstring errors;
-    errors = L"A new ambiguity class was found. I cannot continue.\n";
-    errors+= L"Word '" + word.get_superficial_form() + L"' not found in the dictionary.\n";
-    errors+= L"New ambiguity class: " + word.get_string_tags() + L"\n";
+    UString errors;
+    errors = "A new ambiguity class was found. I cannot continue.\nWord '"_u;
+    errors += word.get_superficial_form();
+    errors += "' not found in the dictionary.\n"_u;
+    errors += "New ambiguity class: "_u;
+    errors += word.get_string_tags();
+    errors += '\n';
     if (nw >= 0) {
-      std::wostringstream ws;
+      std::ostringstream ws;
       ws << (nw + 1);
-      errors+= L"Line number: " + ws.str() + L"\n";
+      errors += "Line number: "_u;
+      errors += to_ustring(ws.str().c_str());
+      errors += '\n';
     }
-    errors+= L"Take a look at the dictionary, then retrain.";
+    errors += "Take a look at the dictionary, then retrain."_u;
     fatal_error(errors);
   }
 }
 
 static void _warn_absent_ambiguity_class(TaggerWord &word) {
-  wstring errors;
-  errors = L"A new ambiguity class was found. \n";
-  errors += L"Retraining the tagger is necessary so as to take it into account.\n";
-  errors += L"Word '" + word.get_superficial_form() + L"'.\n";
-  errors += L"New ambiguity class: " + word.get_string_tags() + L"\n";
-  wcerr << L"Error: " << errors;
+  cerr << "Error: A new ambiguity class was found. \n";
+  cerr << "Retraining the tagger is necessary so as to take it into account.\n";
+  cerr << "Word '" << word.get_superficial_form() << "'.\n";
+  cerr << "New ambiguity class: " << word.get_string_tags() << "\n";
 }
 
 set<TTag> &
@@ -265,7 +236,7 @@ istream& operator>> (istream& is, map <int, T> & f) {
     is>>i;     // warning: does not work if both
     is>>f[i];  // lines merged in a single one
   }
-  if (is.bad()) tagger_utils::fatal_error(L"reading map");
+  if (is.bad()) tagger_utils::fatal_error("reading map"_u);
   return is;
 }
 
@@ -280,4 +251,3 @@ ostream& operator<< (ostream& os, const set<T>& s) {
   os<<'}';
   return os;
 }
-
diff --git a/apertium/tagger_utils.h b/apertium/tagger_utils.h
index f895735..31b6458 100644
--- a/apertium/tagger_utils.h
+++ b/apertium/tagger_utils.h
@@ -36,7 +36,7 @@ namespace tagger_utils
 /** Print a fatal error message
  *  @param s the error message to print
  */
-void fatal_error (wstring const &s);
+void fatal_error (UString const &s);
 
 /** Print a fatal error message related to a file
  *  @param s the file name to be printted in the error message
@@ -63,18 +63,18 @@ void clear_array_vector(vector<TTag> v[], int l);
 
 /** Return the number of tokens in the multiword unit
  */
- int ntokens_multiword(wstring const &s);
+ int ntokens_multiword(UString const &s);
 
 /** Devuelve el nº de guiones que contiene la cadena pasada como argumento
   */
-int nguiones_fs(wstring const &cadena);
+int nguiones_fs(UString const &cadena);
 
 /** Reads the expanded dictionary received as a parameter puts the resulting
  *  ambiguity classes that the tagger will manage.
  *  @param fdic the input stream with the expanded dictionary to read
  *  @param td the tagger data instance to mutate
  */
-void scan_for_ambg_classes(FILE *fdic, TaggerData &td);
+void scan_for_ambg_classes(const char* fdic, TaggerData &td);
 void scan_for_ambg_classes(Collection &output, MorphoStream &morpho_stream);
 
 void add_neccesary_ambg_classes(TaggerData &td);
@@ -105,7 +105,7 @@ set<TTag> & require_similar_ambiguity_class(TaggerData &td, set<TTag> &tags);
 /** Just prints a warning if warn */
 void warn_absent_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, bool warn);
 
-wstring trim(wstring s);
+UString trim(UString s);
 
 };
 
diff --git a/apertium/tagger_word.cc b/apertium/tagger_word.cc
index e7982ec..ff44de4 100644
--- a/apertium/tagger_word.cc
+++ b/apertium/tagger_word.cc
@@ -15,25 +15,21 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/tagger_word.h>
-#include <apertium/utf_converter.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include "apertium_config.h"
 #include <apertium/unlocked_cstdio.h>
 
-using namespace Apertium;
-
 bool TaggerWord::generate_marks=false;
 
-vector<wstring> TaggerWord::array_tags;
+vector<UString> TaggerWord::array_tags;
 
 bool TaggerWord::show_ignored_string=true;
 
-map<wstring, ApertiumRE, Ltstr> TaggerWord::patterns;
+map<UString, ApertiumRE> TaggerWord::patterns;
 
 TaggerWord::TaggerWord(bool prev_plus_cut) :
 show_sf(false)
 {
-   ignored_string = L"";
    plus_cut=false;
    previous_plus_cut=prev_plus_cut;
 }
@@ -62,46 +58,45 @@ TaggerWord::get_show_sf(){
 }
 
 void
-TaggerWord::set_superficial_form(const wstring &sf){
+TaggerWord::set_superficial_form(const UString &sf){
   superficial_form = sf;
 }
 
-wstring&
+UString&
 TaggerWord::get_superficial_form() {
   return superficial_form;
 }
 
 bool
-TaggerWord::match(wstring const &s, wstring const &pattern)
+TaggerWord::match(UString const &s, UString const &pattern)
 {
-  map<wstring, ApertiumRE, Ltstr>::iterator it = patterns.find(pattern);
-  string const utfs = UtfConverter::toUtf8(s);
+  map<UString, ApertiumRE>::iterator it = patterns.find(pattern);
 
   if(it == patterns.end())
   {
-    string utfpattern = UtfConverter::toUtf8(pattern);
-    string regexp = "";
+    UString utfpattern = pattern;
+    UString regexp;
 
     while(true)
     {
-      size_t pos = utfpattern.find("<*>");
-      if(pos == string::npos)
+      size_t pos = utfpattern.find("<*>"_u);
+      if(pos == UString::npos)
       {
         break;
       }
-      utfpattern.replace(pos, 3, "(<[^>]+>)+");
+      utfpattern.replace(pos, 3, "(<[^>]+>)+"_u);
     }
     patterns[pattern].compile(utfpattern);
-    return patterns[pattern].match(utfs) != "";
+    return !patterns[pattern].match(s).empty();
   }
   else
   {
-    return it->second.match(utfs) != "";
+    return !it->second.match(s).empty();
   }
 }
 
 void
-TaggerWord::add_tag(TTag &t, const wstring &lf, vector<wstring> const &prefer_rules){
+TaggerWord::add_tag(TTag &t, const UString &lf, vector<UString> const &prefer_rules){
 
   //Tag is added only is it is not present yet
   //Sometime one word can have more than one lexical form assigned to the same tag
@@ -132,25 +127,25 @@ TaggerWord::isAmbiguous() const
   return tags.size() > 1;
 }
 
-wstring
+UString
 TaggerWord::get_string_tags() {
-  wstring st;
+  UString st;
   set<TTag>::iterator itag = tags.begin();
 
-  st=L"{";
+  st += '{';
   for(itag=tags.begin(); itag!=tags.end(); itag++) {
     if (itag!=tags.begin())
-      st+=L',';
+      st+=',';
     st+=array_tags[*itag];
   }
-  st += L'}';
+  st += '}';
 
   return st;
 }
 
-wstring
+UString
 TaggerWord::get_lexical_form(TTag &t, int const TAG_kEOF) {
-  wstring ret= L"";
+  UString ret;
 
   if (show_ignored_string)
     ret.append(ignored_string);
@@ -158,30 +153,27 @@ TaggerWord::get_lexical_form(TTag &t, int const TAG_kEOF) {
   if(t==TAG_kEOF)
     return ret;
 
-  if (!previous_plus_cut){
-    if(TaggerWord::generate_marks && isAmbiguous())
-    {
-      ret.append(L"^=");
-    }
-    else
-    {
-      ret += L'^';
+  if (!previous_plus_cut) {
+    if(TaggerWord::generate_marks && isAmbiguous()) {
+      ret.append("^="_u);
+    } else {
+      ret += '^';
     }
 
-    if(get_show_sf()){   // append the superficial form
+    if(get_show_sf()) {   // append the superficial form
       ret.append(superficial_form);
-      ret+=L'/';
+      ret += '/';
     }
   }
 
   if (lexical_forms.size()==0) { // This is an UNKNOWN WORD
-    ret +=L'*';
+    ret += '*';
     ret.append(superficial_form);
-  } else if ((*lexical_forms.begin()).second[0]==L'*') { //This is an
+  } else if ((*lexical_forms.begin()).second[0]=='*') { //This is an
 							//unknown word
 							//that has
 							//been guessed
-    ret += L'*';
+    ret += '*';
     ret.append(superficial_form);
   } else if (lexical_forms.size()>1) {  //This is an ambiguous word
     ret.append(lexical_forms[t]);
@@ -191,9 +183,9 @@ TaggerWord::get_lexical_form(TTag &t, int const TAG_kEOF) {
 
   if (ret != ignored_string) {
     if (plus_cut)
-      ret+=L'+';
+      ret += '+';
     else {
-      ret += L'$';
+      ret += '$';
     }
   }
 
@@ -207,52 +199,49 @@ TaggerWord::get_lexical_form(TTag &t, int const TAG_kEOF) {
   return ret;
 }
 
-wstring
+UString
 TaggerWord::get_all_chosen_tag_first(TTag &t, int const TAG_kEOF) {
-  wstring ret=L"";
+  UString ret;
 
-  if (show_ignored_string)
+  if (show_ignored_string) {
     ret.append(ignored_string);
+  }
 
-  if(t==TAG_kEOF)
+  if(t==TAG_kEOF) {
     return ret;
+  }
 
-  if (!previous_plus_cut)
-  {
-    if(TaggerWord::generate_marks && isAmbiguous())
-    {
-      ret.append(L"^=");
-    }
-    else
-    {
-      ret += L'^';
+  if (!previous_plus_cut) {
+    if(TaggerWord::generate_marks && isAmbiguous()) {
+      ret.append("^="_u);
+    } else {
+      ret += '^';
     }
   }
 
   ret.append(superficial_form);
 
   if (lexical_forms.size()==0) { // This is an UNKNOWN WORD
-    ret+=L"/*";
+    ret += "/*"_u;
     ret.append(superficial_form);
   } else {
-    ret+=L"/";
+    ret+="/"_u;
     ret.append(lexical_forms[t]);
     if (lexical_forms.size()>1) {
-      set<TTag>::iterator it;
-      for (it=tags.begin(); it!=tags.end(); it++) {
-	if (*it != t) {
-	  ret+=L"/";
-          ret.append(lexical_forms[*it]);
-	}
+      for (auto& it : tags) {
+        if (it != t) {
+          ret += '/';
+          ret.append(lexical_forms[it]);
+        }
       }
     }
   }
 
   if (ret != ignored_string) {
-    if (plus_cut)
-      ret+=L"+";
-    else {
-      ret+=L"$";
+    if (plus_cut) {
+      ret += '+';
+    } else {
+      ret += '$';
     }
   }
 
@@ -260,29 +249,30 @@ TaggerWord::get_all_chosen_tag_first(TTag &t, int const TAG_kEOF) {
 }
 
 //OBSOLETE
-wstring
+UString
 TaggerWord::get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF) {
-  wstring ret;
+  UString ret;
 
-  if(t==TAG_kEOF)
+  if(t==TAG_kEOF) {
      return ret;
+  }
 
   if (lexical_forms.size()==0) { //This is an unknown word
-      ret.append(L"*^");
-      ret.append(superficial_form);
+    ret.append("*^"_u);
+    ret.append(superficial_form);
   } else if ((*lexical_forms.begin()).second[0]=='*') {  //This is an unknown word that has been guessed
-    ret.append(L"*^");
+    ret.append("*^"_u);
     ret.append(superficial_form);
   } else {
-    ret += L'^';
+    ret += '^';
     ret.append(lexical_forms[t]);
   }
 
   if (ret.length() != 0) {
     if (plus_cut)
-      ret+=L'+';
+      ret += '+';
     else {
-      ret +=L'$';
+      ret += '$';
     }
   }
 
@@ -290,7 +280,7 @@ TaggerWord::get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF)
 }
 
 void
-TaggerWord::add_ignored_string(wstring const &s) {
+TaggerWord::add_ignored_string(UString const &s) {
   ignored_string.append(s);
 }
 
@@ -304,14 +294,14 @@ TaggerWord::get_plus_cut() {
   return plus_cut;
 }
 
-wostream&
-operator<< (wostream& os, TaggerWord &w) {
-  os<<w.get_string_tags()<< L" \t Word: " << w.get_superficial_form();
+ostream&
+operator<< (ostream& os, TaggerWord &w) {
+  os<<w.get_string_tags()<< " \t Word: " << w.get_superficial_form();
   return os;
 }
 
 void
-TaggerWord::setArrayTags(vector<wstring> const &at)
+TaggerWord::setArrayTags(vector<UString> const &at)
 {
   array_tags = at;
 }
@@ -319,42 +309,37 @@ TaggerWord::setArrayTags(vector<wstring> const &at)
 void
 TaggerWord::print()
 {
-  wcout << L"[#" << superficial_form << L"# ";
+  cout << "[#" << superficial_form << "# ";
   for(set<TTag>::iterator it=tags.begin(), limit = tags.end(); it != limit; it++)
   {
-    wcout << L"(" << *it << L" " << lexical_forms[*it] << L") ";
+    cout << "(" << *it << " " << lexical_forms[*it] << ") ";
   }
-  wcout << L"\b]\n";
+  cout << "\b]\n";
 }
 
 void
-TaggerWord::outputOriginal(FILE *output) {
+TaggerWord::outputOriginal(UFILE *output) {
 
-  wstring s=superficial_form;
+  UString s=superficial_form;
 
-  map<TTag, wstring>::iterator it;
-  for(it=lexical_forms.begin(); it!=lexical_forms.end(); it++) {
-    if (it->second.length()>0)
-    {
-      s+=L'/';
-      s.append(it->second);
+  for (auto& it : lexical_forms) {
+    if (!it.second.empty()) {
+      s += '/';
+      s.append(it.second);
     }
   }
 
-  if (s.length()>0)
-  {
-    s=L"^"+s+L"$\n";
+  if (!s.empty()) {
+    u_fprintf(output, "^%S$\n", s.c_str());
   }
-
-  fputws_unlocked(s.c_str(), output);
 }
 
 void
-TaggerWord::discardOnAmbiguity(wstring const &tags)
+TaggerWord::discardOnAmbiguity(UString const &tags)
 {
   if(isAmbiguous())
   {
-    map<TTag, wstring>::iterator it = lexical_forms.begin(),
+    map<TTag, UString>::iterator it = lexical_forms.begin(),
                               limit = lexical_forms.end();
     set<TTag> newsettag;
     while(it != limit)
diff --git a/apertium/tagger_word.h b/apertium/tagger_word.h
index 601481e..560500a 100644
--- a/apertium/tagger_word.h
+++ b/apertium/tagger_word.h
@@ -23,7 +23,6 @@
 #include <string>
 #include <vector>
 
-#include <lttoolbox/ltstr.h>
 #include <apertium/ttag.h>
 #include <apertium/apertium_re.h>
 
@@ -36,12 +35,12 @@ using namespace std;
  */
 class TaggerWord{
 private:
-  wstring superficial_form;
+  UString superficial_form;
 
   set<TTag> tags;  //Set of all possible tags
-  map<TTag, wstring> lexical_forms;  //For a given coarse tag it stores the fine tag
+  map<TTag, UString> lexical_forms;  //For a given coarse tag it stores the fine tag
                                     //delevered by the morphological analyzer
-  wstring ignored_string;
+  UString ignored_string;
 
   bool plus_cut; //Flag to distinguish the way in which the word was ended.
                   //If it was done by '$' its value should be false
@@ -50,12 +49,12 @@ private:
 			  //previous word was ended. It has the same
 			  //plus_cut meaning
   bool show_sf; // Show the superficial form in the output
-  static map<wstring, ApertiumRE, Ltstr> patterns;
+  static map<UString, ApertiumRE> patterns;
 
-  bool match(wstring const &s, wstring const &pattern);
+  bool match(UString const &s, UString const &pattern);
 public:
   static bool generate_marks;
-  static vector<wstring> array_tags;
+  static vector<UString> array_tags;
 
   static bool show_ignored_string;
 
@@ -77,47 +76,47 @@ public:
    /** Set the superficial form of the word.
     *  @param s the superficial form
     */
-   void set_superficial_form(const wstring &s);
+   void set_superficial_form(const UString &s);
 
    /** Get the superficial form of the word
     *
     */
-   wstring& get_superficial_form();
+   UString& get_superficial_form();
 
    /** Add a new tag to the set of all possible tags of the word.
     *  @param t the coarse tag
     *  @param lf the lexical form (fine tag)
     */
-   virtual void add_tag(TTag &t, const wstring &lf, vector<wstring> const &prefer_rules);
+   virtual void add_tag(TTag &t, const UString &lf, vector<UString> const &prefer_rules);
 
    /** Get the set of tags of this word.
     *  @return  set of tags.
     */
    virtual set<TTag>& get_tags();
 
-   /** Get a wstring with the set of tags
+   /** Get a UString with the set of tags
     */
-   virtual wstring get_string_tags();
+   virtual UString get_string_tags();
 
   /** Get the lexical form (fine tag) for a given tag (coarse one)
    *  @param  t the tag
    *  @return the lexical form of tag t
    */
-  virtual wstring get_lexical_form(TTag &t, int const TAG_kEOF);
+  virtual UString get_lexical_form(TTag &t, int const TAG_kEOF);
 
-  wstring get_all_chosen_tag_first(TTag &t, int const TAG_kEOF);
+  UString get_all_chosen_tag_first(TTag &t, int const TAG_kEOF);
 
   /** Get the lexical form (fine tag) for a given tag (coarse one)
    *  @param  t the tag
    *  @return the lexical form of tag t without other text that
    *          is ignored.
    */
-  wstring get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF);
+  UString get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF);
 
   /** Add text to the ignored string
    *
    */
-  void add_ignored_string(wstring const &s);
+  void add_ignored_string(UString const &s);
 
   /** Set the flag plus_cut to a certain value. If this flag is set to true means
    *  that there were a '+' between this word and the next one
@@ -135,18 +134,18 @@ public:
 
   /** Output operator
    */
-  friend wostream& operator<< (wostream& os, TaggerWord &w);
+  friend ostream& operator<< (ostream& os, TaggerWord &w);
 
-  static void setArrayTags(vector<wstring> const &at);
+  static void setArrayTags(vector<UString> const &at);
 
   void print();
 
-  void outputOriginal(FILE *output);
+  void outputOriginal(UFILE *output);
 
   bool isAmbiguous() const;  // CAUTION: unknown words are not considered to
                              // be ambiguous by this method
 
-  void discardOnAmbiguity(wstring const &tags);
+  void discardOnAmbiguity(UString const &tags);
 };
 
 #endif
diff --git a/apertium/tmx_aligner_tool.cc b/apertium/tmx_aligner_tool.cc
index c595b6e..9dc7e7b 100644
--- a/apertium/tmx_aligner_tool.cc
+++ b/apertium/tmx_aligner_tool.cc
@@ -10,7 +10,7 @@
 *                                                                        *
 *************************************************************************/
 #include <apertium/tmx_aligner_tool.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 namespace TMXAligner
 {
@@ -28,7 +28,7 @@ void readTrailOrBisentenceList( std::istream& is, Trail& trail )
     is >> huPos;
     if (is.peek()!=' ')
     {
-      std::wcerr << "no space in line" << std::endl;
+      std::cerr << "no space in line" << std::endl;
       throw "data error";
     }
     is.ignore();
@@ -36,7 +36,7 @@ void readTrailOrBisentenceList( std::istream& is, Trail& trail )
     is >> enPos;
     if (is.peek()!='\n')
     {
-      std::wcerr << "too much data in line" << std::endl;
+      std::cerr << "too much data in line" << std::endl;
       throw "data error";
     }
     is.ignore();
@@ -99,7 +99,7 @@ void collectBisentences( const Trail& bestTrail, const AlignMatrix& dynMatrix,
     enBisentences.push_back( enSentenceListPretty[ bisentenceList[i].second ] );
   }
 
-//  std::wcerr << huBisentences.size() << " bisentences collected." << std::endl;
+//  std::cerr << huBisentences.size() << " bisentences collected." << std::endl;
 
 }
 
@@ -152,11 +152,11 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
   setSentenceValues( enSentenceList,       enLength, alignParameters.utfCharCountingMode );
 
   bool quasiglobal_stopwordRemoval = false;
-//  std::wcerr << "quasiglobal_stopwordRemoval is set to " << quasiglobal_stopwordRemoval << std::endl;
+//  std::cerr << "quasiglobal_stopwordRemoval is set to " << quasiglobal_stopwordRemoval << std::endl;
   if (quasiglobal_stopwordRemoval)
   {
     removeStopwords( huSentenceListPretty, enSentenceList );
-//    std::wcerr << "Stopwords removed." << std::endl;
+//    std::cerr << "Stopwords removed." << std::endl;
   }
 
   SentenceList huSentenceListGarbled, enSentenceListGarbled;
@@ -186,9 +186,9 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
 
   if (thickness>maximalThickness)
   {
-//    std::wcerr << "WARNING: Downgrading planned thickness " << thickness << " to " << maximalThickness ;
-//    std::wcerr << " to obey memory constraint of " << quasiglobal_maximalSizeInMegabytes << " megabytes " << std::endl;
-//    std::wcerr << "You should recompile if you have much more physical RAM than that. People of the near-future, forgive me for the inconvenience." << std::endl;
+//    std::cerr << "WARNING: Downgrading planned thickness " << thickness << " to " << maximalThickness ;
+//    std::cerr << " to obey memory constraint of " << quasiglobal_maximalSizeInMegabytes << " megabytes " << std::endl;
+//    std::cerr << "You should recompile if you have much more physical RAM than that. People of the near-future, forgive me for the inconvenience." << std::endl;
 
     thickness = maximalThickness;
   }
@@ -196,20 +196,20 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
   AlignMatrix similarityMatrix( huBookSize, enBookSize, thickness, outsideOfRadiusValue );
 
   sentenceListsToAlignMatrixIdentity( huSentenceListGarbled, enSentenceListGarbled, similarityMatrix );
-//  std::wcerr << std::endl;
-//  std::wcerr << "Rough translation-based similarity matrix ready." << std::endl;
+//  std::cerr << std::endl;
+//  std::cerr << "Rough translation-based similarity matrix ready." << std::endl;
 
   Trail bestTrail;
   AlignMatrix dynMatrix( huBookSize+1, enBookSize+1, thickness, 1e30 );
 
   align( similarityMatrix, huLength, enLength, bestTrail, dynMatrix );
-//  std::wcerr << "Align ready." << std::endl;
+//  std::cerr << "Align ready." << std::endl;
 
   double globalQuality;
   globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
                                       huSentenceListGarbled, enSentenceListGarbled );
 
-  //  std::wcerr << "Global quality of unfiltered align " << globalQuality << std::endl;
+  //  std::cerr << "Global quality of unfiltered align " << globalQuality << std::endl;
 
   if (alignParameters.realignType==AlignParameters::NoRealign)
   {
@@ -222,11 +222,11 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
 
     if (!success)
     {
-//      std::wcerr << "Realign zone too close to quasidiagonal border. Abandoning realign. The align itself is suspicious." << std::endl;
+//      std::cerr << "Realign zone too close to quasidiagonal border. Abandoning realign. The align itself is suspicious." << std::endl;
     }
     else
     {
-//      std::wcerr << "Border of realign zone determined." << std::endl;
+//      std::cerr << "Border of realign zone determined." << std::endl;
 
       switch (alignParameters.realignType)
       {
@@ -237,24 +237,24 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
           SentenceList huBisentences,enBisentences;
 
           throw "unimplemented";
-//          std::wcerr << "Plausible bisentences filtered." << std::endl;
+//          std::cerr << "Plausible bisentences filtered." << std::endl;
 
           modelOne.build(huBisentences,enBisentences);
-//          std::wcerr << "IBM Model I ready." << std::endl;
+//          std::cerr << "IBM Model I ready." << std::endl;
 
           sentenceListsToAlignMatrixIBMModelOne( huSentenceListPretty, enSentenceList, modelOne, similarityMatrixDetailed );
-//          std::wcerr << "IBM Model I based similarity matrix ready." << std::endl;
+//          std::cerr << "IBM Model I based similarity matrix ready." << std::endl;
           break;
         }
       case AlignParameters::FineTranslationRealign:
         {
           TransLex transLex;
           transLex.build(dictionary);
-//          std::wcerr << "Hashtable for dictionary ready." << std::endl;
+//          std::cerr << "Hashtable for dictionary ready." << std::endl;
 
           sentenceListsToAlignMatrixTranslation( huSentenceListPretty, enSentenceList, transLex, similarityMatrixDetailed );
 
-//          std::wcerr << "Fine translation-based similarity matrix ready." << std::endl;
+//          std::cerr << "Fine translation-based similarity matrix ready." << std::endl;
           break;
         }
 
@@ -268,7 +268,7 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
       Trail bestTrailDetailed;
       AlignMatrix dynMatrixDetailed( huBookSize+1, enBookSize+1, thickness, 1e30 );
       align( similarityMatrixDetailed, huLength, enLength, bestTrailDetailed, dynMatrixDetailed );
-//      std::wcerr << "Detail realign ready." << std::endl;
+//      std::cerr << "Detail realign ready." << std::endl;
 
       bestTrail = bestTrailDetailed;
       dynMatrix = dynMatrixDetailed;
@@ -276,7 +276,7 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
       globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
                                           huSentenceListGarbled, enSentenceListGarbled );
 
-      //      std::wcerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
+      //      std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
     }
   }
 
@@ -285,27 +285,27 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
   if ( alignParameters.postprocessTrailQualityThreshold != -1 )
   {
     postprocessTrail( bestTrail, trailScoresInterval, alignParameters.postprocessTrailQualityThreshold );
-//    std::wcerr << "Trail start and end postprocessed by score." << std::endl;
+//    std::cerr << "Trail start and end postprocessed by score." << std::endl;
   }
 
   if ( alignParameters.postprocessTrailStartAndEndQualityThreshold != -1 )
   {
     postprocessTrailStartAndEnd( bestTrail, trailScoresInterval, alignParameters.postprocessTrailStartAndEndQualityThreshold );
-//    std::wcerr << "Trail start and end postprocessed by score." << std::endl;
+//    std::cerr << "Trail start and end postprocessed by score." << std::endl;
   }
 
   if ( alignParameters.postprocessTrailByTopologyQualityThreshold != -1 )
   {
     postprocessTrailByTopology( bestTrail, alignParameters.postprocessTrailByTopologyQualityThreshold );
-//    std::wcerr << "Trail postprocessed by topology." << std::endl;
+//    std::cerr << "Trail postprocessed by topology." << std::endl;
   }
 
   bool quasiglobal_spaceOutBySentenceLength = true;
-//  std::wcerr << "quasiglobal_spaceOutBySentenceLength is set to " << quasiglobal_spaceOutBySentenceLength << std::endl;
+//  std::cerr << "quasiglobal_spaceOutBySentenceLength is set to " << quasiglobal_spaceOutBySentenceLength << std::endl;
   if (quasiglobal_spaceOutBySentenceLength)
   {
     spaceOutBySentenceLength( bestTrail, huSentenceListPretty, enSentenceList, alignParameters.utfCharCountingMode );
-//    std::wcerr << "Trail spaced out by sentence length." << std::endl;
+//    std::cerr << "Trail spaced out by sentence length." << std::endl;
   }
 
   // In cautious mode, auto-aligned rundles are thrown away if
@@ -313,13 +313,13 @@ double alignerToolWithObjects( const DictionaryItems& dictionary,
   if (alignParameters.cautiousMode)
   {
     cautiouslyFilterTrail( bestTrail );
-//    std::wcerr << "Trail filtered by topology." << std::endl;
+//    std::cerr << "Trail filtered by topology." << std::endl;
   }
 
   globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
                                     huSentenceListGarbled, enSentenceListGarbled );
 
-  //  std::wcerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
+  //  std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
 
   bool textual = ! alignParameters.justSentenceIds ;
 
@@ -429,17 +429,17 @@ void alignerToolWithFilenames( const DictionaryItems& dictionary,
   std::ifstream hus(huFilename.c_str());
   SentenceList huSentenceListPretty;
   huSentenceListPretty.readNoIds( hus );
-//  std::wcerr << huSentenceListPretty.size() << " hungarian sentences read." << std::endl;
+//  std::cerr << huSentenceListPretty.size() << " hungarian sentences read." << std::endl;
 
   std::ifstream ens(enFilename.c_str());
   SentenceList enSentenceList;
   enSentenceList.readNoIds( ens );
-//  std::wcerr << enSentenceList.size() << " english sentences read." << std::endl;
+//  std::cerr << enSentenceList.size() << " english sentences read." << std::endl;
 
   if ( (enSentenceList.      size() < huSentenceListPretty.size()/5) ||
        (huSentenceListPretty.size() < enSentenceList.      size()/5) )
   {
-//    std::wcerr << "Sizes differing too much. Ignoring files to avoid a rare loop bug." << std::endl;
+//    std::cerr << "Sizes differing too much. Ignoring files to avoid a rare loop bug." << std::endl;
     return;
   }
 
@@ -448,7 +448,7 @@ void alignerToolWithFilenames( const DictionaryItems& dictionary,
     /* double globalQuality = */alignerToolWithObjects
      ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, std::cout );
 
-//    std::wcerr << "Quality " << globalQuality << std::endl ;
+//    std::cerr << "Quality " << globalQuality << std::endl ;
 
   }
   else
@@ -458,7 +458,7 @@ void alignerToolWithFilenames( const DictionaryItems& dictionary,
      ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, os );
 
     // If you want to collect global quality information in batch mode, grep "^Quality" of stderr must do.
-//    std::wcerr << "Quality\t" << outputFilename << "\t" << globalQuality << std::endl ;
+//    std::cerr << "Quality\t" << outputFilename << "\t" << globalQuality << std::endl ;
   }
 
 }
@@ -474,7 +474,7 @@ void fillPercentParameter( Arguments& args, const std::string& argName, double&
 
 void main_alignerToolUsage()
 {
-  std::wcerr << "Usage (either):\n\
+  std::cerr << "Usage (either):\n\
     alignerTool [ common_arguments ] [ -hand=hand_align_file ] dictionary_file source_text target_text\n\
 \n\
 or:\n\
@@ -586,8 +586,8 @@ int main_alignerTool(int argC, char* argV[])
 
     if (batchMode && (remains.size()!=2) )
     {
-      std::wcerr << "Batch mode requires exactly two file arguments." << std::endl;
-      std::wcerr << std::endl;
+      std::cerr << "Batch mode requires exactly two file arguments." << std::endl;
+      std::cerr << std::endl;
 
       main_alignerToolUsage();
       throw "argument error";
@@ -598,7 +598,7 @@ int main_alignerTool(int argC, char* argV[])
     {
       if (batchMode)
       {
-        std::wcerr << "-batch and -" << handArgumentname << " are incompatible switches." << std::endl;
+        std::cerr << "-batch and -" << handArgumentname << " are incompatible switches." << std::endl;
         throw "argument error";
       }
       else
@@ -608,7 +608,7 @@ int main_alignerTool(int argC, char* argV[])
 
         if (alignParameters.handAlignFilename.empty())
         {
-          std::wcerr << "-" << handArgumentname << " switch requires a filename value." << std::endl;
+          std::cerr << "-" << handArgumentname << " switch requires a filename value." << std::endl;
           throw "argument error";
         }
       }
@@ -619,7 +619,7 @@ int main_alignerTool(int argC, char* argV[])
     {
       if (batchMode)
       {
-        std::wcerr << "-batch and -" << autoDictDumpArgumentname << " are incompatible switches." << std::endl;
+        std::cerr << "-batch and -" << autoDictDumpArgumentname << " are incompatible switches." << std::endl;
         throw "argument error";
       }
       else
@@ -629,7 +629,7 @@ int main_alignerTool(int argC, char* argV[])
 
         if (alignParameters.autoDictionaryDumpFilename.empty())
         {
-          std::wcerr << "-" << autoDictDumpArgumentname << " switch requires a filename value." << std::endl;
+          std::cerr << "-" << autoDictDumpArgumentname << " switch requires a filename value." << std::endl;
           throw "argument error";
         }
       }
@@ -637,8 +637,8 @@ int main_alignerTool(int argC, char* argV[])
 
     if (!batchMode && (remains.size()!=3) )
     {
-      std::wcerr << "Nonbatch mode requires exactly three file arguments." << std::endl;
-      std::wcerr << std::endl;
+      std::cerr << "Nonbatch mode requires exactly three file arguments." << std::endl;
+      std::cerr << std::endl;
 
       main_alignerToolUsage();
       throw "argument error";
@@ -650,13 +650,13 @@ int main_alignerTool(int argC, char* argV[])
     }
     catch (...)
     {
-      std::wcerr << std::endl;
+      std::cerr << std::endl;
 
       main_alignerToolUsage();
       throw "argument error";
     }
 
-//    std::wcerr << "Reading dictionary..." << std::endl;
+//    std::cerr << "Reading dictionary..." << std::endl;
     const char* dicFilename = remains[0] ;
     DictionaryItems dictionary;
     std::ifstream dis(dicFilename);
@@ -677,7 +677,7 @@ int main_alignerTool(int argC, char* argV[])
 
         if (words.size()!=3)
         {
-          std::wcerr << "Batch file has incorrect format." << std::endl;
+          std::cerr << "Batch file has incorrect format." << std::endl;
           throw "data error";
         }
 
@@ -686,7 +686,7 @@ int main_alignerTool(int argC, char* argV[])
         enFilename  = words[1];
         outFilename = words[2];
 
-//        std::wcerr << "Processing " << outFilename << std::endl;
+//        std::cerr << "Processing " << outFilename << std::endl;
         bool failed = false;
         try
         {
@@ -694,23 +694,23 @@ int main_alignerTool(int argC, char* argV[])
         }
         catch ( const char* errorType )
         {
-          std::wcerr << errorType << std::endl;
+          std::cerr << errorType << std::endl;
           failed = true;
         }
         catch ( std::exception& e )
         {
-          std::wcerr << "some failed assertion:" << e.what() << std::endl;
+          std::cerr << "some failed assertion:" << e.what() << std::endl;
           failed = true;
         }
         catch ( ... )
         {
-          std::wcerr << "some unknown failed assertion..." << std::endl;
+          std::cerr << "some unknown failed assertion..." << std::endl;
           failed = true;
         }
 
         if (failed)
         {
-          std::wcerr << "Align failed for " << outFilename << std::endl;
+          std::cerr << "Align failed for " << outFilename << std::endl;
         }
       }
     }
@@ -725,17 +725,17 @@ int main_alignerTool(int argC, char* argV[])
 #ifndef _DEBUG
   catch ( const char* errorType )
   {
-    std::wcerr << errorType << std::endl;
+    std::cerr << errorType << std::endl;
     return -1;
   }
   catch ( std::exception& e )
   {
-    std::wcerr << "some failed assertion:" << e.what() << std::endl;
+    std::cerr << "some failed assertion:" << e.what() << std::endl;
     return -1;
   }
   catch ( ... )
   {
-    std::wcerr << "some unknown failed assertion..." << std::endl;
+    std::cerr << "some unknown failed assertion..." << std::endl;
     return -1;
   }
 #endif
diff --git a/apertium/tmx_alignment.cc b/apertium/tmx_alignment.cc
index 8b556fc..97ae617 100644
--- a/apertium/tmx_alignment.cc
+++ b/apertium/tmx_alignment.cc
@@ -13,7 +13,7 @@
 
 #include <apertium/tmx_words.h> // For SentenceList
 #include <apertium/tmx_dictionary.h> // For FrequencyMap
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <iostream>
 #include <map>
@@ -21,7 +21,7 @@
 #include <algorithm>
 
 // Copypaste-elve. TODO Elhelyezni.
-#define massert(e) if (!(e)) { std::wcerr << #e << " failed" << std::endl; throw "assert"; }
+#define massert(e) if (!(e)) { std::cerr << #e << " failed" << std::endl; throw "assert"; }
 
 std::ostream& operator<<( std::ostream& os, std::pair<int,int> p )
 {
@@ -241,7 +241,7 @@ void trelliToLadder( const TrelliMatrix& trellis, Trail& bestTrail )
 
   bool logging = false;
 
-  if (logging) std::wcerr << std::endl;
+  if (logging) std::cerr << std::endl;
 
   bool over = false;
   bool hopelesslyBadTrail = false;
@@ -304,7 +304,7 @@ void trelliToLadder( const TrelliMatrix& trellis, Trail& bestTrail )
 
     if (logging)
     {
-      std::wcerr << huPos << " \t" << enPos << std::endl;
+      std::cerr << huPos << " \t" << enPos << std::endl;
     }
 
   }
@@ -314,7 +314,7 @@ void trelliToLadder( const TrelliMatrix& trellis, Trail& bestTrail )
     bestTrail.clear();
     bestTrail.push_back(std::make_pair(huBookSize,enBookSize));
     bestTrail.push_back(std::make_pair(0,0));
-    std::wcerr << "Error: hopelessly bad trail." << std::endl;
+    std::cerr << "Error: hopelessly bad trail." << std::endl;
   }
 
   std::reverse(bestTrail.begin(),  bestTrail.end()  );
@@ -335,11 +335,11 @@ void align( const AlignMatrix& w, const SentenceValues& huLength, const Sentence
 
   buildDynProgMatrix( w, huLength, enLength, v, trellis );
 
-//  std::wcerr << "Matrix built." << std::endl;
+//  std::cerr << "Matrix built." << std::endl;
 
   trelliToLadder( trellis, bestTrail );
 
-//  std::wcerr << "Trail found." << std::endl;
+//  std::cerr << "Trail found." << std::endl;
 }
 
 
@@ -383,10 +383,10 @@ double scoreTrailOrBisentenceList( const Trail& trailAuto, const Trail& trailHan
 {
   int score = countIntersectionOfTrails( trailAuto, trailHand );
 
-  std::wcerr << trailAuto.size()-score << " misaligned out of " << trailHand.size() << " correct items, "
+  std::cerr << trailAuto.size()-score << " misaligned out of " << trailHand.size() << " correct items, "
     << trailAuto.size() << " bets." << std::endl;
 
-  std::wcerr << "Precision: " << 1.0*score/trailAuto.size()
+  std::cerr << "Precision: " << 1.0*score/trailAuto.size()
     << ", Recall: " << 1.0*score/trailHand.size() << std::endl;
 
   double ratio = 1.0*(trailAuto.size()-score)/trailAuto.size();
@@ -494,7 +494,7 @@ bool borderDetailedAlignMatrix( AlignMatrix& alignMatrix, const Trail& trail, in
       }
     }
 
-    std::wcerr << numberOfEvaluatedItems << " items inside the border." << std::endl;
+    std::cerr << numberOfEvaluatedItems << " items inside the border." << std::endl;
   }
 
   return true;
diff --git a/apertium/tmx_arguments_parser.cc b/apertium/tmx_arguments_parser.cc
index 5498a7d..acd00ec 100644
--- a/apertium/tmx_arguments_parser.cc
+++ b/apertium/tmx_arguments_parser.cc
@@ -10,7 +10,7 @@
 *                                                                        *
 *************************************************************************/
 #include <apertium/tmx_arguments_parser.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <iostream>
 #include <stdlib.h>
 
@@ -27,7 +27,7 @@ bool Arguments::read( int argc, char **argv )
     std::string p = argv[i];
     if (p.empty() || p[0]!='-')
     {
-      std::wcerr << p << ": unable to parse argument\n";
+      std::cerr << p << ": unable to parse argument\n";
       throw "argument error";
       return false;
     }
@@ -35,7 +35,7 @@ bool Arguments::read( int argc, char **argv )
 
     if (p.empty())
     {
-      std::wcerr << "Empty argument\n";
+      std::cerr << "Empty argument\n";
       throw "argument error";
       return false;
     }
@@ -86,7 +86,7 @@ bool Arguments::read( int argc, char **argv, std::vector<const char*>& remains )
 
     if (p.empty())
     {
-      std::wcerr << "Empty argument\n";
+      std::cerr << "Empty argument\n";
       throw "argument error";
       return false;
     }
@@ -124,13 +124,13 @@ bool Arguments::getNumericParam( const std::string& name, int& num )
   const_iterator it=find(name);
   if (it==end())
   {
-    // std::wcerr << "Argument -" << name << " missing.\n";
+    // std::cerr << "Argument -" << name << " missing.\n";
     return false;
   }
 
   if (it->second.kind != AnyData::Int)
   {
-    std::wcerr << "Argument -" << name << ": integer expected.\n";
+    std::cerr << "Argument -" << name << ": integer expected.\n";
     throw "argument error";
   }
 
@@ -149,7 +149,7 @@ bool Arguments::getSwitchConst( const ArgName& name, bool& sw ) const
   }
   else if (! it->second.dString.empty())
   {
-    std::wcerr << "Argument -" << name << ": value is not allowed.\n";
+    std::cerr << "Argument -" << name << ": value is not allowed.\n";
     return false;
   }
   else
@@ -179,7 +179,7 @@ bool Arguments::getSwitchCompact( const ArgName& name )
   }
   else
   {
-    std::wcerr << "No value is allowed for argument -" << name << ".\n";
+    std::cerr << "No value is allowed for argument -" << name << ".\n";
     throw "argument error";
   }
 }
@@ -188,16 +188,16 @@ void Arguments::checkEmptyArgs() const
 {
   if (!empty())
   {
-    std::wcerr << "Invalid argument: ";
+    std::cerr << "Invalid argument: ";
 
     for ( Arguments::const_iterator it=begin(); it!=end(); ++it )
     {
-      std::wcerr << "-" << it->first;
+      std::cerr << "-" << it->first;
       if (!it->second.dString.empty())
-        std::wcerr << "=" << it->second.dString;
-      std::wcerr << " ";
+        std::cerr << "=" << it->second.dString;
+      std::cerr << " ";
     }
-    std::wcerr << std::endl;
+    std::cerr << std::endl;
 
     throw "argument error";
   }
diff --git a/apertium/tmx_book_to_matrix.cc b/apertium/tmx_book_to_matrix.cc
index fb37b79..c115afb 100644
--- a/apertium/tmx_book_to_matrix.cc
+++ b/apertium/tmx_book_to_matrix.cc
@@ -185,7 +185,7 @@ void sentenceListsToAlignMatrixIdentity( const SentenceList& huSentenceList, con
 
     if (!rarelyLogging || (huPos%100==0))
     {
-     // std::wcerr << huPos << " ";
+     // std::cerr << huPos << " ";
     }
   }
 }
@@ -255,7 +255,7 @@ void sentenceListsToAlignMatrixTranslation(
 
     if (!rarelyLogging || (huPos%100==0))
     {
-     // std::wcerr << huPos << " (" << numberOfEvaluatedItems << ") ";
+     // std::cerr << huPos << " (" << numberOfEvaluatedItems << ") ";
     }
   }
 }
@@ -301,7 +301,7 @@ void sentenceListsToAlignMatrixIBMModelOne(
 
     if (!rarelyLogging || (huPos%100==0))
     {
-     // std::wcerr << huPos << " ";
+     // std::cerr << huPos << " ";
     }
   }
 }
diff --git a/apertium/tmx_builder.cc b/apertium/tmx_builder.cc
index 15d84fa..c62204d 100644
--- a/apertium/tmx_builder.cc
+++ b/apertium/tmx_builder.cc
@@ -15,10 +15,8 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/tmx_builder.h>
-#include <apertium/utf_converter.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/tmx_aligner_tool.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/compression.h>
 
 
@@ -37,10 +35,9 @@
 #include <fcntl.h>
 #endif
 
-using namespace Apertium;
 using namespace std;
 
-TMXBuilder::TMXBuilder(wstring const &l1, wstring const &l2):
+TMXBuilder::TMXBuilder(UString const &l1, UString const &l2):
 low_limit(0)
 {
   lang1 = l1;
@@ -62,61 +59,34 @@ TMXBuilder::~TMXBuilder()
 {
 }
 
-wstring
-TMXBuilder::restOfBlank(FILE *input)
+UString
+TMXBuilder::restOfBlank(InputFile& input)
 {
-  wstring result = L"[";
-
-  while(true)
-  {
-    wint_t val = fgetwc(input);
-    if(feof(input))
-    {
-      return L"";
-    }
-    switch(val)
-    {
-      case L'\\':
-        result += L'\\';
-        val = fgetwc(input);
-        if(feof(input))
-        {
-          return L"";
-        }
-        result += static_cast<wchar_t>(val);
-        break;
-
-      case L']':
-        result += L']';
-        return result;
-
-      default:
-        result += static_cast<wchar_t>(val);
-        break;
-    }
+  UString result = input.readBlock('[', ']');
+  if (result[result.size()-1] == ']') {
+    return result;
+  } else {
+    return ""_u;
   }
-
-  return L"";
 }
 
-wstring
-TMXBuilder::nextBlank(FILE *input)
+UString
+TMXBuilder::nextBlank(InputFile& input)
 {
-  wstring result = L"";
+  UString result;
 
   while(true)
   {
-    wint_t val = fgetwc(input);
-    if(feof(input))
-    {
-      return L"";
+    UChar32 val = input.get();
+    if(input.eof()) {
+      return ""_u;
     }
     switch(val)
     {
-      case L'\\':
-        fgetwc(input);
+      case '\\':
+        input.get();
         break;
-      case L'[':
+      case '[':
 
         result = restOfBlank(input);
         return result;
@@ -125,12 +95,12 @@ TMXBuilder::nextBlank(FILE *input)
 }
 
 bool
-TMXBuilder::compatible(FILE *f1, FILE *f2, bool lazy)
+TMXBuilder::compatible(InputFile& f1, InputFile& f2, bool lazy)
 {
-  wstring s1 = nextBlank(f1), s2 = nextBlank(f2);
+  UString s1 = nextBlank(f1), s2 = nextBlank(f2);
   if(!lazy)
   {
-    while(!feof(f1) && !feof(f2))
+    while(!f1.eof() && !f2.eof())
     {
       if(s1 != s2)
       {
@@ -142,7 +112,7 @@ TMXBuilder::compatible(FILE *f1, FILE *f2, bool lazy)
   }
   else
   {
-    while(!feof(f1) && !feof(f2))
+    while(!f1.eof() && !f2.eof())
     {
       if(s1.size() < s2.size()*(1-0.05) || s1.size() > s2.size()*(1+0.05))
       {
@@ -158,112 +128,87 @@ TMXBuilder::compatible(FILE *f1, FILE *f2, bool lazy)
 bool
 TMXBuilder::check(string const &file1, string const &file2, bool lazy)
 {
-  FILE *f1 = fopen(file1.c_str(), "rb");
-  FILE *f2 = fopen(file2.c_str(), "rb");
-  if(!f1 && !f2)
+  InputFile f1, f2;
+  bool bf1 = f1.open(file1.c_str());
+  bool bf2 = f2.open(file2.c_str());
+  if(!bf1 && !bf2)
   {
-    wcerr << L"Error: Cannot access files '" << UtfConverter::fromUtf8(file1);
-    wcerr << L"' and '" << UtfConverter::fromUtf8(file2) << "'" << endl;
+    cerr << "Error: Cannot access files '" << file1;
+    cerr << "' and '" << file2 << "'" << endl;
     return false;
   }
-  else if(!f1)
+  else if(!bf1)
   {
-    wcerr << L"Error: Cannot access file '";
-    wcerr << UtfConverter::fromUtf8(file2);
-    wcerr << "'" << endl;
-    fclose(f2);
+    cerr << "Error: Cannot access file '" << file1 << "'" << endl;
     return false;
   }
-  else if(!f2)
+  else if(!bf2)
   {
-    wcerr << L"Error: Cannot access file '";
-    wcerr << UtfConverter::fromUtf8(file2);
-    wcerr << "'" << endl;
-    fclose(f1);
+    cerr << "Error: Cannot access file '" << file2 << "'" << endl;
     return false;
   }
 
   bool retval = compatible(f1, f2, lazy);
 
-  fclose(f1);
-  fclose(f2);
   return retval;
 }
 
-wstring
-TMXBuilder::nextTU(FILE *input)
+UString
+TMXBuilder::nextTU(InputFile& input)
 {
-  wstring current_tu = L"";
-  wstring tmp;
+  UString current_tu;
+  UString tmp;
 
   while(true)
   {
-    wint_t symbol = fgetwc_unlocked(input);
-    if(feof(input))
-    {
-      if(current_tu == L"")
-      {
-        return L"";
-      }
-      else
-      {
-        return current_tu;
-      }
+    UChar32 symbol = input.get();
+    if(input.eof()) {
+      return current_tu;
     }
     switch(symbol)
     {
-      case L'\\':
-        symbol = fgetwc_unlocked(input);
-        if(feof(input))
-        {
-          if(current_tu == L"")
-          {
-            return L"";
-          }
-          else
-          {
-            return current_tu;
-          }
+      case '\\':
+        symbol = input.get();
+        if(input.eof()) {
+          return current_tu;
         }
         // continued down
       default:
-        current_tu += static_cast<wchar_t>(symbol);
+        current_tu += symbol;
         break;
 
-      case L'[':
+      case '[':
         tmp = restOfBlank(input);
-        if(tmp.substr(0,2) == L"[ ")
+        if(tmp.substr(0,2) == "[ "_u)
         {
-          current_tu.append(L" ");
+          current_tu += ' ';
         }
-        current_tu.append(L"<ph/>");
-        if(tmp.substr(tmp.size()-2, 2) == L" ]")
+        current_tu.append("<ph/>"_u);
+        if(tmp.substr(tmp.size()-2, 2) == " ]"_u)
         {
-          current_tu.append(L" ");
+          current_tu += ' ';
         }
         break;
 
-      case L'.':
-        current_tu += L'.';
-        symbol = fgetwc_unlocked(input);
+      case '.':
+        current_tu += '.';
+        symbol = input.get();
 
-        if(symbol != L'[' && !iswspace(symbol))
+        if(symbol != '[' && !u_isspace(symbol))
         {
-          if(!feof(input))
-          {
-            ungetwc(symbol, input);
+          if (!input.eof()) {
+            input.unget(symbol);
           }
         }
         else
         {
-          if(!feof(input))
-          {
-            ungetwc(symbol, input);
+          if (!input.eof()) {
+            input.unget(symbol);
           }
 
           return current_tu;
 /*          size_t idx = current_tu.size()-1;
-          while(current_tu[idx] == L'.')
+          while(current_tu[idx] == '.')
           {
             idx--;
           }
@@ -271,9 +216,9 @@ TMXBuilder::nextTU(FILE *input)
         }
         break;
 
-      case L'?':
-      case L'!':
-        current_tu += static_cast<wchar_t>(symbol);
+      case '?':
+      case '!':
+        current_tu += symbol;
         return current_tu;
     }
   }
@@ -281,34 +226,34 @@ TMXBuilder::nextTU(FILE *input)
   return current_tu;
 }
 
-wstring
-TMXBuilder::xmlize(wstring const &str)
+UString
+TMXBuilder::xmlize(UString const &str)
 {
-  wstring result = L"";
+  UString result;
 
   for(size_t i = 0, limit = str.size(); i < limit; i++)
   {
     switch(str[i])
     {
-      case L'<':
-        if(i + 5 <= limit && str.substr(i,5)==L"<ph/>")
+      case '<':
+        if(i + 5 <= limit && str.substr(i,5)=="<ph/>"_u)
         {
-          result.append(L"<ph/>");
+          result.append("<ph/>"_u);
           i += 4;
           break;
         }
         else
         {
-          result.append(L"&lt;");
+          result.append("&lt;"_u);
         }
         break;
 
-      case L'>':
-        result.append(L"&gt;");
+      case '>':
+        result.append("&gt;"_u);
         break;
 
-      case L'&':
-        result.append(L"&amp;");
+      case '&':
+        result.append("&amp;"_u);
         break;
 
       default:
@@ -323,12 +268,12 @@ TMXBuilder::xmlize(wstring const &str)
   while(cambio == true)
   {
     cambio = false;
-    while(result.size() >= 5 && result.substr(0,5) == L"<ph/>")
+    while(result.size() >= 5 && result.substr(0,5) == "<ph/>"_u)
     {
       result = result.substr(5);
       cambio = true;
     }
-    while(result.size() > 0 && !iswalnum(result[0]) && !iswpunct(result[0]))
+    while(result.size() > 0 && !u_isalnum(result[0]) && !u_ispunct(result[0]))
     {
       result = result.substr(1);
       cambio = true;
@@ -340,12 +285,12 @@ TMXBuilder::xmlize(wstring const &str)
   while(cambio == true)
   {
     cambio = false;
-    while(result.size() > 5 && result.substr(result.size()-5) == L"<ph/>")
+    while(result.size() > 5 && result.substr(result.size()-5) == "<ph/>"_u)
     {
       result = result.substr(0, result.size()-5);
       cambio = true;
     }
-    while(result.size() > 0 && !iswalnum(result[result.size()-1]) && !iswpunct(result[result.size()-1]))
+    while(result.size() > 0 && !u_isalnum(result[result.size()-1]) && !u_ispunct(result[result.size()-1]))
     {
       result = result.substr(0, result.size()-1);
       cambio = true;
@@ -376,50 +321,32 @@ void
 TMXBuilder::generate(string const &file1, string const &file2,
                      string const &outfile)
 {
-  FILE *output = stdout;
+  UFILE* output = u_finit(stdout, NULL, NULL);
 
-  if(outfile != "")
+  if(!outfile.empty())
   {
-    output = fopen(outfile.c_str(), "w");
+    output = u_fopen(outfile.c_str(), "w", NULL, NULL);
     if(!output)
     {
-      wcerr << L"Error: file '" << UtfConverter::fromUtf8(outfile);
-      wcerr << L"' cannot be opened for writing" << endl;
+      cerr << "Error: file '" << outfile;
+      cerr << "' cannot be opened for writing" << endl;
       exit(EXIT_FAILURE);
     }
   }
-#ifdef _MSC_VER
-  _setmode(_fileno(output), _O_U8TEXT);
-#endif
-
-  FILE *f1 = fopen(file1.c_str(), "r");
-  if(!f1)
-  {
-    wcerr << L"Error: file '" << UtfConverter::fromUtf8(file1);
-    wcerr << L"' cannot be opened for reading" << endl;
-    exit(EXIT_FAILURE);
-  }
 
-  FILE *f2 = fopen(file2.c_str(), "r");
-  if(!f2)
-  {
-    wcerr << L"Error: file '" << UtfConverter::fromUtf8(file2);
-    wcerr << L"' cannot be opened for reading" << endl;
-    exit(EXIT_FAILURE);
-  }
+  InputFile f1;
+  f1.open_or_exit(file1.c_str());
 
-#ifdef _MSC_VER
-  _setmode(_fileno(f1), _O_U8TEXT);
-  _setmode(_fileno(f2), _O_U8TEXT);
-#endif
+  InputFile f2;
+  f2.open_or_exit(file2.c_str());
 
   generateTMX(f1, f2, output);
 }
 
-vector<wstring>
-TMXBuilder::reverseList(vector<wstring> const &v)
+vector<UString>
+TMXBuilder::reverseList(vector<UString> const &v)
 {
-  vector<wstring> retval(v.size());
+  vector<UString> retval(v.size());
 
   for(int j = v.size() - 1, i = 0; j >=0; j--, i++)
   {
@@ -429,16 +356,15 @@ TMXBuilder::reverseList(vector<wstring> const &v)
   return retval;
 }
 
-vector<wstring>
-TMXBuilder::sentenceList(FILE *file)
+vector<UString>
+TMXBuilder::sentenceList(InputFile& file)
 {
-  vector<wstring> retval;
+  vector<UString> retval;
 
   while(true)
   {
-    wstring f = nextTU(file);
-    if(feof(file))
-    {
+    UString f = nextTU(file);
+    if(file.eof()) {
       break;
     }
     retval.push_back(f);
@@ -447,10 +373,10 @@ TMXBuilder::sentenceList(FILE *file)
   return retval;
 }
 
-vector<wstring>
-TMXBuilder::extractFragment(vector<wstring> const &text, unsigned int base, unsigned int width)
+vector<UString>
+TMXBuilder::extractFragment(vector<UString> const &text, unsigned int base, unsigned int width)
 {
-  vector<wstring> result;
+  vector<UString> result;
 
   for(unsigned int i = base; i < (base + width) && i < text.size(); i++)
   {
@@ -485,21 +411,21 @@ TMXBuilder::argmin(int nw, int n, int w)
 }
 
 void
-TMXBuilder::generateTMX(FILE *f1, FILE *f2, FILE *output)
-{
-  fprintf(output, "<?xml version=\"1.0\"?>\n");
-  fprintf(output, "<tmx version=\"1.4\">\n");
-  fprintf(output, "<header creationtool=\"Apertium TMX Builder\"\n");
-  fprintf(output, "        creationtoolversion=\"%s\"\n", PACKAGE_VERSION);
-  fprintf(output, "        segtype=\"sentence\"\n");
-  fprintf(output, "        srclang=\"%s\"\n", UtfConverter::toUtf8(lang1).c_str());
-  fprintf(output, "        adminlang=\"%s\"\n", UtfConverter::toUtf8(lang2).c_str());
-  fprintf(output, "        datatype=\"plaintext\"\n");
-  fprintf(output, "        o-tmf=\"none\">\n");
-  fprintf(output, "</header>\n");
-  fprintf(output, "<body>\n");
+TMXBuilder::generateTMX(InputFile& f1, InputFile& f2, UFILE* output)
+{
+  u_fprintf(output, "<?xml version=\"1.0\"?>\n");
+  u_fprintf(output, "<tmx version=\"1.4\">\n");
+  u_fprintf(output, "<header creationtool=\"Apertium TMX Builder\"\n");
+  u_fprintf(output, "        creationtoolversion=\"%s\"\n", PACKAGE_VERSION);
+  u_fprintf(output, "        segtype=\"sentence\"\n");
+  u_fprintf(output, "        srclang=\"%S\"\n", lang1.c_str());
+  u_fprintf(output, "        adminlang=\"%S\"\n", lang2.c_str());
+  u_fprintf(output, "        datatype=\"plaintext\"\n");
+  u_fprintf(output, "        o-tmf=\"none\">\n");
+  u_fprintf(output, "</header>\n");
+  u_fprintf(output, "<body>\n");
   outputTU(f1, f2, output);
-  fprintf(output, "</body>\n</tmx>\n");
+  u_fprintf(output, "</body>\n</tmx>\n");
 
 }
 
@@ -512,17 +438,17 @@ TMXBuilder::printTable(int *table, unsigned int nrows, unsigned int ncols)
     {
       if(j != 0)
       {
-        wcerr << L" ";
+        cerr << " ";
       }
-      wcerr << setw(10) << table[i*ncols + j];
+      cerr << setw(10) << table[i*ncols + j];
     }
-    wcerr << endl;
+    cerr << endl;
   }
 }
 
 
 void
-TMXBuilder::printTUCond(FILE *output, wstring const &tu1, wstring const &tu2, bool secure_zone)
+TMXBuilder::printTUCond(UFILE *output, UString const &tu1, UString const &tu2, bool secure_zone)
 {
   if(secure_zone && similar(tu1, tu2))
   {
@@ -531,30 +457,26 @@ TMXBuilder::printTUCond(FILE *output, wstring const &tu1, wstring const &tu2, bo
 }
 
 void
-TMXBuilder::splitAndMove(FILE *f1, string const &filename)
+TMXBuilder::splitAndMove(InputFile& f1, string const &filename)
 {
-  FILE *stream = fopen(filename.c_str(), "w");
-  vector<wstring> fichero_por_cadenas = sentenceList(f1);
-  for(size_t i = 0; i < fichero_por_cadenas.size(); i++)
-  {
-    fputws_unlocked(fichero_por_cadenas[i].c_str(), stream);
-    fputws_unlocked(L"\n", stream);
+  UFILE* stream = u_fopen(filename.c_str(), "w", NULL, NULL);
+  vector<UString> fichero_por_cadenas = sentenceList(f1);
+  for (auto& it : fichero_por_cadenas) {
+    u_fprintf(stream, "%S\n", it.c_str());
   }
-  fclose(stream);
+  u_fclose(stream);
 }
 
 void
-TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
+TMXBuilder::outputTU(InputFile& f1, InputFile& f2, UFILE* output)
 {
   string left = tmpnam(NULL);
   string right = tmpnam(NULL);
   string out = tmpnam(NULL);
 
   splitAndMove(f1, left);
-  fclose(f1);
 
   splitAndMove(f2, right);
-  fclose(f2);
 
   TMXAligner::DictionaryItems dict;
   AlignParameters ap;
@@ -565,29 +487,25 @@ TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
 
   TMXAligner::alignerToolWithFilenames(dict, left, right, ap, out);
 
-  FILE *stream = fopen(out.c_str(), "r");
+  InputFile stream;
+  stream.open(out.c_str());
   int conta = 0;
-  wstring partes[2];
-  while(true)
+  UString partes[2];
+  while(!stream.eof())
   {
-    wchar_t val = fgetwc(stream);
-    if(feof(stream))
-    {
-      break;
-    }
+    UChar32 val = stream.get();
 
-    if(val == L'\t')
+    if(val == '\t')
     {
       conta++;
     }
-    else if(val == L'\n')
+    else if(val == '\n')
     {
-      if(partes[0] != L"" && partes[1] != L"")
-      {
+      if (!partes[0].empty() && !partes[1].empty()) {
         printTU(output, partes[0], partes[1]);
       }
-      partes[0] = L"";
-      partes[1] = L"";
+      partes[0].clear();
+      partes[1].clear();
       conta = 0;
     }
     if(conta < 2)
@@ -605,7 +523,7 @@ TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
 
   int base_i = 0, base_j = 0;
 
-  vector<wstring> lista1 = reverseList(sentenceList(f1)),
+  vector<UString> lista1 = reverseList(sentenceList(f1)),
                   lista2 = reverseList(sentenceList(f2)), lista3;
 
   if(freference != NULL)
@@ -615,8 +533,8 @@ TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
 
   while(true)
   {
-    vector<wstring> l1 = extractFragment(lista1, base_i, window_size);
-    vector<wstring> l2 = extractFragment(lista2, base_j, window_size) , l3;
+    vector<UString> l1 = extractFragment(lista1, base_i, window_size);
+    vector<UString> l2 = extractFragment(lista2, base_j, window_size) , l3;
 
     if(lista3.size() != 0)
     {
@@ -696,7 +614,7 @@ TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
 		}
 	    }
 
-	  //          wcerr << L"[" << i << L" " << j << L"]" << endl;
+	  //          cerr << "[" << i << " " << j << "]" << endl;
          break;
 
         case 3:
@@ -755,13 +673,13 @@ TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
 }
 
 int
-TMXBuilder::weight(wstring const &s)
+TMXBuilder::weight(UString const &s)
 {
   return s.size()*2;  // just the size of the string
 }
 
 int *
-TMXBuilder::levenshteinTable(vector<wstring> &l1, vector<wstring> &l2,
+TMXBuilder::levenshteinTable(vector<UString> &l1, vector<UString> &l2,
 			     unsigned int diagonal_width, unsigned int max_edit)
 {
   unsigned int const nrows = l1.size() + 1;
@@ -809,19 +727,19 @@ TMXBuilder::levenshteinTable(vector<wstring> &l1, vector<wstring> &l2,
   return table;
 }
 
-wstring
-TMXBuilder::filter(wstring const &tu)
+UString
+TMXBuilder::filter(UString const &tu)
 {
   bool has_text = false;
   unsigned int count_blank = 0;
 
   for(unsigned int i = 0, limit = tu.size(); i != limit; i++)
   {
-    if(iswalpha(tu[i]))
+    if(u_isalpha(tu[i]))
     {
       has_text = true;
     }
-    else if(has_text && iswspace(tu[i]))
+    else if(has_text && u_isspace(tu[i]))
     {
       count_blank++;
     }
@@ -829,28 +747,24 @@ TMXBuilder::filter(wstring const &tu)
 
   if(!has_text || count_blank <= 2 || tu.size() == 0)
   {
-    return L"";
+    return ""_u;
   }
 
   return xmlize(tu);
 }
 
 void
-TMXBuilder::printTU(FILE *output, wstring const &tu1, wstring const &tu2) const
+TMXBuilder::printTU(UFILE* output, UString const &tu1, UString const &tu2) const
 {
-  wstring tu1_filtered = filter(tu1);
-  wstring tu2_filtered = filter(tu2);
-
-  if(tu1_filtered != L"" && tu2_filtered != L"")
-  {
+  UString tu1_filtered = filter(tu1);
+  UString tu2_filtered = filter(tu2);
 
-    fprintf(output, "<tu>\n  <tuv xml:lang=\"%s\"><seg>%s</seg></tuv>\n",
-                    UtfConverter::toUtf8(lang1).c_str(),
-                    UtfConverter::toUtf8(tu1_filtered).c_str());
+  if (!tu1_filtered.empty() && !tu2_filtered.empty()) { // write a <tu> only when BOTH filtered segments are non-empty
+    u_fprintf(output, "<tu>\n  <tuv xml:lang=\"%S\"><seg>%S</seg></tuv>\n",
+              lang1.c_str(), tu1_filtered.c_str());
 
-    fprintf(output, "  <tuv xml:lang=\"%s\"><seg>%s</seg></tuv>\n</tu>\n",
-                    UtfConverter::toUtf8(lang2).c_str(),
-                    UtfConverter::toUtf8(tu2_filtered).c_str());
+    u_fprintf(output, "  <tuv xml:lang=\"%S\"><seg>%S</seg></tuv>\n</tu>\n",
+              lang2.c_str(), tu2_filtered.c_str());
   }
 }
 
@@ -892,7 +806,7 @@ TMXBuilder::min2(int i1, int i2)
 }
 
 int
-TMXBuilder::editDistance(wstring const &s1, wstring const &s2, unsigned int max_edit)
+TMXBuilder::editDistance(UString const &s1, UString const &s2, unsigned int max_edit)
 {
   int const nrows = min2(s1.size() + 1, max_edit);
   int const ncols = min2(s2.size() + 1, max_edit);
@@ -974,13 +888,13 @@ TMXBuilder::setEditDistancePercent(double e)
 }
 
 bool
-TMXBuilder::isRemovablePunct(wchar_t const &c)
+TMXBuilder::isRemovablePunct(UChar32 const &c)
 {
-  return c == L'.';
+  return c == '.';
 }
 
 bool
-TMXBuilder::similar(wstring const &s1, wstring const &s2)
+TMXBuilder::similar(UString const &s1, UString const &s2)
 {
   unsigned int l1 = s1.size();
   unsigned int l2 = s2.size();
@@ -1012,8 +926,8 @@ TMXBuilder::setTranslation(string const &filename)
   freference = fopen(filename.c_str(), "r");
   if(!freference)
   {
-    wcerr << L"Error: file '" << UtfConverter::fromUtf8(filename);
-    wcerr << L"' cannot be opened for reading" << endl;
+    cerr << "Error: file '" << filename;
+    cerr << "' cannot be opened for reading" << endl;
     freference = NULL;
   }
 
diff --git a/apertium/tmx_builder.h b/apertium/tmx_builder.h
index 7e92d40..2aca2de 100644
--- a/apertium/tmx_builder.h
+++ b/apertium/tmx_builder.h
@@ -20,14 +20,15 @@
 #include <apertium/transfer_data.h>
 #include <string>
 #include <cstdio>
+#include <lttoolbox/input_file.h>
 
 using namespace std;
 
 class TMXBuilder
 {
 private:
-  wstring lang1;
-  wstring lang2;
+  UString lang1;
+  UString lang2;
   unsigned int max_edit;
   unsigned int diagonal_width;
   unsigned int window_size;
@@ -37,35 +38,35 @@ private:
   unsigned int low_limit;
   FILE *freference;
 
-  static wstring nextTU(FILE *input);
-  static wstring restOfBlank(FILE *input);
-  static wstring nextBlank(FILE *input);
-  static wstring xmlize(wstring const &str);
-  static bool compatible(FILE *input, FILE *output, bool lazy = false);
-  void generateTMX(FILE *f1, FILE *f2, FILE *output);
-  void outputTU(FILE *f1, FILE *f2, FILE *output);
-  static vector<wstring> reverseList(vector<wstring> const &v);
-  static vector<wstring> sentenceList(FILE *file);
+  static UString nextTU(InputFile& input);
+  static UString restOfBlank(InputFile& input);
+  static UString nextBlank(InputFile& input);
+  static UString xmlize(UString const &str);
+  static bool compatible(InputFile& input, InputFile& output, bool lazy = false);
+  void generateTMX(InputFile& f1, InputFile& f2, UFILE* output);
+  void outputTU(InputFile& f1, InputFile& f2, UFILE* output);
+  static vector<UString> reverseList(vector<UString> const &v);
+  static vector<UString> sentenceList(InputFile& file);
   static int argmin(int nw, int n, int w);
-  static int * levenshteinTable(vector<wstring> &l1, vector<wstring> &l2,
+  static int * levenshteinTable(vector<UString> &l1, vector<UString> &l2,
 				unsigned int diagonal_width, unsigned int max_edit);
-  void printTU(FILE *output, wstring const &tu1, wstring const &tu2) const;
-  static wstring filter(wstring const &s);
-  static int weight(wstring const &s);
+  void printTU(UFILE* output, UString const &tu1, UString const &tu2) const;
+  static UString filter(UString const &s);
+  static int weight(UString const &s);
   static void printTable(int *table, unsigned int nrows, unsigned int ncols);
-  static int editDistance(wstring const &s1, wstring const &s2, unsigned int max_edit);
+  static int editDistance(UString const &s1, UString const &s2, unsigned int max_edit);
   static int min3(int i1, int i2, int i3);
   static int min2(int i1, int i2);
-  void printTUCond(FILE *output, wstring const &s1, wstring const &s2, bool secure_zone);
-  static vector<wstring> extractFragment(vector<wstring> const &text, unsigned int base,
+  void printTUCond(UFILE* output, UString const &s1, UString const &s2, bool secure_zone);
+  static vector<UString> extractFragment(vector<UString> const &text, unsigned int base,
 					 unsigned int width);
 
-  static bool isRemovablePunct(wchar_t const &c);
-  bool similar(wstring const &s1, wstring const &s2);
+  static bool isRemovablePunct(UChar32 const &c);
+  bool similar(UString const &s1, UString const &s2);
 
-  void splitAndMove(FILE *file, string const &filename);
+  void splitAndMove(InputFile& file, string const &filename);
 public:
-  TMXBuilder(wstring const &l1, wstring const &l2);
+  TMXBuilder(UString const &l1, UString const &l2);
   ~TMXBuilder();
   static bool check(string const &file1, string const &file2, bool lazy = false);
   void generate(string const &file1, string const &file2,
diff --git a/apertium/tmx_dic_tree.h b/apertium/tmx_dic_tree.h
index 9a0545b..957de53 100644
--- a/apertium/tmx_dic_tree.h
+++ b/apertium/tmx_dic_tree.h
@@ -106,7 +106,7 @@ DicTree<Atom, Identifier>& DicTree<Atom, Identifier>::add( const Atom& word, con
     if ( ( v->id != 0 ) && ( id != 0 ) )
     {
       if (WarnOnConflict)
-        std::wcerr << "warning: conflict in tree" << std::endl;
+        std::cerr << "warning: conflict in tree" << std::endl;
     }
     if ( id != 0 )
     {
@@ -165,7 +165,7 @@ void SubsetLookup<Atom, Identifier>::add( const Atoms& words, const Identifier&
   else
   {
     if (DicTree<Atom, Identifier>::WarnOnConflict)
-      std::wcerr << "warning: conflict in tree" << std::endl;
+      std::cerr << "warning: conflict in tree" << std::endl;
   }
 }
 
diff --git a/apertium/tmx_dictionary.cc b/apertium/tmx_dictionary.cc
index f36c65a..70944ea 100644
--- a/apertium/tmx_dictionary.cc
+++ b/apertium/tmx_dictionary.cc
@@ -22,7 +22,7 @@
 
 #include <cmath>
 
-#define massert(e) if (!(e)) { std::wcerr << #e << " failed" << std::endl; throw "assert"; }
+#define massert(e) if (!(e)) { std::cerr << #e << " failed" << std::endl; throw "assert"; }
 
 namespace TMXAligner
 {
@@ -151,7 +151,7 @@ void readBicorpus( std::istream& is, SentenceList& huSentenceList, SentenceList&
     split( line, halfs );
     if (halfs.size()!=2)
     {
-      std::wcerr << "Incorrect bicorpus file: " << halfs.size() << " records in line " << huSentenceList.size() << std::endl;
+      std::cerr << "Incorrect bicorpus file: " << halfs.size() << " records in line " << huSentenceList.size() << std::endl;
       throw "data error";
     }
 
@@ -565,7 +565,7 @@ void TransLex::build( const DictionaryItems& dictionaryItems )
       ++ignored;
     }
   }
-  std::wcerr << added << " items added to TransLex, " << ignored << " multiword items ignored." << std::endl;
+  std::cerr << added << " items added to TransLex, " << ignored << " multiword items ignored." << std::endl;
 }
 
 TransLex::DictInterval TransLex::lookupLeftWord ( const Word& huWord ) const
diff --git a/apertium/tmx_trail_postprocessors.cc b/apertium/tmx_trail_postprocessors.cc
index 4bde3ed..d0b7312 100644
--- a/apertium/tmx_trail_postprocessors.cc
+++ b/apertium/tmx_trail_postprocessors.cc
@@ -280,7 +280,7 @@ void postprocessTrailStart( Trail& bestTrail,
     {
       if (global_postprocessLogging)
       {
-        std::wcerr << "Thrown away at position " << pos
+        std::cerr << "Thrown away at position " << pos
           << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
       }
 
@@ -319,7 +319,7 @@ void postprocessTrailEnd( Trail& bestTrail,
     {
       if (global_postprocessLogging)
       {
-        std::wcerr << "Thrown away at position " << pos
+        std::cerr << "Thrown away at position " << pos
           << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
       }
 
@@ -365,7 +365,7 @@ void postprocessTrail( Trail& bestTrail, const TrailScoresInterval& trailScoresI
     {
       if (global_postprocessLogging)
       {
-        std::wcerr << "Thrown away at position " << pos
+        std::cerr << "Thrown away at position " << pos
           << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
       }
 
@@ -404,7 +404,7 @@ void postprocessTrailByTopology( Trail& bestTrail, double qualityThreshold )
     {
       if (global_postprocessLogging)
       {
-        std::wcerr << "Thrown away at position " << pos
+        std::cerr << "Thrown away at position " << pos
           << ", avarage " << avg << std::endl;
       }
 
diff --git a/apertium/tmx_translate.cc b/apertium/tmx_translate.cc
index 06db477..dd37da5 100644
--- a/apertium/tmx_translate.cc
+++ b/apertium/tmx_translate.cc
@@ -14,7 +14,7 @@
 #include <apertium/tmx_words.h>
 #include <apertium/tmx_dictionary.h>
 #include <apertium/tmx_dic_tree.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <algorithm>
 #include <fstream>
@@ -34,7 +34,7 @@ void buildDumbDictionary( const DictionaryItems& dictionary, DumbDictionary& dum
     if (hu.size()==1)
     {
       dumbDictionary[ hu[0] ] = en ;
-      // std::wcerr << hu[0] << "\t" << en << std::endl;
+      // std::cerr << hu[0] << "\t" << en << std::endl;
     }
   }
 }
@@ -98,7 +98,7 @@ void buildDumbDictionary( TMXAligner::DumbDictionary& dumbDictionary,
   {
     std::ifstream is( dictionaryFilename.c_str() );
     dictionary.read( is );
-    std::wcerr << dictionary.size() << " dictionary items read." << std::endl;
+    std::cerr << dictionary.size() << " dictionary items read." << std::endl;
   }
 
   if (!enSentenceList.empty())
@@ -267,7 +267,7 @@ void naiveTranslate(
     {
       subsetLookup.add( dictionary[i].second, i+1 ); // !!! i+1
     }
-    std::wcerr << "Index tree built." << std::endl;
+    std::cerr << "Index tree built." << std::endl;
   }
 
   for ( size_t i=0; i<sentenceList.size(); ++i )
@@ -292,7 +292,7 @@ void naiveTranslate(
     translatedSentenceList.push_back(sentence);
   }
 
-  std::wcerr << "Analysis ready." << std::endl;
+  std::cerr << "Analysis ready." << std::endl;
 }
 
 
@@ -345,13 +345,13 @@ void normalizeTextsForIdentity( const DictionaryItems& dictionary,
   enFreq.build(enSentenceListPretty);
   buildDumbDictionaryUsingFrequencies( dictionary, enFreq, dumbDictionary );
 
-//  std::wcerr << "Simplified dictionary ready." << std::endl;
+//  std::cerr << "Simplified dictionary ready." << std::endl;
 
   SentenceList huSentenceList;
 
   trivialTranslateSentenceList( dumbDictionary, huSentenceListPretty, huSentenceListGarbled );
 
-//  std::wcerr << "Rough translation ready." << std::endl;
+//  std::cerr << "Rough translation ready." << std::endl;
 
   sortNormalizeSentences(huSentenceListGarbled);
 
diff --git a/apertium/transfer.cc b/apertium/transfer.cc
index 31f57cb..1c8f449 100644
--- a/apertium/transfer.cc
+++ b/apertium/transfer.cc
@@ -15,132 +15,19 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/transfer.h>
-#include <apertium/trx_reader.h>
-#include <apertium/utf_converter.h>
-#include <apertium/string_utils.h>
-#include <lttoolbox/compression.h>
-#include <lttoolbox/xml_parse_util.h>
-#include <pcre.h>
-
-#include <cctype>
-#include <iostream>
-#include <stack>
-#include <cerrno>
-
-#ifdef _WIN32
-#include <utf8_fwrap.h>
-#endif
-
-using namespace Apertium;
-using namespace std;
-
-void
-Transfer::destroy()
-{
-  if(me)
-  {
-    delete me;
-    me = NULL;
-  }
-  if(doc)
-  {
-    xmlFreeDoc(doc);
-    doc = NULL;
-  }
-}
-
-Transfer::Transfer() :
-word(0),
-lword(0),
-last_lword(0),
-output(0),
-any_char(0),
-any_tag(0),
-nwords(0)
-{
-  me = NULL;
-  doc = NULL;
-  root_element = NULL;
-  lastrule = NULL;
-  defaultAttrs = lu;
-  useBilingual = true;
-  preBilingual = false;
-  isExtended = false;
-  null_flush = false;
-  internal_null_flush = false;
-  trace = false;
-  trace_att = false;
-  in_lu = false;
-  in_let_var = false;
-  in_out = false;
-  in_wblank = false;
-}
-
-Transfer::~Transfer()
-{
-  destroy();
-}
-
-void
-Transfer::readData(FILE *in)
-{
-  alphabet.read(in);
-  any_char = alphabet(TRXReader::ANY_CHAR);
-  any_tag = alphabet(TRXReader::ANY_TAG);
-
-  Transducer t;
-  t.read(in, alphabet.size());
-
-  map<int, int> finals;
-
-  // finals
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    int key = Compression::multibyte_read(in);
-    finals[key] = Compression::multibyte_read(in);
-  }
 
-  me = new MatchExe(t, finals);
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/xml_walk_util.h>
 
-  // attr_items
-  bool recompile_attrs = Compression::string_read(in) != pcre_version_endian();
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    attr_items[cad_k].read(in);
-    wstring fallback = Compression::wstring_read(in);
-    if(recompile_attrs) {
-      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
-    }
-  }
-
-  // variables
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
-  }
-
-  // macros
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    macros[cad_k] = Compression::multibyte_read(in);
-  }
+#include <iostream>
 
-  // lists
-  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
-  {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+using namespace std;
 
-    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
-    {
-      wstring const cad_v = Compression::wstring_read(in);
-      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
-      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
-    }
-  }
-}
+Transfer::Transfer()
+  : word(nullptr), last_lword(0), in_lu(false), in_wblank(false),
+    isExtended(false), defaultAttrs(lu), preBilingual(false),
+    useBilingual(true), trace_att(false)
+{}
 
 void
 Transfer::readBil(string const &fstfile)
@@ -148,7 +35,7 @@ Transfer::readBil(string const &fstfile)
   FILE *in = fopen(fstfile.c_str(), "rb");
   if(!in)
   {
-    wcerr << "Error: Could not open file '" << fstfile << "'." << endl;
+    cerr << "Error: Could not open file '" << fstfile << "'." << endl;
     exit(EXIT_FAILURE);
   }
   fstp.load(in);
@@ -162,7 +49,7 @@ Transfer::setExtendedDictionary(string const &fstfile)
   FILE *in = fopen(fstfile.c_str(), "rb");
   if(!in)
   {
-    wcerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl;
+    cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl;
     exit(EXIT_FAILURE);
   }
   extended.load(in);
@@ -175,2049 +62,829 @@ void
 Transfer::read(string const &transferfile, string const &datafile,
 	       string const &fstfile)
 {
-  readTransfer(transferfile);
+  TransferBase::read(transferfile.c_str(), datafile.c_str());
+  if (getattr(root_element, "default") == "chunk"_u) {
+    defaultAttrs = chunk;
+  } else {
+    defaultAttrs = lu;
+  }
+  if (!fstfile.empty()) {
+    readBil(fstfile);
+  }
+}
 
-  // datafile
-  FILE *in = fopen(datafile.c_str(), "rb");
-  if(!in)
+bool
+Transfer::checkIndex(xmlNode *element, int index, int limit)
+{
+  if(index >= limit)
   {
-    wcerr << "Error: Could not open file '" << datafile << "'." << endl;
-    exit(EXIT_FAILURE);
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl;
+    return false;
   }
-  readData(in);
-  fclose(in);
-
-  if(fstfile != "")
+  if(index < 0) {
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl;
+    return false;
+  }
+  if(word[index] == 0)
   {
-    readBil(fstfile);
+    cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl;
+    return false;
   }
+  return true;
 }
 
-void
-Transfer::readTransfer(string const &in)
+UString
+Transfer::evalCachedString(xmlNode *element)
 {
-  doc = xmlReadFile(in.c_str(), NULL, 0);
+  TransferInstr& ti = evalStringCache[element];
+  switch (ti.getType()) {
+  case ti_clip_sl:
+    if (checkIndex(element, ti.getPos(), lword)) {
+      if (gettingLemmaFromWord(ti.getContent()) && last_lword > 1) {
+        if(in_lu) {
+          out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
+        } else if (in_let_var) {
+          var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
+        }
+      }
 
-  if(doc == NULL)
-  {
-    wcerr << "Error: Could not parse file '" << in << "'." << endl;
-    exit(EXIT_FAILURE);
-  }
+      return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition());
+    }
+    break;
 
-  root_element = xmlDocGetRootElement(doc);
+  case ti_clip_tl:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      if(gettingLemmaFromWord(ti.getContent()) && last_lword > 1) {
+        if(in_lu) {
+          out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
+        } else if(in_let_var) {
+          var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
+        }
+      }
 
-  // search for root element attributes
-  for(xmlAttr *i = root_element->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "default"))
-    {
-      if(!xmlStrcmp(i->children->content, (const xmlChar *) "chunk"))
-      {
-        defaultAttrs = chunk;
+      return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition());
+    }
+    break;
+
+  case ti_clip_ref:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      return word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition());
+    }
+    break;
+
+  case ti_linkto_sl:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      if(!word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()).empty()) {
+        UString ret;
+        ret += '<';
+        ret += ti.getStrval();
+        ret += '>';
+        return ret;
+      } else {
+        return ""_u;
       }
-      else
-      {
-        defaultAttrs = lu; // default value for 'default'
+    }
+    break;
+
+  case ti_linkto_tl:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      if(!word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()).empty()) {
+        UString ret;
+        ret += '<';
+        ret += ti.getStrval();
+        ret += '>';
+        return ret;
+      } else {
+        return ""_u;
       }
     }
-  }
+    break;
 
-  // search for macros & rules
-  for(xmlNode *i = root_element->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "section-def-macros"))
-      {
-        collectMacros(i);
+  case ti_linkto_ref:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      if(!word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()).empty()) {
+        UString ret;
+        ret += '<';
+        ret += ti.getStrval();
+        ret += '>';
+        return ret;
+      } else {
+        return ""_u;
       }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "section-rules"))
-      {
-        collectRules(i);
+    }
+    break;
+
+  case ti_var:
+    if(last_lword > 1) {
+      out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]);
+    }
+    return variables[ti.getContent()];
+
+  case ti_lit_tag:
+  case ti_lit:
+    return ti.getContent();
+
+  case ti_b:
+    if(!blank_queue.empty()) {
+      UString retblank = blank_queue.front();
+      if(in_out) {
+        blank_queue.pop();
       }
+
+      return retblank;
+    } else {
+      return " "_u;
+    }
+    break;
+
+  case ti_get_case_from:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::copycase(word[ti.getPos()]->source(attr_items[ti.getContent()]),
+                                   evalString((xmlNode *) ti.getPointer()));
+    }
+    break;
+
+  case ti_case_of_sl:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::getcase(word[ti.getPos()]->source(attr_items[ti.getContent()]));
+    }
+    break;
+
+  case ti_case_of_tl:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::getcase(word[ti.getPos()]->target(attr_items[ti.getContent()]));
+    }
+    break;
+
+  case ti_case_of_ref:
+    if(checkIndex(element, ti.getPos(), lword)) {
+      return StringUtils::getcase(word[ti.getPos()]->reference(attr_items[ti.getContent()]));
     }
+    break;
+
+  default:
+    return ""_u;
   }
+  return ""_u;
 }
 
 void
-Transfer::collectRules(xmlNode *localroot)
+Transfer::processClip(xmlNode* element)
 {
-  for(xmlNode *rule = localroot->children; rule != NULL; rule = rule->next)
-  {
-    if(rule->type == XML_ELEMENT_NODE)
-    {
-      size_t line = rule->line;
-      for(xmlNode *rulechild = rule->children; ; rulechild = rulechild->next)
-      {
-        if(rulechild->type == XML_ELEMENT_NODE && !xmlStrcmp(rulechild->name, (const xmlChar *) "action"))
-        {
-          rule_map.push_back(rulechild);
-          rule_lines.push_back(line);
-          break;
-        }
-      }
-    }
+  int pos = 0;
+  xmlChar *side = NULL;
+  UString as;
+  UString part;
+  bool queue = true;
+
+  for(xmlAttr *i = element->properties; i != NULL; i = i->next) {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "side")) {
+      side = i->children->content;
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) {
+      part = to_ustring((const char*) i->children->content);
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) {
+      pos = atoi((const char *)i->children->content) - 1;
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "queue")) {
+      if(!xmlStrcmp(i->children->content, (const xmlChar *) "no")) {
+        queue = false;
+      }
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to")) {
+      as = to_ustring((const char*)i->children->content);
+    }
+  }
+
+  if(!as.empty()) {
+    if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
+      evalStringCache[element] = TransferInstr(ti_linkto_sl, part, pos, NULL, queue, as);
+    } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
+      evalStringCache[element] = TransferInstr(ti_linkto_ref, part, pos, NULL, queue, as);
+    } else {
+      evalStringCache[element] = TransferInstr(ti_linkto_tl, part, pos, NULL, queue, as);
+    }
+  } else if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
+    evalStringCache[element] = TransferInstr(ti_clip_sl, part, pos, NULL, queue);
+  } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
+    evalStringCache[element] = TransferInstr(ti_clip_ref, part, pos, NULL, queue);
+  } else {
+    evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL, queue);
   }
 }
 
 void
-Transfer::collectMacros(xmlNode *localroot)
+Transfer::processBlank(xmlNode* element)
 {
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      macro_map.push_back(i);
+  evalStringCache[element] = TransferInstr(ti_b, " "_u, -1);
+}
+
+void
+Transfer::processCaseOf(xmlNode* element)
+{
+  int pos = 0;
+  xmlChar *side = NULL;
+  UString part;
+
+  for(xmlAttr *i = element->properties; i != NULL; i = i->next) {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "side")) {
+      side = i->children->content;
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) {
+      part = to_ustring((const char*) i->children->content);
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) {
+      pos = atoi((const char *) i->children->content) - 1;
     }
   }
+
+  if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
+    evalStringCache[element] = TransferInstr(ti_case_of_sl, part, pos);
+  } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
+    evalStringCache[element] = TransferInstr(ti_case_of_ref, part, pos);
+  } else {
+    evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos);
+  }
 }
 
-bool
-Transfer::checkIndex(xmlNode *element, int index, int limit)
+UString
+Transfer::processLu(xmlNode* element)
 {
-  if(index >= limit)
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index >= limit" << endl;
-    return false;
+  in_lu = true;
+  out_wblank.clear();
+
+  UString myword;
+  for (auto i : children(element)) {
+    myword.append(evalString(i));
   }
-  if(index < 0) {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": index < 0" << endl;
-    return false;
+
+  in_lu = false;
+
+  if(last_lword == 1) {
+    out_wblank = word[0]->getWblank();
   }
-  if(word[index] == 0)
-  {
-    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) << L": line " << element->line << L": Null access at word[index]" << endl;
-    return false;
+
+  if(!myword.empty()) {
+    if(myword[0] != '[' || myword[1] != '[') {
+      UString ret = out_wblank;
+      ret += '^';
+      ret += myword;
+      ret += '$';
+      return ret;
+    } else {
+      myword += '$';
+      return myword;
+    }
+  } else {
+    return ""_u;
   }
-  return true;
 }
 
-bool
-Transfer::gettingLemmaFromWord(string attr)
+UString
+Transfer::processMlu(xmlNode* element)
 {
-    return (attr.compare("lem") == 0 || attr.compare("lemh") == 0 || attr.compare("whole") == 0);
+  UString value;
+
+  bool first_time = true;
+  out_wblank.clear();
+
+  in_lu = true;
+  for (auto i : children(element)) {
+    UString myword;
+    for (auto j : children(i)) {
+      myword.append(evalString(j));
+    }
+
+	if (!first_time) {
+	  if(!myword.empty() && myword[0] != '#') { //'+#' problem
+        value += '+';
+      }
+	} else {
+      if (!myword.empty()) {
+	    first_time = false;
+      }
+	}
+
+	value.append(myword);
+  }
+
+  if(last_lword == 1) {
+    out_wblank = word[0]->getWblank();
+  }
+
+  if(!value.empty()) {
+    UString ret = out_wblank;
+    ret += '^';
+    ret += value;
+    ret += '$';
+    return ret;
+  } else {
+    return ""_u;
+  }
 }
 
-string
-Transfer::combineWblanks(string wblank_current, string wblank_to_add)
+void
+Transfer::processLuCount(xmlNode* element)
 {
-  if(wblank_current.empty() && wblank_to_add.empty())
-  {
-    return wblank_current;
-  }
-  else if(wblank_current.empty())
-  {
-    return wblank_to_add;
+  cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+  exit(EXIT_FAILURE);
+}
+
+void
+Transfer::processOut(xmlNode *localroot)
+{
+  in_out = true;
+
+  for (auto i : children(localroot)) {
+    if(defaultAttrs == lu) {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) {
+        write(processLu(i), output);
+      } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) {
+        write(processMlu(i), output);
+      }
+    } else {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) {
+        write(processChunk(i), output);
+      } else { // 'b'
+        write(evalString(i), output);
+      }
+    }
   }
-  else if(wblank_to_add.empty())
+  in_out = false;
+}
+
+UString
+Transfer::processChunk(xmlNode *localroot)
+{
+  UString name, namefrom;
+  UString caseofchunk = "aa"_u;
+  UString result;
+
+  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
   {
-    return wblank_current;
+    if(!xmlStrcmp(i->name, (const xmlChar *) "name"))
+    {
+      name = to_ustring((const char *) i->children->content);
+    }
+    else if(!xmlStrcmp(i->name, (const xmlChar *) "namefrom"))
+    {
+      namefrom = to_ustring((const char *) i->children->content);
+    }
+    else if(!xmlStrcmp(i->name, (const xmlChar *) "case"))
+    {
+      caseofchunk = to_ustring((const char *) i->children->content);
+    }
   }
-  
-  string new_out_wblank;
-  for(string::const_iterator it = wblank_current.begin(); it != wblank_current.end(); it++)
+
+  result += '^';
+  if(!caseofchunk.empty())
   {
-    if(*it == '\\')
+    if(!name.empty())
     {
-      new_out_wblank += *it;
-      it++;
-      new_out_wblank += *it;
+      result.append(StringUtils::copycase(variables[caseofchunk], name));
     }
-    else if(*it == ']')
+    else if(!namefrom.empty())
     {
-      if(*(it+1) == ']')
-      {
-        new_out_wblank += ';';
-        break;
-      }
+      result.append(StringUtils::copycase(variables[caseofchunk], variables[namefrom]));
     }
     else
     {
-      new_out_wblank += *it;
+      cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
+      exit(EXIT_FAILURE);
     }
   }
-  
-  for(string::const_iterator it = wblank_to_add.begin(); it != wblank_to_add.end(); it++)
+  else
   {
-    if(*it == '\\')
+    if(!name.empty())
     {
-      new_out_wblank += *it;
-      it++;
-      new_out_wblank += *it;
+      result.append(name);
     }
-    else if(*it == '[')
+    else if(!namefrom.empty())
     {
-      if(*(it+1) == '[')
-      {
-        new_out_wblank += ' ';
-        it++;
-      }
+      result.append(variables[namefrom]);
     }
     else
     {
-      new_out_wblank += *it;
+      cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  for (auto i : children(localroot)) {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) {
+      result.append(processTags(i));
+      result += '{';
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) {
+      result.append(processLu(i));
+    } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) {
+      result.append(processMlu(i));
+    } else { // 'b'
+      result.append(evalString(i));
+    }
+  }
+  result += '}';
+  result += '$';
+  return result;
+}
+
+UString
+Transfer::processTags(xmlNode *localroot)
+{
+  UString result;
+  for (auto i : children(localroot)) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "tag")) {
+      for (auto j : children(i)) {
+        result.append(evalString(j));
+      }
     }
   }
-  
-  return new_out_wblank;
+  return result;
 }
 
-string
-Transfer::evalString(xmlNode *element)
+void
+Transfer::processLet(xmlNode *localroot)
 {
-  map<xmlNode *, TransferInstr>::iterator it;
-  it = evalStringCache.find(element);
+  xmlNode *leftSide = NULL, *rightSide = NULL;
+
+  for (auto i : children(localroot)) {
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
+    }
+  }
+
+  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
   if(it != evalStringCache.end())
   {
     TransferInstr &ti = it->second;
     switch(ti.getType())
     {
+      case ti_var:
+        in_let_var = true;
+        var_val = ti.getContent();
+
+        var_out_wblank[var_val].clear();
+
+        variables[ti.getContent()] = evalString(rightSide);
+
+        in_let_var = false;
+
+        return;
+
       case ti_clip_sl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(gettingLemmaFromWord(ti.getContent()) && last_lword > 1)
+        if (checkIndex(leftSide, ti.getPos(), lword)) {
+          bool match = word[ti.getPos()]->setSource(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
+          if (!match && trace)
           {
-            if(in_lu)
-            {
-              out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
-            }
-            else if(in_let_var)
-            {
-              var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
-            }
+            cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
           }
-          
-          return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition());
         }
-        break;
+        return;
 
       case ti_clip_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(gettingLemmaFromWord(ti.getContent()) && last_lword > 1)
+        if (checkIndex(leftSide, ti.getPos(), lword)) {
+          bool match = word[ti.getPos()]->setTarget(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
+          if (!match && trace)
           {
-            if(in_lu)
-            {
-              out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
-            }
-            else if(in_let_var)
-            {
-              var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
-            }
-          }
-            
-          return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition());
-        }
-        break;
-
-      case ti_clip_ref:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition());
-        }
-        break;
-
-      case ti_linkto_sl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()) != "")
-          {
-            return "<" + string((char *) ti.getPointer()) + ">";
-          }
-          else
-          {
-            return "";
-          }
-        }
-        break;
-
-      case ti_linkto_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()) != "")
-          {
-            return "<" + string((char *) ti.getPointer()) + ">";
-          }
-          else
-          {
-            return "";
-          }
-        }
-        break;
-
-      case ti_linkto_ref:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          if(word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()) != "")
-          {
-            return "<" + string((char *) ti.getPointer()) + ">";
-          }
-          else
-          {
-            return "";
-          }
-        }
-        break;
-
-      case ti_var:
-        if(last_lword > 1)
-        {
-          out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]);
-        }
-        return variables[ti.getContent()];
-
-      case ti_lit_tag:
-      case ti_lit:
-        return ti.getContent();
-
-      case ti_b:
-        if(!blank_queue.empty())
-        {
-          string retblank = blank_queue.front();
-          if(in_out)
-          {
-            blank_queue.pop();
-          }
-          
-          return retblank;
-        }
-        else
-        {
-          return " ";
-        }
-        break;
-
-      case ti_get_case_from:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return copycase(word[ti.getPos()]->source(attr_items[ti.getContent()]),
-                  evalString((xmlNode *) ti.getPointer()));
-        }
-        break;
-
-      case ti_case_of_sl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return caseOf(word[ti.getPos()]->source(attr_items[ti.getContent()]));
-        }
-        break;
-
-      case ti_case_of_tl:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return caseOf(word[ti.getPos()]->target(attr_items[ti.getContent()]));
-        }
-        break;
-
-      case ti_case_of_ref:
-        if(checkIndex(element, ti.getPos(), lword))
-        {
-          return caseOf(word[ti.getPos()]->reference(attr_items[ti.getContent()]));
-        }
-        break;
-
-      default:
-        return "";
-    }
-    return "";
-  }
-
-  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL, *side = NULL, *as = NULL;
-    bool queue = true;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
-      {
-	side = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *)i->children->content) - 1;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
-      {
-        if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
-        {
-          queue = false;
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
-      {
-        as = i->children->content;
-      }
-    }
-
-    if(as != NULL)
-    {
-      if(!xmlStrcmp(side, (const xmlChar *) "sl"))
-      {
-        evalStringCache[element] = TransferInstr(ti_linkto_sl, (const char *) part, pos, (void *) as, queue);
-      }
-      else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
-      {
-        evalStringCache[element] = TransferInstr(ti_linkto_ref, (const char *) part, pos, (void *) as, queue);
-      }
-      else
-      {
-        evalStringCache[element] = TransferInstr(ti_linkto_tl, (const char *) part, pos, (void *) as, queue);
-      }
-    }
-    else if(!xmlStrcmp(side, (const xmlChar *) "sl"))
-    {
-      evalStringCache[element] = TransferInstr(ti_clip_sl, (const char *) part, pos, NULL, queue);
-    }
-    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
-    {
-      evalStringCache[element] = TransferInstr(ti_clip_ref, (const char *) part, pos, NULL, queue);
-    }
-    else
-    {
-      evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue);
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit_tag,
-                                             tags((const char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
-  {
-    evalStringCache[element] = TransferInstr(ti_lit, string((char *) element->properties->children->content), 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
-  {
-    evalStringCache[element] = TransferInstr(ti_b, " ", -1);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
-  {
-    int pos = atoi((const char *) element->properties->children->content) - 1;
-    xmlNode *param = NULL;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-	param = i;
-	break;
-      }
-    }
-
-    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
-  {
-    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL, *side = NULL;
-
-    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
-      {
-	side = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content) - 1;
-      }
-    }
-
-    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
-    {
-      evalStringCache[element] = TransferInstr(ti_case_of_sl, (const char *) part, pos);
-    }
-    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
-    {
-      evalStringCache[element] = TransferInstr(ti_case_of_ref, (const char *) part, pos);
-    }
-    else
-    {
-      evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
-  {
-    string value;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        value.append(evalString(i));
-      }
-    }
-    return value;
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu"))
-  {
-    in_lu = true;
-    out_wblank.clear();
-      
-    string myword;
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-       if(i->type == XML_ELEMENT_NODE)
-       {
-         myword.append(evalString(i));
-       }
-    }
-    
-    in_lu = false;
-    
-    if(last_lword == 1)
-    {
-      out_wblank = word[0]->getWblank();
-    }
-      
-    if(myword != "")
-    {
-      if(myword[0] != L'[' || myword[1] != L'[')
-      {
-        return out_wblank+"^"+myword+"$";
-      }
-      else
-      {
-        return myword+"$";
-      }
-    }
-    else
-    {
-      return "";
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "mlu"))
-  {
-    string value;
-
-    bool first_time = true;
-    out_wblank.clear();
-
-    for(xmlNode *i = element->children; i != NULL; i = i->next)
-    {
-      if(i->type == XML_ELEMENT_NODE)
-      {
-        in_lu = true;
-        
-        string myword;
-
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            myword.append(evalString(j));
-          }
-        }
-        
-        in_lu = false;
-
-	if(!first_time)
-	{
-	  if(myword != "" && myword[0] != '#')  //'+#' problem
-	  {
-	    value.append("+");
-          }
-	}
-	else
-	{
-	  if(myword != "")
-	  {
-	    first_time = false;
-          }
-	}
-
-	value.append(myword);
-      }
-    }
-
-    if(last_lword == 1)
-    {
-      out_wblank = word[0]->getWblank();
-    }
-    
-    if(value != "")
-    {
-      return out_wblank+"^"+value+"$";
-    }
-    else
-    {
-      return "";
-    }
-  }
-  else if(!xmlStrcmp(element->name, (const xmlChar *) "chunk"))
-  {
-    return processChunk(element);
-  }
-  else
-  {
-    wcerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  return evalString(element);
-}
-
-void
-Transfer::processOut(xmlNode *localroot)
-{
-  in_out = true;
-  
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(defaultAttrs == lu)
-      {
-        if(!xmlStrcmp(i->name, (const xmlChar *) "lu"))
-        {
-          in_lu = true;
-          out_wblank.clear();
-            
-          string myword;
-          for(xmlNode *j = i->children; j != NULL; j = j->next)
-          {
-            if(j->type == XML_ELEMENT_NODE)
-            {
-              myword.append(evalString(j));
-            }
-          }
-            
-          in_lu = false;
-          
-          if(last_lword == 1)
-          {
-            out_wblank = word[0]->getWblank();
-          }
-
-          if(myword != "")
-          {
-            if(myword[0] != L'[' || myword[1] != L'[')
-            {
-              fputws_unlocked(UtfConverter::fromUtf8(out_wblank).c_str(), output);
-              fputwc_unlocked(L'^', output);
-            }
-            
-            fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
-            fputwc_unlocked(L'$', output);
-          }
-        }
-        else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu"))
-        {
-          string myword;
-          bool first_time = true;
-          out_wblank.clear();
-          
-          for(xmlNode *j = i->children; j != NULL; j = j->next)
-          {
-            if(j->type == XML_ELEMENT_NODE)
-            {
-              in_lu = true;
-              
-              string mylocalword;
-              for(xmlNode *k = j->children; k != NULL; k = k->next)
-              {
-                if(k->type == XML_ELEMENT_NODE)
-                {
-                  mylocalword.append(evalString(k));
-                }
-              }
-              
-              in_lu = false;
-
-              if(!first_time)
-              {
-                if(mylocalword != "" && mylocalword[0] != '#')  //'+#' problem
-                {
-                  myword += '+';
-                }
-              }
-              else
-              {
-                if(mylocalword != "")
-                {
-                  first_time = false;
-                }
-              }
-              
-              myword.append(mylocalword);
-            }
-          }
-          
-          if(last_lword == 1)
-          {
-            out_wblank = word[0]->getWblank();
-          }
-          
-          if(myword != "")
-          {
-            fputws_unlocked(UtfConverter::fromUtf8(out_wblank).c_str(), output);
-            fputwc_unlocked('^', output);
-            fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
-            fputwc_unlocked(L'$', output);
-          }
-        }
-        else // 'b'
-        {
-          fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(),
-			  output);
-        }
-      }
-      else
-      {
-        if(!xmlStrcmp(i->name, (const xmlChar *) "chunk"))
-        {
-          fputws_unlocked(UtfConverter::fromUtf8(processChunk(i)).c_str(), output);
-        }
-        else // 'b'
-        {
-          fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(), output);
-        }
-      }
-    }
-  }
-  
-  in_out = false;
-}
-
-string
-Transfer::processChunk(xmlNode *localroot)
-{
-  string name, namefrom;
-  string caseofchunk = "aa";
-  string result;
-
-  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "name"))
-    {
-      name = (const char *) i->children->content;
-    }
-    else if(!xmlStrcmp(i->name, (const xmlChar *) "namefrom"))
-    {
-      namefrom = (const char *) i->children->content;
-    }
-    else if(!xmlStrcmp(i->name, (const xmlChar *) "case"))
-    {
-      caseofchunk = (const char *) i->children->content;
-    }
-  }
-
-  result.append("^");
-  if(caseofchunk != "")
-  {
-    if(name != "")
-    {
-      result.append(copycase(variables[caseofchunk], name));
-    }
-    else if(namefrom != "")
-    {
-      result.append(copycase(variables[caseofchunk], variables[namefrom]));
-    }
-    else
-    {
-      wcerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
-      exit(EXIT_FAILURE);
-    }
-  }
-  else
-  {
-    if(name != "")
-    {
-      result.append(name);
-    }
-    else if(namefrom != "")
-    {
-      result.append(variables[namefrom]);
-    }
-    else
-    {
-      wcerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
-      exit(EXIT_FAILURE);
-    }
-  }
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "tags"))
-      {
-        result.append(processTags(i));
-        result.append("{");
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "lu"))
-      {
-        in_lu = true;
-        out_wblank.clear();
-          
-        string myword;
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            myword.append(evalString(j));
-          }
-        }
-        
-        in_lu = false;
-        
-        if(last_lword == 1)
-        {
-          out_wblank = word[0]->getWblank();
-        }
-          
-        if(myword != "")
-        {
-          result.append(out_wblank);
-          result.append("^");
-          result.append(myword);
-          result.append("$");
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu"))
-      {
-        bool first_time = true;
-        string myword;
-        
-        out_wblank.clear();
-        
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          string mylocalword;
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            in_lu = true;
-            
-            for(xmlNode *k = j->children; k != NULL; k = k->next)
-            {
-              if(k->type == XML_ELEMENT_NODE)
-              {
-                mylocalword.append(evalString(k));
-              }
-            }
-            
-            in_lu = false;
-
-            if(!first_time)
-            {
-              if(mylocalword != "" && mylocalword[0] != '#')  // '+#' problem
-              {
-                myword += '+';
-              }
-            }
-            else
-            {
-              first_time = false;
-            }
-          }
-          myword.append(mylocalword);
-        }
-        
-        if(last_lword == 1)
-        {
-          out_wblank = word[0]->getWblank();
-        }
-        
-        if(myword != "")
-        {
-          result.append(out_wblank);
-          result.append("^");
-          result.append(myword);
-          result.append("$");
-        }
-      }
-      else // 'b'
-      {
-        result.append(evalString(i));
-      }
-    }
-  }
-  result.append("}$");
-  return result;
-}
-
-string
-Transfer::processTags(xmlNode *localroot)
-{
-  string result;
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (xmlChar const *) "tag"))
-      {
-        for(xmlNode *j = i->children; j != NULL; j = j->next)
-        {
-          if(j->type == XML_ELEMENT_NODE)
-          {
-            result.append(evalString(j));
-          }
-        }
-      }
-    }
-  }
-  return result;
-}
-
-int
-Transfer::processInstruction(xmlNode *localroot)
-{
-  int words_to_consume = -1;
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "choose"))
-  {
-    words_to_consume = processChoose(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "let"))
-  {
-    processLet(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "append"))
-  {
-    processAppend(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "out"))
-  {
-    processOut(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "call-macro"))
-  {
-    processCallMacro(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "modify-case"))
-  {
-    processModifyCase(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "reject-current-rule"))
-  {
-    words_to_consume = processRejectCurrentRule(localroot);
-  }
-  return words_to_consume;
-}
-
-int
-Transfer::processRejectCurrentRule(xmlNode *localroot)
-{
-  bool shifting = true;
-  string value;
-  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "shifting"))
-    {
-      value = (char *) i->children->content;
-      break;
-    }
-  }
-
-  if(value == "no")
-  {
-    shifting = false;
-  }
-
-  return shifting ? 1 : 0;
-}
-
-void
-Transfer::processLet(xmlNode *localroot)
-{
-  xmlNode *leftSide = NULL, *rightSide = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
-    }
-  }
-
-  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
-  if(it != evalStringCache.end())
-  {
-    TransferInstr &ti = it->second;
-    switch(ti.getType())
-    {
-      case ti_var:
-        in_let_var = true;
-        var_val = ti.getContent();
-
-        var_out_wblank[var_val].clear();
-        
-        variables[ti.getContent()] = evalString(rightSide);
-          
-        in_let_var = false;
-        
-        return;
-
-      case ti_clip_sl:
-        if (checkIndex(leftSide, ti.getPos(), lword)) {
-          bool match = word[ti.getPos()]->setSource(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
-          if (!match && trace)
-          {
-            wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-          }
-        }
-        return;
-
-      case ti_clip_tl:
-        if (checkIndex(leftSide, ti.getPos(), lword)) {
-          bool match = word[ti.getPos()]->setTarget(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
-          if (!match && trace)
-          {
-            wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-          }
-        }
-        return;
-
-      case ti_clip_ref:
-        if (checkIndex(leftSide, ti.getPos(), lword)) {
-          bool match = word[ti.getPos()]->setReference(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
-          if (!match && trace)
-          {
-            wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-          }
-        }
-        return;
-
-      default:
-        return;
-    }
-  }
-  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
-  {
-    in_let_var = true;
-    
-    string const val = (const char *) leftSide->properties->children->content;
-    
-    var_val = val;
-    var_out_wblank[var_val].clear();
-    
-    variables[val] = evalString(rightSide);
-      
-    in_let_var = false;
-    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
-  }
-  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL, *side = NULL, *as = NULL;
-    bool queue = true;
-
-    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
-      {
-	side = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content) - 1;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
-      {
-        if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
-        {
-          queue = false;
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
-      {
-        as = i->children->content; // TODO: set but never read
-      }
-    }
-
-    if (pos >= lword) {
-      wcerr << L"Error: Transfer::processLet() bad access on pos >= lword" << endl;
-      return;
-    }
-    if (word[pos] == 0) {
-      wcerr << L"Error: Transfer::processLet() null access on word[pos]" << endl;
-      return;
-    }
-
-    if(!xmlStrcmp(side, (const xmlChar *) "tl"))
-    {
-      bool match = word[pos]->setTarget(attr_items[(const char *) part], evalString(rightSide), queue);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-      evalStringCache[leftSide] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue);
-    }
-    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
-    {
-      bool match = word[pos]->setReference(attr_items[(const char *) part], evalString(rightSide), queue);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-      evalStringCache[leftSide] = TransferInstr(ti_clip_ref, (const char *) part, pos, NULL, queue);
-    }
-    else
-    {
-      bool match = word[pos]->setSource(attr_items[(const char *) part], evalString(rightSide), queue);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-      evalStringCache[leftSide] = TransferInstr(ti_clip_sl, (const char *) part, pos, NULL, queue);
-    }
-  }
-}
-
-void
-Transfer::processAppend(xmlNode *localroot)
-{
-  string name;
-  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "n"))
-    {
-      name = (char *) i->children->content;
-      break;
-    }
-  }
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      in_let_var = true;
-      var_val = name;
-      variables[name].append(evalString(i));
-      in_let_var = false;
-    }
-  }
-}
-
-void
-Transfer::processModifyCase(xmlNode *localroot)
-{
-  xmlNode *leftSide = NULL, *rightSide = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(leftSide == NULL)
-      {
-	leftSide = i;
-      }
-      else
-      {
-	rightSide = i;
-	break;
-      }
-    }
-  }
-
-  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
-  {
-    int pos = 0;
-    xmlChar *part = NULL, *side = NULL, *as = NULL;
-    bool queue = true;
-
-    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
-      {
-	side = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
-      {
-	part = i->children->content;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
-      {
-	pos = atoi((const char *) i->children->content) - 1;
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
-      {
-        if(!xmlStrcmp(i->children->content, (xmlChar const *) "no"))
-        {
-          queue = false;
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
-      {
-        as = i->children->content;
-        (void)as; // ToDo, remove "as" and the whole else?
-      }
-    }
-    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
-    {
-      string const result = copycase(evalString(rightSide),
-				      word[pos]->source(attr_items[(const char *) part], queue));
-      bool match = word[pos]->setSource(attr_items[(const char *) part], result);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-    }
-    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
-    {
-      string const result = copycase(evalString(rightSide),
-              word[pos]->reference(attr_items[(const char *) part], queue));
-      bool match = word[pos]->setReference(attr_items[(const char *) part], result);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-    }
-    else
-    {
-      string const result = copycase(evalString(rightSide),
-				     word[pos]->target(attr_items[(const char *) part], queue));
-      bool match = word[pos]->setTarget(attr_items[(const char *) part], result);
-      if(!match && trace)
-      {
-        wcerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
-      }
-    }
-  }
-  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
-  {
-    string const val = (const char *) leftSide->properties->children->content;
-    variables[val] = copycase(evalString(rightSide), variables[val]);
-  }
-}
-
-void
-Transfer::processCallMacro(xmlNode *localroot)
-{
-  string const n = (const char *) localroot->properties->children->content;
-  int npar = 0;
-
-  xmlNode *macro = macro_map[macros[n]];
-
-  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
-  {
-    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
-    {
-      npar = atoi((const char *) i->children->content);
-      break;
-    }
-  }
-
-  // ToDo: Is it at all valid if npar <= 0 ?
-
-  TransferWord **myword = NULL;
-  if(npar > 0)
-  {
-    myword = new TransferWord *[npar];
-    std::fill(myword, myword+npar, (TransferWord *)(0));
-  }
-  
-  int idx = 0;
-  for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if (idx >= npar) {
-      	  wcerr << L"Error: processCallMacro() number of arguments >= npar at line " << i->line << endl;
-      	  return;
-      }
-      int pos = atoi((const char *) i->properties->children->content)-1;
-      myword[idx] = word[pos];
-
-      idx++;
-    }
-  }
-
-  swap(myword, word);
-  swap(npar, lword);
-
-  for(xmlNode *i = macro->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      processInstruction(i);
-    }
-  }
-
-  swap(myword, word);
-  swap(npar, lword);
-
-  delete[] myword;
-}
-
-int
-Transfer::processChoose(xmlNode *localroot)
-{
-  int words_to_consume = -1;
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar *) "when"))
-      {
-        bool picked_option = false;
-
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-	    if(!xmlStrcmp(j->name, (const xmlChar *) "test"))
-	    {
-	      if(!processTest(j))
-	      {
-		break;
-	      }
-	      else
-	      {
-	        picked_option = true;
-              }
-	    }
-	    else
-	    {
-              words_to_consume = processInstruction(j);
-              if(words_to_consume != -1)
-              {
-                return words_to_consume;
-              }
-	    }
-	  }
-	}
-        if(picked_option)
-        {
-          return words_to_consume;
-        }
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar *) "otherwise"))
-      {
-	for(xmlNode *j = i->children; j != NULL; j = j->next)
-	{
-	  if(j->type == XML_ELEMENT_NODE)
-	  {
-            words_to_consume = processInstruction(j);
-            if(words_to_consume != -1)
-            {
-              return words_to_consume;
-            }
-          }
-        }
-      }
-    }
-  }
-  return words_to_consume;
-}
-
-bool
-Transfer::processLogical(xmlNode *localroot)
-{
-  if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal"))
-  {
-    return processEqual(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with"))
-  {
-    return processBeginsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with-list"))
-  {
-    return processBeginsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with"))
-  {
-    return processEndsWith(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with-list"))
-  {
-    return processEndsWithList(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "contains-substring"))
-  {
-    return processContainsSubstring(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "or"))
-  {
-    return processOr(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "and"))
-  {
-    return processAnd(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "not"))
-  {
-    return processNot(localroot);
-  }
-  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "in"))
-  {
-    return processIn(localroot);
-  }
-
-  return false;
-}
-
-bool
-Transfer::processIn(xmlNode *localroot)
-{
-  xmlNode *value = NULL;
-  xmlChar *idlist = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(value == NULL)
-      {
-        value = i;
-      }
-      else
-      {
-        idlist = i->properties->children->content;
-        break;
-      }
-    }
-  }
-
-  string sval = evalString(value);
-
-  if(localroot->properties != NULL)
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      set<string, Ltstr> &myset = listslow[(const char *) idlist];
-      if(myset.find(tolower(sval)) != myset.end())
-      {
-	return true;
-      }
-      else
-      {
-	return false;
-      }
-    }
-  }
-
-  set<string, Ltstr> &myset = lists[(const char *) idlist];
-  if(myset.find(sval) != myset.end())
-  {
-    return true;
-  }
-  else
-  {
-    return false;
-  }
-}
-
-bool
-Transfer::processTest(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return processLogical(i);
-    }
-  }
-  return false;
-}
-
-bool
-Transfer::processAnd(xmlNode *localroot)
-{
-  bool val = true;
-  for(xmlNode *i = localroot->children; val && i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val && processLogical(i);
-    }
-  }
-
-  return val;
-}
-
-bool
-Transfer::processOr(xmlNode *localroot)
-{
-  bool val = false;
-  for(xmlNode *i = localroot->children; !val && i != NULL ; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      val = val || processLogical(i);
-    }
-  }
-
-  return val;
-}
-
-bool
-Transfer::processNot(xmlNode *localroot)
-{
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      return !processLogical(i);
-    }
-  }
-  return false;
-}
-
-bool
-Transfer::processEqual(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return evalString(first) == evalString(second);
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return tolower(evalString(first)) == tolower(evalString(second));
-    }
-    else
-    {
-      return evalString(first) == evalString(second);
-    }
-  }
-}
-
-bool
-Transfer::beginsWith(string const &s1, string const &s2) const
-{
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = 0; i != limit; i++)
-  {
-    if(s1[i] != s2[i])
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-bool
-Transfer::endsWith(string const &s1, string const &s2) const
-{
-  int const limit = s2.size(), constraint = s1.size();
-
-  if(constraint < limit)
-  {
-    return false;
-  }
-  for(int i = limit-1, j = constraint - 1; i >= 0; i--, j--)
-  {
-    if(s1[j] != s2[i])
-    {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-
-bool
-Transfer::processBeginsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return beginsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return beginsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return beginsWith(evalString(first), evalString(second));
-    }
-  }
-}
-
-bool
-Transfer::processEndsWith(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return endsWith(evalString(first), evalString(second));
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
-    {
-      return endsWith(tolower(evalString(first)), tolower(evalString(second)));
-    }
-    else
-    {
-      return endsWith(evalString(first), evalString(second));
-    }
-  }
-}
-
-bool
-Transfer::processBeginsWithList(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
-
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
-      {
-	second = i;
-	break;
-      }
-    }
-  }
+            cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+          }
+        }
+        return;
 
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
+      case ti_clip_ref:
+        if (checkIndex(leftSide, ti.getPos(), lword)) {
+          bool match = word[ti.getPos()]->setReference(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
+          if (!match && trace)
+          {
+            cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+          }
+        }
+        return;
 
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
+      default:
+        return;
+    }
   }
-  else
+  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
   {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
-  }
+    in_let_var = true;
 
-  for(; it != limit; it++)
-  {
-    if(beginsWith(needle, *it))
-    {
-      return true;
-    }
-  }
-  return false;
-}
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);
 
+    var_val = val;
+    var_out_wblank[var_val].clear();
 
-bool
-Transfer::processEndsWithList(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
+    variables[val] = evalString(rightSide);
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+    in_let_var = false;
+    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
+  }
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
   {
-    if(i->type == XML_ELEMENT_NODE)
+    int pos = 0;
+    xmlChar *side = NULL, *as = NULL;
+    UString part;
+    bool queue = true;
+
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
-      if(first == NULL)
+      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
       {
-        first = i;
+	side = i->children->content;
       }
-      else
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
       {
-	second = i;
-	break;
+        part = to_ustring((const char*) i->children->content);
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
+      {
+        if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
+        {
+          queue = false;
+        }
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
+      {
+        as = i->children->content; // TODO: set but never read
       }
     }
-  }
-
-  xmlChar *idlist = second->properties->children->content;
-  string needle = evalString(first);
-  set<string, Ltstr>::iterator it, limit;
-
-  if(localroot->properties == NULL ||
-     xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes"))
-  {
-    it = lists[(const char *) idlist].begin();
-    limit = lists[(const char *) idlist].end();
-  }
-  else
-  {
-    needle = tolower(needle);
-    it = listslow[(const char *) idlist].begin();
-    limit = listslow[(const char *) idlist].end();
-  }
 
-  for(; it != limit; it++)
-  {
-    if(endsWith(needle, *it))
-    {
-      return true;
+    if (pos >= lword) {
+      cerr << "Error: Transfer::processLet() bad access on pos >= lword" << endl;
+      return;
+    }
+    if (word[pos] == 0) {
+      cerr << "Error: Transfer::processLet() null access on word[pos]" << endl;
+      return;
     }
-  }
-  return false;
-}
-
-bool
-Transfer::processContainsSubstring(xmlNode *localroot)
-{
-  xmlNode *first = NULL, *second = NULL;
 
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
+    if(!xmlStrcmp(side, (const xmlChar *) "tl"))
     {
-      if(first == NULL)
-      {
-        first = i;
-      }
-      else
+      bool match = word[pos]->setTarget(attr_items[part], evalString(rightSide), queue);
+      if(!match && trace)
       {
-	second = i;
-	break;
+        cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
       }
+      evalStringCache[leftSide] = TransferInstr(ti_clip_tl, part, pos, NULL, queue);
     }
-  }
-
-  if(localroot->properties == NULL)
-  {
-    return evalString(first).find(evalString(second)) != string::npos;
-  }
-  else
-  {
-    if(!xmlStrcmp(localroot->properties->children->content,
-		  (const xmlChar *) "yes"))
+    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
     {
-      return tolower(evalString(first)).find(tolower(evalString(second))) != string::npos;
+      bool match = word[pos]->setReference(attr_items[part], evalString(rightSide), queue);
+      if(!match && trace)
+      {
+        cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+      }
+      evalStringCache[leftSide] = TransferInstr(ti_clip_ref, part, pos, NULL, queue);
     }
     else
     {
-      return evalString(first).find(evalString(second)) != string::npos;
+      bool match = word[pos]->setSource(attr_items[part], evalString(rightSide), queue);
+      if(!match && trace)
+      {
+        cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
+      }
+      evalStringCache[leftSide] = TransferInstr(ti_clip_sl, part, pos, NULL, queue);
     }
   }
 }
 
-string
-Transfer::copycase(string const &source_word, string const &target_word)
+void
+Transfer::processModifyCase(xmlNode *localroot)
 {
-  wstring result;
-  wstring const s_word = UtfConverter::fromUtf8(source_word);
-  wstring const t_word = UtfConverter::fromUtf8(target_word);
-
-  bool firstupper = iswupper(s_word[0]);
-  bool uppercase = firstupper && iswupper(s_word[s_word.size()-1]);
-  bool sizeone = s_word.size() == 1;
+  xmlNode *leftSide = NULL, *rightSide = NULL;
 
-  if(!uppercase || (sizeone && uppercase))
-  {
-    result = t_word;
-    result[0] = towlower(result[0]);
-    //result = StringUtils::tolower(t_word);
-  }
-  else
-  {
-    result = StringUtils::toupper(t_word);
+  for (auto i : children(localroot)) {
+    if(leftSide == NULL) {
+      leftSide = i;
+    } else {
+      rightSide = i;
+      break;
+    }
   }
 
-  if(firstupper)
+  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
   {
-    result[0] = towupper(result[0]);
-  }
-
-  return UtfConverter::toUtf8(result);
-}
-
-string
-Transfer::caseOf(string const &str)
-{
-  wstring const s = UtfConverter::fromUtf8(str);
+    int pos = 0;
+    xmlChar *side = NULL, *as = NULL;
+    UString part;
+    bool queue = true;
 
-  if(s.size() > 1)
-  {
-    if(!iswupper(s[0]))
-    {
-      return "aa";
-    }
-    else if(!iswupper(s[s.size()-1]))
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
     {
-      return "Aa";
+      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
+      {
+	side = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+        part = to_ustring((const char*)i->children->content);
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
+      {
+        if(!xmlStrcmp(i->children->content, (xmlChar const *) "no"))
+        {
+          queue = false;
+        }
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
+      {
+        as = i->children->content;
+        (void)as; // ToDo, remove "as" and the whole else?
+      }
     }
-    else
+    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
     {
-      return "AA";
+      UString const result = StringUtils::copycase(evalString(rightSide),
+				      word[pos]->source(attr_items[part], queue));
+      bool match = word[pos]->setSource(attr_items[part], result);
+      if(!match && trace)
+      {
+        cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
+      }
     }
-  }
-  else if(s.size() == 1)
-  {
-    if(!iswupper(s[0]))
+    else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
     {
-      return "aa";
+      UString const result = StringUtils::copycase(evalString(rightSide),
+              word[pos]->reference(attr_items[part], queue));
+      bool match = word[pos]->setReference(attr_items[part], result);
+      if(!match && trace)
+      {
+        cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
+      }
     }
     else
     {
-      return "Aa";
+      UString const result = StringUtils::copycase(evalString(rightSide),
+				     word[pos]->target(attr_items[part], queue));
+      bool match = word[pos]->setTarget(attr_items[part], result);
+      if(!match && trace)
+      {
+        cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
+      }
     }
   }
-  else
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
   {
-    return "aa";
+    UString const val = to_ustring((const char *) leftSide->properties->children->content);
+    variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]);
   }
 }
 
-string
-Transfer::tolower(string const &str) const
+void
+Transfer::processCallMacro(xmlNode *localroot)
 {
-  return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str)));
-}
+  UString const n = to_ustring((const char *) localroot->properties->children->content);
+  int npar = 0;
 
-string
-Transfer::tags(string const &str) const
-{
-  string result = "<";
+  xmlNode *macro = macro_map[macros[n]];
 
-  for(unsigned int i = 0, limit = str.size(); i != limit; i++)
+  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
   {
-    if(str[i] == '.')
-    {
-      result.append("><");
-    }
-    else
+    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
     {
-      result += str[i];
+      npar = atoi((const char *) i->children->content);
+      break;
     }
   }
 
-  result += '>';
-
-  return result;
-}
+  // ToDo: Is it at all valid if npar <= 0 ?
 
-int
-Transfer::processRule(xmlNode *localroot)
-{
-  int instruction_return, words_to_consume = -1;
-  // localroot is suposed to be an 'action' tag
-  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  TransferWord **myword = NULL;
+  int idx = 0;
+  if(npar > 0)
   {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      instruction_return = processInstruction(i);
-      // When an instruction which modifies the number of words to be consumed
-      // from the input is found, execution of the rule is stopped
-      if(instruction_return != -1)
-      {
-        words_to_consume = instruction_return;
-        break;
+    myword = new TransferWord *[npar];
+    std::fill(myword, myword+npar, (TransferWord *)(0));
+    for (auto i : children(localroot)) {
+      if (idx >= npar) {
+        cerr << "Error: processCallMacro() number of arguments >= npar at line " << i->line << endl;
+        return;
       }
+      int pos = atoi((const char *) i->properties->children->content)-1;
+      myword[idx] = word[pos];
+
+      idx++;
     }
   }
-  
-  while(!blank_queue.empty()) //flush remaining blanks that are not spaces
-  {
-    if(blank_queue.front().compare(" ") != 0)
-    {
-      fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output);
-    }
-    blank_queue.pop();
+
+  swap(myword, word);
+  swap(npar, lword);
+
+  for (auto i : children(macro)) {
+    processInstruction(i);
   }
-  
-  return words_to_consume;
+
+  swap(myword, word);
+  swap(npar, lword);
+
+  delete[] myword;
 }
 
 TransferToken &
-Transfer::readToken(FILE *in)
+Transfer::readToken(InputFile& in)
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wstring content;
+  UString content;
   while(true)
   {
-    int val = fgetwc_unlocked(in);
-    if(feof(in) || (val == 0 && internal_null_flush))
+    UChar32 val = in.get();
+    if(in.eof() || (val == 0 && internal_null_flush))
     {
       in_wblank = false;
       return input_buffer.add(TransferToken(content, tt_eof));
     }
     if(in_wblank)
     {
-      content = L"[[";
-      content+= wchar_t(val);
-      
+      content = "[["_u;
+      content += val;
+
       while(true)
       {
-        int val3 = fgetwc_unlocked(in);
-        if(val3 == L'\\')
+        UChar32 val3 = in.get();
+        if(val3 == '\\')
         {
-          content += L'\\';
-          content += wchar_t(fgetwc_unlocked(in));
+          content += '\\';
+          content += in.get();
         }
-        else if(val3 == L'$') //[[..]]^..$ is the LU
+        else if(val3 == '$') //[[..]]^..$ is the LU
         {
           in_wblank = false;
           return input_buffer.add(TransferToken(content, tt_word));
         }
-        else if(val3 == L'\0' && null_flush)
+        else if(val3 == '\0' && null_flush)
         {
           in_wblank = false;
-          fflush(output);
+          u_fflush(output);
         }
         else
         {
-          content += wchar_t(val3);
+          content += val3;
         }
       }
     }
     if(val == '\\')
     {
-      content += L'\\';
-      content += (wchar_t) fgetwc_unlocked(in);
+      content += '\\';
+      content += in.get();
     }
-    else if(val == L'[')
+    else if(val == '[')
     {
-      content += L'[';
+      content += '[';
       while(true)
       {
-        int val2 = fgetwc_unlocked(in);
-        if(val2 == L'\\')
+        UChar32 val2 = in.get();
+        if(val2 == '\\')
         {
-          content += L'\\';
-          content += wchar_t(fgetwc_unlocked(in));
+          content += '\\';
+          content += in.get();
         }
-        else if(val2 == L'[')
+        else if(val2 == '[')
         { //wordbound blank
           in_wblank = true;
           content.pop_back();
-          
+
           return input_buffer.add(TransferToken(content, tt_blank));
         }
-        else if(val2 == L']')
+        else if(val2 == ']')
         {
-          content += L']';
+          content += ']';
           break;
         }
         else
         {
-          content += wchar_t(val2);
+          content += val2;
         }
       }
     }
-    else if(val == L'$')
+    else if(val == '$')
     {
       return input_buffer.add(TransferToken(content, tt_word));
     }
-    else if(val == L'^')
+    else if(val == '^')
     {
       return input_buffer.add(TransferToken(content, tt_blank));
     }
-    else if(val == L'\0' && null_flush)
+    else if(val == '\0' && null_flush)
     {
       in_wblank = false;
-      fflush(output);
+      u_fflush(output);
     }
     else
     {
-      content += wchar_t(val);
+      content += val;
     }
   }
 }
 
-bool
-Transfer::getNullFlush(void)
-{
-  return null_flush;
-}
-
-void
-Transfer::setNullFlush(bool null_flush)
-{
-  this->null_flush = null_flush;
-}
-
-void
-Transfer::setTrace(bool trace)
-{
-  this->trace = trace;
-}
-
 void
 Transfer::setTraceATT(bool trace)
 {
@@ -2229,24 +896,21 @@ Transfer::tmp_clear()
 {
   tmpblank.clear();
   tmpword.clear();
+  variables = variable_defaults;
 }
 
 void
-Transfer::transfer_wrapper_null_flush(FILE *in, FILE *out)
+Transfer::transfer_wrapper_null_flush(InputFile& in, UFILE* out)
 {
   null_flush = false;
   internal_null_flush = true;
 
-  while(!feof(in))
+  while(!in.eof())
   {
     tmp_clear();
     transfer(in, out);
-    fputwc_unlocked(L'\0', out);
-    int code = fflush(out);
-    if(code != 0)
-    {
-      wcerr << L"Could not flush output " << errno << endl;
-    }
+    u_fputc('\0', out);
+    u_fflush(out);
   }
 
   internal_null_flush = false;
@@ -2254,7 +918,7 @@ Transfer::transfer_wrapper_null_flush(FILE *in, FILE *out)
 }
 
 void
-Transfer::transfer(FILE *in, FILE *out)
+Transfer::transfer(InputFile& in, UFILE* out)
 {
   if(getNullFlush())
   {
@@ -2274,31 +938,31 @@ Transfer::transfer(FILE *in, FILE *out)
   {
     if(trace_att)
     {
-      wcerr << "Loop start " << endl;
-      wcerr << "ms.size: " << ms.size() << endl;
+      cerr << "Loop start " << endl;
+      cerr << "ms.size: " << ms.size() << endl;
 
-      wcerr << "tmpword.size(): " << tmpword.size() << endl;
+      cerr << "tmpword.size(): " << tmpword.size() << endl;
       for (unsigned int ind = 0; ind < tmpword.size(); ind++)
       {
         if(ind != 0)
         {
-          wcerr << L" ";
+          cerr << " ";
         }
-        wcerr << *tmpword[ind];
+        cerr << *tmpword[ind];
       }
-      wcerr << endl;
+      cerr << endl;
 
-      wcerr << "tmpblank.size(): " << tmpblank.size() << endl;
+      cerr << "tmpblank.size(): " << tmpblank.size() << endl;
       for (unsigned int ind = 0; ind < tmpblank.size(); ind++)
       {
-        wcerr << L"'";
-        wcerr << *tmpblank[ind];
-        wcerr << L"' ";
+        cerr << "'";
+        cerr << *tmpblank[ind];
+        cerr << "' ";
       }
-      wcerr << endl;
+      cerr << endl;
 
-      wcerr << "last: " << last << endl;
-      wcerr << "prev_last: " << prev_last << endl << endl;
+      cerr << "last: " << last << endl;
+      cerr << "prev_last: " << prev_last << endl << endl;
     }
 
     if(ms.size() == 0)
@@ -2309,7 +973,7 @@ Transfer::transfer(FILE *in, FILE *out)
 
         if(trace_att)
         {
-          wcerr << "num_words_to_consume: " << num_words_to_consume << endl;
+          cerr << "num_words_to_consume: " << num_words_to_consume << endl;
         }
 
         //Consume all the words from the input which matched the rule.
@@ -2356,41 +1020,38 @@ Transfer::transfer(FILE *in, FILE *out)
         {
           if(trace_att)
           {
-            wcerr << "printing tmpword[0]" <<endl;
+            cerr << "printing tmpword[0]" <<endl;
           }
 
-          pair<wstring, int> tr;
-          wstring tr_wblank;
+          pair<UString, int> tr;
+          UString tr_wblank;
           if(useBilingual && preBilingual == false)
           {
-	    if(isExtended && (*tmpword[0])[0] == L'*')
-	    {
-	      tr = extended.biltransWithQueue((*tmpword[0]).substr(1), false);
-              if(tr.first[0] == L'@')
-              {
-                tr.first[0] = L'*';
-              }
-              else
-              {
-                tr.first = L"%" + tr.first;
+            if(isExtended && (*tmpword[0])[0] == '*') {
+              tr = extended.biltransWithQueue((*tmpword[0]).substr(1), false);
+              if(tr.first[0] == '@') {
+                tr.first[0] = '*';
+              } else {
+                UString temp;
+                temp += '%';
+                temp.append(tr.first);
+                temp.swap(tr.first);
               }
-            }
-            else
-            {
-	      tr = fstp.biltransWithQueue(*tmpword[0], false);
+            } else {
+              tr = fstp.biltransWithQueue(*tmpword[0], false);
             }
           }
           else if(preBilingual)
           {
-            wstring sl;
-            wstring tl;
-            wstring ref;
-            wstring wblank;
+            UString sl;
+            UString tl;
+            UString ref;
+            UString wblank;
 
             int seenSlash = 0;
-            for(wstring::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
+            for(UString::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
             {
-              if(*it == L'\\')
+              if(*it == '\\')
               {
                 if(seenSlash == 0)
                 {
@@ -2412,19 +1073,19 @@ Transfer::transfer(FILE *in, FILE *out)
                 }
                 continue;
               }
-              else if(*it == L'[')
+              else if(*it == '[')
               {
-                if(*(it+1) == L'[') //wordbound blank
+                if(*(it+1) == '[') //wordbound blank
                 {
                   while(true)
                   {
-                    if(*it == L'\\')
+                    if(*it == '\\')
                     {
                       wblank.push_back(*it);
                       it++;
                       wblank.push_back(*it);
                     }
-                    else if(*it == L'^' && *(it-1) == L']' && *(it-2) == L']')
+                    else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']')
                     {
                       break;
                     }
@@ -2432,7 +1093,7 @@ Transfer::transfer(FILE *in, FILE *out)
                     {
                       wblank.push_back(*it);
                     }
-                    
+
                     it++;
                   }
                 }
@@ -2453,7 +1114,7 @@ Transfer::transfer(FILE *in, FILE *out)
                 }
                 continue;
               }
-              else if(*it == L'/')
+              else if(*it == '/')
               {
                 seenSlash++;
 
@@ -2474,44 +1135,29 @@ Transfer::transfer(FILE *in, FILE *out)
               }
             }
             //tmpword[0]->assign(sl);
-            tr = pair<wstring, int>(tl, false);
+            tr = pair<UString, int>(tl, false);
             tr_wblank = wblank;
-            //wcerr << L"pb: " << *tmpword[0] << L" :: " << sl << L" >> " << tl << endl ;
+            //cerr << "pb: " << *tmpword[0] << " :: " << sl << " >> " << tl << endl ;
           }
           else
           {
-            tr = pair<wstring, int>(*tmpword[0], 0);
+            tr = pair<UString, int>(*tmpword[0], 0);
           }
 
-	  if(tr.first.size() != 0)
-	  {
-	    if(defaultAttrs == lu)
-	    {
-        if(tr.first[0] != L'[' || tr.first[1] != L'[')
-        {
-          fputws_unlocked(tr_wblank.c_str(), output);
-          fputwc_unlocked(L'^', output);
-        }
-	      fputws_unlocked(tr.first.c_str(), output);
-	      fputwc_unlocked(L'$', output);
-      }
-      else
-      {
-        if(tr.first[0] == '*')
-        {
-          fputws_unlocked(L"^unknown<unknown>{", output);
-          fputws_unlocked(tr_wblank.c_str(), output);
-          fputwc_unlocked(L'^', output);
-        }
-        else
-        {
-          fputws_unlocked(L"^default<default>{", output);
-          fputws_unlocked(tr_wblank.c_str(), output);
-          fputwc_unlocked(L'^', output);
+	  if(tr.first.size() != 0) {
+	    if(defaultAttrs == lu) {
+          if(tr.first[0] != '[' || tr.first[1] != '[') {
+            u_fprintf(output, "%S^", tr_wblank.c_str());
+          }
+          u_fprintf(output, "%S$", tr.first.c_str());
+        } else {
+          if(tr.first[0] == '*') {
+            u_fprintf(output, "^unknown<unknown>{%S^", tr_wblank.c_str());
+          } else {
+            u_fprintf(output, "^default<default>{%S^", tr_wblank.c_str());
+          }
+          u_fprintf(output, "%S$}$", tr.first.c_str());
         }
-          fputws_unlocked(tr.first.c_str(), output);
-          fputws_unlocked(L"$}$", output);
-      }
 	  }
 	  banned_rules.clear();
 	  tmpword.clear();
@@ -2523,11 +1169,10 @@ Transfer::transfer(FILE *in, FILE *out)
 	}
 	else if(tmpblank.size() != 0)
 	{
-          if(trace_att)
-          {
-            wcerr << "printing tmpblank[0]" <<endl;
+          if(trace_att) {
+            cerr << "printing tmpblank[0]" <<endl;
           }
-          fputws_unlocked(tmpblank[0]->c_str(), output);
+          write(*tmpblank[0], output);
           tmpblank.clear();
           prev_last = last;
           last = input_buffer.getPos();
@@ -2544,18 +1189,12 @@ Transfer::transfer(FILE *in, FILE *out)
       last = input_buffer.getPos();
       last_lword = tmpword.size();
 
-      if(trace)
-      {
-        wcerr << endl << L"apertium-transfer: Rule " << val << L" line " << lastrule_line << L" ";
-        for (unsigned int ind = 0; ind < tmpword.size(); ind++)
-        {
-          if (ind != 0)
-          {
-            wcerr << L" ";
-          }
-          fputws_unlocked(tmpword[ind]->c_str(), stderr);
+      if(trace) {
+        cerr << endl << "apertium-transfer: Rule " << val << " line " << lastrule_line;
+        for (auto& it : tmpword) {
+          cerr << " " << *it;
         }
-        wcerr << endl;
+        cerr << endl;
       }
     }
 
@@ -2569,7 +1208,7 @@ Transfer::transfer(FILE *in, FILE *out)
 	break;
 
       case tt_blank:
-	ms.step(L' ');
+	ms.step(' ');
 	tmpblank.push_back(&current.getContent());
 	break;
 
@@ -2581,13 +1220,13 @@ Transfer::transfer(FILE *in, FILE *out)
 	}
 	else
 	{
-	  fputws_unlocked(current.getContent().c_str(), output);
+      write(current.getContent(), output);
 	  return;
 	}
 	break;
 
       default:
-	wcerr << "Error: Unknown input token." << endl;
+	cerr << "Error: Unknown input token." << endl;
 	return;
     }
   }
@@ -2598,7 +1237,7 @@ Transfer::applyRule()
 {
   int words_to_consume;
   unsigned int limit = tmpword.size();
-  //wcerr << L"applyRule: " << tmpword.size() << endl;
+  //cerr << "applyRule: " << tmpword.size() << endl;
 
   for(unsigned int i = 0; i != limit; i++)
   {
@@ -2612,33 +1251,27 @@ Transfer::applyRule()
     {
       if(int(blank_queue.size()) < last_lword - 1)
       {
-        string blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1]));
-        blank_queue.push(blank_to_add);
+        blank_queue.push(*tmpblank[i-1]);
       }
     }
 
-    pair<wstring, int> tr;
+    pair<UString, int> tr;
     if(useBilingual && preBilingual == false)
     {
       tr = fstp.biltransWithQueue(*tmpword[i], false);
-      wstring refx,wblankx;
-      word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]),
-                                 UtfConverter::toUtf8(tr.first),
-                                 UtfConverter::toUtf8(refx),
-                                 UtfConverter::toUtf8(wblankx),
-                                 tr.second);
+      word[i] = new TransferWord(*tmpword[i], tr.first, ""_u, ""_u, tr.second);
     }
     else if(preBilingual)
     {
-      wstring sl;
-      wstring tl;
-      wstring ref;
-      wstring wblank;
+      UString sl;
+      UString tl;
+      UString ref;
+      UString wblank;
 
       int seenSlash = 0;
-      for(wstring::const_iterator it = tmpword[i]->begin(); it != tmpword[i]->end(); it++)
+      for(UString::const_iterator it = tmpword[i]->begin(); it != tmpword[i]->end(); it++)
       {
-        if(*it == L'\\')
+        if(*it == '\\')
         {
           if(seenSlash == 0)
           {
@@ -2660,19 +1293,19 @@ Transfer::applyRule()
           }
           continue;
         }
-        else if(*it == L'[')
+        else if(*it == '[')
         {
-          if(*(it+1) == L'[') //wordbound blank
+          if(*(it+1) == '[') //wordbound blank
           {
             while(true)
             {
-              if(*it == L'\\')
+              if(*it == '\\')
               {
                 wblank.push_back(*it);
                 it++;
                 wblank.push_back(*it);
               }
-              else if(*it == L'^' && *(it-1) == L']' && *(it-2) == L']')
+              else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']')
               {
                 break;
               }
@@ -2680,7 +1313,7 @@ Transfer::applyRule()
               {
                 wblank.push_back(*it);
               }
-              
+
               it++;
             }
           }
@@ -2702,7 +1335,7 @@ Transfer::applyRule()
           continue;
         }
 
-        if(*it == L'/')
+        if(*it == '/')
         {
           seenSlash++;
 
@@ -2722,22 +1355,13 @@ Transfer::applyRule()
           ref.push_back(*it);
         }
       }
-      tr = pair<wstring, int>(tl, false);
-      word[i] = new TransferWord(UtfConverter::toUtf8(sl),
-                                 UtfConverter::toUtf8(tr.first),
-                                 UtfConverter::toUtf8(ref),
-                                 UtfConverter::toUtf8(wblank),
-                                 tr.second);
+      tr = pair<UString, int>(tl, false);
+      word[i] = new TransferWord(sl, tr.first, ref, wblank, tr.second);
     }
     else // neither useBilingual nor preBilingual (sl==tl)
     {
-      tr = pair<wstring, int>(*tmpword[i], false);
-      wstring refx,wblankx;
-      word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]),
-                                 UtfConverter::toUtf8(tr.first),
-                                 UtfConverter::toUtf8(refx),
-                                 UtfConverter::toUtf8(wblankx),
-                                 tr.second);
+      tr = pair<UString, int>(*tmpword[i], false);
+      word[i] = new TransferWord(*tmpword[i], tr.first, ""_u, ""_u, tr.second);
     }
   }
 
@@ -2762,53 +1386,53 @@ Transfer::applyRule()
 
 /* HERE */
 void
-Transfer::applyWord(wstring const &word_str)
+Transfer::applyWord(UString const &word_str)
 {
-  ms.step(L'^');
-    
+  ms.step('^');
+
   for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
   {
     switch(word_str[i])
     {
-      case L'\\':
+      case '\\':
         i++;
-	ms.step(towlower(word_str[i]), any_char);
-	break;
-        
-      case L'[':
-        if(word_str[i+1] == L'[')
+        ms.step(u_tolower(word_str[i]), any_char);
+        break;
+
+      case '[':
+        if(word_str[i+1] == '[')
         {
           while(true)
           {
-            if(word_str[i] == L'\\')
+            if(word_str[i] == '\\')
             {
               i++;
             }
             else if(i >= 4)
             {
-              if(word_str[i] == L'^' && word_str[i-1] == L']' && word_str[i-2] == L']')
+              if(word_str[i] == '^' && word_str[i-1] == ']' && word_str[i-2] == ']')
               {
                 break;
               }
             }
-            
+
             i++;
           }
         }
         else
         {
-          ms.step(towlower(word_str[i]), any_char);
+          ms.step(u_tolower(word_str[i]), any_char);
         }
         break;
-        
-      case L'/':
+
+      case '/':
         i = limit;
         break;
 
-      case L'<':
+      case '<':
 	for(unsigned int j = i+1; j != limit; j++)
 	{
-	  if(word_str[j] == L'>')
+	  if(word_str[j] == '>')
 	  {
 	    int symbol = alphabet(word_str.substr(i, j-i+1));
 	    if(symbol)
@@ -2826,11 +1450,11 @@ Transfer::applyWord(wstring const &word_str)
 	break;
 
       default:
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
     }
   }
-  ms.step(L'$');
+  ms.step('$');
 }
 
 void
diff --git a/apertium/transfer.h b/apertium/transfer.h
index 80ff1bc..9cb77be 100644
--- a/apertium/transfer.h
+++ b/apertium/transfer.h
@@ -17,150 +17,75 @@
 #ifndef _TRANSFER_
 #define _TRANSFER_
 
-#include <apertium/transfer_instr.h>
-#include <apertium/transfer_token.h>
+#include <apertium/transfer_base.h>
+
 #include <apertium/transfer_word.h>
-#include <apertium/apertium_re.h>
-#include <lttoolbox/alphabet.h>
-#include <lttoolbox/buffer.h>
 #include <lttoolbox/fst_processor.h>
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/match_exe.h>
-#include <lttoolbox/match_state.h>
-
-#include <cstdio>
-#include <libxml/parser.h>
-#include <libxml/tree.h>
-#include <map>
-#include <set>
-#include <vector>
-#include <queue>
+#include <lttoolbox/input_file.h>
 
 using namespace std;
 
-class Transfer
+class Transfer : public TransferBase
 {
 private:
 
-  Alphabet alphabet;
-  MatchExe *me;
-  MatchState ms;
-  map<string, ApertiumRE, Ltstr> attr_items;
-  map<string, string, Ltstr> variables;
-  map<string, int, Ltstr> macros;
-  map<string, set<string, Ltstr>, Ltstr> lists;
-  map<string, set<string, Ltstr>, Ltstr> listslow;
-  vector<xmlNode *> macro_map;
-  vector<xmlNode *> rule_map;
-  vector<size_t> rule_lines;
-  xmlDoc *doc;
-  xmlNode *root_element;
   TransferWord **word;
-  queue <string> blank_queue;
-  int lword;
   int last_lword;
-  Buffer<TransferToken> input_buffer;
-  vector<wstring *> tmpword;
-  vector<wstring *> tmpblank;
-  
-  bool in_out;
   bool in_lu;
-  bool in_let_var;
-  string var_val; //stores the name of the variable being processed (in let or append)
-  
   bool in_wblank;
-  string out_wblank;
-  map <string, string> var_out_wblank;
+  UString out_wblank;
+  map <UString, UString> var_out_wblank;
   
-  bool gettingLemmaFromWord(string attr);
-  string combineWblanks(string wblank_current, string wblank_to_add);
-    
   FSTProcessor fstp;
   FSTProcessor extended;
   bool isExtended;
-  FILE *output;
-  int any_char;
-  int any_tag;
-
-  xmlNode *lastrule;
-  unsigned int nwords;
-
-  map<xmlNode *, TransferInstr> evalStringCache;
 
   enum OutputType{lu,chunk};
 
   OutputType defaultAttrs;
   bool preBilingual;
   bool useBilingual;
-  bool null_flush;
-  bool internal_null_flush;
-  bool trace;
   bool trace_att;
-  string emptyblank;
+  UString emptyblank;
 
-  void destroy();
-  void readData(FILE *input);
   void readBil(string const &filename);
-  void readTransfer(string const &input);
-  void collectMacros(xmlNode *localroot);
-  void collectRules(xmlNode *localroot);
-  string caseOf(string const &str);
-  string copycase(string const &source_word, string const &target_word);
 
   void processLet(xmlNode *localroot);
-  void processAppend(xmlNode *localroot);
-  int processRejectCurrentRule(xmlNode *localroot);
   void processOut(xmlNode *localroot);
   void processCallMacro(xmlNode *localroot);
   void processModifyCase(xmlNode *localroot);
-  bool processLogical(xmlNode *localroot);
-  bool processTest(xmlNode *localroot);
-  bool processAnd(xmlNode *localroot);
-  bool processOr(xmlNode *localroot);
-  bool processEqual(xmlNode *localroot);
-  bool processBeginsWith(xmlNode *localroot);
-  bool processBeginsWithList(xmlNode *localroot);
-  bool processEndsWith(xmlNode *localroot);
-  bool processEndsWithList(xmlNode *local);
-  bool processContainsSubstring(xmlNode *localroot);
-  bool processNot(xmlNode *localroot);
-  bool processIn(xmlNode *localroot);
-  int processRule(xmlNode *localroot);
-  string evalString(xmlNode *localroot);
-  int processInstruction(xmlNode *localroot);
-  int processChoose(xmlNode *localroot);
-  string processChunk(xmlNode *localroot);
-  string processTags(xmlNode *localroot);
+  UString evalCachedString(xmlNode *localroot);
+  UString processChunk(xmlNode *localroot);
+  UString processTags(xmlNode *localroot);
+  void processClip(xmlNode* element);
+  void processBlank(xmlNode* element);
+  void processCaseOf(xmlNode* element);
+  UString processLu(xmlNode* element);
+  UString processMlu(xmlNode* element);
+
+  void processLuCount(xmlNode* element);
 
-  bool beginsWith(string const &str1, string const &str2) const;
-  bool endsWith(string const &str1, string const &str2) const;
-  string tolower(string const &str) const;
-  string tags(string const &str) const;
-  wstring readWord(FILE *in);
-  wstring readBlank(FILE *in);
-  wstring readUntil(FILE *in, int const symbol) const;
-  void applyWord(wstring const &word_str);
+  UString readWord(InputFile& in);
+  UString readBlank(InputFile& in);
+  UString readUntil(InputFile& in, int const symbol) const;
+  void applyWord(UString const &word_str);
   int applyRule();
-  TransferToken & readToken(FILE *in);
+  TransferToken & readToken(InputFile& in);
   bool checkIndex(xmlNode *element, int index, int limit);
-  void transfer_wrapper_null_flush(FILE *in, FILE *out);
+  void transfer_wrapper_null_flush(InputFile& in, UFILE* out);
   void tmp_clear();
 public:
   Transfer();
-  ~Transfer();
 
   void read(string const &transferfile, string const &datafile,
 	    string const &fstfile = "");
-  void transfer(FILE *in, FILE *out);
+  void transfer(InputFile& in, UFILE* out);
   void setUseBilingual(bool value);
   bool getUseBilingual(void) const;
   void setPreBilingual(bool value);
   bool getPreBilingual(void) const;
   void setExtendedDictionary(string const &fstfile);
   void setCaseSensitiveness(bool value);
-  bool getNullFlush(void);
-  void setNullFlush(bool null_flush);
-  void setTrace(bool trace);
   void setTraceATT(bool trace);
 };
 
diff --git a/apertium/transfer_base.cc b/apertium/transfer_base.cc
new file mode 100644
index 0000000..9feb672
--- /dev/null
+++ b/apertium/transfer_base.cc
@@ -0,0 +1,603 @@
+#include <apertium/transfer_base.h>
+#include <lttoolbox/xml_walk_util.h>
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/compression.h>
+#include <apertium/trx_reader.h>
+
+using namespace std;
+
+TransferBase::TransferBase()
+  : me(nullptr), doc(nullptr), root_element(nullptr),
+    lword(0), lastrule(nullptr), nwords(0), output(nullptr),
+    any_char(0), any_tag(0), in_let_var(false), in_out(false),
+    null_flush(false), internal_null_flush(false), trace(false)
+{}
+
+TransferBase::~TransferBase()
+{
+  if (me) {
+    delete me;
+    me = nullptr;
+  }
+  if (doc) {
+    xmlFreeDoc(doc);
+    doc = nullptr;
+  }
+}
+
+void
+TransferBase::read(const char* transferfile, const char* datafile)
+{
+  doc = xmlReadFile(transferfile, NULL, 0);
+  if (doc == NULL) {
+    cerr << "Error: Could not parse file '" << transferfile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  root_element = xmlDocGetRootElement(doc);
+
+  for (auto i : children(root_element)) {
+    if (!xmlStrcmp(i->name, (const xmlChar*) "section-def-macros")) {
+      collectMacros(i);
+    } else if (!xmlStrcmp(i->name, (const xmlChar*) "section-rules")) {
+      collectRules(i);
+    }
+  }
+
+
+  FILE* in = fopen(datafile, "rb");
+  if (!in) {
+    cerr << "Error: Could not open file '" << datafile << "' for reading." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  alphabet.read(in);
+  any_char = alphabet(TRXReader::ANY_CHAR);
+  any_tag = alphabet(TRXReader::ANY_TAG);
+
+  Transducer t;
+  t.read(in, alphabet.size());
+
+  map<int, int> finals;
+
+  // finals
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    int key = Compression::multibyte_read(in);
+    finals[key] = Compression::multibyte_read(in);
+  }
+
+  me = new MatchExe(t, finals);
+
+  // attr_items
+  bool icu = Compression::string_read(in).empty();
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    UString const cad_k = Compression::string_read(in);
+    attr_items[cad_k].read(in);
+    UString fallback = Compression::string_read(in);
+    if (!icu && cad_k == "chname"_u) {
+      // chname was previously "({([^/]+)\\/)"
+      // which is fine for PCRE, but ICU chokes on the unmatched bracket
+      fallback = "(\\{([^/]+)\\/)"_u;
+    }
+    attr_items[cad_k].compile(fallback);
+  }
+
+  // variables
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    UString const cad_k = Compression::string_read(in);
+    variables[cad_k] = Compression::string_read(in);
+    variable_defaults[cad_k] = variables[cad_k];
+  }
+
+  // macros
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    UString const cad_k = Compression::string_read(in);
+    macros[cad_k] = Compression::multibyte_read(in);
+  }
+
+  // lists
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    UString const cad_k = Compression::string_read(in);
+
+    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
+    {
+      UString const cad_v = Compression::string_read(in);
+      lists[cad_k].insert(cad_v);
+      listslow[cad_k].insert(StringUtils::tolower(cad_v));
+    }
+  }
+}
+
+void
+TransferBase::collectRules(xmlNode* localroot)
+{
+  for (auto rule : children(localroot)) {
+    size_t line = rule->line;
+    for (auto rulechild : children(rule)) {
+      if(!xmlStrcmp(rulechild->name, (const xmlChar *) "action")) {
+        rule_map.push_back(rulechild);
+        rule_lines.push_back(line);
+        break;
+      }
+    }
+  }
+}
+
+void
+TransferBase::collectMacros(xmlNode* localroot)
+{
+  for (auto i : children(localroot)) {
+    macro_map.push_back(i);
+  }
+}
+
+bool
+TransferBase::gettingLemmaFromWord(const UString& attr)
+{
+  return attr == "lem"_u || attr == "lemh"_u || attr == "whole"_u;
+}
+
+UString
+TransferBase::combineWblanks(const UString& first, const UString& second)
+{
+  if (first.empty()) {
+    return second;
+  } else if (second.empty()) {
+    return first;
+  }
+  UString ret;
+  ret.reserve(first.size() + second.size());
+  if (endsWith(first, "]]"_u)) {
+    if (first.size() > 2) {
+      size_t i = first.size() - 3;
+      bool esc = false;
+      while (first[i] == '\\') {
+        i--;
+        esc = !esc;
+      }
+      if (esc) {
+        ret.append(first);
+      } else {
+        ret.append(first.substr(0, first.size()-2));
+      }
+    } else {
+      ret.append(first.substr(0, first.size()-2));
+    }
+  } else {
+    ret.append(first);
+  }
+  ret += ';';
+  ret += ' ';
+  if (beginsWith(second, "[["_u)) {
+    ret.append(second.substr(2));
+  } else {
+    ret.append(second);
+  }
+  return ret;
+}
+
+UString
+TransferBase::evalString(xmlNode* element)
+{
+  if (!element) {
+    throw "evalString() was called on a NULL element";
+  }
+  if (evalStringCache.find(element) != evalStringCache.end()) {
+    return evalCachedString(element);
+  }
+  if (!xmlStrcmp(element->name, (const xmlChar*) "clip")) {
+    processClip(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "lit-tag")) {
+    evalStringCache[element] = TransferInstr(ti_lit_tag, tags(getattr(element, "v")), 0);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "lit")) {
+    evalStringCache[element] = TransferInstr(ti_lit, getattr(element, "v"), 0);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "b")) {
+    processBlank(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "get-case-from")) {
+    int pos = atoi((const char*) element->properties->children->content);
+    xmlNode* param = NULL;
+    for (auto it : children(element)) {
+      param = it;
+      break;
+    }
+    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem"_u, pos, param);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "var")) {
+    evalStringCache[element] = TransferInstr(ti_var, getattr(element, "n"), 0);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "lu-count")) {
+    processLuCount(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "case-of")) {
+    processCaseOf(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "concat")) {
+    UString value;
+    for (auto it : children(element)) {
+      value.append(evalString(it));
+    }
+    return value;
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "lu")) {
+    return processLu(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "mlu")) {
+    return processMlu(element);
+  } else if (!xmlStrcmp(element->name, (const xmlChar*) "chunk")) {
+    return processChunk(element);
+  } else {
+    cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+  return evalCachedString(element);
+}
+
+int
+TransferBase::processRule(xmlNode* localroot)
+{
+  int words_to_consume = -1;
+  // iterating over the <action> tag
+  for (auto i : children(localroot)) {
+    words_to_consume = processInstruction(i);
+    // When an instruction which modifies the number of words to be consumed
+    // from the input is found, execution of the rule is stopped
+    if (words_to_consume != -1) {
+      break;
+    }
+  }
+  // flush remaining non-space blanks
+  while (!blank_queue.empty()) {
+    if (blank_queue.front() != " "_u) {
+      write(blank_queue.front(), output);
+    }
+    blank_queue.pop();
+  }
+  return words_to_consume;
+}
+
+int
+TransferBase::processInstruction(xmlNode* localroot)
+{
+  int words_to_consume = -1;
+  if(!xmlStrcmp(localroot->name, (const xmlChar *) "choose"))
+  {
+    words_to_consume = processChoose(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "let"))
+  {
+    processLet(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "append"))
+  {
+    processAppend(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "out"))
+  {
+    processOut(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "call-macro"))
+  {
+    processCallMacro(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "modify-case"))
+  {
+    processModifyCase(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "reject-current-rule"))
+  {
+    words_to_consume = processRejectCurrentRule(localroot);
+  }
+  return words_to_consume;
+}
+
+int
+TransferBase::processRejectCurrentRule(xmlNode* localroot)
+{
+  bool shifting = (getattr(localroot, "shifting") == "yes"_u);
+  return shifting ? 1 : 0;
+}
+
+int
+TransferBase::processChoose(xmlNode* localroot)
+{
+  int words_to_consume = -1;
+  for (auto option : children(localroot)) {
+    if (!xmlStrcmp(option->name, (const xmlChar*) "when")) {
+      bool picked = false;
+      for (auto it : children(option)) {
+        if (!xmlStrcmp(it->name, (const xmlChar*) "test")) {
+          if (!processTest(it)) {
+            break;
+          } else {
+            picked = true;
+          }
+        } else {
+          words_to_consume = processInstruction(it);
+          if (words_to_consume != -1) {
+            return words_to_consume;
+          }
+        }
+      }
+      if (picked) {
+        return words_to_consume;
+      }
+    } else if (!xmlStrcmp(option->name, (const xmlChar*) "otherwise")) {
+      for (auto it : children(option)) {
+        words_to_consume = processInstruction(it);
+        if (words_to_consume != -1) {
+          return words_to_consume;
+        }
+      }
+    }
+  }
+  return words_to_consume;
+}
+
+void
+TransferBase::processAppend(xmlNode* localroot)
+{
+  UString name = getattr(localroot, "n");
+  for (auto i : children(localroot)) {
+    in_let_var = true;
+    var_val = name;
+    variables[name].append(evalString(i));
+    in_let_var = false;
+  }
+}
+
+bool
+TransferBase::processLogical(xmlNode *localroot)
+{
+  if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal"))
+  {
+    return processEqual(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with"))
+  {
+    return processBeginsWith(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "begins-with-list"))
+  {
+    return processBeginsWithList(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with"))
+  {
+    return processEndsWith(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "ends-with-list"))
+  {
+    return processEndsWithList(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "contains-substring"))
+  {
+    return processContainsSubstring(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "or"))
+  {
+    return processOr(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "and"))
+  {
+    return processAnd(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "not"))
+  {
+    return processNot(localroot);
+  }
+  else if(!xmlStrcmp(localroot->name, (const xmlChar *) "in"))
+  {
+    return processIn(localroot);
+  }
+
+  return false;
+}
+
+bool
+TransferBase::processTest(xmlNode* localroot)
+{
+  for (auto i : children(localroot)) {
+    return processLogical(i);
+  }
+  return false;
+}
+
+bool
+TransferBase::processAnd(xmlNode* localroot)
+{
+  for (auto i : children(localroot)) {
+    if (!processLogical(i)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool
+TransferBase::processOr(xmlNode* localroot)
+{
+  for (auto i : children(localroot)) {
+    if (processLogical(i)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool
+TransferBase::processNot(xmlNode* localroot)
+{
+  for (auto i : children(localroot)) {
+    return !processLogical(i);
+  }
+  return false;
+}
+
+bool
+TransferBase::beginsWith(const UString& haystack, const UString& needle)
+{
+  const size_t hlen = haystack.size();
+  const size_t nlen = needle.size();
+  if (hlen < nlen) {
+    return false;
+  }
+  for (size_t i = 0; i < nlen; i++) {
+    if (haystack[i] != needle[i]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool
+TransferBase::endsWith(const UString& haystack, const UString& needle)
+{
+  if (needle.size() > haystack.size()) {
+    return false;
+  }
+  for (int h = haystack.size()-1, n = needle.size()-1; n >= 0; h--, n--) {
+    if (haystack[h] != needle[n]) {
+      return false;
+    }
+  }
+  return true;
+}
+
+pair<xmlNode*, xmlNode*>
+TransferBase::twoChildren(xmlNode* localroot)
+{
+  xmlNode* first = nullptr;
+  xmlNode* second = nullptr;
+  for (auto i : children(localroot)) {
+    if (!first) {
+      first = i;
+    } else {
+      second = i;
+      break;
+    }
+  }
+  return make_pair(first, second);
+}
+
+bool
+TransferBase::processBeginsWith(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  if (getattr(localroot, "caseless") == "yes"_u) {
+    return beginsWith(StringUtils::tolower(evalString(ch.first)),
+                      StringUtils::tolower(evalString(ch.second)));
+  } else {
+    return beginsWith(evalString(ch.first), evalString(ch.second));
+  }
+}
+
+bool
+TransferBase::processBeginsWithList(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  UString needle = evalString(ch.first);
+  UString idlist = getattr(ch.second, "n");
+  bool caseless = (getattr(localroot, "caseless") == "yes"_u);
+  if (caseless) {
+    needle = StringUtils::tolower(needle);
+  }
+  for (auto it : (caseless ? listslow[idlist] : lists[idlist])) {
+    if (beginsWith(needle, it)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool
+TransferBase::processEndsWith(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  if (getattr(localroot, "caseless") == "yes"_u) {
+    return endsWith(StringUtils::tolower(evalString(ch.first)),
+                    StringUtils::tolower(evalString(ch.second)));
+  } else {
+    return endsWith(evalString(ch.first), evalString(ch.second));
+  }
+}
+
+bool
+TransferBase::processEndsWithList(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  UString needle = evalString(ch.first);
+  UString idlist = getattr(ch.second, "n");
+  bool caseless = (getattr(localroot, "caseless") == "yes"_u);
+  if (caseless) {
+    needle = StringUtils::tolower(needle);
+  }
+  for (auto it : (caseless ? listslow[idlist] : lists[idlist])) {
+    if (endsWith(needle, it)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool
+TransferBase::processContainsSubstring(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  if (getattr(localroot, "caseless") == "yes"_u) {
+    return StringUtils::tolower(evalString(ch.first)).find(StringUtils::tolower(evalString(ch.second))) != UString::npos;
+  } else {
+    return evalString(ch.first).find(evalString(ch.second)) != UString::npos;
+  }
+}
+
+bool
+TransferBase::processEqual(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  if (getattr(localroot, "caseless") == "yes"_u) {
+    return StringUtils::tolower(evalString(ch.first)) == StringUtils::tolower(evalString(ch.second));
+  } else {
+    return evalString(ch.first) == evalString(ch.second);
+  }
+}
+
+bool
+TransferBase::processIn(xmlNode* localroot)
+{
+  auto ch = twoChildren(localroot);
+  UString sval = evalString(ch.first);
+  UString idlist = getattr(ch.second, "n");
+  if (getattr(localroot, "caseless") == "yes"_u) {
+    set<UString>& myset = listslow[idlist];
+    return (myset.find(StringUtils::tolower(sval)) != myset.end());
+  } else {
+    set<UString>& myset = lists[idlist];
+    return (myset.find(sval) != myset.end());
+  }
+}
+
+UString
+TransferBase::tags(const UString& str) const
+{
+  UString ret;
+  ret.reserve(str.size()+2);
+  ret += '<';
+  ret.append(StringUtils::substitute(str, "."_u, "><"_u));
+  ret += '>';
+  return ret;
+}
+
+bool
+TransferBase::getNullFlush(void)
+{
+  return null_flush;
+}
+
+void
+TransferBase::setNullFlush(bool val)
+{
+  null_flush = val;
+}
+
+void
+TransferBase::setTrace(bool val)
+{
+  trace = val;
+}
diff --git a/apertium/transfer_base.h b/apertium/transfer_base.h
new file mode 100644
index 0000000..e283329
--- /dev/null
+++ b/apertium/transfer_base.h
@@ -0,0 +1,123 @@
+#ifndef _APERTIUM_TRANSFER_BASE_
+#define _APERTIUM_TRANSFER_BASE_
+
+#include <lttoolbox/ustring.h>
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/buffer.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+
+#include <apertium/apertium_re.h>
+#include <apertium/transfer_instr.h>
+#include <apertium/transfer_token.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <map>
+#include <set>
+#include <queue>
+#include <vector>
+
+using namespace std;
+
+class TransferBase // code shared by the transfer, interchunk and postchunk processors
+{
+protected:
+  Alphabet alphabet;
+  MatchExe* me;
+  MatchState ms;
+  map<UString, ApertiumRE> attr_items;
+  map<UString, UString> variables;
+  map<UString, UString> variable_defaults;
+  map<UString, int> macros;
+  map<UString, set<UString>> lists;
+  map<UString, set<UString>> listslow; // consulted instead of `lists` when caseless="yes"
+  vector<xmlNode*> macro_map;
+  vector<xmlNode*> rule_map;
+  vector<size_t> rule_lines;
+  xmlDoc* doc;
+  xmlNode* root_element;
+
+  queue<UString> blank_queue;
+  Buffer<TransferToken> input_buffer;
+  int lword;
+  vector<UString*> tmpword;
+  vector<UString*> tmpblank;
+  xmlNode* lastrule;
+  unsigned int nwords;
+
+  UFILE* output;
+
+  int32_t any_char;
+  int32_t any_tag;
+
+  bool in_let_var;
+  bool in_out;
+  UString var_val;
+  map<xmlNode *, TransferInstr> evalStringCache;
+
+  bool null_flush; // read/written through getNullFlush()/setNullFlush()
+  bool internal_null_flush;
+  bool trace; // written through setTrace()
+
+  void collectMacros(xmlNode *localroot);
+  void collectRules(xmlNode *localroot);
+
+  bool gettingLemmaFromWord(const UString& attr);
+  UString combineWblanks(const UString& first, const UString& second);
+
+  UString evalString(xmlNode* element);
+  virtual UString evalCachedString(xmlNode* element) = 0; // pure virtual: supplied by each derived class
+
+  virtual void processClip(xmlNode* element) = 0; // the hooks below are likewise left to derived classes
+  virtual void processBlank(xmlNode* element) = 0;
+  virtual void processLuCount(xmlNode* element) = 0;
+  virtual void processCaseOf(xmlNode* element) = 0;
+  virtual UString processLu(xmlNode* element) = 0;
+  virtual UString processMlu(xmlNode* element) = 0;
+  virtual UString processChunk(xmlNode* element) = 0;
+
+  int processRule(xmlNode* localroot);
+  int processInstruction(xmlNode* localroot);
+  int processRejectCurrentRule(xmlNode* localroot);
+  int processChoose(xmlNode* localroot);
+  void processAppend(xmlNode* localroot);
+
+  virtual void processLet(xmlNode* localroot) = 0;
+  virtual void processOut(xmlNode* localroot) = 0;
+  virtual void processCallMacro(xmlNode* localroot) = 0;
+  virtual void processModifyCase(xmlNode* localroot) = 0;
+
+  bool processLogical(xmlNode *localroot);
+  bool processTest(xmlNode *localroot);
+  bool processAnd(xmlNode *localroot);
+  bool processOr(xmlNode *localroot);
+  bool processNot(xmlNode *localroot);
+
+  bool beginsWith(const UString& haystack, const UString& needle);
+  bool endsWith(const UString& haystack, const UString& needle);
+
+  pair<xmlNode*, xmlNode*> twoChildren(xmlNode* localroot);
+
+  bool processBeginsWith(xmlNode *localroot); // condition evaluators; each returns the test result
+  bool processBeginsWithList(xmlNode *localroot);
+  bool processEndsWith(xmlNode *localroot);
+  bool processEndsWithList(xmlNode *localroot);
+  bool processContainsSubstring(xmlNode *localroot);
+  bool processEqual(xmlNode *localroot);
+  bool processIn(xmlNode *localroot);
+
+  UString tags(const UString& s) const; // wraps `s` in '<' ... '>' with "." replaced by "><"
+
+public:
+  TransferBase();
+  virtual ~TransferBase(); // virtual: polymorphic base, so deleting via TransferBase* must be safe
+
+  void read(const char* transferfile, const char* datafile);
+  bool getNullFlush(void);
+  void setNullFlush(bool null_flush);
+  void setTrace(bool trace);
+};
+
+#endif
diff --git a/apertium/transfer_data.cc b/apertium/transfer_data.cc
index fc08552..6e818a3 100644
--- a/apertium/transfer_data.cc
+++ b/apertium/transfer_data.cc
@@ -17,12 +17,10 @@
 
 #include <apertium/transfer_data.h>
 #include <lttoolbox/compression.h>
-#include <apertium/utf_converter.h>
 #include <apertium/apertium_re.h>
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
-using namespace Apertium;
 using namespace std;
 
 void
@@ -46,14 +44,14 @@ TransferData::destroy()
 TransferData::TransferData()
 {
   // adding fixed attr_items
-  attr_items[L"lem"] = L"^(([^<]|\"\\<\")+)";
-  attr_items[L"lemq"] = L"\\#[- _][^<]+";
-  attr_items[L"lemh"] = L"^(([^<#]|\"\\<\"|\"\\#\")+)";
-  attr_items[L"whole"] = L"(.+)";
-  attr_items[L"tags"] = L"((<[^>]+>)+)";
-  attr_items[L"chname"] = L"({([^/]+)\\/)"; // includes delimiters { and / !!!
-  attr_items[L"chcontent"] = L"(\\{.+)";
-  attr_items[L"content"] = L"(\\{.+)";
+  attr_items["lem"_u] = "^(([^<]|\"\\<\")+)"_u;
+  attr_items["lemq"_u] = "\\#[- _][^<]+"_u;
+  attr_items["lemh"_u] = "^(([^<#]|\"\\<\"|\"\\#\")+)"_u;
+  attr_items["whole"_u] = "(.+)"_u;
+  attr_items["tags"_u] = "((<[^>]+>)+)"_u;
+  attr_items["chname"_u] = "(\\{([^/]+)\\/)"_u; // includes delimiters { and / !!!
+  attr_items["chcontent"_u] = "(\\{.+)"_u;
+  attr_items["content"_u] = "(\\{.+)"_u;
 }
 
 TransferData::~TransferData()
@@ -89,25 +87,25 @@ TransferData::getTransducer()
   return transducer;
 }
 
-map<wstring, wstring, Ltstr> &
+map<UString, UString> &
 TransferData::getAttrItems()
 {
   return attr_items;
 }
 
-map<wstring, int, Ltstr> &
+map<UString, int> &
 TransferData::getMacros()
 {
   return macros;
 }
 
-map<wstring, set<wstring, Ltstr>, Ltstr> &
+map<UString, set<UString>> &
 TransferData::getLists()
 {
   return lists;
 }
 
-map<wstring, wstring, Ltstr> &
+map<UString, UString> &
 TransferData::getVariables()
 {
   return variables;
@@ -115,7 +113,9 @@ TransferData::getVariables()
 
 int
 TransferData::countToFinalSymbol(const int count) {
-  const wstring count_sym = L"<RULE_NUMBER:" + to_wstring(count) + L">";
+  UChar buf[64];
+  u_snprintf(buf, 64, "<RULE_NUMBER:%d>", count);
+  UString count_sym = buf;
   alphabet.includeSymbol(count_sym);
   const int symbol = alphabet(count_sym);
   final_symbols.insert(symbol);
@@ -134,7 +134,7 @@ TransferData::write(FILE *output)
   // Find all arcs with "final_symbols" in the transitions, let their source node instead be final,
   // and extract the rule number from the arc. Record relation between source node and rule number
   // in finals_rules. It is now no longer safe to minimize -- but we already did that.
-  const wstring rule_sym_pre = L"<RULE_NUMBER:"; // see countToFinalSymbol()
+  const UString rule_sym_pre = "<RULE_NUMBER:"_u; // see countToFinalSymbol()
   for(map<int, multimap<int, pair<int, double> > >::const_iterator it = transitions.begin(),
         limit = transitions.end(); it != limit; ++it)
   {
@@ -152,12 +152,12 @@ TransferData::write(FILE *output)
         continue;
       }
       // Extract the rule number encoded by countToFinalSymbol():
-      wstring s;
+      UString s;
       alphabet.getSymbol(s, symbol);
       if(s.compare(0, rule_sym_pre.size(), rule_sym_pre) != 0) {
         continue;
       }
-      const int rule_num = stoi(s.substr(rule_sym_pre.size()));
+      const int rule_num = StringUtils::stoi(s.substr(rule_sym_pre.size()));
       transducer.setFinal(src, wgt);
       finals_rules[src] = rule_num;
     }
@@ -188,34 +188,34 @@ TransferData::write(FILE *output)
 
   // variables
   Compression::multibyte_write(variables.size(), output);
-  for(map<wstring, wstring, Ltstr>::const_iterator it = variables.begin(), limit = variables.end();
+  for(map<UString, UString>::const_iterator it = variables.begin(), limit = variables.end();
       it != limit; it++)
   {
-    Compression::wstring_write(it->first, output);
-    Compression::wstring_write(it->second, output);
+    Compression::string_write(it->first, output);
+    Compression::string_write(it->second, output);
   }
 
   // macros
   Compression::multibyte_write(macros.size(), output);
-  for(map<wstring, int, Ltstr>::const_iterator it = macros.begin(), limit = macros.end();
+  for(map<UString, int>::const_iterator it = macros.begin(), limit = macros.end();
       it != limit; it++)
   {
-    Compression::wstring_write(it->first, output);
+    Compression::string_write(it->first, output);
     Compression::multibyte_write(it->second, output);
   }
 
   // lists
   Compression::multibyte_write(lists.size(), output);
-  for(map<wstring, set<wstring, Ltstr>, Ltstr>::const_iterator it = lists.begin(), limit = lists.end();
+  for(map<UString, set<UString>>::const_iterator it = lists.begin(), limit = lists.end();
       it != limit; it++)
   {
-    Compression::wstring_write(it->first, output);
+    Compression::string_write(it->first, output);
     Compression::multibyte_write(it->second.size(), output);
 
-    for(set<wstring, Ltstr>::const_iterator it2 = it->second.begin(), limit2 = it->second.end();
+    for(set<UString>::const_iterator it2 = it->second.begin(), limit2 = it->second.end();
 	it2 != limit2; it2++)
     {
-      Compression::wstring_write(*it2, output);
+      Compression::string_write(*it2, output);
     }
   }
 
@@ -224,16 +224,16 @@ TransferData::write(FILE *output)
 void
 TransferData::writeRegexps(FILE *output)
 {
-  Compression::string_write(pcre_version_endian(), output);
+  // since ICU doesn't have a binary form, it doesn't matter
+  // what the version is, so leave it blank
+  Compression::string_write(""_u, output);
   Compression::multibyte_write(attr_items.size(), output);
 
-  map<wstring, wstring, Ltstr>::iterator it, limit;
-  for(it = attr_items.begin(), limit = attr_items.end(); it != limit; it++)
-  {
-    Compression::wstring_write(it->first, output);
-    ApertiumRE my_re;
-    my_re.compile(UtfConverter::toUtf8(it->second));
-    my_re.write(output);
-    Compression::wstring_write(it->second, output);
+  for (auto& it : attr_items) {
+    Compression::string_write(it.first, output);
+    // empty binary form, since ICU doesn't have a dump function
+    // like PCRE did
+    Compression::multibyte_write(0, output);
+    Compression::string_write(it.second, output);
   }
 }
diff --git a/apertium/transfer_data.h b/apertium/transfer_data.h
index 49b5755..1c8ffea 100644
--- a/apertium/transfer_data.h
+++ b/apertium/transfer_data.h
@@ -18,7 +18,6 @@
 #define _TRANSFERDATA_
 
 #include <lttoolbox/alphabet.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/transducer.h>
 
 #include <map>
@@ -32,10 +31,10 @@ private:
   void copy(TransferData const &o);
   void destroy();
 
-  map<wstring, wstring, Ltstr> attr_items;
-  map<wstring, int, Ltstr> macros;
-  map<wstring, set<wstring, Ltstr>, Ltstr> lists;
-  map<wstring, wstring, Ltstr> variables;
+  map<UString, UString> attr_items;
+  map<UString, int> macros;
+  map<UString, set<UString>> lists;
+  map<UString, UString> variables;
   set<int> final_symbols;
 
   Alphabet alphabet;
@@ -50,13 +49,13 @@ private:
 
   Alphabet & getAlphabet();
   Transducer & getTransducer();
-  map<wstring, wstring, Ltstr> & getAttrItems();
+  map<UString, UString> & getAttrItems();
 
   map<int, int> seen_rules;
 
-  map<wstring, int, Ltstr> & getMacros();
-  map<wstring, set<wstring, Ltstr>, Ltstr> & getLists();
-  map<wstring, wstring, Ltstr> & getVariables();
+  map<UString, int> & getMacros();
+  map<UString, set<UString>> & getLists();
+  map<UString, UString> & getVariables();
 
   /**
    * Encode the rule count in an arc label/symbol (later extracted by
diff --git a/apertium/transfer_instr.cc b/apertium/transfer_instr.cc
index 7efee52..0b2e5c1 100644
--- a/apertium/transfer_instr.cc
+++ b/apertium/transfer_instr.cc
@@ -15,9 +15,8 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/transfer_instr.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
-using namespace Apertium;
 void
 TransferInstr::copy(TransferInstr const &o)
 {
@@ -26,6 +25,7 @@ TransferInstr::copy(TransferInstr const &o)
   pos = o.pos;
   pointer = o.pointer;
   condition = o.condition;
+  strval = o.strval;
 }
 
 void
@@ -33,14 +33,16 @@ TransferInstr::destroy()
 {
 }
 
-TransferInstr::TransferInstr(TransferInstrType t, string const &c,
-                             int const p, void *ptr, bool cond)
+TransferInstr::TransferInstr(TransferInstrType t, UString const &c,
+                             int const p, xmlNode* ptr, bool cond,
+                             const UString& sv)
 {
   type = t;
   content = c;
   pos = p;
   pointer = ptr;
   condition = cond;
+  strval = sv;
 }
 
 TransferInstr::~TransferInstr()
@@ -70,7 +72,7 @@ TransferInstr::getType()
   return type;
 }
 
-string const &
+UString const &
 TransferInstr::getContent()
 {
   return content;
@@ -82,7 +84,7 @@ TransferInstr::getPos()
   return pos;
 }
 
-void *
+xmlNode*
 TransferInstr::getPointer()
 {
   return pointer;
@@ -93,3 +95,9 @@ TransferInstr::getCondition()
 {
   return condition;
 }
+
+const UString&
+TransferInstr::getStrval()
+{
+  return strval;
+}
diff --git a/apertium/transfer_instr.h b/apertium/transfer_instr.h
index 977b112..3d51b28 100644
--- a/apertium/transfer_instr.h
+++ b/apertium/transfer_instr.h
@@ -17,7 +17,9 @@
 #ifndef _TRANSFERINSTR_
 #define _TRANSFERINSTR_
 
-#include<string>
+#include <libxml/tree.h>
+#include <string>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
@@ -44,10 +46,11 @@ class TransferInstr
 {
 private:
   TransferInstrType type;
-  string content;
+  UString content;
   int pos;
-  void *pointer;
+  xmlNode* pointer;
   bool condition;
+  UString strval;
 
   void copy(TransferInstr const &o);
   void destroy();
@@ -58,18 +61,19 @@ public:
   pointer(0),
   condition(false)
   {}
-  TransferInstr(TransferInstrType t, string const &c, int const p,
-                void *ptr=NULL, bool cond = true);
+  TransferInstr(TransferInstrType t, UString const &c, int const p,
+                xmlNode* ptr=NULL, bool cond = true, const UString& sv = ""_u);
   ~TransferInstr();
   TransferInstr(TransferInstr const &o);
   TransferInstr & operator =(TransferInstr const &o);
 
 
   TransferInstrType getType();
-  string const & getContent();
+  UString const & getContent();
   int getPos();
-  void * getPointer();
+  xmlNode* getPointer();
   bool getCondition();
+  const UString& getStrval();
 };
 
 #endif
diff --git a/apertium/transfer_mult.cc b/apertium/transfer_mult.cc
index 6491c53..b28c4f0 100644
--- a/apertium/transfer_mult.cc
+++ b/apertium/transfer_mult.cc
@@ -18,17 +18,12 @@
 #include <apertium/trx_reader.h>
 #include <lttoolbox/compression.h>
 #include <lttoolbox/xml_parse_util.h>
-#include <apertium/utf_converter.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <cctype>
 #include <iostream>
 #include <stack>
 
-#ifdef _WIN32
-#include <utf8_fwrap.h>
-#endif
-
 using namespace std;
 
 void
@@ -60,18 +55,6 @@ TransferMult::~TransferMult()
   destroy();
 }
 
-string
-TransferMult::tolower(string const &str) const
-{
-  string result = str;
-  for(unsigned int i = 0, limit = str.size(); i != limit; i++)
-  {
-    result[i] = ::tolower(result[i]);
-  }
-
-  return result;
-}
-
 void
 TransferMult::readData(FILE *in)
 {
@@ -94,41 +77,39 @@ TransferMult::readData(FILE *in)
   me = new MatchExe(t, finals);
 
   // attr_items
-  bool recompile_attrs = Compression::string_read(in) != pcre_version_endian();
+  Compression::string_read(in); // PCRE version placeholder
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    UString const cad_k = Compression::string_read(in);
     attr_items[cad_k].read(in);
-    wstring fallback = Compression::wstring_read(in);
-    if(recompile_attrs) {
-      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
-    }
+    UString fallback = Compression::string_read(in);
+    attr_items[cad_k].compile(fallback);
   }
 
   // variables
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
-    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
+    UString const cad_k = Compression::string_read(in);
+    variables[cad_k] = Compression::string_read(in);
   }
 
   // macros
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    UString const cad_k = Compression::string_read(in);
     macros[cad_k] = Compression::multibyte_read(in);
   }
 
   // lists
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    UString const cad_k = Compression::string_read(in);
 
     for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
     {
-      wstring const cad_v = Compression::wstring_read(in);
-      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
-      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
+      UString const cad_v = Compression::string_read(in);
+      lists[cad_k].insert(cad_v);
+      listslow[cad_k].insert(StringUtils::tolower(cad_v));
     }
   }
 }
@@ -139,7 +120,7 @@ TransferMult::readBil(string const &fstfile)
   FILE *in = fopen(fstfile.c_str(), "r");
   if(!in)
   {
-    wcerr << "Error: Could not open file '" << fstfile << "'." << endl;
+    cerr << "Error: Could not open file '" << fstfile << "'." << endl;
     exit(EXIT_FAILURE);
   }
   fstp.load(in);
@@ -154,7 +135,7 @@ TransferMult::read(string const &datafile, string const &fstfile)
   FILE *in = fopen(datafile.c_str(), "r");
   if(!in)
   {
-    wcerr << "Error: Could not open file '" << datafile << "'." << endl;
+    cerr << "Error: Could not open file '" << datafile << "'." << endl;
     exit(EXIT_FAILURE);
   }
   readData(in);
@@ -164,65 +145,65 @@ TransferMult::read(string const &datafile, string const &fstfile)
 }
 
 TransferToken &
-TransferMult::readToken(FILE *in)
+TransferMult::readToken(InputFile& in)
 {
   if(!input_buffer.isEmpty())
   {
     return input_buffer.next();
   }
 
-  wstring content = L"";
+  UString content;
   while(true)
   {
-    int val = fgetwc_unlocked(in);
-    if(feof(in))
+    UChar32 val = in.get();
+    if(in.eof())
     {
       return input_buffer.add(TransferToken(content, tt_eof));
     }
-    if(val == L'\\')
+    if(val == '\\')
     {
-      content += L'\\';
-      content += wchar_t(fgetwc_unlocked(in));
+      content += '\\';
+      content += in.get();
     }
-    else if(val == L'[')
+    else if(val == '[')
     {
-      content += L'[';
+      content += '[';
       while(true)
       {
-	int val2 = fgetwc_unlocked(in);
-	if(val2 == L'\\')
+        UChar32 val2 = in.get();
+	if(val2 == '\\')
 	{
-	  content += L'\\';
-	  content += wchar_t(fgetwc_unlocked(in));
+	  content += '\\';
+      content += in.get();
 	}
-	else if(val2 == L']')
+	else if(val2 == ']')
 	{
-	  content += L']';
+	  content += ']';
 	  break;
 	}
 	else
 	{
-	  content += wchar_t(val2);
+	  content += val2;
 	}
       }
     }
-    else if(val == L'$')
+    else if(val == '$')
     {
       return input_buffer.add(TransferToken(content, tt_word));
     }
-    else if(val == L'^')
+    else if(val == '^')
     {
       return input_buffer.add(TransferToken(content, tt_blank));
     }
     else
     {
-      content += wchar_t(val);
+      content += val;
     }
   }
 }
 
 void
-TransferMult::transfer(FILE *in, FILE *out)
+TransferMult::transfer(InputFile& in, UFILE* out)
 {
   int last = 0;
 
@@ -243,28 +224,25 @@ TransferMult::transfer(FILE *in, FILE *out)
       {
 	if(tmpword.size() != 0)
 	{
-	  pair<wstring, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
+	  pair<UString, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
 	  if(tr.first.size() != 0)
 	  {
-	    vector<wstring> multiword = acceptions(tr.first);
-	    if(multiword.size() > 1)
-	    {
-	      fputws_unlocked(L"[{]", output);
+	    vector<UString> multiword = acceptions(tr.first);
+	    if(multiword.size() > 1) {
+          write("[{]"_u, output);
 	    }
 	    for(unsigned int i = 0, limit = multiword.size(); i != limit; i++)
 	    {
 	      if(i > 0)
 	      {
-	        fputws_unlocked(L"[|]", output);
+	        write("[|]"_u, output);
 	      }
-	      fputwc_unlocked(L'^', output);
-	      fputws_unlocked(multiword[i].c_str(), output);
-	      fputwc_unlocked(L'$', output);
+          u_fprintf(output, "^%S$", multiword[i].c_str());
 	    }
 	    if(multiword.size() > 1)
 	    {
-	      fputws_unlocked(L".[][}]", output);
-            }
+	      write(".[][}]"_u, output);
+        }
 	  }
 	  tmpword.clear();
 	  isRule = false;
@@ -275,7 +253,7 @@ TransferMult::transfer(FILE *in, FILE *out)
 	}
 	else if(tmpblank.size() != 0)
 	{
-	  fputws_unlocked(tmpblank[0]->c_str(), output);
+	  write(*tmpblank[0], output);
 	  tmpblank.clear();
 	  last = input_buffer.getPos();
 	  ms.init(me->getInitial());
@@ -300,7 +278,7 @@ TransferMult::transfer(FILE *in, FILE *out)
 	break;
 
       case tt_blank:
-	ms.step(L' ');
+	ms.step(' ');
 	tmpblank.push_back(&current.getContent());
 	break;
 
@@ -312,45 +290,45 @@ TransferMult::transfer(FILE *in, FILE *out)
 	}
 	else
 	{
-	  fputws_unlocked(current.getContent().c_str(), output);
+	  write(current.getContent(), output);
 	  return;
 	}
 	break;
 
       default:
-	wcerr << L"Error: Unknown input token." << endl;
+	cerr << "Error: Unknown input token." << endl;
 	return;
     }
   }
 }
 
 bool
-TransferMult::isDefaultWord(wstring const &str)
+TransferMult::isDefaultWord(UString const &str)
 {
-  return str.find(L" D<");
+  return str.find(" D<"_u) != UString::npos;
 }
 
-vector<wstring>
-TransferMult::acceptions(wstring str)
+vector<UString>
+TransferMult::acceptions(UString str)
 {
-  vector<wstring> result;
+  vector<UString> result;
   int low = 0;
 
   // removing '@'
-  if(str[0] == L'@')
+  if(str[0] == '@')
   {
     str = str.substr(1);
   }
 
   for(unsigned int i = 0, limit = str.size(); i != limit; i++)
   {
-     if(str[i] == L'\\')
+     if(str[i] == '\\')
      {
        i++;
      }
-     else if(str[i] == L'/')
+     else if(str[i] == '/')
      {
-       wstring new_word = str.substr(low, i-low);
+       UString new_word = str.substr(low, i-low);
 
        if(result.size() > 1 && isDefaultWord(new_word))
        {
@@ -365,7 +343,7 @@ TransferMult::acceptions(wstring str)
      }
   }
 
-  wstring otherword = str.substr(low);
+  UString otherword = str.substr(low);
   if(result.size() > 0 && isDefaultWord(otherword))
   {
     result.push_back(result[0]);
@@ -379,10 +357,10 @@ TransferMult::acceptions(wstring str)
   // eliminar las acepciones sin sentido marcado
   if(result.size() >= 2)
   {
-    vector<wstring> result2;
+    vector<UString> result2;
     for(unsigned int i = 0, limit = result.size(); i != limit; i++)
     {
-      if(result[i].find(L"__") != wstring::npos)
+      if(result[i].find("__"_u) != UString::npos)
       {
         result2.push_back(result[i]);
       }
@@ -397,22 +375,22 @@ TransferMult::acceptions(wstring str)
 }
 
 void
-TransferMult::writeMultiple(list<vector<wstring> >::iterator itwords,
-                            list<wstring>::iterator itblanks,
-                            list<vector<wstring> >::const_iterator limitwords,
-                            wstring acum , bool multiple)
+TransferMult::writeMultiple(list<vector<UString> >::iterator itwords,
+                            list<UString>::iterator itblanks,
+                            list<vector<UString> >::const_iterator limitwords,
+                            UString acum , bool multiple)
 {
   if(itwords == limitwords)
   {
     if(multiple)
     {
-      output_string.append(L"[|]");
+      output_string.append("[|]"_u);
     }
     output_string.append(acum);
   }
   else
   {
-    vector<wstring> &refword = *itwords;
+    vector<UString> &refword = *itwords;
 
     itwords++;
 
@@ -420,19 +398,27 @@ TransferMult::writeMultiple(list<vector<wstring> >::iterator itwords,
     {
       for(unsigned int i = 0, limit = refword.size(); i != limit; i++)
       {
-        writeMultiple(itwords, itblanks, limitwords,
-                      acum + L"^" + refword[i] + L"$", multiple || (i > 0));
+        UString temp = acum;
+        temp += '^';
+        temp += refword[i];
+        temp += '$';
+        writeMultiple(itwords, itblanks, limitwords, temp, multiple || (i > 0));
       }
     }
     else
     {
-      wstring &refblank = *itblanks;
+      UString &refblank = *itblanks;
       itblanks++;
 
       for(unsigned int i = 0, limit = refword.size(); i != limit; i++)
       {
+        UString temp = acum;
+        temp += '^';
+        temp += refword[i];
+        temp += '$';
+        temp += refblank;
         writeMultiple(itwords, itblanks, limitwords,
-                      acum + L"^" + refword[i] + L"$" + refblank,
+                      temp,
                       multiple || (i > 0));
       }
     }
@@ -442,31 +428,26 @@ TransferMult::writeMultiple(list<vector<wstring> >::iterator itwords,
 void
 TransferMult::applyRule()
 {
-  list<wstring> blanks;
-  list<vector<wstring> > words;
+  list<UString> blanks;
+  list<vector<UString> > words;
 
-  pair<wstring, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
+  pair<UString, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
   words.push_back(acceptions(tr.first));
 
   for(unsigned int i = 1; i != numwords; i++)
   {
     blanks.push_back(*tmpblank[i-1]);
-    pair<wstring, int> tr = fstp.biltransWithQueue(*tmpword[i], false);
+    pair<UString, int> tr = fstp.biltransWithQueue(*tmpword[i], false);
     words.push_back(acceptions(tr.first));
   }
 
-  output_string = L"";
+  output_string.clear();
   writeMultiple(words.begin(), blanks.begin(), words.end());
 
-  if(output_string.find(L"[|]") != wstring::npos)
-  {
-    fputws_unlocked(L"[{]", output);
-    fputws_unlocked(output_string.c_str(), output);
-    fputws_unlocked(L".[][}]", output);
-  }
-  else
-  {
-    fputws_unlocked(output_string.c_str(), output);
+  if(output_string.find("[|]"_u) != UString::npos) {
+    u_fprintf(output, "[{]%S.[][}]", output_string.c_str());
+  } else {
+    write(output_string, output);
   }
 
   ms.init(me->getInitial());
@@ -477,22 +458,22 @@ TransferMult::applyRule()
 }
 
 void
-TransferMult::applyWord(wstring const &word_str)
+TransferMult::applyWord(UString const &word_str)
 {
-  ms.step(L'^');
+  ms.step('^');
   for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
   {
     switch(word_str[i])
     {
-      case L'\\':
+      case '\\':
         i++;
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
 
-      case L'<':
+      case '<':
 	for(unsigned int j = i+1; j != limit; j++)
 	{
-	  if(word_str[j] == L'>')
+	  if(word_str[j] == '>')
 	  {
 	    int symbol = alphabet(word_str.substr(i, j-i+1));
 	    if(symbol)
@@ -510,9 +491,9 @@ TransferMult::applyWord(wstring const &word_str)
 	break;
 
       default:
-	ms.step(towlower(word_str[i]), any_char);
+	ms.step(u_tolower(word_str[i]), any_char);
 	break;
     }
   }
-  ms.step(L'$');
+  ms.step('$');
 }
diff --git a/apertium/transfer_mult.h b/apertium/transfer_mult.h
index c6c8920..02d2963 100644
--- a/apertium/transfer_mult.h
+++ b/apertium/transfer_mult.h
@@ -40,20 +40,20 @@ private:
   Alphabet alphabet;
   MatchExe *me;
   MatchState ms;
-  map<string, ApertiumRE, Ltstr> attr_items;
-  map<string, string, Ltstr> variables;
-  map<string, int, Ltstr> macros;
-  map<string, set<string, Ltstr>, Ltstr> lists;
-  map<string, set<string, Ltstr>, Ltstr> listslow;
+  map<UString, ApertiumRE> attr_items;
+  map<UString, UString> variables;
+  map<UString, int> macros;
+  map<UString, set<UString>> lists;
+  map<UString, set<UString>> listslow;
   TransferWord **word;
-  string **blank;
+  UString **blank;
   Buffer<TransferToken> input_buffer;
-  vector<wstring *> tmpword;
-  vector<wstring *> tmpblank;
-  wstring output_string;
+  vector<UString *> tmpword;
+  vector<UString *> tmpblank;
+  UString output_string;
 
   FSTProcessor fstp;
-  FILE *output;
+  UFILE* output;
   int any_char;
   int any_tag;
   bool isRule;
@@ -66,33 +66,33 @@ private:
   OutputType defaultAttrs;
 
   void destroy();
-  void readData(FILE *input);
+  void readData(FILE* input);
   void readBil(string const &filename);
-  string caseOf(string const &str);
-  string copycase(string const &source_word, string const &target_word);
+  UString caseOf(UString const &str);
+  UString copycase(UString const &source_word, UString const &target_word);
 
-  bool beginsWith(string const &str1, string const &str2) const;
-  bool endsWith(string const &str1, string const &str2) const;
-  string tolower(string const &str) const;
-  string tags(string const &str) const;
-  wstring readWord(FILE *in);
-  wstring readBlank(FILE *in);
-  wstring readUntil(FILE *in, int const symbol) const;
-  void applyWord(wstring const &word_str);
+  bool beginsWith(UString const &str1, UString const &str2) const;
+  bool endsWith(UString const &str1, UString const &str2) const;
+  UString tolower(UString const &str) const;
+  UString tags(UString const &str) const;
+  UString readWord(InputFile& in);
+  UString readBlank(InputFile& in);
+  UString readUntil(InputFile& in, int const symbol) const;
+  void applyWord(UString const &word_str);
   void applyRule();
-  TransferToken & readToken(FILE *in);
-  void writeMultiple(list<vector<wstring> >::iterator itwords,
-                     list<wstring>::iterator itblanks,
-                     list<vector<wstring> >::const_iterator limitwords,
-                     wstring acum = L"", bool multiple = false);
-  vector<wstring> acceptions(wstring str);
-  bool isDefaultWord(wstring const &str);
+  TransferToken & readToken(InputFile& in);
+  void writeMultiple(list<vector<UString> >::iterator itwords,
+                     list<UString>::iterator itblanks,
+                     list<vector<UString> >::const_iterator limitwords,
+                     UString acum = ""_u, bool multiple = false);
+  vector<UString> acceptions(UString str);
+  bool isDefaultWord(UString const &str);
 public:
   TransferMult();
   ~TransferMult();
 
   void read(string const &datafile, string const &fstfile);
-  void transfer(FILE *in, FILE *out);
+  void transfer(InputFile& in, UFILE* out);
 };
 
 #endif
diff --git a/apertium/transfer_regex.cc b/apertium/transfer_regex.cc
new file mode 100644
index 0000000..f2cf207
--- /dev/null
+++ b/apertium/transfer_regex.cc
@@ -0,0 +1,109 @@
+#include <transfer_regex.h>
+
+struct TrieNode {
+  UChar32 c; // character leading into this node; add_entry() uses '\0' to mark the end of an entry
+  std::vector<TrieNode*> next; // child nodes, in the order add_char() created them
+};
+
+TrieNode*
+add_char(TrieNode* root, UChar32 c)
+{
+  // Return the child of `root` labelled `c`, appending a new one when none exists yet.
+  for (size_t i = 0; i < root->next.size(); i++) {
+    if (root->next[i]->c == c) {
+      return root->next[i];
+    }
+  }
+  TrieNode* child = new TrieNode{c, {}};
+  root->next.push_back(child);
+  return child;
+}
+
+void
+add_entry(TrieNode* root, const std::vector<int32_t>& vec)
+{
+  // Walk `vec`, extending the trie one code point at a time. An unescaped '.'
+  // becomes the two nodes '>' '<'; a backslash is dropped and makes the next
+  // code point literal. A final '\0' child marks the end of the entry.
+  TrieNode* cur = root;
+  bool literal_next = false;
+  for (size_t i = 0; i < vec.size(); i++) {
+    const int32_t c = vec[i];
+    if (literal_next || (c != '\\' && c != '.')) {
+      cur = add_char(cur, c);
+      literal_next = false;
+    } else if (c == '\\') {
+      literal_next = true;
+    } else {
+      cur = add_char(add_char(cur, '>'), '<');
+    }
+  }
+  add_char(cur, '\0');
+}
+
+UString
+unbuildTrie(TrieNode* root)
+{
+  UString single; // "[" followed by every child that is a complete one-character ending
+  single += '[';
+  std::vector<UString> groups; // alternative continuations that may follow root->c
+  bool end = false; // set when an entry ends at `root`, i.e. it has a childless ('\0') child
+  int single_count = 0;
+  for (auto it : root->next) {
+    if (it->next.empty()) {
+      end = true;
+    } else if (it->next.size() == 1 && it->next[0]->c == '\0') {
+      single += it->c; // NOTE(review): c is a UChar32; confirm UString can hold it without truncation
+      single_count++;
+    } else {
+      groups.push_back(unbuildTrie(it));
+    }
+  }
+  if (single_count > 0) {
+    if (single_count == 1) {
+      groups.push_back(single.substr(1)); // a lone character needs no [...] class
+    } else {
+      single += ']';
+      groups.push_back(single);
+    }
+  }
+  UString ret;
+  ret += root->c;
+  if (groups.empty()) {
+    return ret;
+  } else if (groups.size() == 1 && !end) {
+    ret += groups[0];
+  } else {
+    ret += '('; ret += '?'; ret += ':'; // also used for one optional group, so '?' covers all of it
+    for (size_t i = 0; i < groups.size(); i++) {
+      if (i > 0) {
+        ret += '|';
+      }
+      ret += groups[i];
+    }
+    ret += ')';
+  }
+  if (end) {
+    ret += '?'; // the continuation is optional because an entry may stop at root->c
+  }
+  return ret;
+}
+
+UString
+optimize_regex(const std::vector<UString>& options)
+{
+  // Merge all `options` into one trie rooted at '<', then serialise it as "(<...>)".
+  TrieNode* root = new TrieNode{'<', {}};
+  for (auto& s : options) {
+    std::vector<int32_t> v;
+    ustring_to_vec32(s, v);
+    add_entry(root, v);
+  }
+  UString ret;
+  ret += '('; ret.append(unbuildTrie(root)); ret += '>'; ret += ')';
+  for (std::vector<TrieNode*> todo(1, root); !todo.empty(); ) { // free the trie (it used to leak)
+    TrieNode* n = todo.back(); todo.pop_back();
+    todo.insert(todo.end(), n->next.begin(), n->next.end()); delete n;
+  }
+  return ret;
+}
diff --git a/apertium/transfer_regex.h b/apertium/transfer_regex.h
new file mode 100644
index 0000000..63543a1
--- /dev/null
+++ b/apertium/transfer_regex.h
@@ -0,0 +1,9 @@
+#ifndef __TRANSFER_REGEX_OPTIMIZER__
+#define __TRANSFER_REGEX_OPTIMIZER__
+
+#include <lttoolbox/ustring.h>
+#include <vector>
+// Combines `options` into one regex text of the form "(<...>)"; defined in transfer_regex.cc.
+UString optimize_regex(const std::vector<UString>& options);
+
+#endif // __TRANSFER_REGEX_OPTIMIZER__
diff --git a/apertium/transfer_token.cc b/apertium/transfer_token.cc
index d5b4858..a679f07 100644
--- a/apertium/transfer_token.cc
+++ b/apertium/transfer_token.cc
@@ -15,9 +15,7 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/transfer_token.h>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
+#include <lttoolbox/string_utils.h>
 
 void
 TransferToken::copy(TransferToken const &o)
@@ -36,7 +34,7 @@ type(tt_eof)
 {
 }
 
-TransferToken::TransferToken(wstring const &content,
+TransferToken::TransferToken(UString const &content,
 			     TransferTokenType type)
 {
   this->content = content;
@@ -70,7 +68,7 @@ TransferToken::getType()
   return type;
 }
 
-wstring &
+UString &
 TransferToken::getContent()
 {
   return content;
@@ -83,8 +81,7 @@ TransferToken::setType(TransferTokenType type)
 }
 
 void
-TransferToken::setContent(wstring const &content)
+TransferToken::setContent(UString const &content)
 {
   this->content = content;
 }
-
diff --git a/apertium/transfer_token.h b/apertium/transfer_token.h
index 039e7d6..a0ca3fc 100644
--- a/apertium/transfer_token.h
+++ b/apertium/transfer_token.h
@@ -18,6 +18,7 @@
 #define _TRANSFERTOKEN_
 
 #include <string>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
@@ -33,20 +34,20 @@ class TransferToken
 {
 private:
   TransferTokenType type;
-  wstring content;
+  UString content;
 
   void copy(TransferToken const &o);
   void destroy();
 public:
   TransferToken();
-  TransferToken(wstring const &content, TransferTokenType type);
+  TransferToken(UString const &content, TransferTokenType type);
   ~TransferToken();
   TransferToken(TransferToken const &o);
   TransferToken & operator =(TransferToken const &o);
   TransferTokenType getType();
-  wstring & getContent();
+  UString & getContent();
   void setType(TransferTokenType type);
-  void setContent(wstring const &content);
+  void setContent(UString const &content);
 };
 
 #endif
diff --git a/apertium/transfer_word.cc b/apertium/transfer_word.cc
index bca3232..a3ce8c7 100644
--- a/apertium/transfer_word.cc
+++ b/apertium/transfer_word.cc
@@ -17,9 +17,8 @@
 
 #include <apertium/transfer_word.h>
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
-using namespace Apertium;
 void
 TransferWord::copy(TransferWord const &o)
 {
@@ -40,7 +39,7 @@ queue_length(0)
 {
 }
 
-TransferWord::TransferWord(string const &src, string const &tgt, string const &ref, string const &wblank, int queue)
+TransferWord::TransferWord(UString const &src, UString const &tgt, UString const &ref, UString const &wblank, int queue)
 {
   init(src, tgt, ref, wblank);
   queue_length = queue;
@@ -68,7 +67,7 @@ TransferWord::operator =(TransferWord const &o)
 }
 
 void
-TransferWord::init(string const &src, string const &tgt, string const &ref, string const &wblank)
+TransferWord::init(UString const &src, UString const &tgt, UString const &ref, UString const &wblank)
 {
   s_str = src;
   t_str = tgt;
@@ -76,7 +75,7 @@ TransferWord::init(string const &src, string const &tgt, string const &ref, stri
   wb_str = wblank;
 }
 
-string
+UString
 TransferWord::source(ApertiumRE const &part, bool with_queue)
 {
   if(with_queue)
@@ -89,7 +88,7 @@ TransferWord::source(ApertiumRE const &part, bool with_queue)
   }
 }
 
-string
+UString
 TransferWord::target(ApertiumRE const &part, bool with_queue)
 {
   if(with_queue)
@@ -102,7 +101,7 @@ TransferWord::target(ApertiumRE const &part, bool with_queue)
   }
 }
 
-string
+UString
 TransferWord::reference(ApertiumRE const &part, bool with_queue)
 {
   if(with_queue)
@@ -115,14 +114,14 @@ TransferWord::reference(ApertiumRE const &part, bool with_queue)
   }
 }
 
-string
+UString
 TransferWord::getWblank()
 {
   return wb_str;
 }
 
 bool
-TransferWord::setSource(ApertiumRE const &part, string const &value,
+TransferWord::setSource(ApertiumRE const &part, UString const &value,
 			bool with_queue)
 {
   if(with_queue)
@@ -131,7 +130,7 @@ TransferWord::setSource(ApertiumRE const &part, string const &value,
   }
   else
   {
-    string mystring = s_str.substr(0, s_str.size() - queue_length);
+    UString mystring = s_str.substr(0, s_str.size() - queue_length);
     bool ret = part.replace(mystring, value);
     s_str = mystring + s_str.substr(s_str.size() - queue_length);
     return ret;
@@ -139,7 +138,7 @@ TransferWord::setSource(ApertiumRE const &part, string const &value,
 }
 
 bool
-TransferWord::setTarget(ApertiumRE const &part, string const &value,
+TransferWord::setTarget(ApertiumRE const &part, UString const &value,
 			bool with_queue)
 {
   if(with_queue)
@@ -148,7 +147,7 @@ TransferWord::setTarget(ApertiumRE const &part, string const &value,
   }
   else
   {
-    string mystring = t_str.substr(0, t_str.size() - queue_length);
+    UString mystring = t_str.substr(0, t_str.size() - queue_length);
     bool ret = part.replace(mystring, value);
     t_str = mystring + t_str.substr(t_str.size() - queue_length);
     return ret;
@@ -156,7 +155,7 @@ TransferWord::setTarget(ApertiumRE const &part, string const &value,
 }
 
 bool
-TransferWord::setReference(ApertiumRE const &part, string const &value,
+TransferWord::setReference(ApertiumRE const &part, UString const &value,
       bool with_queue)
 {
   if(with_queue)
@@ -165,7 +164,7 @@ TransferWord::setReference(ApertiumRE const &part, string const &value,
   }
   else
   {
-    string mystring = r_str.substr(0, r_str.size() - queue_length);
+    UString mystring = r_str.substr(0, r_str.size() - queue_length);
     bool ret = part.replace(mystring, value);
     r_str = mystring + r_str.substr(r_str.size() - queue_length);
     return ret;
diff --git a/apertium/transfer_word.h b/apertium/transfer_word.h
index 5e63133..c286edf 100644
--- a/apertium/transfer_word.h
+++ b/apertium/transfer_word.h
@@ -22,6 +22,7 @@
 #include <apertium/apertium_re.h>
 #include <string>
 #include <cstdlib>
+#include <lttoolbox/ustring.h>
 
 using namespace std;
 
@@ -34,22 +35,22 @@ private:
   /**
    * Source language word
    */
-  string s_str;
+  UString s_str;
 
   /**
    * Target language word
    */
-  string t_str;
+  UString t_str;
 
   /**
    * Reference word
    */
-  string r_str;
+  UString r_str;
   
   /**
    * Wordbound blank
    */
-  string wb_str;
+  UString wb_str;
 
   /**
    * Queue length
@@ -73,7 +74,7 @@ private:
    * @param part regular expression to match/access
    * @return reference to matched/accessed string
    */
-  string access(string const &str, ApertiumRE const &part);
+  UString access(UString const &str, ApertiumRE const &part);
 
   /**
    * Assings a value to the source/target/reference side of a word using the
@@ -82,7 +83,7 @@ private:
    * @param part regular expression to match/access
    * @param value the string to be assigned
    */
-  void assign(string &str, ApertiumRE const &part, string const &value);
+  void assign(UString &str, ApertiumRE const &part, UString const &value);
 
 public:
   /**
@@ -108,7 +109,7 @@ public:
    * @param wblank wordbound blank
    * @param queue queue lenght
    */
-  TransferWord(string const &src, string const &tgt, string const &ref, string const &wblank, int queue = 0);
+  TransferWord(UString const &src, UString const &tgt, UString const &ref, UString const &wblank, int queue = 0);
 
   /**
    * Assignment operator
@@ -125,7 +126,7 @@ public:
    * @param ref reference word
    * @param wblank wordbound blank
    */
-  void init(string const &src, string const &tgt, string const &ref, string const &wblank);
+  void init(UString const &src, UString const &tgt, UString const &ref, UString const &wblank);
 
   /**
    * Reference a source language word part
@@ -133,7 +134,7 @@ public:
    * @param with_queue access taking into account the queue
    * @returns reference to the part of string matched
    */
-  string source(ApertiumRE const &part, bool with_queue = true);
+  UString source(ApertiumRE const &part, bool with_queue = true);
 
   /**
    * Reference a target language word part
@@ -141,7 +142,7 @@ public:
    * @param with_queue access taking into account the queue
    * @returns reference to the part of string matched
    */
-  string target(ApertiumRE const &part, bool with_queue = true);
+  UString target(ApertiumRE const &part, bool with_queue = true);
 
   /**
    * Reference the reference word part
@@ -149,13 +150,13 @@ public:
    * @param with_queue access taking into account the queue
    * @returns reference to the part of string matched
    */
-  string reference(ApertiumRE const &part, bool with_queue = true);
+  UString reference(ApertiumRE const &part, bool with_queue = true);
   
   /**
    * Reference the wordbound blank part
    * @returns reference to the wordbound blank
    */
-  string getWblank();
+  UString getWblank();
 
   /**
    * Sets a value for a source language word part
@@ -164,7 +165,7 @@ public:
    * @param with_queue access taking or not into account the queue
    * @returns whether part matched
    */
-  bool setSource(ApertiumRE const &part, string const &value,
+  bool setSource(ApertiumRE const &part, UString const &value,
 		 bool with_queue = true);
 
   /**
@@ -174,7 +175,7 @@ public:
    * @param with_queue access taking or not into account the queue
    * @returns whether part matched
    */
-  bool setTarget(ApertiumRE const &part, string const &value,
+  bool setTarget(ApertiumRE const &part, UString const &value,
 		 bool with_queue = true);
 
   /**
@@ -184,7 +185,7 @@ public:
    * @param with_queue access taking or not into account the queue
    * @returns whether part matched
    */
-  bool setReference(ApertiumRE const &part, string const &value,
+  bool setReference(ApertiumRE const &part, UString const &value,
      bool with_queue = true);
 };
 
diff --git a/apertium/transfer_word_list.cc b/apertium/transfer_word_list.cc
deleted file mode 100644
index 9b730dd..0000000
--- a/apertium/transfer_word_list.cc
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#include <apertium/transfer_word_list.h>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
-void
-TransferWordList::copy(TransferWordList const &o)
-{
-  casefull_set = o.casefull_set;
-  caseless_set = o.caseless_set;
-}
-
-void
-TransferWordList::destroy()
-{
-}
-
-TransferWordList::TransferWordList()
-{
-}
-
-TransferWordList::~TransferWordList()
-{
-  destroy();
-}
-
-TransferWordList::TransferWordList(TransferWordList const &o)
-{
-  copy(o);
-}
-
-TransferWordList &
-TransferWordList::operator =(TransferWordList const &o)
-{
-  if(this != &o)
-  {
-    destroy();
-    copy(o);
-  }
-  return *this;
-}
-
-bool
-TransferWordList::search(string const &cad, bool caseless)
-{
-  if(caseless)
-  {
-    return caseless_set.find(cad) != caseless_set.end();
-  }
-  else
-  {
-    return casefull_set.find(cad) != casefull_set.end();
-  }
-}
-
-void
-TransferWordList::addWord(string const &cad)
-{
-  casefull_set.insert(cad);
-  caseless_set.insert(cad);
-}
diff --git a/apertium/transfer_word_list.h b/apertium/transfer_word_list.h
deleted file mode 100644
index 6e3f602..0000000
--- a/apertium/transfer_word_list.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#ifndef _TRANSFERWORDLIST_
-#define _TRANSFERWORDLIST_
-
-#include <cstring>
-#include <set>
-#include <string>
-#ifdef _MSC_VER
-#define strcasecmp _stricmp
-#endif
-
-using namespace std;
-
-struct ltstr
-{
-  bool operator()(string const &s1, string const &s2) const
-  {
-    return s1 < s2;
-  }
-};
-
-struct ltstri
-{
-  bool operator()(string const &s1, string const &s2) const
-  {
-    return strcasecmp(s1.c_str(), s2.c_str()) < 0;
-  }
-};
-
-class TransferWordList
-{
-private:
-  set<string, ltstr> casefull_set;
-  set<string, ltstri> caseless_set;
-
-  void copy(TransferWordList const &o);
-  void destroy();
-public:
-  TransferWordList();
-  ~TransferWordList();
-  TransferWordList(TransferWordList const &o);
-  TransferWordList & operator =(TransferWordList const &o);
-
-  bool search(string const &cad, bool caseless = false);
-  void addWord(string const &cad);
-};
-
-#endif
diff --git a/apertium/transferpp.cc b/apertium/transferpp.cc
index 62cf712..278da6b 100644
--- a/apertium/transferpp.cc
+++ b/apertium/transferpp.cc
@@ -18,10 +18,9 @@
 #include <lttoolbox/lt_locale.h>
 #include <cstdlib>
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <libgen.h>
 
-using namespace Apertium;
 using namespace std;
 
 int main(int argc, char *argv[])
@@ -30,7 +29,7 @@ int main(int argc, char *argv[])
 
   if(argc != 3)
   {
-    wcerr << "USAGE: " << basename(argv[0]) << " rules_file transfer_file" << endl;
+    cerr << "USAGE: " << basename(argv[0]) << " rules_file transfer_file" << endl;
     exit(EXIT_FAILURE);
   }
 
diff --git a/apertium/trx_reader.cc b/apertium/trx_reader.cc
index 8cc0e2d..e400cc0 100644
--- a/apertium/trx_reader.cc
+++ b/apertium/trx_reader.cc
@@ -20,14 +20,10 @@
 
 #include <cstdlib>
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <apertium/transfer_regex.h>
 
-using namespace Apertium;
-wstring const
-TRXReader::ANY_TAG = L"<ANY_TAG>";
-
-wstring const
-TRXReader::ANY_CHAR = L"<ANY_CHAR>";
+UString const TRXReader::ANY_TAG         = "<ANY_TAG>"_u;
+UString const TRXReader::ANY_CHAR        = "<ANY_CHAR>"_u;
 
 TRXReader::TRXReader()
 {
@@ -36,29 +32,29 @@ TRXReader::TRXReader()
 }
 
 int
-TRXReader::insertLemma(int const base, wstring const &lemma)
+TRXReader::insertLemma(int const base, UString const &lemma)
 {
   int retval = base;
   static int const any_char = td.getAlphabet()(ANY_CHAR);
-  if(lemma == L"")
+  if(lemma.empty())
   {
     retval = td.getTransducer().insertSingleTransduction(any_char, retval);
     td.getTransducer().linkStates(retval, retval, any_char);
-    int another = td.getTransducer().insertSingleTransduction(L'\\', retval);
+    int another = td.getTransducer().insertSingleTransduction('\\', retval);
     td.getTransducer().linkStates(another, retval, any_char);
   }
   else
   {
     for(unsigned int i = 0, limit = lemma.size();  i != limit; i++)
     {
-      if(lemma[i] == L'\\')
+      if(lemma[i] == '\\')
       {
-        retval = td.getTransducer().insertSingleTransduction(L'\\', retval);
+        retval = td.getTransducer().insertSingleTransduction('\\', retval);
         i++;
         retval = td.getTransducer().insertSingleTransduction(int(lemma[i]),
                                                              retval);
       }
-      else if(lemma[i] == L'*')
+      else if(lemma[i] == '*')
       {
         retval = td.getTransducer().insertSingleTransduction(any_char, retval);
         td.getTransducer().linkStates(retval, retval, any_char);
@@ -75,7 +71,7 @@ TRXReader::insertLemma(int const base, wstring const &lemma)
 }
 
 int
-TRXReader::insertTags(int const base, wstring const &tags)
+TRXReader::insertTags(int const base, UString const &tags)
 {
   int retval = base;
   static int const any_tag = td.getAlphabet()(ANY_TAG);
@@ -83,7 +79,7 @@ TRXReader::insertTags(int const base, wstring const &tags)
   {
     for(unsigned int i = 0, limit = tags.size(); i < limit; i++)
     {
-      if(tags[i] == L'*')
+      if(tags[i] == '*')
       {
         retval = td.getTransducer().insertSingleTransduction(any_tag, retval);
         td.getTransducer().linkStates(retval, retval, any_tag);
@@ -91,10 +87,10 @@ TRXReader::insertTags(int const base, wstring const &tags)
       }
       else
       {
-        wstring symbol = L"<";
+        UString symbol = "<"_u;
         for(unsigned int j = i; j != limit; j++)
         {
-          if(tags[j] == L'.')
+          if(tags[j] == '.')
           {
             symbol.append(tags.substr(i, j-i));
             i = j;
@@ -102,12 +98,12 @@ TRXReader::insertTags(int const base, wstring const &tags)
           }
         }
 
-        if(symbol == L"<")
+        if(symbol == "<"_u)
         {
           symbol.append(tags.substr(i));
           i = limit;
         }
-        symbol += L'>';
+        symbol += '>';
         td.getAlphabet().includeSymbol(symbol);
         retval = td.getTransducer().insertSingleTransduction(td.getAlphabet()(symbol), retval);
       }
@@ -126,56 +122,56 @@ TRXReader::parse()
 {
   procDefCats();
   step();
-  while(name == L"#text" || name == L"#comment")
+  while(name == "#text"_u || name == "#comment"_u)
   {
     step();
   }
 
-  if(name == L"section-def-attrs")
+  if(name == "section-def-attrs"_u)
   {
     procDefAttrs();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
 
-  if(name == L"section-def-vars")
+  if(name == "section-def-vars"_u)
   {
     procDefVars();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
 
-  if(name == L"section-def-lists")
+  if(name == "section-def-lists"_u)
   {
     procDefLists();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
 
-  if(name == L"section-def-macros")
+  if(name == "section-def-macros"_u)
   {
     procDefMacros();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
 
-  if(name == L"section-rules")
+  if(name == "section-rules"_u)
   {
     procRules();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
@@ -189,17 +185,17 @@ TRXReader::procRules()
   set<int> alive_states;
 
   while(type != XML_READER_TYPE_END_ELEMENT ||
-	name != L"section-rules")
+	name != "section-rules"_u)
   {
     step();
-    if(name == L"rule")
+    if(name == "rule"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
         count++;
       }
     }
-    else if(name == L"pattern")
+    else if(name == "pattern"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
@@ -220,27 +216,27 @@ TRXReader::procRules()
           }
           else
           {
-            wcerr << L"Warning (" << xmlTextReaderGetParserLineNumber(reader);
-            wcerr << L"): "
-              << L"Paths to rule " << count << " blocked by rule " << td.seen_rules[*it]
-              << L"." << endl;
+            cerr << "Warning (" << xmlTextReaderGetParserLineNumber(reader);
+            cerr << "): "
+              << "Paths to rule " << count << " blocked by rule " << td.seen_rules[*it]
+              << "." << endl;
 
           }
         }
       }
     }
-    else if(name == L"pattern-item")
+    else if(name == "pattern-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        pair<multimap<wstring, LemmaTags, Ltstr>::iterator,
-             multimap<wstring, LemmaTags, Ltstr>::iterator> range;
+        pair<multimap<UString, LemmaTags>::iterator,
+             multimap<UString, LemmaTags>::iterator> range;
 
-        range = cat_items.equal_range(attrib(L"n"));
+        range = cat_items.equal_range(attrib("n"_u));
 
         if(range.first == range.second)
         {
-          parseError(L"Undefined cat-item '" + attrib(L"n"));
+          parseError("Undefined cat-item '"_u + attrib("n"_u));
         }
 
 // new code
@@ -253,12 +249,12 @@ TRXReader::procRules()
               it != limit; it++)
           {
             // mark of begin of word
-            int tmp = td.getTransducer().insertSingleTransduction(L'^', *it);
+            int tmp = td.getTransducer().insertSingleTransduction('^', *it);
             if(*it != td.getTransducer().getInitial())
             {
               // insert optional blank between two words
-              int alt = td.getTransducer().insertSingleTransduction(L' ', *it);
-              td.getTransducer().linkStates(alt, tmp, L'^');
+              int alt = td.getTransducer().insertSingleTransduction(' ', *it);
+              td.getTransducer().linkStates(alt, tmp, '^');
             }
 
             // insert word
@@ -266,7 +262,7 @@ TRXReader::procRules()
             tmp = insertTags(tmp, range.first->second.tags);
 
             // insert mark of end of word
-            tmp = td.getTransducer().insertSingleTransduction(L'$', tmp);
+            tmp = td.getTransducer().insertSingleTransduction('$', tmp);
 
             // set as alive_state
             alive_states_new.insert(tmp);
@@ -277,21 +273,21 @@ TRXReader::procRules()
         alive_states = alive_states_new;
       }
     }
-    else if(name == L"let")
+    else if(name == "let"_u)
     {
       int count = 0;
       int lineno = xmlTextReaderGetParserLineNumber(reader);
-      while(name != L"let" || type != XML_READER_TYPE_END_ELEMENT)
+      while(name != "let"_u || type != XML_READER_TYPE_END_ELEMENT)
       {
         step();
         if(type == XML_ELEMENT_NODE)
         {
           count++;
 
-          if(name == L"clip" && attrib(L"side") == L"sl")
+          if(name == "clip"_u && attrib("side"_u) == "sl"_u)
           {
-            wcerr << L"Warning (" << lineno;
-            wcerr << L"): assignment to 'sl' side has no effect." << endl;
+            cerr << "Warning (" << lineno;
+            cerr << "): assignment to 'sl' side has no effect." << endl;
           }
         }
 
@@ -311,8 +307,8 @@ TRXReader::write(string const &filename)
   FILE *out = fopen(filename.c_str(), "wb");
   if(!out)
   {
-    wcerr << "Error: cannot open '" << filename;
-    wcerr << "' for writing" << endl;
+    cerr << "Error: cannot open '" << filename;
+    cerr << "' for writing" << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -324,41 +320,42 @@ TRXReader::write(string const &filename)
 void
 TRXReader::procDefAttrs()
 {
-  wstring attrname;
+  UString attrname;
+  vector<UString> items;
 
   while(type != XML_READER_TYPE_END_ELEMENT ||
-        name != L"section-def-attrs")
+        name != "section-def-attrs"_u)
   {
     step();
-    if(name == L"attr-item")
+    if(name == "attr-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        insertAttrItem(attrname, attrib(L"tags"));
+        items.push_back(attrib("tags"_u));
       }
     }
-    else if(name == L"def-attr")
+    else if(name == "def-attr"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        attrname = attrib(L"n");
+        attrname = attrib("n"_u);
       }
       else
       {
-        wstring all = td.getAttrItems()[attrname];
-        td.getAttrItems()[attrname] = L"(" + all + L")";
-        attrname = L"";
+        td.getAttrItems()[attrname] = optimize_regex(items);
+        items.clear();
+        attrname.clear();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"section-def-attrs")
+    else if(name == "section-def-attrs"_u)
     {
       // do nothing
     }
@@ -372,56 +369,56 @@ TRXReader::procDefAttrs()
 void
 TRXReader::procDefCats()
 {
-  while(type == XML_READER_TYPE_END_ELEMENT || !(name == L"transfer" || name == L"interchunk" || name == L"postchunk"))
+  while(type == XML_READER_TYPE_END_ELEMENT || !(name == "transfer"_u || name == "interchunk"_u || name == "postchunk"_u))
   {
     step();
-    if(name != L"#text" && name != L"transfer" &&  name != L"interchunk" &&
-       name != L"postchunk" && name != L"section-def-cats" && name != L"#comment")
+    if(name != "#text"_u && name != "transfer"_u &&  name != "interchunk"_u &&
+       name != "postchunk"_u && name != "section-def-cats"_u && name != "#comment"_u)
     {
       unexpectedTag();
     }
   }
 
-  wstring catname;
+  UString catname;
 
   while(type != XML_READER_TYPE_END_ELEMENT ||
-        name != L"section-def-cats")
+        name != "section-def-cats"_u)
   {
     step();
-    if(name == L"cat-item")
+    if(name == "cat-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        if(attrib(L"tags") != L"")
+        if(!attrib("tags"_u).empty())
         {
-          insertCatItem(catname, attrib(L"lemma"), attrib(L"tags"));
+          insertCatItem(catname, attrib("lemma"_u), attrib("tags"_u));
         }
         else
         {
-          insertCatItem(catname, attrib(L"name"), L"");
+          insertCatItem(catname, attrib("name"_u), ""_u);
         }
       }
     }
-    else if(name == L"def-cat")
+    else if(name == "def-cat"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        catname = attrib(L"n");
+        catname = attrib("n"_u);
       }
       else
       {
-        catname = L"";
+        catname.clear();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"section-def-cats")
+    else if(name == "section-def-cats"_u)
     {
       // do nothing
     }
@@ -436,25 +433,25 @@ void
 TRXReader::procDefVars()
 {
   while(type != XML_READER_TYPE_END_ELEMENT ||
-        name != L"section-def-vars")
+        name != "section-def-vars"_u)
   {
     step();
-    if(name == L"def-var")
+    if(name == "def-var"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        createVar(attrib(L"n"), attrib(L"v"));
+        createVar(attrib("n"_u), attrib("v"_u));
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"section-def-vars")
+    else if(name == "section-def-vars"_u)
     {
       // do nothing
     }
@@ -468,39 +465,39 @@ TRXReader::procDefVars()
 void
 TRXReader::procDefLists()
 {
-  wstring listname;
+  UString listname;
 
   while(type != XML_READER_TYPE_END_ELEMENT ||
-	name != L"section-def-lists")
+	name != "section-def-lists"_u)
   {
     step();
-    if(name == L"list-item")
+    if(name == "list-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        insertListItem(listname, attrib(L"v"));
+        insertListItem(listname, attrib("v"_u));
       }
     }
-    else if(name == L"def-list")
+    else if(name == "def-list"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        listname = attrib(L"n");
+        listname = attrib("n"_u);
       }
       else
       {
-        listname = L"";
+        listname.clear();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"section-def-lists")
+    else if(name == "section-def-lists"_u)
     {
       // do nothing
     }
@@ -516,72 +513,47 @@ TRXReader::procDefMacros()
 {
   int count = 0;
   while(type != XML_READER_TYPE_END_ELEMENT ||
-	name != L"section-def-macros")
+	name != "section-def-macros"_u)
   {
     step();
-    if(name == L"def-macro")
+    if(name == "def-macro"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        createMacro(attrib(L"n"), count++);
+        createMacro(attrib("n"_u), count++);
       }
     }
   }
 }
 
 void
-TRXReader::createMacro(wstring const &name, int const value)
+TRXReader::createMacro(UString const &name, int const value)
 {
   if(td.getMacros().find(name) != td.getMacros().end())
   {
-    parseError(L"Macro '" + name + L"' defined at least twice");
+    parseError("Macro '"_u + name + "' defined at least twice"_u);
   }
   td.getMacros()[name] = value;
 }
 
 void
-TRXReader::insertListItem(wstring const &name, wstring const &value)
+TRXReader::insertListItem(UString const &name, UString const &value)
 {
   td.getLists()[name].insert(value);
 }
 
 void
-TRXReader::createVar(wstring const &name, wstring const &initial_value)
+TRXReader::createVar(UString const &name, UString const &initial_value)
 {
   td.getVariables()[name] = initial_value;
 }
 
 void
-TRXReader::insertCatItem(wstring const &name, wstring const &lemma,
-                         wstring const &tags)
+TRXReader::insertCatItem(UString const &name, UString const &lemma,
+                         UString const &tags)
 {
   LemmaTags lt;
   lt.lemma = lemma;
   lt.tags = tags;
-  cat_items.insert(pair<wstring, LemmaTags>(name, lt));
-}
-
-void
-TRXReader::insertAttrItem(wstring const &name, wstring const &tags)
-{
-  if(td.getAttrItems()[name].size() != 0)
-  {
-    td.getAttrItems()[name] += L'|';
-  }
-
-  td.getAttrItems()[name] += '<';
-
-  for(unsigned int i = 0, limit = tags.size(); i != limit; i++)
-  {
-    if(tags[i] == L'.')
-    {
-      td.getAttrItems()[name].append(L"><");
-    }
-    else
-    {
-      td.getAttrItems()[name] += tags[i];
-    }
-  }
-  td.getAttrItems()[name] += L'>';
-
+  cat_items.insert(pair<UString, LemmaTags>(name, lt));
 }
diff --git a/apertium/trx_reader.h b/apertium/trx_reader.h
index 18ebef2..7766123 100644
--- a/apertium/trx_reader.h
+++ b/apertium/trx_reader.h
@@ -19,7 +19,6 @@
 
 #include <apertium/transfer_data.h>
 #include <apertium/xml_reader.h>
-#include <lttoolbox/ltstr.h>
 
 #include <libxml/xmlreader.h>
 #include <map>
@@ -32,11 +31,11 @@ class TRXReader : public XMLReader
 private:
   struct LemmaTags
   {
-    wstring lemma;
-    wstring tags;
+    UString lemma;
+    UString tags;
   };
 
-  multimap<wstring, LemmaTags, Ltstr> cat_items;
+  multimap<UString, LemmaTags> cat_items;
   TransferData td;
 
   void destroy();
@@ -50,22 +49,21 @@ private:
   void procDefMacros();
   void procRules();
 
-  void insertCatItem(wstring const &name, wstring const &lemma,
-		     wstring const &tags);
-  void insertAttrItem(wstring const &name, wstring const &tags);
-  void createVar(wstring const &name, wstring const &initial_value);
-  void insertListItem(wstring const &name, wstring const &value);
-  void createMacro(wstring const &name, int const val);
+  void insertCatItem(UString const &name, UString const &lemma,
+		     UString const &tags);
+  void createVar(UString const &name, UString const &initial_value);
+  void insertListItem(UString const &name, UString const &value);
+  void createMacro(UString const &name, int const val);
 
-  int insertLemma(int const base, wstring const &lemma);
-  int insertTags(int const base, wstring const &tags);
+  int insertLemma(int const base, UString const &lemma);
+  int insertTags(int const base, UString const &tags);
 
 protected:
   virtual void parse();
 
 public:
-  static wstring const ANY_TAG;
-  static wstring const ANY_CHAR;
+  static UString const ANY_TAG;
+  static UString const ANY_CHAR;
 
 
   TRXReader();
diff --git a/apertium/tsx_reader.cc b/apertium/tsx_reader.cc
index 750bbf4..ef20298 100644
--- a/apertium/tsx_reader.cc
+++ b/apertium/tsx_reader.cc
@@ -17,12 +17,11 @@
 #include <apertium/tsx_reader.h>
 #include <lttoolbox/xml_parse_util.h>
 #include <lttoolbox/compression.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 
 #include <cstdlib>
 #include <iostream>
 
-using namespace Apertium;
 void
 TSXReader::copy(TSXReader const &o)
 {
@@ -61,13 +60,13 @@ TSXReader::clearTagIndex()
 {
   tag_index->clear();
   array_tags->clear();
-  newTagIndex(L"LPAR");
-  newTagIndex(L"RPAR");
-  newTagIndex(L"LQUEST");
-  newTagIndex(L"CM");
-  newTagIndex(L"SENT");
-  newTagIndex(L"kEOF");
-  newTagIndex(L"kUNDEF");
+  newTagIndex("LPAR"_u);
+  newTagIndex("RPAR"_u);
+  newTagIndex("LQUEST"_u);
+  newTagIndex("CM"_u);
+  newTagIndex("SENT"_u);
+  newTagIndex("kEOF"_u);
+  newTagIndex("kUNDEF"_u);
 }
 
 TSXReader &
@@ -82,31 +81,31 @@ TSXReader::operator =(TSXReader const &o)
 }
 
 void
-TSXReader::newTagIndex(wstring const &tag)
+TSXReader::newTagIndex(UString const &tag)
 {
-  if(tag_index->find(L"TAG_" + tag) != tag_index->end())
+  if(tag_index->find("TAG_"_u + tag) != tag_index->end())
   {
-    parseError(L"'" + tag + L"' already defined");
+    parseError("'"_u + tag + "' already defined"_u);
   }
 
-  array_tags->push_back(L"TAG_" + tag);
-  (*tag_index)[L"TAG_" + tag] = array_tags->size() - 1;
+  array_tags->push_back("TAG_"_u + tag);
+  (*tag_index)["TAG_"_u + tag] = array_tags->size() - 1;
 }
 
 void
-TSXReader::newDefTag(wstring const &tag)
+TSXReader::newDefTag(UString const &tag)
 {
-  if(tag_index->find(L"TAG_" + tag) != tag_index->end())
+  if(tag_index->find("TAG_"_u + tag) != tag_index->end())
   {
-    parseError(L"'" + tag + L"' already defined");
+    parseError("'"_u + tag + "' already defined"_u);
   }
 
   array_tags->push_back(tag);
-  (*tag_index)[L"TAG_" + tag] = array_tags->size() - 1;
+  (*tag_index)["TAG_"_u + tag] = array_tags->size() - 1;
 }
 
 void
-TSXReader::newConstant(wstring const &constant)
+TSXReader::newConstant(UString const &constant)
 {
   constants->setConstant(constant, array_tags->size());
   array_tags->push_back(constant);
@@ -115,26 +114,26 @@ TSXReader::newConstant(wstring const &constant)
 void
 TSXReader::procDiscardOnAmbiguity()
 {
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"discard-on-ambiguity")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "discard-on-ambiguity"_u)
   {
     step();
 
-    if(name == L"discard")
+    if(name == "discard"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        tdata.addDiscard(L"<" + StringUtils::substitute(attrib(L"tags"), L".", L"><") + L">");
+        tdata.addDiscard("<"_u + StringUtils::substitute(attrib("tags"_u), "."_u, "><"_u) + ">"_u);
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"discard-on-ambiguity")
+    else if(name == "discard-on-ambiguity"_u)
     {
       if(type == XML_READER_TYPE_END_ELEMENT)
       {
@@ -142,7 +141,7 @@ TSXReader::procDiscardOnAmbiguity()
       }
       else
       {
-	parseError(L"Unexpected 'discard-on-ambiguity' open tag");
+	parseError("Unexpected 'discard-on-ambiguity' open tag"_u);
       }
     }
     else
@@ -155,36 +154,36 @@ TSXReader::procDiscardOnAmbiguity()
 void
 TSXReader::procDefLabel()
 {
-  wstring name_attr = attrib(L"name");
-  wstring closed_attr = attrib(L"closed");
+  UString name_attr = attrib("name"_u);
+  UString closed_attr = attrib("closed"_u);
   newDefTag(name_attr);
 
-  if(closed_attr != L"true")
+  if(closed_attr != "true"_u)
   {
-    open_class->insert((*tag_index)[L"TAG_"+name_attr]);
+    open_class->insert((*tag_index)["TAG_"_u + name_attr]);
   }
 
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"def-label")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "def-label"_u)
   {
     step();
 
-    if(name == L"tags-item")
+    if(name == "tags-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-	plist->insert((*tag_index)[L"TAG_"+name_attr], attrib(L"lemma"),
-		     attrib(L"tags"));
+	plist->insert((*tag_index)["TAG_"_u + name_attr], attrib("lemma"_u),
+		     attrib("tags"_u));
       }
     }
-    else if(name == L"def-label")
+    else if(name == "def-label"_u)
     {
       return;
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
@@ -198,50 +197,50 @@ TSXReader::procDefLabel()
 void
 TSXReader::procDefMult()
 {
-  wstring name_attr = attrib(L"name");
-  wstring closed_attr = attrib(L"closed");
+  UString name_attr = attrib("name"_u);
+  UString closed_attr = attrib("closed"_u);
   newDefTag(name_attr);
-  if(closed_attr != L"true")
+  if(closed_attr != "true"_u)
   {
-    open_class->insert((*tag_index)[L"TAG_"+name_attr]);
+    open_class->insert((*tag_index)["TAG_"_u + name_attr]);
   }
 
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"def-mult")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "def-mult"_u)
   {
     step();
-    if(name == L"sequence")
+    if(name == "sequence"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
 	plist->beginSequence();
-	while(type != XML_READER_TYPE_END_ELEMENT || name != L"sequence")
+	while(type != XML_READER_TYPE_END_ELEMENT || name != "sequence"_u)
 	{
 	  step();
-	  if(name == L"label-item")
+	  if(name == "label-item"_u)
 	  {
 	    if(type != XML_READER_TYPE_END_ELEMENT)
 	    {
-	      plist->insert((*tag_index)[L"TAG_"+name_attr],
-                            (*tag_index)[L"TAG_"+attrib(L"label")]);
+	      plist->insert((*tag_index)["TAG_"_u + name_attr],
+                            (*tag_index)["TAG_"_u + attrib("label"_u)]);
 	    }
 	  }
-	  else if(name == L"tags-item")
+	  else if(name == "tags-item"_u)
 	  {
 	    if(type != XML_READER_TYPE_END_ELEMENT)
 	    {
-	      plist->insert((*tag_index)[L"TAG_"+name_attr],
-			    attrib(L"lemma"), attrib(L"tags"));
+	      plist->insert((*tag_index)["TAG_"_u + name_attr],
+			    attrib("lemma"_u), attrib("tags"_u));
 	    }
 	  }
-	  else if(name == L"sequence")
+	  else if(name == "sequence"_u)
 	  {
 	    break;
 	  }
-	  else if(name == L"#text")
+	  else if(name == "#text"_u)
 	  {
 	    // do nothing
 	  }
-	  else if(name == L"#comment")
+	  else if(name == "#comment"_u)
 	  {
 	    // do nothing
           }
@@ -249,15 +248,15 @@ TSXReader::procDefMult()
 	plist->endSequence();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"def-mult")
+    else if(name == "def-mult"_u)
     {
       // do nothing
     }
@@ -271,41 +270,41 @@ TSXReader::procDefMult()
 void
 TSXReader::procTagset()
 {
-  while(type == XML_READER_TYPE_END_ELEMENT || name != L"tagset")
+  while(type == XML_READER_TYPE_END_ELEMENT || name != "tagset"_u)
   {
     step();
-    if(name != L"#text" && name != L"tagger" && name != L"tagset")
+    if(name != "#text"_u && name != "tagger"_u && name != "tagset"_u)
     {
       unexpectedTag();
     }
   }
 
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"tagset")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "tagset"_u)
   {
     step();
-    if(name == L"def-label")
+    if(name == "def-label"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
 	procDefLabel();
       }
     }
-    else if(name == L"def-mult")
+    else if(name == "def-mult"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
         procDefMult();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"tagset")
+    else if(name == "tagset"_u)
     {
       // do nothing
     }
@@ -323,27 +322,27 @@ TSXReader::procLabelSequence()
   TForbidRule forbid_rule;
 
   step();
-  while(name == L"#text" || name == L"#comment")
+  while(name == "#text"_u || name == "#comment"_u)
   {
     step();
   }
-  if(name != L"label-item")
+  if(name != "label-item"_u)
   {
-    parseError(L"<label-item> tag expected");
+    parseError("<label-item> tag expected"_u);
   }
 
-  forbid_rule.tagi = (*tag_index)[L"TAG_" + attrib(L"label")];
+  forbid_rule.tagi = (*tag_index)["TAG_"_u + attrib("label"_u)];
 
   step();
-  while(name == L"#text" || name == L"#comment")
+  while(name == "#text"_u || name == "#comment"_u)
   {
     step();
   }
-  if(name != L"label-item")
+  if(name != "label-item"_u)
   {
-    parseError(L"<label-item> tag expected");
+    parseError("<label-item> tag expected"_u);
   }
-  forbid_rule.tagj = (*tag_index)[L"TAG_" + attrib(L"label")];
+  forbid_rule.tagj = (*tag_index)["TAG_"_u + attrib("label"_u)];
 
   forbid_rules->push_back(forbid_rule);
 }
@@ -351,25 +350,25 @@ TSXReader::procLabelSequence()
 void
 TSXReader::procForbid()
 {
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"forbid")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "forbid"_u)
   {
     step();
-    if(name == L"label-sequence")
+    if(name == "label-sequence"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
 	procLabelSequence();
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"forbid")
+    else if(name == "forbid"_u)
     {
       if(type == XML_READER_TYPE_END_ELEMENT)
       {
@@ -377,12 +376,12 @@ TSXReader::procForbid()
       }
       else
       {
-	parseError(L"Unexpected '" + name + L"' open tag");
+	parseError("Unexpected '"_u + name + "' open tag"_u);
       }
     }
     else
     {
-      parseError(L"Unexpected '" + name + L"' tag");
+      parseError("Unexpected '"_u + name + "' tag"_u);
     }
   }
 }
@@ -391,14 +390,14 @@ void
 TSXReader::procEnforce()
 {
   TEnforceAfterRule aux;
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"enforce-rules")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "enforce-rules"_u)
   {
     step();
-    if(name == L"enforce-after")
+    if(name == "enforce-after"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-	aux.tagi = (*tag_index)[L"TAG_" + attrib(L"label")];
+	aux.tagi = (*tag_index)["TAG_"_u + attrib("label"_u)];
       }
       else
       {
@@ -406,26 +405,26 @@ TSXReader::procEnforce()
 	aux.tagsj.clear();
       }
     }
-    else if(name == L"label-set")
+    else if(name == "label-set"_u)
     {
       // do nothing
     }
-    else if(name == L"label-item")
+    else if(name == "label-item"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-	aux.tagsj.push_back((*tag_index)[L"TAG_" + attrib(L"label")]);
+	aux.tagsj.push_back((*tag_index)["TAG_"_u + attrib("label"_u)]);
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       // do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"enforce-rules")
+    else if(name == "enforce-rules"_u)
     {
       if(type == XML_READER_TYPE_END_ELEMENT)
       {
@@ -433,12 +432,12 @@ TSXReader::procEnforce()
       }
       else
       {
-	parseError(L"Unexpected 'enforce-rules' open tag");
+	parseError("Unexpected 'enforce-rules' open tag"_u);
       }
     }
     else
     {
-      parseError(L"Unexpected '" + name + L"' tag");
+      parseError("Unexpected '"_u + name + "' tag"_u);
     }
   }
 }
@@ -446,26 +445,26 @@ TSXReader::procEnforce()
 void
 TSXReader::procPreferences()
 {
-  while(type != XML_READER_TYPE_END_ELEMENT || name != L"preferences")
+  while(type != XML_READER_TYPE_END_ELEMENT || name != "preferences"_u)
   {
     step();
-    if(name == L"prefer")
+    if(name == "prefer"_u)
     {
       if(type != XML_READER_TYPE_END_ELEMENT)
       {
-        wstring const tags = L"<" + StringUtils::substitute(attrib(L"tags"), L".", L"><") + L">";
+        UString const tags = "<"_u + StringUtils::substitute(attrib("tags"_u), "."_u, "><"_u) + ">"_u;
 	prefer_rules->push_back(tags);
       }
     }
-    else if(name == L"#text")
+    else if(name == "#text"_u)
     {
       //do nothing
     }
-    else if(name == L"#comment")
+    else if(name == "#comment"_u)
     {
       // do nothing
     }
-    else if(name == L"preferences")
+    else if(name == "preferences"_u)
     {
       if(type == XML_READER_TYPE_END_ELEMENT)
       {
@@ -473,12 +472,12 @@ TSXReader::procPreferences()
       }
       else
       {
-	parseError(L"Unexpected 'preferences' open tag");
+	parseError("Unexpected 'preferences' open tag"_u);
       }
     }
     else
     {
-      parseError(L"Unexpected '" + name + L"' tag");
+      parseError("Unexpected '"_u + name + "' tag"_u);
     }
   }
 }
@@ -494,38 +493,38 @@ TSXReader::parse()
   procTagset();
 
   step();
-  while(name == L"#text" || name == L"#comment")
+  while(name == "#text"_u || name == "#comment"_u)
   {
     step();
   }
-  if(name == L"forbid")
+  if(name == "forbid"_u)
   {
     procForbid();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
-  if(name == L"enforce-rules")
+  if(name == "enforce-rules"_u)
   {
     procEnforce();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
-  if(name == L"preferences")
+  if(name == "preferences"_u)
   {
     procPreferences();
     step();
-    while(name == L"#text" || name == L"#comment")
+    while(name == "#text"_u || name == "#comment"_u)
     {
       step();
     }
   }
-  if(name == L"discard-on-ambiguity")
+  if(name == "discard-on-ambiguity"_u)
   {
     if(type != XML_READER_TYPE_END_ELEMENT)
     {
@@ -533,20 +532,20 @@ TSXReader::parse()
     }
   }
 
-  newConstant(L"kMOT");
-  newConstant(L"kDOLLAR");
-  newConstant(L"kBARRA");
-  newConstant(L"kMAS");
-  newConstant(L"kIGNORAR");
-  newConstant(L"kBEGIN");
-  newConstant(L"kUNKNOWN");
-
-  plist->insert((*tag_index)[L"TAG_LPAR"], L"", L"lpar");
-  plist->insert((*tag_index)[L"TAG_RPAR"], L"", L"rpar");
-  plist->insert((*tag_index)[L"TAG_LQUEST"], L"", L"lquest");
-  plist->insert((*tag_index)[L"TAG_CM"], L"", L"cm");
-  plist->insert((*tag_index)[L"TAG_SENT"], L"", L"sent");
-//  plist->insert((*tag_index)[L"TAG_kMAS"], L"+", L"");
+  newConstant("kMOT"_u);
+  newConstant("kDOLLAR"_u);
+  newConstant("kBARRA"_u);
+  newConstant("kMAS"_u);
+  newConstant("kIGNORAR"_u);
+  newConstant("kBEGIN"_u);
+  newConstant("kUNKNOWN"_u);
+
+  plist->insert((*tag_index)["TAG_LPAR"_u], ""_u, "lpar"_u);
+  plist->insert((*tag_index)["TAG_RPAR"_u], ""_u, "rpar"_u);
+  plist->insert((*tag_index)["TAG_LQUEST"_u], ""_u, "lquest"_u);
+  plist->insert((*tag_index)["TAG_CM"_u], ""_u, "cm"_u);
+  plist->insert((*tag_index)["TAG_SENT"_u], ""_u, "sent"_u);
+//  plist->insert((*tag_index)["TAG_kMAS"_u], "+"_u, ""_u);
   plist->buildTransducer();
 }
 
diff --git a/apertium/tsx_reader.h b/apertium/tsx_reader.h
index 8cc4829..9ef82a2 100644
--- a/apertium/tsx_reader.h
+++ b/apertium/tsx_reader.h
@@ -22,7 +22,6 @@
 #include <apertium/ttag.h>
 #include <apertium/xml_reader.h>
 #include <lttoolbox/pattern_list.h>
-#include <lttoolbox/ltstr.h>
 
 #include <libxml/xmlreader.h>
 #include <map>
@@ -37,17 +36,17 @@ class TSXReader : public XMLReader
 private:
   set<TTag> *open_class;
   vector<TForbidRule> *forbid_rules;
-  map<wstring, TTag, Ltstr> *tag_index;
-  vector<wstring> *array_tags;
+  map<UString, TTag> *tag_index;
+  vector<UString> *array_tags;
   vector<TEnforceAfterRule> *enforce_rules;
-  vector<wstring> *prefer_rules;
+  vector<UString> *prefer_rules;
   PatternList *plist;
   ConstantManager *constants;
   TaggerData tdata;
 
-  void newTagIndex(wstring const &tag);
-  void newDefTag(wstring const &tag);
-  void newConstant(wstring const &constant);
+  void newTagIndex(UString const &tag);
+  void newDefTag(UString const &tag);
+  void newConstant(UString const &constant);
   void procDefLabel();
   void procDefMult();
   void procDiscardOnAmbiguity();
diff --git a/apertium/unigram_tagger.cc b/apertium/unigram_tagger.cc
index 5738816..8fb543b 100644
--- a/apertium/unigram_tagger.cc
+++ b/apertium/unigram_tagger.cc
@@ -134,9 +134,9 @@ UnigramTagger::score(const Analysis& Analysis_) {
       }
       if(TheFlags.getDebug())
       {
-        score_DEBUG << L"(" << tokenCount_r_a << L" * "
-                    << tokenCount_a << L") /\n    ("
-                    << tokenCount_a << L" + " << typeCount_a << L")";
+        score_DEBUG << "(" << tokenCount_r_a << " * "
+                    << tokenCount_a << ") /\n    ("
+                    << tokenCount_a << " + " << typeCount_a << ")";
       }
       return (tokenCount_r_a * tokenCount_a) / (tokenCount_a + typeCount_a);
     }
@@ -159,7 +159,7 @@ UnigramTagger::model3_score(const Analysis &Analysis_)
 
   i i_(Analysis_);
   Lemma l_(Analysis_);
-  std::wstringstream score_DEBUG_div;
+  std::stringstream score_DEBUG_div;
   if(Model3_l_t.find(i_) != Model3_l_t.end())
   {
     if(Model3_l_t[i_].find(l_) != Model3_l_t[i_].end())
@@ -174,9 +174,9 @@ UnigramTagger::model3_score(const Analysis &Analysis_)
   }
   if(TheFlags.getDebug())
   {
-    score_DEBUG << L"(" << tokenCount_r_i << L" * " << tokenCount_i;
-    std::wstringstream score_DEBUG_div;
-    score_DEBUG_div << L"(" << tokenCount_i << L" + " << typeCount_i << L")";
+    score_DEBUG << "(" << tokenCount_r_i << " * " << tokenCount_i;
+    std::stringstream score_DEBUG_div;
+    score_DEBUG_div << "(" << tokenCount_i << " + " << typeCount_i << ")";
   }
 
   long double score = tokenCount_r_i * tokenCount_i;
@@ -223,9 +223,9 @@ UnigramTagger::model3_score(const Analysis &Analysis_)
     }
     if(TheFlags.getDebug())
     {
-      score_DEBUG << L" * " << tokenCount_d_i << L" * " << tokenCount_i_d;
-      score_DEBUG_div << L" * (" << tokenCount_i << L" + " << typeCount_i
-                      << L") * (" << tokenCount_d << L" + " << typeCount_d << L")";
+      score_DEBUG << " * " << tokenCount_d_i << " * " << tokenCount_i_d;
+      score_DEBUG_div << " * (" << tokenCount_i << " + " << typeCount_i
+                      << ") * (" << tokenCount_d << " + " << typeCount_d << ")";
     }
 
     score *= (tokenCount_d_i * tokenCount_i_d);
@@ -233,14 +233,14 @@ UnigramTagger::model3_score(const Analysis &Analysis_)
   }
   if(TheFlags.getDebug())
   {
-    score_DEBUG << L") /\n    [" << score_DEBUG_div.str() << L"]";
+    score_DEBUG << ") /\n    [" << score_DEBUG_div.str() << "]";
   }
 
   return score / score_Divisor;
 }
 
 void
-UnigramTagger::tag(Stream &Input, std::wostream &Output)
+UnigramTagger::tag(Stream &Input, std::ostream &Output)
 {
   while (true) {
     StreamedType StreamedType_ = Input.get();
@@ -255,7 +255,7 @@ UnigramTagger::tag(Stream &Input, std::wostream &Output)
     }
     if(TheFlags.getDebug())
     {
-      std::wcerr << L"\n\n";
+      std::cerr << "\n\n";
     }
 
     tag(*StreamedType_.TheLexicalUnit, Output);
@@ -266,7 +266,7 @@ UnigramTagger::tag(Stream &Input, std::wostream &Output)
 }
 
 void
-UnigramTagger::tag(const LexicalUnit &LexicalUnit_, std::wostream &Output)
+UnigramTagger::tag(const LexicalUnit &LexicalUnit_, std::ostream &Output)
 {
   Optional<Analysis> TheAnalysis;
   long double max_score = 0;
@@ -275,7 +275,7 @@ UnigramTagger::tag(const LexicalUnit &LexicalUnit_, std::wostream &Output)
   {
     if(TheFlags.getDebug())
     {
-      score_DEBUG.str(L"");
+      score_DEBUG.str("");
     }
     const Analysis& a_ = LexicalUnit_.TheAnalyses[n];
     long double s = score(a_);
@@ -286,10 +286,10 @@ UnigramTagger::tag(const LexicalUnit &LexicalUnit_, std::wostream &Output)
     }
     if(TheFlags.getDebug())
     {
-      std::wcerr << L"score(\"" << a_ << L"\") ==\n "
-                 << score_DEBUG.str() << L" ==\n  " << std::fixed
+      std::cerr << "score(\"" << a_ << "\") ==\n "
+                 << score_DEBUG.str() << " ==\n  " << std::fixed
                  << std::setprecision(std::numeric_limits<long double>::digits10)
-                 << s << L"\n";
+                 << s << "\n";
     }
   }
 
diff --git a/apertium/unigram_tagger.h b/apertium/unigram_tagger.h
index 58af2e9..70737bc 100644
--- a/apertium/unigram_tagger.h
+++ b/apertium/unigram_tagger.h
@@ -45,8 +45,8 @@ enum UnigramTaggerModel {
 class UnigramTagger : public StreamTagger {
 private:
   long double model3_score(const Analysis &Analysis_);
-  void tag(const LexicalUnit &LexicalUnit_, std::wostream &Output);
-  std::wstringstream score_DEBUG;
+  void tag(const LexicalUnit &LexicalUnit_, std::ostream &Output);
+  std::stringstream score_DEBUG;
 protected:
 
   UnigramTaggerModel model;
@@ -94,7 +94,7 @@ public:
   UnigramTaggerModel getModel();
   void serialise(std::ostream& o) const;
   void deserialise(std::istream& s);
-  void tag(Stream& Input, std::wostream& Output);
+  void tag(Stream& Input, std::ostream& Output);
   void train(Stream& TaggedCorpus);
 };
 }
diff --git a/apertium/unlocked_cstdio.h b/apertium/unlocked_cstdio.h
index 5fe402c..2d043c5 100644
--- a/apertium/unlocked_cstdio.h
+++ b/apertium/unlocked_cstdio.h
@@ -40,24 +40,6 @@
 #define fread_unlocked fread
 #endif
 
-#if !HAVE_DECL_FGETWC_UNLOCKED
-#define fgetwc_unlocked fgetwc
-#endif
-
-#if !HAVE_DECL_FPUTWC_UNLOCKED
-#define fputwc_unlocked fputwc
-#endif
-
-#if !HAVE_DECL_FPUTWS_UNLOCKED
-#define fputws_unlocked fputws
-#endif
-
-#if !HAVE_MBTOWC
-#include <cwchar>
-inline int wctomb(char *s, wchar_t wc) { return wcrtomb(s,wc,NULL); }
-inline int mbtowc(wchar_t *pwc, const char *s, size_t n) { return mbrtowc(pwc, s, n, NULL); }
-#endif
-
 #ifdef _WIN32
 #include <utf8_fwrap.h>
 #endif
diff --git a/apertium/utf_converter.cc b/apertium/utf_converter.cc
deleted file mode 100644
index b79e834..0000000
--- a/apertium/utf_converter.cc
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#include <apertium/utf_converter.h>
-#include <iostream>
-#include <cstdlib>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_MAX_BMP (UTF32)0x0000FFFF
-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
-#define UNI_SUR_HIGH_START  (UTF32)0xD800
-#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
-#define UNI_SUR_LOW_START   (UTF32)0xDC00
-#define UNI_SUR_LOW_END     (UTF32)0xDFFF
-
-using namespace std;
-
-namespace UtfConverter
-{
-
-  typedef unsigned int	 UTF32;	/* at least 32 bits */
-  typedef unsigned short UTF16;	/* at least 16 bits */
-  typedef unsigned char	 UTF8;	/* typically 8 bits */
-
-  /* Some fundamental constants */
-
-  typedef enum {
-    conversionOK, 	/* conversion successful */
-    sourceExhausted,	/* partial character in source, but hit end */
-    targetExhausted,	/* insuff. room in target for conversion */
-    sourceIllegal	/* source sequence is illegal/malformed */
-  } ConversionResult;
-
-  typedef enum {
-    strictConversion = 0,
-    lenientConversion
-  } ConversionFlags;
-
-  static const int halfShift  = 10; /* used for shifting by 10 bits */
-
-  static const UTF32 halfBase = 0x0010000UL;
-  static const UTF32 halfMask = 0x3FFUL;
-
-
-  void conversionError()
-  {
-    wcerr << L"Error: conversion error" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF32toUTF16 (
-					const UTF32** sourceStart, const UTF32* sourceEnd,
-					UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF32* source = *sourceStart;
-    UTF16* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      if (target >= targetEnd) {
-	result = targetExhausted; break;
-      }
-      ch = *source++;
-      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
-	/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = (UTF16)ch; /* normal case */
-	}
-      } else if (ch > UNI_MAX_LEGAL_UTF32) {
-	if (flags == strictConversion) {
-	  result = sourceIllegal;
-	} else {
-	  *target++ = UNI_REPLACEMENT_CHAR;
-	}
-      } else {
-	/* target is a character in range 0xFFFF - 0x10FFFF. */
-	if (target + 1 >= targetEnd) {
-	  --source; /* Back up source pointer! */
-	  result = targetExhausted; break;
-	}
-	ch -= halfBase;
-	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
-	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF16toUTF32 (
-					const UTF16** sourceStart, const UTF16* sourceEnd,
-					UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF16* source = *sourceStart;
-    UTF32* target = *targetStart;
-    UTF32 ch, ch2;
-    while (source < sourceEnd) {
-      const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
-      ch = *source++;
-      /* If we have a surrogate pair, convert to UTF32 first. */
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-	/* If the 16 bits following the high surrogate are in the source buffer... */
-	if (source < sourceEnd) {
-	  ch2 = *source;
-	  /* If it's a low surrogate, convert to UTF32. */
-	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
-	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
-	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
-	    ++source;
-	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  }
-	} else { /* We don't have the 16 bits following the high surrogate. */
-	  --source; /* return to the high surrogate */
-	  result = sourceExhausted;
-	  break;
-	}
-      } else if (flags == strictConversion) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      if (target >= targetEnd) {
-	source = oldSource; /* Back up source pointer! */
-	result = targetExhausted; break;
-      }
-      *target++ = ch;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Index into the table below with the first byte of a UTF-8 sequence to
-   * get the number of trailing bytes that are supposed to follow it.
-   * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
-   * left as-is for anyone who may want to do such conversion, which was
-   * allowed in earlier algorithms.
-   */
-  static const char trailingBytesForUTF8[256] = {
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-  };
-
-  /*
-   * Magic values subtracted from a buffer value during UTF8 conversion.
-   * This table contains as many values as there might be trailing bytes
-   * in a UTF-8 sequence.
-   */
-  static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
-					    0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-  /*
-   * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
-   * into the first byte, depending on how many bytes follow.  There are
-   * as many entries in this table as there are UTF-8 sequence types.
-   * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
-   * for *legal* UTF-8 will be 4 or fewer bytes total.
-   */
-  static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-  /* --------------------------------------------------------------------- */
-
-  /* The interface converts a whole buffer to avoid function-call overhead.
-   * Constants have been gathered. Loops & conditionals have been removed as
-   * much as possible for efficiency, in favor of drop-through switches.
-   * (See "Note A" at the bottom of the file for equivalent code.)
-   * If your compiler supports it, the "isLegalUTF8" call can be turned
-   * into an inline function.
-   */
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF16toUTF8 (
-				       const UTF16** sourceStart, const UTF16* sourceEnd,
-				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF16* source = *sourceStart;
-    UTF8* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      unsigned short bytesToWrite = 0;
-      const UTF32 byteMask = 0xBF;
-      const UTF32 byteMark = 0x80;
-      const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
-      ch = *source++;
-      /* If we have a surrogate pair, convert to UTF32 first. */
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-	/* If the 16 bits following the high surrogate are in the source buffer... */
-	if (source < sourceEnd) {
-	  UTF32 ch2 = *source;
-	  /* If it's a low surrogate, convert to UTF32. */
-	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
-	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
-	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
-	    ++source;
-	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  }
-	} else { /* We don't have the 16 bits following the high surrogate. */
-	  --source; /* return to the high surrogate */
-	  result = sourceExhausted;
-	  break;
-	}
-      } else if (flags == strictConversion) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      /* Figure out how many bytes the result will require */
-      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
-      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
-      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
-      } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
-      } else {			    bytesToWrite = 3;
-      ch = UNI_REPLACEMENT_CHAR;
-      }
-
-      target += bytesToWrite;
-      if (target > targetEnd) {
-	source = oldSource; /* Back up source pointer! */
-	target -= bytesToWrite; result = targetExhausted; break;
-      }
-      switch (bytesToWrite) { /* note: everything falls through. */
-      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
-      }
-      target += bytesToWrite;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Utility routine to tell whether a sequence of bytes is legal UTF-8.
-   * This must be called with the length pre-determined by the first byte.
-   * If not calling this from ConvertUTF8to*, then the length can be set by:
-   *  length = trailingBytesForUTF8[*source]+1;
-   * and the sequence is illegal right away if there aren't that many bytes
-   * available.
-   * If presented with a length > 4, this returns false.  The Unicode
-   * definition of UTF-8 goes up to 4-byte sequences.
-   */
-
-  static bool isLegalUTF8(const UTF8 *source, int length) {
-    UTF8 a;
-    const UTF8 *srcptr = source+length;
-    switch (length) {
-    default: return false;
-      /* Everything else falls through when "true"... */
-    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
-    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
-    case 2: if ((a = (*--srcptr)) > 0xBF) return false;
-
-      switch (*source) {
-	/* no fall-through in this inner switch */
-      case 0xE0: if (a < 0xA0) return false; break;
-      case 0xED: if (a > 0x9F) return false; break;
-      case 0xF0: if (a < 0x90) return false; break;
-      case 0xF4: if (a > 0x8F) return false; break;
-      default:   if (a < 0x80) return false;
-      }
-
-    case 1: if (*source >= 0x80 && *source < 0xC2) return false;
-    }
-    if (*source > 0xF4) return false;
-    return true;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Exported function to return whether a UTF-8 sequence is legal or not.
-   * This is not used here; it's just exported.
-   */
-  bool isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
-    int length = trailingBytesForUTF8[*source]+1;
-    if (source+length > sourceEnd) {
-      return false;
-    }
-    return isLegalUTF8(source, length);
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF8toUTF16 (
-				       const UTF8** sourceStart, const UTF8* sourceEnd,
-				       UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF8* source = *sourceStart;
-    UTF16* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch = 0;
-      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-      if (source + extraBytesToRead >= sourceEnd) {
-	result = sourceExhausted; break;
-      }
-      /* Do this check whether lenient or strict */
-      if (! isLegalUTF8(source, extraBytesToRead+1)) {
-	result = sourceIllegal;
-	break;
-      }
-      /*
-       * The cases all fall through. See "Note A" below.
-       */
-      switch (extraBytesToRead) {
-      case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-      case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-      case 3: ch += *source++; ch <<= 6;
-      case 2: ch += *source++; ch <<= 6;
-      case 1: ch += *source++; ch <<= 6;
-      case 0: ch += *source++;
-      }
-      ch -= offsetsFromUTF8[extraBytesToRead];
-
-      if (target >= targetEnd) {
-	source -= (extraBytesToRead+1); /* Back up source pointer! */
-	result = targetExhausted; break;
-      }
-      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = (UTF16)ch; /* normal case */
-	}
-      } else if (ch > UNI_MAX_UTF16) {
-	if (flags == strictConversion) {
-	  result = sourceIllegal;
-	  source -= (extraBytesToRead+1); /* return to the start */
-	  break; /* Bail out; shouldn't continue */
-	} else {
-	  *target++ = UNI_REPLACEMENT_CHAR;
-	}
-      } else {
-	/* target is a character in range 0xFFFF - 0x10FFFF. */
-	if (target + 1 >= targetEnd) {
-	  source -= (extraBytesToRead+1); /* Back up source pointer! */
-	  result = targetExhausted; break;
-	}
-	ch -= halfBase;
-	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
-	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF32toUTF8 (
-				       const UTF32** sourceStart, const UTF32* sourceEnd,
-				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF32* source = *sourceStart;
-    UTF8* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      unsigned short bytesToWrite = 0;
-      const UTF32 byteMask = 0xBF;
-      const UTF32 byteMark = 0x80;
-      ch = *source++;
-      if (flags == strictConversion ) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      /*
-       * Figure out how many bytes the result will require. Turn any
-       * illegally large UTF32 things (> Plane 17) into replacement chars.
-       */
-      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
-      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
-      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
-      } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
-      } else {			    bytesToWrite = 3;
-      ch = UNI_REPLACEMENT_CHAR;
-      result = sourceIllegal;
-      }
-
-      target += bytesToWrite;
-      if (target > targetEnd) {
-	--source; /* Back up source pointer! */
-	target -= bytesToWrite; result = targetExhausted; break;
-      }
-      switch (bytesToWrite) { /* note: everything falls through. */
-      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
-      }
-      target += bytesToWrite;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF8toUTF32 (
-				       const UTF8** sourceStart, const UTF8* sourceEnd,
-				       UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF8* source = *sourceStart;
-    UTF32* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch = 0;
-      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-      if (source + extraBytesToRead >= sourceEnd) {
-	result = sourceExhausted; break;
-      }
-      /* Do this check whether lenient or strict */
-      if (! isLegalUTF8(source, extraBytesToRead+1)) {
-	result = sourceIllegal;
-	break;
-      }
-      /*
-       * The cases all fall through. See "Note A" below.
-       */
-      switch (extraBytesToRead) {
-      case 5: ch += *source++; ch <<= 6;
-      case 4: ch += *source++; ch <<= 6;
-      case 3: ch += *source++; ch <<= 6;
-      case 2: ch += *source++; ch <<= 6;
-      case 1: ch += *source++; ch <<= 6;
-      case 0: ch += *source++;
-      }
-      ch -= offsetsFromUTF8[extraBytesToRead];
-
-      if (target >= targetEnd) {
-	source -= (extraBytesToRead+1); /* Back up the source pointer! */
-	result = targetExhausted; break;
-      }
-      if (ch <= UNI_MAX_LEGAL_UTF32) {
-	/*
-	 * UTF-16 surrogate values are illegal in UTF-32, and anything
-	 * over Plane 17 (> 0x10FFFF) is illegal.
-	 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = ch;
-	}
-      } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
-	result = sourceIllegal;
-	*target++ = UNI_REPLACEMENT_CHAR;
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  wstring fromUtf8(string const & utf8string)
-  {
-    size_t widesize = utf8string.length();
-    if (sizeof(wchar_t) == 2)
-      {
-	wstring resultstring;
-	resultstring.resize(widesize+1, L'\0');
-	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
-	const UTF8* sourceend = sourcestart + widesize;
-	UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
-	UTF16* targetend = targetstart + widesize;
-	ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, wcslen(resultstring.c_str()));
-      }
-    else if (sizeof(wchar_t) == 4)
-      {
-	wstring resultstring;
-	resultstring.resize(widesize+1, L'\0');
-	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
-	const UTF8* sourceend = sourcestart + widesize;
-	UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
-	UTF32* targetend = targetstart + widesize;
-	ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0,wcslen(resultstring.c_str()));
-      }
-    else
-      {
-	conversionError();
-      }
-    return L"";
-  }
-
-  string toUtf8(wstring const &widestring)
-  {
-    size_t widesize = widestring.length();
-
-    if (sizeof(wchar_t) == 2)
-      {
-	size_t utf8size = 3 * widesize + 1;
-	string resultstring;
-	resultstring.resize(utf8size, '\0');
-	const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
-	const UTF16* sourceend = sourcestart + widesize;
-	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
-	UTF8* targetend = targetstart + utf8size;
-	ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, strlen(resultstring.c_str()));
-      }
-    else if (sizeof(wchar_t) == 4)
-      {
-	size_t utf8size = 4 * widesize + 1;
-	string resultstring;
-	resultstring.resize(utf8size, '\0');
-	const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
-	const UTF32* sourceend = reinterpret_cast<const UTF32*>(widestring.c_str() + widesize);
-	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
-	UTF8* targetend = targetstart + utf8size;
-	ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, strlen(resultstring.c_str()));
-      }
-    else
-      {
-	conversionError();
-      }
-    return "";
-  }
-}
diff --git a/apertium/utf_converter.h b/apertium/utf_converter.h
deleted file mode 100644
index 5e1f5b3..0000000
--- a/apertium/utf_converter.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#ifndef _UTFCONVERTER_
-#define _UTFCONVERTER_
-
-#include <string>
-
-using namespace std;
-
-namespace UtfConverter
-{
-    wstring fromUtf8(string const &utf8string);
-    string toUtf8(wstring const &widestring);
-}
-
-#endif
diff --git a/apertium/xml_reader.cc b/apertium/xml_reader.cc
index b16a3f5..875a484 100644
--- a/apertium/xml_reader.cc
+++ b/apertium/xml_reader.cc
@@ -7,7 +7,7 @@ XMLReader::XmlTextReaderResource::XmlTextReaderResource(
 {
   reader = xmlReaderForFile(filename.c_str(), NULL, 0);
   if (reader == NULL) {
-    wcerr << L"Error: Cannot open '" << filename << L"'." << endl;
+    cerr << "Error: Cannot open '" << filename << "'." << endl;
     exit(EXIT_FAILURE);
   }
 }
@@ -25,7 +25,7 @@ XMLReader::XMLReader() : reader(0), type(0) {}
 void
 XMLReader::stepToTag()
 {
-  while (name == L"#text" || name == L"#comment") {
+  while (name == "#text"_u || name == "#comment"_u) {
     step();
   }
 }
@@ -36,15 +36,15 @@ XMLReader::step()
   int retval = xmlTextReaderRead(reader);
   if (retval != 1)
   {
-    parseError(L"unexpected EOF");
+    parseError("unexpected EOF"_u);
   }
-  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+  name = XMLParseUtil::readName(reader);
   type = xmlTextReaderNodeType(reader);
-  //std::wcerr << name << L": " << type << "\n";
+  //std::cerr << name << ": " << type << "\n";
 }
 
 void
-XMLReader::stepPastSelfClosingTag(wstring const &tag)
+XMLReader::stepPastSelfClosingTag(UString const &tag)
 {
   // libxml2 expands <foo /> to <foo></foo> inside entities.
   // This method exists to work around this difference.
@@ -63,31 +63,40 @@ XMLReader::stepToNextTag()
   stepToTag();
 }
 
-wstring
-XMLReader::attrib(wstring const &name)
+UString
+XMLReader::attrib(UString const &name)
 {
   return XMLParseUtil::attrib(reader, name);
 }
 
-string
-XMLReader::attrib(string const &name)
+std::string
+XMLReader::attrib_str(const UString& name)
 {
-  return UtfConverter::toUtf8(attrib(UtfConverter::fromUtf8(name)));
+  return XMLParseUtil::attrib_str(reader, name);
 }
 
 void
-XMLReader::parseError(wstring const &message)
+XMLReader::parseError(UString const &message)
 {
-  wcerr << L"Error at line " << xmlTextReaderGetParserLineNumber(reader)
-        << L", column " << xmlTextReaderGetParserColumnNumber(reader)
-        << L": " << message << L"." << endl;
+  cerr << "Error at line " << xmlTextReaderGetParserLineNumber(reader)
+       << ", column " << xmlTextReaderGetParserColumnNumber(reader)
+       << ": " << message << "." << endl;
+  exit(EXIT_FAILURE);
+}
+
+void
+XMLReader::parseError(const std::string& message)
+{
+  cerr << "Error at line " << xmlTextReaderGetParserLineNumber(reader)
+       << ", column " << xmlTextReaderGetParserColumnNumber(reader)
+       << ": " << message << "." << endl;
   exit(EXIT_FAILURE);
 }
 
 void
 XMLReader::unexpectedTag()
 {
-  parseError(L"unexpected '<" + name + L">' tag");
+  parseError("unexpected '<"_u + name + ">' tag"_u);
 }
 
 void
diff --git a/apertium/xml_reader.h b/apertium/xml_reader.h
index 252e2c4..60d94c7 100644
--- a/apertium/xml_reader.h
+++ b/apertium/xml_reader.h
@@ -2,12 +2,10 @@
 #define _XMLREADER_
 
 #include <apertium/constant_manager.h>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 #include <apertium/tagger_data.h>
 #include <apertium/ttag.h>
-#include <apertium/utf_converter.h>
 #include <lttoolbox/pattern_list.h>
-#include <lttoolbox/ltstr.h>
 #include <lttoolbox/xml_parse_util.h>
 
 #include <libxml/xmlreader.h>
@@ -34,13 +32,14 @@ protected:
   XMLReader();
   xmlTextReaderPtr reader;
   int type;
-  wstring name;
-  wstring attrib(wstring const &name);
-  string attrib(string const &name);
-  void parseError(wstring const &message);
+  UString name;
+  UString attrib(UString const &name);
+  string attrib_str(const UString& name);
+  void parseError(UString const &message);
+  void parseError(const string& message);
   void unexpectedTag();
   void stepToTag();
-  void stepPastSelfClosingTag(wstring const &tag);
+  void stepPastSelfClosingTag(UString const &tag);
   void stepToNextTag();
   void step();
   virtual void parse() = 0;
diff --git a/configure.ac b/configure.ac
index 6c992da..b6c1d2c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,8 +1,8 @@
 AC_PREREQ(2.52)
 
 m4_define([PKG_VERSION_MAJOR], [3])
-m4_define([PKG_VERSION_MINOR], [7])
-m4_define([PKG_VERSION_PATCH], [1])
+m4_define([PKG_VERSION_MINOR], [8])
+m4_define([PKG_VERSION_PATCH], [0])
 
 AC_INIT([apertium], [PKG_VERSION_MAJOR.PKG_VERSION_MINOR.PKG_VERSION_PATCH], [apertium-stuff@lists.sourceforge.net], [apertium], [https://wiki.apertium.org/])
 AC_CONFIG_HEADER([apertium/apertium_config.h])
@@ -69,62 +69,21 @@ fi
 
 AC_CHECK_FUNCS(strcasecmp)
 
-if test x$(uname) != xDarwin;
-then
-AC_CHECK_HEADER(pcreposix.h,
-  AC_CHECK_LIB(pcre, pcre_fullinfo,[
-    LIBS="$LIBS -lpcreposix -lpcre"
-    no_comp_check=yes],
-    AC_MSG_ERROR([*** unable to locate pcre library ***])),
-  AC_MSG_ERROR([*** unable to locate pcreposix.h include file ***]))
-
-AC_CHECK_HEADER(pcrecpp.h,
-  AC_CHECK_LIB(pcrecpp,pcre_compile,[
-  LIBS="$LIBS -lpcrecpp"
-  no_comp_check=yes],
-  AC_MSG_ERROR([*** unable to locate pcrecpp library ***])),
-  AC_MSG_ERROR([*** unable to locate pcrecpp.h include file ***]))
-fi
-
-
-PKG_CHECK_MODULES(LTTOOLBOX, [lttoolbox >= 3.5.3], CPPFLAGS="$CPPFLAGS $LTTOOLBOX_CFLAGS"; LIBS="$LIBS $LTTOOLBOX_LIBS")
+PKG_CHECK_MODULES(LTTOOLBOX, [lttoolbox >= 3.6.0], CPPFLAGS="$CPPFLAGS $LTTOOLBOX_CFLAGS"; LIBS="$LIBS $LTTOOLBOX_LIBS")
 PKG_CHECK_MODULES(LIBXML2, [libxml-2.0 >= 2.6.17], CPPFLAGS="$CPPFLAGS $LIBXML2_CFLAGS"; LIBS="$LIBS $LIBXML2_LIBS")
-PKG_CHECK_MODULES(PCRE, [libpcre >= 6.4], CPPFLAGS="$CPPFLAGS $PCRE_CFLAGS"; LIBS="$LIBS $PCRE_LIBS")
-
-# Check for wide strings
-AC_DEFUN([AC_CXX_WSTRING],[
-  AC_CACHE_CHECK(whether the compiler supports wide strings,
-  ac_cv_cxx_wstring,
-  [AC_LANG_SAVE
-   AC_LANG_CPLUSPLUS
-   AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <string>]],[[
-std::wstring test = L"test";
-   ]])],
-   [ac_cv_cxx_wstring=yes], [ac_cv_cxx_wstring=no])
-   AC_LANG_RESTORE
-  ])
-])
+PKG_CHECK_MODULES(ICU, [icu-i18n, icu-io, icu-uc], CPPFLAGS="$CPPFLAGS $ICU_CFLAGS"; LIBS="$LIBS $ICU_LIBS")
 
-AC_CXX_WSTRING
 AC_C_BIGENDIAN
 
-if test "$ac_cv_cxx_wstring" = no
-then
-  AC_MSG_ERROR([Missing wide string support])
-fi
-
-
 # Checks for header files.
 AC_LANG(C++)
 AC_HEADER_STDC
 AC_CHECK_HEADERS([stdlib.h string.h unistd.h stddef.h filesystem string_view])
 AC_CHECK_LIB([stdc++fs], [_ZNSt12experimental10filesystem2v112current_pathEv])
+AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
 
-# Checks for ICU
-AC_CHECK_ICU(50)
-
-AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt, getopt_long, fgetwc_unlocked, fputwc_unlocked, fgetws_unlocked, fputws_unlocked])
-AC_CHECK_FUNCS([setlocale strdup getopt snprintf mbtowc])
+AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt, getopt_long])
+AC_CHECK_FUNCS([setlocale strdup getopt snprintf])
 AC_REPLACE_FUNCS(getopt_long)
 
 AM_CONDITIONAL([WINDOWS], [test x$version_type = xwindows])
diff --git a/m4/ax_check_icu.m4 b/m4/ax_check_icu.m4
deleted file mode 100644
index 0137afd..0000000
--- a/m4/ax_check_icu.m4
+++ /dev/null
@@ -1,117 +0,0 @@
-# ===========================================================================
-#       https://www.gnu.org/software/autoconf-archive/ax_check_icu.html
-# ===========================================================================
-#
-# SYNOPSIS
-#
-#   AX_CHECK_ICU(version, action-if, action-if-not)
-#
-# DESCRIPTION
-#
-#   Defines ICU_LIBS, ICU_CFLAGS, ICU_CXXFLAGS. See icu-config(1) man page.
-#
-# LICENSE
-#
-#   Copyright (c) 2008 Akos Maroy <darkeye@tyrell.hu>
-#
-#   Copying and distribution of this file, with or without modification, are
-#   permitted in any medium without royalty provided the copyright notice
-#   and this notice are preserved. This file is offered as-is, without any
-#   warranty.
-
-#serial 7
-
-AU_ALIAS([AC_CHECK_ICU], [AX_CHECK_ICU])
-AC_DEFUN([AX_CHECK_ICU], [
-  succeeded=no
-
-  if test -z "$ICU_CONFIG"; then
-    AC_PATH_PROG(ICU_CONFIG, icu-config, no)
-  fi
-
-  if test -z "$PKG_CONFIG"; then
-    AC_PATH_PROG(PKG_CONFIG, pkg-config, no)
-  fi
-
-  if test "$ICU_CONFIG" = "no" && test "$PKG_CONFIG" = "no" ; then
-    echo "*** Neither icu-config nor pkg-config could not be found. Make sure either is"
-    echo "*** in your path, and that taglib is properly installed."
-    echo "*** Or see https://ibm.com/software/globalization/icu/"
-  fi
-
-  if test "$ICU_CONFIG" != "no" ; then
-    ICU_VERSION=`$ICU_CONFIG --version`
-    AC_MSG_CHECKING(for ICU >= $1 via icu-config)
-        VERSION_CHECK=`expr $ICU_VERSION \>\= $1`
-        if test "$VERSION_CHECK" = "1" ; then
-            AC_MSG_RESULT(yes)
-            succeeded=yes
-
-            AC_MSG_CHECKING(ICU_CPPFLAGS)
-            ICU_CPPFLAGS=`$ICU_CONFIG --cppflags`
-            AC_MSG_RESULT($ICU_CPPFLAGS)
-
-            AC_MSG_CHECKING(ICU_CFLAGS)
-            ICU_CFLAGS=`$ICU_CONFIG --cflags`
-            AC_MSG_RESULT($ICU_CFLAGS)
-
-            AC_MSG_CHECKING(ICU_CXXFLAGS)
-            ICU_CXXFLAGS=`$ICU_CONFIG --cxxflags`
-            AC_MSG_RESULT($ICU_CXXFLAGS)
-
-            AC_MSG_CHECKING(ICU_LIBS)
-            ICU_LIBS=`$ICU_CONFIG --ldflags`
-            AC_MSG_RESULT($ICU_LIBS)
-        else
-            ICU_CPPFLAGS=""
-            ICU_CFLAGS=""
-            ICU_CXXFLAGS=""
-            ICU_LIBS=""
-            ## If we have a custom action on failure, don't print errors, but
-            ## do set a variable so people can do so.
-            ifelse([$3], ,echo "can't find ICU >= $1 via icu-config",)
-        fi
-  fi
-
-  if test "$succeeded" != "yes" && test "$PKG_CONFIG" != "no" ; then
-    AC_MSG_CHECKING(for ICU >= $1 via pkg-config)
-        if $PKG_CONFIG --atleast-version=$1 icu-i18n ; then
-            AC_MSG_RESULT(yes)
-            succeeded=yes
-
-            AC_MSG_CHECKING(ICU_CPPFLAGS)
-            ICU_CPPFLAGS=`$PKG_CONFIG --variable=CPPFLAGS icu-i18n`
-            AC_MSG_RESULT($ICU_CPPFLAGS)
-
-            AC_MSG_CHECKING(ICU_CFLAGS)
-            ICU_CFLAGS=`$PKG_CONFIG --variable=CFLAGS icu-i18n`
-            AC_MSG_RESULT($ICU_CFLAGS)
-
-            AC_MSG_CHECKING(ICU_CXXFLAGS)
-            ICU_CXXFLAGS=`$PKG_CONFIG --variable=CXXFLAGS icu-i18n`
-            AC_MSG_RESULT($ICU_CXXFLAGS)
-
-            AC_MSG_CHECKING(ICU_LIBS)
-            ICU_LIBS=`$PKG_CONFIG --libs icu-i18n`
-            AC_MSG_RESULT($ICU_LIBS)
-        else
-            ICU_CPPFLAGS=""
-            ICU_CFLAGS=""
-            ICU_CXXFLAGS=""
-            ICU_LIBS=""
-            ## If we have a custom action on failure, don't print errors, but
-            ## do set a variable so people can do so.
-            ifelse([$3], ,echo "can't find ICU >= $1 via pkg-config",)
-        fi
-  fi
-
-  if test "$succeeded" = "yes"; then
-     AC_SUBST(ICU_CPPFLAGS)
-     AC_SUBST(ICU_CFLAGS)
-     AC_SUBST(ICU_CXXFLAGS)
-     AC_SUBST(ICU_LIBS)
-     ifelse([$2], , :, [$2])
-  else
-     ifelse([$3], , AC_MSG_ERROR([Library requirements (ICU) not met.]), [$3])
-  fi
-])
diff --git a/python/apertium_core.i b/python/apertium_core.i
index 738f383..e9b5fc3 100644
--- a/python/apertium_core.i
+++ b/python/apertium_core.i
@@ -53,8 +53,9 @@
 
 void pretransfer(int argc, char **argv, char *input_path, char *output_path)
 {
-  FILE* input = fopen(input_path, "r");
-  FILE* output = fopen(output_path, "w");
+  InputFile input;
+  input.open(input_path);
+  UFILE* output = u_fopen(output_path, "w", NULL, NULL);
   bool compound_sep = false;
   bool null_flush = false;
   bool surface_forms = false;
@@ -85,8 +86,7 @@ void pretransfer(int argc, char **argv, char *input_path, char *output_path)
     }
   }
   processStream(input, output, null_flush, surface_forms, compound_sep);
-  fclose(input);
-  fclose(output);
+  u_fclose(output);
 }
 
 class ApertiumTransfer: public Transfer
@@ -100,8 +100,9 @@ class ApertiumTransfer: public Transfer
 
   void transfer_text(int argc, char **argv, char *input_path, char *output_path)
   {
-    FILE* input = fopen(input_path, "r");
-    FILE* output = fopen(output_path, "w");
+	InputFile input;
+	input.open(input_path);
+    UFILE* output = u_fopen(output_path, "w", NULL, NULL);
     optind = 1;
     while (true)
     {
@@ -147,8 +148,7 @@ class ApertiumTransfer: public Transfer
       }
     }
     transfer(input, output);
-    fclose(input);
-    fclose(output);
+    u_fclose(output);
   }
 };
 
@@ -173,8 +173,9 @@ class ApertiumInterchunk: public Interchunk
 
   void interchunk_text(int argc, char **argv, char *input_path, char *output_path)
   {
-    FILE* input = fopen(input_path, "r");
-    FILE* output = fopen(output_path, "w");
+	InputFile input;
+	input.open(input_path);
+    UFILE* output = u_fopen(output_path, "w", NULL, NULL);
     optind = 1;
     while (true)
     {
@@ -197,8 +198,7 @@ class ApertiumInterchunk: public Interchunk
       }
     }
     interchunk(input, output);
-    fclose(input);
-    fclose(output);
+    u_fclose(output);
   }
 };
 
@@ -213,8 +213,9 @@ class ApertiumPostchunk: public Postchunk
 
  void postchunk_text(int argc, char **argv, char *input_path, char *output_path)
   {
-    FILE* input = fopen(input_path, "r");
-    FILE* output = fopen(output_path, "w");
+	InputFile input;
+	input.open(input_path);
+    UFILE* output = u_fopen(output_path, "w", NULL, NULL);
     optind = 1;
     while (true)
     {
@@ -237,8 +238,7 @@ class ApertiumPostchunk: public Postchunk
       }
     }
     postchunk(input, output);
-    fclose(input);
-    fclose(output);
+    u_fclose(output);
   }
 };
 
diff --git a/python/setup.py.in b/python/setup.py.in
index a18adcd..7bcd70d 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -21,9 +21,9 @@ def get_sources():
     sources = ['apertium_core.i']
     cc_sources = [
                   # interchunk.cc postchunk.cc transfer.cc
-                  'apertium_re.cc', 'interchunk.cc', 'interchunk_word.cc', 'postchunk.cc', 'string_utils.cc', 'transfer.cc',
+                  'apertium_re.cc', 'interchunk.cc', 'interchunk_word.cc', 'postchunk.cc', 'transfer.cc',
                   'transfer_data.cc', 'transfer_instr.cc', 'transfer_mult.cc', 'transfer_token.cc', 'transfer_word.cc',
-                  'transfer_word_list.cc', 'trx_reader.cc', 'utf_converter.cc', 'xml_reader.cc',
+                  'trx_reader.cc', 'xml_reader.cc',
                   # 'pretransfer.cc'
                   'pretransfer.cc',
                   # tagger.cc
diff --git a/tests/data/nno-nob.t2x.bin b/tests/data/nno-nob.t2x.bin
index c03e145..9b8e124 100644
Binary files a/tests/data/nno-nob.t2x.bin and b/tests/data/nno-nob.t2x.bin differ
diff --git a/tests/tagger/test_find_similar_ambiguity_classes.cc b/tests/tagger/test_find_similar_ambiguity_classes.cc
index a6e299e..938ac3c 100644
--- a/tests/tagger/test_find_similar_ambiguity_classes.cc
+++ b/tests/tagger/test_find_similar_ambiguity_classes.cc
@@ -1,4 +1,4 @@
-#include "apertium/utf_converter.h"
+#include <lttoolbox/ustring.h>
 #include "apertium/tagger_utils.h"
 #include "apertium/tagger_data_hmm.h"
 #include "apertium/tagger_data.h"
@@ -6,35 +6,44 @@
 #include <sstream>
 #include <algorithm>
 
-void print_ambiguity_class(const vector<wstring> &array_tags, const set<TTag> &abgset)
+void print_ambiguity_class(const vector<UString> &array_tags, const set<TTag> &abgset)
 {
   unsigned int j;
   set<TTag>::const_iterator abgseti;
   for (abgseti=abgset.begin(), j=0; abgseti!=abgset.end(); abgseti++, j++) {
-    wcout << array_tags[*abgseti];
+    cout << array_tags[*abgseti];
     if (j < abgset.size() - 1) {
-      wcout << " ";
+      cout << " ";
     }
   }
 }
 
 void find_similar_ambiguity_class_io(TaggerData &td)
 {
-  vector<wstring> &array_tags = td.getArrayTags();
-  wstring line = L"";
-  getline(wcin, line, L'\n');
-
-  wstringstream line_stream(line);
+  vector<UString> &array_tags = td.getArrayTags();
+  UFILE* in = u_finit(stdin, NULL, NULL);
   set<TTag> ambiguity_class;
-  wstring tag_name;
-  while (line_stream >> tag_name) {
-    vector<wstring>::iterator it;
+  while (true) {
+    UString tag_name;
+    UChar32 c;
+    while (true) {
+      c = u_fgetcx(in);
+      if (u_isspace(c)) {
+        break;
+      } else {
+        tag_name += c;
+      }
+    }
+    vector<UString>::iterator it;
     it = find(array_tags.begin(), array_tags.end(), tag_name);
     if (it == array_tags.end()) {
-        wcerr << L"Tag not in model: " << tag_name << L'\n';
+        cerr << "Tag not in model: " << tag_name << '\n';
         exit(-3);
     }
     ambiguity_class.insert(it - array_tags.begin());
+    if (c == '\n') {
+      break;
+    }
   }
   set<TTag> similar_ambiguity_class = tagger_utils::find_similar_ambiguity_class(td, ambiguity_class);
   print_ambiguity_class(array_tags, similar_ambiguity_class);
diff --git a/utf8/utf8.h b/utf8/utf8.h
deleted file mode 100644
index 82b13f5..0000000
--- a/utf8/utf8.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "utf8/checked.h"
-#include "utf8/unchecked.h"
-
-#endif // header guard
diff --git a/utf8/utf8/checked.h b/utf8/utf8/checked.h
deleted file mode 100644
index 1331155..0000000
--- a/utf8/utf8/checked.h
+++ /dev/null
@@ -1,327 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-#include <stdexcept>
-
-namespace utf8
-{
-    // Base for the exceptions that may be thrown from the library
-    class exception : public ::std::exception {
-    };
-
-    // Exceptions that may be thrown from the library functions.
-    class invalid_code_point : public exception {
-        uint32_t cp;
-    public:
-        invalid_code_point(uint32_t cp) : cp(cp) {}
-        virtual const char* what() const throw() { return "Invalid code point"; }
-        uint32_t code_point() const {return cp;}
-    };
-
-    class invalid_utf8 : public exception {
-        uint8_t u8;
-    public:
-        invalid_utf8 (uint8_t u) : u8(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-8"; }
-        uint8_t utf8_octet() const {return u8;}
-    };
-
-    class invalid_utf16 : public exception {
-        uint16_t u16;
-    public:
-        invalid_utf16 (uint16_t u) : u16(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-16"; }
-        uint16_t utf16_word() const {return u16;}
-    };
-
-    class not_enough_room : public exception {
-    public:
-        virtual const char* what() const throw() { return "Not enough space"; }
-    };
-
-    /// The library API - functions intended to be called by the users
-
-    template <typename octet_iterator>
-    octet_iterator append(uint32_t cp, octet_iterator result)
-    {
-        if (!utf8::internal::is_code_point_valid(cp))
-            throw invalid_code_point(cp);
-
-        if (cp < 0x80)                        // one octet
-            *(result++) = static_cast<uint8_t>(cp);
-        else if (cp < 0x800) {                // two octets
-            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else if (cp < 0x10000) {              // three octets
-            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else {                                // four octets
-            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
-            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
-    {
-        while (start != end) {
-            octet_iterator sequence_start = start;
-            internal::utf_error err_code = utf8::internal::validate_next(start, end);
-            switch (err_code) {
-                case internal::UTF8_OK :
-                    for (octet_iterator it = sequence_start; it != start; ++it)
-                        *out++ = *it;
-                    break;
-                case internal::NOT_ENOUGH_ROOM:
-                    throw not_enough_room();
-                case internal::INVALID_LEAD:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    break;
-                case internal::INCOMPLETE_SEQUENCE:
-                case internal::OVERLONG_SEQUENCE:
-                case internal::INVALID_CODE_POINT:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    // just one replacement mark for the sequence
-                    while (start != end && utf8::internal::is_trail(*start))
-                        ++start;
-                    break;
-            }
-        }
-        return out;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
-    {
-        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
-        return utf8::replace_invalid(start, end, out, replacement_marker);
-    }
-
-    template <typename octet_iterator>
-    uint32_t next(octet_iterator& it, octet_iterator end)
-    {
-        uint32_t cp = 0;
-        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
-        switch (err_code) {
-            case internal::UTF8_OK :
-                break;
-            case internal::NOT_ENOUGH_ROOM :
-                throw not_enough_room();
-            case internal::INVALID_LEAD :
-            case internal::INCOMPLETE_SEQUENCE :
-            case internal::OVERLONG_SEQUENCE :
-                throw invalid_utf8(*it);
-            case internal::INVALID_CODE_POINT :
-                throw invalid_code_point(cp);
-        }
-        return cp;
-    }
-
-    template <typename octet_iterator>
-    uint32_t peek_next(octet_iterator it, octet_iterator end)
-    {
-        return utf8::next(it, end);
-    }
-
-    template <typename octet_iterator>
-    uint32_t prior(octet_iterator& it, octet_iterator start)
-    {
-        // can't do much if it == start
-        if (it == start)
-            throw not_enough_room();
-
-        octet_iterator end = it;
-        // Go back until we hit either a lead octet or start
-        while (utf8::internal::is_trail(*(--it)))
-            if (it == start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        return utf8::peek_next(it, end);
-    }
-
-    /// Deprecated in versions that include "prior"
-    template <typename octet_iterator>
-    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
-    {
-        octet_iterator end = it;
-        while (utf8::internal::is_trail(*(--it)))
-            if (it == pass_start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        octet_iterator temp = it;
-        return utf8::next(temp, end);
-    }
-
-    template <typename octet_iterator, typename distance_type>
-    void advance (octet_iterator& it, distance_type n, octet_iterator end)
-    {
-        for (distance_type i = 0; i < n; ++i)
-            utf8::next(it, end);
-    }
-
-    template <typename octet_iterator>
-    typename std::iterator_traits<octet_iterator>::difference_type
-    distance (octet_iterator first, octet_iterator last)
-    {
-        typename std::iterator_traits<octet_iterator>::difference_type dist;
-        for (dist = 0; first < last; ++dist)
-            utf8::next(first, last);
-        return dist;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-            if (utf8::internal::is_lead_surrogate(cp)) {
-                if (start != end) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
-                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                    else
-                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
-                }
-                else
-                    throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            }
-            // Lone trail surrogate
-            else if (utf8::internal::is_trail_surrogate(cp))
-                throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            result = utf8::append(cp, result);
-        }
-        return result;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = utf8::next(start, end);
-            if (cp > 0xffff) { //make a surrogate pair
-                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-            }
-            else
-                *result++ = static_cast<uint16_t>(cp);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-    {
-        while (start != end)
-            result = utf8::append(*(start++), result);
-
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-    {
-        while (start != end)
-            (*result++) = utf8::next(start, end);
-
-        return result;
-    }
-
-    // The iterator class
-    template <typename octet_iterator>
-    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-      octet_iterator it;
-      octet_iterator range_start;
-      octet_iterator range_end;
-      public:
-      iterator () {}
-      explicit iterator (const octet_iterator& octet_it,
-                         const octet_iterator& range_start,
-                         const octet_iterator& range_end) :
-               it(octet_it), range_start(range_start), range_end(range_end)
-      {
-          if (it < range_start || it > range_end)
-              throw std::out_of_range("Invalid utf-8 iterator position");
-      }
-      // the default "big three" are OK
-      octet_iterator base () const { return it; }
-      uint32_t operator * () const
-      {
-          octet_iterator temp = it;
-          return utf8::next(temp, range_end);
-      }
-      bool operator == (const iterator& rhs) const
-      {
-          if (range_start != rhs.range_start || range_end != rhs.range_end)
-              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
-          return (it == rhs.it);
-      }
-      bool operator != (const iterator& rhs) const
-      {
-          return !(operator == (rhs));
-      }
-      iterator& operator ++ ()
-      {
-          utf8::next(it, range_end);
-          return *this;
-      }
-      iterator operator ++ (int)
-      {
-          iterator temp = *this;
-          utf8::next(it, range_end);
-          return temp;
-      }
-      iterator& operator -- ()
-      {
-          utf8::prior(it, range_start);
-          return *this;
-      }
-      iterator operator -- (int)
-      {
-          iterator temp = *this;
-          utf8::prior(it, range_start);
-          return temp;
-      }
-    }; // class iterator
-
-} // namespace utf8
-
-#endif //header guard
-
-
diff --git a/utf8/utf8/core.h b/utf8/utf8/core.h
deleted file mode 100644
index f85081f..0000000
--- a/utf8/utf8/core.h
+++ /dev/null
@@ -1,329 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include <iterator>
-
-namespace utf8
-{
-    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
-    // You may need to change them to match your system.
-    // These typedefs have the same names as ones from cstdint, or boost/cstdint
-    typedef unsigned char   uint8_t;
-    typedef unsigned short  uint16_t;
-    typedef unsigned int    uint32_t;
-
-// Helper code - not intended to be directly called by the library users. May be changed at any time
-namespace internal
-{
-    // Unicode constants
-    // Leading (high) surrogates: 0xd800 - 0xdbff
-    // Trailing (low) surrogates: 0xdc00 - 0xdfff
-    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
-    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
-    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
-    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
-    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
-    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
-
-    // Maximum valid value for a Unicode code point
-    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
-
-    template<typename octet_type>
-    inline uint8_t mask8(octet_type oc)
-    {
-        return static_cast<uint8_t>(0xff & oc);
-    }
-    template<typename u16_type>
-    inline uint16_t mask16(u16_type oc)
-    {
-        return static_cast<uint16_t>(0xffff & oc);
-    }
-    template<typename octet_type>
-    inline bool is_trail(octet_type oc)
-    {
-        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
-    }
-
-    template <typename u16>
-    inline bool is_lead_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_trail_surrogate(u16 cp)
-    {
-        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u32>
-    inline bool is_code_point_valid(u32 cp)
-    {
-        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
-    }
-
-    template <typename octet_iterator>
-    inline typename std::iterator_traits<octet_iterator>::difference_type
-    sequence_length(octet_iterator lead_it)
-    {
-        uint8_t lead = utf8::internal::mask8(*lead_it);
-        if (lead < 0x80)
-            return 1;
-        else if ((lead >> 5) == 0x6)
-            return 2;
-        else if ((lead >> 4) == 0xe)
-            return 3;
-        else if ((lead >> 3) == 0x1e)
-            return 4;
-        else
-            return 0;
-    }
-
-    template <typename octet_difference_type>
-    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
-    {
-        if (cp < 0x80) {
-            if (length != 1)
-                return true;
-        }
-        else if (cp < 0x800) {
-            if (length != 2)
-                return true;
-        }
-        else if (cp < 0x10000) {
-            if (length != 3)
-                return true;
-        }
-
-        return false;
-    }
-
-    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
-
-    /// Helper for get_sequence_x
-    template <typename octet_iterator>
-    utf_error increase_safely(octet_iterator& it, octet_iterator end)
-    {
-        if (++it == end)
-            return NOT_ENOUGH_ROOM;
-
-        if (!utf8::internal::is_trail(*it))
-            return INCOMPLETE_SEQUENCE;
-
-        return UTF8_OK;
-    }
-
-    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
-
-    /// get_sequence_x functions decode utf-8 sequences of the length x
-    template <typename octet_iterator>
-    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-           return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
-
-    template <typename octet_iterator>
-    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        // Save the original value of it so we can go back in case of failure
-        // Of course, it does not make much sense with i.e. stream iterators
-        octet_iterator original_it = it;
-
-        uint32_t cp = 0;
-        // Determine the sequence length based on the lead octet
-        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
-        const octet_difference_type length = utf8::internal::sequence_length(it);
-
-        // Get trail octets and calculate the code point
-        utf_error err = UTF8_OK;
-        switch (length) {
-            case 0:
-                return INVALID_LEAD;
-            case 1:
-                err = utf8::internal::get_sequence_1(it, end, cp);
-                break;
-            case 2:
-                err = utf8::internal::get_sequence_2(it, end, cp);
-            break;
-            case 3:
-                err = utf8::internal::get_sequence_3(it, end, cp);
-            break;
-            case 4:
-                err = utf8::internal::get_sequence_4(it, end, cp);
-            break;
-        }
-
-        if (err == UTF8_OK) {
-            // Decoding succeeded. Now, security checks...
-            if (utf8::internal::is_code_point_valid(cp)) {
-                if (!utf8::internal::is_overlong_sequence(cp, length)){
-                    // Passed! Return here.
-                    code_point = cp;
-                    ++it;
-                    return UTF8_OK;
-                }
-                else
-                    err = OVERLONG_SEQUENCE;
-            }
-            else
-                err = INVALID_CODE_POINT;
-        }
-
-        // Failure branch - restore the original value of the iterator
-        it = original_it;
-        return err;
-    }
-
-    template <typename octet_iterator>
-    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
-        uint32_t ignored;
-        return utf8::internal::validate_next(it, end, ignored);
-    }
-
-} // namespace internal
-
-    /// The library API - functions intended to be called by the users
-
-    // Byte order mark
-    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
-
-    template <typename octet_iterator>
-    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
-    {
-        octet_iterator result = start;
-        while (result != end) {
-            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
-            if (err_code != internal::UTF8_OK)
-                return result;
-        }
-        return result;
-    }
-
-    template <typename octet_iterator>
-    inline bool is_valid(octet_iterator start, octet_iterator end)
-    {
-        return (utf8::find_invalid(start, end) == end);
-    }
-
-    template <typename octet_iterator>
-    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
-    {
-        return (
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
-            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
-           );
-    }
-
-    //Deprecated in release 2.3
-    template <typename octet_iterator>
-    inline bool is_bom (octet_iterator it)
-    {
-        return (
-            (utf8::internal::mask8(*it++)) == bom[0] &&
-            (utf8::internal::mask8(*it++)) == bom[1] &&
-            (utf8::internal::mask8(*it))   == bom[2]
-           );
-    }
-} // namespace utf8
-
-#endif // header guard
-
-
diff --git a/utf8/utf8/unchecked.h b/utf8/utf8/unchecked.h
deleted file mode 100644
index 989ccef..0000000
--- a/utf8/utf8/unchecked.h
+++ /dev/null
@@ -1,228 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-
-namespace utf8
-{
-    namespace unchecked
-    {
-        template <typename octet_iterator>
-        octet_iterator append(uint32_t cp, octet_iterator result)
-        {
-            if (cp < 0x80)                        // one octet
-                *(result++) = static_cast<uint8_t>(cp);
-            else if (cp < 0x800) {                // two octets
-                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else if (cp < 0x10000) {              // three octets
-                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else {                                // four octets
-                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
-                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator>
-        uint32_t next(octet_iterator& it)
-        {
-            uint32_t cp = utf8::internal::mask8(*it);
-            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
-            switch (length) {
-                case 1:
-                    break;
-                case 2:
-                    it++;
-                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
-                    break;
-                case 3:
-                    ++it;
-                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-                case 4:
-                    ++it;
-                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-                    ++it;
-                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-            }
-            ++it;
-            return cp;
-        }
-
-        template <typename octet_iterator>
-        uint32_t peek_next(octet_iterator it)
-        {
-            return utf8::unchecked::next(it);
-        }
-
-        template <typename octet_iterator>
-        uint32_t prior(octet_iterator& it)
-        {
-            while (utf8::internal::is_trail(*(--it))) ;
-            octet_iterator temp = it;
-            return utf8::unchecked::next(temp);
-        }
-
-        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
-        template <typename octet_iterator>
-        inline uint32_t previous(octet_iterator& it)
-        {
-            return utf8::unchecked::prior(it);
-        }
-
-        template <typename octet_iterator, typename distance_type>
-        void advance (octet_iterator& it, distance_type n)
-        {
-            for (distance_type i = 0; i < n; ++i)
-                utf8::unchecked::next(it);
-        }
-
-        template <typename octet_iterator>
-        typename std::iterator_traits<octet_iterator>::difference_type
-        distance (octet_iterator first, octet_iterator last)
-        {
-            typename std::iterator_traits<octet_iterator>::difference_type dist;
-            for (dist = 0; first < last; ++dist)
-                utf8::unchecked::next(first);
-            return dist;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-        {
-            while (start != end) {
-                uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-                if (utf8::internal::is_lead_surrogate(cp)) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                }
-                result = utf8::unchecked::append(cp, result);
-            }
-            return result;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-        {
-            while (start < end) {
-                uint32_t cp = utf8::unchecked::next(start);
-                if (cp > 0xffff) { //make a surrogate pair
-                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-                }
-                else
-                    *result++ = static_cast<uint16_t>(cp);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-        {
-            while (start != end)
-                result = utf8::unchecked::append(*(start++), result);
-
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-        {
-            while (start < end)
-                (*result++) = utf8::unchecked::next(start);
-
-            return result;
-        }
-
-        // The iterator class
-        template <typename octet_iterator>
-          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-            octet_iterator it;
-            public:
-            iterator () {}
-            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
-            // the default "big three" are OK
-            octet_iterator base () const { return it; }
-            uint32_t operator * () const
-            {
-                octet_iterator temp = it;
-                return utf8::unchecked::next(temp);
-            }
-            bool operator == (const iterator& rhs) const
-            {
-                return (it == rhs.it);
-            }
-            bool operator != (const iterator& rhs) const
-            {
-                return !(operator == (rhs));
-            }
-            iterator& operator ++ ()
-            {
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return *this;
-            }
-            iterator operator ++ (int)
-            {
-                iterator temp = *this;
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return temp;
-            }
-            iterator& operator -- ()
-            {
-                utf8::unchecked::prior(it);
-                return *this;
-            }
-            iterator operator -- (int)
-            {
-                iterator temp = *this;
-                utf8::unchecked::prior(it);
-                return temp;
-            }
-          }; // class iterator
-
-    } // namespace utf8::unchecked
-} // namespace utf8
-
-
-#endif // header guard
-
diff --git a/utf8/utf8_fwrap.h b/utf8/utf8_fwrap.h
deleted file mode 100644
index 5d41b6b..0000000
--- a/utf8/utf8_fwrap.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _UTF8_FWRAP_HPP
-#define _UTF8_FWRAP_HPP
-
-#include <utf8.h>
-#include <string>
-#include <iterator>
-#include <stdexcept>
-#include <cstdio>
-#include <cwchar>
-#include <stdint.h>
-
-#ifdef _WIN32
-	#define utf32to8 utf16to8
-#endif
-
-inline wint_t fgetwc_u8(FILE *in) {
-#ifdef _WIN32
-	struct _cps {
-		FILE *f = 0;
-		wchar_t c = 0;
-	};
-	static _cps cps[4];
-
-	for (auto& cp : cps) {
-		if (cp.f == in) {
-			cp.f = 0;
-			return cp.c;
-		}
-	}
-#endif
-
-	int32_t rv = 0;
-	int c = 0, i = 0;
-	char buf[4];
-	if ((c = fgetc_unlocked(in)) != EOF) {
-		buf[i++] = static_cast<char>(c);
-		if ((c & 0xF0) == 0xF0) {
-			if (fread_unlocked(buf+i, 1, 3, in) != 3) {
-				throw std::runtime_error("Could not read 3 expected bytes from stream");
-			}
-			i += 3;
-		}
-		else if ((c & 0xE0) == 0xE0) {
-			if (fread_unlocked(buf+i, 1, 2, in) != 2) {
-				throw std::runtime_error("Could not read 2 expected bytes from stream");
-			}
-			i += 2;
-		}
-		else if ((c & 0xC0) == 0xC0) {
-			if (fread_unlocked(buf+i, 1, 1, in) != 1) {
-				throw std::runtime_error("Could not read 1 expected byte from stream");
-			}
-			i += 1;
-		}
-	}
-	if (i == 0 && c == EOF) {
-		rv = WEOF;
-	}
-	else {
-#ifdef _WIN32
-		wchar_t u16[2] = {};
-		utf8::unchecked::utf8to16(buf, buf+i, u16);
-
-		if (u16[1]) {
-			for (auto& cp : cps) {
-				if (cp.f == 0) {
-					cp.f = in;
-					cp.c = u16[1];
-					return u16[0];
-				}
-			}
-			throw std::runtime_error("Not enough space to store UTF-16 high surrogate");
-		}
-		rv = u16[0];
-#else
-		utf8::unchecked::utf8to32(buf, buf+i, &rv);
-#endif
-	}
-	return static_cast<wint_t>(rv);
-}
-
-inline wint_t fputwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	if (fwrite_unlocked(buf, 1, e-buf, out) != static_cast<size_t>(e-buf)) {
-		return WEOF;
-	}
-
-	return wc;
-}
-
-inline int fputws_u8(const wchar_t* str, FILE *out) {
-	static std::string buf;
-	buf.clear();
-	size_t len = wcslen(str);
-	utf8::unchecked::utf32to8(str, str+len, std::back_inserter(buf));
-	if (fwrite_unlocked(&buf[0], 1, buf.size(), out) != buf.size()) {
-		return WEOF;
-	}
-
-	return 1;
-}
-
-inline wint_t ungetwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	for (char *b = buf ; b != e ; ++b) {
-		if (ungetc(*b, out) == EOF) {
-			return WEOF;
-		}
-	}
-
-	return wc;
-}
-
-#ifdef fgetwc_unlocked
-	#undef fgetwc_unlocked
-#endif
-#define fgetwc_unlocked fgetwc_u8
-
-#ifdef fputwc_unlocked
-	#undef fputwc_unlocked
-#endif
-#define fputwc_unlocked fputwc_u8
-
-#ifdef fputws_unlocked
-	#undef fputws_unlocked
-#endif
-#define fputws_unlocked fputws_u8
-
-#ifdef ungetwc
-	#undef ungetwc
-#endif
-#define ungetwc ungetwc_u8
-
-#ifdef _WIN32
-	#undef utf32to8
-#endif
-
-#endif