commit b74306d181734ee6ba92493a227ff8aeee334a68
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Wed Jun 30 08:55:14 2021 -0500

    use ICU (#74)
    
    ICU changes
    - convert `std::wstring` to `UString` and `wchar_t` to `UChar`
      - note: if any compiled files contain non-BMP string literals, this might be a breaking change, but I don't think any such files exist
    - use `lttoolbox/input_file.h` and ICU `UFILE*` for stream I/O
    - rely on shared case functions
    
    efficiency, readability, and code style changes
    - move constant initializers to class headers
    - prefer `str.empty()` to `str == ""`
    - prefer range-for loops
    - remove unused `#include`s
    - make `die()` and `warn()` printf-like in `trx_compiler.cc`
    
    helper function and dependency changes
    - use apertium's `apertium_re` wrapper rather than maintaining a separate copy
    - move regex optimization code to apertium to share it with t*x
    - use XML iterators from `lttoolbox/xml_walk_util.h` in `trx_compiler.cc`

diff --git a/configure.ac b/configure.ac
index 9ac4f1d..58e2008 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,10 +1,10 @@
 AC_PREREQ(2.61)
 
 m4_define([required_libxml_version], [2.6.17])
-m4_define([required_apertium_version], [3.7.0])
-m4_define([required_lttoolbox_version], [3.5.3])
+m4_define([required_apertium_version], [3.8.0])
+m4_define([required_lttoolbox_version], [3.6.0])
 
-AC_INIT([apertium-recursive], [1.0.1], [awesomeevildudes@gmail.com])
+AC_INIT([apertium-recursive], [1.1.0], [awesomeevildudes@gmail.com])
 AM_INIT_AUTOMAKE
 AC_CONFIG_HEADER([src/auto_config.h])
 AC_CONFIG_MACRO_DIR([m4])
@@ -38,17 +38,22 @@ PKG_CHECK_MODULES([LIBXML], [libxml-2.0 >= required_libxml_version])
 AC_SUBST(LIBXML_CFLAGS)
 AC_SUBST(LIBXML_LIBS)
 
-PKG_CHECK_MODULES(PCRE, [libpcre >= 6.4])
+PKG_CHECK_MODULES([ICU], [icu-i18n, icu-io, icu-uc])
+
+AC_SUBST(ICU_CFLAGS)
+AC_SUBST(ICU_LIBS)
 
 # Checks for libraries.
 AC_CHECK_LIB(xml2, xmlReaderForFile)
 
 AC_CHECK_FUNCS([setlocale strdup getopt_long])
 
-AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt_long, fgetwc_unlocked, fputwc_unlocked, fgetws_unlocked, fputws_unlocked])
+AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
+
+AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt_long])
 
-CPPFLAGS="$CPPFLAGS $CFLAGS $LTTOOLBOX_CFLAGS $APERTIUM_CFLAGS $LIBXML_CFLAGS $PCRE_CFLAGS"
-LIBS="$LIBS $LTTOOLBOX_LIBS $APERTIUM_LIBS $LIBXML_LIBS $PCRE_LIBS"
+CPPFLAGS="$CPPFLAGS $CFLAGS $LTTOOLBOX_CFLAGS $APERTIUM_CFLAGS $LIBXML_CFLAGS $ICU_CFLAGS"
+LIBS="$LIBS $LTTOOLBOX_LIBS $APERTIUM_LIBS $LIBXML_LIBS $ICU_LIBS"
 
 # Checks for highest supported C++ standard
 AC_LANG(C++)
diff --git a/src/Makefile.am b/src/Makefile.am
index c5b81de..1293423 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_LDFLAGS=$(LIBS)
 
 bin_PROGRAMS = rtx-comp rtx-proc rtx-decomp random-path
 
-rtx_comp_SOURCES = rtx_comp.cc rtx_compiler.cc trx_compiler.cc pattern.cc apertium_re.cc
+rtx_comp_SOURCES = rtx_comp.cc rtx_compiler.cc trx_compiler.cc pattern.cc
 
-rtx_proc_SOURCES = rtx_proc.cc rtx_processor.cc apertium_re.cc chunk.cc
+rtx_proc_SOURCES = rtx_proc.cc rtx_processor.cc chunk.cc
 
 rtx_decomp_SOURCES = rtx_decomp.cc
 
diff --git a/src/apertium_re.cc b/src/apertium_re.cc
deleted file mode 100644
index fa22667..0000000
--- a/src/apertium_re.cc
+++ /dev/null
@@ -1,158 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-#include <rtx_config.h>
-#include <apertium_re.h>
-#include <lttoolbox/compression.h>
-#include <iostream>
-#include <cstdlib>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
-using namespace std;
-
-ApertiumRE::ApertiumRE() :
-re(0)
-{
-  empty = true;
-}
-
-ApertiumRE::~ApertiumRE()
-{
-  if(!empty)
-  {
-    pcre_free(re);
-  }
-  empty = true;
-}
-
-void
-ApertiumRE::read(FILE *input)
-{
-  unsigned int size = Compression::multibyte_read(input);
-  re = static_cast<pcre *>(pcre_malloc(size));
-  if(size != fread(re, 1, size, input))
-  {
-    wcerr << L"Error reading regexp" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  empty = false;
-}
-
-void
-ApertiumRE::compile(string const &str)
-{
-  const char *error;
-  int erroroffset;
-  re = pcre_compile(str.c_str(), PCRE_DOTALL|PCRE_EXTENDED|PCRE_UTF8,
-	            &error, &erroroffset, NULL);
-  if(re == NULL)
-  {
-    wcerr << L"Error: pcre_compile ";
-    wcerr << error << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  empty = false;
-}
-
-void
-ApertiumRE::write(FILE *output) const
-{
-  if(empty)
-  {
-    wcerr << L"Error, cannot write empty regexp" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  size_t size;
-  int rc = pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size);
-  if(rc < 0)
-  {
-    wcerr << L"Error calling pcre_fullinfo()\n" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  Compression::multibyte_write(size, output);
-
-  size_t rc2 = fwrite(re, 1, size, output);
-  if(rc2 != size)
-  {
-    wcerr << L"Error writing precompiled regex\n" << endl;
-    exit(EXIT_FAILURE);
-  }
-}
-
-string
-ApertiumRE::match(string const &str) const
-{
-  if(empty)
-  {
-    return "";
-  }
-
-  int result[3];
-  int workspace[4096];
-//  int rc = pcre_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3);
-  int rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3, workspace, 4096);
-
-  if(rc < 0)
-  {
-    switch(rc)
-    {
-      case PCRE_ERROR_NOMATCH:
-	return "";
-
-      default:
-	wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
-	exit(EXIT_FAILURE);
-    }
-  }
-
-  return str.substr(result[0], result[1]-result[0]);
-}
-
-void
-ApertiumRE::replace(string &str, string const &value) const
-{
-  if(empty)
-  {
-    return;
-  }
-
-  int result[3];
-  int workspace[4096];
-  // int rc = pcre_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3);
-  int rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK, result, 3, workspace, 4096);
-  if(rc < 0)
-  {
-    switch(rc)
-    {
-      case PCRE_ERROR_NOMATCH:
-	return;
-
-      default:
-	wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
-	exit(EXIT_FAILURE);
-    }
-  }
-
-  string res = str.substr(0, result[0]);
-  res.append(value);
-  res.append(str.substr(result[1]));
-  str = res;
-}
diff --git a/src/apertium_re.h b/src/apertium_re.h
deleted file mode 100644
index ee73df3..0000000
--- a/src/apertium_re.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _APERTIUM_RE_
-#define _APERTIUM_RE_
-
-#include <rtx_config.h>
-#include <pcre.h>
-#include <cstdio>
-#include <string>
-
-using namespace std;
-
-class ApertiumRE
-{
-private:
-  bool empty;
-  pcre *re;
-public:
-  ApertiumRE();
-  ~ApertiumRE();
-  void read(FILE *);
-  void write(FILE *) const;
-  string match(string const &str) const;
-  void replace(string &str, string const &value) const;
-  void compile(string const &str);
-};
-
-#endif
diff --git a/src/bytecode.h b/src/bytecode.h
index f8b738d..65acc92 100644
--- a/src/bytecode.h
+++ b/src/bytecode.h
@@ -2,104 +2,105 @@
 #define __RTXBYTECODE__
 
 #include <rtx_config.h>
+#include <unicode/uchar.h>
 
 // Stack Operations
 
-static const wchar_t DROP = L'd';
-static const wchar_t DUP  = L'*';
-static const wchar_t OVER = L'o';
-static const wchar_t SWAP = L'w';
+static const UChar DROP = 'd';
+static const UChar DUP  = '*';
+static const UChar OVER = 'o';
+static const UChar SWAP = 'w';
 
 // Literals
 
-static const wchar_t STRING    = L's';
-static const wchar_t INT       = L'i';
-static const wchar_t PUSHFALSE = L'f';
-static const wchar_t PUSHTRUE  = L't';
-static const wchar_t PUSHNULL  = L'0';
+static const UChar STRING    = 's';
+static const UChar INT       = 'i';
+static const UChar PUSHFALSE = 'f';
+static const UChar PUSHTRUE  = 't';
+static const UChar PUSHNULL  = '0';
 
 // Jumps
 
-static const wchar_t JUMP        = L'j';
-static const wchar_t JUMPONTRUE  = L'J';
-static const wchar_t JUMPONFALSE = L'?';
+static const UChar JUMP        = 'j';
+static const UChar JUMPONTRUE  = 'J';
+static const UChar JUMPONFALSE = '?';
 
 // Logical Operators
 
-static const wchar_t AND = L'&';
-static const wchar_t OR  = L'|';
-static const wchar_t NOT = L'!';
+static const UChar AND = '&';
+static const UChar OR  = '|';
+static const UChar NOT = '!';
 
 // String Comparisons
 
-static const wchar_t EQUAL       = L'=';
-static const wchar_t ISPREFIX    = L'(';
-static const wchar_t ISSUFFIX    = L')';
-static const wchar_t ISSUBSTRING = L'c';
+static const UChar EQUAL       = '=';
+static const UChar ISPREFIX    = '(';
+static const UChar ISSUFFIX    = ')';
+static const UChar ISSUBSTRING = 'c';
 
 // Caseless String Comparisons
 
-static const wchar_t EQUALCL       = L'q';
-static const wchar_t ISPREFIXCL    = L'p';
-static const wchar_t ISSUFFIXCL    = L'u';
-static const wchar_t ISSUBSTRINGCL = L'r';
+static const UChar EQUALCL       = 'q';
+static const UChar ISPREFIXCL    = 'p';
+static const UChar ISSUFFIXCL    = 'u';
+static const UChar ISSUBSTRINGCL = 'r';
 
 // List Comparisons
 
-static const wchar_t HASPREFIX = L'[';
-static const wchar_t HASSUFFIX = L']';
-static const wchar_t IN        = L'n';
+static const UChar HASPREFIX = '[';
+static const UChar HASSUFFIX = ']';
+static const UChar IN        = 'n';
 
 // Caseless List Comparisons
 
-static const wchar_t HASPREFIXCL = L'{';
-static const wchar_t HASSUFFIXCL = L'}';
-static const wchar_t INCL        = L'N';
+static const UChar HASPREFIXCL = '{';
+static const UChar HASSUFFIXCL = '}';
+static const UChar INCL        = 'N';
 
 // Case Operations
 
-static const wchar_t GETCASE = L'a';
-static const wchar_t SETCASE = L'A';
+static const UChar GETCASE = 'a';
+static const UChar SETCASE = 'A';
 
 // Variables
 
-static const wchar_t FETCHVAR   = L'v';
-static const wchar_t SETVAR     = L'$';
-static const wchar_t FETCHCHUNK = L'5';
-static const wchar_t SETCHUNK   = L'6';
+static const UChar FETCHVAR   = 'v';
+static const UChar SETVAR     = '$';
+static const UChar FETCHCHUNK = '5';
+static const UChar SETCHUNK   = '6';
 
 // Clips
 
-static const wchar_t SOURCECLIP    = L'S';
-static const wchar_t TARGETCLIP    = L'T';
-static const wchar_t REFERENCECLIP = L'R';
-static const wchar_t SETCLIP       = L'>';
+static const UChar SOURCECLIP    = 'S';
+static const UChar TARGETCLIP    = 'T';
+static const UChar REFERENCECLIP = 'R';
+static const UChar SETCLIP       = '>';
 
 // Chunks
 
-static const wchar_t CHUNK             = L'C';
-static const wchar_t APPENDCHILD       = L'1';
-static const wchar_t APPENDSURFACE     = L'2';
-static const wchar_t APPENDALLCHILDREN = L'3';
-static const wchar_t APPENDALLINPUT    = L'4';
-static const wchar_t PUSHINPUT         = L'7';
-static const wchar_t APPENDSURFACESL   = L'8';
-static const wchar_t APPENDSURFACEREF  = L'9';
+static const UChar CHUNK             = 'C';
+static const UChar APPENDCHILD       = '1';
+static const UChar APPENDSURFACE     = '2';
+static const UChar APPENDALLCHILDREN = '3';
+static const UChar APPENDALLINPUT    = '4';
+static const UChar PUSHINPUT         = '7';
+static const UChar APPENDSURFACESL   = '8';
+static const UChar APPENDSURFACEREF  = '9';
 
 // Output
 
-static const wchar_t OUTPUT    = L'<';
-static const wchar_t BLANK     = L'b';
-static const wchar_t OUTPUTALL = L'@';
-static const wchar_t CONJOIN   = L'+';
+static const UChar OUTPUT    = '<';
+static const UChar BLANK     = 'b';
+static const UChar OUTPUTALL = '@';
+static const UChar CONJOIN   = '+';
 
 // Other
 
-static const wchar_t CONCAT     = L'-';
-static const wchar_t REJECTRULE = L'X';
-static const wchar_t DISTAG     = L'D';
-static const wchar_t GETRULE    = L'^';
-static const wchar_t SETRULE    = L'%';
-static const wchar_t LUCOUNT    = L'#';
+static const UChar CONCAT     = '-';
+static const UChar REJECTRULE = 'X';
+static const UChar DISTAG     = 'D';
+static const UChar GETRULE    = '^';
+static const UChar SETRULE    = '%';
+static const UChar LUCOUNT    = '#';
 
 #endif
diff --git a/src/chunk.cc b/src/chunk.cc
index 135d4db..8c7a8c2 100644
--- a/src/chunk.cc
+++ b/src/chunk.cc
@@ -1,12 +1,11 @@
 #include <rtx_config.h>
 #include <chunk.h>
-#include <apertium/string_utils.h>
-#include <apertium/utf_converter.h>
+#include <lttoolbox/string_utils.h>
 
 #include <iostream>
 
-wstring
-combineWblanks(wstring wblank_current, wstring wblank_to_add)
+UString
+combineWblanks(UString wblank_current, UString wblank_to_add)
 {
   if(wblank_current.empty() && wblank_to_add.empty())
   {
@@ -21,8 +20,8 @@ combineWblanks(wstring wblank_current, wstring wblank_to_add)
     return wblank_current;
   }
   
-  wstring new_out_wblank;
-  for(wstring::const_iterator it = wblank_current.begin(); it != wblank_current.end(); it++)
+  UString new_out_wblank;
+  for(UString::const_iterator it = wblank_current.begin(); it != wblank_current.end(); it++)
   {
     if(*it == '\\')
     {
@@ -44,7 +43,7 @@ combineWblanks(wstring wblank_current, wstring wblank_to_add)
     }
   }
   
-  for(wstring::const_iterator it = wblank_to_add.begin(); it != wblank_to_add.end(); it++)
+  for(UString::const_iterator it = wblank_to_add.begin(); it != wblank_to_add.end(); it++)
   {
     if(*it == '\\')
     {
@@ -69,67 +68,48 @@ combineWblanks(wstring wblank_current, wstring wblank_to_add)
   return new_out_wblank;
 }
 
-wstring
+UString
 Chunk::chunkPart(ApertiumRE const &part, const ClipType side)
 {
-  string chunk;
   switch(side)
   {
     case SourceClip:
-      chunk = UtfConverter::toUtf8(source);
+      return part.match(source);
       break;
     case TargetClip:
-      chunk = UtfConverter::toUtf8(target);
+      return part.match(target);
       break;
     case ReferenceClip:
-      chunk = UtfConverter::toUtf8(coref);
+      return part.match(coref);
       break;
   }
-  string result = part.match(chunk);
-  if(result.size() == 0)
-  {
-    return wstring(L"");
-  }
-  else
-  {
-    return UtfConverter::fromUtf8(result);
-  }
+  return ""_u;
 }
 
 void
-Chunk::setChunkPart(ApertiumRE const &part, wstring const &value)
+Chunk::setChunkPart(ApertiumRE const &part, UString const &value)
 {
-  string surf = UtfConverter::toUtf8(target);
-  if(part.match(surf).size() == 0)
-  {
-    //target += value;
-  }
-  else
-  {
-    string val = UtfConverter::toUtf8(value);
-    part.replace(surf, val);
-    target = UtfConverter::fromUtf8(surf);
-  }
+  part.replace(target, value);
 }
 
-vector<wstring>
-Chunk::getTags(const vector<wstring>& parentTags)
+vector<UString>
+Chunk::getTags(const vector<UString>& parentTags)
 {
   unsigned int last = 0;
-  vector<wstring> ret;
+  vector<UString> ret;
   for(unsigned int i = 0, limit = target.size(); i < limit; i++)
   {
-    if(target[i] == L'<')
+    if(target[i] == '<')
     {
       last = i;
       bool isNum = true;
       for(unsigned int j = i+1; j < limit; j++)
       {
-        if(target[j] == L'>')
+        if(target[j] == '>')
         {
           if(isNum)
           {
-            unsigned int n = stoul(target.substr(last+1, j-last-1));
+            unsigned int n = StringUtils::stoi(target.substr(last+1, j-last-1));
             if(n != 0 && n <= parentTags.size())
             {
               ret.push_back(parentTags[n-1]);
@@ -137,7 +117,7 @@ Chunk::getTags(const vector<wstring>& parentTags)
               break;
             }
           }
-          wstring tag = target.substr(last, j-last+1);
+          UString tag = target.substr(last, j-last+1);
           ret.push_back(tag);
           last = j+1;
           break;
@@ -148,7 +128,7 @@ Chunk::getTags(const vector<wstring>& parentTags)
         }
       }
     }
-    else if(target[i] == L'\\')
+    else if(target[i] == '\\')
     {
       i++;
     }
@@ -157,27 +137,27 @@ Chunk::getTags(const vector<wstring>& parentTags)
 }
 
 void
-Chunk::updateTags(const vector<wstring>& parentTags)
+Chunk::updateTags(const vector<UString>& parentTags)
 {
   if(isBlank) return;
   unsigned int last = 0;
-  wstring result;
+  UString result;
   result.reserve(target.size() + (2*parentTags.size()));
   // a rough estimate - works if most number tags are 1 digit and most new tags are 3 chars or less
   for(unsigned int i = 0, limit = target.size(); i < limit; i++)
   {
-    if(target[i] == L'<')
+    if(target[i] == '<')
     {
       result += target.substr(last, i-last);
       last = i;
       bool isNum = true;
       for(unsigned int j = i+1; j < limit; j++)
       {
-        if(target[j] == L'>')
+        if(target[j] == '>')
         {
           if(isNum)
           {
-            unsigned int n = stoul(target.substr(last+1, j-last-1));
+            unsigned int n = StringUtils::stoi(target.substr(last+1, j-last-1));
             if(n != 0 && n <= parentTags.size())
             {
               result += parentTags[n-1];
@@ -196,7 +176,7 @@ Chunk::updateTags(const vector<wstring>& parentTags)
         }
       }
     }
-    else if(target[i] == L'\\')
+    else if(target[i] == '\\')
     {
       i++;
     }
@@ -209,11 +189,11 @@ Chunk::updateTags(const vector<wstring>& parentTags)
 }
 
 void
-Chunk::output(const vector<wstring>& parentTags, FILE* out = NULL)
+Chunk::output(const vector<UString>& parentTags, UFILE* out = NULL)
 {
   if(contents.size() > 0)
   {
-    vector<wstring> tags = getTags(parentTags);
+    vector<UString> tags = getTags(parentTags);
     for(unsigned int i = 0; i < contents.size(); i++)
     {
       contents[i]->output(tags, out);
@@ -223,11 +203,11 @@ Chunk::output(const vector<wstring>& parentTags, FILE* out = NULL)
   {
     if(out == NULL)
     {
-      cout << UtfConverter::toUtf8(target);
+      cout << target;
     }
     else
     {
-      fputs_unlocked(UtfConverter::toUtf8(target).c_str(), out);
+      write(target, out);
     }
   }
   else
@@ -238,29 +218,26 @@ Chunk::output(const vector<wstring>& parentTags, FILE* out = NULL)
     }
     else if(out == NULL)
     {
-      cout << UtfConverter::toUtf8(wblank);
+      cout << wblank;
       cout << "^";
-      cout << UtfConverter::toUtf8(target);
+      cout << target;
       cout << "$";
     }
     else
     {
-      fputs_unlocked(UtfConverter::toUtf8(wblank).c_str(), out);
-      fputc_unlocked('^', out);
-      fputs_unlocked(UtfConverter::toUtf8(target).c_str(), out);
-      fputc_unlocked('$', out);
+      u_fprintf(out, "%S^%S$", wblank.c_str(), target.c_str());
     }
   }
 }
 
 void
-Chunk::output(FILE* out)
+Chunk::output(UFILE* out)
 {
-  vector<wstring> tags;
+  vector<UString> tags;
   output(tags, out);
 }
 
-wstring
+UString
 Chunk::matchSurface()
 {
   if(contents.size() == 0)
@@ -282,22 +259,22 @@ Chunk::conjoin(Chunk* other)
   unsigned int lemq_loc = 0;
   for(; lemq_loc < target.size(); lemq_loc++)
   {
-    if(target[lemq_loc] == L'\\')
+    if(target[lemq_loc] == '\\')
     {
       lemq_loc++;
       continue;
     }
-    else if(target[lemq_loc] == L'#')
+    else if(target[lemq_loc] == '#')
     {
       break;
     }
   }
-  target.insert(lemq_loc, L"+" + other->target);
+  target.insert(lemq_loc, "+"_u + other->target);
   wblank = combineWblanks(other->wblank, wblank);
 }
 
 void
-Chunk::writeTree(TreeMode mode, FILE* out)
+Chunk::writeTree(TreeMode mode, UFILE* out)
 {
   switch(mode)
   {
@@ -305,21 +282,21 @@ Chunk::writeTree(TreeMode mode, FILE* out)
     case TreeModeNest: writeTreePlain(out, 0); break;
     case TreeModeLatex:
       if(isBlank) return;
-      writeString(L"\\begin{forest}\n%where n children=0{tier=word}{}\n", out);
-      writeString(L"% Uncomment the preceding line to make the LUs bottom-aligned.\n", out);
+      writeString("\\begin{forest}\n%where n children=0{tier=word}{}\n"_u, out);
+      writeString("% Uncomment the preceding line to make the LUs bottom-aligned.\n"_u, out);
       writeTreeLatex(out);
-      writeString(L"\n\\end{forest}\n", out);
+      writeString("\n\\end{forest}\n"_u, out);
       break;
     case TreeModeDot:
       if(isBlank) return;
-      writeString(L"digraph {", out);
+      writeString("digraph {"_u, out);
       writeTreeDot(out);
-      writeString(L"}\n", out);
+      writeString("}\n"_u, out);
       break;
     case TreeModeBox:
     {
       if(isBlank) return;
-      vector<vector<wstring>> tree = writeTreeBox();
+      vector<vector<UString>> tree = writeTreeBox();
       if(tree.size() == 0) return;
       unsigned int tr = 4, sl = 12, st = 11, tl = 12, tt = 11, rl = 0, rt = 0;
       for(unsigned int i = 0; i < tree.size(); i++)
@@ -335,56 +312,57 @@ Chunk::writeTree(TreeMode mode, FILE* out)
       bool doCoref = (rl > 0 || rt > 0);
       if(doCoref && rl < 17) rl = 17;
       if(doCoref && rt < 16) rt = 16;
-      writeString(L"Tree" + wstring(tr-3, L' '), out);
-      writeString(L"Source Lemma" + wstring(sl - 11, L' '), out);
-      writeString(L"Source Tags" + wstring(st - 10, L' '), out);
-      writeString(L"Target Lemma" + wstring(tl - 11, L' '), out);
-      writeString(L"Target Tags" + wstring(tt - 10, L' '), out);
+      writeString("Tree"_u + UString(tr-3, ' '), out);
+      writeString("Source Lemma"_u + UString(sl - 11, ' '), out);
+      writeString("Source Tags"_u + UString(st - 10, ' '), out);
+      writeString("Target Lemma"_u + UString(tl - 11, ' '), out);
+      writeString("Target Tags"_u + UString(tt - 10, ' '), out);
       if(doCoref)
       {
-        writeString(L"Coreference Lemma" + wstring(rl - 16, L' '), out);
-        writeString(L"Coreference Tags", out);
-        if(rt > 16) writeString(wstring(rt - 16, L' '), out);
+        writeString("Coreference Lemma"_u + UString(rl - 16, ' '), out);
+        writeString("Coreference Tags"_u, out);
+        if(rt > 16) writeString(UString(rt - 16, ' '), out);
       }
-      writeString(L"\n", out);
-      writeString(wstring(tr, L'─') + L" ", out);
-      writeString(wstring(sl, L'─') + L" ", out);
-      writeString(wstring(st, L'─') + L" ", out);
-      writeString(wstring(tl, L'─') + L" ", out);
-      writeString(wstring(tt, L'─'), out);
-      if(doCoref) writeString(L" " + wstring(rl, L'─'), out);
-      if(doCoref) writeString(L" " + wstring(rt, L'─'), out);
-      writeString(L"\n", out);
+      writeString("\n"_u, out);
+      UChar dash = u'\u2500'; // '─'
+      writeString(UString(tr, dash) + " "_u, out);
+      writeString(UString(sl, dash) + " "_u, out);
+      writeString(UString(st, dash) + " "_u, out);
+      writeString(UString(tl, dash) + " "_u, out);
+      writeString(UString(tt, dash), out);
+      if(doCoref) writeString(" "_u + UString(rl, dash), out);
+      if(doCoref) writeString(" "_u + UString(rt, dash), out);
+      writeString("\n"_u, out);
       for(unsigned int i = 0; i < tree.size(); i++)
       {
-        writeString(wstring(tr - tree[i][0].size(), L' ') + tree[i][0] + L" ", out);
-        writeString(tree[i][1] + wstring(sl - tree[i][1].size() + 1, L' '), out);
-        writeString(tree[i][2] + wstring(st - tree[i][2].size() + 1, L' '), out);
-        writeString(tree[i][3] + wstring(tl - tree[i][3].size() + 1, L' '), out);
-        writeString(tree[i][4] + wstring(tt - tree[i][4].size(), L' '), out);
+        writeString(UString(tr - tree[i][0].size(), ' ') + tree[i][0] + " "_u, out);
+        writeString(tree[i][1] + UString(sl - tree[i][1].size() + 1, ' '), out);
+        writeString(tree[i][2] + UString(st - tree[i][2].size() + 1, ' '), out);
+        writeString(tree[i][3] + UString(tl - tree[i][3].size() + 1, ' '), out);
+        writeString(tree[i][4] + UString(tt - tree[i][4].size(), ' '), out);
         if(doCoref)
         {
-          writeString(L" " + tree[i][5] + wstring(rl - tree[i][5].size(), L' '), out);
-          writeString(L" " + tree[i][6], out);
+          writeString(" "_u + tree[i][5] + UString(rl - tree[i][5].size(), ' '), out);
+          writeString(" "_u + tree[i][6], out);
         }
-        writeString(L"\n", out);
+        writeString("\n"_u, out);
       }
-      writeString(L"\n", out);
+      writeString("\n"_u, out);
     }
       break;
     default:
-      wcerr << L"That tree mode has not yet been implemented." << endl;
+      wcerr << "That tree mode has not yet been implemented." << endl;
   }
 }
 
-pair<wstring, wstring>
-Chunk::chopString(wstring s)
+pair<UString, UString>
+Chunk::chopString(UString s)
 {
-  wstring lem;
-  wstring tags;
+  UString lem;
+  UString tags;
   for(unsigned int i = 0; i < s.size(); i++)
   {
-    if(s[i] == L'<')
+    if(s[i] == '<')
     {
       lem = s.substr(0, i);
       tags = s.substr(i+1, s.size()-i-2);
@@ -395,24 +373,24 @@ Chunk::chopString(wstring s)
   {
     lem = s;
   }
-  return make_pair(lem, StringUtils::substitute(tags, L"><", L"."));
+  return make_pair(lem, StringUtils::substitute(tags, "><"_u, "."_u));
 }
 
 void
-Chunk::writeString(wstring s, FILE* out)
+Chunk::writeString(UString s, UFILE* out)
 {
-  if(out == NULL) wcerr << s;
-  else fputs_unlocked(UtfConverter::toUtf8(s).c_str(), out);
+  if(out == NULL) cerr << s;
+  else write(s, out);
 }
 
 void
-Chunk::writeTreePlain(FILE* out, int depth)
+Chunk::writeTreePlain(UFILE* out, int depth)
 {
   if(depth >= 0 && isBlank) return;
-  wstring base;
+  UString base;
   for(int i = 0; i < depth; i++)
   {
-    base += L'\t';
+    base += '\t';
   }
   if(!isBlank)
   {
@@ -420,21 +398,21 @@ Chunk::writeTreePlain(FILE* out, int depth)
     {
       base += wblank;
     }
-    base += L"^";
+    base += '^';
   }
   if(source.size() > 0)
   {
-    base += source + L"/";
+    base += source + "/"_u;
   }
   base += target;
   if(coref.size() > 0)
   {
-    base += L"/" + coref;
+    base += "/"_u + coref;
   }
   writeString(base, out);
   if(contents.size() > 0)
   {
-    writeString((depth == -1) ? L"{" : L"{\n", out);
+    writeString((depth == -1) ? "{"_u : "{\n"_u, out);
     int newdepth = (depth == -1) ? -1 : depth + 1;
     for(unsigned int i = 0; i < contents.size(); i++)
     {
@@ -442,111 +420,114 @@ Chunk::writeTreePlain(FILE* out, int depth)
     }
     for(int i  = 0; i < depth; i++)
     {
-      writeString(L"\t", out);
+      writeString("\t"_u, out);
     }
-    writeString(L"}", out);
+    writeString("}"_u, out);
   }
-  if(!isBlank) writeString(L"$", out);
-  if(depth != -1) writeString(L"\n", out);
+  if(!isBlank) writeString("$"_u, out);
+  if(depth != -1) writeString("\n"_u, out);
 }
 
 void
-Chunk::writeTreeLatex(FILE* out)
+Chunk::writeTreeLatex(UFILE* out)
 {
   if(isBlank) return;
-  wstring nl = L" \\\\ ";
-  wstring base;
-  pair<wstring, wstring> p;
+  UString nl = " \\\\ "_u;
+  UString base;
+  pair<UString, UString> p;
   if(source.size() > 0)
   {
     p = chopString(source);
-    base += L"\\textbf{" + p.first + L"}" + nl + L"\\texttt{" + p.second + L"}" + nl;
+    base += "\\textbf{"_u + p.first + "}"_u + nl + "\\texttt{"_u + p.second + "}"_u + nl;
   }
   p = chopString(target);
   if(contents.size() == 0)
   {
-    base += L"\\textit{" + p.first + L"}" + nl + L"\\texttt{" + p.second + L"}";
+    base += "\\textit{"_u + p.first + "}"_u + nl + "\\texttt{"_u + p.second + "}"_u;
   }
   else
   {
     unsigned int i = 0;
     for(; i < p.second.size(); i++)
     {
-      if(p.second[i] == L'.') break;
+      if(p.second[i] == '.') break;
     }
     if(i < p.second.size())
     {
-      base += p.second.substr(0, i) + nl + L"\\textit{" + p.first + L"}";
-      base += nl + L"\\texttt{" + p.second.substr(i+1) + L"}";
+      base += p.second.substr(0, i) + nl + "\\textit{"_u + p.first + "}"_u;
+      base += nl + "\\texttt{"_u + p.second.substr(i+1) + "}"_u;
     }
     else
     {
-      base += p.second + nl + L"\\textit{" + p.first + L"}";
+      base += p.second + nl + "\\textit{"_u + p.first + "}"_u;
     }
   }
   if(coref.size() > 0)
   {
     p = chopString(coref);
-    base += nl + L"\\textit{" + p.first + L"}" + nl + L"\\texttt{" + p.second + L"}";
+    base += nl + "\\textit{"_u + p.first + "}"_u + nl + "\\texttt{"_u + p.second + "}"_u;
   }
-  base = L"[{ \\begin{tabular}{c} " + base + L" \\end{tabular} } ";
-  base = StringUtils::substitute(base, L"_", L"\\_");
+  base = "[{ \\begin{tabular}{c} "_u + base + " \\end{tabular} } "_u;
+  base = StringUtils::substitute(base, "_"_u, "\\_"_u);
   writeString(base, out);
   for(unsigned int i = 0; i < contents.size(); i++) contents[i]->writeTreeLatex(out);
-  writeString(L" ]", out);
+  writeString(" ]"_u, out);
 }
 
-wstring
-Chunk::writeTreeDot(FILE* out)
+UString
+Chunk::writeTreeDot(UFILE* out)
 {
-  if(isBlank) return L"";
+  if(isBlank) return ""_u;
   static int nodeId = 0;
   nodeId++;
-  wstring name = L"n" + to_wstring(nodeId);
-  wstring node = name + L" \\[label=\"";
+  UString name = "n"_u + StringUtils::itoa(nodeId);
+  UString node = name;
+  node += " \\[label=\""_u;
   if(source.size() > 0)
   {
-    node += source + L"\\\\n";
+    node += source;
+    node += "\\\\n"_u;
   }
   node += target;
   if(coref.size() > 0)
   {
-    node += L"\\\\n" + coref;
+    node += "\\\\n"_u;
+    node += coref;
   }
-  node += L"\"\\];";
+  node += "\"\\];"_u;
   writeString(node, out);
   for(unsigned int i = 0; i < contents.size(); i++)
   {
-    wstring kid = contents[i]->writeTreeDot(out);
-    if(kid.size() > 0) writeString(name + L" -> " + kid + L";", out);
+    UString kid = contents[i]->writeTreeDot(out);
+    if(kid.size() > 0) writeString(name + " -> "_u + kid + ";"_u, out);
   }
   return name;
 }
 
-vector<vector<wstring>>
+vector<vector<UString>>
 Chunk::writeTreeBox()
 {
   if(contents.size() == 0)
   {
-    vector<wstring> ret;
+    vector<UString> ret;
     ret.resize(7);
-    pair<wstring, wstring> p = chopString(source);
+    pair<UString, UString> p = chopString(source);
     ret[1] = p.first; ret[2] = p.second;
     p = chopString(target);
     ret[3] = p.first; ret[4] = p.second;
     p = chopString(coref);
     ret[5] = p.first; ret[6] = p.second;
-    return vector<vector<wstring>>(1, ret);
+    return vector<vector<UString>>(1, ret);
   }
   else
   {
     vector<pair<unsigned int, unsigned int>> bounds;
-    vector<vector<wstring>> tree;
+    vector<vector<UString>> tree;
     for(unsigned int i = 0; i < contents.size(); i++)
     {
       if(!contents[i]->isBlank)
       {
-        vector<vector<wstring>> temp = contents[i]->writeTreeBox();
+        vector<vector<UString>> temp = contents[i]->writeTreeBox();
         tree.insert(tree.end(), temp.begin(), temp.end());
         if(temp.size() == 1)
         {
@@ -556,8 +537,8 @@ Chunk::writeTreeBox()
         int first = -1, last = -1;
         for(unsigned int j = tree.size() - temp.size(); j < tree.size(); j++)
         {
-          if(first == -1 && tree[j][0][0] != L' ') first = j;
-          else if(first != -1 && last == -1 && tree[j][0][0] == L' ') last = j-1;
+          if(first == -1 && tree[j][0][0] != ' ') first = j;
+          else if(first != -1 && last == -1 && tree[j][0][0] == ' ') last = j-1;
         }
         first = (first == -1) ? tree.size() - temp.size() : first;
         last = (last == -1) ? tree.size() - 1 : last;
@@ -566,7 +547,7 @@ Chunk::writeTreeBox()
     }
     if(tree.size() == 1)
     {
-      tree[0][0] = L"─" + tree[0][0];
+      tree[0][0] = u'\u2500' + tree[0][0]; // '─'
       return tree;
     }
     unsigned int center = tree.size() / 2;
@@ -589,7 +570,7 @@ Chunk::writeTreeBox()
       unsigned int sz = tree[i][0].size();
       if(lines.count(i) == 0)
       {
-        tree[i][0] = wstring(len - sz, L' ') + tree[i][0];
+        tree[i][0] = UString(len - sz, ' ') + tree[i][0];
       }
       else
       {
@@ -597,24 +578,36 @@ Chunk::writeTreeBox()
         {
           switch(tree[i][0][0])
           {
-            case L'│': tree[i][0][0] = L'┤'; break;
-            case L'├': tree[i][0][0] = L'┼'; break;
-            case L'┌': tree[i][0][0] = L'┬'; break;
-            case L'└': tree[i][0][0] = L'┴'; break;
-            default: break;
+          case u'\u2502': // '│'
+            tree[i][0][0] = u'\u2524'; break; // '┤'
+          case u'\u251c': // '├'
+            tree[i][0][0] = u'\u253c'; break; // '┼'
+          case u'\u250c': // '┌'
+            tree[i][0][0] = u'\u252c'; break; // '┬'
+          case u'\u2514': // '└'
+            tree[i][0][0] = u'\u2534'; break; // '┴'
+          default: break;
           }
         }
-        tree[i][0] = wstring(len - sz, L'─') + tree[i][0];
+        tree[i][0] = UString(len - sz, u'\u2500') + tree[i][0]; // '─'
       }
-      if(i < firstLine || i > lastLine) tree[i][0] = L' ' + tree[i][0];
-      else if(i == firstLine && i == lastLine) tree[i][0] = L'─' + tree[i][0];
-      else if(i == firstLine) tree[i][0] = L'┌' + tree[i][0];
-      else if(i > firstLine && i < lastLine)
-      {
-        if(lines.count(i) == 0) tree[i][0] = L'│' + tree[i][0];
-        else tree[i][0] = L'├' + tree[i][0];
+      UChar prefix = ' ';
+      if (i > firstLine && i < lastLine) {
+        if (lines.count(i) == 0) {
+          prefix = u'\u2502'; // '│'
+        } else {
+          prefix = u'\u251c'; // '├'
+        }
+      } else if (i == firstLine) {
+        if (i == lastLine) {
+          prefix = u'\u2500'; // '─'
+        } else {
+          prefix = u'\u250c'; // '┌'
+        }
+      } else if (i == lastLine) {
+        prefix = u'\u2514'; // '└'
       }
-      else if(i == lastLine) tree[i][0] = L'└' + tree[i][0];
+      tree[i][0] = prefix + tree[i][0];
     }
     return tree;
   }
diff --git a/src/chunk.h b/src/chunk.h
index 214dc8d..9e6f6d0 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -2,8 +2,7 @@
 #define __RTXCHUNK__
 
 #include <rtx_config.h>
-#include <apertium_re.h>
-#include <apertium/string_utils.h>
+#include <apertium/apertium_re.h>
 
 #include <vector>
 #include <string>
@@ -28,10 +27,10 @@ enum TreeMode
 class Chunk
 {
 public:
-  wstring source;
-  wstring target;
-  wstring coref;
-  wstring wblank;
+  UString source;
+  UString target;
+  UString coref;
+  UString wblank;
   bool isBlank;
   bool isJoiner;
   vector<Chunk*> contents;
@@ -40,13 +39,13 @@ public:
   Chunk()
   : isBlank(false), isJoiner(false), rule(-1)
   {}
-  Chunk(wstring blankContent)
+  Chunk(UString blankContent)
   : target(blankContent), isBlank(true), isJoiner(false), rule(-1)
   {}
-  Chunk(wstring src, wstring dest, wstring cor, wstring wbl)
+  Chunk(UString src, UString dest, UString cor, UString wbl)
   : source(src), target(dest), coref(cor), wblank(wbl), isBlank(false), isJoiner(false), rule(-1)
   {}
-  Chunk(wstring dest, vector<Chunk*>& children, int r = -1)
+  Chunk(UString dest, vector<Chunk*>& children, int r = -1)
   : target(dest), isBlank(false), isJoiner(false), contents(children), rule(r)
   {}
   Chunk(Chunk& other) // copy constructor
@@ -100,29 +99,29 @@ public:
     return ret;
   }
   
-  wstring chunkPart(ApertiumRE const &part, const ClipType side);
-  void setChunkPart(ApertiumRE const &part, wstring const &value);
-  vector<wstring> getTags(const vector<wstring>& parentTags);
-  void updateTags(const vector<wstring>& parentTags);
-  void output(const vector<wstring>& parentTags, FILE* out);
-  void output(FILE* out);
-  wstring matchSurface();
+  UString chunkPart(ApertiumRE const &part, const ClipType side);
+  void setChunkPart(ApertiumRE const &part, UString const &value);
+  vector<UString> getTags(const vector<UString>& parentTags);
+  void updateTags(const vector<UString>& parentTags);
+  void output(const vector<UString>& parentTags, UFILE* out);
+  void output(UFILE* out);
+  UString matchSurface();
   void appendChild(Chunk* kid);
   void conjoin(Chunk* other);
-  void writeTree(TreeMode mode, FILE* out);
+  void writeTree(TreeMode mode, UFILE* out);
   
 private:
-  static pair<wstring, wstring> chopString(wstring s);
-  static void writeString(wstring s, FILE* out);
-  void writeTreePlain(FILE* out, int depth);
-  void writeTreeLatex(FILE* out);
-  wstring writeTreeDot(FILE* out);
-  vector<vector<wstring>> writeTreeBox();
+  static pair<UString, UString> chopString(UString s);
+  static void writeString(UString s, UFILE* out);
+  void writeTreePlain(UFILE* out, int depth);
+  void writeTreeLatex(UFILE* out);
+  UString writeTreeDot(UFILE* out);
+  vector<vector<UString>> writeTreeBox();
 };
 
 /**
  * Combines two wordbound blanks and returns it
 */
-wstring combineWblanks(wstring wblank_current, wstring wblank_to_add);
+UString combineWblanks(UString wblank_current, UString wblank_to_add);
 
 #endif
diff --git a/src/matcher.h b/src/matcher.h
index da69cab..a1c8f78 100644
--- a/src/matcher.h
+++ b/src/matcher.h
@@ -6,6 +6,7 @@
 #include <lttoolbox/alphabet.h>
 #include <chunk.h>
 #include <list>
+#include <cstring>
 
 using namespace std;
 
@@ -134,9 +135,9 @@ public:
 
     initial = t.getInitial();
 
-    any_char = (*a)(L"<ANY_CHAR>");
-    any_tag = (*a)(L"<ANY_TAG>");
-    lookahead = (*a)(L"<LOOK:AHEAD>");
+    any_char = (*a)("<ANY_CHAR>"_u);
+    any_tag = (*a)("<ANY_TAG>"_u);
+    lookahead = (*a)("<LOOK:AHEAD>"_u);
 
     prematchIdx = 0;
   }
@@ -173,26 +174,26 @@ public:
   }
   void matchBlank(int* state, int& first, int& last)
   {
-    step(state, first, last, L' ');
+    step(state, first, last, ' ');
   }
-  void matchChunk(int* state, int& first, int& last, const wstring& ch, bool addInit = true)
+  void matchChunk(int* state, int& first, int& last, const UString& ch, bool addInit = true)
   {
-    step(state, first, last, L'^');
+    step(state, first, last, '^');
     if(addInit)
     {
-      applySymbol(initial, L'^', state, last);
+      applySymbol(initial, '^', state, last);
     }
     for(unsigned int i = 0, limit = ch.size(); i < limit; i++)
     {
       switch(ch[i])
       {
-        case L'\\':
+        case '\\':
           step(state, first, last, towlower(ch[++i]), any_char);
           break;
-        case L'<':
+        case '<':
           for(unsigned int j = i+1; j < ch.size(); j++)
           {
-            if(ch[j] == L'>')
+            if(ch[j] == '>')
             {
               int symbol = (*alpha)(ch.substr(i, j-i+1));
               if(symbol)
@@ -213,23 +214,23 @@ public:
           break;
       }
     }
-    step(state, first, last, L'$');
+    step(state, first, last, '$');
   }
-  void prepareChunk(const wstring& chunk)
+  void prepareChunk(const UString& chunk)
   {
     prematchIdx = 0;
     for(unsigned int i = 0, limit = chunk.size(); i < limit; i++)
     {
       switch(chunk[i])
       {
-        case L'\\':
+        case '\\':
           prematchAlt[prematchIdx] = any_char;
           prematch[prematchIdx++] = towlower(chunk[++i]);
           break;
-        case L'<':
+        case '<':
           for(unsigned int j = i+1; j < chunk.size(); j++)
           {
-            if(chunk[j] == L'>')
+            if(chunk[j] == '>')
             {
               int symbol = (*alpha)(chunk.substr(i, j-i+1));
               prematchAlt[prematchIdx] = any_tag;
@@ -248,8 +249,8 @@ public:
   }
   void matchPreparedChunk(int* state, int& first, int& last)
   {
-    step(state, first, last, L'^');
-    applySymbol(initial, L'^', state, last);
+    step(state, first, last, '^');
+    applySymbol(initial, '^', state, last);
     for(int i = 0; i < prematchIdx; i++)
     {
       if(prematch[i] == any_tag)
@@ -261,20 +262,20 @@ public:
         step(state, first, last, prematch[i], prematchAlt[i]);
       }
     }
-    step(state, first, last, L'$');
+    step(state, first, last, '$');
   }
   bool shouldShift(int* state, int first, int last)
   {
     for(int i = first; i != last; i = (i+1)%RTXStateSize)
     {
-      if(nodes[state[i]].search(L' ') != -1)
+      if(nodes[state[i]].search(' ') != -1)
       {
         return true;
       }
     }
     return false;
   }
-  bool shouldShift(int* state, int first, int last, const wstring& chunk)
+  bool shouldShift(int* state, int first, int last, const UString& chunk)
   {
     int local_state[RTXStateSize];
     memcpy(local_state, state, RTXStateSize*sizeof(int));
@@ -357,8 +358,8 @@ public:
   int firstWord;
   int lastWord;
   int id;
-  map<wstring, wstring, Ltstr> stringVars;
-  map<wstring, wstring, Ltstr> wblankVars;
+  map<UString, UString> stringVars;
+  map<UString, UString> wblankVars;
   vector<Chunk*> chunkVars;
   ParseNode()
   : first(0), last(0), firstWord(0), lastWord(0), id(-1)
diff --git a/src/pattern.cc b/src/pattern.cc
index 32d5312..1e3acd7 100644
--- a/src/pattern.cc
+++ b/src/pattern.cc
@@ -3,9 +3,9 @@
 #include <bytecode.h>
 
 #include <lttoolbox/compression.h>
-#include <apertium/string_utils.h>
-#include <apertium_re.h>
-#include <apertium/utf_converter.h>
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/input_file.h>
+#include <apertium/transfer_regex.h>
 
 #include <iostream>
 #include <fstream>
@@ -14,28 +14,26 @@ using namespace std;
 
 PatternBuilder::PatternBuilder()
 {
-  alphabet.includeSymbol(L"<ANY_TAG>");
-  alphabet.includeSymbol(L"<ANY_CHAR>");
-  alphabet.includeSymbol(L"<LOOK:AHEAD>");
-  attr_items[L"lem"] = L"^(([^<]|\"\\<\")+)";
-  attr_items[L"lemq"] = L"\\#[- _][^<]+";
-  attr_items[L"lemh"] = L"^(([^<#]|\"\\<\"|\"\\#\")+)";
-  attr_items[L"whole"] = L"(.+)";
-  attr_items[L"tags"] = L"((<[^>]+>)+)";
-  attr_items[L"chname"] = L"({([^/]+)\\/)"; // includes delimiters { and / !!!
-  attr_items[L"chcontent"] = L"(\\{.+)";
-  attr_items[L"content"] = L"(\\{.+)";
-  attr_items[L"pos_tag"] = L"(<[^>]+>)";
-  starCanBeEmpty = false;
-  chunkVarCount = 0;
+  alphabet.includeSymbol("<ANY_TAG>"_u);
+  alphabet.includeSymbol("<ANY_CHAR>"_u);
+  alphabet.includeSymbol("<LOOK:AHEAD>"_u);
+  attr_items["lem"_u] = "^(([^<]|\"\\<\")+)"_u;
+  attr_items["lemq"_u] = "\\#[- _][^<]+"_u;
+  attr_items["lemh"_u] = "^(([^<#]|\"\\<\"|\"\\#\")+)"_u;
+  attr_items["whole"_u] = "(.+)"_u;
+  attr_items["tags"_u] = "((<[^>]+>)+)"_u;
+  attr_items["chname"_u] = "(\\{([^/]+)\\/)"_u; // includes delimiters { and / !!!
+  attr_items["chcontent"_u] = "(\\{.+)"_u;
+  attr_items["content"_u] = "(\\{.+)"_u;
+  attr_items["pos_tag"_u] = "(<[^>]+>)"_u;
 }
 
 int
-PatternBuilder::insertLemma(int const base, wstring const &lemma)
+PatternBuilder::insertLemma(int const base, UString const &lemma)
 {
   int retval = base;
-  static int const any_char = alphabet(L"<ANY_CHAR>");
-  if(lemma == L"")
+  static int const any_char = alphabet("<ANY_CHAR>"_u);
+  if(lemma.empty())
   {
     retval = transducer.insertSingleTransduction(any_char, retval);
     transducer.linkStates(retval, retval, any_char);
@@ -44,14 +42,14 @@ PatternBuilder::insertLemma(int const base, wstring const &lemma)
   {
     for(unsigned int i = 0, limit = lemma.size();  i != limit; i++)
     {
-      if(lemma[i] == L'\\')
+      if(lemma[i] == '\\')
       {
-        //retval = transducer.insertSingleTransduction(L'\\', retval);
+        //retval = transducer.insertSingleTransduction('\\', retval);
         i++;
         retval = transducer.insertSingleTransduction(int(lemma[i]),
                                                              retval);
       }
-      else if(lemma[i] == L'*')
+      else if(lemma[i] == '*')
       {
         retval = transducer.insertSingleTransduction(any_char, retval);
         transducer.linkStates(retval, retval, any_char);
@@ -68,13 +66,13 @@ PatternBuilder::insertLemma(int const base, wstring const &lemma)
 }
 
 int
-PatternBuilder::insertTags(int const base, const vector<wstring>& tags)
+PatternBuilder::insertTags(int const base, const vector<UString>& tags)
 {
   int retval = base;
-  static int const any_tag = alphabet(L"<ANY_TAG>");
+  static int const any_tag = alphabet("<ANY_TAG>"_u);
   for(unsigned int i = 0; i < tags.size(); i++)
   {
-    if(tags[i] == L"*")
+    if(tags[i] == "*"_u)
     {
       if(!starCanBeEmpty)
       {
@@ -84,10 +82,10 @@ PatternBuilder::insertTags(int const base, const vector<wstring>& tags)
     }
     else
     {
-      vector<wstring> tgs = StringUtils::split_wstring(tags[i], L".");
+      vector<UString> tgs = StringUtils::split(tags[i], "."_u);
       for(auto t : tgs)
       {
-        wstring tg = L"<" + t + L">";
+        UString tg = "<"_u + t + ">"_u;
         alphabet.includeSymbol(tg);
         retval = transducer.insertSingleTransduction(alphabet(tg), retval);
       }
@@ -99,106 +97,15 @@ PatternBuilder::insertTags(int const base, const vector<wstring>& tags)
 int
 PatternBuilder::countToFinalSymbol(const int count)
 {
-  const wstring count_sym = L"<RULE_NUMBER:" + to_wstring(count) + L">";
+  const UString count_sym = "<RULE_NUMBER:"_u + StringUtils::itoa(count) + ">"_u;
   alphabet.includeSymbol(count_sym);
   const int symbol = alphabet(count_sym);
   if(count != -1) final_symbols.insert(symbol);
   return symbol;
 }
 
-vector<PatternBuilder::TrieNode*>
-PatternBuilder::buildTrie(vector<wstring> parts)
-{
-  vector<TrieNode*> ret;
-  vector<vector<wstring>> p2;
-  for(auto p : parts)
-  {
-    if(p.size() == 0) continue;
-    bool found = false;
-    for(unsigned int t = 0; t < p2.size(); t++)
-    {
-      if(ret[t]->self == p[0])
-      {
-        p2[t].push_back(p.substr(1));
-        found = true;
-        break;
-      }
-    }
-    if(!found)
-    {
-      TrieNode* t = new TrieNode;
-      t->self = p[0];
-      ret.push_back(t);
-      p2.push_back(vector<wstring>(1, p.substr(1)));
-    }
-  }
-  for(unsigned int i = 0; i < ret.size(); i++)
-  {
-    ret[i]->next = buildTrie(p2[i]);
-  }
-  return ret;
-}
-
-wstring
-PatternBuilder::unbuildTrie(PatternBuilder::TrieNode* t)
-{
-  if(t->self == L'\0') return L"";
-  wstring single;
-  bool end = false;
-  vector<wstring> groups;
-  int ct = t->next.size();
-  for(auto it : t->next)
-  {
-    wstring blob = unbuildTrie(it);
-    if(blob.size() == 0)
-    {
-      end = true;
-      ct--;
-    }
-    else if(blob.size() == 1)
-    {
-      if(single.size() > 0) ct--;
-      single += blob;
-    }
-    else groups.push_back(blob);
-  }
-  wstring ret;
-  if(t->self == L'#') ret += L'\\';
-  ret += t->self;
-  if(single.size() == 0 && groups.size() == 0) return ret;
-  if(single.size() > 1) single = L"[" + single + L"]";
-  if(ct > 1 || (groups.size() == 1 && end)) ret += L"(?:";
-  for(unsigned int i = 0; i < groups.size(); i++)
-  {
-    if(i > 0) ret += L"|";
-    ret += groups[i];
-  }
-  if(single.size() > 0)
-  {
-    if(groups.size() > 0) ret += L"|";
-    ret += single;
-  }
-  if(ct > 1 || (groups.size() == 1 && end)) ret += L")";
-  if(end) ret += L"?";
-  return ret;
-}
-
-wstring
-PatternBuilder::trie(vector<wstring> parts)
-{
-  if(parts.size() == 0) return L"";
-  for(unsigned int i = 0; i < parts.size(); i++)
-  {
-    parts[i] = L"<" + parts[i];
-    parts[i] += L'\0';
-  }
-  vector<TrieNode*> l = buildTrie(parts);
-  // they all start with L'<', so there will only be 1.
-  return L"(" + unbuildTrie(l[0]) + L">)";
-}
-
 void
-PatternBuilder::addPattern(vector<vector<PatternElement*>> pat, int rule, double weight, bool isLex)
+PatternBuilder::addPattern(const vector<vector<PatternElement*>>& pat, int rule, double weight, bool isLex)
 {
   int state = transducer.getInitial();
   for(unsigned int p = 0; p < pat.size(); p++)
@@ -214,17 +121,17 @@ PatternBuilder::addPattern(vector<vector<PatternElement*>> pat, int rule, double
           if(pe->tags.size() > 0) lookahead[state].push_back(pe->tags[0]);
         }
       }
-      state = transducer.insertSingleTransduction(L' ', state);
+      state = transducer.insertSingleTransduction(' ', state);
     }
-    state = transducer.insertNewSingleTransduction(L'^', state);
+    state = transducer.insertNewSingleTransduction('^', state);
     int end = insertLemma(state, pat[p][0]->lemma);
     end = insertTags(end, pat[p][0]->tags);
-    end = transducer.insertSingleTransduction(L'$', end);
+    end = transducer.insertSingleTransduction('$', end);
     for(unsigned int i = 1; i < pat[p].size(); i++)
     {
       int temp = insertLemma(state, pat[p][i]->lemma);
       temp = insertTags(temp, pat[p][i]->tags);
-      transducer.linkStates(temp, end, L'$');
+      transducer.linkStates(temp, end, '$');
     }
     state = end;
   }
@@ -234,7 +141,7 @@ PatternBuilder::addPattern(vector<vector<PatternElement*>> pat, int rule, double
 }
 
 void
-PatternBuilder::addRule(int rule, double weight, vector<vector<PatternElement*>> pattern, vector<wstring> firstChunk, wstring name)
+PatternBuilder::addRule(int rule, double weight, const vector<vector<PatternElement*>>& pattern, const vector<UString>& firstChunk, const UString& name)
 {
   rules[rule] = make_pair(firstChunk, pattern);
   addPattern(pattern, rule, weight, false);
@@ -255,82 +162,64 @@ PatternBuilder::addRule(int rule, double weight, vector<vector<PatternElement*>>
 }
 
 void
-PatternBuilder::addList(wstring name, set<wstring, Ltstr> vals)
+PatternBuilder::addList(const UString& name, const set<UString>& vals)
 {
   lists[name] = vals;
 }
 
 void
-PatternBuilder::addAttr(wstring name, set<wstring, Ltstr> vals)
+PatternBuilder::addAttr(const UString& name, const set<UString>& vals)
 {
-  /*wstring pat = L"(";
-  for(set<wstring, Ltstr>::iterator it = vals.begin(); it != vals.end(); it++)
-  {
-    if(pat.size() > 1)
-    {
-      pat += L"|";
-    }
-    pat += L"<" + StringUtils::substitute(*it, L".", L"><") + L">";
-  }
-  pat += L")";
-  attr_items[name] = pat;*/
-  vector<wstring> pat;
-  for(auto it : vals)
-  {
-    wstring p = StringUtils::substitute(it, L"\\.", L"<>");
-    p = StringUtils::substitute(p, L".", L"><");
-    pat.push_back(StringUtils::substitute(p, L"<>", L"\\."));
-  }
-  wstring pt = trie(pat);
-  //wcerr << name << "\t" << pt << endl;
-  attr_items[name] = pt;
+  vector<UString> pat;
+  pat.assign(vals.begin(), vals.end());
+  attr_items[name] = optimize_regex(pat);
 }
 
 bool
-PatternBuilder::isAttrDefined(wstring name)
+PatternBuilder::isAttrDefined(const UString& name)
 {
   return attr_items.find(name) != attr_items.end();
 }
 
 void
-PatternBuilder::addVar(wstring name, wstring val)
+PatternBuilder::addVar(const UString& name, const UString& val)
 {
   variables[name] = val;
 }
 
-wstring
-PatternBuilder::BCstring(const wstring& s)
+UString
+PatternBuilder::BCstring(const UString& s)
 {
-  wstring ret;
+  UString ret;
   ret += STRING;
-  ret += (wchar_t)s.size();
+  ret += (UChar)s.size();
   ret += s;
   return ret;
 }
 
-wstring
-PatternBuilder::BCifthenelse(const wstring& cond, const wstring& yes, const wstring& no)
+UString
+PatternBuilder::BCifthenelse(const UString& cond, const UString& yes, const UString& no)
 {
-  wstring ret = cond;
+  UString ret = cond;
   if(yes.size() == 0)
   {
     ret += JUMPONTRUE;
-    ret += (wchar_t)no.size();
+    ret += (UChar)no.size();
     ret += no;
   }
   else if(no.size() == 0)
   {
     ret += JUMPONFALSE;
-    ret += (wchar_t)yes.size();
+    ret += (UChar)yes.size();
     ret += yes;
   }
   else
   {
     ret += JUMPONFALSE;
-    ret += (wchar_t)(yes.size() + 2);
+    ret += (UChar)(yes.size() + 2);
     ret += yes;
     ret += JUMP;
-    ret += (wchar_t)no.size();
+    ret += (UChar)no.size();
     ret += no;
   }
   return ret;
@@ -342,11 +231,11 @@ PatternBuilder::buildLookahead()
   for(auto it : firstSet)
   {
     firstSet[it.first].insert(it.first);
-    vector<wstring> todo;
+    vector<UString> todo;
     for(auto op : it.second) todo.push_back(op);
     while(todo.size() > 0)
     {
-      wstring cur = todo.back();
+      UString cur = todo.back();
       todo.pop_back();
       if(cur != it.first && firstSet.find(cur) != firstSet.end())
       {
@@ -363,9 +252,9 @@ PatternBuilder::buildLookahead()
   }
   for(auto it : lookahead)
   {
-    int state = transducer.insertSingleTransduction(alphabet(L"<LOOK:AHEAD>"), it.first);
-    state = transducer.insertSingleTransduction(L'^', state);
-    transducer.linkStates(state, state, alphabet(L"<ANY_CHAR>"));
+    int state = transducer.insertSingleTransduction(alphabet("<LOOK:AHEAD>"_u), it.first);
+    state = transducer.insertSingleTransduction('^', state);
+    transducer.linkStates(state, state, alphabet("<ANY_CHAR>"_u));
     int end = -1;
     for(auto next : it.second)
     {
@@ -373,19 +262,19 @@ PatternBuilder::buildLookahead()
       for(auto tag : firstSet[next])
       {
         int temp = state;
-        if(tag != L"*")
+        if(tag != "*"_u)
         {
-          temp = transducer.insertSingleTransduction(alphabet(L"<" + tag + L">"), temp);
+          temp = transducer.insertSingleTransduction(alphabet("<"_u + tag + ">"_u), temp);
         }
-        transducer.linkStates(temp, temp, alphabet(L"<ANY_TAG>"));
+        transducer.linkStates(temp, temp, alphabet("<ANY_TAG>"_u));
         if(end == -1)
         {
-          end = transducer.insertSingleTransduction(L'$', temp);
+          end = transducer.insertSingleTransduction('$', temp);
           transducer.setFinal(end);
         }
         else
         {
-          transducer.linkStates(temp, end, L'$');
+          transducer.linkStates(temp, end, '$');
         }
       }
     }
@@ -402,7 +291,7 @@ PatternBuilder::isPrefix(const vector<vector<PatternElement*>>& rule, const vect
     for(auto r : rule[i])
     {
       if(r->tags.size() == 0) continue;
-      else if(r->tags[0] == L"*")
+      else if(r->tags[0] == "*"_u)
       {
         found = true;
         break;
@@ -410,7 +299,7 @@ PatternBuilder::isPrefix(const vector<vector<PatternElement*>>& rule, const vect
       for(auto p : prefix[i])
       {
         if(p->tags.size() == 0) continue;
-        else if(p->tags[0] == L"*" || p->tags[0] == r->tags[0])
+        else if(p->tags[0] == "*"_u || p->tags[0] == r->tags[0])
         {
           found = true;
           break;
@@ -430,7 +319,7 @@ PatternBuilder::buildFallback()
   starCanBeEmpty = true;
   vector<PatternElement*> fallback;
   PatternElement* fall = new PatternElement;
-  fall->tags.push_back(L"FALL:BACK");
+  fall->tags.push_back("FALL:BACK"_u);
   fallback.push_back(fall);
   for(auto rule : rules)
   {
@@ -439,13 +328,13 @@ PatternBuilder::buildFallback()
     {
       PatternElement* pe = new PatternElement;
       pe->tags.push_back(tg);
-      pe->tags.push_back(L"*");
+      pe->tags.push_back("*"_u);
       result.push_back(pe);
     }
     vector<vector<PatternElement*>> resultPat;
     resultPat.push_back(result);
-    set<wstring> patPrefix;
-    set<wstring> resultPrefix;
+    set<UString> patPrefix;
+    set<UString> resultPrefix;
     for(auto rule2 : rules)
     {
       if(isPrefix(rule2.second.second, resultPat))
@@ -475,7 +364,7 @@ PatternBuilder::buildFallback()
       {
         PatternElement* pe = new PatternElement;
         pe->tags.push_back(it);
-        pe->tags.push_back(L"*");
+        pe->tags.push_back("*"_u);
         add.push_back(pe);
       }
       resultPat.push_back(add);
@@ -492,33 +381,24 @@ PatternBuilder::buildFallback()
 void
 PatternBuilder::loadLexFile(const string& fname)
 {
-  wifstream lex;
-  lex.open(fname);
-  if(!lex.is_open())
-  {
-    wcerr << "Unable to open file " << fname.c_str() << " for reading." << endl;
-    exit(EXIT_FAILURE);
-  }
-  while(!lex.eof())
-  {
-    wstring name;
-    while(!lex.eof() && lex.peek() != L'\t') name += lex.get();
+  InputFile lex;
+  lex.open_or_exit(fname.c_str());
+  while(!lex.eof()) {
+    UString name;
+    while(!lex.eof() && lex.peek() != '\t') name += lex.get();
     lex.get();
-    wstring weight;
-    while(!lex.eof() && lex.peek() != L'\t') weight += lex.get();
+    UString weight;
+    while(!lex.eof() && lex.peek() != '\t') weight += lex.get();
     lex.get();
     if(lex.eof()) break;
     vector<vector<PatternElement*>> pat;
-    while(!lex.eof() && lex.peek() != L'\n')
-    {
+    while(!lex.eof() && lex.peek() != '\n') {
       PatternElement* p = new PatternElement;
-      while(lex.peek() != L'@') p->lemma += towlower(lex.get());
+      while(lex.peek() != '@') p->lemma += u_tolower(lex.get());
       lex.get();
-      wstring tag;
-      while(lex.peek() != L' ' && lex.peek() != L'\n')
-      {
-        if(lex.peek() == L'.')
-        {
+      UString tag;
+      while(lex.peek() != ' ' && lex.peek() != '\n') {
+        if(lex.peek() == '.') {
           lex.get();
           p->tags.push_back(tag);
           tag.clear();
@@ -526,29 +406,29 @@ PatternBuilder::loadLexFile(const string& fname)
         else tag += lex.get();
       }
       p->tags.push_back(tag);
-      if(lex.peek() == L' ') lex.get();
+      if(lex.peek() == ' ') lex.get();
       pat.push_back(vector<PatternElement*>(1, p));
     }
     lex.get();
-    lexicalizations[name].push_back(make_pair(stod(weight), pat));
+    lexicalizations[name].push_back(make_pair(StringUtils::stod(weight), pat));
   }
 }
 
 void
-PatternBuilder::write(FILE* output, int longest, vector<pair<int, wstring>> inputBytecode, vector<wstring> outputBytecode)
+PatternBuilder::write(FILE* output, int longest, vector<pair<int, UString>> inputBytecode, vector<UString> outputBytecode)
 {
   Compression::multibyte_write(longest, output);
   Compression::multibyte_write(inputBytecode.size(), output);
   for(unsigned int i = 0; i < inputBytecode.size(); i++)
   {
     Compression::multibyte_write(inputBytecode[i].first, output);
-    Compression::wstring_write(inputBytecode[i].second, output);
+    Compression::string_write(inputBytecode[i].second, output);
   }
 
   Compression::multibyte_write(outputBytecode.size(), output);
   for(unsigned int i = 0; i < outputBytecode.size(); i++)
   {
-    Compression::wstring_write(outputBytecode[i], output);
+    Compression::string_write(outputBytecode[i], output);
   }
 
   Compression::multibyte_write(chunkVarCount, output);
@@ -565,7 +445,7 @@ PatternBuilder::write(FILE* output, int longest, vector<pair<int, wstring>> inpu
   // Find all arcs with "final_symbols" in the transitions, let their source node instead be final,
   // and extract the rule number from the arc. Record relation between source node and rule number
   // in finals_rules. It is now no longer safe to minimize -- but we already did that.
-  const wstring rule_sym_pre = L"<RULE_NUMBER:"; // see countToFinalSymbol()
+  const UString rule_sym_pre = "<RULE_NUMBER:"_u; // see countToFinalSymbol()
   for(map<int, multimap<int, pair<int, double> > >::const_iterator it = transitions.begin(),
         limit = transitions.end(); it != limit; ++it)
   {
@@ -579,12 +459,12 @@ PatternBuilder::write(FILE* output, int longest, vector<pair<int, wstring>> inpu
         continue;
       }
       // Extract the rule number encoded by countToFinalSymbol():
-      wstring s;
+      UString s;
       alphabet.getSymbol(s, symbol);
       if(s.compare(0, rule_sym_pre.size(), rule_sym_pre) != 0) {
         continue;
       }
-      const int rule_num = stoi(s.substr(rule_sym_pre.size()));
+      const int rule_num = StringUtils::stoi(s.substr(rule_sym_pre.size()));
       transducer.setFinal(src);
       finals_rules.insert(make_pair(src, make_pair(rule_num, wgt)));
     }
@@ -610,54 +490,42 @@ PatternBuilder::write(FILE* output, int longest, vector<pair<int, wstring>> inpu
 
   // attr_items
 
-  // precompiled regexps
-  Compression::string_write(string(pcre_version()), output);
+  // empty version number since we're not on PCRE anymore
+  Compression::multibyte_write(0, output);
   Compression::multibyte_write(attr_items.size(), output);
 
-  map<wstring, wstring, Ltstr>::iterator it, limit;
-  for(it = attr_items.begin(), limit = attr_items.end(); it != limit; it++)
-  {
-    Compression::wstring_write(it->first, output);
-    ApertiumRE my_re;
-    my_re.compile(UtfConverter::toUtf8(it->second));
-    my_re.write(output);
-    Compression::wstring_write(it->second, output);
+  for (auto& it : attr_items) {
+    Compression::string_write(it.first, output);
+    Compression::multibyte_write(0, output); // empty binary form of regex
+    Compression::string_write(it.second, output);
   }
 
   // variables
   Compression::multibyte_write(variables.size(), output);
-  for(map<wstring, wstring, Ltstr>::const_iterator it = variables.begin(), limit = variables.end();
-      it != limit; it++)
-  {
-    Compression::wstring_write(it->first, output);
-    Compression::wstring_write(it->second, output);
+  for (auto& it : variables) {
+    Compression::string_write(it.first, output);
+    Compression::string_write(it.second, output);
   }
 
   // lists
   Compression::multibyte_write(lists.size(), output);
-  for(map<wstring, set<wstring, Ltstr>, Ltstr>::const_iterator it = lists.begin(), limit = lists.end();
-      it != limit; it++)
-  {
-    Compression::wstring_write(it->first, output);
-    Compression::multibyte_write(it->second.size(), output);
+  for (auto& it : lists) {
+    Compression::string_write(it.first, output);
+    Compression::multibyte_write(it.second.size(), output);
 
-    for(set<wstring, Ltstr>::const_iterator it2 = it->second.begin(), limit2 = it->second.end();
-  it2 != limit2; it2++)
-    {
-      Compression::wstring_write(*it2, output);
+    for (auto& it2 : it.second) {
+      Compression::string_write(it2, output);
     }
   }
 
   // rule names
   Compression::multibyte_write(inRuleNames.size(), output);
-  for(unsigned int i = 0; i < inRuleNames.size(); i++)
-  {
-    Compression::wstring_write(inRuleNames[i], output);
+  for (auto& name : inRuleNames) {
+    Compression::string_write(name, output);
   }
   Compression::multibyte_write(outRuleNames.size(), output);
-  for(unsigned int i = 0; i < outRuleNames.size(); i++)
-  {
-    Compression::wstring_write(outRuleNames[i], output);
+  for (auto& name : outRuleNames) {
+    Compression::string_write(name, output);
   }
 
 }
diff --git a/src/pattern.h b/src/pattern.h
index 0e92d0a..ed48cc6 100644
--- a/src/pattern.h
+++ b/src/pattern.h
@@ -4,7 +4,7 @@
 #include <rtx_config.h>
 #include <iostream>
 #include <lttoolbox/alphabet.h>
-#include <lttoolbox/ltstr.h>
+#include <lttoolbox/ustring.h>
 #include <lttoolbox/transducer.h>
 
 #include <string>
@@ -15,8 +15,8 @@ using namespace std;
 
 struct PatternElement
 {
-  wstring lemma;
-  vector<wstring> tags;
+  UString lemma;
+  vector<UString> tags;
 };
 
 class PatternBuilder
@@ -31,27 +31,27 @@ private:
    * Attribute categories
    * name => regex
    */
-  map<wstring, wstring, Ltstr> attr_items;
+  map<UString, UString> attr_items;
 
   /**
    * Lists
    * name => { values }
    */
-  map<wstring, set<wstring, Ltstr>, Ltstr> lists;
+  map<UString, set<UString>> lists;
 
   /**
    * Global string variables
    * name => initial value
    */
-  map<wstring, wstring, Ltstr> variables;
+  map<UString, UString> variables;
 
   /**
    * Symbols marking ends of rules in pattern transducer
    */
   set<int> final_symbols;
 
-  map<int, vector<wstring>> lookahead;
-  map<wstring, set<wstring>> firstSet;
+  map<int, vector<UString>> lookahead;
+  map<UString, set<UString>> firstSet;
 
   /**
    * Alphabet of pattern transducer
@@ -67,9 +67,9 @@ private:
    * Lexicalized weights for rules
    * rule id => [ ( weight, processed pattern ) ... ]
    */
-  map<wstring, vector<pair<double, vector<vector<PatternElement*>>>>> lexicalizations;
+  map<UString, vector<pair<double, vector<vector<PatternElement*>>>>> lexicalizations;
 
-  map<int, pair<vector<wstring>, vector<vector<PatternElement*>>>> rules;
+  map<int, pair<vector<UString>, vector<vector<PatternElement*>>>> rules;
 
   //////////
   // TRANSDUCER PATH BUILDING
@@ -79,13 +79,13 @@ private:
    * Starting from base, add path for lemma
    * @return end state
    */
-  int insertLemma(int const base, wstring const &lemma);
+  int insertLemma(int const base, UString const &lemma);
 
   /**
    * Starting from base, insert each tag in tags
    * @return end state
    */
-  int insertTags(int const base, const vector<wstring>& tags);
+  int insertTags(int const base, const vector<UString>& tags);
 
   /**
    * Generate symbol of the form L"<RULE_NUMBER:count>" to mark rule end
@@ -95,7 +95,7 @@ private:
   /**
    * Build complete path
    */
-  void addPattern(vector<vector<PatternElement*>> pat, int rule, double weight, bool isLex);
+  void addPattern(const vector<vector<PatternElement*>>& pat, int rule, double weight, bool isLex);
 
   void buildLookahead();
 
@@ -103,31 +103,6 @@ private:
 
   void buildFallback();
 
-  //////////
-  // ATTRIBUTE COMPRESSION
-  //////////
-
-  struct TrieNode
-  {
-    wchar_t self;
-    vector<TrieNode*> next;
-  };
-
-  /**
-   * Construct tries for a set of inputs, return one for each initial character
-   */
-  vector<TrieNode*> buildTrie(vector<wstring> parts);
-
-  /**
-   * Convert trie to regex
-   */
-  wstring unbuildTrie(TrieNode* t);
-
-  /**
-   * Wrapper around buildTrie() and unbuildTrie()
-   */
-  wstring trie(vector<wstring> parts);
-
 public:
 
   //////////
@@ -137,40 +112,39 @@ public:
   // false: * = 1 or more tags, true: * = 0 or more tags
   /**
    * If false, L"*" must match at least one tag, otherwise it can match 0
-   * Default: false
    */
-  bool starCanBeEmpty;
+  bool starCanBeEmpty = false;
 
   /**
    * Number of global Chunk* variables to allocate space for
    */
-  unsigned int chunkVarCount;
+  unsigned int chunkVarCount = 0;
 
   /**
    * Debug names for input-time rules
    */
-  vector<wstring> inRuleNames;
+  vector<UString> inRuleNames;
 
   /**
    * Debug names for output-time rules
    */
-  vector<wstring> outRuleNames;
+  vector<UString> outRuleNames;
 
   PatternBuilder();
 
-  void addRule(int rule, double weight, vector<vector<PatternElement*>> pattern, vector<wstring> firstChunk, wstring name);
-  void addList(wstring name, set<wstring, Ltstr> vals);
-  void addAttr(wstring name, set<wstring, Ltstr> vals);
-  bool isAttrDefined(wstring name);
-  void addVar(wstring name, wstring val);
+  void addRule(int rule, double weight, const vector<vector<PatternElement*>>& pattern, const vector<UString>& firstChunk, const UString& name);
+  void addList(const UString& name, const set<UString>& vals);
+  void addAttr(const UString& name, const set<UString>& vals);
+  bool isAttrDefined(const UString& name);
+  void addVar(const UString& name, const UString& val);
   void loadLexFile(const string& fname);
-  void write(FILE* output, int longest, vector<pair<int, wstring>> inputBytecode, vector<wstring> outputBytecode);
+  void write(FILE* output, int longest, vector<pair<int, UString>> inputBytecode, vector<UString> outputBytecode);
 
   //////////
   // BYTECODE CONSTRUCTION
   //////////
-  wstring BCstring(const wstring& s);
-  wstring BCifthenelse(const wstring& cond, const wstring& yes, const wstring& no);
+  UString BCstring(const UString& s);
+  UString BCifthenelse(const UString& cond, const UString& yes, const UString& no);
 };
 
 #endif
diff --git a/src/randpath.cc b/src/randpath.cc
index a2ed32e..b8ae2da 100644
--- a/src/randpath.cc
+++ b/src/randpath.cc
@@ -3,18 +3,18 @@
 #include <lttoolbox/alphabet.h>
 #include <lttoolbox/compression.h>
 #include <lttoolbox/lt_locale.h>
-#include <apertium/utf_converter.h>
 #include <random>
 #include <vector>
 #include <string>
 #include <chrono>
+#include <cstring>
 
 using namespace std;
 
 Alphabet A;
 Transducer T;
-wstring prefix;
-vector<pair<int, wstring>> paths;
+UString prefix;
+vector<pair<int, UString>> paths;
 unsigned int donecount = 0;
 
 bool load(FILE* input)
@@ -53,13 +53,7 @@ bool load(FILE* input)
 
   while(len > 0)
   {
-    int len2 = Compression::multibyte_read(input);
-    wstring name = L"";
-    while(len2 > 0)
-    {
-      name += static_cast<wchar_t>(Compression::multibyte_read(input));
-      len2--;
-    }
+    UString name = Compression::string_read(input);
     T.read(input);
     len--;
     return true;
@@ -97,14 +91,14 @@ void followPath(int idx)
     {
       paths.push_back(make_pair(ops[i].second, paths[idx].second));
       A.getSymbol(paths.back().second, ops[i].first);
-      if(paths.back().second.size() > 0 && paths.back().second.back() == L'+')
+      if(paths.back().second.size() > 0 && paths.back().second.back() == '+')
       {
         paths.pop_back();
       }
     }
     state = ops[0].second;
     A.getSymbol(paths[idx].second, ops[0].first);
-    if(paths[idx].second.size() > 0 && paths[idx].second.back() == L'+')
+    if(paths[idx].second.size() > 0 && paths[idx].second.back() == '+')
     {
       paths.erase(paths.begin() + idx);
       return;
@@ -120,12 +114,12 @@ void generatePaths()
   for(unsigned int i = 0; i < prefix.size(); i++)
   {
     int sym = prefix[i];
-    int sym2 = towlower(prefix[i]);
-    if(prefix[i] == L'<')
+    int sym2 = u_tolower(prefix[i]);
+    if(prefix[i] == '<')
     {
       for(unsigned int j = i+1; j < prefix.size(); j++)
       {
-        if(prefix[j] == L'>')
+        if(prefix[j] == '>')
         {
           sym = A(prefix.substr(i, j-i+1));
           i = j;
@@ -151,7 +145,7 @@ void generatePaths()
   }
   for(auto s : states)
   {
-    paths.push_back(make_pair(s, L""));
+    paths.push_back(make_pair(s, ""_u));
     followPath(paths.size() - 1);
   }
   while(donecount < paths.size())
@@ -172,30 +166,30 @@ int main(int argc, char *argv[])
   LtLocale::tryToSetLocale();
   if(argc != 3)
   {
-    wcerr << "Usage: " << argv[0] << " transducer prefix" << endl;
+    cerr << "Usage: " << argv[0] << " transducer prefix" << endl;
     return EXIT_FAILURE;
   }
   FILE* tf = fopen(argv[1], "rb");
   if(tf == NULL)
   {
-    wcerr << "Unable to open " << argv[1] << " for reading." << endl;
+    cerr << "Unable to open " << argv[1] << " for reading." << endl;
     return EXIT_FAILURE;
   }
   if(!load(tf))
   {
-    wcerr << "Unable to read transducer." << endl;
+    cerr << "Unable to read transducer." << endl;
     return EXIT_FAILURE;
   }
-  prefix = UtfConverter::fromUtf8(argv[2]);
+  prefix = to_ustring(argv[2]);
   generatePaths();
   if(paths.size() == 0)
   {
-    wcerr << "No paths begin with that prefix." << endl;
+    cerr << "No paths begin with that prefix." << endl;
     return EXIT_FAILURE;
   }
   //seed_seq s (prefix.begin(), prefix.end());
   unsigned s = chrono::system_clock::now().time_since_epoch().count();
   minstd_rand0 g (s);
-  wcout << prefix << paths[g() % paths.size()].second << endl;
+  cout << prefix << paths[g() % paths.size()].second << endl;
   return EXIT_SUCCESS;
 }
diff --git a/src/rtx_comp.cc b/src/rtx_comp.cc
index 145a05a..d9deb8e 100644
--- a/src/rtx_comp.cc
+++ b/src/rtx_comp.cc
@@ -3,14 +3,11 @@
 #include <lttoolbox/lt_locale.h>
 #include <cstdlib>
 #include <iostream>
-#include <apertium/string_utils.h>
-#include <apertium/utf_converter.h>
 #include <libgen.h>
 #include <getopt.h>
 #include <libxml/xmlreader.h>
 #include <trx_compiler.h>
 
-using namespace Apertium;
 using namespace std;
 
 void endProgram(char *name)
@@ -51,7 +48,7 @@ int main(int argc, char *argv[])
 
   bool stats = false;
   bool summary = false;
-  vector<wstring> exclude;
+  vector<UString> exclude;
   vector<string> lexFiles;
 
   while(true)
@@ -71,7 +68,7 @@ int main(int argc, char *argv[])
     switch(c)
     {
     case 'e':
-      exclude.push_back(UtfConverter::fromUtf8(optarg));
+      exclude.push_back(to_ustring(optarg));
       break;
 
     case 'l':
diff --git a/src/rtx_compiler.cc b/src/rtx_compiler.cc
index 4318cb8..8993d1a 100644
--- a/src/rtx_compiler.cc
+++ b/src/rtx_compiler.cc
@@ -1,15 +1,14 @@
 #include <rtx_config.h>
 #include <rtx_compiler.h>
-#include <apertium/string_utils.h>
-#include <apertium/utf_converter.h>
+#include <lttoolbox/string_utils.h>
 
 using namespace std;
 
-wstring const
-RTXCompiler::ANY_TAG = L"<ANY_TAG>";
+UString const
+RTXCompiler::ANY_TAG = "<ANY_TAG>"_u;
 
-wstring const
-RTXCompiler::ANY_CHAR = L"<ANY_CHAR>";
+UString const
+RTXCompiler::ANY_CHAR = "<ANY_CHAR>"_u;
 
 RTXCompiler::RTXCompiler()
 {
@@ -23,48 +22,48 @@ RTXCompiler::RTXCompiler()
   currentLocType = LocTypeNone;
   PB.starCanBeEmpty = true;
   summarizing = false;
-  outputRules[L"UNKNOWN:INTERNAL"] = vector<wstring>(1, L"_");
+  outputRules["UNKNOWN:INTERNAL"_u] = vector<UString>(1, "_"_u);
 }
 
-wstring const
-RTXCompiler::SPECIAL_CHARS = L"!@$%()={}[]|/:;<>,.→";
+UString const
+RTXCompiler::SPECIAL_CHARS = "!@$%()={}[]|/:;<>,.→"_u;
 
 void
-RTXCompiler::die(wstring message)
+RTXCompiler::die(UString message)
 {
   if(errorsAreSyntax)
   {
-    wcerr << L"Syntax error on line " << currentLine << L" of ";
+    cerr << "Syntax error on line " << currentLine << " of ";
   }
   else
   {
-    wcerr << L"Error in ";
+    cerr << "Error in ";
     while(macroNameStack.size() > 0)
     {
-      wcerr << "macro '" << macroNameStack.back() << "', invoked by ";
+      cerr << "macro '" << macroNameStack.back() << "', invoked by ";
       macroNameStack.pop_back();
     }
-    wcerr << L"rule beginning on line " << currentRule->line << L" of ";
+    cerr << "rule beginning on line " << currentRule->line << " of ";
   }
-  wcerr << UtfConverter::fromUtf8(sourceFile) << L": " << message << endl;
+  cerr << sourceFile << ": " << message << endl;
   if(errorsAreSyntax && !source.eof())
   {
-    wstring arr = wstring(recentlyRead.size()-2, L' ');
+    UString arr = UString(recentlyRead.size()-2, ' ');
     recentlyRead += unreadbuf;
-    while(!source.eof() && peekchar() != L'\n')
+    while(!source.eof() && peekchar() != '\n')
     {
       recentlyRead += source.get();
     }
-    wcerr << recentlyRead << endl;
-    wcerr << arr << L"^^^" << endl;
+    cerr << recentlyRead << endl;
+    cerr << arr << "^^^" << endl;
   }
   exit(EXIT_FAILURE);
 }
 
-wchar_t
+UChar
 RTXCompiler::getchar()
 {
-  wchar_t c;
+  UChar c;
   if(unreadbuf.size() > 0)
   {
     c = unreadbuf[0];
@@ -75,7 +74,7 @@ RTXCompiler::getchar()
   return c;
 }
 
-wchar_t
+UChar
 RTXCompiler::peekchar()
 {
   if(unreadbuf.size() > 0) return unreadbuf[0];
@@ -98,12 +97,12 @@ RTXCompiler::unread()
 void
 RTXCompiler::eatSpaces()
 {
-  wchar_t c;
+  UChar c;
   bool inComment = false;
   while(!source.eof())
   {
     c = peekchar();
-    if(c == L'\n')
+    if(c == '\n')
     {
       getchar();
       inComment = false;
@@ -115,7 +114,7 @@ RTXCompiler::eatSpaces()
     {
       getchar();
     }
-    else if(c == L'!')
+    else if(c == '!')
     {
       getchar();
       inComment = true;
@@ -127,62 +126,61 @@ RTXCompiler::eatSpaces()
   }
 }
 
-wstring
+UString
 RTXCompiler::nextTokenNoSpace()
 {
   if(source.eof())
   {
-    die(L"Unexpected end of file");
+    die("Unexpected end of file"_u);
   }
-  wchar_t c = getchar();
-  wchar_t next = peekchar();
-  wstring ret;
-  if(c == L'→')
-  {
-    ret = L"->";
+  UChar c = getchar();
+  UChar next = peekchar();
+  UString ret;
+  if (c == u'\u2192') { // '→'
+    ret = "->"_u;
   }
   else if(SPECIAL_CHARS.find(c) != string::npos)
   {
-    ret = wstring(1, c);
+    ret = UString(1, c);
   }
-  else if(c == L'-' && next == L'>')
+  else if(c == '-' && next == '>')
   {
     getchar();
-    ret = wstring(1, c) + wstring(1, next);
+    ret = UString(1, c) + UString(1, next);
   }
   else if(isspace(c))
   {
-    die(L"unexpected space");
+    die("unexpected space"_u);
   }
-  else if(c == L'!')
+  else if(c == '!')
   {
-    die(L"unexpected comment");
+    die("unexpected comment"_u);
   }
-  else if(c == L'"')
+  else if(c == '"')
   {
     next = getchar();
-    while(!source.eof() && next != L'"')
+    while(!source.eof() && next != '"')
     {
-      if(next == L'\\') next = getchar();
+      if(next == '\\') next = getchar();
       ret += next;
-      if(source.eof()) die(L"Unexpected end of file.");
+      if(source.eof()) die("Unexpected end of file."_u);
       next = getchar();
     }
   }
   else
   {
-    ret = wstring(1, c);
+    ret = UString(1, c);
     while(!source.eof())
     {
       c = peekchar();
-      if(c == L'\\')
+      if(c == '\\')
       {
         getchar();
         ret += getchar();
       }
       else if(SPECIAL_CHARS.find(c) == string::npos && !isspace(c))
       {
-        ret += wstring(1, getchar());
+        ret += UString(1, getchar());
       }
       else
       {
@@ -194,7 +192,7 @@ RTXCompiler::nextTokenNoSpace()
 }
 
 bool
-RTXCompiler::isNextToken(wchar_t c)
+RTXCompiler::isNextToken(UChar c)
 {
   if(peekchar() == c)
   {
@@ -204,46 +202,46 @@ RTXCompiler::isNextToken(wchar_t c)
   return false;
 }
 
-wstring
-RTXCompiler::nextToken(wstring check1 = L"", wstring check2 = L"")
+UString
+RTXCompiler::nextToken(UString check1 = ""_u, UString check2 = ""_u)
 {
   eatSpaces();
-  wstring tok = nextTokenNoSpace();
-  if(tok == check1 || tok == check2 || (check1 == L"" && check2 == L""))
+  UString tok = nextTokenNoSpace();
+  if(tok == check1 || tok == check2 || (check1.empty() && check2.empty()))
   {
   }
-  else if(check1 != L"" && check2 != L"")
+  else if(!check1.empty() && !check2.empty())
   {
-    die(L"expected '" + check1 + L"' or '" + check2 + L"', found '" + tok + L"'");
+    die("expected '"_u + check1 + "' or '"_u + check2 + "', found '"_u + tok + "'"_u);
   }
-  else if(check1 != L"")
+  else if(!check1.empty())
   {
-    die(L"expected '" + check1 + L"', found '" + tok + L"'");
+    die("expected '"_u + check1 + "', found '"_u + tok + "'"_u);
   }
   else
   {
-    die(L"expected '" + check2 + L"', found '" + tok + L"'");
+    die("expected '"_u + check2 + "', found '"_u + tok + "'"_u);
   }
   return tok;
 }
 
-wstring
+UString
 RTXCompiler::parseIdent(bool prespace = false)
 {
   if(prespace)
   {
     eatSpaces();
   }
-  wchar_t next = peekchar();
-  wstring ret = nextTokenNoSpace();
-  if(next == L'"')
+  UChar next = peekchar();
+  UString ret = nextTokenNoSpace();
+  if(next == '"')
   {
     // so that quoted special characters don't fail the next check
     return ret;
   }
-  if(ret == L"->" || (ret.size() == 1 && SPECIAL_CHARS.find(ret[0]) != string::npos))
+  if(ret == "->"_u || (ret.size() == 1 && SPECIAL_CHARS.find(ret[0]) != string::npos))
   {
-    die(L"expected identifier, found '" + ret + L"'");
+    die("expected identifier, found '"_u + ret + "'"_u);
   }
   return ret;
 }
@@ -251,35 +249,30 @@ RTXCompiler::parseIdent(bool prespace = false)
 unsigned int
 RTXCompiler::parseInt()
 {
-  wstring ret;
+  UString ret;
   while(isdigit(peekchar()))
   {
     ret += getchar();
   }
-  return stoul(ret);
+  return StringUtils::stoi(ret);
 }
 
 float
 RTXCompiler::parseWeight()
 {
-  wstring ret;
-  while(isdigit(peekchar()) || peekchar() == L'.')
+  UString ret;
+  while(isdigit(peekchar()) || peekchar() == '.')
   {
     ret += getchar();
   }
   float r;
   try
   {
-    wstring::size_type loc;
-    r = stof(ret, &loc);
-    if(loc != ret.size())
-    {
-      die(L"unable to parse weight: " + ret);
-    }
+    r = StringUtils::stod(ret);
   }
   catch(const invalid_argument& ia)
   {
-    die(L"unable to parse weight: " + ret);
+    die("unable to parse weight: "_u + ret);
   }
   return r;
 }
@@ -287,17 +280,17 @@ RTXCompiler::parseWeight()
 void
 RTXCompiler::parseRule()
 {
-  wstring firstLabel = parseIdent();
-  wstring next = nextToken();
-  if(next == L":")
+  UString firstLabel = parseIdent();
+  UString next = nextToken();
+  if(next == ":"_u)
   {
     parseOutputRule(firstLabel);
   }
-  else if(next == L">")
+  else if(next == ">"_u)
   {
     parseRetagRule(firstLabel);
   }
-  else if(next == L"=")
+  else if(next == "="_u)
   {
     parseAttrRule(firstLabel);
   }
@@ -308,74 +301,74 @@ RTXCompiler::parseRule()
 }
 
 void
-RTXCompiler::parseOutputRule(wstring pattern)
+RTXCompiler::parseOutputRule(UString pattern)
 {
-  nodeIsSurface[pattern] = !isNextToken(L':');
+  nodeIsSurface[pattern] = !isNextToken(':');
   eatSpaces();
-  vector<wstring> output;
-  if(peekchar() == L'(')
+  vector<UString> output;
+  if(peekchar() == '(')
   {
     LocationType typewas = currentLocType;
     Location locwas = currentLoc;
     currentLoc = LocChunk;
     currentLocType = LocTypeMacro;
     macros[pattern] = parseOutputCond();
-    output.push_back(L"macro");
+    output.push_back("macro"_u);
     currentLocType = typewas;
     currentLoc = locwas;
-    nextToken(L";");
+    nextToken(";"_u);
   }
-  else if(peekchar() == L'%')
+  else if(peekchar() == '%')
   {
-    output.push_back(L"%");
-    nextToken(L"%");
-    nextToken(L";");
+    output.push_back("%"_u);
+    nextToken("%"_u);
+    nextToken(";"_u);
   }
   else
   {
-    wstring cur;
+    UString cur;
     while(!source.eof())
     {
       cur = nextToken();
-      if(cur == L"<")
+      if(cur == "<"_u)
       {
         cur = cur + parseIdent();
-        cur += nextToken(L">");
+        cur += nextToken(">"_u);
       }
       output.push_back(cur);
-      if(nextToken(L".", L";") == L";")
+      if(nextToken("."_u, ";"_u) == ";"_u)
       {
         break;
       }
     }
     if(output.size() == 0)
     {
-      die(L"empty tag order rule");
+      die("empty tag order rule"_u);
     }
   }
   outputRules[pattern] = output;
 }
 
 void
-RTXCompiler::parseRetagRule(wstring srcTag)
+RTXCompiler::parseRetagRule(UString srcTag)
 {
-  wstring destTag = parseIdent(true);
-  nextToken(L":");
-  vector<pair<wstring, wstring>> rule;
-  rule.push_back(pair<wstring, wstring>(srcTag, destTag));
+  UString destTag = parseIdent(true);
+  nextToken(":"_u);
+  vector<pair<UString, UString>> rule;
+  rule.push_back(pair<UString, UString>(srcTag, destTag));
   while(!source.eof())
   {
     eatSpaces();
-    bool list = isNextToken(L'[');
-    wstring cs = parseIdent(true);
+    bool list = isNextToken('[');
+    UString cs = parseIdent(true);
     if(list)
     {
-      nextToken(L"]");
-      cs = L"[]" + cs;
+      nextToken("]"_u);
+      cs = "[]"_u + cs;
     }
-    wstring cd = parseIdent(true);
-    rule.push_back(pair<wstring, wstring>(cs, cd));
-    if(nextToken(L";", L",") == L";")
+    UString cd = parseIdent(true);
+    rule.push_back(pair<UString, UString>(cs, cd));
+    if(nextToken(";"_u, ","_u) == ";"_u)
     {
       break;
     }
@@ -386,7 +379,7 @@ RTXCompiler::parseRetagRule(wstring srcTag)
     if(other[0].first == srcTag && other[0].second == destTag)
     {
       found = true;
-      wcerr << "Warning: Tag-rewrite rule '" << srcTag << "' > '" << destTag << "' is defined multiple times. Mappings in earlier definition may be overwritten." << endl;
+      cerr << "Warning: Tag-rewrite rule '" << srcTag << "' > '" << destTag << "' is defined multiple times. Mappings in earlier definition may be overwritten." << endl;
       other.insert(other.begin()+1, rule.begin()+1, rule.end());
       break;
     }
@@ -403,38 +396,38 @@ RTXCompiler::parseRetagRule(wstring srcTag)
 }
 
 void
-RTXCompiler::parseAttrRule(wstring categoryName)
+RTXCompiler::parseAttrRule(UString categoryName)
 {
   if(collections.find(categoryName) != collections.end()
      || PB.isAttrDefined(categoryName))
   {
-    die(L"Redefinition of attribute category '" + categoryName + L"'.");
+    die("Redefinition of attribute category '"_u + categoryName + "'."_u);
   }
   eatSpaces();
-  if(isNextToken(L'('))
+  if(isNextToken('('))
   {
-    wstring undef = parseIdent(true);
-    wstring def = parseIdent(true);
+    UString undef = parseIdent(true);
+    UString def = parseIdent(true);
     attrDefaults[categoryName] = make_pair(undef, def);
-    nextToken(L")");
+    nextToken(")"_u);
   }
-  vector<wstring> members;
-  vector<wstring> noOver;
+  vector<UString> members;
+  vector<UString> noOver;
   while(true)
   {
     eatSpaces();
-    if(isNextToken(L';'))
+    if(isNextToken(';'))
     {
       break;
     }
-    if(isNextToken(L'['))
+    if(isNextToken('['))
     {
-      wstring other = parseIdent(true);
+      UString other = parseIdent(true);
       if(collections.find(other) == collections.end())
       {
-        die(L"Use of category '" + other + L"' in set arithmetic before definition.");
+        die("Use of category '"_u + other + "' in set arithmetic before definition."_u);
       }
-      vector<wstring> otherstuff = collections[other];
+      vector<UString> otherstuff = collections[other];
       for(unsigned int i = 0; i < otherstuff.size(); i++)
       {
         members.push_back(otherstuff[i]);
@@ -444,11 +437,11 @@ RTXCompiler::parseAttrRule(wstring categoryName)
       {
         noOver.push_back(otherstuff[i]);
       }
-      nextToken(L"]");
+      nextToken("]"_u);
     }
-    else if(isNextToken(L'@'))
+    else if(isNextToken('@'))
     {
-      wstring next = parseIdent();
+      UString next = parseIdent();
       members.push_back(next);
       noOver.push_back(next);
     }
@@ -459,18 +452,18 @@ RTXCompiler::parseAttrRule(wstring categoryName)
   }
   if(members.size() == 0)
   {
-    die(L"empty attribute list");
+    die("empty attribute list"_u);
   }
-  collections.insert(pair<wstring, vector<wstring>>(categoryName, members));
-  noOverwrite.insert(pair<wstring, vector<wstring>>(categoryName, noOver));
+  collections.insert(pair<UString, vector<UString>>(categoryName, members));
+  noOverwrite.insert(pair<UString, vector<UString>>(categoryName, noOver));
   if(noOver.size() > 0)
   {
     for(unsigned int i = 0; i < noOver.size(); i++)
     {
-      noOver[i] = L"<" + noOver[i] + L">";
+      noOver[i] = "<"_u + noOver[i] + ">"_u;
     }
   }
-  collections.insert(make_pair(categoryName + L" over", noOver));
+  collections.insert(make_pair(categoryName + " over"_u, noOver));
 }
 
 RTXCompiler::Clip*
@@ -483,26 +476,26 @@ RTXCompiler::parseClip(int src = -2)
   {
     ret->src = src;
   }
-  else if(isNextToken(L'>'))
+  else if(isNextToken('>'))
   {
     ret->src = parseInt();
-    nextToken(L".");
+    nextToken("."_u);
     bounds = false;
   }
   else if(isdigit(peekchar()))
   {
     ret->src = parseInt();
-    nextToken(L".");
+    nextToken("."_u);
   }
-  else if(isNextToken(L'$'))
+  else if(isNextToken('$'))
   {
-    if(isNextToken(L'$'))
+    if(isNextToken('$'))
     {
       ret->src = ChunkVarClip;
       ret->varName = parseIdent();
-      nextToken(L".");
+      nextToken("."_u);
     }
-    else if(isNextToken(L'%'))
+    else if(isNextToken('%'))
     {
       ret->src = StringVarClip;
       ret->varName = parseIdent();
@@ -512,11 +505,11 @@ RTXCompiler::parseClip(int src = -2)
       ret->src = ParentClip;
       if(currentLocType != LocTypeOutput)
       {
-        die(L"Chunk tags can only be accessed from output sections of reduction rules.");
+        die("Chunk tags can only be accessed from output sections of reduction rules."_u);
       }
     }
   }
-  else if(peekchar() == L'(')
+  else if(peekchar() == '(')
   {
     OutputChunk* chunkwas = currentChunk;
     OutputChoice* choicewas = currentChoice;
@@ -541,54 +534,54 @@ RTXCompiler::parseClip(int src = -2)
   {
     if(ret->src == ParentClip || ret->src > 1)
     {
-      die(L"Macros can only access their single argument.");
+      die("Macros can only access their single argument."_u);
     }
   }
   else if(bounds && src == -2 && ret->src > (int)currentRule->pattern.size())
   {
-    die(L"Clip source is out of bounds (position " + to_wstring(ret->src) + L" requested, but rule has only " + to_wstring(currentRule->pattern.size()) + L" elements in its pattern).");
+    die("Clip source is out of bounds (position "_u + StringUtils::itoa(ret->src) + " requested, but rule has only "_u + StringUtils::itoa(currentRule->pattern.size()) + " elements in its pattern)."_u);
   }
   if(ret->src != StringVarClip)
   {
     ret->part = (src == -3) ? nextToken() : parseIdent();
   }
-  if(isNextToken(L'/'))
+  if(isNextToken('/'))
   {
     if(ret->src == ConstantClip)
     {
-      die(L"literal value cannot have a side");
+      die("literal value cannot have a side"_u);
     }
     else if(ret->src == StringVarClip)
     {
-      die(L"variable cannot have a side");
+      die("variable cannot have a side"_u);
     }
     ret->side = parseIdent();
   }
   else if(ret->src == ParentClip)
   {
-    ret->side = L"tl";
+    ret->side = "tl"_u;
   }
-  if(isNextToken(L'>'))
+  if(isNextToken('>'))
   {
     if(ret->src == ConstantClip)
     {
-      die(L"literal value cannot be rewritten");
+      die("literal value cannot be rewritten"_u);
     }
     else if(ret->src == ParentClip || ret->src == StringVarClip)
     {
-      die(L"variable cannot be rewritten");
+      die("variable cannot be rewritten"_u);
     }
     ret->rewrite.push_back(parseIdent());
   }
   return ret;
 }
 
-wchar_t
-RTXCompiler::lookupOperator(wstring op)
+UChar
+RTXCompiler::lookupOperator(UString op)
 {
-  wstring key = StringUtils::tolower(op);
-  key = StringUtils::substitute(key, L"-", L"");
-  key = StringUtils::substitute(key, L"_", L"");
+  UString key = StringUtils::tolower(op);
+  key = StringUtils::substitute(key, "-"_u, ""_u);
+  key = StringUtils::substitute(key, "_"_u, ""_u);
   for(unsigned int i = 0; i < OPERATORS.size(); i++)
   {
     if(key == OPERATORS[i].first)
@@ -602,12 +595,12 @@ RTXCompiler::lookupOperator(wstring op)
 RTXCompiler::Cond*
 RTXCompiler::parseCond()
 {
-  nextToken(L"(");
+  nextToken("("_u);
   eatSpaces();
   vector<Cond*> parts;
-  while(!source.eof() && peekchar() != L')')
+  while(!source.eof() && peekchar() != ')')
   {
-    if(peekchar() == L'(')
+    if(peekchar() == '(')
     {
       parts.push_back(parseCond());
     }
@@ -620,8 +613,8 @@ RTXCompiler::parseCond()
     }
     eatSpaces();
   }
-  nextToken(L")");
-  if(parts.size() == 0) die(L"Empty conditional.");
+  nextToken(")"_u);
+  if(parts.size() == 0) die("Empty conditional."_u);
   vector<pair<bool, Cond*>> denot;
   bool negated = false;
   for(unsigned int i = 0; i < parts.size(); i++)
@@ -629,7 +622,7 @@ RTXCompiler::parseCond()
     if(i != parts.size() - 1 && parts[i]->op == 0
        && parts[i]->val->src == 0)
     {
-      wchar_t op = lookupOperator(parts[i]->val->part);
+      UChar op = lookupOperator(parts[i]->val->part);
       if(op == NOT)
       {
         negated = !negated;
@@ -645,14 +638,14 @@ RTXCompiler::parseCond()
     if(i != 0 && i != denot.size() - 1 && denot[i].second->op == 0
        && denot[i].second->val->src == 0)
     {
-      wchar_t op = lookupOperator(denot[i].second->val->part);
+      UChar op = lookupOperator(denot[i].second->val->part);
       if(op != 0 && op != AND && op != OR && op != NOT)
       {
         if(destring.back().second->op == 0 && denot[i+1].second->op == 0)
         {
           if(destring.back().first || denot[i+1].first)
           {
-            die(L"Cannot negate string (I can't parse 'not a = b', use 'not (a = b)' or 'a not = b' instead).");
+            die("Cannot negate string (I can't parse 'not a = b', use 'not (a = b)' or 'a not = b' instead)."_u);
           }
           denot[i].second->left = destring.back().second;
           denot[i].second->right = denot[i+1].second;
@@ -675,13 +668,13 @@ RTXCompiler::parseCond()
     ret->right = destring[0].second;
   }
   else ret = destring[0].second;
-  if(destring.size() % 2 == 0) die(L"ANDs, ORs, and conditions don't come out evenly.");
+  if(destring.size() % 2 == 0) die("ANDs, ORs, and conditions don't come out evenly."_u);
   for(unsigned int i = 1; i < destring.size(); i += 2)
   {
-    if(destring[i].second->op != 0) die(L"Expected operator, found condition.");
-    if(destring[i].second->val->src != 0) die(L"Expected operator, found clip.");
-    wchar_t op = lookupOperator(destring[i].second->val->part);
-    if(op == 0) die(L"Unknown operator '" + destring[i].second->val->part + L"'.");
+    if(destring[i].second->op != 0) die("Expected operator, found condition."_u);
+    if(destring[i].second->val->src != 0) die("Expected operator, found clip."_u);
+    UChar op = lookupOperator(destring[i].second->val->part);
+    if(op == 0) die("Unknown operator '"_u + destring[i].second->val->part + "'."_u);
     Cond* temp = ret;
     ret = new Cond;
     ret->left = temp;
@@ -707,64 +700,64 @@ RTXCompiler::parseCond()
 void
 RTXCompiler::parsePatternElement(Rule* rule)
 {
-  vector<wstring> pat;
-  if(isNextToken(L'%'))
+  vector<UString> pat;
+  if(isNextToken('%'))
   {
     rule->grab_all = rule->pattern.size()+1;
   }
-  wstring t1 = nextToken();
-  if(t1 == L"$")
+  UString t1 = nextToken();
+  if(t1 == "$"_u)
   {
     t1 += parseIdent();
   }
-  else if(t1 == L"[")
+  else if(t1 == "["_u)
   {
-    t1 = L"$" + parseIdent();
-    if(!isNextToken(L']')) die(L"expected closing bracket after lemma category");
+    t1 = "$"_u + parseIdent();
+    if(!isNextToken(']')) die("expected closing bracket after lemma category"_u);
   }
-  if(isNextToken(L'@'))
+  if(isNextToken('@'))
   {
     pat.push_back(t1);
     pat.push_back(parseIdent());
   }
-  else if(t1[0] == L'$')
+  else if(t1[0] == '$')
   {
-    die(L"first tag in pattern element must be literal");
+    die("first tag in pattern element must be literal"_u);
   }
   else
   {
-    pat.push_back(L"");
+    pat.push_back(""_u);
     pat.push_back(t1);
   }
   while(!source.eof())
   {
-    if(!isNextToken(L'.'))
+    if(!isNextToken('.'))
     {
       break;
     }
-    wstring cur = nextToken();
-    if(cur == L"$")
+    UString cur = nextToken();
+    if(cur == "$"_u)
     {
       Clip* cl = parseClip(rule->pattern.size()+1);
       if(rule->vars.find(cl->part) != rule->vars.end())
       {
-        die(L"rule has multiple sources for attribute " + cl->part);
+        die("rule has multiple sources for attribute "_u + cl->part);
       }
       rule->vars[cl->part] = cl;
     }
-    else if(cur == L"[")
+    else if(cur == "["_u)
     {
-      pat.push_back(L"[" + parseIdent() + L"]");
-      nextToken(L"]");
+      pat.push_back("["_u + parseIdent() + "]"_u);
+      nextToken("]"_u);
     }
     else
     {
       pat.push_back(cur);
     }
   }
-  if(pat.size() == 2 && pat[1] == L"*")
+  if(pat.size() == 2 && pat[1] == "*"_u)
   {
-    pat[1] = L"UNKNOWN:INTERNAL";
+    pat[1] = "UNKNOWN:INTERNAL"_u;
   }
   rule->pattern.push_back(pat);
   eatSpaces();
@@ -784,59 +777,59 @@ RTXCompiler::OutputChunk*
 RTXCompiler::parseOutputElement()
 {
   OutputChunk* ret = new OutputChunk;
-  ret->conjoined = isNextToken(L'+');
+  ret->conjoined = isNextToken('+');
   ret->interpolated = false;
-  if(!ret->conjoined) ret->interpolated = isNextToken(L'<');
+  if(!ret->conjoined) ret->interpolated = isNextToken('<');
   ret->nextConjoined = false;
   if(ret->conjoined || ret->interpolated)
   {
-    wstring verb = (ret->conjoined ? L"conjoin" : L"interpolate");
+    UString verb = (ret->conjoined ? "conjoin"_u : "interpolate"_u);
     if(currentChunk == NULL)
     {
-      die(L"Cannot " + verb + L" from within if statement.");
+      die("Cannot "_u + verb + " from within if statement."_u);
     }
     if(currentChunk->children.size() == 0)
     {
-      die(L"Cannot " + verb + L" first element.");
+      die("Cannot "_u + verb + " first element."_u);
     }
     if(currentChunk->children.back()->conds.size() > 0)
     {
-      die(L"Cannot " + verb + L" to something in an if statement.");
+      die("Cannot "_u + verb + " to something in an if statement."_u);
     }
     if(currentChunk->children.back()->chunks.size() == 0)
     {
-      die(L"Cannot " + verb + L" inside and outside of if statement and cannot " + verb + L" first element.");
+      die("Cannot "_u + verb + " inside and outside of if statement and cannot "_u + verb + " first element."_u);
     }
-    if(currentChunk->children.back()->chunks[0]->mode == L"_")
+    if(currentChunk->children.back()->chunks[0]->mode == "_"_u)
     {
-      die(L"Cannot " + verb + L" to a blank.");
+      die("Cannot "_u + verb + " to a blank."_u);
     }
     eatSpaces();
     if(ret->interpolated) currentChunk->children.back()->chunks[0]->nextConjoined = true;
   }
-  bool isInterp = isNextToken(L'>');
+  bool isInterp = isNextToken('>');
   eatSpaces();
-  ret->getall = isNextToken(L'%');
-  if(peekchar() == L'_')
+  ret->getall = isNextToken('%');
+  if(peekchar() == '_')
   {
     if(ret->getall)
     {
-      die(L"% cannot be used on blanks");
+      die("% cannot be used on blanks"_u);
     }
-    ret->mode = L"_";
+    ret->mode = "_"_u;
     getchar();
     if(isdigit(peekchar()))
     {
       ret->pos = parseInt();
       if(currentRule->pattern.size() == 1)
       {
-        die(L"Cannot output indexed blank because pattern is one element long and thus does not include blanks.");
+        die("Cannot output indexed blank because pattern is one element long and thus does not include blanks."_u);
       }
       if(ret->pos < 1 || ret->pos >= currentRule->pattern.size())
       {
-        die(L"Position index of blank out of bounds, expected an integer from 1 to " + to_wstring(currentRule->pattern.size()-1) + L".");
+        die("Position index of blank out of bounds, expected an integer from 1 to "_u + StringUtils::itoa(currentRule->pattern.size()-1) + "."_u);
       }
-      wcerr << L"Warning: Use of indexed blank on line " << currentLine << L" is deprecated." << endl;
+      cerr << "Warning: Use of indexed blank on line " << currentLine << " is deprecated." << endl;
     }
     else
     {
@@ -845,81 +838,81 @@ RTXCompiler::parseOutputElement()
   }
   else if(isdigit(peekchar()))
   {
-    ret->mode = L"#";
+    ret->mode = "#"_u;
     ret->pos = parseInt();
     if(ret->pos == 0)
     {
-      die(L"There is no position 0.");
+      die("There is no position 0."_u);
     }
     else if(currentLocType != LocTypeMacro && !isInterp && ret->pos > currentRule->pattern.size())
     {
-      die(L"There are only " + to_wstring(currentRule->pattern.size()) + L" elements in the pattern.");
+      die("There are only "_u + StringUtils::itoa(currentRule->pattern.size()) + " elements in the pattern."_u);
     }
-    if(peekchar() == L'(')
+    if(peekchar() == '(')
     {
-      nextToken(L"(");
+      nextToken("("_u);
       ret->pattern = parseIdent();
-      nextToken(L")");
+      nextToken(")"_u);
     }
     else if(currentLocType == LocTypeMacro)
     {
-      die(L"Outputs in a macro must specify a pattern.");
+      die("Outputs in a macro must specify a pattern."_u);
     }
   }
-  else if(isNextToken(L'*'))
+  else if(isNextToken('*'))
   {
-    if(peekchar() != L'(')
+    if(peekchar() != '(')
     {
-      die(L"No macro name specified.");
+      die("No macro name specified."_u);
     }
-    nextToken(L"(");
+    nextToken("("_u);
     ret->pattern = parseIdent(true);
-    nextToken(L")");
+    nextToken(")"_u);
     ret->pos = 0;
-    ret->mode = L"#";
+    ret->mode = "#"_u;
   }
-  else if(isNextToken(L'$'))
+  else if(isNextToken('$'))
   {
-    if(isInterp) die(L"Interpolating a global variable does not make sense.");
-    if(ret->getall) die(L"Using % with a global variable does not make sense.");
-    nextToken(L"$");
-    ret->mode = L"$$";
+    if(isInterp) die("Interpolating a global variable does not make sense."_u);
+    if(ret->getall) die("Using % with a global variable does not make sense."_u);
+    nextToken("$"_u);
+    ret->mode = "$$"_u;
     ret->pattern = parseIdent(true);
   }
   else
   {
     ret->lemma = parseIdent();
     ret->pos = 0;
-    wstring mode = nextToken(L"@", L"(");
-    if(mode == L"@")
+    UString mode = nextToken("@"_u, "("_u);
+    if(mode == "@"_u)
     {
       if(ret->getall)
       {
-        die(L"% not supported on output literals with @. Use %lemma(pos).");
+        die("% not supported on output literals with @. Use %lemma(pos)."_u);
       }
-      ret->mode = L"@";
+      ret->mode = "@"_u;
       while(true)
       {
-        wstring cur = nextToken();
-        wstring var = to_wstring(ret->tags.size());
+        UString cur = nextToken();
+        UString var = StringUtils::itoa(ret->tags.size());
         ret->tags.push_back(var);
         Clip* cl = new Clip;
-        if(cur == L"$")
+        if(cur == "$"_u)
         {
           cl->src = -1;
           cl->part = parseIdent();
         }
-        else if(cur == L"[")
+        else if(cur == "["_u)
         {
           cl = parseClip();
-          nextToken(L"]");
+          nextToken("]"_u);
         }
-        else if(cur == L"{")
+        else if(cur == "{"_u)
         {
           ret->tags.pop_back();
-          var = L"lemcase";
+          var = "lemcase"_u;
           cl = parseClip();
-          nextToken(L"}");
+          nextToken("}"_u);
         }
         else
         {
@@ -927,7 +920,7 @@ RTXCompiler::parseOutputElement()
           cl->part = cur;
         }
         ret->vars[var] = cl;
-        if(!isNextToken(L'.'))
+        if(!isNextToken('.'))
         {
           break;
         }
@@ -935,58 +928,58 @@ RTXCompiler::parseOutputElement()
     }
     else
     {
-      ret->mode = L"#@";
+      ret->mode = "#@"_u;
       ret->pattern = parseIdent(true);
-      nextToken(L")");
+      nextToken(")"_u);
       Clip* pos = new Clip;
       pos->src = 0;
       pos->part = ret->pattern;
-      pos->rewrite.push_back(L"pos_tag");
-      ret->vars[L"pos_tag"] = pos;
+      pos->rewrite.push_back("pos_tag"_u);
+      ret->vars["pos_tag"_u] = pos;
       unsigned int i = 0;
       for(; i < ret->lemma.size(); i++)
       {
-        if(ret->lemma[i] == L'#') break;
+        if(ret->lemma[i] == '#') break;
       }
       Clip* lemh = new Clip;
       lemh->part = ret->lemma.substr(0, i);
       lemh->src = 0;
-      lemh->rewrite.push_back(L"lemh");
-      ret->vars[L"lemh"] = lemh;
+      lemh->rewrite.push_back("lemh"_u);
+      ret->vars["lemh"_u] = lemh;
       if(i < ret->lemma.size())
       {
         Clip* lemq = new Clip;
         lemq->part = ret->lemma.substr(i+2);
         lemq->src = 0;
-        lemq->rewrite.push_back(L"lemq");
-        ret->vars[L"lemq"] = lemq;
+        lemq->rewrite.push_back("lemq"_u);
+        ret->vars["lemq"_u] = lemq;
       }
       Clip* lem = new Clip;
       lem->part = ret->lemma;
       lem->src = 0;
-      lem->rewrite.push_back(L"lem");
-      ret->vars[L"lem"] = lem;
+      lem->rewrite.push_back("lem"_u);
+      ret->vars["lem"_u] = lem;
     }
   }
-  if(isNextToken(L'['))
+  if(isNextToken('['))
   {
-    while(!source.eof() && peekchar() != L']')
+    while(!source.eof() && peekchar() != ']')
     {
       eatSpaces();
-      wstring var = parseIdent();
-      nextToken(L"=");
+      UString var = parseIdent();
+      nextToken("="_u);
       eatSpaces();
       Clip* cl = parseClip();
-      if(cl->part == L"_")
+      if(cl->part == "_"_u)
       {
-        cl->part = L"";
+        cl->part.clear();
       }
       if(cl->src != 0 && cl->src != -2)
       {
         cl->rewrite.push_back(var);
       }
       ret->vars[var] = cl;
-      if(nextToken(L",", L"]") == L"]")
+      if(nextToken(","_u, "]"_u) == "]"_u)
       {
         break;
       }
@@ -999,7 +992,7 @@ RTXCompiler::parseOutputElement()
 RTXCompiler::OutputChoice*
 RTXCompiler::parseOutputCond()
 {
-  nextToken(L"(");
+  nextToken("("_u);
   OutputChoice* choicewas = currentChoice;
   OutputChunk* chunkwas = currentChunk;
   Clip* clipwas = currentClip;
@@ -1009,31 +1002,31 @@ RTXCompiler::parseOutputCond()
   currentClip = NULL;
   while(true)
   {
-    wstring mode = StringUtils::tolower(nextToken());
-    mode = StringUtils::substitute(mode, L"-", L"");
-    mode = StringUtils::substitute(mode, L"_", L"");
-    if(ret->conds.size() == 0 && mode != L"if" && mode != L"always")
+    UString mode = StringUtils::tolower(nextToken());
+    mode = StringUtils::substitute(mode, "-"_u, ""_u);
+    mode = StringUtils::substitute(mode, "_"_u, ""_u);
+    if(ret->conds.size() == 0 && mode != "if"_u && mode != "always"_u)
     {
-      die(L"If statement must begin with 'if'.");
+      die("If statement must begin with 'if'."_u);
     }
-    if(ret->conds.size() > 0 && mode == L"always")
+    if(ret->conds.size() > 0 && mode == "always"_u)
     {
-      die(L"Always clause must be only clause.");
+      die("Always clause must be only clause."_u);
     }
-    if(mode == L"if" || mode == L"elif" || mode == L"elseif")
+    if(mode == "if"_u || mode == "elif"_u || mode == "elseif"_u)
     {
       ret->conds.push_back(parseCond());
     }
-    else if(mode == L")")
+    else if(mode == ")"_u)
     {
       break;
     }
-    else if(mode != L"else" && mode != L"otherwise" && mode != L"always")
+    else if(mode != "else"_u && mode != "otherwise"_u && mode != "always"_u)
     {
-      die(L"Unknown statement: '" + mode + L"'.");
+      die("Unknown statement: '"_u + mode + "'."_u);
     }
     eatSpaces();
-    if(peekchar() == L'(')
+    if(peekchar() == '(')
     {
       ret->nest.push_back(parseOutputCond());
       ret->chunks.push_back(NULL);
@@ -1045,29 +1038,29 @@ RTXCompiler::parseOutputCond()
       ret->chunks.push_back(NULL);
       ret->nest.push_back(NULL);
     }
-    else if(peekchar() == L'{')
+    else if(peekchar() == '{')
     {
       if(currentLoc == LocChunk)
       {
-        die(L"Nested chunks are currently not allowed.");
+        die("Nested chunks are currently not allowed."_u);
       }
       else if(currentLocType == LocTypeMacro)
       {
-        die(L"Macros cannot generate entire chunks.");
+        die("Macros cannot generate entire chunks."_u);
       }
       else if(currentLoc == LocVarSet)
       {
-        die(L"Global variables cannot be set to chunks.");
+        die("Global variables cannot be set to chunks."_u);
       }
       ret->nest.push_back(NULL);
       ret->clips.push_back(NULL);
       ret->chunks.push_back(parseOutputChunk());
     }
-    else if(peekchar() == L'[')
+    else if(peekchar() == '[')
     {
       if(currentLoc == LocVarSet)
       {
-        die(L"Global variables must be set to single nodes.");
+        die("Global variables must be set to single nodes."_u);
       }
       ret->nest.push_back(NULL);
       ret->clips.push_back(NULL);
@@ -1077,15 +1070,15 @@ RTXCompiler::parseOutputCond()
     {
       if(currentLoc != LocChunk && currentLoc != LocVarSet)
       {
-        die(L"Conditional non-chunk output current not possible.");
+        die("Conditional non-chunk output current not possible."_u);
       }
       ret->chunks.push_back(parseOutputElement());
       ret->nest.push_back(NULL);
       ret->clips.push_back(NULL);
     }
-    if(mode == L"else" || mode == L"otherwise" || mode == L"always")
+    if(mode == "else"_u || mode == "otherwise"_u || mode == "always"_u)
     {
-      nextToken(L")");
+      nextToken(")"_u);
       break;
     }
   }
@@ -1094,29 +1087,29 @@ RTXCompiler::parseOutputCond()
   currentClip = clipwas;
   if(ret->chunks.size() == 0)
   {
-    die(L"If statement cannot be empty.");
+    die("If statement cannot be empty."_u);
   }
   if(ret->conds.size() == ret->nest.size())
   {
     if(currentLoc == LocChunk && currentLocType == LocTypeMacro)
     {
-      wcerr << L"Warning: if statement without else in macro on line " << currentLine << L"." << endl;
-      wcerr << L"  This may fail to produce output and cause crashes at runtime." << endl;
+      cerr << "Warning: if statement without else in macro on line " << currentLine << "." << endl;
+      cerr << "  This may fail to produce output and cause crashes at runtime." << endl;
     }
-    //die(L"If statement has no else clause and thus could produce no output.");
+    //die("If statement has no else clause and thus could produce no output."_u);
     ret->nest.push_back(NULL);
     if(currentLoc == LocClip)
     {
       Clip* blank = new Clip;
       blank->src = 0;
-      blank->part = L"";
+      blank->part.clear();
       ret->clips.push_back(blank);
       ret->chunks.push_back(NULL);
     }
     else
     {
       OutputChunk* temp = new OutputChunk;
-      temp->mode = L"[]";
+      temp->mode = "[]"_u;
       temp->pos = 0;
       temp->conjoined = false;
       ret->chunks.push_back(temp);
@@ -1133,20 +1126,20 @@ RTXCompiler::parseOutputChunk()
   int end;
   OutputChunk* ch = new OutputChunk;
   ch->conjoined = false;
-  if(nextToken(L"{", L"[") == L"{")
+  if(nextToken("{"_u, "["_u) == "{"_u)
   {
     currentLoc = LocChunk;
-    ch->mode = L"{}";
-    end = L'}';
+    ch->mode = "{}"_u;
+    end = '}';
   }
   else
   {
     if(currentLoc != LocChunk)
     {
-      die(L"Output grouping with [] only valid inside chunks.");
+      die("Output grouping with [] only valid inside chunks."_u);
     }
-    ch->mode = L"[]";
-    end = L']';
+    ch->mode = "[]"_u;
+    end = ']';
   }
   eatSpaces();
   OutputChunk* chunkwas = currentChunk;
@@ -1156,7 +1149,7 @@ RTXCompiler::parseOutputChunk()
   ch->pos = 0;
   while(peekchar() != end)
   {
-    if(peekchar() == L'(')
+    if(peekchar() == '(')
     {
       ch->children.push_back(parseOutputCond());
     }
@@ -1165,8 +1158,8 @@ RTXCompiler::parseOutputChunk()
       ch->children.push_back(chunkToCond(parseOutputElement()));
     }
   }
-  nextToken(wstring(1, end));
-  if(end == L'}') currentLoc = LocTopLevel;
+  nextToken(UString(1, end));
+  if(end == '}') currentLoc = LocTopLevel;
   eatSpaces();
   currentChunk = chunkwas;
   currentChoice = choicewas;
@@ -1174,18 +1167,18 @@ RTXCompiler::parseOutputChunk()
 }
 
 void
-RTXCompiler::parseReduceRule(wstring output, wstring next)
+RTXCompiler::parseReduceRule(UString output, UString next)
 {
-  vector<wstring> outNodes;
+  vector<UString> outNodes;
   outNodes.push_back(output);
-  if(next != L"->")
+  if(next != "->"_u)
   {
-    wstring cur = next;
-    while(cur != L"->")
+    UString cur = next;
+    while(cur != "->"_u)
     {
-      if(SPECIAL_CHARS.find(cur) != wstring::npos)
+      if(SPECIAL_CHARS.find(cur) != UString::npos)
       {
-        die(L"Chunk names must be identifiers. (I think I'm parsing a reduction rule.)\nIf this error doesn't make sense to you, a common reason is that on the line before this you have ; instead of |");
+        die("Chunk names must be identifiers. (I think I'm parsing a reduction rule.)\nIf this error doesn't make sense to you, a common reason is that on the line before this you have ; instead of |"_u);
       }
       outNodes.push_back(cur);
       cur = nextToken();
@@ -1204,11 +1197,11 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
     rule->line = currentLine;
     currentLocType = LocTypeInput;
     currentLoc = LocTopLevel;
-    if(!source.eof() && peekchar() == L'"')
+    if(!source.eof() && peekchar() == '"')
     {
       setUnreadMark();
-      wstring nm = parseIdent();
-      if(peekchar() == L'@')
+      UString nm = parseIdent();
+      if(peekchar() == '@')
       {
         unread();
       }
@@ -1221,41 +1214,41 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
     if(isdigit(peekchar()))
     {
       rule->weight = parseWeight();
-      nextToken(L":");
+      nextToken(":"_u);
       eatSpaces();
     }
     else
     {
       rule->weight = 0;
     }
-    while(!source.eof() && peekchar() != L'{' && peekchar() != L'(' && peekchar() != L'?')
+    while(!source.eof() && peekchar() != '{' && peekchar() != '(' && peekchar() != '?')
     {
-      if(peekchar() == L'[')
+      if(peekchar() == '[')
       {
         setUnreadMark();
         getchar();
-        wchar_t next = peekchar();
+        UChar next = peekchar();
         unread();
-        if(next == L'$' || isspace(next)) break;
+        if(next == '$' || isspace(next)) break;
       }
       parsePatternElement(rule);
     }
     if(rule->pattern.size() == 0)
     {
-      die(L"empty pattern");
+      die("empty pattern"_u);
     }
     eatSpaces();
-    if(isNextToken(L'?'))
+    if(isNextToken('?'))
     {
       rule->cond = parseCond();
       eatSpaces();
     }
-    if(isNextToken(L'['))
+    if(isNextToken('['))
     {
       while(!source.eof())
       {
         eatSpaces();
-        if(!isNextToken(L'$'))
+        if(!isNextToken('$'))
         {
           unsigned int idx = 1;
           if(isdigit(peekchar()))
@@ -1264,20 +1257,20 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
           }
           if(idx == 0 || idx > outNodes.size())
           {
-            die(L"Chunk index for setting source or reference is out of range.");
+            die("Chunk index for setting source or reference is out of range."_u);
           }
-          nextToken(L"/");
-          bool sl = (nextToken(L"sl", L"ref") == L"sl");
-          nextToken(L"=");
+          nextToken("/"_u);
+          bool sl = (nextToken("sl"_u, "ref"_u) == "sl"_u);
+          nextToken("="_u);
           currentLoc = LocVarSet;
           OutputChoice* cond;
-          if(peekchar() == L'(') cond = parseOutputCond();
+          if(peekchar() == '(') cond = parseOutputCond();
           else cond = chunkToCond(parseOutputElement());
           if(sl)
           {
             if(rule->output_sl[idx-1] != NULL)
             {
-              die(L"Rule sets chunk source multiple times.");
+              die("Rule sets chunk source multiple times."_u);
             }
             rule->output_sl[idx-1] = cond;
           }
@@ -1285,21 +1278,21 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
           {
             if(rule->output_ref[idx-1] != NULL)
             {
-              die(L"Rule sets chunk reference multiple times.");
+              die("Rule sets chunk reference multiple times."_u);
             }
             rule->output_ref[idx-1] = cond;
           }
         }
-        else if(isNextToken(L'$'))
+        else if(isNextToken('$'))
         {
-          wstring var = parseIdent();
+          UString var = parseIdent();
           if(rule->globals.find(var) != rule->globals.end())
           {
-            die(L"Rule sets global variable $$" + var + L" multiple times.");
+            die("Rule sets global variable $$"_u + var + " multiple times."_u);
           }
-          nextToken(L"=");
+          nextToken("="_u);
           currentLoc = LocVarSet;
-          if(peekchar() == L'(') rule->globals[var] = parseOutputCond();
+          if(peekchar() == '(') rule->globals[var] = parseOutputCond();
           else rule->globals[var] = chunkToCond(parseOutputElement());
           currentLoc = LocTopLevel;
           if(globalVarNames.find(var) == globalVarNames.end())
@@ -1308,27 +1301,27 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
             globalVarNames[var] = temp;
           }
         }
-        else if(isNextToken(L'%'))
+        else if(isNextToken('%'))
         {
-          wstring var = parseIdent();
+          UString var = parseIdent();
           if(rule->stringGlobals.find(var) != rule->stringGlobals.end())
           {
-            die(L"Rule sets global variable $%" + var + L" multiple times.");
+            die("Rule sets global variable $%"_u + var + " multiple times."_u);
           }
-          nextToken(L"=");
+          nextToken("="_u);
           rule->stringGlobals[var] = parseClip();
         }
         else
         {
-          wstring var = parseIdent();
+          UString var = parseIdent();
           if(rule->vars.find(var) != rule->vars.end())
           {
-            die(L"rule has multiple sources for attribute " + var);
+            die("rule has multiple sources for attribute "_u + var);
           }
-          nextToken(L"=");
+          nextToken("="_u);
           rule->vars[var] = parseClip();
         }
-        if(nextToken(L",", L"]") == L"]")
+        if(nextToken(","_u, "]"_u) == "]"_u)
         {
           break;
         }
@@ -1338,34 +1331,34 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
     currentLocType = LocTypeOutput;
     if(rule->result.size() > 1)
     {
-      nextToken(L"{");
+      nextToken("{"_u);
     }
     unsigned int chunk_count = 0;
     while(chunk_count < rule->result.size())
     {
       eatSpaces();
-      if(source.eof()) die(L"Unexpected end of file.");
+      if(source.eof()) die("Unexpected end of file."_u);
       switch(peekchar())
       {
-        case L'(':
+        case '(':
           rule->output.push_back(parseOutputCond());
           chunk_count++;
           break;
-        case L'{':
+        case '{':
           rule->output.push_back(chunkToCond(parseOutputChunk()));
           chunk_count++;
           break;
-        case L'_':
+        case '_':
           rule->output.push_back(chunkToCond(parseOutputElement()));
           break;
-        case L'}':
+        case '}':
           if(rule->result.size() == 1)
           {
-            die(L"Unexpected } in output pattern.");
+            die("Unexpected } in output pattern."_u);
           }
           else if(chunk_count < rule->result.size())
           {
-            die(L"Output pattern does not have enough chunks.");
+            die("Output pattern does not have enough chunks."_u);
           }
           break;
         default:
@@ -1376,10 +1369,10 @@ RTXCompiler::parseReduceRule(wstring output, wstring next)
     }
     if(rule->result.size() > 1)
     {
-      nextToken(L"}");
+      nextToken("}"_u);
     }
     reductionRules.push_back(rule);
-    if(nextToken(L"|", L";") == L";")
+    if(nextToken("|"_u, ";"_u) == ";"_u)
     {
       break;
     }
@@ -1392,28 +1385,28 @@ RTXCompiler::processRetagRules()
 {
   for(auto rule : retagRules)
   {
-    map<wstring, vector<wstring>> vals;
-    wstring src = rule[0].first;
-    wstring dest = rule[0].second;
+    map<UString, vector<UString>> vals;
+    UString src = rule[0].first;
+    UString dest = rule[0].second;
     if(!PB.isAttrDefined(src) && collections.find(src) == collections.end())
     {
-      wcerr << L"Warning: Source category for tag-rewrite rule '" << src << "' > '" << dest << "' is undefined." << endl;
+      cerr << "Warning: Source category for tag-rewrite rule '" << src << "' > '" << dest << "' is undefined." << endl;
       continue;
     }
     if(!PB.isAttrDefined(dest) && collections.find(dest) == collections.end())
     {
-      wcerr << L"Warning: Destination category for tag-rewrite rule '" << src << "' > '" << dest << "' is undefined." << endl;
+      cerr << "Warning: Destination category for tag-rewrite rule '" << src << "' > '" << dest << "' is undefined." << endl;
       continue;
     }
     if(collections.find(src) == collections.end() || collections.find(dest) == collections.end()) continue;
     for(unsigned int i = 1; i < rule.size(); i++)
     {
-      if(rule[i].first[0] == L'[')
+      if(rule[i].first[0] == '[')
       {
-        wstring cat = rule[i].first.substr(2);
+        UString cat = rule[i].first.substr(2);
         if(collections.find(cat) == collections.end())
         {
-          wcerr << L"Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' contains mapping from undefined category '" << cat << "'." << endl;
+          cerr << "Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' contains mapping from undefined category '" << cat << "'." << endl;
           continue;
         }
         for(auto v : collections[cat]) vals[v].push_back(rule[i].second);
@@ -1440,14 +1433,14 @@ RTXCompiler::processRetagRules()
           }
           if(!found)
           {
-            wcerr << L"Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' does not convert '" << a << "'." << endl;
+            cerr << "Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' does not convert '" << a << "'." << endl;
           }
         }
         else if(vals[a].size() > 1)
         {
-          wcerr << L"Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' converts '" << a << "' to multiple values: ";
-          for(auto b : vals[a]) wcerr << "'" << b << "', ";
-          wcerr << "defaulting to '" << vals[a][0] << "'." << endl;
+          cerr << "Warning: Tag-rewrite rule '" << src << "' > '" << dest << "' converts '" << a << "' to multiple values: ";
+          for(auto b : vals[a]) cerr << "'" << b << "', ";
+          cerr << "defaulting to '" << vals[a][0] << "'." << endl;
         }
       }
     }
@@ -1465,24 +1458,24 @@ RTXCompiler::makePattern(int ruleid)
   vector<vector<PatternElement*>> pat;
   for(unsigned int i = 0; i < rule->pattern.size(); i++)
   {
-    vector<vector<wstring>> tags;
-    tags.push_back(vector<wstring>());
+    vector<vector<UString>> tags;
+    tags.push_back(vector<UString>());
     for(unsigned int j = 1; j < rule->pattern[i].size(); j++)
     {
-      wstring tg = rule->pattern[i][j];
-      if(rule->pattern[i][j][0] == L'[')
+      UString tg = rule->pattern[i][j];
+      if(rule->pattern[i][j][0] == '[')
       {
         tg = tg.substr(1, tg.size()-2);
         if(collections.find(tg) == collections.end())
         {
-          die(L"unknown attribute category '" + tg + L"'");
+          die("unknown attribute category '"_u + tg + "'"_u);
         }
-        vector<vector<wstring>> tmp;
+        vector<vector<UString>> tmp;
         for(auto tls : tags)
         {
           for(auto t : collections[tg])
           {
-            vector<wstring> tmp2;
+            vector<UString> tmp2;
             tmp2.assign(tls.begin(), tls.end());
             tmp2.push_back(t);
             tmp.push_back(tmp2);
@@ -1498,9 +1491,9 @@ RTXCompiler::makePattern(int ruleid)
         }
       }
     }
-    for(unsigned int t = 0; t < tags.size(); t++) tags[t].push_back(L"*");
-    wstring lem = rule->pattern[i][0];
-    if(lem.size() == 0 || lem[0] != L'$')
+    for(unsigned int t = 0; t < tags.size(); t++) tags[t].push_back("*"_u);
+    UString lem = rule->pattern[i][0];
+    if(lem.size() == 0 || lem[0] != '$')
     {
       vector<PatternElement*> pel;
       for(auto tls : tags)
@@ -1514,7 +1507,7 @@ RTXCompiler::makePattern(int ruleid)
     }
     else
     {
-      vector<wstring> lems = collections[lem.substr(1)];
+      vector<UString> lems = collections[lem.substr(1)];
       vector<PatternElement*> el;
       for(unsigned int j = 0; j < lems.size(); j++)
       {
@@ -1531,45 +1524,45 @@ RTXCompiler::makePattern(int ruleid)
   }
   if(excluded.find(rule->name) == excluded.end())
   {
-    PB.addRule(ruleid+1, rule->weight, pat, vector<wstring>(1, rule->result[0]), rule->name);
+    PB.addRule(ruleid+1, rule->weight, pat, vector<UString>(1, rule->result[0]), rule->name);
   }
 }
 
-wstring
-RTXCompiler::compileString(wstring s)
+UString
+RTXCompiler::compileString(UString s)
 {
-  wstring ret;
+  UString ret;
   ret += STRING;
-  ret += (wchar_t)s.size();
+  ret += (UChar)s.size();
   ret += s;
   return ret;
 }
 
-wstring
-RTXCompiler::compileTag(wstring s)
+UString
+RTXCompiler::compileTag(UString s)
 {
   if(s.size() == 0)
   {
     return compileString(s);
   }
-  wstring tag;
-  tag += L'<';
+  UString tag;
+  tag += '<';
   tag += s;
-  tag += L'>';
-  return compileString(StringUtils::substitute(tag, L".", L"><"));
+  tag += '>';
+  return compileString(StringUtils::substitute(tag, "."_u, "><"_u));
 }
 
-wstring
-RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
+UString
+RTXCompiler::compileClip(Clip* c, UString _dest = ""_u)
 {
-  if(c->src == -1 && c->part == L"lu-count")
+  if(c->src == -1 && c->part == "lu-count"_u)
   {
-    return wstring(1, LUCOUNT);
+    return UString(1, LUCOUNT);
   }
   if(c->src == -2)
   {
-    wstring ret = processOutputChoice(c->choice);
-    if(_dest == L"lem" || _dest == L"lemh" || _dest == L"lemq" || _dest == L"lemcase")
+    UString ret = processOutputChoice(c->choice);
+    if(_dest == "lem"_u || _dest == "lemh"_u || _dest == "lemq"_u || _dest == "lemcase"_u)
     {
       ret += DISTAG;
     }
@@ -1579,18 +1572,18 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
   {
     return PB.BCstring(c->varName) + FETCHVAR;
   }
-  if(c->src != 0 && !(c->part == L"lemcase" ||
+  if(c->src != 0 && !(c->part == "lemcase"_u ||
       collections.find(c->part) != collections.end() || PB.isAttrDefined(c->part)))
   {
-    die(L"Attempt to clip undefined attribute '" + c->part + L"'.");
+    die("Attempt to clip undefined attribute '"_u + c->part + "'."_u);
   }
   int src = (c->src == -1) ? 0 : c->src;
   bool useReplace = (currentLocType == LocTypeOutput);
-  wstring cl;
+  UString cl;
   if(src == -4)
   {
     cl += INT;
-    cl += globalVarNames[c->varName];
+    cl += (UChar)globalVarNames[c->varName];
     cl += FETCHCHUNK;
   }
   else
@@ -1599,15 +1592,15 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
     cl += src;
     cl += PUSHINPUT;
   }
-  if(c->part == L"whole" || c->part == L"chcontent") return cl;
-  cl += (c->part == L"lemcase") ? compileString(L"lem") : compileString(c->part);
-  wstring ret = cl;
-  wstring undeftag;
-  wstring deftag;
-  wstring thedefault;
-  wstring blank;
+  if(c->part == "whole"_u || c->part == "chcontent"_u) return cl;
+  cl += (c->part == "lemcase"_u) ? compileString("lem"_u) : compileString(c->part);
+  UString ret = cl;
+  UString undeftag;
+  UString deftag;
+  UString thedefault;
+  UString blank;
   blank += DUP;
-  blank += compileString(L"");
+  blank += compileString(""_u);
   blank += EQUAL;
   if(useReplace && undeftag.size() > 0)
   {
@@ -1633,60 +1626,60 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
   blank += JUMPONFALSE;
   if(c->src == 0)
   {
-    if(_dest == L"lem" || _dest == L"lemh" || _dest == L"lemq" || _dest == L"lemcase" ||
+    if(_dest == "lem"_u || _dest == "lemh"_u || _dest == "lemq"_u || _dest == "lemcase"_u ||
        (c->rewrite.size() > 0 &&
-        (c->rewrite.back() == L"lem" || c->rewrite.back() == L"lemh" ||
-         c->rewrite.back() == L"lemq" || c->rewrite.back() == L"lemcase")))
+        (c->rewrite.back() == "lem"_u || c->rewrite.back() == "lemh"_u ||
+         c->rewrite.back() == "lemq"_u || c->rewrite.back() == "lemcase"_u)))
     {
       return compileString(c->part);
     }
     else return compileTag(c->part);
   }
-  else if(c->side == L"sl")
+  else if(c->side == "sl"_u)
   {
     ret += SOURCECLIP;
     ret += blank;
-    ret += (wchar_t)thedefault.size();
+    ret += (UChar)thedefault.size();
     ret += thedefault;
   }
-  else if(c->side == L"ref")
+  else if(c->side == "ref"_u)
   {
     ret += REFERENCECLIP;
     ret += blank;
-    ret += (wchar_t)thedefault.size();
+    ret += (UChar)thedefault.size();
     ret += thedefault;
   }
-  else if(c->side == L"tl" || c->part == L"lemcase" ||
+  else if(c->side == "tl"_u || c->part == "lemcase"_u ||
           (c->src > 0 && !nodeIsSurface[currentRule->pattern[c->src-1][1]]))
   {
     ret += TARGETCLIP;
     ret += blank;
-    ret += (wchar_t)thedefault.size();
+    ret += (UChar)thedefault.size();
     ret += thedefault;
   }
   else
   {
     ret += TARGETCLIP;
     ret += blank;
-    ret += (wchar_t)(6 + 2*cl.size() + 2*blank.size() + thedefault.size());
+    ret += (UChar)(6 + 2*cl.size() + 2*blank.size() + thedefault.size());
     ret += DROP;
     ret += cl;
     ret += REFERENCECLIP;
     ret += blank;
-    ret += (wchar_t)(3 + cl.size() + blank.size() + thedefault.size());
+    ret += (UChar)(3 + cl.size() + blank.size() + thedefault.size());
     ret += DROP;
     ret += cl;
     ret += SOURCECLIP;
     ret += blank;
-    ret += (wchar_t)thedefault.size();
+    ret += (UChar)thedefault.size();
     ret += thedefault;
   }
-  if(c->part == L"lemcase")
+  if(c->part == "lemcase"_u)
   {
     ret += GETCASE;
   }
-  wstring src_cat = c->part;
-  vector<wstring> rewrite = c->rewrite;
+  UString src_cat = c->part;
+  vector<UString> rewrite = c->rewrite;
   if(_dest.size() > 0 && rewrite.size() == 0 && currentLocType == LocTypeOutput)
   {
     rewrite.push_back(_dest);
@@ -1694,7 +1687,7 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
   for(auto dest : rewrite)
   {
     bool found = false;
-    vector<pair<wstring, wstring>> rule;
+    vector<pair<UString, UString>> rule;
     for(unsigned int i = 0; i < retagRules.size(); i++)
     {
       if(retagRules[i][0].first == src_cat && retagRules[i][0].second == dest)
@@ -1706,21 +1699,21 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
     }
     if(!found && dest != src_cat)
     {
-      if(dest == L"lem" || dest == L"lemh" || dest == L"lemq")
+      if(dest == "lem"_u || dest == "lemh"_u || dest == "lemq"_u)
       {
         ret += DISTAG;
         return ret;
       }
-      die(L"There is no tag-rewrite rule from '" + src_cat + L"' to '" + dest + L"'.");
+      die("There is no tag-rewrite rule from '"_u + src_cat + "' to '"_u + dest + "'."_u);
     }
-    wstring check;
+    UString check;
     for(unsigned int i = 1; i < rule.size(); i++)
     {
-      wstring cur;
+      UString cur;
       cur += DUP;
       cur += DISTAG;
       if(rule[i].first.size() > 2 &&
-         rule[i].first[0] == L'[' && rule[i].first[1] == L']')
+         rule[i].first[0] == '[' && rule[i].first[1] == ']')
       {
         cur += compileString(rule[i].first.substr(2));
         cur += IN;
@@ -1731,31 +1724,31 @@ RTXCompiler::compileClip(Clip* c, wstring _dest = L"")
         cur += EQUAL;
       }
       cur += JUMPONFALSE;
-      cur += (wchar_t)(rule[i].second.size() + (i == 1 ? 5 : 7));
+      cur += (UChar)(rule[i].second.size() + (i == 1 ? 5 : 7));
       cur += DROP;
       cur += compileTag(rule[i].second);
       if(i != 1)
       {
         cur += JUMP;
-        cur += (wchar_t)check.size();
+        cur += (UChar)check.size();
       }
       check = cur + check;
     }
     ret += check;
-    if(dest == L"lemh" || dest == L"lem" || dest == L"lemq")
+    if(dest == "lemh"_u || dest == "lem"_u || dest == "lemq"_u)
     {
       if(dest != dest) ret += DISTAG;
     }
   }
-  if(_dest == L"lem" || _dest == L"lemh" || _dest == L"lemq" || _dest == L"lemcase")
+  if(_dest == "lem"_u || _dest == "lemh"_u || _dest == "lemq"_u || _dest == "lemcase"_u)
   {
     ret += DISTAG;
   }
   return ret;
 }
 
-wstring
-RTXCompiler::compileClip(wstring part, int pos, wstring side = L"")
+UString
+RTXCompiler::compileClip(UString part, int pos, UString side = ""_u)
 {
   Clip cl;
   cl.part = part;
@@ -1791,7 +1784,7 @@ RTXCompiler::processMacroClip(Clip* mac, OutputChunk* arg)
       }
       else
       {
-        die(L"Macro not given value for attribute '" + mac->part + L"'.");
+        die("Macro not given value for attribute '"_u + mac->part + "'."_u);
       }
     }
     else ret->src = arg->pos;
@@ -1839,7 +1832,7 @@ RTXCompiler::processMacroChunk(OutputChunk* mac, OutputChunk* arg)
   {
     ret->children.push_back(processMacroChoice(mac->children[i], arg));
   }
-  for(map<wstring, Clip*>::iterator it = mac->vars.begin(),
+  for(map<UString, Clip*>::iterator it = mac->vars.begin(),
           limit = mac->vars.end(); it != limit; it++)
   {
     ret->vars[it->first] = processMacroClip(it->second, arg);
@@ -1847,7 +1840,7 @@ RTXCompiler::processMacroChunk(OutputChunk* mac, OutputChunk* arg)
   if(mac->pos == 1)
   {
     ret->pos = arg->pos;
-    for(map<wstring, Clip*>::iterator it = arg->vars.begin(),
+    for(map<UString, Clip*>::iterator it = arg->vars.begin(),
             limit = arg->vars.end(); it != limit; it++)
     {
       if(ret->vars.find(it->first) == ret->vars.end() || arg->pos == 0)
@@ -1887,29 +1880,29 @@ RTXCompiler::processMacroChoice(OutputChoice* mac, OutputChunk* arg)
   return ret;
 }
 
-wstring
+UString
 RTXCompiler::processOutputChunk(OutputChunk* r)
 {
-  wstring ret;
+  UString ret;
   if(r->conjoined && currentLocType == LocTypeOutput)
   {
     ret += CONJOIN;
     ret += OUTPUT;
   }
-  if(r->mode == L"_")
+  if(r->mode == "_"_u)
   {
     ret += INT;
-    ret += (wchar_t)r->pos;
+    ret += (UChar)r->pos;
     ret += BLANK;
     if(currentLocType == LocTypeOutput)
     {
       ret += OUTPUT;
     }
   }
-  else if(r->mode == L"$$")
+  else if(r->mode == "$$"_u)
   {
     ret += INT;
-    ret += (wchar_t)globalVarNames[r->pattern];
+    ret += (UChar)globalVarNames[r->pattern];
     ret += FETCHCHUNK;
     if(r->interpolated) ret += APPENDCHILD;
     if(currentLocType == LocTypeOutput)
@@ -1917,39 +1910,39 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
       ret += OUTPUT;
     }
   }
-  else if(r->mode == L"{}" || r->mode == L"[]" || r->mode == L"")
+  else if(r->mode == "{}"_u || r->mode == "[]"_u || r->mode.empty())
   {
     for(unsigned int i = 0; i < r->children.size(); i++)
     {
       ret += processOutputChoice(r->children[i]);
     }
   }
-  else if(r->mode == L"#" || r->mode == L"#@")
+  else if(r->mode == "#"_u || r->mode == "#@"_u)
   {
     bool interp = r->pos > currentRule->pattern.size();
-    wstring pos;
+    UString pos;
     if(!interp && r->pos != 0)
     {
       if(currentRule->pattern[r->pos-1].size() < 2)
       {
-        die(L"could not find tag order for element " + to_wstring(r->pos));
+        die("could not find tag order for element "_u + StringUtils::itoa(r->pos));
       }
       pos = currentRule->pattern[r->pos-1][1];
     }
-    wstring patname = (r->pattern != L"") ? r->pattern : pos;
-    pos = (pos != L"") ? pos : patname;
+    UString patname = (r->pattern.empty()) ? pos : r->pattern;
+    pos = (pos.empty()) ? patname : pos;
     if(outputRules.find(patname) == outputRules.end())
     {
       if(interp)
       {
-        ret += compileClip(L"whole", r->pos, L"tl");
+        ret += compileClip("whole"_u, r->pos, "tl"_u);
         if(r->interpolated) ret += APPENDCHILD;
         ret += OUTPUT;
         return ret;
       }
-      die(L"Could not find output pattern '" + patname + L"'.");
+      die("Could not find output pattern '"_u + patname + "'."_u);
     }
-    vector<wstring> pattern = outputRules[patname];
+    vector<UString> pattern = outputRules[patname];
 
     if(r->getall)
     {
@@ -1968,20 +1961,20 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
     if(r->interpolated)
     {
       ret += INT;
-      ret += (wchar_t)0;
+      ret += (UChar)0;
       ret += BLANK;
       ret += APPENDCHILD;
     }
-    if(pattern.size() == 1 && pattern[0] == L"macro")
+    if(pattern.size() == 1 && pattern[0] == "macro"_u)
     {
       macroNameStack.push_back(patname);
       ret += processOutputChoice(processMacroChoice(macros[patname], r));
       macroNameStack.pop_back();
       return ret;
     }
-    if(pattern.size() == 1 && pattern[0] == L"%")
+    if(pattern.size() == 1 && pattern[0] == "%"_u)
     {
-      ret += compileClip(L"whole", r->pos, L"tl");
+      ret += compileClip("whole"_u, r->pos, "tl"_u);
       if(currentLocType == LocTypeOutput && !r->nextConjoined)
       {
         ret += OUTPUT;
@@ -1992,12 +1985,12 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
     {
       ret += CHUNK;
     }
-    if(r->mode == L"#@")
+    if(r->mode == "#@"_u)
     {
       unsigned int j;
       for(j = 0; j < r->lemma.size(); j++)
       {
-        if(r->lemma[j] == L'#') break;
+        if(r->lemma[j] == '#') break;
       }
       if(j < r->lemma.size())
       {
@@ -2005,57 +1998,57 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
         Clip* c = new Clip;
         c->part = r->lemma.substr(j);
         c->src = 0;
-        c->rewrite.push_back(L"lemq");
-        r->vars[L"lemq"] = c;
+        c->rewrite.push_back("lemq"_u);
+        r->vars["lemq"_u] = c;
       }
       else ret += compileString(r->lemma);
     }
-    else if(r->vars.find(L"lem") != r->vars.end())
+    else if(r->vars.find("lem"_u) != r->vars.end())
     {
-      ret += compileClip(r->vars[L"lem"], L"lem");
+      ret += compileClip(r->vars["lem"_u], "lem"_u);
     }
-    else if(r->vars.find(L"lemh") != r->vars.end())
+    else if(r->vars.find("lemh"_u) != r->vars.end())
     {
-      ret += compileClip(r->vars[L"lemh"], L"lemh");
+      ret += compileClip(r->vars["lemh"_u], "lemh"_u);
     }
     else if(r->pos == 0)
     {
       if(currentRule->grab_all != -1)
       {
-        ret += compileClip(L"lem", currentRule->grab_all, L"tl");
+        ret += compileClip("lem"_u, currentRule->grab_all, "tl"_u);
       }
       else
       {
-        ret += compileString(L"default");
+        ret += compileString("default"_u);
       }
     }
     else
     {
       Clip* c = new Clip;
-      c->part = L"lemh";
+      c->part = "lemh"_u;
       c->src = r->pos;
-      c->side = L"tl";
-      c->rewrite.push_back(L"lemh");
-      ret += compileClip(c, L"lemh");
+      c->side = "tl"_u;
+      c->rewrite.push_back("lemh"_u);
+      ret += compileClip(c, "lemh"_u);
     }
-    if(r->vars.find(L"lemcase") != r->vars.end())
+    if(r->vars.find("lemcase"_u) != r->vars.end())
     {
-      ret += compileClip(r->vars[L"lemcase"], L"lemcase");
+      ret += compileClip(r->vars["lemcase"_u], "lemcase"_u);
       ret += SETCASE;
     }
     ret += currentSurface;
     for(unsigned int i = 0; i < pattern.size(); i++)
     {
-      if(pattern[i] == L"_")
+      if(pattern[i] == "_"_u)
       {
-        if(r->vars.find(L"pos_tag") != r->vars.end())
+        if(r->vars.find("pos_tag"_u) != r->vars.end())
         {
-          ret += compileClip(r->vars[L"pos_tag"]);
+          ret += compileClip(r->vars["pos_tag"_u]);
         }
         else if(r->pos != 0)
         {
           //ret += compileTag(currentRule->pattern[r->pos-1][1]);
-          ret += compileClip(L"pos_tag", r->pos, L"tl");
+          ret += compileClip("pos_tag"_u, r->pos, "tl"_u);
         }
         else
         {
@@ -2063,20 +2056,20 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
         }
         ret += currentSurface;
       }
-      else if(pattern[i][0] == L'<')
+      else if(pattern[i][0] == '<')
       {
         ret += compileString(pattern[i]);
         ret += currentSurface;
       }
       else
       {
-        wstring ret_temp;
-        vector<wstring> ops = altAttrs[pattern[i]];
+        UString ret_temp;
+        vector<UString> ops = altAttrs[pattern[i]];
         if(ops.size() == 0)
         {
           ops.push_back(pattern[i]);
         }
-        wstring var;
+        UString var;
         for(unsigned int v = 0; v < ops.size(); v++)
         {
           if(r->vars.find(ops[v]) != r->vars.end())
@@ -2085,7 +2078,7 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
             break;
           }
         }
-        if(var == L"" && r->pos != 0)
+        if(var.empty() && r->pos != 0)
         {
           Clip* cl = new Clip;
           cl->src = r->pos;
@@ -2093,14 +2086,14 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
           if(currentLocType == LocTypeOutput) cl->rewrite.push_back(pattern[i]);
           ret_temp += compileClip(cl, pattern[i]);
         }
-        else if(var == L"")
+        else if(var.empty())
         {
           bool found = false;
           for(unsigned int t = 0; t < parentTags.size(); t++)
           {
             if(parentTags[t] == pattern[i])
             {
-              ret_temp += compileTag(to_wstring(t+1));
+              ret_temp += compileTag(StringUtils::itoa(t+1));
               found = true;
               break;
             }
@@ -2117,7 +2110,7 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
             }
             else if(r->pos == 0)
             {
-              die(L"Cannot find source for tag '" + pattern[i] + L"'.");
+              die("Cannot find source for tag '"_u + pattern[i] + "'."_u);
             }
             else
             {
@@ -2131,37 +2124,37 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
         }
         if(currentLocType == LocTypeOutput && noOverwrite[pattern[i]].size() > 0)
         {
-          ret += compileClip(pattern[i], r->pos, L"tl");
+          ret += compileClip(pattern[i], r->pos, "tl"_u);
           ret += DUP;
-          ret += compileString(pattern[i] + L" over");
+          ret += compileString(pattern[i] + " over"_u);
           ret += IN;
           ret += JUMPONTRUE;
-          ret += (wchar_t)(1+ret_temp.size());
+          ret += (UChar)(1+ret_temp.size());
           ret += DROP;
         }
         ret += ret_temp;
         ret += currentSurface;
       }
     }
-    if(r->vars.find(L"lemq") != r->vars.end())
+    if(r->vars.find("lemq"_u) != r->vars.end())
     {
-      ret += compileClip(r->vars[L"lemq"], L"lemq");
+      ret += compileClip(r->vars["lemq"_u], "lemq"_u);
       ret += currentSurface;
     }
     else if(r->pos != 0)
     {
-      ret += compileClip(L"lemq", r->pos, L"tl");
+      ret += compileClip("lemq"_u, r->pos, "tl"_u);
       ret += currentSurface;
     }
     if(r->pos != 0)
     {
-      ret += compileClip(L"whole", r->pos, L"tl");
+      ret += compileClip("whole"_u, r->pos, "tl"_u);
       ret += APPENDALLCHILDREN;
       ret += INT;
-      ret += (wchar_t)r->pos;
+      ret += (UChar)r->pos;
       ret += GETRULE;
       ret += INT;
-      ret += (wchar_t)0;
+      ret += (UChar)0;
       ret += SETRULE;
     }
     if(r->interpolated) ret += APPENDCHILD;
@@ -2172,7 +2165,7 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
     else if(currentLoc == LocVarSet)
     {
       ret += INT;
-      ret += (wchar_t)currentVar;
+      ret += (UChar)currentVar;
       ret += SETCHUNK;
     }
   }
@@ -2181,15 +2174,15 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
     if(r->interpolated)
     {
       ret += INT;
-      ret += (wchar_t)0;
+      ret += (UChar)0;
       ret += BLANK;
       ret += APPENDCHILD;
     }
     ret += CHUNK;
     ret += compileString(r->lemma);
-    if(r->vars.find(L"lemcase") != r->vars.end())
+    if(r->vars.find("lemcase"_u) != r->vars.end())
     {
-      ret += compileClip(r->vars[L"lemcase"]);
+      ret += compileClip(r->vars["lemcase"_u]);
       ret += SETCASE;
     }
     ret += currentSurface;
@@ -2209,17 +2202,17 @@ RTXCompiler::processOutputChunk(OutputChunk* r)
     else if(currentLoc == LocVarSet)
     {
       ret += INT;
-      ret += (wchar_t)currentVar;
+      ret += (UChar)currentVar;
       ret += SETCHUNK;
     }
   }
   return ret;
 }
 
-wstring
+UString
 RTXCompiler::processCond(Cond* cond)
 {
-  wstring ret;
+  UString ret;
   if(cond == NULL)
   {
     ret += PUSHTRUE;
@@ -2229,31 +2222,31 @@ RTXCompiler::processCond(Cond* cond)
   {
     if(cond->left->op == 0 || cond->right->op == 0)
     {
-      die(L"Cannot evaluate AND with string as operand (try adding parentheses).");
+      die("Cannot evaluate AND with string as operand (try adding parentheses)."_u);
     }
   }
   else if(cond->op == OR)
   {
     if(cond->left->op == 0 || cond->right->op == 0)
     {
-      die(L"Cannot evaluate OR with string as operand (try adding parentheses).");
+      die("Cannot evaluate OR with string as operand (try adding parentheses)."_u);
     }
   }
   else if(cond->op == NOT)
   {
     if(cond->right->op == 0)
     {
-      die(L"Attempt to negate string value.");
+      die("Attempt to negate string value."_u);
     }
   }
   else if(cond->op != 0 && (cond->left->op != 0 || cond->right->op != 0))
   {
-    die(L"String operator cannot take condition as operand.");
+    die("String operator cannot take condition as operand."_u);
   }
   else if(cond->op == EQUAL)
   {
-    wstring lit;
-    wstring attr;
+    UString lit;
+    UString attr;
     bool rew = false;
     Clip* l = cond->left->val;
     if(l->src == 0) lit = l->part;
@@ -2281,7 +2274,7 @@ RTXCompiler::processCond(Cond* cond)
           break;
         }
       }
-      if(!found) die(L"'" + lit + L"' is not an element of list '" + attr + L"', so this check will always fail.");
+      if(!found) die("'"_u + lit + "' is not an element of list '"_u + attr + "', so this check will always fail."_u);
     }
   }
   if(cond->op == 0)
@@ -2293,7 +2286,7 @@ RTXCompiler::processCond(Cond* cond)
     else
     {
       ret = compileClip(cond->val);
-      if(cond->val->part != L"lem" && cond->val->part != L"lemh" && cond->val->part != L"lemq")
+      if(cond->val->part != "lem"_u && cond->val->part != "lemh"_u && cond->val->part != "lemq"_u)
       {
         ret += DISTAG;
       }
@@ -2313,10 +2306,10 @@ RTXCompiler::processCond(Cond* cond)
   return ret;
 }
 
-wstring
+UString
 RTXCompiler::processOutputChoice(OutputChoice* choice)
 {
-  wstring ret;
+  UString ret;
   if(choice->nest.back() != NULL)
   {
     ret += processOutputChoice(choice->nest.back());
@@ -2332,7 +2325,7 @@ RTXCompiler::processOutputChoice(OutputChoice* choice)
   int n = choice->conds.size();
   for(int i = 1; i <= n; i++)
   {
-    wstring act;
+    UString act;
     if(choice->nest[n-i] != NULL)
     {
       act = processOutputChoice(choice->nest[n-i]);
@@ -2346,10 +2339,10 @@ RTXCompiler::processOutputChoice(OutputChoice* choice)
       act = processOutputChunk(choice->chunks[n-i]);
     }
     act += JUMP;
-    act += (wchar_t)ret.size();
-    wstring cond = processCond(choice->conds[n-i]);
+    act += (UChar)ret.size();
+    UString cond = processCond(choice->conds[n-i]);
     cond += JUMPONFALSE;
-    cond += (wchar_t)act.size();
+    cond += (UChar)act.size();
     ret = cond + act + ret;
   }
   return ret;
@@ -2364,21 +2357,21 @@ RTXCompiler::processRules()
     rule = reductionRules[ruleid];
     if(summarizing)
     {
-      if(rule->name.size() > 0) wcerr << "\"" << rule->name << "\": ";
-      for(auto it : rule->result) wcerr << it << " ";
-      wcerr << "->";
-      for(auto it : rule->pattern) wcerr << " " << it[1];
-      wcerr << endl;
+      if(rule->name.size() > 0) cerr << "\"" << rule->name << "\": ";
+      for(auto it : rule->result) cerr << it << " ";
+      cerr << "->";
+      for(auto it : rule->pattern) cerr << " " << it[1];
+      cerr << endl;
     }
     currentRule = rule;
     currentChunk = NULL;
     currentChoice = NULL;
     makePattern(ruleid);
-    wstring comp;
+    UString comp;
     if(rule->cond != NULL)
     {
       currentLocType = LocTypeInput;
-      comp = processCond(rule->cond) + JUMPONTRUE + (wchar_t)1 + REJECTRULE;
+      comp = processCond(rule->cond) + JUMPONTRUE + (UChar)1 + REJECTRULE;
     }
     for(auto it : rule->globals)
     {
@@ -2395,7 +2388,7 @@ RTXCompiler::processRules()
       comp += SETVAR;
     }
     currentLoc = LocTopLevel;
-    vector<wstring> outcomp;
+    vector<UString> outcomp;
     outcomp.resize(rule->pattern.size());
     parentTags.clear();
     unsigned int patidx = 0;
@@ -2403,12 +2396,12 @@ RTXCompiler::processRules()
     {
       currentLocType = LocTypeInput;
       OutputChoice* cur = rule->output[i];
-      if(cur->chunks.size() == 1 && cur->chunks[0]->mode == L"_")
+      if(cur->chunks.size() == 1 && cur->chunks[0]->mode == "_"_u)
       {
         currentSurface = APPENDSURFACE;
         comp += processOutputChoice(cur);
       }
-      else if(cur->chunks.size() == 1 && cur->chunks[0]->mode == L"#")
+      else if(cur->chunks.size() == 1 && cur->chunks[0]->mode == "#"_u)
       {
         currentSurface = APPENDSURFACE;
         comp += processOutputChoice(cur);
@@ -2417,17 +2410,17 @@ RTXCompiler::processRules()
       else
       {
         OutputChunk* ch = new OutputChunk;
-        ch->mode = L"#";
+        ch->mode = "#"_u;
         ch->pos = 0;
         ch->getall = true;
         ch->vars = rule->vars;
-        if(ch->vars.find(L"lemcase") == ch->vars.end())
+        if(ch->vars.find("lemcase"_u) == ch->vars.end())
         {
           Clip* lemcase = new Clip;
           lemcase->src = 1;
-          lemcase->part = L"lemcase";
-          lemcase->side = L"tl";
-          ch->vars[L"lemcase"] = lemcase;
+          lemcase->part = "lemcase"_u;
+          lemcase->side = "tl"_u;
+          ch->vars["lemcase"_u] = lemcase;
         }
         ch->conjoined = false;
         ch->interpolated = false;
@@ -2446,9 +2439,9 @@ RTXCompiler::processRules()
           comp += processOutputChoice(rule->output_ref[patidx]);
         }
         comp += INT;
-        comp += (wchar_t)outputBytecode.size();
+        comp += (UChar)outputBytecode.size();
         comp += INT;
-        comp += (wchar_t)0;
+        comp += (UChar)0;
         comp += SETRULE;
         comp += APPENDALLINPUT;
         parentTags = outputRules[ch->pattern];
@@ -2457,11 +2450,11 @@ RTXCompiler::processRules()
         outputBytecode.push_back(processOutputChoice(cur));
         if(rule->name.size() > 0)
         {
-          PB.outRuleNames.push_back(rule->name + L" - line " + to_wstring(rule->line));
+          PB.outRuleNames.push_back(rule->name + " - line "_u + StringUtils::itoa(rule->line));
         }
         else
         {
-          PB.outRuleNames.push_back(L"line " + to_wstring(rule->line));
+          PB.outRuleNames.push_back("line "_u + StringUtils::itoa(rule->line));
         }
         parentTags.clear();
         patidx++;
@@ -2486,7 +2479,7 @@ RTXCompiler::read(const string &fname)
   source.open(fname);
   if(!source.is_open())
   {
-    wcerr << L"Unable to open file " << fname.c_str() << " for reading." << endl;
+    cerr << "Unable to open file " << fname.c_str() << " for reading." << endl;
     exit(EXIT_FAILURE);
   }
   while(true)
@@ -2502,15 +2495,13 @@ RTXCompiler::read(const string &fname)
   errorsAreSyntax = false;
   processRetagRules();
   processRules();
-  for(map<wstring, vector<wstring>>::iterator it=collections.begin(); it != collections.end(); ++it)
-  {
-    set<wstring, Ltstr> vals;
-    for(unsigned int i = 0; i < it->second.size(); i++)
-    {
-      vals.insert(it->second[i]);
+  for (auto& it : collections) {
+    set<UString> vals;
+    for (auto& it2 : it.second) {
+      vals.insert(it2);
     }
-    PB.addList(it->first, vals);
-    PB.addAttr(it->first, vals);
+    PB.addList(it.first, vals);
+    PB.addAttr(it.first, vals);
   }
 }
 
@@ -2524,18 +2515,18 @@ RTXCompiler::write(const string &fname)
     exit(EXIT_FAILURE);
   }
 
-  vector<pair<int, wstring>> inRules;
+  vector<pair<int, UString>> inRules;
   for(unsigned int i = 0; i < reductionRules.size(); i++)
   {
     inRules.push_back(make_pair(2*reductionRules[i]->pattern.size() - 1,
                                 reductionRules[i]->compiled));
     if(reductionRules[i]->name.size() > 0)
     {
-      PB.inRuleNames.push_back(reductionRules[i]->name + L" - line " + to_wstring(reductionRules[i]->line));
+      PB.inRuleNames.push_back(reductionRules[i]->name + " - line "_u + StringUtils::itoa(reductionRules[i]->line));
     }
     else
     {
-      PB.inRuleNames.push_back(L"line " + to_wstring(reductionRules[i]->line));
+      PB.inRuleNames.push_back("line "_u + StringUtils::itoa(reductionRules[i]->line));
     }
   }
 
diff --git a/src/rtx_compiler.h b/src/rtx_compiler.h
index 9bd0924..6d333c6 100644
--- a/src/rtx_compiler.h
+++ b/src/rtx_compiler.h
@@ -36,16 +36,16 @@ private:
   struct Clip
   {
     int src;
-    wstring part;
-    wstring side;
-    vector<wstring> rewrite;
+    UString part;
+    UString side;
+    vector<UString> rewrite;
     OutputChoice* choice;
-    wstring varName;
+    UString varName;
   };
 
   struct Cond
   {
-    wchar_t op;
+    UChar op;
     Clip* val;
     Cond* left;
     Cond* right;
@@ -53,13 +53,13 @@ private:
 
   struct OutputChunk
   {
-    wstring mode;
+    UString mode;
     unsigned int pos;
-    wstring lemma;
-    vector<wstring> tags;
+    UString lemma;
+    vector<UString> tags;
     bool getall;
-    map<wstring, Clip*> vars;
-    wstring pattern;
+    map<UString, Clip*> vars;
+    UString pattern;
     vector<OutputChoice*> children;
     bool conjoined;
     bool interpolated;
@@ -79,16 +79,16 @@ private:
     int line;
     int grab_all;
     float weight;
-    wstring name;
-    vector<vector<wstring>> pattern;
+    UString name;
+    vector<vector<UString>> pattern;
     vector<OutputChoice*> output;
     vector<OutputChoice*> output_sl;
     vector<OutputChoice*> output_ref;
-    map<wstring, Clip*> vars;
-    map<wstring, OutputChoice*> globals;
-    map<wstring, Clip*> stringGlobals;
-    vector<wstring> result;
-    wstring compiled;
+    map<UString, Clip*> vars;
+    map<UString, OutputChoice*> globals;
+    map<UString, Clip*> stringGlobals;
+    vector<UString> result;
+    UString compiled;
     Cond* cond;
   };
 
@@ -121,7 +121,7 @@ private:
   /**
    * Names of rules that should be excluded from the pattern transducer
    */
-  set<wstring> excluded;
+  set<UString> excluded;
 
   //////////
   // COLLECTIONS AND DATA STRUCTURES
@@ -130,10 +130,10 @@ private:
   /**
    * All characters not allowed in identifiers
    */
-  static wstring const SPECIAL_CHARS;
+  static UString const SPECIAL_CHARS;
 
-  static wstring const ANY_TAG;
-  static wstring const ANY_CHAR;
+  static UString const ANY_TAG;
+  static UString const ANY_CHAR;
 
   /**
    * Pattern-file generator
@@ -143,20 +143,20 @@ private:
   /**
    * Map of names to attribute lists
    */
-  map<wstring, vector<wstring>> collections;
+  map<UString, vector<UString>> collections;
 
   /**
    * Map of attribute names to default and replacement values
    * First value of pair is value to return if the attribute is not found
    * Second value is value to overwrite it with if it's still there at output
    */
-  map<wstring, pair<wstring, wstring>> attrDefaults;
+  map<UString, pair<UString, UString>> attrDefaults;
 
   /**
    * Map of attribute names to values that should never be modified
    * Note: This is not currently used
    */
-  map<wstring, vector<wstring>> noOverwrite;
+  map<UString, vector<UString>> noOverwrite;
   
   /**
    * List of tag-replacement rules
@@ -164,33 +164,33 @@ private:
    * Followed by some number of pair<old attribute value, new attribute value>
    * Note: This is not currently used
    */
-  vector<vector<pair<wstring, wstring>>> retagRules;
+  vector<vector<pair<UString, UString>>> retagRules;
 
   /**
    * Map key => [ value ]
    * Where key and value both name attribute lists
    * Where for each value, there is a tag-replacement rule from value to key
    */
-  map<wstring, vector<wstring>> altAttrs;
+  map<UString, vector<UString>> altAttrs;
   
   /**
    * Map of pattern names to output patterns
    * Where '_' represents "lemh" and the part of speech tag
    * (which is usually the pattern name)
    * "lemq" is automatically appended to the end
-   * If the contents of the vector is L"macro", look at macros
+   * If the contents of the vector is "macro"_u, look at macros
    */
-  map<wstring, vector<wstring>> outputRules;
+  map<UString, vector<UString>> outputRules;
 
   /**
    * Map of pattern names to conditioned output patterns
    */
-  map<wstring, OutputChoice*> macros;
+  map<UString, OutputChoice*> macros;
 
   /**
    * Names of global chunk-type variables and corresponding indecies
    */
-  map<wstring, unsigned int> globalVarNames;
+  map<UString, unsigned int> globalVarNames;
 
   /**
    * Map of pattern names to booleans
@@ -198,7 +198,7 @@ private:
    * and thus all clips should be target clips
    * true indicates both surface only and unspecified
    */
-  map<wstring, bool> nodeIsSurface;
+  map<UString, bool> nodeIsSurface;
 
   /**
    * List of all reduction rules in the order they were parsed
@@ -209,7 +209,7 @@ private:
    * List of compiled forms of output-time rules
    * in the order they were generated
    */
-  vector<wstring> outputBytecode;
+  vector<UString> outputBytecode;
 
   /**
    * Either the current rule being parsed or the current rule being compiled
@@ -240,13 +240,13 @@ private:
    * Which surface of a chunk is being assigned to
    * one of APPENDSURFACE, APPENDSURFACESL, APPENDSURFACEREF
    */
-  wchar_t currentSurface;
+  UChar currentSurface;
 
   /**
    * All attributes which can be clipped from the chunk whose children
    * are currently being compiled
    */
-  vector<wstring> parentTags;
+  vector<UString> parentTags;
 
   /**
    * Current construct being parsed or compiled
@@ -266,7 +266,7 @@ private:
   /**
    * Input stream
    */
-  wifstream source;
+  ifstream source;
 
   //////////
   // ERROR REPORTING
@@ -274,19 +274,19 @@ private:
 
   // for generating error messages
   int currentLine;
-  wstring recentlyRead;
-  wstring unreadbuf;
+  UString recentlyRead;
+  UString unreadbuf;
   int unreadmark;
   bool errorsAreSyntax;
   string sourceFile;
-  vector<wstring> macroNameStack;
+  vector<UString> macroNameStack;
 
   /**
    * Report an error in the input file and exit
    * if errorsAreSyntax == true, will also print the most recently read line
    * with a marker of the approximate location of the error
    */
-  void die(wstring message);
+  void die(UString message);
 
   //////////
   // TOKENIZATION
@@ -298,7 +298,7 @@ private:
    * to ensure that recentlyRead gets updated properly
    * @return character
    */
-  wchar_t getchar();
+  UChar getchar();
 
   /**
    * Return the next character in the input stream without reading
@@ -306,7 +306,7 @@ private:
    * in order to properly manage unreadbuf
    * @ return character
    */
-  wchar_t peekchar();
+  UChar peekchar();
 
   /**
    * Mark the current location so that it can be jumped back to with unread()
@@ -328,7 +328,7 @@ private:
    * Report a syntax error if it is preceded by spaces
    * @return token
    */
-  wstring nextTokenNoSpace();
+  UString nextTokenNoSpace();
 
   /**
    * Parse the next token
@@ -337,14 +337,14 @@ private:
    * report a syntax error
    * @return token
    */
-  wstring nextToken(wstring check1, wstring check2);
+  UString nextToken(UString check1, UString check2);
 
   /**
    * Parse an identifier
    * Calls eatSpaces() beforehand if prespace == true
    * @return identifier
    */
-  wstring parseIdent(bool prespace);
+  UString parseIdent(bool prespace);
 
   /**
    * Parse an integer
@@ -362,7 +362,7 @@ private:
    * If the next character in the input stream is c, consume it and return true
    * Otherwise return false
    */
-  bool isNextToken(wchar_t c);
+  bool isNextToken(UChar c);
 
   //////////
   // COMPONENT PARSING
@@ -392,106 +392,106 @@ private:
   /**
    * Convert a string to an operator
    * @param op - the string from the rule
-   * @return bytecode for corresponding operation or L'\0' if not found
-   */
-  wchar_t lookupOperator(wstring op);
-
-  const vector<pair<wstring, wchar_t>> OPERATORS = {
-    make_pair(L"and", AND),
-    make_pair(L"&", AND),
-
-    make_pair(L"or", OR),
-    make_pair(L"|", OR),
-
-    make_pair(L"not", NOT),
-    make_pair(L"~", NOT),
-    make_pair(L"⌐", NOT),
-
-    make_pair(L"equal", EQUAL),
-    make_pair(L"=", EQUAL),
-
-    make_pair(L"isprefix", ISPREFIX),
-    make_pair(L"startswith", ISPREFIX),
-    make_pair(L"beginswith", ISPREFIX),
-
-    make_pair(L"issuffix", ISSUFFIX),
-    make_pair(L"endswith", ISSUFFIX),
-
-    make_pair(L"issubstring", ISSUBSTRING),
-    make_pair(L"contains", ISSUBSTRING),
-
-    make_pair(L"equalcl", EQUALCL),
-    make_pair(L"equalcaseless", EQUALCL),
-    make_pair(L"equalfold", EQUALCL),
-    make_pair(L"equalfoldcase", EQUALCL),
-
-    make_pair(L"isprefixcl", ISPREFIXCL),
-    make_pair(L"startswithcl", ISPREFIXCL),
-    make_pair(L"beginswithcl", ISPREFIXCL),
-    make_pair(L"isprefixcaseless", ISPREFIXCL),
-    make_pair(L"startswithcaseless", ISPREFIXCL),
-    make_pair(L"beginswithcaseless", ISPREFIXCL),
-    make_pair(L"isprefixfold", ISPREFIXCL),
-    make_pair(L"startswithfold", ISPREFIXCL),
-    make_pair(L"beginswithfold", ISPREFIXCL),
-    make_pair(L"isprefixfoldcase", ISPREFIXCL),
-    make_pair(L"startswithfoldcase", ISPREFIXCL),
-    make_pair(L"beginswithfoldcase", ISPREFIXCL),
-
-    make_pair(L"issuffixcl", ISSUFFIXCL),
-    make_pair(L"endswithcl", ISSUFFIXCL),
-    make_pair(L"issuffixcaseless", ISSUFFIXCL),
-    make_pair(L"endswithcaseless", ISSUFFIXCL),
-    make_pair(L"issuffixfold", ISSUFFIXCL),
-    make_pair(L"endswithfold", ISSUFFIXCL),
-    make_pair(L"issuffixfoldcase", ISSUFFIXCL),
-    make_pair(L"endswithfoldcase", ISSUFFIXCL),
-
-    make_pair(L"issubstringcl", ISSUBSTRINGCL),
-    make_pair(L"issubstringcaseless", ISSUBSTRINGCL),
-    make_pair(L"issubstringfold", ISSUBSTRINGCL),
-    make_pair(L"issubstringfoldcase", ISSUBSTRINGCL),
-
-    make_pair(L"hasprefix", HASPREFIX),
-    make_pair(L"startswithlist", HASPREFIX),
-    make_pair(L"beginswithlist", HASPREFIX),
-
-    make_pair(L"hassuffix", HASSUFFIX),
-    make_pair(L"endswithlist", HASSUFFIX),
-
-    make_pair(L"in", IN),
-    make_pair(L"∈", IN),
-
-    make_pair(L"hasprefixcl", HASPREFIXCL),
-    make_pair(L"startswithlistcl", HASPREFIXCL),
-    make_pair(L"beginswithlistcl", HASPREFIXCL),
-    make_pair(L"hasprefixcaseless", HASPREFIXCL),
-    make_pair(L"startswithlistcaseless", HASPREFIXCL),
-    make_pair(L"beginswithlistcaseless", HASPREFIXCL),
-    make_pair(L"hasprefixfold", HASPREFIXCL),
-    make_pair(L"startswithlistfold", HASPREFIXCL),
-    make_pair(L"beginswithlistfold", HASPREFIXCL),
-    make_pair(L"hasprefixfoldcase", HASPREFIXCL),
-    make_pair(L"startswithlistfoldcase", HASPREFIXCL),
-    make_pair(L"beginswithlistfoldcase", HASPREFIXCL),
-
-    make_pair(L"hassuffixcl", HASSUFFIXCL),
-    make_pair(L"endswithlistcl", HASSUFFIXCL),
-    make_pair(L"hassuffixcaseless", HASSUFFIXCL),
-    make_pair(L"endswithlistcaseless", HASSUFFIXCL),
-    make_pair(L"hassuffixfold", HASSUFFIXCL),
-    make_pair(L"endswithlistfold", HASSUFFIXCL),
-    make_pair(L"hassuffixfoldcase", HASSUFFIXCL),
-    make_pair(L"endswithlistfoldcase", HASSUFFIXCL),
-
-    make_pair(L"incl", INCL),
-    make_pair(L"∈cl", INCL), // why you would want to use ∈ here I'm not sure
-    make_pair(L"incaseless", INCL),
-    make_pair(L"∈caseless", INCL), // but the documentation implies they exist
-    make_pair(L"infold", INCL),
-    make_pair(L"∈fold", INCL), // so here they are
-    make_pair(L"infoldcase", INCL),
-    make_pair(L"∈foldcase", INCL)
+   * @return bytecode for corresponding operation or '\0' if not found
+   */
+  UChar lookupOperator(UString op);
+
+  const vector<pair<UString, UChar>> OPERATORS = {
+    make_pair("and"_u, AND),
+    make_pair("&"_u, AND),
+
+    make_pair("or"_u, OR),
+    make_pair("|"_u, OR),
+
+    make_pair("not"_u, NOT),
+    make_pair("~"_u, NOT),
+    make_pair("⌐"_u, NOT),
+
+    make_pair("equal"_u, EQUAL),
+    make_pair("="_u, EQUAL),
+
+    make_pair("isprefix"_u, ISPREFIX),
+    make_pair("startswith"_u, ISPREFIX),
+    make_pair("beginswith"_u, ISPREFIX),
+
+    make_pair("issuffix"_u, ISSUFFIX),
+    make_pair("endswith"_u, ISSUFFIX),
+
+    make_pair("issubstring"_u, ISSUBSTRING),
+    make_pair("contains"_u, ISSUBSTRING),
+
+    make_pair("equalcl"_u, EQUALCL),
+    make_pair("equalcaseless"_u, EQUALCL),
+    make_pair("equalfold"_u, EQUALCL),
+    make_pair("equalfoldcase"_u, EQUALCL),
+
+    make_pair("isprefixcl"_u, ISPREFIXCL),
+    make_pair("startswithcl"_u, ISPREFIXCL),
+    make_pair("beginswithcl"_u, ISPREFIXCL),
+    make_pair("isprefixcaseless"_u, ISPREFIXCL),
+    make_pair("startswithcaseless"_u, ISPREFIXCL),
+    make_pair("beginswithcaseless"_u, ISPREFIXCL),
+    make_pair("isprefixfold"_u, ISPREFIXCL),
+    make_pair("startswithfold"_u, ISPREFIXCL),
+    make_pair("beginswithfold"_u, ISPREFIXCL),
+    make_pair("isprefixfoldcase"_u, ISPREFIXCL),
+    make_pair("startswithfoldcase"_u, ISPREFIXCL),
+    make_pair("beginswithfoldcase"_u, ISPREFIXCL),
+
+    make_pair("issuffixcl"_u, ISSUFFIXCL),
+    make_pair("endswithcl"_u, ISSUFFIXCL),
+    make_pair("issuffixcaseless"_u, ISSUFFIXCL),
+    make_pair("endswithcaseless"_u, ISSUFFIXCL),
+    make_pair("issuffixfold"_u, ISSUFFIXCL),
+    make_pair("endswithfold"_u, ISSUFFIXCL),
+    make_pair("issuffixfoldcase"_u, ISSUFFIXCL),
+    make_pair("endswithfoldcase"_u, ISSUFFIXCL),
+
+    make_pair("issubstringcl"_u, ISSUBSTRINGCL),
+    make_pair("issubstringcaseless"_u, ISSUBSTRINGCL),
+    make_pair("issubstringfold"_u, ISSUBSTRINGCL),
+    make_pair("issubstringfoldcase"_u, ISSUBSTRINGCL),
+
+    make_pair("hasprefix"_u, HASPREFIX),
+    make_pair("startswithlist"_u, HASPREFIX),
+    make_pair("beginswithlist"_u, HASPREFIX),
+
+    make_pair("hassuffix"_u, HASSUFFIX),
+    make_pair("endswithlist"_u, HASSUFFIX),
+
+    make_pair("in"_u, IN),
+    make_pair("∈"_u, IN),
+
+    make_pair("hasprefixcl"_u, HASPREFIXCL),
+    make_pair("startswithlistcl"_u, HASPREFIXCL),
+    make_pair("beginswithlistcl"_u, HASPREFIXCL),
+    make_pair("hasprefixcaseless"_u, HASPREFIXCL),
+    make_pair("startswithlistcaseless"_u, HASPREFIXCL),
+    make_pair("beginswithlistcaseless"_u, HASPREFIXCL),
+    make_pair("hasprefixfold"_u, HASPREFIXCL),
+    make_pair("startswithlistfold"_u, HASPREFIXCL),
+    make_pair("beginswithlistfold"_u, HASPREFIXCL),
+    make_pair("hasprefixfoldcase"_u, HASPREFIXCL),
+    make_pair("startswithlistfoldcase"_u, HASPREFIXCL),
+    make_pair("beginswithlistfoldcase"_u, HASPREFIXCL),
+
+    make_pair("hassuffixcl"_u, HASSUFFIXCL),
+    make_pair("endswithlistcl"_u, HASSUFFIXCL),
+    make_pair("hassuffixcaseless"_u, HASSUFFIXCL),
+    make_pair("endswithlistcaseless"_u, HASSUFFIXCL),
+    make_pair("hassuffixfold"_u, HASSUFFIXCL),
+    make_pair("endswithlistfold"_u, HASSUFFIXCL),
+    make_pair("hassuffixfoldcase"_u, HASSUFFIXCL),
+    make_pair("endswithlistfoldcase"_u, HASSUFFIXCL),
+
+    make_pair("incl"_u, INCL),
+    make_pair("∈cl"_u, INCL), // why you would want to use ∈ here I'm not sure
+    make_pair("incaseless"_u, INCL),
+    make_pair("∈caseless"_u, INCL), // but the documentation implies they exist
+    make_pair("infold"_u, INCL),
+    make_pair("∈fold"_u, INCL), // so here they are
+    make_pair("infoldcase"_u, INCL),
+    make_pair("∈foldcase"_u, INCL)
   };
 
   /**
@@ -532,23 +532,23 @@ private:
   /**
    * Parse a tag-order rule
    */
-  void parseOutputRule(wstring pattern);
+  void parseOutputRule(UString pattern);
 
   /**
    * Parse a tag-replacement rule
    * Note: these rules currently have no effect
    */
-  void parseRetagRule(wstring srcTag);
+  void parseRetagRule(UString srcTag);
 
   /**
    * Parse an attribute category
    */
-  void parseAttrRule(wstring name);
+  void parseAttrRule(UString name);
 
   /**
    * Parse a reduction rule and append it to reductionRules
    */
-  void parseReduceRule(wstring firstnode, wstring next);
+  void parseReduceRule(UString firstnode, UString next);
 
   //////////
   // ANALYSIS
@@ -574,14 +574,14 @@ private:
    * @param s - the string
    * @return bytecode
    */
-  wstring compileString(wstring s);
+  UString compileString(UString s);
 
   /**
    * Compiles a string as to a literal tag
    * @param s - the tag
    * @return bytecode
    */
-  wstring compileTag(wstring s);
+  UString compileTag(UString s);
 
   /**
    * Compile a Clip object
@@ -591,12 +591,12 @@ private:
    * @param dest - the destination attribute
    * @return bytecode
    */
-  wstring compileClip(Clip* c, wstring dest);
+  UString compileClip(Clip* c, UString dest);
 
   /**
    * Wrapper around compileClip(Clip*)
    */
-  wstring compileClip(wstring part, int pos, wstring side);
+  UString compileClip(UString part, int pos, UString side);
 
   // TODO
   Clip* processMacroClip(Clip* mac, OutputChunk* arg);
@@ -609,28 +609,28 @@ private:
    * @param ch - the element
    * @return bytecode
    */
-  wstring processOutputChunk(OutputChunk* ch);
+  UString processOutputChunk(OutputChunk* ch);
 
   /**
    * Compile and the output rule for a chunk
    * @param chunk - the chunk
    * @return bytecode
    */
-  wstring processOutput(OutputChunk* chunk);
+  UString processOutput(OutputChunk* chunk);
 
   /**
    * Compile the output rule for an if statement
    * @param chunk - the chunk
    * @return bytecode
    */
-  wstring processOutputChoice(OutputChoice* choice);
+  UString processOutputChoice(OutputChoice* choice);
 
   /**
    * Compile a Cond object
    * @param cond - the conditional
    * @return bytecode
    */
-  wstring processCond(Cond* cond);
+  UString processCond(Cond* cond);
 
   /**
    * Iterate over reductionRules, compiling them
@@ -646,7 +646,7 @@ public:
   {
     summarizing = value;
   }
-  void excludeRule(wstring name)
+  void excludeRule(UString name)
   {
     excluded.insert(name);
   }
diff --git a/src/rtx_decomp.cc b/src/rtx_decomp.cc
index f64b24b..9ecbc21 100644
--- a/src/rtx_decomp.cc
+++ b/src/rtx_decomp.cc
@@ -22,9 +22,9 @@ void endProgram(char *name)
   exit(EXIT_FAILURE);
 }
 
-void writeRule(wstring rule, FILE* out)
+void writeRule(UString rule, FILE* out)
 {
-  wstring line;
+  UString line;
   for(unsigned int i = 0; i < rule.size(); i++)
   {
     line.clear();
@@ -51,7 +51,7 @@ void writeRule(wstring rule, FILE* out)
         {
           fputwc(rule[++i], out);
         }
-        //wstring s = rule.substr(i+1, len);
+        //UString s = rule.substr(i+1, len);
         fwprintf(out, L"\"\n");
       }
         break;
@@ -278,11 +278,11 @@ int main(int argc, char *argv[])
   fwprintf(out, L"Input rules:\n");
   fwprintf(out, L"Longest pattern: %d chunks\nNumber of rules: %d\n\n", longestPattern, count);
   int patlen;
-  wstring cur;
+  UString cur;
   for(int i = 0; i < count; i++)
   {
     patlen = Compression::multibyte_read(in);
-    cur = Compression::wstring_read(in);
+    cur = Compression::string_read(in);
     fwprintf(out, L"Rule %d (%d bytes, pattern %d chunks)\n", i+1, cur.size(), patlen);
     writeRule(cur, out);
   }
@@ -291,7 +291,7 @@ int main(int argc, char *argv[])
   fwprintf(out, L"Output rules:\nNumber of rules: %d\n\n", count);
   for(int i = 0; i < count; i++)
   {
-    cur = Compression::wstring_read(in);
+    cur = Compression::string_read(in);
     fwprintf(out, L"Rule %d (%d bytes)\n", i, cur.size());
     writeRule(cur, out);
   }
diff --git a/src/rtx_proc.cc b/src/rtx_proc.cc
index fb18746..63093cb 100644
--- a/src/rtx_proc.cc
+++ b/src/rtx_proc.cc
@@ -3,6 +3,7 @@
 #include <lttoolbox/lt_locale.h>
 #include <getopt.h>
 #include <libgen.h>
+#include <iostream>
 
 void endProgram(char *name)
 {
@@ -147,23 +148,24 @@ int main(int argc, char *argv[])
 
   p.read(argv[optind]);
 
-  FILE *input = stdin, *output = stdout;
+  FILE *input = stdin;
+  UFILE* output = u_finit(stdout, NULL, NULL);
 
   if(optind <= (argc - 2))
   {
     input = fopen(argv[optind+1], "rb");
     if(input == NULL)
     {
-      wcerr << "Unable to open " << argv[optind+1] << " for reading." << endl;
+      cerr << "Unable to open " << argv[optind+1] << " for reading." << endl;
       exit(EXIT_FAILURE);
     }
   }
   if(optind <= (argc - 3))
   {
-    output = fopen(argv[optind+2], "wb");
-    if(input == NULL)
+    output = u_fopen(argv[optind+2], "w", NULL, NULL);
+    if(output == NULL) // test the handle that was just opened (was: input)
     {
-      wcerr << "Unable to open " << argv[optind+2] << " for writing." << endl;
+      cerr << "Unable to open " << argv[optind+2] << " for writing." << endl;
       exit(EXIT_FAILURE);
     }
   }
@@ -171,6 +173,6 @@ int main(int argc, char *argv[])
   p.process(input, output);
 
   fclose(input);
-  fclose(output);
+  u_fclose(output);
   return EXIT_SUCCESS;
 }
diff --git a/src/rtx_processor.cc b/src/rtx_processor.cc
index f2056a8..65532b4 100644
--- a/src/rtx_processor.cc
+++ b/src/rtx_processor.cc
@@ -1,35 +1,17 @@
 #include <rtx_config.h>
 #include <rtx_processor.h>
 #include <bytecode.h>
-#include <apertium/trx_reader.h>
-#include <apertium/utf_converter.h>
+//#include <apertium/trx_reader.h>
 #include <lttoolbox/compression.h>
 
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
 //#include <apertium/unlocked_cstdio.h>
 
-using namespace Apertium;
 using namespace std;
 
 RTXProcessor::RTXProcessor()
 {
-  furtherInput = true;
-  inword = false;
-  inwblank = false;
-  printingSteps = false;
-  printingRules = false;
-  printingBranches = false;
-  printingAll = false;
-  noCoref = true;
-  isLinear = false;
-  null_flush = false;
-  printingTrees = false;
-  printingText = true;
-  treePrintMode = TreeModeNest;
-  newBranchId = 0;
-  noFilter = true;
-  currentBranch = NULL;
 }
 
 RTXProcessor::~RTXProcessor()
@@ -43,7 +25,7 @@ RTXProcessor::read(string const &filename)
   FILE *in = fopen(filename.c_str(), "rb");
   if(in == NULL)
   {
-    wcerr << "Unable to open file " << filename.c_str() << endl;
+    cerr << "Unable to open file " << filename.c_str() << endl;
     exit(EXIT_FAILURE);
   }
 
@@ -54,13 +36,13 @@ RTXProcessor::read(string const &filename)
   for(int i = 0; i < count; i++)
   {
     pat_size.push_back(Compression::multibyte_read(in));
-    rule_map.push_back(Compression::wstring_read(in));
+    rule_map.push_back(Compression::string_read(in));
   }
   count = Compression::multibyte_read(in);
   output_rules.reserve(count);
   for(int i = 0; i < count; i++)
   {
-    output_rules.push_back(Compression::wstring_read(in));
+    output_rules.push_back(Compression::string_read(in));
   }
 
   varCount = Compression::multibyte_read(in);
@@ -88,32 +70,35 @@ RTXProcessor::read(string const &filename)
   delete t;
 
   // attr_items
-  bool recompile_attrs = Compression::string_read(in) != string(pcre_version());
+  bool recompile_attrs = !Compression::string_read(in).empty();
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    wstring const cad_k = Compression::wstring_read(in);
+    UString const cad_k = Compression::string_read(in);
     attr_items[cad_k].read(in);
-    wstring fallback = Compression::wstring_read(in);
-    if(recompile_attrs) {
-      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
+    UString fallback = Compression::string_read(in);
+    if (recompile_attrs && cad_k == "chname"_u) {
+      // chname was previously "({([^/]+)\\/)"
+      // which was fine for PCRE, but ICU chokes on the unmatched bracket
+      fallback = "(\\{([^/]+)\\/)"_u;
     }
+    attr_items[cad_k].compile(fallback);
   }
 
   // variables
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    wstring const cad_k = Compression::wstring_read(in);
-    variables[cad_k] = Compression::wstring_read(in);
+    UString const cad_k = Compression::string_read(in);
+    variables[cad_k] = Compression::string_read(in);
   }
 
   // lists
   for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
   {
-    wstring const cad_k = Compression::wstring_read(in);
+    UString const cad_k = Compression::string_read(in);
 
     for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
     {
-      wstring const cad_v = Compression::wstring_read(in);
+      UString const cad_v = Compression::string_read(in);
       lists[cad_k].insert(cad_v);
       listslow[cad_k].insert(StringUtils::tolower(cad_v));
     }
@@ -122,19 +107,19 @@ RTXProcessor::read(string const &filename)
   int nameCount = Compression::multibyte_read(in);
   for(int i = 0; i < nameCount; i++)
   {
-    inRuleNames.push_back(Compression::wstring_read(in));
+    inRuleNames.push_back(Compression::string_read(in));
   }
   nameCount = Compression::multibyte_read(in);
   for(int i = 0; i < nameCount; i++)
   {
-    outRuleNames.push_back(Compression::wstring_read(in));
+    outRuleNames.push_back(Compression::string_read(in));
   }
 
   fclose(in);
 }
 
 bool
-RTXProcessor::beginsWith(wstring const &s1, wstring const &s2) const
+RTXProcessor::beginsWith(UString const &s1, UString const &s2) const
 {
   int const limit = s2.size(), constraint = s1.size();
 
@@ -154,7 +139,7 @@ RTXProcessor::beginsWith(wstring const &s1, wstring const &s2) const
 }
 
 bool
-RTXProcessor::endsWith(wstring const &s1, wstring const &s2) const
+RTXProcessor::endsWith(UString const &s1, UString const &s2) const
 {
   int const limit = s2.size(), constraint = s1.size();
 
@@ -173,43 +158,6 @@ RTXProcessor::endsWith(wstring const &s1, wstring const &s2) const
   return true;
 }
 
-wstring
-RTXProcessor::copycase(wstring const &source_word, wstring const &target_word)
-{
-  wstring result;
-
-  bool firstupper = iswupper(source_word[0]);
-  bool uppercase = firstupper && iswupper(source_word[source_word.size()-1]);
-  bool sizeone = source_word.size() == 1;
-
-  if(!uppercase || (sizeone && uppercase))
-  {
-    if(isLinear)
-    {
-      result = target_word;
-      result[0] = towlower(result[0]);
-    }
-    else result = StringUtils::tolower(target_word);
-  }
-  else
-  {
-    result = StringUtils::toupper(target_word);
-  }
-
-  if(firstupper)
-  {
-    result[0] = towupper(result[0]);
-  }
-
-  return result;
-}
-
-wstring
-RTXProcessor::caseOf(wstring const &s)
-{
-  return copycase(s, wstring(L"aa"));
-}
-
 inline bool
 RTXProcessor::popBool()
 {
@@ -219,7 +167,7 @@ RTXProcessor::popBool()
   }
   else
   {
-    wcerr << "tried to pop bool but mode is " << theStack[stackIdx].mode << endl;
+    cerr << "tried to pop bool but mode is " << theStack[stackIdx].mode << endl;
     exit(1);
   }
 }
@@ -233,12 +181,12 @@ RTXProcessor::popInt()
   }
   else
   {
-    wcerr << "tried to pop int but mode is " << theStack[stackIdx].mode << endl;
+    cerr << "tried to pop int but mode is " << theStack[stackIdx].mode << endl;
     exit(1);
   }
 }
 
-inline wstring
+inline UString
 RTXProcessor::popString()
 {
   if(theStack[stackIdx].mode == 2)
@@ -251,13 +199,13 @@ RTXProcessor::popString()
   }
   else
   {
-    wcerr << "tried to pop wstring but mode is " << theStack[stackIdx].mode << endl;
+    cerr << "tried to pop UString but mode is " << theStack[stackIdx].mode << endl;
     exit(1);
   }
 }
 
 inline void
-RTXProcessor::popString(wstring& dest)
+RTXProcessor::popString(UString& dest)
 {
   if(theStack[stackIdx].mode == 2)
   {
@@ -269,7 +217,7 @@ RTXProcessor::popString(wstring& dest)
   }
   else
   {
-    wcerr << "tried to pop wstring but mode is " << theStack[stackIdx].mode << endl;
+    cerr << "tried to pop UString but mode is " << theStack[stackIdx].mode << endl;
     exit(1);
   }
 }
@@ -283,8 +231,8 @@ RTXProcessor::popChunk()
   }
   else
   {
-    wcerr << "tried to pop Chunk but mode is " << theStack[stackIdx].mode << endl;
-    wcerr << "The most common reason for getting this error is a macro that is missing an else clause." << endl;
+    cerr << "tried to pop Chunk but mode is " << theStack[stackIdx].mode << endl;
+    cerr << "The most common reason for getting this error is a macro that is missing an else clause." << endl;
     exit(1);
   }
 }
@@ -312,43 +260,43 @@ RTXProcessor::stackCopy(int src, int dest)
       theWblankStack[dest] = theWblankStack[src];
       break;
     default:
-      wcerr << "Unknown StackElement mode " << theStack[src].mode;
+      cerr << "Unknown StackElement mode " << theStack[src].mode;
       break;
   }
 }
 
 bool
-RTXProcessor::gettingLemmaFromWord(wstring attr)
+RTXProcessor::gettingLemmaFromWord(UString attr)
 {
-    return (attr.compare(L"lem") == 0 || attr.compare(L"lemh") == 0 || attr.compare(L"whole") == 0);
+    return (attr.compare("lem"_u) == 0 || attr.compare("lemh"_u) == 0 || attr.compare("whole"_u) == 0);
 }
 
 bool
-RTXProcessor::applyRule(const wstring& rule)
+RTXProcessor::applyRule(const UString& rule)
 {
   stackIdx = 0;
   vector<bool> editted = vector<bool>(currentInput.size(), false);
-  const wchar_t* rule_data = rule.data();
+  const UChar* rule_data = rule.data();
   for(unsigned int i = 0, rule_size = rule.size(); i < rule_size; i++)
   {
     switch(rule_data[i])
     {
       case DROP:
-        if(printingSteps) { wcerr << "drop" << endl; }
+        if(printingSteps) { cerr << "drop" << endl; }
         stackIdx--;
         break;
       case DUP:
-        if(printingSteps) { wcerr << "dup" << endl; }
+        if(printingSteps) { cerr << "dup" << endl; }
         stackCopy(stackIdx, stackIdx+1);
         stackIdx++;
         break;
       case OVER:
-        if(printingSteps) { wcerr << "over" << endl; }
+        if(printingSteps) { cerr << "over" << endl; }
         stackCopy(stackIdx-1, stackIdx+1);
         stackIdx++;
         break;
       case SWAP:
-        if(printingSteps) { wcerr << "swap" << endl; }
+        if(printingSteps) { cerr << "swap" << endl; }
       {
         stackCopy(stackIdx, stackIdx+1);
         stackCopy(stackIdx-1, stackIdx);
@@ -357,67 +305,67 @@ RTXProcessor::applyRule(const wstring& rule)
         break;
       case STRING:
       {
-        if(printingSteps) { wcerr << "string" << endl; }
+        if(printingSteps) { cerr << "string" << endl; }
         int ct = rule_data[++i];
         stackIdx++;
         theStack[stackIdx].mode = 2;
         theStack[stackIdx].s.assign(rule, i+1, ct);
         //pushStack(rule.substr(i+1, ct));
         i += ct;
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
       }
         break;
       case INT:
-        if(printingSteps) { wcerr << "int " << (int)rule[i+1] << endl; }
+        if(printingSteps) { cerr << "int " << (int)rule[i+1] << endl; }
         pushStack((int)rule_data[++i]);
         break;
       case PUSHFALSE:
-        if(printingSteps) { wcerr << "pushfalse" << endl; }
+        if(printingSteps) { cerr << "pushfalse" << endl; }
         pushStack(false);
         break;
       case PUSHTRUE:
-        if(printingSteps) { wcerr << "pushtrue" << endl; }
+        if(printingSteps) { cerr << "pushtrue" << endl; }
         pushStack(true);
         break;
       case PUSHNULL:
-        if(printingSteps) { wcerr << "pushnull" << endl; }
+        if(printingSteps) { cerr << "pushnull" << endl; }
         pushStack((Chunk*)NULL);
         break;
       case JUMP:
-        if(printingSteps) { wcerr << "jump" << endl; }
+        if(printingSteps) { cerr << "jump" << endl; }
         ++i;
         i += rule_data[i];
         break;
       case JUMPONTRUE:
-        if(printingSteps) { wcerr << "jumpontrue" << endl; }
+        if(printingSteps) { cerr << "jumpontrue" << endl; }
         if(!popBool())
         {
           i++;
-          if(printingSteps) { wcerr << " -> false" << endl; }
+          if(printingSteps) { cerr << " -> false" << endl; }
         }
         else
         {
           ++i;
           i += rule_data[i];
-          if(printingSteps) { wcerr << " -> true, jumping" << endl; }
+          if(printingSteps) { cerr << " -> true, jumping" << endl; }
         }
         break;
       case JUMPONFALSE:
-        if(printingSteps) { wcerr << "jumponfalse" << endl; }
+        if(printingSteps) { cerr << "jumponfalse" << endl; }
         if(popBool())
         {
           i++;
-          if(printingSteps) { wcerr << " -> true" << endl; }
+          if(printingSteps) { cerr << " -> true" << endl; }
         }
         else
         {
           ++i;
           i += rule_data[i];
-          if(printingSteps) { wcerr << " -> false, jumping" << endl; }
+          if(printingSteps) { cerr << " -> false, jumping" << endl; }
         }
         break;
       case AND:
-        if(printingSteps) { wcerr << "and" << endl; }
+        if(printingSteps) { cerr << "and" << endl; }
       {
         bool a = popBool();
         bool b = popBool();
@@ -425,7 +373,7 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case OR:
-        if(printingSteps) { wcerr << "or" << endl; }
+        if(printingSteps) { cerr << "or" << endl; }
       {
         bool a = popBool();
         bool b = popBool();
@@ -433,16 +381,16 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case NOT:
-        if(printingSteps) { wcerr << "not" << endl; }
+        if(printingSteps) { cerr << "not" << endl; }
         theStack[stackIdx].b = !theStack[stackIdx].b;
         break;
       case EQUAL:
       case EQUALCL:
-        if(printingSteps) { wcerr << "equal" << endl; }
+        if(printingSteps) { cerr << "equal" << endl; }
       {
-        wstring a;
+        UString a;
         popString(a);
-        wstring b;
+        UString b;
         popString(b);
         if(rule_data[i] == EQUALCL)
         {
@@ -450,15 +398,15 @@ RTXProcessor::applyRule(const wstring& rule)
           b = StringUtils::tolower(b);
         }
         pushStack(a == b);
-        if(printingSteps) { wcerr << " -> " << (a == b ? "true" : "false") << endl; }
+        if(printingSteps) { cerr << " -> " << (a == b ? "true" : "false") << endl; }
       }
         break;
       case ISPREFIX:
       case ISPREFIXCL:
-        if(printingSteps) { wcerr << "isprefix" << endl; }
+        if(printingSteps) { cerr << "isprefix" << endl; }
       {
-        wstring substr = popString();
-        wstring str = popString();
+        UString substr = popString();
+        UString str = popString();
         if(rule[i] == ISPREFIXCL)
         {
           pushStack(beginsWith(StringUtils::tolower(str), StringUtils::tolower(substr)));
@@ -471,10 +419,10 @@ RTXProcessor::applyRule(const wstring& rule)
         break;
       case ISSUFFIX:
       case ISSUFFIXCL:
-        if(printingSteps) { wcerr << "issuffix" << endl; }
+        if(printingSteps) { cerr << "issuffix" << endl; }
       {
-        wstring substr = popString();
-        wstring str = popString();
+        UString substr = popString();
+        UString str = popString();
         if(rule[i] == ISSUFFIXCL)
         {
           pushStack(endsWith(StringUtils::tolower(str), StringUtils::tolower(substr)));
@@ -487,11 +435,11 @@ RTXProcessor::applyRule(const wstring& rule)
         break;
       case HASPREFIX:
       case HASPREFIXCL:
-        if(printingSteps) { wcerr << "hasprefix" << endl; }
+        if(printingSteps) { cerr << "hasprefix" << endl; }
       {
-        wstring list = popString();
-        wstring needle = popString();
-        set<wstring, Ltstr>::iterator it, limit;
+        UString list = popString();
+        UString needle = popString();
+        set<UString>::iterator it, limit;
 
         if(rule[i] == HASPREFIX)
         {
@@ -519,11 +467,11 @@ RTXProcessor::applyRule(const wstring& rule)
         break;
       case HASSUFFIX:
       case HASSUFFIXCL:
-        if(printingSteps) { wcerr << "hassuffix" << endl; }
+        if(printingSteps) { cerr << "hassuffix" << endl; }
       {
-        wstring list = popString();
-        wstring needle = popString();
-        set<wstring, Ltstr>::iterator it, limit;
+        UString list = popString();
+        UString needle = popString();
+        set<UString>::iterator it, limit;
 
         if(rule[i] == HASSUFFIX)
         {
@@ -551,50 +499,50 @@ RTXProcessor::applyRule(const wstring& rule)
         break;
       case ISSUBSTRING:
       case ISSUBSTRINGCL:
-        if(printingSteps) { wcerr << "issubstring" << endl; }
+        if(printingSteps) { cerr << "issubstring" << endl; }
       {
-        wstring needle = popString();
-        wstring haystack = popString();
+        UString needle = popString();
+        UString haystack = popString();
         if(rule[i] == ISSUBSTRINGCL)
         {
           needle = StringUtils::tolower(needle);
           haystack = StringUtils::tolower(haystack);
         }
-        pushStack(haystack.find(needle) != wstring::npos);
+        pushStack(haystack.find(needle) != UString::npos);
       }
         break;
       case IN:
       case INCL:
-        if(printingSteps) { wcerr << "in" << endl; }
+        if(printingSteps) { cerr << "in" << endl; }
       {
-        wstring list = popString();
-        wstring str = popString();
+        UString list = popString();
+        UString str = popString();
         if(rule[i] == INCL)
         {
           str = StringUtils::tolower(str);
-          set<wstring, Ltstr> &myset = listslow[list];
+          set<UString> &myset = listslow[list];
           pushStack(myset.find(str) != myset.end());
         }
         else
         {
-          set<wstring, Ltstr> &myset = lists[list];
+          set<UString> &myset = lists[list];
           pushStack(myset.find(str) != myset.end());
         }
       }
         break;
       case SETVAR:
-        if(printingSteps) { wcerr << "setvar" << endl; }
+        if(printingSteps) { cerr << "setvar" << endl; }
       {
-        wstring var = popString();
-        wstring val = popString();
+        UString var = popString();
+        UString val = popString();
         currentBranch->stringVars[var] = val;
         currentBranch->wblankVars[var] = theWblankStack[stackIdx+1];
         theWblankStack[stackIdx+1].clear();
-        if(printingSteps) { wcerr << " -> " << var << " = '" << val << "'" << endl; }
+        if(printingSteps) { cerr << " -> " << var << " = '" << val << "'" << endl; }
       }
         break;
       case OUTPUT:
-        if(printingSteps) { wcerr << "output" << endl; }
+        if(printingSteps) { cerr << "output" << endl; }
       {
         Chunk* ch = popChunk();
         if(ch == NULL) break; // FETCHCHUNK
@@ -602,14 +550,14 @@ RTXProcessor::applyRule(const wstring& rule)
         {
           bool word = true;
           unsigned int last = 0;
-          const wchar_t* targ = ch->target.data();
+          const UChar* targ = ch->target.data();
           bool chunk = false;
           for(unsigned int c = 0, limit = ch->target.size(); c < limit; c++)
           {
-            if(targ[c] == L'\\') c++;
-            else if((targ[c] == L'{' || targ[c] == L'$') && word)
+            if(targ[c] == '\\') c++;
+            else if((targ[c] == '{' || targ[c] == '$') && word)
             {
-              if(targ[c] == L'{') chunk = true;
+              if(targ[c] == '{') chunk = true;
               Chunk* temp = chunkPool.next();
               temp->isBlank = false;
               temp->target = ch->target.substr(last, c-last);
@@ -620,7 +568,7 @@ RTXProcessor::applyRule(const wstring& rule)
               last = c+1;
               word = false;
             }
-            else if((targ[c] == L'^' || targ[c] == L'}') && !word)
+            else if((targ[c] == '^' || targ[c] == '}') && !word)
             {
               if(c > last)
               {
@@ -630,7 +578,7 @@ RTXProcessor::applyRule(const wstring& rule)
                 if(chunk) currentOutput.back()->contents.push_back(temp);
                 else currentOutput.push_back(temp);
               }
-              if(targ[c] == L'}') chunk = false;
+              if(targ[c] == '}') chunk = false;
               last = c+1;
               word = true;
             }
@@ -656,12 +604,12 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case OUTPUTALL:
-        if(printingSteps) { wcerr << "outputall" << endl; }
+        if(printingSteps) { cerr << "outputall" << endl; }
         currentOutput = currentInput;
         return true;
         break;
       case PUSHINPUT:
-        if(printingSteps) { wcerr << "pushinput" << endl; }
+        if(printingSteps) { cerr << "pushinput" << endl; }
       {
         int loc = popInt();
         int pos = 2*(loc-1);
@@ -682,7 +630,7 @@ RTXProcessor::applyRule(const wstring& rule)
           }
           if(ch == NULL)
           {
-            //wcerr << L"Clip index is out of bounds." << endl;
+            //cerr << "Clip index is out of bounds." << endl;
             //exit(EXIT_FAILURE);
             ch = currentInput.back();
           }
@@ -691,12 +639,12 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case SOURCECLIP:
-        if(printingSteps) { wcerr << "sourceclip" << endl; }
+        if(printingSteps) { cerr << "sourceclip" << endl; }
       {
-        wstring part;
+        UString part;
         popString(part);
         Chunk* ch = popChunk();
-        if(ch == NULL) pushStack(L"");
+        if(ch == NULL) pushStack("");
         else
         {
           if(gettingLemmaFromWord(part))
@@ -708,16 +656,16 @@ RTXProcessor::applyRule(const wstring& rule)
             pushStack(ch->chunkPart(attr_items[part], SourceClip));
           }
         }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
       }
         break;
       case TARGETCLIP:
-        if(printingSteps) { wcerr << "targetclip" << endl; }
+        if(printingSteps) { cerr << "targetclip" << endl; }
       {
-        wstring part;
+        UString part;
         popString(part);
         Chunk* ch = popChunk();
-        if(ch == NULL) pushStack(L"");
+        if(ch == NULL) pushStack("");
         else
         {
           if(gettingLemmaFromWord(part))
@@ -729,25 +677,25 @@ RTXProcessor::applyRule(const wstring& rule)
              pushStack(ch->chunkPart(attr_items[part], TargetClip));
           }
         }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
       }
         break;
       case REFERENCECLIP:
-        if(printingSteps) { wcerr << "referenceclip" << endl; }
+        if(printingSteps) { cerr << "referenceclip" << endl; }
       {
-        wstring part;
+        UString part;
         popString(part);
         Chunk* ch = popChunk();
-        if(ch == NULL) pushStack(L"");
+        if(ch == NULL) pushStack("");
         else pushStack(ch->chunkPart(attr_items[part], ReferenceClip));
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
       }
         break;
       case SETCLIP:
-        if(printingSteps) { wcerr << "setclip" << endl; }
+        if(printingSteps) { cerr << "setclip" << endl; }
       {
         int pos = 2*(popInt()-1);
-        wstring part = popString();
+        UString part = popString();
         if(pos >= 0)
         {
           if(!editted[pos])
@@ -756,7 +704,7 @@ RTXProcessor::applyRule(const wstring& rule)
             editted[pos] = true;
           }
           currentInput[pos]->setChunkPart(attr_items[part], popString());
-          if(printingSteps) { wcerr << " -> " << currentInput[pos]->target << endl; }
+          if(printingSteps) { cerr << " -> " << currentInput[pos]->target << endl; }
         }
         else
         {
@@ -765,46 +713,46 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case FETCHVAR:
-        if(printingSteps) { wcerr << "fetchvar" << endl; }
+        if(printingSteps) { cerr << "fetchvar" << endl; }
         {
-          wstring name = popString();
-          wstring val = currentBranch->stringVars[name];
-          wstring wblank_val = currentBranch->wblankVars[name];
+          UString name = popString();
+          UString val = currentBranch->stringVars[name];
+          UString wblank_val = currentBranch->wblankVars[name];
           pushStack(val, wblank_val);
-          if(printingSteps) { wcerr << " -> " << name << " = " << val << endl; }
+          if(printingSteps) { cerr << " -> " << name << " = " << val << endl; }
         }
         break;
       case FETCHCHUNK:
-        if(printingSteps) { wcerr << "fetchchunk" << endl; }
+        if(printingSteps) { cerr << "fetchchunk" << endl; }
         pushStack(currentBranch->chunkVars[popInt()]);
         break;
       case SETCHUNK:
-        if(printingSteps) { wcerr << "setchunk" << endl; }
+        if(printingSteps) { cerr << "setchunk" << endl; }
         {
           int pos = popInt();
           currentBranch->chunkVars[pos] = popChunk();
         }
         break;
       case GETCASE:
-        if(printingSteps) { wcerr << "getcase" << endl; }
-        pushStack(caseOf(popString()));
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << "getcase" << endl; }
+        pushStack(StringUtils::getcase(popString()));
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
         break;
       case SETCASE:
-        if(printingSteps) { wcerr << "setcase" << endl; }
+        if(printingSteps) { cerr << "setcase" << endl; }
       {
-        wstring src = popString();
-        wstring dest = popString();
-        pushStack(copycase(src, dest));
+        UString src = popString();
+        UString dest = popString();
+        pushStack(StringUtils::copycase(src, dest));
       }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx].s << endl; }
         break;
       case CONCAT:
-        if(printingSteps) { wcerr << "concat" << endl; }
+        if(printingSteps) { cerr << "concat" << endl; }
       {
         if(theStack[stackIdx].mode != 2 || theStack[stackIdx-1].mode != 2)
         {
-          wcerr << L"Cannot CONCAT non-strings." << endl;
+          cerr << "Cannot CONCAT non-strings." << endl;
           exit(EXIT_FAILURE);
         }
         stackIdx--;
@@ -812,7 +760,7 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case CHUNK:
-        if(printingSteps) { wcerr << "chunk" << endl; }
+        if(printingSteps) { cerr << "chunk" << endl; }
       {
         Chunk* ch = chunkPool.next();
         ch->isBlank = false;
@@ -820,15 +768,15 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case APPENDCHILD:
-        if(printingSteps) { wcerr << "appendchild" << endl; }
+        if(printingSteps) { cerr << "appendchild" << endl; }
       {
         Chunk* kid = popChunk();
-        if(isLinear && kid->target[0] == L'^')
+        if(isLinear && kid->target[0] == '^')
         {
           unsigned int j = 0;
           for(; j < kid->target.size(); j++)
           {
-            if(kid->target[j] == L'$') break;
+            if(kid->target[j] == '$') break;
           }
           Chunk* ch = chunkPool.next();
           ch->isBlank = false;
@@ -847,21 +795,21 @@ RTXProcessor::applyRule(const wstring& rule)
           out_wblank.clear();
           theStack[stackIdx].c->contents.push_back(kid);
         }
-        if(printingSteps) { wcerr << " -> child with surface '" << kid->target << L"' appended" << endl; }
+        if(printingSteps) { cerr << " -> child with surface '" << kid->target << "' appended" << endl; }
       }
         break;
       case APPENDSURFACE:
-        if(printingSteps) { wcerr << "appendsurface" << endl; }
+        if(printingSteps) { cerr << "appendsurface" << endl; }
       {
         if(theStack[stackIdx].mode != 2 && theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot append non-string to chunk surface." << endl;
+          cerr << "Cannot append non-string to chunk surface." << endl;
           exit(EXIT_FAILURE);
         }
         stackIdx--;
         if(theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot APPENDSURFACE to non-chunk." << endl;
+          cerr << "Cannot APPENDSURFACE to non-chunk." << endl;
           exit(EXIT_FAILURE);
         }
         if(theStack[stackIdx+1].mode == 2)
@@ -875,21 +823,21 @@ RTXProcessor::applyRule(const wstring& rule)
           theStack[stackIdx].c->target += theStack[stackIdx+1].c->target;
           theStack[stackIdx].c->wblank += theStack[stackIdx+1].c->wblank;
         }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx+1].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx+1].s << endl; }
       }
         break;
       case APPENDSURFACESL:
-        if(printingSteps) { wcerr << "appendsurfacesl" << endl; }
+        if(printingSteps) { cerr << "appendsurfacesl" << endl; }
       {
         if(theStack[stackIdx].mode != 2 && theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot append non-string to chunk surface." << endl;
+          cerr << "Cannot append non-string to chunk surface." << endl;
           exit(EXIT_FAILURE);
         }
         stackIdx--;
         if(theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot APPENDSURFACESL to non-chunk." << endl;
+          cerr << "Cannot APPENDSURFACESL to non-chunk." << endl;
           exit(EXIT_FAILURE);
         }
         if(theStack[stackIdx+1].mode == 2)
@@ -903,21 +851,21 @@ RTXProcessor::applyRule(const wstring& rule)
           theStack[stackIdx].c->source += theStack[stackIdx+1].c->source;
           theStack[stackIdx].c->wblank += theStack[stackIdx+1].c->wblank;
         }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx+1].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx+1].s << endl; }
       }
         break;
       case APPENDSURFACEREF:
-        if(printingSteps) { wcerr << "appendsurfaceref" << endl; }
+        if(printingSteps) { cerr << "appendsurfaceref" << endl; }
       {
         if(theStack[stackIdx].mode != 2 && theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot append non-string to chunk surface." << endl;
+          cerr << "Cannot append non-string to chunk surface." << endl;
           exit(EXIT_FAILURE);
         }
         stackIdx--;
         if(theStack[stackIdx].mode != 3)
         {
-          wcerr << L"Cannot APPENDSURFACEREF to non-chunk." << endl;
+          cerr << "Cannot APPENDSURFACEREF to non-chunk." << endl;
           exit(EXIT_FAILURE);
         }
         if(theStack[stackIdx+1].mode == 2)
@@ -928,11 +876,11 @@ RTXProcessor::applyRule(const wstring& rule)
         {
           theStack[stackIdx].c->coref += theStack[stackIdx+1].c->coref;
         }
-        if(printingSteps) { wcerr << " -> " << theStack[stackIdx+1].s << endl; }
+        if(printingSteps) { cerr << " -> " << theStack[stackIdx+1].s << endl; }
       }
         break;
       case APPENDALLCHILDREN:
-        if(printingSteps) { wcerr << "appendallchildren" << endl; }
+        if(printingSteps) { cerr << "appendallchildren" << endl; }
       {
         Chunk* ch = popChunk();
         for(unsigned int k = 0; k < ch->contents.size(); k++)
@@ -942,20 +890,20 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case APPENDALLINPUT:
-        if(printingSteps) { wcerr << "appendallinput" << endl; }
+        if(printingSteps) { cerr << "appendallinput" << endl; }
       {
         vector<Chunk*>& vec = theStack[stackIdx].c->contents;
         vec.insert(vec.end(), currentInput.begin(), currentInput.end());
       }
         break;
       case BLANK:
-        if(printingSteps) { wcerr << "blank" << endl; }
+        if(printingSteps) { cerr << "blank" << endl; }
       {
         int loc = 2*(popInt()-1) + 1;
         if(loc == -1)
         {
           Chunk* ch = chunkPool.next();
-          ch->target = L" ";
+          ch->target = " "_u;
           ch->isBlank = true;
           pushStack(ch);
         }
@@ -966,43 +914,43 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case CONJOIN:
-        if(printingSteps) { wcerr << "conjoin" << endl; }
+        if(printingSteps) { cerr << "conjoin" << endl; }
       {
         Chunk* join = chunkPool.next();
         join->isBlank = true;
         join->isJoiner = true;
-        join->target = L"+";
+        join->target = "+"_u;
         pushStack(join);
       }
         break;
       case REJECTRULE:
-        if(printingSteps) { wcerr << "rejectrule" << endl; }
+        if(printingSteps) { cerr << "rejectrule" << endl; }
         return false;
         break;
       case DISTAG:
-        if(printingSteps) { wcerr << "distag" << endl; }
+        if(printingSteps) { cerr << "distag" << endl; }
       {
         if(theStack[stackIdx].mode != 2)
         {
-          wcerr << L"Cannot DISTAG non-string." << endl;
+          cerr << "Cannot DISTAG non-string." << endl;
           exit(EXIT_FAILURE);
         }
-        wstring& s = theStack[stackIdx].s;
-        if(s.size() > 0 && s[0] == L'<' && s[s.size()-1] == L'>')
+        UString& s = theStack[stackIdx].s;
+        if(s.size() > 0 && s[0] == '<' && s[s.size()-1] == '>')
         {
-          s = StringUtils::substitute(s.substr(1, s.size()-2), L"><", L".");
+          s = StringUtils::substitute(s.substr(1, s.size()-2), "><"_u, "."_u);
         }
       }
         break;
       case GETRULE:
-        if(printingSteps) { wcerr << "getrule" << endl; }
+        if(printingSteps) { cerr << "getrule" << endl; }
       {
         int pos = 2*(popInt()-1);
         pushStack(currentInput[pos]->rule);
       }
         break;
       case SETRULE:
-        if(printingSteps) { wcerr << "setrule" << endl; }
+        if(printingSteps) { cerr << "setrule" << endl; }
       {
         int pos = 2*(popInt()-1);
         int rl = popInt();
@@ -1010,9 +958,9 @@ RTXProcessor::applyRule(const wstring& rule)
         {
           if(stackIdx == 0 || theStack[stackIdx].mode != 3)
           {
-            wcerr << "Empty stack or top item is not chunk." << endl;
-            wcerr << "Check for conditionals that might not generate output" << endl;
-            wcerr << "and ensure that lists of attributes are complete." << endl;
+            cerr << "Empty stack or top item is not chunk." << endl;
+            cerr << "Check for conditionals that might not generate output" << endl;
+            cerr << "and ensure that lists of attributes are complete." << endl;
             exit(1);
           }
           theStack[stackIdx].c->rule = rl;
@@ -1024,11 +972,11 @@ RTXProcessor::applyRule(const wstring& rule)
       }
         break;
       case LUCOUNT:
-        if(printingSteps) { wcerr << "lucount" << endl; }
-        pushStack(to_wstring((currentInput.size() + 1) / 2));
+        if(printingSteps) { cerr << "lucount" << endl; }
+        pushStack(StringUtils::itoa((currentInput.size() + 1) / 2));
         break;
       default:
-        wcerr << "unknown instruction: " << rule[i] << endl;
+        cerr << "unknown instruction: " << rule[i] << endl;
         exit(1);
     }
   }
@@ -1036,37 +984,35 @@ RTXProcessor::applyRule(const wstring& rule)
 }
 
 Chunk *
-RTXProcessor::readToken(FILE *in)
+RTXProcessor::readToken()
 {
   int pos = 0;
-  wstring cur;
-  wstring wbl;
-  wstring src;
-  wstring dest;
-  wstring coref;
+  UString cur;
+  UString wbl;
+  UString src;
+  UString dest;
+  UString coref;
   cur.reserve(256);
-  bool inSquare = false;
   while(true)
   {
-    int val = fgetwc_unlocked(in);
-    if(feof(in) || (null_flush && val == 0))
-    {
+    UChar32 val = infile.get();
+    if (infile.eof() || (null_flush && val == '\0')) {
       furtherInput = false;
       Chunk* ret = chunkPool.next();
       ret->target = cur;
       ret->isBlank = true;
       return ret;
     }
-    else if(val == L'\\')
+    else if(val == '\\')
     {
-      cur += L'\\';
-      cur += wchar_t(fgetwc_unlocked(in));
+      cur += '\\';
+      cur += infile.get();
     }
-    else if(val == L'[' && !inword)
+    else if(val == '[' && !inword)
     {
-      val = fgetwc_unlocked(in);
+      val = infile.get();
       
-      if(val == L'[')
+      if(val == '[')
       {
         inwblank = true;
         Chunk* ret = chunkPool.next();
@@ -1076,64 +1022,42 @@ RTXProcessor::readToken(FILE *in)
       }
       else
       {
-        cur += L'[';
-        inSquare = true;
-        
-        if(val == L'\\')
-        {
-          cur += L'\\';
-          cur += static_cast<wchar_t>(fgetwc_unlocked(in));
-        }
-        else
-        {
-          cur += val;
-          if(val == L']')
-          {
-            inSquare = false;
-          }
-        }
-      }
-    }
-    else if(inSquare)
-    {
-      cur += val;
-      if(val == L']')
-      {
-        inSquare = false;
+        infile.unget(val);
+        cur += infile.readBlock('[', ']');
       }
     }
     else if(inwblank)
     {
-      if(val == L']')
+      if(val == ']')
       {
         cur += val;
-        val = fgetwc_unlocked(in);
+        val = infile.get();
         
-        if(val == L'\\')
+        if(val == '\\')
         {
-          cur += L'\\';
-          cur += static_cast<wchar_t>(fgetwc_unlocked(in));
+          cur += '\\';
+          cur += infile.get();
         }
-        else if(val == L']')
+        else if(val == ']')
         {
           cur += val;
-          val = fgetwc_unlocked(in);
+          val = infile.get();
           
-          if(val == L'\\')
+          if(val == '\\')
           {
-            cur += L'\\';
-            cur += static_cast<wchar_t>(fgetwc_unlocked(in));
+            cur += '\\';
+            cur += infile.get();
           }
-          else if(val == L'^')
+          else if(val == '^')
           {
             inwblank = false;
-            cur = L"[[" + cur;
+            cur = "[["_u + cur;
             wbl.swap(cur);
             inword = true;
           }
           else
           {
-            wcerr << L"Parse Error: Wordbound blank should be immediately followed by a Lexical Unit -> [[..]]^..$" << endl;
+            cerr << "Parse Error: Wordbound blank should be immediately followed by a Lexical Unit -> [[..]]^..$" << endl;
             exit(EXIT_FAILURE);
           }
         }
@@ -1147,7 +1071,7 @@ RTXProcessor::readToken(FILE *in)
         cur += val;
       }
     }
-    else if(inword && (val == L'$' || val == L'/'))
+    else if(inword && (val == '$' || val == '/'))
     {
       if(pos == 0)
       {
@@ -1157,7 +1081,7 @@ RTXProcessor::readToken(FILE *in)
       {
         dest.swap(cur);
       }
-      else if(pos >= 2 && !noCoref && val == L'$')
+      else if(pos >= 2 && !noCoref && val == '$')
       {
         coref.swap(cur);
       }
@@ -1166,7 +1090,7 @@ RTXProcessor::readToken(FILE *in)
         cur.clear();
       }
       pos++;
-      if(val == L'$')
+      if(val == '$')
       {
         inword = false;
         Chunk* ret = chunkPool.next();
@@ -1175,10 +1099,10 @@ RTXProcessor::readToken(FILE *in)
         ret->target = dest;
         ret->coref = coref;
         ret->isBlank = false;
-        if(src.size() > 0 && src[0] == L'*' && dest.size() > 0 && dest[0] == L'*')
+        if(src.size() > 0 && src[0] == '*' && dest.size() > 0 && dest[0] == '*')
         {
           Chunk* ret2 = chunkPool.next();
-          ret2->target = ret->target.substr(1) + L"<UNKNOWN:INTERNAL>";
+          ret2->target = ret->target.substr(1) + "<UNKNOWN:INTERNAL>"_u;
           ret2->contents.push_back(ret);
           ret2->rule = -1;
           ret2->isBlank = false;
@@ -1187,7 +1111,7 @@ RTXProcessor::readToken(FILE *in)
         return ret;
       }
     }
-    else if(!inword && val == L'^')
+    else if(!inword && val == '^')
     {
       inword = true;
       Chunk* ret = chunkPool.next();
@@ -1197,7 +1121,7 @@ RTXProcessor::readToken(FILE *in)
     }
     else
     {
-      cur += wchar_t(val);
+      cur += val;
     }
   }
 }
@@ -1284,7 +1208,7 @@ RTXProcessor::lookahead(ParseNode* node)
 void
 RTXProcessor::checkForReduce(vector<ParseNode*>& result, ParseNode* node)
 {
-  if(printingAll) wcerr << "Checking for reductions for branch " << node->id << endl;
+  if(printingAll) cerr << "Checking for reductions for branch " << node->id << endl;
   mx->resetRejected();
   pair<int, double> rule_and_weight = node->getRule();
   int rule = rule_and_weight.first;
@@ -1299,28 +1223,28 @@ RTXProcessor::checkForReduce(vector<ParseNode*>& result, ParseNode* node)
     node->getChunks(currentInput, len-1);
     currentOutput.clear();
     if(printingRules || printingAll) {
-      if(printingAll && treePrintMode == TreeModeLatex) wcerr << "\\subsection{";
-      else wcerr << endl;
-      wcerr << "Applying rule " << rule;
+      if(printingAll && treePrintMode == TreeModeLatex) cerr << "\\subsection{";
+      else cerr << endl;
+      cerr << "Applying rule " << rule;
       if(rule <= (int)inRuleNames.size())
       {
-        wcerr << " (" << inRuleNames[rule-1] << ")";
+        cerr << " (" << inRuleNames[rule-1] << ")";
       }
-      if(printingAll) wcerr << " to branch " << node->id << " with weight " << rule_and_weight.second;
-      if(printingAll && treePrintMode == TreeModeLatex) wcerr << "}" << endl << endl;
-      else wcerr << ": ";
+      if(printingAll) cerr << " to branch " << node->id << " with weight " << rule_and_weight.second;
+      if(printingAll && treePrintMode == TreeModeLatex) cerr << "}" << endl << endl;
+      else cerr << ": ";
       for(unsigned int i = 0; i < currentInput.size(); i++)
       {
         currentInput[i]->writeTree((printingAll ? treePrintMode : TreeModeFlat), NULL);
       }
-      wcerr << endl;
+      cerr << endl;
     }
     if(applyRule(rule_map[rule-1]))
     {
       if(printingAll)
       {
         for(auto c : currentOutput) c->writeTree(treePrintMode, NULL);
-        wcerr << endl;
+        cerr << endl;
       }
       vector<Chunk*> temp;
       temp.reserve(currentOutput.size());
@@ -1383,8 +1307,8 @@ RTXProcessor::checkForReduce(vector<ParseNode*>& result, ParseNode* node)
     }
     else
     {
-      if(printingRules) { wcerr << " -> rule was rejected" << endl; }
-      if(printingAll) wcerr << "This rule was rejeced." << endl << endl;
+      if(printingRules) { cerr << " -> rule was rejected" << endl; }
+      if(printingAll) cerr << "This rule was rejeced." << endl << endl;
       mx->rejectRule(rule);
       rule_and_weight = node->getRule();
       rule = rule_and_weight.first;
@@ -1393,19 +1317,19 @@ RTXProcessor::checkForReduce(vector<ParseNode*>& result, ParseNode* node)
   }
   if(rule == -1)
   {
-    if(printingAll) wcerr << "No further reductions possible for branch " << node->id << "." << endl;
+    if(printingAll) cerr << "No further reductions possible for branch " << node->id << "." << endl;
     result.push_back(node);
   }
   else if(lookahead(node))
   {
     node->id = ++newBranchId;
-    if(printingAll) wcerr << endl << "Splitting stack and creating branch " << node->id << endl;
+    if(printingAll) cerr << endl << "Splitting stack and creating branch " << node->id << endl;
     result.push_back(node);
   }
 }
 
 void
-RTXProcessor::outputAll(FILE* out)
+RTXProcessor::outputAll(UFILE* out)
 {
   unsigned int queueSize = outputQueue.size() - 1;
   bool conjoining = false;
@@ -1416,31 +1340,31 @@ RTXProcessor::outputAll(FILE* out)
     outputQueue.pop_front();
     if(printingTrees && outputQueue.size() == queueSize)
     {
-      if(printingText) fputc_unlocked('\n', out);
+      if(printingText) u_fputc('\n', out);
       queueSize--;
       ch->writeTree(treePrintMode, out);
-      fflush(out);
+      u_fflush(out);
       if(!printingText) continue;
     }
     if(ch->rule == -1)
     {
       if(printingRules && !ch->isBlank)
       {
-        fflush(out);
-        wcerr << endl << "No rule specified: ";
+        u_fflush(out);
+        cerr << endl << "No rule specified: ";
         ch->writeTree(TreeModeFlat, NULL);
-        wcerr << endl;
+        cerr << endl;
       }
       if(printingAll && !ch->isBlank)
       {
-        if(treePrintMode == TreeModeLatex) wcerr << "\\subsubsection{Output Node}" << endl;
-        else wcerr << "Output Node:" << endl;
+        if(treePrintMode == TreeModeLatex) cerr << "\\subsubsection{Output Node}" << endl;
+        else cerr << "Output Node:" << endl;
         ch->writeTree(treePrintMode, NULL);
-        wcerr << endl;
+        cerr << endl;
       }
       if(ch->contents.size() > 0)
       {
-        vector<wstring> tags = ch->getTags(vector<wstring>());
+        vector<UString> tags = ch->getTags(vector<UString>());
         for(auto it = ch->contents.rbegin(); it != ch->contents.rend(); it++)
         {
           (*it)->updateTags(tags);
@@ -1473,7 +1397,7 @@ RTXProcessor::outputAll(FILE* out)
     else
     {
       parentChunk = ch;
-      vector<wstring> tags = ch->getTags(vector<wstring>());
+      vector<UString> tags = ch->getTags(vector<UString>());
       currentInput = ch->contents;
       for(unsigned int i = 0; i < currentInput.size(); i++)
       {
@@ -1481,40 +1405,40 @@ RTXProcessor::outputAll(FILE* out)
       }
       currentOutput.clear();
       if(printingRules) {
-        fflush(out);
-        wcerr << endl << "Applying output rule " << ch->rule;
+        u_fflush(out);
+        cerr << endl << "Applying output rule " << ch->rule;
         if(ch->rule < (int)outRuleNames.size())
         {
-          wcerr << " (" << outRuleNames[ch->rule] << ")";
+          cerr << " (" << outRuleNames[ch->rule] << ")";
         }
-        wcerr << ": " << parentChunk->target << " -> ";
+        cerr << ": " << parentChunk->target << " -> ";
         for(unsigned int i = 0; i < currentInput.size(); i++)
         {
           currentInput[i]->writeTree(TreeModeFlat, NULL);
         }
-        wcerr << endl;
+        cerr << endl;
       }
       if(printingAll)
       {
         if(treePrintMode == TreeModeLatex)
         {
-          wcerr << "\\subsubsection{Applying Output Rule " << ch->rule;
+          cerr << "\\subsubsection{Applying Output Rule " << ch->rule;
           if(ch->rule < (int)outRuleNames.size())
           {
-            wcerr << ": " << outRuleNames[ch->rule] << "}" << endl << endl;
+            cerr << ": " << outRuleNames[ch->rule] << "}" << endl << endl;
           }
         }
         else
         {
-          wcerr << "Applying Output Rule " << ch->rule;
+          cerr << "Applying Output Rule " << ch->rule;
           if(ch->rule < (int)outRuleNames.size())
           {
-            wcerr << ": " << outRuleNames[ch->rule] << endl << endl;
+            cerr << ": " << outRuleNames[ch->rule] << endl << endl;
           }
         }
         ch->writeTree(treePrintMode, NULL);
       }
-      fflush(out);
+      u_fflush(out);
       applyRule(output_rules[ch->rule]);
       for(vector<Chunk*>::reverse_iterator it = currentOutput.rbegin(),
               limit = currentOutput.rend(); it != limit; it++)
@@ -1526,7 +1450,7 @@ RTXProcessor::outputAll(FILE* out)
   if(tojoin != NULL) tojoin->output(out);
   while(!blankQueue.empty())
   {
-    if(blankQueue.front() == L" ")
+    if(blankQueue.front() == " "_u)
     {
       blankQueue.pop_front();
     }
@@ -1538,17 +1462,22 @@ RTXProcessor::outputAll(FILE* out)
 }
 
 void
-RTXProcessor::writeBlank(FILE* out)
+RTXProcessor::writeBlank(UFILE* out)
 {
   if(blankQueue.empty())
   {
-    blankQueue.push_back(L" ");
+    blankQueue.push_back(" "_u);
   }
   Chunk* blank = chunkPool.next();
   blank->target = blankQueue.front();
   blankQueue.pop_front();
   blank->isBlank = true;
-  blank->output(out);
+  if (printingText) {
+    blank->output(out);
+  }
+  if (printingTrees) {
+    blank->writeTree(treePrintMode, out);
+  }
 }
 
 bool
@@ -1558,9 +1487,9 @@ RTXProcessor::filterParseGraph()
   {
     if(treePrintMode == TreeModeLatex)
     {
-      wcerr << "\\subsection{Filtering Branches}\n\n\\begin{itemize}" << endl;
+      cerr << "\\subsection{Filtering Branches}\n\n\\begin{itemize}" << endl;
     }
-    else wcerr << endl << "Filtering Branches:" << endl;
+    else cerr << endl << "Filtering Branches:" << endl;
   }
   bool shouldOutput = !furtherInput && inputBuffer.size() == 1;
   int state[parseGraph.size()];
@@ -1584,9 +1513,9 @@ RTXProcessor::filterParseGraph()
       {
         if(treePrintMode == TreeModeLatex)
         {
-          wcerr << L"\\item No branch can accept further input." << endl;
+          cerr << "\\item No branch can accept further input." << endl;
         }
-        else wcerr << L"No branch can accept further input." << endl;
+        else cerr << "No branch can accept further input." << endl;
       }
       shouldOutput = true;
       memset(state, 1, N*sizeof(int));
@@ -1597,33 +1526,33 @@ RTXProcessor::filterParseGraph()
   {
     if(treePrintMode == TreeModeLatex)
     {
-      wcerr << "\\item Input buffer is empty." << endl;
+      cerr << "\\item Input buffer is empty." << endl;
     }
-    else wcerr << L"Input buffer is empty." << endl;
+    else cerr << "Input buffer is empty." << endl;
   }
   int min = -1;
   ParseNode* minNode = NULL;
   ParseNode* cur = NULL;
   map<int, vector<int>> filter;
-  if(printingBranches) { wcerr << L"shouldOutput: " << shouldOutput << L" branch count: " << N << endl; }
+  if(printingBranches) { cerr << "shouldOutput: " << shouldOutput << " branch count: " << N << endl; }
   for(int i = 0; i < N; i++)
   {
-    if(printingBranches) { wcerr << "examining node " << i << "(length: " << parseGraph[i]->length << ", weight: " << parseGraph[i]->weight << ") ... "; }
+    if(printingBranches) { cerr << "examining node " << i << "(length: " << parseGraph[i]->length << ", weight: " << parseGraph[i]->weight << ") ... "; }
     if(printingAll)
     {
-      if(treePrintMode == TreeModeLatex) wcerr << "\\item ";
-      wcerr << "Branch " << parseGraph[i]->id << " ";
+      if(treePrintMode == TreeModeLatex) cerr << "\\item ";
+      cerr << "Branch " << parseGraph[i]->id << " ";
     }
     if(state[i] == 0)
     {
-      if(printingAll) wcerr << " has no possible continuations." << endl;
+      if(printingAll) cerr << " has no possible continuations." << endl;
       continue;
     }
     else if(noFilter && !shouldOutput) continue;
     if(min == -1)
     {
-      if(printingAll) wcerr << " has no active branch to compare to." << endl;
-      if(printingBranches) { wcerr << "FIRST!" << endl; }
+      if(printingAll) cerr << " has no active branch to compare to." << endl;
+      if(printingBranches) { cerr << "FIRST!" << endl; }
       min = i;
       minNode = parseGraph[i];
       cur = minNode;
@@ -1637,8 +1566,8 @@ RTXProcessor::filterParseGraph()
         if(cur->length < minNode->length
             || (cur->length == minNode->length && cur->weight >= minNode->weight))
         {
-          if(printingBranches) { wcerr << i << L" beats " << min << " in length or weight" << endl; }
-          if(printingAll) wcerr << " has fewer partial parses or a higher weight than branch " << minNode->id << "." << endl;
+          if(printingBranches) { cerr << i << " beats " << min << " in length or weight" << endl; }
+          if(printingAll) cerr << " has fewer partial parses or a higher weight than branch " << minNode->id << "." << endl;
           state[min] = 0;
           min = i;
           minNode = cur;
@@ -1646,16 +1575,16 @@ RTXProcessor::filterParseGraph()
         else
         {
           state[i] = 0;
-          if(printingBranches) {wcerr << min << L" beats " << i << " in length or weight" << endl; }
-          if(printingAll) wcerr << " has more partial parses or a lower weight than branch " << minNode->id << "." << endl;
+          if(printingBranches) {cerr << min << " beats " << i << " in length or weight" << endl; }
+          if(printingAll) cerr << " has more partial parses or a lower weight than branch " << minNode->id << "." << endl;
         }
         count--;
       }
       else if(filter.find(cur->firstWord) == filter.end())
       {
         filter[cur->firstWord].push_back(i);
-        if(printingBranches) { wcerr << i << " has nothing to compare with" << endl; }
-        if(printingAll) wcerr << " has no prior branch covering the same final span." << endl;
+        if(printingBranches) { cerr << i << " has nothing to compare with" << endl; }
+        if(printingAll) cerr << " has no prior branch covering the same final span." << endl;
       }
       else
       {
@@ -1663,19 +1592,19 @@ RTXProcessor::filterParseGraph()
         double w = parseGraph[other[0]]->weight;
         if(w > cur->weight)
         {
-          if(printingBranches) { wcerr << i << L" has lower weight - discarding." << endl; }
-          if(printingAll) wcerr << " has a lower weight than branch " << parseGraph[other[0]]->id << " and will be discarded." << endl;
+          if(printingBranches) { cerr << i << " has lower weight - discarding." << endl; }
+          if(printingAll) cerr << " has a lower weight than branch " << parseGraph[other[0]]->id << " and will be discarded." << endl;
           state[i] = 0;
           count--;
         }
         else if(w < cur->weight)
         {
-          if(printingBranches) { wcerr << i << L" has higher weight - discarding others." << endl; }
+          if(printingBranches) { cerr << i << " has higher weight - discarding others." << endl; }
           if(printingAll)
           {
-            wcerr << " has a higher weight than ";
-            for(auto it : other) wcerr << "branch " << parseGraph[it]->id << ", ";
-            wcerr << "which will be discarded." << endl;
+            cerr << " has a higher weight than ";
+            for(auto it : other) cerr << "branch " << parseGraph[it]->id << ", ";
+            cerr << "which will be discarded." << endl;
           }
           for(vector<int>::iterator it = other.begin(), limit = other.end();
                 it != limit; it++)
@@ -1688,14 +1617,14 @@ RTXProcessor::filterParseGraph()
         }
         else
         {
-          if(printingBranches) { wcerr << i << " has same weight - keeping all." << endl; }
-          if(printingAll) wcerr << " has the same weight as branch " << parseGraph[other[0]]->id << "." << endl;
+          if(printingBranches) { cerr << i << " has same weight - keeping all." << endl; }
+          if(printingAll) cerr << " has the same weight as branch " << parseGraph[other[0]]->id << "." << endl;
           other.push_back(i);
         }
       }
     }
   }
-  if(printingAll && treePrintMode == TreeModeLatex) wcerr << "\\end{itemize}" << endl << endl;
+  if(printingAll && treePrintMode == TreeModeLatex) cerr << "\\end{itemize}" << endl << endl;
   if(count == N) return shouldOutput;
   if(count > 100 && filter.size() > 0)
   {
@@ -1718,34 +1647,34 @@ RTXProcessor::filterParseGraph()
       temp.push_back(parseGraph[i]);
       if(printingBranches)
       {
-        wcerr << L"keeping branch " << i << " first word: " << parseGraph[i]->firstWord << " ending with ";
+        cerr << "keeping branch " << i << " first word: " << parseGraph[i]->firstWord << " ending with ";
         parseGraph[i]->chunk->writeTree(TreeModeFlat, NULL);
-        wcerr << endl;
+        cerr << endl;
       }
     }
     else if(printingBranches)
     {
-      wcerr << L"discarding branch " << i << " first word: " << parseGraph[i]->firstWord << " ending with ";
+      cerr << "discarding branch " << i << " first word: " << parseGraph[i]->firstWord << " ending with ";
       parseGraph[i]->chunk->writeTree(TreeModeFlat, NULL);
-      wcerr << endl;
+      cerr << endl;
     }
   }
-  if(printingBranches) { wcerr << L"remaining branches: " << temp.size() << endl << endl; }
+  if(printingBranches) { cerr << "remaining branches: " << temp.size() << endl << endl; }
   parseGraph.swap(temp);
   return shouldOutput;
 }
 
 void
-RTXProcessor::processGLR(FILE *in, FILE *out)
+RTXProcessor::processGLR(UFILE *out)
 {
   int sentenceId = 1;
   if(printingAll && treePrintMode == TreeModeLatex)
   {
-    wcerr << "\\section{Sentence " << sentenceId << "}" << endl << endl;
+    cerr << "\\section{Sentence " << sentenceId << "}" << endl << endl;
   }
   while(furtherInput && inputBuffer.size() < 5)
   {
-    inputBuffer.push_back(readToken(in));
+    inputBuffer.push_back(readToken());
   }
   bool real_printingAll = printingAll;
   while(true)
@@ -1766,11 +1695,11 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
     }
     if(printingAll)
     {
-      wcerr << endl;
-      if(treePrintMode == TreeModeLatex) wcerr << "\\subsection{Reading Input}" << endl << endl;
-      else wcerr << "Reading Input:" << endl;
+      cerr << endl;
+      if(treePrintMode == TreeModeLatex) cerr << "\\subsection{Reading Input}" << endl << endl;
+      else cerr << "Reading Input:" << endl;
       next->writeTree(treePrintMode, NULL);
-      wcerr << endl;
+      cerr << endl;
     }
     inputBuffer.pop_front();
     if(parseGraph.size() == 0)
@@ -1778,15 +1707,20 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
       // skip parseGraph stuff if a blank is the only thing being processed
       if(next->isBlank)
       {
-        next->output(out);
+        if (printingText) {
+          next->output(out);
+        }
+        if (printingTrees) {
+          next->writeTree(treePrintMode, out);
+        }
         if(furtherInput)
         {
-          inputBuffer.push_back(readToken(in));
+          inputBuffer.push_back(readToken());
         }
         if(inputBuffer.empty())
         {
-          wcerr.flush();
-          fflush(out);
+          cerr.flush();
+          u_fflush(out);
           break;
         }
         continue;
@@ -1820,45 +1754,45 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
     {
       for(auto branch : parseGraph)
       {
-        wcerr << "Branch " << branch->id << ": " << branch->length << " nodes, weight = " << branch->weight << endl;
+        cerr << "Branch " << branch->id << ": " << branch->length << " nodes, weight = " << branch->weight << endl;
         vector<Chunk*> parts;
         parts.resize(branch->length);
         branch->getChunks(parts, branch->length-1);
         for(auto node : parts)
         {
-          if(node->isBlank) wcerr << "[Blank]: " << endl;
-          else wcerr << "[Chunk]: " << endl;
+          if(node->isBlank) cerr << "[Blank]: " << endl;
+          else cerr << "[Chunk]: " << endl;
           node->writeTree(treePrintMode, NULL);
         }
       }
     }
-    if(furtherInput) inputBuffer.push_back(readToken(in));
+    if(furtherInput) inputBuffer.push_back(readToken());
     if(filterParseGraph())
     {
-      wcerr.flush();
+      cerr.flush();
       if(printingAll)
       {
-        if(treePrintMode == TreeModeLatex) wcerr << "\\subsection{Outputting Branch " << parseGraph[0]->id << "}" << endl << endl;
+        if(treePrintMode == TreeModeLatex) cerr << "\\subsection{Outputting Branch " << parseGraph[0]->id << "}" << endl << endl;
         else
         {
-          wcerr << endl;
-          wcerr << "************************************************************" << endl;
-          wcerr << "************************************************************" << endl;
-          wcerr << "************************************************************" << endl;
-          wcerr << "Outputting Branch " << parseGraph[0]->id << endl << endl;
+          cerr << endl;
+          cerr << "************************************************************" << endl;
+          cerr << "************************************************************" << endl;
+          cerr << "************************************************************" << endl;
+          cerr << "Outputting Branch " << parseGraph[0]->id << endl << endl;
           vector<Chunk*> parts;
           parts.resize(parseGraph[0]->length);
           parseGraph[0]->getChunks(parts, parseGraph[0]->length-1);
           for(auto node : parts)
           {
-            if(node->isBlank) wcerr << "[Blank]: " << endl;
-            else wcerr << "[Chunk]: " << endl;
+            if(node->isBlank) cerr << "[Blank]: " << endl;
+            else cerr << "[Chunk]: " << endl;
             node->writeTree(treePrintMode, NULL);
           }
-          wcerr << "************************************************************" << endl;
-          wcerr << "************************************************************" << endl;
-          wcerr << "************************************************************" << endl;
-          wcerr << endl;
+          cerr << "************************************************************" << endl;
+          cerr << "************************************************************" << endl;
+          cerr << "************************************************************" << endl;
+          cerr << endl;
         }
       }
       currentBranch = parseGraph[0];
@@ -1867,11 +1801,11 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
       outputAll(out);
       variables = currentBranch->stringVars;
       wblank_variables = currentBranch->wblankVars;
-      fflush(out);
-      vector<wstring> wblanks;
-      vector<wstring> sources;
-      vector<wstring> targets;
-      vector<wstring> corefs;
+      u_fflush(out);
+      vector<UString> wblanks;
+      vector<UString> sources;
+      vector<UString> targets;
+      vector<UString> corefs;
       vector<bool> blanks;
       vector<bool> unknowns;
       int N = inputBuffer.size();
@@ -1894,15 +1828,15 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
         blanks.push_back(temp->isBlank);
         inputBuffer.pop_front();
       }
-      //wcerr << "clearing chunkPool, size was " << chunkPool.size() << endl;
-      //wcerr << "clearing parsePool, size was " << parsePool.size() << endl;
+      //cerr << "clearing chunkPool, size was " << chunkPool.size() << endl;
+      //cerr << "clearing parsePool, size was " << parsePool.size() << endl;
       chunkPool.reset();
       parsePool.reset();
       newBranchId = 0;
       if(printingAll) sentenceId++;
       if((furtherInput || inputBuffer.size() > 1) && printingAll && treePrintMode == TreeModeLatex)
       {
-        wcerr << endl << endl << "\\section{Sentence " << sentenceId << "}" << endl << endl;
+        cerr << endl << endl << "\\section{Sentence " << sentenceId << "}" << endl << endl;
       }
       for(int i = 0; i < N; i++)
       {
@@ -1915,7 +1849,7 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
         if(unknowns[i])
         {
           Chunk* c2 = chunkPool.next();
-          c2->target = targets[i].substr(1) + L"<UNKNOWN:INTERNAL>";
+          c2->target = targets[i].substr(1) + "<UNKNOWN:INTERNAL>"_u;
           c2->contents.push_back(c);
           c = c2;
         }
@@ -1926,11 +1860,11 @@ RTXProcessor::processGLR(FILE *in, FILE *out)
     if(!furtherInput && inputBuffer.size() == 1)
     {
       // if stream is empty, the last token is definitely a blank
-      wcerr.flush();
+      cerr.flush();
       inputBuffer.front()->output(out);
       blankQueue.clear();
       inputBuffer.pop_front();
-      fflush(out);
+      u_fflush(out);
       break;
     }
     else if(!furtherInput && inputBuffer.size() == 0) break;
@@ -2003,17 +1937,17 @@ RTXProcessor::processTRXLayer(list<Chunk*>& t1x, list<Chunk*>& t2x)
       }
       currentOutput.clear();
       if(printingRules) {
-        wcerr << endl << "Applying rule " << rule;
+        cerr << endl << "Applying rule " << rule;
         if(rule <= (int)inRuleNames.size())
         {
-          wcerr << " (" << inRuleNames[rule-1] << ")";
+          cerr << " (" << inRuleNames[rule-1] << ")";
         }
-        wcerr << ": ";
+        cerr << ": ";
         for(unsigned int i = 0; i < currentInput.size(); i++)
         {
           currentInput[i]->writeTree(TreeModeFlat, NULL);
         }
-        wcerr << endl;
+        cerr << endl;
       }
       if(applyRule(rule_map[rule-1]))
       {
@@ -2035,7 +1969,7 @@ RTXProcessor::processTRXLayer(list<Chunk*>& t1x, list<Chunk*>& t2x)
 }
 
 void
-RTXProcessor::processTRX(FILE *in, FILE *out)
+RTXProcessor::processTRX(UFILE *out)
 {
   list<Chunk*> t1x;
   list<Chunk*> t2x;
@@ -2044,7 +1978,7 @@ RTXProcessor::processTRX(FILE *in, FILE *out)
   {
     while(furtherInput && t1x.size() < 2*longestPattern)
     {
-      t1x.push_back(readToken(in));
+      t1x.push_back(readToken());
     }
     if(furtherInput)
     {
@@ -2066,7 +2000,7 @@ RTXProcessor::processTRX(FILE *in, FILE *out)
     {
       Chunk* cur = t3x.front();
       t3x.pop_front();
-      vector<wstring> tags = cur->getTags(vector<wstring>());
+      vector<UString> tags = cur->getTags(vector<UString>());
       if(cur->rule == -1)
       {
         if(cur->contents.size() == 0) cur->output(out);
@@ -2083,14 +2017,14 @@ RTXProcessor::processTRX(FILE *in, FILE *out)
       else
       {
         if(printingRules) {
-          wcerr << endl << L"Applying output rule " << cur->rule;
+          cerr << endl << "Applying output rule " << cur->rule;
           if(cur->rule < (int)outRuleNames.size())
           {
-            wcerr << " (" << outRuleNames[cur->rule] << ")";
+            cerr << " (" << outRuleNames[cur->rule] << ")";
           }
-          wcerr << ": ";
+          cerr << ": ";
           cur->writeTree(TreeModeFlat, NULL);
-          wcerr << endl;
+          cerr << endl;
         }
         parentChunk = cur;
         currentInput = cur->contents;
@@ -2110,32 +2044,33 @@ RTXProcessor::processTRX(FILE *in, FILE *out)
 }
 
 void
-RTXProcessor::process(FILE* in, FILE* out)
+RTXProcessor::process(FILE* in, UFILE* out)
 {
   if(printingAll && treePrintMode == TreeModeLatex)
   {
-    wcerr << "\\documentclass{article}" << endl;
-    wcerr << "\\usepackage{fontspec}" << endl;
-    wcerr << "\\setmainfont{FreeSans}" << endl;
-    wcerr << "\\usepackage{forest}" << endl;
-    wcerr << "\\usepackage[cm]{fullpage}" << endl << endl;
-    wcerr << "\\begin{document}" << endl << endl;
+    cerr << "\\documentclass{article}" << endl;
+    cerr << "\\usepackage{fontspec}" << endl;
+    cerr << "\\setmainfont{FreeSans}" << endl;
+    cerr << "\\usepackage{forest}" << endl;
+    cerr << "\\usepackage[cm]{fullpage}" << endl << endl;
+    cerr << "\\begin{document}" << endl << endl;
   }
+  infile.wrap(in);
   if(null_flush)
   {
-    while(!feof(in))
+    while(!infile.eof())
     {
       furtherInput = true;
       if(isLinear)
       {
-        processTRX(in, out);
+        processTRX(out);
       }
       else
       {
-        processGLR(in, out);
+        processGLR(out);
       }
-      fputc_unlocked('\0', out);
-      fflush(out);
+      u_fputc('\0', out);
+      u_fflush(out);
       chunkPool.reset();
       parsePool.reset();
       inputBuffer.clear();
@@ -2148,14 +2083,14 @@ RTXProcessor::process(FILE* in, FILE* out)
   }
   else if(isLinear)
   {
-    processTRX(in, out);
+    processTRX(out);
   }
   else
   {
-    processGLR(in, out);
+    processGLR(out);
   }
   if(printingAll && treePrintMode == TreeModeLatex)
   {
-    wcerr << endl << endl << "\\end{document}" << endl;
+    cerr << endl << endl << "\\end{document}" << endl;
   }
 }
diff --git a/src/rtx_processor.h b/src/rtx_processor.h
index afb5920..7607e1c 100644
--- a/src/rtx_processor.h
+++ b/src/rtx_processor.h
@@ -2,16 +2,13 @@
 #define __RTXPROCESSOR__
 
 #include <rtx_config.h>
-#include <apertium_re.h>
-#include <apertium/utf_converter.h>
+#include <apertium/apertium_re.h>
 #include <lttoolbox/alphabet.h>
-#include <lttoolbox/buffer.h>
-#include <lttoolbox/ltstr.h>
 #include <matcher.h>
 #include <chunk.h>
 #include <pool.h>
+#include <lttoolbox/input_file.h>
 
-#include <cstring>
 #include <cstdio>
 #include <map>
 #include <set>
@@ -24,7 +21,7 @@ struct StackElement
   int mode;
   bool b;
   int i;
-  wstring s;
+  UString s;
   Chunk* c;
 };
 
@@ -49,52 +46,52 @@ private:
   /**
    * Attribute category regular expressions
    */
-  map<wstring, ApertiumRE, Ltstr> attr_items;
+  map<UString, ApertiumRE> attr_items;
 
   /**
    * Virtual machine global variables
    * name => value
    */
-  map<wstring, wstring, Ltstr> variables;
+  map<UString, UString> variables;
   
   /**
    * Virtual machine global variables to wblank map
    * name => value
    */
-  map<wstring, wstring, Ltstr> wblank_variables;
+  map<UString, UString> wblank_variables;
 
   /**
    * Lists
    * name => { values }
    */
-  map<wstring, set<wstring, Ltstr>, Ltstr> lists;
+  map<UString, set<UString>> lists;
 
   /**
    * Lists, but all values are converted to lower case
    * Used for case-insensitive comparison
    * name => { values }
    */
-  map<wstring, set<wstring, Ltstr>, Ltstr> listslow;
+  map<UString, set<UString>> listslow;
 
   /**
    * Bytecode for input-time rules
    */
-  vector<wstring> rule_map;
+  vector<UString> rule_map;
 
   /**
    * Bytecode for output-time rules
    */
-  vector<wstring> output_rules;
+  vector<UString> output_rules;
 
   /**
    * Debug names for input-time rules (may be empty)
    */
-  vector<wstring> inRuleNames;
+  vector<UString> inRuleNames;
 
   /**
    * Debug names for output-time rules (may be empty)
    */
-  vector<wstring> outRuleNames;
+  vector<UString> outRuleNames;
 
   /**
    * Length of pattern of each input-time rule, including blanks
@@ -114,7 +111,7 @@ private:
   /**
    * false if EOF or \0 has been reached in the input stream, true otherwise
    */
-  bool furtherInput;
+  bool furtherInput = true;
 
   /**
    * The stack used by the virtual machine
@@ -133,12 +130,12 @@ private:
    * A parallel stack to store wordbound blanks that mimics the operations
    * of the main stack. wblanks are added everytime lemmas are clipped
    */
-  wstring theWblankStack[32];
+  UString theWblankStack[32];
   
   /**
    * wordbound blank to be output
    */
-  wstring out_wblank;
+  UString out_wblank;
 
   /**
    * Input to the virtual machine
@@ -173,7 +170,7 @@ private:
    * then we want to output it directly, particularly if it's empty
    * and because of lookahead, only processGLR() knows which blanks are which
    */
-  list<wstring> blankQueue;
+  list<UString> blankQueue;
 
   /**
    * The parse stack
@@ -211,7 +208,7 @@ private:
    * Branch of parseGraph currently being operated on
    * Needed by applyRule() for FETCHCHUNK and SETCHUNK
    */
-  ParseNode* currentBranch;
+  ParseNode* currentBranch = nullptr;
 
   //////////
   // SETTINGS
@@ -219,122 +216,95 @@ private:
 
   /**
    * true if the next input token should be parsed as an LU, false otherwise
-   * Initial value: false
    */
-  bool inword;
+  bool inword = false;
   
   /**
    * true if the next input token should be parsed as a wordbound blank, false otherwise
-   * Initial value: false
    */
-  bool inwblank;
+  bool inwblank = false;
 
   /**
    * Whether output should flush on \0
-   * Default: false
    */
-  bool null_flush;
+  bool null_flush = false;
 
   /**
    * If true, each instruction of virtual machine will be printed to wcerr
-   * Default: false
    */
-  bool printingSteps;
+  bool printingSteps = false;
 
   /**
    * If true, each rule that is applied will be printed to wcerr
-   * Default: false
    */
-  bool printingRules;
+  bool printingRules = false;
 
   /**
    * If true, each action of filterParseGraph() will be logged to wcerr
-   * Default: false
    */
-  bool printingBranches;
+  bool printingBranches = false;
 
   /**
    * If true, produce a full report, similar to (printingRules && printingBranches)
    * Affected by treePrintMode
-   * Default: false
    */
-  bool printingAll;
+  bool printingAll = false;
 
   /**
    * false if input comes from apertium-anaphora, true otherwise
-   * Default: true
    */
-  bool noCoref;
+  bool noCoref = true;
 
   /**
    * true if rule application should mimic the chunker-interchunk-postchunk
    * pipeline, false otherwise
-   * Default: false
    */
-  bool isLinear;
+  bool isLinear = false;
 
   /**
    * If true, parse tree will be printed according to treePrintMode
    * before output-time rules are applied
-   * Default: false
    */
-  bool printingTrees;
+  bool printingTrees = false;
 
   /**
    * If false, output-time rules will not be applied and linear output
    * will not be produced
-   * Default: true
    */
-  bool printingText;
+  bool printingText = true;
 
   /**
    * Manner in which to print trees
    * Set by setOutputMode()
    * Enum defined in chunk.h
-   * Default: TreeModeNest
    */
-  TreeMode treePrintMode;
+  TreeMode treePrintMode = TreeModeNest;
 
   /**
    * Counter used to give distinct, consistent identifiers to ParseNodes
    * for tracing purposes
    */
-  int newBranchId;
+  int newBranchId = 0;
 
   /**
    * If this is set to true, filterParseGraph() will only discard branches
    * on parse error
    */
-  bool noFilter;
+  bool noFilter = true;
 
   //////////
   // VIRTUAL MACHINE
   //////////
 
-  /**
-   * Determine capitalization of a string
-   * @param str - input string
-   * @return L"AA", L"Aa", or L"aa"
-   */
-  wstring caseOf(wstring const &str);
-
-  /**
-   * Produce a version of target_word with the case of source_word
-   * @param source_word - source of case
-   * @param target_word - source of content
-   * @return generated string
-   */
-  wstring copycase(wstring const &source_word, wstring const &target_word);
-
   /**
    * Return whether str1 begins with str2
    */
-  bool beginsWith(wstring const &str1, wstring const &str2) const;
+  bool beginsWith(UString const &str1, UString const &str2) const;
 
   /**
    * Return whether str1 ends with str2
    */
-  bool endsWith(wstring const &str1, wstring const &str2) const;
+  bool endsWith(UString const &str1, UString const &str2) const;
 
   /**
    * The virtual machine
@@ -343,7 +313,7 @@ private:
    * @param rule - bytecode for rule to be applied
    * @return false if REJECTRULE was executed, true otherwise
    */
-  bool applyRule(const wstring& rule);
+  bool applyRule(const UString& rule);
 
   /**
    * Pop and return a boolean from theStack
@@ -358,20 +328,20 @@ private:
   int popInt();
 
   /**
-   * Pop and return a wstring from theStack
-   * Log error and call exit(1) if top element is not a wstring
+   * Pop and return a UString from theStack
+   * Log error and call exit(1) if top element is not a UString
    */
-  wstring popString();
+  UString popString();
 
   /**
    * Equivalent to popString(), but with called as
-   * wstring x; popString(x);
+   * UString x; popString(x);
    * rather than
-   * wstring x = popString();
+   * UString x = popString();
    * This uses a swap to save an allocation and a copy, which is almost twice
    * as fast, which has a noticeable impact on overall speed
    */
-  void popString(wstring& dest);
+  void popString(UString& dest);
 
   /**
    * Pop and return a Chunk pointer from theStack
@@ -391,7 +361,7 @@ private:
     theStack[stackIdx].i = i;
     theWblankStack[stackIdx].clear();
   }
-  inline void pushStack(const wstring& s, wstring wbl = L"")
+  inline void pushStack(const UString& s, UString wbl = ""_u)
   {
     theStack[++stackIdx].mode = 2;
     theStack[stackIdx].s.assign(s);
@@ -413,13 +383,15 @@ private:
   // RULE SELECTION AND I/O
   //////////
 
+  InputFile infile;
+
   /**
    * Read an LU or a blank
    * Modifies: furtherInput
    * @param in - input stream
    * @return pointer to token read
    */
-  Chunk* readToken(FILE *in);
+  Chunk* readToken();
 
   bool lookahead(ParseNode* node);
 
@@ -434,13 +406,13 @@ private:
   /**
    * Output the next blank in blankQueue, or a space if the queue is empty
    */
-  void writeBlank(FILE* out);
+  void writeBlank(UFILE* out);
 
   /**
    * Apply output-time rules and write nodes to output stream
    * @param out - output stream
    */
-  void outputAll(FILE* out);
+  void outputAll(UFILE* out);
 
   /**
    * Prune any ParseNodes that have reached error states
@@ -453,7 +425,7 @@ private:
    * Process input as a GLR parser
    * Read input, call checkForReduce(), call filterParseGraph(), call outputAll()
    */
-  void processGLR(FILE* in, FILE* out);
+  void processGLR(UFILE* out);
 
   /**
    * Apply longest rule matching the beginning of t1x and append the result to t2x
@@ -464,19 +436,19 @@ private:
    * Mimic apertium-transfer | apertium-interchunk | apertium-postchunk
    * Read input, call processTRXLayer twice, apply output-time rules, output
    */
-  void processTRX(FILE* in, FILE* out);
+  void processTRX(UFILE* out);
   
   /**
    * True if clipping lem/lemh/whole
   */
-  bool gettingLemmaFromWord(wstring attr);
+  bool gettingLemmaFromWord(UString attr);
   
 public:
   RTXProcessor();
   ~RTXProcessor();
 
   void read(string const &filename);
-  void process(FILE *in, FILE *out);
+  void process(FILE *in, UFILE *out);
   bool getNullFlush(void);
   void setNullFlush(bool null_flush);
   void printSteps(bool val)
diff --git a/src/trx_compiler.cc b/src/trx_compiler.cc
index f0f5959..8b01907 100644
--- a/src/trx_compiler.cc
+++ b/src/trx_compiler.cc
@@ -8,9 +8,9 @@
 #include <cstdlib>
 #include <fstream>
 #include <iostream>
-#include <apertium/string_utils.h>
+#include <lttoolbox/string_utils.h>
+#include <lttoolbox/xml_walk_util.h>
 
-using namespace Apertium;
 using namespace std;
 
 TRXCompiler::TRXCompiler()
@@ -23,19 +23,51 @@ TRXCompiler::~TRXCompiler()
   // TODO
 }
 
+// Return the element name of node as a UString.
+UString
+name(xmlNode* node)
+{
+  return to_ustring((const char*) node->name);
+}
+
+// Return true if the element name of node equals name.
+bool
+nameIs(xmlNode* node, const char* name)
+{
+  return !xmlStrcmp(node->name, (const xmlChar*) name);
+}
+
+// Print "Error in <file> on line <n>: " plus the formatted message to stderr, then exit.
 void
-TRXCompiler::die(xmlNode* node, wstring msg)
+TRXCompiler::die(xmlNode* node, const char* fmt, ...)
 {
-  wcerr << "Error in " << UtfConverter::fromUtf8((char*) curDoc->URL);
-  wcerr << " on line " << node->line << ": " << msg << endl;
+  UFILE* out = u_finit(stderr, NULL, NULL);
+  u_fprintf(out, "Error in %S on line %d: ",
+            to_ustring((char*) curDoc->URL).c_str(), node->line);
+  va_list argptr;
+  va_start(argptr, fmt);
+  u_vfprintf(out, fmt, argptr);
+  va_end(argptr);
+  u_fputc('\n', out);
+  // flush and release the wrapper; a UFILE from u_finit() does not own stderr
+  u_fclose(out);
   exit(EXIT_FAILURE);
 }
 
+// Print "Warning in <file> on line <n>: " plus the formatted message to stderr.
 void
-TRXCompiler::warn(xmlNode* node, wstring msg)
+TRXCompiler::warn(xmlNode* node, const char* fmt, ...)
 {
-  wcerr << "Warning in " << UtfConverter::fromUtf8((char*) curDoc->URL);
-  wcerr << " on line " << node->line << ": " << msg << endl;
+  UFILE* out = u_finit(stderr, NULL, NULL);
+  u_fprintf(out, "Warning in %S on line %d: ",
+            to_ustring((char*) curDoc->URL).c_str(), node->line);
+  va_list argptr;
+  va_start(argptr, fmt);
+  u_vfprintf(out, fmt, argptr);
+  va_end(argptr);
+  u_fputc('\n', out);
+  // warn() may be called many times: release the wrapper so it is not leaked
+  u_fclose(out);
 }
 
 void
@@ -44,7 +68,7 @@ TRXCompiler::compile(string file)
   curDoc = xmlReadFile(file.c_str(), NULL, 0);
   if(curDoc == NULL)
   {
-    wcerr << "Error: Could not parse file '" << file << "'." << endl;
+    cerr << "Error: Could not parse file '" << file << "'." << endl;
     exit(EXIT_FAILURE);
   }
   processFile(xmlDocGetRootElement(curDoc));
@@ -53,90 +77,49 @@ TRXCompiler::compile(string file)
 void
 TRXCompiler::processFile(xmlNode* node)
 {
-  for(xmlNode* i = node->children; i != NULL; i = i->next)
-  {
-    if(i->type == XML_ELEMENT_NODE)
-    {
-      if(!xmlStrcmp(i->name, (const xmlChar*) "section-def-cats"))
-      {
-        processCats(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar*) "section-def-attrs"))
-      {
-        processAttrs(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar*) "section-def-vars"))
-      {
-        processVars(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar*) "section-def-lists"))
-      {
-        processLists(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar*) "section-def-macros"))
-      {
-        gatherMacros(i);
-      }
-      else if(!xmlStrcmp(i->name, (const xmlChar*) "section-rules"))
-      {
-        processRules(i);
-      }
-    }
-  }
-}
-
-xmlChar*
-TRXCompiler::requireAttr(xmlNode* node, const xmlChar* attr)
-{
-  for(xmlAttr* a = node->properties; a != NULL; a = a->next)
-  {
-    if(!xmlStrcmp(a->name, attr))
-    {
-      return a->children->content;
+  for (auto i : children(node)) {
+    if(nameIs(i, "section-def-cats")) {
+      processCats(i);
+    } else if(nameIs(i, "section-def-attrs")) {
+      processAttrs(i);
+    } else if(nameIs(i, "section-def-vars")) {
+      processVars(i);
+    } else if(nameIs(i, "section-def-lists")) {
+      processLists(i);
+    } else if(nameIs(i, "section-def-macros")) {
+      gatherMacros(i);
+    } else if(nameIs(i, "section-rules")) {
+      processRules(i);
     }
   }
-  die(node, L"Expected attribute '" + UtfConverter::fromUtf8((const char*) attr) + L"'");
-  return NULL;
-  // since die() ends the process, NULL will never be returned,
-  // but this keeps the compiler from complaining about the lack of a return statement
 }
 
-xmlChar*
-TRXCompiler::getAttr(xmlNode* node, const xmlChar* attr)
+UString
+TRXCompiler::requireAttr(xmlNode* node, const char* attr)
 {
-  for(xmlAttr* a = node->properties; a != NULL; a = a->next)
-  {
-    if(!xmlStrcmp(a->name, attr))
-    {
-      return a->children->content;
+  for (xmlAttr* a = node->properties; a != NULL; a = a->next) {
+    if (xmlStrcmp(a->name, (const xmlChar*) attr)) continue;
+    // an empty value may have no text child: return "" rather than dereference NULL
+    return (a->children && a->children->content) ? to_ustring((const char*) a->children->content) : UString();
+  }
-  return NULL;
-}
-
-inline wstring
-TRXCompiler::toWstring(const xmlChar* s)
-{
-  return (s == NULL) ? L"" : UtfConverter::fromUtf8((char*) s);
+  die(node, "Expected attribute '%S'", to_ustring(attr).c_str());
+  return ""_u; // since die() exits, this will not be returned
+  // but we each do our part to keep the typechecker happy...
 }
 
 int
 TRXCompiler::getPos(xmlNode* node, bool isBlank = false)
 {
-  wstring v;
-  if(!xmlStrcmp(node->name, (const xmlChar*) "b"))
-  {
-    v = toWstring(getAttr(node, (const xmlChar*) "pos"));
-    if(v == L"")
-    {
+  UString v;
+  if(nameIs(node, "b")) {
+    v = getattr(node, "pos");
+    if (v.empty()) {
       return 0;
     }
+  } else {
+    v = requireAttr(node, "pos");
   }
-  else
-  {
-    v = toWstring(requireAttr(node, (const xmlChar*) "pos"));
-  }
-  if(v.size() == 0)
+  if(v.empty())
   {
     if(isBlank)
     {
@@ -144,7 +127,7 @@ TRXCompiler::getPos(xmlNode* node, bool isBlank = false)
     }
     else
     {
-      die(node, L"Cannot interpret empty pos attribute.");
+      die(node, "Cannot interpret empty pos attribute.");
     }
   }
   for(unsigned int i = 0; i < v.size(); i++)
@@ -153,13 +136,13 @@ TRXCompiler::getPos(xmlNode* node, bool isBlank = false)
     {
       if(isBlank)
       {
-        warn(node, L"Disregarding non-integer position.");
+        warn(node, "Disregarding non-integer position.");
         return 0;
       }
-      die(node, L"Position must be an integer.");
+      die(node, "Position must be an integer.");
     }
   }
-  int ret = stoi(v);
+  int ret = StringUtils::stoi(v);
   if(inOutput && ret == 0)
   {
     return ret;
@@ -173,10 +156,10 @@ TRXCompiler::getPos(xmlNode* node, bool isBlank = false)
   {
     if(isBlank)
     {
-      warn(node, L"Disregarding out-of-bounds position.");
+      warn(node, "Disregarding out-of-bounds position.");
       return 0;
     }
-    die(node, L"Position " + to_wstring(ret) + L" is out of bounds.");
+    die(node, "Position %d is out of bounds.", ret);
   }
   if(macroPosShift.size() > 0)
   {
@@ -188,70 +171,52 @@ TRXCompiler::getPos(xmlNode* node, bool isBlank = false)
 void
 TRXCompiler::processCats(xmlNode* node)
 {
-  for(xmlNode* cat = node->children; cat != NULL; cat = cat->next)
-  {
-    if(cat->type == XML_ELEMENT_NODE)
-    {
-      if(xmlStrcmp(cat->name, (const xmlChar*) "def-cat"))
-      {
-        warn(cat, L"Unexpected tag in section-def-cats - ignoring");
+  for (auto cat : children(node)) {
+    if (!nameIs(cat, "def-cat")) {
+      warn(cat, "Unexpected tag in section-def-cats - ignoring");
+      continue;
+    }
+    UString pat_name = requireAttr(cat, "n");
+    vector<PatternElement*> pat;
+    for (auto item : children(cat)) {
+      if (!nameIs(item, "cat-item")) {
+        warn(cat, "Unexpected tag <%S> in def-cat - ignoring", name(item).c_str());
         continue;
       }
-      wstring name = toWstring(requireAttr(cat, (const xmlChar*) "n"));
-      vector<PatternElement*> pat;
-      for(xmlNode* item = cat->children; item != NULL; item = item->next)
-      {
-        if(item->type != XML_ELEMENT_NODE) continue;
-        if(xmlStrcmp(item->name, (const xmlChar*) "cat-item"))
-        {
-          warn(cat, L"Unexpected tag <" + toWstring(item->name) + L"> in def-cat - ignoring");
-          continue;
-        }
-        PatternElement* cur = new PatternElement;
-        cur->lemma = toWstring(getAttr(item, (const xmlChar*) "lemma"));
-        wstring tags = toWstring(requireAttr(item, (const xmlChar*) "tags"));
-        if(tags == L"") tags = L"UNKNOWN:INTERNAL";
-        cur->tags = StringUtils::split_wstring(tags, L".");
-        pat.push_back(cur);
-      }
-      if(patterns.find(name) != patterns.end())
-      {
-        warn(cat, L"Redefinition of pattern '" + name + L"', using later value");
-      }
-      patterns[name] = pat;
+      PatternElement* cur = new PatternElement;
+      cur->lemma = getattr(item, "lemma");
+      UString tags = requireAttr(item, "tags");
+      if(tags.empty()) tags = "UNKNOWN:INTERNAL"_u;
+      cur->tags = StringUtils::split(tags, "."_u);
+      pat.push_back(cur);
     }
+    if(patterns.find(pat_name) != patterns.end()) {
+      warn(cat, "Redefinition of pattern '%S', using later value", pat_name.c_str());
+    }
+    patterns[pat_name] = pat;
   }
 }
 
 void
 TRXCompiler::processAttrs(xmlNode* node)
 {
-  for(xmlNode* cat = node->children; cat != NULL; cat = cat->next)
-  {
-    if(cat->type != XML_ELEMENT_NODE)
-    {
-      continue;
-    }
-    if(xmlStrcmp(cat->name, (const xmlChar*) "def-attr"))
-    {
-      warn(cat, L"Unexpected tag in section-def-attrs - ignoring");
+  for (auto cat : children(node)) {
+    if (!nameIs(cat, "def-attr")) {
+      warn(cat, "Unexpected tag in section-def-attrs - ignoring");
       continue;
     }
-    wstring name = toWstring(getAttr(cat, (const xmlChar*) "n"));
-    set<wstring, Ltstr> ats;
-    for(xmlNode* item = cat->children; item != NULL; item = item->next)
-    {
-      if(item->type != XML_ELEMENT_NODE) continue;
-      if(xmlStrcmp(item->name, (const xmlChar*) "attr-item"))
-      {
-        warn(item, L"Unexpected tag in def-attr - ignoring");
+    UString name = getattr(cat, "n");
+    set<UString> ats;
+    for (auto item : children(cat)) {
+      if (!nameIs(item, "attr-item")) {
+        warn(item, "Unexpected tag in def-attr - ignoring");
         continue;
       }
-      ats.insert(toWstring(getAttr(item, (const xmlChar*) "tags")));
+      ats.insert(getattr(item, "tags"));
     }
     if(PB.isAttrDefined(name))
     {
-      warn(cat, L"Redefinition of attribute '" + name + L"' - using later definition");
+      warn(cat, "Redefinition of attribute '%S' - using later definition", name.c_str());
     }
     PB.addAttr(name, ats);
   }
@@ -260,16 +225,13 @@ TRXCompiler::processAttrs(xmlNode* node)
 void
 TRXCompiler::processVars(xmlNode* node)
 {
-  for(xmlNode* var = node->children; var != NULL; var = var->next)
-  {
-    if(var->type != XML_ELEMENT_NODE) continue;
-    if(xmlStrcmp(var->name, (const xmlChar*) "def-var"))
-    {
-      warn(var, L"Unexpected tag in section-def-vars - ignoring");
+  for (auto var : children(node)) {
+    if (!nameIs(var, "def-var")) {
+      warn(var, "Unexpected tag in section-def-vars - ignoring");
       continue;
     }
-    wstring name = toWstring(requireAttr(var, (const xmlChar*) "n"));
-    vars[name] = toWstring(getAttr(var, (const xmlChar*) "v"));
+    UString name = requireAttr(var, "n");
+    vars[name] = getattr(var, "v");
     PB.addVar(name, vars[name]);
   }
 }
@@ -277,32 +239,23 @@ TRXCompiler::processVars(xmlNode* node)
 void
 TRXCompiler::processLists(xmlNode* node)
 {
-  for(xmlNode* cat = node->children; cat != NULL; cat = cat->next)
-  {
-    if(cat->type != XML_ELEMENT_NODE)
-    {
-      continue;
-    }
-    if(xmlStrcmp(cat->name, (const xmlChar*) "def-list"))
-    {
-      warn(cat, L"Unexpected tag in section-def-lists - ignoring");
+  for (auto cat : children(node)) {
+    if (!nameIs(cat, "def-list")) {
+      warn(cat, "Unexpected tag in section-def-lists - ignoring");
       continue;
     }
-    wstring name = toWstring(getAttr(cat, (const xmlChar*) "n"));
-    set<wstring, Ltstr> ats;
-    for(xmlNode* item = cat->children; item != NULL; item = item->next)
-    {
-      if(item->type != XML_ELEMENT_NODE) continue;
-      if(xmlStrcmp(item->name, (const xmlChar*) "list-item"))
-      {
-        warn(item, L"Unexpected tag in def-list - ignoring");
+    UString name = getattr(cat, "n");
+    set<UString> ats;
+    for (auto item : children(cat)) {
+      if (!nameIs(item, "list-item")) {
+        warn(item, "Unexpected tag in def-list - ignoring");
         continue;
       }
-      ats.insert(toWstring(getAttr(item, (const xmlChar*) "v")));
+      ats.insert(getattr(item, "v"));
     }
     if(lists.find(name) != lists.end())
     {
-      warn(cat, L"Redefinition of list '" + name + L"' - using later definition");
+      warn(cat, "Redefinition of list '%S' - using later definition", name.c_str());
     }
     lists[name] = ats;
     PB.addList(name, ats);
@@ -312,19 +265,16 @@ TRXCompiler::processLists(xmlNode* node)
 void
 TRXCompiler::gatherMacros(xmlNode* node)
 {
-  for(xmlNode* mac = node->children; mac != NULL; mac = mac->next)
-  {
-    if(mac->type != XML_ELEMENT_NODE) continue;
-    if(xmlStrcmp(mac->name, (const xmlChar*) "def-macro"))
-    {
-      warn(mac, L"Unexpected tag in section-def-macros - ignoring");
+  for (auto mac : children(node)) {
+    if (!nameIs(mac, "def-macro")) {
+      warn(mac, "Unexpected tag in section-def-macros - ignoring");
       continue;
     }
-    wstring name = toWstring(requireAttr(mac, (const xmlChar*) "n"));
-    int npar = atoi((const char*) requireAttr(mac, (const xmlChar*) "npar"));
+    UString name = requireAttr(mac, "n");
+    int npar = StringUtils::stoi(requireAttr(mac, "npar"));
     if(macros.find(name) != macros.end())
     {
-      warn(mac, L"Redefinition of macro '" + name + L"' - using later definition");
+      warn(mac, "Redefinition of macro '%S' - using later definition", name.c_str());
     }
     macros[name] = make_pair(npar, mac);
   }
@@ -333,65 +283,54 @@ TRXCompiler::gatherMacros(xmlNode* node)
 void
 TRXCompiler::processRules(xmlNode* node)
 {
-  for(xmlNode* rule = node->children; rule != NULL; rule = rule->next)
-  {
-    if(rule->type != XML_ELEMENT_NODE) continue;
+  for (auto rule : children(node)) {
     if(xmlStrcmp(rule->name, (const xmlChar*) "rule"))
     {
-      warn(rule, L"Ignoring non-<rule> element in <section-rules>.");
+      warn(rule, "Ignoring non-<rule> element in <section-rules>.");
       continue;
     }
-    if(!xmlStrcmp(getAttr(rule, (const xmlChar*) "i"), (const xmlChar*) "yes"))
-    {
+    if (getattr(rule, "i") == "yes"_u) {
       continue;
     }
     curPatternSize = 0;
     localVars.clear();
 
-    wstring id = toWstring(getAttr(rule, (const xmlChar*) "id"));
-    wstring weight = toWstring(getAttr(rule, (const xmlChar*) "weight"));
-    wstring firstChunk = toWstring(getAttr(rule, (const xmlChar*) "firstChunk"));
-    if(firstChunk == L"") firstChunk = L"*";
+    UString id = getattr(rule, "id");
+    UString weight = getattr(rule, "weight");
+    UString firstChunk = getattr(rule, "firstChunk");
+    if(firstChunk.empty()) firstChunk = "*"_u;
 
     xmlNode* action = NULL;
-    wstring outputAction;
+    UString outputAction;
     bool pat = false;
-    wstring assertClause = L"";
-    for(xmlNode* part = rule->children; part != NULL; part = part->next)
-    {
-      if(part->type != XML_ELEMENT_NODE) continue;
-      if(!xmlStrcmp(part->name, (const xmlChar*) "local"))
+    UString assertClause;
+    for (auto part : children(rule)) {
+      if(nameIs(part, "local"))
       {
-        for(xmlNode* var = rule->children; var != NULL; var = var->next)
-        {
-          if(var->type == XML_ELEMENT_NODE &&
-             !xmlStrcmp(var->name, (const xmlChar*) "var"))
-          {
-            localVars.insert(toWstring(requireAttr(var, (const xmlChar*) "n")));
+        for (auto var : children(rule)) {
+          if(nameIs(var, "var")) {
+            localVars.insert(requireAttr(var, "n"));
           }
         }
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "pattern"))
+      else if(nameIs(part, "pattern"))
       {
         if(pat)
         {
-          die(rule, L"Rule cannot have multiple <pattern>s.");
+          die(rule, "Rule cannot have multiple <pattern>s.");
         }
         pat = true;
         vector<vector<PatternElement*>> pls;
-        for(xmlNode* pi = part->children; pi != NULL; pi = pi->next)
-        {
-          if(pi->type != XML_ELEMENT_NODE) continue;
-          if(xmlStrcmp(pi->name, (const xmlChar*) "pattern-item"))
-          {
-            warn(pi, L"Ignoring non-<pattern-item> in <pattern>.");
+        for (auto pi : children(part)) {
+          if (!nameIs(pi, "pattern-item")) {
+            warn(pi, "Ignoring non-<pattern-item> in <pattern>.");
             continue;
           }
           curPatternSize++;
-          wstring name = toWstring(requireAttr(pi, (const xmlChar*) "n"));
+          UString name = requireAttr(pi, "n");
           if(patterns.find(name) == patterns.end())
           {
-            die(pi, L"Unknown pattern '" + name + L"'.");
+            die(pi, "Unknown pattern '%S'.", name.c_str());
           }
           else
           {
@@ -400,7 +339,7 @@ TRXCompiler::processRules(xmlNode* node)
         }
         if(curPatternSize == 0)
         {
-          die(rule, L"Rule cannot have empty pattern.");
+          die(rule, "Rule cannot have empty pattern.");
         }
         if(curPatternSize > longestPattern)
         {
@@ -408,16 +347,14 @@ TRXCompiler::processRules(xmlNode* node)
         }
         if(excludedRules.find(id) == excludedRules.end())
         {
-          PB.addRule(inputRules.size() + 1, (weight.size() > 0 ? stod(weight) : 0.0), pls, StringUtils::split_wstring(firstChunk, L" "), id);
+          PB.addRule(inputRules.size() + 1, (weight.size() > 0 ? StringUtils::stod(weight) : 0.0), pls, StringUtils::split(firstChunk, " "_u), id);
         }
         inputRuleSizes.push_back(pls.size());
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "assert"))
+      else if(nameIs(part, "assert"))
       {
         bool firstAssert = (assertClause.size() == 0);
-        for(xmlNode* clause = part->children; clause != NULL; clause = clause->next)
-        {
-          if(clause->type != XML_ELEMENT_NODE) continue;
+        for (auto clause : children(part)) {
           assertClause += processCond(clause);
           if(!firstAssert)
           {
@@ -426,38 +363,37 @@ TRXCompiler::processRules(xmlNode* node)
           firstAssert = false;
         }
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "action"))
+      else if(nameIs(part, "action"))
       {
         if(action != NULL)
         {
-          die(rule, L"Rule cannot have multiple <action>s.");
+          die(rule, "Rule cannot have multiple <action>s.");
         }
         action = part;
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "output-action"))
+      else if(nameIs(part, "output-action"))
       {
         if(outputAction.size() > 0)
         {
-          die(part, L"Rule cannot have multiple <output-action>s.");
+          die(part, "Rule cannot have multiple <output-action>s.");
         }
         inOutput = true;
-        for(xmlNode* state = part->children; state != NULL; state = state->next)
-        {
-          if(state->type == XML_ELEMENT_NODE) outputAction += processStatement(state);
+        for (auto state : children(part)) {
+          outputAction += processStatement(state);
         }
       }
       else
       {
-        warn(part, L"Unknown element <" + toWstring(part->name) + L"> in <rule>, ignoring.");
+        warn(part, "Unknown element <%S> in <rule>, ignoring.", name(part).c_str());
       }
     }
     if(!pat)
     {
-      die(rule, L"Rule must have <pattern>.");
+      die(rule, "Rule must have <pattern>.");
     }
     if(action == NULL)
     {
-      die(rule, L"Rule must have <action>.");
+      die(rule, "Rule must have <action>.");
     }
     else
     {
@@ -471,17 +407,15 @@ TRXCompiler::processRules(xmlNode* node)
         currentOutputRule = -1;
       }
       inOutput = false;
-      wstring actionStr;
+      UString actionStr;
       if(assertClause.size() > 0)
       {
         actionStr = assertClause;
         actionStr += JUMPONTRUE;
-        actionStr += (wchar_t)1;
+        actionStr += (UChar)1;
         actionStr += REJECTRULE;
       }
-      for(xmlNode* state = action->children; state != NULL; state = state->next)
-      {
-        if(state->type != XML_ELEMENT_NODE) continue;
+      for (auto state : children(action)) {
         actionStr += processStatement(state);
       }
       inputRules.push_back(actionStr);
@@ -489,53 +423,46 @@ TRXCompiler::processRules(xmlNode* node)
   }
 }
 
-wstring
+UString
 TRXCompiler::processStatement(xmlNode* node)
 {
-  if(!xmlStrcmp(getAttr(node, (const xmlChar*) "i"), (const xmlChar*) "yes"))
-  {
-    return L"";
+  if (getattr(node, "i") == "yes"_u) {
+    return ""_u;
   }
-  wstring ret;
-  if(!xmlStrcmp(node->name, (const xmlChar*) "let") ||
-     !xmlStrcmp(node->name, (const xmlChar*) "modify-case"))
-  {
-    wstring name = toWstring(node->name);
+  UString ret;
+  if(nameIs(node, "let") || nameIs(node, "modify-case")) {
     xmlNode* var = NULL;
-    wstring val;
+    UString val;
     bool val_is_clip = false;
-    for(xmlNode* n = node->children; n != NULL; n = n->next)
-    {
-      if(n->type != XML_ELEMENT_NODE) continue;
+    for (auto n : children(node)) {
       if(var == NULL)
       {
         var = n;
       }
       else if(val.size() == 0)
       {
-        val_is_clip = (!xmlStrcmp(n->name, (const xmlChar*) "clip"));
+        val_is_clip = (nameIs(n, "clip"));
         val = processValue(n);
       }
-      else
-      {
-        die(node, L"<" + name + L"> cannot have more than two children.");
+      else {
+        die(node, "<%S> cannot have more than two children.", name(node).c_str());
       }
     }
     if(val.size() == 0)
     {
-      die(node, L"<" + name + L"> must have two children.");
+      die(node, "<%S> must have two children.", name(node).c_str());
     }
-    if(!xmlStrcmp(var->name, (const xmlChar*) "var"))
+    if(nameIs(var, "var"))
     {
-      wstring vname = toWstring(requireAttr(var, (const xmlChar*) "n"));
+      UString vname = requireAttr(var, "n");
       if(vars.find(vname) == vars.end())
       {
-        die(var, L"Undefined variable '" + vname + L"'.");
+        die(var, "Undefined variable '%S'.", vname.c_str());
       }
-      if(name == L"modify-case")
+      if(nameIs(node, "modify-case"))
       {
         ret += STRING;
-        ret += (wchar_t)vname.size();
+        ret += (UChar)vname.size();
         ret += vname;
         ret += FETCHVAR;
         ret += val;
@@ -546,34 +473,34 @@ TRXCompiler::processStatement(xmlNode* node)
         ret += val;
       }
       ret += STRING;
-      ret += (wchar_t)vname.size();
+      ret += (UChar)vname.size();
       ret += vname;
       ret += SETVAR;
     }
-    else if(!xmlStrcmp(var->name, (const xmlChar*) "clip"))
+    else if(nameIs(var, "clip"))
     {
-      wstring side = toWstring(getAttr(var, (const xmlChar*) "side"));
-      if(!(side == L"" || side == L"tl"))
+      UString side = getattr(var, "side");
+      if(!(side.empty() || side == "tl"_u))
       {
-        warn(var, L"Cannot set side '" + side + L"', setting 'tl' instead.");
+        warn(var, "Cannot set side '%S', setting 'tl' instead.", side.c_str());
       }
-      wstring part = toWstring(requireAttr(var, (const xmlChar*) "part"));
+      UString part = requireAttr(var, "part");
       if(!PB.isAttrDefined(part))
       {
-        die(var, L"Unknown attribute '" + part + L"'");
+        die(var, "Unknown attribute '%S'", part.c_str());
       }
-      wstring set_str;
+      UString set_str;
       set_str += PB.BCstring(part);
       set_str += INT;
-      set_str += (wchar_t)getPos(var);
+      set_str += (UChar)getPos(var);
       set_str += SETCLIP;
-      if(name == L"modify-case")
+      if(nameIs(node, "modify-case"))
       {
         ret += INT;
-        ret += (wchar_t)getPos(var);
+        ret += (UChar)getPos(var);
         ret += PUSHINPUT;
         ret += STRING;
-        ret += (wchar_t)part.size();
+        ret += (UChar)part.size();
         ret += part;
         ret += TARGETCLIP;
         ret += val;
@@ -585,11 +512,11 @@ TRXCompiler::processStatement(xmlNode* node)
         ret = val;
         if(val_is_clip)
         {
-          wstring cond;
+          UString cond;
           cond += DUP;
-          cond += PB.BCstring(L"");
+          cond += PB.BCstring(""_u);
           cond += EQUAL;
-          ret += PB.BCifthenelse(cond, wstring(1, DROP), set_str);
+          ret += PB.BCifthenelse(cond, UString(1, DROP), set_str);
         }
         else
         {
@@ -599,344 +526,301 @@ TRXCompiler::processStatement(xmlNode* node)
     }
     else
     {
-      die(node, L"Cannot set value of <" + toWstring(var->name) + L">.");
+      die(node, "Cannot set value of <%S>.", name(var).c_str());
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "out"))
+  else if(nameIs(node, "out"))
   {
-    for(xmlNode* o = node->children; o != NULL; o = o->next)
-    {
-      if(o->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(o);
-        ret += OUTPUT;
-      }
+    for (auto o : children(node)) {
+      ret += processValue(o);
+      ret += OUTPUT;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "choose"))
+  else if(nameIs(node, "choose"))
   {
     ret = processChoose(node);
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "call-macro"))
+  else if(nameIs(node, "call-macro"))
   {
     // TODO: DTD implies number of arguments can be variable
-    wstring name = toWstring(requireAttr(node, (const xmlChar*) "n"));
+    UString name = requireAttr(node, "n");
     if(macros.find(name) == macros.end())
     {
-      die(node, L"Unknown macro '" + name + L"'.");
+      die(node, "Unknown macro '%S'.", name.c_str());
     }
     vector<int> temp;
-    for(xmlNode* param = node->children; param != NULL; param = param->next)
-    {
-      if(param->type != XML_ELEMENT_NODE) continue;
-      if(xmlStrcmp(param->name, (const xmlChar*) "with-param"))
-      {
-        warn(param, L"Ignoring non-<with-param> in <call-macro>");
-      }
-      else
-      {
+    for (auto param : children(node)) {
+      if (nameIs(param, "with-param")) {
         temp.push_back(getPos(param));
+      } else {
+        warn(param, "Ignoring non-<with-param> in <call-macro>");
       }
     }
     unsigned int shouldbe = macros[name].first;
     if(shouldbe < temp.size())
     {
-      die(node, L"Too many parameters, macro '" + name + L"' expects " + to_wstring(shouldbe) + L", got " + to_wstring(temp.size()) + L".");
+      die(node, "Too many parameters, macro '%S' expects %d, got %d.", name.c_str(), shouldbe, temp.size());
     }
     if(shouldbe > temp.size())
     {
-      die(node, L"Not enough parameters, macro '" + name + L"' expects " + to_wstring(shouldbe) + L", got " + to_wstring(temp.size()) + L".");
+      die(node, "Not enough parameters, macro '%S' expects %d, got %d.", name.c_str(), shouldbe, temp.size());
     }
     macroPosShift.push_back(temp);
     xmlNode* mac = macros[name].second;
-    for(xmlNode* state = mac->children; state != NULL; state = state->next)
-    {
-      if(state->type != XML_ELEMENT_NODE) continue;
+    for (auto state : children(mac)) {
       ret += processStatement(state);
     }
     macroPosShift.pop_back();
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "append"))
+  else if(nameIs(node, "append"))
   {
     // TODO: DTD says this can append to a clip
-    wstring name = toWstring(requireAttr(node, (const xmlChar*) "n"));
+    UString name = requireAttr(node, "n");
     if(vars.find(name) == vars.end() && localVars.find(name) == localVars.end())
     {
-      die(node, L"Unknown variable '" + name + L"'.");
+      die(node, "Unknown variable '%S'.", name.c_str());
     }
     ret += STRING;
-    ret += (wchar_t)name.size();
+    ret += (UChar)name.size();
     ret += name;
     ret += FETCHVAR;
-    for(xmlNode* part = node->children; part != NULL; part = part->next)
-    {
-      if(part->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(part);
-        ret += CONCAT;
-      }
+    for (auto part : children(node)) {
+      ret += processValue(part);
+      ret += CONCAT;
     }
     ret += STRING;
-    ret += (wchar_t)name.size();
+    ret += (UChar)name.size();
     ret += name;
     ret += SETVAR;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "reject-current-rule"))
+  else if(nameIs(node, "reject-current-rule"))
   {
     ret += REJECTRULE;
   }
   else
   {
-    die(node, L"Unrecognized statement '" + toWstring(node->name) + L"'");
+    die(node, "Unrecognized statement '%S'", name(node).c_str());
   }
   return ret;
 }
 
-wstring
+UString
 TRXCompiler::processValue(xmlNode* node)
 {
-  wstring ret;
-  if(!xmlStrcmp(node->name, (const xmlChar*) "b"))
+  UString ret;
+  if(nameIs(node, "b"))
   {
     ret += INT;
-    ret += (wchar_t)getPos(node);
+    ret += (UChar)getPos(node);
     ret += BLANK;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "clip"))
+  else if(nameIs(node, "clip"))
   {
     ret += INT;
-    ret += (wchar_t)getPos(node);
+    ret += (UChar)getPos(node);
     ret += PUSHINPUT;
     ret += STRING;
-    wstring part = toWstring(requireAttr(node, (const xmlChar*) "part"));
+    UString part = requireAttr(node, "part");
     if(!PB.isAttrDefined(part))
     {
-      die(node, L"Unknown attribute '" + part + L"'");
+      die(node, "Unknown attribute '%S'", part.c_str());
     }
-    ret += (wchar_t)part.size();
+    ret += (UChar)part.size();
     ret += part;
-    wstring side = toWstring(getAttr(node, (const xmlChar*) "side"));
-    if(side == L"sl")
+    UString side = getattr(node, "side");
+    if(side == "sl"_u)
     {
       ret += SOURCECLIP;
     }
-    else if(side == L"tl" || side == L"")
+    else if(side == "tl"_u || side.empty())
     {
       ret += TARGETCLIP;
     }
-    else if(side == L"ref")
+    else if(side == "ref"_u)
     {
       ret += REFERENCECLIP;
     }
     else
     {
-      warn(node, L"Unknown clip side '" + side + L"', defaulting to 'tl'.");
+      warn(node, "Unknown clip side '%S', defaulting to 'tl'.", side.c_str());
       ret += TARGETCLIP;
     }
-    wstring link = toWstring(getAttr(node, (const xmlChar*) "link-to"));
+    UString link = getattr(node, "link-to");
     if(link.size() > 0)
     {
       ret += DUP;
       ret += STRING;
-      ret += (wchar_t)0;
+      ret += (UChar)0;
       ret += EQUAL;
       ret += JUMPONTRUE;
-      ret += (wchar_t)(link.size() + 5);
+      ret += (UChar)(link.size() + 5);
       ret += DROP;
       ret += STRING;
-      ret += (wchar_t)(link.size() + 2);
-      ret += L'<';
+      ret += (UChar)(link.size() + 2);
+      ret += '<';
       ret += link;
-      ret += L'>';
+      ret += '>';
     }
     // TODO: what does attribute "queue" do?
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "lit"))
+  else if(nameIs(node, "lit"))
   {
     ret += STRING;
-    wstring v = toWstring(requireAttr(node, (const xmlChar*) "v"));
-    ret += (wchar_t)v.size();
+    UString v = requireAttr(node, "v");
+    ret += (UChar)v.size();
     ret += v;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "lit-tag"))
+  else if(nameIs(node, "lit-tag"))
   {
     ret += STRING;
-    wstring v = L"<" + toWstring(requireAttr(node, (const xmlChar*) "v")) + L">";
-    v = StringUtils::substitute(v, L".", L"><");
-    if(v == L"<>")
+    UString v = "<"_u + requireAttr(node, "v") + ">"_u;
+    v = StringUtils::substitute(v, "."_u, "><"_u);
+    if(v == "<>"_u)
     {
-      v = L"";
+      v.clear();
     }
-    ret += (wchar_t)v.size();
+    ret += (UChar)v.size();
     ret += v;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "var"))
+  else if(nameIs(node, "var"))
   {
     ret += STRING;
-    wstring v = toWstring(requireAttr(node, (const xmlChar*) "n"));
+    UString v = requireAttr(node, "n");
     if(vars.find(v) == vars.end() && localVars.find(v) == localVars.end())
     {
-      die(node, L"Unknown variable '" + v + L"'.");
+      die(node, "Unknown variable '%S'.", v.c_str());
     }
-    ret += (wchar_t)v.size();
+    ret += (UChar)v.size();
     ret += v;
     ret += FETCHVAR;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "get-case-from"))
+  else if(nameIs(node, "get-case-from"))
   {
-    for(xmlNode* c = node->children; c != NULL; c = c->next)
-    {
-      if(c->type == XML_ELEMENT_NODE)
-      {
-        if(ret.size() > 0)
-        {
-          die(node, L"<get-case-from> cannot have multiple children.");
-        }
-        ret += processValue(c);
+    for (auto c : children(node)) {
+      if (ret.empty()) {
+        ret.append(processValue(c));
+      } else {
+        die(node, "<get-case-from> cannot have multiple children.");
       }
     }
     if(ret.size() == 0)
     {
-      die(node, L"<get-case-from> cannot be empty.");
+      die(node, "<get-case-from> cannot be empty.");
     }
     ret += INT;
-    ret += (wchar_t)getPos(node);
+    ret += (UChar)getPos(node);
     ret += PUSHINPUT;
     ret += STRING;
-    ret += (wchar_t)3;
-    ret += L"lem";
+    ret += (UChar)3;
+    ret += "lem"_u;
     ret += (inOutput ? TARGETCLIP : SOURCECLIP);
     ret += SETCASE;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "case-of"))
+  else if(nameIs(node, "case-of"))
   {
     ret += INT;
     ret += getPos(node);
     ret += PUSHINPUT;
     ret += STRING;
-    wstring part = toWstring(requireAttr(node, (const xmlChar*) "part"));
-    ret += (wchar_t)part.size();
+    UString part = requireAttr(node, "part");
+    ret += (UChar)part.size();
     ret += part;
-    wstring side = toWstring(getAttr(node, (const xmlChar*) "side"));
-    if(side == L"sl")
+    UString side = getattr(node, "side");
+    if(side == "sl"_u)
     {
       ret += SOURCECLIP;
     }
-    else if(side == L"tl" || side == L"")
+    else if(side == "tl"_u || side.empty())
     {
       ret += TARGETCLIP;
     }
-    else if(side == L"ref")
+    else if(side == "ref"_u)
     {
       ret += REFERENCECLIP;
     }
     else
     {
-      warn(node, L"Unknown side '" + side + L"', defaulting to target.");
+      warn(node, "Unknown side '%S', defaulting to target.", side.c_str());
       ret += TARGETCLIP;
     }
     ret += GETCASE;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "concat"))
+  else if(nameIs(node, "concat"))
   {
-    for(xmlNode* c = node->children; c != NULL; c = c->next)
-    {
+    for (auto c : children(node)) {
       unsigned int l = ret.size();
-      if(c->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(c);
-        if(l > 0 && ret.size() > l)
-        {
-          ret += CONCAT;
-        }
+      ret += processValue(c);
+      if(l > 0 && ret.size() > l) {
+        ret += CONCAT;
       }
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "lu"))
+  else if(nameIs(node, "lu"))
   {
     ret += CHUNK;
-    wstring children;
-    for(xmlNode* p = node->children; p != NULL; p = p->next)
-    {
-      if(p->type == XML_ELEMENT_NODE)
-      {
-        if(!xmlStrcmp(p->name, (const xmlChar*) "clip"))
-        {
-          wstring part = toWstring(getAttr(p, (const xmlChar*) "part"));
-          if(part == L"whole" || part == L"chcontent" || part == L"content")
-          {
-            children += INT;
-            children += (wchar_t)getPos(p);
-            children += PUSHINPUT;
-            children += APPENDALLCHILDREN;
-            if(part != L"whole") continue;
-          }
+    UString children_str;
+    for (auto p : children(node)) {
+      if(nameIs(p, "clip")) {
+        UString part = getattr(p, "part");
+        if(part == "whole"_u || part == "chcontent"_u || part == "content"_u) {
+          children_str += INT;
+          children_str += (UChar)getPos(p);
+          children_str += PUSHINPUT;
+          children_str += APPENDALLCHILDREN;
+          if(part != "whole"_u) continue;
         }
-        ret += processValue(p);
-        ret += APPENDSURFACE;
       }
+      ret += processValue(p);
+      ret += APPENDSURFACE;
     }
-    ret += children;
+    ret += children_str;
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "mlu"))
+  else if(nameIs(node, "mlu"))
   {
     ret += CHUNK;
-    for(xmlNode* lu = node->children; lu != NULL; lu = lu->next)
-    {
-      if(lu->type != XML_ELEMENT_NODE) continue;
-      if(xmlStrcmp(lu->name, (const xmlChar*) "lu"))
-      {
-        die(node, L"<mlu> can only contain <lu>s.");
+    for (auto lu : children(node)) {
+      if (!nameIs(lu, "lu")) {
+        die(node, "<mlu> can only contain <lu>s.");
       }
       if(ret.size() > 1)
       {
         ret += CONJOIN;
         ret += APPENDCHILD;
-        // apertium/transfer.cc has checks against appending '' wstring or '+#'
+        // apertium/transfer.cc has checks against appending '' UString or '+#'
         // TODO?
       }
       ret += processValue(lu);
       ret += APPENDCHILD;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "chunk"))
+  else if(nameIs(node, "chunk"))
   {
     ret += CHUNK;
-    for(xmlNode* part = node->children; part != NULL; part = part->next)
-    {
-      if(part->type != XML_ELEMENT_NODE) continue;
-      if(!xmlStrcmp(part->name, (const xmlChar*) "source"))
+    for (auto part : children(node)) {
+      if(nameIs(part, "source"))
       {
-        for(xmlNode* seg = part->children; seg != NULL; seg = seg->next)
-        {
-          if(seg->type != XML_ELEMENT_NODE) continue;
+        for (auto seg : children(part)) {
           ret += processValue(seg);
           ret += APPENDSURFACESL;
         }
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "target"))
+      else if(nameIs(part, "target"))
       {
-        for(xmlNode* seg = part->children; seg != NULL; seg = seg->next)
-        {
-          if(seg->type != XML_ELEMENT_NODE) continue;
+        for (auto seg : children(part)) {
           ret += processValue(seg);
           ret += APPENDSURFACE;
         }
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "reference"))
+      else if(nameIs(part, "reference"))
       {
-        for(xmlNode* seg = part->children; seg != NULL; seg = seg->next)
-        {
-          if(seg->type != XML_ELEMENT_NODE) continue;
+        for (auto seg : children(part)) {
           ret += processValue(seg);
           ret += APPENDSURFACEREF;
         }
       }
-      else if(!xmlStrcmp(part->name, (const xmlChar*) "contents"))
+      else if(nameIs(part, "contents"))
       {
-        for(xmlNode* seg = part->children; seg != NULL; seg = seg->next)
-        {
-          if(seg->type != XML_ELEMENT_NODE) continue;
+        for (auto seg : children(part)) {
           ret += processValue(seg);
           ret += APPENDCHILD;
         }
@@ -944,31 +828,29 @@ TRXCompiler::processValue(xmlNode* node)
     }
     if(!inOutput && currentOutputRule != -1)
     {
-      ret += (wchar_t)currentOutputRule;
-      ret += (wchar_t)0;
+      ret += (UChar)currentOutputRule;
+      ret += (UChar)0;
       ret += SETRULE;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "lu-count"))
+  else if(nameIs(node, "lu-count"))
   {
     ret += LUCOUNT;
   }
   else
   {
-    die(node, L"Unrecognized expression '" + toWstring(node->name) + L"'");
+    die(node, "Unrecognized expression '%S'", name(node).c_str());
   }
   return ret;
 }
 
-wstring
+UString
 TRXCompiler::processCond(xmlNode* node)
 {
-  wstring ret;
-  if(!xmlStrcmp(node->name, (const xmlChar*) "and"))
+  UString ret;
+  if(nameIs(node, "and"))
   {
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       unsigned int len = ret.size();
       ret += processCond(op);
       if(len > 0 && ret.size() > len)
@@ -977,11 +859,9 @@ TRXCompiler::processCond(xmlNode* node)
       }
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "or"))
+  else if(nameIs(node, "or"))
   {
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       unsigned int len = ret.size();
       ret += processCond(op);
       if(len > 0 && ret.size() > len)
@@ -990,14 +870,12 @@ TRXCompiler::processCond(xmlNode* node)
       }
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "not"))
+  else if(nameIs(node, "not"))
   {
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       if(ret.size() > 0)
       {
-        die(node, L"<not> cannot have multiple children");
+        die(node, "<not> cannot have multiple children");
       }
       else
       {
@@ -1006,22 +884,18 @@ TRXCompiler::processCond(xmlNode* node)
       }
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "equal"))
+  else if(nameIs(node, "equal"))
   {
     int i = 0;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(op);
-        i++;
-      }
+    for (auto op : children(node)) {
+      ret += processValue(op);
+      i++;
     }
     if(i != 2)
     {
-      die(node, L"<equal> must have exactly two children");
+      die(node, "<equal> must have exactly two children");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += EQUALCL;
     }
@@ -1030,22 +904,18 @@ TRXCompiler::processCond(xmlNode* node)
       ret += EQUAL;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "begins-with"))
+  else if(nameIs(node, "begins-with"))
   {
     int i = 0;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(op);
-        i++;
-      }
+    for (auto op : children(node)) {
+      ret += processValue(op);
+      i++;
     }
     if(i != 2)
     {
-      die(node, L"<begins-with> must have exactly two children");
+      die(node, "<begins-with> must have exactly two children");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += ISPREFIXCL;
     }
@@ -1054,42 +924,40 @@ TRXCompiler::processCond(xmlNode* node)
       ret += ISPREFIX;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "begins-with-list"))
+  else if(nameIs(node, "begins-with-list"))
   {
     bool list = false;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       if(ret.size() == 0)
       {
         ret += processValue(op);
       }
       else if(list)
       {
-        die(node, L"<begins-with-list> cannot have more than two children.");
+        die(node, "<begins-with-list> cannot have more than two children.");
       }
       else if(xmlStrcmp(op->name, (const xmlChar*) "list"))
       {
-        die(op, L"Expected <list>, found <" + toWstring(op->name) + L"> instead.");
+        die(op, "Expected <list>, found <%S> instead.", to_ustring((const char*)op->name).c_str());
       }
       else
       {
-        wstring name = toWstring(requireAttr(op, (const xmlChar*) "n"));
+        UString name = requireAttr(op, "n");
         if(lists.find(name) == lists.end())
         {
-          die(op, L"Unknown list '" + name + L"'.");
+          die(op, "Unknown list '%S'.", name.c_str());
         }
         ret += STRING;
-        ret += (wchar_t)name.size();
+        ret += (UChar)name.size();
         ret += name;
         list = true;
       }
     }
     if(!list)
     {
-      die(node, L"<begins-with-list> must have two children.");
+      die(node, "<begins-with-list> must have two children.");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += HASPREFIXCL;
     }
@@ -1098,22 +966,18 @@ TRXCompiler::processCond(xmlNode* node)
       ret += HASPREFIX;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "ends-with"))
+  else if(nameIs(node, "ends-with"))
   {
     int i = 0;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(op);
-        i++;
-      }
+    for (auto op : children(node)) {
+      ret += processValue(op);
+      i++;
     }
     if(i != 2)
     {
-      die(node, L"<ends-with> must have exactly two children");
+      die(node, "<ends-with> must have exactly two children");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += ISSUFFIXCL;
     }
@@ -1122,42 +986,40 @@ TRXCompiler::processCond(xmlNode* node)
       ret += ISSUFFIX;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "ends-with-list"))
+  else if(nameIs(node, "ends-with-list"))
   {
     bool list = false;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       if(ret.size() == 0)
       {
         ret += processValue(op);
       }
       else if(list)
       {
-        die(node, L"<ends-with-list> cannot have more than two children.");
+        die(node, "<ends-with-list> cannot have more than two children.");
       }
       else if(xmlStrcmp(op->name, (const xmlChar*) "list"))
       {
-        die(op, L"Expected <list>, found <" + toWstring(op->name) + L"> instead.");
+        die(op, "Expected <list>, found <%S> instead.", name(op).c_str());
       }
       else
       {
-        wstring name = toWstring(requireAttr(op, (const xmlChar*) "n"));
+        UString name = requireAttr(op, "n");
         if(lists.find(name) == lists.end())
         {
-          die(op, L"Unknown list '" + name + L"'.");
+          die(op, "Unknown list '%S'.", name.c_str());
         }
         ret += STRING;
-        ret += (wchar_t)name.size();
+        ret += (UChar)name.size();
         ret += name;
         list = true;
       }
     }
     if(!list)
     {
-      die(node, L"<ends-with-list> must have two children.");
+      die(node, "<ends-with-list> must have two children.");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += HASSUFFIXCL;
     }
@@ -1166,22 +1028,18 @@ TRXCompiler::processCond(xmlNode* node)
       ret += HASSUFFIX;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "contains-substring"))
+  else if(nameIs(node, "contains-substring"))
   {
     int i = 0;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type == XML_ELEMENT_NODE)
-      {
-        ret += processValue(op);
-        i++;
-      }
+    for (auto op : children(node)) {
+      ret += processValue(op);
+      i++;
     }
     if(i != 2)
     {
-      die(node, L"<contains-substring> must have exactly two children");
+      die(node, "<contains-substring> must have exactly two children");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += ISSUBSTRINGCL;
     }
@@ -1190,42 +1048,41 @@ TRXCompiler::processCond(xmlNode* node)
       ret += ISSUBSTRING;
     }
   }
-  else if(!xmlStrcmp(node->name, (const xmlChar*) "in"))
+  else if(nameIs(node, "in"))
   {
     bool list = false;
-    for(xmlNode* op = node->children; op != NULL; op = op->next)
-    {
-      if(op->type != XML_ELEMENT_NODE) continue;
+    for (auto op : children(node)) {
       if(ret.size() == 0)
       {
         ret += processValue(op);
       }
       else if(list)
       {
-        die(node, L"<in> cannot have more than two children.");
+        die(node, "<in> cannot have more than two children.");
       }
       else if(xmlStrcmp(op->name, (const xmlChar*) "list"))
       {
-        die(op, L"Expected <list>, found <" + toWstring(op->name) + L"> instead.");
+        die(op, "Expected <list>, found <%S> instead.",
+            name(op).c_str());
       }
       else
       {
-        wstring name = toWstring(requireAttr(op, (const xmlChar*) "n"));
+        UString name = requireAttr(op, "n");
         if(lists.find(name) == lists.end())
         {
-          die(op, L"Unknown list '" + name + L"'.");
+          die(op, "Unknown list '%S'.", name.c_str());
         }
         ret += STRING;
-        ret += (wchar_t)name.size();
+        ret += (UChar)name.size();
         ret += name;
         list = true;
       }
     }
     if(!list)
     {
-      die(node, L"<in> must have two children.");
+      die(node, "<in> must have two children.");
     }
-    if(toWstring(getAttr(node, (const xmlChar*) "caseless")) == L"yes")
+    if(getattr(node, "caseless") == "yes"_u)
     {
       ret += INCL;
     }
@@ -1236,111 +1093,101 @@ TRXCompiler::processCond(xmlNode* node)
   }
   else
   {
-    die(node, L"Unrecognized condition '" + toWstring(node->name) + L"'");
+    die(node, "Unrecognized condition '%S'", name(node).c_str());
   }
   return ret;
 }
 
-wstring
+UString
 TRXCompiler::processChoose(xmlNode* node)
 {
-  vector<pair<wstring, wstring>> clauses;
+  vector<pair<UString, UString>> clauses;
   int when = 0;
   int otherwise = 0;
-  for(xmlNode* cl = node->children; cl != NULL; cl = cl->next)
-  {
-    if(cl->type != XML_ELEMENT_NODE) continue;
-    if(!xmlStrcmp(cl->name, (const xmlChar*) "when"))
+  for (auto cl : children(node)) {
+    if(nameIs(cl, "when"))
     {
       if(otherwise > 0)
       {
-        warn(cl, L"Clauses after <otherwise> will not be executed.");
+        warn(cl, "Clauses after <otherwise> will not be executed.");
         continue;
       }
       when++;
-      wstring test, block;
-      for(xmlNode* n = cl->children; n != NULL; n = n->next)
-      {
-        if(n->type != XML_ELEMENT_NODE) continue;
-        if(!xmlStrcmp(n->name, (const xmlChar*) "test"))
+      UString test, block;
+      for (auto n : children(cl)) {
+        if(nameIs(n, "test"))
         {
           if(test.size() != 0)
           {
-            die(n, L"Cannot have multiple <test>s in a <when> clause.");
+            die(n, "Cannot have multiple <test>s in a <when> clause.");
           }
-          for(xmlNode* t = n->children; t != NULL; t = t->next)
-          {
-            if(t->type != XML_ELEMENT_NODE) continue;
+          for (auto t : children(n)) {
             if(test.size() == 0)
             {
               test = processCond(t);
             }
             else
             {
-              die(t, L"<test> must have exactly one child.");
+              die(t, "<test> must have exactly one child.");
             }
           }
           if(test.size() == 0)
           {
-            die(n, L"<test> cannot be empty.");
+            die(n, "<test> cannot be empty.");
           }
         }
         else
         {
           if(test.size() == 0)
           {
-            die(n, L"<when> clause must begin with <test>.");
+            die(n, "<when> clause must begin with <test>.");
           }
           block += processStatement(n);
         }
       }
       clauses.push_back(make_pair(test, block));
     }
-    else if(!xmlStrcmp(cl->name, (const xmlChar*) "otherwise"))
+    else if(nameIs(cl, "otherwise"))
     {
       otherwise++;
       if(otherwise > 1)
       {
-        warn(cl, L"Multiple <otherwise> clauses will not be executed.");
+        warn(cl, "Multiple <otherwise> clauses will not be executed.");
         continue;
       }
-      wstring block;
-      for(xmlNode* state = cl->children; state != NULL; state = state->next)
-      {
-        if(state->type == XML_ELEMENT_NODE)
-        {
-          block += processStatement(state);
-        }
+      UString block;
+      for (auto state : children(cl)) {
+        block += processStatement(state);
       }
       if(block.size() > 0)
       {
-        clauses.push_back(make_pair(L"", block));
+        clauses.push_back(make_pair(""_u, block));
       }
       else
       {
-        warn(cl, L"Empty <otherwise> clause.");
+        warn(cl, "Empty <otherwise> clause.");
       }
     }
     else
     {
-      warn(cl, L"Ignoring unexpected clause in <choose>.");
+      warn(cl, "Ignoring unexpected clause in <choose>.");
     }
   }
-  wstring ret;
-  for(vector<pair<wstring, wstring>>::reverse_iterator it = clauses.rbegin(),
+  UString ret;
+  for(vector<pair<UString, UString>>::reverse_iterator it = clauses.rbegin(),
             limit = clauses.rend(); it != limit; it++)
   {
-    wstring act = it->second;
+    UString act = it->second;
     if(ret.size() > 0)
     {
       act += JUMP;
-      act += (wchar_t)ret.size();
+      act += (UChar)ret.size();
     }
-    wstring test = it->first;
+    UString test = it->first;
     if(test.size() > 0)
     {
       test += JUMPONFALSE;
-      test += (wchar_t)act.size();
+      test += (UChar)act.size();
     }
     ret = test + act + ret;
   }
@@ -1359,10 +1206,10 @@ TRXCompiler::write(const char* binfile)
   FILE* bin = fopen(binfile, "wb");
   if(bin == NULL)
   {
-    wcerr << L"Error: Cannot open " << binfile << L" for writing." << endl;
+    cerr << "Error: Cannot open " << binfile << " for writing." << endl;
     exit(EXIT_FAILURE);
   }
-  vector<pair<int, wstring>> inRules;
+  vector<pair<int, UString>> inRules;
   for(unsigned int i = 0; i < inputRules.size(); i++)
   {
     inRules.push_back(make_pair((inputRuleSizes[i]*2 - 1), inputRules[i]));
@@ -1374,6 +1221,6 @@ TRXCompiler::write(const char* binfile)
 void
 TRXCompiler::printStats()
 {
-  wcout << "Rules: " << inputRules.size() << endl;
-  wcout << "Macros: " << macros.size() << endl;
+  cout << "Rules: " << inputRules.size() << endl; // entries in inputRules (bytecode for non-postchunk rules); outputRules is not counted here
+  cout << "Macros: " << macros.size() << endl; // entries in macros (macros defined in the current file)
 }
diff --git a/src/trx_compiler.h b/src/trx_compiler.h
index 7f17fc7..86cb3dc 100644
--- a/src/trx_compiler.h
+++ b/src/trx_compiler.h
@@ -4,12 +4,12 @@
 #include <rtx_config.h>
 #include <apertium/transfer_data.h>
 #include <apertium/xml_reader.h>
-#include <lttoolbox/ltstr.h>
 #include <pattern.h>
 
 #include <libxml/xmlreader.h>
 #include <map>
 #include <string>
+#include <cstdarg>
 
 using namespace std;
 
@@ -26,38 +26,38 @@ private:
    * Macros defined in the current file
    * name => ( parameters, xml node )
    */
-  map<wstring, pair<int, xmlNode*>, Ltstr> macros;
+  map<UString, pair<int, xmlNode*>> macros;
 
   /**
    * Patterns defined in the current file
    */
-  map<wstring, vector<PatternElement*>, Ltstr> patterns;
+  map<UString, vector<PatternElement*>> patterns;
 
   /**
    * Global string variables
    * name => initial value
    */
-  map<wstring, wstring, Ltstr> vars;
+  map<UString, UString> vars;
 
   /**
    * Rule-specific string variable names
    */
-  set<wstring, Ltstr> localVars;
+  set<UString> localVars;
 
   /**
    * All lists
    */
-  map<wstring, set<wstring, Ltstr>, Ltstr> lists;
+  map<UString, set<UString>> lists;
 
   /**
    * Ids of rules which should not be compiled
    */
-  set<wstring, Ltstr> excludedRules;
+  set<UString> excludedRules;
 
   /**
    * Bytecode for non-postchunk rules
    */
-  vector<wstring> inputRules;
+  vector<UString> inputRules;
 
   /**
    * Sizes of patterns for non-postchunk rules
@@ -67,7 +67,7 @@ private:
   /**
    * Bytecode for postchunk rules
    */
-  vector<wstring> outputRules;
+  vector<UString> outputRules;
 
   /**
    * Remapped positions within macros
@@ -109,40 +109,25 @@ private:
    * Report a fatal error and exit
    * @param node - xml element closest to the error
    */
-  void die(xmlNode* node, wstring msg);
+  void die(xmlNode* node, const char* fmt, ...);
 
   /**
    * Report a non-fatal error
    * @param node - xml element closest to the error
    */
-  void warn(xmlNode* node, wstring msg);
+  void warn(xmlNode* node, const char* fmt, ...);
 
   //////////
   // PARSING UTILITIES
   //////////
 
-  /**
-   * Return the value of an attribute or an empty string
-   * @param node - xml element
-   * @param attr - name of attribute
-   * @return attribute value or empty string
-   */
-  xmlChar* getAttr(xmlNode* node, const xmlChar* attr);
-
   /**
    * getAttr(), but calls die() if attribute isn't found
    * @param node - xml element
    * @param attr - name of attribute
    * @return attribute value
    */
-  xmlChar* requireAttr(xmlNode* node, const xmlChar* attr);
-
-  /**
-   * Convert a the libxml string format to std::wstring
-   * @param s - libxml string
-   * @return equivalent wstring
-   */
-  wstring toWstring(const xmlChar* s);
+  UString requireAttr(xmlNode* node, const char* attr);
 
   /**
    * Parse pos attribute and convert appropriately if in a macro
@@ -169,7 +154,7 @@ private:
    * @param ats - category elements
    * @return inserted name, may or may not be equal to name
    */
-  wstring insertAttr(wstring name, set<wstring, Ltstr> ats);
+  UString insertAttr(UString name, set<UString> ats);
 
   /**
    * Pass a list to PatternBuilder, name-mangling if necessary
@@ -177,7 +162,7 @@ private:
    * @param ats - list elements
    * @return inserted name, may or may not be equal to name
    */
-  wstring insertList(wstring name, set<wstring, Ltstr> ats);
+  UString insertList(UString name, set<UString> ats);
 
   //////////
   // XML PARSING
@@ -224,7 +209,7 @@ private:
    * <let>, <out>, <choose>, <modify-case>, <call-macro>, <append>, <reject-current-rule>
    * @return bytecode
    */
-  wstring processStatement(xmlNode* node);
+  UString processStatement(xmlNode* node);
 
   /**
    * Parse and compile one of
@@ -232,7 +217,7 @@ private:
    * <concat>, <lu>, <mlu>, <chunk>, <lu-count>
    * @return bytecode
    */
-  wstring processValue(xmlNode* node);
+  UString processValue(xmlNode* node);
 
   /**
    * Parse and compile one of
@@ -240,13 +225,13 @@ private:
    * <ends-with>, <ends-with-list>, <contains-substring>, <in>
    * @return bytecode
    */
-  wstring processCond(xmlNode* node);
+  UString processCond(xmlNode* node);
 
   /**
    * Parse and compile <choose>
    * @return bytecode
    */
-  wstring processChoose(xmlNode* node);
+  UString processChoose(xmlNode* node);
 
 public:
   TRXCompiler();
@@ -254,7 +239,7 @@ public:
   void loadLex(const string& fname);
   void compile(string file);
   void write(const char* binfile);
-  void excludeRule(wstring name)
+  void excludeRule(UString name) // adds `name` to excludedRules, the set of rule ids which should not be compiled
   {
     excludedRules.insert(name);
   }