commit 985127e770de9cc4f09c7f485db19cd6ae031be8
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Fri Jun 11 13:23:40 2021 -0500

    unbundle utfcpp and drop old utf_converter code

diff --git a/Makefile.am b/Makefile.am
index 444db77..dc6873f 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -15,7 +15,7 @@ if HAVE_PYTHON_BINDINGS
 SUBDIRS += python
 endif
 
-EXTRA_DIST=autogen.sh README-MODES apertium.m4 utf8 tests
+EXTRA_DIST=autogen.sh README-MODES apertium.m4 tests
 
 install-data-local:
 	mkdir -p $(DESTDIR)$(modesdir)
diff --git a/apertium/Makefile.am b/apertium/Makefile.am
index 494cfd4..e3cecc2 100644
--- a/apertium/Makefile.am
+++ b/apertium/Makefile.am
@@ -75,7 +75,6 @@ h_sources = a.h \
 	    ttag.h \
 	    unigram_tagger.h \
 	    unlocked_cstdio.h \
-	    utf_converter.h \
 	    utils.h \
 	    xml_reader.h \
 		xml_walk_util.h
@@ -140,7 +139,6 @@ cc_sources = a.cc \
 	     trx_reader.cc \
 	     tsx_reader.cc \
 	     unigram_tagger.cc \
-	     utf_converter.cc \
 	     xml_reader.cc \
 		 xml_walk_util.cc
 
@@ -303,7 +301,7 @@ apertium_gen_modes_SOURCES = gen_modes.cc
 apertium_gen_modes_LDADD = -lapertium$(VERSION_MAJOR) $(lib_LTLIBRARIES)
 
 if WINDOWS
-AM_CPPFLAGS = -I$(top_srcdir)/utf8 -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
+AM_CPPFLAGS = -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
 else
 AM_CPPFLAGS = -I$(top_srcdir) $(APERTIUM_CFLAGS) $(ICU_CFLAGS)
 endif
diff --git a/apertium/adapt_docx.cc b/apertium/adapt_docx.cc
index 4699a0c..91f4937 100644
--- a/apertium/adapt_docx.cc
+++ b/apertium/adapt_docx.cc
@@ -30,7 +30,7 @@
 #include <fcntl.h>
 #endif
 
-#include "utf8/utf8.h"
+#include <utf8.h>
 #include "unicode/uchar.h"
 
 using namespace std;
diff --git a/apertium/apertium-postlatex.l b/apertium/apertium-postlatex.l
index 4cf5edf..39bea76 100644
--- a/apertium/apertium-postlatex.l
+++ b/apertium/apertium-postlatex.l
@@ -23,7 +23,7 @@ extern "C" {
 #include "apertium_config.h"
 #endif
 #include <apertium/unlocked_cstdio.h>
-#include <utf8/utf8.h>
+#include <utf8.h>
 #ifdef _WIN32
 #include <io.h>
 #include <fcntl.h>
diff --git a/apertium/apertium_pretransfer.cc b/apertium/apertium_pretransfer.cc
index b67fac8..25bccc0 100644
--- a/apertium/apertium_pretransfer.cc
+++ b/apertium/apertium_pretransfer.cc
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
     if(!input.open(argv[argc-1])) {
       usage(argv[0]);
     }
-    u_finit(stdout, NULL, NULL);
+    output = u_finit(stdout, NULL, NULL);
   }
   else
   {
diff --git a/apertium/apertium_tmxbuild.cc b/apertium/apertium_tmxbuild.cc
index 06588d8..55f81b3 100644
--- a/apertium/apertium_tmxbuild.cc
+++ b/apertium/apertium_tmxbuild.cc
@@ -24,7 +24,6 @@
 
 #include <apertium/apertium_config.h>
 #include <apertium/tmx_builder.h>
-#include <apertium/utf_converter.h>
 #include <apertium/string_utils.h>
 #include "apertium_config.h"
 #include <apertium/unlocked_cstdio.h>
diff --git a/apertium/deformat.xsl b/apertium/deformat.xsl
index 56c2aa6..6e71285 100644
--- a/apertium/deformat.xsl
+++ b/apertium/deformat.xsl
@@ -168,7 +168,7 @@ extern "C" {
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
-#include &lt;utf8/utf8.h&gt;
+#include &lt;utf8.h&gt;
 #include &lt;apertium/unlocked_cstdio.h&gt;
 #ifdef _WIN32
 #include &lt;io.h&gt;
diff --git a/apertium/exception_type.cc b/apertium/exception_type.cc
index c83dc3f..7c1eec8 100644
--- a/apertium/exception_type.cc
+++ b/apertium/exception_type.cc
@@ -15,7 +15,6 @@
 
 #include "exception_type.h"
 
-#include "utf_converter.h"
 #include <sstream>
 #include <string>
 
diff --git a/apertium/gen_modes.cc b/apertium/gen_modes.cc
index 1dc7f8f..0a4f07f 100644
--- a/apertium/gen_modes.cc
+++ b/apertium/gen_modes.cc
@@ -22,7 +22,6 @@
 #include <iostream>
 #include <fstream>
 #include "string_utils.h"
-#include "utf_converter.h"
 #include <libgen.h>
 #include <getopt.h>
 #include <libxml/xmlreader.h>
diff --git a/apertium/mtx_reader.cc b/apertium/mtx_reader.cc
index 159e8cc..70f85de 100644
--- a/apertium/mtx_reader.cc
+++ b/apertium/mtx_reader.cc
@@ -22,8 +22,6 @@
 #include <apertium/tsx_reader.h>
 #include <apertium/perceptron_spec.h>
 
-#include <utf8/utf8.h> // TODO
-
 #include <cstdlib>
 #include <iostream>
 #include <sstream>
diff --git a/apertium/perceptron_spec.cc b/apertium/perceptron_spec.cc
index 378d22b..b70560a 100644
--- a/apertium/perceptron_spec.cc
+++ b/apertium/perceptron_spec.cc
@@ -4,7 +4,7 @@
 #include <lttoolbox/match_state.h>
 #include <iomanip>
 #include <apertium/string_utils.h>
-#include <utf8/utf8.h>
+#include <utf8.h>
 
 
 namespace Apertium {
diff --git a/apertium/reformat.xsl b/apertium/reformat.xsl
index 0fca2bc..07ad1b2 100644
--- a/apertium/reformat.xsl
+++ b/apertium/reformat.xsl
@@ -26,7 +26,7 @@
 #ifndef GENFORMAT
 #include "apertium_config.h"
 #endif
-#include &lt;utf8/utf8.h&gt;
+#include &lt;utf8.h&gt;
 #include &lt;apertium/unlocked_cstdio.h&gt;
 
 #include &lt;cstdlib&gt;
diff --git a/apertium/tagger_utils.cc b/apertium/tagger_utils.cc
index 7a1bcf6..0e8738d 100644
--- a/apertium/tagger_utils.cc
+++ b/apertium/tagger_utils.cc
@@ -23,18 +23,6 @@
 #include <algorithm>
 #include <climits>
 #include <apertium/string_utils.h>
-#ifdef _MSC_VER
-#define wcstok wcstok_s
-#endif
-#ifdef __MINGW32__
-
-wchar_t *_wcstok(wchar_t *wcs, const wchar_t *delim, wchar_t **ptr) {
-  (void)ptr;
-  return wcstok(wcs, delim);
-}
-
-#define wcstok _wcstok
-#endif
 
 using namespace Apertium;
 
@@ -75,25 +63,6 @@ int tagger_utils::ntokens_multiword(UString const &s)
     }
   }
   return n;
-  /*
-   wchar_t *news = new wchar_t[s.size()+1];
-   wcscpy(news, s.c_str());
-   news[s.size()] = 0;
-   cerr << news << endl;
-
-   wchar_t const *delim = "_";
-   wchar_t *ptr;
-   int n=0;
-
-   if (wcstok(news, delim, &ptr))
-     n++;
-   while (wcstok(NULL, delim, &ptr))
-     n++;
-
-   delete[] news;
-
-   return n;
-   */
 }
 
 int tagger_utils::nguiones_fs(UString const & s) {
@@ -105,24 +74,6 @@ int tagger_utils::nguiones_fs(UString const & s) {
     }
   }
   return n;
-  /*
-   UChar *news = new UChar[s.size()+1];
-   wcscpy(news, s.c_str());
-   news[s.size()] = 0;
-   cerr << news << endl;
-   wchar_t const *delim = "-";
-   wchar_t *ptr;
-   int n=0;
-
-   if (wcstok(news, delim, &ptr))
-     n++;
-   while (wcstok(NULL, delim, &ptr))
-     n++;
-
-   delete[] news;
-
-   return n;
-  */
 }
 
 UString tagger_utils::trim(UString s)
@@ -200,7 +151,6 @@ set<TTag> &
 tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
   set<TTag> &ret = td.getOpenClass();
   Collection &output = td.getOutput();
-  int ret_idx = output[ret];
 
   for (int k=0; k<output.size(); k++) {
     const set<TTag> &ambg_class = output[k];
@@ -209,7 +159,6 @@ tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
       continue;
     }
     if (includes(ambg_class.begin(), ambg_class.end(), c.begin(), c.end())) {
-      ret_idx = k;
       ret = ambg_class;
     }
   }
@@ -304,4 +253,3 @@ ostream& operator<< (ostream& os, const set<T>& s) {
   os<<'}';
   return os;
 }
-
diff --git a/apertium/tagger_word.cc b/apertium/tagger_word.cc
index cb593b5..83f15a6 100644
--- a/apertium/tagger_word.cc
+++ b/apertium/tagger_word.cc
@@ -15,7 +15,6 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/tagger_word.h>
-#include <apertium/utf_converter.h>
 #include <apertium/string_utils.h>
 #include "apertium_config.h"
 #include <apertium/unlocked_cstdio.h>
diff --git a/apertium/tmx_builder.cc b/apertium/tmx_builder.cc
index d87eb7a..b535b3b 100644
--- a/apertium/tmx_builder.cc
+++ b/apertium/tmx_builder.cc
@@ -15,7 +15,6 @@
  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  */
 #include <apertium/tmx_builder.h>
-#include <apertium/utf_converter.h>
 #include <apertium/string_utils.h>
 #include <apertium/tmx_aligner_tool.h>
 #include <lttoolbox/compression.h>
diff --git a/apertium/transfer_data.cc b/apertium/transfer_data.cc
index f350891..d5e2b6c 100644
--- a/apertium/transfer_data.cc
+++ b/apertium/transfer_data.cc
@@ -17,7 +17,6 @@
 
 #include <apertium/transfer_data.h>
 #include <lttoolbox/compression.h>
-#include <apertium/utf_converter.h>
 #include <apertium/apertium_re.h>
 #include <iostream>
 #include <apertium/string_utils.h>
diff --git a/apertium/transfer_mult.cc b/apertium/transfer_mult.cc
index a163430..3f00496 100644
--- a/apertium/transfer_mult.cc
+++ b/apertium/transfer_mult.cc
@@ -18,17 +18,12 @@
 #include <apertium/trx_reader.h>
 #include <lttoolbox/compression.h>
 #include <lttoolbox/xml_parse_util.h>
-#include <apertium/utf_converter.h>
 #include <apertium/string_utils.h>
 
 #include <cctype>
 #include <iostream>
 #include <stack>
 
-#ifdef _WIN32
-#include <utf8_fwrap.h>
-#endif
-
 using namespace std;
 
 void
diff --git a/apertium/utf_converter.cc b/apertium/utf_converter.cc
deleted file mode 100644
index e721152..0000000
--- a/apertium/utf_converter.cc
+++ /dev/null
@@ -1,613 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#include <apertium/utf_converter.h>
-#include <iostream>
-#include <cstdlib>
-#include <apertium/string_utils.h>
-
-using namespace Apertium;
-
-#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
-#define UNI_MAX_BMP (UTF32)0x0000FFFF
-#define UNI_MAX_UTF16 (UTF32)0x0010FFFF
-#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
-#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
-#define UNI_SUR_HIGH_START  (UTF32)0xD800
-#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
-#define UNI_SUR_LOW_START   (UTF32)0xDC00
-#define UNI_SUR_LOW_END     (UTF32)0xDFFF
-
-using namespace std;
-
-namespace UtfConverter
-{
-
-  typedef unsigned int	 UTF32;	/* at least 32 bits */
-  typedef unsigned short UTF16;	/* at least 16 bits */
-  typedef unsigned char	 UTF8;	/* typically 8 bits */
-
-  /* Some fundamental constants */
-
-  typedef enum {
-    conversionOK, 	/* conversion successful */
-    sourceExhausted,	/* partial character in source, but hit end */
-    targetExhausted,	/* insuff. room in target for conversion */
-    sourceIllegal	/* source sequence is illegal/malformed */
-  } ConversionResult;
-
-  typedef enum {
-    strictConversion = 0,
-    lenientConversion
-  } ConversionFlags;
-
-  static const int halfShift  = 10; /* used for shifting by 10 bits */
-
-  static const UTF32 halfBase = 0x0010000UL;
-  static const UTF32 halfMask = 0x3FFUL;
-
-
-  void conversionError()
-  {
-    cerr << "Error: conversion error" << endl;
-    exit(EXIT_FAILURE);
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF32toUTF16 (
-					const UTF32** sourceStart, const UTF32* sourceEnd,
-					UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF32* source = *sourceStart;
-    UTF16* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      if (target >= targetEnd) {
-	result = targetExhausted; break;
-      }
-      ch = *source++;
-      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
-	/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = (UTF16)ch; /* normal case */
-	}
-      } else if (ch > UNI_MAX_LEGAL_UTF32) {
-	if (flags == strictConversion) {
-	  result = sourceIllegal;
-	} else {
-	  *target++ = UNI_REPLACEMENT_CHAR;
-	}
-      } else {
-	/* target is a character in range 0xFFFF - 0x10FFFF. */
-	if (target + 1 >= targetEnd) {
-	  --source; /* Back up source pointer! */
-	  result = targetExhausted; break;
-	}
-	ch -= halfBase;
-	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
-	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF16toUTF32 (
-					const UTF16** sourceStart, const UTF16* sourceEnd,
-					UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF16* source = *sourceStart;
-    UTF32* target = *targetStart;
-    UTF32 ch, ch2;
-    while (source < sourceEnd) {
-      const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
-      ch = *source++;
-      /* If we have a surrogate pair, convert to UTF32 first. */
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-	/* If the 16 bits following the high surrogate are in the source buffer... */
-	if (source < sourceEnd) {
-	  ch2 = *source;
-	  /* If it's a low surrogate, convert to UTF32. */
-	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
-	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
-	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
-	    ++source;
-	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  }
-	} else { /* We don't have the 16 bits following the high surrogate. */
-	  --source; /* return to the high surrogate */
-	  result = sourceExhausted;
-	  break;
-	}
-      } else if (flags == strictConversion) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      if (target >= targetEnd) {
-	source = oldSource; /* Back up source pointer! */
-	result = targetExhausted; break;
-      }
-      *target++ = ch;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Index into the table below with the first byte of a UTF-8 sequence to
-   * get the number of trailing bytes that are supposed to follow it.
-   * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
-   * left as-is for anyone who may want to do such conversion, which was
-   * allowed in earlier algorithms.
-   */
-  static const char trailingBytesForUTF8[256] = {
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
-    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
-  };
-
-  /*
-   * Magic values subtracted from a buffer value during UTF8 conversion.
-   * This table contains as many values as there might be trailing bytes
-   * in a UTF-8 sequence.
-   */
-  static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
-					    0x03C82080UL, 0xFA082080UL, 0x82082080UL };
-
-  /*
-   * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
-   * into the first byte, depending on how many bytes follow.  There are
-   * as many entries in this table as there are UTF-8 sequence types.
-   * (I.e., one byte sequence, two byte... etc.). Remember that sequencs
-   * for *legal* UTF-8 will be 4 or fewer bytes total.
-   */
-  static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
-
-  /* --------------------------------------------------------------------- */
-
-  /* The interface converts a whole buffer to avoid function-call overhead.
-   * Constants have been gathered. Loops & conditionals have been removed as
-   * much as possible for efficiency, in favor of drop-through switches.
-   * (See "Note A" at the bottom of the file for equivalent code.)
-   * If your compiler supports it, the "isLegalUTF8" call can be turned
-   * into an inline function.
-   */
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF16toUTF8 (
-				       const UTF16** sourceStart, const UTF16* sourceEnd,
-				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF16* source = *sourceStart;
-    UTF8* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      unsigned short bytesToWrite = 0;
-      const UTF32 byteMask = 0xBF;
-      const UTF32 byteMark = 0x80;
-      const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
-      ch = *source++;
-      /* If we have a surrogate pair, convert to UTF32 first. */
-      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
-	/* If the 16 bits following the high surrogate are in the source buffer... */
-	if (source < sourceEnd) {
-	  UTF32 ch2 = *source;
-	  /* If it's a low surrogate, convert to UTF32. */
-	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
-	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
-	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
-	    ++source;
-	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
-	    --source; /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  }
-	} else { /* We don't have the 16 bits following the high surrogate. */
-	  --source; /* return to the high surrogate */
-	  result = sourceExhausted;
-	  break;
-	}
-      } else if (flags == strictConversion) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      /* Figure out how many bytes the result will require */
-      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
-      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
-      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
-      } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
-      } else {			    bytesToWrite = 3;
-      ch = UNI_REPLACEMENT_CHAR;
-      }
-
-      target += bytesToWrite;
-      if (target > targetEnd) {
-	source = oldSource; /* Back up source pointer! */
-	target -= bytesToWrite; result = targetExhausted; break;
-      }
-      switch (bytesToWrite) { /* note: everything falls through. */
-      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
-      }
-      target += bytesToWrite;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Utility routine to tell whether a sequence of bytes is legal UTF-8.
-   * This must be called with the length pre-determined by the first byte.
-   * If not calling this from ConvertUTF8to*, then the length can be set by:
-   *  length = trailingBytesForUTF8[*source]+1;
-   * and the sequence is illegal right away if there aren't that many bytes
-   * available.
-   * If presented with a length > 4, this returns false.  The Unicode
-   * definition of UTF-8 goes up to 4-byte sequences.
-   */
-
-  static bool isLegalUTF8(const UTF8 *source, int length) {
-    UTF8 a;
-    const UTF8 *srcptr = source+length;
-    switch (length) {
-    default: return false;
-      /* Everything else falls through when "true"... */
-    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
-    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
-    case 2: if ((a = (*--srcptr)) > 0xBF) return false;
-
-      switch (*source) {
-	/* no fall-through in this inner switch */
-      case 0xE0: if (a < 0xA0) return false; break;
-      case 0xED: if (a > 0x9F) return false; break;
-      case 0xF0: if (a < 0x90) return false; break;
-      case 0xF4: if (a > 0x8F) return false; break;
-      default:   if (a < 0x80) return false;
-      }
-
-    case 1: if (*source >= 0x80 && *source < 0xC2) return false;
-    }
-    if (*source > 0xF4) return false;
-    return true;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  /*
-   * Exported function to return whether a UTF-8 sequence is legal or not.
-   * This is not used here; it's just exported.
-   */
-  bool isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
-    int length = trailingBytesForUTF8[*source]+1;
-    if (source+length > sourceEnd) {
-      return false;
-    }
-    return isLegalUTF8(source, length);
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF8toUTF16 (
-				       const UTF8** sourceStart, const UTF8* sourceEnd,
-				       UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF8* source = *sourceStart;
-    UTF16* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch = 0;
-      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-      if (source + extraBytesToRead >= sourceEnd) {
-	result = sourceExhausted; break;
-      }
-      /* Do this check whether lenient or strict */
-      if (! isLegalUTF8(source, extraBytesToRead+1)) {
-	result = sourceIllegal;
-	break;
-      }
-      /*
-       * The cases all fall through. See "Note A" below.
-       */
-      switch (extraBytesToRead) {
-      case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-      case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
-      case 3: ch += *source++; ch <<= 6;
-      case 2: ch += *source++; ch <<= 6;
-      case 1: ch += *source++; ch <<= 6;
-      case 0: ch += *source++;
-      }
-      ch -= offsetsFromUTF8[extraBytesToRead];
-
-      if (target >= targetEnd) {
-	source -= (extraBytesToRead+1); /* Back up source pointer! */
-	result = targetExhausted; break;
-      }
-      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = (UTF16)ch; /* normal case */
-	}
-      } else if (ch > UNI_MAX_UTF16) {
-	if (flags == strictConversion) {
-	  result = sourceIllegal;
-	  source -= (extraBytesToRead+1); /* return to the start */
-	  break; /* Bail out; shouldn't continue */
-	} else {
-	  *target++ = UNI_REPLACEMENT_CHAR;
-	}
-      } else {
-	/* target is a character in range 0xFFFF - 0x10FFFF. */
-	if (target + 1 >= targetEnd) {
-	  source -= (extraBytesToRead+1); /* Back up source pointer! */
-	  result = targetExhausted; break;
-	}
-	ch -= halfBase;
-	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
-	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF32toUTF8 (
-				       const UTF32** sourceStart, const UTF32* sourceEnd,
-				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF32* source = *sourceStart;
-    UTF8* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch;
-      unsigned short bytesToWrite = 0;
-      const UTF32 byteMask = 0xBF;
-      const UTF32 byteMark = 0x80;
-      ch = *source++;
-      if (flags == strictConversion ) {
-	/* UTF-16 surrogate values are illegal in UTF-32 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  --source; /* return to the illegal value itself */
-	  result = sourceIllegal;
-	  break;
-	}
-      }
-      /*
-       * Figure out how many bytes the result will require. Turn any
-       * illegally large UTF32 things (> Plane 17) into replacement chars.
-       */
-      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
-      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
-      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
-      } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
-      } else {			    bytesToWrite = 3;
-      ch = UNI_REPLACEMENT_CHAR;
-      result = sourceIllegal;
-      }
-
-      target += bytesToWrite;
-      if (target > targetEnd) {
-	--source; /* Back up source pointer! */
-	target -= bytesToWrite; result = targetExhausted; break;
-      }
-      switch (bytesToWrite) { /* note: everything falls through. */
-      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
-      case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
-      }
-      target += bytesToWrite;
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  /* --------------------------------------------------------------------- */
-
-  ConversionResult ConvertUTF8toUTF32 (
-				       const UTF8** sourceStart, const UTF8* sourceEnd,
-				       UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
-    ConversionResult result = conversionOK;
-    const UTF8* source = *sourceStart;
-    UTF32* target = *targetStart;
-    while (source < sourceEnd) {
-      UTF32 ch = 0;
-      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
-      if (source + extraBytesToRead >= sourceEnd) {
-	result = sourceExhausted; break;
-      }
-      /* Do this check whether lenient or strict */
-      if (! isLegalUTF8(source, extraBytesToRead+1)) {
-	result = sourceIllegal;
-	break;
-      }
-      /*
-       * The cases all fall through. See "Note A" below.
-       */
-      switch (extraBytesToRead) {
-      case 5: ch += *source++; ch <<= 6;
-      case 4: ch += *source++; ch <<= 6;
-      case 3: ch += *source++; ch <<= 6;
-      case 2: ch += *source++; ch <<= 6;
-      case 1: ch += *source++; ch <<= 6;
-      case 0: ch += *source++;
-      }
-      ch -= offsetsFromUTF8[extraBytesToRead];
-
-      if (target >= targetEnd) {
-	source -= (extraBytesToRead+1); /* Back up the source pointer! */
-	result = targetExhausted; break;
-      }
-      if (ch <= UNI_MAX_LEGAL_UTF32) {
-	/*
-	 * UTF-16 surrogate values are illegal in UTF-32, and anything
-	 * over Plane 17 (> 0x10FFFF) is illegal.
-	 */
-	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
-	  if (flags == strictConversion) {
-	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
-	    result = sourceIllegal;
-	    break;
-	  } else {
-	    *target++ = UNI_REPLACEMENT_CHAR;
-	  }
-	} else {
-	  *target++ = ch;
-	}
-      } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
-	result = sourceIllegal;
-	*target++ = UNI_REPLACEMENT_CHAR;
-      }
-    }
-    *sourceStart = source;
-    *targetStart = target;
-    return result;
-  }
-
-  wstring fromUtf8(string const & utf8string)
-  {
-    size_t widesize = utf8string.length();
-    if (sizeof(wchar_t) == 2)
-      {
-	wstring resultstring;
-	resultstring.resize(widesize+1, L'\0');
-	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
-	const UTF8* sourceend = sourcestart + widesize;
-	UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
-	UTF16* targetend = targetstart + widesize;
-	ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, wcslen(resultstring.c_str()));
-      }
-    else if (sizeof(wchar_t) == 4)
-      {
-	wstring resultstring;
-	resultstring.resize(widesize+1, L'\0');
-	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
-	const UTF8* sourceend = sourcestart + widesize;
-	UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
-	UTF32* targetend = targetstart + widesize;
-	ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0,wcslen(resultstring.c_str()));
-      }
-    else
-      {
-	conversionError();
-      }
-    return L"";
-  }
-
-  string toUtf8(wstring const &widestring)
-  {
-    size_t widesize = widestring.length();
-
-    if (sizeof(wchar_t) == 2)
-      {
-	size_t utf8size = 3 * widesize + 1;
-	string resultstring;
-	resultstring.resize(utf8size, '\0');
-	const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
-	const UTF16* sourceend = sourcestart + widesize;
-	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
-	UTF8* targetend = targetstart + utf8size;
-	ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, strlen(resultstring.c_str()));
-      }
-    else if (sizeof(wchar_t) == 4)
-      {
-	size_t utf8size = 4 * widesize + 1;
-	string resultstring;
-	resultstring.resize(utf8size, '\0');
-	const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
-	const UTF32* sourceend = reinterpret_cast<const UTF32*>(widestring.c_str() + widesize);
-	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
-	UTF8* targetend = targetstart + utf8size;
-	ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
-	if (res != conversionOK)
-	  {
-	    conversionError();
-	  }
-	*targetstart = 0;
-	return resultstring.substr(0, strlen(resultstring.c_str()));
-      }
-    else
-      {
-	conversionError();
-      }
-    return "";
-  }
-}
diff --git a/apertium/utf_converter.h b/apertium/utf_converter.h
deleted file mode 100644
index 5e1f5b3..0000000
--- a/apertium/utf_converter.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation; either version 2 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <https://www.gnu.org/licenses/>.
- */
-#ifndef _UTFCONVERTER_
-#define _UTFCONVERTER_
-
-#include <string>
-
-using namespace std;
-
-namespace UtfConverter
-{
-    wstring fromUtf8(string const &utf8string);
-    string toUtf8(wstring const &widestring);
-}
-
-#endif
diff --git a/apertium/xml_reader.h b/apertium/xml_reader.h
index 3ad28c9..a1c7028 100644
--- a/apertium/xml_reader.h
+++ b/apertium/xml_reader.h
@@ -5,7 +5,6 @@
 #include <apertium/string_utils.h>
 #include <apertium/tagger_data.h>
 #include <apertium/ttag.h>
-#include <apertium/utf_converter.h>
 #include <lttoolbox/pattern_list.h>
 #include <lttoolbox/xml_parse_util.h>
 
diff --git a/configure.ac b/configure.ac
index 7965f13..b71d6dd 100644
--- a/configure.ac
+++ b/configure.ac
@@ -80,6 +80,7 @@ AC_LANG(C++)
 AC_HEADER_STDC
 AC_CHECK_HEADERS([stdlib.h string.h unistd.h stddef.h filesystem string_view])
 AC_CHECK_LIB([stdc++fs], [_ZNSt12experimental10filesystem2v112current_pathEv])
+AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
 
 AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt, getopt_long])
 AC_CHECK_FUNCS([setlocale strdup getopt snprintf])
diff --git a/python/setup.py.in b/python/setup.py.in
index 10d57cf..b5ed70a 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -23,7 +23,7 @@ def get_sources():
                   # interchunk.cc postchunk.cc transfer.cc
                   'apertium_re.cc', 'interchunk.cc', 'interchunk_word.cc', 'postchunk.cc', 'string_utils.cc', 'transfer.cc',
                   'transfer_data.cc', 'transfer_instr.cc', 'transfer_mult.cc', 'transfer_token.cc', 'transfer_word.cc',
-                  'trx_reader.cc', 'utf_converter.cc', 'xml_reader.cc',
+                  'trx_reader.cc', 'xml_reader.cc',
                   # 'pretransfer.cc'
                   'pretransfer.cc',
                   # tagger.cc
diff --git a/tests/tagger/test_find_similar_ambiguity_classes.cc b/tests/tagger/test_find_similar_ambiguity_classes.cc
index 8178788..938ac3c 100644
--- a/tests/tagger/test_find_similar_ambiguity_classes.cc
+++ b/tests/tagger/test_find_similar_ambiguity_classes.cc
@@ -1,5 +1,4 @@
 #include <lttoolbox/ustring.h>
-#include "apertium/utf_converter.h"
 #include "apertium/tagger_utils.h"
 #include "apertium/tagger_data_hmm.h"
 #include "apertium/tagger_data.h"
diff --git a/utf8/utf8.h b/utf8/utf8.h
deleted file mode 100644
index 82b13f5..0000000
--- a/utf8/utf8.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "utf8/checked.h"
-#include "utf8/unchecked.h"
-
-#endif // header guard
diff --git a/utf8/utf8/checked.h b/utf8/utf8/checked.h
deleted file mode 100644
index 1331155..0000000
--- a/utf8/utf8/checked.h
+++ /dev/null
@@ -1,327 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-#include <stdexcept>
-
-namespace utf8
-{
-    // Base for the exceptions that may be thrown from the library
-    class exception : public ::std::exception {
-    };
-
-    // Exceptions that may be thrown from the library functions.
-    class invalid_code_point : public exception {
-        uint32_t cp;
-    public:
-        invalid_code_point(uint32_t cp) : cp(cp) {}
-        virtual const char* what() const throw() { return "Invalid code point"; }
-        uint32_t code_point() const {return cp;}
-    };
-
-    class invalid_utf8 : public exception {
-        uint8_t u8;
-    public:
-        invalid_utf8 (uint8_t u) : u8(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-8"; }
-        uint8_t utf8_octet() const {return u8;}
-    };
-
-    class invalid_utf16 : public exception {
-        uint16_t u16;
-    public:
-        invalid_utf16 (uint16_t u) : u16(u) {}
-        virtual const char* what() const throw() { return "Invalid UTF-16"; }
-        uint16_t utf16_word() const {return u16;}
-    };
-
-    class not_enough_room : public exception {
-    public:
-        virtual const char* what() const throw() { return "Not enough space"; }
-    };
-
-    /// The library API - functions intended to be called by the users
-
-    template <typename octet_iterator>
-    octet_iterator append(uint32_t cp, octet_iterator result)
-    {
-        if (!utf8::internal::is_code_point_valid(cp))
-            throw invalid_code_point(cp);
-
-        if (cp < 0x80)                        // one octet
-            *(result++) = static_cast<uint8_t>(cp);
-        else if (cp < 0x800) {                // two octets
-            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else if (cp < 0x10000) {              // three octets
-            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        else {                                // four octets
-            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
-            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
-            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
-            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
-    {
-        while (start != end) {
-            octet_iterator sequence_start = start;
-            internal::utf_error err_code = utf8::internal::validate_next(start, end);
-            switch (err_code) {
-                case internal::UTF8_OK :
-                    for (octet_iterator it = sequence_start; it != start; ++it)
-                        *out++ = *it;
-                    break;
-                case internal::NOT_ENOUGH_ROOM:
-                    throw not_enough_room();
-                case internal::INVALID_LEAD:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    break;
-                case internal::INCOMPLETE_SEQUENCE:
-                case internal::OVERLONG_SEQUENCE:
-                case internal::INVALID_CODE_POINT:
-                    out = utf8::append (replacement, out);
-                    ++start;
-                    // just one replacement mark for the sequence
-                    while (start != end && utf8::internal::is_trail(*start))
-                        ++start;
-                    break;
-            }
-        }
-        return out;
-    }
-
-    template <typename octet_iterator, typename output_iterator>
-    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
-    {
-        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
-        return utf8::replace_invalid(start, end, out, replacement_marker);
-    }
-
-    template <typename octet_iterator>
-    uint32_t next(octet_iterator& it, octet_iterator end)
-    {
-        uint32_t cp = 0;
-        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
-        switch (err_code) {
-            case internal::UTF8_OK :
-                break;
-            case internal::NOT_ENOUGH_ROOM :
-                throw not_enough_room();
-            case internal::INVALID_LEAD :
-            case internal::INCOMPLETE_SEQUENCE :
-            case internal::OVERLONG_SEQUENCE :
-                throw invalid_utf8(*it);
-            case internal::INVALID_CODE_POINT :
-                throw invalid_code_point(cp);
-        }
-        return cp;
-    }
-
-    template <typename octet_iterator>
-    uint32_t peek_next(octet_iterator it, octet_iterator end)
-    {
-        return utf8::next(it, end);
-    }
-
-    template <typename octet_iterator>
-    uint32_t prior(octet_iterator& it, octet_iterator start)
-    {
-        // can't do much if it == start
-        if (it == start)
-            throw not_enough_room();
-
-        octet_iterator end = it;
-        // Go back until we hit either a lead octet or start
-        while (utf8::internal::is_trail(*(--it)))
-            if (it == start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        return utf8::peek_next(it, end);
-    }
-
-    /// Deprecated in versions that include "prior"
-    template <typename octet_iterator>
-    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
-    {
-        octet_iterator end = it;
-        while (utf8::internal::is_trail(*(--it)))
-            if (it == pass_start)
-                throw invalid_utf8(*it); // error - no lead byte in the sequence
-        octet_iterator temp = it;
-        return utf8::next(temp, end);
-    }
-
-    template <typename octet_iterator, typename distance_type>
-    void advance (octet_iterator& it, distance_type n, octet_iterator end)
-    {
-        for (distance_type i = 0; i < n; ++i)
-            utf8::next(it, end);
-    }
-
-    template <typename octet_iterator>
-    typename std::iterator_traits<octet_iterator>::difference_type
-    distance (octet_iterator first, octet_iterator last)
-    {
-        typename std::iterator_traits<octet_iterator>::difference_type dist;
-        for (dist = 0; first < last; ++dist)
-            utf8::next(first, last);
-        return dist;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-            if (utf8::internal::is_lead_surrogate(cp)) {
-                if (start != end) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
-                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                    else
-                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
-                }
-                else
-                    throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            }
-            // Lone trail surrogate
-            else if (utf8::internal::is_trail_surrogate(cp))
-                throw invalid_utf16(static_cast<uint16_t>(cp));
-
-            result = utf8::append(cp, result);
-        }
-        return result;
-    }
-
-    template <typename u16bit_iterator, typename octet_iterator>
-    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-    {
-        while (start != end) {
-            uint32_t cp = utf8::next(start, end);
-            if (cp > 0xffff) { //make a surrogate pair
-                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-            }
-            else
-                *result++ = static_cast<uint16_t>(cp);
-        }
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-    {
-        while (start != end)
-            result = utf8::append(*(start++), result);
-
-        return result;
-    }
-
-    template <typename octet_iterator, typename u32bit_iterator>
-    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-    {
-        while (start != end)
-            (*result++) = utf8::next(start, end);
-
-        return result;
-    }
-
-    // The iterator class
-    template <typename octet_iterator>
-    class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-      octet_iterator it;
-      octet_iterator range_start;
-      octet_iterator range_end;
-      public:
-      iterator () {}
-      explicit iterator (const octet_iterator& octet_it,
-                         const octet_iterator& range_start,
-                         const octet_iterator& range_end) :
-               it(octet_it), range_start(range_start), range_end(range_end)
-      {
-          if (it < range_start || it > range_end)
-              throw std::out_of_range("Invalid utf-8 iterator position");
-      }
-      // the default "big three" are OK
-      octet_iterator base () const { return it; }
-      uint32_t operator * () const
-      {
-          octet_iterator temp = it;
-          return utf8::next(temp, range_end);
-      }
-      bool operator == (const iterator& rhs) const
-      {
-          if (range_start != rhs.range_start || range_end != rhs.range_end)
-              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
-          return (it == rhs.it);
-      }
-      bool operator != (const iterator& rhs) const
-      {
-          return !(operator == (rhs));
-      }
-      iterator& operator ++ ()
-      {
-          utf8::next(it, range_end);
-          return *this;
-      }
-      iterator operator ++ (int)
-      {
-          iterator temp = *this;
-          utf8::next(it, range_end);
-          return temp;
-      }
-      iterator& operator -- ()
-      {
-          utf8::prior(it, range_start);
-          return *this;
-      }
-      iterator operator -- (int)
-      {
-          iterator temp = *this;
-          utf8::prior(it, range_start);
-          return temp;
-      }
-    }; // class iterator
-
-} // namespace utf8
-
-#endif //header guard
-
-
diff --git a/utf8/utf8/core.h b/utf8/utf8/core.h
deleted file mode 100644
index f85081f..0000000
--- a/utf8/utf8/core.h
+++ /dev/null
@@ -1,329 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include <iterator>
-
-namespace utf8
-{
-    // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
-    // You may need to change them to match your system.
-    // These typedefs have the same names as ones from cstdint, or boost/cstdint
-    typedef unsigned char   uint8_t;
-    typedef unsigned short  uint16_t;
-    typedef unsigned int    uint32_t;
-
-// Helper code - not intended to be directly called by the library users. May be changed at any time
-namespace internal
-{
-    // Unicode constants
-    // Leading (high) surrogates: 0xd800 - 0xdbff
-    // Trailing (low) surrogates: 0xdc00 - 0xdfff
-    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
-    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
-    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
-    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
-    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
-    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
-
-    // Maximum valid value for a Unicode code point
-    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
-
-    template<typename octet_type>
-    inline uint8_t mask8(octet_type oc)
-    {
-        return static_cast<uint8_t>(0xff & oc);
-    }
-    template<typename u16_type>
-    inline uint16_t mask16(u16_type oc)
-    {
-        return static_cast<uint16_t>(0xffff & oc);
-    }
-    template<typename octet_type>
-    inline bool is_trail(octet_type oc)
-    {
-        return ((utf8::internal::mask8(oc) >> 6) == 0x2);
-    }
-
-    template <typename u16>
-    inline bool is_lead_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_trail_surrogate(u16 cp)
-    {
-        return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u16>
-    inline bool is_surrogate(u16 cp)
-    {
-        return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
-    }
-
-    template <typename u32>
-    inline bool is_code_point_valid(u32 cp)
-    {
-        return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
-    }
-
-    template <typename octet_iterator>
-    inline typename std::iterator_traits<octet_iterator>::difference_type
-    sequence_length(octet_iterator lead_it)
-    {
-        uint8_t lead = utf8::internal::mask8(*lead_it);
-        if (lead < 0x80)
-            return 1;
-        else if ((lead >> 5) == 0x6)
-            return 2;
-        else if ((lead >> 4) == 0xe)
-            return 3;
-        else if ((lead >> 3) == 0x1e)
-            return 4;
-        else
-            return 0;
-    }
-
-    template <typename octet_difference_type>
-    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
-    {
-        if (cp < 0x80) {
-            if (length != 1)
-                return true;
-        }
-        else if (cp < 0x800) {
-            if (length != 2)
-                return true;
-        }
-        else if (cp < 0x10000) {
-            if (length != 3)
-                return true;
-        }
-
-        return false;
-    }
-
-    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
-
-    /// Helper for get_sequence_x
-    template <typename octet_iterator>
-    utf_error increase_safely(octet_iterator& it, octet_iterator end)
-    {
-        if (++it == end)
-            return NOT_ENOUGH_ROOM;
-
-        if (!utf8::internal::is_trail(*it))
-            return INCOMPLETE_SEQUENCE;
-
-        return UTF8_OK;
-    }
-
-    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
-
-    /// get_sequence_x functions decode utf-8 sequences of the length x
-    template <typename octet_iterator>
-    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-            return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    template <typename octet_iterator>
-    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        if (it == end)
-           return NOT_ENOUGH_ROOM;
-
-        code_point = utf8::internal::mask8(*it);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
-
-        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
-
-        code_point += (*it) & 0x3f;
-
-        return UTF8_OK;
-    }
-
-    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
-
-    template <typename octet_iterator>
-    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
-    {
-        // Save the original value of it so we can go back in case of failure
-        // Of course, it does not make much sense with i.e. stream iterators
-        octet_iterator original_it = it;
-
-        uint32_t cp = 0;
-        // Determine the sequence length based on the lead octet
-        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
-        const octet_difference_type length = utf8::internal::sequence_length(it);
-
-        // Get trail octets and calculate the code point
-        utf_error err = UTF8_OK;
-        switch (length) {
-            case 0:
-                return INVALID_LEAD;
-            case 1:
-                err = utf8::internal::get_sequence_1(it, end, cp);
-                break;
-            case 2:
-                err = utf8::internal::get_sequence_2(it, end, cp);
-            break;
-            case 3:
-                err = utf8::internal::get_sequence_3(it, end, cp);
-            break;
-            case 4:
-                err = utf8::internal::get_sequence_4(it, end, cp);
-            break;
-        }
-
-        if (err == UTF8_OK) {
-            // Decoding succeeded. Now, security checks...
-            if (utf8::internal::is_code_point_valid(cp)) {
-                if (!utf8::internal::is_overlong_sequence(cp, length)){
-                    // Passed! Return here.
-                    code_point = cp;
-                    ++it;
-                    return UTF8_OK;
-                }
-                else
-                    err = OVERLONG_SEQUENCE;
-            }
-            else
-                err = INVALID_CODE_POINT;
-        }
-
-        // Failure branch - restore the original value of the iterator
-        it = original_it;
-        return err;
-    }
-
-    template <typename octet_iterator>
-    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
-        uint32_t ignored;
-        return utf8::internal::validate_next(it, end, ignored);
-    }
-
-} // namespace internal
-
-    /// The library API - functions intended to be called by the users
-
-    // Byte order mark
-    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
-
-    template <typename octet_iterator>
-    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
-    {
-        octet_iterator result = start;
-        while (result != end) {
-            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
-            if (err_code != internal::UTF8_OK)
-                return result;
-        }
-        return result;
-    }
-
-    template <typename octet_iterator>
-    inline bool is_valid(octet_iterator start, octet_iterator end)
-    {
-        return (utf8::find_invalid(start, end) == end);
-    }
-
-    template <typename octet_iterator>
-    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
-    {
-        return (
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
-            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
-            ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
-           );
-    }
-
-    //Deprecated in release 2.3
-    template <typename octet_iterator>
-    inline bool is_bom (octet_iterator it)
-    {
-        return (
-            (utf8::internal::mask8(*it++)) == bom[0] &&
-            (utf8::internal::mask8(*it++)) == bom[1] &&
-            (utf8::internal::mask8(*it))   == bom[2]
-           );
-    }
-} // namespace utf8
-
-#endif // header guard
-
-
diff --git a/utf8/utf8/unchecked.h b/utf8/utf8/unchecked.h
deleted file mode 100644
index 989ccef..0000000
--- a/utf8/utf8/unchecked.h
+++ /dev/null
@@ -1,228 +0,0 @@
-// Copyright 2006 Nemanja Trifunovic
-
-/*
-Permission is hereby granted, free of charge, to any person or organization
-obtaining a copy of the software and accompanying documentation covered by
-this license (the "Software") to use, reproduce, display, distribute,
-execute, and transmit the Software, and to prepare derivative works of the
-Software, and to permit third-parties to whom the Software is furnished to
-do so, all subject to the following:
-
-The copyright notices in the Software and this entire statement, including
-the above license grant, this restriction and the following disclaimer,
-must be included in all copies of the Software, in whole or in part, and
-all derivative works of the Software, unless such copies or derivative
-works are solely in the form of machine-executable object code generated by
-a source language processor.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
-SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
-FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
-ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
-*/
-
-
-#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
-
-#include "core.h"
-
-namespace utf8
-{
-    namespace unchecked
-    {
-        template <typename octet_iterator>
-        octet_iterator append(uint32_t cp, octet_iterator result)
-        {
-            if (cp < 0x80)                        // one octet
-                *(result++) = static_cast<uint8_t>(cp);
-            else if (cp < 0x800) {                // two octets
-                *(result++) = static_cast<uint8_t>((cp >> 6)          | 0xc0);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else if (cp < 0x10000) {              // three octets
-                *(result++) = static_cast<uint8_t>((cp >> 12)         | 0xe0);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            else {                                // four octets
-                *(result++) = static_cast<uint8_t>((cp >> 18)         | 0xf0);
-                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)| 0x80);
-                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
-                *(result++) = static_cast<uint8_t>((cp & 0x3f)        | 0x80);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator>
-        uint32_t next(octet_iterator& it)
-        {
-            uint32_t cp = utf8::internal::mask8(*it);
-            typename std::iterator_traits<octet_iterator>::difference_type length = utf8::internal::sequence_length(it);
-            switch (length) {
-                case 1:
-                    break;
-                case 2:
-                    it++;
-                    cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
-                    break;
-                case 3:
-                    ++it;
-                    cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-                case 4:
-                    ++it;
-                    cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
-                    ++it;
-                    cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
-                    ++it;
-                    cp += (*it) & 0x3f;
-                    break;
-            }
-            ++it;
-            return cp;
-        }
-
-        template <typename octet_iterator>
-        uint32_t peek_next(octet_iterator it)
-        {
-            return utf8::unchecked::next(it);
-        }
-
-        template <typename octet_iterator>
-        uint32_t prior(octet_iterator& it)
-        {
-            while (utf8::internal::is_trail(*(--it))) ;
-            octet_iterator temp = it;
-            return utf8::unchecked::next(temp);
-        }
-
-        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
-        template <typename octet_iterator>
-        inline uint32_t previous(octet_iterator& it)
-        {
-            return utf8::unchecked::prior(it);
-        }
-
-        template <typename octet_iterator, typename distance_type>
-        void advance (octet_iterator& it, distance_type n)
-        {
-            for (distance_type i = 0; i < n; ++i)
-                utf8::unchecked::next(it);
-        }
-
-        template <typename octet_iterator>
-        typename std::iterator_traits<octet_iterator>::difference_type
-        distance (octet_iterator first, octet_iterator last)
-        {
-            typename std::iterator_traits<octet_iterator>::difference_type dist;
-            for (dist = 0; first < last; ++dist)
-                utf8::unchecked::next(first);
-            return dist;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
-        {
-            while (start != end) {
-                uint32_t cp = utf8::internal::mask16(*start++);
-            // Take care of surrogate pairs first
-                if (utf8::internal::is_lead_surrogate(cp)) {
-                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
-                    cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
-                }
-                result = utf8::unchecked::append(cp, result);
-            }
-            return result;
-        }
-
-        template <typename u16bit_iterator, typename octet_iterator>
-        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
-        {
-            while (start < end) {
-                uint32_t cp = utf8::unchecked::next(start);
-                if (cp > 0xffff) { //make a surrogate pair
-                    *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
-                    *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
-                }
-                else
-                    *result++ = static_cast<uint16_t>(cp);
-            }
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
-        {
-            while (start != end)
-                result = utf8::unchecked::append(*(start++), result);
-
-            return result;
-        }
-
-        template <typename octet_iterator, typename u32bit_iterator>
-        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
-        {
-            while (start < end)
-                (*result++) = utf8::unchecked::next(start);
-
-            return result;
-        }
-
-        // The iterator class
-        template <typename octet_iterator>
-          class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> {
-            octet_iterator it;
-            public:
-            iterator () {}
-            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
-            // the default "big three" are OK
-            octet_iterator base () const { return it; }
-            uint32_t operator * () const
-            {
-                octet_iterator temp = it;
-                return utf8::unchecked::next(temp);
-            }
-            bool operator == (const iterator& rhs) const
-            {
-                return (it == rhs.it);
-            }
-            bool operator != (const iterator& rhs) const
-            {
-                return !(operator == (rhs));
-            }
-            iterator& operator ++ ()
-            {
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return *this;
-            }
-            iterator operator ++ (int)
-            {
-                iterator temp = *this;
-                ::std::advance(it, utf8::internal::sequence_length(it));
-                return temp;
-            }
-            iterator& operator -- ()
-            {
-                utf8::unchecked::prior(it);
-                return *this;
-            }
-            iterator operator -- (int)
-            {
-                iterator temp = *this;
-                utf8::unchecked::prior(it);
-                return temp;
-            }
-          }; // class iterator
-
-    } // namespace utf8::unchecked
-} // namespace utf8
-
-
-#endif // header guard
-
diff --git a/utf8/utf8_fwrap.h b/utf8/utf8_fwrap.h
deleted file mode 100644
index 5d41b6b..0000000
--- a/utf8/utf8_fwrap.h
+++ /dev/null
@@ -1,140 +0,0 @@
-#ifndef _UTF8_FWRAP_HPP
-#define _UTF8_FWRAP_HPP
-
-#include <utf8.h>
-#include <string>
-#include <iterator>
-#include <stdexcept>
-#include <cstdio>
-#include <cwchar>
-#include <stdint.h>
-
-#ifdef _WIN32
-	#define utf32to8 utf16to8
-#endif
-
-inline wint_t fgetwc_u8(FILE *in) {
-#ifdef _WIN32
-	struct _cps {
-		FILE *f = 0;
-		wchar_t c = 0;
-	};
-	static _cps cps[4];
-
-	for (auto& cp : cps) {
-		if (cp.f == in) {
-			cp.f = 0;
-			return cp.c;
-		}
-	}
-#endif
-
-	int32_t rv = 0;
-	int c = 0, i = 0;
-	char buf[4];
-	if ((c = fgetc_unlocked(in)) != EOF) {
-		buf[i++] = static_cast<char>(c);
-		if ((c & 0xF0) == 0xF0) {
-			if (fread_unlocked(buf+i, 1, 3, in) != 3) {
-				throw std::runtime_error("Could not read 3 expected bytes from stream");
-			}
-			i += 3;
-		}
-		else if ((c & 0xE0) == 0xE0) {
-			if (fread_unlocked(buf+i, 1, 2, in) != 2) {
-				throw std::runtime_error("Could not read 2 expected bytes from stream");
-			}
-			i += 2;
-		}
-		else if ((c & 0xC0) == 0xC0) {
-			if (fread_unlocked(buf+i, 1, 1, in) != 1) {
-				throw std::runtime_error("Could not read 1 expected byte from stream");
-			}
-			i += 1;
-		}
-	}
-	if (i == 0 && c == EOF) {
-		rv = WEOF;
-	}
-	else {
-#ifdef _WIN32
-		wchar_t u16[2] = {};
-		utf8::unchecked::utf8to16(buf, buf+i, u16);
-
-		if (u16[1]) {
-			for (auto& cp : cps) {
-				if (cp.f == 0) {
-					cp.f = in;
-					cp.c = u16[1];
-					return u16[0];
-				}
-			}
-			throw std::runtime_error("Not enough space to store UTF-16 high surrogate");
-		}
-		rv = u16[0];
-#else
-		utf8::unchecked::utf8to32(buf, buf+i, &rv);
-#endif
-	}
-	return static_cast<wint_t>(rv);
-}
-
-inline wint_t fputwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	if (fwrite_unlocked(buf, 1, e-buf, out) != static_cast<size_t>(e-buf)) {
-		return WEOF;
-	}
-
-	return wc;
-}
-
-inline int fputws_u8(const wchar_t* str, FILE *out) {
-	static std::string buf;
-	buf.clear();
-	size_t len = wcslen(str);
-	utf8::unchecked::utf32to8(str, str+len, std::back_inserter(buf));
-	if (fwrite_unlocked(&buf[0], 1, buf.size(), out) != buf.size()) {
-		return WEOF;
-	}
-
-	return 1;
-}
-
-inline wint_t ungetwc_u8(wint_t wc, FILE *out) {
-	char buf[4] = {};
-	char *e = utf8::unchecked::utf32to8(&wc, &wc+1, buf);
-	for (char *b = buf ; b != e ; ++b) {
-		if (ungetc(*b, out) == EOF) {
-			return WEOF;
-		}
-	}
-
-	return wc;
-}
-
-#ifdef fgetwc_unlocked
-	#undef fgetwc_unlocked
-#endif
-#define fgetwc_unlocked fgetwc_u8
-
-#ifdef fputwc_unlocked
-	#undef fputwc_unlocked
-#endif
-#define fputwc_unlocked fputwc_u8
-
-#ifdef fputws_unlocked
-	#undef fputws_unlocked
-#endif
-#define fputws_unlocked fputws_u8
-
-#ifdef ungetwc
-	#undef ungetwc
-#endif
-#define ungetwc ungetwc_u8
-
-#ifdef _WIN32
-	#undef utf32to8
-#endif
-
-#endif