commit 3a293af7262f649d268aec4be95c707ae70a6a9a
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Fri Jun 4 11:48:57 2021 -0500

    extracting string constants

diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc
index 12509f5..31d2865 100644
--- a/lttoolbox/att_compiler.cc
+++ b/lttoolbox/att_compiler.cc
@@ -55,13 +55,14 @@ AttCompiler::clear()
 void
 AttCompiler::convert_hfst(UString& symbol)
 {
-  if (symbol == "@0@"_u || symbol == "ε"_u)
-  {
+  if (symbol == Transducer::HFST_EPSILON_SYMBOL_SHORT ||
+      symbol == Transducer::HFST_EPSILON_SYMBOL_LONG ||
+      symbol == Transducer::LTTB_EPSILON_SYMBOL) {
     symbol.clear();
-  }
-  else if (symbol == "@_SPACE_@"_u)
-  {
+  } else if (symbol == Transducer::HFST_SPACE_SYMBOL) {
     symbol = " "_u;
+  } else if (symbol == Transducer::HFST_TAB_SYMBOL) {
+    symbol = "\t"_u;
   }
 }
 
diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc
index b016d84..851c68b 100644
--- a/lttoolbox/compiler.cc
+++ b/lttoolbox/compiler.cc
@@ -464,10 +464,9 @@ Compiler::skip(UString &name, UString const &elem, bool open)
 }
 
 EntryToken
-Compiler::procIdentity(UString const &wsweight, bool ig)
+Compiler::procIdentity(double const entry_weight, bool ig)
 {
   vector<int> both_sides;
-  double entry_weight = stod(wsweight);
 
   if(!xmlTextReaderIsEmptyElement(reader))
   {
@@ -507,10 +506,9 @@ Compiler::procIdentity(UString const &wsweight, bool ig)
 }
 
 EntryToken
-Compiler::procTransduction(UString const &wsweight)
+Compiler::procTransduction(double const entry_weight)
 {
   vector<int> lhs, rhs;
-  double entry_weight = stod(wsweight);
   UString name;
 
   skip(name, COMPILER_LEFT_ELEM);
@@ -718,7 +716,7 @@ Compiler::procSection()
     requireAttribute(type, COMPILER_TYPE_ATTR, COMPILER_SECTION_ELEM);
 
     current_section = id;
-    current_section += "@"_u;
+    current_section += '@';
     current_section.append(type);
   }
   else
@@ -758,9 +756,10 @@ Compiler::procEntry()
     return;
   }
 
-  if(wsweight.empty())
+  double weight = 0.0;
+  if(!wsweight.empty())
   {
-    wsweight = "0.0000"_u;
+    weight = stod(wsweight);
   }
 
   vector<EntryToken> elements;
@@ -785,15 +784,15 @@ Compiler::procEntry()
     int type = xmlTextReaderNodeType(reader);
     if(name == COMPILER_PAIR_ELEM)
     {
-      elements.push_back(procTransduction(wsweight));
+      elements.push_back(procTransduction(weight));
     }
     else if(name == COMPILER_IDENTITY_ELEM)
     {
-      elements.push_back(procIdentity(wsweight, false));
+      elements.push_back(procIdentity(weight, false));
     }
     else if(name == COMPILER_IDENTITYGROUP_ELEM)
     {
-      elements.push_back(procIdentity(wsweight, true));
+      elements.push_back(procIdentity(weight, true));
     }
     else if(name == COMPILER_REGEXP_ELEM)
     {
diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h
index 5ad073d..ad18f69 100644
--- a/lttoolbox/compiler.h
+++ b/lttoolbox/compiler.h
@@ -222,13 +222,13 @@ private:
    * Parse the &lt;p&gt; element
    * @return a list of tokens from the dictionary's entry
    */
-  EntryToken procTransduction(UString const &wsweight);
+  EntryToken procTransduction(double const entry_weight);
 
   /**
    * Parse the &lt;i&gt; element
    * @return a list of tokens from the dictionary's entry
    */
-  EntryToken procIdentity(UString const &wsweight, bool ig = false);
+  EntryToken procIdentity(double const entry_weight, bool ig = false);
 
   /**
    * Parse the &lt;par&gt; element
diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc
index 90a1212..0bc6cc6 100644
--- a/lttoolbox/fst_processor.cc
+++ b/lttoolbox/fst_processor.cc
@@ -27,6 +27,18 @@
 using namespace std;
 
 
+UString const FSTProcessor::XML_TEXT_NODE           = "#text"_u;
+UString const FSTProcessor::XML_COMMENT_NODE        = "#comment"_u;
+UString const FSTProcessor::XML_IGNORED_CHARS_ELEM  = "ignored-chars"_u;
+UString const FSTProcessor::XML_RESTORE_CHAR_ELEM   = "restore-char"_u;
+UString const FSTProcessor::XML_RESTORE_CHARS_ELEM  = "restore-chars"_u;
+UString const FSTProcessor::XML_VALUE_ATTR          = "value"_u;
+UString const FSTProcessor::XML_CHAR_ELEM           = "char"_u;
+UString const FSTProcessor::WBLANK_START            = "[["_u;
+UString const FSTProcessor::WBLANK_END              = "]]"_u;
+UString const FSTProcessor::WBLANK_FINAL            = "[[/]]"_u;
+
+
 FSTProcessor::FSTProcessor() :
 default_weight(0.0000),
 outOfWord(false),
@@ -123,19 +135,19 @@ void
 FSTProcessor::procNodeICX()
 {
   UString name = XMLParseUtil::readName(reader);
-  if(name == "#text"_u)
+  if(name == XML_TEXT_NODE)
   {
     /* ignore */
   }
-  else if(name == "ignored-chars"_u)
+  else if(name == XML_IGNORED_CHARS_ELEM)
   {
     /* ignore */
   }
-  else if(name == "char"_u)
+  else if(name == XML_CHAR_ELEM)
   {
-    ignored_chars.insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, "value"_u)[0]));
+    ignored_chars.insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]));
   }
-  else if(name == "#comment"_u)
+  else if(name == XML_COMMENT_NODE)
   {
     /* ignore */
   }
@@ -157,23 +169,23 @@ void
 FSTProcessor::procNodeRCX()
 {
   UString name = XMLParseUtil::readName(reader);
-  if(name == "#text"_u)
+  if(name == XML_TEXT_NODE)
   {
     /* ignore */
   }
-  else if(name == "restore-chars"_u)
+  else if(name == XML_RESTORE_CHARS_ELEM)
   {
     /* ignore */
   }
-  else if(name == "char"_u)
+  else if(name == XML_CHAR_ELEM)
   {
-    rcx_current_char = static_cast<int32_t>(XMLParseUtil::attrib(reader, "value"_u)[0]);
+    rcx_current_char = static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]);
   }
-  else if(name == "restore-char"_u)
+  else if(name == XML_RESTORE_CHAR_ELEM)
   {
-    rcx_map[rcx_current_char].insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, "value"_u)[0]));
+    rcx_map[rcx_current_char].insert(static_cast<int32_t>(XMLParseUtil::attrib(reader, XML_VALUE_ATTR)[0]));
   }
-  else if(name == "#comment"_u)
+  else if(name == XML_COMMENT_NODE)
   {
     /* ignore */
   }
@@ -235,8 +247,7 @@ FSTProcessor::readFullBlock(InputFile& input, UChar32 const delim1, UChar32 cons
 UString
 FSTProcessor::readWblank(InputFile& input)
 {
-  UString result;
-  result += "[["_u;
+  UString result = WBLANK_START;
   UChar32 c = 0;
 
   while(!input.eof())
@@ -271,8 +282,7 @@ FSTProcessor::readWblank(InputFile& input)
 bool
 FSTProcessor::wblankPostGen(InputFile& input, UFILE *output)
 {
-  UString result;
-  result += "[["_u;
+  UString result = WBLANK_START;
   UChar32 c = 0;
 
   while(!input.eof())
@@ -781,11 +791,11 @@ FSTProcessor::combineWblanks()
 
   while(wblankqueue.size() > 0)
   {
-    if(wblankqueue.front().compare("[[/]]"_u) == 0)
+    if(wblankqueue.front().compare(WBLANK_FINAL) == 0)
     {
       if(final_wblank.empty())
       {
-        final_wblank += "[["_u;
+        final_wblank += WBLANK_START;
       }
       else if(final_wblank.size() > 2)
       {
@@ -809,7 +819,7 @@ FSTProcessor::combineWblanks()
 
   if(!final_wblank.empty())
   {
-    final_wblank += "]]"_u;
+    final_wblank += WBLANK_END;
     need_end_wblank = true;
   }
 
@@ -1899,7 +1909,7 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output)
       {
         if(need_end_wblank)
         {
-          write("[[/]]"_u, output);
+          write(WBLANK_FINAL, output);
           need_end_wblank = false;
         }
 
@@ -1920,7 +1930,7 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output)
 
         if(need_end_wblank)
         {
-          write("[[/]]"_u, output);
+          write(WBLANK_FINAL, output);
           need_end_wblank = false;
         }
       }
@@ -2028,7 +2038,7 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output)
 
             if(need_end_wblank)
             {
-              write("[[/]]"_u, output);
+              write(WBLANK_FINAL, output);
               need_end_wblank = false;
               u_fputc(sf[space_index], output);
               flushWblanks(output);
@@ -2394,32 +2404,17 @@ FSTProcessor::biltransfull(UString const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = "^="_u + result.substr(1);
-        }
-        else
-        {
-          result[0] = '^';
-        }
+      result.clear();
+      if(with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = "="_u + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if(mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
@@ -2562,32 +2557,17 @@ FSTProcessor::biltrans(UString const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = "^="_u + result.substr(1);
-        }
-        else
-        {
-          result[0] = '^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = "="_u + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
@@ -2671,6 +2651,7 @@ UString
 FSTProcessor::compose(UString const &lexforms, UString const &queue) const
 {
   UString result;
+  result += '/';
 
   for(unsigned int i = 1; i< lexforms.size(); i++)
   {
@@ -2686,7 +2667,7 @@ FSTProcessor::compose(UString const &lexforms, UString const &queue) const
     result += lexforms[i];
   }
 
-  return "/"_u + result + queue;
+  return result + queue;
 }
 
 void
@@ -2937,32 +2918,17 @@ FSTProcessor::biltransWithQueue(UString const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = "^="_u + result.substr(1);
-        }
-        else
-        {
-          result[0] = '^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = "="_u + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
@@ -2988,10 +2954,9 @@ FSTProcessor::biltransWithQueue(UString const &input_word, bool with_delim)
   }
 
   if (!seentags
-      && ""_u == current_state.filterFinals(all_finals, alphabet,
-                                           escaped_chars,
-                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                           uppercase, firstupper, 0))
+      && current_state.filterFinals(all_finals, alphabet, escaped_chars,
+                                    displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                    uppercase, firstupper, 0).empty())
   {
     // word is not present
     if(with_delim)
@@ -3118,32 +3083,17 @@ FSTProcessor::biltransWithoutQueue(UString const &input_word, bool with_delim)
     }
     if(current_state.isFinal(all_finals))
     {
-      result = current_state.filterFinals(all_finals, alphabet,
-                                          escaped_chars,
-                                          displayWeightsMode, maxAnalyses, maxWeightClasses,
-                                          uppercase, firstupper, 0);
-      if(with_delim)
-      {
-        if(mark)
-        {
-          result = "^="_u + result.substr(1);
-        }
-        else
-        {
-          result[0] = '^';
-        }
+      result.clear();
+      if (with_delim) {
+        result += '^';
       }
-      else
-      {
-        if(mark)
-        {
-          result = "="_u + result.substr(1);
-        }
-        else
-        {
-          result = result.substr(1);
-        }
+      if (mark) {
+        result += '=';
       }
+      result += current_state.filterFinals(all_finals, alphabet,
+                                           escaped_chars,
+                                           displayWeightsMode, maxAnalyses, maxWeightClasses,
+                                           uppercase, firstupper, 0).substr(1);
     }
 
     if(current_state.size() == 0)
diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h
index 8be5eb0..6e5c218 100644
--- a/lttoolbox/fst_processor.h
+++ b/lttoolbox/fst_processor.h
@@ -489,6 +489,21 @@ private:
 
   xmlTextReaderPtr reader;
 public:
+
+  /**
+   * XML node/element/attribute names and wblank delimiter strings
+   */
+  static UString const XML_TEXT_NODE;
+  static UString const XML_COMMENT_NODE;
+  static UString const XML_IGNORED_CHARS_ELEM;
+  static UString const XML_RESTORE_CHAR_ELEM;
+  static UString const XML_RESTORE_CHARS_ELEM;
+  static UString const XML_VALUE_ATTR;
+  static UString const XML_CHAR_ELEM;
+  static UString const WBLANK_START;
+  static UString const WBLANK_END;
+  static UString const WBLANK_FINAL;
+
   FSTProcessor();
 
   void initAnalysis();
diff --git a/lttoolbox/lt_print.cc b/lttoolbox/lt_print.cc
index bc92108..c3c9ec4 100644
--- a/lttoolbox/lt_print.cc
+++ b/lttoolbox/lt_print.cc
@@ -179,7 +179,7 @@ int main(int argc, char *argv[])
     it->second.show(alphabet, output, 0, hfst);
     if(it != penum)
     {
-      u_fputs("--\n"_u, output);
+      u_fputs("--"_u, output);
     }
   }
 
diff --git a/lttoolbox/regexp_compiler.cc b/lttoolbox/regexp_compiler.cc
index 677fbf5..96d98c8 100644
--- a/lttoolbox/regexp_compiler.cc
+++ b/lttoolbox/regexp_compiler.cc
@@ -25,6 +25,7 @@ index(0),
 alphabet(0),
 state(0),
 letter(0),
+postop(0),
 default_weight(0.0000)
 {
 }
@@ -202,20 +203,20 @@ RegexpCompiler::Term()
     e = t.insertNewSingleTransduction((*alphabet)(letter, letter), e, default_weight);
     t.setFinal(e, default_weight);
     Postop();
-    if(postop == "*"_u)
+    if(postop == '*')
     {
       t.zeroOrMore((*alphabet)(0, 0));
     }
-    else if(postop == "+"_u)
+    else if(postop == '+')
     {
       t.oneOrMore((*alphabet)(0, 0));
     }
-    else if(postop == "?"_u)
+    else if(postop == '?')
     {
       t.optional((*alphabet)(0, 0));
     }
 
-    postop.clear();
+    postop = 0;
     state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
   }
   else if(token == '(')
@@ -229,20 +230,20 @@ RegexpCompiler::Term()
     consume(')');
     transducer.setFinal(state, default_weight);
     Postop();
-    if(postop == "*"_u)
+    if(postop == '*')
     {
       transducer.zeroOrMore((*alphabet)(0, 0));
     }
-    else if(postop == "+"_u)
+    else if(postop == '+')
     {
       transducer.oneOrMore((*alphabet)(0, 0));
     }
-    else if(postop == "?"_u)
+    else if(postop == '?')
     {
       transducer.optional((*alphabet)(0, 0));
     }
 
-    postop.clear();
+    postop = 0;
     state = t.insertTransducer(e, transducer, (*alphabet)(0, 0));
     transducer = t;
   }
@@ -300,17 +301,17 @@ RegexpCompiler::Postop()
   if(token == '*')
   {
     consume('*');
-    postop = "*"_u;
+    postop = '*';
   }
   else if(token == '?')
   {
     consume('?');
-    postop = "?"_u;
+    postop = '?';
   }
   else if(token == '+')
   {
     consume('+');
-    postop = "+"_u;
+    postop = '+';
   }
   else if(token == '(' || token == '[' || !isReserved(token) ||
           token == '\\' || token == '|' ||  token == FIN_FICHERO ||
@@ -369,20 +370,20 @@ RegexpCompiler::Esp()
     error();
   }
 
-  if(postop == "+"_u)
+  if(postop == '+')
   {
     t.oneOrMore((*alphabet)(0, 0));
   }
-  else if(postop == "*"_u)
+  else if(postop == '*')
   {
     t.zeroOrMore((*alphabet)(0, 0));
   }
-  else if(postop == "?"_u)
+  else if(postop == '?')
   {
     t.optional((*alphabet)(0, 0));
   }
   brackets.clear();
-  postop.clear();
+  postop = 0;
 
   state = transducer.insertTransducer(state, t, (*alphabet)(0, 0));
 }
@@ -480,5 +481,5 @@ RegexpCompiler::initialize(Alphabet *a)
   setAlphabet(a);
   transducer.clear();
   brackets.clear();
-  postop.clear();
+  postop = 0;
 }
diff --git a/lttoolbox/regexp_compiler.h b/lttoolbox/regexp_compiler.h
index ab7e460..e9bdb30 100644
--- a/lttoolbox/regexp_compiler.h
+++ b/lttoolbox/regexp_compiler.h
@@ -74,7 +74,7 @@ private:
   /**
    * Post-operator: '+', '?', '*'
    */
-  UString postop;
+  UChar32 postop;
 
   /**
    * Default value of weight
diff --git a/lttoolbox/state.cc b/lttoolbox/state.cc
index d476efc..4edbb85 100644
--- a/lttoolbox/state.cc
+++ b/lttoolbox/state.cc
@@ -894,7 +894,8 @@ State::restartFinals(const map<Node *, double> &finals, int requiredSymbol, Stat
 UString
 State::getReadableString(const Alphabet &a)
 {
-  UString retval = "["_u;
+  UString retval;
+  retval += '[';
 
   for(unsigned int i=0; i<state.size(); i++)
   {
@@ -908,9 +909,10 @@ State::getReadableString(const Alphabet &a)
 
     if(i+1 < state.size())
     {
-      retval.append(", "_u);
+      retval += ',';
+      retval += ' ';
     }
   }
-  retval.append("]"_u);
+  retval += ']';
   return retval;
 }
diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc
index 9b7e332..b013e98 100644
--- a/lttoolbox/tmx_compiler.cc
+++ b/lttoolbox/tmx_compiler.cc
@@ -37,14 +37,20 @@ UString const TMXCompiler::TMX_COMPILER_XMLLANG_ATTR = "xml:lang"_u;
 UString const TMXCompiler::TMX_COMPILER_LANG_ATTR    = "lang"_u;
 UString const TMXCompiler::TMX_COMPILER_SEG_ELEM     = "seg"_u;
 UString const TMXCompiler::TMX_COMPILER_PROP_ELEM    = "prop"_u;
+UString const TMXCompiler::TMX_COMPILER_TEXT_NODE    = "#text"_u;
+UString const TMXCompiler::TMX_COMPILER_COMMENT_NODE = "#comment"_u;
+UString const TMXCompiler::TMX_COMPILER_NUMBER_TAG   = "<n>"_u;
+UString const TMXCompiler::TMX_COMPILER_BLANK_TAG    = "<b>"_u;
 
 TMXCompiler::TMXCompiler() :
 reader(0),
 default_weight(0.0000)
 {
   LtLocale::tryToSetLocale();
-  alphabet.includeSymbol("<n>"_u); // -1 -> numbers
-  alphabet.includeSymbol("<b>"_u); // -2 -> blanks
+  alphabet.includeSymbol(TMX_COMPILER_NUMBER_TAG); // -1 -> numbers
+  alphabet.includeSymbol(TMX_COMPILER_BLANK_TAG); // -2 -> blanks
+  number_tag = alphabet(TMX_COMPILER_NUMBER_TAG);
+  blank_tag = alphabet(TMX_COMPILER_BLANK_TAG);
 }
 
 TMXCompiler::~TMXCompiler()
@@ -96,23 +102,23 @@ TMXCompiler::requireEmptyError(UString const &name)
 bool
 TMXCompiler::allBlanks()
 {
-  bool flag = true;
   UString text = XMLParseUtil::readValue(reader);
 
   for(auto c : text)
   {
-    flag = flag && u_isspace(c);
+    if (!u_isspace(c)) {
+      return false;
+    }
   }
-
-  return flag;
+  return true;
 }
 
 void
 TMXCompiler::skipBlanks(UString &name)
 {
-  while(name == "#text"_u || name == "#comment"_u)
+  while(name == TMX_COMPILER_TEXT_NODE || name == TMX_COMPILER_COMMENT_NODE)
   {
-    if(name != "#comment"_u)
+    if(name != TMX_COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
@@ -133,9 +139,9 @@ TMXCompiler::skip(UString &name, UString const &elem)
   xmlTextReaderRead(reader);
   name = XMLParseUtil::readName(reader);
 
-  while(name == "#text"_u || name == "#comment"_u)
+  while(name == TMX_COMPILER_TEXT_NODE || name == TMX_COMPILER_COMMENT_NODE)
   {
-    if(name != "#comment"_u)
+    if(name != TMX_COMPILER_COMMENT_NODE)
     {
       if(!allBlanks())
       {
@@ -192,7 +198,7 @@ TMXCompiler::insertTU(vector<int> const &origin, vector<int> const &meta)
     return;
   }
 
-  if(origin[0] == alphabet("<b>"_u) || meta[0] == alphabet("<b>"_u))
+  if(origin[0] == blank_tag || meta[0] == blank_tag)
   {
     return;
   }
@@ -268,12 +274,10 @@ TMXCompiler::align_blanks(vector<int> &o, vector<int> &m)
   vector<unsigned int> puntos;
   vector<int> resultado_o, resultado_m;
 
-  int const symbol = alphabet("<b>"_u);
-
   vector<vector<int> > so, sm;
 
-  split(o, so, symbol);
-  split(m, sm, symbol);
+  split(o, so, blank_tag);
+  split(m, sm, blank_tag);
 
   if(so.size() == sm.size())
   {
@@ -357,19 +361,15 @@ TMXCompiler::procTU()
         name = XMLParseUtil::readName(reader);
         type = xmlTextReaderNodeType(reader);
 
-        if(name == "#text"_u)
+        if(name == TMX_COMPILER_TEXT_NODE)
         {
-          UString l = XMLParseUtil::readValue(reader);
-          for(size_t i = 0, limit = l.size(); i != limit; i++)
-          {
-            ref->push_back(l[i]);
-          }
+          XMLParseUtil::readValueInto32(reader, *ref);
         }
         else if(name == TMX_COMPILER_HI_ELEM || name == TMX_COMPILER_PH_ELEM)
         {
           if(type != XML_READER_TYPE_END_ELEMENT)
           {
-            ref->push_back(alphabet("<b>"_u));
+            ref->push_back(blank_tag);
           }
         }
       }
@@ -394,7 +394,7 @@ TMXCompiler::procNode()
 
   // HACER: optimizar el orden de ejecución de esta ristra de "ifs"
 
-  if(name == "#text"_u)
+  if(name == TMX_COMPILER_TEXT_NODE)
   {
     /* ignorar */
   }
@@ -418,7 +418,7 @@ TMXCompiler::procNode()
   {
     procTU();
   }
-  else if(name== "#comment"_u)
+  else if(name== TMX_COMPILER_COMMENT_NODE)
   {
     /* ignorar */
   }
@@ -438,14 +438,14 @@ TMXCompiler::write(FILE *output)
   write_le(output, features);
 
   // letters (empty to keep the file format)
-  Compression::string_write(""_u, output);
+  Compression::multibyte_write(0, output);
 
   // symbols
   alphabet.write(output);
 
-  // transducers
+  // transducers (1, with empty name)
   Compression::multibyte_write(1, output); // keeping file format
-  Compression::string_write(""_u, output); // keeping file format
+  Compression::multibyte_write(0, output); // keeping file format
   transducer.write(output);
 
   cout << origin_language << "->" << meta_language << " ";
@@ -498,7 +498,7 @@ TMXCompiler::align(vector<int> &origin, vector<int> &meta)
       numbers_origin_start.push_back(i);
       numbers_origin_length.push_back(nl);
       i += nl-1;
-      modified_origin.push_back(alphabet("<n>"_u));
+      modified_origin.push_back(number_tag);
     }
     else
     {
diff --git a/lttoolbox/tmx_compiler.h b/lttoolbox/tmx_compiler.h
index 7d0633a..9cf9595 100644
--- a/lttoolbox/tmx_compiler.h
+++ b/lttoolbox/tmx_compiler.h
@@ -76,6 +76,9 @@ private:
    */
   UString meta_language_inner_code;
 
+  int32_t number_tag;
+  int32_t blank_tag;
+
 
   /**
    * Method to parse an XML Node
@@ -163,6 +166,10 @@ public:
   static UString const TMX_COMPILER_LANG_ATTR;
   static UString const TMX_COMPILER_SEG_ELEM;
   static UString const TMX_COMPILER_PROP_ELEM;
+  static UString const TMX_COMPILER_TEXT_NODE;
+  static UString const TMX_COMPILER_COMMENT_NODE;
+  static UString const TMX_COMPILER_NUMBER_TAG;
+  static UString const TMX_COMPILER_BLANK_TAG;
 
 
   /**
diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc
index cf534a1..e27c972 100644
--- a/lttoolbox/transducer.cc
+++ b/lttoolbox/transducer.cc
@@ -26,6 +26,23 @@
 #include <vector>
 #include <cstring>
 
+UString const Transducer::HFST_EPSILON_SYMBOL_SHORT   = "@0@"_u;
+UString const Transducer::HFST_EPSILON_SYMBOL_LONG    = "@_EPSILON_SYMBOL_@"_u;
+// The epsilon symbol (U+03B5) is built from its code point because the ""_u
+// helper has no u""_u form; this is the only constant that would need one.
+UString const Transducer::LTTB_EPSILON_SYMBOL         = UString(1, (UChar)0x3B5);
+                                                   // = "ε"_u;
+UString const Transducer::HFST_SPACE_SYMBOL           = "@_SPACE_@"_u;
+UString const Transducer::HFST_TAB_SYMBOL             = "@_TAB_@"_u;
+UString const Transducer::GROUP_SYMBOL                = "#"_u;
+UString const Transducer::JOIN_SYMBOL                 = "+"_u;
+UString const Transducer::ANY_TAG_SYMBOL              = "<ANY_TAG>"_u;
+UString const Transducer::ANY_CHAR_SYMBOL             = "<ANY_CHAR>"_u;
+UString const Transducer::LSX_BOUNDARY_SYMBOL         = "<$>"_u;
+UString const Transducer::COMPOUND_ONLY_L_SYMBOL      = "<compound-only-L>"_u;
+UString const Transducer::COMPOUND_R_SYMBOL           = "<compound-R>"_u;
+
+
 int
 Transducer::newState()
 {
@@ -720,21 +737,20 @@ Transducer::escapeSymbol(UString& symbol, bool hfst) const
   {
     if(hfst)
     {
-      symbol = "@0@"_u;
+      symbol = HFST_EPSILON_SYMBOL_SHORT;
     }
     else
     {
-      //symbol = "ε"_u;
-      symbol += (UChar)949;
+      symbol = LTTB_EPSILON_SYMBOL;
     }
   }
   else if(hfst && symbol == " "_u)
   {
-    symbol = "@_SPACE_@"_u;
+    symbol = HFST_SPACE_SYMBOL;
   }
   else if(hfst && symbol == "\t"_u)
   {
-    symbol = "@_TAB_@"_u;
+    symbol = HFST_TAB_SYMBOL;
   }
 }
 
@@ -983,10 +999,6 @@ Transducer
 Transducer::moveLemqsLast(Alphabet const &alphabet,
                           int const epsilon_tag)
 {
-  // TODO: These should be in file which is included by both
-  // fst_processor.cc and compiler.cc:
-  UString COMPILER_GROUP_ELEM = "#"_u;
-
   Transducer new_t;
   typedef int SearchState;
   std::set<SearchState> seen;
@@ -1009,7 +1021,7 @@ Transducer::moveLemqsLast(Alphabet const &alphabet,
       alphabet.getSymbol(left, alphabet.decode(label).first);
       int new_src = states_this_new[this_src];
 
-      if(left == COMPILER_GROUP_ELEM)
+      if(left == GROUP_SYMBOL)
       {
         Transducer tagsFirst = copyWithTagsFirst(this_trg, label, alphabet, epsilon_tag);
         new_t.finals.insert(make_pair(
@@ -1054,16 +1066,6 @@ Transducer::intersect(Transducer &trimmer,
    * The trimmer is typically a bidix passed through appendDotStar.
    */
 
-  // TODO: These should be in file which is included by both
-  // fst_processor.cc and compiler.cc:
-  UString compoundOnlyLSymbol = "<compound-only-L>"_u;
-  UString compoundRSymbol = "<compound-R>"_u;
-  UString COMPILER_JOIN_ELEM = "+"_u;
-  UString COMPILER_GROUP_ELEM = "#"_u;
-  UString COMPILER_ANY_TAG = "<ANY_TAG>"_u;
-  UString COMPILER_ANY_CHAR = "<ANY_CHAR>"_u;
-  UString COMPILER_SEPARABLE_BOUNDARY = "<$>"_u;
-
   // When searching, we need to record (this, (trimmer, trimmer_pre_plus))
   typedef std::pair<int, std::pair<int, int > > SearchState;
   // first: currently searched state in this;
@@ -1138,7 +1140,7 @@ Transducer::intersect(Transducer &trimmer,
       UString this_right;
       this_a.getSymbol(this_right, this_a.decode(this_label).second);
 
-      if(this_right == COMPILER_JOIN_ELEM || this_right == COMPILER_SEPARABLE_BOUNDARY)
+      if(this_right == JOIN_SYMBOL || this_right == LSX_BOUNDARY_SYMBOL)
       {
         if(trimmer_preplus == trimmer_src) {
           // Keep the old preplus state if it was set; equal to current trimmer state means unset:
@@ -1159,13 +1161,13 @@ Transducer::intersect(Transducer &trimmer,
                            trimmed_trg, // toState
                            this_label, // symbol-pair, using this alphabet
                            this_wt); //weight of transduction
-        if(this_right == COMPILER_SEPARABLE_BOUNDARY && isFinal(this_trg))
+        if(this_right == LSX_BOUNDARY_SYMBOL && isFinal(this_trg))
         {
           trimmed.setFinal(trimmed_trg, default_weight);
         }
       }
-      else if ( this_right == compoundOnlyLSymbol
-                || this_right == compoundRSymbol
+      else if ( this_right == COMPOUND_ONLY_L_SYMBOL
+                || this_right == COMPOUND_R_SYMBOL
                 || this_right.empty() )
       {
         // Stay put in the trimmer FST
@@ -1196,7 +1198,7 @@ Transducer::intersect(Transducer &trimmer,
         // Loop through non-epsilon arcs from the live state of trimmer
 
         // If we see a hash/group, we may have to rewind our trimmer state first:
-        if(this_right == COMPILER_GROUP_ELEM && trimmer_preplus != trimmer_src)
+        if(this_right == GROUP_SYMBOL && trimmer_preplus != trimmer_src)
         {
           states_this_trimmed.insert(make_pair(make_pair(this_src, make_pair(trimmer_preplus,
                                                                              trimmer_preplus)),
@@ -1218,7 +1220,7 @@ Transducer::intersect(Transducer &trimmer,
 
           if(!trimmer_left.empty() && // we've already dealt with trimmer epsilons
              (this_right == trimmer_left ||
-              (this_right == ((trimmer_left[0] == '<') ? COMPILER_ANY_TAG : COMPILER_ANY_CHAR))))
+              (this_right == ((trimmer_left[0] == '<') ? ANY_TAG_SYMBOL : ANY_CHAR_SYMBOL))))
           {
             next = make_pair(this_trg, make_pair(trimmer_trg, trimmer_preplus_next));
             if(seen.find(next) == seen.end())
diff --git a/lttoolbox/transducer.h b/lttoolbox/transducer.h
index 5dcabba..3dd91d4 100644
--- a/lttoolbox/transducer.h
+++ b/lttoolbox/transducer.h
@@ -91,6 +91,22 @@ private:
 
 public:
 
+  /**
+   * Special symbol strings (epsilon, space, tab, group, join, any-tag/any-char, boundary, compound)
+   */
+  static UString const HFST_EPSILON_SYMBOL_SHORT;
+  static UString const HFST_EPSILON_SYMBOL_LONG;
+  static UString const LTTB_EPSILON_SYMBOL;
+  static UString const HFST_SPACE_SYMBOL;
+  static UString const HFST_TAB_SYMBOL;
+  static UString const GROUP_SYMBOL;
+  static UString const JOIN_SYMBOL;
+  static UString const ANY_TAG_SYMBOL;
+  static UString const ANY_CHAR_SYMBOL;
+  static UString const LSX_BOUNDARY_SYMBOL;
+  static UString const COMPOUND_ONLY_L_SYMBOL;
+  static UString const COMPOUND_R_SYMBOL;
+
   /**
    * Constructor
    */