Index: branches/apertium-tagger/apertium2/apertium/apertium_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_tagger.cc	(revision 69632)
@@ -0,0 +1,737 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "apertium_tagger.h"
+
+#include "apertium_config.h"
+
+#include "align.h"
+#include "basic_exception_type.h"
+#include "basic_stream_tagger.h"
+#include "basic_stream_tagger_trainer.h"
+#include "basic_tagger.h"
+#include "err_exception.h"
+#include "exception.h"
+#include "file_tagger.h"
+#include "linebreak.h"
+#include "stream_5_3_1_tagger.h"
+#include "stream_5_3_1_tagger_trainer.h"
+#include "stream_5_3_2_tagger.h"
+#include "stream_5_3_2_tagger_trainer.h"
+#include "stream_5_3_3_tagger.h"
+#include "stream_5_3_3_tagger_trainer.h"
+#include <apertium/hmm.cc>
+#include <apertium/lswpost.h>
+#include <apertium/tagger_word.h>
+
+#include <lttoolbox/lt_locale.h>
+
+#include "getopt_long.h"
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <fstream>
+#include <iomanip>
+#include <ios>
+#include <iostream>
+#include <locale>
+#include <sstream>
+#include <string>
+#include <unistd.h>
+
+#ifdef _MSC_VER
+#include <fcntl.h>
+#include <io.h>
+#endif // _MSC_VER
+
+namespace Apertium {
+apertium_tagger::apertium_tagger(int &argc, char **&argv)
+    : argc(argc), argv(argv), The_val(),
+      // Parses the command line, then runs the selected mode (-g/-r/-s/-t).
+      The_indexptr(), FunctionTypeTypeOption_indexptr(),
+      FunctionTypeOption_indexptr(),
+      // Errors are printed on std::cerr and rethrown as err_Exception.
+      TheFunctionTypeType(), TheUnigramType(), TheFunctionType(),
+      TheFunctionTypeOptionArgument(0), TheFlags() {
+  try {
+    while (true) {
+      The_val = getopt_long(argc, argv, "dfghmpr:s:t:u:wz", longopts, &The_indexptr);
+      // ('h' is listed above so that the short form of --help is accepted.)
+      if (The_val == -1)
+        break;
+
+      set_indexptr();
+
+      switch (The_val) {
+      case 'd':
+        flagOptionCase(&basic_Tagger::Flags::getDebug,
+                       &basic_Tagger::Flags::setDebug);
+        break;
+      case 'f':
+        flagOptionCase(&basic_Tagger::Flags::getFirst,
+                       &basic_Tagger::Flags::setFirst);
+        break;
+      case 'm':
+        flagOptionCase(&basic_Tagger::Flags::getMark,
+                       &basic_Tagger::Flags::setMark);
+        break;
+      case 'p':
+        flagOptionCase(&basic_Tagger::Flags::getShowSuperficial,
+                       &basic_Tagger::Flags::setShowSuperficial);
+        break;
+      case 'z':
+        flagOptionCase(&basic_Tagger::Flags::getNullFlush,
+                       &basic_Tagger::Flags::setNullFlush);
+        break;
+      case 'u':
+        functionTypeTypeOptionCase(Unigram);
+        // Compare the whole argument so that e.g. "10" or "1x" is rejected.
+        if (std::strcmp(optarg, "1") == 0) {
+          TheUnigramType = Stream_5_3_1;
+          break;
+        }
+
+        if (std::strcmp(optarg, "2") == 0) {
+          TheUnigramType = Stream_5_3_2;
+          break;
+        }
+
+        if (std::strcmp(optarg, "3") == 0) {
+          TheUnigramType = Stream_5_3_3;
+          break;
+        }
+        // No model matched: report the valid choices.
+        {
+          std::stringstream what_;
+          what_ << "invalid argument '" << optarg << "' for '--unigram'\n"
+                                                     "Valid arguments are:\n"
+                                                     "  - '1'\n"
+                                                     "  - '2'\n"
+                                                     "  - '3'";
+          throw Exception::apertium_tagger::InvalidArgument(what_);
+        }
+        break;
+      case 'w':
+        functionTypeTypeOptionCase(SlidingWindow);
+        break;
+      case 'g':
+        functionTypeOptionCase(Tagger);
+        break;
+      case 'r':
+        functionTypeOptionCase(Retrain);
+        getIterationsArgument();
+        break;
+      case 's':
+        functionTypeOptionCase(Supervised);
+        getIterationsArgument();
+        break;
+      case 't':
+        functionTypeOptionCase(Train);
+        getIterationsArgument();
+        break;
+      case 'h':
+        help();
+        return;
+      default:
+        throw err_Exception();
+      }
+    }
+    // No -g/-r/-s/-t was given: print the usage text instead of running.
+    if (!TheFunctionType) {
+      help();
+      return;
+    }
+    // Dispatch on the mode; without -u/-w the HMM tagger is used.
+    switch (*TheFunctionType) {
+    case Tagger:
+      if (!TheFunctionTypeType) {
+        HMM HiddenMarkovModelTagger_;
+        g_FILE_Tagger(HiddenMarkovModelTagger_);
+        break;
+      }
+
+      switch (*TheFunctionTypeType) {
+      case Unigram: {
+        switch (*TheUnigramType) {
+        case Stream_5_3_1: {
+          Stream_5_3_1_Tagger Stream_5_3_1_Tagger_(TheFlags);
+          g_StreamTagger(Stream_5_3_1_Tagger_);
+        } break;
+        case Stream_5_3_2: {
+          Stream_5_3_2_Tagger Stream_5_3_2_Tagger_(TheFlags);
+          g_StreamTagger(Stream_5_3_2_Tagger_);
+        } break;
+        case Stream_5_3_3: {
+          Stream_5_3_3_Tagger Stream_5_3_3_Tagger_(TheFlags);
+          g_StreamTagger(Stream_5_3_3_Tagger_);
+        } break;
+        default:
+          std::abort();
+        }
+      } break;
+      case SlidingWindow: {
+        LSWPoST SlidingWindowTagger_;
+        g_FILE_Tagger(SlidingWindowTagger_);
+      } break;
+      default:
+        std::abort();
+      }
+
+      break;
+    case Retrain:
+      if (!TheFunctionTypeType) {
+        HMM HiddenMarkovModelTagger_;
+        r_FILE_Tagger(HiddenMarkovModelTagger_);
+        break;
+      }
+
+      switch (*TheFunctionTypeType) {
+      case Unigram: {
+        std::stringstream what_;
+        what_ << "invalid option -- 'u'";
+        throw Exception::apertium_tagger::InvalidOption(what_);
+      }
+      case SlidingWindow: {
+        LSWPoST SlidingWindowTagger_;
+        r_FILE_Tagger(SlidingWindowTagger_);
+      } break;
+      default:
+        std::abort();
+      }
+
+      break;
+    case Supervised:
+      if (!TheFunctionTypeType) {
+        HMM HiddenMarkovModelTagger_;
+        s_FILE_Tagger(HiddenMarkovModelTagger_);
+        break;
+      }
+
+      switch (*TheFunctionTypeType) {
+      case Unigram: {
+        switch (*TheUnigramType) {
+        case Stream_5_3_1: {
+          Stream_5_3_1_TaggerTrainer Stream_5_3_1_TaggerTrainer_(TheFlags);
+          s_StreamTaggerTrainer(Stream_5_3_1_TaggerTrainer_);
+        } break;
+        case Stream_5_3_2: {
+          Stream_5_3_2_TaggerTrainer Stream_5_3_2_TaggerTrainer_(TheFlags);
+          s_StreamTaggerTrainer(Stream_5_3_2_TaggerTrainer_);
+        } break;
+        case Stream_5_3_3: {
+          Stream_5_3_3_TaggerTrainer Stream_5_3_3_TaggerTrainer_(TheFlags);
+          s_StreamTaggerTrainer(Stream_5_3_3_TaggerTrainer_);
+        } break;
+        default:
+          std::abort();
+        }
+      } break;
+      case SlidingWindow: {
+        std::stringstream what_;
+        what_ << "invalid option -- 'w'";
+        throw Exception::apertium_tagger::InvalidOption(what_);
+      }
+      default:
+        std::abort();
+      }
+
+      break;
+    case Train:
+      if (!TheFunctionTypeType) {
+        HMM HiddenMarkovModelTagger_;
+        t_FILE_Tagger(HiddenMarkovModelTagger_);
+        break;
+      }
+
+      switch (*TheFunctionTypeType) {
+      case Unigram: {
+        std::stringstream what_;
+        what_ << "invalid option -- 'u'";
+        throw Exception::apertium_tagger::InvalidOption(what_);
+      }
+      case SlidingWindow: {
+        LSWPoST SlidingWindowTagger_;
+        t_FILE_Tagger(SlidingWindowTagger_);
+      } break;
+      default:
+        std::abort();
+      }
+
+      break;
+    default:
+      std::abort();
+    }
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::cerr << "apertium-tagger: " << basic_ExceptionType_.what() << '\n';
+    throw err_Exception();
+  }
+}
+// Writes the usage synopsis to std::cerr, then passes each option group to align::align_.
+void apertium_tagger::help() {
+
+  std::cerr <<
+"Usage: apertium-tagger [OPTION]... -g SERIALISED_TAGGER                        \\\n"
+"                                      [INPUT                                   \\\n"
+"                                      [OUTPUT]]\n"
+"\n"
+"  or:  apertium-tagger [OPTION]... -r ITERATIONS                               \\\n"
+"                                      CORPUS                                   \\\n"
+"                                      SERIALISED_TAGGER\n"
+"\n"
+"  or:  apertium-tagger [OPTION]... -s ITERATIONS                               \\\n"
+"                                      DICTIONARY                               \\\n"
+"                                      CORPUS                                   \\\n"
+"                                      TAGGER_SPECIFICATION                     \\\n"
+"                                      SERIALISED_TAGGER                        \\\n"
+"                                      TAGGED_CORPUS                            \\\n"
+"                                      UNTAGGED_CORPUS\n"
+"\n"
+"  or:  apertium-tagger [OPTION]... -s 0                                        \\\n"
+"                                   -u MODEL                                    \\\n"
+"                                      SERIALISED_TAGGER                        \\\n"
+"                                      TAGGED_CORPUS\n"
+"\n"
+"  or:  apertium-tagger [OPTION]... -t ITERATIONS                               \\\n"
+"                                      DICTIONARY                               \\\n"
+"                                      CORPUS                                   \\\n"
+"                                      TAGGER_SPECIFICATION                     \\\n"
+"                                      SERIALISED_TAGGER\n"
+"\n"
+"\n"
+"Mandatory arguments to long options are mandatory for short options too.\n"
+"\n";
+
+  std::vector<std::pair<std::string, std::string> > options_description_;
+  options_description_.push_back(std::make_pair("-d, --debug",            "with -g, print error messages about the input"));
+  options_description_.push_back(std::make_pair("-f, --first",            "with -g, reorder each lexical unit's analyses so that the chosen one is first"));
+  options_description_.push_back(std::make_pair("-m, --mark",             "with -g, mark disambiguated lexical units"));
+  options_description_.push_back(std::make_pair("-p, --show-superficial", "with -g, output each lexical unit's surface form"));
+  options_description_.push_back(std::make_pair("-z, --null-flush",       "with -g, flush the output after getting each null character"));
+  align::align_(options_description_);
+  std::cerr << '\n';
+  options_description_.clear();
+  options_description_.push_back(std::make_pair("-u, --unigram=MODEL", "use unigram algorithm MODEL from <http://coltekin.net/cagri/papers/trmorph-tools.pdf>"));
+  align::align_(options_description_);
+  std::cerr << '\n';
+  options_description_.clear();
+  options_description_.push_back(std::make_pair("-w, --sliding-window", "use the Light Sliding Window algorithm"));
+  align::align_(options_description_);
+  std::cerr << '\n';
+  options_description_.clear();
+  options_description_.push_back(std::make_pair("-g, --tagger", "disambiguate the input"));
+  align::align_(options_description_);
+  std::cerr << '\n';
+  options_description_.clear();
+  options_description_.push_back(std::make_pair("-r, --retrain=ITERATIONS", "with -u: exit;\notherwise: retrain the tagger with ITERATIONS unsupervised iterations"));
+  options_description_.push_back(std::make_pair("-s, --supervised=ITERATIONS", "with -u: train the tagger with a hand-tagged corpus;\nwith -w: exit;\notherwise: initialise the tagger with a hand-tagged corpus and retrain it with ITERATIONS unsupervised iterations"));
+  options_description_.push_back(std::make_pair("-t, --train=ITERATIONS", "with -u: exit;\notherwise: train the tagger with ITERATIONS unsupervised iterations"));
+  align::align_(options_description_);
+  std::cerr << '\n';
+  options_description_.clear();
+  options_description_.push_back(std::make_pair("-h, --help", "display this help and exit"));
+  align::align_(options_description_);
+}
+// Returns the "--name" spelling of the longopts entry at index indexptr_.
+std::string apertium_tagger::option_string(const int &indexptr_) {
+  return option_string(longopts[indexptr_]);
+}
+// Builds the long-option spelling of option_: "--" followed by option_.name.
+std::string apertium_tagger::option_string(const struct option &option_) {
+  std::ostringstream spelling_;
+  spelling_ << "--" << option_.name;
+  return spelling_.str();
+}
+// Installs the process-wide locale used by the stream-based tagger modes.
+void apertium_tagger::locale_global_() {
+
+  // Non-clang builds on Apple platforms delegate to lttoolbox's LtLocale
+  // helper; every other build installs std::locale("") as the global C++
+  // locale directly.
+
+#if defined __APPLE__ && !defined __clang__
+
+  // Apple platform, compiler other than clang.
+  LtLocale::tryToSetLocale();
+
+#else
+
+  // clang on any platform, or any compiler off Apple platforms.
+  std::locale::global(std::locale(""));
+
+#endif // defined __APPLE__ && !defined __clang__
+}
+// Long options handed to getopt_long; the last field is the matching short-option character.
+const struct option apertium_tagger::longopts[] = {
+    {"help", no_argument, 0, 'h'},
+    {"debug", no_argument, 0, 'd'},
+    {"first", no_argument, 0, 'f'},
+    {"mark", no_argument, 0, 'm'},
+    {"show-superficial", no_argument, 0, 'p'},
+    {"null-flush", no_argument, 0, 'z'},
+    {"unigram", required_argument, 0, 'u'},
+    {"sliding-window", no_argument, 0, 'w'},
+    {"tagger", no_argument, 0, 'g'},
+    {"retrain", required_argument, 0, 'r'},
+    {"supervised", required_argument, 0, 's'},
+    {"train", required_argument, 0, 't'},
+    {0, 0, 0, 0}}; // all-zero terminator; set_indexptr stops at val == 0
+// Points The_indexptr at the longopts entry whose val equals The_val, if any.
+void apertium_tagger::set_indexptr() {
+  if (longopts[The_indexptr].val == The_val)
+    return; // already names the option that was just parsed
+
+  // Scan the table up to its all-zero terminator for a matching val.
+  std::size_t candidate_ = 0;
+  while (longopts[candidate_].val != 0 &&
+         longopts[candidate_].val != The_val)
+    ++candidate_;
+  if (longopts[candidate_].val != 0)
+    The_indexptr = candidate_;
+}
+// Turns a flag on through SetFlag; throws if GetFlag reports it is already on.
+void apertium_tagger::flagOptionCase(
+    bool (basic_Tagger::Flags::*GetFlag)() const,
+    void (basic_Tagger::Flags::*SetFlag)(const bool &)) {
+  if (!(TheFlags.*GetFlag)()) {
+    (TheFlags.*SetFlag)(true);
+    return;
+  }
+  // The same flag was given twice, so both quoted names are this option.
+  std::stringstream message_;
+  message_ << "unexpected '" << option_string() << "' following '" << option_string() << '\'';
+  throw Exception::apertium_tagger::UnexpectedFlagOption(message_);
+}
+// Returns the "--name" spelling of the longopts entry that The_indexptr selects.
+std::string apertium_tagger::option_string() {
+  return option_string(The_indexptr);
+}
+// Records the algorithm option (-u or -w); throws if one was already recorded.
+void apertium_tagger::functionTypeTypeOptionCase(
+    const FunctionTypeType &FunctionTypeType_) {
+  if (FunctionTypeTypeOption_indexptr) {
+    std::stringstream what_;
+    what_ << "unexpected '" << option_string() << "' following '"
+          << option_string(*FunctionTypeTypeOption_indexptr)
+          << '\'';
+    throw Exception::apertium_tagger::UnexpectedFunctionTypeTypeOption(what_);
+  }
+
+  TheFunctionTypeType = FunctionTypeType_;
+  FunctionTypeTypeOption_indexptr = The_indexptr; // kept for the error message above
+}
+// Records the mode option (-g, -r, -s or -t); throws if one was already recorded.
+void apertium_tagger::functionTypeOptionCase(
+    const FunctionType &FunctionType_) {
+  if (FunctionTypeOption_indexptr) {
+    std::stringstream what_;
+    what_ << "unexpected '" << option_string() << "' following '"
+          << option_string(*FunctionTypeOption_indexptr)
+          << '\'';
+    throw Exception::apertium_tagger::UnexpectedFunctionTypeOption(what_);
+  }
+
+  TheFunctionType = FunctionType_;
+  FunctionTypeOption_indexptr = The_indexptr; // kept for the error message above
+}
+// Stores optarg as the ITERATIONS count; conversion errors become InvalidArgument.
+void apertium_tagger::getIterationsArgument() {
+  try {
+    TheFunctionTypeOptionArgument = optarg_unsigned_long();
+  } catch (const ExceptionType &ExceptionType_) {
+    std::stringstream what_;
+    what_ << "invalid argument '" << optarg << "' for '" << option_string()
+          << '\'';
+    throw Exception::apertium_tagger::InvalidArgument(what_);
+  }
+}
+// Converts optarg to unsigned long (base 10); throws on junk, '-', "" or overflow.
+unsigned long apertium_tagger::optarg_unsigned_long() const {
+  char *str_end;
+  errno = 0; // strtoul reports overflow only through errno
+  unsigned long N_0 = std::strtoul(optarg, &str_end, 10);
+  // strtoul silently negates "-N" instead of failing, so reject '-' explicitly.
+  if (*str_end != '\0' || std::strchr(optarg, '-') != NULL) {
+    std::stringstream what_;
+    what_ << "can't convert char *optarg \"" << optarg << "\" to unsigned long";
+    throw Exception::apertium_tagger::str_end_not_eq_NULL(what_);
+  }
+
+  if (*optarg == '\0') {
+    std::stringstream what_;
+    what_ << "can't convert char *optarg of size 1 \"\" to unsigned long";
+    throw Exception::apertium_tagger::optarg_eq_NULL(what_);
+  }
+
+  if (errno == ERANGE) {
+    std::stringstream what_;
+    what_ << "can't convert char *optarg \"" << optarg
+          << "\" to unsigned long, not in unsigned long range";
+    throw Exception::apertium_tagger::ERANGE_(what_);
+  }
+
+  return N_0;
+}
+// Opens filename on stream; throws open_stream_fail naming metavar on failure.
+template <typename T>
+static void try_open_fstream(const char *metavar, const char *filename,
+                             T &stream) {
+  stream.open(filename);
+  if (!stream.fail())
+    return;
+  std::stringstream message_;
+  message_ << "can't open " << metavar << " file \"" << filename << "\"";
+  throw Exception::apertium_tagger::open_stream_fail(message_);
+}
+// Opens filename with std::fopen(flags); throws the project's fopen exception on failure.
+static FILE *try_open_file(const char *metavar, const char *filename,
+                           const char *flags) {
+  if (FILE *opened_ = std::fopen(filename, flags))
+    return opened_;
+
+  // std::fopen returned a null pointer: report which file argument failed.
+  std::stringstream message_;
+  message_ << "can't open " << metavar << " file \"" << filename << "\"";
+  throw Exception::apertium_tagger::fopen(message_);
+}
+// Same as try_open_file; under MSVC the file is also switched to _O_U8TEXT mode.
+static inline FILE *try_open_file_utf8(const char *metavar, const char *filename,
+                                       const char *flags) {
+  FILE *f = try_open_file(metavar, filename, flags);
+#ifdef _MSC_VER
+  _setmode(_fileno(f), _O_U8TEXT);
+#endif // _MSC_VER
+  return f;
+}
+// Closes file with std::fclose; throws the project's fclose exception on failure.
+static void try_close_file(const char *metavar, const char *filename, FILE *file) {
+  if (std::fclose(file) == 0)
+    return;
+  std::stringstream message_;
+  message_ << "can't close " << metavar << " file \"" << filename << "\"";
+  throw Exception::apertium_tagger::fclose(message_);
+}
+// -g for stream taggers: args are SERIALISED_TAGGER [INPUT [OUTPUT]]; output defaults to std::wcout.
+void apertium_tagger::g_StreamTagger(basic_StreamTagger &StreamTagger_) {
+  locale_global_();
+
+  if (argc - optind < 1 || !(argc - optind < 4)) {
+    std::stringstream what_;
+    what_ << "expected 1, 2, or 3 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+
+  std::ifstream SerialisedAnalysisFrequencies;
+  try_open_fstream("SERIALISED_TAGGER", argv[optind],
+                   SerialisedAnalysisFrequencies);
+
+  try {
+    StreamTagger_.deserialise(SerialisedAnalysisFrequencies);
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::stringstream what_;
+    what_ << "can't deserialise SERIALISED_TAGGER file \"" << argv[optind]
+          << "\" Reason: " << basic_ExceptionType_.what();
+    throw Exception::apertium_tagger::deserialise(what_);
+  }
+  // SERIALISED_TAGGER only: the Stream is built without an input file.
+  if (argc - optind < 2) {
+    Stream Input(TheFlags);
+    StreamTagger_.tag(Input, std::wcout);
+    return;
+  }
+
+  std::wifstream Input_stream;
+  try_open_fstream("INPUT", argv[optind + 1], Input_stream);
+  // INPUT only: the tagged text goes to std::wcout.
+  if (argc - optind < 3) {
+    Stream Input(TheFlags, Input_stream, argv[optind + 1]);
+    StreamTagger_.tag(Input, std::wcout);
+    return;
+  }
+  // INPUT and OUTPUT: open OUTPUT on the output stream and tag into it.
+  std::wofstream Output_stream;
+  try_open_fstream("OUTPUT", argv[optind + 2], Output_stream);
+
+  Stream Input(TheFlags, Input_stream, argv[optind + 1]);
+  StreamTagger_.tag(Input, Output_stream);
+}
+// -s 0 -u MODEL: trains on TAGGED_CORPUS, then writes the result to SERIALISED_TAGGER.
+void apertium_tagger::s_StreamTaggerTrainer(
+    basic_StreamTaggerTrainer &StreamTaggerTrainer_) {
+  locale_global_();
+  // The unigram trainers take no unsupervised iterations, so only 0 is valid.
+  if (TheFunctionTypeOptionArgument != 0) {
+    std::stringstream what_;
+    what_ << "invalid argument '" << TheFunctionTypeOptionArgument
+          << "' for '--supervised'";
+    throw Exception::apertium_tagger::InvalidArgument(what_);
+  }
+
+  if (argc - optind < 2 || !(argc - optind < 3)) {
+    std::stringstream what_;
+    what_ << "expected 2 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+  // File arguments: SERIALISED_TAGGER at optind, TAGGED_CORPUS at optind + 1.
+  std::wifstream TaggedCorpus_stream;
+  try_open_fstream("TAGGED_CORPUS", argv[optind + 1], TaggedCorpus_stream);
+  // As in g_StreamTagger, name the Stream after the file it actually reads.
+  Stream TaggedCorpus(TheFlags, TaggedCorpus_stream, argv[optind + 1]);
+  StreamTaggerTrainer_.train(TaggedCorpus);
+
+  std::ofstream Serialised_basic_Tagger;
+  try_open_fstream("SERIALISED_TAGGER", argv[optind],
+                   Serialised_basic_Tagger);
+
+  StreamTaggerTrainer_.serialise(Serialised_basic_Tagger);
+}
+// -g for FILE-based taggers: args are SERIALISED_TAGGER [INPUT [OUTPUT]]; defaults are stdin/stdout.
+void apertium_tagger::g_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
+  LtLocale::tryToSetLocale();
+
+  if (argc - optind < 1 || !(argc - optind < 4)) {
+    std::stringstream what_;
+    what_ << "expected 1, 2, or 3 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+
+  FILE *Serialised_FILE_Tagger =
+      try_open_file("SERIALISED_TAGGER", argv[optind], "rb");
+  FILE_Tagger_.deserialise(Serialised_FILE_Tagger);
+  try_close_file("SERIALISED_TAGGER", argv[optind], Serialised_FILE_Tagger);
+  // Forward the command-line flags to the tagger and to TaggerWord.
+  FILE_Tagger_.set_debug(TheFlags.getDebug());
+  TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
+  TaggerWord::generate_marks = TheFlags.getMark();
+  FILE_Tagger_.set_show_sf(TheFlags.getShowSuperficial());
+  FILE_Tagger_.setNullFlush(TheFlags.getNullFlush());
+
+  if (argc - optind < 2)
+    FILE_Tagger_.tagger(stdin, stdout, TheFlags.getFirst());
+  else {
+    FILE *Input = try_open_file("INPUT", argv[optind + 1], "r");
+
+    if (argc - optind < 3)
+      FILE_Tagger_.tagger(Input, stdout, TheFlags.getFirst());
+    else {
+      FILE *Output = try_open_file_utf8("OUTPUT", argv[optind + 2], "w");
+      FILE_Tagger_.tagger(Input, Output, TheFlags.getFirst());
+      try_close_file("OUTPUT", argv[optind + 2], Output);
+    }
+
+    try_close_file("INPUT", argv[optind + 1], Input);
+  }
+}
+// -r: args are CORPUS SERIALISED_TAGGER; loads the tagger, trains it on CORPUS, writes it back.
+void apertium_tagger::r_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
+  LtLocale::tryToSetLocale();
+
+  if (argc - optind < 2 || !(argc - optind < 3)) {
+    std::stringstream what_;
+    what_ << "expected 2 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+
+  FILE *Serialised_FILE_Tagger =
+      try_open_file("SERIALISED_TAGGER", argv[optind + 1], "rb");
+  FILE_Tagger_.deserialise(Serialised_FILE_Tagger);
+  try_close_file("SERIALISED_TAGGER", argv[optind + 1], Serialised_FILE_Tagger);
+
+  FILE_Tagger_.set_debug(TheFlags.getDebug());
+  TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
+
+  FILE *Corpus = try_open_file_utf8("CORPUS", argv[optind], "r");
+  FILE_Tagger_.train(Corpus, TheFunctionTypeOptionArgument);
+  try_close_file("CORPUS", argv[optind], Corpus);
+  // Reopen the same SERIALISED_TAGGER path for writing and overwrite it.
+  Serialised_FILE_Tagger =
+      try_open_file("SERIALISED_TAGGER", argv[optind + 1], "wb");
+  FILE_Tagger_.serialise(Serialised_FILE_Tagger);
+  try_close_file("SERIALISED_TAGGER", argv[optind + 1], Serialised_FILE_Tagger);
+}
+// -s: DICTIONARY CORPUS TAGGER_SPECIFICATION SERIALISED_TAGGER TAGGED_CORPUS UNTAGGED_CORPUS.
+void apertium_tagger::s_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
+  LtLocale::tryToSetLocale();
+
+  if (argc - optind < 6 || !(argc - optind < 7)) {
+    std::stringstream what_;
+    what_ << "expected 6 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+  // argv[optind + 2] is TAGGER_SPECIFICATION; this overload takes it by path.
+  FILE_Tagger_.deserialise(argv[optind + 2]);
+  FILE_Tagger_.set_debug(TheFlags.getDebug());
+  TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
+
+  FILE *Dictionary = try_open_file("DICTIONARY", argv[optind], "r");
+  FILE_Tagger_.read_dictionary(Dictionary);
+  try_close_file("DICTIONARY", argv[optind], Dictionary);
+
+  FILE *TaggedCorpus = try_open_file_utf8("TAGGED_CORPUS", argv[optind + 4], "r");
+  FILE *UntaggedCorpus = try_open_file_utf8("UNTAGGED_CORPUS", argv[optind + 5], "r");
+  FILE_Tagger_.init_probabilities_from_tagged_text_(TaggedCorpus,
+                                                    UntaggedCorpus);
+  try_close_file("TAGGED_CORPUS", argv[optind + 4], TaggedCorpus);
+  try_close_file("UNTAGGED_CORPUS", argv[optind + 5], UntaggedCorpus);
+  // Each handle below is closed itself; UntaggedCorpus is already closed here.
+  FILE *Corpus = try_open_file_utf8("CORPUS", argv[optind + 1], "r");
+  FILE_Tagger_.train(Corpus, TheFunctionTypeOptionArgument);
+  try_close_file("CORPUS", argv[optind + 1], Corpus);
+
+  FILE *Serialised_FILE_Tagger =
+      try_open_file("SERIALISED_TAGGER", argv[optind + 3], "wb");
+  FILE_Tagger_.serialise(Serialised_FILE_Tagger);
+  try_close_file("SERIALISED_TAGGER", argv[optind + 3], Serialised_FILE_Tagger);
+}
+// -t: args are DICTIONARY CORPUS TAGGER_SPECIFICATION SERIALISED_TAGGER.
+void apertium_tagger::t_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
+  LtLocale::tryToSetLocale();
+
+  if (argc - optind < 4 || !(argc - optind < 5)) {
+    std::stringstream what_;
+    what_ << "expected 4 file arguments, got " << argc - optind;
+    throw Exception::apertium_tagger::UnexpectedFileArgumentCount(what_);
+  }
+  // argv[optind + 2] is TAGGER_SPECIFICATION; this overload takes it by path.
+  FILE_Tagger_.deserialise(argv[optind + 2]);
+  FILE_Tagger_.set_debug(TheFlags.getDebug());
+  TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
+
+  FILE *Dictionary = try_open_file("DICTIONARY", argv[optind], "r");
+  FILE_Tagger_.read_dictionary(Dictionary);
+  try_close_file("DICTIONARY", argv[optind], Dictionary);
+
+  FILE *Corpus = try_open_file_utf8("CORPUS", argv[optind + 1], "r");
+  FILE_Tagger_.init_probabilities_kupiec_(Corpus);
+  FILE_Tagger_.train(Corpus, TheFunctionTypeOptionArgument); // same handle as above; NOTE(review): presumably the callee repositions it - confirm
+  try_close_file("CORPUS", argv[optind + 1], Corpus);
+
+  FILE *Serialised_FILE_Tagger =
+      try_open_file("SERIALISED_TAGGER", argv[optind + 3], "wb");
+  FILE_Tagger_.serialise(Serialised_FILE_Tagger);
+  try_close_file("SERIALISED_TAGGER", argv[optind + 3], Serialised_FILE_Tagger);
+}
+}
+// Entry point: runs apertium_tagger; on err_Exception prints a hint and returns 1.
+int main(int argc, char **argv) {
+  try {
+    apertium_tagger(argc, argv);
+  } catch (const err_Exception &err_Exception_) {
+    std::cerr << "Try 'apertium-tagger --help' for more information.\n";
+    return 1;
+  } catch (...) {
+    throw; // any other exception is passed on unchanged
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/exception.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/exception.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/exception.h	(revision 69632)
@@ -0,0 +1,92 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EXCEPTION_APERTIUM_TAGGER_H
+#define EXCEPTION_APERTIUM_TAGGER_H
+
+#include "exception_type.h"
+
+#include <sstream>
+
+namespace Apertium {
+namespace Exception {
+// EXCEPTION(T) defines class T, derived from ::Apertium::ExceptionType, with forwarding constructors.
+#define EXCEPTION(EXCEPTION_TYPE)                                              \
+  class EXCEPTION_TYPE : public ::Apertium::ExceptionType {                    \
+  public:                                                                      \
+    EXCEPTION_TYPE(const char *const what_) : ExceptionType(what_) {}          \
+    EXCEPTION_TYPE(const std::string &what_) : ExceptionType(what_) {}         \
+    EXCEPTION_TYPE(const std::stringstream &what_) : ExceptionType(what_) {}   \
+    ~EXCEPTION_TYPE() throw() {}                                               \
+  };
+// Exception types grouped by namespace (names appear to mirror the code that throws them).
+namespace Analysis {
+EXCEPTION(TheMorphemes_empty)
+}
+
+namespace apertium_tagger {
+EXCEPTION(deserialise)
+EXCEPTION(fclose)
+EXCEPTION(fopen)
+EXCEPTION(open_stream_fail)
+EXCEPTION(optarg_eq_NULL)
+EXCEPTION(str_end_not_eq_NULL)
+EXCEPTION(ERANGE_)
+EXCEPTION(InvalidArgument)
+EXCEPTION(InvalidOption)
+EXCEPTION(UnexpectedFileArgumentCount)
+EXCEPTION(UnexpectedFlagOption)
+EXCEPTION(UnexpectedFunctionTypeOption)
+EXCEPTION(UnexpectedFunctionTypeTypeOption)
+}
+
+namespace Deserialiser {
+EXCEPTION(size_t_)
+EXCEPTION(not_Stream_good)
+EXCEPTION(wchar_t_)
+}
+
+namespace LexicalUnit {
+EXCEPTION(TheAnalyses_empty)
+}
+
+namespace Morpheme {
+EXCEPTION(TheLemma_empty)
+EXCEPTION(TheTags_empty)
+}
+
+namespace Optional {
+EXCEPTION(TheOptionalTypePointer_null)
+}
+
+namespace Serialiser {
+EXCEPTION(not_Stream_good)
+EXCEPTION(size_t_)
+EXCEPTION(wchar_t_)
+}
+
+namespace Tag {
+EXCEPTION(TheTags_empty)
+}
+
+namespace wchar_t_ExceptionType {
+EXCEPTION(EILSEQ_)
+}
+// The helper macro is local to this header.
+#undef EXCEPTION
+} // namespace Exception
+} // namespace Apertium
+
+#endif // EXCEPTION_APERTIUM_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/lswpost.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lswpost.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lswpost.cc	(revision 69632)
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ *  Light Sliding-Window Part of Speech Tagger (LSWPoST) implementation (source)
+ *
+ *  @author   Gang Chen - pkuchengang@gmail.com
+ */
+
+
+#include <apertium/lswpost.h>
+#include <apertium/tagger_utils.h>
+#include  "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+#include <lttoolbox/compression.h>
+
+#ifdef WIN32
+#define isnan(n) _isnan(n)
+#define isinf(n) (!_finite(n))
+#endif
+
+#ifdef __clang__
+#undef __GNUC__
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include <vector>
+#include <algorithm>
+#include <apertium/string_utils.h>
+#include <cstdlib>
+
+using namespace std;
+using namespace Apertium;
+using namespace tagger_utils;
+
+void LSWPoST::deserialise(FILE *Serialised_FILE_Tagger) { // rebuild tdlsw from a serialised stream
+  tdlsw.read(Serialised_FILE_Tagger);
+  eos = (tdlsw.getTagIndex())[L"TAG_SENT"]; // cache the index stored under "TAG_SENT"
+}
+
+std::vector<std::wstring> &LSWPoST::getArrayTags() { // forwards to the tag array held by tdlsw
+  return tdlsw.getArrayTags();
+}
+
+void LSWPoST::serialise(FILE *Stream_) { tdlsw.write(Stream_); } // writes tdlsw to Stream_
+
+void LSWPoST::deserialise(const TaggerData &Deserialised_FILE_Tagger) { // adopt already-loaded tagger data
+  tdlsw = TaggerDataLSW(Deserialised_FILE_Tagger);
+  eos = (tdlsw.getTagIndex())[L"TAG_SENT"]; // cache the index stored under "TAG_SENT"
+}
+
+void LSWPoST::init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
+                                                   FILE *UntaggedCorpus) {
+  std::abort(); // no implementation for this tagger: both arguments are ignored and the process aborts
+}
+
+void LSWPoST::init_probabilities_kupiec_(FILE *Corpus) { // thin wrapper: same as init_probabilities
+  init_probabilities(Corpus);
+}
+
+void LSWPoST::train(FILE *Corpus, unsigned long Count) { // runs Count training passes over Corpus
+  for (unsigned long pass = 0; pass != Count; ++pass) {
+    std::fseek(Corpus, 0, SEEK_SET); // every pass reads the corpus from its start
+    train(Corpus);
+  }
+}
+
+LSWPoST::LSWPoST() {}
+
+// Keeps its own copy of the tagger data and caches the index stored under "TAG_SENT" as eos.
+LSWPoST::LSWPoST(TaggerDataLSW t) : tdlsw(t) {
+  eos = tdlsw.getTagIndex()[L"TAG_SENT"];
+}
+
+LSWPoST::~LSWPoST() {}
+
+LSWPoST::LSWPoST(TaggerDataLSW *tdlsw) : tdlsw(*tdlsw) {} // copies *tdlsw; NOTE(review): eos is not assigned here, unlike the by-value constructor
+
+// Replaces the end-of-sentence tag that is used as the left context of the first word.
+void LSWPoST::set_eos(TTag t) {
+  this->eos = t;
+}
+
+void
+LSWPoST::init_probabilities(FILE *ftxt) {
+  // Seeds D from the words read from ftxt: for every window of three consecutive words, each valid (left, mid, right) tag triple receives an equal share of one count.
+  int N = tdlsw.getN();
+  int nw = 0;
+  TaggerWord *word = NULL;
+  set<TTag> tags_left, tags_mid, tags_right;
+  set<TTag>::iterator iter_left, iter_mid, iter_right;
+  vector<vector<vector<double> > > para_matrix(N, vector<vector<double> >(N, vector<double>(N, 0)));
+  MorphoStream morpho_stream(ftxt, true, &tdlsw);
+  int num_valid_seq = 0;
+  // The first window uses a synthetic word tagged eos as its left context.
+  word = new TaggerWord();          // word for tags left
+  word->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  tags_left = word->get_tags();     // tags left
+  if (tags_left.size()==0) { //This is an unknown word
+    tags_left = tdlsw.getOpenClass();
+  }
+
+  require_ambiguity_class(tdlsw, tags_left, *word, nw);
+  ++nw;
+  delete word;
+  word = morpho_stream.get_next_word();  // word for tags mid; NOTE(review): used below without a NULL check - confirm the first call cannot return NULL
+  tags_mid = word->get_tags();           // tags mid
+  if (tags_mid.size()==0) { //This is an unknown word
+    tags_mid = tdlsw.getOpenClass();
+  }
+  require_ambiguity_class(tdlsw, tags_mid, *word, nw);
+  ++nw;
+  delete word;
+  if (morpho_stream.getEndOfFile()) {
+    return; // end of input already reached: D is left untouched
+  }
+
+  word = morpho_stream.get_next_word();   // word for tags right
+
+  // count each element of the para matrix
+  while (word != NULL) {
+    if (++nw % 10000 == 0) {
+      wcerr << L'.' << flush;
+    }
+
+    tags_right = word->get_tags();       // tags right
+    if (tags_right.size()==0) { //This is an unknown word
+      tags_right = tdlsw.getOpenClass();
+    }
+    require_ambiguity_class(tdlsw, tags_right, *word, nw);
+    // Count the combinations accepted by is_valid_seq: start from all of them and subtract the rejected ones.
+    num_valid_seq = tags_left.size() * tags_mid.size() * tags_right.size();
+    for (iter_left = tags_left.begin(); iter_left != tags_left.end(); ++iter_left) {
+      for (iter_mid = tags_mid.begin(); iter_mid != tags_mid.end(); ++iter_mid) {
+        for (iter_right = tags_right.begin(); iter_right != tags_right.end(); ++iter_right) {
+          if (!is_valid_seq(*iter_left, *iter_mid, *iter_right)) {
+            --num_valid_seq;
+          }
+        } // for iter_right
+      } // for iter_mid
+    } // for iter_left
+    // Spread one count evenly over the valid combinations of this window.
+    if (num_valid_seq != 0) {
+      for (iter_left = tags_left.begin(); iter_left != tags_left.end(); ++iter_left) {
+        for (iter_mid = tags_mid.begin(); iter_mid != tags_mid.end(); ++iter_mid) {
+          for (iter_right = tags_right.begin(); iter_right != tags_right.end(); ++iter_right) {
+            if (is_valid_seq(*iter_left, *iter_mid, *iter_right)) {
+              para_matrix[*iter_left][*iter_mid][*iter_right] += 1.0 / num_valid_seq;
+            }
+          } // for iter_right
+        } // for iter_mid
+      } // for iter_left
+    }
+    // Slide the window one word to the right.
+    tags_left = tags_mid;
+    tags_mid = tags_right;
+    delete word;
+    word = morpho_stream.get_next_word();
+  } // while word != NULL
+  // Store the accumulated counts as the new D.
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      for (int k = 0; k < N; ++k) {
+        tdlsw.getD()[i][j][k] = para_matrix[i][j][k];
+      }
+    }
+  }
+
+  wcerr << L"\n";
+}
+
+/** Tells whether the tag triple (left, mid, right) violates no forbid rule and
+ *  no enforce rule of tdlsw, checking both adjacent pairs of the triple. */
+bool LSWPoST::is_valid_seq(TTag left, TTag mid, TTag right) {
+  vector<TForbidRule> &forbidden = tdlsw.getForbidRules();
+  vector<TEnforceAfterRule> &enforced = tdlsw.getEnforceRules();
+
+  // A forbid rule (tagi, tagj) rejects the triple when either adjacent pair,
+  // (left, mid) or (mid, right), equals it.
+  for (size_t f = 0; f < forbidden.size(); ++f) {
+    const bool hits_first_pair = left == forbidden[f].tagi && mid == forbidden[f].tagj;
+    const bool hits_second_pair = mid == forbidden[f].tagi && right == forbidden[f].tagj;
+    if (hits_first_pair || hits_second_pair) {
+      return false;
+    }
+  }
+
+  // An enforce rule applies to the first pair when left is its tagi, otherwise
+  // to the second pair when mid is its tagi; the tag that follows tagi must
+  // then be listed in tagsj.
+  for (size_t e = 0; e < enforced.size(); ++e) {
+    TTag follower;
+    if (left == enforced[e].tagi) {
+      follower = mid;
+    } else if (mid == enforced[e].tagi) {
+      follower = right;
+    } else {
+      continue; // this rule does not concern the triple
+    }
+
+    bool allowed = false;
+    for (size_t j = 0; j < enforced[e].tagsj.size() && !allowed; ++j) {
+      allowed = (enforced[e].tagsj[j] == follower);
+    }
+    if (!allowed) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/** Reads the expanded dictionary into tdlsw, reports the resulting sizes on
+ *  wcerr and sets up the probability matrix of tdlsw for the number of tags. */
+void LSWPoST::read_dictionary(FILE *fdic) {
+  tagger_utils::read_dictionary(fdic, tdlsw);
+
+  const int num_states = tdlsw.getTagIndex().size();
+  const int num_classes = tdlsw.getOutput().size();
+  wcerr << num_states << L" states and " << num_classes << L" ambiguity classes\n";
+  tdlsw.setProbabilities(num_states);
+}
+
+void
+LSWPoST::train(FILE *ftxt) {
+  // One re-estimation pass: for every three-word window the current D values of its tag triples, normalised to sum to 1, are accumulated into a fresh matrix that then replaces D.
+  int N = tdlsw.getN();
+  int nw = 0;
+  TaggerWord *word = NULL;
+  set<TTag> tags_left, tags_mid, tags_right;
+  set<TTag>::iterator iter_left, iter_mid, iter_right;
+  vector<vector<vector<double> > > para_matrix_new(N, vector<vector<double> >(N, vector<double>(N, 0)));
+  MorphoStream morpho_stream(ftxt, true, &tdlsw);
+  // The first window uses a synthetic word tagged eos as its left context.
+  word = new TaggerWord();          // word for tags left
+  word->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  tags_left = word->get_tags();     // tags left
+  if (tags_left.size()==0) { //This is an unknown word
+    tags_left = tdlsw.getOpenClass();
+  }
+  require_ambiguity_class(tdlsw, tags_left, *word, nw);
+  ++nw;
+  delete word;
+  word = morpho_stream.get_next_word();  // word for tags mid; NOTE(review): used below without a NULL check - confirm the first call cannot return NULL
+  tags_mid = word->get_tags();           // tags mid
+  if (tags_mid.size()==0) { //This is an unknown word
+    tags_mid = tdlsw.getOpenClass();
+  }
+  require_ambiguity_class(tdlsw, tags_mid, *word, nw);
+  ++nw;
+  delete word;
+  if (morpho_stream.getEndOfFile()) {
+    return; // end of input already reached: D is left untouched
+  }
+
+  word = morpho_stream.get_next_word();   // word for tags right
+
+  while (word) {
+    if (++nw % 10000 == 0) {
+      wcerr << L'.' << flush;
+    }
+
+    tags_right = word->get_tags();       // tags right
+    if (tags_right.size()==0) { //This is an unknown word
+      tags_right = tdlsw.getOpenClass();
+    }
+    require_ambiguity_class(tdlsw, tags_right, *word, nw);
+
+    double normalization = 0;
+    // Total current weight of all tag triples compatible with this window.
+    for (iter_left = tags_left.begin(); iter_left != tags_left.end(); ++iter_left) {
+      for (iter_mid = tags_mid.begin(); iter_mid != tags_mid.end(); ++iter_mid) {
+        for (iter_right = tags_right.begin(); iter_right != tags_right.end(); ++iter_right) {
+          normalization += tdlsw.getD()[*iter_left][*iter_mid][*iter_right];
+        }
+      }
+    }
+    // Add each triple's normalised weight; a window whose total is not above ZERO contributes nothing.
+    for (iter_left = tags_left.begin(); iter_left != tags_left.end(); ++iter_left) {
+      for (iter_mid = tags_mid.begin(); iter_mid != tags_mid.end(); ++iter_mid) {
+        for (iter_right = tags_right.begin(); iter_right != tags_right.end(); ++iter_right) {
+          if (normalization > ZERO) {
+            para_matrix_new[*iter_left][*iter_mid][*iter_right] +=
+                tdlsw.getD()[*iter_left][*iter_mid][*iter_right] / normalization;
+          }
+        }
+      }
+    }
+    // Slide the window one word to the right.
+    tags_left = tags_mid;
+    tags_mid = tags_right;
+    delete word;
+    word = morpho_stream.get_next_word();
+  }
+  // Replace D with the re-estimated values.
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      for (int k = 0; k < N; ++k) {
+        tdlsw.getD()[i][j][k] = para_matrix_new[i][j][k];
+      }
+    }
+  }
+}
+
+/** Dumps every entry of the parameter matrix D to wcout, one "D[i][j][k] = value" line each. */
+void LSWPoST::print_para_matrix() {
+  wcout << L"para matrix D\n----------------------------\n";
+  const int n = tdlsw.getN();
+  for (int left = 0; left < n; ++left) {
+    for (int mid = 0; mid < n; ++mid) {
+      for (int right = 0; right < n; ++right) {
+        wcout << L"D[" << left << L"][" << mid << L"][" << right << L"] = " << tdlsw.getD()[left][mid][right] << "\n";
+      }
+    }
+  }
+}
+
+void 
+LSWPoST::tagger(FILE *Input, FILE *Output, const bool &First) { // tags Input with a three-word sliding window and writes to Output; First is not used in this body
+  TaggerWord *word_left = NULL, *word_mid = NULL, *word_right = NULL;
+  set<TTag> tags_left, tags_mid, tags_right;
+  set<TTag>::iterator iter_left, iter_mid, iter_right;
+  MorphoStream morpho_stream(Input, debug, &tdlsw);
+  morpho_stream.setNullFlush(null_flush);                      
+  // The first window uses a synthetic word tagged eos as its left context.
+  word_left = new TaggerWord();          // word left
+  word_left->add_tag(eos, L"sent", tdlsw.getPreferRules());
+  word_left->set_show_sf(show_sf);
+  tags_left = word_left->get_tags();          // tags left
+
+  warn_absent_ambiguity_class(tdlsw, tags_left, *word_left, debug);
+  word_mid = morpho_stream.get_next_word(); // word mid; NOTE(review): used below without a NULL check - confirm the first call cannot return NULL
+  word_mid->set_show_sf(show_sf);
+  tags_mid = word_mid->get_tags();          // tags mid
+
+  warn_absent_ambiguity_class(tdlsw, tags_mid, *word_mid, debug);
+  if (morpho_stream.getEndOfFile()) {
+    delete word_left;
+    delete word_mid;
+    return; // end of input already reached: nothing is written
+  }
+  word_right = morpho_stream.get_next_word(); // word_right
+  word_right->set_show_sf(show_sf);
+
+  wstring micad;
+
+  while (word_right) {
+    tags_right = word_right->get_tags();
+    warn_absent_ambiguity_class(tdlsw, tags_right, *word_right, debug);
+    // Choose the mid tag whose D values, summed over all left/right tag pairs, are largest.
+    double max = -1;
+    TTag tag_max = *tags_mid.begin(); // NOTE(review): unlike init_probabilities/train, an empty tag set is not replaced by the open class here, so begin() may equal end()
+    for (iter_mid = tags_mid.begin(); iter_mid != tags_mid.end(); ++iter_mid) {
+      double n = 0;
+      for (iter_left = tags_left.begin(); iter_left != tags_left.end(); ++iter_left) {
+        for (iter_right = tags_right.begin(); iter_right != tags_right.end(); ++iter_right) {
+          n += tdlsw.getD()[*iter_left][*iter_mid][*iter_right];
+        }
+      }
+      if (n > max) {
+        max = n;
+        tag_max = *iter_mid;
+      }
+    }
+    // Write the mid word in the lexical form selected by tag_max.
+    micad = word_mid->get_lexical_form(tag_max, (tdlsw.getTagIndex())[L"TAG_kEOF"]);
+    fputws_unlocked(micad.c_str(), Output);
+    if (morpho_stream.getEndOfFile()) { // flush (after a NUL when null_flush is set) and clear the end-of-file flag
+      if (null_flush) {
+        fputwc_unlocked(L'\0', Output);
+      }
+      fflush(Output);
+      morpho_stream.setEndOfFile(false);
+    }
+    // Slide the window one word to the right; the old left word is released.
+    delete word_left;
+    word_left = word_mid;
+    tags_left = tags_mid;
+    word_mid = word_right;
+    tags_mid = tags_right;
+    word_right = morpho_stream.get_next_word();
+    if (word_right != NULL) {
+      word_right->set_show_sf(show_sf);
+    }
+  }
+  delete word_left;
+  delete word_mid;
+}
Index: branches/apertium-tagger/apertium2/apertium/tagger_utils.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_utils.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_utils.cc	(revision 69632)
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/tagger_utils.h>
+#include <apertium/morpho_stream.h>
+
+#include <stdio.h>
+#include <sstream>
+#include <algorithm>
+#include <climits>
+#include <apertium/string_utils.h>
+#ifdef _MSC_VER
+#define wcstok wcstok_s
+#endif
+#ifdef __MINGW32__
+
+wchar_t *_wcstok(wchar_t *wcs, const wchar_t *delim, wchar_t **ptr) {
+  (void)ptr;
+  return wcstok(wcs, delim);
+}
+
+#define wcstok _wcstok
+#endif
+
+using namespace Apertium;
+
+
+void tagger_utils::fatal_error (wstring const &s) { // report s on wcerr, then end the process
+  wcerr<<L"Error: "<<s<<L"\n";
+  exit(1); // never returns to the caller
+}
+
+void tagger_utils::file_name_error (string const &s) { // report s on cerr, then end the process
+  cerr << "Error: " << s << endl;
+  exit(1); // never returns to the caller
+}
+
+char * tagger_utils::itoa(int i) {                 // returns the decimal text of i
+  static char buf[512]; // one buffer shared by all calls: the returned pointer is overwritten by the next call
+  sprintf(buf,"%d",i);
+  return buf;
+}
+
+void tagger_utils::clear_array_double(double a[], int l) { // sets a[0..l-1] to 0.0
+  for(int i=0; i<l; i++)
+    a[i]=0.0;
+}
+
+void tagger_utils::clear_array_vector(vector<TTag> v[], int l) { // empties each of v[0..l-1]
+  for(int i=0; i<l; i++)
+    v[i].clear();
+}
+
+/** Counts the non-empty pieces of s delimited by L'_'; s is also echoed to wcerr. */
+int tagger_utils::ntokens_multiword(wstring const &s)
+{
+   // wcstok writes into its argument, so tokenise a private copy of s
+   wchar_t *buffer = new wchar_t[s.size()+1];
+   wcscpy(buffer, s.c_str());
+   buffer[s.size()] = 0;
+   wcerr << buffer << endl;
+
+   wchar_t const *separator = L"_";
+   wchar_t *state;
+   int pieces = 0;
+   for (wchar_t *tok = wcstok(buffer, separator, &state); tok != NULL;
+        tok = wcstok(NULL, separator, &state)) {
+     ++pieces;
+   }
+
+   delete[] buffer;
+   return pieces;
+}
+ 
+/** Counts the non-empty pieces of s delimited by L'-'; s is also echoed to wcerr. */
+int tagger_utils::nguiones_fs(wstring const & s) {
+   // wcstok writes into its argument, so tokenise a private copy of s
+   wchar_t *buffer = new wchar_t[s.size()+1];
+   wcscpy(buffer, s.c_str());
+   buffer[s.size()] = 0;
+   wcerr << buffer << endl;
+
+   wchar_t const *separator = L"-";
+   wchar_t *state;
+   int pieces = 0;
+   for (wchar_t *tok = wcstok(buffer, separator, &state); tok != NULL;
+        tok = wcstok(NULL, separator, &state)) {
+     ++pieces;
+   }
+   delete[] buffer;
+   return pieces;
+}
+
+/** Collapses every run of spaces in s into a single space, then removes one
+ *  trailing and one leading space if present, and returns the result. */
+wstring tagger_utils::trim(wstring s)
+{
+  wstring::size_type pos = 0;
+  while (pos + 1 < s.length()) {
+    if (s[pos] == L' ' && s[pos + 1] == L' ') {
+      s.erase(pos, 1);
+    } else {
+      ++pos;
+    }
+  }
+
+  if (!s.empty() && s[s.length() - 1] == L' ')
+    s.erase(s.length() - 1, 1);
+  if (!s.empty() && s[0] == L' ')
+    s.erase(0, 1);
+  return s;
+}
+
+/** Looks up in td's output collection the ambiguity class of each word of the
+ *  expanded dictionary read from fdic, then the open class, then one
+ *  single-tag class per tag. A dot is printed on wcerr every 10000 words.
+ *  In the input dictionary there must be all punctuation marks, including
+ *  the end-of-sentence mark. */
+void
+tagger_utils::read_dictionary(FILE *fdic, TaggerData &td) {
+  Collection &classes = td.getOutput();
+  MorphoStream dictionary(fdic, true, &td);
+
+  // Indexing the collection with a tag set is done only for its effect on the
+  // collection (presumably creating the class if absent, as the notes below
+  // say); the index it returns is not needed.
+  int words_read = 0;
+  for (TaggerWord *entry = dictionary.get_next_word(); entry != NULL;
+       entry = dictionary.get_next_word()) {
+    if (++words_read % 10000 == 0) {
+      wcerr << L'.' << flush;
+    }
+    set<TTag> entry_tags = entry->get_tags();
+    if (entry_tags.size() > 0) {
+      classes[entry_tags];
+    }
+    delete entry;
+  }
+  wcerr << L"\n";
+
+  // OPEN AMBIGUITY CLASS
+  // It contains all tags that are not closed.
+  // Unknown words are assigned the open ambiguity class
+  classes[td.getOpenClass()];
+
+  // Create ambiguity class holding one single tag for each tag.
+  // If not created yet
+  const int num_tags = td.getTagIndex().size();
+  for (int tag = 0; tag != num_tags; ++tag) {
+    set<TTag> singleton;
+    singleton.insert(tag);
+    classes[singleton];
+  }
+}
+
+/** Returns the smallest known ambiguity class that contains every tag of c,
+ *  or a copy of the open class when no smaller known class contains c. */
+set<TTag>
+tagger_utils::find_similar_ambiguity_class(TaggerData &td, set<TTag> &c) {
+  // Copy, not reference: assigning through a reference overwrote td's open class.
+  set<TTag> ret = td.getOpenClass();
+  Collection &output = td.getOutput();
+  for (int k=0; k<output.size(); k++) {
+    const set<TTag> &ambg_class = output[k];
+    if (ambg_class.size() < ret.size() &&
+        includes(ambg_class.begin(), ambg_class.end(), c.begin(), c.end())) {
+      ret = ambg_class;
+    }
+  }
+  return ret;
+}
+
+/** Terminates through fatal_error when tags is not a known ambiguity class of
+ *  td. The message names the word and its tags and, when nw >= 0, reports
+ *  nw + 1 as the line number. */
+void
+tagger_utils::require_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, int nw) {
+  if (!td.getOutput().has_not(tags)) return;
+  std::wostringstream msg;
+  msg << L"A new ambiguity class was found. I cannot continue.\n";
+  msg << L"Word '" << word.get_superficial_form() << L"' not found in the dictionary.\n";
+  msg << L"New ambiguity class: " << word.get_string_tags() << L"\n";
+  if (nw >= 0) {
+    msg << L"Line number: " << (nw + 1) << L"\n";
+  }
+  msg << L"Take a look at the dictionary, then retrain.";
+  fatal_error(msg.str());
+}
+
+/** Reports on wcerr that word carries an ambiguity class that was not known, and that retraining is needed. */
+static void _warn_absent_ambiguity_class(TaggerWord &word) {
+  const wstring surface = word.get_superficial_form();
+  const wstring tag_list = word.get_string_tags();
+  wcerr << L"Error: " << L"A new ambiguity class was found. \n"
+        << L"Retraining the tagger is necessary so as to take it into account.\n"
+        << L"Word '" << surface << L"'.\n" << L"New ambiguity class: " << tag_list << L"\n";
+}
+
+/** Returns tags unchanged when it is a known ambiguity class of td; otherwise
+ *  returns find_similar_ambiguity_class(td, tags), warning first if debug. */
+set<TTag>
+tagger_utils::require_similar_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, bool debug) {
+  if (!td.getOutput().has_not(tags)) {
+    return tags;
+  }
+  if (debug) _warn_absent_ambiguity_class(word);
+  return find_similar_ambiguity_class(td, tags);
+}
+
+/** Prints the absent-ambiguity-class warning for word when tags is not a known
+ *  ambiguity class of td and debug is set. */
+void tagger_utils::warn_absent_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, bool debug) {
+  const bool unknown = td.getOutput().has_not(tags);
+  if (unknown && debug) _warn_absent_ambiguity_class(word);
+}
+
+// Writes f as its size followed by " key value" for every entry, in the map's iteration order.
+template <class T>
+ostream& operator<< (ostream& os, const map <int, T> & f){
+  os << f.size();
+  for (typename map <int, T>::const_iterator entry = f.begin(); entry != f.end(); ++entry)
+    os << ' ' << entry->first << ' ' << entry->second;
+  return os;
+}
+
+// Reads an entry count and then that many key/value pairs into f, which is emptied first.
+template <class T>
+istream& operator>> (istream& is, map <int, T> & f) {
+  f.clear();
+  int remaining, key;
+  is >> remaining;
+  for (; remaining > 0; --remaining) {
+    is >> key;     // the key must be extracted in its own statement,
+    is >> f[key];  // before f[key] is evaluated
+  }
+  if (is.bad()) tagger_utils::fatal_error(L"reading map");
+  return is;
+}
+
+// Writes s as a brace-enclosed, comma-separated list such as {1,2,3}; an empty set gives {}.
+template <class T>
+ostream& operator<< (ostream& os, const set<T>& s) {
+  os << '{';
+  for (typename set<T>::const_iterator elem = s.begin(); elem != s.end(); ++elem) {
+    if (elem != s.begin()) os << ',';
+    os << *elem;
+  }
+  os << '}';
+  return os;
+}
+
Index: branches/apertium-tagger/apertium2/apertium/tagger_utils.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_utils.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_utils.h	(revision 69632)
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __TAGGERUTILS_H
+#define __TAGGERUTILS_H
+
+#include <map>
+#include <string>
+#include <set>
+#include <fstream>
+#include <iostream>
+#include <vector>
+#include <apertium/ttag.h>
+#include <cstdlib>
+#include <apertium/tagger_data.h>
+#include <apertium/tagger_word.h>
+
+using namespace std;
+
+namespace tagger_utils
+{
+/** Print a fatal error message on wcerr and terminate the program (exit status 1)
+ *  @param s the error message to print
+ */
+void fatal_error (wstring const &s);
+
+/** Print a fatal error message related to a file on cerr and terminate the program (exit status 1)
+ *  @param s the file name to be printed in the error message
+ */
+void file_name_error (string const &s);
+
+/** Convert from int to string
+ *  @param i the int value to convert
+ *  @return a string representing the number received as input; it lives in a static buffer that the next call overwrites
+ */
+char *itoa(int i);
+
+/** Make all array positions equal to zero
+ *  @param a the array
+ *  @param l length of the array a
+ */
+void clear_array_double(double a[], int l);
+
+/** Clear all vectors stored in array v
+ *  @param v array of vectors
+ *  @param l length of the array v
+ */
+void clear_array_vector(vector<TTag> v[], int l);
+
+/** Return the number of tokens in the multiword unit, i.e. its non-empty pieces separated by '_'
+ */
+ int ntokens_multiword(wstring const &s);
+ 
+/** Return the number of non-empty pieces, separated by hyphens ('-'), in the string passed as argument
+  */
+int nguiones_fs(wstring const &cadena);
+
+/** Reads the expanded dictionary received as a parameter and stores in td the
+ *  resulting ambiguity classes that the tagger will manage.
+ *  @param fdic the input stream with the expanded dictionary to read
+ *  @param td the tagger data instance to mutate
+ */
+void read_dictionary(FILE *fdic, TaggerData &td);
+
+/** This method returns the smallest known ambiguity class that contains every
+*  tag of the one received as a parameter. This is useful when a new
+*  ambiguity class is found because of changes in the morphological
+*  dictionary used by the MT system.
+*  @param c set of tags (ambiguity class)
+*  @return a known ambiguity class (the open class when no smaller one contains c)
+*/
+set<TTag> find_similar_ambiguity_class(TaggerData &td, set<TTag> &c);
+
+/** Dies with an error message if the tags aren't in the tagger data */
+void require_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, int nw);
+
+/** As with find_similar_ambiguity_class, but returns tags if it's already fine
+ * & prints a warning if debug */
+set<TTag> require_similar_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, bool debug);
+
+/** Just prints a warning if debug */
+void warn_absent_ambiguity_class(TaggerData &td, set<TTag> &tags, TaggerWord &word, bool debug);
+/** Collapse each run of spaces in s into one space and drop one leading and one trailing space */
+wstring trim(wstring s);
+
+};
+
+template <class T>
+ostream& operator<< (ostream& os, const map <int, T> & f);
+template <class T>
+istream& operator>> (istream& is, map <int, T> & f);
+template <class T>
+ostream& operator<< (ostream& os, const set<T>& s);
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/hmm.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/hmm.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/hmm.cc	(revision 69632)
@@ -0,0 +1,872 @@
+
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/*
+ *  First order hidden Markov model (HMM) implementation (source)
+ *
+ *  @author	Felipe Sánchez-Martínez - fsanchez@dlsi.ua.es
+ */
+
+#include <apertium/hmm.h>
+#include <apertium/tagger_utils.h>
+#include  "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+#include <lttoolbox/compression.h>
+
+#ifdef WIN32
+#define isnan(n) _isnan(n)
+#define isinf(n) (!_finite(n))
+#endif
+
+#ifdef __clang__
+#undef __GNUC__
+#endif
+
+#include <stdio.h>
+#include <unistd.h>
+#include <vector>
+#include <algorithm>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+using namespace tagger_utils;
+
+void HMM::deserialise(FILE *Serialised_FILE_Tagger) { // rebuild tdhmm from a serialised stream
+  tdhmm.read(Serialised_FILE_Tagger);
+  eos = (tdhmm.getTagIndex())[L"TAG_SENT"]; // cache the index stored under "TAG_SENT"
+}
+
+std::vector<std::wstring> &HMM::getArrayTags() { // forwards to the tag array held by tdhmm
+  return tdhmm.getArrayTags();
+}
+
+void HMM::serialise(FILE *Stream_) { tdhmm.write(Stream_); } // writes tdhmm to Stream_
+
+void HMM::deserialise(const TaggerData &Deserialised_FILE_Tagger) { // adopt already-loaded tagger data
+  tdhmm = TaggerDataHMM(Deserialised_FILE_Tagger);
+  eos = (tdhmm.getTagIndex())[L"TAG_SENT"]; // cache the index stored under "TAG_SENT"
+}
+
+void HMM::init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
+                                               FILE *UntaggedCorpus) {
+  init_probabilities_from_tagged_text(TaggedCorpus, UntaggedCorpus); // estimate from the two corpora
+  apply_rules(); // the rules are applied once the estimation is done
+}
+
+void HMM::init_probabilities_kupiec_(FILE *Corpus) {
+  init_probabilities_kupiec(Corpus); // estimate from the corpus
+  apply_rules(); // the rules are applied once the estimation is done
+}
+
+// Performs Count training passes over Corpus and then applies the rules once.
+void HMM::train(FILE *Corpus, unsigned long Count) {
+  for (unsigned long pass = 0; pass != Count; ++pass) {
+    std::fseek(Corpus, 0, SEEK_SET); // every pass reads the corpus from its start
+    train(Corpus);
+  }
+  apply_rules();
+}
+
+HMM::HMM() {}
+
+// Stores a copy of tdhmm; `this->` is required because the parameter hides the member of the same name.
+HMM::HMM(TaggerDataHMM tdhmm) {
+  this->tdhmm = tdhmm; // was `tdhmm = tdhmm`, which assigned the parameter to itself
+  eos = (tdhmm.getTagIndex())[L"TAG_SENT"];
+}
+
+HMM::HMM(TaggerDataHMM *tdhmm) : tdhmm(*tdhmm) {} // copies *tdhmm; NOTE(review): eos is not assigned here, unlike the by-value constructor
+
+HMM::~HMM() {}
+
+void
+HMM::init()
+{
+}
+
+// Replaces the stored end-of-sentence tag.
+void HMM::set_eos(TTag t)
+{
+  this->eos = t;
+}
+
+/** Reads ambiguity classes from in until end of file. Each class is stored as
+ *  a tag count followed by that many tags, every value being read with
+ *  Compression::multibyte_read; non-empty classes are added to the output
+ *  collection of tdhmm. Finally setProbabilities is called with the number of
+ *  entries in the tag index and in the output collection. */
+void
+HMM::read_ambiguity_classes(FILE *in)
+{
+  while (in != NULL) {
+    int remaining = Compression::multibyte_read(in);
+    if (feof(in)) {
+      break;
+    }
+
+    set<TTag> tags_of_class;
+    while (remaining != 0) {
+      tags_of_class.insert(Compression::multibyte_read(in));
+      --remaining;
+    }
+    if (tags_of_class.size() != 0) {
+      tdhmm.getOutput().add(tags_of_class);
+    }
+  }
+
+  tdhmm.setProbabilities(tdhmm.getTagIndex().size(), tdhmm.getOutput().size());
+}
+
+/** Writes every ambiguity class of tdhmm to out: its number of tags, then the
+ *  tags themselves, each value written with Compression::multibyte_write. */
+void
+HMM::write_ambiguity_classes(FILE *out)
+{
+  const int num_classes = tdhmm.getOutput().size();
+  for (int k = 0; k != num_classes; ++k) {
+    set<TTag> const &tags_of_class = (tdhmm.getOutput())[k];
+    Compression::multibyte_write(tags_of_class.size(), out);
+    for (set<TTag>::const_iterator tag = tags_of_class.begin(); tag != tags_of_class.end(); ++tag) {
+      Compression::multibyte_write(*tag, out);
+    }
+  }
+}
+
+// Loads tdhmm from the stream in.
+void HMM::read_probabilities(FILE *in)
+{
+  tdhmm.read(in);
+}
+
+// Writes tdhmm to the stream out.
+void HMM::write_probabilities(FILE *out)
+{
+  tdhmm.write(out);
+}
+
+/** Initialises the transition matrix A and the emission matrix B of tdhmm
+ *  from a corpus (the estimation the function is named after: Kupiec).
+ *
+ *  Steps:
+ *   1. count the occurrences of every ambiguity class and of every pair of
+ *      consecutive ambiguity classes (all counters start at 1);
+ *   2. share each count equally among the tags of the class(es) involved to
+ *      estimate per-tag and per-tag-pair counts;
+ *   3. normalise those estimates into A[i][j] and B[i][k].
+ *
+ *  The work tables are std::vector on every compiler. The previous __GNUC__
+ *  branch used variable-length arrays, a non-standard extension that put
+ *  M*M and N*N doubles on the stack and can overflow it for large models;
+ *  the initial values (1 for class counters, 0 for tag estimates) are the
+ *  same as before.
+ *  Progress dots are written to wcerr every 10000 words.
+ *
+ *  @param is stream the corpus words are read from
+ */
+void 
+HMM::init_probabilities_kupiec (FILE *is)
+{
+  int N = tdhmm.getN(); //N = Number of tags (states)
+  int M = tdhmm.getM(); //M = Number of ambiguity classes
+  int i=0, j=0, k=0, k1=0, k2=0, nw=0;
+  vector <double> classes_ocurrences (M, 1);
+  vector <vector <double> > classes_pair_ocurrences(M, vector<double>(M, 1));
+  vector <double> tags_estimate(N, 0);
+  vector <vector <double> > tags_pair_estimate(N, vector<double>(N, 0));
+
+  Collection &output = tdhmm.getOutput();
+
+  MorphoStream lexmorfo(is, true, &tdhmm);
+
+  TaggerWord *word=NULL;
+
+  set<TTag> tags;
+  tags.insert(eos);
+  k1=output[tags]; //The first tag (ambiguity class) seen is the end-of-sentence
+
+  //We count for each ambiguity class the number of ocurrences
+  //(k1 is the class of the previous word, k2 the class of the current one)
+  word = lexmorfo.get_next_word();
+  while((word)) {
+    if (++nw%10000==0) wcerr<<L'.'<<flush;
+
+    tags=word->get_tags();
+
+    if (tags.size()==0) { //This is an unknown word
+      tags = tdhmm.getOpenClass();
+    }
+    else {
+      require_ambiguity_class(tdhmm, tags, *word, nw);
+    }
+
+    k2=output[tags];
+
+    classes_ocurrences[k1]++;
+    classes_pair_ocurrences[k1][k2]++;  //k1 followed by k2
+    delete word;
+    word=lexmorfo.get_next_word();
+
+    k1=k2;
+
+  }
+
+  //Estimation of the number of time each tags occurs in the training text:
+  //the occurrences of a class are shared equally among its tags
+  for(i=0; i<N; i++) {
+    for(k=0; k<M;  k++) {
+
+      if(output[k].find(i) != output[k].end())
+        tags_estimate[i] += classes_ocurrences[k]/output[k].size();
+    }
+  }
+
+  //Estimation of the number of times each tag pair occurs: a class pair's
+  //count is shared equally among all (tag of k1, tag of k2) combinations
+  set<TTag> tags1, tags2;
+  set<TTag>::iterator itag1, itag2;
+  for(k1=0; k1<M; k1++) {
+    tags1=output[k1];
+    for(k2=0; k2<M; k2++) {
+      tags2=output[k2];
+      double nocurrences=classes_pair_ocurrences[k1][k2]/((double)(tags1.size()*tags2.size()));
+      for (itag1=tags1.begin(); itag1!=tags1.end(); itag1++) {
+        for (itag2=tags2.begin(); itag2!=tags2.end(); itag2++)
+          tags_pair_estimate[*itag1][*itag2]+=nocurrences;
+      }
+    }
+  }
+
+  //a[i][j] estimation: each row of the pair estimates is normalised by its
+  //sum; a row whose sum is not positive is set to 0
+  double sum;
+  for(i=0; i<N; i++) {
+    sum=0;
+    for(j=0; j<N; j++)
+      sum+=tags_pair_estimate[i][j];
+
+    for(j=0; j<N; j++) {
+      if (sum>0)
+        (tdhmm.getA())[i][j] = tags_pair_estimate[i][j]/sum;
+      else {
+        (tdhmm.getA())[i][j] = 0;
+      }
+    }
+  }
+
+  //b[i][k] estimation; entries for classes that do not contain tag i are
+  //not written here
+  for(i=0; i<N; i++) {
+    for(k=0; k<M; k++)  {
+      if (output[k].find(i)!=output[k].end()) {
+        if (tags_estimate[i]>0)
+          (tdhmm.getB())[i][k] = (classes_ocurrences[k]/output[k].size())/tags_estimate[i];
+        else
+          (tdhmm.getB())[i][k] = 0;
+      }
+    }
+  }
+  wcerr<<L"\n";
+}
+
+// Initialises the transition (A) and emission (B) tables from a hand-tagged
+// corpus. ftagged and funtagged must contain the same words in the same
+// order: the former with one tag per word, the latter with every analysis.
+// Counts are smoothed when the tables are filled: each transition gets one
+// extra count and each tag shares one extra count among its ambiguity classes.
+// Exits with status 1 if the streams are misaligned or funtagged ends first.
+void 
+HMM::init_probabilities_from_tagged_text(FILE *ftagged, FILE *funtagged) {
+  int i, j, k, nw=0;
+  int N = tdhmm.getN();
+  int M = tdhmm.getM();
+#ifdef __GNUC__
+  double tags_pair[N][N];
+  double emission[N][M];
+#else
+  vector <vector <double> > tags_pair(N, vector<double>(N, 0));
+  vector <vector <double> > emission(N, vector<double>(M, 0));
+#endif
+
+  MorphoStream stream_tagged(ftagged, true, &tdhmm);
+  MorphoStream stream_untagged(funtagged, true, &tdhmm);
+
+  TaggerWord *word_tagged=NULL, *word_untagged=NULL;
+  Collection &output = tdhmm.getOutput();
+  set<TTag> tags;
+
+#ifdef __GNUC__
+  // The variable-length arrays start with indeterminate contents. Clear every
+  // cell: the counting loop may increment emission[tag][k] even when the
+  // dictionary does not list tag in ambiguity class k.
+  for(i=0; i<N; i++) {
+    for(j=0; j<N; j++)
+      tags_pair[i][j]=0;
+    for(k=0; k<M; k++)
+      emission[i][k]=0;
+  }
+#endif
+
+  TTag tag1, tag2;
+  tag1 = eos; // The first seen tag is the end-of-sentence tag
+
+  word_tagged = stream_tagged.get_next_word();
+  word_untagged = stream_untagged.get_next_word();
+  while(word_tagged) {
+    // Must come before the first dereference of word_untagged: the untagged
+    // stream can run out of words while the tagged one still has some.
+    if (word_untagged==NULL) {
+      wcerr<<L"word_untagged==NULL\n";
+      exit(1);
+    }
+
+    wcerr<<*word_tagged;
+    wcerr<<L" -- "<<*word_untagged<<L"\n";
+
+    if (word_tagged->get_superficial_form()!=word_untagged->get_superficial_form()) {
+      wcerr<<L"\nTagged text (.tagged) and analyzed text (.untagged) streams are not aligned.\n";
+      wcerr<<L"Take a look at tagged text (.tagged).\n";
+      wcerr<<L"Perhaps this is caused by a multiword unit that is not a multiword unit in one of the two files.\n";
+      wcerr<<*word_tagged<<L" -- "<<*word_untagged<<L"\n";
+      exit(1);
+    }
+
+    if (++nw%100==0) wcerr<<L'.'<<flush;
+
+    tag2 = tag1; // tag2: tag of the previous word, tag1: tag of this word
+
+    if (word_tagged->get_tags().size()==0) // Unknown word
+      tag1 = -1;
+    else if (word_tagged->get_tags().size()>1) // Ambiguous word: only reported, tag1 keeps its previous value
+      wcerr<<L"Error in tagged text. An ambiguous word was found: "<<word_tagged->get_superficial_form()<<L"\n";
+    else
+      tag1 = *(word_tagged->get_tags()).begin();
+
+    if ((tag1>=0) && (tag2>=0))
+      tags_pair[tag2][tag1]++;
+
+    // The ambiguity class comes from the untagged analysis of the same word.
+    if (word_untagged->get_tags().size()==0) { // Unknown word
+      tags = tdhmm.getOpenClass();
+    }
+    else {
+      require_ambiguity_class(tdhmm, word_untagged->get_tags(), *word_untagged, nw);
+      tags = word_untagged->get_tags();
+    }
+
+    k=output[tags]; // index of that ambiguity class
+    if(tag1>=0)
+      emission[tag1][k]++;
+
+    delete word_tagged;
+    word_tagged=stream_tagged.get_next_word();
+    delete word_untagged;
+    word_untagged=stream_untagged.get_next_word();
+  }
+
+  //Estimate of a[i][j]: relative frequency with one added count per transition
+  for(i=0; i<N; i++) {
+    double sum=0;
+    for(j=0; j<N; j++)
+      sum += tags_pair[i][j]+1.0;
+    for(j=0; j<N; j++)
+      (tdhmm.getA())[i][j] = (tags_pair[i][j]+1.0)/sum;
+  }
+
+  //Estimate of b[i][k], only for the classes k that contain tag i
+  for(i=0; i<N; i++) {
+    int nclasses_appear=0;
+    double times_appear=0.0;
+    for(k=0; k<M; k++)  {
+      if (output[k].find(i)!=output[k].end())  {
+        nclasses_appear++;
+        times_appear+=emission[i][k];
+      }
+    }
+    // One extra count is spread evenly over the classes that contain tag i.
+    for(k=0; k<M; k++)  {
+      if (output[k].find(i)!=output[k].end())
+        (tdhmm.getB())[i][k] = (emission[i][k]+(((double)1.0)/((double)nclasses_appear)))/(times_appear+((double)1.0));
+    }
+  }
+
+  wcerr<<L"\n";
+}
+  
+void // Applies the forbid and enforce rules stored in tdhmm to the transition matrix A, then renormalises every row of A.
+HMM::apply_rules()
+{
+  vector<TForbidRule> &forbid_rules = tdhmm.getForbidRules();
+  vector<TEnforceAfterRule> &enforce_rules = tdhmm.getEnforceRules();
+  int N = tdhmm.getN();
+  int i, j, j2;
+  bool found;
+   
+  for(i=0; i<(int) forbid_rules.size(); i++) { // Forbid rules: the transition tagi -> tagj is set to ZERO
+    (tdhmm.getA())[forbid_rules[i].tagi][forbid_rules[i].tagj] = ZERO;
+  }
+
+  for(i=0; i<(int) enforce_rules.size(); i++) { // Enforce rules: after tagi only the tags listed in tagsj keep their value
+    for(j=0; j<N; j++) {
+      found = false;
+      for (j2=0; j2<(int) enforce_rules[i].tagsj.size(); j2++) {
+	if (enforce_rules[i].tagsj[j2]==j) {
+	  found = true;
+	  break;
+	}	  
+      }
+      if (!found) // j is not one of the followers allowed by this rule
+        (tdhmm.getA())[enforce_rules[i].tagi][j] = ZERO;
+    }
+  }
+    
+  // Normalize probabilities so that every row of A sums to 1 again
+  for(i=0; i<N; i++) {
+    double sum=0;
+    for(j=0; j<N; j++) 
+      sum += (tdhmm.getA())[i][j];
+    for(j=0; j<N; j++) {
+      if (sum>0)
+	(tdhmm.getA())[i][j] = (tdhmm.getA())[i][j]/sum;
+      else // the row sums to zero: store zeros instead of dividing by zero
+	(tdhmm.getA())[i][j] = 0;
+    }
+  }
+}
+
+void // Reads the dictionary from fdic into tdhmm and hands the resulting table dimensions to tdhmm.setProbabilities().
+HMM::read_dictionary(FILE *fdic) {
+  tagger_utils::read_dictionary(fdic, tdhmm);
+  int N = (tdhmm.getTagIndex()).size(); // N: number of entries in the tag index (reported as "states")
+  int M = (tdhmm.getOutput()).size(); // M: number of entries in the output collection (reported as "ambiguity classes")
+  wcerr << N << L" states and " << M <<L" ambiguity classes\n";
+
+  tdhmm.setProbabilities(N, M);
+}
+
+void // Copies to `out` the first word found in `in` for each distinct non-empty tag set; all other words are dropped.
+HMM::filter_ambiguity_classes(FILE *in, FILE *out) {
+  set<set<TTag> > ambiguity_classes; // tag sets for which a word has already been written
+  MorphoStream morpho_stream(in, true, &tdhmm);
+  
+  TaggerWord *word = morpho_stream.get_next_word();
+  
+  while(word) {
+    set<TTag> tags = word->get_tags();
+    if(tags.size() > 0) {     // words without any tag are never written
+      if(ambiguity_classes.find(tags) == ambiguity_classes.end()) { // first word with this tag set
+	    ambiguity_classes.insert(tags);
+	    word->outputOriginal(out);
+	    //wcerr<<word->get_string_tags()<<L"\n";
+      }
+    }
+    delete word; // each word returned by get_next_word() is deleted here once processed
+    word = morpho_stream.get_next_word();
+  }
+}
+
+void // One re-estimation pass over the untagged corpus ftxt (the error messages below call it "BW"): accumulates xsi, gamma and phi, then rewrites A and B in tdhmm.
+HMM::train (FILE *ftxt) {
+  int i, j, k, t, len, nw = 0;
+  TaggerWord *word=NULL;
+  TTag tag; 
+  set<TTag> tags, pretags;
+  set<TTag>::iterator itag, jtag;
+  map <int, double> gamma; // gamma[i]: accumulated alpha*beta/prob for tag i
+  map <int, double>::iterator jt, kt;
+  map < int, map <int, double> > alpha, beta, xsi, phi; // xsi[j][i]: accumulated weight of transition j->i; phi[i][k]: accumulated weight of tag i in class k
+  map < int, map <int, double> >::iterator it;
+  double prob, loli;              // loli: running sum of -log(prob), printed at the end
+  vector < set<TTag> > pending; // tag sets of the words since (and including) the last unambiguous one
+  Collection &output = tdhmm.getOutput();
+  
+  int ndesconocidas=0; // number of unknown words seen
+  // alpha => forward probabilities
+  // beta  => backward probabilities
+  
+  MorphoStream morpho_stream(ftxt, true, &tdhmm);
+
+  loli = 0;
+  tag = eos; // the text is treated as if it were preceded by the end-of-sentence tag
+  tags.clear();
+  tags.insert(tag);
+  pending.push_back(tags);
+
+  alpha[0].clear();      
+  alpha[0][tag] = 1;
+
+  word = morpho_stream.get_next_word();
+
+  while (word) {   
+
+    //wcerr<<L"Enter para continuar\n";
+    //getchar();
+
+    if (++nw%10000==0) wcerr<<L'.'<<flush;
+
+    //wcerr<<*word<<L"\n";
+
+    pretags = pending.back(); // tags of the previous word
+
+    tags = word->get_tags();    
+    
+    if (tags.size()==0) { // This is an unknown word
+      tags = tdhmm.getOpenClass();
+      ndesconocidas++;
+    }
+    
+    require_ambiguity_class(tdhmm, tags, *word, nw);
+    
+    k = output[tags];    
+    len = pending.size(); // position of the current word inside the pending segment
+    alpha[len].clear();     
+      
+    //Forward probabilities
+    for (itag=tags.begin(); itag!=tags.end(); itag++) {
+      i=*itag;
+      for (jtag=pretags.begin(); jtag!=pretags.end(); jtag++) {
+         j=*jtag;
+         //cerr<<"previous alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n";
+	 //cerr<<"alpha["<<len-1<<"]["<<j<<"]="<<alpha[len-1][j]<<"\n";
+         //cerr<<"a["<<j<<"]["<<i<<"]="<<a[j][i]<<"\n";
+         //cerr<<"b["<<i<<"]["<<k<<"]="<<b[i][k]<<"\n";
+	 alpha[len][i] += alpha[len-1][j]*(tdhmm.getA())[j][i]*(tdhmm.getB())[i][k];
+      }
+      if (alpha[len][i]==0) // replace an exact zero by the smallest positive double
+        alpha[len][i]=DBL_MIN;
+      //cerr<<"alpha["<<len<<"]["<<i<<"]="<<alpha[len][i]<<"\n--------\n";
+    }
+
+    if (tags.size()>1) { // ambiguous word: the segment stays open
+      pending.push_back(tags);
+    } else {  // word is unambiguous
+      tag = *tags.begin(); 
+      beta[0].clear();
+      beta[0][tag] = 1;   
+      
+      prob = alpha[len][tag]; // forward value of the segment that ends at this word
+      
+      //cerr<<"prob="<<prob<<"\n";
+      //cerr<<"alpha["<<len<<"]["<<tag<<"]="<<alpha[len][tag]<<"\n";
+      loli -= log(prob);  
+      
+      for (t=0; t<len; t++) {  // loop from T-1 to 0	
+	  pretags = pending.back();
+	  pending.pop_back();
+   	  k = output[tags];
+	     beta[1-t%2].clear(); // the two beta vectors alternate: [t%2] is read, [1-t%2] is filled
+	     for (itag=tags.begin(); itag!=tags.end(); itag++) {
+	       i=*itag;
+	       for (jtag=pretags.begin(); jtag!=pretags.end(); jtag++) {
+	         j = *jtag;	      
+	         beta[1-t%2][j] += (tdhmm.getA())[j][i]*(tdhmm.getB())[i][k]*beta[t%2][i];
+	         xsi[j][i] += alpha[len-t-1][j]*(tdhmm.getA())[j][i]*(tdhmm.getB())[i][k]*beta[t%2][i]/prob;
+	       }
+	       double previous_value = gamma[i]; // kept only for the diagnostics below
+       
+	       gamma[i] +=  alpha[len-t][i]*beta[t%2][i]/prob;		       
+	       if (isnan(gamma[i])) {
+	          wcerr<<L"NAN(3) gamma["<<i<<L"] = "<<gamma[i]<<L" alpha["<<len-t<<L"]["<<i<<L"]= "<<alpha[len-t][i]
+	               <<L" beta["<<t%2<<L"]["<<i<<L"] = "<<beta[t%2][i]<<L" prob = "<<prob<<L" previous gamma = "<<previous_value<<L"\n";
+	          exit(1);	               
+	       }
+	       if (isinf(gamma[i])) {
+	          wcerr<<L"INF(3) gamma["<<i<<L"] = "<<gamma[i]<<L" alpha["<<len-t<<L"]["<<i<<L"]= "<<alpha[len-t][i]
+	               <<L" beta["<<t%2<<L"]["<<i<<L"] = "<<beta[t%2][i]<<L" prob = "<<prob<<L" previous gamma = "<<previous_value<<L"\n";
+	          exit(1);	               
+	       }
+	       if (gamma[i]==0) {
+	          //cout<<"ZERO(3) gamma["<<i<<"] = "<<gamma[i]<<" alpha["<<len-t<<"]["<<i<<"]= "<<alpha[len-t][i]
+	          //    <<" beta["<<t%2<<"]["<<i<<"] = "<<beta[t%2][i]<<" prob = "<<prob<<" previous gamma = "<<previous_value<<"\n";
+	          gamma[i]=DBL_MIN;
+	          //exit(1);	               
+	       }
+	        phi[i][k] += alpha[len-t][i]*beta[t%2][i]/prob;
+	     }
+	     tags=pretags; // step one word back
+      }
+	
+      tags.clear(); // start a new segment from the unambiguous tag
+      tags.insert(tag);
+      pending.push_back(tags);
+      alpha[0].clear();
+      alpha[0][tag] = 1;
+    }
+    
+    delete word; 
+    word = morpho_stream.get_next_word();
+  }  
+
+  if ((pending.size()>1) || ((tag!=eos)&&(tag != (tdhmm.getTagIndex())[L"TAG_kEOF"]))) 
+    wcerr<<L"Warning: Thee las tag is not the end-of-sentence-tag\n"; // NOTE(review): the message text contains typos; it is runtime output and is left unchanged here
+  
+  
+  int N = tdhmm.getN();
+  int M = tdhmm.getM();
+  
+  //Clean previous values  
+  for(i=0; i<N; i++) {
+     for(j=0; j<N; j++)
+        (tdhmm.getA())[i][j]=ZERO;
+     for(k=0; k<M; k++)
+        (tdhmm.getB())[i][k]=ZERO;
+  }
+  
+  // new parameters: a[i][j] = xsi[i][j]/gamma[i]
+  for (it=xsi.begin(); it!=xsi.end(); it++) {
+    i = it->first;
+    for (jt=xsi[i].begin(); jt!=xsi[i].end(); jt++) {
+      j = jt->first;
+      if (xsi[i][j]>0) {        
+        if (gamma[i]==0) {
+          wcerr<<L"Warning: gamma["<<i<<L"]=0\n";
+          gamma[i]=DBL_MIN;
+        }
+        
+        (tdhmm.getA())[i][j] = xsi[i][j]/gamma[i];
+	
+        if (isnan((tdhmm.getA())[i][j])) {
+          wcerr<<L"NAN\n";
+          wcerr <<L"Error: BW - NAN(1) a["<<i<<L"]["<<j<<L"]="<<(tdhmm.getA())[i][j]<<L"\txsi["<<i<<L"]["<<j<<L"]="<<xsi[i][j]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+	  exit(1);
+        }
+	if (isinf((tdhmm.getA())[i][j])) {
+	  wcerr<<L"INF\n"; 
+          wcerr <<L"Error: BW - INF(1) a["<<i<<L"]["<<j<<L"]="<<(tdhmm.getA())[i][j]<<L"\txsi["<<i<<L"]["<<j<<L"]="<<xsi[i][j]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+          exit(1);
+        }
+	if ((tdhmm.getA())[i][j]==0) {
+          //cerr <<"Error: BW - ZERO(1) a["<<i<<"]["<<j<<"]="<<(tdhmm.getA())[i][j]<<"\txsi["<<i<<"]["<<j<<"]="<<xsi[i][j]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
+	  //     exit(1);
+        }
+      }
+    }
+  }
+
+  for (it=phi.begin(); it!=phi.end(); it++) { // b[i][k] = phi[i][k]/gamma[i]
+    i = it->first;
+    for (kt=phi[i].begin(); kt!=phi[i].end(); kt++) {
+      k = kt->first;
+      if (phi[i][k]>0) {
+        (tdhmm.getB())[i][k] = phi[i][k]/gamma[i];	
+        
+	if (isnan((tdhmm.getB())[i][k])) {
+          wcerr<<L"Error: BW - NAN(2) b["<<i<<L"]["<<k<<L"]="<<(tdhmm.getB())[i][k]<<L"\tphi["<<i<<L"]["<<k<<L"]="<<phi[i][k]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+	       exit(1);
+        }
+	if (isinf((tdhmm.getB())[i][k])) {
+          wcerr<<L"Error: BW - INF(2) b["<<i<<L"]["<<k<<L"]="<<(tdhmm.getB())[i][k]<<L"\tphi["<<i<<L"]["<<k<<L"]="<<phi[i][k]<<L"\tgamma["<<i<<L"]="<<gamma[i]<<L"\n";
+	       exit(1);
+        }
+	if ((tdhmm.getB())[i][k]==0) {
+          //cerr <<"Error: BW - ZERO(2) b["<<i<<"]["<<k<<"]="<<(tdhmm.getB())[i][k]<<"\tphi["<<i<<"]["<<k<<"]="<<phi[i][k]<<"\tgamma["<<i<<"]="<<gamma[i]<<"\n";
+	  //     exit(1);
+        }
+      }
+    }
+  }
+
+  //Some probabilities may not have been updated above (they still hold ZERO),
+  //so every row is normalized here
+  for(i=0; i<N; i++) {
+    double sum=0;
+    for(j=0; j<N; j++)
+      sum+=(tdhmm.getA())[i][j];
+    for(j=0; j<N; j++)
+      (tdhmm.getA())[i][j]=(tdhmm.getA())[i][j]/sum;
+  }
+
+  for(i=0; i<N; i++) { // B is normalized only over the classes k that contain tag i
+    double sum=0;
+    for(k=0; k<M; k++) {
+      if(output[k].find(i)!=output[k].end())
+        sum+=(tdhmm.getB())[i][k];
+    }
+    for(k=0; k<M; k++) {
+      if(output[k].find(i)!=output[k].end())
+        (tdhmm.getB())[i][k]=(tdhmm.getB())[i][k]/sum;
+    }
+  }
+
+  wcerr<<L"Log="<<loli<<L"\n";
+}
+
+void // Disambiguates the words read from Input and writes the chosen form of each word to Output.
+HMM::tagger(FILE *Input, FILE *Output, const bool &First) { // First: write get_all_chosen_tag_first() instead of get_lexical_form()
+  int i, j, k, nw;
+  TaggerWord *word=NULL;
+  TTag tag;
+  
+  set <TTag> ambg_class_tags, tags, pretags;
+  set <TTag>::iterator itag, jtag;
+  
+  double prob, loli, x;
+  int N = tdhmm.getN();  
+#ifdef __GNUC__
+  double alpha[2][N]; // alpha[p][i]: best score of a path ending in tag i; the two rows alternate between previous and current word
+  vector<TTag> best[2][N]; // best[p][i]: tag sequence of that best path
+#else
+  vector <vector <double> > alpha(2, vector<double>(N));
+  vector <vector <vector<TTag> > > best(2, vector <vector <TTag> >(N));
+#endif
+  
+  vector <TaggerWord> wpend; // words read since the last unambiguous one, not yet written
+  int nwpend;
+  
+  MorphoStream morpho_stream(Input, debug, &tdhmm);                             
+  morpho_stream.setNullFlush(null_flush);
+  
+  Collection &output = tdhmm.getOutput();
+  
+  loli = nw = 0;
+  
+  //Initialization
+  tags.insert(eos);
+  alpha[0][eos] = 1;
+   
+  word = morpho_stream.get_next_word();
+ 
+  while (word) {
+    wpend.push_back(*word);    	    
+    nwpend = wpend.size();
+    
+    pretags = tags; // Tags from the previous word
+
+    tags = word->get_tags();
+  
+    if (tags.size()==0) // This is an unknown word
+      tags = tdhmm.getOpenClass();
+                       
+    ambg_class_tags = require_similar_ambiguity_class(tdhmm, tags, *word, debug);
+         
+    k = output[ambg_class_tags];  //Ambiguity class the word belongs to
+    
+#ifdef __GNUC__
+    clear_array_double(alpha[nwpend%2], N);    
+    clear_array_vector(best[nwpend%2], N);
+#else
+    clear_array_double(&alpha[nwpend%2][0], N);    
+    clear_array_vector(&best[nwpend%2][0], N);
+#endif
+    
+    //Induction
+    for (itag=tags.begin(); itag!=tags.end(); itag++) { //For all tag from the current word
+      i=*itag;
+      for (jtag=pretags.begin(); jtag!=pretags.end(); jtag++) {	//For all tags from the previous word
+	j=*jtag;
+	x = alpha[1-nwpend%2][j]*(tdhmm.getA())[j][i]*(tdhmm.getB())[i][k];
+	if (alpha[nwpend%2][i]<=x) { // x is at least as good as the best score so far: take j as predecessor of i
+	  if (nwpend>1) 
+	    best[nwpend%2][i] = best[1-nwpend%2][j];
+	  best[nwpend%2][i].push_back(i);
+	  alpha[nwpend%2][i] = x;
+	}
+      }
+    }
+    
+    //Backtracking
+    if (tags.size()==1) {       // unambiguous word: the path ending in its tag is written out
+      tag = *tags.begin();      
+      
+      prob = alpha[nwpend%2][tag];
+      
+      if (prob>0) 
+	loli -= log(prob);
+      else {
+        if (debug)
+	  wcerr<<L"Problem with word '"<<word->get_superficial_form()<<L"' "<<word->get_string_tags()<<L"\n";
+      }
+      for (unsigned t=0; t<best[nwpend%2][tag].size(); t++) { // one chosen tag per pending word
+	if (First) {
+	  wstring const &micad = wpend[t].get_all_chosen_tag_first(best[nwpend%2][tag][t], (tdhmm.getTagIndex())[L"TAG_kEOF"]);
+	  fputws_unlocked(micad.c_str(), Output); 
+	} else {
+	  // print Output
+	  wpend[t].set_show_sf(show_sf);
+	  wstring const &micad = wpend[t].get_lexical_form(best[nwpend%2][tag][t], (tdhmm.getTagIndex())[L"TAG_kEOF"]);
+	  fputws_unlocked(micad.c_str(), Output); 
+	}
+      }
+      
+      //Return to the initial state
+      wpend.clear();   
+      alpha[0][tag] = 1;
+    }
+    
+    delete word;
+    
+    if(morpho_stream.getEndOfFile())
+    {
+      if(null_flush)
+      { // null-flush mode: write a NUL character and restart from the end-of-sentence tag
+        fputwc_unlocked(L'\0', Output);
+        tags.clear();
+        tags.insert(eos);
+        alpha[0][eos] = 1;
+      }
+      
+      fflush(Output);
+      morpho_stream.setEndOfFile(false); // clear the flag so that reading continues
+    }
+    word = morpho_stream.get_next_word();    
+  }
+  
+  if ((tags.size()>1)&&(debug)) { // the input ended on an ambiguous word, so pending words were never written
+    wstring errors;
+    errors = L"The text to disambiguate has finished, but there are ambiguous words that has not been disambiguated.\n";
+    errors+= L"This message should never appears. If you are reading this ..... these are very bad news.\n";
+    wcerr<<L"Error: "<<errors;
+  }  
+}
+
+
+void // Prints every entry of the transition matrix A to cout, one "A[i][j] = value" line per entry.
+HMM::print_A() {
+  int i,j;
+    
+  cout<<"TRANSITION MATRIX (A)\n------------------------------\n";  
+  for(i=0; i != tdhmm.getN(); i++)
+    for(j=0; j != tdhmm.getN(); j++) {
+      cout<<"A["<<i<<"]["<<j<<"] = "<<(tdhmm.getA())[i][j]<<"\n";
+    }    
+}
+
+void // Prints the emission matrix B to cout, restricted to the (tag, class) pairs where the class contains the tag.
+HMM::print_B() {
+  int i,k;  
+
+  cout<<"EMISSION MATRIX (B)\n-------------------------------\n";
+  for(i=0; i != tdhmm.getN(); i++)
+    for(k=0; k != tdhmm.getM(); k++) {
+      Collection &output = tdhmm.getOutput();
+      if(output[k].find(i)!=output[k].end()) // skip classes that do not contain tag i
+        cout<<"B["<<i<<"]["<<k<<"] = "<<(tdhmm.getB())[i][k]<<"\n";
+    }
+}
+
+void HMM::print_ambiguity_classes() { // Prints each ambiguity class to cout as "index: tag tag ...", one class per line.
+  set<TTag> ambiguity_class;
+  set<TTag>::iterator itag;
+  cout<<"AMBIGUITY CLASSES\n-------------------------------\n";
+  for(int i=0; i != tdhmm.getM(); i++) {
+    ambiguity_class = (tdhmm.getOutput())[i]; // tag set stored at index i of the output collection
+    cout <<i<<": ";
+    for (itag=ambiguity_class.begin(); itag!=ambiguity_class.end(); itag++) {
+      cout << *itag <<" ";
+    }
+    cout << "\n";
+  }
+}   
Index: branches/apertium-tagger/apertium2/apertium/apertium-createmodes.awk
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-createmodes.awk	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-createmodes.awk	(revision 69632)
@@ -0,0 +1,30 @@
+#!/usr/bin/awk -f
+
+# Parse output from modes2bash.xsl
+
+BEGIN {
+  FS="^ *# *" # field separator: a "#" at the start of the line, with optional spaces around it
+  guesswarned=0 # becomes 1 once the NAMEME warning has been printed
+}
+
+NF==2 && /\.mode$/ { # header line naming a .mode file: switch the output file
+  filename = $2
+  if(filename ~ /NAMEME/ && !guesswarned) {
+    print "apertium-createmodes.awk: At least one program in a gendebug=\"yes\" mode needs a debug-suff attribute; couldn't guess what suffix to use for the debug mode." > "/dev/stderr"
+    guesswarned=1
+  }
+  if(seen[filename]) {
+    print "apertium-createmodes.awk: "filename" seen twice" > "/dev/stderr"
+    filename = 0 # false value: the copy rule below stays off until the next header line
+  }
+  else {
+    print "" > filename # start the file afresh; this writes one empty line
+    seen[filename] = 1
+  }
+  next
+}
+
+filename { # any other line is appended to the current output file
+  print $0 >> filename
+  close(filename)
+}

Property changes on: branches/apertium-tagger/apertium2/apertium/apertium-createmodes.awk
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: branches/apertium-tagger/apertium2/apertium/modes.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/modes.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/modes.dtd	(revision 69632)
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Copyright (C) 2005-2016 Universitat d'Alacant / Universidad de Alicante
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+  DTD for the modes.xml file
+-->
+
+<!ELEMENT modes (mode+)>
+
+<!ELEMENT mode (pipeline)>
+<!ATTLIST mode name ID #REQUIRED>
+<!ATTLIST mode install CDATA #IMPLIED>
+<!ATTLIST mode gendebug CDATA #IMPLIED>
+
+<!ELEMENT pipeline (program+)>
+
+<!ELEMENT program (file|arg)*>
+<!ATTLIST program name CDATA #REQUIRED>
+<!ATTLIST program debug-suff CDATA #IMPLIED>
+
+<!ELEMENT file EMPTY>
+<!ATTLIST file name CDATA #REQUIRED>
+
+<!ELEMENT arg EMPTY>
+<!ATTLIST arg name CDATA #REQUIRED>
Index: branches/apertium-tagger/apertium2/apertium/modes.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/modes.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/modes.rnc	(revision 69632)
@@ -0,0 +1,33 @@
+#  Copyright (C) 2005-2016 Universitat d'Alacant / Universidad de Alicante
+# 
+#  This program is free software; you can redistribute it and/or
+#  modify it under the terms of the GNU General Public License as
+#  published by the Free Software Foundation; either version 2 of the
+#  License, or (at your option) any later version.
+# 
+#  This program is distributed in the hope that it will be useful, but
+#  WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+#  General Public License for more details.
+# 
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+# RELAX NG (compact syntax) schema for the modes.xml file
+
+modes = element modes { attlist.modes, mode+ }
+attlist.modes &= empty
+mode = element mode { attlist.mode, pipeline }
+attlist.mode &= attribute name { xsd:ID }
+attlist.mode &= attribute install { text }?
+attlist.mode &= attribute gendebug { text }?
+pipeline = element pipeline { attlist.pipeline, program+ }
+attlist.pipeline &= empty
+program = element program { attlist.program, (file | arg)* }
+attlist.program &= attribute name { text }
+attlist.program &= attribute debug-suff { text }?
+file = element file { attlist.file, empty }
+attlist.file &= attribute name { text }
+arg = element arg { attlist.arg, empty }
+attlist.arg &= attribute name { text }
+start = modes
Index: branches/apertium-tagger/apertium2/apertium/modes.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/modes.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/modes.rng	(revision 69632)
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+   Copyright (C) 2005-2016 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+  RELAX NG schema for the modes.xml file
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+  <define name="modes">
+    <element name="modes">
+      <ref name="attlist.modes"/>
+      <oneOrMore>
+        <ref name="mode"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.modes" combine="interleave">
+    <empty/>
+  </define>
+  <define name="mode">
+    <element name="mode">
+      <ref name="attlist.mode"/>
+      <ref name="pipeline"/>
+    </element>
+  </define>
+  <define name="attlist.mode" combine="interleave">
+    <attribute name="name">
+      <data type="ID"/>
+    </attribute>
+  </define>
+  <define name="attlist.mode" combine="interleave">
+    <optional>
+      <attribute name="install"/>
+    </optional>
+  </define>
+  <define name="attlist.mode" combine="interleave">
+    <optional>
+      <attribute name="gendebug"/>
+    </optional>
+  </define>
+  <define name="pipeline">
+    <element name="pipeline">
+      <ref name="attlist.pipeline"/>
+      <oneOrMore>
+        <ref name="program"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.pipeline" combine="interleave">
+    <empty/>
+  </define>
+  <define name="program">
+    <element name="program">
+      <ref name="attlist.program"/>
+      <zeroOrMore>
+        <choice>
+          <ref name="file"/>
+          <ref name="arg"/>
+        </choice>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.program" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <define name="attlist.program" combine="interleave">
+    <optional>
+      <attribute name="debug-suff"/>
+    </optional>
+  </define>
+  <define name="file">
+    <element name="file">
+      <ref name="attlist.file"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.file" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <define name="arg">
+    <element name="arg">
+      <ref name="attlist.arg"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.arg" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <start>
+    <choice>
+      <ref name="modes"/>
+    </choice>
+  </start>
+</grammar>
Index: branches/apertium-tagger/apertium2/apertium/modes2bash.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/modes2bash.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/modes2bash.xsl	(revision 69632)
@@ -0,0 +1,99 @@
+<?xml version="1.0" encoding="UTF-8"?><!-- -*- nxml -*- -->
+<!--
+ Copyright (C) 2005-2014 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:output method="text" encoding="UTF-8" indent="no"/>
+
+<xsl:param name="installdir"/>
+<xsl:param name="devdir"/>
+
+<xsl:template match="modes"> <!-- Root element: hand every child over to the templates below. -->
+  <xsl:apply-templates/>
+</xsl:template>
+
+
+<!-- Print filenames first, then the contents of each file. This gets
+     parsed by apertium-createmodes.awk which splits it into the
+     specific files. -->
+<xsl:template match="mode"> <!-- Always emits a "# modes/NAME.mode" section built with $devdir; when install="yes" it is preceded by a "# NAME.mode" section built with $installdir. -->
+  <xsl:choose>
+    <xsl:when test="@install = 'yes'">
+      <xsl:text>
+# </xsl:text>
+      <xsl:value-of select="./@name"/>
+      <xsl:text>.mode
+      </xsl:text>
+      <xsl:apply-templates>
+        <xsl:with-param name="dir"><xsl:value-of select="$installdir"/></xsl:with-param>
+      </xsl:apply-templates>
+    </xsl:when>
+  </xsl:choose>
+  <xsl:variable name="dir">
+    <xsl:text>'</xsl:text>
+    <xsl:value-of select="$devdir"/>
+    <xsl:text>'</xsl:text>
+  </xsl:variable> <!-- NOTE(review): $dir is not referenced anywhere in this template; the call below passes $devdir directly. -->
+  <xsl:text>
+# modes/</xsl:text>
+  <xsl:value-of select="./@name"/>
+  <xsl:text>.mode
+  </xsl:text>
+  <xsl:apply-templates>
+    <xsl:with-param name="dir"><xsl:value-of select="$devdir"/></xsl:with-param>
+  </xsl:apply-templates>
+</xsl:template>
+
+
+<xsl:template match="pipeline"> <!-- Outputs the child elements joined with "| ", forwarding the dir parameter to each. -->
+  <xsl:param name="dir" />
+  <xsl:for-each select="./*">
+    <xsl:if test="not(position()=1)"> <!-- no separator before the first child -->
+      <xsl:text>| </xsl:text>
+    </xsl:if>
+    <xsl:apply-templates select=".">
+      <xsl:with-param name="dir"><xsl:value-of select="$dir"/></xsl:with-param>
+    </xsl:apply-templates>
+  </xsl:for-each>
+</xsl:template>
+
+<xsl:template match="program"> <!-- Outputs the program name, then each child element preceded by a space. -->
+  <xsl:param name="dir" />
+  <xsl:value-of select="./@name"/>
+  <xsl:for-each select="./*">
+    <xsl:text> </xsl:text>
+    <xsl:apply-templates select=".">
+      <xsl:with-param name="dir"><xsl:value-of select="$dir"/></xsl:with-param>
+    </xsl:apply-templates>
+  </xsl:for-each>
+</xsl:template>
+
+<xsl:template match="arg"> <!-- Outputs the value of the name attribute unchanged and unquoted. -->
+  <xsl:value-of select="./@name"/>
+</xsl:template>
+
+<xsl:template match="file"> <!-- Outputs 'DIR/NAME' in single quotes, followed by a space. -->
+  <xsl:param name="dir" />
+  <xsl:text>'</xsl:text>
+  <xsl:value-of select="$dir" />
+  <xsl:text>/</xsl:text>
+  <xsl:value-of select="./@name"/>
+  <xsl:text>' </xsl:text>
+</xsl:template>
+
+
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/modes2debugmodes.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/modes2debugmodes.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/modes2debugmodes.xsl	(revision 69632)
@@ -0,0 +1,167 @@
+<?xml version="1.0" encoding="UTF-8"?> <!-- -*- nxml -*- -->
+<!--
+Copyright (C) 2016 Universitat d'Alacant / Universidad de Alicante
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="xml" encoding="UTF-8"/>
+
+  <xsl:template name="replaceString"> <!-- Outputs haystack with every occurrence of needle replaced by replacement. -->
+    <xsl:param name="haystack"/>
+    <xsl:param name="needle"/>
+    <xsl:param name="replacement"/>
+    <xsl:choose>
+      <xsl:when test="contains($haystack, $needle)">
+        <xsl:value-of select="substring-before($haystack, $needle)"/>
+        <xsl:value-of select="$replacement"/>
+        <xsl:call-template name="replaceString"> <!-- recurse on the text after the first match -->
+          <xsl:with-param name="haystack"
+                          select="substring-after($haystack, $needle)"/>
+          <xsl:with-param name="needle" select="$needle"/>
+          <xsl:with-param name="replacement" select="$replacement"/>
+        </xsl:call-template>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:value-of select="$haystack"/>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="mode[not(@gendebug='yes')]">
+    <!-- Modes without gendebug="yes" are copied unchanged; no templates are applied to their content -->
+    <xsl:copy-of select="."/>
+  </xsl:template>
+
+  <xsl:template match="mode[@gendebug='yes']"> <!-- Outputs a banner comment, a copy of the mode itself, then one generated mode per program of its pipeline. -->
+    <xsl:comment>
+      <xsl:text> **************** </xsl:text>
+      <xsl:value-of select="./@name"/>
+      <xsl:text>: **************** </xsl:text>
+    </xsl:comment>
+    <xsl:copy-of select="."/>
+    <xsl:apply-templates select="./pipeline/program"/>
+  </xsl:template>
+
+  <xsl:template name="debugSuffix"> <!-- Outputs the suffix for a generated mode name: "-" plus suff-attr when given, otherwise a guess based on the program name, otherwise "-NAMEME". -->
+    <xsl:param name="progname"/>
+    <xsl:param name="suff-attr"/>
+    <xsl:variable name="p" select="normalize-space($progname)"/>
+    <!-- TODO: We also need to know what names have been used already
+         to make them unique! Might be easier to do uniquifying
+         outside XSLT -->
+    <xsl:choose> <!-- the first matching branch wins, so the order of the tests matters -->
+      <xsl:when test="$suff-attr != ''">
+        <xsl:text>-</xsl:text><xsl:value-of select="$suff-attr"/>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'cg-proc')">
+        <xsl:text>-disam</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-tagger')">
+        <xsl:text>-tagger</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-pretransfer')">
+        <xsl:text>-pretransfer</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'lrx-proc')">
+        <xsl:text>-lex</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-transfer')">
+        <xsl:text>-chunker</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-interchunk')">
+        <xsl:text>-interchunk</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-postchunk')">
+        <xsl:text>-postchunk</xsl:text>
+      </xsl:when>
+      <xsl:when test="contains($p, '$1')"> <!-- tested before the lt-proc and hfst-proc cases below, so it takes precedence over them -->
+        <xsl:text>-dgen</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'lt-proc') and contains($p, ' -b')">
+        <xsl:text>-biltrans</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'lt-proc') and contains($p, ' -p')">
+        <xsl:text>-pgen</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'lt-proc')">
+        <xsl:text>-morph</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'hfst-proc')">
+        <xsl:text>-morph</xsl:text>
+      </xsl:when>
+      <xsl:otherwise> <!-- no guess possible; apertium-createmodes.awk warns when it sees NAMEME in a file name -->
+        <xsl:text>-NAMEME</xsl:text>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template name="traceOpt"> <!-- Outputs " -t" when the program name starts with cg-proc, lrx-proc, apertium-transfer or apertium-interchunk; outputs nothing otherwise. -->
+    <xsl:param name="progname"/>
+    <xsl:variable name="p" select="normalize-space($progname)"/>
+    <xsl:choose>
+      <xsl:when test="starts-with($p, 'cg-proc')">
+        <xsl:text> -t</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'lrx-proc')">
+        <xsl:text> -t</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-transfer')">
+        <xsl:text> -t</xsl:text>
+      </xsl:when>
+      <xsl:when test="starts-with($p, 'apertium-interchunk')">
+        <xsl:text> -t</xsl:text>
+      </xsl:when>
+      <xsl:otherwise>
+        <xsl:text></xsl:text>
+      </xsl:otherwise>
+    </xsl:choose>
+  </xsl:template>
+
+  <xsl:template match="program"> <!-- Builds a non-installed mode whose pipeline ends at this program; its name is the parent mode's name plus the debug suffix. -->
+    <mode install="no">
+      <xsl:attribute name="name">
+        <xsl:value-of select="../../@name"/>
+        <xsl:call-template name="debugSuffix">
+          <xsl:with-param name="progname" select="./@name"/>
+          <xsl:with-param name="suff-attr" select="./@debug-suff"/>
+        </xsl:call-template>
+      </xsl:attribute>
+      <pipeline>
+        <xsl:copy-of select="./preceding-sibling::*"/> <!-- the earlier steps of the pipeline are copied unchanged -->
+        <program>
+          <xsl:attribute name="name"> <!-- this program's command: every "$1" becomes "-d", then the trace option (if any) is appended -->
+            <xsl:call-template name="replaceString">
+              <xsl:with-param name="haystack" select="./@name"/>
+              <xsl:with-param name="needle" select="'$1'"/>
+              <xsl:with-param name="replacement" select="'-d'"/>
+            </xsl:call-template>
+            <xsl:call-template name="traceOpt">
+              <xsl:with-param name="progname" select="./@name"/>
+            </xsl:call-template>
+          </xsl:attribute>
+          <xsl:copy-of select="./*"/>
+        </program>
+      </pipeline>
+    </mode>
+  </xsl:template>
+
+  <!-- catch-all -->
+  <xsl:template match="@* | node()"> <!-- Copies the matched attribute or node and recurses into its attributes and children. -->
+    <xsl:copy>
+      <xsl:apply-templates select="@* | node()"/>
+    </xsl:copy>
+  </xsl:template>
+
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/apertium_gen_wlist_lextor_translation.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_gen_wlist_lextor_translation.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_gen_wlist_lextor_translation.cc	(revision 69632)
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * 
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <fstream>
+#include "getopt_long.h"
+#include <string>
+
+#include <lttoolbox/fst_processor.h>
+
+#include <apertium/lextor_word.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+using namespace std;
+
+
+void help(char *name) { // Print the usage text for this tool to wcerr; `name` is shown as the program name.
+  wcerr<<L"USAGE:\n"<<name
+       <<L" --mono dic.bin --bil bildic.bin --wlist wlistfile\n\n";
+  wcerr<<L"ARGUMENTS: \n"
+         L"   --mono|-m: Specifies the monolingual lexical selection dictionary to use.\n"
+         L"   --bil|-b: Specifies the bilingual lexical selection dictionary to use.\n"
+         L"   --wlist|-w: Specifies the list of words to translate.\n"
+         L"   --help|-h: Show this help\n"
+         L"   --version|-v: Show version information\n\n"
+         L"Write to standard output all possible translations of words found in wlistfile\n";
+}
+
+int main(int argc, char* argv[]) {
+  // Parse the options, load both dictionaries, then print every translation of each listed word.
+  int c;
+  int option_index=0;
+  string monodic_file="";
+  string bildic_file="";
+  string wlist_file="";
+
+  while (true) {
+    static struct option long_options[] =
+      {
+        {"mono",    required_argument, 0, 'm'},
+        {"bil",     required_argument, 0, 'b'},
+        {"wlist",   required_argument, 0, 'w'},
+        {"help",        no_argument,   0, 'h'},
+        {"version",     no_argument,   0, 'v'},
+        {0, 0, 0, 0}
+      };
+
+    c=getopt_long(argc, argv, "m:b:w:hv",long_options, &option_index);
+    if (c==-1) // no more options
+      break;
+
+    switch (c) {
+    case 'm':
+      monodic_file=optarg;
+      break;
+    case 'b':
+      bildic_file=optarg;
+      break;
+    case 'w':
+      wlist_file=optarg;
+      break;
+    case 'h':
+      help(argv[0]);
+      exit(EXIT_SUCCESS);
+      break;
+    case 'v':
+      wcerr<<L"APERTIUM"<<L"\n"; //"APERTIUM" used to be PACKAGE_STRING
+      wcerr<<L"LICENSE:\n\n"
+          <<L"   Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante\n\n"
+          <<L"   This program is free software; you can redistribute it and/or\n"
+          <<L"   modify it under the terms of the GNU General Public License as\n"
+          <<L"   published by the Free Software Foundation; either version 2 of the\n"
+          <<L"   License, or (at your option) any later version.\n"
+          <<L"   This program is distributed in the hope that it will be useful, but\n"
+          <<L"   WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+          <<L"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+          <<L"   General Public License for more details.\n"
+          <<L"\n"
+          <<L"   You should have received a copy of the GNU General Public License\n"
+          <<L"   along with this program; if not, see <http://www.gnu.org/licenses/>.\n";
+      exit(EXIT_SUCCESS);
+      break;
+    default: // unknown option or missing option argument
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+      break;
+    }
+  }
+
+  if(monodic_file=="") { // all three file options are mandatory
+    wcerr<<L"Error: no monolingual dictionary file was given\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if (bildic_file=="") {
+    wcerr<<L"Error: no bilingual dictionary file was given\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if (wlist_file=="") {
+    wcerr<<L"Error: no word list file was given\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  wifstream fwlist;
+  FILE *fmonodic, *fbildic;
+
+  fmonodic=fopen(monodic_file.c_str(), "rb"); // dictionaries are read as binary files
+  if (!fmonodic) {
+    wcerr<<L"Error: Cannot open file '"
+         <<UtfConverter::fromUtf8(monodic_file)<<L"'\n";
+    exit(EXIT_FAILURE);
+  }
+
+  fbildic=fopen(bildic_file.c_str(), "rb");
+  if (!fbildic) {
+    wcerr<<L"Error: Cannot open file '"
+         <<UtfConverter::fromUtf8(bildic_file)<<L"'\n";
+    exit(EXIT_FAILURE);
+  }
+
+  fwlist.open(wlist_file.c_str(), ios::in);
+  if (fwlist.fail()) {
+    wcerr<<L"Error: Cannot open file '"
+         <<UtfConverter::fromUtf8(wlist_file)<<L"'\n";
+    exit(EXIT_FAILURE);
+  }
+
+  FSTProcessor fstp_monodic, fstp_bildic;
+
+  fstp_monodic.load(fmonodic);
+  fstp_monodic.initBiltrans();
+  fclose(fmonodic); // the FILE is closed as soon as the processor has loaded it
+
+  fstp_bildic.load(fbildic);
+  fstp_bildic.initBiltrans();
+  fclose(fbildic);
+
+  // Loop on getline()'s own result rather than on eof(): a final line that
+  // lacks a trailing newline is still processed, and a read failure ends the
+  // loop instead of spinning forever on a stream that never reaches eof.
+  wstring strword=L"";
+  while (getline(fwlist, strword)) {
+    LexTorWord word(strword, &fstp_monodic);
+    wcerr<<strword<<L" =>\n";
+    for (int i=0; i<word.n_lexical_choices(); i++) {
+      wcerr<<L"\t"<<word.translate(fstp_bildic,i)<<L" ("<<word.get_lexical_choice(i)<<L")\n"; // trace on wcerr
+      wcout<<word.translate(fstp_bildic,i)<<L"\n"; // the translation itself goes to standard output
+    }
+    if (word.n_lexical_choices()<=1) {
+      wcerr<<L"Warning: word '"<<strword<<L"' is supossed to be polysemous, but it has "<<word.n_lexical_choices()<<L" different translations\n";
+    }
+  }
+
+  fwlist.close();
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_interchunk.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_interchunk.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_interchunk.cc	(revision 69632)
@@ -0,0 +1,166 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/interchunk.h>
+#include <lttoolbox/lt_locale.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <apertium/string_utils.h>
+#include "getopt_long.h"
+
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace Apertium;
+using namespace std;
+
+void message(char *progname)
+{
+  // Print the usage summary to cerr, then terminate with EXIT_FAILURE (never returns).
+  cerr << "USAGE: " << basename(progname) << " [-tz] t2x preproc [input [output]]" << endl;
+  cerr << "  t2x        t2x rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "OPTIONS" <<endl;
+  cerr << "  -t         trace mode" << endl;
+  cerr << "  -z         flush buffer on '\\0'" << endl; // backslash escaped: a raw \0 would end the C string and cut this line short
+  exit(EXIT_FAILURE);
+}
+
+void testfile(string const &filename)
+{
+  // Exit with a diagnostic on cerr unless stat() succeeds on the given path.
+  struct stat info;
+  const int rc = stat(filename.c_str(), &info);
+  if(rc != -1)
+    return;
+  cerr << "Error: can't stat file '" << filename << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+FILE * open_input(string const &filename)
+{
+  // Open `filename` for reading ("r"); on failure report on cerr and exit.
+  FILE *const stream = fopen(filename.c_str(), "r");
+  if(stream)
+  {
+    return stream;
+  }
+
+  cerr << "Error: can't open input file '" << filename.c_str() << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+FILE * open_output(string const &filename)
+{
+  // Open `filename` for writing ("w"); on failure report on cerr and exit.
+  FILE *const stream = fopen(filename.c_str(), "w");
+  if(stream)
+  {
+    return stream;
+  }
+  cerr << "Error: can't open output file '" << filename.c_str() << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])
+{
+  // Parse the flags, resolve the 2-4 positional file arguments, then run the interchunk stage.
+  LtLocale::tryToSetLocale();
+  Interchunk i;
+
+  int option_index=0;
+
+  while (true) {
+    static struct option long_options[] =
+    {
+      {"null-flush", no_argument, 0, 'z'},
+      {"trace", no_argument, 0, 't'},
+      {"help", no_argument, 0, 'h'},
+      {0, 0, 0, 0}
+    };
+
+    int c=getopt_long(argc, argv, "zth", long_options, &option_index);
+    if (c == -1) // no more options
+      break;
+
+    switch (c)
+    {
+      case 'z':
+        i.setNullFlush(true);
+        break;
+
+      case 't':
+        i.setTrace(true);
+        break;
+
+      case 'h':
+      default:
+        message(argv[0]); // prints usage and exits
+        break;
+    }
+  }
+
+  FILE *input = stdin, *output = stdout;
+  string f1, f2; // f1: t2x rules file, f2: preproc file (order taken from the usage text)
+  switch(argc - optind + 1) // number of positional arguments, plus one
+  {
+    case 5:
+      // Validate the rule files and the input first; the output is opened
+      // last so a bad invocation does not create or truncate it.
+      testfile(argv[argc-3]);
+      testfile(argv[argc-4]);
+      input = open_input(argv[argc-2]);
+      output = open_output(argv[argc-1]);
+      f1 = argv[argc-4];
+      f2 = argv[argc-3];
+      break;
+    case 4:
+      testfile(argv[argc-2]);
+      testfile(argv[argc-3]);
+      input = open_input(argv[argc-1]);
+      f1 = argv[argc-3];
+      f2 = argv[argc-2];
+      break;
+
+    case 3:
+      testfile(argv[argc-1]);
+      testfile(argv[argc-2]);
+      f1 = argv[argc-2];
+      f2 = argv[argc-1];
+      break;
+
+    default: // wrong number of positional arguments
+      message(argv[0]);
+      break;
+  }
+
+#ifdef _MSC_VER
+  _setmode(_fileno(input), _O_U8TEXT);
+  _setmode(_fileno(output), _O_U8TEXT);
+#endif
+
+  i.read(f1, f2);
+  i.interchunk(input, output);
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_lextor.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_lextor.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_lextor.cc	(revision 69632)
@@ -0,0 +1,646 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * 
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <fstream>
+#include "getopt_long.h"
+
+#include <lttoolbox/fst_processor.h>
+
+#include <apertium/lextor.h>
+#include <apertium/lextor_word.h>
+#include <apertium/lextor_data.h>
+#include <apertium/utf_converter.h>
+#include <clocale>
+#include <cstdlib>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+#define MODE_TRAINWRD 0
+#define MODE_TRAINLCH 1
+#define MODE_LEXTOR 2
+#define MODE_LEXTORTL 3
+
+using namespace std;
+
+
+void help(char *name) { // Print the usage text to wcerr (the same stream main() uses for its error messages); `name` is the program name.
+  wcerr<<L"USAGE:\n";
+  wcerr<<name<<L" --trainwrd stopwords words n left right corpus model [--weightexp w]\nOR\n";
+  wcerr<<name<<L" --trainlch stopwords lexchoices n left right corpus wordmodel dic bildic model [--weightexp w]\nOR\n";
+  wcerr<<name<<L" --lextor model dic left right [--debug] [--weightexp w]\n\n";
+  //cerr<<name<<" --lextortl stopwords words tlmodel dic bildic left right [--debug] [--weightexp w]\n\n"; 
+  wcerr<<L"ARGUMENTS: \n"
+      <<L"   --trainwrd|-t: Train word co-occurrences model.\n"
+      <<L"   Required parameters:\n"
+      <<L"      stopwords: file containing a list of stop words. Stop words are ignored\n"
+      <<L"      words: file containing a list of words. For each word a co-occurrence model is built\n"
+      <<L"      n: number of words per co-occurrence model (for each model, the n most frequent words)\n"
+      <<L"      left: left-side context to take into account (number of words)\n"
+      <<L"      right: right-side context to take into account (number of words)\n"
+      <<L"      corpus: file containing the training corpus\n"
+      <<L"      model: output file on which the co-occurrence models are saved\n\n"
+
+      <<L"   --trainlch|-r: Train lexical choices co-occurrences model using a target-language co-occurrence model.\n"
+      <<L"   Required parameters:\n"
+      <<L"      stopwords: file containing a list of stop words. Stop words are ignored\n"
+      <<L"      lexchoices: file containing a list of lexical choices. For each lexical choice a co-occurrence model is built\n"
+      <<L"      n: number of words per co-occurrence model (for each model, the n most frequent words)\n"
+      <<L"      left: left-side context to take into account (number of words)\n"
+      <<L"      right: right-side context to take into account (number of words)\n"
+      <<L"      corpus: file containing the training corpus\n"
+      <<L"      wordmodel: target-language word co-occurrence model (previously trained by means of the --trainwrd option)\n"
+      <<L"      dic: lexical-selection dictionary (binary format)\n"
+      <<L"      bildic: bilingual dictionary (binary format)\n"
+      <<L"      model: output file on which the co-occurrence models are saved\n\n"
+
+      <<L"   --lextor|-l: Perform the lexical selection on the input stream.\n"
+      <<L"   Required parameters:\n"
+      <<L"      model: file containing the model to be used for the lexical selection\n"
+      <<L"      dic: lexical-selection dictionary (binary format)\n"
+      <<L"      left: left-side context to take into account (number of words)\n"
+      <<L"      right: right-side context to take into account (number of words)\n\n"
+
+    //      <<"   --lextortl|-e: Perform the lexical selection on the input stream by using a tl model.\n"
+    //      <<"   Required parameters:\n"
+    //      <<"      stopwords: file containing a list of stop words in the source language. Stop words are ignored\n"
+    //      <<"      words: file containing the list of polysemous words in the source language\n"
+    //      <<"      tlmodel: file containing the target-language model to be used for the lexical selection\n"
+    //      <<"      dic: lexical-selection dictionary (binary format)\n"
+    //      <<"      bildic: bilingual dictionary (binary format)\n"
+    //      <<"      left: left-side context to take into account (number of words)\n"
+    //      <<"      right: right-side context to take into account (number of words)\n\n"
+
+      <<L"   --weightexp|-w: Specify a weight value to change the influence of surrounding words while training or\n"
+      <<L"     performing the lexica selection. It must be positive.\n\n"
+
+      <<L"   --debug|-d: Show debug information while operating\n"
+      <<L"   --help|-h: Show this help\n"
+      <<L"   --version|-v: Show version information\n\n";
+  wcerr<<L"Reads from standard input and writes to standard output\n";
+}
+
+int main(int argc, char* argv[]) {
+  // Entry point: parse the command line, then run the selected mode (trainwrd,
+  // trainlch, lextor or lextortl). Any missing or unreadable input ends the run with EXIT_FAILURE.
+  int c;
+  int option_index=0;
+  int mode=-1;
+
+  //Parameters for the "trainwrd" or the "trainlch" mode option
+  string stopwords_file="";
+  string words_file="";
+  string corpus_file="";
+  int nwords_model=0;
+  int nwords_left=-1;
+  int nwords_right=-1;
+
+  string model_file="";
+
+  string lexchoices_file="";
+  string wordmodel_file="";
+  string bildic_file="";
+
+  //Parameters for the "lextor" option
+  string dic_file="";
+
+  double weight_exponent=0.0;
+
+  LexTor::debug=false;
+
+  //cerr<<"LOCALE: "<<setlocale(LC_ALL,"")<<"\n";
+
+  while (true) {
+    static struct option long_options[] =
+      {
+        {"trainwrd",  required_argument, 0, 't'},
+        {"trainlch",  required_argument, 0, 'r'},
+        {"lextor",    required_argument, 0, 'l'},
+        //	{"lextortl",  required_argument, 0, 'e'},
+        {"weightexp", required_argument, 0, 'w'},
+        {"debug",        no_argument,    0, 'd'},
+        {"help",         no_argument,    0, 'h'},
+        {"version",      no_argument,    0, 'v'},
+        {0, 0, 0, 0}
+      };
+
+    c=getopt_long(argc, argv, "t:r:l:e:w:dhv",long_options, &option_index);
+    if (c==-1)
+      break;
+
+    // Each mode option takes its first value through optarg and the rest straight
+    // from argv[optind++]; the guards make sure those extra values really exist.
+    switch (c) {
+    case 't':
+      if (optind+6>argc) { wcerr<<L"Error: too few arguments for --trainwrd\n"; help(argv[0]); exit(EXIT_FAILURE); }
+      mode=MODE_TRAINWRD;
+      stopwords_file=optarg;
+      words_file=argv[optind++];
+      nwords_model=atoi(argv[optind++]);
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      corpus_file=argv[optind++];
+      model_file=argv[optind++];
+      break;
+    case 'r':
+      //--trainlch stopwords lexchoices n left right corpus wordmodel dic bildic model
+      if (optind+9>argc) { wcerr<<L"Error: too few arguments for --trainlch\n"; help(argv[0]); exit(EXIT_FAILURE); }
+      mode=MODE_TRAINLCH;
+      stopwords_file=optarg;
+      lexchoices_file=argv[optind++];
+      nwords_model=atoi(argv[optind++]);
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      corpus_file=argv[optind++];
+      wordmodel_file=argv[optind++];
+      dic_file=argv[optind++];
+      bildic_file=argv[optind++];
+      model_file=argv[optind++];
+      break;
+    case 'l':
+      if (optind+3>argc) { wcerr<<L"Error: too few arguments for --lextor\n"; help(argv[0]); exit(EXIT_FAILURE); }
+      mode=MODE_LEXTOR;
+      model_file=optarg;
+      dic_file=argv[optind++];
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      break;
+    case 'e':
+      if (optind+6>argc) { wcerr<<L"Error: too few arguments for -e\n"; help(argv[0]); exit(EXIT_FAILURE); }
+      mode=MODE_LEXTORTL;
+      stopwords_file=optarg;
+      words_file=argv[optind++];
+      model_file=argv[optind++];
+      dic_file=argv[optind++];
+      bildic_file=argv[optind++];
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      break;
+    case 'w':
+      weight_exponent=atof(optarg);
+      break;
+    case 'd':
+      LexTor::debug=true;
+      break;
+    case 'h':
+      help(argv[0]);
+      exit(EXIT_SUCCESS);
+      break;
+    case 'v':
+      wcerr<<L"APERTIUM"<<L"\n"; //"APERTIUM" used to be PACKAGE_STRING
+      wcerr<<L"LICENSE:\n\n"
+          <<L"   Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante\n\n"
+          <<L"   This program is free software; you can redistribute it and/or\n"
+          <<L"   modify it under the terms of the GNU General Public License as\n"
+          <<L"   published by the Free Software Foundation; either version 2 of the\n"
+          <<L"   License, or (at your option) any later version.\n"
+          <<L"   This program is distributed in the hope that it will be useful, but\n"
+          <<L"   WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+          <<L"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+          <<L"   General Public License for more details.\n"
+          <<L"\n"
+          <<L"   You should have received a copy of the GNU General Public License\n"
+          <<L"   along with this program; if not, see <http://www.gnu.org/licenses/>.\n";
+      exit(EXIT_SUCCESS);
+      break;
+    default:
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+      break;
+    }
+  }
+
+  if (weight_exponent<0) {
+    wcerr<<L"Error: the weight exponent provided is less than zero. It must be positive\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  //When reading from the input stream *all* characters must be processed,
+  //including ' ','\n', ..... This is set once here and holds for every mode below.
+  wcin.unsetf(ios::skipws);
+
+  if (mode==MODE_TRAINWRD) {
+    if(stopwords_file=="") {
+      wcerr<<L"Error: no stopwords file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (words_file=="") {
+      wcerr<<L"Error: no words file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_model==0) {
+      wcerr<<L"Error: the number of word per co-occurrence model must be greater than 0\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_left<0) {
+      wcerr<<L"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      wcerr<<L"Error: no right-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (corpus_file=="") {
+      wcerr<<L"Error: No training corpus file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (model_file=="") {
+      wcerr<<L"Error: No output file to save the co-occurrence models was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    wifstream fstopwords, fwords, fcorpus;
+
+    fstopwords.open(stopwords_file.c_str(), ios::in);
+    if (fstopwords.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(stopwords_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fwords.open(words_file.c_str(), ios::in);
+    if (fwords.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(words_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fcorpus.open(corpus_file.c_str(), ios::in);
+    if(fcorpus.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(corpus_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    FILE *fmodel = fopen(model_file.c_str(), "wb");
+    if(!fmodel)
+    {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(model_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    LexTorData lextor_data;
+
+    lextor_data.read_stopwords(fstopwords);
+    lextor_data.read_words(fwords);
+    lextor_data.set_nwords_per_set(nwords_model);
+
+    fstopwords.close();
+    fwords.close();
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_data);
+
+    //When reading from the input corpus *all* characters must be
+    //processed, including ' ','\n', .....
+    fcorpus.unsetf(ios::skipws);
+
+    //Train
+    lexical_selector.trainwrd(fcorpus, nwords_left, nwords_right, weight_exponent);
+    fcorpus.close();
+
+    //Write parameters
+    lextor_data.write(fmodel);
+    fclose(fmodel);
+  }
+
+  else if (mode==MODE_TRAINLCH) {
+    if(stopwords_file=="") {
+      wcerr<<L"Error: no stopwords file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (lexchoices_file=="") {
+      wcerr<<L"Error: no lexical choices file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_model==0) {
+      wcerr<<L"Error: the number of word per co-occurrence model must be greater than 0\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_left<0) {
+      wcerr<<L"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      wcerr<<L"Error: no rigth-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (corpus_file=="") {
+      wcerr<<L"Error: No training corpus file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if(wordmodel_file=="") {
+      wcerr<<L"Error: No target-language word co-occurrence model was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (dic_file=="") {
+      wcerr<<L"Error: No lexical-selection dictionary was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (bildic_file=="") {
+      wcerr<<L"Error: No bilingual dictionary was provided\n"; // wcerr like every other diagnostic here
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (model_file=="") {
+      wcerr<<L"Error: No output file to save the co-occurrence models was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    wifstream fstopwords, flexchoices, fcorpus;
+    FILE *fdic=NULL, *fbildic=NULL, *fwordmodel=NULL;
+
+    fstopwords.open(stopwords_file.c_str(), ios::in);
+    if (fstopwords.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(stopwords_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    flexchoices.open(lexchoices_file.c_str(), ios::in);
+    if (flexchoices.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(lexchoices_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fcorpus.open(corpus_file.c_str(), ios::in);
+    if(fcorpus.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(corpus_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fwordmodel = fopen(wordmodel_file.c_str(), "rb");
+    if(!fwordmodel) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(wordmodel_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fdic=fopen(dic_file.c_str(), "rb");
+    if(!fdic) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(dic_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fbildic=fopen(bildic_file.c_str(), "rb");
+    if(!fbildic) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(bildic_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    FILE *fmodel = fopen(model_file.c_str(), "wb");
+    if(!fmodel) {
+      wcerr<<L"Error: Cannot open file '"
+          <<UtfConverter::fromUtf8(model_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    LexTorData lextor_data;
+
+    lextor_data.read_stopwords(fstopwords);
+    lextor_data.read_words(flexchoices);
+    lextor_data.set_nwords_per_set(nwords_model);
+
+    fstopwords.close();
+    flexchoices.close();
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_data);
+
+    LexTorData wordmodel;
+    wordmodel.read(fwordmodel);
+    fclose(fwordmodel);
+
+    FSTProcessor fstpdic;
+    fstpdic.load(fdic);
+    fstpdic.initBiltrans();
+    fclose(fdic);
+
+    lextor_data.read_lexical_choices(fstpdic);
+
+    FSTProcessor fstpbildic;
+    fstpbildic.load(fbildic);
+    fstpbildic.initBiltrans();
+    fclose(fbildic);
+
+
+    //When reading from the input corpus *all* characters must be
+    //processed, including ' ','\n', .....
+    fcorpus.unsetf(ios::skipws);
+
+    //Train
+    lexical_selector.trainlch(fcorpus, nwords_left, nwords_right, wordmodel, fstpdic, fstpbildic, weight_exponent);
+
+    fcorpus.close();
+
+    //Write parameters
+    lextor_data.write(fmodel);
+    fclose(fmodel);
+  }
+
+  else if (mode==MODE_LEXTOR) {
+    if(model_file=="") {
+      wcerr<<L"Error: no model file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (dic_file=="") {
+      wcerr<<L"Error: no dic file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_left<0) {
+      wcerr<<L"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      wcerr<<L"Error: no rigth-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    FILE *fdic=NULL;
+    fdic=fopen(dic_file.c_str(), "rb");
+    if (!fdic) {
+      wcerr<<L"Error: Cannot open dictionary file '"
+           <<UtfConverter::fromUtf8(dic_file)<<L"' for lexical selection\n";
+      exit(EXIT_FAILURE);
+    }
+    FSTProcessor fstp;
+    fstp.load(fdic);
+    fstp.initBiltrans();
+    fclose(fdic);
+
+    FILE *fmodel = fopen(model_file.c_str(), "rb");
+    if(!fmodel) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(model_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    //wcin needs no further setup here: skipws was already cleared before the mode dispatch.
+    LexTorData lextor_model;
+    lextor_model.read(fmodel);
+    fclose(fmodel);
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_model);
+
+    lexical_selector.lexical_selector(wcin, fstp, nwords_left, nwords_right, weight_exponent);
+  }
+
+  else if (mode==MODE_LEXTORTL) {
+    if(stopwords_file=="") {
+      wcerr<<L"Error: no stopwords file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if(words_file=="") {
+      wcerr<<L"Error: no words file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if(model_file=="") {
+      wcerr<<L"Error: no target-language model file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (dic_file=="") {
+      wcerr<<L"Error: No lexical-selection dictionary was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (bildic_file=="") {
+      wcerr<<L"Error: No bilingual dictionary was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_left<0) {
+      wcerr<<L"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      wcerr<<L"Error: no rigth-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    wifstream fstopwords, fwords;
+    FILE *fdic=NULL, *fbildic=NULL, *fmodel = NULL;
+
+    fstopwords.open(stopwords_file.c_str(), ios::in);
+    if (fstopwords.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(stopwords_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fwords.open(words_file.c_str(), ios::in);
+    if (fwords.fail()) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(words_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fmodel = fopen(model_file.c_str(), "rb");
+    if(!fmodel) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(model_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fdic=fopen(dic_file.c_str(), "rb");
+    if(!fdic) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(dic_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fbildic=fopen(bildic_file.c_str(), "rb");
+    if(!fbildic) {
+      wcerr<<L"Error: Cannot open file '"
+           <<UtfConverter::fromUtf8(bildic_file)<<L"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    LexTorData lextor_data;
+
+    lextor_data.read_stopwords(fstopwords);
+    fstopwords.close();
+
+    lextor_data.read_words(fwords);
+    fwords.close();
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_data);
+
+    LexTorData tlmodel;
+    tlmodel.read(fmodel);
+    fclose(fmodel);
+
+    FSTProcessor fstpdic;
+    fstpdic.load(fdic);
+    fstpdic.initBiltrans();
+    fclose(fdic);
+
+    FSTProcessor fstpbildic;
+    fstpbildic.load(fbildic);
+    fstpbildic.initBiltrans();
+    fclose(fbildic);
+
+
+    lextor_data.read_lexical_choices(fstpdic);
+
+    //Hand the selector the target-language model and the bilingual dictionary.
+    //wcin already has skipws cleared (done once before the mode dispatch).
+    lexical_selector.set_tlmodel(&tlmodel);
+    lexical_selector.set_bildic(&fstpbildic);
+
+    lexical_selector.lexical_selector(wcin, fstpdic, nwords_left, nwords_right, weight_exponent);
+  }
+
+  else {
+    wcerr<<L"Error: No operation mode was provided\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_lextor_eval.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_lextor_eval.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_lextor_eval.cc	(revision 69632)
@@ -0,0 +1,379 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * 
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <fstream>
+#include "getopt_long.h"
+
+#include <lttoolbox/fst_processor.h>
+
+#include <apertium/lextor.h>
+#include <apertium/lextor_word.h>
+#include <apertium/lextor_data.h>
+#include <apertium/lextor_eval.h>
+#include <clocale>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+#define MODE_LEXTOR 1
+#define MODE_LEXTORTL 2
+
+using namespace std;
+
+
+void help(char *name) {
+  // Writes the usage summary to stderr; 'name' is the program name shown in the synopsis.
+  cerr<<"USAGE:\n"<<name<<" --angle ang --reference reftext --parameters|-p model dic left right [--debug]\n\n";
+  static const char arguments[] = "ARGUMENTS: \n"
+      "  --angle|-a: To specify the angle threshold to use............\n"
+      "   --reference|-r: To specify the reference corpus used for evaluation (one word per\n"
+      "               line with the correct translation sense for those words with more than one)\n"
+      "   --parameters|-p: to specify the parameters used for the lexical selection task:\n"
+      "   Required parameters:\n"
+      "      model: file containing the model to be used for the lexical selection\n"
+      "      dic: lexical-selection dictionary (binary format)\n"
+      "      left: left-side context to take into account (number of words)\n"
+      "      right: right-side context to take into account (number of words)\n"
+      "   --weightexp|-w: Specify a weight value to change the influence of surrounding words while\n"
+      "     performing the lexical selection. It must be positive.\n"
+      "   --help|-h: Show this help\n"
+      "   --version|-v: Show version information\n\n";
+  cerr<<arguments;
+  cerr<<"NOTE: It reads from the standard input the corpus to work with. That corpus must be\n"
+      <<"      in the intermediate format used by Apertium.\n";
+}
+
+//Entry point: parses the options, runs the lexical selector over stdin and prints its evaluation against the reference corpus
+int main(int argc, char* argv[]) {
+  int c;
+  int option_index=0;
+
+  int mode=MODE_LEXTOR;
+
+  string model_file="";
+  string dic_file="";
+  int nwords_left=-1;
+  int nwords_right=-1;
+
+  //Reference corpus the lexical selections are evaluated against
+  string ref_file;
+
+  double weight_exponent=0.0;
+
+  LexTor::angleth=0.0;
+
+  //For mode LEXTORTL
+  string stopwords_file;
+  string words_file;
+  string bildic_file;
+
+  //Echo the invocation on stderr
+  cerr<<"Command line: ";
+  for(int i=0; i<argc; i++)
+    cerr<<argv[i]<<" ";
+  cerr<<"\n";
+
+  while (true) {
+    static struct option long_options[] =
+      {
+        //--parameters and --lextortl take several values; see their cases below
+        {"reference",   required_argument, 0, 'r'},
+        {"parameters",  required_argument, 0, 'p'},
+        {"weightexp",   required_argument, 0, 'w'},
+        {"angle",       required_argument, 0, 'a'},
+        {"lextortl",    required_argument, 0, 'e'},
+        {"debug",       no_argument,    0, 'd'},
+        {"help",        no_argument,    0, 'h'},
+        {"version",     no_argument,    0, 'v'},
+        {0, 0, 0, 0}
+      };
+
+    c=getopt_long(argc, argv, "r:p:w:a:e:dhv",long_options, &option_index);
+    if (c==-1)
+      break;
+
+    switch (c) {
+    case 'r':
+      ref_file=optarg;
+      break;
+    case 'p':
+      //optarg holds the model; dic, left and right must follow it in argv
+      if (argc-optind<3) {
+        cerr<<"Error: --parameters requires four values: model dic left right\n";
+        help(argv[0]);
+        exit(EXIT_FAILURE);
+      }
+      mode=MODE_LEXTOR;
+      model_file=optarg;
+      dic_file=argv[optind++];
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      break;
+    case 'e':
+      //optarg holds the stopwords file; six more values must follow it in argv
+      if (argc-optind<6) {
+        cerr<<"Error: --lextortl requires seven values: stopwords words tlmodel dic bildic left right\n";
+        help(argv[0]);
+        exit(EXIT_FAILURE);
+      }
+      mode=MODE_LEXTORTL;
+      stopwords_file=optarg;
+      words_file=argv[optind++];
+      model_file=argv[optind++];
+      dic_file=argv[optind++];
+      bildic_file=argv[optind++];
+      nwords_left=atoi(argv[optind++]);
+      nwords_right=atoi(argv[optind++]);
+      break;
+    case 'w':
+      weight_exponent=atof(optarg);
+      break;
+    case 'a':
+      LexTor::angleth=atof(optarg);
+      break;
+    case 'd':
+      LexTor::debug=true;
+      break;
+    case 'h':
+      help(argv[0]);
+      exit(EXIT_SUCCESS);
+      break;
+    case 'v':
+      cerr<<"APERTIUM"<<"\n"; //"APERTIUM" was PACKAGE_STRING
+      cerr<<"LICENSE:\n\n"
+          <<"   Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante\n\n"
+          <<"   This program is free software; you can redistribute it and/or\n"
+          <<"   modify it under the terms of the GNU General Public License as\n"
+          <<"   published by the Free Software Foundation; either version 2 of the\n"
+          <<"   License, or (at your option) any later version.\n"
+          <<"   This program is distributed in the hope that it will be useful, but\n"
+          <<"   WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+          <<"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+          <<"   General Public License for more details.\n"
+          <<"\n"
+          <<"   You should have received a copy of the GNU General Public License\n"
+          <<"   along with this program; if not, see <http://www.gnu.org/licenses/>.\n";
+      exit(EXIT_SUCCESS);
+      break;
+    default:
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+      break;
+    }
+  }
+
+  cerr<<"TH ANGLE: "<<LexTor::angleth<<"\n";
+
+  if (ref_file=="") {
+    cerr<<"Error: No reference corpus was given\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+  ifstream fref;
+
+  fref.open(ref_file.c_str(), ios::in);
+  if (fref.fail()) {
+    cerr<<"Error: Cannot open file '"<<ref_file<<"'\n";
+    exit(EXIT_FAILURE);
+  }
+
+  //Mode selected with --parameters: one model file plus the lexical-selection dictionary
+  if (mode==MODE_LEXTOR) {
+    if (nwords_left<0) {
+      cerr<<"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      cerr<<"Error: no right-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    if (model_file=="") {
+      cerr<<"Error: No model file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (dic_file=="") {
+      cerr<<"Error: No dictionary file (bin format) was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    if (weight_exponent<0) {
+      cerr<<"Error: the weight exponent provided is less than zero. It must be positive\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    ifstream fmodel;
+
+    fmodel.open(model_file.c_str(), ios::in);
+    if(fmodel.fail()) {
+      cerr<<"Error: Cannot open file '"<<model_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    FILE *fdic=NULL;
+    fdic=fopen(dic_file.c_str(), "rb");
+    if (!fdic) {
+      cerr<<"Error: Cannot open file '"<<dic_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+    FSTProcessor fstp;
+    fstp.load(fdic);
+    fstp.initBiltrans();
+    fclose(fdic);
+
+
+    //When reading from the input stream '*all* characters must be
+    //processed, including ' ','\n', .....
+    cin.unsetf(ios::skipws);
+
+
+    LexTorData lextor_model;
+    lextor_model.read(fmodel);
+    fmodel.close();
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_model);
+
+    LexTorEval lteval(&fref);
+    lexical_selector.lexical_selector(cin, fstp, nwords_left, nwords_right, weight_exponent, &lteval);
+    lteval.print_evaluation();
+
+    fref.close();
+  }
+
+  else if (mode==MODE_LEXTORTL) {
+    //Mode selected with --lextortl: also needs stopwords, words, a target-language model and a bilingual dictionary
+    if(stopwords_file=="") {
+      cerr<<"Error: no stopwords file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if(words_file=="") {
+      cerr<<"Error: no words file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if(model_file=="") {
+      cerr<<"Error: no target-language model file was given\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (dic_file=="") {
+      cerr<<"Error: No lexical-selection dictionary was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (bildic_file=="") {
+      cerr<<"Error: No bilingual dictionary was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_left<0) {
+      cerr<<"Error: no left-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+    if (nwords_right<0) {
+      cerr<<"Error: no right-side context number of words was provided\n";
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+    }
+
+    ifstream fstopwords, fwords, fmodel;
+    FILE *fdic=NULL, *fbildic=NULL;
+
+    fstopwords.open(stopwords_file.c_str(), ios::in);
+    if (fstopwords.fail()) {
+      cerr<<"Error: Cannot open file '"<<stopwords_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fwords.open(words_file.c_str(), ios::in);
+    if (fwords.fail()) {
+      cerr<<"Error: Cannot open file '"<<words_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fmodel.open(model_file.c_str(), ios::in);
+    if(fmodel.fail()) {
+      cerr<<"Error: Cannot open file '"<<model_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fdic=fopen(dic_file.c_str(), "rb");
+    if(!fdic) {
+      cerr<<"Error: Cannot open file '"<<dic_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    fbildic=fopen(bildic_file.c_str(), "rb");
+    if(!fbildic) {
+      cerr<<"Error: Cannot open file '"<<bildic_file<<"'\n";
+      exit(EXIT_FAILURE);
+    }
+
+    LexTorData lextor_data;
+
+    lextor_data.read_stopwords(fstopwords);
+    fstopwords.close();
+
+    lextor_data.read_words(fwords);
+    fwords.close();
+
+    LexTor lexical_selector;
+    lexical_selector.set_lextor_data(&lextor_data);
+
+    LexTorData tlmodel;
+    tlmodel.read(fmodel);
+    fmodel.close();
+
+    FSTProcessor fstpdic;
+    fstpdic.load(fdic);
+    fstpdic.initBiltrans();
+    fclose(fdic);
+
+    FSTProcessor fstpbildic;
+    fstpbildic.load(fbildic);
+    fstpbildic.initBiltrans();
+    fclose(fbildic);
+
+
+    lextor_data.read_lexical_choices(fstpdic);
+
+    //When reading from the input stream '*all* characters must be
+    //processed, including ' ','\n', .....
+    cin.unsetf(ios::skipws);
+
+
+    lexical_selector.set_tlmodel(&tlmodel);
+    lexical_selector.set_bildic(&fstpbildic);
+
+
+    LexTorEval lteval(&fref);
+    lexical_selector.lexical_selector(cin, fstpdic, nwords_left, nwords_right, weight_exponent, &lteval);
+    lteval.print_evaluation();
+
+    fref.close();
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_postchunk.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_postchunk.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_postchunk.cc	(revision 69632)
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/postchunk.h>
+#include <lttoolbox/lt_locale.h>
+
+#include <cstdlib>
+#include "getopt_long.h"
+#include <iostream>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <apertium/string_utils.h>
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace Apertium;
+using namespace std;
+
+void message(char *progname)
+{ // Prints the usage text on stderr and terminates with EXIT_FAILURE; it never returns.
+  cerr << "USAGE: " << basename(progname) << " [-z] t3x preproc [input [output]]" << endl;
+  cerr << "  t3x        t3x rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "OPTIONS" <<endl;
+  // The backslash is escaped: a real NUL inside the literal would end the C string and cut the line short.
+  cerr << "  -z         flush buffer on '\\0'" << endl;
+  exit(EXIT_FAILURE);
+}
+
+void testfile(string const &filename)
+{ // Terminates the program with an error on stderr unless stat() succeeds on 'filename'.
+  struct stat info;
+  if(stat(filename.c_str(), &info) != -1)
+  {
+    return;
+  }
+  cerr << "Error: can't stat file '" << filename << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+FILE * open_input(string const &filename)
+{
+  // Opens 'filename' for reading and returns the stream; a failure is
+  // reported on stderr and ends the program.
+  FILE *stream = fopen(filename.c_str(), "r");
+  if(stream)
+  {
+    return stream;
+  }
+  cerr << "Error: can't open input file '" << filename.c_str() << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+FILE * open_output(string const &filename)
+{
+  // Opens 'filename' for writing ("w" mode) and returns the stream; a
+  // failure is reported on stderr and ends the program.
+  FILE *stream = fopen(filename.c_str(), "w");
+  if(stream)
+  {
+    return stream;
+  }
+  cerr << "Error: can't open output file '" << filename.c_str() << "'." << endl; exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])
+{
+  // Front end for the Postchunk processor. Command line, as printed by
+  // message(): [-z] t3x preproc [input [output]].
+  LtLocale::tryToSetLocale();
+
+  Postchunk p;
+
+  static struct option long_options[] =
+  {
+    {"null-flush", no_argument, 0, 'z'},
+    {"help", no_argument, 0, 'h'},
+    {0, 0, 0, 0}
+  };
+  int option_index=0;
+
+  for(;;)
+  {
+    int const opt = getopt_long(argc, argv, "zh", long_options, &option_index);
+    if(opt == -1)
+    {
+      break;
+    }
+
+    if(opt == 'z')
+    {
+      p.setNullFlush(true);
+    }
+    else
+    {
+      // 'h' and anything unrecognised: message() prints the usage and exits.
+      message(argv[0]);
+    }
+  }
+
+  // Whatever follows the options is positional:
+  //   files[0] = t3x, files[1] = preproc, files[2] = input, files[3] = output.
+  // Exactly two, three or four such arguments are accepted.
+  int const nfiles = argc - optind;
+  if(nfiles < 2 || nfiles > 4)
+  {
+    message(argv[0]);
+  }
+  char **const files = argv + optind;
+
+  // The order below decides which failure is reported first: the output
+  // is opened before the input, and preproc is checked before t3x.
+  FILE *output = stdout;
+  if(nfiles == 4)
+  {
+    output = open_output(files[3]);
+  }
+
+  FILE *input = stdin;
+  if(nfiles >= 3)
+  {
+    input = open_input(files[2]);
+  }
+
+  testfile(files[1]);
+  testfile(files[0]);
+
+  string f1 = files[0];
+  string f2 = files[1];
+
+#ifdef _MSC_VER
+  _setmode(_fileno(input), _O_U8TEXT);
+  _setmode(_fileno(output), _O_U8TEXT);
+#endif
+
+  // Load the two rule files, then process the stream.
+  p.read(f1, f2);
+  p.postchunk(input, output);
+
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_pretransfer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_pretransfer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_pretransfer.cc	(revision 69632)
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <string>
+#include "getopt_long.h"
+
+#include <lttoolbox/lt_locale.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+using namespace std;
+
+bool compound_sep = false;
+
+void readAndWriteUntil(FILE *input, FILE *output, int const charcode)
+{ // Copies input to output up to the first unescaped 'charcode', which is consumed but not written.
+  int mychar;
+  // feof() alone misses read errors (e.g. an invalid multibyte sequence), which could make this loop spin forever.
+  while((mychar = fgetwc_unlocked(input)) != charcode)
+  {
+    if(feof(input) || ferror(input))
+    {
+      wcerr << L"ERROR: Unexpected EOF" << endl;
+      exit(EXIT_FAILURE);
+    }
+    fputwc_unlocked(mychar, output);
+    if(mychar == L'\\')
+    {
+      mychar = fgetwc_unlocked(input); // escaped character: copied as is, never compared with 'charcode'
+      fputwc_unlocked(mychar, output);
+    }
+  }
+}
+
+void procWord(FILE *input, FILE *output, bool surface_forms)
+{ // Reads one lexical unit up to its closing '$' (consumed, not written) and writes the rewritten unit.
+  int mychar;
+  wstring buffer = L"";
+  // Text collected in 'buffer' is written only after the whole unit has been read.
+  bool buffer_mode = false; // true while characters go to 'buffer'; starts at the first '<'
+  bool in_tag = false;      // true between '<' and '>'
+  bool queuing = false;     // set once a '#' has ended buffering
+
+  if(surface_forms)
+  { // discard up to and including the first '/'; EOF or a read error ends the skip and is reported by the loop below
+    while((mychar = fgetwc_unlocked(input)) != L'/' && !feof(input) && !ferror(input)) ;
+  }
+
+  while((mychar = fgetwc_unlocked(input)) != L'$')
+  {
+    if(feof(input) || ferror(input))
+    {
+      wcerr << L"ERROR: Unexpected EOF" << endl;
+      exit(EXIT_FAILURE);
+    }
+    // State is updated first, so '<' is itself buffered and '#' is itself written straight through.
+    switch(mychar)
+    {
+    case L'<':
+      in_tag = true;
+      if(!buffer_mode)
+      {
+        buffer_mode = true;
+      }
+      break;
+
+    case L'>':
+      in_tag = false;
+      break;
+
+    case L'#':
+      if(buffer_mode)
+      {
+        buffer_mode = false;
+        queuing = true;
+      }
+      break;
+    }
+
+    if(buffer_mode)
+    {
+      if((mychar != L'+' || (mychar == L'+' && in_tag == true)) &&
+         (mychar != L'~' || (mychar == L'~' && in_tag == true)))
+      {
+        buffer += static_cast<wchar_t>(mychar);
+      }
+      else if(in_tag == false && mychar == L'+')
+      {
+        buffer.append(L"$ ^"); // '+' outside a tag closes this unit and opens a new one
+      }
+      else if(in_tag == false && mychar == L'~' && compound_sep == true)
+      {
+        buffer.append(L"$^"); // with compound_sep unset, a '~' outside a tag is simply dropped
+      }
+    }
+    else
+    {
+      if(mychar == L'+' && queuing == true)
+      {
+        buffer.append(L"$ ^");
+        buffer_mode = true;
+      }
+      else
+      {
+        fputwc_unlocked(mychar, output);
+      }
+    }
+
+  }
+  fputws_unlocked(buffer.c_str(), output);
+}
+
+void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms)
+{
+  // Copies input to output until end of file. Bracketed blocks and backslash
+  // escapes pass through untouched, ^...$ units are rewritten by procWord(),
+  // and a NUL character triggers a flush when null_flush is set.
+  // Only feof() ends the loop; read errors are not distinguished here.
+  for(;;)
+  {
+    int const symbol = fgetwc_unlocked(input);
+    if(feof(input))
+    {
+      return;
+    }
+
+    // In every case the character just read is the first thing written.
+    fputwc_unlocked(symbol, output);
+
+    if(symbol == L'[')
+    {
+      // Copy the block body, then write the ']' that ended the copy
+      // (readAndWriteUntil() consumes it without writing it).
+      readAndWriteUntil(input, output, L']');
+      fputwc_unlocked(L']', output);
+    }
+    else if(symbol == L'\\')
+    {
+      // The escaped character is copied without being interpreted.
+      fputwc_unlocked(fgetwc_unlocked(input), output);
+    }
+    else if(symbol == L'^')
+    {
+      // procWord() stops at the unit's '$' without writing it.
+      procWord(input, output, surface_forms);
+      fputwc_unlocked(L'$', output);
+    }
+    else if(symbol == L'\0' && null_flush)
+    {
+      // The NUL itself has already been written above.
+      fflush(output);
+    }
+    // Any other character needs nothing beyond the echo above.
+  }
+}
+
+void usage(char *progname)
+{ // Prints the one-line synopsis on the wide error stream, then exits with failure status.
+  char *const shown_name = basename(progname);
+  wcerr << L"USAGE: " << shown_name << L" [input_file [output_file]]" << endl;
+  exit(EXIT_FAILURE);
+}
+
+
+
+int main(int argc, char *argv[])
+{
+  // Parses the flags, opens the optional input and output files (stdin and
+  // stdout by default) and runs processStream() over them.
+  LtLocale::tryToSetLocale();
+  bool null_flush = false;
+  bool surface_forms = false;
+  int option_index=0;
+
+  while (true) {
+    static struct option long_options[] =
+    {
+      {"null-flush", no_argument, 0, 'z'},
+      {"no-surface-forms", no_argument, 0, 'n'},
+      {"compounds", no_argument, 0, 'e'},
+      {"help", no_argument, 0, 'h'},
+      {0, 0, 0, 0}
+    };
+
+    int c=getopt_long(argc, argv, "enzh", long_options, &option_index);
+    if (c==-1)
+      break;
+
+    switch (c)
+    {
+      case 'z':
+        null_flush = true;
+        break;
+
+      case 'e': // procWord() then also splits units at '~' outside tags
+        compound_sep = true;
+        break;
+
+      case 'n': // procWord() then skips the text before the first '/' of each unit
+        surface_forms = true;
+        break;
+
+      case 'h':
+      default:
+        usage(argv[0]);
+        break;
+    }
+  }
+
+  // Positional arguments left after the options: [input_file [output_file]].
+  int const nfiles = argc - optind;
+  if(nfiles > 2)
+  {
+    usage(argv[0]);
+  }
+
+  FILE *input = stdin, *output = stdout;
+
+  if(nfiles >= 1)
+  {
+    // The input is opened first so that a bad input name never truncates the output file.
+    input = fopen(argv[optind], "r");
+    if(!input)
+    {
+      // Name the file that actually failed; argv[1] may be an option.
+      wcerr << L"ERROR: Can't read file '" << argv[optind] << L"'" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  if(nfiles == 2)
+  {
+    output = fopen(argv[optind+1], "w");
+    if(!output)
+    {
+      wcerr << L"ERROR: Can't write file '" << argv[optind+1] << L"'" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+#ifdef _MSC_VER
+    _setmode(_fileno(input), _O_U8TEXT);
+    _setmode(_fileno(output), _O_U8TEXT);
+#endif
+
+  processStream(input, output, null_flush, surface_forms);
+
+  // Open streams are flushed and closed by the runtime when main returns.
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_tagger.h	(revision 69632)
@@ -0,0 +1,88 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef APERTIUM_TAGGER_H
+#define APERTIUM_TAGGER_H
+
+#include "apertium_config.h"
+
+#include "basic_stream_tagger.h"
+#include "basic_stream_tagger_trainer.h"
+#include "basic_tagger.h"
+#include "constructor_eq_delete.h"
+#include "file_tagger.h"
+#include "optional.h"
+
+#include "getopt_long.h"
+#include <string>
+
+namespace Apertium {
+class apertium_tagger : private constructor_eq_delete { // command-line front end; all work starts from the constructor
+public:
+  apertium_tagger(int &argc, char **&argv); // takes main()'s argc/argv by reference; they are kept as reference members below
+
+private:
+  enum FunctionTypeType { Unigram, SlidingWindow };
+  enum UnigramType { Stream_5_3_1, Stream_5_3_2, Stream_5_3_3 };
+  enum FunctionType { Tagger, Retrain, Supervised, Train };
+  static void help();
+
+  // NOTE(review): presumably render an option as text, by index into longopts or from the entry itself - confirm in the .cc.
+  static std::string option_string(const int &indexptr_);
+  static std::string option_string(const struct option &option_);
+
+  // NOTE(review): name suggests this installs the process-wide locale - confirm in the .cc.
+  static void locale_global_();
+
+  // Option table; 'struct option' is the element type used by getopt_long.
+  static const struct option longopts[];
+
+  // NOTE(review): the members below are only declared here; their behaviour is defined in
+  // apertium_tagger.cc and should be confirmed there.
+  void set_indexptr();
+
+  // Takes a getter and a setter of basic_Tagger::Flags as pointers to member functions.
+  void flagOptionCase(bool (basic_Tagger::Flags::*GetFlag)() const,
+                      void (basic_Tagger::Flags::*SetFlag)(const bool &));
+  std::string option_string();
+  void functionTypeTypeOptionCase(const FunctionTypeType &FunctionTypeType_);
+  void functionTypeOptionCase(const FunctionType &FunctionType_);
+  void getIterationsArgument();
+  unsigned long optarg_unsigned_long() const;
+  void g_StreamTagger(basic_StreamTagger &StreamTagger_);
+  void s_StreamTaggerTrainer(basic_StreamTaggerTrainer &StreamTaggerTrainer_);
+  void g_FILE_Tagger(FILE_Tagger &FILE_Tagger_);
+  void r_FILE_Tagger(FILE_Tagger &FILE_Tagger_);
+  void s_FILE_Tagger(FILE_Tagger &FILE_Tagger_);
+  void t_FILE_Tagger(FILE_Tagger &FILE_Tagger_);
+  int &argc;
+  char **&argv;
+  int The_val;
+
+  // NOTE(review): presumably indices into longopts (current option, and the options that chose the mode) - confirm.
+  int The_indexptr;
+  Optional<int> FunctionTypeTypeOption_indexptr;
+  Optional<int> FunctionTypeOption_indexptr;
+
+  // Optional<> wrappers around the enums above; NOTE(review): presumably empty until the matching option is parsed.
+  Optional<FunctionTypeType> TheFunctionTypeType;
+  Optional<UnigramType> TheUnigramType;
+  Optional<FunctionType> TheFunctionType;
+  unsigned long TheFunctionTypeOptionArgument;
+  basic_Tagger::Flags TheFlags;
+};
+}
+
+#endif // APERTIUM_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/apertium_tagger_apply_new_rules.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_tagger_apply_new_rules.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_tagger_apply_new_rules.cc	(revision 69632)
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2004-2006 Felipe Sánchez-Martínez
+ * Copyright (C) 2006 Universitat d'Alacant
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <sys/types.h>
+#include <unistd.h>
+#include "getopt_long.h"
+
+#include <apertium/hmm.h>
+#include <apertium/tagger_data_hmm.h>
+#include <apertium/tsx_reader.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+using namespace std;
+
+//Global vars
+TaggerDataHMM tagger_data_hmm;
+TTag eos; //End-of-sentence tag
+
+void check_file(FILE *f, const string& path) {
+  if (f) return; // non-null stream: nothing to report
+
+  cerr<<"Error: cannot open file '"<<path<<"'\n";
+  exit(EXIT_FAILURE);
+}
+
+void help(char *name) {
+  // Describes the tool and its three options on stderr; 'name' is the program name shown in the synopsis.
+  cerr<<"Forbid and enforce rules are applied to the given HMM parameters\n\n"
+      <<"USAGE:\n"
+      <<name<<" --filein filein.prob --fileout fileout.prob --tsxfile file.tsx\n\n"
+      <<"ARGUMENTS: \n"
+      "   --filein|-i: To specify the file with the HMM parameter to process\n\n"
+      "   --fileout|-o: To specify the file to which the HMM will be written\n\n"
+      "   --tsxfile|-x: File containing the rules to apply\n\n"
+      "NOTE: Parameters are read from and written to the files provided\n";
+}
+
+int main(int argc, char* argv[]) {
+  //Loads HMM tagger parameters, applies the forbid/enforce/prefer rules of a .tsx file and writes the result
+  string filein="";
+  string fileout="";
+  string filetsx="";
+
+  int c;
+  int option_index=0;
+  cerr<<"Command line: ";
+  for(int i=0; i<argc; i++)
+    cerr<<argv[i]<<" ";
+  cerr<<"\n";
+
+  while (true) {
+    static struct option long_options[] =
+      {
+        {"filein",    required_argument, 0, 'i'},
+        {"fileout",   required_argument, 0, 'o'},
+        {"tsxfile",   required_argument, 0, 'x'},
+        {0, 0, 0, 0}
+      };
+
+    c=getopt_long(argc, argv, "i:o:x:hv",long_options, &option_index);
+    if (c==-1)
+      break;
+
+    switch (c) {
+    case 'i':
+      filein=optarg;
+      break;
+    case 'o':
+      fileout=optarg;
+      break;
+    case 'h':
+      help(argv[0]);
+      exit(EXIT_SUCCESS);
+    case 'x':
+      filetsx=optarg;
+      break;
+    case 'v':
+      cerr<<"LICENSE:\n\n"
+          <<"   Copyright (C) 2006 Felipe Sánchez Martínez\n\n"
+          <<"   This program is free software; you can redistribute it and/or\n"
+          <<"   modify it under the terms of the GNU General Public License as\n"
+          <<"   published by the Free Software Foundation; either version 2 of the\n"
+          <<"   License, or (at your option) any later version.\n"
+          <<"   This program is distributed in the hope that it will be useful, but\n"
+          <<"   WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+          <<"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+          <<"   General Public License for more details.\n"
+          <<"\n"
+          <<"   You should have received a copy of the GNU General Public License\n"
+          <<"   along with this program; if not, see <http://www.gnu.org/licenses/>.\n";
+      exit(EXIT_SUCCESS);
+      break;
+    default:
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+      break;
+    }
+  }
+
+  //Now we check the command line arguments
+  if (filein=="") {
+    cerr<<"Error: You did not provide an input file (.prob). Use --filein to do that\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if (fileout=="") {
+    cerr<<"Error: You did not provide an output file (.prob). Use --fileout to do that\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if (filetsx=="") {
+    cerr<<"Error: You did not provide a tagger definition file (.tsx). Use --tsxfile to do that\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  FILE *fin, *fout;
+
+  fin=fopen(filein.c_str(), "rb");
+  check_file(fin, filein);
+
+  cerr<<"Reading apertium-tagger data from file '"<<filein<<"' ... "<<flush;
+  tagger_data_hmm.read(fin);
+  fclose(fin);
+  cerr<<"done.\n";
+
+  cerr<<"Reading apertium-tagger definition from file '"<<filetsx<<"' ... "<<flush;
+  TSXReader treader;
+  treader.read(filetsx);
+  cerr<<"done.\n";
+  //Copy the three rule sets from the .tsx definition into the loaded parameters
+  tagger_data_hmm.setForbidRules(treader.getTaggerData().getForbidRules());
+  tagger_data_hmm.setEnforceRules(treader.getTaggerData().getEnforceRules());
+  tagger_data_hmm.setPreferRules(treader.getTaggerData().getPreferRules());
+
+  HMM hmm(&tagger_data_hmm);
+  hmm.apply_rules();
+
+  fout=fopen(fileout.c_str(), "wb");
+  check_file(fout, fileout);
+  cerr<<"Writing apertium-tagger data to file '"<<fileout<<"' ... "<<flush;
+  hmm.serialise(fout);
+  fclose(fout);
+  cerr<<"done.\n";
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_tagger_readwords.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_tagger_readwords.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_tagger_readwords.cc	(revision 69632)
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2004-2006 Felipe Sánchez-Martínez
+ * Copyright (C) 2006 Universitat d'Alacant
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+#include <iostream>
+#include <clocale>
+#include <apertium/tagger_word.h>
+#include <apertium/tagger_data.h>
+#include <apertium/tsx_reader.h>
+*/
+
+#include "getopt_long.h"
+#include <apertium/utf_converter.h>
+#include <apertium/morpho_stream.h>
+#include <apertium/tsx_reader.h>
+#include <apertium/tagger_data_hmm.h>
+#include <lttoolbox/lt_locale.h>
+#include <iostream>
+
+#include <cstdlib>
+#include <apertium/string_utils.h>
+
+
+using namespace std;
+
+//Global vars
+TaggerDataHMM tagger_data_hmm;  // filled in main() from the .tsx or the .prob file; used by readwords()
+bool check_ambclasses;          // set in main(): true after loading a .prob file, false after a .tsx file
+
+// Abort with an error message when 'f' is a null stream (the open failed).
+void check_file(FILE *f, const string& path) {
+  if (f) return;
+  cerr<<"Error: cannot open file '"<<path<<"'\n";
+  exit(EXIT_FAILURE);
+}
+
+// Read words from 'is' through a MorphoStream and print each one to stdout
+// as "<superficial form> <tags>".  When corpus_length is positive, stop after
+// that many words.  If check_ambclasses is set, also report on stderr every
+// word whose ambiguity class number falls outside [0, getM()).
+void readwords (FILE *is, int corpus_length) {
+  MorphoStream lexmorfo(is, true, &tagger_data_hmm);
+  int nwords=0;
+
+  for (TaggerWord *word=lexmorfo.get_next_word(); word; word=lexmorfo.get_next_word()) {
+    nwords++;
+
+    cout<<UtfConverter::toUtf8(word->get_superficial_form())<<" "<<UtfConverter::toUtf8(word->get_string_tags())<<"\n";
+
+    if (check_ambclasses) {
+      int k=tagger_data_hmm.getOutput()[word->get_tags()];
+
+      if ((k>=tagger_data_hmm.getM())||(k<0)) {
+        cerr<<"Error: Ambiguity class number out of range: "<<k<<"\n";
+        cerr<<"Word: "<<UtfConverter::toUtf8(word->get_superficial_form())<<"\n";
+        cerr<<"Ambiguity class: "<<UtfConverter::toUtf8(word->get_string_tags())<<"\n";
+      }
+    }
+
+    delete word;  // each word returned by get_next_word() is released here
+
+    if ((corpus_length>0) && (nwords>=corpus_length))
+      break;      // limit reached: leave before fetching another word
+  }
+  cerr<<nwords<<" words were read\n";
+}
+
+
+// Print the command-line synopsis and the option summary to standard error.
+void help(char *name) {
+  cerr<<"USAGE:\n"
+      <<name<<" {--tsxfile file.tsx | --probfile file.prob} [--clength <corpus_length>] < file.crp \n\n"
+      <<"ARGUMENTS: \n"
+      <<"   --tsxfile|-x: Specify a tagger specification file\n"
+      <<"   --probfile|-p: Specify a tagger parameter file\n"
+      <<"   --clength|-l: Specify the length of the corpus to process\n";
+}
+
+
+// Entry point: load tagger data from exactly one of a .tsx specification or
+// a .prob parameter file, then dump the words read from stdin via readwords().
+int main(int argc, char* argv[]) {
+  string tsxfile="";
+  string probfile="";
+  int corpus_length=-1;
+  int option_index=0;
+
+  // setlocale() may return NULL; streaming a null char* to cerr is undefined.
+  const char *locale_name=setlocale(LC_ALL,"");
+  cerr<<"LOCALE: "<<(locale_name ? locale_name : "(could not be set)")<<"\n";
+
+  cerr<<"Command line: ";
+  for(int i=0; i<argc; i++)
+    cerr<<argv[i]<<" ";
+  cerr<<"\n";
+
+  while (true) {
+    static struct option long_options[] =
+      {
+        {"tsxfile",  required_argument, 0, 'x'},
+        {"probfile", required_argument, 0, 'p'},
+        {"clength",  required_argument, 0, 'l'},
+        {"help",       no_argument,     0, 'h'},
+        {"version",    no_argument,     0, 'v'},
+        {0, 0, 0, 0}
+      };
+
+    int c=getopt_long(argc, argv, "x:p:l:hv",long_options, &option_index);
+    if (c==-1)
+      break;
+
+    switch (c) {
+    case 'l':
+      corpus_length=atoi(optarg);
+      if(corpus_length<=0) {
+        cerr<<"Error: corpus length provided with --clength must be a positive integer\n";
+        help(argv[0]);
+        exit(EXIT_FAILURE);
+      }
+      break;
+    case 'x':
+      tsxfile=optarg;
+      break;
+    case 'p':
+      probfile=optarg;
+      break;
+    case 'h':
+      help(argv[0]);
+      exit(EXIT_SUCCESS);
+      break;
+    case 'v':
+      cerr<<"apertium-tagger-readwords\n";
+      cerr<<"LICENSE:\n\n"
+          <<"   Copyright (C) 2006 Felipe Sánchez Martínez\n\n"
+          <<"   This program is free software; you can redistribute it and/or\n"
+          <<"   modify it under the terms of the GNU General Public License as\n"
+          <<"   published by the Free Software Foundation; either version 2 of the\n"
+          <<"   License, or (at your option) any later version.\n"
+          <<"   This program is distributed in the hope that it will be useful, but\n"
+          <<"   WITHOUT ANY WARRANTY; without even the implied warranty of\n"
+          <<"   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU\n"
+          <<"   General Public License for more details.\n"
+          <<"\n"
+          <<"   You should have received a copy of the GNU General Public License\n"
+          <<"   along with this program; if not, see <http://www.gnu.org/licenses/>.\n";
+      exit(EXIT_SUCCESS);
+      break;
+    default:
+      help(argv[0]);
+      exit(EXIT_FAILURE);
+      break;
+    }
+  }
+
+  if((tsxfile=="") && (probfile=="")) {
+    cerr<<"Error: You have provided neither a tagger specification file (.tsx) nor a tagger probability file (.prob). Use --tsxfile or --probfile to provide one of them\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if((tsxfile!="") && (probfile!="")) {
+    cerr<<"Error: You provided a tagger specification file and a tagger probability file. Only one of them can be provided, not both\n";
+    help(argv[0]);
+    exit(EXIT_FAILURE);
+  }
+
+  if (tsxfile!="") {
+    cerr<<"Reading tagger specification from file '"<<tsxfile<<"' ..."<<flush;
+    TSXReader treader;
+    treader.read(tsxfile);
+    tagger_data_hmm=treader.getTaggerData();
+    cerr<<"done.\n";
+    check_ambclasses=false;  // a .tsx file gives no ambiguity classes to check
+  }
+
+  if (probfile!="") {
+    cerr<<"Reading tagger parameters from file '"<<probfile<<"' ..."<<flush;
+    FILE* fin=fopen(probfile.c_str(), "rb");  // .prob data is binary
+    check_file(fin, probfile);
+    tagger_data_hmm.read(fin);
+    cerr<<"done.\n";
+    fclose(fin);
+    check_ambclasses=true;
+  }
+
+  TaggerWord::setArrayTags(tagger_data_hmm.getArrayTags());
+  readwords(stdin, corpus_length);
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_tmxbuild.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_tmxbuild.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_tmxbuild.cc	(revision 69632)
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <cstdlib>
+#include "getopt_long.h"
+#include <iostream>
+#include <libgen.h>
+#include <string>
+#include <cstdio>
+#include <lttoolbox/lt_locale.h>
+
+#include <apertium/apertium_config.h>
+#include <apertium/tmx_builder.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+using namespace Apertium;
+using namespace std;
+
+// Print the command-line help to wcerr and terminate with EXIT_FAILURE.
+void usage(char *progname)
+{
+  wcerr << L"USAGE: " << basename(progname) << L" [options] code1 code2 doc1 doc2 [output_file]" << endl;
+  wcerr << L"Options:" << endl;
+  wcerr << L"  -p percent    number 0 < n <= 1 to set margin of confidence of TU's " << endl;
+  wcerr << L"                (0.85 by default) in length terms" << endl;
+  wcerr << L"  -e edit       number 0 < n <= 1 to set margin of confidence of TU's " << endl;
+  wcerr << L"                (0.30 by default) in edit distance terms" << endl;
+  wcerr << L"  -l low-limit  ignore percent if the segment is less than lowlimit" <<endl;
+  wcerr << L"                (15 by default)" << endl;
+  wcerr << L"  -m max-edit   characters to be taken into account when aligning" << endl;
+  wcerr << L"                sentences (50 by default)" << endl;
+  wcerr << L"  -d diagonal   diagonal width for using edit distance, 10 by default" << endl;
+  wcerr << L"  -w window     window size of the edit distance with sentences" << endl;
+  wcerr << L"                (100 sentences by default)" << endl;
+  wcerr << L"  -s step       step for moving the window during the alignment" <<endl;
+  wcerr << L"                (75 sentences by default)" << endl;
+  wcerr << L"  -h help       display this help" << endl;  // NOTE(review): main() also accepts -t/--translation, which is not listed here
+  wcerr << L"Other parameters:" << endl;
+  wcerr << L"  code1, code2 codes of the languages (i.e. ISO-639 ones)" << endl;
+  wcerr << L"  doc1, doc2    unformatted docs to build the TMX file" << endl;
+  wcerr << L"  output_file   if not specified, the result will be printed to stdout" << endl;
+  exit(EXIT_FAILURE);
+}
+
+
+// Parse a non-negative integer option value.  A negative value prints the
+// usage text, which terminates the program.
+static int parse_count(const char *text, char *progname)
+{
+  int value = atoi(text);
+  if(value < 0)
+  {
+    usage(progname);
+  }
+  return value;
+}
+
+// Parse a ratio option value that must satisfy 0 < value <= 1.  Any value
+// failing that test prints the usage text, which terminates the program.
+static double parse_ratio(const char *text, char *progname)
+{
+  double value = strtod(text, NULL);
+  if(value <= 0 || value > 1)
+  {
+    usage(progname);
+  }
+  return value;
+}
+
+// Entry point of the TMX builder: reads the tuning options, then the
+// positional arguments "code1 code2 doc1 doc2 [output_file]", configures a
+// TMXBuilder accordingly and asks it to generate the result.
+int main(int argc, char *argv[])
+{
+  LtLocale::tryToSetLocale();
+
+  // Tunable parameters with their default values.
+  double percent = 0.85;
+  double edit_distance_percent = 0.30;
+  int low_limit = 15;
+  int max_edit = 50;
+  int diagonal_width = 10;
+  int window_size = 100;
+  int step = 75;
+  string translation = "";
+
+  static struct option long_options[] =
+  {
+    {"percent",               required_argument, 0, 'p'},
+    {"edit-distance-percent", required_argument, 0, 'e'},
+    {"low-limit",             required_argument, 0, 'l'},
+    {"max-edit",              required_argument, 0, 'm'},
+    {"diagonal",              required_argument, 0, 'd'},
+    {"window",                required_argument, 0, 'w'},
+    {"step",                  required_argument, 0, 's'},
+    {"translation",           required_argument, 0, 't'},
+    {"help",                  no_argument,       0, 'h'},
+    {0, 0, 0, 0}
+  };
+  char const *const short_options = "p:e:l:m:d:w:s:t:h";
+  int option_index = 0;
+
+  for(;;)
+  {
+    int const c = getopt_long(argc, argv, short_options, long_options, &option_index);
+    if(c == -1)
+    {
+      break;
+    }
+
+    switch(c)
+    {
+      case 'p':
+        percent = parse_ratio(optarg, argv[0]);
+        break;
+
+      case 'e':
+        edit_distance_percent = parse_ratio(optarg, argv[0]);
+        break;
+
+      case 'l':
+        low_limit = parse_count(optarg, argv[0]);
+        break;
+
+      case 'm':
+        max_edit = parse_count(optarg, argv[0]);
+        break;
+
+      case 'd':
+        diagonal_width = parse_count(optarg, argv[0]);
+        break;
+
+      case 'w':
+        window_size = parse_count(optarg, argv[0]);
+        break;
+
+      case 's':
+        step = parse_count(optarg, argv[0]);
+        break;
+
+      case 't':
+        translation = optarg;
+        break;
+
+      default:
+        // -h and any unrecognised option end up here; usage() exits.
+        usage(argv[0]);
+        break;
+    }
+  }
+
+  // Only "code1 code2 doc1 doc2" with an optional trailing output file is
+  // accepted; any other number of positional arguments is an error.
+  int const positional = argc - optind;
+  if(positional != 4 && positional != 5)
+  {
+    usage(argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  string lang1 = argv[optind];
+  string lang2 = argv[optind + 1];
+  string doc1 = argv[optind + 2];
+  string doc2 = argv[optind + 3];
+  string output_file = "";
+  if(positional == 5)
+  {
+    output_file = argv[optind + 4];
+  }
+
+  TMXBuilder tmxb(UtfConverter::fromUtf8(lang1), UtfConverter::fromUtf8(lang2));
+//  if(!tmxb.check(doc1, doc2))
+//  {
+//    wcerr << L"Error: The two files are incompatible for building a TMX." << endl;
+//    exit(EXIT_FAILURE);
+//  }
+
+  // Hand every setting over to the builder.
+  tmxb.setPercent(percent);
+  tmxb.setEditDistancePercent(edit_distance_percent);
+  tmxb.setMaxEdit(max_edit);
+  tmxb.setDiagonalWidth(diagonal_width);
+  tmxb.setWindowSize(window_size);
+  tmxb.setStep(step);
+  tmxb.setLowLimit(low_limit);
+  if(!translation.empty())
+  {
+    tmxb.setTranslation(translation);
+  }
+
+  tmxb.generate(doc1, doc2, output_file);
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_transfer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_transfer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_transfer.cc	(revision 69632)
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer.h>
+#include <lttoolbox/lt_locale.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <apertium/string_utils.h>
+#include "getopt_long.h"
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace Apertium;
+using namespace std;
+
+// Print the usage summary to standard error and terminate the program with
+// EXIT_FAILURE (this is also the exit status used for -h).
+void message(char *progname)
+{
+  cerr << "USAGE: " << basename(progname) << " trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -b trules preproc [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -n trules preproc [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -x extended trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -c trules preproc biltrans [input [output]]" << endl;
+  cerr << "       " << basename(progname) << " -t trules preproc biltrans [input [output]]" << endl;
+  cerr << "  trules     transfer rules file" << endl;
+  cerr << "  preproc    result of preprocess trules file" << endl;
+  cerr << "  biltrans   bilingual letter transducer file" << endl;
+  cerr << "  input      input file, standard input by default" << endl;
+  cerr << "  output     output file, standard output by default" << endl;
+  cerr << "  -b         input from lexical transfer" << endl;
+  cerr << "  -n         don't use bilingual dictionary" << endl;
+  cerr << "  -x bindix  extended mode with user dictionary" << endl;
+  cerr << "  -c         case-sensitiveness while accessing bilingual dictionary" << endl;
+  cerr << "  -t         trace (show rule numbers and patterns matched)" << endl;
+  cerr << "  -T         trace, for apertium-transfer-tools (also sets -t)" << endl;
+  cerr << "  -z         null-flushing output on '\\0'" << endl;  // escaped: a bare \0 would end the literal early
+  cerr << "  -h         shows this message" << endl;
+  exit(EXIT_FAILURE);
+}
+
+// Terminate with an error message unless 'filename' can be stat()ed.
+void testfile(string const &filename)
+{
+  struct stat info;
+  if(stat(filename.c_str(), &info) != -1)
+    return;
+
+  cerr << "Error: can't stat file '" << filename << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+// Open 'filename' for reading; exit with an error message on failure.
+FILE * open_input(string const &filename)
+{
+  FILE *stream = fopen(filename.c_str(), "r");
+  if(stream)
+  {
+    return stream;
+  }
+
+  cerr << "Error: can't open input file '" << filename.c_str() << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+// Open 'filename' for writing (mode "w"); exit with an error on failure.
+FILE * open_output(string const &filename)
+{
+  FILE *stream = fopen(filename.c_str(), "w");
+  if(stream)
+  {
+    return stream;
+  }
+  cerr << "Error: can't open output file '" << filename.c_str() << "'." << endl;
+  exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])  // entry point: parse options, load the transfer data, run t.transfer()
+{
+  LtLocale::tryToSetLocale();
+  // Every option parsed below is applied directly to this Transfer object.
+  Transfer t;
+
+  int option_index=0;
+
+  while (true) {
+    static struct option long_options[] =
+    {
+      {"from-bilingual",      no_argument, 0, 'b'},
+      {"no-bilingual",      no_argument, 0, 'n'},
+      {"extended",      required_argument, 0, 'x'},
+      {"case-sensitive", no_argument, 0, 'c'},
+      {"null-flush", no_argument, 0, 'z'},
+      {"trace", no_argument, 0, 't'},
+      {"trace_att", no_argument, 0, 'T'},
+      {"help", no_argument, 0, 'h'},
+      {0, 0, 0, 0}
+    };
+
+    int c=getopt_long(argc, argv, "nbx:cztTh", long_options, &option_index);
+    if (c==-1)
+      break;
+
+    switch (c)
+    {
+      case 'b':
+        t.setPreBilingual(true);
+        t.setUseBilingual(false);  // -b also switches the bilingual dictionary off
+        break;
+
+      case 'n':
+        t.setUseBilingual(false);
+        break;
+
+      case 'x':
+        t.setExtendedDictionary(optarg);
+        break;
+
+      case 'c':
+        t.setCaseSensitiveness(true);
+        break;
+
+      case 't':
+        t.setTrace(true);
+        break;
+
+      case 'T':
+        t.setTrace(true);  // -T implies -t
+        t.setTraceATT(true);
+        break;
+
+      case 'z':
+        t.setNullFlush(true);
+        break;
+
+      case 'h':  // help shares the path of unknown options; message() exits with EXIT_FAILURE
+      default:
+        message(argv[0]);
+        break;
+    }
+  }
+
+  FILE *input = stdin, *output = stdout;
+  // argc - optind + 1 is the number of positional arguments plus one.  Files are
+  // addressed from the end of argv; NOTE(review): presumably relies on getopt_long
+  // having moved all options in front of the positional arguments - confirm.
+  switch(argc - optind + 1)
+  {
+    case 6:  // five positionals; per the usage text: trules preproc biltrans input output
+      output = open_output(argv[argc-1]);  // NOTE(review): opened with mode "w" before the other files are checked
+      input = open_input(argv[argc-2]);
+      testfile(argv[argc-3]);
+      testfile(argv[argc-4]);
+      testfile(argv[argc-5]);
+      t.read(argv[argc-5], argv[argc-4], argv[argc-3]);
+      break;
+
+    case 5:  // four positionals: meaning depends on whether a bilingual file is expected
+      if(t.getUseBilingual() == false || t.getPreBilingual() == true)
+      {
+        // no bilingual file: two data files, then input and output
+        output = open_output(argv[argc-1]);
+        input = open_input(argv[argc-2]);
+        testfile(argv[argc-3]);
+        testfile(argv[argc-4]);
+        t.read(argv[argc-4], argv[argc-3]);
+      }
+      else
+      {
+        // three data files, then input; output stays stdout
+        input = open_input(argv[argc-1]);
+        testfile(argv[argc-2]);
+        testfile(argv[argc-3]);
+        testfile(argv[argc-4]);
+        t.read(argv[argc-4], argv[argc-3], argv[argc-2]);
+      }
+      break;
+
+    case 4:  // three positionals
+      if(t.getUseBilingual() == false || t.getPreBilingual() == true)
+      {
+        // two data files, then input; output stays stdout
+        input = open_input(argv[argc-1]);
+        testfile(argv[argc-2]);
+        testfile(argv[argc-3]);
+        t.read(argv[argc-3], argv[argc-2]);
+      }
+      else
+      {
+        // three data files; input and output stay stdin/stdout
+        testfile(argv[argc-1]);
+        testfile(argv[argc-2]);
+        testfile(argv[argc-3]);
+        t.read(argv[argc-3], argv[argc-2], argv[argc-1]);
+      }
+      break;
+    case 3:  // two positionals: only valid when no bilingual file is expected
+      if(t.getUseBilingual() == false || t.getPreBilingual() == true)
+      {
+        testfile(argv[argc-1]);
+        testfile(argv[argc-2]);
+        t.read(argv[argc-2], argv[argc-1]);
+      }
+      else
+      {
+        message(argv[0]);
+      }
+      break;
+
+    default:  // any other argument count: print usage and exit
+      message(argv[0]);
+      break;
+  }
+  // With MSVC both streams are switched to _O_U8TEXT mode before the transfer.
+#ifdef _MSC_VER
+  _setmode(_fileno(input), _O_U8TEXT);
+  _setmode(_fileno(output), _O_U8TEXT);
+#endif
+
+  t.transfer(input, output);
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/getopt_long.c
===================================================================
--- branches/apertium-tagger/apertium2/apertium/getopt_long.c	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/getopt_long.c	(revision 69632)
@@ -0,0 +1,1236 @@
+/*
+ * THIS IS NOT A CLEAN COPY OF GETOPT.C AND GETOPT1.C 
+ *
+ * Implementation of getopt_long, cobbled together from getopt.c and
+ * getopt1.c from the GNU binutils distribution.  This is more-or-less
+ * getopt.c inserted into getopt1.c, with the definition of getopt()
+ * commented out.
+ *
+ * Need to ifdef out optarg, optind, opterr, optopt, to handle the
+ * case where these are already defined for the benefit of system
+ * getopt()
+ *
+ * No, it's not pretty.
+ */
+
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+   Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
+     Free Software Foundation, Inc.
+
+   NOTE: This source is derived from an old version taken from the GNU C
+   Library (glibc).
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.  */
+
+#include <apertium_config.h>
+
+#ifndef HAVE_GETOPT_LONG
+/* We shouldn't be compiling this module in this case, but we clearly
+   are (damned configuration tools!), so avoid messing up. */
+
+#include "getopt_long.h"
+/* See getopt_long.h for discussion of THIS_IS__STDC__ */
+
+
+#if !defined THIS_IS__STDC__ || !THIS_IS__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+
+
+/* ******************** getopt.c ******************** */
+/* Getopt for GNU.
+   NOTE: getopt is now part of the C library, so if you don't know what
+   "Keep this file name-space clean" means, talk to drepper@gnu.org
+   before changing it!
+
+   Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98
+   	Free Software Foundation, Inc.
+
+   NOTE: This source is derived from an old version taken from the GNU C
+   Library (glibc).
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.  */
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+   Ditto for AIX 3.2 and <stdlib.h>.  */
+#ifndef _NO_PROTO
+# define _NO_PROTO
+#endif
+
+
+#if !defined THIS_IS__STDC__ || !THIS_IS__STDC__
+/* This is a separate conditional since some stdc systems
+   reject `defined (const)'.  */
+# ifndef const
+#  define const
+# endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+# include <gnu-versions.h>
+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#  define ELIDE_CODE
+# endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef	__GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+   contain conflicting prototypes for getopt.  */
+# include <stdlib.h>
+# include <unistd.h>
+#endif	/* GNU C library.  */
+
+#ifdef VMS
+# include <unixlib.h>
+# if HAVE_STRING_H - 0
+#  include <string.h>
+# endif
+#endif
+
+#ifndef _
+/* This is for other GNU distributions with internationalized messages.
+   When compiling libc, the _ macro is predefined.  */
+# ifdef HAVE_LIBINTL_H
+#  include <libintl.h>
+#  define _(msgid)	gettext (msgid)
+# else
+#  define _(msgid)	(msgid)
+# endif
+#endif
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+   but it behaves differently for the user, since it allows the user
+   to intersperse the options with the other arguments.
+
+   As `getopt' works, it permutes the elements of ARGV so that,
+   when it is done, all the options precede everything else.  Thus
+   all application programs are extended to handle flexible argument order.
+
+   Setting the environment variable POSIXLY_CORRECT disables permutation.
+   Then the behavior is completely standard.
+
+   GNU application programs can use a third alternative mode in which
+   they can distinguish the relative order of options and other arguments.  */
+
+
+
+/* Define HAVE_GETOPT if the getopt function (and thus, which is more
+ * important to us, the getopt globals, optarg, optind, opterr and
+ * optopt) is defined by the system.  Leave undefined if they should be
+ * defined here instead.
+ */
+#ifndef HAVE_GETOPT
+
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+char *optarg = NULL;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+/* 1003.2 says this must be 1 before any call.  */
+int optind = 1;
+
+/* Callers store zero here to inhibit the error message
+   for unrecognized options.  */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+   This must be initialized on some systems to avoid linking in the
+   system's own getopt implementation.  */
+
+int optopt = '?';
+
+#endif /* #ifndef HAVE_GETOPT */
+
+/* Formerly, initialization of getopt depended on optind==0, which
+   causes problems with re-calling getopt as programs generally don't
+   know that. */
+
+int __getopt_initialized = 0;
+
+/* The next char to be scanned in the option-element
+   in which the last option character we returned was found.
+   This allows us to pick up the scan where we left off.
+
+   If this is zero, or a null string, it means resume the scan
+   by advancing to the next ARGV-element.  */
+
+static char *nextchar;
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+   If the caller did not specify anything,
+   the default is REQUIRE_ORDER if the environment variable
+   POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+   REQUIRE_ORDER means don't recognize them as options;
+   stop option processing when the first non-option is seen.
+   This is what Unix does.
+   This mode of operation is selected by either setting the environment
+   variable POSIXLY_CORRECT, or using `+' as the first character
+   of the list of option characters.
+
+   PERMUTE is the default.  We permute the contents of ARGV as we scan,
+   so that eventually all the non-options are at the end.  This allows options
+   to be given in any order, even with programs that were not written to
+   expect this.
+
+   RETURN_IN_ORDER is an option available to programs that were written
+   to expect options and other ARGV-elements in any order and that care about
+   the ordering of the two.  We describe each non-option ARGV-element
+   as if it were the argument of an option with character code 1.
+   Using `-' as the first character of the list of option characters
+   selects this mode of operation.
+
+   The special argument `--' forces an end of option-scanning regardless
+   of the value of `ordering'.  In the case of RETURN_IN_ORDER, only
+   `--' can cause `getopt' to return -1 with `optind' != ARGC.  */
+
+static enum
+{
+  REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+/* Value of POSIXLY_CORRECT environment variable.  */
+static char *posixly_correct;
+
+#ifdef	__GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+   because there are many ways it can cause trouble.
+   On some systems, it contains special magic macros that don't work
+   in GCC.  */
+# include <string.h>
+# define my_index	strchr
+#else
+
+# if HAVE_STRING_H
+#  include <string.h>
+# else
+#  if HAVE_STRINGS_H
+#   include <strings.h>
+#  endif
+# endif
+
+/* Avoid depending on library functions or files
+   whose names are inconsistent.  */
+
+#ifndef getenv
+extern char *getenv ();
+#endif
+
+static char *
+my_index (str, chr)  /* stands in for strchr when __GNU_LIBRARY__ is not defined */
+     const char *str;
+     int chr;
+{
+  while (*str)  /* the loop ends at the terminating NUL, which is never compared */
+    {
+      if (*str == chr)
+	return (char *) str;  /* pointer to the first occurrence of CHR in STR */
+      str++;
+    }
+  return 0;  /* CHR does not occur in STR */
+}
+
+/* If using GCC, we can safely declare strlen this way.
+   If not using GCC, it is ok not to declare it.  */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+   That was relevant to code that was here before.  */
+# if (!defined THIS_IS__STDC__ || !THIS_IS__STDC__) && !defined strlen
+/* gcc with -traditional declares the built-in strlen to return int,
+   and has done so at least since version 2.4.5. -- rms.  */
+extern int strlen (const char *);
+# endif /* not THIS_IS__STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
+
+/* Handle permutation of arguments.  */
+
+/* Describe the part of ARGV that contains non-options that have
+   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
+   `last_nonopt' is the index after the last of them.  */
+
+static int first_nonopt;
+static int last_nonopt;
+
+#ifdef _LIBC
+/* Bash 2.0 gives us an environment variable containing flags
+   indicating ARGV elements that should not be considered arguments.  */
+
+/* Defined in getopt_init.c  */
+extern char *__getopt_nonoption_flags;
+
+static int nonoption_flags_max_len;
+static int nonoption_flags_len;
+
+static int original_argc;
+static char *const *original_argv;
+
+/* Make sure the environment variable bash 2.0 puts in the environment
+   is valid for the getopt call we must make sure that the ARGV passed
+   to getopt is that one passed to the process.  */
+static void  /* records ARGC and ARGV in original_argc / original_argv (compiled only under _LIBC) */
+__attribute__ ((unused))
+store_args_and_env (int argc, char *const *argv)
+{
+  /* XXX This is no good solution.  We should rather copy the args so
+     that we can compare them later.  But we must not use malloc(3).  */
+  original_argc = argc;
+  original_argv = argv;  /* only the pointer is stored; the strings are not copied */
+}
+# ifdef text_set_element
+text_set_element (__libc_subinit, store_args_and_env);
+# endif /* text_set_element */
+
+# define SWAP_FLAGS(ch1, ch2) \
+  if (nonoption_flags_len > 0)						      \
+    {									      \
+      char __tmp = __getopt_nonoption_flags[ch1];			      \
+      __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2];	      \
+      __getopt_nonoption_flags[ch2] = __tmp;				      \
+    }
+#else	/* !_LIBC */
+# define SWAP_FLAGS(ch1, ch2)
+#endif	/* _LIBC */
+
+/* Exchange two adjacent subsequences of ARGV.
+   One subsequence is elements [first_nonopt,last_nonopt)
+   which contains all the non-options that have been skipped so far.
+   The other is elements [last_nonopt,optind), which contains all
+   the options processed since those non-options were skipped.
+
+   `first_nonopt' and `last_nonopt' are relocated so that they describe
+   the new indices of the non-options in ARGV after they are moved.  */
+
+/* Defined with a prototype (like _getopt_internal below) because
+   K&R-style parameter lists are not valid C++ and were removed in C23;
+   a separate forward declaration is therefore not needed.
+   ARGV is permuted in place.  `first_nonopt' and `last_nonopt' are
+   read and updated; `optind' is only read.  */
+static void
+exchange (char **argv)
+{
+  int bottom = first_nonopt;
+  int middle = last_nonopt;
+  int top = optind;
+  char *tem;
+
+  /* Exchange the shorter segment with the far end of the longer segment.
+     That puts the shorter segment into the right place.
+     It leaves the longer segment in the right place overall,
+     but it consists of two parts that need to be swapped next.  */
+
+#ifdef _LIBC
+  /* First make sure the handling of the `__getopt_nonoption_flags'
+     string can work normally.  Our top argument must be in the range
+     of the string.  */
+  if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
+    {
+      /* We must extend the array.  The user plays games with us and
+	 presents new arguments.  */
+      char *new_str = malloc (top + 1);
+      if (new_str == NULL)
+	nonoption_flags_len = nonoption_flags_max_len = 0;
+      else
+	{
+	  memset (__mempcpy (new_str, __getopt_nonoption_flags,
+			     nonoption_flags_max_len),
+		  '\0', top + 1 - nonoption_flags_max_len);
+	  nonoption_flags_max_len = top + 1;
+	  __getopt_nonoption_flags = new_str;
+	}
+    }
+#endif
+
+  while (top > middle && middle > bottom)
+    {
+      if (top - middle > middle - bottom)
+	{
+	  /* Bottom segment is the short one.  */
+	  int len = middle - bottom;
+	  int i;
+
+	  /* Swap it with the top part of the top segment.  */
+	  for (i = 0; i < len; i++)
+	    {
+	      tem = argv[bottom + i];
+	      argv[bottom + i] = argv[top - (middle - bottom) + i];
+	      argv[top - (middle - bottom) + i] = tem;
+	      SWAP_FLAGS (bottom + i, top - (middle - bottom) + i);
+	    }
+	  /* Exclude the moved bottom segment from further swapping.  */
+	  top -= len;
+	}
+      else
+	{
+	  /* Top segment is the short one.  */
+	  int len = top - middle;
+	  int i;
+
+	  /* Swap it with the bottom part of the bottom segment.  */
+	  for (i = 0; i < len; i++)
+	    {
+	      tem = argv[bottom + i];
+	      argv[bottom + i] = argv[middle + i];
+	      argv[middle + i] = tem;
+	      SWAP_FLAGS (bottom + i, middle + i);
+	    }
+	  /* Exclude the moved top segment from further swapping.  */
+	  bottom += len;
+	}
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Initialize the internal data when the first call is made.  */
+
+/* Reset the scanning state (`first_nonopt', `last_nonopt', `nextchar'),
+   read POSIXLY_CORRECT, and select `ordering' from a leading `-' or `+'
+   in OPTSTRING.  Returns OPTSTRING with that leading character skipped.
+   ARGC and ARGV are only consulted when built as part of GNU libc.  */
+static const char *
+_getopt_initialize (int argc,
+	char *const *argv,
+	const char *optstring)
+{
+  /* Start processing options with ARGV-element 1 (since ARGV-element 0
+     is the program name); the sequence of previously skipped
+     non-option ARGV-elements is empty.  */
+
+  first_nonopt = last_nonopt = optind;
+
+  nextchar = NULL;
+
+  posixly_correct = getenv ("POSIXLY_CORRECT");
+
+  /* Determine how to handle the ordering of options and nonoptions.  */
+
+  if (optstring[0] == '-')
+    {
+      ordering = RETURN_IN_ORDER;
+      ++optstring;
+    }
+  else if (optstring[0] == '+')
+    {
+      ordering = REQUIRE_ORDER;
+      ++optstring;
+    }
+  else if (posixly_correct != NULL)
+    ordering = REQUIRE_ORDER;
+  else
+    ordering = PERMUTE;
+
+#ifdef _LIBC
+  if (posixly_correct == NULL
+      && argc == original_argc && argv == original_argv)
+    {
+      if (nonoption_flags_max_len == 0)
+	{
+	  if (__getopt_nonoption_flags == NULL
+	      || __getopt_nonoption_flags[0] == '\0')
+	    nonoption_flags_max_len = -1;
+	  else
+	    {
+	      const char *orig_str = __getopt_nonoption_flags;
+	      int len = nonoption_flags_max_len = strlen (orig_str);
+	      if (nonoption_flags_max_len < argc)
+		nonoption_flags_max_len = argc;
+	      __getopt_nonoption_flags =
+		(char *) malloc (nonoption_flags_max_len);
+	      if (__getopt_nonoption_flags == NULL)
+		nonoption_flags_max_len = -1;
+	      else
+		memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
+			'\0', nonoption_flags_max_len - len);
+	    }
+	}
+      nonoption_flags_len = nonoption_flags_max_len;
+    }
+  else
+    nonoption_flags_len = 0;
+#endif
+
+  return optstring;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+   given in OPTSTRING.
+
+   If an element of ARGV starts with '-', and is not exactly "-" or "--",
+   then it is an option element.  The characters of this element
+   (aside from the initial '-') are option characters.  If `getopt'
+   is called repeatedly, it returns successively each of the option characters
+   from each of the option elements.
+
+   If `getopt' finds another option character, it returns that character,
+   updating `optind' and `nextchar' so that the next call to `getopt' can
+   resume the scan with the following option character or ARGV-element.
+
+   If there are no more option characters, `getopt' returns -1.
+   Then `optind' is the index in ARGV of the first ARGV-element
+   that is not an option.  (The ARGV-elements have been permuted
+   so that those that are not options now come last.)
+
+   OPTSTRING is a string containing the legitimate option characters.
+   If an option character is seen that is not listed in OPTSTRING,
+   return '?' after printing an error message.  If you set `opterr' to
+   zero, the error message is suppressed but we still return '?'.
+
+   If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+   so the following text in the same ARGV-element, or the text of the following
+   ARGV-element, is returned in `optarg'.  Two colons mean an option that
+   wants an optional arg; if there is text in the current ARGV-element,
+   it is returned in `optarg', otherwise `optarg' is set to zero.
+
+   If OPTSTRING starts with `-' or `+', it requests different methods of
+   handling the non-option ARGV-elements.
+   See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+   Long-named options begin with `--' instead of `-'.
+   Their names may be abbreviated as long as the abbreviation is unique
+   or is an exact match for some defined option.  If they have an
+   argument, it follows the option name in the same ARGV-element, separated
+   from the option name by a `=', or else in the next ARGV-element.
+   When `getopt' finds a long-named option, it returns 0 if that option's
+   `flag' field is nonzero, the value of the option's `val' field
+   if the `flag' field is zero.
+
+   The elements of ARGV aren't really const, because we permute them.
+   But we pretend they're const in the prototype to be compatible
+   with other systems.
+
+   LONGOPTS is a vector of `struct option' terminated by an
+   element containing a name which is zero.
+
+   LONGIND returns the index in LONGOPTS of the long-named option found.
+   It is only valid when a long-named option has been found by the most
+   recent call.
+
+   If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+   long-named options.  */
+
+#if 0
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+     const struct option *longopts;
+     int *longind;
+     int long_only;
+#endif
+int
+_getopt_internal (int argc,
+	char *const *argv,
+	const char *optstring,
+	const struct option *longopts,
+	int *longind,
+	int long_only)
+{
+  optarg = NULL;
+
+  if (optind == 0 || !__getopt_initialized)
+    {
+      if (optind == 0)
+	optind = 1;	/* Don't scan ARGV[0], the program name.  */
+      optstring = _getopt_initialize (argc, argv, optstring);
+      __getopt_initialized = 1;
+    }
+
+  /* Test whether ARGV[optind] points to a non-option argument.
+     Either it does not have option syntax, or there is an environment flag
+     from the shell indicating it is not an option.  The latter information
+     is only used when this code is built as part of the GNU libc.  */
+#ifdef _LIBC
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0'	      \
+		      || (optind < nonoption_flags_len			      \
+			  && __getopt_nonoption_flags[optind] == '1'))
+#else
+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#endif
+
+  if (nextchar == NULL || *nextchar == '\0')
+    {
+      /* Advance to the next ARGV-element.  */
+
+      /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
+	 moved back by the user (who may also have changed the arguments).  */
+      if (last_nonopt > optind)
+	last_nonopt = optind;
+      if (first_nonopt > optind)
+	first_nonopt = optind;
+
+      if (ordering == PERMUTE)
+	{
+	  /* If we have just processed some options following some non-options,
+	     exchange them so that the options come first.  */
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (last_nonopt != optind)
+	    first_nonopt = optind;
+
+	  /* Skip any additional non-options
+	     and extend the range of non-options previously skipped.  */
+
+	  while (optind < argc && NONOPTION_P)
+	    optind++;
+	  last_nonopt = optind;
+	}
+
+      /* The special ARGV-element `--' means premature end of options.
+	 Skip it like a null option,
+	 then exchange with previous non-options as if it were an option,
+	 then skip everything else like a non-option.  */
+
+      if (optind != argc && !strcmp (argv[optind], "--"))
+	{
+	  optind++;
+
+	  if (first_nonopt != last_nonopt && last_nonopt != optind)
+	    exchange ((char **) argv);
+	  else if (first_nonopt == last_nonopt)
+	    first_nonopt = optind;
+	  last_nonopt = argc;
+
+	  optind = argc;
+	}
+
+      /* If we have done all the ARGV-elements, stop the scan
+	 and back over any non-options that we skipped and permuted.  */
+
+      if (optind == argc)
+	{
+	  /* Set the next-arg-index to point at the non-options
+	     that we previously skipped, so the caller will digest them.  */
+	  if (first_nonopt != last_nonopt)
+	    optind = first_nonopt;
+	  return -1;
+	}
+
+      /* If we have come to a non-option and did not permute it,
+	 either stop the scan or describe it to the caller and pass it by.  */
+
+      if (NONOPTION_P)
+	{
+	  if (ordering == REQUIRE_ORDER)
+	    return -1;
+	  optarg = argv[optind++];
+	  return 1;
+	}
+
+      /* We have found another option-ARGV-element.
+	 Skip the initial punctuation.  */
+
+      nextchar = (argv[optind] + 1
+		  + (longopts != NULL && argv[optind][1] == '-'));
+    }
+
+  /* Decode the current option-ARGV-element.  */
+
+  /* Check whether the ARGV-element is a long option.
+
+     If long_only and the ARGV-element has the form "-f", where f is
+     a valid short option, don't consider it an abbreviated form of
+     a long option that starts with f.  Otherwise there would be no
+     way to give the -f short option.
+
+     On the other hand, if there's a long option "fubar" and
+     the ARGV-element is "-fu", do consider that an abbreviation of
+     the long option, just like "--fu", and not "-f" with arg "u".
+
+     This distinction seems to be the most useful approach.  */
+
+  if (longopts != NULL
+      && (argv[optind][1] == '-'
+	  || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
+    {
+      char *nameend;
+      const struct option *p;
+      const struct option *pfound = NULL;
+      int exact = 0;
+      int ambig = 0;
+      int indfound = -1;
+      int option_index;
+
+      for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+	/* Do nothing.  */ ;
+
+      /* Test all long options for either exact match
+	 or abbreviated matches.  */
+      for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	if (!strncmp (p->name, nextchar, nameend - nextchar))
+	  {
+	    if ((unsigned int) (nameend - nextchar)
+		== (unsigned int) strlen (p->name))
+	      {
+		/* Exact match found.  */
+		pfound = p;
+		indfound = option_index;
+		exact = 1;
+		break;
+	      }
+	    else if (pfound == NULL)
+	      {
+		/* First nonexact match found.  */
+		pfound = p;
+		indfound = option_index;
+	      }
+	    else
+	      /* Second or later nonexact match found.  */
+	      ambig = 1;
+	  }
+
+      if (ambig && !exact)
+	{
+	  if (opterr)
+	    fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
+		     argv[0], argv[optind]);
+	  nextchar += strlen (nextchar);
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+
+      if (pfound != NULL)
+	{
+	  option_index = indfound;
+	  optind++;
+	  if (*nameend)
+	    {
+	      /* Don't test has_arg with >, because some C compilers don't
+		 allow it to be used on enums.  */
+	      if (pfound->has_arg)
+		optarg = nameend + 1;
+	      else
+		{
+		  if (opterr)
+		    {
+		      if (argv[optind - 1][1] == '-')
+			/* --option */
+			fprintf (stderr,
+				 _("%s: option `--%s' doesn't allow an argument\n"),
+				 argv[0], pfound->name);
+		      else
+			/* +option or -option */
+			fprintf (stderr,
+				 _("%s: option `%c%s' doesn't allow an argument\n"),
+				 argv[0], argv[optind - 1][0], pfound->name);
+		    }
+		  /* Fail even when `opterr' is 0: it only mutes the message.  */
+		  nextchar += strlen (nextchar);
+
+		  optopt = pfound->val;
+		  return '?';
+		}
+	    }
+	  else if (pfound->has_arg == 1)
+	    {
+	      if (optind < argc)
+		optarg = argv[optind++];
+	      else
+		{
+		  if (opterr)
+		    fprintf (stderr,
+			   _("%s: option `%s' requires an argument\n"),
+			   argv[0], argv[optind - 1]);
+		  nextchar += strlen (nextchar);
+		  optopt = pfound->val;
+		  return optstring[0] == ':' ? ':' : '?';
+		}
+	    }
+	  nextchar += strlen (nextchar);
+	  if (longind != NULL)
+	    *longind = option_index;
+	  if (pfound->flag)
+	    {
+	      *(pfound->flag) = pfound->val;
+	      return 0;
+	    }
+	  return pfound->val;
+	}
+
+      /* Can't find it as a long option.  If this is not getopt_long_only,
+	 or the option starts with '--' or is not a valid short
+	 option, then it's an error.
+	 Otherwise interpret it as a short option.  */
+      if (!long_only || argv[optind][1] == '-'
+	  || my_index (optstring, *nextchar) == NULL)
+	{
+	  if (opterr)
+	    {
+	      if (argv[optind][1] == '-')
+		/* --option */
+		fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
+			 argv[0], nextchar);
+	      else
+		/* +option or -option */
+		fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
+			 argv[0], argv[optind][0], nextchar);
+	    }
+	  nextchar = (char *) "";
+	  optind++;
+	  optopt = 0;
+	  return '?';
+	}
+    }
+
+  /* Look at and handle the next short option-character.  */
+
+  {
+    char c = *nextchar++;
+    char *temp = my_index (optstring, c);
+
+    /* Increment `optind' when we start to process its last character.  */
+    if (*nextchar == '\0')
+      ++optind;
+
+    if (temp == NULL || c == ':')
+      {
+	if (opterr)
+	  {
+	    if (posixly_correct)
+	      /* 1003.2 specifies the format of this message.  */
+	      fprintf (stderr, _("%s: illegal option -- %c\n"),
+		       argv[0], c);
+	    else
+	      fprintf (stderr, _("%s: invalid option -- %c\n"),
+		       argv[0], c);
+	  }
+	optopt = c;
+	return '?';
+      }
+    /* Convenience. Treat POSIX -W foo same as long option --foo */
+    if (temp[0] == 'W' && temp[1] == ';')
+      {
+	char *nameend;
+	const struct option *p;
+	const struct option *pfound = NULL;
+	int exact = 0;
+	int ambig = 0;
+	int indfound = 0;
+	int option_index;
+
+	/* This is an option that requires an argument.  */
+	if (*nextchar != '\0')
+	  {
+	    optarg = nextchar;
+	    /* If we end this ARGV-element by taking the rest as an arg,
+	       we must advance to the next element now.  */
+	    optind++;
+	  }
+	else if (optind == argc)
+	  {
+	    if (opterr)
+	      {
+		/* 1003.2 specifies the format of this message.  */
+		fprintf (stderr, _("%s: option requires an argument -- %c\n"),
+			 argv[0], c);
+	      }
+	    optopt = c;
+	    if (optstring[0] == ':')
+	      c = ':';
+	    else
+	      c = '?';
+	    return c;
+	  }
+	else
+	  /* We already incremented `optind' once;
+	     increment it again when taking next ARGV-elt as argument.  */
+	  optarg = argv[optind++];
+
+	/* optarg is now the argument, see if it's in the
+	   table of longopts.  */
+
+	for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
+	  /* Do nothing.  */ ;
+
+	/* Test all long options for either exact match
+	   or abbreviated matches.  */
+	for (p = longopts, option_index = 0; p->name; p++, option_index++)
+	  if (!strncmp (p->name, nextchar, nameend - nextchar))
+	    {
+	      if ((unsigned int) (nameend - nextchar) == strlen (p->name))
+		{
+		  /* Exact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		  exact = 1;
+		  break;
+		}
+	      else if (pfound == NULL)
+		{
+		  /* First nonexact match found.  */
+		  pfound = p;
+		  indfound = option_index;
+		}
+	      else
+		/* Second or later nonexact match found.  */
+		ambig = 1;
+	    }
+	if (ambig && !exact)
+	  {
+	    if (opterr)
+	      fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
+		       argv[0], argv[optind]);
+	    nextchar += strlen (nextchar);
+	    optind++;
+	    return '?';
+	  }
+	if (pfound != NULL)
+	  {
+	    option_index = indfound;
+	    if (*nameend)
+	      {
+		/* Don't test has_arg with >, because some C compilers don't
+		   allow it to be used on enums.  */
+		if (pfound->has_arg)
+		  optarg = nameend + 1;
+		else
+		  {
+		    if (opterr)
+		      fprintf (stderr, _("\
+%s: option `-W %s' doesn't allow an argument\n"),
+			       argv[0], pfound->name);
+
+		    nextchar += strlen (nextchar);
+		    return '?';
+		  }
+	      }
+	    else if (pfound->has_arg == 1)
+	      {
+		if (optind < argc)
+		  optarg = argv[optind++];
+		else
+		  {
+		    if (opterr)
+		      fprintf (stderr,
+			       _("%s: option `%s' requires an argument\n"),
+			       argv[0], argv[optind - 1]);
+		    nextchar += strlen (nextchar);
+		    return optstring[0] == ':' ? ':' : '?';
+		  }
+	      }
+	    nextchar += strlen (nextchar);
+	    if (longind != NULL)
+	      *longind = option_index;
+	    if (pfound->flag)
+	      {
+		*(pfound->flag) = pfound->val;
+		return 0;
+	      }
+	    return pfound->val;
+	  }
+	  nextchar = NULL;
+	  return 'W';	/* Let the application handle it.   */
+      }
+    if (temp[1] == ':')
+      {
+	if (temp[2] == ':')
+	  {
+	    /* This is an option that accepts an argument optionally.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		optind++;
+	      }
+	    else
+	      optarg = NULL;
+	    nextchar = NULL;
+	  }
+	else
+	  {
+	    /* This is an option that requires an argument.  */
+	    if (*nextchar != '\0')
+	      {
+		optarg = nextchar;
+		/* If we end this ARGV-element by taking the rest as an arg,
+		   we must advance to the next element now.  */
+		optind++;
+	      }
+	    else if (optind == argc)
+	      {
+		if (opterr)
+		  {
+		    /* 1003.2 specifies the format of this message.  */
+		    fprintf (stderr,
+			   _("%s: option requires an argument -- %c\n"),
+			   argv[0], c);
+		  }
+		optopt = c;
+		if (optstring[0] == ':')
+		  c = ':';
+		else
+		  c = '?';
+	      }
+	    else
+	      /* We already incremented `optind' once;
+		 increment it again when taking next ARGV-elt as argument.  */
+	      optarg = argv[optind++];
+	    nextchar = NULL;
+	  }
+      }
+    return c;
+  }
+}
+
+/*
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  return _getopt_internal (argc, argv, optstring,
+			   (const struct option *) 0,
+			   (int *) 0,
+			   0);
+}
+*/
+
+#endif	/* Not ELIDE_CODE.  */
+/* ******************** ...getopt.c ******************** */
+
+
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+   actually compiling the library itself.  This code is part of the GNU C
+   Library, but also included in many other GNU distributions.  Compiling
+   and linking in this code is a waste when using the GNU C library
+   (especially if it is a shared library).  Rather than having every GNU
+   program understand `configure --with-gnu-libc' and omit the object files,
+   it is simpler to just do this in the source for each such file.  */
+
+#define GETOPT_INTERFACE_VERSION 2
+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
+#include <gnu-versions.h>
+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
+#define ELIDE_CODE
+#endif
+#endif
+
+#ifndef ELIDE_CODE
+
+
+/* This needs to come after some library #include
+   to get __GNU_LIBRARY__ defined.  */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#endif
+
+#ifndef	NULL
+#define NULL 0
+#endif
+
+/* K&R declarations!?  C'mon... */
+/* Just say no to all this gymnastics */
+#if 0
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+#endif
+int getopt_long (int argc,
+	char *const *argv,
+	const char *options,
+	const struct option *long_options,
+	int *opt_index)
+{
+  return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
+}
+
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+   If an option that starts with '-' (not '--') doesn't match a long option,
+   but does match a short option, it is parsed as a short option
+   instead.  */
+
+#if 0
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+#endif
+int
+getopt_long_only (int argc,
+	char *const *argv,
+	const char *options,
+	const struct option *long_options,
+	int *opt_index)
+{
+  return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
+}
+
+
+#endif	/* Not ELIDE_CODE.  */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      static struct option long_options[] =
+      {
+	{"add", 1, 0, 0},
+	{"append", 0, 0, 0},
+	{"delete", 1, 0, 0},
+	{"verbose", 0, 0, 0},
+	{"create", 0, 0, 0},
+	{"file", 1, 0, 0},
+	{0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+		       long_options, &option_index);
+      if (c == -1)
+	break;
+
+      switch (c)
+	{
+	case 0:
+	  printf ("option %s", long_options[option_index].name);
+	  if (optarg)
+	    printf (" with arg %s", optarg);
+	  printf ("\n");
+	  break;
+
+	case '0':
+	case '1':
+	case '2':
+	case '3':
+	case '4':
+	case '5':
+	case '6':
+	case '7':
+	case '8':
+	case '9':
+	  if (digit_optind != 0 && digit_optind != this_option_optind)
+	    printf ("digits occur in two different argv-elements.\n");
+	  digit_optind = this_option_optind;
+	  printf ("option %c\n", c);
+	  break;
+
+	case 'a':
+	  printf ("option a\n");
+	  break;
+
+	case 'b':
+	  printf ("option b\n");
+	  break;
+
+	case 'c':
+	  printf ("option c with value `%s'\n", optarg);
+	  break;
+
+	case 'd':
+	  printf ("option d with value `%s'\n", optarg);
+	  break;
+
+	case '?':
+	  break;
+
+	default:
+	  printf ("?? getopt returned character code 0%o ??\n", c);
+	}
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+	printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  exit (0);
+}
+
+#endif /* TEST */
+
+#endif /* #ifndef HAVE_GETOPT_LONG */
Index: branches/apertium-tagger/apertium2/apertium/getopt_long.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/getopt_long.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/getopt_long.h	(revision 69632)
@@ -0,0 +1,175 @@
+/* Declarations for getopt.
+   Copyright 1989, 1990, 1991, 1992, 1993, 1994, 1996, 1997, 1998, 2000
+   Free Software Foundation, Inc.
+
+   NOTE: The canonical source of this file is maintained with the GNU C Library.
+   Bugs can be reported to bug-glibc@gnu.org.
+
+   This program is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 2, or (at your option) any
+   later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   USA.  */
+
+#ifndef _GETOPT_LONG_H
+#define _GETOPT_LONG_H 1
+
+#include <apertium_config.h>
+
+#if HAVE_UNISTD_H
+/* Declares getopt, if present */
+#include <unistd.h>
+#endif
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+
+/* We're building this with a C++ compiler, essentially.  Such
+   compilers are not required to define __STDC__, but the path we
+   should follow, below, is indeed that marked by __STDC__.  We don't
+   want to force a definition of __STDC__ (though that works), because
+   (a) that feels bad, and (b) some compilers perfectly reasonably
+   complain bitterly about it.  So define THIS_IS__STDC__, and replace
+   occurrences of __STDC__ throughout with that.
+
+   That means that all of the occurrences of THIS_IS__STDC__ in this
+   file and in getopt_long.c are redundant, but I'm leaving them here
+   in case it becomes necessary to do cleverer things with it than
+   simply define it to be 1, and also as a sort of warped documentation. */
+#define THIS_IS__STDC__ 1
+
+#if !HAVE_DECL_GETOPT
+/* For communication from `getopt' to the caller.
+   When `getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `getopt'.
+
+   On entry to `getopt', zero means this is the first call; initialize.
+
+   When `getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+   for unrecognized options.  */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int optopt;
+
+#endif /* ifndef HAVE_DECL_GETOPT */
+
+#if !HAVE_DECL_GETOPT_LONG
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+   of `struct option' terminated by an element containing a name which is
+   zero.
+
+   The field `has_arg' is:
+   no_argument		(or 0) if the option does not take an argument,
+   required_argument	(or 1) if the option requires an argument,
+   optional_argument 	(or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `getopt'
+   returns the contents of the `val' field.  */
+
+struct option
+{
+#if defined (THIS_IS__STDC__) && THIS_IS__STDC__
+  const char *name;
+#else
+  char *name;
+#endif
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg;
+  int *flag;
+  int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'.  */
+
+#define	no_argument		0
+#define required_argument	1
+#define optional_argument	2
+
+#endif /* #if !HAVE_DECL_GETOPT_LONG */
+
+#if defined (THIS_IS__STDC__) && THIS_IS__STDC__
+/* HAVE_DECL_* is a three-state macro: undefined, 0 or 1.  If it is
+   undefined, we haven't run the autoconf check so provide the
+   declaration without arguments.  If it is 0, we checked and failed
+   to find the declaration so provide a fully prototyped one.  If it
+   is 1, we found it so don't provide any declaration at all.  */
+#if defined (__GNU_LIBRARY__) || (defined (HAVE_DECL_GETOPT) && !HAVE_DECL_GETOPT)
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+# if !defined (HAVE_DECL_GETOPT)
+extern int getopt ();
+# endif
+#endif /* __GNU_LIBRARY__ */
+#if !HAVE_DECL_GETOPT_LONG
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+		        const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+			     const char *shortopts,
+		             const struct option *longopts, int *longind);
+
+/* Internal only.  Users should not call this directly.  */
+extern int _getopt_internal (int argc, char *const *argv,
+			     const char *shortopts,
+		             const struct option *longopts, int *longind,
+			     int long_only);
+#endif /* HAVE_DECL_GETOPT_LONG */
+#else /* not THIS_IS__STDC__ */
+#if !HAVE_DECL_GETOPT
+extern int getopt ();
+#endif /* HAVE_DECL_GETOPT */
+#if !HAVE_DECL_GETOPT_LONG
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* HAVE_DECL_GETOPT_LONG */
+#endif /* THIS_IS__STDC__ */
+
+
+#ifdef	__cplusplus
+}
+#endif
+
+#endif /* getopt.h */
Index: branches/apertium-tagger/apertium2/apertium/win32/unistd.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/unistd.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/unistd.h	(revision 69632)
@@ -0,0 +1,13 @@
+// This should really be defined elsewhere.  fread() never returns < 0; errors show up via ferror().
+#define YY_INPUT(buf,result,max_size) \
+	if ( (result = fread( (char *) buf, 1, max_size, yyin )) == 0 && ferror( yyin ) ) \
+		YY_FATAL_ERROR( "input in flex scanner failed" );
+
+#define fileno _fileno
+
+#if defined(_WIN32) && defined(isatty)
+#undef isatty
+#define isatty _isatty
+#endif
+
+#define unlink _unlink
Index: branches/apertium-tagger/apertium2/apertium/win32/snprintf.c
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/snprintf.c	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/snprintf.c	(revision 69632)
@@ -0,0 +1,1025 @@
+/*
+ * snprintf.c - a portable implementation of snprintf
+ *
+ * AUTHOR
+ *   Mark Martinec <mark.martinec@ijs.si>, April 1999.
+ *
+ *   Copyright 1999, Mark Martinec. All rights reserved.
+ *
+ * TERMS AND CONDITIONS
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the "Frontier Artistic License" which comes
+ *   with this Kit.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty
+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *   See the Frontier Artistic License for more details.
+ *
+ *   You should have received a copy of the Frontier Artistic License
+ *   with this Kit in the file named LICENSE.txt .
+ *   If not, I'll be glad to provide one.
+ *
+ * FEATURES
+ * - careful adherence to specs regarding flags, field width and precision;
+ * - good performance for large string handling (large format, large
+ *   argument or large paddings). Performance is similar to system's sprintf
+ *   and in several cases significantly better (make sure you compile with
+ *   optimizations turned on, tell the compiler the code is strict ANSI
+ *   if necessary to give it more freedom for optimizations);
+ * - return value semantics per ISO/IEC 9899:1999 ("ISO C99");
+ * - written in standard ISO/ANSI C - requires an ANSI C compiler.
+ *
+ * SUPPORTED CONVERSION SPECIFIERS AND DATA TYPES
+ *
+ * This snprintf only supports the following conversion specifiers:
+ * s, c, d, u, o, x, X, p  (and synonyms: i, D, U, O - see below)
+ * with flags: '-', '+', ' ', '0' and '#'.
+ * An asterisk is supported for field width as well as precision.
+ *
+ * Length modifiers 'h' (short int), 'l' (long int),
+ * and 'll' (long long int) are supported.
+ * NOTE:
+ *   If macro SNPRINTF_LONGLONG_SUPPORT is not defined (default) the
+ *   length modifier 'll' is recognized but treated the same as 'l',
+ *   which may cause argument value truncation! Defining
+ *   SNPRINTF_LONGLONG_SUPPORT requires that your system's sprintf also
+ *   handles length modifier 'll'.  long long int is a language extension
+ *   which may not be portable.
+ *
+ * Conversion of numeric data (conversion specifiers d, u, o, x, X, p)
+ * with length modifiers (none or h, l, ll) is left to the system routine
+ * sprintf, but all handling of flags, field width and precision as well as
+ * c and s conversions is done very carefully by this portable routine.
+ * If a string precision (truncation) is specified (e.g. %.8s) it is
+ * guaranteed the string beyond the specified precision will not be referenced.
+ *
+ * Length modifiers h, l and ll are ignored for c and s conversions (data
+ * types wint_t and wchar_t are not supported).
+ *
+ * The following common synonyms for conversion characters are supported:
+ *   - i is a synonym for d
+ *   - D is a synonym for ld, explicit length modifiers are ignored
+ *   - U is a synonym for lu, explicit length modifiers are ignored
+ *   - O is a synonym for lo, explicit length modifiers are ignored
+ * The D, O and U conversion characters are nonstandard, they are supported
+ * for backward compatibility only, and should not be used for new code.
+ *
+ * The following is specifically NOT supported:
+ *   - flag ' (thousands' grouping character) is recognized but ignored
+ *   - numeric conversion specifiers: f, e, E, g, G and synonym F,
+ *     as well as the new a and A conversion specifiers
+ *   - length modifier 'L' (long double) and 'q' (quad - use 'll' instead)
+ *   - wide character/string conversions: lc, ls, and nonstandard
+ *     synonyms C and S
+ *   - writeback of converted string length: conversion character n
+ *   - the n$ specification for direct reference to n-th argument
+ *   - locales
+ *
+ * It is permitted for str_m to be zero, and it is permitted to specify NULL
+ * pointer for resulting string argument if str_m is zero (as per ISO C99).
+ *
+ * The return value is the number of characters which would be generated
+ * for the given input, excluding the trailing null. If this value
+ * is greater or equal to str_m, not all characters from the result
+ * have been stored in str, output bytes beyond the (str_m-1) -th character
+ * are discarded. If str_m is greater than zero it is guaranteed
+ * the resulting string will be null-terminated.
+ *
+ * NOTE that this matches the ISO C99, OpenBSD, and GNU C library 2.1,
+ * but is different from some older and vendor implementations,
+ * and is also different from XPG, XSH5, SUSv2 specifications.
+ * For historical discussion on changes in the semantics and standards
+ * of snprintf see printf(3) man page in the Linux programmers manual.
+ *
+ * Routines asprintf and vasprintf return a pointer (in the ptr argument)
+ * to a buffer sufficiently large to hold the resulting string. This pointer
+ * should be passed to free(3) to release the allocated storage when it is
+ * no longer needed. If sufficient space cannot be allocated, these functions
+ * will return -1 and set ptr to be a NULL pointer. These two routines are
+ * GNU C library extensions (glibc).
+ *
+ * Routines asnprintf and vasnprintf are similar to asprintf and vasprintf,
+ * yet, like snprintf and vsnprintf counterparts, will write at most str_m-1
+ * characters into the allocated output string, the last character in the
+ * allocated buffer then gets the terminating null. If the formatted string
+ * length (the return value) is greater than or equal to the str_m argument,
+ * the resulting string was truncated and some of the formatted characters
+ * were discarded. These routines present a handy way to limit the amount
+ * of allocated memory to some sane value.
+ *
+ * AVAILABILITY
+ *   http://www.ijs.si/software/snprintf/
+ *
+ * REVISION HISTORY
+ * 1999-04	V0.9  Mark Martinec
+ *		- initial version, some modifications after comparing printf
+ *		  man pages for Digital Unix 4.0, Solaris 2.6 and HPUX 10,
+ *		  and checking how Perl handles sprintf (differently!);
+ * 1999-04-09	V1.0  Mark Martinec <mark.martinec@ijs.si>
+ *		- added main test program, fixed remaining inconsistencies,
+ *		  added optional (long long int) support;
+ * 1999-04-12	V1.1  Mark Martinec <mark.martinec@ijs.si>
+ *		- support the 'p' conversion (pointer to void);
+ *		- if a string precision is specified
+ *		  make sure the string beyond the specified precision
+ *		  will not be referenced (e.g. by strlen);
+ * 1999-04-13	V1.2  Mark Martinec <mark.martinec@ijs.si>
+ *		- support synonyms %D=%ld, %U=%lu, %O=%lo;
+ *		- speed up the case of long format string with few conversions;
+ * 1999-06-30	V1.3  Mark Martinec <mark.martinec@ijs.si>
+ *		- fixed runaway loop (eventually crashing when str_l wraps
+ *		  beyond 2^31) while copying format string without
+ *		  conversion specifiers to a buffer that is too short
+ *		  (thanks to Edwin Young <edwiny@autonomy.com> for
+ *		  spotting the problem);
+ *		- added macros PORTABLE_SNPRINTF_VERSION_(MAJOR|MINOR)
+ *		  to snprintf.h
+ * 2000-02-14	V2.0 (never released) Mark Martinec <mark.martinec@ijs.si>
+ *		- relaxed license terms: The Artistic License now applies.
+ *		  You may still apply the GNU GENERAL PUBLIC LICENSE
+ *		  as was distributed with previous versions, if you prefer;
+ *		- changed REVISION HISTORY dates to use ISO 8601 date format;
+ *		- added vsnprintf (patch also independently proposed by
+ *		  Caolan McNamara 2000-05-04, and Keith M Willenson 2000-06-01)
+ * 2000-06-27	V2.1  Mark Martinec <mark.martinec@ijs.si>
+ *		- removed POSIX check for str_m<1; value 0 for str_m is
+ *		  allowed by ISO C99 (and GNU C library 2.1) - (pointed out
+ *		  on 2000-05-04 by Caolan McNamara, caolan@ csn dot ul dot ie).
+ *		  Besides relaxed license this change in standards adherence
+ *		  is the main reason to bump up the major version number;
+ *		- added nonstandard routines asnprintf, vasnprintf, asprintf,
+ *		  vasprintf that dynamically allocate storage for the
+ *		  resulting string; these routines are not compiled by default,
+ *		  see comments where NEED_V?ASN?PRINTF macros are defined;
+ *		- autoconf contributed by Caolan McNamara
+ * 2000-10-06	V2.2  Mark Martinec <mark.martinec@ijs.si>
+ *		- BUG FIX: the %c conversion used a temporary variable
+ *		  that was no longer in scope when referenced,
+ *		  possibly causing incorrect resulting character;
+ *		- BUG FIX: make precision and minimal field width unsigned
+ *		  to handle huge values (2^31 <= n < 2^32) correctly;
+ *		  also be more careful in the use of signed/unsigned/size_t
+ *		  internal variables - probably more careful than many
+ *		  vendor implementations, but there may still be a case
+ *		  where huge values of str_m, precision or minimal field
+ *		  could cause incorrect behaviour;
+ *		- use separate variables for signed/unsigned arguments,
+ *		  and for short/int, long, and long long argument lengths
+ *		  to avoid possible incompatibilities on certain
+ *		  computer architectures. Also use separate variable
+ *		  arg_sign to hold sign of a numeric argument,
+ *		  to make code more transparent;
+ *		- some fiddling with zero padding and "0x" to make it
+ *		  Linux compatible;
+ *		- systematically use macros fast_memcpy and fast_memset
+ *		  instead of case-by-case hand optimization; determine some
+ *		  breakeven string lengths for different architectures;
+ *		- terminology change: 'format' -> 'conversion specifier',
+ *		  'C9x' -> 'ISO/IEC 9899:1999 ("ISO C99")',
+ *		  'alternative form' -> 'alternate form',
+ *		  'data type modifier' -> 'length modifier';
+ *		- several comments rephrased and new ones added;
+ *		- make compiler not complain about 'credits' defined but
+ *		  not used;
+ */
+
+
+/* Define HAVE_SNPRINTF if your system already has snprintf and vsnprintf.
+ *
+ * If HAVE_SNPRINTF is defined this module will not produce code for
+ * snprintf and vsnprintf, unless PREFER_PORTABLE_SNPRINTF is defined as well,
+ * causing this portable version of snprintf to be called portable_snprintf
+ * (and portable_vsnprintf).
+ */
+/* #define HAVE_SNPRINTF */
+
+/* Define PREFER_PORTABLE_SNPRINTF if your system does have snprintf and
+ * vsnprintf but you would prefer to use the portable routine(s) instead.
+ * In this case the portable routine is declared as portable_snprintf
+ * (and portable_vsnprintf) and a macro 'snprintf' (and 'vsnprintf')
+ * is defined to expand to 'portable_v?snprintf' - see file snprintf.h .
+ * Defining this macro is only useful if HAVE_SNPRINTF is also defined,
+ * but does no harm if defined nevertheless.
+ */
+/* #define PREFER_PORTABLE_SNPRINTF */
+
+/* Define SNPRINTF_LONGLONG_SUPPORT if you want to support
+ * data type (long long int) and length modifier 'll' (e.g. %lld).
+ * If undefined, 'll' is recognized but treated as a single 'l'.
+ *
+ * If the system's sprintf does not handle 'll'
+ * the SNPRINTF_LONGLONG_SUPPORT must not be defined!
+ *
+ * This is off by default as (long long int) is a language extension.
+ */
+/* #define SNPRINTF_LONGLONG_SUPPORT */
+
+/* Define NEED_SNPRINTF_ONLY if you only need snprintf, and not vsnprintf.
+ * If NEED_SNPRINTF_ONLY is defined, the snprintf will be defined directly,
+ * otherwise both snprintf and vsnprintf routines will be defined
+ * and snprintf will be a simple wrapper around vsnprintf, at the expense
+ * of an extra procedure call.
+ */
+/* #define NEED_SNPRINTF_ONLY */
+
+/* Define NEED_V?ASN?PRINTF macros if you need library extension
+ * routines asprintf, vasprintf, asnprintf, vasnprintf respectively,
+ * and your system library does not provide them. They are all small
+ * wrapper routines around portable_vsnprintf. Defining any of the four
+ * NEED_V?ASN?PRINTF macros automatically turns off NEED_SNPRINTF_ONLY
+ * and turns on PREFER_PORTABLE_SNPRINTF.
+ *
+ * Watch for name conflicts with the system library if these routines
+ * are already present there.
+ *
+ * NOTE: vasprintf and vasnprintf routines need va_copy() from stdarg.h, as
+ * specified by C99, to be able to traverse the same list of arguments twice.
+ * I don't know of any other standard and portable way of achieving the same.
+ * With some versions of gcc you may use __va_copy(). You might even get away
+ * with "ap2 = ap", in this case you must not call va_end(ap2) !
+ *   #define va_copy(ap2,ap) ap2 = ap
+ */
+/* #define NEED_ASPRINTF   */
+/* #define NEED_ASNPRINTF  */
+/* #define NEED_VASPRINTF  */
+/* #define NEED_VASNPRINTF */
+
+
+/* Define the following macros if desired:
+ *   SOLARIS_COMPATIBLE, SOLARIS_BUG_COMPATIBLE,
+ *   HPUX_COMPATIBLE, HPUX_BUG_COMPATIBLE, LINUX_COMPATIBLE,
+ *   DIGITAL_UNIX_COMPATIBLE, DIGITAL_UNIX_BUG_COMPATIBLE,
+ *   PERL_COMPATIBLE, PERL_BUG_COMPATIBLE,
+ *
+ * - For portable applications it is best not to rely on peculiarities
+ *   of a given implementation so it may be best not to define any
+ *   of the macros that select compatibility and to avoid features
+ *   that vary among the systems.
+ *
+ * - Selecting compatibility with more than one operating system
+ *   is not strictly forbidden but is not recommended.
+ *
+ * - 'x'_BUG_COMPATIBLE implies 'x'_COMPATIBLE .
+ *
+ * - 'x'_COMPATIBLE refers to (and enables) a behaviour that is
+ *   documented in a sprintf man page on a given operating system
+ *   and actually adhered to by the system's sprintf (but not on
+ *   most other operating systems). It may also refer to and enable
+ *   a behaviour that is declared 'undefined' or 'implementation specific'
+ *   in the man page but a given implementation behaves predictably
+ *   in a certain way.
+ *
+ * - 'x'_BUG_COMPATIBLE refers to (and enables) a behaviour of system's sprintf
+ *   that contradicts the sprintf man page on the same operating system.
+ *
+ * - I do not claim that the 'x'_COMPATIBLE and 'x'_BUG_COMPATIBLE
+ *   conditionals take into account all idiosyncrasies of a particular
+ *   implementation, there may be other incompatibilities.
+ */
+
+
+
+/* ============================================== */
+/* NO USER SERVICEABLE PARTS FOLLOWING THIS POINT */
+/* ============================================== */
+
+#define PORTABLE_SNPRINTF_VERSION_MAJOR 2
+#define PORTABLE_SNPRINTF_VERSION_MINOR 2
+/* Any NEED_V?ASN?PRINTF request cancels NEED_SNPRINTF_ONLY and turns on PREFER_PORTABLE_SNPRINTF. */
+#if defined(NEED_ASPRINTF) || defined(NEED_ASNPRINTF) || defined(NEED_VASPRINTF) || defined(NEED_VASNPRINTF)
+# if defined(NEED_SNPRINTF_ONLY)
+# undef NEED_SNPRINTF_ONLY
+# endif
+# if !defined(PREFER_PORTABLE_SNPRINTF)
+# define PREFER_PORTABLE_SNPRINTF
+# endif
+#endif
+/* Each x_BUG_COMPATIBLE switch below implies the matching x_COMPATIBLE one. */
+#if defined(SOLARIS_BUG_COMPATIBLE) && !defined(SOLARIS_COMPATIBLE)
+#define SOLARIS_COMPATIBLE
+#endif
+
+#if defined(HPUX_BUG_COMPATIBLE) && !defined(HPUX_COMPATIBLE)
+#define HPUX_COMPATIBLE
+#endif
+
+#if defined(DIGITAL_UNIX_BUG_COMPATIBLE) && !defined(DIGITAL_UNIX_COMPATIBLE)
+#define DIGITAL_UNIX_COMPATIBLE
+#endif
+
+#if defined(PERL_BUG_COMPATIBLE) && !defined(PERL_COMPATIBLE)
+#define PERL_COMPATIBLE
+#endif
+
+#if defined(LINUX_BUG_COMPATIBLE) && !defined(LINUX_COMPATIBLE)
+#define LINUX_COMPATIBLE
+#endif
+
+#include <sys/types.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <errno.h>
+
+#ifdef isdigit  /* drop any isdigit macro already in effect before redefining it */
+#undef isdigit
+#endif
+#define isdigit(c) ((c) >= '0' && (c) <= '9')  /* plain '0'..'9' range test; evaluates (c) twice */
+
+/* For copying strings longer or equal to 'breakeven_point'
+ * it is more efficient to call memcpy() than to do it inline.
+ * The value depends mostly on the processor architecture,
+ * but also on the compiler and its optimization capabilities.
+ * The value is not critical, some small value greater than zero
+ * will be just fine if you don't care to squeeze every drop
+ * of performance out of the code.
+ *
+ * Small values favor memcpy, large values favor inline code.
+ */
+#if defined(__alpha__) || defined(__alpha)
+#  define breakeven_point   2	/* AXP (DEC Alpha)     - gcc or cc or egcs */
+#endif
+#if defined(__i386__)  || defined(__i386)
+#  define breakeven_point  12	/* Intel Pentium/Linux - gcc 2.96 */
+#endif
+#if defined(__hppa)
+#  define breakeven_point  10	/* HP-PA               - gcc */
+#endif
+#if defined(__sparc__) || defined(__sparc)
+#  define breakeven_point  33	/* Sun Sparc 5         - gcc 2.8.1 */
+#endif
+
+/* some other values of possible interest: */
+/* #define breakeven_point  8 */  /* VAX 4000          - vaxc */
+/* #define breakeven_point 19 */  /* VAX 4000          - gcc 2.7.0 */
+
+#ifndef breakeven_point
+#  define breakeven_point   6	/* some reasonable one-size-fits-all value */
+#endif
+/* fast_memcpy: memcpy() when n >= breakeven_point, inline byte loop below it; expands to a bare { } block, not an expression */
+#define fast_memcpy(d,s,n) \
+  { register size_t nn = (size_t)(n); \
+    if (nn >= breakeven_point) memcpy((d), (s), nn); \
+    else if (nn > 0) { /* proc call overhead is worth only for large strings*/\
+      register char *dd; register const char *ss; \
+      for (ss=(s), dd=(d); nn>0; nn--) *dd++ = *ss++; } }
+/* fast_memset: same size-based split as fast_memcpy, storing (int)(c) into n bytes starting at d */
+#define fast_memset(d,c,n) \
+  { register size_t nn = (size_t)(n); \
+    if (nn >= breakeven_point) memset((d), (int)(c), nn); \
+    else if (nn > 0) { /* proc call overhead is worth only for large strings*/\
+      register char *dd; register const int cc=(int)(c); \
+      for (dd=(d); nn>0; nn--) *dd++ = cc; } }
+
+/* prototypes -- each extension routine is declared only if its NEED_* macro is defined */
+
+#if defined(NEED_ASPRINTF)
+int asprintf   (char **ptr, const char *fmt, /*args*/ ...);
+#endif
+#if defined(NEED_VASPRINTF)
+int vasprintf  (char **ptr, const char *fmt, va_list ap);
+#endif
+#if defined(NEED_ASNPRINTF)
+int asnprintf  (char **ptr, size_t str_m, const char *fmt, /*args*/ ...);
+#endif
+#if defined(NEED_VASNPRINTF)
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap);
+#endif
+
+#if defined(HAVE_SNPRINTF)
+/* declare our portable snprintf  routine under name portable_snprintf  */
+/* declare our portable vsnprintf routine under name portable_vsnprintf */
+#else
+/* declare our portable routines under names snprintf and vsnprintf */
+#define portable_snprintf snprintf
+#if !defined(NEED_SNPRINTF_ONLY)
+#define portable_vsnprintf vsnprintf
+#endif
+#endif
+/* Declared only when there is no system snprintf or the portable version is explicitly preferred. */
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF)
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...);
+#if !defined(NEED_SNPRINTF_ONLY)
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap);
+#endif
+#endif
+
+/* declarations: credits[] holds the module's version, author, license and URL text */
+
+static char credits[] = "\n\
+@(#)snprintf.c, v2.2: Mark Martinec, <mark.martinec@ijs.si>\n\
+@(#)snprintf.c, v2.2: Copyright 1999, Mark Martinec. Frontier Artistic License applies.\n\
+@(#)snprintf.c, v2.2: http://www.ijs.si/software/snprintf/\n";
+
+#if defined(NEED_ASPRINTF)
+/* asprintf: format into a freshly malloc'ed buffer stored in *ptr; returns the
+ * formatted length, or -1 (errno = ENOMEM, *ptr = NULL) when malloc fails. */
+int asprintf(char **ptr, const char *fmt, /*args*/ ...) {
+  va_list args;
+  size_t buf_size;
+  int needed, written;
+
+  *ptr = NULL;
+  va_start(args, fmt);          /* pass 1: dry run, only computes the length */
+  needed = portable_vsnprintf(NULL, (size_t)0, fmt, args);
+  va_end(args);
+  assert(needed >= 0);        /* guards against int overflow of the length */
+  buf_size = (size_t)needed + 1;              /* room for the trailing null */
+  *ptr = (char *) malloc(buf_size);
+  if (*ptr == NULL) { errno = ENOMEM; return -1; }
+  va_start(args, fmt);               /* pass 2: format into the new buffer */
+  written = portable_vsnprintf(*ptr, buf_size, fmt, args);
+  va_end(args);
+  assert(written == needed);
+  return needed;
+}
+#endif
+
+#if defined(NEED_VASPRINTF)
+/* vasprintf: va_list flavour of asprintf; *ptr receives a malloc'ed buffer.
+ * Returns the formatted length, or -1 with errno = ENOMEM if malloc fails. */
+int vasprintf(char **ptr, const char *fmt, va_list ap) {
+  va_list probe;
+  size_t buf_size;
+  int needed, written;
+
+  *ptr = NULL;
+  va_copy(probe, ap);     /* sizing pass runs on a copy so ap stays unconsumed */
+  needed = portable_vsnprintf(NULL, (size_t)0, fmt, probe);
+  va_end(probe);
+  assert(needed >= 0);        /* guards against int overflow of the length */
+  buf_size = (size_t)needed + 1;              /* room for the trailing null */
+  *ptr = (char *) malloc(buf_size);
+  if (*ptr == NULL) { errno = ENOMEM; return -1; }
+  written = portable_vsnprintf(*ptr, buf_size, fmt, ap);
+  assert(written == needed);
+  return needed;
+}
+#endif
+
+#if defined(NEED_ASNPRINTF)
+/* asnprintf: like asprintf, but the allocated buffer never exceeds str_m bytes
+ * (terminating null included).  Returns the untruncated formatted length, or
+ * -1 with errno = ENOMEM if malloc fails; *ptr stays NULL when str_m is 0. */
+int asnprintf (char **ptr, size_t str_m, const char *fmt, /*args*/ ...) {
+  va_list args;
+  size_t exact_size;
+  int needed, written;
+
+  *ptr = NULL;
+  va_start(args, fmt);          /* pass 1: dry run, only computes the length */
+  needed = portable_vsnprintf(NULL, (size_t)0, fmt, args);
+  va_end(args);
+  assert(needed >= 0);        /* guards against int overflow of the length */
+  exact_size = (size_t)needed + 1;       /* bytes for an untruncated result */
+  if (exact_size < str_m) str_m = exact_size;  /* never allocate more than that */
+  if (str_m == 0) return needed;   /* caller only wanted the size, no buffer */
+
+  *ptr = (char *) malloc(str_m);
+  if (*ptr == NULL) { errno = ENOMEM; return -1; }
+  va_start(args, fmt);        /* pass 2: format, truncating to str_m-1 chars */
+  written = portable_vsnprintf(*ptr, str_m, fmt, args);
+  va_end(args);
+  assert(written == needed);
+  return needed;
+}
+#endif
+
+#if defined(NEED_VASNPRINTF)
+/* vasnprintf: va_list flavour of asnprintf; the buffer is capped at str_m bytes.
+ * Returns the untruncated formatted length, or -1 with errno = ENOMEM if
+ * malloc fails; *ptr stays NULL when str_m is 0. */
+int vasnprintf (char **ptr, size_t str_m, const char *fmt, va_list ap) {
+  va_list probe;
+  size_t exact_size;
+  int needed, written;
+
+  *ptr = NULL;
+  va_copy(probe, ap);     /* sizing pass runs on a copy so ap stays unconsumed */
+  needed = portable_vsnprintf(NULL, (size_t)0, fmt, probe);
+  va_end(probe);
+  assert(needed >= 0);        /* guards against int overflow of the length */
+  exact_size = (size_t)needed + 1;       /* bytes for an untruncated result */
+  if (exact_size < str_m) str_m = exact_size;  /* never allocate more than that */
+  if (str_m == 0) return needed;   /* caller only wanted the size, no buffer */
+
+  *ptr = (char *) malloc(str_m);
+  if (*ptr == NULL) { errno = ENOMEM; return -1; }
+  written = portable_vsnprintf(*ptr, str_m, fmt, ap);
+  assert(written == needed);
+  return needed;
+}
+#endif
+
+/*
+ * If the system does have snprintf and the portable routine is not
+ * specifically required, this module produces no code for snprintf/vsnprintf.
+ */
+#if !defined(HAVE_SNPRINTF) || defined(PREFER_PORTABLE_SNPRINTF)
+
+#if !defined(NEED_SNPRINTF_ONLY)
+/* Variadic front end: packs the arguments and defers to portable_vsnprintf. */
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) {
+  va_list args;
+  int result;
+  va_start(args, fmt);
+  result = portable_vsnprintf(str, str_m, fmt, args);
+  va_end(args);
+  return result;
+}
+#endif
+
+#if defined(NEED_SNPRINTF_ONLY)
+int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...) {
+#else
+int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap) {
+#endif
+
+#if defined(NEED_SNPRINTF_ONLY)
+  va_list ap;
+#endif
+  size_t str_l = 0;
+  const char *p = fmt;
+
+/* In contrast with POSIX, the ISO C99 now says
+ * that str can be NULL and str_m can be 0.
+ * This is more useful than the old:  if (str_m < 1) return -1; */
+
+#if defined(NEED_SNPRINTF_ONLY)
+  va_start(ap, fmt);
+#endif
+  if (!p) p = "";
+  while (*p) {
+    if (*p != '%') {
+   /* if (str_l < str_m) str[str_l++] = *p++;    -- this would be sufficient */
+   /* but the following code achieves better performance for cases
+    * where format string is long and contains few conversions */
+      const char *q = strchr(p+1,'%');
+      size_t n = !q ? strlen(p) : (q-p);
+      if (str_l < str_m) {
+        size_t avail = str_m-str_l;
+        fast_memcpy(str+str_l, p, (n>avail?avail:n));
+      }
+      p += n; str_l += n;
+    } else {
+      const char *starting_p;
+      size_t min_field_width = 0, precision = 0;
+      int zero_padding = 0, precision_specified = 0, justify_left = 0;
+      int alternate_form = 0, force_sign = 0;
+      int space_for_positive = 1; /* If both the ' ' and '+' flags appear,
+                                     the ' ' flag should be ignored. */
+      char length_modifier = '\0';            /* allowed values: \0, h, l, L */
+      char tmp[32];/* temporary buffer for simple numeric->string conversion */
+
+      const char *str_arg;      /* string address in case of string argument */
+      size_t str_arg_l;         /* natural field width of arg without padding
+                                   and sign */
+      unsigned char uchar_arg;
+        /* unsigned char argument value - only defined for c conversion.
+           N.B. standard explicitly states the char argument for
+           the c conversion is unsigned */
+
+      size_t number_of_zeros_to_pad = 0;
+        /* number of zeros to be inserted for numeric conversions
+           as required by the precision or minimal field width */
+
+      size_t zero_padding_insertion_ind = 0;
+        /* index into tmp where zero padding is to be inserted */
+
+      char fmt_spec = '\0';
+        /* current conversion specifier character */
+
+      str_arg = credits;/* just to make compiler happy (defined but not used)*/
+      str_arg = NULL;
+      starting_p = p; p++;  /* skip '%' */
+   /* parse flags */
+      while (*p == '0' || *p == '-' || *p == '+' ||
+             *p == ' ' || *p == '#' || *p == '\'') {
+        switch (*p) {
+        case '0': zero_padding = 1; break;
+        case '-': justify_left = 1; break;
+        case '+': force_sign = 1; space_for_positive = 0; break;
+        case ' ': force_sign = 1;
+     /* If both the ' ' and '+' flags appear, the ' ' flag should be ignored */
+#ifdef PERL_COMPATIBLE
+     /* ... but in Perl the last of ' ' and '+' applies */
+                  space_for_positive = 1;
+#endif
+                  break;
+        case '#': alternate_form = 1; break;
+        case '\'': break;
+        }
+        p++;
+      }
+   /* If the '0' and '-' flags both appear, the '0' flag should be ignored. */
+
+   /* parse field width */
+      if (*p == '*') {
+        int j;
+        p++; j = va_arg(ap, int);
+        if (j >= 0) min_field_width = j;
+        else { min_field_width = -j; justify_left = 1; }
+      } else if (isdigit((int)(*p))) {
+        /* size_t could be wider than unsigned int;
+           make sure we treat argument like common implementations do */
+        unsigned int uj = *p++ - '0';
+        while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0');
+        min_field_width = uj;
+      }
+   /* parse precision */
+      if (*p == '.') {
+        p++; precision_specified = 1;
+        if (*p == '*') {
+          int j = va_arg(ap, int);
+          p++;
+          if (j >= 0) precision = j;
+          else {
+            precision_specified = 0; precision = 0;
+         /* NOTE:
+          *   Solaris 2.6 man page claims that in this case the precision
+          *   should be set to 0.  Digital Unix 4.0, HPUX 10 and BSD man page
+          *   claim that this case should be treated as unspecified precision,
+          *   which is what we do here.
+          */
+          }
+        } else if (isdigit((int)(*p))) {
+          /* size_t could be wider than unsigned int;
+             make sure we treat argument like common implementations do */
+          unsigned int uj = *p++ - '0';
+          while (isdigit((int)(*p))) uj = 10*uj + (unsigned int)(*p++ - '0');
+          precision = uj;
+        }
+      }
+   /* parse 'h', 'l' and 'll' length modifiers */
+      if (*p == 'h' || *p == 'l') {
+        length_modifier = *p; p++;
+        if (length_modifier == 'l' && *p == 'l') {   /* double l = long long */
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+          length_modifier = '2';                  /* double l encoded as '2' */
+#else
+          length_modifier = 'l';                 /* treat it as a single 'l' */
+#endif
+          p++;
+        }
+      }
+      fmt_spec = *p;
+   /* common synonyms: */
+      switch (fmt_spec) {
+      case 'i': fmt_spec = 'd'; break;
+      case 'D': fmt_spec = 'd'; length_modifier = 'l'; break;
+      case 'U': fmt_spec = 'u'; length_modifier = 'l'; break;
+      case 'O': fmt_spec = 'o'; length_modifier = 'l'; break;
+      default: break;
+      }
+   /* get parameter value, do initial processing */
+      switch (fmt_spec) {
+      case '%': /* % behaves similar to 's' regarding flags and field widths */
+      case 'c': /* c behaves similar to 's' regarding flags and field widths */
+      case 's':
+        length_modifier = '\0';          /* wint_t and wchar_t not supported */
+     /* the result of zero padding flag with non-numeric conversion specifier*/
+     /* is undefined. Solaris and HPUX 10 does zero padding in this case,    */
+     /* Digital Unix and Linux does not. */
+#if !defined(SOLARIS_COMPATIBLE) && !defined(HPUX_COMPATIBLE)
+        zero_padding = 0;    /* turn zero padding off for string conversions */
+#endif
+        str_arg_l = 1;
+        switch (fmt_spec) {
+        case '%':
+          str_arg = p; break;
+        case 'c': {
+          int j = va_arg(ap, int);
+          uchar_arg = (unsigned char) j;   /* standard demands unsigned char */
+          str_arg = (const char *) &uchar_arg;
+          break;
+        }
+        case 's':
+          str_arg = va_arg(ap, const char *);
+          if (!str_arg) str_arg_l = 0;
+       /* make sure not to address string beyond the specified precision !!! */
+          else if (!precision_specified) str_arg_l = strlen(str_arg);
+       /* truncate string if necessary as requested by precision */
+          else if (precision == 0) str_arg_l = 0;
+          else {
+       /* memchr on HP does not like n > 2^31  !!! */
+            const char *q = memchr(str_arg, '\0',
+                             precision <= 0x7fffffff ? precision : 0x7fffffff);
+            str_arg_l = !q ? precision : (q-str_arg);
+          }
+          break;
+        default: break;
+        }
+        break;
+      case 'd': case 'u': case 'o': case 'x': case 'X': case 'p': {
+        /* NOTE: the u, o, x, X and p conversion specifiers imply
+                 the value is unsigned;  d implies a signed value */
+
+        int arg_sign = 0;
+          /* 0 if numeric argument is zero (or if pointer is NULL for 'p'),
+            +1 if greater than zero (or nonzero for unsigned arguments),
+            -1 if negative (unsigned argument is never negative) */
+
+        int int_arg = 0;  unsigned int uint_arg = 0;
+          /* only defined for length modifier h, or for no length modifiers */
+
+        long int long_arg = 0;  unsigned long int ulong_arg = 0;
+          /* only defined for length modifier l */
+
+        void *ptr_arg = NULL;
+          /* pointer argument value -only defined for p conversion */
+
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+        long long int long_long_arg = 0;
+        unsigned long long int ulong_long_arg = 0;
+          /* only defined for length modifier ll */
+#endif
+        if (fmt_spec == 'p') {
+        /* HPUX 10: An l, h, ll or L before any other conversion character
+         *   (other than d, i, u, o, x, or X) is ignored.
+         * Digital Unix:
+         *   not specified, but seems to behave as HPUX does.
+         * Solaris: If an h, l, or L appears before any other conversion
+         *   specifier (other than d, i, u, o, x, or X), the behavior
+         *   is undefined. (Actually %hp converts only 16-bits of address
+         *   and %llp treats address as 64-bit data which is incompatible
+         *   with (void *) argument on a 32-bit system).
+         */
+#ifdef SOLARIS_COMPATIBLE
+#  ifdef SOLARIS_BUG_COMPATIBLE
+          /* keep length modifiers even if it represents 'll' */
+#  else
+          if (length_modifier == '2') length_modifier = '\0';
+#  endif
+#else
+          length_modifier = '\0';
+#endif
+          ptr_arg = va_arg(ap, void *);
+          if (ptr_arg != NULL) arg_sign = 1;
+        } else if (fmt_spec == 'd') {  /* signed */
+          switch (length_modifier) {
+          case '\0':
+          case 'h':
+         /* It is non-portable to specify a second argument of char or short
+          * to va_arg, because arguments seen by the called function
+          * are not char or short.  C converts char and short arguments
+          * to int before passing them to a function.
+          */
+            int_arg = va_arg(ap, int);
+            if      (int_arg > 0) arg_sign =  1;
+            else if (int_arg < 0) arg_sign = -1;
+            break;
+          case 'l':
+            long_arg = va_arg(ap, long int);
+            if      (long_arg > 0) arg_sign =  1;
+            else if (long_arg < 0) arg_sign = -1;
+            break;
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+          case '2':
+            long_long_arg = va_arg(ap, long long int);
+            if      (long_long_arg > 0) arg_sign =  1;
+            else if (long_long_arg < 0) arg_sign = -1;
+            break;
+#endif
+          }
+        } else {  /* unsigned */
+          switch (length_modifier) {
+          case '\0':
+          case 'h':
+            uint_arg = va_arg(ap, unsigned int);
+            if (uint_arg) arg_sign = 1;
+            break;
+          case 'l':
+            ulong_arg = va_arg(ap, unsigned long int);
+            if (ulong_arg) arg_sign = 1;
+            break;
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+          case '2':
+            ulong_long_arg = va_arg(ap, unsigned long long int);
+            if (ulong_long_arg) arg_sign = 1;
+            break;
+#endif
+          }
+        }
+        str_arg = tmp; str_arg_l = 0;
+     /* NOTE:
+      *   For d, i, u, o, x, and X conversions, if precision is specified,
+      *   the '0' flag should be ignored. This is so with Solaris 2.6,
+      *   Digital UNIX 4.0, HPUX 10, Linux, FreeBSD, NetBSD; but not with Perl.
+      */
+#ifndef PERL_COMPATIBLE
+        if (precision_specified) zero_padding = 0;
+#endif
+        if (fmt_spec == 'd') {
+          if (force_sign && arg_sign >= 0)
+            tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
+         /* leave negative numbers for sprintf to handle,
+            to avoid handling tricky cases like (short int)(-32768) */
+#ifdef LINUX_COMPATIBLE
+        } else if (fmt_spec == 'p' && force_sign && arg_sign > 0) {
+          tmp[str_arg_l++] = space_for_positive ? ' ' : '+';
+#endif
+        } else if (alternate_form) {
+          if (arg_sign != 0 && (fmt_spec == 'x' || fmt_spec == 'X') )
+            { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = fmt_spec; }
+         /* alternate form should have no effect for p conversion, but ... */
+#ifdef HPUX_COMPATIBLE
+          else if (fmt_spec == 'p'
+         /* HPUX 10: for an alternate form of p conversion,
+          *          a nonzero result is prefixed by 0x. */
+#ifndef HPUX_BUG_COMPATIBLE
+         /* Actually it uses 0x prefix even for a zero value. */
+                   && arg_sign != 0
+#endif
+                  ) { tmp[str_arg_l++] = '0'; tmp[str_arg_l++] = 'x'; }
+#endif
+        }
+        zero_padding_insertion_ind = str_arg_l;
+        if (!precision_specified) precision = 1;   /* default precision is 1 */
+        if (precision == 0 && arg_sign == 0
+#if defined(HPUX_BUG_COMPATIBLE) || defined(LINUX_COMPATIBLE)
+            && fmt_spec != 'p'
+         /* HPUX 10 man page claims: With conversion character p the result of
+          * converting a zero value with a precision of zero is a null string.
+          * Actually HP returns all zeroes, and Linux returns "(nil)". */
+#endif
+        ) {
+         /* converted to null string */
+         /* When zero value is formatted with an explicit precision 0,
+            the resulting formatted string is empty (d, i, u, o, x, X, p).   */
+        } else {
+          char f[5]; int f_l = 0;
+          f[f_l++] = '%';    /* construct a simple format string for sprintf */
+          if (!length_modifier) { }
+          else if (length_modifier=='2') { f[f_l++] = 'l'; f[f_l++] = 'l'; }
+          else f[f_l++] = length_modifier;
+          f[f_l++] = fmt_spec; f[f_l++] = '\0';
+          if (fmt_spec == 'p') str_arg_l += sprintf(tmp+str_arg_l, f, ptr_arg);
+          else if (fmt_spec == 'd') {  /* signed */
+            switch (length_modifier) {
+            case '\0':
+            case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, int_arg);  break;
+            case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, long_arg); break;
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+            case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,long_long_arg); break;
+#endif
+            }
+          } else {  /* unsigned */
+            switch (length_modifier) {
+            case '\0':
+            case 'h': str_arg_l+=sprintf(tmp+str_arg_l, f, uint_arg);  break;
+            case 'l': str_arg_l+=sprintf(tmp+str_arg_l, f, ulong_arg); break;
+#ifdef SNPRINTF_LONGLONG_SUPPORT
+            case '2': str_arg_l+=sprintf(tmp+str_arg_l,f,ulong_long_arg);break;
+#endif
+            }
+          }
+         /* include the optional minus sign and possible "0x"
+            in the region before the zero padding insertion point */
+          if (zero_padding_insertion_ind < str_arg_l &&
+              tmp[zero_padding_insertion_ind] == '-') {
+            zero_padding_insertion_ind++;
+          }
+          if (zero_padding_insertion_ind+1 < str_arg_l &&
+              tmp[zero_padding_insertion_ind]   == '0' &&
+             (tmp[zero_padding_insertion_ind+1] == 'x' ||
+              tmp[zero_padding_insertion_ind+1] == 'X') ) {
+            zero_padding_insertion_ind += 2;
+          }
+        }
+        { size_t num_of_digits = str_arg_l - zero_padding_insertion_ind;
+          if (alternate_form && fmt_spec == 'o'
+#ifdef HPUX_COMPATIBLE                                  /* ("%#.o",0) -> ""  */
+              && (str_arg_l > 0)
+#endif
+#ifdef DIGITAL_UNIX_BUG_COMPATIBLE                      /* ("%#o",0) -> "00" */
+#else
+              /* unless zero is already the first character */
+              && !(zero_padding_insertion_ind < str_arg_l
+                   && tmp[zero_padding_insertion_ind] == '0')
+#endif
+          ) {        /* assure leading zero for alternate-form octal numbers */
+            if (!precision_specified || precision < num_of_digits+1) {
+             /* precision is increased to force the first character to be zero,
+                except if a zero value is formatted with an explicit precision
+                of zero */
+              precision = num_of_digits+1; precision_specified = 1;
+            }
+          }
+       /* zero padding to specified precision? */
+          if (num_of_digits < precision) 
+            number_of_zeros_to_pad = precision - num_of_digits;
+        }
+     /* zero padding to specified minimal field width? */
+        if (!justify_left && zero_padding) {
+          int n = min_field_width - (str_arg_l+number_of_zeros_to_pad);
+          if (n > 0) number_of_zeros_to_pad += n;
+        }
+        break;
+      }
+      default: /* unrecognized conversion specifier, keep format string as-is*/
+        zero_padding = 0;  /* turn zero padding off for non-numeric convers. */
+#ifndef DIGITAL_UNIX_COMPATIBLE
+        justify_left = 1; min_field_width = 0;                /* reset flags */
+#endif
+#if defined(PERL_COMPATIBLE) || defined(LINUX_COMPATIBLE)
+     /* keep the entire format string unchanged */
+        str_arg = starting_p; str_arg_l = p - starting_p;
+     /* well, not exactly so for Linux, which does something inbetween,
+      * and I don't feel an urge to imitate it: "%+++++hy" -> "%+y"  */
+#else
+     /* discard the unrecognized conversion, just keep *
+      * the unrecognized conversion character          */
+        str_arg = p; str_arg_l = 0;
+#endif
+        if (*p) str_arg_l++;  /* include invalid conversion specifier unchanged
+                                 if not at end-of-string */
+        break;
+      }
+      if (*p) p++;      /* step over the just processed conversion specifier */
+   /* insert padding to the left as requested by min_field_width;
+      this does not include the zero padding in case of numerical conversions*/
+      if (!justify_left) {                /* left padding with blank or zero */
+        int n = min_field_width - (str_arg_l+number_of_zeros_to_pad);
+        if (n > 0) {
+          if (str_l < str_m) {
+            size_t avail = str_m-str_l;
+            fast_memset(str+str_l, (zero_padding?'0':' '), (n>avail?avail:n));
+          }
+          str_l += n;
+        }
+      }
+   /* zero padding as requested by the precision or by the minimal field width
+    * for numeric conversions required? */
+      if (number_of_zeros_to_pad <= 0) {
+     /* will not copy first part of numeric right now, *
+      * force it to be copied later in its entirety    */
+        zero_padding_insertion_ind = 0;
+      } else {
+     /* insert first part of numerics (sign or '0x') before zero padding */
+        int n = zero_padding_insertion_ind;
+        if (n > 0) {
+          if (str_l < str_m) {
+            size_t avail = str_m-str_l;
+            fast_memcpy(str+str_l, str_arg, (n>avail?avail:n));
+          }
+          str_l += n;
+        }
+     /* insert zero padding as requested by the precision or min field width */
+        n = number_of_zeros_to_pad;
+        if (n > 0) {
+          if (str_l < str_m) {
+            size_t avail = str_m-str_l;
+            fast_memset(str+str_l, '0', (n>avail?avail:n));
+          }
+          str_l += n;
+        }
+      }
+   /* insert formatted string
+    * (or as-is conversion specifier for unknown conversions) */
+      { int n = str_arg_l - zero_padding_insertion_ind;
+        if (n > 0) {
+          if (str_l < str_m) {
+            size_t avail = str_m-str_l;
+            fast_memcpy(str+str_l, str_arg+zero_padding_insertion_ind,
+                        (n>avail?avail:n));
+          }
+          str_l += n;
+        }
+      }
+   /* insert right padding */
+      if (justify_left) {          /* right blank padding to the field width */
+        int n = min_field_width - (str_arg_l+number_of_zeros_to_pad);
+        if (n > 0) {
+          if (str_l < str_m) {
+            size_t avail = str_m-str_l;
+            fast_memset(str+str_l, ' ', (n>avail?avail:n));
+          }
+          str_l += n;
+        }
+      }
+    }
+  }
+#if defined(NEED_SNPRINTF_ONLY)
+  va_end(ap);
+#endif
+  if (str_m > 0) { /* make sure the string is null-terminated
+                      even at the expense of overwriting the last character
+                      (shouldn't happen, but just in case) */
+    str[str_l <= str_m-1 ? str_l : str_m-1] = '\0';
+  }
+  /* Return the number of characters formatted (excluding trailing null
+   * character), that is, the number of characters that would have been
+   * written to the buffer if it were large enough.
+   *
+   * The value of str_l should be returned, but str_l is of unsigned type
+   * size_t, and snprintf is int, possibly leading to an undetected
+   * integer overflow, resulting in a negative return value, which is illegal.
+   * Both XSH5 and ISO C99 (at least the draft) are silent on this issue.
+   * Should errno be set to EOVERFLOW and EOF returned in this case???
+   */
+  return (int) str_l;
+}
+#endif
Index: branches/apertium-tagger/apertium2/apertium/win32/snprintf.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/snprintf.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/snprintf.h	(revision 69632)
@@ -0,0 +1,26 @@
+/*
+ * Declarations for the portable snprintf family.
+ *
+ * - With HAVE_SNPRINTF defined, snprintf/vsnprintf come from <stdio.h>.
+ *   If PREFER_PORTABLE_SNPRINTF is defined as well, the names snprintf and
+ *   vsnprintf are redirected to portable_snprintf/portable_vsnprintf.
+ * - Without HAVE_SNPRINTF, snprintf and vsnprintf are declared here.
+ * - The asprintf/asnprintf family is declared unconditionally.
+ */
+#ifndef _PORTABLE_SNPRINTF_H_
+#define _PORTABLE_SNPRINTF_H_
+
+#define PORTABLE_SNPRINTF_VERSION_MAJOR 2
+#define PORTABLE_SNPRINTF_VERSION_MINOR 2
+
+/* The prototypes below use va_list and size_t; include their defining
+   headers here so this header does not depend on what the includer
+   happened to include first. */
+#include <stdarg.h>  /* va_list */
+#include <stddef.h>  /* size_t */
+
+#ifdef HAVE_SNPRINTF
+#include <stdio.h>
+#else
+extern int snprintf(char *, size_t, const char *, /*args*/ ...);
+extern int vsnprintf(char *, size_t, const char *, va_list);
+#endif
+
+#if defined(HAVE_SNPRINTF) && defined(PREFER_PORTABLE_SNPRINTF)
+extern int portable_snprintf(char *str, size_t str_m, const char *fmt, /*args*/ ...);
+extern int portable_vsnprintf(char *str, size_t str_m, const char *fmt, va_list ap);
+/* From here on, plain snprintf/vsnprintf calls use the portable versions. */
+#define snprintf  portable_snprintf
+#define vsnprintf portable_vsnprintf
+#endif
+
+extern int asprintf  (char **ptr, const char *fmt, /*args*/ ...);
+extern int vasprintf (char **ptr, const char *fmt, va_list ap);
+extern int asnprintf (char **ptr, size_t str_m, const char *fmt, /*args*/ ...);
+extern int vasnprintf(char **ptr, size_t str_m, const char *fmt, va_list ap);
+
+#endif /* _PORTABLE_SNPRINTF_H_ */
Index: branches/apertium-tagger/apertium2/apertium/win32/libgen.c
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/libgen.c	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/libgen.c	(revision 69632)
@@ -0,0 +1,25 @@
+#include <string.h>
+
+#include "libgen.h"
+
+// http://www.opengroup.org/onlinepubs/007908775/xsh/basename.html
+
+// Minimal basename() for Win32: returns a pointer to the last component
+// of `path`.
+//
+//  - NULL or empty path: returns "." (as the specification above requires).
+//  - No separator in path: returns `path` itself.
+//  - Otherwise: returns the text after the last separator. If the last
+//    separator is also the last character, a pointer to that separator is
+//    returned (trailing separators are not stripped).
+//
+// Both the backslash and the forward slash count as separators. `path` is
+// never modified; the result points into `path` or at static storage, so
+// the caller must not free it.
+char* basename(char *path) {
+	// Writable static storage, because the return type is non-const char*.
+	static char dot[] = ".";
+	char *pos;
+	char *slash;
+
+	if (path == NULL || *path == '\0')
+		return dot;
+
+	// Locate the last separator of either kind.
+	pos = strrchr(path, '\\');
+	slash = strrchr(path, '/');
+	if (pos == NULL || (slash != NULL && slash > pos))
+		pos = slash;
+
+	if (pos == NULL)
+		return path; // No directory part at all.
+
+	// Look at the character after the separator. Comparing the pointer
+	// pos + 1 with NULL would always be true.
+	if (pos[1] != '\0')
+		return pos + 1; // First character of the final component.
+
+	return pos; // Separator is the last character: return it.
+}
Index: branches/apertium-tagger/apertium2/apertium/win32/libgen.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/libgen.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/libgen.h	(revision 69632)
@@ -0,0 +1,14 @@
+#ifndef LIBGEN_H
+#define LIBGEN_H
+
+/* Win32 stand-in for <libgen.h>; the function is defined in libgen.c. */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Returns a pointer to the final component of `path`. */
+char *basename(char *path);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* LIBGEN_H */
Index: branches/apertium-tagger/apertium2/apertium/win32/regex.c
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/regex.c	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/regex.c	(revision 69632)
@@ -0,0 +1,4948 @@
+/* Extended regular expression matching and search library,
+   version 0.12.
+   (Implements POSIX draft P10003.2/D11.2, except for
+   internationalization features.)
+
+   Copyright (C) 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+  #pragma alloca
+#endif
+
+#define _GNU_SOURCE
+
+/* We need this for `regex.h', and perhaps for the Emacs include files.  */
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+   that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* Emacs uses `NULL' as a predicate.  */
+#undef NULL
+
+#else  /* not emacs */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+   `BSTRING', as far as I know, and neither of them use this code.  */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n)	memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n)	memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
+/* Define the syntax stuff for \<, \>, etc.  */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+   commands in re_match_2.  */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set.  */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+/* One-time initializer for the built-in re_syntax_table.  The first call
+   zeroes the table and then marks the characters in 'a'..'z', 'A'..'Z',
+   '0'..'9' and '_' as Sword; every later call does nothing.  */
+static void
+init_syntax_once ()
+{
+   static int initialized = 0;
+   /* Inclusive character ranges whose entries become Sword.  */
+   static const char range[3][2] = { {'a', 'z'}, {'A', 'Z'}, {'0', '9'} };
+   int r;
+   int ch;
+
+   if (!initialized)
+     {
+       bzero (re_syntax_table, sizeof re_syntax_table);
+
+       for (r = 0; r < 3; r++)
+         for (ch = range[r][0]; ch <= range[r][1]; ch++)
+           re_syntax_table[ch] = Sword;
+
+       re_syntax_table['_'] = Sword;
+
+       initialized = 1;
+     }
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits.  */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes.  */
+#include <ctype.h>
+
+/* If the host does not provide `isascii' as a macro, accept every
+   character as ASCII so the wrappers below reduce to the plain <ctype.h>
+   test.  */
+#ifndef isascii
+#define isascii(c) 1
+#endif
+
+/* `isblank' and `isgraph' are used only when they exist as macros (that
+   is all `#ifdef' can detect); otherwise equivalents are built from
+   other tests.  */
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+/* Character-class predicates guarded by isascii.  Each one evaluates its
+   argument more than once, so C must not have side effects.
+   NOTE(review): the <ctype.h> functions are only defined for values
+   representable as unsigned char (or EOF); presumably callers pass
+   unsigned char values -- confirm at the use sites.  */
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+   since ours (we hope) works properly with all combinations of
+   machines, compilers, `char' and `unsigned char' argument types.
+   (Per Bothner suggested the basic approach.)  */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else  /* not __STDC__ */
+/* As in Harbison and Steele.  */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
+   use `alloca' instead of `malloc'.  This is because using malloc in
+   re_search* or re_match* could cause memory leaks when C-g is used in
+   Emacs; also, malloc is slower and causes storage fragmentation.  On
+   the other hand, malloc is more portable, and easier to debug.
+
+   Because we sometimes use alloca, some routines have to be macros,
+   not functions -- `alloca'-allocated space disappears at the end of the
+   function it is called in.  */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC  */
+
+/* Emacs already defines alloca, sometimes.  */
+#ifndef alloca
+
+/* Make alloca work the best possible way.  */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top.  */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable.  */
+#define REGEX_REALLOCATE(source, osize, nsize)				\
+  (destination = (char *) alloca (nsize),				\
+   bcopy (source, destination, osize),					\
+   destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+   `string1' or just past its end.  This works if PTR is NULL, which is
+   a good thing.  Relies on variables named `size1' and `string1' being
+   in scope wherever the macro is used.  */
+#define FIRST_STRING_P(ptr) 					\
+  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T.  TALLOC and RETALLOC use malloc and
+   realloc; REGEX_TALLOC uses REGEX_ALLOCATE.  None of them checks the
+   result, so callers must test for a null pointer themselves.  RETALLOC
+   assigns the realloc result straight to ADDR, so the old pointer is
+   overwritten even when realloc fails.  */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits.  */
+
+/* Nonzero if the strings S1 and S2 compare equal.  */
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+/* Larger/smaller of A and B; the selected argument is evaluated twice.  */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/* Local boolean type and constants for this file.  */
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* re_opcode_t: the command codes that appear in compiled regular
+   expressions.  Some opcodes are followed by argument bytes.  A
+   command code can specify any interpretation whatsoever for its
+   arguments.  Zero bytes may appear in the compiled regular expression.
+
+   The enumerators after `exactn' take consecutive values, so their
+   order must not be changed.
+
+   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+   `exactn' we use here must also be 1.  */
+
+typedef enum
+{
+  no_op = 0,
+
+        /* Followed by one byte giving n, then by n literal bytes.  */
+  exactn = 1,
+
+        /* Matches any (more or less) character.  */
+  anychar,
+
+        /* Matches any one char belonging to specified set.  First
+           following byte is number of bitmap bytes.  Then come bytes
+           for a bitmap saying which chars are in.  Bits in each byte
+           are ordered low-bit-first.  A character is in the set if its
+           bit is 1.  A character too large to have a bit in the map is
+           automatically not in the set.  */
+  charset,
+
+        /* Same parameters as charset, but match any character that is
+           not one of those specified.  */
+  charset_not,
+
+        /* Start remembering the text that is matched, for storing in a
+           register.  Followed by one byte with the register number, in
+           the range 0 to one less than the pattern buffer's re_nsub
+           field.  Then followed by one byte with the number of groups
+           inner to this one.  (This last has to be part of the
+           start_memory only because we need it in the on_failure_jump
+           of re_match_2.)  */
+  start_memory,
+
+        /* Stop remembering the text that is matched and store it in a
+           memory register.  Followed by one byte with the register
+           number, in the range 0 to one less than `re_nsub' in the
+           pattern buffer, and one byte with the number of inner groups,
+           just like `start_memory'.  (We need the number of inner
+           groups here because we don't have any easy way of finding the
+           corresponding start_memory when we're at a stop_memory.)  */
+  stop_memory,
+
+        /* Match a duplicate of something remembered.  Followed by one
+           byte containing the register number.  */
+  duplicate,
+
+        /* Fail unless at beginning of line.  */
+  begline,
+
+        /* Fail unless at end of line.  */
+  endline,
+
+        /* Succeeds if at beginning of buffer (if emacs) or at beginning
+           of string to be matched (if not).  */
+  begbuf,
+
+        /* Analogously, for end of buffer/string.  */
+  endbuf,
+
+        /* Followed by two-byte relative address to which to jump.  */
+  jump,
+
+        /* Same as jump, but marks the end of an alternative.  */
+  jump_past_alt,
+
+        /* Followed by two-byte relative address of place to resume at
+           in case of failure.  */
+  on_failure_jump,
+
+        /* Like on_failure_jump, but pushes a placeholder instead of the
+           current string position when executed.  */
+  on_failure_keep_string_jump,
+
+        /* Throw away latest failure point and then jump to following
+           two-byte relative address.  */
+  pop_failure_jump,
+
+        /* Change to pop_failure_jump if we know we won't have to
+           backtrack to match; otherwise change to jump.  This is used
+           to jump back to the beginning of a repeat.  If what follows
+           this jump clearly won't match what the repeat does, such that
+           we can be sure that there is no use backtracking out of
+           repetitions already matched, then we change it to a
+           pop_failure_jump.  Followed by two-byte address.  */
+  maybe_pop_jump,
+
+        /* Jump to following two-byte address, and push a dummy failure
+           point.  This failure point will be thrown away if an attempt
+           is made to use it for a failure.  A `+' construct makes this
+           before the first repeat.  Also used as an intermediary kind
+           of jump when compiling an alternative.  */
+  dummy_failure_jump,
+
+        /* Push a dummy failure point and continue.  Used at the end of
+           alternatives.  */
+  push_dummy_failure,
+
+        /* Followed by two-byte relative address and two-byte number n.
+           After matching N times, jump to the address upon failure.  */
+  succeed_n,
+
+        /* Followed by two-byte relative address, and two-byte number n.
+           Jump to the address N times, then fail.  */
+  jump_n,
+
+        /* Set the following two-byte relative address to the
+           subsequent two-byte number.  The address *includes* the two
+           bytes of number.  */
+  set_number_at,
+
+  wordchar,	/* Matches any word-constituent character.  */
+  notwordchar,	/* Matches any char that is not a word-constituent.  */
+
+  wordbeg,	/* Succeeds if at word beginning.  */
+  wordend,	/* Succeeds if at word end.  */
+
+  wordbound,	/* Succeeds if at a word boundary.  */
+  notwordbound	/* Succeeds if not at a word boundary.  */
+
+        /* The remaining opcodes exist only when compiled for Emacs.  */
+#ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
+
+        /* Matches any character whose syntax is specified.  Followed by
+           a byte which contains a syntax code, e.g., Sword.  */
+  syntaxspec,
+
+        /* Matches any character whose syntax is not that specified.  */
+  notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern.  */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION, low-order
+   byte first: byte 0 receives the low eight bits and byte 1 receives
+   NUMBER shifted right by eight.  Both arguments are evaluated twice.  */
+
+#define STORE_NUMBER(destination, number)				\
+  do {									\
+    (destination)[0] = (number) & 0377;					\
+    (destination)[1] = (number) >> 8;					\
+  } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+   the byte after where the number is stored.  Therefore, DESTINATION
+   must be an lvalue (and must have no side effects, since it is used
+   more than once).  */
+
+#define STORE_NUMBER_AND_INCR(destination, number)			\
+  do {									\
+    STORE_NUMBER (destination, number);					\
+    (destination) += 2;							\
+  } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+   at SOURCE: the first byte is the low-order byte, the second is the
+   sign-extended high-order byte.
+
+   The high byte is scaled with `* 256' rather than `<< 8' because
+   left-shifting a negative value is undefined behavior in ISO C, and
+   the sign-extended byte is negative for negative numbers.
+
+   SOURCE is evaluated twice and DESTINATION is assigned twice, so
+   neither may have side effects.  */
+
+#define EXTRACT_NUMBER(destination, source)				\
+  do {									\
+    (destination) = *(source) & 0377;					\
+    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * 256;		\
+  } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER, compiled only when DEBUG is defined.
+   Stores in *DEST the number held in the two bytes at SOURCE: the first
+   byte is the low-order byte, the second is the sign-extended
+   high-order byte.  */
+static void
+extract_number (dest, source)
+    int *dest;
+    unsigned char *source;
+{
+  /* Read the high byte first, then scale it with `* 256' instead of
+     `<< 8': left-shifting a negative value is undefined behavior in
+     ISO C.  */
+  int high = SIGN_EXTEND_CHAR (*(source + 1));
+  *dest = (*source & 0377) + high * 256;
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros.  */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number
+   (that is, by two bytes).  SOURCE must be an lvalue, and because it is
+   expanded more than once it must not have side effects.  */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+  do {									\
+    EXTRACT_NUMBER (destination, source);				\
+    (source) += 2; 							\
+  } while (0)
+
+#ifdef DEBUG
+/* Function form of EXTRACT_NUMBER_AND_INCR (DEBUG builds only): stores
+   in *DESTINATION the number found at *SOURCE, then advances *SOURCE
+   past the two bytes just read.  */
+static void
+extract_number_and_incr (destination, source)
+    int *destination;
+    unsigned char **source;
+{
+  unsigned char *cursor = *source;
+
+  extract_number (destination, cursor);
+  *source = cursor + 2;
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+  extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+   it is doing (if the variable `debug' is nonzero).  If linked with the
+   main program in `iregex.c', you can enter patterns and strings
+   interactively.  And if linked with the main program in `main.c' and
+   the other test files, you can run the already-written tests.  */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging.  */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging.  */
+#include <assert.h>
+
+/* Run-time switch: the DEBUG_PRINT* macros below produce output only
+   when this is nonzero.  */
+static int debug = 0;
+
+/* Expands to its argument unchanged.  */
+#define DEBUG_STATEMENT(e) e
+
+/* printf wrappers taking a format string plus zero to three arguments.
+   NOTE(review): each expands to a bare `if (debug) ...' without braces
+   or do/while, so an `else' written directly after a use would attach
+   to the macro's `if' -- confirm no use site relies on that.  */
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+/* Prints the compiled pattern between S and E; the first argument P is
+   not used by the expansion.  */
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
+  if (debug) print_partial_compiled_pattern (s, e)
+/* Forwards all five arguments to print_double_string.  */
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
+  if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the fastmap in human-readable form: every run of consecutive
+   nonzero entries is written as its first character and, when the run
+   is longer than one entry, a `-' followed by its last character.  A
+   newline ends the output.  */
+
+void
+print_fastmap (fastmap)
+    char *fastmap;
+{
+  unsigned limit = 1 << BYTEWIDTH;
+  unsigned pos;
+
+  for (pos = 0; pos < limit; )
+    {
+      unsigned first;
+
+      /* Skip entries that are not set.  */
+      if (!fastmap[pos++])
+        continue;
+
+      first = pos - 1;
+      printchar (first);
+
+      /* Swallow the rest of the run.  */
+      while (pos < limit && fastmap[pos])
+        pos++;
+
+      if (pos - 1 != first)
+        {
+          printf ("-");
+          printchar (pos - 1);
+        }
+    }
+
+  putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
+   the START pointer into it and ending just before the pointer END.  */
+
+void
+print_partial_compiled_pattern (start, end)
+    unsigned char *start;
+    unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      switch ((re_opcode_t) *p++)
+    {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+    case exactn:
+      mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          do
+        {
+              putchar ('/');
+          printchar (*p++);
+            }
+          while (--mcnt);
+          break;
+
+    case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+    case stop_memory:
+          mcnt = *p++;
+      printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+    case duplicate:
+      printf ("/duplicate/%d", *p++);
+      break;
+
+    case anychar:
+      printf ("/anychar");
+      break;
+
+    case charset:
+        case charset_not:
+          {
+            register int c;
+
+            printf ("/charset%s",
+                (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+            assert (p + *p < pend);
+
+            for (c = 0; c < *p; c++)
+              {
+                unsigned bit;
+                unsigned char map_byte = p[1 + c];
+
+                putchar ('/');
+
+        for (bit = 0; bit < BYTEWIDTH; bit++)
+                  if (map_byte & (1 << bit))
+                    printchar (c * BYTEWIDTH + bit);
+              }
+        p += 1 + *p;
+        break;
+      }
+
+    case begline:
+      printf ("/begline");
+          break;
+
+    case endline:
+          printf ("/endline");
+          break;
+
+    case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+      printf ("/on_failure_jump/0/%d", mcnt);
+          break;
+
+    case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+      printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+          break;
+
+    case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+      printf ("/dummy_failure_jump/0/%d", mcnt);
+          break;
+
+    case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+      printf ("/maybe_pop_jump/0/%d", mcnt);
+      break;
+
+        case pop_failure_jump:
+      extract_number_and_incr (&mcnt, &p);
+      printf ("/pop_failure_jump/0/%d", mcnt);
+      break;
+
+        case jump_past_alt:
+      extract_number_and_incr (&mcnt, &p);
+      printf ("/jump_past_alt/0/%d", mcnt);
+      break;
+
+        case jump:
+      extract_number_and_incr (&mcnt, &p);
+      printf ("/jump/0/%d", mcnt);
+      break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+      printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+      printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+      printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case wordbound:
+      printf ("/wordbound");
+      break;
+
+    case notwordbound:
+      printf ("/notwordbound");
+          break;
+
+    case wordbeg:
+      printf ("/wordbeg");
+      break;
+
+    case wordend:
+      printf ("/wordend");
+
+#ifdef emacs
+    case before_dot:
+      printf ("/before_dot");
+          break;
+
+    case at_dot:
+      printf ("/at_dot");
+          break;
+
+    case after_dot:
+      printf ("/after_dot");
+          break;
+
+    case syntaxspec:
+          printf ("/syntaxspec");
+      mcnt = *p++;
+      printf ("/%d", mcnt);
+          break;
+
+    case notsyntaxspec:
+          printf ("/notsyntaxspec");
+      mcnt = *p++;
+      printf ("/%d", mcnt);
+      break;
+#endif /* emacs */
+
+    case wordchar:
+      printf ("/wordchar");
+          break;
+
+    case notwordchar:
+      printf ("/notwordchar");
+          break;
+
+    case begbuf:
+      printf ("/begbuf");
+          break;
+
+    case endbuf:
+      printf ("/endbuf");
+          break;
+
+        default:
+          printf ("?%d", *(p-1));
+    }
+    }
+  printf ("/\n");
+}
+
+
+void
+print_compiled_pattern (bufp)
+    struct re_pattern_buffer *bufp;	/* Pattern buffer to dump.  */
+{
+  unsigned char *buffer = bufp->buffer;
+  /* Debug aid: print the compiled opcodes, then the buffer bookkeeping.  */
+  print_partial_compiled_pattern (buffer, buffer + bufp->used);
+  printf ("%lu bytes used/%lu bytes allocated.\n",
+          (unsigned long) bufp->used, (unsigned long) bufp->allocated);
+  if (bufp->fastmap_accurate && bufp->fastmap)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+  /* Cast the wide fields so each argument matches its conversion.  */
+  printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+  printf ("regs_alloc: %d\t", bufp->regs_allocated);
+  printf ("can_be_null: %d\t", bufp->can_be_null);
+  printf ("newline_anchor: %d\n", bufp->newline_anchor);
+  printf ("no_sub: %d\t", bufp->no_sub);
+  printf ("not_bol: %d\t", bufp->not_bol);
+  printf ("not_eol: %d\t", bufp->not_eol);
+  printf ("syntax: %lu\n", (unsigned long) bufp->syntax);
+  /* Perhaps we should print the translate table?  */
+}
+
+
+void
+print_double_string (where, string1, size1, string2, size2)
+    const char *where;
+    const char *string1;
+    const char *string2;
+    int size1;
+    int size2;
+{
+  unsigned idx;
+
+  /* Debug aid: print the text from WHERE onwards -- the rest of STRING1
+     (when FIRST_STRING_P accepts WHERE) and then STRING2.  */
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+  if (FIRST_STRING_P (where))
+    {
+      for (idx = where - string1; idx < size1; idx++)
+        printchar (string1[idx]);
+      where = string2;	/* Continue from the start of STRING2.  */
+    }
+  for (idx = where - string2; idx < size2; idx++)
+    printchar (string2[idx]);
+}
+
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+/* Without DEBUG, assert and every DEBUG_* hook below expand to nothing.  */
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+   also be assigned to arbitrarily: each pattern buffer stores its own
+   syntax, so it can be changed between regex compilations.  */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   for compatibility for various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit mask comprised of the various bits
+   defined in regex.h.  We return the old syntax.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+    reg_syntax_t syntax;	/* New value for re_syntax_options.  */
+{
+  /* Install SYNTAX and hand the previous setting back to the caller.  */
+  reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return previous;
+}
+
+/* Error messages, one per error code listed in regex.h and in the same
+   order as there; the REG_NOERROR slot holds NULL instead of a string.  */
+
+static const char *re_error_msg[] =
+  { NULL,					/* REG_NOERROR */
+    "No match",					/* REG_NOMATCH */
+    "Invalid regular expression",		/* REG_BADPAT */
+    "Invalid collation character",		/* REG_ECOLLATE */
+    "Invalid character class name",		/* REG_ECTYPE */
+    "Trailing backslash",			/* REG_EESCAPE */
+    "Invalid back reference",			/* REG_ESUBREG */
+    "Unmatched [ or [^",			/* REG_EBRACK */
+    "Unmatched ( or \\(",			/* REG_EPAREN */
+    "Unmatched \\{",				/* REG_EBRACE */
+    "Invalid content of \\{\\}",		/* REG_BADBR */
+    "Invalid range end",			/* REG_ERANGE */
+    "Memory exhausted",				/* REG_ESPACE */
+    "Invalid preceding regular expression",	/* REG_BADRPT */
+    "Premature end of regular expression",	/* REG_EEND */
+    "Regular expression too big",		/* REG_ESIZE */
+    "Unmatched ) or \\)",			/* REG_ERPAREN */
+  };
+
+/* Subroutine declarations and macros for regex_compile.  */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next pattern character into C as an unsigned char (so it is
+   safe as an array index, e.g. into `translate') and map it through
+   `translate' when that is non-null.  Makes the enclosing function
+   return REG_EEND if the pattern is already exhausted (p == pend).  */
+#define PATFETCH(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+    if (translate) c = translate[c]; 					\
+  } while (0)
+
+/* Like PATFETCH, but without applying `translate': returns REG_EEND at
+   the end of the pattern, otherwise stores the raw unsigned char in C.  */
+#define PATFETCH_RAW(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++; 						\
+  } while (0)
+
+/* Step `p' back one character in the pattern, undoing one fetch.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  But
+   when we use a character as a subscript we must make it unsigned.  */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  32
+
+/* Grow the buffer with EXTEND_BUFFER until N more bytes fit past `b'.  */
+#define GET_BUFFER_SPACE(n)						\
+    while (b - bufp->buffer + (n) > bufp->allocated)			\
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.  */
+#define BUF_PUSH(c)							\
+  do {									\
+    GET_BUFFER_SPACE (1);						\
+    *b++ = (unsigned char) (c);						\
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
+#define BUF_PUSH_2(c1, c2)						\
+  do {									\
+    GET_BUFFER_SPACE (2);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.  */
+#define BUF_PUSH_3(c1, c2, c3)						\
+  do {									\
+    GET_BUFFER_SPACE (3);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+    *b++ = (unsigned char) (c3);					\
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies.  */
+#define STORE_JUMP(op, loc, to) \
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump.  */
+#define STORE_JUMP2(op, loc, to, arg) \
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP(op, loc, to) \
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP2(op, loc, to, arg) \
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.  So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Double the allocated size of the buffer (capped at MAX_BUF_SIZE) via
+   realloc and reset the pointers that pointed into the old block to the
+   same offsets in the new one.  Returns REG_ESIZE from the enclosing
+   function if already at MAX_BUF_SIZE, REG_ESPACE if realloc fails.  */
+#define EXTEND_BUFFER()							\
+  do { 									\
+    unsigned char *old_buffer = bufp->buffer;				\
+    if (bufp->allocated == MAX_BUF_SIZE) 				\
+      return REG_ESIZE;							\
+    bufp->allocated <<= 1;						\
+    if (bufp->allocated > MAX_BUF_SIZE)					\
+      bufp->allocated = MAX_BUF_SIZE; 					\
+    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    if (bufp->buffer == NULL)						\
+      return REG_ESPACE;						\
+    /* If the buffer moved, move all the pointers into it.  */		\
+    if (old_buffer != bufp->buffer)					\
+      {									\
+        b = (b - old_buffer) + bufp->buffer;				\
+        begalt = (begalt - old_buffer) + bufp->buffer;			\
+        if (fixup_alt_jump)						\
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+        if (laststart)							\
+          laststart = (laststart - old_buffer) + bufp->buffer;		\
+        if (pending_exact)						\
+          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+      }									\
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.  */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
+typedef int pattern_offset_t;
+
+typedef struct
+{
+  pattern_offset_t begalt_offset;	/* `begalt' as a buffer offset.  */
+  pattern_offset_t fixup_alt_jump;	/* Its buffer offset + 1; 0 if unset.  */
+  pattern_offset_t inner_group_offset;	/* start_memory's third byte.  */
+  pattern_offset_t laststart_offset;	/* Offset of `b' at the open-group.  */
+  regnum_t regnum;			/* Register number of this group.  */
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+  compile_stack_elt_t *stack;		/* Allocated array of elements.  */
+  unsigned size;			/* Elements allocated in `stack'.  */
+  unsigned avail;			/* Offset of next open position.  */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
+
+/* The next available element.  */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in the bitmap that `b' points to.  */
+#define SET_LIST_BIT(c)                               \
+  (b[((unsigned char) (c)) / BYTEWIDTH]               \
+   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern.  */
+#define GET_UNSIGNED_NUMBER(num) 					\
+  { if (p != pend)							\
+     {									\
+       PATFETCH (c); 							\
+       while (ISDIGIT (c)) 						\
+         { 								\
+           if (num < 0)							\
+              num = 0;							\
+           num = num * 10 + c - '0'; 					\
+           if (p == pend) 						\
+              break; 							\
+           PATFETCH (c);						\
+         } 								\
+       } 								\
+    }
+
+#define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
+
+#define IS_CHAR_CLASS(string)						\
+   (STREQ (string, "alpha") || STREQ (string, "upper")			\
+    || STREQ (string, "lower") || STREQ (string, "digit")		\
+    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
+    || STREQ (string, "space") || STREQ (string, "print")		\
+    || STREQ (string, "punct") || STREQ (string, "graph")		\
+    || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+   Returns one of error codes defined in `regex.h', or zero for success.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate'
+   fields are set in BUFP on entry.
+
+   If it succeeds, results are put in BUFP (if it returns an error, the
+   contents of BUFP are undefined):
+     `buffer' is the compiled pattern;
+     `syntax' is set to SYNTAX;
+     `used' is set to the length of the compiled pattern;
+     `fastmap_accurate' is zero;
+     `re_nsub' is the number of subexpressions in PATTERN;
+     `not_bol' and `not_eol' are zero;
+
+   The `fastmap' and `newline_anchor' fields are neither
+   examined nor set.  */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+     const char *pattern;
+     int size;
+     reg_syntax_t syntax;
+     struct re_pattern_buffer *bufp;
+{
+  /* We fetch characters from PATTERN here.  Even though PATTERN is
+     `char *' (i.e., signed), we declare these variables as unsigned, so
+     they can be reliably used as array indices.  */
+  register unsigned char c, c1;
+
+  /* A random temporary spot in PATTERN.  */
+  const char *p1;
+
+  /* Points to the end of the buffer, where we should append.  */
+  register unsigned char *b;
+
+  /* Keeps track of unclosed groups.  */
+  compile_stack_type compile_stack;
+
+  /* Points to the current (ending) position in the pattern.  */
+  const char *p = pattern;
+  const char *pend = pattern + size;
+
+  /* How to translate the characters in the pattern.  */
+  char *translate = bufp->translate;
+
+  /* Address of the count-byte of the most recently inserted `exactn'
+     command.  This makes it possible to tell if a new exact-match
+     character can be added to that command or if the character requires
+     a new `exactn' command.  */
+  unsigned char *pending_exact = 0;
+
+  /* Address of start of the most recently finished expression.
+     This tells, e.g., postfix * where to find the start of its
+     operand.  Reset at the beginning of groups and alternatives.  */
+  unsigned char *laststart = 0;
+
+  /* Address of beginning of regexp, or inside of last group.  */
+  unsigned char *begalt;
+
+  /* Place in the uncompiled pattern (i.e., the {) to
+     which to go back if the interval is invalid.  */
+  const char *beg_interval;
+
+  /* Address of the place where a forward jump should go to the end of
+     the containing expression.  Each alternative of an `or' -- except the
+     last -- ends with a forward jump of this sort.  */
+  unsigned char *fixup_alt_jump = 0;
+
+  /* Counts open-groups as they are encountered.  Remembered for the
+     matching close-group on the compile stack, so the same register
+     number is put in the stop_memory as the start_memory.  */
+  regnum_t regnum = 0;
+
+#ifdef DEBUG
+  DEBUG_PRINT1 ("\nCompiling pattern: ");
+  if (debug)
+    {
+      unsigned debug_count;
+
+      for (debug_count = 0; debug_count < size; debug_count++)
+        printchar (pattern[debug_count]);
+      putchar ('\n');
+    }
+#endif /* DEBUG */
+
+  /* Initialize the compile stack.  */
+  compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+  if (compile_stack.stack == NULL)
+    return REG_ESPACE;
+
+  compile_stack.size = INIT_COMPILE_STACK_SIZE;
+  compile_stack.avail = 0;
+
+  /* Initialize the pattern buffer.  */
+  bufp->syntax = syntax;
+  bufp->fastmap_accurate = 0;
+  bufp->not_bol = bufp->not_eol = 0;
+
+  /* Set `used' to zero, so that if we return an error, the pattern
+     printer (for debugging) will think there's no pattern.  We reset it
+     at the end.  */
+  bufp->used = 0;
+
+  /* Always count groups, whether or not bufp->no_sub is set.  */
+  bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+  /* Initialize the syntax table.  */
+   init_syntax_once ();
+#endif
+
+  if (bufp->allocated == 0)
+    {
+      if (bufp->buffer)
+    { /* If zero allocated, but buffer is non-null, try to realloc
+             enough space.  This loses if buffer's address is bogus, but
+             that is the user's responsibility.  */
+          RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+        }
+      else
+        { /* Caller did not allocate a buffer.  Do it for them.  */
+          bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+        }
+      if (!bufp->buffer) return REG_ESPACE;
+
+      bufp->allocated = INIT_BUF_SIZE;
+    }
+
+  begalt = b = bufp->buffer;
+
+  /* Loop through the uncompiled pattern until we're at the end.  */
+  while (p != pend)
+    {
+      PATFETCH (c);
+
+      switch (c)
+        {
+        case '^':
+          {
+            if (   /* If at start of pattern, it's an operator.  */
+                   p == pattern + 1
+                   /* If context independent, it's an operator.  */
+                || syntax & RE_CONTEXT_INDEP_ANCHORS
+                   /* Otherwise, depends on what's come before.  */
+                || at_begline_loc_p (pattern, p, syntax))
+              BUF_PUSH (begline);
+            else
+              goto normal_char;
+          }
+          break;
+
+
+        case '$':
+          {
+            if (   /* If at end of pattern, it's an operator.  */
+                   p == pend
+                   /* If context independent, it's an operator.  */
+                || syntax & RE_CONTEXT_INDEP_ANCHORS
+                   /* Otherwise, depends on what's next.  */
+                || at_endline_loc_p (p, pend, syntax))
+               BUF_PUSH (endline);
+             else
+               goto normal_char;
+           }
+           break;
+
+
+    case '+':
+        case '?':
+          if ((syntax & RE_BK_PLUS_QM)
+              || (syntax & RE_LIMITED_OPS))
+            goto normal_char;
+        handle_plus:
+        case '*':
+          /* If there is no previous pattern... */
+          if (!laststart)
+            {
+              if (syntax & RE_CONTEXT_INVALID_OPS)
+                return REG_BADRPT;
+              else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+                goto normal_char;
+            }
+
+          {
+            /* Are we optimizing this jump?  */
+            boolean keep_string_p = false;
+
+            /* 1 means zero (many) matches is allowed.  */
+            char zero_times_ok = 0, many_times_ok = 0;
+
+            /* If there is a sequence of repetition chars, collapse it
+               down to just one (the right one).  We can't combine
+               interval operators with these because of, e.g., `a{2}*',
+               which should only match an even number of `a's.  */
+
+            for (;;)
+              {
+                zero_times_ok |= c != '+';
+                many_times_ok |= c != '?';
+
+                if (p == pend)
+                  break;
+
+                PATFETCH (c);
+
+                if (c == '*'
+                    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+                  ;
+
+                else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
+                  {
+                    if (p == pend) return REG_EESCAPE;
+
+                    PATFETCH (c1);
+                    if (!(c1 == '+' || c1 == '?'))
+                      {
+                        PATUNFETCH;
+                        PATUNFETCH;
+                        break;
+                      }
+
+                    c = c1;
+                  }
+                else
+                  {
+                    PATUNFETCH;
+                    break;
+                  }
+
+                /* If we get here, we found another repeat character.  */
+               }
+
+            /* Star, etc. applied to an empty pattern is equivalent
+               to an empty pattern.  */
+            if (!laststart)
+              break;
+
+            /* Now we know whether or not zero matches is allowed
+               and also whether or not two or more matches is allowed.  */
+            if (many_times_ok)
+              { /* More than one repetition is allowed, so put in at the
+                   end a backward relative jump from `b' to before the next
+                   jump we're going to put in below (which jumps from
+                   laststart to after this jump).
+
+                   But if we are at the `*' in the exact sequence `.*\n',
+                   insert an unconditional jump backwards to the .,
+                   instead of the beginning of the loop.  This way we only
+                   push a failure point once, instead of every time
+                   through the loop.  */
+                assert (p - 1 > pattern);
+
+                /* Allocate the space for the jump.  */
+                GET_BUFFER_SPACE (3);
+
+                /* We know we are not at the first character of the pattern,
+                   because laststart was nonzero.  And we've already
+                   incremented `p', by the way, to be the character after
+                   the `*'.  Do we have to do something analogous here
+                   for null bytes, because of RE_DOT_NOT_NULL?  */
+                if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+            && zero_times_ok
+                    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+                    && !(syntax & RE_DOT_NEWLINE))
+                  { /* We have .*\n.  */
+                    STORE_JUMP (jump, b, laststart);
+                    keep_string_p = true;
+                  }
+                else
+                  /* Anything else.  */
+                  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+                /* We've added more stuff to the buffer.  */
+                b += 3;
+              }
+
+            /* On failure, jump from laststart to b + 3, which will be the
+               end of the buffer after this jump is inserted.  */
+            GET_BUFFER_SPACE (3);
+            INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+                                       : on_failure_jump,
+                         laststart, b + 3);
+            pending_exact = 0;
+            b += 3;
+
+            if (!zero_times_ok)
+              {
+                /* At least one repetition is required, so insert a
+                   `dummy_failure_jump' before the initial
+                   `on_failure_jump' instruction of the loop. This
+                   effects a skip over that instruction the first time
+                   we hit that loop.  */
+                GET_BUFFER_SPACE (3);
+                INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+                b += 3;
+              }
+            }
+      break;
+
+
+    case '.':
+          laststart = b;
+          BUF_PUSH (anychar);
+          break;
+
+
+        case '[':
+          {
+            boolean had_char_class = false;
+
+            if (p == pend) return REG_EBRACK;
+
+            /* Ensure that we have enough space to push a charset: the
+               opcode, the length count, and the bitset; 34 bytes in all.  */
+        GET_BUFFER_SPACE (34);
+
+            laststart = b;
+
+            /* We test `*p == '^' twice, instead of using an if
+               statement, so we only need one BUF_PUSH.  */
+            BUF_PUSH (*p == '^' ? charset_not : charset);
+            if (*p == '^')
+              p++;
+
+            /* Remember the first position in the bracket expression.  */
+            p1 = p;
+
+            /* Push the number of bytes in the bitmap.  */
+            BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+            /* Clear the whole map.  */
+            bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+            /* charset_not matches newline according to a syntax bit.  */
+            if ((re_opcode_t) b[-2] == charset_not
+                && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+              SET_LIST_BIT ('\n');
+
+            /* Read in characters and ranges, setting map bits.  */
+            for (;;)
+              {
+                if (p == pend) return REG_EBRACK;
+
+                PATFETCH (c);
+
+                /* \ might escape characters inside [...] and [^...].  */
+                if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+                  {
+                    if (p == pend) return REG_EESCAPE;
+
+                    PATFETCH (c1);
+                    SET_LIST_BIT (c1);
+                    continue;
+                  }
+
+                /* Could be the end of the bracket expression.  If it's
+                   not (i.e., when the bracket expression is `[]' so
+                   far), the ']' character bit gets set way below.  */
+                if (c == ']' && p != p1 + 1)
+                  break;
+
+                /* Look ahead to see if it's a range when the last thing
+                   was a character class.  */
+                if (had_char_class && c == '-' && *p != ']')
+                  return REG_ERANGE;
+
+                /* Look ahead to see if it's a range when the last thing
+                   was a character: if this is a hyphen not at the
+                   beginning or the end of a list, then it's the range
+                   operator.  */
+                if (c == '-'
+                    && !(p - 2 >= pattern && p[-2] == '[')
+                    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+                    && *p != ']')
+                  {
+                    reg_errcode_t ret
+                      = compile_range (&p, pend, translate, syntax, b);
+                    if (ret != REG_NOERROR) return ret;
+                  }
+
+                else if (p[0] == '-' && p[1] != ']')
+                  { /* This handles ranges made up of characters only.  */
+                    reg_errcode_t ret;
+
+            /* Move past the `-'.  */
+                    PATFETCH (c1);
+
+                    ret = compile_range (&p, pend, translate, syntax, b);
+                    if (ret != REG_NOERROR) return ret;
+                  }
+
+                /* See if we're at the beginning of a possible character
+                   class.  */
+
+                else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+                  { /* Leave room for the null.  */
+                    char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+                    PATFETCH (c);
+                    c1 = 0;
+
+                    /* If pattern is `[[:'.  */
+                    if (p == pend) return REG_EBRACK;
+
+                    for (;;)
+                      {
+                        PATFETCH (c);
+                        if (c == ':' || c == ']' || p == pend
+                            || c1 == CHAR_CLASS_MAX_LENGTH)
+                          break;
+                        str[c1++] = c;
+                      }
+                    str[c1] = '\0';
+
+                    /* If it isn't a word bracketed by `[:' and `:]',
+                       undo the ending character, the letters, and leave
+                       the leading `:' and `[' (but set bits for them).  */
+                    if (c == ':' && *p == ']')
+                      {
+                        int ch;
+                        boolean is_alnum = STREQ (str, "alnum");
+                        boolean is_alpha = STREQ (str, "alpha");
+                        boolean is_blank = STREQ (str, "blank");
+                        boolean is_cntrl = STREQ (str, "cntrl");
+                        boolean is_digit = STREQ (str, "digit");
+                        boolean is_graph = STREQ (str, "graph");
+                        boolean is_lower = STREQ (str, "lower");
+                        boolean is_print = STREQ (str, "print");
+                        boolean is_punct = STREQ (str, "punct");
+                        boolean is_space = STREQ (str, "space");
+                        boolean is_upper = STREQ (str, "upper");
+                        boolean is_xdigit = STREQ (str, "xdigit");
+
+                        if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+                        /* Throw away the ] at the end of the character
+                           class.  */
+                        PATFETCH (c);
+
+                        if (p == pend) return REG_EBRACK;
+
+                        for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+                          {
+                            if (   (is_alnum  && ISALNUM (ch))
+                                || (is_alpha  && ISALPHA (ch))
+                                || (is_blank  && ISBLANK (ch))
+                                || (is_cntrl  && ISCNTRL (ch))
+                                || (is_digit  && ISDIGIT (ch))
+                                || (is_graph  && ISGRAPH (ch))
+                                || (is_lower  && ISLOWER (ch))
+                                || (is_print  && ISPRINT (ch))
+                                || (is_punct  && ISPUNCT (ch))
+                                || (is_space  && ISSPACE (ch))
+                                || (is_upper  && ISUPPER (ch))
+                                || (is_xdigit && ISXDIGIT (ch)))
+                            SET_LIST_BIT (ch);
+                          }
+                        had_char_class = true;
+                      }
+                    else
+                      {
+                        c1++;
+                        while (c1--)
+                          PATUNFETCH;
+                        SET_LIST_BIT ('[');
+                        SET_LIST_BIT (':');
+                        had_char_class = false;
+                      }
+                  }
+                else
+                  {
+                    had_char_class = false;
+                    SET_LIST_BIT (c);
+                  }
+              }
+
+            /* Discard any (non)matching list bytes that are all 0 at the
+               end of the map.  Decrease the map-length byte too.  */
+            while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+              b[-1]--;
+            b += b[-1];
+          }
+          break;
+
+
+    case '(':
+          if (syntax & RE_NO_BK_PARENS)
+            goto handle_open;
+          else
+            goto normal_char;
+
+
+        case ')':
+          if (syntax & RE_NO_BK_PARENS)
+            goto handle_close;
+          else
+            goto normal_char;
+
+
+        case '\n':
+          if (syntax & RE_NEWLINE_ALT)
+            goto handle_alt;
+          else
+            goto normal_char;
+
+
+    case '|':
+          if (syntax & RE_NO_BK_VBAR)
+            goto handle_alt;
+          else
+            goto normal_char;
+
+
+        case '{':
+           if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+             goto handle_interval;
+           else
+             goto normal_char;
+
+
+        case '\\':
+          if (p == pend) return REG_EESCAPE;
+
+          /* Do not translate the character after the \, so that we can
+             distinguish, e.g., \B from \b, even if we normally would
+             translate, e.g., B to b.  */
+          PATFETCH_RAW (c);
+
+          switch (c)
+            {
+            case '(':
+              if (syntax & RE_NO_BK_PARENS)
+                goto normal_backslash;
+
+            handle_open:
+              bufp->re_nsub++;
+              regnum++;
+
+              if (COMPILE_STACK_FULL)
+                {
+                  RETALLOC (compile_stack.stack, compile_stack.size << 1,
+                            compile_stack_elt_t);
+                  if (compile_stack.stack == NULL) return REG_ESPACE;
+
+                  compile_stack.size <<= 1;
+                }
+
+              /* These are the values to restore when we hit end of this
+                 group.  They are all relative offsets, so that if the
+                 whole pattern moves because of realloc, they will still
+                 be valid.  */
+              COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+              COMPILE_STACK_TOP.fixup_alt_jump
+                = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+              COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+              COMPILE_STACK_TOP.regnum = regnum;
+
+              /* We will eventually replace the 0 with the number of
+                 groups inner to this one.  But do not push a
+                 start_memory for groups beyond the last one we can
+                 represent in the compiled pattern.  */
+              if (regnum <= MAX_REGNUM)
+                {
+                  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+                  BUF_PUSH_3 (start_memory, regnum, 0);
+                }
+
+              compile_stack.avail++;
+
+              fixup_alt_jump = 0;
+              laststart = 0;
+              begalt = b;
+          /* If we've reached MAX_REGNUM groups, then this open
+         won't actually generate any code, so we'll have to
+         clear pending_exact explicitly.  */
+          pending_exact = 0;
+              break;
+
+
+            case ')':
+              if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+              if (COMPILE_STACK_EMPTY)
+                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+                  goto normal_backslash;
+                else
+                  return REG_ERPAREN;
+
+            handle_close:
+              if (fixup_alt_jump)
+                { /* Push a dummy failure point at the end of the
+                     alternative for a possible future
+                     `pop_failure_jump' to pop.  See comments at
+                     `push_dummy_failure' in `re_match_2'.  */
+                  BUF_PUSH (push_dummy_failure);
+
+                  /* We allocated space for this jump when we assigned
+                     to `fixup_alt_jump', in the `handle_alt' case below.  */
+                  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+                }
+
+              /* See similar code for backslashed left paren above.  */
+              if (COMPILE_STACK_EMPTY)
+                if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+                  goto normal_char;
+                else
+                  return REG_ERPAREN;
+
+              /* Since we just checked for an empty stack above, this
+                 ``can't happen''.  */
+              assert (compile_stack.avail != 0);
+              {
+                /* We don't just want to restore into `regnum', because
+                   later groups should continue to be numbered higher,
+                   as in `(ab)c(de)' -- the second group is #2.  */
+                regnum_t this_group_regnum;
+
+                compile_stack.avail--;
+                begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+                fixup_alt_jump
+                  = COMPILE_STACK_TOP.fixup_alt_jump
+                    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+                    : 0;
+                laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+                this_group_regnum = COMPILE_STACK_TOP.regnum;
+        /* If we've reached MAX_REGNUM groups, then this open
+           won't actually generate any code, so we'll have to
+           clear pending_exact explicitly.  */
+        pending_exact = 0;
+
+                /* We're at the end of the group, so now we know how many
+                   groups were inside this one.  */
+                if (this_group_regnum <= MAX_REGNUM)
+                  {
+                    unsigned char *inner_group_loc
+                      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+                    *inner_group_loc = regnum - this_group_regnum;
+                    BUF_PUSH_3 (stop_memory, this_group_regnum,
+                                regnum - this_group_regnum);
+                  }
+              }
+              break;
+
+
+            case '|':					/* `\|'.  */
+              if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+                goto normal_backslash;
+            handle_alt:
+              if (syntax & RE_LIMITED_OPS)
+                goto normal_char;
+
+              /* Insert before the previous alternative a jump which
+                 jumps to this alternative if the former fails.  */
+              GET_BUFFER_SPACE (3);
+              INSERT_JUMP (on_failure_jump, begalt, b + 6);
+              pending_exact = 0;
+              b += 3;
+
+              /* The alternative before this one has a jump after it
+                 which gets executed if it gets matched.  Adjust that
+                 jump so it will jump to this alternative's analogous
+                 jump (put in below, which in turn will jump to the next
+                 (if any) alternative's such jump, etc.).  The last such
+                 jump jumps to the correct final destination.  A picture:
+                          _____ _____
+                          |   | |   |
+                          |   v |   v
+                         a | b   | c
+
+                 If we are at `b', then fixup_alt_jump right now points to a
+                 three-byte space after `a'.  We'll put in the jump, set
+                 fixup_alt_jump to right after `b', and leave behind three
+                 bytes which we'll fill in when we get to after `c'.  */
+
+              if (fixup_alt_jump)
+                STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+              /* Mark and leave space for a jump after this alternative,
+                 to be filled in later either by next alternative or
+                 when know we're at the end of a series of alternatives.  */
+              fixup_alt_jump = b;
+              GET_BUFFER_SPACE (3);
+              b += 3;
+
+              laststart = 0;
+              begalt = b;
+              break;
+
+
+            case '{':
+              /* If \{ is a literal.  */
+              if (!(syntax & RE_INTERVALS)
+                     /* If we're at `\{' and it's not the open-interval
+                        operator.  */
+                  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+                  || (p - 2 == pattern  &&  p == pend))
+                goto normal_backslash;
+
+            handle_interval:
+              {
+                /* If got here, then the syntax allows intervals.  */
+
+                /* At least (most) this many matches must be made.  */
+                int lower_bound = -1, upper_bound = -1;
+
+                beg_interval = p - 1;
+
+                if (p == pend)
+                  {
+                    if (syntax & RE_NO_BK_BRACES)
+                      goto unfetch_interval;
+                    else
+                      return REG_EBRACE;
+                  }
+
+                GET_UNSIGNED_NUMBER (lower_bound);
+
+                if (c == ',')
+                  {
+                    GET_UNSIGNED_NUMBER (upper_bound);
+                    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+                  }
+                else
+                  /* Interval such as `{1}' => match exactly once. */
+                  upper_bound = lower_bound;
+
+                if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+                    || lower_bound > upper_bound)
+                  {
+                    if (syntax & RE_NO_BK_BRACES)
+                      goto unfetch_interval;
+                    else
+                      return REG_BADBR;
+                  }
+
+                if (!(syntax & RE_NO_BK_BRACES))
+                  {
+                    if (c != '\\') return REG_EBRACE;
+
+                    PATFETCH (c);
+                  }
+
+                if (c != '}')
+                  {
+                    if (syntax & RE_NO_BK_BRACES)
+                      goto unfetch_interval;
+                    else
+                      return REG_BADBR;
+                  }
+
+                /* We just parsed a valid interval.  */
+
+                /* If it's invalid to have no preceding re.  */
+                if (!laststart)
+                  {
+                    if (syntax & RE_CONTEXT_INVALID_OPS)
+                      return REG_BADRPT;
+                    else if (syntax & RE_CONTEXT_INDEP_OPS)
+                      laststart = b;
+                    else
+                      goto unfetch_interval;
+                  }
+
+                /* If the upper bound is zero, don't want to succeed at
+                   all; jump from `laststart' to `b + 3', which will be
+                   the end of the buffer after we insert the jump.  */
+                 if (upper_bound == 0)
+                   {
+                     GET_BUFFER_SPACE (3);
+                     INSERT_JUMP (jump, laststart, b + 3);
+                     b += 3;
+                   }
+
+                 /* Otherwise, we have a nontrivial interval.  When
+                    we're all done, the pattern will look like:
+                      set_number_at <jump count> <upper bound>
+                      set_number_at <succeed_n count> <lower bound>
+                      succeed_n <after jump addr> <succed_n count>
+                      <body of loop>
+                      jump_n <succeed_n addr> <jump count>
+                    (The upper bound and `jump_n' are omitted if
+                    `upper_bound' is 1, though.)  */
+                 else
+                   { /* If the upper bound is > 1, we need to insert
+                        more at the end of the loop.  */
+                     unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+                     GET_BUFFER_SPACE (nbytes);
+
+                     /* Initialize lower bound of the `succeed_n', even
+                        though it will be set during matching by its
+                        attendant `set_number_at' (inserted next),
+                        because `re_compile_fastmap' needs to know.
+                        Jump to the `jump_n' we might insert below.  */
+                     INSERT_JUMP2 (succeed_n, laststart,
+                                   b + 5 + (upper_bound > 1) * 5,
+                                   lower_bound);
+                     b += 5;
+
+                     /* Code to initialize the lower bound.  Insert
+                        before the `succeed_n'.  The `5' is the last two
+                        bytes of this `set_number_at', plus 3 bytes of
+                        the following `succeed_n'.  */
+                     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+                     b += 5;
+
+                     if (upper_bound > 1)
+                       { /* More than one repetition is allowed, so
+                            append a backward jump to the `succeed_n'
+                            that starts this interval.
+
+                            When we've reached this during matching,
+                            we'll have matched the interval once, so
+                            jump back only `upper_bound - 1' times.  */
+                         STORE_JUMP2 (jump_n, b, laststart + 5,
+                                      upper_bound - 1);
+                         b += 5;
+
+                         /* The location we want to set is the second
+                            parameter of the `jump_n'; that is `b-2' as
+                            an absolute address.  `laststart' will be
+                            the `set_number_at' we're about to insert;
+                            `laststart+3' the number to set, the source
+                            for the relative address.  But we are
+                            inserting into the middle of the pattern --
+                            so everything is getting moved up by 5.
+                            Conclusion: (b - 2) - (laststart + 3) + 5,
+                            i.e., b - laststart.
+
+                            We insert this at the beginning of the loop
+                            so that if we fail during matching, we'll
+                            reinitialize the bounds.  */
+                         insert_op2 (set_number_at, laststart, b - laststart,
+                                     upper_bound - 1, b);
+                         b += 5;
+                       }
+                   }
+                pending_exact = 0;
+                beg_interval = NULL;
+              }
+              break;
+
+            unfetch_interval:
+              /* If an invalid interval, match the characters as literals.  */
+               assert (beg_interval);
+               p = beg_interval;
+               beg_interval = NULL;
+
+               /* normal_char and normal_backslash need `c'.  */
+               PATFETCH (c);
+
+               if (!(syntax & RE_NO_BK_BRACES))
+                 {
+                   if (p > pattern  &&  p[-1] == '\\')
+                     goto normal_backslash;
+                 }
+               goto normal_char;
+
+#ifdef emacs
+            /* There is no way to specify the before_dot and after_dot
+               operators.  rms says this is ok.  --karl  */
+            case '=':
+              BUF_PUSH (at_dot);
+              break;
+
+            case 's':
+              laststart = b;
+              PATFETCH (c);
+              BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+              break;
+
+            case 'S':
+              laststart = b;
+              PATFETCH (c);
+              BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+              break;
+#endif /* emacs */
+
+
+            case 'w':
+              laststart = b;
+              BUF_PUSH (wordchar);
+              break;
+
+
+            case 'W':
+              laststart = b;
+              BUF_PUSH (notwordchar);
+              break;
+
+
+            case '<':
+              BUF_PUSH (wordbeg);
+              break;
+
+            case '>':
+              BUF_PUSH (wordend);
+              break;
+
+            case 'b':
+              BUF_PUSH (wordbound);
+              break;
+
+            case 'B':
+              BUF_PUSH (notwordbound);
+              break;
+
+            case '`':
+              BUF_PUSH (begbuf);
+              break;
+
+            case '\'':
+              BUF_PUSH (endbuf);
+              break;
+
+            case '1': case '2': case '3': case '4': case '5':
+            case '6': case '7': case '8': case '9':
+              if (syntax & RE_NO_BK_REFS)
+                goto normal_char;
+
+              c1 = c - '0';
+
+              if (c1 > regnum)
+                return REG_ESUBREG;
+
+              /* Can't back reference to a subexpression if inside of it.  */
+              if (group_in_compile_stack (compile_stack, c1))
+                goto normal_char;
+
+              laststart = b;
+              BUF_PUSH_2 (duplicate, c1);
+              break;
+
+
+            case '+':
+            case '?':
+              if (syntax & RE_BK_PLUS_QM)
+                goto handle_plus;
+              else
+                goto normal_backslash;
+
+            default:
+            normal_backslash:
+              /* You might think it would be useful for \ to mean
+                 not to translate; but if we don't translate it
+                 it will never match anything.  */
+              c = TRANSLATE (c);
+              goto normal_char;
+            }
+          break;
+
+
+    default:
+        /* Expects the character in `c'.  */
+    normal_char:
+          /* If no exactn currently being built.  */
+          if (!pending_exact
+
+              /* If last exactn not at current position.  */
+              || pending_exact + *pending_exact + 1 != b
+
+              /* We have only one byte following the exactn for the count.  */
+          || *pending_exact == (1 << BYTEWIDTH) - 1
+
+              /* If followed by a repetition operator.  */
+              || *p == '*' || *p == '^'
+          || ((syntax & RE_BK_PLUS_QM)
+          ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+          : (*p == '+' || *p == '?'))
+          || ((syntax & RE_INTERVALS)
+                  && ((syntax & RE_NO_BK_BRACES)
+              ? *p == '{'
+                      : (p[0] == '\\' && p[1] == '{'))))
+        {
+          /* Start building a new exactn.  */
+
+              laststart = b;
+
+          BUF_PUSH_2 (exactn, 0);
+          pending_exact = b - 1;
+            }
+
+      BUF_PUSH (c);
+          (*pending_exact)++;
+      break;
+        } /* switch (c) */
+    } /* while p != pend */
+
+
+  /* Through the pattern now.  */
+
+  if (fixup_alt_jump)
+    STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+  if (!COMPILE_STACK_EMPTY)
+    return REG_EPAREN;
+
+  free (compile_stack.stack);
+
+  /* We have succeeded; set the length of the buffer.  */
+  bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+  if (debug)
+    {
+      DEBUG_PRINT1 ("\nCompiled pattern: ");
+      print_compiled_pattern (bufp);
+    }
+#endif /* DEBUG */
+
+  return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'.  */
+
+/* Write a one-operand instruction at LOC: the opcode byte OP first,
+   then ARG, stored by STORE_NUMBER into the bytes right after it (a
+   two-byte integer parameter).  Nothing is shifted; the bytes that
+   were at LOC are overwritten.  */
+
+static void
+store_op1 (op, loc, arg)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
+{
+  /* The operand lives immediately after the opcode byte.  */
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-operand counterpart of `store_op1': write the opcode byte OP at
+   LOC, then ARG1 at LOC + 1 and ARG2 at LOC + 3, each stored by
+   STORE_NUMBER as a two-byte integer parameter.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1;
+    int arg2;
+{
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Make room for a three-byte instruction at LOC and store it there.
+
+   Every byte in [LOC, END) is moved three positions towards higher
+   addresses, then OP followed by the two-byte integer parameter ARG is
+   written at LOC via `store_op1'.  The caller must already have made
+   sure that the three bytes starting at END are usable.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg;
+    unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Walk downwards from END so that no byte is overwritten before it
+     has been copied to its new home.  */
+  for (src = end; src != loc; )
+    {
+      --src;
+      src[3] = *src;
+    }
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Two-operand counterpart of `insert_op1': move every byte in
+   [LOC, END) five positions towards higher addresses, then write OP
+   followed by the two-byte parameters ARG1 and ARG2 at LOC via
+   `store_op2'.  The caller must already have made sure that the five
+   bytes starting at END are usable.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+    re_opcode_t op;
+    unsigned char *loc;
+    int arg1;
+    int arg2;
+    unsigned char *end;
+{
+  register unsigned char *src;
+
+  /* Walk downwards from END so that no byte is overwritten before it
+     has been copied to its new home.  */
+  for (src = end; src != loc; )
+    {
+      --src;
+      src[5] = *src;
+    }
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* Decide whether a ^ sits where a line-start anchor is meaningful.
+
+   P points just past the ^ in PATTERN, so the character examined is
+   the one before the ^ (P[-2]).  The result is true when that
+   character is an open-group `(' or an alternation `|' acting as an
+   operator: either SYNTAX says the operator needs no backslash
+   (RE_NO_BK_PARENS / RE_NO_BK_VBAR), or a backslash precedes it.
+   We assume there is at least one character before the ^.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+    const char *pattern, *p;
+    reg_syntax_t syntax;
+{
+  const char *before = p - 2;
+  /* Only look one further back when that byte is inside PATTERN.  */
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  /* After a subexpression?  */
+  if (*before == '(')
+    return (syntax & RE_NO_BK_PARENS) || escaped;
+
+  /* After an alternative?  */
+  if (*before == '|')
+    return (syntax & RE_NO_BK_VBAR) || escaped;
+
+  return false;
+}
+
+
+/* The dual of at_begline_loc_p, used for $.
+
+   P points at the character following the $, and PEND is the end of
+   the pattern.  The result is true when what follows is a close-group
+   `)' or an alternation `|' acting as an operator: the bare character
+   when SYNTAX has RE_NO_BK_PARENS / RE_NO_BK_VBAR set, otherwise a
+   backslash followed by that character.  We assume there is at least
+   one character after the $, i.e., `P < PEND'.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+    const char *p, *pend;
+    int syntax;
+{
+  boolean escaped = *p == '\\';
+  /* The character after P, or NULL when P is the last one.  */
+  const char *after = p + 1 < pend ? p + 1 : NULL;
+
+  /* Before the close of a subexpression?  */
+  if (syntax & RE_NO_BK_PARENS)
+    {
+      if (*p == ')')
+        return true;
+    }
+  else if (escaped && after && *after == ')')
+    return true;
+
+  /* Before an alternative?  */
+  if (syntax & RE_NO_BK_VBAR)
+    return *p == '|';
+
+  return escaped && after && *after == '|';
+}
+
+
+/* Report whether group number REGNUM is recorded in any of the
+   `avail' entries currently held on COMPILE_STACK.  The entries are
+   examined from the most recently pushed one down to the first.
+   Returns true when a match is found and false otherwise.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+    compile_stack_type compile_stack;
+    regnum_t regnum;
+{
+  int slot = compile_stack.avail - 1;
+
+  while (slot >= 0)
+    {
+      if (compile_stack.stack[slot].regnum == regnum)
+        return true;
+      slot--;
+    }
+
+  return false;
+}
+
+
+/* Compile the range `X-Y' of a bracket expression.
+
+   *P_PTR points at the ending character of the range in the
+   uncompiled pattern (which ends at PEND); the starting character is
+   in `P[-2]' and `P[-1]' is the `-'.  *P_PTR is advanced past the
+   ending character, and the bit for the translation of every
+   character from start to end (inclusive) is set in the compiled
+   pattern B.
+
+   Returns REG_ERANGE when the pattern ends before the ending
+   character, or when the range is empty and SYNTAX has
+   RE_NO_EMPTY_RANGES set; returns REG_NOERROR otherwise (an empty
+   range then sets no bits).
+
+   The names `p', `b' and `translate' are kept short on purpose so that
+   the same macros as in `regex_compile' itself can be used.  */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+    const char **p_ptr, *pend;
+    char *translate;
+    reg_syntax_t syntax;
+    unsigned char *b;
+{
+  const char *p = *p_ptr;
+  int first, last;
+  /* Wider than `unsigned char' on purpose: the range is inclusive, so
+     with `last' == 0xff (assuming 8-bit characters) a byte-sized
+     counter could never exceed it and the loop would not end.  */
+  unsigned ch;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* The pattern is a (possibly signed) `char *'; read the endpoints
+     through an unsigned char pointer so that characters with the high
+     bit set do not come out negative.  They are fetched untranslated;
+     translation happens as each bit is set below.  */
+  first = ((const unsigned char *) p)[-2];
+  last  = ((const unsigned char *) p)[0];
+
+  /* Step the caller's pointer past the ending character.  */
+  (*p_ptr)++;
+
+  /* Start after end means the range is empty.  */
+  if (first > last)
+    {
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  ch = first;
+  while (ch <= last)
+    {
+      SET_LIST_BIT (TRANSLATE (ch));
+      ch++;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+   re_match_2 use a failure stack.  These have to be macros because of
+   REGEX_ALLOCATE.  */
+
+
+/* Number of failure points for which to initially allocate space
+   when matching.  If this number is exceeded, we allocate more
+   space, so it is not a hard limit.
+
+   INIT_FAIL_STACK uses this value directly as the initial number of
+   `fail_stack_elt_t' slots.  The #ifndef lets a build override the
+   default by defining INIT_FAILURE_ALLOC beforehand.  */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if always used MAX_FAILURE_SPACE each time we failed.
+   This is a variable only so users of regex can assign to it; we never
+   change it ourselves.
+
+   DOUBLE_FAIL_STACK refuses to grow a stack whose size already exceeds
+   `re_max_failures * MAX_FAILURE_ITEMS' slots.
+   NOTE(review): MAX_FAILURE_SPACE is not defined in the nearby code;
+   presumably MAX_FAILURE_ITEMS is meant -- confirm.  */
+int re_max_failures = 2000;
+
+/* One slot of the failure stack.  Every item pushed is cast to this
+   pointer type by PUSH_FAILURE_ITEM.  */
+typedef const unsigned char *fail_stack_elt_t;
+
+/* The failure stack: a growable array of slots.  */
+typedef struct
+{
+  fail_stack_elt_t *stack;		/* The slots themselves.  */
+  unsigned size;			/* Number of slots allocated.  */
+  unsigned avail;			/* Offset of next open position.  */
+} fail_stack_type;
+
+/* Convenience tests.  These are not parameterized: they refer to a
+   variable named `fail_stack' (or a pointer named `fail_stack_ptr')
+   in the scope where they are expanded.
+   NOTE(review): FAIL_STACK_TOP indexes at `avail', which is the next
+   open position rather than the most recently pushed item -- confirm
+   against its users before relying on it.  */
+#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
+#define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
+
+
+/* Initialize `fail_stack'.  Do `return -2' if the alloc fails.
+
+   Operates on a variable named `fail_stack' in the expanding scope:
+   allocates INIT_FAILURE_ALLOC slots with REGEX_ALLOCATE, records that
+   size and marks the stack empty.  Because of the embedded `return',
+   this may only be used inside a function for which returning -2 is
+   meaningful.  */
+
+#define INIT_FAIL_STACK()						\
+  do {									\
+    fail_stack.stack = (fail_stack_elt_t *)				\
+      REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
+                                    \
+    if (fail_stack.stack == NULL)					\
+      return -2;							\
+                                    \
+    fail_stack.size = INIT_FAILURE_ALLOC;				\
+    fail_stack.avail = 0;						\
+  } while (0)
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+   Return 1 if succeeds, and 0 if either ran out of memory
+   allocating space for it or it was already too large.
+
+   "Too large" means the current size already exceeds
+   `re_max_failures * MAX_FAILURE_ITEMS'; since MAX_FAILURE_ITEMS is
+   written in terms of `num_regs', that variable must be in scope too.
+
+   The result of REGEX_REALLOCATE is assigned to `.stack' before it is
+   tested, so after an allocation failure `.stack' is NULL and `.size'
+   is left unchanged.  FAIL_STACK is evaluated several times; pass a
+   plain lvalue without side effects.
+
+   REGEX_REALLOCATE requires `destination' be declared.   */
+
+#define DOUBLE_FAIL_STACK(fail_stack)					\
+  ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
+   ? 0									\
+   : ((fail_stack).stack = (fail_stack_elt_t *)				\
+        REGEX_REALLOCATE ((fail_stack).stack, 				\
+          (fail_stack).size * sizeof (fail_stack_elt_t),		\
+          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
+                                    \
+      (fail_stack).stack == NULL					\
+      ? 0								\
+      : ((fail_stack).size <<= 1, 					\
+         1)))
+
+
+/* Push PATTERN_OP on FAIL_STACK, doubling the stack first if it is full.
+
+   Return 1 if was able to do so and 0 if DOUBLE_FAIL_STACK could not
+   make room (it ran out of memory, or the stack was already too
+   large).
+
+   The fullness test is made on the FAIL_STACK argument itself rather
+   than through FAIL_STACK_FULL (), which only ever looks at a variable
+   literally named `fail_stack'; the macro therefore also works when
+   the stack is passed under another name.  PATTERN_OP is parenthesized
+   so that any expression may be passed.
+
+   FAIL_STACK is evaluated several times, so pass a plain lvalue
+   without side effects.  DOUBLE_FAIL_STACK needs `destination' and
+   `num_regs' to be declared in the expanding scope.  */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
+  ((((fail_stack).avail == (fail_stack).size)				\
+    && !DOUBLE_FAIL_STACK (fail_stack))					\
+    ? 0									\
+    : ((fail_stack).stack[(fail_stack).avail++] = (pattern_op),		\
+       1))
+
+/* This pushes an item onto the failure stack.  Must be a four-byte
+   value.  Assumes the variable `fail_stack'.  Probably should only
+   be called from within `PUSH_FAILURE_POINT'.
+
+   ITEM is cast to `fail_stack_elt_t' and stored at the next open
+   position.  No check for free space is made here; the caller must
+   already have ensured there is room.  */
+#define PUSH_FAILURE_ITEM(item)						\
+  fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+
+/* The complement operation.  Assumes `fail_stack' is nonempty.
+   Yields the most recently pushed slot and removes it.  */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+   With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_ITEM and DEBUG_POP
+   stores the popped item through ITEM_ADDR; without it both expand to
+   nothing, so no slot is used for the id.  */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+   if we ever fail back to it.
+
+   PATTERN_PLACE and STRING_PLACE are the pattern and string positions
+   being saved.  Items are pushed in this order: for each register from
+   `lowest_active_reg' through `highest_active_reg', its `regstart',
+   `regend' and `reg_info' word; then `lowest_active_reg',
+   `highest_active_reg', PATTERN_PLACE and STRING_PLACE; and last, via
+   DEBUG_PUSH, the failure id (only when DEBUG is defined).
+
+   Before anything is pushed, the stack is doubled until at least
+   NUM_FAILURE_ITEMS slots are free.
+
+   Requires variables fail_stack, regstart, regend, reg_info,
+   lowest_active_reg, highest_active_reg and num_regs be declared.
+   The `destination' that DOUBLE_FAIL_STACK requires is declared by
+   this macro itself.  The DEBUG_* statements additionally refer to
+   failure_id, nfailure_points_pushed, num_regs_pushed, bufp, pend,
+   string1, size1, string2 and size2.
+
+   Does `return FAILURE_CODE' if runs out of memory.  */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
+  do {									\
+    char *destination;							\
+    /* Must be int, so when we don't save any registers, the arithmetic	\
+       of 0 + -1 isn't done as unsigned.  */				\
+    int this_reg;							\
+                                        \
+    DEBUG_STATEMENT (failure_id++);					\
+    DEBUG_STATEMENT (nfailure_points_pushed++);				\
+    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
+    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
+    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
+                                    \
+    DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
+    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
+                                    \
+    /* Ensure we have enough space allocated for what we will push.  */	\
+    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
+      {									\
+        if (!DOUBLE_FAIL_STACK (fail_stack))			\
+          return failure_code;						\
+                                    \
+        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
+               (fail_stack).size);				\
+        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+      }									\
+                                    \
+    /* Push the info, starting with the registers.  */			\
+    DEBUG_PRINT1 ("\n");						\
+                                    \
+    for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
+         this_reg++)							\
+      {									\
+    DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
+        DEBUG_STATEMENT (num_regs_pushed++);				\
+                                    \
+    DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
+        PUSH_FAILURE_ITEM (regstart[this_reg]);				\
+                                                                        \
+    DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
+        PUSH_FAILURE_ITEM (regend[this_reg]);				\
+                                    \
+    DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
+        DEBUG_PRINT2 (" match_null=%d",					\
+                      REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
+        DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
+        DEBUG_PRINT2 (" matched_something=%d",				\
+                      MATCHED_SOMETHING (reg_info[this_reg]));		\
+        DEBUG_PRINT2 (" ever_matched=%d",				\
+                      EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
+    DEBUG_PRINT1 ("\n");						\
+        PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
+      }									\
+                                    \
+    DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
+    PUSH_FAILURE_ITEM (lowest_active_reg);				\
+                                    \
+    DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
+    PUSH_FAILURE_ITEM (highest_active_reg);				\
+                                    \
+    DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
+    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
+    PUSH_FAILURE_ITEM (pattern_place);					\
+                                    \
+    DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
+    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
+                 size2);				\
+    DEBUG_PRINT1 ("'\n");						\
+    PUSH_FAILURE_ITEM (string_place);					\
+                                    \
+    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
+    DEBUG_PUSH (failure_id);						\
+  } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+   for each register: its `regstart', its `regend' and its `reg_info'
+   word.  */
+#define NUM_REG_ITEMS  3
+
+/* Individual items aside from the registers: the lowest and highest
+   active register numbers, the pattern position and the string
+   position, plus the failure point id when debugging.  */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack.  Refers to the
+   variable `num_regs' in the expanding scope.  */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items.  Refers to the variables
+   `lowest_active_reg' and `highest_active_reg' in the expanding
+   scope.  */
+#define NUM_FAILURE_ITEMS						\
+  ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
+    + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it.
+   Refers to the variable `fail_stack' in the expanding scope.  */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+   We restore into the parameters, all of which should be lvalues:
+     STR -- the saved data position.
+     PAT -- the saved pattern position.
+     LOW_REG, HIGH_REG -- the lowest and highest active registers.
+     REGSTART, REGEND -- arrays of string positions.
+     REG_INFO -- array of information about each subexpression.
+
+   Also assumes the variables `fail_stack' and (if debugging), `bufp',
+   `pend', `string1', `size1', `string2', and `size2'.  */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{									\
+  DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
+  int this_reg;								\
+  const unsigned char *string_temp;					\
+                                    \
+  assert (!FAIL_STACK_EMPTY ());					\
+                                    \
+  /* Remove failure points and point to how many regs pushed.  */	\
+  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
+  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
+  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
+                                    \
+  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
+                                    \
+  DEBUG_POP (&failure_id);						\
+  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
+                                    \
+  /* If the saved string location is NULL, it came from an		\
+     on_failure_keep_string_jump opcode, and we want to throw away the	\
+     saved NULL, thus retaining our current position in the string.  */	\
+  string_temp = POP_FAILURE_ITEM ();					\
+  if (string_temp != NULL)						\
+    str = (const char *) string_temp;					\
+                                    \
+  DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
+  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
+  DEBUG_PRINT1 ("'\n");							\
+                                    \
+  pat = (unsigned char *) POP_FAILURE_ITEM ();				\
+  DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
+                                    \
+  /* Restore register info.  */						\
+  high_reg = (unsigned) POP_FAILURE_ITEM ();				\
+  DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
+                                    \
+  low_reg = (unsigned) POP_FAILURE_ITEM ();				\
+  DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
+                                    \
+  for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
+    {									\
+      DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
+                                    \
+      reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
+      DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
+                                    \
+      regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
+      DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
+                                    \
+      regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
+      DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
+    }									\
+                                    \
+  DEBUG_STATEMENT (nfailure_points_popped++);				\
+} /* POP_FAILURE_POINT */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   The pattern is walked one path at a time.  Each alternative that is
+   met on the way is remembered on a local failure stack and followed
+   once the current path has either consumed a character or reached the
+   end of the pattern.
+
+   Returns 0 if we succeed, -2 if an internal error.
+
+   NOTE(review): none of the return paths below releases
+   `fail_stack.stack'.  If INIT_FAIL_STACK obtains it from malloc in
+   REGEX_MALLOC builds, that storage is leaked -- confirm.  */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int j, k;
+  fail_stack_type fail_stack;
+#ifndef REGEX_MALLOC
+  /* Not referenced directly in this function; presumably needed by the
+     stack-growing macros in the non-malloc build -- confirm before
+     removing.  */
+  char *destination;
+#endif
+  /* We don't push any register information onto the failure stack.  */
+  unsigned num_regs = 0;
+
+  register char *fastmap = bufp->fastmap;
+  unsigned char *pattern = bufp->buffer;
+  unsigned long size = bufp->used;
+  const unsigned char *p = pattern;
+  register unsigned char *pend = pattern + size;
+
+  /* Assume that each path through the pattern can be null until
+     proven otherwise.  We set this false at the bottom of switch
+     statement, to which we get only if a particular path doesn't
+     match the empty string.  */
+  boolean path_can_be_null = true;
+
+  /* We aren't doing a `succeed_n' to begin with.  */
+  boolean succeed_n_p = false;
+
+  assert (fastmap != NULL && p != NULL);
+
+  INIT_FAIL_STACK ();
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
+  bufp->can_be_null = 0;
+
+  while (p != pend || !FAIL_STACK_EMPTY ())
+    {
+      if (p == pend)
+        {
+          bufp->can_be_null |= path_can_be_null;
+
+          /* Reset for next path.  */
+          path_can_be_null = true;
+
+          p = fail_stack.stack[--fail_stack.avail];
+        }
+
+      /* We should never be about to go beyond the end of the pattern.  */
+      assert (p < pend);
+
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((re_opcode_t) *p++))
+#else
+      switch ((re_opcode_t) *p++)
+#endif
+        {
+
+        /* I guess the idea here is to simply not bother with a fastmap
+           if a backreference is used, since it's too hard to figure out
+           the fastmap for the corresponding group.  Setting
+           `can_be_null' stops `re_search_2' from using the fastmap, so
+           that is all we do.  */
+        case duplicate:
+          bufp->can_be_null = 1;
+          return 0;
+
+
+        /* Following are the cases which match a character.  These end
+           with `break'.  */
+
+        case exactn:
+          /* p[0] is the character count; p[1] is the first character.  */
+          fastmap[p[1]] = 1;
+          break;
+
+
+        case charset:
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+              fastmap[j] = 1;
+          break;
+
+
+        case charset_not:
+          /* Chars beyond end of map must be allowed.  */
+          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+            fastmap[j] = 1;
+
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+              fastmap[j] = 1;
+          break;
+
+
+        case wordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case notwordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case anychar:
+          {
+            /* An alternative examined earlier may already have marked
+               newline as a possible first character (say, a literal
+               newline in the other branch of an alternation).  Remember
+               that, so that excluding newline for `.' only withholds
+               what `.' itself would add instead of erasing the earlier
+               entry.  */
+            char newline_already_set = fastmap['\n'];
+
+            /* `.' matches anything ...  */
+            for (j = 0; j < (1 << BYTEWIDTH); j++)
+              fastmap[j] = 1;
+
+            /* ... except perhaps newline.  */
+            if (!(bufp->syntax & RE_DOT_NEWLINE))
+              fastmap['\n'] = newline_already_set;
+
+            /* Return if we have already set `can_be_null'; if we have,
+               then the fastmap is irrelevant.  Something's wrong here.  */
+            else if (bufp->can_be_null)
+              return 0;
+
+            /* Otherwise, have to check alternative paths.  */
+            break;
+          }
+
+
+#ifdef emacs
+        case syntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        case notsyntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        /* All cases after this match the empty string.  These end with
+           `continue'.  */
+
+
+        case before_dot:
+        case at_dot:
+        case after_dot:
+          continue;
+#endif /* emacs */
+
+
+        case no_op:
+        case begline:
+        case endline:
+        case begbuf:
+        case endbuf:
+        case wordbound:
+        case notwordbound:
+        case wordbeg:
+        case wordend:
+        case push_dummy_failure:
+          continue;
+
+
+        case jump_n:
+        case pop_failure_jump:
+        case maybe_pop_jump:
+        case jump:
+        case jump_past_alt:
+        case dummy_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+          if (j > 0)
+            continue;
+
+          /* Jump backward implies we just went through the body of a
+             loop and matched nothing.  Opcode jumped to should be
+             `on_failure_jump' or `succeed_n'.  Just treat it like an
+             ordinary jump.  For a * loop, it has pushed its failure
+             point already; if so, discard that as redundant.  */
+          if ((re_opcode_t) *p != on_failure_jump
+              && (re_opcode_t) *p != succeed_n)
+            continue;
+
+          p++;
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+
+          /* If what's on the stack is where we are now, pop it.  */
+          if (!FAIL_STACK_EMPTY ()
+              && fail_stack.stack[fail_stack.avail - 1] == p)
+            fail_stack.avail--;
+
+          continue;
+
+
+        case on_failure_jump:
+        case on_failure_keep_string_jump:
+        handle_on_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+
+          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+             end of the pattern.  We don't want to push such a point,
+             since when we restore it above, entering the switch will
+             increment `p' past the end of the pattern.  We don't need
+             to push such a point since we obviously won't find any more
+             fastmap entries beyond `pend'.  Such a pattern can match
+             the null string, though.  */
+          if (p + j < pend)
+            {
+              if (!PUSH_PATTERN_OP (p + j, fail_stack))
+                return -2;
+            }
+          else
+            bufp->can_be_null = 1;
+
+          if (succeed_n_p)
+            {
+              EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
+              succeed_n_p = false;
+            }
+
+          continue;
+
+
+        case succeed_n:
+          /* Get to the number of times to succeed.  */
+          p += 2;
+
+          /* Increment p past the n for when k != 0.  */
+          EXTRACT_NUMBER_AND_INCR (k, p);
+          if (k == 0)
+            {
+              /* Rewind to the jump offset and handle it as an
+                 on_failure_jump; the flag makes that code skip the
+                 count afterwards.  */
+              p -= 4;
+              succeed_n_p = true;  /* Spaghetti code alert.  */
+              goto handle_on_failure_jump;
+            }
+          continue;
+
+
+        case set_number_at:
+          p += 4;
+          continue;
+
+
+        case start_memory:
+        case stop_memory:
+          p += 2;
+          continue;
+
+
+        default:
+          abort (); /* We have listed all the cases.  */
+        } /* switch *p++ */
+
+      /* Getting here means we have found the possible starting
+         characters for one path of the pattern -- and that the empty
+         string does not match.  We need not follow this path further.
+         Instead, look at the next alternative (remembered on the
+         stack), or quit if no more.  The test at the top of the loop
+         does these things.  */
+      path_can_be_null = false;
+      p = pend;
+    } /* while p */
+
+  /* Set `can_be_null' for the last path (also the first path, if the
+     pattern is empty).  */
+  bufp->can_be_null |= path_can_be_null;
+  return 0;
+} /* re_compile_fastmap */
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      /* Adopt the caller's arrays.  */
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      /* No storage supplied: mark the registers as unallocated and
+         clear both arrays with a null of the proper pointer type.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+/* Searching routines.  */
+
+/* Single-string front end to re_search_2: the first string is left
+   empty, STRING is searched as the second string, and matching may
+   extend all the way to its end (the stop index is SIZE).  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  int outcome;
+
+  outcome = re_search_2 (bufp, NULL, 0, string, size,
+                         startpos, range, regs, size);
+  return outcome;
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE scans backwards.  RANGE is clamped so that
+   no start outside 0 .. SIZE1 + SIZE2 is ever tried.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match, or -2 if error (such as failure
+   stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register char *translate = bufp->translate;
+  int total_size = size1 + size2;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.  STARTPOS is
+     known to lie in 0 .. TOTAL_SIZE here, so both bounds can be formed
+     without overflow, whereas STARTPOS + RANGE could overflow for a
+     huge RANGE.  The lower bound keeps a backward scan from stepping
+     to index -1, where the fastmap test would read the character in
+     front of the string.  */
+  if (range < -startpos)
+    range = -startpos;
+  else if (range > total_size - startpos)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern that must be anchored.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    if (re_compile_fastmap (bufp) == -2)
+      return -2;
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)	/* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* Stop the scan at the end of STRING1 if it would
+                 otherwise run across the boundary between the two
+                 strings.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              startpos += irange - range;
+            }
+          else				/* Searching backwards.  */
+            {
+              register char c = (size1 == 0 || startpos >= size1
+                                 ? string2[startpos - size1]
+                                 : string1[startpos]);
+
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2 (bufp, string1, size1, string2, size2,
+                        startpos, regs, stop);
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2.  */
+
+/* Forward declarations of file-local helpers.  They are written in the
+   old style with empty parentheses, so no argument checking is done
+   against them.  */
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+               common_op_match_null_string_p (),
+               group_match_null_string_p ();
+
+/* Structure for per-register (a.k.a. per-group) information.
+   This must not be longer than one word, because we push this value
+   onto the failure stack.  Other register information, such as the
+   starting and ending positions (which are addresses), and the list of
+   inner groups (which is a bits list) are maintained in separate
+   variables.
+
+   We are making a (strictly speaking) nonportable assumption here: that
+   the compiler will pack our bit fields into something that fits into
+   the type of `word', i.e., is something that fits into one item on the
+   failure stack.  */
+typedef union
+{
+  /* The whole record viewed as a single failure-stack item.  */
+  fail_stack_elt_t word;
+  struct
+  {
+      /* This field is one if this group can match the empty string,
+         zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
+#define MATCH_NULL_UNSET_VALUE 3
+    unsigned match_null_string_p : 2;
+    /* One-bit flags; all three are accessed through the macros below.  */
+    unsigned is_active : 1;
+    unsigned matched_something : 1;
+    unsigned ever_matched_something : 1;
+  } bits;
+} register_info_type;
+
+/* Accessors for the bit fields of a register_info_type R.  Each expands
+   to the field itself, so it can be assigned to as well as read.  */
+#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R)  ((R).bits.is_active)
+#define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
+
+
+/* To be invoked once a real character has been matched.  For every
+   register from `lowest_active_reg' through `highest_active_reg' it
+   raises both the `matched_something' and the `ever_matched_something'
+   flag in `reg_info'.  */
+#define SET_REGS_MATCHED()						\
+  do									\
+    {									\
+      unsigned reg_no = lowest_active_reg;				\
+      while (reg_no <= highest_active_reg)				\
+        {								\
+          EVER_MATCHED_SOMETHING (reg_info[reg_no]) = 1;		\
+          MATCHED_SOMETHING (reg_info[reg_no]) = 1;			\
+          reg_no++;							\
+        }								\
+    }									\
+  while (0)
+
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+   and `string2', into an offset.  A pointer into `string1' yields its
+   distance from the start of `string1'; a pointer into `string2' yields
+   its distance from the start of `string2' plus `size1', i.e. an offset
+   into the virtual concatenation of the two strings.  */
+#define POINTER_TO_OFFSET(ptr)						\
+  (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+
+/* Registers are set to a sentinel when they haven't yet matched.  The
+   sentinel is the all-ones pointer value, not NULL.  */
+#define REG_UNSET_VALUE ((char *) -1)
+/* Nonzero if register value E is still the sentinel.  */
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+/* Macros for dealing with the split strings in re_match_2.  They read
+   the variables `d', `dend', `string1', `string2', `size1', `size2',
+   `end1', `end2', `end_match_1' and `end_match_2' at the point of
+   use.  */
+
+/* Nonzero while the current end of data is the end of the part of
+   string1 to be matched.  */
+#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
+
+/* Call before fetching a character with *d.  This switches over to
+   string2 if necessary.  Jumps to the label `fail', which must exist in
+   the enclosing function, when the data is exhausted.  */
+#define PREFETCH()							\
+  while (d == dend)						    	\
+    {									\
+      /* End of string2 => fail.  */					\
+      if (dend == end_match_2) 						\
+        goto fail;							\
+      /* End of string1 => advance to string2.  */ 			\
+      d = string2;						        \
+      dend = end_match_2;						\
+    }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+   of `string1' and `string2'.  If only one string, it's `string2'.
+   Note that AT_STRINGS_BEG is also true for any D when `size2' is
+   zero.  */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent.  We have
+   two special cases to check for: if past the end of string1, look at
+   the first character in string2; and if before the beginning of
+   string2, look at the last character in string1.  */
+#define WORDCHAR_P(d)							\
+  (SYNTAX ((d) == end1 ? *string2					\
+           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
+   == Sword)
+
+/* Test if the character before D and the one at D differ with respect
+   to being word-constituent.  The two ends of the virtual concatenation
+   always count as boundaries.  */
+#define AT_WORD_BOUNDARY(d)						\
+  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
+   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+
+
+/* Free everything we malloc.  */
+#ifdef REGEX_MALLOC
+/* Release VAR if it is non-null and reset it to NULL.  VAR is evaluated
+   more than once and must be an lvalue.  The body is wrapped in
+   `do ... while (0)' so that the macro acts as one statement even as the
+   unbraced body of an `if' or a loop.  */
+#define FREE_VAR(var)							\
+  do									\
+    {									\
+      if (var)								\
+        free (var);							\
+      var = NULL;							\
+    }									\
+  while (0)
+/* Release every dynamically allocated work array of the matcher, and
+   the failure stack.  Expects all of the named variables to be in
+   scope and either NULL or pointing at malloc'd storage.  */
+#define FREE_VARIABLES()						\
+  do {									\
+    FREE_VAR (fail_stack.stack);					\
+    FREE_VAR (regstart);						\
+    FREE_VAR (regend);							\
+    FREE_VAR (old_regstart);						\
+    FREE_VAR (old_regend);						\
+    FREE_VAR (best_regstart);						\
+    FREE_VAR (best_regend);						\
+    FREE_VAR (reg_info);						\
+    FREE_VAR (reg_dummy);						\
+    FREE_VAR (reg_info_dummy);						\
+  } while (0)
+#else /* not REGEX_MALLOC */
+/* Some MIPS systems (at least) want this to free alloca'd storage.  */
+#define FREE_VARIABLES() alloca (0)
+#endif /* not REGEX_MALLOC */
+
+
+/* These values must meet several constraints.  They must not be valid
+   register values; since we have a limit of 255 registers (because
+   we use only one byte in the pattern for the register number), we can
+   use numbers larger than 255.  They must differ by 1, because of
+   NUM_FAILURE_ITEMS above: with these two values its per-register term
+   (highest - lowest + 1) comes out as zero.  And the value for the
+   lowest register must be larger than the value for the highest
+   register, so we do not try to actually save any registers when none
+   are active.  */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines.  */
+
+#ifndef emacs   /* Emacs never uses this.  */
+/* Single-string front end to re_match_2: the first string is left
+   empty, STRING is matched as the second string starting at POS, and
+   matching may extend to the end of STRING (the stop index is SIZE).  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  int outcome;
+
+  outcome = re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
+  return outcome;
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+   (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+   and SIZE2, respectively).  We start matching at POS, and stop
+   matching at STOP.
+
+   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
+   store offsets for the substring each group matched in REGS.  See the
+   documentation for exactly how many groups we fill.
+
+   We return -1 if no match, -2 if an internal error (such as the
+   failure stack overflowing).  Otherwise, we return the length of the
+   matched substring.  */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int stop;
+{
+  /* General temporaries.  */
+  int mcnt;
+  unsigned char *p1;
+
+  /* Just past the end of the corresponding string.  */
+  const char *end1, *end2;
+
+  /* Pointers into string1 and string2, just past the last characters in
+     each to consider matching.  */
+  const char *end_match_1, *end_match_2;
+
+  /* Where we are in the data, and the end of the current string.  */
+  const char *d, *dend;
+
+  /* Where we are in the pattern, and the end of the pattern.  */
+  unsigned char *p = bufp->buffer;
+  register unsigned char *pend = p + bufp->used;
+
+  /* We use this to map every character in the string.  */
+  char *translate = bufp->translate;
+
+  /* Failure point stack.  Each place that can handle a failure further
+     down the line pushes a failure point on this stack.  It consists of
+     restart, regend, and reg_info for all registers corresponding to
+     the subexpressions we're currently inside, plus the number of such
+     registers, and, finally, two char *'s.  The first char * is where
+     to resume scanning the pattern; the second one is where to resume
+     scanning the strings.  If the latter is zero, the failure point is
+     a ``dummy''; if a failure happens and the failure point is a dummy,
+     it gets discarded and the next next one is tried.  */
+  fail_stack_type fail_stack;
+#ifdef DEBUG
+  static unsigned failure_id = 0;
+  unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+  /* We fill all the registers internally, independent of what we
+     return, for use in backreferences.  The number here includes
+     an element for register zero.  */
+  unsigned num_regs = bufp->re_nsub + 1;
+
+  /* The currently active registers.  */
+  unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+  unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+  /* Information on the contents of registers. These are pointers into
+     the input strings; they record just what was matched (on this
+     attempt) by a subexpression part of the pattern, that is, the
+     regnum-th regstart pointer points to where in the pattern we began
+     matching and the regnum-th regend points to right after where we
+     stopped matching the regnum-th subexpression.  (The zeroth register
+     keeps track of what the whole pattern matches.)  */
+  const char **regstart, **regend;
+
+  /* If a group that's operated upon by a repetition operator fails to
+     match anything, then the register for its start will need to be
+     restored because it will have been set to wherever in the string we
+     are when we last see its open-group operator.  Similarly for a
+     register's end.  */
+  const char **old_regstart, **old_regend;
+
+  /* The is_active field of reg_info helps us keep track of which (possibly
+     nested) subexpressions we are currently in. The matched_something
+     field of reg_info[reg_num] helps us tell whether or not we have
+     matched any of the pattern so far this time through the reg_num-th
+     subexpression.  These two fields get reset each time through any
+     loop their register is in.  */
+  register_info_type *reg_info;
+
+  /* The following record the register info as found in the above
+     variables when we find a match better than any we've seen before.
+     This happens as we backtrack through the failure points, which in
+     turn happens only if we have not yet matched the entire string. */
+  unsigned best_regs_set = false;
+  const char **best_regstart, **best_regend;
+
+  /* Logically, this is `best_regend[0]'.  But we don't want to have to
+     allocate space for that if we're not allocating space for anything
+     else (see below).  Also, we never need info about register 0 for
+     any of the other register vectors, and it seems rather a kludge to
+     treat `best_regend' differently than the rest.  So we keep track of
+     the end of the best match so far in a separate variable.  We
+     initialize this to NULL so that when we backtrack the first time
+     and need to test it, it's not garbage.  */
+  const char *match_end = NULL;
+
+  /* Used when we pop values we don't care about.  */
+  const char **reg_dummy;
+  register_info_type *reg_info_dummy;
+
+#ifdef DEBUG
+  /* Counts the total number of registers pushed.  */
+  unsigned num_regs_pushed = 0;
+#endif
+
+  DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+  INIT_FAIL_STACK ();
+
+  /* Do not bother to initialize all the register variables if there are
+     no groups in the pattern, as it takes a fair amount of time.  If
+     there are groups, we include space for register 0 (the whole
+     pattern), even though we never use it, since it simplifies the
+     array indexing.  We should fix this.  */
+  if (bufp->re_nsub)
+    {
+      regstart = REGEX_TALLOC (num_regs, const char *);
+      regend = REGEX_TALLOC (num_regs, const char *);
+      old_regstart = REGEX_TALLOC (num_regs, const char *);
+      old_regend = REGEX_TALLOC (num_regs, const char *);
+      best_regstart = REGEX_TALLOC (num_regs, const char *);
+      best_regend = REGEX_TALLOC (num_regs, const char *);
+      reg_info = REGEX_TALLOC (num_regs, register_info_type);
+      reg_dummy = REGEX_TALLOC (num_regs, const char *);
+      reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+      if (!(regstart && regend && old_regstart && old_regend && reg_info
+            && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+        {
+          FREE_VARIABLES ();
+          return -2;
+        }
+    }
+#ifdef REGEX_MALLOC
+  else
+    {
+      /* We must initialize all our variables to NULL, so that
+         `FREE_VARIABLES' doesn't try to free them.  */
+      regstart = regend = old_regstart = old_regend = best_regstart
+        = best_regend = reg_dummy = NULL;
+      reg_info = reg_info_dummy = (register_info_type *) NULL;
+    }
+#endif /* REGEX_MALLOC */
+
+  /* The starting position is bogus.  */
+  if (pos < 0 || pos > size1 + size2)
+    {
+      FREE_VARIABLES ();
+      return -1;
+    }
+
+  /* Initialize subexpression text positions to -1 to mark ones that no
+     start_memory/stop_memory has been seen for. Also initialize the
+     register information struct.  */
+  for (mcnt = 1; mcnt < num_regs; mcnt++)
+    {
+      regstart[mcnt] = regend[mcnt]
+        = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+      REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+      IS_ACTIVE (reg_info[mcnt]) = 0;
+      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+      EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+    }
+
+  /* We move `string1' into `string2' if the latter's empty -- but not if
+     `string1' is null.  */
+  if (size2 == 0 && string1 != NULL)
+    {
+      string2 = string1;
+      size2 = size1;
+      string1 = 0;
+      size1 = 0;
+    }
+  end1 = string1 + size1;
+  end2 = string2 + size2;
+
+  /* Compute where to stop matching, within the two strings.  */
+  if (stop <= size1)
+    {
+      end_match_1 = string1 + stop;
+      end_match_2 = string2;
+    }
+  else
+    {
+      end_match_1 = end1;
+      end_match_2 = string2 + stop - size1;
+    }
+
+  /* `p' scans through the pattern as `d' scans through the data.
+     `dend' is the end of the input string that `d' points within.  `d'
+     is advanced into the following input string whenever necessary, but
+     this happens before fetching; therefore, at the beginning of the
+     loop, `d' can be pointing at the end of a string, but it cannot
+     equal `string2'.  */
+  if (size1 > 0 && pos <= size1)
+    {
+      d = string1 + pos;
+      dend = end_match_1;
+    }
+  else
+    {
+      d = string2 + pos - size1;
+      dend = end_match_2;
+    }
+
+  DEBUG_PRINT1 ("The compiled pattern is: ");
+  DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+  DEBUG_PRINT1 ("The string to match is: `");
+  DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+  DEBUG_PRINT1 ("'\n");
+
+  /* This loops over pattern commands.  It exits by returning from the
+     function if the match is complete, or it drops through if the match
+     fails at this starting point in the input data.  */
+  for (;;)
+    {
+      DEBUG_PRINT2 ("\n0x%x: ", p);
+
+      if (p == pend)
+    { /* End of pattern means we might have succeeded.  */
+          DEBUG_PRINT1 ("end of pattern ... ");
+
+      /* If we haven't matched the entire string, and we want the
+             longest match, try backtracking.  */
+          if (d != end_match_2)
+        {
+              DEBUG_PRINT1 ("backtracking.\n");
+
+              if (!FAIL_STACK_EMPTY ())
+                { /* More failure points to try.  */
+                  boolean same_str_p = (FIRST_STRING_P (match_end)
+                                == MATCHING_IN_FIRST_STRING);
+
+                  /* If exceeds best match so far, save it.  */
+                  if (!best_regs_set
+                      || (same_str_p && d > match_end)
+                      || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+                    {
+                      best_regs_set = true;
+                      match_end = d;
+
+                      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+                      for (mcnt = 1; mcnt < num_regs; mcnt++)
+                        {
+                          best_regstart[mcnt] = regstart[mcnt];
+                          best_regend[mcnt] = regend[mcnt];
+                        }
+                    }
+                  goto fail;
+                }
+
+              /* If no failure points, don't restore garbage.  */
+              else if (best_regs_set)
+                {
+            restore_best_regs:
+                  /* Restore best match.  It may happen that `dend ==
+                     end_match_1' while the restored d is in string2.
+                     For example, the pattern `x.*y.*z' against the
+                     strings `x-' and `y-z-', if the two strings are
+                     not consecutive in memory.  */
+                  DEBUG_PRINT1 ("Restoring best registers.\n");
+
+                  d = match_end;
+                  dend = ((d >= string1 && d <= end1)
+                   ? end_match_1 : end_match_2);
+
+          for (mcnt = 1; mcnt < num_regs; mcnt++)
+            {
+              regstart[mcnt] = best_regstart[mcnt];
+              regend[mcnt] = best_regend[mcnt];
+            }
+                }
+            } /* d != end_match_2 */
+
+          DEBUG_PRINT1 ("Accepting match.\n");
+
+          /* If caller wants register contents data back, do it.  */
+          if (regs && !bufp->no_sub)
+        {
+              /* Have the register data arrays been allocated?  */
+              if (bufp->regs_allocated == REGS_UNALLOCATED)
+                { /* No.  So allocate them with malloc.  We need one
+                     extra element beyond `num_regs' for the `-1' marker
+                     GNU code uses.  */
+                  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+                  regs->start = TALLOC (regs->num_regs, regoff_t);
+                  regs->end = TALLOC (regs->num_regs, regoff_t);
+                  if (regs->start == NULL || regs->end == NULL)
+                    return -2;
+                  bufp->regs_allocated = REGS_REALLOCATE;
+                }
+              else if (bufp->regs_allocated == REGS_REALLOCATE)
+                { /* Yes.  If we need more elements than were already
+                     allocated, reallocate them.  If we need fewer, just
+                     leave it alone.  */
+                  if (regs->num_regs < num_regs + 1)
+                    {
+                      regs->num_regs = num_regs + 1;
+                      RETALLOC (regs->start, regs->num_regs, regoff_t);
+                      RETALLOC (regs->end, regs->num_regs, regoff_t);
+                      if (regs->start == NULL || regs->end == NULL)
+                        return -2;
+                    }
+                }
+              else
+                assert (bufp->regs_allocated == REGS_FIXED);
+
+              /* Convert the pointer data in `regstart' and `regend' to
+                 indices.  Register zero has to be set differently,
+                 since we haven't kept track of any info for it.  */
+              if (regs->num_regs > 0)
+                {
+                  regs->start[0] = pos;
+                  regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+                      : d - string2 + size1);
+                }
+
+              /* Go through the first `min (num_regs, regs->num_regs)'
+                 registers, since that is all we initialized.  */
+          for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+        {
+                  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+                    regs->start[mcnt] = regs->end[mcnt] = -1;
+                  else
+                    {
+              regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+                      regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+                    }
+        }
+
+              /* If the regs structure we return has more elements than
+                 were in the pattern, set the extra elements to -1.  If
+                 we (re)allocated the registers, this is the case,
+                 because we always allocate enough to have at least one
+                 -1 at the end.  */
+              for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+                regs->start[mcnt] = regs->end[mcnt] = -1;
+        } /* regs && !bufp->no_sub */
+
+          FREE_VARIABLES ();
+          DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+                        nfailure_points_pushed, nfailure_points_popped,
+                        nfailure_points_pushed - nfailure_points_popped);
+          DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+          mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+                ? string1
+                : string2 - size1);
+
+          DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+          return mcnt;
+        }
+
+      /* Otherwise match next pattern command.  */
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((re_opcode_t) *p++))
+#else
+      switch ((re_opcode_t) *p++)
+#endif
+    {
+        /* Ignore these.  Used to ignore the n of succeed_n's which
+           currently have n == 0.  */
+        case no_op:
+          DEBUG_PRINT1 ("EXECUTING no_op.\n");
+          break;
+
+
+        /* Match the next n pattern characters exactly.  The following
+           byte in the pattern defines n, and the n bytes after that
+           are the characters to match.  */
+    case exactn:
+      mcnt = *p++;
+          DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+          /* This is written out as an if-else so we don't waste time
+             testing `translate' inside the loop.  */
+          if (translate)
+        {
+          do
+        {
+          PREFETCH ();
+          if (translate[(unsigned char) *d++] != (char) *p++)
+                    goto fail;
+        }
+          while (--mcnt);
+        }
+      else
+        {
+          do
+        {
+          PREFETCH ();
+          if (*d++ != (char) *p++) goto fail;
+        }
+          while (--mcnt);
+        }
+      SET_REGS_MATCHED ();
+          break;
+
+
+        /* Match any character except possibly a newline or a null.  */
+    case anychar:
+          DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+          PREFETCH ();
+
+          if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+              || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+        goto fail;
+
+          SET_REGS_MATCHED ();
+          DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
+          d++;
+      break;
+
+
+    case charset:
+    case charset_not:
+      {
+        register unsigned char c;
+        boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+            DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+        PREFETCH ();
+        c = TRANSLATE (*d); /* The character to match.  */
+
+            /* Cast to `unsigned' instead of `unsigned char' in case the
+               bit list is a full 32 bytes long.  */
+        if (c < (unsigned) (*p * BYTEWIDTH)
+        && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+          not = !not;
+
+        p += 1 + *p;
+
+        if (!not) goto fail;
+
+        SET_REGS_MATCHED ();
+            d++;
+        break;
+      }
+
+
+        /* The beginning of a group is represented by start_memory.
+           The arguments are the register number in the next byte, and the
+           number of groups inner to this one in the next.  The text
+           matched within the group is recorded (in the internal
+           registers data structure) under the register number.  */
+        case start_memory:
+      DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+          /* Find out if this group can match the empty string.  */
+      p1 = p;		/* To send to group_match_null_string_p.  */
+
+          if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+            REG_MATCH_NULL_STRING_P (reg_info[*p])
+              = group_match_null_string_p (&p1, pend, reg_info);
+
+          /* Save the position in the string where we were the last time
+             we were at this open-group operator in case the group is
+             operated upon by a repetition operator, e.g., with `(a*)*b'
+             against `ab'; then we want to ignore where we are now in
+             the string in case this attempt to match fails.  */
+          old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+                             ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+                             : regstart[*p];
+      DEBUG_PRINT2 ("  old_regstart: %d\n",
+             POINTER_TO_OFFSET (old_regstart[*p]));
+
+          regstart[*p] = d;
+      DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+          IS_ACTIVE (reg_info[*p]) = 1;
+          MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+          /* This is the new highest active register.  */
+          highest_active_reg = *p;
+
+          /* If nothing was active before, this is the new lowest active
+             register.  */
+          if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+            lowest_active_reg = *p;
+
+          /* Move past the register number and inner group count.  */
+          p += 2;
+          break;
+
+
+        /* The stop_memory opcode represents the end of a group.  Its
+           arguments are the same as start_memory's: the register
+           number, and the number of inner groups.  */
+    case stop_memory:
+      DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+          /* We need to save the string position the last time we were at
+             this close-group operator in case the group is operated
+             upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+             against `aba'; then we want to ignore where we are now in
+             the string in case this attempt to match fails.  */
+          old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+                           ? REG_UNSET (regend[*p]) ? d : regend[*p]
+               : regend[*p];
+      DEBUG_PRINT2 ("      old_regend: %d\n",
+             POINTER_TO_OFFSET (old_regend[*p]));
+
+          regend[*p] = d;
+      DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+          /* This register isn't active anymore.  */
+          IS_ACTIVE (reg_info[*p]) = 0;
+
+          /* If this was the only register active, nothing is active
+             anymore.  */
+          if (lowest_active_reg == highest_active_reg)
+            {
+              lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+              highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+            }
+          else
+            { /* We must scan for the new highest active register, since
+                 it isn't necessarily one less than now: consider
+                 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
+                 new highest active register is 1.  */
+              unsigned char r = *p - 1;
+              while (r > 0 && !IS_ACTIVE (reg_info[r]))
+                r--;
+
+              /* If we end up at register zero, that means that we saved
+                 the registers as the result of an `on_failure_jump', not
+                 a `start_memory', and we jumped to past the innermost
+                 `stop_memory'.  For example, in ((.)*) we save
+                 registers 1 and 2 as a result of the *, but when we pop
+                 back to the second ), we are at the stop_memory 1.
+                 Thus, nothing is active.  */
+          if (r == 0)
+                {
+                  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+                  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+                }
+              else
+                highest_active_reg = r;
+            }
+
+          /* If just failed to match something this time around with a
+             group that's operated on by a repetition operator, try to
+             force exit from the ``loop'', and restore the register
+             information for this group that we had before trying this
+             last match.  */
+          if ((!MATCHED_SOMETHING (reg_info[*p])
+               || (re_opcode_t) p[-3] == start_memory)
+          && (p + 2) < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              p1 = p + 2;
+              mcnt = 0;
+              switch ((re_opcode_t) *p1++)
+                {
+                  case jump_n:
+            is_a_jump_n = true;
+                  case pop_failure_jump:
+          case maybe_pop_jump:
+          case jump:
+          case dummy_failure_jump:
+                    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+            if (is_a_jump_n)
+              p1 += 2;
+                    break;
+
+                  default:
+                    /* do nothing */ ;
+                }
+          p1 += mcnt;
+
+              /* If the next operation is a jump backwards in the pattern
+             to an on_failure_jump right before the start_memory
+                 corresponding to this stop_memory, exit from the loop
+                 by forcing a failure after pushing on the stack the
+                 on_failure_jump's jump in the pattern, and d.  */
+              if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+                  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+        {
+                  /* If this group ever matched anything, then restore
+                     what its registers were before trying this last
+                     failed match, e.g., with `(a*)*b' against `ab' for
+                     regstart[1], and, e.g., with `((a*)*(b*)*)*'
+                     against `aba' for regend[3].
+
+                     Also restore the registers for inner groups for,
+                     e.g., `((a*)(b*))*' against `aba' (register 3 would
+                     otherwise get trashed).  */
+
+                  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+            {
+              unsigned r;
+
+                      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+              /* Restore this and inner groups' (if any) registers.  */
+                      for (r = *p; r < *p + *(p + 1); r++)
+                        {
+                          regstart[r] = old_regstart[r];
+
+                          /* xx why this test?  */
+                          if ((int) old_regend[r] >= (int) regstart[r])
+                            regend[r] = old_regend[r];
+                        }
+                    }
+          p1++;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+                  goto fail;
+                }
+            }
+
+          /* Move past the register number and the inner group count.  */
+          p += 2;
+          break;
+
+
+    /* \<digit> has been turned into a `duplicate' command which is
+           followed by the numeric value of <digit> as the register number.  */
+        case duplicate:
+      {
+        register const char *d2, *dend2;
+        int regno = *p++;   /* Get which register to match against.  */
+        DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+        /* Can't back reference a group which we've never matched.  */
+            if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+              goto fail;
+
+            /* Where in input to try to start matching.  */
+            d2 = regstart[regno];
+
+            /* Where to stop matching; if both the place to start and
+               the place to stop matching are in the same string, then
+               set to the place to stop, otherwise, for now have to use
+               the end of the first string.  */
+
+            dend2 = ((FIRST_STRING_P (regstart[regno])
+              == FIRST_STRING_P (regend[regno]))
+             ? regend[regno] : end_match_1);
+        for (;;)
+          {
+        /* If necessary, advance to next segment in register
+                   contents.  */
+        while (d2 == dend2)
+          {
+            if (dend2 == end_match_2) break;
+            if (dend2 == regend[regno]) break;
+
+                    /* End of string1 => advance to string2. */
+                    d2 = string2;
+                    dend2 = regend[regno];
+          }
+        /* At end of register contents => success */
+        if (d2 == dend2) break;
+
+        /* If necessary, advance to next segment in data.  */
+        PREFETCH ();
+
+        /* How many characters left in this segment to match.  */
+        mcnt = dend - d;
+
+        /* Want how many consecutive characters we can match in
+                   one shot, so, if necessary, adjust the count.  */
+                if (mcnt > dend2 - d2)
+          mcnt = dend2 - d2;
+
+        /* Compare that many; failure if mismatch, else move
+                   past them.  */
+        if (translate
+                    ? bcmp_translate (d, d2, mcnt, translate)
+                    : bcmp (d, d2, mcnt))
+          goto fail;
+        d += mcnt, d2 += mcnt;
+          }
+      }
+      break;
+
+
+        /* begline matches the empty string at the beginning of the string
+           (unless `not_bol' is set in `bufp'), and, if
+           `newline_anchor' is set, after newlines.  */
+    case begline:
+          DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+          if (AT_STRINGS_BEG (d))
+            {
+              if (!bufp->not_bol) break;
+            }
+          else if (d[-1] == '\n' && bufp->newline_anchor)
+            {
+              break;
+            }
+          /* In all other cases, we fail.  */
+          goto fail;
+
+
+        /* endline is the dual of begline.  */
+    case endline:
+          DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+          if (AT_STRINGS_END (d))
+            {
+              if (!bufp->not_eol) break;
+            }
+
+          /* We have to ``prefetch'' the next character.  */
+          else if ((d == end1 ? *string2 : *d) == '\n'
+                   && bufp->newline_anchor)
+            {
+              break;
+            }
+          goto fail;
+
+
+    /* Match at the very beginning of the data.  */
+        case begbuf:
+          DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+          if (AT_STRINGS_BEG (d))
+            break;
+          goto fail;
+
+
+    /* Match at the very end of the data.  */
+        case endbuf:
+          DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+      if (AT_STRINGS_END (d))
+        break;
+          goto fail;
+
+
+        /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
+           pushes NULL as the value for the string on the stack.  Then
+           `pop_failure_point' will keep the current value for the
+           string, instead of restoring it.  To see why, consider
+           matching `foo\nbar' against `.*\n'.  The .* matches the foo;
+           then the . fails against the \n.  But the next thing we want
+           to do is match the \n against the \n; if we restored the
+           string value, we would be back at the foo.
+
+           Because this is used only in specific cases, we don't need to
+           check all the things that `on_failure_jump' does, to make
+           sure the right things get saved on the stack.  Hence we don't
+           share its code.  The only reason to push anything on the
+           stack at all is that otherwise we would have to change
+           `anychar's code to do something besides goto fail in this
+           case; that seems worse than this.  */
+        case on_failure_keep_string_jump:
+          DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+          EXTRACT_NUMBER_AND_INCR (mcnt, p);
+          DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+          PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+          break;
+
+
+    /* Uses of on_failure_jump:
+
+           Each alternative starts with an on_failure_jump that points
+           to the beginning of the next alternative.  Each alternative
+           except the last ends with a jump that in effect jumps past
+           the rest of the alternatives.  (They really jump to the
+           ending jump of the following alternative, because tensioning
+           these jumps is a hassle.)
+
+           Repeats start with an on_failure_jump that points past both
+           the repetition text and either the following jump or
+           pop_failure_jump back to this on_failure_jump.  */
+    case on_failure_jump:
+        on_failure:
+          DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+          EXTRACT_NUMBER_AND_INCR (mcnt, p);
+          DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+          /* If this on_failure_jump comes right before a group (i.e.,
+             the original * applied to a group), save the information
+             for that group and all inner ones, so that if we fail back
+             to this point, the group's information will be correct.
+             For example, in \(a*\)*\1, we need the preceding group,
+             and in \(\(a*\)b*\)\2, we need the inner group.  */
+
+          /* We can't use `p' to check ahead because we push
+             a failure point to `p + mcnt' after we do this.  */
+          p1 = p;
+
+          /* We need to skip no_op's before we look for the
+             start_memory in case this on_failure_jump is happening as
+             the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+             against aba.  */
+          while (p1 < pend && (re_opcode_t) *p1 == no_op)
+            p1++;
+
+          if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+            {
+              /* We have a new highest active register now.  This will
+                 get reset at the start_memory we are about to get to,
+                 but we will have saved all the registers relevant to
+                 this repetition op, as described above.  */
+              highest_active_reg = *(p1 + 1) + *(p1 + 2);
+              if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+                lowest_active_reg = *(p1 + 1);
+            }
+
+          DEBUG_PRINT1 (":\n");
+          PUSH_FAILURE_POINT (p + mcnt, d, -2);
+          break;
+
+
+        /* A smart repeat ends with `maybe_pop_jump'.
+       We change it to either `pop_failure_jump' or `jump'.  */
+        case maybe_pop_jump:
+          EXTRACT_NUMBER_AND_INCR (mcnt, p);
+          DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+          {
+        register unsigned char *p2 = p;
+
+            /* Compare the beginning of the repeat with what follows
+               its end in the pattern.  If we can establish that there
+               is nothing that they would both match, i.e., nothing
+               that would force us to backtrack (as there is in, e.g.,
+               `a*a'), then we can change to pop_failure_jump, because
+               we'll never have to backtrack.
+
+               This is not true in the case of alternatives: in
+               `(a|ab)*' we do need to backtrack to the `ab' alternative
+               (e.g., if the string was `ab').  But instead of trying to
+               detect that here, the alternative has put on a dummy
+               failure point which is what we will end up popping.  */
+
+        /* Skip over open/close-group commands.  */
+        while (p2 + 2 < pend
+           && ((re_opcode_t) *p2 == stop_memory
+               || (re_opcode_t) *p2 == start_memory))
+          p2 += 3;			/* Skip over args, too.  */
+
+            /* If we're at the end of the pattern, we can change.  */
+            if (p2 == pend)
+          {
+        /* Consider what happens when matching ":\(.*\)"
+           against ":/".  I don't really understand this code
+           yet.  */
+            p[-3] = (unsigned char) pop_failure_jump;
+                DEBUG_PRINT1
+                  ("  End of pattern: change to `pop_failure_jump'.\n");
+              }
+
+            else if ((re_opcode_t) *p2 == exactn
+             || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+          {
+        register unsigned char c
+                  = *p2 == (unsigned char) endline ? '\n' : p2[2];
+        p1 = p + mcnt;
+
+                /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+                   to the `maybe_pop_jump' of this case.  Examine what
+                   follows.  */
+                if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+                  {
+            p[-3] = (unsigned char) pop_failure_jump;
+                    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
+                                  c, p1[5]);
+                  }
+
+        else if ((re_opcode_t) p1[3] == charset
+             || (re_opcode_t) p1[3] == charset_not)
+          {
+            int not = (re_opcode_t) p1[3] == charset_not;
+
+            if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+            && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+              not = !not;
+
+                    /* `not' is equal to 1 if c would match, which means
+                        that we can't change to pop_failure_jump.  */
+            if (!not)
+                      {
+                p[-3] = (unsigned char) pop_failure_jump;
+                        DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
+                      }
+          }
+          }
+      }
+      p -= 2;		/* Point at relative address again.  */
+      if ((re_opcode_t) p[-1] != pop_failure_jump)
+        {
+          p[-1] = (unsigned char) jump;
+              DEBUG_PRINT1 ("  Match => jump.\n");
+          goto unconditional_jump;
+        }
+        /* Note fall through.  */
+
+
+    /* The end of a simple repeat has a pop_failure_jump back to
+           its matching on_failure_jump, where the latter will push a
+           failure point.  The pop_failure_jump takes off failure
+           points put on by this pop_failure_jump's matching
+           on_failure_jump; we got through the pattern to here from the
+           matching on_failure_jump, so didn't fail.  */
+        case pop_failure_jump:
+          {
+            /* We need to pass separate storage for the lowest and
+               highest registers, even though we don't care about the
+               actual values.  Otherwise, we will restore only one
+               register from the stack, since lowest will == highest in
+               `pop_failure_point'.  */
+            unsigned dummy_low_reg, dummy_high_reg;
+            unsigned char *pdummy;
+            const char *sdummy;
+
+            DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+            POP_FAILURE_POINT (sdummy, pdummy,
+                               dummy_low_reg, dummy_high_reg,
+                               reg_dummy, reg_dummy, reg_info_dummy);
+          }
+          /* Note fall through.  */
+
+
+        /* Unconditionally jump (without popping any failure points).  */
+        case jump:
+    unconditional_jump:
+      EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
+          DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+      p += mcnt;				/* Do the jump.  */
+          DEBUG_PRINT2 ("(to 0x%x).\n", p);
+      break;
+
+
+        /* We need this opcode so we can detect where alternatives end
+           in `group_match_null_string_p' et al.  */
+        case jump_past_alt:
+          DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+          goto unconditional_jump;
+
+
+        /* Normally, the on_failure_jump pushes a failure point, which
+           then gets popped at pop_failure_jump.  We will end up at
+           pop_failure_jump, also, and with a pattern of, say, `a+', we
+           are skipping over the on_failure_jump, so we have to push
+           something meaningless for pop_failure_jump to pop.  */
+        case dummy_failure_jump:
+          DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+          /* It doesn't matter what we push for the string here.  What
+             the code at `fail' tests is the value for the pattern.  */
+          PUSH_FAILURE_POINT (0, 0, -2);
+          goto unconditional_jump;
+
+
+        /* At the end of an alternative, we need to push a dummy failure
+           point in case we are followed by a `pop_failure_jump', because
+           we don't want the failure point for the alternative to be
+           popped.  For example, matching `(a|ab)*' against `aab'
+           requires that we match the `ab' alternative.  */
+        case push_dummy_failure:
+          DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+          /* See comments just above at `dummy_failure_jump' about the
+             two zeroes.  */
+          PUSH_FAILURE_POINT (0, 0, -2);
+          break;
+
+        /* Have to succeed matching what follows at least n times.
+           After that, handle like `on_failure_jump'.  */
+        case succeed_n:
+          EXTRACT_NUMBER (mcnt, p + 2);
+          DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+          assert (mcnt >= 0);
+          /* Originally, this is how many times we HAVE to succeed.  */
+          if (mcnt > 0)
+            {
+               mcnt--;
+           p += 2;
+               STORE_NUMBER_AND_INCR (p, mcnt);
+               DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
+            }
+      else if (mcnt == 0)
+            {
+              DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
+          p[2] = (unsigned char) no_op;
+              p[3] = (unsigned char) no_op;
+              goto on_failure;
+            }
+          break;
+
+        case jump_n:
+          EXTRACT_NUMBER (mcnt, p + 2);
+          DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+          /* Originally, this is how many times we CAN jump.  */
+          if (mcnt)
+            {
+               mcnt--;
+               STORE_NUMBER (p + 2, mcnt);
+           goto unconditional_jump;
+            }
+          /* If we don't have to jump any more, skip over the rest of the command.  */
+      else
+        p += 4;
+          break;
+
+    case set_number_at:
+      {
+            DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+            EXTRACT_NUMBER_AND_INCR (mcnt, p);
+            p1 = p + mcnt;
+            EXTRACT_NUMBER_AND_INCR (mcnt, p);
+            DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
+        STORE_NUMBER (p1, mcnt);
+            break;
+          }
+
+        case wordbound:
+          DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+          if (AT_WORD_BOUNDARY (d))
+        break;
+          goto fail;
+
+    case notwordbound:
+          DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+      if (AT_WORD_BOUNDARY (d))
+        goto fail;
+          break;
+
+    case wordbeg:
+          DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+      if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+        break;
+          goto fail;
+
+    case wordend:
+          DEBUG_PRINT1 ("EXECUTING wordend.\n");
+      if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+              && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+        break;
+          goto fail;
+
+#ifdef emacs
+#ifdef emacs19
+    case before_dot:
+          DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+      if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+        goto fail;
+      break;
+
+    case at_dot:
+          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+      if (PTR_CHAR_POS ((unsigned char *) d) != point)
+        goto fail;
+      break;
+
+    case after_dot:
+          DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+          if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+        goto fail;
+      break;
+#else /* not emacs19 */
+    case at_dot:
+          DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+      if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+        goto fail;
+      break;
+#endif /* not emacs19 */
+
+    case syntaxspec:
+          DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+      mcnt = *p++;
+      goto matchsyntax;
+
+        case wordchar:
+          DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+      mcnt = (int) Sword;
+        matchsyntax:
+      PREFETCH ();
+      if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+            goto fail;
+          SET_REGS_MATCHED ();
+      break;
+
+    case notsyntaxspec:
+          DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+      mcnt = *p++;
+      goto matchnotsyntax;
+
+        case notwordchar:
+          DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+      mcnt = (int) Sword;
+        matchnotsyntax:
+      PREFETCH ();
+      if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+            goto fail;
+      SET_REGS_MATCHED ();
+          break;
+
+#else /* not emacs */
+    case wordchar:
+          DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+      PREFETCH ();
+          if (!WORDCHAR_P (d))
+            goto fail;
+      SET_REGS_MATCHED ();
+          d++;
+      break;
+
+    case notwordchar:
+          DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+      PREFETCH ();
+      if (WORDCHAR_P (d))
+            goto fail;
+          SET_REGS_MATCHED ();
+          d++;
+      break;
+#endif /* not emacs */
+
+        default:
+          abort ();
+    }
+      continue;  /* Successfully executed one pattern command; keep going.  */
+
+
+    /* We goto here if a matching operation fails. */
+    fail:
+      if (!FAIL_STACK_EMPTY ())
+    { /* A restart point is known.  Restore to that state.  */
+          DEBUG_PRINT1 ("\nFAIL:\n");
+          POP_FAILURE_POINT (d, p,
+                             lowest_active_reg, highest_active_reg,
+                             regstart, regend, reg_info);
+
+          /* If this failure point is a dummy, try the next one.  */
+          if (!p)
+        goto fail;
+
+          /* If we failed to the end of the pattern, don't examine *p.  */
+      assert (p <= pend);
+          if (p < pend)
+            {
+              boolean is_a_jump_n = false;
+
+              /* If failed to a backwards jump that's part of a repetition
+                 loop, need to pop this failure point and use the next one.  */
+              switch ((re_opcode_t) *p)
+                {
+                case jump_n:
+                  is_a_jump_n = true;
+                case maybe_pop_jump:
+                case pop_failure_jump:
+                case jump:
+                  p1 = p + 1;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  p1 += mcnt;
+
+                  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+                      || (!is_a_jump_n
+                          && (re_opcode_t) *p1 == on_failure_jump))
+                    goto fail;
+                  break;
+                default:
+                  /* do nothing */ ;
+                }
+            }
+
+          if (d >= string1 && d <= end1)
+        dend = end_match_1;
+        }
+      else
+        break;   /* Matching at this starting point really fails.  */
+    } /* for (;;) */
+
+  if (best_regs_set)
+    goto restore_best_regs;
+
+  FREE_VARIABLES ();
+
+  return -1;         			/* Failure to match.  */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2.  */
+
+
+/* We are passed P pointing to a register number after a start_memory.
+
+   Return true if the pattern up to the corresponding stop_memory can
+   match the empty string, and false otherwise.
+
+   If we find the matching stop_memory, sets P to point to one past its number.
+   Otherwise, sets P to an undefined byte less than or equal to END.
+
+   We don't handle duplicates properly (yet).  */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+    unsigned char **p, *end;
+    register_info_type *reg_info;
+{
+  int mcnt;  /* Most recently extracted two-byte operand.  */
+  /* Point to after the args to the start_memory.  */
+  unsigned char *p1 = *p + 2;
+
+  while (p1 < end)
+    {
+      /* Skip over opcodes that can match nothing, and return true or
+         false, as appropriate, when we get to one that can't, or to the
+         matching stop_memory.  */
+
+      switch ((re_opcode_t) *p1)
+        {
+        /* Could be either a loop or a series of alternatives.  */
+        case on_failure_jump:
+          p1++;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          /* Only handle the case where the next operation is not a jump
+             backwards in the pattern; a negative offset is just stepped
+             over, since p1 already points past the operand.  */
+
+      if (mcnt >= 0)
+        {
+              /* Go through the on_failure_jumps of the alternatives,
+                 seeing if any of the alternatives cannot match nothing.
+                 The last alternative starts with only a jump,
+                 whereas the rest start with on_failure_jump and end
+                 with a jump, e.g., here is the pattern for `a|b|c':
+
+                 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+                 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+                 /exactn/1/c
+
+                 So, we have to first go through the first (n-1)
+                 alternatives and then deal with the last one separately.  */
+
+
+              /* Deal with the first (n-1) alternatives, which start
+                 with an on_failure_jump (see above) that jumps to right
+                 past a jump_past_alt.  */
+
+              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+                {
+                  /* `mcnt' holds how many bytes long the alternative
+                     is, including the ending `jump_past_alt' and
+                     its number.  */
+
+                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+                                      reg_info))
+                    return false;
+
+                  /* Move to right after this alternative, including the
+                     jump_past_alt.  */
+                  p1 += mcnt;
+
+                  /* Break if it's the beginning of an n-th alternative
+                     that doesn't begin with an on_failure_jump.  */
+                  if ((re_opcode_t) *p1 != on_failure_jump)
+                    break;
+
+                  /* Still have to check that it's not an n-th
+                     alternative that starts with an on_failure_jump.  */
+          p1++;
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+                    {
+                      /* Get back to the beginning of the n-th alternative (undo the 3 bytes just read).  */
+                      p1 -= 3;
+                      break;
+                    }
+                }
+
+              /* Deal with the last alternative: go back and get number
+                 of the `jump_past_alt' just before it.  `mcnt' contains
+                 the length of the alternative.  */
+              EXTRACT_NUMBER (mcnt, p1 - 2);
+
+              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+                return false;
+
+              p1 += mcnt;	/* Get past the n-th alternative.  */
+            } /* if mcnt >= 0 */
+          break;
+
+        /* The matching stop_memory: its number must equal the one at *P.  */
+        case stop_memory:
+      assert (p1[1] == **p);
+          *p = p1 + 2;
+          return true;
+
+        /* Everything else goes to the shared routine, which advances p1 on success.  */
+        default:
+          if (!common_op_match_null_string_p (&p1, end, reg_info))
+            return false;
+        }
+    } /* while p1 < end */
+  /* Reached END without finding the matching stop_memory.  */
+  return false;
+} /* group_match_null_string_p */
+
+
+/* Similar to group_match_null_string_p, but doesn't deal with alternatives:
+   It expects P to be the first byte of a single alternative and END one
+   byte past the last. The alternative can contain groups.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+    unsigned char *p, *end;
+    register_info_type *reg_info;
+{
+  unsigned char *cursor = p;    /* Next opcode to examine.  */
+  int skip;                     /* Operand of an on_failure_jump.  */
+
+  /* Consume the alternative one opcode at a time.  Getting all the way
+     to END means no opcode objected, i.e. the whole alternative is
+     able to match the empty string.  */
+  while (cursor < end)
+    {
+      if ((re_opcode_t) *cursor == on_failure_jump)
+        {
+          /* It's a loop: step over the opcode, read its two-byte
+             operand, and carry on from the position it designates.  */
+          cursor++;
+          EXTRACT_NUMBER_AND_INCR (skip, cursor);
+          cursor += skip;
+        }
+      else if (!common_op_match_null_string_p (&cursor, end, reg_info))
+        {
+          /* The shared routine found an opcode that cannot match nothing.  */
+          return false;
+        }
+    }
+
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+   alt_match_null_string_p.
+
+   Sets P to one after the op and its arguments, if any.  */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+    unsigned char **p, *end;
+    register_info_type *reg_info;
+{
+  int mcnt;               /* Two-byte operand extracted from the pattern.  */
+  boolean ret;            /* Result of checking a nested group.  */
+  int reg_no;             /* Register number of a nested group.  */
+  unsigned char *p1 = *p; /* Working position; stored back into *P only on success.  */
+  /* Dispatch on the opcode; p1 moves on to its first operand byte.  */
+  switch ((re_opcode_t) *p1++)
+    {
+    case no_op:
+    case begline:
+    case endline:
+    case begbuf:
+    case endbuf:
+    case wordbeg:
+    case wordend:
+    case wordbound:
+    case notwordbound:
+#ifdef emacs
+    case before_dot:
+    case at_dot:
+    case after_dot:
+#endif
+      break;  /* No operands are skipped; each of these can match the empty string.  */
+    /* A nested group: recurse; on success p1 is left past its stop_memory.  */
+    case start_memory:
+      reg_no = *p1;
+      assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+      ret = group_match_null_string_p (&p1, end, reg_info);
+
+      /* Have to set this here in case we're checking a group which
+         contains a group and a back reference to it.  Only an entry that
+         is still unset is written, so an earlier recorded value is kept.  */
+      if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+        REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+      if (!ret)
+        return false;
+      break;
+    /* If this is an optimized succeed_n for zero times, make the jump;
+       a backward (negative) jump is reported as not matching empty.  */
+    case jump:
+      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+      if (mcnt >= 0)
+        p1 += mcnt;
+      else
+        return false;
+      break;
+    /* Operands as read here: a two-byte offset, then a two-byte count.  */
+    case succeed_n:
+      /* Get to the number of times to succeed.  */
+      p1 += 2;
+      EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+      /* Zero required matches: go back to the first operand and follow it.  */
+      if (mcnt == 0)
+        {
+          p1 -= 4;
+          EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+          p1 += mcnt;
+        }
+      else
+        return false;
+      break;
+    /* A back reference can match nothing only if its register could.  */
+    case duplicate:
+      if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+        return false;
+      break;  /* NOTE(review): p1 is not advanced past the register number here -- confirm.  */
+    /* Falls through to `default': the p1 adjustment below is never stored.  */
+    case set_number_at:
+      p1 += 4;
+      /* FALLTHROUGH */
+    default:
+      /* All other opcodes mean we cannot match the empty string.  */
+      return false;
+  }
+  /* Success: publish the advanced position to the caller.  */
+  *p = p1;
+  return true;
+} /* common_op_match_null_string_p */
+
+
+/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
+   bytes; nonzero otherwise.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     char *translate;
+{
+  register unsigned char *lhs = s1, *rhs = s2;
+
+  /* Compare through the table; stop at the first mapped mismatch.  */
+  for (; len; len--, lhs++, rhs++)
+    if (translate[*lhs] != translate[*rhs])
+      return 1;
+  return 0;
+}
+
+/* Entry points for GNU code.  */
+
+/* re_compile_pattern is the GNU regular expression compiler: it
+   compiles PATTERN (of length SIZE) and puts the result in BUFP.
+   Returns 0 if the pattern was valid, otherwise an error string.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   are set in BUFP on entry.
+
+   We call regex_compile to do the actual compilation.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     int length;
+     struct re_pattern_buffer *bufp;
+{
+  reg_errcode_t status;
+
+  /* Anchors are allowed to match at newlines.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU callers ask for register information by handing a non-null REGS
+     to re_match and friends, never by way of no_sub, so keep it clear.  */
+  bufp->no_sub = 0;
+
+  /* GNU code is written to assume at least RE_NREGS registers will be
+     set (and at least one extra will be -1).  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+
+  status = regex_compile (pattern, length, re_syntax_options, bufp);
+
+  /* The message table is indexed by the error code.  */
+  return re_error_msg[(int) status];
+}
+
+/* Entry points compatible with 4.2 BSD regex library.  We don't define
+   them if this is an Emacs or POSIX compilation.  */
+
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+
+/* BSD has one and only one pattern buffer.  */
+static struct re_pattern_buffer re_comp_buf;
+
+char *
+re_comp (s)
+    const char *s;
+{
+  reg_errcode_t status;
+
+  /* A null S compiles nothing; it only asks whether the pattern buffer
+     has been set up by an earlier call.  */
+  if (s == NULL)
+    return re_comp_buf.buffer ? (char *) 0 : "No previous regular expression";
+
+  /* First use: give the single static buffer some initial pattern
+     space and a fastmap of (1 << BYTEWIDTH) bytes.  */
+  if (re_comp_buf.buffer == NULL)
+    {
+      unsigned char *space = (unsigned char *) malloc (200);
+
+      re_comp_buf.buffer = space;
+      if (space == NULL)
+        return "Memory exhausted";
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (!re_comp_buf.fastmap)
+        return "Memory exhausted";
+    }
+
+  /* `re_exec' always passes NULL for `regs', so the buffer fields that
+     only influence register reporting need no initialisation here.  */
+  /* Let anchors match at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  status = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+  /* The message table is const; this interface's return type is not.  */
+  return (char *) re_error_msg[(int) status];
+}
+
+
+int
+re_exec (s)
+    const char *s;
+{
+  /* Search all of S with the buffer re_comp filled in; 1 on a match.  */
+  const int size = strlen (s);
+  return re_search (&re_comp_buf, s, size, 0, size,
+                    (struct re_registers *) 0) >= 0;
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+/* POSIX.2 functions.  Don't define these for Emacs.  */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+   PREG is a regex_t *.  We do not expect any fields to be initialized,
+   since POSIX says we shouldn't.  Thus, we set
+
+     `buffer' to the compiled pattern;
+     `used' to the length of the compiled pattern;
+     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+       REG_EXTENDED bit in CFLAGS is set; otherwise, to
+       RE_SYNTAX_POSIX_BASIC;
+     `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+     `fastmap' and `fastmap_accurate' to zero;
+     `re_nsub' to the number of subexpressions in PATTERN.
+
+   PATTERN is the address of the pattern string.
+
+   CFLAGS is a series of bits which affect compilation.
+
+     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+     use POSIX basic syntax.
+
+     If REG_NEWLINE is set, then . and [^...] don't match newline.
+     Also, regexec will try a match beginning after every newline.
+
+     If REG_ICASE is set, then we consider upper- and lowercase
+     versions of letters to be equivalent when matching.
+
+     If REG_NOSUB is set, then when PREG is passed to regexec, that
+     routine will report only success or failure, and nothing about the
+     registers.
+
+   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
+   the return codes and their meanings.)  */
+
+int
+regcomp (preg, pattern, cflags)
+    regex_t *preg;
+    const char *pattern;
+    int cflags;
+{
+  reg_errcode_t status;
+  unsigned syntax = RE_SYNTAX_POSIX_BASIC;
+
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+
+  /* Start with no pattern space; regex_compile allocates it.  */
+  preg->allocated = 0;
+  preg->buffer = 0;
+
+  /* Searching without a fastmap keeps REG_NEWLINE simple: with one we
+     would have to add every character that can follow a newline, so
+     instead every starting position is simply tried.  */
+  preg->fastmap = 0;
+
+  /* REG_ICASE is implemented with a translate table that maps each
+     uppercase character to its lowercase counterpart.  */
+  if (!(cflags & REG_ICASE))
+    preg->translate = NULL;
+  else
+    {
+      unsigned c;
+
+      preg->translate = (char *) malloc (CHAR_SET_SIZE);
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      for (c = 0; c < CHAR_SET_SIZE; c++)
+        {
+          if (ISUPPER (c))
+            preg->translate[c] = tolower (c);
+          else
+            preg->translate[c] = c;
+        }
+    }
+
+  /* REG_NEWLINE: neither . nor [^...] may match a newline, and the
+     matching behavior changes too (anchors match at newlines).  */
+  if (cflags & REG_NEWLINE)
+    {
+      syntax = (syntax & ~RE_DOT_NEWLINE) | RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+  else
+    preg->newline_anchor = 0;
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+
+  /* A null byte ends a POSIX pattern, so strlen gives its length.  */
+  status = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX reports unmatched open- and close-groups alike as REG_EPAREN.  */
+  return (int) (status == REG_ERPAREN ? REG_EPAREN : status);
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+    const regex_t *preg;
+    const char *string;
+    size_t nmatch;
+    regmatch_t pmatch[];
+    int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* PREG is const, so the per-call flags go into a private copy.  */
+  private_preg = *preg;
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+  /* The user has told us, via `nmatch', exactly how many registers to
+     report; we have to pass that on to the matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* Release whichever array was obtained instead of leaking it.  */
+          if (regs.start != NULL)
+            free (regs.start);
+          if (regs.end != NULL)
+            free (regs.end);
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Search the whole string: start at 0, range LEN.  */
+  ret = re_search (&private_preg, string, len, 0, len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+  if (want_reg_info)
+    {
+      /* On a match, copy the register offsets to the POSIX structure.  */
+      if (ret >= 0)
+        {
+          unsigned r;
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+   from either regcomp or regexec.   We don't use PREG here.  */
+
+size_t
+regerror (errcode_v, preg, errbuf, errbuf_size)
+    int errcode_v;
+    const regex_t *preg;
+    char *errbuf;
+    size_t errbuf_size;
+{
+  const char *text;
+  size_t needed;
+
+  /* Only codes produced by the rest of this library are legitimate
+     here.  Anything outside the message table means the program has
+     a bug, so dump core where it can be found and fixed.  */
+  if (errcode_v < 0
+      || errcode_v >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+    abort ();
+
+  /* A null table entry gets a friendly default (POSIX requires none).  */
+  text = re_error_msg[errcode_v];
+  if (text == NULL)
+    text = "Success";
+
+  /* Space the complete message occupies, terminating null included.  */
+  needed = strlen (text) + 1;
+
+  /* A zero-sized buffer receives nothing.  Otherwise copy what fits;
+     a truncated copy is still null-terminated.  */
+  if (errbuf_size != 0)
+    {
+      if (needed <= errbuf_size)
+        strcpy (errbuf, text);
+      else
+        {
+          strncpy (errbuf, text, errbuf_size - 1);
+          errbuf[errbuf_size - 1] = 0;
+        }
+    }
+
+  return needed;
+}
+
+
+/* Free dynamically allocated space used by PREG.  */
+
+void
+regfree (preg)
+    regex_t *preg;
+{
+  /* Compiled pattern space and its bookkeeping.  */
+  if (preg->buffer)
+    free (preg->buffer);
+  preg->buffer = NULL;
+  preg->used = 0;
+  preg->allocated = 0;
+  /* Fastmap, together with the flag describing it.  */
+  if (preg->fastmap)
+    free (preg->fastmap);
+  preg->fastmap = NULL;
+  preg->fastmap_accurate = 0;
+  /* Translate table.  */
+  if (preg->translate)
+    free (preg->translate);
+  preg->translate = NULL;
+}
+
+#endif /* not emacs  */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
Index: branches/apertium-tagger/apertium2/apertium/win32/regex.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/regex.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/regex.h	(revision 69632)
@@ -0,0 +1,498 @@
+/* Definitions for data structures and routines for the regular
+   expression library, version 0.12.
+
+   Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+   <regex.h>.  */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+   should be there.  */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+   recognize.  The set/not-set meanings are chosen so that Emacs syntax
+   remains the value 0.  The bits are given in alphabetical order, and
+   the definitions shifted by one from the previous bit; thus, when we
+   add or remove a bit, only one other definition need change.  */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+   If set, then such a \ quotes the following character.  */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+	 literals.
+   If set, then \+ and \? are operators and + and ? are literals.  */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported.  They are:
+	 [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+	 [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+	 expressions, of course).
+   If this bit is not set, then it depends:
+		^  is an anchor if it is at the beginning of a regular
+		   expression or after an open-group or an alternation operator;
+		$  is an anchor if it is at the end of a regular expression, or
+		   before a close-group or an alternation operator.
+
+   This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+   POSIX draft 11.2 says that * etc. in leading positions is undefined.
+   We already implemented a previous draft which made those constructs
+   invalid, though, so we haven't changed the code back.  */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+	 regardless of where they are in the pattern.
+   If this bit is not set, then special characters are special only in
+	 some contexts; otherwise they are ordinary.  Specifically,
+	 * + ? and intervals are only special when not after the beginning,
+	 open-group, or alternation operator.  */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+	 immediately after an alternation or begin-group operator.  */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+   If not set, then it doesn't.  */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+   If not set, then it does.  */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+   If not set, they do.  */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+	 interval, depending on RE_NO_BK_BRACES.
+   If not set, \{, \}, {, and } are literals.  */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+   If not set, they are.  */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+   If not set, newline is literal.  */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+	 are literals.
+  If not set, then `\{...\}' defines an interval.  */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+   If not set, \(...\) defines a group, and ( and ) are literals.  */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+   If not set, then \<digit> is a back-reference.  */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+   If not set, then \| is an alternation operator, and | is literal.  */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating lower
+	 than the starting range point, as in [z-a], is invalid.
+   If not set, then when ending range point collates lower than the
+	 starting range point, the range is ignored.  */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+   If not set, then an unmatched ) is invalid.  */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+   some interfaces).  When a regexp is compiled, the syntax used is
+   stored in the pattern buffer, so changing this does not affect
+   already-compiled regexps.  */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+   (The [[[ comments delimit what gets put into the Texinfo file, so
+   don't delete them!)  */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK							\
+  (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL			\
+   | RE_NO_BK_PARENS            | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR               | RE_NO_EMPTY_RANGES			\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK 						\
+  (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP							\
+  (RE_BK_PLUS_QM              | RE_CHAR_CLASSES				\
+   | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS				\
+   | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP							\
+  (RE_CHAR_CLASSES        | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE			\
+   | RE_NEWLINE_ALT       | RE_NO_BK_PARENS				\
+   | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP						\
+  (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax.  */
+#define _RE_SYNTAX_POSIX_COMMON						\
+  (RE_CHAR_CLASSES | RE_DOT_NEWLINE      | RE_DOT_NOT_NULL		\
+   | RE_INTERVALS  | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC						\
+  (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+   RE_LIMITED_OPS, i.e., \? \+ \| are not recognized.  Actually, this
+   isn't minimal, since other operators, such as \`, aren't disabled.  */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED					\
+  (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INDEP_OPS  | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS       | RE_NO_BK_VBAR				\
+   | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+   replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED				\
+  (_RE_SYNTAX_POSIX_COMMON  | RE_CONTEXT_INDEP_ANCHORS			\
+   | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES				\
+   | RE_NO_BK_PARENS        | RE_NO_BK_REFS				\
+   | RE_NO_BK_VBAR	    | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow.  Some systems
+   (erroneously) define this in other header files, but we want our
+   value, so remove any previous define.  */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp').  */
+
+/* If this bit is set, then use extended regular expression syntax.
+   If not set, then use basic regular expression syntax.  */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+   If not set, then case is significant.  */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors do match at newline
+	 characters in the string.
+   If not set, then anchors do not match at newlines.  */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+   If not set, then regexec can also report where subexpressions matched.  */
+#define REG_NOSUB (REG_NEWLINE << 1)
+
+
+/* POSIX `eflags' bits (i.e., information for regexec).  */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+	 the beginning of the string (presumably because it's not the
+	 beginning of a line).
+   If not set, then the beginning-of-line operator does match the
+	 beginning of the string.  */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line.  */
+#define REG_NOTEOL (1 << 1)
+
+
+/* Error codes for the regex routines.  If any are removed, changed, or added,
+   update the `re_error_msg' table in regex.c, which is indexed by these.  */
+typedef enum
+{
+  REG_NOERROR = 0,	/* Success.  */
+  REG_NOMATCH,		/* Didn't find a match (for regexec).  */
+
+  /* POSIX regcomp return error codes.  (In the order listed in the
+	 standard.)  */
+  REG_BADPAT,		/* Invalid pattern.  */
+  REG_ECOLLATE,		/* Not implemented.  */
+  REG_ECTYPE,		/* Invalid character class name.  */
+  REG_EESCAPE,		/* Trailing backslash.  */
+  REG_ESUBREG,		/* Invalid back reference.  */
+  REG_EBRACK,		/* Unmatched left bracket.  */
+  REG_EPAREN,		/* Parenthesis imbalance.  */
+  REG_EBRACE,		/* Unmatched \{.  */
+  REG_BADBR,		/* Invalid contents of \{\}.  */
+  REG_ERANGE,		/* Invalid range end.  */
+  REG_ESPACE,		/* Ran out of memory.  */
+  REG_BADRPT,		/* No preceding re for repetition op.  */
+
+  /* Error codes we've added.  */
+  REG_EEND,		/* Premature end.  */
+  REG_ESIZE,		/* Compiled pattern bigger than 2^16 bytes.  */
+  REG_ERPAREN		/* Unmatched ) or \); regcomp returns REG_EPAREN instead.  */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern.  Before calling
+   the pattern compiler, the fields `buffer', `allocated', `fastmap',
+   `translate', and `no_sub' can be set.  After the pattern has been
+   compiled, the `re_nsub' field is available.  All other fields are
+   private to the regex routines.
+
+   The same structure is exposed to POSIX callers under the name
+   `regex_t' (see the typedef following the structure).  */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+  /* Space that holds the compiled pattern.  It is declared as
+     `unsigned char *' because its elements are sometimes used as
+     array indexes.  (May be set by the caller before compiling.)  */
+  unsigned char *buffer;
+
+  /* Number of bytes to which `buffer' points.  (May be set by the
+     caller before compiling.)  */
+  unsigned long allocated;
+
+  /* Number of bytes actually used in `buffer'.  */
+  unsigned long used;
+
+  /* Syntax setting with which the pattern was compiled.  */
+  reg_syntax_t syntax;
+
+  /* Pointer to a fastmap, if any, otherwise zero.  re_search uses
+     the fastmap, if there is one, to skip over impossible
+     starting points for matches.  (May be set by the caller before
+     compiling.)  */
+  char *fastmap;
+
+  /* Either a translate table to apply to all characters before
+     comparing them, or zero for no translation.  The translation
+     is applied to a pattern when it is compiled and to a string
+     when it is matched.  (May be set by the caller before
+     compiling.)  */
+  char *translate;
+
+  /* Number of subexpressions found by the compiler.  Valid once the
+     pattern has been compiled.  */
+  size_t re_nsub;
+
+  /* Zero if this pattern cannot match the empty string, one else.
+     Well, in truth it's used only in `re_search_2', to see
+     whether or not we should use the fastmap, so we don't set
+     this absolutely perfectly; see `re_compile_fastmap' (the
+     `duplicate' case).  */
+  unsigned can_be_null : 1;
+
+  /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+     for `max (RE_NREGS, re_nsub + 1)' groups.
+     If REGS_REALLOCATE, reallocate space if necessary.
+     If REGS_FIXED, use what's there.
+     The three macros below are the values this two-bit field takes;
+     they are defined here, next to the field they describe.  */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+  unsigned regs_allocated : 2;
+
+  /* Set to zero when `regex_compile' compiles a pattern; set to one
+     by `re_compile_fastmap' if it updates the fastmap.  */
+  unsigned fastmap_accurate : 1;
+
+  /* If set, `re_match_2' does not return information about
+     subexpressions.  (May be set by the caller before compiling.)  */
+  unsigned no_sub : 1;
+
+  /* If set, a beginning-of-line anchor doesn't match at the
+     beginning of the string.  */
+  unsigned not_bol : 1;
+
+  /* Similarly for an end-of-line anchor.  */
+  unsigned not_eol : 1;
+
+  /* If true, an anchor at a newline matches.  */
+  unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value.  It is
+   defined both in `regex.c' and here.  */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string.  POSIX mandates this.  */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in.  See
+   regex.texinfo for a full description of what registers match.
+   Unlike the POSIX `regmatch_t', this is a structure of arrays:
+   `start' and `end' are parallel arrays of offsets.  */
+struct re_registers
+{
+  unsigned num_regs;	/* Number of registers `start' and `end' hold.  */
+  regoff_t *start;	/* Start offsets, one entry per register.  */
+  regoff_t *end;	/* End offsets, one entry per register.  */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+   `re_match_2' returns information about at least this many registers
+   the first time a `regs' structure is passed.  */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers.  Aside from the different names than
+   `re_registers', POSIX uses an array of structures, instead of a
+   structure of arrays.  `regexec' receives such an array through its
+   `pmatch' parameter.  */
+typedef struct
+{
+  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
+  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
+} regmatch_t;
+
+/* Declarations for routines.  */
+
+/* To avoid duplicating every routine declaration -- once with a
+   prototype (if we are ANSI), and once without (if we aren't) -- we
+   use the following macro to declare argument types.  This
+   unfortunately clutters up the declarations a bit, but I think it's
+   worth it.
+
+   Prototypes are also kept whenever the header is compiled as C++:
+   there an empty parameter list means "takes no arguments", and a C++
+   compiler is not required to define __STDC__, so testing __STDC__
+   alone could silently turn every declaration below into a
+   no-argument function.  */
+
+#if defined (__cplusplus) || (defined (__STDC__) && __STDC__)
+
+#define _RE_ARGS(args) args
+
+#else /* neither C++ nor __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* neither C++ nor __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+   You can also simply assign to the `re_syntax_options' variable.  */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+   and syntax given by the global `re_syntax_options', into the buffer
+   BUFFER.  Return NULL if successful, and an error string if not.  */
+extern const char *re_compile_pattern
+  _RE_ARGS ((const char *pattern, int length,
+			 struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+   accelerate searches.  Return 0 if successful and -2 if there was an
+   internal error.  */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+   compiled into BUFFER.  Start searching at position START, for RANGE
+   characters.  Return the starting position of the match, -1 for no
+   match, or -2 for an internal error.  Also return register
+   information in REGS (if REGS is nonzero and BUFFER->no_sub is
+   zero).  */
+extern int re_search
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+			int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+   STRING2.  Also, stop searching at index START + STOP.  */
+extern int re_search_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+			 int length1, const char *string2, int length2,
+			 int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+   in BUFFER matched, starting at position START.  */
+extern int re_match
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+			 int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'.  */
+extern int re_match_2
+  _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+			 int length1, const char *string2, int length2,
+			 int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using BUFFER and REGS will use this memory
+   for recording register information.  STARTS and ENDS must be
+   allocated with malloc, and must each be at least `NUM_REGS * sizeof
+   (regoff_t)' bytes long.
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+extern void re_set_registers
+  _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+			 unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility.  */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility.  */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+  _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+			 regmatch_t pmatch[], int eflags));
+extern size_t regerror
+  _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+			 size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
Index: branches/apertium-tagger/apertium2/apertium/win32/runner_skeleton.c
===================================================================
--- branches/apertium-tagger/apertium2/apertium/win32/runner_skeleton.c	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/win32/runner_skeleton.c	(revision 69632)
@@ -0,0 +1,77 @@
+#include <stdio.h>
+#include <conio.h>
+#include <process.h>
+#include <direct.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+
+#define PATH_BUF_SIZE 8191
+#define NUM_EXEC_ARGS 3
+#define ARG_BUF_SIZE (8191 - NUM_EXEC_ARGS)
+#define ENV_VAR_SIZE 32768
+
+/* Strip the last component off a pathname, in place.
+   Thus, parent("a\b\c") -> "a\b".
+
+   parent_buf must be a writable, NUL-terminated string.  It is truncated
+   at its last backslash and returned.  When it contains no backslash
+   there is no component to strip: strrchr() returns NULL in that case, so
+   the buffer is returned unchanged instead of writing through NULL. */
+char* parent(char* parent_buf) {
+        char* pos = strrchr(parent_buf, '\\');
+
+        if (pos != NULL) {
+                pos[0] = '\0';
+        }
+
+        return parent_buf;
+}
+
+/* Remove the .exe if the user invoked this executable with its extension.
+   That is, if the user typed something like apertium.exe instead of apertium.
+
+   buf must be a writable, NUL-terminated string; it is modified in place
+   and returned.  Only a trailing ".exe" is removed.  The comparison
+   ignores case (".EXE", ".Exe", ...), since the extension may be typed in
+   any case on the command line.  Any other suffix is left alone. */
+char* remove_extension(char* buf) {
+        char* pos = strrchr(buf, '.');
+
+        /* Each test short-circuits, so nothing past the terminating NUL is
+           ever read when the suffix is shorter than ".exe". */
+        if (pos != NULL
+            && (pos[1] == 'e' || pos[1] == 'E')
+            && (pos[2] == 'x' || pos[2] == 'X')
+            && (pos[3] == 'e' || pos[3] == 'E')
+            && pos[4] == '\0') {
+                pos[0] = '\0';
+        }
+
+        return buf;
+}
+
+/* Smaller of two values.  Every use of an argument is parenthesised so
+   that expression arguments expand safely.  An argument may still be
+   evaluated twice, so do not pass expressions with side effects. */
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+/* Launcher stub.  It locates sh.exe in the directory of this executable
+   and runs it on the script that has the same path as this executable
+   minus the ".exe" extension, passing the command-line arguments through.
+   The executable's directory is appended to PATH first.
+
+   Returns the value reported by _spawnv() for the child, or EXIT_FAILURE
+   if a path does not fit its buffer or the child cannot be started. */
+int main(int argc, char* argv[]) {
+        char *args[ARG_BUF_SIZE];
+        char base_path[PATH_BUF_SIZE + 1];
+        char script_path[PATH_BUF_SIZE + 1];
+        char shell_path[PATH_BUF_SIZE + 1];
+        char env_path[ENV_VAR_SIZE];
+        const char *old_path;
+        int argi;
+        int status;
+
+        /* On failure _fullpath() returns NULL and leaves the buffer unset. */
+        if (_fullpath(shell_path, argv[0], PATH_BUF_SIZE) == NULL) {
+                fprintf(stderr, "%s: cannot resolve the path of this executable\n", argv[0]);
+                return EXIT_FAILURE;
+        }
+        strcpy(script_path, shell_path);
+        strcpy(base_path, shell_path);
+
+        parent(shell_path);
+        if (strlen(shell_path) + strlen("\\sh.exe") > PATH_BUF_SIZE) {
+                fprintf(stderr, "%s: path to sh.exe is too long\n", argv[0]);
+                return EXIT_FAILURE;
+        }
+        strcat(shell_path, "\\sh.exe");
+
+        remove_extension(script_path);
+        parent(base_path);
+
+        args[0] = shell_path;
+        args[1] = "--norc";
+        args[2] = script_path;
+
+        /* Any parameters passed on the command line will be passed through to the shell script.
+           args[] must hold the NUM_EXEC_ARGS fixed entries, the forwarded
+           parameters and the terminating NULL, so at most
+           ARG_BUF_SIZE - NUM_EXEC_ARGS - 1 parameters are forwarded. */
+        for (argi = 0; argi < MIN(argc - 1, ARG_BUF_SIZE - NUM_EXEC_ARGS - 1); argi++) {
+                /* NOTE(review): this echoes every forwarded argument to stdout,
+                   ahead of the script's own output; it looks like leftover
+                   debugging.  Kept as is; confirm before removing. */
+                printf("%s\n", argv[argi + 1]);
+                args[argi + NUM_EXEC_ARGS] = argv[argi + 1];
+        }
+        /* Signal the end of the argument list */
+        args[argi + NUM_EXEC_ARGS] = NULL;
+
+        /* Add this executable's directory to the path */
+        old_path = getenv("PATH");
+        if (old_path == NULL) {
+                old_path = "";
+        }
+        /* "PATH=" + old value + ';' + base_path + NUL must fit in env_path. */
+        if (strlen("PATH=") + strlen(old_path) + 1 + strlen(base_path) + 1 > ENV_VAR_SIZE) {
+                fprintf(stderr, "%s: PATH is too long to extend\n", argv[0]);
+                return EXIT_FAILURE;
+        }
+        strcpy(env_path, "PATH=");
+        strcat(env_path, old_path);
+        if (old_path[0] != '\0') {
+                strcat(env_path, ";");
+        }
+        strcat(env_path, base_path);
+        _putenv(env_path);
+
+        /* NOTE(review): the argument vector handed to the child starts at
+           args[1], so the child sees "--norc" as its argv[0] and the script
+           path as its first real argument.  Unchanged from the original. */
+        status = (int) _spawnv(_P_WAIT, args[0], &args[1]);
+        if (status == -1) {
+                perror(args[0]);
+                status = EXIT_FAILURE;
+        }
+
+        _flushall();
+
+        /* Hand the child's result back to whoever invoked this launcher. */
+        return status;
+}
Index: branches/apertium-tagger/apertium2/apertium/serialiser.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/serialiser.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/serialiser.h	(revision 69632)
@@ -0,0 +1,288 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef SERIALISER_H
+#define SERIALISER_H
+
+#include "a.h"
+#include "basic_exception_type.h"
+#include "analysis.h"
+#include "exception.h"
+#include "i.h"
+#include "lemma.h"
+#include "morpheme.h"
+#include "tag.h"
+
+#include <cstddef>
+#include <ios>
+#include <limits>
+#include <map>
+#include <ostream>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace Apertium {
+namespace {
+// Returns the number of bytes needed to represent SerialisedType_ without
+// its leading zero bytes: 0 for the value 0, 1 for values up to 0xFF, and
+// so on, up to sizeof(SerialisedType).
+//
+// The count is capped at sizeof(SerialisedType) because shifting by the
+// full width of the operand is undefined behaviour. Without the cap a
+// value whose most significant byte is set (or a negative value of a
+// signed type) would reach such a shift and might never terminate.
+template <typename SerialisedType>
+static unsigned char compressedSize(const SerialisedType &SerialisedType_) {
+  unsigned char compressedSize_ = 0;
+
+  for (; compressedSize_ < sizeof(SerialisedType) &&
+         (SerialisedType_ >>
+          std::numeric_limits<unsigned char>::digits * compressedSize_) != 0;
+       ++compressedSize_) {
+  }
+
+  return compressedSize_;
+}
+
+// Serialiser<T>::serialise(value, Output) writes a value of type T to an
+// output stream. The primary template is only declared here; each
+// supported type has its own (partial) specialisation below, whose
+// serialise() is defined later in this header.
+template <typename SerialisedType> class Serialiser;
+
+// Specialisations for project types.
+template <> class Serialiser<a> {
+public:
+  inline static void serialise(const a &SerialisedType_, std::ostream &Output);
+};
+
+template <> class Serialiser<Analysis> {
+public:
+  inline static void serialise(const Analysis &SerialisedType_,
+                               std::ostream &Output);
+};
+
+template <> class Serialiser<i> {
+public:
+  inline static void serialise(const i &SerialisedType_, std::ostream &Output);
+};
+
+template <> class Serialiser<Lemma> {
+public:
+  inline static void serialise(const Lemma &SerialisedType_,
+                               std::ostream &Output);
+};
+
+template <> class Serialiser<Morpheme> {
+public:
+  inline static void serialise(const Morpheme &SerialisedType_,
+                               std::ostream &Output);
+};
+
+template <> class Serialiser<Tag> {
+public:
+  inline static void serialise(const Tag &SerialisedType_,
+                               std::ostream &Output);
+};
+
+// Specialisations for standard-library types and built-in scalars.
+
+// Any std::basic_string with default traits and allocator.
+template <typename value_type>
+class Serialiser<std::basic_string<value_type> > {
+public:
+  inline static void
+  serialise(const std::basic_string<value_type> &SerialisedType_,
+            std::ostream &Output);
+};
+
+// Any std::map with default comparator and allocator.
+template <typename key_type, typename mapped_type>
+class Serialiser<std::map<key_type, mapped_type> > {
+public:
+  inline static void
+  serialise(const std::map<key_type, mapped_type> &SerialisedType_,
+            std::ostream &Output);
+};
+
+// Any std::pair (this also covers the elements of a std::map).
+template <typename first_type, typename second_type>
+class Serialiser<std::pair<first_type, second_type> > {
+public:
+  inline static void
+  serialise(const std::pair<first_type, second_type> &SerialisedType_,
+            std::ostream &Output);
+};
+
+// std::size_t; the container specialisations use it for their element count.
+template <> class Serialiser<std::size_t> {
+public:
+  inline static void serialise(const std::size_t &SerialisedType_,
+                               std::ostream &Output);
+};
+
+// Any std::vector with the default allocator.
+template <typename value_type> class Serialiser<std::vector<value_type> > {
+public:
+  inline static void serialise(const std::vector<value_type> &SerialisedType_,
+                               std::ostream &Output);
+};
+
+template <> class Serialiser<wchar_t> {
+public:
+  inline static void serialise(const wchar_t &SerialisedType_,
+                               std::ostream &Output);
+};
+}
+
+// Entry point for serialisation: deduces the type of SerialisedType_ and
+// hands the value to the matching Serialiser specialisation, which writes
+// it to Output.
+template <typename SerialisedType>
+inline void serialise(const SerialisedType &SerialisedType_,
+                      std::ostream &Output) {
+  typedef Serialiser<SerialisedType> Dispatcher;
+
+  Dispatcher::serialise(SerialisedType_, Output);
+}
+
+// The serialisers for the project types simply write their data members,
+// in the order shown, through ::Apertium::serialise. Each one therefore
+// relies on the Serialiser specialisation for the member's type.
+
+// a: TheTags, then TheMorphemes.
+void Serialiser<a>::serialise(const a &SerialisedType_, std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheTags, Output);
+  ::Apertium::serialise(SerialisedType_.TheMorphemes, Output);
+}
+
+// Analysis: TheMorphemes only.
+void Serialiser<Analysis>::serialise(const Analysis &SerialisedType_,
+                                     std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheMorphemes, Output);
+}
+
+// i: TheTags only.
+void Serialiser<i>::serialise(const i &SerialisedType_, std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheTags, Output);
+}
+
+// Lemma: TheLemma only.
+void Serialiser<Lemma>::serialise(const Lemma &SerialisedType_,
+                                  std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheLemma, Output);
+}
+
+// Morpheme: TheLemma, then TheTags.
+void Serialiser<Morpheme>::serialise(const Morpheme &SerialisedType_,
+                                     std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheLemma, Output);
+  ::Apertium::serialise(SerialisedType_.TheTags, Output);
+}
+
+// Tag: TheTag only.
+void Serialiser<Tag>::serialise(const Tag &SerialisedType_,
+                                std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.TheTag, Output);
+}
+
+// Writes a string as its length followed by each of its characters in
+// order, every item going through ::Apertium::serialise.
+template <typename value_type>
+void Serialiser<std::basic_string<value_type> >::serialise(
+    const std::basic_string<value_type> &SerialisedType_,
+    std::ostream &Output) {
+  typedef typename std::basic_string<value_type>::const_iterator Cursor;
+
+  ::Apertium::serialise(SerialisedType_.size(), Output);
+
+  // end() is re-evaluated on every pass rather than cached in a local.
+  Cursor Current = SerialisedType_.begin();
+  while (Current != SerialisedType_.end()) {
+    ::Apertium::serialise(*Current, Output);
+    ++Current;
+  }
+}
+
+// Writes a map as its element count followed by each key/value pair in the
+// map's iteration order; the pairs are written by the std::pair serialiser.
+template <typename key_type, typename mapped_type>
+void Serialiser<std::map<key_type, mapped_type> >::serialise(
+    const std::map<key_type, mapped_type> &SerialisedType_,
+    std::ostream &Output) {
+  typedef typename std::map<key_type, mapped_type>::const_iterator Cursor;
+
+  ::Apertium::serialise(SerialisedType_.size(), Output);
+
+  // end() is re-evaluated on every pass rather than cached in a local.
+  Cursor Current = SerialisedType_.begin();
+  while (Current != SerialisedType_.end()) {
+    ::Apertium::serialise(*Current, Output);
+    ++Current;
+  }
+}
+
+// Writes a pair as its first member followed by its second member, each
+// through ::Apertium::serialise. No separator or length is written.
+template <typename first_type, typename second_type>
+void Serialiser<std::pair<first_type, second_type> >::serialise(
+    const std::pair<first_type, second_type> &SerialisedType_,
+    std::ostream &Output) {
+  ::Apertium::serialise(SerialisedType_.first, Output);
+  ::Apertium::serialise(SerialisedType_.second, Output);
+}
+
+// Writes SerialisedType_ to Output in a length-prefixed form: one byte
+// holding compressedSize(SerialisedType_), then that many bytes of the
+// value, most significant byte first.
+//
+// The stream state is checked after every put(), including the last one,
+// so a failure on the final byte is reported rather than silently lost.
+// Any basic_ExceptionType raised inside is rethrown as
+// Exception::Serialiser::size_t_ with a prefixed message.
+void Serialiser<std::size_t>::serialise(const std::size_t &SerialisedType_,
+                                        std::ostream &Output) {
+  try {
+    const unsigned char Size = compressedSize(SerialisedType_);
+    Output.put(Size);
+
+    if (!Output) {
+      std::stringstream what_;
+      what_ << "can't serialise size " << std::hex << /* [1] */ +Size
+            << std::dec;
+      throw Exception::Serialiser::not_Stream_good(what_);
+    }
+
+    for (unsigned char Remaining = Size; Remaining != 0;) {
+      --Remaining;
+
+      const unsigned char Byte = static_cast<unsigned char>(
+          SerialisedType_ >>
+          std::numeric_limits<unsigned char>::digits * Remaining);
+      Output.put(Byte);
+
+      if (!Output) {
+        std::stringstream what_;
+        what_ << "can't serialise byte " << std::hex << /* [1] */ +Byte
+              << std::dec;
+        throw Exception::Serialiser::not_Stream_good(what_);
+      }
+    }
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::stringstream what_;
+    what_ << "can't serialise const std::size_t & : "
+          << basic_ExceptionType_.what();
+    throw Exception::Serialiser::size_t_(what_);
+  }
+}
+
+// Writes a vector as its element count followed by each element in order,
+// every item going through ::Apertium::serialise.
+template <typename value_type>
+void Serialiser<std::vector<value_type> >::serialise(
+    const std::vector<value_type> &SerialisedType_, std::ostream &Output) {
+  typedef typename std::vector<value_type>::const_iterator Cursor;
+
+  ::Apertium::serialise(SerialisedType_.size(), Output);
+
+  // end() is re-evaluated on every pass rather than cached in a local.
+  Cursor Current = SerialisedType_.begin();
+  while (Current != SerialisedType_.end()) {
+    ::Apertium::serialise(*Current, Output);
+    ++Current;
+  }
+}
+
+// Writes SerialisedType_ to Output in the same length-prefixed form used
+// for std::size_t: one byte holding compressedSize(SerialisedType_), then
+// that many bytes of the character's value, most significant byte first.
+//
+// The value is widened to unsigned long before shifting. `unsigned
+// wchar_t' is not a valid C++ type (wchar_t is a distinct type, not a
+// typedef), and shifting an unsigned value is well defined.
+//
+// The stream state is checked after every put(), including the last one.
+// The "can't serialise byte" message reports the single byte that failed,
+// matching the std::size_t serialiser.
+// Any basic_ExceptionType raised inside is rethrown as
+// Exception::Serialiser::wchar_t_ with a prefixed message.
+void Serialiser<wchar_t>::serialise(const wchar_t &SerialisedType_,
+                                    std::ostream &Output) {
+  try {
+    const unsigned char Size = compressedSize(SerialisedType_);
+    Output.put(Size);
+
+    if (!Output) {
+      std::stringstream what_;
+      what_ << "can't serialise size " << std::hex << /* [1] */ +Size;
+      throw Exception::Serialiser::not_Stream_good(what_);
+    }
+
+    const unsigned long Value = static_cast<unsigned long>(SerialisedType_);
+
+    for (unsigned char Remaining = Size; Remaining != 0;) {
+      --Remaining;
+
+      const unsigned char Byte = static_cast<unsigned char>(
+          Value >> std::numeric_limits<unsigned char>::digits * Remaining);
+      Output.put(Byte);
+
+      if (!Output) {
+        std::stringstream what_;
+        what_ << "can't serialise byte " << std::hex << /* [1] */ +Byte;
+        throw Exception::Serialiser::not_Stream_good(what_);
+      }
+    }
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::stringstream what_;
+    what_ << "can't serialise const wchar_t & : "
+          << basic_ExceptionType_.what();
+    throw Exception::Serialiser::wchar_t_(what_);
+  }
+}
+}
+
+// [1] operator+ promotes its operand to a printable integral type.
+
+#endif // SERIALISER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.cc	(revision 69632)
@@ -0,0 +1,50 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream_5_3_1_tagger_trainer.h"
+
+#include "analysis.h"
+#include "basic_tagger.h"
+#include "serialiser.h"
+
+#include <cstddef>
+#include <map>
+#include <ostream>
+#include <utility>
+
+namespace Apertium {
+// Constructs the trainer: the basic_5_3_1_Tagger base is
+// default-constructed and Flags_ is forwarded to basic_StreamTaggerTrainer.
+Stream_5_3_1_TaggerTrainer::Stream_5_3_1_TaggerTrainer(
+    const basic_Tagger::Flags &Flags_)
+    : basic_5_3_1_Tagger(), basic_StreamTaggerTrainer(Flags_) {}
+
+// Writes the trained Model to Serialised_basic_Tagger using the generic
+// ::Apertium::serialise from serialiser.h. The trainer itself is not
+// modified.
+void Stream_5_3_1_TaggerTrainer::serialise(
+    std::ostream &Serialised_basic_Tagger) const {
+  ::Apertium::serialise(Model, Serialised_basic_Tagger);
+}
+
+// Adds Coefficient_ to the count stored in Model for Analysis_. An
+// analysis seen for the first time starts from a count of zero.
+void
+Stream_5_3_1_TaggerTrainer::train_Analysis(const Analysis &Analysis_,
+                                           const std::size_t &Coefficient_) {
+  // operator[] value-initialises a missing entry to 0 before the addition.
+  std::size_t &Count = Model[Analysis_];
+  Count += Coefficient_;
+}
+
+// Multiplies every count in Model by OccurrenceCoefficientMultiplier, in
+// place.
+void Stream_5_3_1_TaggerTrainer::multiplyModel(
+    const std::size_t &OccurrenceCoefficientMultiplier) {
+  typedef std::map<Analysis, std::size_t>::iterator Cursor;
+
+  Cursor Entry = Model.begin();
+  while (Entry != Model.end()) {
+    Entry->second = Entry->second * OccurrenceCoefficientMultiplier;
+    ++Entry;
+  }
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.cc	(revision 69632)
@@ -0,0 +1,55 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream_5_3_2_tagger_trainer.h"
+
+#include "a.h"
+#include "analysis.h"
+#include "lemma.h"
+#include "serialiser.h"
+
+#include <map>
+#include <ostream>
+#include <utility>
+
+namespace Apertium {
+// Constructs the trainer by forwarding Flags_ to basic_StreamTaggerTrainer;
+// no other base or member is initialised explicitly here.
+Stream_5_3_2_TaggerTrainer::Stream_5_3_2_TaggerTrainer(const Flags &Flags_)
+    : basic_StreamTaggerTrainer(Flags_) {}
+
+// Writes the trained Model to Serialised_basic_Tagger using the generic
+// ::Apertium::serialise from serialiser.h. The trainer itself is not
+// modified.
+void Stream_5_3_2_TaggerTrainer::serialise(
+    std::ostream &Serialised_basic_Tagger) const {
+  ::Apertium::serialise(Model, Serialised_basic_Tagger);
+}
+
+// Adds Coefficient_ to the count stored in Model under the `a' derived
+// from Analysis_ and, within that, under the Lemma derived from Analysis_.
+// Entries that do not exist yet are created with a count of zero.
+void
+Stream_5_3_2_TaggerTrainer::train_Analysis(const Analysis &Analysis_,
+                                           const std::size_t &Coefficient_) {
+  // operator[] creates a missing inner map / zero count on first use.
+  std::map<Lemma, std::size_t> &LemmaCounts =
+      Model[static_cast<a>(Analysis_)];
+  LemmaCounts[static_cast<Lemma>(Analysis_)] += Coefficient_;
+}
+
+// Multiplies every count in the two-level Model by
+// OccurrenceCoefficientMultiplier, in place.
+void Stream_5_3_2_TaggerTrainer::multiplyModel(
+    const std::size_t &OccurrenceCoefficientMultiplier) {
+  typedef std::map<Lemma, std::size_t> LemmaCounts;
+  typedef std::map<a, LemmaCounts> Counts;
+
+  Counts::iterator Outer = Model.begin();
+  while (Outer != Model.end()) {
+    LemmaCounts &Inner = Outer->second;
+
+    for (LemmaCounts::iterator Entry = Inner.begin(); Entry != Inner.end();
+         ++Entry) {
+      Entry->second = Entry->second * OccurrenceCoefficientMultiplier;
+    }
+
+    ++Outer;
+  }
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.cc	(revision 69632)
@@ -0,0 +1,88 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "analysis.h"
+#include "i.h"
+#include "lemma.h"
+#include "serialiser.h"
+#include "stream_5_3_3_tagger_trainer.h"
+
+#include <cstddef>
+#include <map>
+#include <ostream>
+#include <utility>
+#include <vector>
+
+namespace Apertium {
+// Constructs the trainer by forwarding Flags_ to basic_StreamTaggerTrainer;
+// no other base or member is initialised explicitly here.
+Stream_5_3_3_TaggerTrainer::Stream_5_3_3_TaggerTrainer(const Flags &Flags_)
+    : basic_StreamTaggerTrainer(Flags_) {}
+
+// Writes the trained Model to Serialised_basic_Tagger using the generic
+// ::Apertium::serialise from serialiser.h. The trainer itself is not
+// modified.
+void Stream_5_3_3_TaggerTrainer::serialise(
+    std::ostream &Serialised_basic_Tagger) const {
+  ::Apertium::serialise(Model, Serialised_basic_Tagger);
+}
+
+// Adds Coefficient_ to three sets of counts held in Model:
+//   - Model.first:         i(Analysis_) -> Lemma(Analysis_);
+//   - Model.second.first:  for each morpheme after the first,
+//                          i(previous morpheme) -> Lemma(morpheme);
+//   - Model.second.second: for each morpheme after the first,
+//                          Lemma(morpheme) -> i(morpheme).
+// Entries that do not exist yet are created with a count of zero.
+//
+// An Analysis_ without morphemes only updates Model.first: the
+// per-morpheme loop starts at begin() + 1, which must not be formed for
+// an empty vector.
+void
+Stream_5_3_3_TaggerTrainer::train_Analysis(const Analysis &Analysis_,
+                                           const std::size_t &Coefficient_) {
+  // operator[] creates a missing inner map / zero count on first use.
+  Model.first[i(Analysis_)][Lemma(Analysis_)] += Coefficient_;
+
+  if (Analysis_.TheMorphemes.empty()) {
+    return;
+  }
+
+  for (std::vector<Morpheme>::const_iterator Morpheme_ =
+           Analysis_.TheMorphemes.begin() + 1;
+       Morpheme_ != Analysis_.TheMorphemes.end(); ++Morpheme_) {
+    Model.second.first[i(*(Morpheme_ - 1))][Lemma(*Morpheme_)] += Coefficient_;
+    Model.second.second[Lemma(*Morpheme_)][i(*Morpheme_)] += Coefficient_;
+  }
+}
+
+namespace {
+// Multiplies every count in a two-level map by Factor, in place.
+template <typename OuterKey, typename InnerKey>
+void scaleCounts(std::map<OuterKey, std::map<InnerKey, std::size_t> > &Counts,
+                 const std::size_t &Factor) {
+  typedef std::map<InnerKey, std::size_t> InnerMap;
+  typedef std::map<OuterKey, InnerMap> OuterMap;
+
+  for (typename OuterMap::iterator Outer = Counts.begin();
+       Outer != Counts.end(); ++Outer) {
+    for (typename InnerMap::iterator Inner = Outer->second.begin();
+         Inner != Outer->second.end(); ++Inner) {
+      Inner->second *= Factor;
+    }
+  }
+}
+}
+
+// Multiplies every count in the three tables of Model (Model.first,
+// Model.second.first and Model.second.second, in that order) by
+// OccurrenceCoefficientMultiplier, in place.
+void Stream_5_3_3_TaggerTrainer::multiplyModel(
+    const std::size_t &OccurrenceCoefficientMultiplier) {
+  scaleCounts(Model.first, OccurrenceCoefficientMultiplier);
+  scaleCounts(Model.second.first, OccurrenceCoefficientMultiplier);
+  scaleCounts(Model.second.second, OccurrenceCoefficientMultiplier);
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/Makefile.am
===================================================================
--- branches/apertium-tagger/apertium2/apertium/Makefile.am	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/Makefile.am	(revision 69632)
@@ -0,0 +1,651 @@
+AUTOMAKE_OPTIONS = subdir-objects
+
+h_sources = a.h \
+	    align.h \
+	    analysis.h \
+	    apertium_re.h \
+	    apertium_tagger.h \
+	    basic_5_3_1_tagger.h \
+	    basic_5_3_2_tagger.h \
+	    basic_5_3_3_tagger.h \
+	    basic_exception_type.h \
+	    basic_stream_tagger.h \
+	    basic_stream_tagger_trainer.h \
+	    basic_tagger.h \
+	    collection.h \
+	    constant_manager.h \
+	    constructor_eq_delete.h \
+	    deserialiser.h \
+	    endian_double_util.h \
+	    err_exception.h \
+	    exception.h \
+	    exception_type.h \
+	    file_tagger.h \
+	    hmm.h \
+	    i.h \
+	    interchunk.h \
+	    interchunk_word.h \
+	    latex_accentsmap.h \
+	    lemma.h \
+	    lexical_unit.h \
+	    linebreak.h \
+	    lswpost.h \
+	    morpheme.h \
+	    morpho_stream.h \
+	    optional.h \
+	    postchunk.h \
+	    serialiser.h \
+	    stream.h \
+	    stream_5_3_1_tagger.h \
+	    stream_5_3_2_tagger.h \
+	    stream_5_3_3_tagger.h \
+	    stream_5_3_1_tagger_trainer.h \
+	    stream_5_3_2_tagger_trainer.h \
+	    stream_5_3_3_tagger_trainer.h \
+	    streamed_type.h \
+	    string_utils.h \
+	    tag.h \
+	    tagger_data.h \
+	    tagger_data_hmm.h \
+	    tagger_data_lsw.h \
+	    tagger_utils.h \
+	    tagger_word.h \
+	    tmx_aligner_tool.h \
+	    tmx_alignment.h \
+	    tmx_align_parameters.h \
+	    tmx_arguments_parser.h \
+	    tmx_book_to_matrix.h \
+	    tmx_builder.h \
+	    tmx_dictionary.h \
+	    tmx_dic_tree.h \
+	    tmx_quasi_diagonal.h \
+	    tmx_serialize_impl.h \
+	    tmx_strings_and_streams.h \
+	    tmx_trail_postprocessors.h \
+	    tmx_translate.h \
+	    tmx_words.h \
+	    transfer_data.h \
+	    transfer.h \
+	    transfer_instr.h \
+	    transfer_mult.h \
+	    transfer_token.h \
+	    transfer_word.h \
+	    transfer_word_list.h \
+	    trx_reader.h \
+	    tsx_reader.h \
+	    ttag.h \
+	    unlocked_cstdio.h \
+	    utf_converter.h \
+	    wchar_t_exception.h \
+	    wchar_t_exception_type.h
+
+#DEPR.:
+#	    lextor_data.h
+#	    lextor_eval.h
+#	    lextor.h
+#	    lextor_word.h
+
+cc_sources = a.cc \
+	     align.cc \
+	     analysis.cc \
+	     apertium_re.cc \
+	     basic_5_3_1_tagger.cc \
+	     basic_5_3_2_tagger.cc \
+	     basic_exception_type.cc \
+	     basic_stream_tagger.cc \
+	     basic_stream_tagger_trainer.cc \
+	     basic_tagger.cc \
+	     collection.cc \
+	     constant_manager.cc \
+	     endian_double_util.cc \
+	     exception_type.cc \
+	     file_tagger.cc \
+	     hmm.cc \
+	     i.cc \
+	     interchunk.cc \
+	     interchunk_word.cc \
+	     latex_accentsmap.cc \
+	     lemma.cc \
+	     linebreak.cc \
+	     lswpost.cc \
+	     morpheme.cc \
+	     morpho_stream.cc \
+	     postchunk.cc \
+	     stream.cc \
+	     stream_5_3_1_tagger.cc \
+	     stream_5_3_2_tagger.cc \
+	     stream_5_3_3_tagger.cc \
+	     stream_5_3_1_tagger_trainer.cc \
+	     stream_5_3_2_tagger_trainer.cc \
+	     stream_5_3_3_tagger_trainer.cc \
+	     string_utils.cc \
+	     tag.cc \
+	     tagger_data.cc \
+	     tagger_data_hmm.cc \
+	     tagger_data_lsw.cc \
+	     tagger_utils.cc \
+	     tagger_word.cc \
+	     tmx_aligner_tool.cc \
+	     tmx_alignment.cc \
+	     tmx_arguments_parser.cc \
+	     tmx_book_to_matrix.cc \
+	     tmx_builder.cc \
+	     tmx_dictionary.cc \
+	     tmx_strings_and_streams.cc \
+	     tmx_trail_postprocessors.cc \
+	     tmx_translate.cc \
+	     transfer.cc \
+	     transfer_data.cc \
+	     transfer_instr.cc \
+	     transfer_mult.cc \
+	     transfer_token.cc \
+	     transfer_word.cc \
+	     transfer_word_list.cc \
+	     trx_reader.cc \
+	     tsx_reader.cc \
+	     utf_converter.cc \
+	     wchar_t_exception_type.cc
+#DEPR.:
+#	     lextor.cc
+#	     lextor_data.cc
+#	     lextor_eval.cc
+#	     lextor_word.cc
+
+library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
+library_include_HEADERS = $(h_sources)
+
+GENERATEDSCRIPTS = apertium-gen-deformat apertium-gen-reformat \
+                   apertium-validate-tagger \
+                   apertium-validate-transfer apertium-validate-dictionary \
+                   apertium-validate-modes \
+                   apertium-validate-interchunk \
+                   apertium-validate-postchunk apertium apertium-unformat \
+                   apertium-gen-modes apertium-validate-acx \
+                   apertium-utils-fixlatex
+#DEPR.:
+                   #apertium-preprocess-corpus-lextor
+                   #apertium-gen-stopwords-lextor
+                   #apertium-gen-lextorbil
+                   #apertium-gen-lextormono apertium-gen-wlist-lextor
+
+lib_LTLIBRARIES = libapertium3.la
+libapertium3_la_SOURCES = $(h_sources) $(cc_sources)
+libapertium3_la_LDFLAGS = -version-info $(GENERIC_LIBRARY_VERSION) -release $(GENERIC_RELEASE)
+
+bin_PROGRAMS = apertium-deshtml \
+	       apertium-deslatex \
+	       apertium-desmediawiki \
+	       apertium-desodt \
+	       apertium-despptx \
+	       apertium-desrtf \
+	       apertium-destxt \
+	       apertium-deswxml \
+	       apertium-desxlsx \
+	       apertium-desxpresstag \
+	       apertium-filter-ambiguity \
+	       apertium-interchunk \
+	       apertium-multiple-translations \
+	       apertium-postchunk \
+	       apertium-postlatex \
+	       apertium-postlatex-raw \
+	       apertium-prelatex \
+	       apertium-preprocess-transfer \
+	       apertium-pretransfer \
+	       apertium-rehtml \
+	       apertium-rehtml-noent \
+	       apertium-relatex \
+	       apertium-remediawiki \
+	       apertium-reodt \
+	       apertium-repptx \
+	       apertium-rertf \
+	       apertium-retxt \
+	       apertium-rewxml \
+	       apertium-rexlsx \
+	       apertium-rexpresstag \
+	       apertium-tagger \
+	       apertium-tagger-apply-new-rules \
+	       apertium-tagger-readwords \
+	       apertium-tmxbuild \
+	       apertium-transfer
+
+bin_SCRIPTS =  $(GENERATEDSCRIPTS)
+
+instdir = apertium
+
+apertiumdir = $(prefix)/share/apertium
+apertiuminclude = $(prefix)/include/apertium-$(GENERIC_API_VERSION)
+apertiumlib = $(prefix)/lib
+apertiumsysconf = $(prefix)/etc/apertium
+
+apertium_DATA = deformat.xsl reformat.xsl new2old.xsl lexchoice.xsl \
+                lexchoicebil.xsl \
+				tagger.dtd interchunk.dtd format.dtd  transfer.dtd postchunk.dtd modes.dtd \
+				tagger.rnc interchunk.rnc format.rnc  transfer.rnc postchunk.rnc modes.rnc \
+				modes2bash.xsl modes2debugmodes.xsl \
+                apertium-createmodes.awk
+
+apertium_pretransfer_SOURCES = apertium_pretransfer.cc
+apertium_multiple_translations_SOURCES = apertium-multiple-translations.cc
+apertium_multiple_translations_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+apertium_destxt_SOURCES = apertium_destxt.cc
+apertium_retxt_SOURCES = apertium_retxt.cc
+apertium_deshtml_SOURCES = apertium_deshtml.cc
+apertium_rehtml_SOURCES = apertium_rehtml.cc
+apertium_rehtml_noent_SOURCES = apertium_rehtml_noent.cc
+apertium_desxpresstag_SOURCES = apertium_desxpresstag.cc
+apertium_rexpresstag_SOURCES = apertium_rexpresstag.cc
+apertium_desodt_SOURCES = apertium_desodt.cc
+apertium_reodt_SOURCES = apertium_reodt.cc
+apertium_desrtf_SOURCES = apertium_desrtf.cc
+apertium_rertf_SOURCES = apertium_rertf.cc
+apertium_deswxml_SOURCES = apertium_deswxml.cc
+apertium_rewxml_SOURCES = apertium_rewxml.cc
+apertium_deslatex_SOURCES = apertium_deslatex.cc
+apertium_relatex_SOURCES = apertium_relatex.cc
+apertium_desxlsx_SOURCES = apertium_desxlsx.cc
+apertium_rexlsx_SOURCES = apertium_rexlsx.cc
+apertium_despptx_SOURCES = apertium_despptx.cc
+apertium_repptx_SOURCES = apertium_repptx.cc
+apertium_desmediawiki_SOURCES = apertium_desmediawiki.cc
+apertium_remediawiki_SOURCES = apertium_remediawiki.cc
+apertium_prelatex_SOURCES = apertium_prelatex.cc
+apertium_prelatex_LDADD= $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+apertium_postlatex_SOURCES = apertium_postlatex.cc
+apertium_postlatex_LDADD= $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+apertium_postlatex_raw_SOURCES = apertium_postlatex_raw.cc
+apertium_postlatex_raw_LDADD= $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_tagger_SOURCES = apertium_tagger.cc
+apertium_tagger_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_tmxbuild_SOURCES = apertium_tmxbuild.cc
+apertium_tmxbuild_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_preprocess_transfer_SOURCES = transferpp.cc
+apertium_preprocess_transfer_LDADD = $(APERTIUM_LIBS) \
+                                     -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_filter_ambiguity_SOURCES = apertium_filter_ambiguity.cc
+apertium_filter_ambiguity_LDADD = $(APERTIUM_LIBS) \
+                                  -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_transfer_SOURCES = apertium_transfer.cc
+apertium_transfer_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_interchunk_SOURCES = apertium_interchunk.cc
+apertium_interchunk_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_postchunk_SOURCES = apertium_postchunk.cc
+apertium_postchunk_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+###apertium_lextor_SOURCES = apertium_lextor.cc
+###apertium_lextor_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+#apertium_lextor_eval_SOURCES = apertium-lextor-eval.C
+#apertium_lextor_eval_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_tagger_apply_new_rules_SOURCES = apertium_tagger_apply_new_rules.cc
+apertium_tagger_apply_new_rules_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+apertium_tagger_readwords_SOURCES = apertium_tagger_readwords.cc
+apertium_tagger_readwords_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+###apertium_lextor_search_SOURCES = apertium-lextor-search.C
+###apertium_lextor_search_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+###pruebas_lextor_SOURCES = pruebas-lextor.C
+###pruebas_lextor_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+###apertium_gen_wlist_lextor_translation_SOURCES = apertium_gen_wlist_lextor_translation.cc
+###apertium_gen_wlist_lextor_translation_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION)
+
+
+if WINDOWS
+INCLUDES = -I$(top_srcdir)/apertium/win32 -I$(top_srcdir) $(APERTIUM_CFLAGS)
+else
+INCLUDES = -I$(top_srcdir) $(APERTIUM_CFLAGS)
+endif
+# Sources generated by flex (directly or via XSLT) plus the generated scripts.
+CLEANFILES = *~ apertium_destxt.cc apertium_retxt.cc apertium_deshtml.cc \
+             apertium_rehtml.cc apertium_rehtml_noent.cc \
+             apertium_desrtf.cc apertium_rertf.cc \
+             apertium_deswxml.cc apertium_rewxml.cc \
+             apertium_deslatex.cc apertium_relatex.cc \
+             apertium_desxlsx.cc apertium_rexlsx.cc \
+             apertium_despptx.cc apertium_repptx.cc \
+             apertium_desodt.cc apertium_reodt.cc \
+             apertium_desxpresstag.cc apertium_rexpresstag.cc \
+             apertium_desmediawiki.cc apertium_remediawiki.cc \
+             apertium_prelatex.cc apertium_postlatex.cc apertium_postlatex_raw.cc \
+             $(GENERATEDSCRIPTS)
+
+apertium_destxt.cc: txt-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl txt-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_desxpresstag.cc: xpresstag-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl xpresstag-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rexpresstag.cc: xpresstag-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl xpresstag-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_retxt.cc: txt-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl txt-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_deshtml.cc: html-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl html-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rehtml.cc: html-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl html-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rehtml_noent.cc: html-noent-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl html-noent-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_desodt.cc: odt-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl odt-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_reodt.cc: odt-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl odt-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_desrtf.cc: rtf-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl rtf-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rertf.cc: rtf-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl rtf-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_deswxml.cc: wxml-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl wxml-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rewxml.cc: wxml-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl wxml-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_deslatex.cc: latex-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl latex-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_relatex.cc: latex-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl latex-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+
+
+apertium_desxlsx.cc: xlsx-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl xlsx-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_rexlsx.cc: xlsx-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl xlsx-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_despptx.cc: pptx-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl pptx-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_repptx.cc: pptx-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl pptx-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_desmediawiki.cc: mediawiki-format.xml Makefile.am deformat.xsl
+	$(XSLTPROC) deformat.xsl mediawiki-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_remediawiki.cc: mediawiki-format.xml Makefile.am reformat.xsl
+	$(XSLTPROC) reformat.xsl mediawiki-format.xml >$@tmp
+	$(FLEX) -Cfer -o$@ $@tmp
+	rm $@tmp
+
+apertium_prelatex.cc: apertium-prelatex.l
+	$(FLEX) -Cfer -o$@ apertium-prelatex.l
+
+apertium_postlatex.cc: apertium-postlatex.l
+	$(FLEX) -Cfer -o$@ apertium-postlatex.l
+
+apertium_postlatex_raw.cc: apertium-postlatex-raw.l
+	$(FLEX) -Cfer -o$@ apertium-postlatex-raw.l
+
+apertium-validate-tagger: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-tagger script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/tagger.dtd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+apertium-validate-transfer: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-transfer script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/transfer.dtd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+apertium-validate-interchunk: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-interchunk script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/interchunk.dtd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+apertium-validate-postchunk: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-postchunk script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/postchunk.dtd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+apertium-validate-acx: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-acx script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --relaxng \"$(prefix)\"/share/lttoolbox/acx.rng --schema \"$(prefix)\"/share/lttoolbox/acx.xsd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+apertium-validate-modes: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-modes script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/modes.dtd --noout \"\$$FILE1\"" >>$@
+	@chmod a+x $@
+
+
+apertium-validate-dictionary: Makefile.am validate-header.sh
+	@echo "Creating apertium-validate-dictionary script"
+	@echo "#!$(BASH)" > $@
+	@cat validate-header.sh >> $@
+	@echo "# xsd is a non-final command, so just treated as a warning when compiling:" >> $@
+	@echo "$(XMLLINT) --schema \"$(prefix)\"/share/lttoolbox/dix.xsd --noout \"\$$FILE1\" | grep -vF ' fails to validate'" >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(prefix)\"/share/lttoolbox/dix.dtd --noout \"\$$FILE1\"" >> $@
+	@chmod a+x $@
+
+# The XSLTPROC check uses POSIX "=": recipes run under /bin/sh, where "==" is a bashism.
+apertium-gen-deformat: Makefile.am deformat-header.sh
+	@echo "Creating apertium-gen-deformat script"
+	@echo "#!$(BASH)" > $@
+	@cat deformat-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/format.dtd --noout \$$FILE1 && \\" >> $@
+	@if [ "`basename $(XSLTPROC)`" = xsltproc ]; \
+	  then echo "$(XSLTPROC) --stringparam mode \$$MODE \"$(apertiumdir)\"/deformat.xsl \$$FILE1 >/tmp/\$$\$$.deformat.l && \\"; \
+          else echo "$(XSLTPROC) \"$(apertiumdir)\"/deformat.xsl \$$FILE1 \"\\\$$mode=\$$MODE\" >/tmp/\$$\$$.deformat.l && \\"; fi >> $@
+	@echo "$(FLEX) \$$FLEXOPTS -o/tmp/\$$\$$.lex.cc /tmp/\$$\$$.deformat.l && \\" >> $@
+	@echo "$(CXX) -DGENFORMAT $(CXXFLAGS) -w $(APERTIUM_CFLAGS) -I $(apertiuminclude) -o \$$FILE2 /tmp/\$$\$$.lex.cc $(APERTIUM_LIBS) 2>/dev/null && \\" >> $@
+	@echo "rm /tmp/\$$\$$.deformat.l /tmp/\$$\$$.lex.cc" >> $@
+	@chmod a+x $@
+
+apertium-gen-reformat: Makefile.am gen-header.sh
+	@echo "Creating apertium-gen-reformat script"
+	@echo "#!$(BASH)" > $@
+	@cat gen-header.sh >> $@
+	@echo "$(XMLLINT) --dtdvalid \"$(apertiumdir)\"/format.dtd --noout \$$FILE1 && \\" >> $@
+	@echo "$(XSLTPROC) \"$(apertiumdir)\"/reformat.xsl \$$FILE1 >/tmp/\$$\$$.reformat.l && \\" >> $@
+	@echo "$(FLEX) \$$FLEXOPTS -o/tmp/\$$\$$.lex.cc /tmp/\$$\$$.reformat.l && \\" >> $@
+	@echo "$(CXX) -DGENFORMAT $(CXXFLAGS) -w $(APERTIUM_CFLAGS) -I $(apertiuminclude) -o \$$FILE2 /tmp/\$$\$$.lex.cc $(APERTIUM_LIBS) 2>/dev/null &&\\" >> $@
+	@echo "rm /tmp/\$$\$$.reformat.l /tmp/\$$\$$.lex.cc" >> $@
+	@chmod a+x $@
+
+apertium-gen-modes: apertium-gen-modes.in Makefile.am
+	@echo "#!$(BASH)" > $@
+	@echo "APERTIUMDIR=$(apertiumdir)" >> $@
+	@echo "XMLLINT=$(XMLLINT)" >> $@
+	@echo "XSLTPROC=$(XSLTPROC)" >> $@
+	@cat $< >> $@
+	@chmod a+x $@
+
+apertium-utils-fixlatex: Makefile.am utils-fixlatex-header.sh
+	@echo "Creating apertium-utils-fixlatex script"
+	@echo "#!$(BASH)" > $@
+	@cat utils-fixlatex-header.sh >> $@
+	@chmod a+x $@
+
+apertium: Makefile.am apertium-header.sh
+	@echo "Creating apertium script"
+	@echo "#!$(BASH)" > $@
+	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "DEFAULT_DIRECTORY=\"$(prefix)/share/apertium\"" >>$@
+	@cat apertium-header.sh >>$@
+	@chmod a+x $@
+
+apertium-unformat: Makefile.am apertium-unformat-header.sh
+	@echo "Creating apertium-unformat script"
+	@echo "#!$(BASH)" > $@
+	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "DEFAULT_DIRECTORY=\"$(prefix)/share/apertium\"" >>$@
+	@cat apertium-unformat-header.sh >>$@
+	@chmod a+x $@
+
+
+#apertium-translator-lextor: Makefile.am trans-lextor-header.sh
+#	@echo "Creating apertium-translator-lextor script"
+#	@echo "#!$(BASH)" > $@
+#	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+#	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+#	@cat trans-lextor-header.sh >>$@
+#	@chmod a+x $@
+
+#apertium-gen-oldbil: Makefile.am transformdicbil-header.sh
+#	@echo "Creating apertium-gen-oldbil script"
+#	@echo "#!$(BASH)" >$@
+#	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+#	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+#	@echo "XSLTPROC_OPTIONS=\"\"">>$@
+#	@echo "STYLESHEET=\"$(apertiumdir)/new2old.xsl\"">>$@
+#	@cat transformdicbil-header.sh >>$@
+#	@chmod a+x $@
+
+apertium-gen-lextorbil: Makefile.am transformdic-header.sh
+	@echo "Creating apertium-gen-lextorbil script"
+	@echo "#!$(BASH)" >$@
+	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "XSLTPROC_OPTIONS_LR=\"\"">>$@
+	@echo "XSLTPROC_OPTIONS_RL=\"--stringparam r2l yes\"">>$@
+	@echo "STYLESHEET=\"$(apertiumdir)/lexchoicebil.xsl\"">>$@
+	@cat transformdic-header.sh >>$@
+	@chmod a+x $@
+
+apertium-gen-lextormono: Makefile.am transformdic-header.sh
+	@echo "Creating apertium-gen-lextormono script"
+	@echo "#!$(BASH)" >$@
+	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "XSLTPROC_OPTIONS_LR=\"\"">>$@
+	@echo "XSLTPROC_OPTIONS_RL=\"--stringparam r2l yes\"">>$@
+	@echo "STYLESHEET=\"$(apertiumdir)/lexchoice.xsl\"">>$@
+	@cat transformdic-header.sh >>$@
+	@chmod a+x $@
+
+apertium-gen-wlist-lextor: Makefile.am gen-wlist-lextor-header.sh
+	@echo "Creating apertium-gen-wlist-lextor script"
+	@echo "#!$(BASH)" >$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@cat gen-wlist-lextor-header.sh >>$@
+	@chmod a+x $@
+
+apertium-preprocess-corpus-lextor: Makefile.am preprocess-corpus-lextor.sh
+	@echo "Creating apertium-preprocess-corpus-lextor script"
+	@echo "#!$(BASH)" >$@
+	@echo "LTTOOLBOX_PATH=\"$(prefix)/bin\"" >>$@
+	@echo "APERTIUM_PATH=\"$(prefix)/bin\"" >>$@
+	@cat preprocess-corpus-lextor.sh >>$@
+	@chmod a+x $@
+
+apertium-gen-stopwords-lextor: Makefile.am gen-stopwords-lextor.sh
+	@echo "Creating apertium-gen-stopwords-lextor script"
+	@echo "#!$(BASH)" >$@
+	@cat gen-stopwords-lextor.sh >>$@
+	@chmod a+x $@
+
+man_MANS=apertium.1 apertium-deshtml.1 apertium-desrtf.1 apertium-destxt.1 \
+         apertium-desodt.1 apertium-reodt.1 \
+         apertium-deswxml.1 apertium-rewxml.1 \
+         apertium-deslatex.1 apertium-relatex.1 \
+         apertium-prelatex.1 apertium-postlatex.1 apertium-postlatex-raw.1 \
+         apertium-desxlsx.1 apertium-rexlsx.1 \
+         apertium-despptx.1 apertium-repptx.1 \
+         apertium-desmediawiki.1 apertium-remediawiki.1 \
+         apertium-filter-ambiguity.1 apertium-gen-deformat.1 \
+         apertium-gen-reformat.1 \
+         apertium-preprocess-transfer.1 apertium-pretransfer.1 apertium-rehtml.1 \
+         apertium-rertf.1 apertium-retxt.1 apertium-tagger.1 apertium-transfer.1 \
+         apertium-validate-dictionary.1 apertium-validate-tagger.1 \
+         apertium-validate-transfer.1 apertium-gen-modes.1 apertium-interchunk.1 \
+         apertium-postchunk.1 apertium-validate-interchunk.1 apertium-utils-fixlatex.1 \
+         apertium-validate-postchunk.1 apertium-validate-modes.1 apertium-tagger-apply-new-rules.1 \
+	 apertium-validate-acx.1 apertium-multiple-translations.1 \
+	 apertium-unformat.1
+#DEPR.:
+#         apertium-lextor-eval.1
+#         apertium-gen-lextorbil.1
+#         apertium-gen-lextormono.1 apertium-gen-stopwords-lextor.1
+#         apertium-gen-wlist-lextor.1 apertium-gen-wlist-lextor-translation.1
+#         apertium-lextor.1 apertium-preprocess-corpus-lextor.1
+
+EXTRA_DIST = gen-header.sh deformat-header.sh \
+             reformat.xsl deformat.xsl new2old.xsl lexchoice.xsl lexchoicebil.xsl \
+             txt-format.xml \
+             html-format.xml odt-format.xml rtf-format.xml wxml-format.xml latex-format.xml\
+             html-noent-format.xml \
+             xlsx-format.xml pptx-format.xml mediawiki-format.xml trans-header.sh \
+             apertium-postlatex.l  apertium-postlatex-raw.l  apertium-prelatex.l \
+             apertium-header.sh apertium-unformat-header.sh $(man_MANS) \
+             xpresstag-format.xml \
+             validate-header.sh transformdic-header.sh transformdicbil-header.sh \
+			 tagger.dtd interchunk.dtd format.dtd  transfer.dtd postchunk.dtd modes.dtd \
+			 tagger.rnc interchunk.rnc format.rnc  transfer.rnc postchunk.rnc modes.rnc \
+             utils-fixlatex-header.sh \
+             apertium-gen-modes.in apertium-createmodes.awk modes2bash.xsl modes2debugmodes.xsl
+#DEPR.:
+# trans-lextor-header.sh
+# gen-wlist-lextor-header.sh
+# gen-stopwords-lextor.sh
+# preprocess-corpus-lextor.sh
Index: branches/apertium-tagger/apertium2/apertium/validate-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/validate-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/validate-header.sh	(revision 69632)
@@ -0,0 +1,12 @@
+# Prologue shared by the generated apertium-validate-* scripts.
+# Requires exactly one argument and exposes it as FILE1.
+if [[ $# -ne 1 ]]; then
+    echo "USAGE: $(basename "$0") <input_file>"
+    exit 1
+fi
+FILE1="$1"
+[[ -e "$FILE1" ]] || {
+    echo "ERROR: '$1' file not found"
+    exit 1
+}
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.in
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.in	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.in	(revision 69632)
@@ -0,0 +1,85 @@
+#!/bin/bash
+# Makefile.am prepends APERTIUMDIR, XMLLINT, XSLTPROC and the right shebang
+
+show_help () {
+    cat <<EOF
+USAGE: $(basename "$0") modes.xml
+       $(basename "$0") modes.xml BASENAME
+       $(basename "$0") -f modes.xml INSTALLDIR
+
+Creates all modes under the 'modes/' subdirectory of the directory of
+modes.xml, and further creates copies of installable modes in the same
+directory as modes.xml.
+
+If only modes.xml is given, all files refer only to datafiles under
+the same directory as modes.xml.
+
+If only modes.xml and BASENAME are given, installable modes will refer
+to datafiles in ${APERTIUMDIR}/\${BASENAME}.
+
+If -f is given, the second non-option argument INSTALLDIR is the full
+path to where installed data files for installable modes are.
+
+
+If a mode has attribute gendebug="yes", the script will also
+auto-generate debug modes (e.g. -morph, -tagger, -chunker).
+
+Use option -v to show the actual commands this script runs.
+EOF
+    exit 1
+}
+
+# Parse options: -h/-H help, -v verbose, -f treat the 2nd argument as a full path.
+verbose=false
+fullpath=false
+OPTIND=1
+while getopts "hHfv" opt; do
+    case "$opt" in
+        h|H)
+            # Subshell: show_help ends with 'exit 1', but asking for help is success.
+            (show_help)
+            exit 0
+            ;;
+        v)  verbose=true ;;
+        f)  fullpath=true ;;
+        '?')
+            show_help >&2
+            exit 1
+            ;;
+    esac
+done
+shift $((OPTIND-1))
+
+xmlfile="$1"
+if [[ ! -e "${xmlfile}" ]]; then
+    echo "ERROR: '${xmlfile}' file not found"
+    exit 1
+fi
+xmldir=$(cd "$(dirname "${xmlfile}")"; pwd)
+
+case $# in
+    1) installdir="${xmldir}";;
+    2) if ${fullpath}; then
+           installdir="$2"
+       else
+           installdir="${APERTIUMDIR}/$2"
+       fi
+       ;;
+    *) show_help >&2
+       exit 1
+       ;;
+esac
+
+$verbose && set -x
+set -o pipefail # introduced in bash 3; available in OSX>=10.5; should be safe
+
+[[ -d "${xmldir}"/modes ]] || mkdir "${xmldir}"/modes
+
+"${XMLLINT}" --dtdvalid "${APERTIUMDIR}"/modes.dtd --noout "${xmlfile}" || exit $?
+
+"${XSLTPROC}" "${APERTIUMDIR}"/modes2debugmodes.xsl "${xmlfile}" \
+    | "${XSLTPROC}" --stringparam devdir "${xmldir}" \
+                    --stringparam installdir "${installdir}" \
+                    "${APERTIUMDIR}"/modes2bash.xsl \
+                    - \
+    | awk -f "${APERTIUMDIR}"/apertium-createmodes.awk
Index: branches/apertium-tagger/apertium2/apertium/hmm.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/hmm.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/hmm.h	(revision 69632)
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ *  First order hidden Markov model (HMM) implementation (header)
+ *
+ *  @author   Felipe Sánchez-Martínez - fsanchez@dlsi.ua.es
+ */
+
+#ifndef __HMM_H
+#define __HMM_H
+
+#include "file_tagger.h"
+
+#include <cstdio>
+#include <fstream>
+#include <math.h>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <cfloat>
+#include <cstring>
+
+#include <apertium/collection.h>
+#include <apertium/constant_manager.h>
+#include <apertium/morpho_stream.h>
+#include <apertium/tagger_data_hmm.h>
+#include <apertium/tagger_utils.h>
+#include <apertium/tagger_word.h>
+
+using namespace std;
+
+#define ZERO 1e-10
+
+/** HMM
+ *  first-order hidden Markov Model
+ */
+class HMM : public Apertium::FILE_Tagger {
+private:
+   TaggerDataHMM tdhmm;
+   TTag eos; // end-of-sentence tag
+   
+   /** Allocates memory for the transition (a) and the emission (b) matrices.
+    *  Before calling this method the number of ambiguity classes must be known.
+    *  This method is called within read_ambiguity_classes and read_dictionary.
+    *  @see: read_ambiguity_classes, read_dictionary
+    */
+   void init(); 
+public:  
+   void deserialise(FILE *Serialised_FILE_Tagger);
+   std::vector<std::wstring> &getArrayTags();
+   void train(FILE *Corpus, unsigned long Count);
+   void serialise(FILE *Stream_);
+   void deserialise(const TaggerData &Deserialised_FILE_Tagger);
+   void init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
+                                            FILE *UntaggedCorpus);
+   void init_probabilities_kupiec_(FILE *Corpus);
+   HMM();
+   HMM(TaggerDataHMM *tdhmm);
+ 
+   /** Constructor
+    */
+   HMM(TaggerDataHMM tdhmm);
+
+   /** Destructor
+    */
+   ~HMM();
+  
+   /** Used to set the end-of-sentence tag
+    *  @param t the end-of-sentence tag
+    */
+   void set_eos(TTag t);
+
+   /** It reads the ambiguity classes from the stream received as
+    *  input
+    *  @param in the input stream
+    */  
+   void read_ambiguity_classes(FILE *in);
+  
+   /** It writes the ambiguity classes to the stream received as
+    *  a parameter
+    *  @param out the output stream
+    */
+   void write_ambiguity_classes(FILE *out);
+  
+   /** It reads the probabilities (matrices a and b) from the stream 
+    *  received as a parameter
+    *  @param in the input stream
+    */
+   void read_probabilities(FILE *in);
+
+   /** It writes the probabilities (matrices a and b) to the stream 
+    *  received as a parameter
+    *  @param out the output stream
+    */ 
+   void write_probabilities(FILE *out);
+  
+   /** It reads the expanded dictionary received as a parameter and calculates
+    *  the set of ambiguity classes that the tagger will manage.
+    *  @param is the input stream with the expanded dictionary to read
+    */
+   void read_dictionary(FILE *is);  
+           
+   /** It initializes the transition (a) and emission (b) probabilities
+    *  from an untagged input text by means of Kupiec's method
+    *  @param is the input stream with the untagged corpus to process
+    */
+   void init_probabilities_kupiec (FILE *is);
+  
+   /** It initializes the transition (a) and emission (b) probabilities
+    *  from a tagged input text by means of the expected-likelihood 
+    *  estimate (ELE) method
+    *  @param ftagged the input stream with the tagged corpus to process
+    *  @param funtagged the same corpus to process but untagged
+    */   
+   void init_probabilities_from_tagged_text(FILE *ftagged, FILE *funtagged);
+
+   /** It applies the forbid and enforce rules found in tagger specification.
+    *  To do so the transition matrix is modified by introducing null probabilities
+    *  in the involved transitions.
+    */
+   void apply_rules();
+   
+   /** Unsupervised training algorithm (Baum-Welch implementation).
+    *  @param is the input stream with the untagged corpus to process
+    */  
+   void train (FILE *is);  
+  
+   /** Tagging algorithm (Viterbi implementation).
+    *  @param Input the input stream with the untagged text to tag
+    *  @param Output the output stream with the tagged text
+    */
+   void tagger(FILE *Input, FILE *Output, const bool &First = false);
+
+   /** Prints the A matrix.
+    */
+   void print_A();
+
+   /** Prints the B matrix.
+    */ 
+   void print_B();
+
+   /** Prints the ambiguity classes.
+    */
+   void print_ambiguity_classes();
+
+   void filter_ambiguity_classes(FILE *in, FILE *out);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lswpost.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lswpost.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lswpost.h	(revision 69632)
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/**
+ *  Light Sliding-Window Part of Speech Tagger (LSWPoST) implementation (header)
+ *
+ *  @author   Gang Chen - pkuchengang@gmail.com
+ */
+
+#ifndef __LSWPOST_H
+#define __LSWPOST_H
+
+#include "file_tagger.h"
+
+#include <cstdio>
+#include <fstream>
+#include <math.h>
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <cfloat>
+#include <cstring>
+
+#include <apertium/collection.h>
+#include <apertium/constant_manager.h>
+#include <apertium/morpho_stream.h>
+#include <apertium/tagger_data_lsw.h>
+#include <apertium/tagger_utils.h>
+#include <apertium/tagger_word.h>
+
+
+#define ZERO 1e-10
+
+/** LSWPoST
+ *  Light Sliding-Window Part of Speech Tagger
+ */
+class LSWPoST : public Apertium::FILE_Tagger { // tagger implementing the Apertium::FILE_Tagger interface
+private:
+  TaggerDataLSW tdlsw; // tagger data, held by value
+  TTag eos; // end-of-sentence tag
+
+public: // the next seven declarations match pure virtuals of Apertium::FILE_Tagger
+  void deserialise(FILE *Serialised_FILE_Tagger); // declaring deserialise here hides FILE_Tagger::deserialise(char *const) for lookups on LSWPoST
+  std::vector<std::wstring> &getArrayTags();
+  void train(FILE *Corpus, unsigned long Count);
+  void serialise(FILE *Stream_);
+  void deserialise(const TaggerData &Deserialised_FILE_Tagger); // protected in FILE_Tagger, public here
+  void init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
+                                            FILE *UntaggedCorpus);
+  void init_probabilities_kupiec_(FILE *Corpus);
+  LSWPoST();
+  LSWPoST(TaggerDataLSW *tdlsw); // parameter shares its name with the member tdlsw; NOTE(review): pointee ownership is not stated here -- confirm
+
+   /** Constructor taking the tagger data by value
+    */
+   LSWPoST(TaggerDataLSW t);
+
+   /** Destructor (implicitly virtual, because Apertium::FILE_Tagger declares a virtual destructor)
+    */
+   ~LSWPoST();
+
+   /** Sets the end-of-sentence tag (presumably stored in the member eos -- confirm)
+    *  @param t the end-of-sentence tag
+    */
+   void set_eos(TTag t);
+
+   /** It reads the expanded dictionary received as a parameter and calculates
+    *  the set of ambiguity classes that the tagger will manage.
+    *  @param fdic the input stream with the expanded dictionary to read
+    */
+   void read_dictionary(FILE *fdic);
+
+   /** Whether the tag sequence (left, mid, right) is valid, according to the forbid and enforce rules
+    */
+   bool is_valid_seq(TTag left, TTag mid, TTag right);
+
+   /** Init probabilities
+    *  It applies the forbid and enforce rules found in the tagger specification.
+    *  To do so, the joint probability of a tag sequence that contains a forbid
+    *  rule, or doesn't satisfy an enforce rule, is set to 0.
+    */
+   void init_probabilities(FILE *ftxt);
+
+   /** Unsupervised training algorithm (described as Baum-Welch; TODO confirm for this sliding-window tagger).
+    *  @param ftxt the input stream with the untagged corpus to process
+    */
+   void train (FILE *ftxt);
+
+   /** Prints the para matrix.
+    */
+   void print_para_matrix();
+
+   /** Do the tagging (Input/Output streams; the default for First repeats the one in FILE_Tagger::tagger)
+    */
+   void tagger(FILE *Input, FILE *Output, const bool &First = false);
+};
+#endif
Index: branches/apertium-tagger/apertium2/apertium/file_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/file_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/file_tagger.h	(revision 69632)
@@ -0,0 +1,52 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef FILE_TAGGER_H
+#define FILE_TAGGER_H
+
+#include <apertium/tagger_data.h>
+
+#include <cstdio>
+#include <string>
+#include <vector>
+
+namespace Apertium {
+class FILE_Tagger { // abstract interface for taggers operating on C FILE streams (has pure virtual members)
+public:
+  FILE_Tagger();
+  virtual ~FILE_Tagger(); // virtual so that derived taggers can be destroyed through a FILE_Tagger pointer
+  virtual void deserialise(FILE *Serialised_FILE_Tagger) = 0; // one of three deserialise overloads in this class
+  void set_debug(const bool &Debug); // presumably sets the member debug -- confirm in the definition
+  void set_show_sf(const bool &ShowSuperficial); // presumably sets the member show_sf -- confirm
+  void setNullFlush(const bool &NullFlush); // presumably sets the member null_flush -- confirm
+  virtual void tagger(FILE *Input, FILE *Output, const bool &First = false) = 0; // default arguments bind to the static type: overriders should repeat "= false"
+  virtual std::vector<std::wstring> &getArrayTags() = 0; // returns a non-const reference to a vector of wide strings
+  virtual void train(FILE *Corpus, unsigned long Count) = 0;
+  virtual void serialise(FILE *Stream_) = 0;
+  void deserialise(char *const TaggerSpecificationFilename); // non-virtual overload; hidden in derived classes that redeclare deserialise
+  virtual void read_dictionary(FILE *Dictionary) = 0;
+  virtual void init_probabilities_from_tagged_text_(FILE *TaggedCorpus,
+                                                    FILE *Corpus) = 0;
+  virtual void init_probabilities_kupiec_(FILE *Corpus) = 0;
+
+protected:
+  virtual void deserialise(const TaggerData &Deserialised_FILE_Tagger) = 0; // TaggerData presumably comes from <apertium/tagger_data.h> -- confirm
+  bool debug; // the three flags below are accessible to derived taggers; presumably written by the setters above
+  bool show_sf;
+  bool null_flush;
+};
+}
+
+#endif // FILE_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/format.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/format.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/format.rng	(revision 69632)
@@ -0,0 +1,303 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+  
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+     DTD for the format specification files
+     Sergio Ortiz  2005.05.13
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0">
+  <define name="format">
+    <element name="format">
+      <ref name="attlist.format"/>
+      <ref name="options"/>
+      <ref name="rules"/>
+    </element>
+  </define>
+  <define name="attlist.format" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <!--
+    'format' is the root element containing the whole format specification
+    file.  The attribute 'name' specifies the name of the format
+  -->
+  <define name="options">
+    <element name="options">
+      <ref name="attlist.options"/>
+      <ref name="largeblocks"/>
+      <ref name="input"/>
+      <ref name="output"/>
+      <ref name="tag-name"/>
+      <ref name="escape-chars"/>
+      <ref name="space-chars"/>
+      <ref name="case-sensitive"/>
+    </element>
+  </define>
+  <define name="attlist.options" combine="interleave">
+    <empty/>
+  </define>
+  <!-- General options of the format -->
+  <define name="largeblocks">
+    <element name="largeblocks">
+      <ref name="attlist.largeblocks"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.largeblocks" combine="interleave">
+    <attribute name="size"/>
+  </define>
+  <!--
+    The attribute size is used to define the maximal size in bytes of
+    inline format blocks
+  -->
+  <define name="input">
+    <element name="input">
+      <ref name="attlist.input"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.input" combine="interleave">
+    <optional>
+      <attribute name="zip-path"/>
+    </optional>
+  </define>
+  <define name="attlist.input" combine="interleave">
+    <attribute name="encoding"/>
+  </define>
+  <!-- Reserved for future extensions -->
+  <define name="output">
+    <element name="output">
+      <ref name="attlist.output"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.output" combine="interleave">
+    <optional>
+      <attribute name="zip-path"/>
+    </optional>
+  </define>
+  <define name="attlist.output" combine="interleave">
+    <attribute name="encoding"/>
+  </define>
+  <!-- Reserved for future extensions -->
+  <define name="tag-name">
+    <element name="tag-name">
+      <ref name="attlist.tag-name"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.tag-name" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    The attribute regexp defines (with a _flex_ regular expression) how to
+    take a tag name from a whole tag.
+  -->
+  <define name="escape-chars">
+    <element name="escape-chars">
+      <ref name="attlist.escape-chars"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.escape-chars" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    The attribute regexp defines (with a _flex_ regular expression) the
+    set of characters to be escaped with a preceding backslash '\'
+  -->
+  <define name="space-chars">
+    <element name="space-chars">
+      <ref name="attlist.space-chars"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.space-chars" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    Define the space characters (in regexp) with a _flex_ regular 
+    expression
+  -->
+  <define name="case-sensitive">
+    <element name="case-sensitive">
+      <ref name="attlist.case-sensitive"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.case-sensitive" combine="interleave">
+    <attribute name="value">
+      <choice>
+        <value>yes</value>
+        <value>no</value>
+      </choice>
+    </attribute>
+  </define>
+  <!--
+    The attribute 'value' is set to 'yes' if the case is relevant in the 
+    specification of the format.  Otherwise it is set to 'no'
+  -->
+  <define name="rules">
+    <element name="rules">
+      <ref name="attlist.rules"/>
+      <oneOrMore>
+        <choice>
+          <ref name="format-rule"/>
+          <ref name="replacement-rule"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.rules" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    Groups the format-processing rules and the rules that replace
+    expressions with characters that are part of the text
+  -->
+  <define name="format-rule">
+    <element name="format-rule">
+      <ref name="attlist.format-rule"/>
+      <choice>
+        <ref name="tag"/>
+        <group>
+          <ref name="begin"/>
+          <ref name="end"/>
+        </group>
+      </choice>
+    </element>
+  </define>
+  <define name="attlist.format-rule" combine="interleave">
+    <optional>
+      <attribute name="type">
+        <choice>
+          <value>comment</value>
+          <value>empty</value>
+          <value>open</value>
+          <value>close</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <define name="attlist.format-rule" combine="interleave">
+    <optional>
+      <attribute name="eos">
+        <choice>
+          <value>yes</value>
+          <value>no</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <define name="attlist.format-rule" combine="interleave">
+    <attribute name="priority"/>
+  </define>
+  <!--
+    Format rule parent element.  It may include a 'tag' element or
+    a pair of elements 'begin', 'end'.  In the first case, this element is
+    considered to be part of the format.  In the second case, the 'begin' and
+    'end' elements are considered to enclose format.  The attribute
+    'eos' (end of sentence) is set to 'yes' if that rule defines a dot in
+    the text being processed (it is 'no' by default).  The attribute 'priority'
+    marks the order of precedence of the rule
+  -->
+  <define name="tag">
+    <element name="tag">
+      <ref name="attlist.tag"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.tag" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    Define an element that is part of the format by the pattern specified
+    as a value for the regexp attribute
+  -->
+  <define name="begin">
+    <element name="begin">
+      <ref name="attlist.begin"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.begin" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    The attribute 'regexp' is the regular expression that detects the
+    beginning delimiter of a block of format
+  -->
+  <define name="end">
+    <element name="end">
+      <ref name="attlist.end"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.end" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    The attribute 'regexp' is the regular expression that detects the
+    ending delimiter of a block of format
+  -->
+  <define name="replacement-rule">
+    <element name="replacement-rule">
+      <ref name="attlist.replacement-rule"/>
+      <oneOrMore>
+        <ref name="replace"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.replacement-rule" combine="interleave">
+    <attribute name="regexp"/>
+  </define>
+  <!--
+    Root element for a replacement rule.  The attribute 'regexp' is the
+    general expression to detect the elements to replace
+  -->
+  <define name="replace">
+    <element name="replace">
+      <ref name="attlist.replace"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.replace" combine="interleave">
+    <attribute name="source"/>
+  </define>
+  <define name="attlist.replace" combine="interleave">
+    <attribute name="target"/>
+  </define>
+  <define name="attlist.replace" combine="interleave">
+    <optional>
+      <attribute name="prefer">
+        <choice>
+          <value>yes</value>
+          <value>no</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <start>
+    <choice>
+      <ref name="format"/>
+    </choice>
+  </start>
+</grammar>
+<!--
+  Replacement rule.  The 'source' is a string of one or more characters.
+  The 'target' MUST be a single character.  The 'prefer' attribute, when 
+  set to 'yes' defines the preferred reverse translation of the 
+  replacement.
+-->
Index: branches/apertium-tagger/apertium2/apertium/interchunk.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk.rng	(revision 69632)
@@ -0,0 +1,971 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+  
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+   Draft of DTD for the structural transfer rule files 
+  
+   Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+   2005.07.29. 
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+  <define name="condition">
+    <choice>
+      <ref name="and"/>
+      <ref name="or"/>
+      <ref name="not"/>
+      <ref name="equal"/>
+      <ref name="begins-with"/>
+      <ref name="begins-with-list"/>
+      <ref name="ends-with"/>
+      <ref name="ends-with-list"/>
+      <ref name="contains-substring"/>
+      <ref name="in"/>
+    </choice>
+  </define>
+  <define name="container">
+    <choice>
+      <ref name="var"/>
+      <ref name="clip"/>
+    </choice>
+  </define>
+  <define name="sentence">
+    <choice>
+      <ref name="let"/>
+      <ref name="out"/>
+      <ref name="choose"/>
+      <ref name="modify-case"/>
+      <ref name="call-macro"/>
+      <ref name="append"/>
+    </choice>
+  </define>
+  <define name="value">
+    <choice>
+      <ref name="b"/>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="lit-tag"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+      <ref name="concat"/>
+      <ref name="chunk"/>
+    </choice>
+  </define>
+  <define name="stringvalue">
+    <choice>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+    </choice>
+  </define>
+  <define name="interchunk">
+    <element name="interchunk">
+      <ref name="attlist.interchunk"/>
+      <ref name="section-def-cats"/>
+      <ref name="section-def-attrs"/>
+      <ref name="section-def-vars"/>
+      <optional>
+        <ref name="section-def-lists"/>
+      </optional>
+      <optional>
+        <ref name="section-def-macros"/>
+      </optional>
+      <ref name="section-rules"/>
+    </element>
+  </define>
+  <define name="attlist.interchunk" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    'interchunk' is the root element containing the whole structural
+    interchunk rule file.  
+  -->
+  <define name="section-def-cats">
+    <element name="section-def-cats">
+      <ref name="attlist.section-def-cats"/>
+      <oneOrMore>
+        <ref name="def-cat"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-cats" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+         The 'def-cats' section defines the categories used to build the
+    patterns used in rules
+  -->
+  <define name="def-cat">
+    <element name="def-cat">
+      <ref name="attlist.def-cat"/>
+      <oneOrMore>
+        <ref name="cat-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-cat" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'def-cat' defines one category in terms of a list of
+    category items and has a unique name 'n', which is mandatory
+  -->
+  <define name="cat-item">
+    <element name="cat-item">
+      <ref name="attlist.cat-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.cat-item" combine="interleave">
+    <optional>
+      <attribute name="lemma"/>
+    </optional>
+    <attribute name="tags"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+         Each 'cat-item' (category item) represents a set of lexical forms
+    and has a mandatory attribute 'tags' whose value is a sequence of
+    dot-separated tag names; this sequence is a subsequence of the
+    tag sequence defining each possible lexical form. For example,
+    tags="n.f" would match all lexical forms containing this tag
+    sequence, such as "^casa<n><f><pl>$".
+    
+    In addition, an optional attribute, "lemma", may be used to
+    define lexical forms having a particular substring in their lemma
+  -->
+  <define name="section-def-attrs">
+    <element name="section-def-attrs">
+      <ref name="attlist.section-def-attrs"/>
+      <oneOrMore>
+        <ref name="def-attr"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-attrs" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-attrs' section defines the attributes that will be
+    identified in matched lexical forms 
+  -->
+  <define name="def-attr">
+    <element name="def-attr">
+      <ref name="attlist.def-attr"/>
+      <oneOrMore>
+        <ref name="attr-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-attr" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each def-attr defines one attribute in terms of a list of
+    attribute items and has a mandatory unique name n 
+  -->
+  <define name="attr-item">
+    <element name="attr-item">
+      <ref name="attlist.attr-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.attr-item" combine="interleave">
+    <optional>
+      <attribute name="tags"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'attr-item' specifies a subsequence of the tags in
+    that lexical form (attribute 'tags')
+  -->
+  <define name="section-def-vars">
+    <element name="section-def-vars">
+      <ref name="attlist.section-def-vars"/>
+      <oneOrMore>
+        <ref name="def-var"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-vars" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-vars' section defines the global variables
+    that will be used to transfer information between rules
+  -->
+  <define name="def-var">
+    <element name="def-var">
+      <ref name="attlist.def-var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.def-var" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="v"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The definition of a global variable has a mandatory unique name 'n' that
+    will be used to refer to it. An initial value can also be specified
+    by means of the 'v' attribute.  The default initial value is the
+    empty string.
+  -->
+  <define name="section-def-lists">
+    <element name="section-def-lists">
+      <ref name="attlist.section-def-lists"/>
+      <oneOrMore>
+        <ref name="def-list"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-lists" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Element 'section-def-lists' encloses a set of list definitions -->
+  <define name="def-list">
+    <element name="def-list">
+      <ref name="attlist.def-list"/>
+      <oneOrMore>
+        <ref name="list-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-list" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The 'def-list' element defines a named list to search with the 'in' 
+    element.  Attribute 'n' sets the name of the list
+  -->
+  <define name="list-item">
+    <element name="list-item">
+      <ref name="attlist.list-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list-item" combine="interleave">
+    <attribute name="v"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Attribute 'v' of 'list-item' element contains the value to be added to 
+    the list being defined     
+  -->
+  <define name="section-def-macros">
+    <element name="section-def-macros">
+      <ref name="attlist.section-def-macros"/>
+      <oneOrMore>
+        <ref name="def-macro"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-macros" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    
+    The 'def-macros' section defines macros containing portions of
+    code frequently used in the action part of rules
+    
+  -->
+  <define name="def-macro">
+    <element name="def-macro">
+      <ref name="attlist.def-macro"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="npar"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Macro definition:
+    
+    A macro has a mandatory name (the value of 'n'), a number of parameters
+    (the value of 'npar') and a body containing arguments and statements.  
+  -->
+  <define name="section-rules">
+    <element name="section-rules">
+      <ref name="attlist.section-rules"/>
+      <oneOrMore>
+        <ref name="rule"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-rules" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The rules section contains a sequence of one or more rules -->
+  <define name="rule">
+    <element name="rule">
+      <ref name="attlist.rule"/>
+      <ref name="pattern"/>
+      <ref name="action"/>
+    </element>
+  </define>
+  <define name="attlist.rule" combine="interleave">
+    <optional>
+      <attribute name="comment"/>
+    </optional>
+  </define>
+  <!--
+    Each rule has a pattern and an action 
+    * attribute 'comment' allows a comment to be added about the purpose of
+      the rule being defined
+  -->
+  <define name="pattern">
+    <element name="pattern">
+      <ref name="attlist.pattern"/>
+      <oneOrMore>
+        <ref name="pattern-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.pattern" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The pattern is specified in terms of pattern items, each one
+    representing a lexical form in the matched pattern 
+  -->
+  <define name="pattern-item">
+    <element name="pattern-item">
+      <ref name="attlist.pattern-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.pattern-item" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!-- Each pattern item refers, through 'n', to a category by its name in the def-cats section -->
+  <define name="action">
+    <element name="action">
+      <ref name="attlist.action"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.action" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Encloses the procedural part of a rule -->
+  <define name="choose">
+    <element name="choose">
+      <ref name="attlist.choose"/>
+      <oneOrMore>
+        <ref name="when"/>
+      </oneOrMore>
+      <optional>
+        <ref name="otherwise"/>
+      </optional>
+    </element>
+  </define>
+  <define name="attlist.choose" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The choose statement is a selection statement (similar to a case
+    statement) composed of one or more tested cases and an optional
+    otherwise 
+  -->
+  <define name="when">
+    <element name="when">
+      <ref name="attlist.when"/>
+      <ref name="test"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.when" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Each tested case is a block of zero or more statements -->
+  <define name="otherwise">
+    <element name="otherwise">
+      <ref name="attlist.otherwise"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.otherwise" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- The otherwise case is also a block of one or more statements -->
+  <define name="test">
+    <element name="test">
+      <ref name="attlist.test"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.test" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The test in a tested case may be a conjunction, a disjunction, or
+    a negation of simpler tests, as well as a simple equality test
+  -->
+  <define name="and">
+    <element name="and">
+      <ref name="attlist.and"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.and" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each conjunction test contains two or more simpler tests -->
+  <define name="or">
+    <element name="or">
+      <ref name="attlist.or"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.or" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each disjunction test contains two or more simpler tests -->
+  <define name="not">
+    <element name="not">
+      <ref name="attlist.not"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.not" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The negation of a simpler test is a test itself -->
+  <define name="equal">
+    <element name="equal">
+      <ref name="attlist.equal"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.equal" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    The simplest test is an equality test. The right part and the
+    left part of the equality may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with">
+    <element name="begins-with">
+      <ref name="attlist.begins-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with">
+    <element name="ends-with">
+      <ref name="attlist.ends-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with-list">
+    <element name="begins-with-list">
+      <ref name="attlist.begins-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with-list">
+    <element name="ends-with-list">
+      <ref name="attlist.ends-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="contains-substring">
+    <element name="contains-substring">
+      <ref name="attlist.contains-substring"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.contains-substring" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="in">
+    <element name="in">
+      <ref name="attlist.in"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.in" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+  -->
+  <define name="list">
+    <element name="list">
+      <ref name="attlist.list"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    'list' refers, by the name in attribute 'n', to a list defined earlier in
+    the 'section-def-lists' section
+  -->
+  <define name="let">
+    <element name="let">
+      <ref name="attlist.let"/>
+      <ref name="container"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.let" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    An assignment statement ('let') assigns the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip
+  -->
+  <define name="append">
+    <element name="append">
+      <ref name="attlist.append"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.append" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    This instruction appends the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip, identified by the "n" attribute
+  -->
+  <define name="out">
+    <element name="out">
+      <ref name="attlist.out"/>
+      <oneOrMore>
+        <choice>
+          <ref name="b"/>
+          <ref name="chunk"/>
+          <ref name="var"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.out" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- 'out' is an output statement; it may output blanks, chunks or variables -->
+  <define name="modify-case">
+    <element name="modify-case">
+      <ref name="attlist.modify-case"/>
+      <ref name="container"/>
+      <ref name="stringvalue"/>
+    </element>
+  </define>
+  <define name="attlist.modify-case" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The first argument of 'modify-case' copies the case of the second
+    argument.
+  -->
+  <define name="call-macro">
+    <element name="call-macro">
+      <ref name="attlist.call-macro"/>
+      <zeroOrMore>
+        <ref name="with-param"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.call-macro" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    A macro may be called anywhere by name with zero or more
+    arguments ('with-param' elements)
+  -->
+  <define name="with-param">
+    <element name="with-param">
+      <ref name="attlist.with-param"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.with-param" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    The attribute pos in each argument is used to refer to a lexical
+    form in the current rule. For example, if a 2-parameter macro
+    has been defined to perform noun-adjective agreement operations,
+    it may be used with arguments 1 and 2 in a noun-adjective rule,
+    with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+    arguments 1 and 3 in a noun-adverb-adjective rule, and with
+    arguments 2 and 1 in an adjective-noun rule 
+  -->
+  <define name="clip">
+    <element name="clip">
+      <ref name="attlist.clip"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.clip" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="part"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    A 'clip' is a substring of a source-language or target-language
+    lexical form, extracted according to an attribute:
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+    
+  -->
+  <define name="lit">
+    <element name="lit">
+      <ref name="attlist.lit"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal string value: the value of the literal is the value of
+    the 'v' attribute
+  -->
+  <define name="lit-tag">
+    <element name="lit-tag">
+      <ref name="attlist.lit-tag"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit-tag" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal tag value: the value of the tag is the value of
+    the 'v' attribute
+  -->
+  <define name="var">
+    <element name="var">
+      <ref name="attlist.var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.var" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    Each 'var' is a variable identifier: the attribute n is the name
+    of the variable. When it is in an 'out', a 'test', or the right
+    part of a 'let', it represents the value of the variable; when in
+    the left part of a 'let' it represents the reference of the
+    variable. 
+  -->
+  <define name="get-case-from">
+    <element name="get-case-from">
+      <ref name="attlist.get-case-from"/>
+      <choice>
+        <ref name="clip"/>
+        <ref name="lit"/>
+        <ref name="var"/>
+      </choice>
+    </element>
+  </define>
+  <define name="attlist.get-case-from" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    Warning: all the comments that involve get-case-from
+    still need to be updated
+  -->
+  <define name="case-of">
+    <element name="case-of">
+      <ref name="attlist.case-of"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.case-of" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="part"/>
+  </define>
+  <!--
+    A 'case-of' is a value representing the case of a "clip".  This value
+    will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+    (all uppercase).
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+  -->
+  <define name="concat">
+    <element name="concat">
+      <ref name="attlist.concat"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.concat" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Concatenates a sequence of values -->
+  <define name="chunk">
+    <element name="chunk">
+      <ref name="attlist.chunk"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.chunk" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Encloses a chunk      -->
+  <define name="pseudolemma">
+    <element name="pseudolemma">
+      <ref name="attlist.pseudolemma"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.pseudolemma" combine="interleave">
+    <empty/>
+  </define>
+  <define name="b">
+    <element name="b">
+      <ref name="attlist.b"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.b" combine="interleave">
+    <optional>
+      <attribute name="pos"/>
+    </optional>
+  </define>
+  <start>
+    <choice>
+      <ref name="interchunk"/>
+      <ref name="pseudolemma"/>
+    </choice>
+  </start>
+</grammar>
+<!--
+  'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+  with pos="2" refers to the [super]blanks (including format data
+  encapsulated by the de-formatter) between lexical form 2 and
+  lexical form 3. Managing [super]blanks explicitly allows for the
+  correct placement of format when the result of structural
+  transfer has more or less lexical items than the original or has
+  been reordered in some way.  If attribute "pos" is not specified, then
+  a single blank (ASCII 32) is generated.
+-->
Index: branches/apertium-tagger/apertium2/apertium/postchunk.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/postchunk.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/postchunk.rng	(revision 69632)
@@ -0,0 +1,971 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+  
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+   Draft of DTD for the structural transfer rule files 
+  
+   Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+   2005.07.29. 
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+  <define name="condition">
+    <choice>
+      <ref name="and"/>
+      <ref name="or"/>
+      <ref name="not"/>
+      <ref name="equal"/>
+      <ref name="begins-with"/>
+      <ref name="begins-with-list"/>
+      <ref name="ends-with"/>
+      <ref name="ends-with-list"/>
+      <ref name="contains-substring"/>
+      <ref name="in"/>
+    </choice>
+  </define>
+  <define name="container">
+    <choice>
+      <ref name="var"/>
+      <ref name="clip"/>
+    </choice>
+  </define>
+  <define name="sentence">
+    <choice>
+      <ref name="let"/>
+      <ref name="out"/>
+      <ref name="choose"/>
+      <ref name="modify-case"/>
+      <ref name="call-macro"/>
+      <ref name="append"/>
+    </choice>
+  </define>
+  <define name="value">
+    <choice>
+      <ref name="b"/>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="lit-tag"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+      <ref name="concat"/>
+      <ref name="lu-count"/>
+      <ref name="lu"/>
+      <ref name="mlu"/>
+    </choice>
+  </define>
+  <define name="stringvalue">
+    <choice>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+      <ref name="lu-count"/>
+    </choice>
+  </define>
+  <define name="postchunk">
+    <element name="postchunk">
+      <ref name="attlist.postchunk"/>
+      <ref name="section-def-cats"/>
+      <ref name="section-def-attrs"/>
+      <ref name="section-def-vars"/>
+      <optional>
+        <ref name="section-def-lists"/>
+      </optional>
+      <optional>
+        <ref name="section-def-macros"/>
+      </optional>
+      <ref name="section-rules"/>
+    </element>
+  </define>
+  <define name="attlist.postchunk" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    'postchunk' is the root element containing the whole structural
+    postchunk rule file.  
+  -->
+  <define name="section-def-cats">
+    <element name="section-def-cats">
+      <ref name="attlist.section-def-cats"/>
+      <oneOrMore>
+        <ref name="def-cat"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-cats" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+         The 'def-cats' section defines the categories used to build the
+    patterns used in rules
+  -->
+  <define name="def-cat">
+    <element name="def-cat">
+      <ref name="attlist.def-cat"/>
+      <oneOrMore>
+        <ref name="cat-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-cat" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'def-cat' defines one category in terms of a list of
+    category items and has a unique name 'n', which is mandatory
+  -->
+  <define name="cat-item">
+    <element name="cat-item">
+      <ref name="attlist.cat-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.cat-item" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <!--
+    The required attribute "name" is used to specify
+    which chunk name is detected by this cat-item
+  -->
+  <define name="section-def-attrs">
+    <element name="section-def-attrs">
+      <ref name="attlist.section-def-attrs"/>
+      <oneOrMore>
+        <ref name="def-attr"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-attrs" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-attrs' section defines the attributes that will be
+    identified in matched lexical forms 
+  -->
+  <define name="def-attr">
+    <element name="def-attr">
+      <ref name="attlist.def-attr"/>
+      <oneOrMore>
+        <ref name="attr-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-attr" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each def-attr defines one attribute in terms of a list of
+    attribute items and has a mandatory unique name n 
+  -->
+  <define name="attr-item">
+    <element name="attr-item">
+      <ref name="attlist.attr-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.attr-item" combine="interleave">
+    <optional>
+      <attribute name="tags"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'attr-item' specifies a subsequence of the tags in
+    that lexical form (attribute 'tags')
+  -->
+  <define name="section-def-vars">
+    <element name="section-def-vars">
+      <ref name="attlist.section-def-vars"/>
+      <oneOrMore>
+        <ref name="def-var"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-vars" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-vars' section defines the global variables
+    that will be used to transfer information between rules
+  -->
+  <define name="def-var">
+    <element name="def-var">
+      <ref name="attlist.def-var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.def-var" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="v"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The definition of a global variable has a mandatory unique name 'n' that
+    will be used to refer to it. An initial value can also be specified
+    by means of the 'v' attribute.  The default value of the initialization is the
+    empty string.
+  -->
+  <define name="section-def-lists">
+    <element name="section-def-lists">
+      <ref name="attlist.section-def-lists"/>
+      <oneOrMore>
+        <ref name="def-list"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-lists" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Element 'section-def-lists' encloses a set of list definitions -->
+  <define name="def-list">
+    <element name="def-list">
+      <ref name="attlist.def-list"/>
+      <oneOrMore>
+        <ref name="list-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-list" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The 'def-list' element defines a named list to search with the 'in' 
+    element.  Attribute 'n' sets the name of the list
+  -->
+  <define name="list-item">
+    <element name="list-item">
+      <ref name="attlist.list-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list-item" combine="interleave">
+    <attribute name="v"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Attribute 'v' of 'list-item' element contains the value to be added to 
+    the list being defined     
+  -->
+  <define name="section-def-macros">
+    <element name="section-def-macros">
+      <ref name="attlist.section-def-macros"/>
+      <oneOrMore>
+        <ref name="def-macro"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-macros" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    
+    The 'def-macros' section defines macros containing portions of
+    code frequently used in the action part of rules
+    
+  -->
+  <define name="def-macro">
+    <element name="def-macro">
+      <ref name="attlist.def-macro"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="npar"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Macro definition:
+    
+    A macro has a mandatory name (the value of 'n'), a number of parameters
+    (the value of 'npar') and a body containing arguments and statements.  
+  -->
+  <define name="section-rules">
+    <element name="section-rules">
+      <ref name="attlist.section-rules"/>
+      <oneOrMore>
+        <ref name="rule"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-rules" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The rules section contains a sequence of one or more rules -->
+  <define name="rule">
+    <element name="rule">
+      <ref name="attlist.rule"/>
+      <ref name="pattern"/>
+      <ref name="action"/>
+    </element>
+  </define>
+  <define name="attlist.rule" combine="interleave">
+    <optional>
+      <attribute name="comment"/>
+    </optional>
+  </define>
+  <!--
+    Each rule has a pattern and an action 
+    * Attribute 'comment' allows a comment to be included with the rule
+  -->
+  <define name="pattern">
+    <element name="pattern">
+      <ref name="attlist.pattern"/>
+      <ref name="pattern-item"/>
+    </element>
+  </define>
+  <define name="attlist.pattern" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The pattern is specified in terms of pattern items, each one
+    representing a lexical form in the matched pattern 
+  -->
+  <define name="pattern-item">
+    <element name="pattern-item">
+      <ref name="attlist.pattern-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.pattern-item" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!-- Each category to be matched is referred to by its name in the def-cats section -->
+  <define name="action">
+    <element name="action">
+      <ref name="attlist.action"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.action" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Encloses the procedural part of a rule -->
+  <define name="choose">
+    <element name="choose">
+      <ref name="attlist.choose"/>
+      <oneOrMore>
+        <ref name="when"/>
+      </oneOrMore>
+      <optional>
+        <ref name="otherwise"/>
+      </optional>
+    </element>
+  </define>
+  <define name="attlist.choose" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The choose statement is a selection statement (similar to a case
+    statement) composed of one or more tested cases and an optional
+    otherwise 
+  -->
+  <define name="when">
+    <element name="when">
+      <ref name="attlist.when"/>
+      <ref name="test"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.when" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Each tested case is a block of zero or more statements -->
+  <define name="otherwise">
+    <element name="otherwise">
+      <ref name="attlist.otherwise"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.otherwise" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- The otherwise case is also a block of one or more statements -->
+  <define name="test">
+    <element name="test">
+      <ref name="attlist.test"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.test" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The test in a tested case may be a conjunction, a disjunction, or
+    a negation of simpler tests, as well as a simple equality test
+  -->
+  <define name="and">
+    <element name="and">
+      <ref name="attlist.and"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.and" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each conjunction test contains two or more simpler tests -->
+  <define name="or">
+    <element name="or">
+      <ref name="attlist.or"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.or" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each disjunction test contains two or more simpler tests -->
+  <define name="not">
+    <element name="not">
+      <ref name="attlist.not"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.not" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The negation of a simpler test is a test itself -->
+  <define name="equal">
+    <element name="equal">
+      <ref name="attlist.equal"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.equal" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    The simplest test is an equality test. The right part and the
+    left part of the equality may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with">
+    <element name="begins-with">
+      <ref name="attlist.begins-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with">
+    <element name="ends-with">
+      <ref name="attlist.ends-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with-list">
+    <element name="begins-with-list">
+      <ref name="attlist.begins-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with-list">
+    <element name="ends-with-list">
+      <ref name="attlist.ends-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="contains-substring">
+    <element name="contains-substring">
+      <ref name="attlist.contains-substring"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.contains-substring" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="in">
+    <element name="in">
+      <ref name="attlist.in"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.in" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+  -->
+  <define name="list">
+    <element name="list">
+      <ref name="attlist.list"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    'list' refers, with the name in attribute 'n', a list defined before in
+    the 'section-def-list' section
+  -->
+  <define name="let">
+    <element name="let">
+      <ref name="attlist.let"/>
+      <ref name="container"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.let" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    An assignment statement ('let') assigns the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip
+  -->
+  <define name="append">
+    <element name="append">
+      <ref name="attlist.append"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.append" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    This instruction appends the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip, identified by the "n" attribute
+  -->
+  <define name="out">
+    <element name="out">
+      <ref name="attlist.out"/>
+      <oneOrMore>
+        <choice>
+          <ref name="b"/>
+          <ref name="lu"/>
+          <ref name="mlu"/>
+          <ref name="var"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.out" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- 'out' is an output statement; it may output blanks, words ('lu'), multiwords ('mlu') or variables -->
+  <define name="modify-case">
+    <element name="modify-case">
+      <ref name="attlist.modify-case"/>
+      <ref name="container"/>
+      <ref name="stringvalue"/>
+    </element>
+  </define>
+  <define name="attlist.modify-case" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The first argument of 'modify-case' copies the case of the second 
+    argument.
+  -->
+  <define name="call-macro">
+    <element name="call-macro">
+      <ref name="attlist.call-macro"/>
+      <zeroOrMore>
+        <ref name="with-param"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.call-macro" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    A macro may be called anywhere by name with zero or more
+    arguments
+  -->
+  <define name="with-param">
+    <element name="with-param">
+      <ref name="attlist.with-param"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.with-param" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    The attribute pos in each argument is used to refer to a lexical
+    form in the current rule. For example, if a 2-parameter macro
+    has been defined to perform noun-adjective agreement operations,
+    it may be used with arguments 1 and 2 in a noun-adjective rule,
+    with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+    arguments 1 and 3 in a noun-adverb-adjective rule, and with
+    arguments 2 and 1 in an adjective-noun rule 
+  -->
+  <define name="clip">
+    <element name="clip">
+      <ref name="attlist.clip"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.clip" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="part"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    A 'clip' is a substring of a source-language or target-language
+    lexical form, extracted according to an attribute:
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+    
+  -->
+  <define name="lit">
+    <element name="lit">
+      <ref name="attlist.lit"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal string value: the value of the literal is the value of
+    the 'v' attribute
+  -->
+  <define name="lit-tag">
+    <element name="lit-tag">
+      <ref name="attlist.lit-tag"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit-tag" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal tag value: the value of the tag is the value of
+    the 'v' attribute
+  -->
+  <define name="var">
+    <element name="var">
+      <ref name="attlist.var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.var" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    Each 'var' is a variable identifier: the attribute n is the name
+    of the variable. When it is in an 'out', a 'test', or the right
+    part of a 'let', it represents the value of the variable; when in
+    the left part of a 'let' it represents the reference of the
+    variable. 
+  -->
+  <define name="get-case-from">
+    <element name="get-case-from">
+      <ref name="attlist.get-case-from"/>
+      <choice>
+        <ref name="clip"/>
+        <ref name="lit"/>
+        <ref name="var"/>
+      </choice>
+    </element>
+  </define>
+  <define name="attlist.get-case-from" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    Warning: all the comments that mention get-case-from still
+    need to be updated
+  -->
+  <define name="case-of">
+    <element name="case-of">
+      <ref name="attlist.case-of"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.case-of" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="part"/>
+  </define>
+  <!--
+    A 'case-of' is a value representing the case of a "clip".  This value 
+    will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+    (all uppercase).
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+  -->
+  <define name="concat">
+    <element name="concat">
+      <ref name="attlist.concat"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.concat" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Concatenates a sequence of values -->
+  <define name="mlu">
+    <element name="mlu">
+      <ref name="attlist.mlu"/>
+      <oneOrMore>
+        <ref name="lu"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.mlu" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Encloses a multiword -->
+  <define name="lu">
+    <element name="lu">
+      <ref name="attlist.lu"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.lu" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Encloses a word -->
+  <define name="b">
+    <element name="b">
+      <ref name="attlist.b"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.b" combine="interleave">
+    <optional>
+      <attribute name="pos"/>
+    </optional>
+  </define>
+  <!--
+    'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+    with pos="2" refers to the [super]blanks (including format data
+    encapsulated by the de-formatter) between lexical form 2 and
+    lexical form 3. Managing [super]blanks explicitly allows for the
+    correct placement of format when the result of structural
+    transfer has more or less lexical items than the original or has
+    been reordered in some way.  If attribute "pos" is not specified, then
+    a single blank (ASCII 32) is generated.
+  -->
+  <define name="lu-count">
+    <element name="lu-count">
+      <ref name="attlist.lu-count"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lu-count" combine="interleave">
+    <empty/>
+  </define>
+  <start>
+    <choice>
+      <ref name="postchunk"/>
+    </choice>
+  </start>
+</grammar>
+<!-- Number of lexical units (words inside the chunk) in the rule -->
Index: branches/apertium-tagger/apertium2/apertium/tagger.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger.rng	(revision 69632)
@@ -0,0 +1,310 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+  
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+    DTD for the tagset and the rules to enforce the state to state
+    transition probabilities used by the part-of-speech tagger. 
+    2005.07.29.
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0">
+  <define name="tagger">
+    <element name="tagger">
+      <ref name="attlist.tagger"/>
+      <ref name="tagset"/>
+      <optional>
+        <ref name="forbid"/>
+      </optional>
+      <optional>
+        <ref name="enforce-rules"/>
+      </optional>
+      <optional>
+        <ref name="preferences"/>
+      </optional>
+      <optional>
+        <ref name="discard-on-ambiguity"/>
+      </optional>
+    </element>
+  </define>
+  <define name="attlist.tagger" combine="interleave">
+    <attribute name="name"/>
+  </define>
+  <!--
+        'tagger' is the root element containing the whole tagset for a given
+    language specified through the mandatory attribute 'name'
+  -->
+  <define name="tagset">
+    <element name="tagset">
+      <ref name="attlist.tagset"/>
+      <oneOrMore>
+        <ref name="def-label"/>
+      </oneOrMore>
+      <zeroOrMore>
+        <ref name="def-mult"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.tagset" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+        The 'tagset' section defines the correspondence between simple 
+    or multiple morphological categories defining a lexical form and the coarser 
+    ones with which the part-of-speech tagger works
+  -->
+  <define name="def-label">
+    <element name="def-label">
+      <ref name="attlist.def-label"/>
+      <oneOrMore>
+        <ref name="tags-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-label" combine="interleave">
+    <attribute name="name"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+    <optional>
+      <attribute name="closed"/>
+    </optional>
+  </define>
+  <!--
+        Each 'def-label' defines one coarse tag in terms of a list of fine tags 
+    and has a mandatory unique name. The optional attribute 'closed="true"' may be used
+    to specify if the defined fine tags belong to a closed list.
+    c is for comments and is ignored
+  -->
+  <define name="tags-item">
+    <element name="tags-item">
+      <ref name="attlist.tags-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.tags-item" combine="interleave">
+    <attribute name="tags"/>
+    <optional>
+      <attribute name="lemma"/>
+    </optional>
+  </define>
+  <!--
+        Each 'tags-item' may be a dot-separated subsequence of the morphological tags
+    corresponding to a coarse tag optionally in association with a given lemma 
+  -->
+  <define name="def-mult">
+    <element name="def-mult">
+      <ref name="attlist.def-mult"/>
+      <oneOrMore>
+        <ref name="sequence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-mult" combine="interleave">
+    <attribute name="name"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+    <optional>
+      <attribute name="closed"/>
+    </optional>
+  </define>
+  <!--
+        Each 'def-mult' defines one coarse tag in terms of a sequence of coarse
+    tags previously defined as 'def-labels' or a sequence of fine tags. A mandatory 
+    name is required for each 'def-mult' which may also have an optional attribute 
+    'closed="true"' if it belongs to a closed list
+    c is for comments and is ignored
+  -->
+  <define name="sequence">
+    <element name="sequence">
+      <ref name="attlist.sequence"/>
+      <oneOrMore>
+        <choice>
+          <ref name="tags-item"/>
+          <ref name="label-item"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.sequence" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+        Element 'sequence' encloses a set of tags or labels which defines 
+    a unit with more than one label
+  -->
+  <define name="label-item">
+    <element name="label-item">
+      <ref name="attlist.label-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.label-item" combine="interleave">
+    <attribute name="label"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+        Each 'label' of the 'label-item' corresponds to a coarse tag previously 
+    defined as a 'def-label' by a name.
+    c is for comments and is ignored
+  -->
+  <define name="forbid">
+    <element name="forbid">
+      <ref name="attlist.forbid"/>
+      <oneOrMore>
+        <ref name="label-sequence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.forbid" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+        Element 'forbid' contains sequences of morphological categories that are not 
+    allowed in a given language
+  -->
+  <define name="label-sequence">
+    <element name="label-sequence">
+      <ref name="attlist.label-sequence"/>
+      <oneOrMore>
+        <ref name="label-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.label-sequence" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+        Each 'label-sequence' is restricted to two 'label-items' 
+    c is for comments and is ignored
+  -->
+  <define name="enforce-rules">
+    <element name="enforce-rules">
+      <ref name="attlist.enforce-rules"/>
+      <oneOrMore>
+        <ref name="enforce-after"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.enforce-rules" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Element 'enforce-rules' defines sets of coarse tags that must follow specified ones -->
+  <define name="enforce-after">
+    <element name="enforce-after">
+      <ref name="attlist.enforce-after"/>
+      <ref name="label-set"/>
+    </element>
+  </define>
+  <define name="attlist.enforce-after" combine="interleave">
+    <attribute name="label"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+        Each 'enforce-after' encloses the set of coarse tags ('label-set') that must follow 
+    the one defined in 'label', as a mandatory attribute
+    c is for comments and is ignored
+  -->
+  <define name="label-set">
+    <element name="label-set">
+      <ref name="attlist.label-set"/>
+      <oneOrMore>
+        <ref name="label-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.label-set" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+        The set of 'label-items' enforced after a 'label' are enclosed inside element 'label-set'  
+    c is for comments and is ignored
+  -->
+  <define name="preferences">
+    <element name="preferences">
+      <ref name="attlist.preferences"/>
+      <oneOrMore>
+        <ref name="prefer"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.preferences" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+        Element 'preferences' allows choosing among two or more fine tag sequences 
+    which are grouped in the same coarse tag. 
+  -->
+  <define name="prefer">
+    <element name="prefer">
+      <ref name="attlist.prefer"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.prefer" combine="interleave">
+    <attribute name="tags"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+        Each 'prefer' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+    c is for comments and is ignored
+  -->
+  <define name="discard-on-ambiguity">
+    <element name="discard-on-ambiguity">
+      <ref name="attlist.discard-on-ambiguity"/>
+      <oneOrMore>
+        <ref name="discard"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.discard-on-ambiguity" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    List of label-item or tags-item to be discarded when an ambiguity
+    occurs inside a word
+  -->
+  <define name="discard">
+    <element name="discard">
+      <ref name="attlist.discard"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.discard" combine="interleave">
+    <attribute name="tags"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <start>
+    <choice>
+      <ref name="tagger"/>
+    </choice>
+  </start>
+</grammar>
+<!--
+      Each 'discard' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+  c is for comments and is ignored
+-->
Index: branches/apertium-tagger/apertium2/apertium/format.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/format.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/format.rnc	(revision 69632)
@@ -0,0 +1,111 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+#    DTD for the format specification files
+#    Sergio Ortiz  2005.05.13
+
+format = element format { attlist.format, options, rules }
+attlist.format &= attribute name { text }
+# 'format' is the root element containing the whole format specification
+# file.  The attribute 'name' specifies the name of the format
+options =
+  element options {
+    attlist.options,
+    largeblocks,
+    input,
+    output,
+    tag-name,
+    escape-chars,
+    space-chars,
+    case-sensitive
+  }
+attlist.options &= empty
+# General options of the format 
+largeblocks = element largeblocks { attlist.largeblocks, empty }
+attlist.largeblocks &= attribute size { text }
+# The attribute size is used to define the maximal size in bytes of
+# inline format blocks
+input = element input { attlist.input, empty }
+attlist.input &= attribute zip-path { text }?
+attlist.input &= attribute encoding { text }
+# Reserved for future extensions
+output = element output { attlist.output, empty }
+attlist.output &= attribute zip-path { text }?
+attlist.output &= attribute encoding { text }
+# Reserved for future extensions
+tag-name = element tag-name { attlist.tag-name, empty }
+attlist.tag-name &= attribute regexp { text }
+# The attribute regexp defines (with a _flex_ regular expression) how 
+# to take a tag name from a whole tag.
+escape-chars = element escape-chars { attlist.escape-chars, empty }
+attlist.escape-chars &= attribute regexp { text }
+# The attribute regexp defines (with a _flex_ regular expression) the
+# set of characters to be escaped by a preceding backslash '\'
+space-chars = element space-chars { attlist.space-chars, empty }
+attlist.space-chars &= attribute regexp { text }
+# Define the space characters (in regexp) with a _flex_ regular 
+# expression
+case-sensitive =
+  element case-sensitive { attlist.case-sensitive, empty }
+attlist.case-sensitive &= attribute value { "yes" | "no" }
+# The attribute 'value' is set to 'yes' if the case is relevant in the 
+# specification of the format.  Otherwise it is set to 'no'
+rules =
+  element rules { attlist.rules, (format-rule | replacement-rule)+ }
+attlist.rules &= empty
+# Groups the format processing rules and the rules for replacing 
+# expressions with characters that are part of the text
+format-rule =
+  element format-rule {
+    attlist.format-rule,
+    (tag | (begin, end))
+  }
+attlist.format-rule &=
+  attribute type { "comment" | "empty" | "open" | "close" }?
+attlist.format-rule &= attribute eos { "yes" | "no" }?
+attlist.format-rule &= attribute priority { text }
+# Format rule parent element.  It may include a 'tag' element or
+# a couple of elements 'begin', 'end'.  In the first case, this element is 
+# considered to be part of the format.  In the second case, the begin and 
+# the end element are considered to enclose format.  The attribute
+# 'eos' (end of sentence) is set to 'yes' if that rule defines a dot in
+# the text being processed (it is 'no' by default).  The attribute 'priority' 
+# marks the order of precedence of the rule
+tag = element tag { attlist.tag, empty }
+attlist.tag &= attribute regexp { text }
+# Define an element that is part of the format by the pattern specified
+# as a value for the regexp attribute
+begin = element begin { attlist.begin, empty }
+attlist.begin &= attribute regexp { text }
+# The attribute 'regexp' is the regular expression that detects the
+# beginning delimiter of a block of format
+end = element end { attlist.end, empty }
+attlist.end &= attribute regexp { text }
+# The attribute 'regexp' is the regular expression that detects the
+# ending delimiter of a block of format
+replacement-rule =
+  element replacement-rule { attlist.replacement-rule, replace+ }
+attlist.replacement-rule &= attribute regexp { text }
+# Root element for a replacement rule.  The attribute 'regexp' is the
+# general expression to detect the elements to replace
+replace = element replace { attlist.replace, empty }
+attlist.replace &= attribute source { text }
+attlist.replace &= attribute target { text }
+attlist.replace &= attribute prefer { "yes" | "no" }?
+start = format
+# Replacement rule.  The 'source' is a string of one or more characters.
+# The 'target' MUST be a single character.  The 'prefer' attribute, when 
+# set to 'yes' defines the preferred reverse translation of the 
+# replacement.
Index: branches/apertium-tagger/apertium2/apertium/interchunk.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk.rnc	(revision 69632)
@@ -0,0 +1,353 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+#  Draft of DTD for the structural transfer rule files 
+# 
+#  Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+#  2005.07.29. 
+
+condition =
+  and
+  | or
+  | not
+  | equal
+  | begins-with
+  | begins-with-list
+  | ends-with
+  | ends-with-list
+  | contains-substring
+  | in
+container = var | clip
+sentence = let | out | choose | modify-case | call-macro | append
+value =
+  b
+  | clip
+  | lit
+  | lit-tag
+  | var
+  | get-case-from
+  | case-of
+  | concat
+  | chunk
+stringvalue = clip | lit | var | get-case-from | case-of
+interchunk =
+  element interchunk {
+    attlist.interchunk,
+    section-def-cats,
+    section-def-attrs,
+    section-def-vars,
+    section-def-lists?,
+    section-def-macros?,
+    section-rules
+  }
+attlist.interchunk &= empty
+# 'interchunk' is the root element containing the whole structural
+# interchunk rule file.  
+section-def-cats =
+  element section-def-cats { attlist.section-def-cats, def-cat+ }
+attlist.section-def-cats &= empty
+#      The 'def-cats' section defines the categories used to build the
+# patterns used in rules
+def-cat = element def-cat { attlist.def-cat, cat-item+ }
+attlist.def-cat &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each 'def-cat' defines one category in terms of a list of
+# category items and has a unique name 'n', which is mandatory
+cat-item = element cat-item { attlist.cat-item, empty }
+attlist.cat-item &=
+  attribute lemma { text }?,
+  attribute tags { text },
+  attribute c { text }?
+#      Each 'cat-item' (category item) represents a set of lexical forms
+# and has a mandatory attribute 'tags' whose value is a sequence of
+# dot-separated tag names; this sequence is a subsequence of the
+# tag sequence defining each possible lexical form. For example,
+# tags="n.f" would match all lexical forms containing this tag
+# sequence, such as "^casa<n><f><pl>$".
+# 
+# In addition, an optional attribute, "lemma", may be used to
+# define lexical forms having a particular substring in their lemma
+section-def-attrs =
+  element section-def-attrs { attlist.section-def-attrs, def-attr+ }
+attlist.section-def-attrs &= empty
+# The 'def-attrs' section defines the attributes that will be
+# identified in matched lexical forms 
+def-attr = element def-attr { attlist.def-attr, attr-item+ }
+attlist.def-attr &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each def-attr defines one attribute in terms of a list of
+# attribute items and has a mandatory unique name n 
+attr-item = element attr-item { attlist.attr-item, empty }
+attlist.attr-item &=
+  attribute tags { text }?,
+  attribute c { text }?
+# Each 'attr-item' specifies a subsequence of the tags in
+# that lexical form (attribute 'tags')
+section-def-vars =
+  element section-def-vars { attlist.section-def-vars, def-var+ }
+attlist.section-def-vars &= empty
+# The 'def-vars' section defines the global variables
+# that will be used to transfer information between rules
+def-var = element def-var { attlist.def-var, empty }
+attlist.def-var &=
+  attribute n { xsd:ID },
+  attribute v { text }?,
+  attribute c { text }?
+# The definition of a global variable has a mandatory unique name 'n' that
+# will be used to refer to it. An initial value can also be specified
+# by means of the 'v' attribute.  The default initial value is the
+# empty string.
+section-def-lists =
+  element section-def-lists { attlist.section-def-lists, def-list+ }
+attlist.section-def-lists &= empty
+# Element 'section-def-lists' encloses a set of list definitions
+def-list = element def-list { attlist.def-list, list-item+ }
+attlist.def-list &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# The 'def-list' element defines a named list to search with the 'in' 
+# element.  Attribute 'n' sets the name of the list
+list-item = element list-item { attlist.list-item, empty }
+attlist.list-item &=
+  attribute v { text },
+  attribute c { text }?
+# Attribute 'v' of 'list-item' element contains the value to be added to 
+# the list being defined     
+section-def-macros =
+  element section-def-macros { attlist.section-def-macros, def-macro+ }
+attlist.section-def-macros &= empty
+# 
+# The 'def-macros' section defines macros containing portions of
+# code frequently used in the action part of rules
+#
+def-macro = element def-macro { attlist.def-macro, sentence+ }
+attlist.def-macro &= attribute n { xsd:ID }
+attlist.def-macro &=
+  attribute npar { text },
+  attribute c { text }?
+# Macro definition:
+# 
+# A macro has a mandatory name (the value of 'n'), a number of parameters
+# (the value of 'npar') and a body containing arguments and statements.  
+section-rules = element section-rules { attlist.section-rules, rule+ }
+attlist.section-rules &= empty
+# The rules section contains a sequence of one or more rules
+rule = element rule { attlist.rule, pattern, action }
+attlist.rule &= attribute comment { text }?
+# Each rule has a pattern and an action 
+# * attribute 'comment' allows a comment about the purpose of
+#   the rule being defined to be included
+pattern = element pattern { attlist.pattern, pattern-item+ }
+attlist.pattern &= empty
+# The pattern is specified in terms of pattern items, each one
+# representing a lexical form in the matched pattern 
+pattern-item = element pattern-item { attlist.pattern-item, empty }
+attlist.pattern-item &= attribute n { xsd:IDREF }
+# Each attribute to be activated is referred to by its name in the def-cats section 
+action = element action { attlist.action, sentence* }
+attlist.action &= attribute c { text }?
+# Encloses the procedural part of a rule
+choose = element choose { attlist.choose, when+, otherwise? }
+attlist.choose &= attribute c { text }?
+# The choose statement is a selection statement (similar to a case
+# statement) composed of one or more tested cases and an optional
+# otherwise 
+when = element when { attlist.when, test, sentence* }
+attlist.when &= attribute c { text }?
+# Each tested case is a block of zero or more statements 
+otherwise = element otherwise { attlist.otherwise, sentence+ }
+attlist.otherwise &= attribute c { text }?
+# The otherwise case is also a block of one or more statements 
+test = element test { attlist.test, condition }
+attlist.test &= attribute c { text }?
+# The test in a tested case may be a conjunction, a disjunction, or
+# a negation of simpler tests, as well as a simple equality test
+and = element and { attlist.and, condition, condition+ }
+attlist.and &= empty
+# Each conjunction test contains two or more simpler tests 
+or = element or { attlist.or, condition, condition+ }
+attlist.or &= empty
+# Each disjunction test contains two or more simpler tests 
+not = element not { attlist.not, condition }
+attlist.not &= empty
+# The negation of a simpler test is a test itself 
+equal = element equal { attlist.equal, value, value }
+attlist.equal &= attribute caseless { "no" | "yes" }?
+# The simplest test is an equality test. The right part and the
+# left part of the equality may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with = element begins-with { attlist.begins-with, value, value }
+attlist.begins-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with = element ends-with { attlist.ends-with, value, value }
+attlist.ends-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with-list =
+  element begins-with-list { attlist.begins-with-list, value, \list }
+attlist.begins-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with-list =
+  element ends-with-list { attlist.ends-with-list, value, \list }
+attlist.ends-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+contains-substring =
+  element contains-substring {
+    attlist.contains-substring, value, value
+  }
+attlist.contains-substring &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+in = element in { attlist.in, value, \list }
+attlist.in &= attribute caseless { "no" | "yes" }?
+# 'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+# this search is performed without attending to the case
+\list = element list { attlist.list, empty }
+attlist.list &= attribute n { xsd:IDREF }
+# 'list' refers, by the name in attribute 'n', to a list defined earlier in
+# the 'section-def-lists' section
+let = element let { attlist.let, container, value }
+attlist.let &= empty
+# An assignment statement ('let') assigns the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip
+append = element append { attlist.append, value+ }
+attlist.append &= attribute n { xsd:IDREF }
+# This instruction appends the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip, identified by the "n" attribute
+out = element out { attlist.out, (b | chunk | var)+ }
+attlist.out &= attribute c { text }?
+# 'out' is an output statement; it may output blanks, chunks or variables
+modify-case =
+  element modify-case { attlist.modify-case, container, stringvalue }
+attlist.modify-case &= empty
+# The first argument of 'modify-case' copies the case of the second 
+# argument.
+call-macro = element call-macro { attlist.call-macro, with-param* }
+attlist.call-macro &= attribute n { xsd:IDREF }
+# A macro may be called anywhere by name with zero or more
+# arguments
+with-param = element with-param { attlist.with-param, empty }
+attlist.with-param &= attribute pos { text }
+# The attribute pos in each argument is used to refer to a lexical
+# form in the current rule. For example, if a 2-parameter macro
+# has been defined to perform noun-adjective agreement operations,
+# it may be used with arguments 1 and 2 in a noun-adjective rule,
+# with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+# arguments 1 and 3 in a noun-adverb-adjective rule, and with
+# arguments 2 and 1 in an adjective-noun rule 
+clip = element clip { attlist.clip, empty }
+attlist.clip &=
+  attribute pos { text },
+  attribute part { text },
+  attribute c { text }?
+# A 'clip' is a substring of a source-language or target-language
+# lexical form, extracted according to an attribute:
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+#
+lit = element lit { attlist.lit, empty }
+attlist.lit &= attribute v { text }
+# A literal string value: the value of the literal is the value of
+# the 'v' attribute
+lit-tag = element lit-tag { attlist.lit-tag, empty }
+attlist.lit-tag &= attribute v { text }
+# A literal tag value: the value of the tag is the value of
+# the 'v' attribute
+var = element var { attlist.var, empty }
+attlist.var &= attribute n { xsd:IDREF }
+# Each 'var' is a variable identifier: the attribute n is the name
+# of the variable. When it is in an 'out', a 'test', or the right
+# part of a 'let', it represents the value of the variable; when in
+# the left part of a 'let' it represents the reference of the
+# variable. 
+get-case-from =
+  element get-case-from { attlist.get-case-from, (clip | lit | var) }
+attlist.get-case-from &= attribute pos { text }
+# Warning: all the comments that mention get-case-from still need
+# to be updated
+case-of = element case-of { attlist.case-of, empty }
+attlist.case-of &=
+  attribute pos { text },
+  attribute part { text }
+# A 'case-of' is a value representing the case of a "clip".  This value 
+# will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+# (all uppercase).
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+concat = element concat { attlist.concat, value+ }
+attlist.concat &= empty
+# Concatenates a sequence of values
+chunk = element chunk { attlist.chunk, value+ }
+attlist.chunk &= empty
+# Encloses a chunk      
+pseudolemma = element pseudolemma { attlist.pseudolemma, value }
+attlist.pseudolemma &= empty
+b = element b { attlist.b, empty }
+attlist.b &= attribute pos { text }?
+start = interchunk | pseudolemma
+# 'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+# with pos="2" refers to the [super]blanks (including format data
+# encapsulated by the de-formatter) between lexical form 2 and
+# lexical form 3. Managing [super]blanks explicitly allows for the
+# correct placement of format when the result of structural
+# transfer has more or less lexical items than the original or has
+# been reordered in some way.  If attribute "pos" is not specified, then
+# a single blank (ASCII 32) is generated.
Index: branches/apertium-tagger/apertium2/apertium/postchunk.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/postchunk.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/postchunk.rnc	(revision 69632)
@@ -0,0 +1,348 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+#  Draft of DTD for the structural transfer rule files 
+# 
+#  Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+#  2005.07.29. 
+
+condition =
+  and
+  | or
+  | not
+  | equal
+  | begins-with
+  | begins-with-list
+  | ends-with
+  | ends-with-list
+  | contains-substring
+  | in
+container = var | clip
+sentence = let | out | choose | modify-case | call-macro | append
+value =
+  b
+  | clip
+  | lit
+  | lit-tag
+  | var
+  | get-case-from
+  | case-of
+  | concat
+  | lu-count
+  | lu
+  | mlu
+stringvalue = clip | lit | var | get-case-from | case-of | lu-count
+postchunk =
+  element postchunk {
+    attlist.postchunk,
+    section-def-cats,
+    section-def-attrs,
+    section-def-vars,
+    section-def-lists?,
+    section-def-macros?,
+    section-rules
+  }
+attlist.postchunk &= empty
+# 'postchunk' is the root element containing the whole structural
+# postchunk rule file.  
+section-def-cats =
+  element section-def-cats { attlist.section-def-cats, def-cat+ }
+attlist.section-def-cats &= empty
+#      The 'def-cats' section defines the categories used to build the
+# patterns used in rules
+def-cat = element def-cat { attlist.def-cat, cat-item+ }
+attlist.def-cat &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each 'def-cat' defines one category in terms of a list of
+# category items and has a unique name 'n', which is mandatory
+cat-item = element cat-item { attlist.cat-item, empty }
+attlist.cat-item &= attribute name { text }
+# In addition, a required attribute, "name", is used to specify 
+# which chunk name is detected by this cat-item
+section-def-attrs =
+  element section-def-attrs { attlist.section-def-attrs, def-attr+ }
+attlist.section-def-attrs &= empty
+# The 'def-attrs' section defines the attributes that will be
+# identified in matched lexical forms 
+def-attr = element def-attr { attlist.def-attr, attr-item+ }
+attlist.def-attr &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each def-attr defines one attribute in terms of a list of
+# attribute items and has a mandatory unique name n 
+attr-item = element attr-item { attlist.attr-item, empty }
+attlist.attr-item &=
+  attribute tags { text }?,
+  attribute c { text }?
+# Each 'attr-item' specifies a subsequence of the tags in
+# that lexical form (attribute 'tags')
+section-def-vars =
+  element section-def-vars { attlist.section-def-vars, def-var+ }
+attlist.section-def-vars &= empty
+# The 'def-vars' section defines the global variables
+# that will be used to transfer information between rules
+def-var = element def-var { attlist.def-var, empty }
+attlist.def-var &=
+  attribute n { xsd:ID },
+  attribute v { text }?,
+  attribute c { text }?
+# The definition of a global variable has a mandatory unique name 'n' that
+# will be used to refer to it. An initial value can also be specified
+# by means of the 'v' attribute.  The default initial value is the
+# empty string.
+section-def-lists =
+  element section-def-lists { attlist.section-def-lists, def-list+ }
+attlist.section-def-lists &= empty
+# Element 'section-def-lists' encloses a set of list definitions
+def-list = element def-list { attlist.def-list, list-item+ }
+attlist.def-list &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# The 'def-list' element defines a named list to search with the 'in' 
+# element.  Attribute 'n' sets the name of the list
+list-item = element list-item { attlist.list-item, empty }
+attlist.list-item &=
+  attribute v { text },
+  attribute c { text }?
+# Attribute 'v' of 'list-item' element contains the value to be added to 
+# the list being defined     
+section-def-macros =
+  element section-def-macros { attlist.section-def-macros, def-macro+ }
+attlist.section-def-macros &= empty
+# 
+# The 'def-macros' section defines macros containing portions of
+# code frequently used in the action part of rules
+#
+def-macro = element def-macro { attlist.def-macro, sentence+ }
+attlist.def-macro &= attribute n { xsd:ID }
+attlist.def-macro &=
+  attribute npar { text },
+  attribute c { text }?
+# Macro definition:
+# 
+# A macro has a mandatory name (the value of 'n'), a number of parameters
+# (the value of 'npar') and a body containing arguments and statements.  
+section-rules = element section-rules { attlist.section-rules, rule+ }
+attlist.section-rules &= empty
+# The rules section contains a sequence of one or more rules
+rule = element rule { attlist.rule, pattern, action }
+attlist.rule &= attribute comment { text }?
+# Each rule has a pattern and an action 
+# * Attribute 'comment' allows a comment to be included with the rule
+pattern = element pattern { attlist.pattern, pattern-item }
+attlist.pattern &= empty
+# The pattern is specified in terms of pattern items, each one
+# representing a lexical form in the matched pattern 
+pattern-item = element pattern-item { attlist.pattern-item, empty }
+attlist.pattern-item &= attribute n { xsd:IDREF }
+# Each attribute to be activated is referred to by its name in the def-cats section 
+action = element action { attlist.action, sentence* }
+attlist.action &= attribute c { text }?
+# Encloses the procedural part of a rule
+choose = element choose { attlist.choose, when+, otherwise? }
+attlist.choose &= attribute c { text }?
+# The choose statement is a selection statement (similar to a case
+# statement) composed of one or more tested cases and an optional
+# otherwise 
+when = element when { attlist.when, test, sentence* }
+attlist.when &= attribute c { text }?
+# Each tested case is a block of zero or more statements 
+otherwise = element otherwise { attlist.otherwise, sentence+ }
+attlist.otherwise &= attribute c { text }?
+# The otherwise case is also a block of one or more statements 
+test = element test { attlist.test, condition }
+attlist.test &= attribute c { text }?
+# The test in a tested case may be a conjunction, a disjunction, or
+# a negation of simpler tests, as well as a simple equality test
+and = element and { attlist.and, condition, condition+ }
+attlist.and &= empty
+# Each conjunction test contains two or more simpler tests 
+or = element or { attlist.or, condition, condition+ }
+attlist.or &= empty
+# Each disjunction test contains two or more simpler tests 
+not = element not { attlist.not, condition }
+attlist.not &= empty
+# The negation of a simpler test is a test itself 
+equal = element equal { attlist.equal, value, value }
+attlist.equal &= attribute caseless { "no" | "yes" }?
+# The simplest test is an equality test. The right part and the
+# left part of the equality may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with = element begins-with { attlist.begins-with, value, value }
+attlist.begins-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with = element ends-with { attlist.ends-with, value, value }
+attlist.ends-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with-list =
+  element begins-with-list { attlist.begins-with-list, value, \list }
+attlist.begins-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with-list =
+  element ends-with-list { attlist.ends-with-list, value, \list }
+attlist.ends-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+contains-substring =
+  element contains-substring {
+    attlist.contains-substring, value, value
+  }
+attlist.contains-substring &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part.
+# Both parts of the test may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+in = element in { attlist.in, value, \list }
+attlist.in &= attribute caseless { "no" | "yes" }?
+# 'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+# this search is performed without attending to the case
+\list = element list { attlist.list, empty }
+attlist.list &= attribute n { xsd:IDREF }
+# 'list' refers, by the name in attribute 'n', to a list defined earlier in
+# the 'section-def-lists' section
+let = element let { attlist.let, container, value }
+attlist.let &= empty
+# An assignment statement ('let') assigns the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip
+append = element append { attlist.append, value+ }
+attlist.append &= attribute n { xsd:IDREF }
+# This instruction appends the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip, identified by the "n" attribute
+out = element out { attlist.out, (b | lu | mlu | var)+ }
+attlist.out &= attribute c { text }?
+# 'out' is an output statement; it may output blanks, words ('lu'), multiwords ('mlu') or variables
+modify-case =
+  element modify-case { attlist.modify-case, container, stringvalue }
+attlist.modify-case &= empty
+# The first argument of 'modify-case' copies the case of the second 
+# argument.
+call-macro = element call-macro { attlist.call-macro, with-param* }
+attlist.call-macro &= attribute n { xsd:IDREF }
+# A macro may be called anywhere by name with zero or more
+# arguments
+with-param = element with-param { attlist.with-param, empty }
+attlist.with-param &= attribute pos { text }
+# The attribute pos in each argument is used to refer to a lexical
+# form in the current rule. For example, if a 2-parameter macro
+# has been defined to perform noun-adjective agreement operations,
+# it may be used with arguments 1 and 2 in a noun-adjective rule,
+# with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+# arguments 1 and 3 in a noun-adverb-adjective rule, and with
+# arguments 2 and 1 in an adjective-noun rule 
+clip = element clip { attlist.clip, empty }
+attlist.clip &=
+  attribute pos { text },
+  attribute part { text },
+  attribute c { text }?
+# A 'clip' is a substring of a source-language or target-language
+# lexical form, extracted according to an attribute:
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+#
+lit = element lit { attlist.lit, empty }
+attlist.lit &= attribute v { text }
+# A literal string value: the value of the literal is the value of
+# the 'v' attribute
+lit-tag = element lit-tag { attlist.lit-tag, empty }
+attlist.lit-tag &= attribute v { text }
+# A literal tag value: the value of the tag is the value of
+# the 'v' attribute
+var = element var { attlist.var, empty }
+attlist.var &= attribute n { xsd:IDREF }
+# Each 'var' is a variable identifier: the attribute n is the name
+# of the variable. When it is in an 'out', a 'test', or the right
+# part of a 'let', it represents the value of the variable; when in
+# the left part of a 'let' it represents the reference of the
+# variable. 
+get-case-from =
+  element get-case-from { attlist.get-case-from, (clip | lit | var) }
+attlist.get-case-from &= attribute pos { text }
+# Warning: all the comments that mention get-case-from still need
+# to be updated
+case-of = element case-of { attlist.case-of, empty }
+attlist.case-of &=
+  attribute pos { text },
+  attribute part { text }
+# A 'case-of' is a value representing the case of a "clip".  This value 
+# will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+# (all uppercase).
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+concat = element concat { attlist.concat, value+ }
+attlist.concat &= empty
+# Concatenates a sequence of values
+mlu = element mlu { attlist.mlu, lu+ }
+attlist.mlu &= empty
+# Encloses a multiword
+lu = element lu { attlist.lu, value+ }
+attlist.lu &= empty
+# Encloses a word
+b = element b { attlist.b, empty }
+attlist.b &= attribute pos { text }?
+# 'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+# with pos="2" refers to the [super]blanks (including format data
+# encapsulated by the de-formatter) between lexical form 2 and
+# lexical form 3. Managing [super]blanks explicitly allows for the
+# correct placement of format when the result of structural
+# transfer has more or less lexical items than the original or has
+# been reordered in some way.  If attribute "pos" is not specified, then
+# a single blank (ASCII 32) is generated.
+lu-count = element lu-count { attlist.lu-count, empty }
+attlist.lu-count &= empty
+start = postchunk
+# 'lu-count' is the number of lexical units (words inside the chunk) in the rule
Index: branches/apertium-tagger/apertium2/apertium/tagger.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger.rnc	(revision 69632)
@@ -0,0 +1,122 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+#   DTD for the tagset and the rules to enforce the state to state
+#   transition probabilities used by the part-of-speech tagger. 
+#   2005.07.29.
+
+tagger =
+  element tagger {
+    attlist.tagger,
+    tagset,
+    forbid?,
+    enforce-rules?,
+    preferences?,
+    discard-on-ambiguity?
+  }
+attlist.tagger &= attribute name { text }
+#     'tagger' is the root element containing the whole tagset for a given
+# language specified through the mandatory attribute 'name'
+tagset = element tagset { attlist.tagset, def-label+, def-mult* }
+attlist.tagset &= empty
+#     The 'tagset' section defines the correspondence between simple
+# or multiple morphological categories defining a lexical form and the coarser 
+# ones with which the part-of-speech tagger works
+def-label = element def-label { attlist.def-label, tags-item+ }
+attlist.def-label &=
+  attribute name { text },
+  attribute c { text }?,
+  attribute closed { text }?
+#     Each 'def-label' defines one coarse tag in terms of a list of fine tags 
+# and has a mandatory unique name. The optional attribute 'closed="true"' may be used
+# to specify if the defined fine tags belong to a closed list.
+# c is for comments and is ignored
+tags-item = element tags-item { attlist.tags-item, empty }
+attlist.tags-item &=
+  attribute tags { text },
+  attribute lemma { text }?
+#     Each 'tags-item' may be a dot-separated subsequence of the morphological tags
+# corresponding to a coarse tag optionally in association with a given lemma 
+def-mult = element def-mult { attlist.def-mult, sequence+ }
+attlist.def-mult &=
+  attribute name { text },
+  attribute c { text }?,
+  attribute closed { text }?
+#     Each 'def-mult' defines one coarse tag in terms of a sequence of coarse
+# tags previously defined as 'def-labels' or a sequence of fine tags. A mandatory 
+# name is required for each 'def-mult' which may also have an optional attribute
+# 'closed="true"' if it belongs to a closed list
+# c is for comments and is ignored
+sequence =
+  element sequence { attlist.sequence, (tags-item | label-item)+ }
+attlist.sequence &= empty
+#     Element 'sequence' encloses a set of tags or labels which defines 
+# a unit with more than one label
+label-item = element label-item { attlist.label-item, empty }
+attlist.label-item &=
+  attribute label { text },
+  attribute c { text }?
+#     Each 'label' of the 'label-item' corresponds to a coarse tag previously
+# defined as a 'def-label' by a name.
+# c is for comments and is ignored
+forbid = element forbid { attlist.forbid, label-sequence+ }
+attlist.forbid &= empty
+#     Element 'forbid' contains sequences of morphological categories that are not 
+# allowed in a given language
+label-sequence =
+  element label-sequence { attlist.label-sequence, label-item+ }
+attlist.label-sequence &= attribute c { text }?
+#     Each 'label-sequence' is restricted to two 'label-items' 
+# c is for comments and is ignored
+enforce-rules =
+  element enforce-rules { attlist.enforce-rules, enforce-after+ }
+attlist.enforce-rules &= empty
+# Element 'enforce-rules' defines sets of coarse tags that must follow specified ones
+enforce-after =
+  element enforce-after { attlist.enforce-after, label-set }
+attlist.enforce-after &=
+  attribute label { text },
+  attribute c { text }?
+#     Each 'enforce-after' encloses the set of coarse tags ('label-set') that must follow 
+# the one defined in 'label', as a mandatory attribute
+# c is for comments and is ignored
+label-set = element label-set { attlist.label-set, label-item+ }
+attlist.label-set &= attribute c { text }?
+#     The set of 'label-items' enforced after a 'label' are enclosed inside element 'label-set'  
+# c is for comments and is ignored
+preferences = element preferences { attlist.preferences, prefer+ }
+attlist.preferences &= empty
+#     Element 'preferences' allows choosing among two or more fine tag sequences
+# which are grouped in the same coarse tag.
+prefer = element prefer { attlist.prefer, empty }
+attlist.prefer &=
+  attribute tags { text },
+  attribute c { text }?
+#     Each 'prefer' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+# c is for comments and is ignored
+discard-on-ambiguity =
+  element discard-on-ambiguity {
+    attlist.discard-on-ambiguity, discard+
+  }
+attlist.discard-on-ambiguity &= empty
+# List of label-item or tags-item to be discarded when an ambiguity
+# occurs inside a word
+discard = element discard { attlist.discard, empty }
+attlist.discard &=
+  attribute tags { text },
+  attribute c { text }?
+start = tagger
+#     Each 'discard' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+# c is for comments and is ignored
Index: branches/apertium-tagger/apertium2/apertium/transfer.rnc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer.rnc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer.rnc	(revision 69632)
@@ -0,0 +1,407 @@
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+# 
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+# 
+#  Draft of DTD for the structural transfer rule files 
+# 
+#  Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+#  2005.07.29. 
+
+condition =
+  and
+  | or
+  | not
+  | equal
+  | begins-with
+  | begins-with-list
+  | ends-with
+  | ends-with-list
+  | contains-substring
+  | in
+container = var | clip
+sentence =
+  let
+  | out
+  | choose
+  | modify-case
+  | call-macro
+  | append
+  | reject-current-rule
+value =
+  b
+  | clip
+  | lit
+  | lit-tag
+  | var
+  | get-case-from
+  | case-of
+  | concat
+  | lu
+  | mlu
+  | chunk
+stringvalue = clip | lit | var | get-case-from | case-of
+transfer =
+  element transfer {
+    attlist.transfer,
+    section-def-cats,
+    section-def-attrs?,
+    section-def-vars?,
+    section-def-lists?,
+    section-def-macros?,
+    section-rules
+  }
+attlist.transfer &= attribute default { "lu" | "chunk" }?
+# 'transfer' is the root element containing the whole structural
+# transfer rule file.  Attribute 'default' specifies if
+# unmatched words have to be written as lexical units ("lu", this is
+# the default value) or as chunks ("chunk").
+section-def-cats =
+  element section-def-cats { attlist.section-def-cats, def-cat+ }
+attlist.section-def-cats &= empty
+#      The 'def-cats' section defines the categories used to build the
+# patterns used in rules
+def-cat = element def-cat { attlist.def-cat, cat-item+ }
+attlist.def-cat &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each 'def-cat' defines one category in terms of a list of
+# category items and has a unique name 'n', which is mandatory
+cat-item = element cat-item { attlist.cat-item, empty }
+attlist.cat-item &=
+  attribute lemma { text }?,
+  attribute tags { text },
+  attribute c { text }?
+#      Each 'cat-item' (category item) represents a set of lexical forms
+# and has a mandatory attribute 'tags' whose value is a sequence of
+# dot-separated tag names; this sequence is a subsequence of the
+# tag sequence defining each possible lexical form. For example,
+# tags="n.f" would match all lexical forms containing this tag
+# sequence, such as "^casa<n><f><pl>$".
+# 
+# In addition, an optional attribute, "lemma", may be used to
+# define lexical forms having a particular substring in their lemma
+section-def-attrs =
+  element section-def-attrs { attlist.section-def-attrs, def-attr+ }
+attlist.section-def-attrs &= empty
+# The 'def-attrs' section defines the attributes that will be
+# identified in matched lexical forms 
+def-attr = element def-attr { attlist.def-attr, attr-item+ }
+attlist.def-attr &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# Each def-attr defines one attribute in terms of a list of
+# attribute items and has a mandatory unique name n 
+attr-item = element attr-item { attlist.attr-item, empty }
+attlist.attr-item &=
+  attribute tags { text }?,
+  attribute c { text }?
+# Each 'attr-item' specifies a subsequence of the tags in
+# that lexical form (attribute 'tags')
+section-def-vars =
+  element section-def-vars { attlist.section-def-vars, def-var+ }
+attlist.section-def-vars &= empty
+# The 'def-vars' section defines the global variables
+# that will be used to transfer information between rules
+def-var = element def-var { attlist.def-var, empty }
+attlist.def-var &=
+  attribute n { xsd:ID },
+  attribute v { text }?,
+  attribute c { text }?
+# The definition of a global variable has a mandatory unique name 'n' that
+# will be used to refer to it. An initial value can also be specified
+# by means of the 'v' attribute.  The default initial value is the
+# empty string.
+section-def-lists =
+  element section-def-lists { attlist.section-def-lists, def-list+ }
+attlist.section-def-lists &= empty
+# Element 'section-def-lists' encloses a set of list definitions
+def-list = element def-list { attlist.def-list, list-item+ }
+attlist.def-list &=
+  attribute n { xsd:ID },
+  attribute c { text }?
+# The 'def-list' element defines a named list to search with the 'in' 
+# element.  Attribute 'n' sets the name of the list
+list-item = element list-item { attlist.list-item, empty }
+attlist.list-item &=
+  attribute v { text },
+  attribute c { text }?
+# Attribute 'v' of 'list-item' element contains the value to be added to 
+# the list being defined     
+section-def-macros =
+  element section-def-macros { attlist.section-def-macros, def-macro+ }
+attlist.section-def-macros &= empty
+# 
+# The 'def-macros' section defines macros containing portions of
+# code frequently used in the action part of rules
+#
+def-macro = element def-macro { attlist.def-macro, sentence+ }
+attlist.def-macro &= attribute n { xsd:ID }
+attlist.def-macro &=
+  attribute npar { text },
+  attribute c { text }?
+# Macro definition:
+# 
+# A macro has a mandatory name (the value of 'n'), a number of parameters
+# (the value of 'npar') and a body containing arguments and statements.  
+section-rules = element section-rules { attlist.section-rules, rule+ }
+attlist.section-rules &= empty
+# The rules section contains a sequence of one or more rules
+rule = element rule { attlist.rule, pattern, action }
+attlist.rule &= attribute comment { text }?
+# Each rule has a pattern and an action 
+# * attribute 'comment' allows adding comments about the purpose of
+#   the rule being defined
+pattern = element pattern { attlist.pattern, pattern-item+ }
+attlist.pattern &= empty
+# The pattern is specified in terms of pattern items, each one
+# representing a lexical form in the matched pattern 
+pattern-item = element pattern-item { attlist.pattern-item, empty }
+attlist.pattern-item &= attribute n { xsd:IDREF }
+# Each pattern item refers, through attribute 'n', to a category by its name in the def-cats section
+action = element action { attlist.action, sentence* }
+attlist.action &= attribute c { text }?
+# Encloses the procedural part of a rule
+choose = element choose { attlist.choose, when+, otherwise? }
+attlist.choose &= attribute c { text }?
+# The choose statement is a selection statement (similar to a case
+# statement) composed of one or more tested cases and an optional
+# otherwise 
+when = element when { attlist.when, test, sentence* }
+attlist.when &= attribute c { text }?
+# Each tested case is a block of zero or more statements 
+otherwise = element otherwise { attlist.otherwise, sentence+ }
+attlist.otherwise &= attribute c { text }?
+# The otherwise case is also a block of one or more statements 
+test = element test { attlist.test, condition }
+attlist.test &= attribute c { text }?
+# The test in a tested case may be a conjunction, a disjunction, or
+# a negation of simpler tests, as well as a simple equality test
+and = element and { attlist.and, condition, condition+ }
+attlist.and &= empty
+# Each conjunction test contains two or more simpler tests
+or = element or { attlist.or, condition, condition+ }
+attlist.or &= empty
+# Each disjunction test contains two or more simpler tests 
+not = element not { attlist.not, condition }
+attlist.not &= empty
+# The negation of a simpler test is a test itself 
+equal = element equal { attlist.equal, value, value }
+attlist.equal &= attribute caseless { "no" | "yes" }?
+# The simplest test is an equality test. The right part and the
+# left part of the equality may both be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with = element begins-with { attlist.begins-with, value, value }
+attlist.begins-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# Both parts of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with = element ends-with { attlist.ends-with, value, value }
+attlist.ends-with &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# Both parts of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+begins-with-list =
+  element begins-with-list { attlist.begins-with-list, value, \list }
+attlist.begins-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the beginning.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+ends-with-list =
+  element ends-with-list { attlist.ends-with-list, value, \list }
+attlist.ends-with-list &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part at the end.
+# The first part of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of
+# a variable ('var') defined in the def-vars section. The second part
+# must always be a list.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+contains-substring =
+  element contains-substring {
+    attlist.contains-substring, value, value
+  }
+attlist.contains-substring &= attribute caseless { "no" | "yes" }?
+# Tests if the left part contains the right part.
+# Both parts of the test may be a clip (see below), a
+# literal string ('lit'), a literal tag ('lit-tag') or the value of 
+# a variable ('var') defined in the def-vars section.  When the attribute
+# 'caseless' is set to 'yes', the comparison is made without attending
+# to the case.
+in = element in { attlist.in, value, \list }
+attlist.in &= attribute caseless { "no" | "yes" }?
+# 'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+# this search is performed without attending to the case
+\list = element list { attlist.list, empty }
+attlist.list &= attribute n { xsd:IDREF }
+# 'list' refers, through the name in attribute 'n', to a list defined earlier in
+# the 'section-def-lists' section
+let = element let { attlist.let, container, value }
+attlist.let &= empty
+# An assignment statement ('let') assigns the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip
+append = element append { attlist.append, value+ }
+attlist.append &= attribute n { xsd:IDREF }
+# This instruction appends the value of a clip (see
+# below), a literal string ('lit'), a literal tag('lit-tag') or the 
+# value of a global variable ('var') to either a global variable ('var') 
+# or a clip, identified by the "n" attribute
+out = element out { attlist.out, (mlu | lu | b | chunk | var)+ }
+attlist.out &= attribute c { text }?
+# 'out' is an output statement; it may output any sequence of
+# multiwords ('mlu'), lexical units ('lu'), [super]blanks ('b'),
+# chunks ('chunk') and variables ('var') (see below)
+modify-case =
+  element modify-case { attlist.modify-case, container, stringvalue }
+attlist.modify-case &= empty
+# The first argument of 'modify-case' copies the case of the second
+# argument.
+call-macro = element call-macro { attlist.call-macro, with-param* }
+attlist.call-macro &= attribute n { xsd:IDREF }
+# A macro may be called anywhere by name with zero or more
+# arguments ('with-param')
+with-param = element with-param { attlist.with-param, empty }
+attlist.with-param &= attribute pos { text }
+# The attribute pos in each argument is used to refer to a lexical
+# form in the current rule. For example, if a 2-parameter macro
+# has been defined to perform noun-adjective agreement operations,
+# it may be used with arguments 1 and 2 in a noun-adjective rule,
+# with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+# arguments 1 and 3 in a noun-adverb-adjective rule, and with
+# arguments 2 and 1 in an adjective-noun rule 
+clip = element clip { attlist.clip, empty }
+attlist.clip &=
+  attribute pos { text },
+  attribute side { "sl" | "tl" },
+  attribute part { text },
+  attribute queue { text }?,
+  attribute link-to { text }?,
+  attribute c { text }?
+# A 'clip' is a substring of a source-language or target-language
+# lexical form, extracted according to an attribute:
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * 'side' is used to select a source-language ('sl') or a
+#   target-language ('tl') clip
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+# 
+# * the value of 'queue' may be 'no' or 'yes'.  'yes' is assumed  by 
+#   default.
+# 
+# * 'link-to' causes the other attributes to be ignored in clip evaluation
+#   when using 'clip' as a right hand side element (as value), and 
+#   returns its value.  When using as a left hand side (as reference), 
+#   the value of the 'as' attribute is ignored.
+lit = element lit { attlist.lit, empty }
+attlist.lit &= attribute v { text }
+# A literal string value: the value of the literal is the value of
+# the 'v' attribute
+lit-tag = element lit-tag { attlist.lit-tag, empty }
+attlist.lit-tag &= attribute v { text }
+# A literal tag value: the value of the literal is the value of
+# the 'v' attribute
+var = element var { attlist.var, empty }
+attlist.var &= attribute n { xsd:IDREF }
+# Each 'var' is a variable identifier: the attribute n is the name
+# of the variable. When it is in an 'out', a 'test', or the right
+# part of a 'let', it represents the value of the variable; when in
+# the left part of a 'let' it represents the reference of the
+# variable. 
+get-case-from =
+  element get-case-from { attlist.get-case-from, (clip | lit | var) }
+attlist.get-case-from &= attribute pos { text }
+# Warning: every comment that mentions get-case-from
+# still needs to be updated
+case-of = element case-of { attlist.case-of, empty }
+attlist.case-of &=
+  attribute pos { text },
+  attribute side { "sl" | "tl" },
+  attribute part { text }
+# A 'case-of' is a value representing the case of a "clip".  This value
+# will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+# (all uppercase).
+# 
+# * 'pos' is an index (1, 2, 3...) used to select a lexical form
+#    inside the rule;
+# 
+# * 'side' is used to select a source-language ('sl') or a
+#   target-language ('tl') clip
+# 
+# * the value of 'part' is the name of an attribute defined in
+#   def-attrs, but may take also the values 'lem' (referring to
+#   the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+#   (lemma queue) and 'whole' (referring to the whole lexical form).
+concat = element concat { attlist.concat, value+ }
+attlist.concat &= empty
+# Concatenates a sequence of values
+mlu = element mlu { attlist.mlu, lu+ }
+attlist.mlu &= empty
+# Encloses a multiword
+lu = element lu { attlist.lu, value+ }
+attlist.lu &= empty
+# Encloses a word inside an 'out' element.
+reject-current-rule =
+  element reject-current-rule { attlist.reject-current-rule, empty }
+attlist.reject-current-rule &= attribute shifting { "yes" | "no" }?
+# This instruction cancels the execution of the rule being processed.
+# If "shifting" is set to "yes" or is not specified, the matching process
+# consumes exactly one word of the input. If "shifting" is set to "no",
+# the rule is marked so that it is not considered in the current matching
+# until the input buffer advances by at least one word
+chunk = element chunk { attlist.chunk, tags, (mlu | lu | b | var)+ }
+attlist.chunk &=
+  attribute name { text }?,
+  attribute namefrom { text }?,
+  attribute case { text }?,
+  attribute c { text }?
+# Encloses a chunk inside an 'out' element.      
+# * 'name' is the pseudolemma of the chunk.
+# * 'namefrom' gets the name from a variable.
+# * 'case' is the variable from which to get the uppercase/lowercase
+#    policy to apply to the chunk name
+tags = element tags { attlist.tags, tag+ }
+attlist.tags &= empty
+tag = element tag { attlist.tag, value }
+attlist.tag &= empty
+b = element b { attlist.b, empty }
+attlist.b &= attribute pos { text }?
+start = transfer
+# 'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+# with pos="2" refers to the [super]blanks (including format data
+# encapsulated by the de-formatter) between lexical form 2 and
+# lexical form 3. Managing [super]blanks explicitly allows for the
+# correct placement of format when the result of structural
+# transfer has more or less lexical items than the original or has
+# been reordered in some way.  If attribute "pos" is not specified, then
+# a single blank (ASCII 32) is generated.
Index: branches/apertium-tagger/apertium2/apertium/transfer.rng
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer.rng	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer.rng	(revision 69632)
@@ -0,0 +1,1104 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+  Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+  This program is free software; you can redistribute it and/or
+  modify it under the terms of the GNU General Public License as
+  published by the Free Software Foundation; either version 2 of the
+  License, or (at your option) any later version.
+  
+  This program is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+  General Public License for more details.
+  
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, see <http://www.gnu.org/licenses/>.
+  
+   Draft of DTD for the structural transfer rule files 
+  
+   Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+   2005.07.29. 
+-->
+<grammar xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+  <define name="condition">
+    <choice>
+      <ref name="and"/>
+      <ref name="or"/>
+      <ref name="not"/>
+      <ref name="equal"/>
+      <ref name="begins-with"/>
+      <ref name="begins-with-list"/>
+      <ref name="ends-with"/>
+      <ref name="ends-with-list"/>
+      <ref name="contains-substring"/>
+      <ref name="in"/>
+    </choice>
+  </define>
+  <define name="container">
+    <choice>
+      <ref name="var"/>
+      <ref name="clip"/>
+    </choice>
+  </define>
+  <define name="sentence">
+    <choice>
+      <ref name="let"/>
+      <ref name="out"/>
+      <ref name="choose"/>
+      <ref name="modify-case"/>
+      <ref name="call-macro"/>
+      <ref name="append"/>
+      <ref name="reject-current-rule"/>
+    </choice>
+  </define>
+  <define name="value">
+    <choice>
+      <ref name="b"/>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="lit-tag"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+      <ref name="concat"/>
+      <ref name="lu"/>
+      <ref name="mlu"/>
+      <ref name="chunk"/>
+    </choice>
+  </define>
+  <define name="stringvalue">
+    <choice>
+      <ref name="clip"/>
+      <ref name="lit"/>
+      <ref name="var"/>
+      <ref name="get-case-from"/>
+      <ref name="case-of"/>
+    </choice>
+  </define>
+  <define name="transfer">
+    <element name="transfer">
+      <ref name="attlist.transfer"/>
+      <ref name="section-def-cats"/>
+      <optional>
+        <ref name="section-def-attrs"/>
+      </optional>
+      <optional>
+        <ref name="section-def-vars"/>
+      </optional>
+      <optional>
+        <ref name="section-def-lists"/>
+      </optional>
+      <optional>
+        <ref name="section-def-macros"/>
+      </optional>
+      <ref name="section-rules"/>
+    </element>
+  </define>
+  <define name="attlist.transfer" combine="interleave">
+    <optional>
+      <attribute name="default">
+        <choice>
+          <value>lu</value>
+          <value>chunk</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    'transfer' is the root element containing the whole structural
+    transfer rule file.  Attribute 'default' specifies if
+    unmatched words have to be written as lexical units ("lu", this is
+    the default value) or as chunks ("chunk").
+  -->
+  <define name="section-def-cats">
+    <element name="section-def-cats">
+      <ref name="attlist.section-def-cats"/>
+      <oneOrMore>
+        <ref name="def-cat"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-cats" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+         The 'def-cats' section defines the categories used to build the
+    patterns used in rules
+  -->
+  <define name="def-cat">
+    <element name="def-cat">
+      <ref name="attlist.def-cat"/>
+      <oneOrMore>
+        <ref name="cat-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-cat" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'def-cat' defines one category in terms of a list of
+    category items and has a unique name 'n', which is mandatory
+  -->
+  <define name="cat-item">
+    <element name="cat-item">
+      <ref name="attlist.cat-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.cat-item" combine="interleave">
+    <optional>
+      <attribute name="lemma"/>
+    </optional>
+    <attribute name="tags"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+         Each 'cat-item' (category item) represents a set of lexical forms
+    and has a mandatory attribute 'tags' whose value is a sequence of
+    dot-separated tag names; this sequence is a subsequence of the
+    tag sequence defining each possible lexical form. For example,
+    tags="n.f" would match all lexical forms containing this tag
+    sequence, such as "^casa<n><f><pl>$".
+    
+    In addition, an optional attribute, "lemma", may be used to
+    define lexical forms having a particular substring in their lemma
+  -->
+  <define name="section-def-attrs">
+    <element name="section-def-attrs">
+      <ref name="attlist.section-def-attrs"/>
+      <oneOrMore>
+        <ref name="def-attr"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-attrs" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-attrs' section defines the attributes that will be
+    identified in matched lexical forms 
+  -->
+  <define name="def-attr">
+    <element name="def-attr">
+      <ref name="attlist.def-attr"/>
+      <oneOrMore>
+        <ref name="attr-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-attr" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each def-attr defines one attribute in terms of a list of
+    attribute items and has a mandatory unique name n 
+  -->
+  <define name="attr-item">
+    <element name="attr-item">
+      <ref name="attlist.attr-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.attr-item" combine="interleave">
+    <optional>
+      <attribute name="tags"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Each 'attr-item' specifies a subsequence of the tags in
+    that lexical form (attribute 'tags')
+  -->
+  <define name="section-def-vars">
+    <element name="section-def-vars">
+      <ref name="attlist.section-def-vars"/>
+      <oneOrMore>
+        <ref name="def-var"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-vars" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The 'def-vars' section defines the global variables
+    that will be used to transfer information between rules
+  -->
+  <define name="def-var">
+    <element name="def-var">
+      <ref name="attlist.def-var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.def-var" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="v"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The definition of a global variable has a mandatory unique name 'n' that
+    will be used to refer to it. An initial value can also be specified
+    by means of the 'v' attribute.  The default initial value is the
+    empty string.
+  -->
+  <define name="section-def-lists">
+    <element name="section-def-lists">
+      <ref name="attlist.section-def-lists"/>
+      <oneOrMore>
+        <ref name="def-list"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-lists" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Element 'section-def-lists' encloses a set of list definitions -->
+  <define name="def-list">
+    <element name="def-list">
+      <ref name="attlist.def-list"/>
+      <oneOrMore>
+        <ref name="list-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-list" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The 'def-list' element defines a named list to search with the 'in' 
+    element.  Attribute 'n' sets the name of the list
+  -->
+  <define name="list-item">
+    <element name="list-item">
+      <ref name="attlist.list-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list-item" combine="interleave">
+    <attribute name="v"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Attribute 'v' of 'list-item' element contains the value to be added to 
+    the list being defined     
+  -->
+  <define name="section-def-macros">
+    <element name="section-def-macros">
+      <ref name="attlist.section-def-macros"/>
+      <oneOrMore>
+        <ref name="def-macro"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-def-macros" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    
+    The 'def-macros' section defines macros containing portions of
+    code frequently used in the action part of rules
+    
+  -->
+  <define name="def-macro">
+    <element name="def-macro">
+      <ref name="attlist.def-macro"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="n">
+      <data type="ID"/>
+    </attribute>
+  </define>
+  <define name="attlist.def-macro" combine="interleave">
+    <attribute name="npar"/>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Macro definition:
+    
+    A macro has a mandatory name (the value of 'n'), a number of parameters
+    (the value of 'npar') and a body containing arguments and statements.  
+  -->
+  <define name="section-rules">
+    <element name="section-rules">
+      <ref name="attlist.section-rules"/>
+      <oneOrMore>
+        <ref name="rule"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.section-rules" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The rules section contains a sequence of one or more rules -->
+  <define name="rule">
+    <element name="rule">
+      <ref name="attlist.rule"/>
+      <ref name="pattern"/>
+      <ref name="action"/>
+    </element>
+  </define>
+  <define name="attlist.rule" combine="interleave">
+    <optional>
+      <attribute name="comment"/>
+    </optional>
+  </define>
+  <!--
+    Each rule has a pattern and an action 
+    * attribute 'comment' allows a comment to be added about the purpose of
+      the rule being defined
+  -->
+  <define name="pattern">
+    <element name="pattern">
+      <ref name="attlist.pattern"/>
+      <oneOrMore>
+        <ref name="pattern-item"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.pattern" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The pattern is specified in terms of pattern items, each one
+    representing a lexical form in the matched pattern 
+  -->
+  <define name="pattern-item">
+    <element name="pattern-item">
+      <ref name="attlist.pattern-item"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.pattern-item" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!-- Each attribute to be activated is referred to by its name in the def-cats section -->
+  <define name="action">
+    <element name="action">
+      <ref name="attlist.action"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.action" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Encloses the procedural part of a rule -->
+  <define name="choose">
+    <element name="choose">
+      <ref name="attlist.choose"/>
+      <oneOrMore>
+        <ref name="when"/>
+      </oneOrMore>
+      <optional>
+        <ref name="otherwise"/>
+      </optional>
+    </element>
+  </define>
+  <define name="attlist.choose" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The choose statement is a selection statement (similar to a case
+    statement) composed of one or more tested cases and an optional
+    otherwise 
+  -->
+  <define name="when">
+    <element name="when">
+      <ref name="attlist.when"/>
+      <ref name="test"/>
+      <zeroOrMore>
+        <ref name="sentence"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.when" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- Each tested case is a block of zero or more statements -->
+  <define name="otherwise">
+    <element name="otherwise">
+      <ref name="attlist.otherwise"/>
+      <oneOrMore>
+        <ref name="sentence"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.otherwise" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!-- The otherwise case is also a block of one or more statements -->
+  <define name="test">
+    <element name="test">
+      <ref name="attlist.test"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.test" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    The test in a tested case may be a conjunction, a disjunction, or
+    a negation of simpler tests, as well as a simple equality test
+  -->
+  <define name="and">
+    <element name="and">
+      <ref name="attlist.and"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.and" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each conjunction test contains two or more simpler tests -->
+  <define name="or">
+    <element name="or">
+      <ref name="attlist.or"/>
+      <ref name="condition"/>
+      <oneOrMore>
+        <ref name="condition"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.or" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Each disjunction test contains two or more simpler tests -->
+  <define name="not">
+    <element name="not">
+      <ref name="attlist.not"/>
+      <ref name="condition"/>
+    </element>
+  </define>
+  <define name="attlist.not" combine="interleave">
+    <empty/>
+  </define>
+  <!-- The negation of a simpler test is a test itself -->
+  <define name="equal">
+    <element name="equal">
+      <ref name="attlist.equal"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.equal" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    The simplest test is an equality test. The right part and the
+    left part of the equality may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with">
+    <element name="begins-with">
+      <ref name="attlist.begins-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with">
+    <element name="ends-with">
+      <ref name="attlist.ends-with"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="begins-with-list">
+    <element name="begins-with-list">
+      <ref name="attlist.begins-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.begins-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the beginning.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="ends-with-list">
+    <element name="ends-with-list">
+      <ref name="attlist.ends-with-list"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.ends-with-list" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part at the end.
+    The first part of the test may be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of
+    a variable ('var') defined in the def-vars section. The second part
+    must always be a list.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="contains-substring">
+    <element name="contains-substring">
+      <ref name="attlist.contains-substring"/>
+      <ref name="value"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.contains-substring" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    Tests if the left part contains the right part.
+    Both parts of the test may both be a clip (see below), a
+    literal string ('lit'), a literal tag ('lit-tag') or the value of 
+    a variable ('var') defined in the def-vars section.  When the attribute
+    'caseless' is set to 'yes', the comparison is made without attending
+    to the case.
+  -->
+  <define name="in">
+    <element name="in">
+      <ref name="attlist.in"/>
+      <ref name="value"/>
+      <ref name="list"/>
+    </element>
+  </define>
+  <define name="attlist.in" combine="interleave">
+    <optional>
+      <attribute name="caseless">
+        <choice>
+          <value>no</value>
+          <value>yes</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+  -->
+  <define name="list">
+    <element name="list">
+      <ref name="attlist.list"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.list" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    'list' refers, by the name in attribute 'n', to a list defined earlier in
+    the 'section-def-lists' section
+  -->
+  <define name="let">
+    <element name="let">
+      <ref name="attlist.let"/>
+      <ref name="container"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.let" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    An assignment statement ('let') assigns the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip
+  -->
+  <define name="append">
+    <element name="append">
+      <ref name="attlist.append"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.append" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    This instruction appends the value of a clip (see
+    below), a literal string ('lit'), a literal tag('lit-tag') or the 
+    value of a global variable ('var') to either a global variable ('var') 
+    or a clip, identified by the "n" attribute
+  -->
+  <define name="out">
+    <element name="out">
+      <ref name="attlist.out"/>
+      <oneOrMore>
+        <choice>
+          <ref name="mlu"/>
+          <ref name="lu"/>
+          <ref name="b"/>
+          <ref name="chunk"/>
+          <ref name="var"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.out" combine="interleave">
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    'out' is an output statement; it may output any sequence of
+    clips, literal strings, literal tags, variables, and whitespace items 
+    (see below) 
+  -->
+  <define name="modify-case">
+    <element name="modify-case">
+      <ref name="attlist.modify-case"/>
+      <ref name="container"/>
+      <ref name="stringvalue"/>
+    </element>
+  </define>
+  <define name="attlist.modify-case" combine="interleave">
+    <empty/>
+  </define>
+  <!--
+    The first argument of 'modify-case' copies the case of the second
+    argument.
+  -->
+  <define name="call-macro">
+    <element name="call-macro">
+      <ref name="attlist.call-macro"/>
+      <zeroOrMore>
+        <ref name="with-param"/>
+      </zeroOrMore>
+    </element>
+  </define>
+  <define name="attlist.call-macro" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    A macro may be called anywhere by name with zero or more
+    arguments
+  -->
+  <define name="with-param">
+    <element name="with-param">
+      <ref name="attlist.with-param"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.with-param" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    The attribute pos in each argument is used to refer to a lexical
+    form in the current rule. For example, if a 2-parameter macro
+    has been defined to perform noun-adjective agreement operations,
+    it may be used with arguments 1 and 2 in a noun-adjective rule,
+    with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+    arguments 1 and 3 in a noun-adverb-adjective rule, and with
+    arguments 2 and 1 in an adjective-noun rule 
+  -->
+  <define name="clip">
+    <element name="clip">
+      <ref name="attlist.clip"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.clip" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="side">
+      <choice>
+        <value>sl</value>
+        <value>tl</value>
+      </choice>
+    </attribute>
+    <attribute name="part"/>
+    <optional>
+      <attribute name="queue"/>
+    </optional>
+    <optional>
+      <attribute name="link-to"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    A 'clip' is a substring of a source-language or target-language
+    lexical form, extracted according to an attribute:
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * 'side' is used to select a source-language ('sl') or a
+      target-language ('tl') clip
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+    
+    * the value of 'queue' may be 'no' or 'yes'.  'yes' is assumed  by 
+      default.
+    
+    * 'link-to' causes the other attributes to be ignored in clip evaluation
+      when using 'clip' as a right hand side element (as value), and 
+      returns its value.  When using as a left hand side (as reference), 
+      the value of the 'as' attribute is ignored.
+  -->
+  <define name="lit">
+    <element name="lit">
+      <ref name="attlist.lit"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal string value: the value of the literal is the value of
+    the 'v' attribute
+  -->
+  <define name="lit-tag">
+    <element name="lit-tag">
+      <ref name="attlist.lit-tag"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.lit-tag" combine="interleave">
+    <attribute name="v"/>
+  </define>
+  <!--
+    A literal tag value: the value of the tag is the value of
+    the 'v' attribute
+  -->
+  <define name="var">
+    <element name="var">
+      <ref name="attlist.var"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.var" combine="interleave">
+    <attribute name="n">
+      <data type="IDREF"/>
+    </attribute>
+  </define>
+  <!--
+    Each 'var' is a variable identifier: the attribute n is the name
+    of the variable. When it is in an 'out', a 'test', or the right
+    part of a 'let', it represents the value of the variable; when in
+    the left part of a 'let' it represents the reference of the
+    variable. 
+  -->
+  <define name="get-case-from">
+    <element name="get-case-from">
+      <ref name="attlist.get-case-from"/>
+      <choice>
+        <ref name="clip"/>
+        <ref name="lit"/>
+        <ref name="var"/>
+      </choice>
+    </element>
+  </define>
+  <define name="attlist.get-case-from" combine="interleave">
+    <attribute name="pos"/>
+  </define>
+  <!--
+    Warning: all the comments that mention get-case-from still
+    need to be updated
+  -->
+  <define name="case-of">
+    <element name="case-of">
+      <ref name="attlist.case-of"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.case-of" combine="interleave">
+    <attribute name="pos"/>
+    <attribute name="side">
+      <choice>
+        <value>sl</value>
+        <value>tl</value>
+      </choice>
+    </attribute>
+    <attribute name="part"/>
+  </define>
+  <!--
+    A 'case-of' is a value representing the case of a "clip".  This value 
+    will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+    (all uppercase).
+    
+    * 'pos' is an index (1, 2, 3...) used to select a lexical form
+       inside the rule;
+    
+    * 'side' is used to select a source-language ('sl') or a
+      target-language ('tl') clip
+    
+    * the value of 'part' is the name of an attribute defined in
+      def-attrs, but may take also the values 'lem' (referring to
+      the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+      (lemma queue) and 'whole' (referring to the whole lexical form).
+  -->
+  <define name="concat">
+    <element name="concat">
+      <ref name="attlist.concat"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.concat" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Concatenates a sequence of values -->
+  <define name="mlu">
+    <element name="mlu">
+      <ref name="attlist.mlu"/>
+      <oneOrMore>
+        <ref name="lu"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.mlu" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Encloses a multiword -->
+  <define name="lu">
+    <element name="lu">
+      <ref name="attlist.lu"/>
+      <oneOrMore>
+        <ref name="value"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.lu" combine="interleave">
+    <empty/>
+  </define>
+  <!-- Encloses a word inside an 'out' element. -->
+  <define name="reject-current-rule">
+    <element name="reject-current-rule">
+      <ref name="attlist.reject-current-rule"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.reject-current-rule" combine="interleave">
+    <optional>
+      <attribute name="shifting">
+        <choice>
+          <value>yes</value>
+          <value>no</value>
+        </choice>
+      </attribute>
+    </optional>
+  </define>
+  <!--
+    This instruction cancels the execution of the rule being processed.
+    If "shifting" is set to "yes" or is not specified, the matching process
+    consumes exactly one word at the input. If "shifting" is set to "no"
+    then the rule is marked as not to be considered in the current matching
+    until the input buffer advances by at least one word
+  -->
+  <define name="chunk">
+    <element name="chunk">
+      <ref name="attlist.chunk"/>
+      <ref name="tags"/>
+      <oneOrMore>
+        <choice>
+          <ref name="mlu"/>
+          <ref name="lu"/>
+          <ref name="b"/>
+          <ref name="var"/>
+        </choice>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.chunk" combine="interleave">
+    <optional>
+      <attribute name="name"/>
+    </optional>
+    <optional>
+      <attribute name="namefrom"/>
+    </optional>
+    <optional>
+      <attribute name="case"/>
+    </optional>
+    <optional>
+      <attribute name="c"/>
+    </optional>
+  </define>
+  <!--
+    Encloses a chunk inside an 'out' element.      
+    * 'name' the pseudolemma of the chunk.
+    * 'namefrom' get the name from a variable.
+    * 'case' the variable to get the uppercase/lowercase policy
+       to apply it to the chunk name
+  -->
+  <define name="tags">
+    <element name="tags">
+      <ref name="attlist.tags"/>
+      <oneOrMore>
+        <ref name="tag"/>
+      </oneOrMore>
+    </element>
+  </define>
+  <define name="attlist.tags" combine="interleave">
+    <empty/>
+  </define>
+  <define name="tag">
+    <element name="tag">
+      <ref name="attlist.tag"/>
+      <ref name="value"/>
+    </element>
+  </define>
+  <define name="attlist.tag" combine="interleave">
+    <empty/>
+  </define>
+  <define name="b">
+    <element name="b">
+      <ref name="attlist.b"/>
+      <empty/>
+    </element>
+  </define>
+  <define name="attlist.b" combine="interleave">
+    <optional>
+      <attribute name="pos"/>
+    </optional>
+  </define>
+  <start>
+    <choice>
+      <ref name="transfer"/>
+    </choice>
+  </start>
+</grammar>
+<!--
+  'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+  with pos="2" refers to the [super]blanks (including format data
+  encapsulated by the de-formatter) between lexical form 2 and
+  lexical form 3. Managing [super]blanks explicitly allows for the
+  correct placement of format when the result of structural
+  transfer has more or less lexical items than the original or has
+  been reordered in some way.  If attribute "pos" is not specified, then
+  a single blank (ASCII 32) is generated.
+-->
Index: branches/apertium-tagger/apertium2/apertium/apertium-transfer.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-transfer.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-transfer.1	(revision 69632)
@@ -0,0 +1,80 @@
+.TH apertium-transfer 1 2006-03-08 "" ""
+.SH NAME
+apertium-transfer \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-transfer
+[\-n] trules preproc biltrans [input [output]]
+.PP
+.B apertium-transfer
+trules preproc [input [output]]
+.PP
+.B apertium-transfer
+\-x extended trules preproc biltrans [input [output]]
+.PP
+.B apertium-transfer
+\-c trules preproc biltrans [input [output]]
+.PP
+.B apertium-transfer
+\-t trules preproc biltrans [input [output]]
+.SH DESCRIPTION
+.BR apertium-transfer 
+is the program that performs the transfer from input language
+into output language. Normally this program will not be used independently, but in combination with other programs:
+.PP
+.RE
+.SH FILES
+These are the five files that can be used with this command:
+.PP
+.B trules
+Transfer rules file
+.PP
+.B preproc    
+Result of preprocess trules file
+.PP
+.B biltrans   
+Bilingual letter transducer file
+.PP
+.B infile
+Input file (stdin by default).
+.PP
+.B outfile
+Output file (stdout by default).
+.PP
+\-.B \-b
+\-input from lexical transfer (single level transfer only)
+\-.PP
+\-.B \-h
+\-shows this message
+\-.PP
+.B -n
+Do not use a bilingual dictionary to process the input.
+.PP
+.B  -x bindix  
+extended mode with user dictionary
+.PP
+.B -c
+case-sensitiveness while accessing bilingual dictionary
+.PP
+.B -t
+trace mode: show rule numbers and matched content
+.PP
+.B -T
+extended trace mode, for use with apertium-transfer-tools
+.PP
+.B -z
+null-flushing output on
+.PP
+.SH SEE ALSO
+.I apertium \fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/transfer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer.cc	(revision 69632)
@@ -0,0 +1,2346 @@
+/*
+ * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer.h>
+#include <apertium/trx_reader.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+#include <lttoolbox/compression.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <pcre.h>
+
+#include <cctype>
+#include <iostream>
+#include <stack>
+#include <cerrno>
+
+using namespace Apertium;
+using namespace std;
+
+/**
+ * Release what this object owns: the pattern matcher (me) and the
+ * parsed XML document (doc). Both pointers are reset to NULL so the
+ * function can be called more than once.
+ */
+void
+Transfer::destroy()
+{
+  // Deleting a null pointer is a no-op, so no guard is needed here.
+  delete me;
+  me = NULL;
+
+  if(doc != NULL)
+  {
+    xmlFreeDoc(doc);
+    doc = NULL;
+  }
+}
+
+/**
+ * Create an engine with nothing loaded: every pointer is null, every
+ * counter is zero and every optional mode is switched off, except that
+ * the bilingual dictionary is used by default.
+ */
+Transfer::Transfer() :
+word(0),
+blank(0),
+lword(0),
+lblank(0),
+output(0),
+any_char(0),
+any_tag(0),
+nwords(0)
+{
+  // No rule data has been read yet.
+  doc = NULL;
+  root_element = NULL;
+  me = NULL;
+  lastrule = NULL;
+
+  // Processing mode defaults.
+  defaultAttrs = lu;
+  isExtended = false;
+  preBilingual = false;
+  useBilingual = true;
+
+  // Flushing and tracing are off by default.
+  internal_null_flush = false;
+  null_flush = false;
+  trace_att = false;
+  trace = false;
+
+  emptyblank = "";
+}
+
+/** Free everything the engine owns by delegating to destroy(). */
+Transfer::~Transfer()
+{
+  this->destroy();
+}
+
+/**
+ * Load the compiled rule data from an already opened stream.
+ *
+ * The sections are consumed strictly in this order: alphabet, pattern
+ * transducer, final-state map, attribute items, variables, macros and
+ * lists. Each counted section starts with its number of entries.
+ *
+ * @param in stream positioned at the start of the compiled data; it is
+ *           read but not closed here.
+ */
+void
+Transfer::readData(FILE *in)
+{
+  alphabet.read(in);
+  // Look up the symbols the alphabet assigns to the two wildcard markers.
+  any_char = alphabet(TRXReader::ANY_CHAR);
+  any_tag = alphabet(TRXReader::ANY_TAG);
+
+  Transducer t;
+  t.read(in, alphabet.size());
+
+  map<int, int> finals;
+
+  // finals
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    // The key is read into a local first so the key/value read order is fixed.
+    int key = Compression::multibyte_read(in);
+    finals[key] = Compression::multibyte_read(in);
+  }
+
+  // The matcher is built from the transducer and its final-state map.
+  me = new MatchExe(t, finals);
+
+  // attr_items
+  // The file stores a version string; when it differs from the value
+  // returned by pcre_version(), every attribute item is compiled again
+  // from the pattern text stored next to it.
+  bool recompile_attrs = Compression::string_read(in) != string(pcre_version());
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    attr_items[cad_k].read(in);
+    // The pattern text is always consumed, even when it is not needed.
+    wstring fallback = Compression::wstring_read(in);
+    if(recompile_attrs) {
+      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
+    }
+  }
+
+  // variables
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
+  }
+
+  // macros
+  // Each macro name maps to a number; processCallMacro uses it as an
+  // index into macro_map.
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    macros[cad_k] = Compression::multibyte_read(in);
+  }
+
+  // lists
+  // Every list is stored twice: verbatim in lists and lower-cased in
+  // listslow.
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+
+    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
+    {
+      wstring const cad_v = Compression::wstring_read(in);
+      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
+      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
+    }
+  }
+}
+
+/**
+ * Load the bilingual transducer from a file into fstp and prepare it
+ * for bilingual lookup. The program exits with an error message when
+ * the file cannot be opened.
+ *
+ * @param fstfile path of the bilingual transducer file.
+ */
+void
+Transfer::readBil(string const &fstfile)
+{
+  FILE *bil = fopen(fstfile.c_str(), "rb");
+  if(bil == NULL)
+  {
+    cerr << "Error: Could not open file '" << fstfile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  fstp.load(bil);
+  fstp.initBiltrans();
+
+  fclose(bil);
+}
+
+/**
+ * Load the extended (user) dictionary from a file and switch extended
+ * mode on. The program exits with an error message when the file
+ * cannot be opened.
+ *
+ * @param fstfile path of the extended dictionary file.
+ */
+void
+Transfer::setExtendedDictionary(string const &fstfile)
+{
+  FILE *dict = fopen(fstfile.c_str(), "rb");
+  if(dict == NULL)
+  {
+    cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  extended.load(dict);
+  extended.initBiltrans();
+  fclose(dict);
+
+  // The flag is raised only once the dictionary has been loaded.
+  isExtended = true;
+}
+
+/**
+ * Load everything the engine needs: the rules file, the compiled rule
+ * data and, when a name is given, the bilingual transducer.
+ *
+ * @param transferfile path of the XML rules file.
+ * @param datafile     path of the compiled rule data.
+ * @param fstfile      path of the bilingual transducer; an empty
+ *                     string means none is loaded.
+ */
+void
+Transfer::read(string const &transferfile, string const &datafile,
+               string const &fstfile)
+{
+  readTransfer(transferfile);
+
+  // Compiled rule data.
+  FILE *data = fopen(datafile.c_str(), "rb");
+  if(data == NULL)
+  {
+    cerr << "Error: Could not open file '" << datafile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  readData(data);
+  fclose(data);
+
+  // The bilingual transducer is optional.
+  if(!fstfile.empty())
+  {
+    readBil(fstfile);
+  }
+}
+
+/**
+ * Parse the XML rules file and index its macros and rules.
+ *
+ * The parsed document is kept in doc and its root in root_element.
+ * The root's "default" attribute selects the output mode: "chunk"
+ * selects chunk mode, any other value (or an empty one) selects lu
+ * mode. The "section-def-macros" and "section-rules" children are then
+ * handed to collectMacros() and collectRules().
+ *
+ * The program exits with an error message when the file cannot be
+ * parsed or has no root element.
+ *
+ * @param in path of the XML rules file.
+ */
+void
+Transfer::readTransfer(string const &in)
+{
+  doc = xmlReadFile(in.c_str(), NULL, 0);
+
+  if(doc == NULL)
+  {
+    cerr << "Error: Could not parse file '" << in << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  root_element = xmlDocGetRootElement(doc);
+
+  // xmlDocGetRootElement() may return NULL; fail cleanly instead of
+  // dereferencing it below.
+  if(root_element == NULL)
+  {
+    cerr << "Error: File '" << in << "' has no root element." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // search for root element attributes
+  for(xmlAttr *i = root_element->properties; i != NULL; i = i->next)
+  {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "default"))
+    {
+      // An attribute with an empty value has no text child, so the
+      // child pointer must be checked before reading its content.
+      xmlChar const *value = (i->children != NULL) ? i->children->content : NULL;
+
+      // xmlStrcmp() accepts NULL and reports it as different from "chunk".
+      if(!xmlStrcmp(value, (const xmlChar *) "chunk"))
+      {
+        defaultAttrs = chunk;
+      }
+      else
+      {
+        defaultAttrs = lu; // default value for 'default'
+      }
+    }
+  }
+
+  // search for macros & rules
+  for(xmlNode *i = root_element->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "section-def-macros"))
+      {
+        collectMacros(i);
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "section-rules"))
+      {
+        collectRules(i);
+      }
+    }
+  }
+}
+
+/**
+ * Record the "action" element of every rule found under the given
+ * section, in document order, by appending it to rule_map.
+ *
+ * Every element child of the section is treated as a rule and must
+ * contain an "action" element; otherwise the program reports the
+ * offending line and exits. (The search used to run past the last
+ * child and dereference a null pointer in that case.)
+ *
+ * @param localroot the "section-rules" element.
+ */
+void
+Transfer::collectRules(xmlNode *localroot)
+{
+  for(xmlNode *rule = localroot->children; rule != NULL; rule = rule->next)
+  {
+    if(rule->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    // Find the first "action" child, stopping at the end of the list.
+    xmlNode *action = NULL;
+    for(xmlNode *j = rule->children; j != NULL; j = j->next)
+    {
+      if(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "action"))
+      {
+        action = j;
+        break;
+      }
+    }
+
+    // Skipping the rule silently would leave rule_map with one entry
+    // fewer than the section has rules, so stop instead.
+    if(action == NULL)
+    {
+      cerr << "Error: rule at line " << rule->line << " has no 'action' element." << endl;
+      exit(EXIT_FAILURE);
+    }
+
+    rule_map.push_back(action);
+  }
+}
+
+/**
+ * Append every element child of the given section to macro_map, in
+ * document order.
+ *
+ * @param localroot the "section-def-macros" element.
+ */
+void
+Transfer::collectMacros(xmlNode *localroot)
+{
+  xmlNode *macro = localroot->children;
+  while(macro != NULL)
+  {
+    if(macro->type == XML_ELEMENT_NODE)
+    {
+      macro_map.push_back(macro);
+    }
+    macro = macro->next;
+  }
+}
+
+/**
+ * Check that a position is below the given limit.
+ *
+ * Only the upper bound is tested. When the test fails, the rules file
+ * name and the line of the offending element are written to wcerr.
+ *
+ * @param element element being evaluated, used for the line number.
+ * @param index   position to check.
+ * @param limit   first position that is out of range.
+ * @return true when index < limit, false otherwise.
+ */
+bool
+Transfer::checkIndex(xmlNode *element, int index, int limit)
+{
+  bool const in_range = index < limit;
+  if(!in_range)
+  {
+    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) <<L": line " << element->line << endl;
+  }
+  return in_range;
+}
+
+
+/**
+ * Evaluate an rvalue element of a rule and return its string value.
+ *
+ * The function works in two phases. If the element already has a
+ * compiled instruction in evalStringCache, that instruction is executed
+ * against the current words, blanks and variables. Otherwise the
+ * element is inspected by name:
+ *  - "clip", "lit-tag", "lit", "b", "get-case-from", "var" and
+ *    "case-of" are compiled into a TransferInstr, stored in the cache,
+ *    and the function then calls itself once more so that the cached
+ *    branch produces the value;
+ *  - "concat", "lu", "mlu" and "chunk" are evaluated directly on every
+ *    call and are never cached;
+ *  - any other name is reported as an error and the program exits.
+ *
+ * Positions in the rules file are 1-based; they are stored 0-based.
+ *
+ * @param element the element to evaluate.
+ * @return the value of the element; an empty string when a position
+ *         check fails or when an "lu"/"mlu" has no content.
+ */
+string
+Transfer::evalString(xmlNode *element)
+{
+  map<xmlNode *, TransferInstr>::iterator it;
+  it = evalStringCache.find(element);
+  if(it != evalStringCache.end())
+  {
+    // Cached: execute the compiled instruction.
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
+    {
+      case ti_clip_sl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition());
+        }
+        break;
+
+      case ti_clip_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition());
+        }
+        break;
+
+      // A clip with "link-to" yields the linked tag, wrapped in <>, only
+      // when the clipped part itself is not empty.
+      case ti_linkto_sl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          if(word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()) != "")
+          {
+            return "<" + string((char *) ti.getPointer()) + ">";
+          }
+          else
+          {
+            return "";
+          }
+        }
+        break;
+
+      case ti_linkto_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          if(word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()) != "")
+          {
+            return "<" + string((char *) ti.getPointer()) + ">";
+          }
+          else
+          {
+            return "";
+          }
+        }
+        break;
+
+      case ti_var:
+        return variables[ti.getContent()];
+
+      case ti_lit_tag:
+      case ti_lit:
+        return ti.getContent();
+
+      case ti_b:
+        if(checkIndex(element, ti.getPos(), lblank))
+        {
+          // A "b" without attributes is stored with position -1 and
+          // yields a single space.
+          if(ti.getPos() >= 0)
+          {
+            return !blank?"":*(blank[ti.getPos()]);
+          }
+          return " ";
+        }
+        break;
+
+      case ti_get_case_from:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          // The instruction's pointer is the child element whose value
+          // is passed to copycase as second argument.
+          return copycase(word[ti.getPos()]->source(attr_items[ti.getContent()]),
+                  evalString((xmlNode *) ti.getPointer()));
+        }
+        break;
+
+      case ti_case_of_sl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return caseOf(word[ti.getPos()]->source(attr_items[ti.getContent()]));
+        }
+        break;
+
+      case ti_case_of_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return caseOf(word[ti.getPos()]->target(attr_items[ti.getContent()]));
+        }
+        break;
+
+      default:
+        return "";
+    }
+    // Reached when a position check failed.
+    return "";
+  }
+
+  // Not cached yet: compile the element, or evaluate it directly.
+  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL, *side = NULL, *as = NULL;
+    bool queue = true;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
+      {
+	side = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *)i->children->content) - 1;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
+      {
+        if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
+        {
+          queue = false;
+        }
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
+      {
+        as = i->children->content;
+      }
+    }
+
+    // Any side other than "sl" is treated as the target side.
+    if(as != NULL)
+    {
+      if(!xmlStrcmp(side, (const xmlChar *) "sl"))
+      {
+        evalStringCache[element] = TransferInstr(ti_linkto_sl, (const char *) part, pos, (void *) as, queue);
+      }
+      else
+      {
+        evalStringCache[element] = TransferInstr(ti_linkto_tl, (const char *) part, pos, (void *) as, queue);
+      }
+    }
+    else if(!xmlStrcmp(side, (const xmlChar *) "sl"))
+    {
+      evalStringCache[element] = TransferInstr(ti_clip_sl, (const char *) part, pos, NULL, queue);
+    }
+    else
+    {
+      evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue);
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
+  {
+    // The value of the first attribute is converted by tags().
+    evalStringCache[element] = TransferInstr(ti_lit_tag,
+                                             tags((const char *) element->properties->children->content), 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lit, string((char *) element->properties->children->content), 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
+  {
+    if(element->properties == NULL)
+    {
+      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
+    }
+    else
+    {
+      int pos = atoi((const char *) element->properties->children->content) - 1;
+      evalStringCache[element] = TransferInstr(ti_b, "", pos);
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
+  {
+    int pos = atoi((const char *) element->properties->children->content) - 1;
+    // The first element child is the expression whose case is changed.
+    xmlNode *param = NULL;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+	param = i;
+	break;
+      }
+    }
+
+    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
+  {
+    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL, *side = NULL;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
+      {
+	side = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+    }
+
+    if(!xmlStrcmp(side, (const xmlChar *) "sl"))
+    {
+      evalStringCache[element] = TransferInstr(ti_case_of_sl, (const char *) part, pos);
+    }
+    else
+    {
+      evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
+  {
+    // Concatenation of the values of all element children.
+    string value;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+        value.append(evalString(i));
+      }
+    }
+    return value;
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu"))
+  {
+    // A lexical unit: the children's values wrapped in ^...$, or
+    // nothing at all when they are empty.
+    string myword;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+       if(i->type == XML_ELEMENT_NODE)
+       {
+         myword.append(evalString(i));
+       }
+    }
+
+    if(myword != "")
+    {
+      return "^"+myword+"$";
+    }
+    else
+    {
+      return "";
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "mlu"))
+  {
+    // A multiword unit: each element child contributes one piece and
+    // the pieces are joined with '+'.
+    string value;
+
+    // Stays true until the first non-empty piece has been seen.
+    bool first_time = true;
+
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+        string myword;
+
+        for(xmlNode *j = i->children; j != NULL; j = j->next)
+        {
+          if(j->type == XML_ELEMENT_NODE)
+	  {
+            myword.append(evalString(j));
+	  }
+        }
+
+	if(!first_time)
+	{
+	  // No '+' is inserted before an empty piece or one starting
+	  // with '#'.
+	  if(myword != "" && myword[0] != '#')  //'+#' problem
+	  {
+	    value.append("+");
+          }
+	}
+	else
+	{
+	  if(myword != "")
+	  {
+	    first_time = false;
+          }
+	}
+
+	value.append(myword);
+      }
+    }
+
+    if(value != "")
+    {
+      return "^"+value+"$";
+    }
+    else
+    {
+      return "";
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "chunk"))
+  {
+    return processChunk(element);
+  }
+  else
+  {
+    cerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // Only reached after an instruction was stored in the cache above;
+  // this second call takes the cached branch and produces the value.
+  return evalString(element);
+}
+
+/**
+ * Execute an "out" instruction: evaluate each element child and write
+ * the result to the output stream.
+ *
+ * What is accepted depends on defaultAttrs. In lu mode, "lu" and "mlu"
+ * children are written as ^...$ units and every other child is
+ * evaluated with evalString(). In the other mode, "chunk" children go
+ * through processChunk() and every other child through evalString().
+ *
+ * @param localroot the "out" element.
+ */
+void
+Transfer::processOut(xmlNode *localroot)
+{
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(defaultAttrs == lu)
+      {
+        if(!xmlStrcmp(i->name, (const xmlChar *) "lu"))
+        {
+  	  string myword;
+	  for(xmlNode *j = i->children; j != NULL; j = j->next)
+	  {
+	    if(j->type == XML_ELEMENT_NODE)
+	    {
+	      myword.append(evalString(j));
+            }
+	  }
+	  // An empty lexical unit produces no output at all.
+	  if(myword != "")
+	  {
+  	    fputwc_unlocked(L'^', output);
+   	    fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
+	    fputwc_unlocked(L'$', output);
+          }
+        }
+        else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu"))
+        {
+	  // Unlike "lu", the delimiters are written even when every
+	  // piece turns out to be empty.
+	  fputwc_unlocked('^', output);
+	  // Stays true until the first non-empty piece has been seen.
+	  bool first_time = true;
+	  for(xmlNode *j = i->children; j != NULL; j = j->next)
+	  {
+	    if(j->type == XML_ELEMENT_NODE)
+	    {
+              string myword;
+	      for(xmlNode *k = j->children; k != NULL; k = k->next)
+	      {
+	        if(k->type == XML_ELEMENT_NODE)
+	        {
+                  myword.append(evalString(k));
+	        }
+	      }
+
+	      if(!first_time)
+	      {
+	        // No '+' before an empty piece or one starting with '#'.
+	        if(myword != "" && myword[0] != '#')  //'+#' problem
+	        {
+	          fputwc_unlocked(L'+', output);
+                }
+	      }
+	      else
+	      {
+	        if(myword != "")
+	        {
+	          first_time = false;
+                }
+	      }
+	      fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output);
+	    }
+	  }
+	  fputwc_unlocked(L'$', output);
+        }
+        else // 'b'
+        {
+          fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(),
+			  output);
+        }
+      }
+      else
+      {
+        if(!xmlStrcmp(i->name, (const xmlChar *) "chunk"))
+        {
+          fputws_unlocked(UtfConverter::fromUtf8(processChunk(i)).c_str(), output);
+        }
+        else // 'b'
+        {
+          fputws_unlocked(UtfConverter::fromUtf8(evalString(i)).c_str(), output);
+        }
+      }
+    }
+  }
+}
+
+/**
+ * Build the text of a "chunk" element.
+ *
+ * The result has the form ^NAME TAGS{ CONTENT }$ without the spaces:
+ * the chunk name, the value of the "tags" child followed by '{', the
+ * values of the remaining children, and a closing "}$".
+ *
+ * The name comes from the "name" attribute or, failing that, from the
+ * variable named by "namefrom"; if neither is given the program exits
+ * with an error. The "case" attribute names the variable whose value
+ * is passed to copycase() together with the chunk name.
+ *
+ * @param localroot the "chunk" element.
+ * @return the chunk as a string.
+ */
+string
+Transfer::processChunk(xmlNode *localroot)
+{
+  string name, namefrom;
+  // Used as the variable name when there is no "case" attribute.
+  string caseofchunk = "aa";
+  string result;
+
+
+  for(xmlAttr *i = localroot->properties; i != NULL; i = i->next)
+  {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "name"))
+    {
+      name = (const char *) i->children->content;
+    }
+    else if(!xmlStrcmp(i->name, (const xmlChar *) "namefrom"))
+    {
+      namefrom = (const char *) i->children->content;
+    }
+    else if(!xmlStrcmp(i->name, (const xmlChar *) "case"))
+    {
+      caseofchunk = (const char *) i->children->content;
+    }
+  }
+
+  result.append("^");
+  // The else branch below is taken only when "case" is present and
+  // empty, because the default value is not empty.
+  if(caseofchunk != "")
+  {
+    if(name != "")
+    {
+      result.append(copycase(variables[caseofchunk], name));
+    }
+    else if(namefrom != "")
+    {
+      result.append(copycase(variables[caseofchunk], variables[namefrom]));
+    }
+    else
+    {
+      cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+  else
+  {
+    if(name != "")
+    {
+      result.append(name);
+    }
+    else if(namefrom != "")
+    {
+      result.append(variables[namefrom]);
+    }
+    else
+    {
+      cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "tags"))
+      {
+        // The opening brace is written right after the tags, so it
+        // only appears when a "tags" child is present.
+        result.append(processTags(i));
+        result.append("{");
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "lu"))
+      {
+        string myword;
+        for(xmlNode *j = i->children; j != NULL; j = j->next)
+        {
+          if(j->type == XML_ELEMENT_NODE)
+          {
+            myword.append(evalString(j));
+          }
+        }
+        if(myword != "")
+        {
+          result.append("^");
+          result.append(myword);
+          result.append("$");
+        }
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu"))
+      {
+        bool first_time = true;
+        string myword;
+        for(xmlNode *j = i->children; j != NULL; j = j->next)
+        {
+          string mylocalword;
+          if(j->type == XML_ELEMENT_NODE)
+          {
+            for(xmlNode *k = j->children; k != NULL; k = k->next)
+            {
+              if(k->type == XML_ELEMENT_NODE)
+              {
+                mylocalword.append(evalString(k));
+              }
+            }
+
+            if(!first_time)
+            {
+              if(mylocalword != "" && mylocalword[0] != '#')  // '+#' problem
+              {
+                myword += '+';
+              }
+            }
+            else
+            {
+              // NOTE(review): here the flag is cleared after the first
+              // piece even when that piece is empty, whereas the "mlu"
+              // handling in evalString() and processOut() waits for a
+              // non-empty piece - confirm whether this is intended.
+              first_time = false;
+            }
+          }
+          myword.append(mylocalword);
+        }
+        if(myword != "")
+        {
+          result.append("^");
+          result.append(myword);
+          result.append("$");
+        }
+      }
+      else // 'b'
+      {
+        result.append(evalString(i));
+      }
+    }
+  }
+  result.append("}$");
+  return result;
+}
+
+/**
+ * Build the tag string of a chunk: the values of all element children
+ * of every "tag" element, concatenated in document order.
+ *
+ * @param localroot the "tags" element.
+ * @return the concatenated tag values.
+ */
+string
+Transfer::processTags(xmlNode *localroot)
+{
+  string tag_text;
+
+  for(xmlNode *tag = localroot->children; tag != NULL; tag = tag->next)
+  {
+    // Only "tag" elements contribute.
+    if(tag->type != XML_ELEMENT_NODE || xmlStrcmp(tag->name, (xmlChar const *) "tag") != 0)
+    {
+      continue;
+    }
+
+    for(xmlNode *part = tag->children; part != NULL; part = part->next)
+    {
+      if(part->type == XML_ELEMENT_NODE)
+      {
+        tag_text.append(evalString(part));
+      }
+    }
+  }
+
+  return tag_text;
+}
+
+/**
+ * Execute one instruction element by dispatching on its name.
+ *
+ * @param localroot the instruction element.
+ * @return the value returned by "choose" or "reject-current-rule";
+ *         -1 for every other instruction, including unknown ones.
+ */
+int
+Transfer::processInstruction(xmlNode *localroot)
+{
+  xmlChar const *instruction = localroot->name;
+
+  // These two instructions are the only ones that produce a result.
+  if(!xmlStrcmp(instruction, (const xmlChar *) "choose"))
+  {
+    return processChoose(localroot);
+  }
+  if(!xmlStrcmp(instruction, (const xmlChar *) "reject-current-rule"))
+  {
+    return processRejectCurrentRule(localroot);
+  }
+
+  if(!xmlStrcmp(instruction, (const xmlChar *) "let"))
+  {
+    processLet(localroot);
+  }
+  else if(!xmlStrcmp(instruction, (const xmlChar *) "append"))
+  {
+    processAppend(localroot);
+  }
+  else if(!xmlStrcmp(instruction, (const xmlChar *) "out"))
+  {
+    processOut(localroot);
+  }
+  else if(!xmlStrcmp(instruction, (const xmlChar *) "call-macro"))
+  {
+    processCallMacro(localroot);
+  }
+  else if(!xmlStrcmp(instruction, (const xmlChar *) "modify-case"))
+  {
+    processModifyCase(localroot);
+  }
+
+  return -1;
+}
+
+/**
+ * Evaluate a "reject-current-rule" instruction.
+ *
+ * @param localroot the "reject-current-rule" element.
+ * @return 0 when its "shifting" attribute is "no", 1 otherwise
+ *         (including when the attribute is absent).
+ */
+int
+Transfer::processRejectCurrentRule(xmlNode *localroot)
+{
+  string shifting_value;
+
+  xmlAttr *attr = localroot->properties;
+  while(attr != NULL)
+  {
+    if(!xmlStrcmp(attr->name, (const xmlChar *) "shifting"))
+    {
+      shifting_value = (char *) attr->children->content;
+      break;
+    }
+    attr = attr->next;
+  }
+
+  return (shifting_value == "no") ? 0 : 1;
+}
+
+/**
+ * Execute a "let" instruction: evaluate the second element child and
+ * assign the result to the first one.
+ *
+ * The left side may be a "var" (a variable is set) or a "clip" (a part
+ * of a source or target word is set). The first execution compiles the
+ * left side into evalStringCache; later executions use the cached
+ * instruction directly.
+ *
+ * @param localroot the "let" element.
+ */
+void
+Transfer::processLet(xmlNode *localroot)
+{
+  xmlNode *leftSide = NULL, *rightSide = NULL;
+
+  // The first two element children are the left and right sides.
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(leftSide == NULL)
+      {
+	leftSide = i;
+      }
+      else
+      {
+	rightSide = i;
+	break;
+      }
+    }
+  }
+
+  // Fast path: the left side has been compiled before.
+  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
+  if(it != evalStringCache.end())
+  {
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
+    {
+      case ti_var:
+        variables[ti.getContent()] = evalString(rightSide);
+        return;
+
+      case ti_clip_sl:
+        word[ti.getPos()]->setSource(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
+        return;
+
+      case ti_clip_tl:
+        word[ti.getPos()]->setTarget(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
+        return;
+
+      // Any other cached instruction type cannot be assigned to; the
+      // instruction is silently ignored.
+      default:
+        return;
+    }
+  }
+  // NOTE(review): leftSide is dereferenced without a NULL check, so a
+  // "let" with no element children would fail here - confirm that the
+  // rules file is always validated before it gets this far.
+  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
+  {
+    string const val = (const char *) leftSide->properties->children->content;
+    variables[val] = evalString(rightSide);
+    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
+  }
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL, *side = NULL, *as = NULL;
+    bool queue = true;
+
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
+      {
+	side = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	// Positions are 1-based in the rules file.
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
+      {
+        if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
+        {
+          queue = false;
+        }
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
+      {
+        // Read but not used by the assignment below.
+        as = i->children->content;
+      }
+    }
+
+    // Any side other than "tl" is treated as the source side. The
+    // position is not range-checked here.
+    if(!xmlStrcmp(side, (const xmlChar *) "tl"))
+    {
+      word[pos]->setTarget(attr_items[(const char *) part], evalString(rightSide), queue);
+      evalStringCache[leftSide] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue);
+    }
+    else
+    {
+      word[pos]->setSource(attr_items[(const char *) part], evalString(rightSide), queue);
+      evalStringCache[leftSide] = TransferInstr(ti_clip_sl, (const char *) part, pos, NULL, queue);
+    }
+  }
+}
+
+/**
+ * Execute an "append" instruction: append the value of every element
+ * child, in order, to the variable named by the "n" attribute.
+ *
+ * @param localroot the "append" element.
+ */
+void
+Transfer::processAppend(xmlNode *localroot)
+{
+  string target;
+
+  xmlAttr *attr = localroot->properties;
+  while(attr != NULL)
+  {
+    if(!xmlStrcmp(attr->name, (const xmlChar *) "n"))
+    {
+      target = (char *) attr->children->content;
+      break;
+    }
+    attr = attr->next;
+  }
+
+  for(xmlNode *piece = localroot->children; piece != NULL; piece = piece->next)
+  {
+    if(piece->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    // The variable is looked up once per piece, as evaluating a piece
+    // may itself touch the variable table.
+    variables[target].append(evalString(piece));
+  }
+}
+
+/**
+ * Execute a "modify-case" instruction: the first element child (a
+ * "clip" or a "var") is replaced by the result of copycase() applied
+ * to the value of the second element child and the current value of
+ * the first.
+ *
+ * @param localroot the "modify-case" element.
+ */
+void
+Transfer::processModifyCase(xmlNode *localroot)
+{
+  // The first two element children are the target and the case source.
+  xmlNode *leftSide = NULL;
+  xmlNode *rightSide = NULL;
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(leftSide != NULL)
+    {
+      rightSide = child;
+      break;
+    }
+    leftSide = child;
+  }
+
+  bool const is_clip = leftSide->name != NULL
+                       && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip");
+
+  if(!is_clip)
+  {
+    if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
+    {
+      string const val = (const char *) leftSide->properties->children->content;
+      variables[val] = copycase(evalString(rightSide), variables[val]);
+    }
+    return;
+  }
+
+  int pos = 0;
+  xmlChar *part = NULL;
+  xmlChar *side = NULL;
+  bool queue = true;
+
+  for(xmlAttr *attr = leftSide->properties; attr != NULL; attr = attr->next)
+  {
+    if(!xmlStrcmp(attr->name, (const xmlChar *) "side"))
+    {
+      side = attr->children->content;
+    }
+    else if(!xmlStrcmp(attr->name, (const xmlChar *) "part"))
+    {
+      part = attr->children->content;
+    }
+    else if(!xmlStrcmp(attr->name, (const xmlChar *) "pos"))
+    {
+      // Positions are 1-based in the rules file.
+      pos = atoi((const char *) attr->children->content) - 1;
+    }
+    else if(!xmlStrcmp(attr->name, (const xmlChar *) "queue"))
+    {
+      if(!xmlStrcmp(attr->children->content, (xmlChar const *) "no"))
+      {
+        queue = false;
+      }
+    }
+    else if(!xmlStrcmp(attr->name, (const xmlChar *) "link-to"))
+    {
+      // Read for parity with the other clip parsers; the value is unused.
+      xmlChar *link = attr->children->content;
+      (void)link;
+    }
+  }
+
+  // Any side other than "sl" is treated as the target side.
+  if(!xmlStrcmp(side, (const xmlChar *) "sl"))
+  {
+    string const result = copycase(evalString(rightSide),
+                                   word[pos]->source(attr_items[(const char *) part], queue));
+    word[pos]->setSource(attr_items[(const char *) part], result);
+  }
+  else
+  {
+    string const result = copycase(evalString(rightSide),
+                                   word[pos]->target(attr_items[(const char *) part], queue));
+    word[pos]->setTarget(attr_items[(const char *) part], result);
+  }
+}
+
+/**
+ * Execute a "call-macro" instruction.
+ *
+ * The macro is looked up by the value of the first attribute of the
+ * call. Temporary word and blank arrays are built from the positions
+ * given by the element children of the call, swapped in for the
+ * member arrays while the macro's instructions run, and swapped back
+ * out afterwards.
+ *
+ * @param localroot the "call-macro" element.
+ */
+void
+Transfer::processCallMacro(xmlNode *localroot)
+{
+  string const n = (const char *) localroot->properties->children->content;
+  int npar = 0;
+
+  // NOTE(review): operator[] yields 0 for a name that is not in macros,
+  // so an unknown macro silently selects entry 0 - confirm that names
+  // are validated before this point.
+  xmlNode *macro = macro_map[macros[n]];
+
+  // The macro definition declares how many parameters it takes.
+  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
+  {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
+    {
+      npar = atoi((const char *) i->children->content);
+      break;
+    }
+  }
+  
+  // ToDo: Is it at all valid if npar <= 0 ?
+
+  TransferWord **myword = NULL;
+  if(npar > 0)
+  {
+    myword = new TransferWord *[npar];
+  }
+  string **myblank = NULL;
+  if(npar > 0)
+  {
+    myblank = new string *[npar];
+    // The slot after the last parameter refers to the empty blank.
+    myblank[npar-1] = &emptyblank;
+  }
+
+  int idx = 0;
+  int lastpos = 0;
+  // NOTE(review): idx is not checked against npar, so a call with more
+  // element children than npar would write past both arrays - confirm
+  // that the rules file is validated against this.
+  for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      // Positions are 1-based in the rules file.
+      int pos = atoi((const char *) i->properties->children->content)-1;
+      myword[idx] = word[pos];
+      // The blank between two parameters is the one that follows the
+      // earlier parameter in the current pattern.
+      if(idx-1 >= 0)
+      {
+        myblank[idx-1] = blank[lastpos];
+      }
+      idx++;
+      lastpos = pos;
+    }
+  }
+
+  // Install the parameter arrays for the duration of the macro body.
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+
+  for(xmlNode *i = macro->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      processInstruction(i);
+    }
+  }
+
+  // Restore the caller's arrays; the locals hold the temporaries again.
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+  
+  delete[] myword;
+  delete[] myblank;
+}
+
+/**
+ * Execute a "choose" instruction.
+ *
+ * The "when" children are tried in order; the first one whose "test"
+ * succeeds has its instructions executed and ends the evaluation. The
+ * instructions of an "otherwise" child are executed unconditionally.
+ * As soon as any executed instruction returns a value other than -1,
+ * that value is returned immediately.
+ *
+ * @param localroot the "choose" element.
+ * @return the value of the last executed instruction, or -1.
+ */
+int
+Transfer::processChoose(xmlNode *localroot)
+{
+  int consumed = -1;
+
+  for(xmlNode *option = localroot->children; option != NULL; option = option->next)
+  {
+    if(option->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    if(!xmlStrcmp(option->name, (const xmlChar *) "when"))
+    {
+      bool test_passed = false;
+
+      for(xmlNode *step = option->children; step != NULL; step = step->next)
+      {
+        if(step->type != XML_ELEMENT_NODE)
+        {
+          continue;
+        }
+
+        if(xmlStrcmp(step->name, (const xmlChar *) "test") != 0)
+        {
+          // An ordinary instruction of this option.
+          consumed = processInstruction(step);
+          if(consumed != -1)
+          {
+            return consumed;
+          }
+          continue;
+        }
+
+        // A failed test abandons this option and moves on to the next.
+        if(!processTest(step))
+        {
+          break;
+        }
+        test_passed = true;
+      }
+
+      if(test_passed)
+      {
+        return consumed;
+      }
+    }
+    else if(!xmlStrcmp(option->name, (const xmlChar *) "otherwise"))
+    {
+      for(xmlNode *step = option->children; step != NULL; step = step->next)
+      {
+        if(step->type != XML_ELEMENT_NODE)
+        {
+          continue;
+        }
+
+        consumed = processInstruction(step);
+        if(consumed != -1)
+        {
+          return consumed;
+        }
+      }
+    }
+  }
+
+  return consumed;
+}
+
+/**
+ * Evaluate a condition element by dispatching on its name.
+ *
+ * @param localroot the condition element.
+ * @return the result of the matching evaluator; false when the element
+ *         name is not a known condition.
+ */
+bool
+Transfer::processLogical(xmlNode *localroot)
+{
+  xmlChar const *condition = localroot->name;
+
+  if(!xmlStrcmp(condition, (const xmlChar *) "equal"))
+  {
+    return processEqual(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "begins-with"))
+  {
+    return processBeginsWith(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "begins-with-list"))
+  {
+    return processBeginsWithList(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "ends-with"))
+  {
+    return processEndsWith(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "ends-with-list"))
+  {
+    return processEndsWithList(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "contains-substring"))
+  {
+    return processContainsSubstring(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "or"))
+  {
+    return processOr(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "and"))
+  {
+    return processAnd(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "not"))
+  {
+    return processNot(localroot);
+  }
+  if(!xmlStrcmp(condition, (const xmlChar *) "in"))
+  {
+    return processIn(localroot);
+  }
+
+  // Unknown condition.
+  return false;
+}
+
+/**
+ * Evaluate an "in" condition: whether the value of the first element
+ * child belongs to the list named by the second element child.
+ *
+ * When the first attribute of the condition is "yes", the lower-cased
+ * value is looked up in the lower-cased copy of the list; otherwise
+ * the value is looked up verbatim.
+ *
+ * @param localroot the "in" element.
+ * @return true when the value is in the list.
+ */
+bool
+Transfer::processIn(xmlNode *localroot)
+{
+  xmlNode *value = NULL;
+  xmlChar *idlist = NULL;
+
+  // First element child: the value. Second: the list reference, whose
+  // first attribute holds the list name.
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(value != NULL)
+    {
+      idlist = child->properties->children->content;
+      break;
+    }
+    value = child;
+  }
+
+  string sval = evalString(value);
+
+  bool const caseless = localroot->properties != NULL
+                        && !xmlStrcmp(localroot->properties->children->content,
+                                      (const xmlChar *) "yes");
+
+  if(caseless)
+  {
+    set<string, Ltstr> &lowered = listslow[(const char *) idlist];
+    return lowered.find(tolower(sval)) != lowered.end();
+  }
+
+  set<string, Ltstr> &exact = lists[(const char *) idlist];
+  return exact.find(sval) != exact.end();
+}
+
+/**
+ * Evaluates a 'test' node: the result is the logical value of its first
+ * element child, or false when it has no element child.
+ */
+bool
+Transfer::processTest(xmlNode *localroot)
+{
+  xmlNode *child = localroot->children;
+  while(child != NULL && child->type != XML_ELEMENT_NODE)
+  {
+    child = child->next;
+  }
+
+  if(child == NULL)
+  {
+    return false;
+  }
+  return processLogical(child);
+}
+
+/**
+ * Evaluates an 'and' node: true when every element child evaluates to
+ * true.  Evaluation stops at the first child that yields false.
+ */
+bool
+Transfer::processAnd(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type == XML_ELEMENT_NODE && !processLogical(child))
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Evaluates an 'or' node: true as soon as one element child evaluates to
+ * true; false when none does.
+ */
+bool
+Transfer::processOr(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type == XML_ELEMENT_NODE && processLogical(child))
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Evaluates a 'not' node: the negation of its first element child, or
+ * false when it has no element child.
+ */
+bool
+Transfer::processNot(xmlNode *localroot)
+{
+  xmlNode *child = localroot->children;
+  while(child != NULL && child->type != XML_ELEMENT_NODE)
+  {
+    child = child->next;
+  }
+
+  if(child == NULL)
+  {
+    return false;
+  }
+  return !processLogical(child);
+}
+
+/**
+ * Evaluates an 'equal' node by comparing the strings produced by its first
+ * two element children.  When the first attribute of localroot is "yes",
+ * both strings are lowercased before the comparison.
+ */
+bool
+Transfer::processEqual(xmlNode *localroot)
+{
+  xmlNode *lhs = NULL, *rhs = NULL;
+
+  // Pick the first two element children; anything after them is ignored.
+  for(xmlNode *child = localroot->children; child != NULL && rhs == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(lhs == NULL)
+    {
+      lhs = child;
+    }
+    else
+    {
+      rhs = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    return tolower(evalString(lhs)) == tolower(evalString(rhs));
+  }
+  return evalString(lhs) == evalString(rhs);
+}
+
+/**
+ * Tells whether s1 starts with s2.
+ *
+ * @param s1 string to inspect
+ * @param s2 candidate prefix
+ * @return true if the first s2.size() characters of s1 equal s2 (always
+ *         true for an empty s2), false if s1 is shorter than s2 or differs
+ */
+bool
+Transfer::beginsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    return false;
+  }
+
+  return s1.compare(0, s2.size(), s2) == 0;
+}
+
+/**
+ * Tells whether s1 finishes with s2.
+ *
+ * @param s1 string to inspect
+ * @param s2 candidate suffix
+ * @return true if the last s2.size() characters of s1 equal s2 (always
+ *         true for an empty s2), false if s1 is shorter than s2 or differs
+ */
+bool
+Transfer::endsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    return false;
+  }
+
+  return s1.compare(s1.size() - s2.size(), s2.size(), s2) == 0;
+}
+
+
+/**
+ * Evaluates a 'begins-with' node: checks whether the string produced by
+ * the first element child starts with the string produced by the second.
+ * When the first attribute of localroot is "yes", both strings are
+ * lowercased before the check.
+ */
+bool
+Transfer::processBeginsWith(xmlNode *localroot)
+{
+  xmlNode *lhs = NULL, *rhs = NULL;
+
+  // Pick the first two element children; anything after them is ignored.
+  for(xmlNode *child = localroot->children; child != NULL && rhs == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(lhs == NULL)
+    {
+      lhs = child;
+    }
+    else
+    {
+      rhs = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    return beginsWith(tolower(evalString(lhs)), tolower(evalString(rhs)));
+  }
+  return beginsWith(evalString(lhs), evalString(rhs));
+}
+
+/**
+ * Evaluates an 'ends-with' node: checks whether the string produced by the
+ * first element child finishes with the string produced by the second.
+ * When the first attribute of localroot is "yes", both strings are
+ * lowercased before the check.
+ */
+bool
+Transfer::processEndsWith(xmlNode *localroot)
+{
+  xmlNode *lhs = NULL, *rhs = NULL;
+
+  // Pick the first two element children; anything after them is ignored.
+  for(xmlNode *child = localroot->children; child != NULL && rhs == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(lhs == NULL)
+    {
+      lhs = child;
+    }
+    else
+    {
+      rhs = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    return endsWith(tolower(evalString(lhs)), tolower(evalString(rhs)));
+  }
+  return endsWith(evalString(lhs), evalString(rhs));
+}
+
+/**
+ * Evaluates a 'begins-with-list' node: checks whether the string produced
+ * by the first element child starts with any item of the list named by
+ * the first attribute of the second element child.
+ *
+ * When the first attribute of localroot is "yes", the string is lowercased
+ * and compared against 'listslow'; otherwise it is compared unchanged
+ * against 'lists'.
+ *
+ * @param localroot the 'begins-with-list' node
+ * @return true if some list item is a prefix of the string; false
+ *         otherwise, and also when the node has fewer than two element
+ *         children or the list element carries no usable name
+ */
+bool
+Transfer::processBeginsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL, *second = NULL;
+
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(first == NULL)
+      {
+        first = i;
+      }
+      else
+      {
+        second = i;
+        break;
+      }
+    }
+  }
+
+  // A missing 'list' operand, or one without a name, cannot match anything;
+  // dereferencing it would crash.
+  if(second == NULL || second->properties == NULL ||
+     second->properties->children == NULL ||
+     second->properties->children->content == NULL)
+  {
+    return false;
+  }
+
+  xmlChar *idlist = second->properties->children->content;
+  string needle = evalString(first);
+
+  bool const caseless = localroot->properties != NULL &&
+                        localroot->properties->children != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  // Look the list up once instead of once per iterator.
+  set<string, Ltstr> const &candidates =
+    caseless ? listslow[(const char *) idlist] : lists[(const char *) idlist];
+
+  for(set<string, Ltstr>::const_iterator it = candidates.begin();
+      it != candidates.end(); ++it)
+  {
+    if(beginsWith(needle, *it))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+/**
+ * Evaluates an 'ends-with-list' node: checks whether the string produced
+ * by the first element child finishes with any item of the list named by
+ * the first attribute of the second element child.
+ *
+ * When the first attribute of localroot is "yes", the string is lowercased
+ * and compared against 'listslow'; otherwise it is compared unchanged
+ * against 'lists'.
+ *
+ * @param localroot the 'ends-with-list' node
+ * @return true if some list item is a suffix of the string; false
+ *         otherwise, and also when the node has fewer than two element
+ *         children or the list element carries no usable name
+ */
+bool
+Transfer::processEndsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL, *second = NULL;
+
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(first == NULL)
+      {
+        first = i;
+      }
+      else
+      {
+        second = i;
+        break;
+      }
+    }
+  }
+
+  // A missing 'list' operand, or one without a name, cannot match anything;
+  // dereferencing it would crash.
+  if(second == NULL || second->properties == NULL ||
+     second->properties->children == NULL ||
+     second->properties->children->content == NULL)
+  {
+    return false;
+  }
+
+  xmlChar *idlist = second->properties->children->content;
+  string needle = evalString(first);
+
+  bool const caseless = localroot->properties != NULL &&
+                        localroot->properties->children != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  // Look the list up once instead of once per iterator.
+  set<string, Ltstr> const &candidates =
+    caseless ? listslow[(const char *) idlist] : lists[(const char *) idlist];
+
+  for(set<string, Ltstr>::const_iterator it = candidates.begin();
+      it != candidates.end(); ++it)
+  {
+    if(endsWith(needle, *it))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Evaluates a 'contains-substring' node: checks whether the string
+ * produced by the first element child contains the string produced by the
+ * second.  When the first attribute of localroot is "yes", both strings
+ * are lowercased before the search.
+ */
+bool
+Transfer::processContainsSubstring(xmlNode *localroot)
+{
+  xmlNode *lhs = NULL, *rhs = NULL;
+
+  // Pick the first two element children; anything after them is ignored.
+  for(xmlNode *child = localroot->children; child != NULL && rhs == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(lhs == NULL)
+    {
+      lhs = child;
+    }
+    else
+    {
+      rhs = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        !xmlStrcmp(localroot->properties->children->content,
+                                   (const xmlChar *) "yes");
+  if(caseless)
+  {
+    return tolower(evalString(lhs)).find(tolower(evalString(rhs))) != string::npos;
+  }
+  return evalString(lhs).find(evalString(rhs)) != string::npos;
+}
+
+/**
+ * Returns target_word with the case pattern of source_word applied.
+ *
+ * If source_word has more than one character and both its first and last
+ * characters are uppercase, the whole target is uppercased.  Otherwise only
+ * the first character of the target is touched: it is lowercased, and then
+ * uppercased again if the first character of source_word is uppercase.
+ *
+ * @param source_word UTF-8 word whose case is copied
+ * @param target_word UTF-8 word that receives the case
+ * @return the re-cased target, in UTF-8
+ */
+string
+Transfer::copycase(string const &source_word, string const &target_word)
+{
+  wstring const source = UtfConverter::fromUtf8(source_word);
+  wstring const target = UtfConverter::fromUtf8(target_word);
+
+  bool const capitalised = iswupper(source[0]);
+  bool const all_caps = capitalised && source.size() != 1 &&
+                        iswupper(source[source.size()-1]);
+
+  wstring result;
+  if(all_caps)
+  {
+    result = StringUtils::toupper(target);
+  }
+  else
+  {
+    result = target;
+    result[0] = towlower(result[0]);
+  }
+
+  if(capitalised)
+  {
+    result[0] = towupper(result[0]);
+  }
+
+  return UtfConverter::toUtf8(result);
+}
+
+/**
+ * Classifies the case pattern of a UTF-8 string.
+ *
+ * @param str string to classify
+ * @return "aa" if the string is empty or its first character is not
+ *         uppercase; "Aa" if the first character is uppercase and the
+ *         string has one character or its last character is not uppercase;
+ *         "AA" if both the first and last characters are uppercase
+ */
+string
+Transfer::caseOf(string const &str)
+{
+  wstring const s = UtfConverter::fromUtf8(str);
+
+  if(s.empty() || !iswupper(s[0]))
+  {
+    return "aa";
+  }
+
+  if(s.size() == 1 || !iswupper(s[s.size()-1]))
+  {
+    return "Aa";
+  }
+
+  return "AA";
+}
+
+/**
+ * Lowercases a UTF-8 string by converting it to a wide string, applying
+ * StringUtils::tolower and converting the result back to UTF-8.
+ */
+string
+Transfer::tolower(string const &str) const
+{
+  wstring const wide = UtfConverter::fromUtf8(str);
+  wstring const lowered = StringUtils::tolower(wide);
+  return UtfConverter::toUtf8(lowered);
+}
+
+/**
+ * Converts a dot-separated tag sequence into angle-bracket form, e.g.
+ * "n.f" becomes "<n><f>".  An empty input yields "<>".
+ */
+string
+Transfer::tags(string const &str) const
+{
+  string result(1, '<');
+
+  for(string::const_iterator c = str.begin(); c != str.end(); ++c)
+  {
+    if(*c == '.')
+    {
+      // A dot closes the current tag and opens the next one.
+      result += "><";
+    }
+    else
+    {
+      result += *c;
+    }
+  }
+
+  result += '>';
+  return result;
+}
+
+/**
+ * Runs the instructions of a rule, in order.
+ *
+ * @param localroot node whose element children are the instructions
+ *                  (expected to be an 'action' tag)
+ * @return the first value other than -1 returned by processInstruction,
+ *         at which point execution of the rule stops; -1 if every
+ *         instruction returned -1
+ */
+int
+Transfer::processRule(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    // An instruction that changes the number of words to consume from the
+    // input ends the rule immediately.
+    int const consumed = processInstruction(child);
+    if(consumed != -1)
+    {
+      return consumed;
+    }
+  }
+  return -1;
+}
+
+/**
+ * Returns the next token of the input.
+ *
+ * If input_buffer still holds tokens, the next buffered token is returned.
+ * Otherwise characters are read from 'in' and accumulated until a token
+ * boundary is found, and the new token is added to input_buffer:
+ *   - '$' ends a tt_word token,
+ *   - '^' ends a tt_blank token,
+ *   - end of input (or a null character while internal_null_flush is set)
+ *     ends a tt_eof token.
+ * A backslash and the character after it are copied verbatim, and so is
+ * everything from '[' up to the matching unescaped ']'.  A null character
+ * read while null_flush is set flushes 'output' and is not stored.
+ *
+ * End of input inside an escape or inside a '[' ... ']' block stops the
+ * copy, so the text read so far is returned as the tt_eof token instead of
+ * looping forever on an unterminated block.
+ *
+ * @param in stream to read from
+ * @return reference to the token stored in input_buffer
+ */
+TransferToken &
+Transfer::readToken(FILE *in)
+{
+  if(!input_buffer.isEmpty())
+  {
+    return input_buffer.next();
+  }
+
+  wstring content;
+  while(true)
+  {
+    int val = fgetwc_unlocked(in);
+    if(feof(in) || (val == 0 && internal_null_flush))
+    {
+      return input_buffer.add(TransferToken(content, tt_eof));
+    }
+    if(val == '\\')
+    {
+      content += L'\\';
+      int escaped = fgetwc_unlocked(in);
+      // At end of input there is no escaped character to store; the next
+      // iteration of the loop emits the tt_eof token.
+      if(!feof(in))
+      {
+        content += wchar_t(escaped);
+      }
+    }
+    else if(val == L'[')
+    {
+      content += L'[';
+      while(true)
+      {
+        int val2 = fgetwc_unlocked(in);
+        if(feof(in))
+        {
+          // Unterminated block: leave it to the outer loop to emit tt_eof.
+          break;
+        }
+        if(val2 == L'\\')
+        {
+          content += L'\\';
+          int escaped = fgetwc_unlocked(in);
+          if(!feof(in))
+          {
+            content += wchar_t(escaped);
+          }
+        }
+        else if(val2 == L']')
+        {
+          content += L']';
+          break;
+        }
+        else
+        {
+          content += wchar_t(val2);
+        }
+      }
+    }
+    else if(val == L'$')
+    {
+      return input_buffer.add(TransferToken(content, tt_word));
+    }
+    else if(val == L'^')
+    {
+      return input_buffer.add(TransferToken(content, tt_blank));
+    }
+    else if(val == L'\0' && null_flush)
+    {
+      fflush(output);
+    }
+    else
+    {
+      content += wchar_t(val);
+    }
+  }
+}
+
+/**
+ * Returns the current value of the null_flush flag.
+ */
+bool
+Transfer::getNullFlush(void)
+{
+  return null_flush;
+}
+
+/**
+ * Sets the null_flush flag.  When it is set, transfer() processes the
+ * input through transfer_wrapper_null_flush().
+ */
+void
+Transfer::setNullFlush(bool null_flush)
+{
+  this->null_flush = null_flush;
+}
+
+/**
+ * Sets the trace flag.  When it is set, transfer() writes the number of
+ * each matched rule and the pending words to wcerr.
+ */
+void
+Transfer::setTrace(bool trace)
+{
+  this->trace = trace;
+}
+
+/**
+ * Sets the trace_att flag.  When it is set, transfer() dumps its loop
+ * state (pending words and blanks, buffer positions) to cerr/wcerr.
+ */
+void
+Transfer::setTraceATT(bool trace)
+{
+  this->trace_att = trace;
+}
+
+/**
+ * Runs transfer() repeatedly until 'in' reaches end of file, writing a
+ * null character to 'out' and flushing it after every run.
+ *
+ * While the loop runs, null_flush is cleared and internal_null_flush is
+ * set; on exit internal_null_flush is cleared and null_flush is set again.
+ *
+ * @param in  stream to read from
+ * @param out stream to write to
+ */
+void
+Transfer::transfer_wrapper_null_flush(FILE *in, FILE *out)
+{
+  // Switch to the internal mode so that the nested transfer() calls do not
+  // re-enter this wrapper.
+  null_flush = false;
+  internal_null_flush = true;
+
+  while(!feof(in))
+  {
+    transfer(in, out);
+    fputwc_unlocked(L'\0', out);
+    if(fflush(out) != 0)
+    {
+      wcerr << L"Could not flush output " << errno << endl;
+    }
+  }
+
+  internal_null_flush = false;
+  null_flush = true;
+}
+
+/**
+ * Main loop: reads tokens from 'in', matches them against the rules and
+ * writes the result to 'out'.
+ *
+ * Tokens come from readToken(); words are fed to the matcher 'ms' through
+ * applyWord() and collected in tmpword, blanks are collected in tmpblank.
+ * Whenever 'ms' reports a final state, the corresponding rule is remembered
+ * in lastrule.  When 'ms' becomes empty, either the remembered rule is
+ * applied (applyRule) or, if there is none, the first pending word or blank
+ * is written out on its own.
+ *
+ * NOTE(review): when null-flush is enabled the wrapper is called first and
+ * execution then continues into the regular loop below instead of
+ * returning -- confirm this is intended.
+ *
+ * @param in  stream to read from
+ * @param out stream to write to
+ */
+void
+Transfer::transfer(FILE *in, FILE *out)
+{
+  if(getNullFlush())
+  {
+    transfer_wrapper_null_flush(in, out);
+  }
+
+  // Positions in input_buffer: 'last' is updated after each rule match and
+  // after each default output; 'prev_last' keeps the previous value of 'last'.
+  int last = 0;
+  int prev_last = 0;
+  int lastrule_id = -1;
+  // Rules that returned 0 from applyRule() since the last successful output.
+  set<int> banned_rules;
+
+  output = out;
+  ms.init(me->getInitial());
+
+  while(true)
+  {
+    if(trace_att)
+    {
+      cerr << "Loop start " << endl;
+      cerr << "ms.size: " << ms.size() << endl;
+
+      cerr << "tmpword.size(): " << tmpword.size() << endl;
+      for (unsigned int ind = 0; ind < tmpword.size(); ind++)
+      {
+        if(ind != 0)
+        {
+          wcerr << L" ";
+        }
+        wcerr << *tmpword[ind];
+      }
+      wcerr << endl;
+
+      cerr << "tmpblank.size(): " << tmpblank.size() << endl;
+      for (unsigned int ind = 0; ind < tmpblank.size(); ind++)
+      {
+        wcerr << L"'";
+        wcerr << *tmpblank[ind];
+        wcerr << L"' ";
+      }
+      wcerr << endl;
+
+      cerr << "last: " << last << endl;
+      cerr << "prev_last: " << prev_last << endl << endl;
+    }
+
+    // The matcher has no states left: resolve what has been read so far.
+    if(ms.size() == 0)
+    {
+      if(lastrule != NULL)
+      {
+        int num_words_to_consume = applyRule();
+
+        if(trace_att)
+        {
+          cerr << "num_words_to_consume: " << num_words_to_consume << endl;
+        }
+
+        //Consume all the words from the input which matched the rule.
+        //This piece of code is executed unless the rule contains a "reject-current-rule" instruction
+        if(num_words_to_consume < 0)
+        {
+          banned_rules.clear();
+          input_buffer.setPos(last);
+        }
+        else if(num_words_to_consume > 0)
+        {
+          // Restart after prev_last and skip tokens until the requested
+          // number of word tokens has been passed.
+          banned_rules.clear();
+          if(prev_last >= input_buffer.getSize())
+          {
+            input_buffer.setPos(0);
+          }
+          else
+          {
+            input_buffer.setPos(prev_last+1);
+          }
+          int num_consumed_words = 0;
+          while(num_consumed_words < num_words_to_consume)
+          {
+            TransferToken& local_tt = input_buffer.next();
+            if (local_tt.getType() == tt_word)
+            {
+              num_consumed_words++;
+            }
+          }
+        }
+        else
+        {
+          //Add rule to banned rules
+          banned_rules.insert(lastrule_id);
+          input_buffer.setPos(prev_last);
+          input_buffer.next();
+          last = input_buffer.getPos();
+        }
+        lastrule_id = -1;
+      }
+      else
+      {
+        // No rule matched: write the first pending word (or blank) by itself.
+        if(tmpword.size() != 0)
+        {
+          if(trace_att)
+          {
+            cerr << "printing tmpword[0]" <<endl;
+          }
+
+          pair<wstring, int> tr;
+          if(useBilingual && preBilingual == false)
+          {
+	    if(isExtended && (*tmpword[0])[0] == L'*')
+	    {
+	      tr = extended.biltransWithQueue((*tmpword[0]).substr(1), false);
+              if(tr.first[0] == L'@')
+              {
+                tr.first[0] = L'*';
+              }
+              else
+              {
+                tr.first = L"%" + tr.first;
+              }
+            }
+            else
+            {
+	      tr = fstp.biltransWithQueue(*tmpword[0], false);
+            }
+          }
+          else if(preBilingual)
+          {
+            // The word has the form "sl/tl[/...]": split it at the unescaped
+            // slashes; only the part after the first slash (tl) is used.
+            wstring sl;
+            wstring tl;
+            int seenSlash = 0;
+            for(wstring::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
+            {
+              if(*it == L'\\')
+              {
+                if(seenSlash == 0)
+                {
+                  sl.push_back(*it);
+                  it++;
+                  sl.push_back(*it);
+                }
+                else
+                {
+                  tl.push_back(*it);
+                  it++;
+                  tl.push_back(*it);
+                }
+                continue;
+              }
+              else if(*it == L'/')
+              {
+                seenSlash++;
+                continue;
+              }
+              if(seenSlash == 0)
+              {
+                sl.push_back(*it);
+              }
+              else if(seenSlash == 1)
+              {
+                tl.push_back(*it);
+              }
+              else if(seenSlash > 1)
+              {
+                break;
+              }
+            }
+            //tmpword[0]->assign(sl);
+            tr = pair<wstring, int>(tl, false);
+            //wcerr << L"pb: " << *tmpword[0] << L" :: " << sl << L" >> " << tl << endl ;
+          }
+          else
+          {
+            tr = pair<wstring, int>(*tmpword[0], 0);
+          }
+
+	  if(tr.first.size() != 0)
+	  {
+	    // With defaultAttrs == lu the word is written as a plain lexical
+	    // unit; otherwise it is wrapped in a one-word chunk.
+	    if(defaultAttrs == lu)
+	    {
+	      fputwc_unlocked(L'^', output);
+	      fputws_unlocked(tr.first.c_str(), output);
+	      fputwc_unlocked(L'$', output);
+            }
+            else
+            {
+              if(tr.first[0] == '*')
+              {
+                fputws_unlocked(L"^unknown<unknown>{^", output);
+              }
+              else
+              {
+	        fputws_unlocked(L"^default<default>{^", output);
+              }
+	      fputws_unlocked(tr.first.c_str(), output);
+	      fputws_unlocked(L"$}$", output);
+            }
+	  }
+	  banned_rules.clear();
+	  tmpword.clear();
+	  input_buffer.setPos(last);
+	  input_buffer.next();
+	  prev_last = last;
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+	else if(tmpblank.size() != 0)
+	{
+          if(trace_att)
+          {
+            cerr << "printing tmpblank[0]" <<endl;
+          }
+          fputws_unlocked(tmpblank[0]->c_str(), output);
+          tmpblank.clear();
+          prev_last = last;
+          last = input_buffer.getPos();
+          ms.init(me->getInitial());
+	}
+      }
+    }
+    // Remember the rule reported by the matcher (banned rules are passed to
+    // classifyFinals) and the buffer position at which it was found.
+    int val = ms.classifyFinals(me->getFinals(), banned_rules);
+    if(val != -1)
+    {
+      lastrule = rule_map[val-1];
+      lastrule_id = val;
+      last = input_buffer.getPos();
+
+      if(trace)
+      {
+        wcerr << endl << L"apertium-transfer: Rule " << val << L" ";
+        for (unsigned int ind = 0; ind < tmpword.size(); ind++)
+        {
+          if (ind != 0)
+          {
+            wcerr << L" ";
+          }
+          wcerr << *tmpword[ind];
+        }
+        wcerr << endl;
+      }
+    }
+
+    TransferToken &current = readToken(in);
+
+    switch(current.getType())
+    {
+      case tt_word:
+	applyWord(current.getContent());
+        tmpword.push_back(&current.getContent());
+	break;
+
+      case tt_blank:
+	ms.step(L' ');
+	tmpblank.push_back(&current.getContent());
+	break;
+
+      case tt_eof:
+	// With words still pending, clear the matcher so that the next
+	// iteration resolves them; otherwise write the trailing text and stop.
+	if(tmpword.size() != 0)
+	{
+	  tmpblank.push_back(&current.getContent());
+	  ms.clear();
+	}
+	else
+	{
+	  fputws_unlocked(current.getContent().c_str(), output);
+	  return;
+	}
+	break;
+
+      default:
+	cerr << "Error: Unknown input token." << endl;
+	return;
+    }
+  }
+}
+
+/**
+ * Applies the rule stored in lastrule to the pending words.
+ *
+ * Builds the 'word' array (one TransferWord per entry of tmpword) and the
+ * 'blank' array (the blanks between consecutive words, taken from
+ * tmpblank), runs the rule with processRule(), then releases both arrays,
+ * clears tmpword/tmpblank, resets lastrule and re-initialises the matcher.
+ *
+ * The target side of each word is obtained, depending on the flags:
+ *   - useBilingual without preBilingual: from fstp.biltransWithQueue();
+ *   - preBilingual: from the word itself, which has the form
+ *     "sl/tl[/...]" -- the text between the first and the second unescaped
+ *     slash is taken;
+ *   - otherwise: the word is used unchanged.
+ *
+ * @return the value returned by processRule() for the rule
+ */
+int
+Transfer::applyRule()
+{
+  unsigned int const limit = tmpword.size();
+
+  for(unsigned int i = 0; i != limit; i++)
+  {
+    if(i == 0)
+    {
+      word = new TransferWord *[limit];
+      lword = limit;
+      if(limit != 1)
+      {
+        blank = new string *[limit - 1];
+        lblank = limit - 1;
+      }
+      else
+      {
+        blank = NULL;
+        lblank = 0;
+      }
+    }
+    else
+    {
+      blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1]));
+    }
+
+    pair<wstring, int> tr;
+    if(useBilingual && preBilingual == false)
+    {
+      tr = fstp.biltransWithQueue(*tmpword[i], false);
+    }
+    else if(preBilingual)
+    {
+      wstring const &lu = *tmpword[i];
+      wstring tl;
+      int seenSlash = 0;
+      for(wstring::const_iterator it = lu.begin(), end = lu.end(); it != end; ++it)
+      {
+        if(*it == L'\\')
+        {
+          // Escaped character: copy the backslash and the character after
+          // it, but only once the first slash has been passed (the source
+          // side is not needed).
+          if(seenSlash != 0)
+          {
+            tl.push_back(*it);
+          }
+          wstring::const_iterator escaped = it + 1;
+          if(escaped == end)
+          {
+            // Trailing backslash: nothing follows, do not step past end().
+            break;
+          }
+          if(seenSlash != 0)
+          {
+            tl.push_back(*escaped);
+          }
+          it = escaped;
+          continue;
+        }
+
+        if(*it == L'/')
+        {
+          seenSlash++;
+          continue;
+        }
+        if(seenSlash == 1)
+        {
+          tl.push_back(*it);
+        }
+        else if(seenSlash > 1)
+        {
+          break;
+        }
+      }
+      tr = pair<wstring, int>(tl, false);
+    }
+    else
+    {
+      tr = pair<wstring, int>(*tmpword[i], false);
+    }
+
+    word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]),
+                               UtfConverter::toUtf8(tr.first), tr.second);
+  }
+
+  int const words_to_consume = processRule(lastrule);
+  lastrule = NULL;
+
+  if(word)
+  {
+    for(unsigned int i = 0; i != limit; i++)
+    {
+      delete word[i];
+    }
+    delete[] word;
+  }
+  if(blank)
+  {
+    for(unsigned int i = 0; i != limit - 1; i++)
+    {
+      delete blank[i];
+    }
+    delete[] blank;
+  }
+  word = NULL;
+  blank = NULL;
+  tmpword.clear();
+  tmpblank.clear();
+  ms.init(me->getInitial());
+  return words_to_consume;
+}
+
+/* HERE */
+/**
+ * Feeds one word to the matcher 'ms', framed by '^' and '$'.
+ *
+ * Plain characters are stepped lowercased with any_char as alternative.
+ * A backslash makes the following character be stepped the same way.
+ * A "<...>" tag is stepped as its alphabet symbol with any_tag as
+ * alternative, or as any_tag alone when the alphabet gives 0 for it; a '<'
+ * with no closing '>' is skipped.  An unescaped '/' ends the scan.
+ *
+ * @param word_str the word to feed
+ */
+void
+Transfer::applyWord(wstring const &word_str)
+{
+  ms.step(L'^');
+
+  wstring::size_type const len = word_str.size();
+  wstring::size_type pos = 0;
+  while(pos < len)
+  {
+    wchar_t const ch = word_str[pos];
+
+    if(ch == L'/')
+    {
+      // Nothing after the first unescaped slash is matched.
+      break;
+    }
+
+    if(ch == L'\\')
+    {
+      pos++;
+      ms.step(towlower(word_str[pos]), any_char);
+    }
+    else if(ch == L'<')
+    {
+      wstring::size_type const close = word_str.find(L'>', pos + 1);
+      if(close != wstring::npos)
+      {
+        int symbol = alphabet(word_str.substr(pos, close - pos + 1));
+        if(symbol)
+        {
+          ms.step(symbol, any_tag);
+        }
+        else
+        {
+          ms.step(any_tag);
+        }
+        pos = close;
+      }
+    }
+    else
+    {
+      ms.step(towlower(ch), any_char);
+    }
+
+    pos++;
+  }
+
+  ms.step(L'$');
+}
+
+/**
+ * Sets the preBilingual flag.  When it is set, transfer() and applyRule()
+ * take the target side of each word from the word itself ("sl/tl" form)
+ * instead of querying a bilingual dictionary.
+ */
+void
+Transfer::setPreBilingual(bool value)
+{
+  preBilingual = value;
+}
+
+/**
+ * Returns the current value of the preBilingual flag.
+ */
+bool
+Transfer::getPreBilingual(void) const
+{
+  return preBilingual;
+}
+
+/**
+ * Sets the useBilingual flag.  When it is set and preBilingual is not,
+ * transfer() and applyRule() translate words with biltransWithQueue().
+ */
+void
+Transfer::setUseBilingual(bool value)
+{
+  useBilingual = value;
+}
+
+/**
+ * Returns the current value of the useBilingual flag.
+ */
+bool
+Transfer::getUseBilingual(void) const
+{
+  return useBilingual;
+}
+
+/**
+ * Forwards the given value to fstp.setCaseSensitiveMode().
+ */
+void
+Transfer::setCaseSensitiveness(bool value)
+{
+  fstp.setCaseSensitiveMode(value);
+}
Index: branches/apertium-tagger/apertium2/apertium/transfer.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer.dtd	(revision 69632)
@@ -0,0 +1,489 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- 
+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    Draft of DTD for the structural transfer rule files 
+     
+    Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada,
+    2005.07.29. 
+-->    
+
+<!ENTITY % condition "(and|or|not|equal|begins-with|begins-with-list|ends-with|ends-with-list|contains-substring|in)">
+<!ENTITY % container "(var|clip)">
+<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append|reject-current-rule)">
+<!ENTITY % value "(b|clip|lit|lit-tag|var|get-case-from|case-of|concat|lu|mlu|chunk)">
+<!ENTITY % stringvalue "(clip|lit|var|get-case-from|case-of)">
+
+<!ELEMENT transfer (section-def-cats, section-def-attrs?, section-def-vars?, section-def-lists?, section-def-macros?, section-rules)>
+<!ATTLIST transfer default (lu|chunk) #IMPLIED>
+<!-- 
+     'transfer' is the root element containing the whole structural
+     transfer rule file.  Attribute 'default' specifies if
+     unmatched words have to be written as lexical units ("lu", this is
+     the default value) or as chunks ("chunk").
+-->
+
+<!ELEMENT section-def-cats (def-cat+)>
+<!-- 
+     The 'def-cats' section defines the categories used to build the
+patterns used in rules
+ -->
+
+<!ELEMENT def-cat (cat-item+)>
+<!ATTLIST def-cat  n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each 'def-cat' defines one category in terms of a list of
+     category items and has a unique name 'n', which is mandatory
+-->
+
+<!ELEMENT cat-item EMPTY>
+<!ATTLIST cat-item lemma CDATA #IMPLIED 
+                   tags CDATA #REQUIRED 
+                   c CDATA #IMPLIED> 
+<!-- 
+          Each 'cat-item' (category item) represents a set of lexical forms
+     and has a mandatory attribute 'tags' whose value is a sequence of
+     dot-separated tag names; this sequence is a subsequence of the
+     tag sequence defining each possible lexical form. For example,
+     tags="n.f" would match all lexical forms containing this tag
+     sequence, such as "^casa<n><f><pl>$".
+
+     In addition, an optional attribute, "lemma", may be used to
+     define lexical forms having a particular substring in their lemma
+-->
+ 
+<!ELEMENT section-def-attrs (def-attr+)>
+
+<!-- 
+     The 'def-attrs' section defines the attributes that will be
+     identified in matched lexical forms 
+-->
+
+<!ELEMENT def-attr (attr-item+)>
+<!ATTLIST def-attr n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each def-attr defines one attribute in terms of a list of
+     attribute items and has a mandatory unique name n 
+-->
+
+<!ELEMENT attr-item EMPTY>
+<!ATTLIST attr-item tags CDATA #IMPLIED
+                    c CDATA #IMPLIED>
+<!-- 
+     Each 'attr-item' specifies a subsequence of the tags in
+     that lexical form (attribute 'tags')
+-->
+
+<!ELEMENT section-def-vars (def-var+)>
+<!-- 
+     The 'def-vars' section defines the global variables
+     that will be used to transfer information between rules
+-->
+
+<!ELEMENT def-var EMPTY>
+<!ATTLIST def-var n ID #REQUIRED
+                  v CDATA #IMPLIED
+                  c CDATA #IMPLIED>
+<!-- 
+     The definition of a global variable has a mandatory unique name 'n' that
+     will be used to refer to it. An initial value can also be specified
+     by means of the 'v' attribute.  The default initial value is the
+     empty string.
+-->
+
+<!ELEMENT section-def-lists (def-list)+>
+<!--
+     Element 'section-def-lists' encloses a set of list definitions
+-->
+
+<!ELEMENT def-list (list-item+)>
+<!ATTLIST def-list n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!--
+     The 'def-list' element defines a named list to search with the 'in' 
+     element.  Attribute 'n' sets the name of the list
+-->
+
+<!ELEMENT list-item EMPTY>
+<!ATTLIST list-item v CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!--
+     Attribute 'v' of 'list-item' element contains the value to be added to 
+     the list being defined     
+-->
+
+<!ELEMENT section-def-macros (def-macro)+>
+<!-- 
+
+     The 'def-macros' section defines macros containing portions of
+     code frequently used in the action part of rules
+
+-->
+
+<!ELEMENT def-macro (%sentence;)+>
+<!ATTLIST def-macro n ID #REQUIRED>
+<!ATTLIST def-macro npar CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!-- 
+     Macro definition:
+     
+     A macro has a mandatory name (the value of 'n'), a number of parameters
+     (the value of 'npar') and a body containing arguments and statements.  
+-->
+
+<!ELEMENT section-rules (rule+)>
+<!-- 
+     The rules section contains a sequence of one or more rules
+-->
+
+<!ELEMENT rule (pattern, action)>
+<!ATTLIST rule comment CDATA #IMPLIED>
+<!-- 
+      Each rule has a pattern and an action 
+      * attribute 'comment' allows a comment about the purpose of
+        the rule being defined to be added
+-->
+
+<!ELEMENT pattern (pattern-item+)>
+<!-- 
+The pattern is specified in terms of pattern items, each one
+representing a lexical form in the matched pattern 
+-->
+
+<!ELEMENT pattern-item EMPTY>
+<!ATTLIST pattern-item n IDREF #REQUIRED>
+<!-- 
+       Each attribute to be activated is referred to by its name in the def-cats section 
+-->
+
+<!ELEMENT action (%sentence;)*>
+<!ATTLIST action c CDATA #IMPLIED>
+<!-- 
+       Encloses the procedural part of a rule
+-->
+
+<!ELEMENT choose (when+,otherwise?)>
+<!ATTLIST choose c CDATA #IMPLIED>
+<!-- 
+     The choose statement is a selection statement (similar to a case
+     statement) composed of one or more tested cases and an optional
+     otherwise 
+-->
+
+<!ELEMENT when (test,(%sentence;)*)>
+<!ATTLIST when c CDATA #IMPLIED>
+<!-- 
+     Each tested case is a block of zero or more statements 
+-->
+
+<!ELEMENT otherwise (%sentence;)+>
+<!ATTLIST otherwise c CDATA #IMPLIED>
+<!-- 
+     The otherwise case is also a block of one or more statements 
+-->
+
+<!ELEMENT test (%condition;)>
+<!ATTLIST test c CDATA #IMPLIED>
+<!-- 
+     The test in a tested case may be a conjunction, a disjunction, or
+     a negation of simpler tests, as well as a simple equality test
+-->
+
+<!ELEMENT and ((%condition;),(%condition;)+)>
+<!--  
+     Each conjunction test contains two or more simpler tests 
+-->
+
+<!ELEMENT or ((%condition;),(%condition;)+)>
+<!-- 
+     Each disjunction test contains two or more simpler tests 
+-->
+
+<!ELEMENT not (%condition;)>
+<!-- 
+     The negation of a simpler test is a test itself 
+-->
+
+<!ELEMENT equal (%value;,%value;)> 
+<!ATTLIST equal caseless (no|yes) #IMPLIED>
+<!-- 
+      The simplest test is an equality test. The right part and the
+      left part of the equality may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with (%value;,%value;)> 
+<!ATTLIST begins-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the beginning.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT ends-with (%value;,%value;)> 
+<!ATTLIST ends-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the end.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with-list (%value;,list)> 
+<!ATTLIST begins-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part begins with any of the items of the list
+      given as the right part.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+<!ELEMENT ends-with-list (%value;,list)> 
+<!ATTLIST ends-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part ends with any of the items of the list
+      given as the right part.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+<!ELEMENT contains-substring (%value;,%value;)> 
+<!ATTLIST contains-substring caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+
+
+<!ELEMENT in (%value;, list)>
+<!ATTLIST in caseless (no|yes) #IMPLIED>
+<!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+-->
+
+<!ELEMENT list EMPTY>
+<!ATTLIST list n IDREF #REQUIRED>
+<!--
+    'list' refers, by the name given in attribute 'n', to a list defined
+    before in the 'section-def-list' section
+-->
+
+<!ELEMENT let (%container;, %value;)>
+<!-- 
+      An assignment statement ('let') assigns the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip
+-->
+
+<!ELEMENT append (%value;)+>
+<!ATTLIST append n IDREF #REQUIRED>
+<!-- 
+      This instruction appends the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip, identified by the "n" attribute
+-->
+
+
+<!ELEMENT out (mlu|lu|b|chunk|var)+>
+<!ATTLIST out c CDATA #IMPLIED>
+<!-- 
+      'out' is an output statement; it may output any sequence of
+      clips, literal strings, literal tags, variables, and whitespace items 
+      (see below) 
+-->
+
+<!ELEMENT modify-case (%container;, %stringvalue;)>
+<!--
+      The first argument of 'modify-case' copies the case of the second
+      argument.
+--> 
+
+<!ELEMENT call-macro (with-param)*>
+<!ATTLIST call-macro n IDREF #REQUIRED>
+<!-- 
+      A macro may be called anywhere by name with one or more
+      arguments
+-->
+
+<!ELEMENT with-param EMPTY>
+<!ATTLIST with-param pos CDATA #REQUIRED>
+<!-- 
+      The attribute pos in each argument is used to refer to a lexical
+      form in the current rule. For example, if a 2-parameter macro
+      has been defined to perform noun-adjective agreement operations,
+      it may be used with arguments 1 and 2 in a noun-adjective rule,
+      with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+      arguments 1 and 3 in a noun-adverb-adjective rule, and with
+      arguments 2 and 1 in an adjective-noun rule 
+-->
+
+<!ELEMENT clip EMPTY>
+<!ATTLIST clip pos CDATA #REQUIRED
+               side (sl|tl) #REQUIRED
+               part CDATA #REQUIRED
+               queue CDATA #IMPLIED
+               link-to CDATA #IMPLIED
+               c CDATA #IMPLIED>
+<!-- 
+      A 'clip' is a substring of a source-language or target-language
+      lexical form, extracted according to an attribute:
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * 'side' is used to select a source-language ('sl') or a
+        target-language ('tl') clip
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+
+      * the value of 'queue' may be 'no' or 'yes'.  'yes' is assumed  by 
+        default.
+    
+      * 'link-to' causes the other attributes to be ignored in clip evaluation
+        when using 'clip' as a right hand side element (as value), and 
+        returns its value.  When using as a left hand side (as reference), 
+        the value of the 'as' attribute is ignored.
+-->
+
+<!ELEMENT lit EMPTY>
+<!ATTLIST lit v CDATA #REQUIRED>
+<!-- 
+      A literal string value: the value of the literal is the value of
+      the 'v' attribute
+-->
+
+<!ELEMENT lit-tag EMPTY>
+<!ATTLIST lit-tag v CDATA #REQUIRED>
+<!-- 
+      A literal tag value: the value of the literal tag is the value of
+      the 'v' attribute
+-->
+
+
+<!ELEMENT var EMPTY>
+<!ATTLIST var n IDREF #REQUIRED>
+<!-- 
+     Each 'var' is a variable identifier: the attribute n is the name
+     of the variable. When it is in an 'out', a 'test', or the right
+     part of a 'let', it represents the value of the variable; when in
+     the left part of a 'let' it represents the reference of the
+     variable. 
+-->
+
+<!ELEMENT get-case-from (clip|lit|var)> 
+<!ATTLIST get-case-from pos CDATA #REQUIRED>
+<!-- Warning: every comment that involves get-case-from still needs to be
+updated -->
+
+<!ELEMENT case-of EMPTY>
+<!ATTLIST case-of pos CDATA #REQUIRED
+               side (sl|tl) #REQUIRED
+               part CDATA #REQUIRED>
+<!--
+      A 'case-of' is a value representing the case of a "clip".  This value 
+      will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+      (all uppercase).
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * 'side' is used to select a source-language ('sl') or a
+        target-language ('tl') clip
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+-->
+
+
+<!ELEMENT concat (%value;)+>
+<!-- Concatenates a sequence of values -->
+
+<!ELEMENT mlu (lu+)>
+<!-- Encloses a multiword -->
+
+<!ELEMENT lu (%value;)+>
+<!-- Encloses a word inside an 'out' element. -->
+
+<!ELEMENT reject-current-rule EMPTY>
+<!ATTLIST reject-current-rule shifting (yes|no) #IMPLIED>
+<!--
+      This instruction cancels the execution of the rule being processed.
+      If "shifting" is set to "yes" or is not specified, the matching process
+      consumes exactly one word of the input. If "shifting" is set to "no",
+      the rule is marked so that it is not considered in the current matching
+      until the input buffer advances by at least one word.
+-->
+
+<!ELEMENT chunk (tags,(mlu|lu|b|var)+)>
+<!ATTLIST chunk name CDATA #IMPLIED
+                namefrom CDATA #IMPLIED
+                case CDATA #IMPLIED
+                c CDATA #IMPLIED>
+<!-- 
+     Encloses a chunk inside an 'out' element.      
+     * 'name' the pseudolemma of the chunk.
+     * 'namefrom' get the name from a variable.
+     * 'case' the variable to get the uppercase/lowercase policy
+        to apply it to the chunk name
+-->
+
+<!ELEMENT tags (tag+)>
+<!ELEMENT tag (%value;)>
+
+<!ELEMENT b EMPTY>
+<!ATTLIST b pos CDATA #IMPLIED>
+<!-- 
+     'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+     with pos="2" refers to the [super]blanks (including format data
+     encapsulated by the de-formatter) between lexical form 2 and
+     lexical form 3. Managing [super]blanks explicitly allows for the
+     correct placement of format when the result of structural
+     transfer has more or fewer lexical items than the original or has
+     been reordered in some way.  If attribute "pos" is not specified, then
+     a single blank (ASCII 32) is generated.
+-->
Index: branches/apertium-tagger/apertium2/apertium/transfer.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer.h	(revision 69632)
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFER_
+#define _TRANSFER_
+
+#include <apertium/transfer_instr.h>
+#include <apertium/transfer_token.h>
+#include <apertium/transfer_word.h>
+#include <apertium/apertium_re.h>
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/buffer.h>
+#include <lttoolbox/fst_processor.h>
+#include <lttoolbox/ltstr.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+
+#include <cstdio>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+class Transfer // NOTE(review): judging by the process* names this executes transfer rules; bodies are not visible here
+{
+private:
+
+  Alphabet alphabet;
+  MatchExe *me; // raw pointer; NOTE(review): ownership is not visible in this header
+  MatchState ms;
+  map<string, ApertiumRE, Ltstr> attr_items;
+  map<string, string, Ltstr> variables;
+  map<string, int, Ltstr> macros; // presumably name -> index into macro_map -- TODO confirm
+  map<string, set<string, Ltstr>, Ltstr> lists;
+  map<string, set<string, Ltstr>, Ltstr> listslow; // presumably a lower-cased copy of lists -- TODO confirm
+  vector<xmlNode *> macro_map;
+  vector<xmlNode *> rule_map;
+  xmlDoc *doc;
+  xmlNode *root_element;
+  TransferWord **word; // raw array of pointers; presumably lword entries -- TODO confirm
+  string **blank; // raw array of pointers; presumably lblank entries -- TODO confirm
+  int lword, lblank;
+  Buffer<TransferToken> input_buffer;
+  vector<wstring *> tmpword; // wide strings, whereas word/blank above use narrow string
+  vector<wstring *> tmpblank;
+
+  FSTProcessor fstp;
+  FSTProcessor extended;
+  bool isExtended;
+  FILE *output;
+  int any_char;
+  int any_tag;
+
+  xmlNode *lastrule;
+  unsigned int nwords;
+
+  map<xmlNode *, TransferInstr> evalStringCache; // keyed by node pointer; presumably memoises evalString -- TODO confirm
+
+  enum OutputType{lu,chunk};
+
+  OutputType defaultAttrs;
+  bool preBilingual;
+  bool useBilingual;
+  bool null_flush;
+  bool internal_null_flush;
+  bool trace;
+  bool trace_att;
+  string emptyblank;
+  
+  void destroy();
+  void readData(FILE *input);
+  void readBil(string const &filename);
+  void readTransfer(string const &input);
+  void collectMacros(xmlNode *localroot);
+  void collectRules(xmlNode *localroot);
+  string caseOf(string const &str);
+  string copycase(string const &source_word, string const &target_word);
+
+  void processLet(xmlNode *localroot); // process*: one handler per instruction name; each takes the XML node to act on
+  void processAppend(xmlNode *localroot);
+  int processRejectCurrentRule(xmlNode *localroot);
+  void processOut(xmlNode *localroot);
+  void processCallMacro(xmlNode *localroot);
+  void processModifyCase(xmlNode *localroot);
+  bool processLogical(xmlNode *localroot);
+  bool processTest(xmlNode *localroot);
+  bool processAnd(xmlNode *localroot);
+  bool processOr(xmlNode *localroot);
+  bool processEqual(xmlNode *localroot);
+  bool processBeginsWith(xmlNode *localroot);
+  bool processBeginsWithList(xmlNode *localroot);
+  bool processEndsWith(xmlNode *localroot);
+  bool processEndsWithList(xmlNode *local); // parameter is named 'local' here but 'localroot' in the sibling declarations
+  bool processContainsSubstring(xmlNode *localroot);
+  bool processNot(xmlNode *localroot);
+  bool processIn(xmlNode *localroot);
+  int processRule(xmlNode *localroot);
+  string evalString(xmlNode *localroot);
+  int processInstruction(xmlNode *localroot);
+  int processChoose(xmlNode *localroot);
+  string processChunk(xmlNode *localroot);
+  string processTags(xmlNode *localroot);
+
+  bool beginsWith(string const &str1, string const &str2) const;
+  bool endsWith(string const &str1, string const &str2) const;
+  string tolower(string const &str) const; // member with the same name as the C library tolower
+  string tags(string const &str) const;
+  wstring readWord(FILE *in);
+  wstring readBlank(FILE *in);
+  wstring readUntil(FILE *in, int const symbol) const;
+  void applyWord(wstring const &word_str);
+  int applyRule();
+  TransferToken & readToken(FILE *in); // returns a reference; NOTE(review): lifetime of the referent is not visible here
+  bool checkIndex(xmlNode *element, int index, int limit);
+  void transfer_wrapper_null_flush(FILE *in, FILE *out);
+public:
+  Transfer();
+  ~Transfer();
+  
+  void read(string const &transferfile, string const &datafile,
+	    string const &fstfile = ""); // fstfile may be omitted; it defaults to ""
+  void transfer(FILE *in, FILE *out);
+  void setUseBilingual(bool value);
+  bool getUseBilingual(void) const;
+  void setPreBilingual(bool value);
+  bool getPreBilingual(void) const;
+  void setExtendedDictionary(string const &fstfile);
+  void setCaseSensitiveness(bool value);
+  bool getNullFlush(void); // NOTE(review): not const, unlike getUseBilingual/getPreBilingual
+  void setNullFlush(bool null_flush);
+  void setTrace(bool trace);
+  void setTraceATT(bool trace);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/a.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/a.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/a.cc	(revision 69632)
@@ -0,0 +1,50 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "a.h"
+
+#include "analysis.h"
+#include "exception.h"
+
+namespace Apertium {
+bool operator==(const a &a_, const a &b_) { // equal iff tags AND morphemes match
+  return a_.TheTags == b_.TheTags && a_.TheMorphemes == b_.TheMorphemes;
+}
+
+bool operator<(const a &a_, const a &b_) {
+  // Lexicographic order: TheTags is the primary key and TheMorphemes
+  // only breaks ties between operands whose tags compare equal.
+  return a_.TheTags != b_.TheTags ? a_.TheTags < b_.TheTags
+                                  : a_.TheMorphemes < b_.TheMorphemes;
+}
+
+a::a() : TheTags(), TheMorphemes() {} // both vectors start out empty
+
+a::a(const Analysis &Analysis_) : TheTags(), TheMorphemes() {
+  // The first Morpheme donates its Tags; every later Morpheme is kept whole.
+  const std::vector<Morpheme> &Source_ = Analysis_.TheMorphemes;
+  if (Source_.empty())
+    throw Exception::Analysis::TheMorphemes_empty(
+        "can't convert const Analysis & comprising empty Morpheme "
+        "std::vector to a");
+
+  if (Source_.front().TheTags.empty())
+    throw Exception::Morpheme::TheTags_empty(
+        "can't convert const Analysis & comprising Morpheme comprising "
+        "empty Tag std::vector to a");
+
+  TheTags = Source_.front().TheTags;
+  TheMorphemes.assign(Source_.begin() + 1, Source_.end());
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/a.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/a.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/a.h	(revision 69632)
@@ -0,0 +1,37 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef A_H
+#define A_H
+
+#include "analysis.h"
+#include "morpheme.h"
+#include "tag.h"
+
+#include <vector>
+
+namespace Apertium {
+class a { // built from an Analysis: the first Morpheme's Tags plus the remaining Morphemes (see a.cc)
+public:
+  friend bool operator==(const a &a_, const a &b_);
+  friend bool operator<(const a &a_, const a &b_); // orders by TheTags, then TheMorphemes
+  a();
+  a(const Analysis &Analysis_); // not explicit, so an Analysis converts implicitly; throws on empty input
+  std::vector<Tag> TheTags;
+  std::vector<Morpheme> TheMorphemes;
+};
+}
+
+#endif // A_H
Index: branches/apertium-tagger/apertium2/apertium/align.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/align.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/align.cc	(revision 69632)
@@ -0,0 +1,56 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "align.h"
+
+#include "linebreak.h"
+
+#include <iomanip>
+#include <ios>
+#include <iostream>
+#include <ostream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace Apertium {
+void align::align_(
+    const std::vector<std::pair<std::string, std::string> > &string_) {
+  typedef std::vector<std::pair<std::string, std::string> > Rows;
+  // Left column is padded to the widest first element plus a 2-space gutter.
+  const std::streamsize width_ = col(string_) + 2;
+  for (Rows::size_type Row_ = 0; Row_ != string_.size(); ++Row_) {
+    const std::pair<std::string, std::string> &Pair_ = string_[Row_];
+    std::cerr << "  " << std::setw(width_) << std::left << Pair_.first
+              << std::setw(0)
+              << linebreak::linebreak_(Pair_.second, width_ + 2, width_ + 4)
+              << '\n';
+  }
+}
+
+std::string::size_type
+align::col(const std::vector<std::pair<std::string, std::string> > &string_) {
+  // Length of the longest .first in string_; 0 when string_ is empty.
+  typedef std::vector<std::pair<std::string, std::string> >::size_type Index;
+  std::string::size_type widest_ = 0;
+
+  for (Index At_ = 0; At_ < string_.size(); ++At_) {
+    const std::string::size_type Length_ = string_[At_].first.size();
+    widest_ = Length_ > widest_ ? Length_ : widest_;
+  }
+
+  return widest_;
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/align.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/align.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/align.h	(revision 69632)
@@ -0,0 +1,35 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef ALIGN_H
+#define ALIGN_H
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace Apertium {
+class align { // only static member functions; no data members are declared
+public:
+  static void
+  align_(const std::vector<std::pair<std::string, std::string> > &string_); // prints one row per pair to std::cerr
+
+private:
+  static std::string::size_type
+  col(const std::vector<std::pair<std::string, std::string> > &string_); // size of the longest .first
+};
+}
+
+#endif // ALIGN_H
Index: branches/apertium-tagger/apertium2/apertium/analysis.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/analysis.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/analysis.cc	(revision 69632)
@@ -0,0 +1,55 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "analysis.h"
+
+#include "exception.h"
+#include "morpheme.h"
+
+#include <string>
+#include <vector>
+
+namespace Apertium {
+std::wostream &operator<<(std::wostream &Stream_, const Analysis &Analysis_) {
+  // Serialise through Analysis::operator std::wstring, which may throw.
+  return Stream_ << static_cast<std::wstring>(Analysis_);
+}
+
+bool operator==(const Analysis &a, const Analysis &b) {
+  return a.TheMorphemes == b.TheMorphemes; // TheMorphemes is Analysis' only data member
+}
+
+bool operator<(const Analysis &a, const Analysis &b) {
+  return a.TheMorphemes < b.TheMorphemes; // std::vector's lexicographic comparison
+}
+
+Analysis::operator std::wstring() const {
+  if (TheMorphemes.empty())
+    throw Exception::Analysis::TheMorphemes_empty(
+        "can't convert Analysis comprising empty Morpheme std::vector to "
+        "std::wstring");
+
+  // Join the Morphemes' string forms with L"+": the first one seeds the
+  // result and each later one is appended behind a separator.
+  std::wstring Joined_ = TheMorphemes.front();
+
+  for (std::vector<Morpheme>::size_type Index_ = 1;
+       Index_ < TheMorphemes.size(); ++Index_) {
+    Joined_ += L"+" + static_cast<std::wstring>(TheMorphemes[Index_]);
+  }
+
+  return Joined_;
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/analysis.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/analysis.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/analysis.h	(revision 69632)
@@ -0,0 +1,37 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#include "morpheme.h"
+
+#include <ostream>
+#include <string>
+#include <vector>
+
+namespace Apertium {
+class Analysis { // a sequence of Morphemes; == and < compare TheMorphemes only (see analysis.cc)
+public:
+  friend std::wostream &operator<<(std::wostream &Stream_,
+                                   const Analysis &Analysis_);
+  friend bool operator==(const Analysis &a, const Analysis &b);
+  friend bool operator<(const Analysis &a, const Analysis &b);
+  operator std::wstring() const; // Morphemes joined with L"+"; throws if TheMorphemes is empty
+  std::vector<Morpheme> TheMorphemes;
+};
+}
+
+#endif // ANALYSIS_H
Index: branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.cc	(revision 69632)
@@ -0,0 +1,20 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_5_3_1_tagger.h"
+
+namespace Apertium {
+basic_5_3_1_Tagger::basic_5_3_1_Tagger() {} // nothing to do: Model is default-constructed
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_5_3_1_tagger.h	(revision 69632)
@@ -0,0 +1,32 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_5_3_1_TAGGER_H
+#define BASIC_5_3_1_TAGGER_H
+
+#include "analysis.h"
+
+#include <cstddef>
+#include <map>
+
+namespace Apertium {
+class basic_5_3_1_Tagger {
+protected:
+  basic_5_3_1_Tagger(); // protected, so outside code cannot instantiate this class directly
+  std::map<Analysis, std::size_t> Model; // presumably a count per Analysis -- TODO confirm
+};
+}
+
+#endif // BASIC_5_3_1_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.cc	(revision 69632)
@@ -0,0 +1,20 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_5_3_2_tagger.h"
+
+namespace Apertium {
+basic_5_3_2_Tagger::basic_5_3_2_Tagger() {} // nothing to do: Model is default-constructed
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_5_3_2_tagger.h	(revision 69632)
@@ -0,0 +1,33 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_5_3_2_TAGGER_H
+#define BASIC_5_3_2_TAGGER_H
+
+#include "a.h"
+#include "lemma.h"
+
+#include <cstddef>
+#include <map>
+
+namespace Apertium {
+class basic_5_3_2_Tagger {
+protected:
+  basic_5_3_2_Tagger(); // protected, so outside code cannot instantiate this class directly
+  std::map<a, std::map<Lemma, std::size_t> > Model; // presumably a count per (a, Lemma) -- TODO confirm
+};
+}
+
+#endif // BASIC_5_3_2_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/basic_5_3_3_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_5_3_3_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_5_3_3_tagger.h	(revision 69632)
@@ -0,0 +1,35 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_5_3_3_TAGGER_H
+#define BASIC_5_3_3_TAGGER_H
+
+#include "i.h"
+#include "lemma.h"
+
+#include <cstddef>
+#include <map>
+#include <utility>
+
+namespace Apertium {
+class basic_5_3_3_Tagger { // declares no constructor of its own
+protected:
+  std::pair<std::map<i, std::map<Lemma, std::size_t> >, // .first: i -> Lemma -> size_t
+            std::pair<std::map<i, std::map<Lemma, std::size_t> >, // .second.first: i -> Lemma -> size_t
+                      std::map<Lemma, std::map<i, std::size_t> > > > Model; // .second.second: Lemma -> i -> size_t; NOTE(review): meaning of each map not visible here
+};
+}
+
+#endif // BASIC_5_3_3_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/basic_exception_type.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_exception_type.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_exception_type.cc	(revision 69632)
@@ -0,0 +1,20 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_exception_type.h"
+
+namespace Apertium {
+basic_ExceptionType::~basic_ExceptionType() throw() {} // declared pure virtual, but derived destructors still call it, so it needs a body
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_exception_type.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_exception_type.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_exception_type.h	(revision 69632)
@@ -0,0 +1,29 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_EXCEPTION_TYPE_H
+#define BASIC_EXCEPTION_TYPE_H
+
+#include <exception>
+
+namespace Apertium {
+class basic_ExceptionType : public std::exception { // abstract: both members below are pure virtual
+public:
+  virtual ~basic_ExceptionType() throw() = 0; // pure, yet defined in basic_exception_type.cc
+  virtual const char *what() const throw() = 0; // must be overridden by concrete exception types
+};
+}
+
+#endif // BASIC_EXCEPTION_TYPE_H
Index: branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.cc	(revision 69632)
@@ -0,0 +1,125 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_stream_tagger.h"
+
+#include "apertium_config.h"
+
+#include "basic_tagger.h"
+#include "lexical_unit.h"
+#include "stream.h"
+#include "streamed_type.h"
+
+#include <ostream>
+
+#if ENABLE_DEBUG
+
+#include <iomanip>
+#include <iostream>
+#include <limits>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+basic_StreamTagger::~basic_StreamTagger() {} // out-of-line virtual destructor
+
+// Echoes each streamed item's TheString and tags its lexical unit, if any.
+// Ends at the first item without a lexical unit for which flush_() is false.
+void basic_StreamTagger::tag(Stream &Input, std::wostream &Output) const {
+  for (;;) {
+    StreamedType Item = Input.get();
+    Output << Item.TheString;
+
+    if (!Item.TheLexicalUnit) {
+      // Nothing to tag: finish unless Input.flush_() reports true.
+      if (!Input.flush_())
+        return;
+
+      Output << std::flush;
+    } else {
+#if ENABLE_DEBUG
+      std::wcerr << L"\n\n";
+#endif // ENABLE_DEBUG
+
+      tag(*Item.TheLexicalUnit, Output);
+
+      if (Input.flush_())
+        Output << std::flush;
+    }
+  }
+}
+
+basic_StreamTagger::basic_StreamTagger(const basic_Tagger::Flags &Flags_)
+    : basic_Tagger(Flags_) {} // the flags are kept by the basic_Tagger base
+
+// Writes LexicalUnit_ as "^...$". Without analyses the surface form is marked
+// with "*"; otherwise the best-scoring analysis is written (ties keep the
+// earliest), followed by the remaining ones when the First flag is set.
+void basic_StreamTagger::tag(const LexicalUnit &LexicalUnit_,
+                             std::wostream &Output) const {
+  typedef std::vector<Analysis>::const_iterator AnalysisIterator;
+  const std::vector<Analysis> &Analyses = LexicalUnit_.TheAnalyses;
+
+#if ENABLE_DEBUG
+  for (AnalysisIterator It = Analyses.begin(); It != Analyses.end(); ++It) {
+    std::wcerr << L"score(\"" << *It << L"\") ==\n  "
+               << score_DEBUG(*It) << L" ==\n  " << std::fixed
+               << std::setprecision(std::numeric_limits<long double>::digits10)
+               << score(*It) << L"\n";
+  }
+#endif // ENABLE_DEBUG
+
+  Output << L"^";
+
+  if (Analyses.empty()) {
+    if (TheFlags.getShowSuperficial())
+      Output << LexicalUnit_.TheSurfaceForm << L"/";
+
+    Output << L"*" << LexicalUnit_.TheSurfaceForm << L"$";
+    return;
+  }
+
+  // "=" marks a unit that has more than one analysis.
+  if (TheFlags.getMark() && Analyses.size() != 1)
+    Output << L"=";
+
+  if (TheFlags.getShowSuperficial())
+    Output << LexicalUnit_.TheSurfaceForm << L"/";
+
+  // Score every analysis once, keeping the best score seen so far.
+  AnalysisIterator TheAnalysis = Analyses.begin();
+  long double TheScore = score(*TheAnalysis);
+
+  for (AnalysisIterator It = TheAnalysis + 1; It != Analyses.end(); ++It) {
+    const long double Score_ = score(*It);
+
+    if (Score_ > TheScore) {
+      TheAnalysis = It;
+      TheScore = Score_;
+    }
+  }
+
+  Output << *TheAnalysis;
+
+  if (TheFlags.getFirst()) {
+    for (AnalysisIterator It = Analyses.begin(); It != Analyses.end(); ++It) {
+      if (It != TheAnalysis)
+        Output << L"/" << *It;
+    }
+  }
+
+  Output << L"$";
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_stream_tagger.h	(revision 69632)
@@ -0,0 +1,56 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_STREAM_TAGGER_H
+#define BASIC_STREAM_TAGGER_H
+
+#include "apertium_config.h"
+
+#include "basic_tagger.h"
+#include "lexical_unit.h"
+#include "stream.h"
+
+#include <istream>
+#include <ostream>
+
+#if ENABLE_DEBUG
+
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+class basic_StreamTagger : protected basic_Tagger { // tags a Stream
+public:
+  virtual ~basic_StreamTagger();
+  virtual void deserialise(std::istream &Serialised_basic_Tagger) = 0;
+  void tag(Stream &Input, std::wostream &Output) const; // tags a whole stream
+  // Derived classes provide score(); tag() selects the highest-scoring analysis.
+protected:
+  basic_StreamTagger(const Flags &Flags_);
+  virtual long double score(const Analysis &Analysis_) const = 0;
+  // Debug builds additionally print the text returned by score_DEBUG().
+#if ENABLE_DEBUG
+
+  virtual std::wstring score_DEBUG(const Analysis &Analysis_) const = 0;
+
+#endif // ENABLE_DEBUG
+  // Writes a single lexical unit; called by the public tag() overload.
+private:
+  void tag(const LexicalUnit &LexicalUnit_, std::wostream &Output) const;
+};
+} // namespace Apertium
+
+#endif // BASIC_STREAM_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.cc	(revision 69632)
@@ -0,0 +1,59 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_stream_tagger_trainer.h"
+
+#include "analysis.h"
+#include "basic_tagger.h"
+#include "exception.h"
+#include "stream.h"
+#include "streamed_type.h"
+
+namespace Apertium {
+basic_StreamTaggerTrainer::~basic_StreamTaggerTrainer() {}
+
+// Trains on every lexical unit read from TaggedCorpus, stopping at the first
+// streamed item without one. Throws if a unit has no analyses.
+void basic_StreamTaggerTrainer::train(Stream &TaggedCorpus) {
+  for (;;) {
+    StreamedType Token = TaggedCorpus.get();
+
+    if (!Token.TheLexicalUnit)
+      return;
+
+    const std::vector<Analysis> &Analyses = Token.TheLexicalUnit->TheAnalyses;
+    const std::size_t AnalysisCount = Analyses.size();
+
+    if (AnalysisCount == 0)
+      throw Exception::LexicalUnit::TheAnalyses_empty(
+          "can't train LexicalUnit comprising empty Analysis std::vector");
+
+    // Scale the coefficient (and the model) until it divides evenly.
+    if (OccurrenceCoefficient % AnalysisCount != 0) {
+      OccurrenceCoefficient *= AnalysisCount;
+      multiplyModel(AnalysisCount);
+    }
+
+    // Every analysis of the unit receives the same share.
+    for (std::size_t Index = 0; Index != AnalysisCount; ++Index)
+      train_Analysis(Analyses[Index], OccurrenceCoefficient / AnalysisCount);
+  }
+}
+
+// A new trainer starts with an occurrence coefficient of 1.
+basic_StreamTaggerTrainer::basic_StreamTaggerTrainer(
+    const basic_Tagger::Flags &Flags_)
+    : basic_Tagger(Flags_), OccurrenceCoefficient(1) {}
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_stream_tagger_trainer.h	(revision 69632)
@@ -0,0 +1,41 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_STREAM_TAGGER_TRAINER_H
+#define BASIC_STREAM_TAGGER_TRAINER_H
+
+#include "basic_tagger.h"
+#include "stream.h"
+
+#include <ostream>
+
+namespace Apertium {
+class basic_StreamTaggerTrainer : protected basic_Tagger {
+public:
+  virtual ~basic_StreamTaggerTrainer();
+  void train(Stream &TaggedCorpus); // trains on each lexical unit read
+  virtual void serialise(std::ostream &Serialised_basic_Tagger) const = 0;
+  // Hooks called by train(), plus the coefficient it maintains.
+protected:
+  basic_StreamTaggerTrainer(const Flags &Flags_); // coefficient starts at 1
+  virtual void train_Analysis(const Analysis &Analysis_,
+                              const std::size_t &Coefficient_) = 0;
+  virtual void
+  multiplyModel(const std::size_t &OccurrenceCoefficientMultiplier) = 0;
+  std::size_t OccurrenceCoefficient; // divided among a unit's analyses
+};
+} // namespace Apertium
+
+#endif // BASIC_STREAM_TAGGER_TRAINER_H
Index: branches/apertium-tagger/apertium2/apertium/basic_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_tagger.cc	(revision 69632)
@@ -0,0 +1,48 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "basic_tagger.h"
+
+namespace Apertium {
+basic_Tagger::Flags::Flags()
+    : Debug(false), First(false), Mark(false), ShowSuperficial(false),
+      NullFlush(false) {} // every flag starts as false
+
+// Debug flag accessors.
+bool basic_Tagger::Flags::getDebug() const { return Debug; }
+void basic_Tagger::Flags::setDebug(const bool &Debug_) { Debug = Debug_; }
+
+// First flag accessors.
+bool basic_Tagger::Flags::getFirst() const { return First; }
+void basic_Tagger::Flags::setFirst(const bool &First_) { First = First_; }
+
+// Mark flag accessors.
+bool basic_Tagger::Flags::getMark() const { return Mark; }
+void basic_Tagger::Flags::setMark(const bool &Mark_) { Mark = Mark_; }
+
+// ShowSuperficial flag accessors.
+bool basic_Tagger::Flags::getShowSuperficial() const { return ShowSuperficial; }
+void basic_Tagger::Flags::setShowSuperficial(const bool &ShowSuperficial_) {
+  ShowSuperficial = ShowSuperficial_;
+}
+
+// NullFlush flag accessors.
+bool basic_Tagger::Flags::getNullFlush() const { return NullFlush; }
+void basic_Tagger::Flags::setNullFlush(const bool &NullFlush_) {
+  NullFlush = NullFlush_;
+}
+
+basic_Tagger::basic_Tagger(const Flags &Flags_) : TheFlags(Flags_) {}
+}
Index: branches/apertium-tagger/apertium2/apertium/basic_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/basic_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/basic_tagger.h	(revision 69632)
@@ -0,0 +1,60 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef BASIC_TAGGER_H
+#define BASIC_TAGGER_H
+
+namespace Apertium {
+class basic_Tagger { // base class holding a tagger's option flags
+public:
+  class Flags { // five independent boolean options
+  public:
+    Flags(); // every flag starts as false
+    bool getDebug() const;
+    void setDebug(const bool &Debug_);
+    bool getFirst() const;
+    void setFirst(const bool &First_);
+    bool getMark() const;
+    void setMark(const bool &Mark_);
+    bool getShowSuperficial() const;
+    void setShowSuperficial(const bool &ShowSuperficial_);
+    bool getNullFlush() const;
+    void setNullFlush(const bool &NullFlush_);
+    static bool (Flags::*GetDebug)() const;
+    static void (Flags::*SetDebug)(const bool &);
+    static bool (Flags::*GetFirst)() const;
+    static void (Flags::*SetFirst)(const bool &);
+    static bool (Flags::*GetMark)() const;
+    static void (Flags::*SetMark)(const bool &);
+    static bool (Flags::*GetShowSuperficial)() const;
+    static void (Flags::*SetShowSuperficial)(const bool &);
+    static bool (Flags::*GetNullFlush)() const;
+    static void (Flags::*SetNullFlush)(const bool &);
+    // NOTE(review): confirm the static members above are defined in a .cc file.
+  private:
+    bool Debug : 1; // each flag occupies a one-bit field
+    bool First : 1;
+    bool Mark : 1;
+    bool ShowSuperficial : 1;
+    bool NullFlush : 1;
+  };
+
+protected:
+  basic_Tagger(const Flags &Flags_); // copies Flags_ into TheFlags
+  Flags TheFlags;
+};
+} // namespace Apertium
+
+#endif // BASIC_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/constructor_eq_delete.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/constructor_eq_delete.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/constructor_eq_delete.h	(revision 69632)
@@ -0,0 +1,32 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef CONSTRUCTOR_EQ_DELETE_H
+#define CONSTRUCTOR_EQ_DELETE_H
+
+namespace Apertium {
+class constructor_eq_delete { // base class with inaccessible copy operations
+protected:
+  constructor_eq_delete() {} // construction is limited to derived classes
+  ~constructor_eq_delete() {} // non-virtual, protected destructor
+  // Copying is declared private; no definition appears in this header.
+private:
+  constructor_eq_delete(const constructor_eq_delete &constructor_eq_delete_);
+  constructor_eq_delete &
+  operator=(constructor_eq_delete constructor_eq_delete_);
+};
+} // namespace Apertium
+
+#endif // CONSTRUCTOR_EQ_DELETE_H
Index: branches/apertium-tagger/apertium2/apertium/deserialiser.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/deserialiser.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/deserialiser.h	(revision 69632)
@@ -0,0 +1,255 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef DESERIALISER_H
+#define DESERIALISER_H
+
+#include "a.h"
+#include "analysis.h"
+#include "basic_exception_type.h"
+#include "exception.h"
+#include "i.h"
+#include "lemma.h"
+#include "morpheme.h"
+#include "tag.h"
+
+#include <cstddef>
+#include <istream>
+#include <limits>
+#include <map>
+#include <sstream>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace Apertium {
+template <typename DeserialisedType> class Deserialiser; // never defined
+// Each specialisation below exposes a static deserialise(std::istream &).
+template <> class Deserialiser<a> {
+public:
+  inline static a deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<Analysis> {
+public:
+  inline static Analysis deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<i> {
+public:
+  inline static i deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<Lemma> {
+public:
+  inline static Lemma deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<Morpheme> {
+public:
+  inline static Morpheme deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<Tag> {
+public:
+  inline static Tag deserialise(std::istream &Stream_);
+};
+
+template <typename value_type>
+class Deserialiser<std::basic_string<value_type> > {
+public:
+  inline static std::basic_string<value_type>
+  deserialise(std::istream &Stream_);
+};
+
+template <typename key_type, typename mapped_type>
+class Deserialiser<std::map<key_type, mapped_type> > {
+public:
+  inline static std::map<key_type, mapped_type>
+  deserialise(std::istream &Stream_);
+};
+
+template <typename first_type, typename second_type>
+class Deserialiser<std::pair<first_type, second_type> > {
+public:
+  inline static std::pair<first_type, second_type>
+  deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<std::size_t> {
+public:
+  inline static std::size_t deserialise(std::istream &Stream_);
+};
+
+template <typename value_type> class Deserialiser<std::vector<value_type> > {
+public:
+  inline static std::vector<value_type> deserialise(std::istream &Stream_);
+};
+
+template <> class Deserialiser<wchar_t> {
+public:
+  inline static wchar_t deserialise(std::istream &Stream_);
+};
+
+a Deserialiser<a>::deserialise(std::istream &Stream_) {
+  a Result; // the tags are read first, then the morphemes
+  Result.TheTags = Deserialiser<std::vector<Tag> >::deserialise(Stream_);
+  Result.TheMorphemes =
+      Deserialiser<std::vector<Morpheme> >::deserialise(Stream_);
+  return Result;
+}
+
+Analysis Deserialiser<Analysis>::deserialise(std::istream &Stream_) {
+  Analysis Result; // only the morpheme vector is read
+  Result.TheMorphemes =
+      Deserialiser<std::vector<Morpheme> >::deserialise(Stream_);
+  return Result;
+}
+
+i Deserialiser<i>::deserialise(std::istream &Stream_) {
+  i Result; // only the tag vector is read
+  Result.TheTags = Deserialiser<std::vector<Tag> >::deserialise(Stream_);
+  return Result;
+}
+
+Lemma Deserialiser<Lemma>::deserialise(std::istream &Stream_) {
+  Lemma Result; // only the lemma string is read
+  Result.TheLemma = Deserialiser<std::wstring>::deserialise(Stream_);
+  return Result;
+}
+
+// Reads the lemma first, then the tags.
+Morpheme Deserialiser<Morpheme>::deserialise(std::istream &Stream_) {
+  Morpheme Result;
+  Result.TheLemma = Deserialiser<std::wstring>::deserialise(Stream_);
+  Result.TheTags = Deserialiser<std::vector<Tag> >::deserialise(Stream_);
+  return Result;
+}
+
+Tag Deserialiser<Tag>::deserialise(std::istream &Stream_) {
+  Tag Result; // only the tag string is read
+  Result.TheTag = Deserialiser<std::wstring>::deserialise(Stream_);
+  return Result;
+}
+
+// Reads an element count, then appends that many deserialised elements.
+template <typename value_type>
+std::basic_string<value_type>
+Deserialiser<std::basic_string<value_type> >::deserialise(
+    std::istream &Stream_) {
+  std::size_t Remaining = Deserialiser<std::size_t>::deserialise(Stream_);
+  std::basic_string<value_type> Result;
+
+  while (Remaining != 0) {
+    Result.push_back(Deserialiser<value_type>::deserialise(Stream_));
+    --Remaining;
+  }
+  return Result;
+}
+
+// Reads an entry count, then inserts that many deserialised key/value pairs.
+template <typename key_type, typename mapped_type>
+std::map<key_type, mapped_type>
+Deserialiser<std::map<key_type, mapped_type> >::deserialise(
+    std::istream &Stream_) {
+  typedef Deserialiser<std::pair<key_type, mapped_type> > EntryDeserialiser;
+  std::size_t Remaining = Deserialiser<std::size_t>::deserialise(Stream_);
+  std::map<key_type, mapped_type> Result;
+
+  while (Remaining != 0) {
+    Result.insert(EntryDeserialiser::deserialise(Stream_));
+    --Remaining;
+  }
+  return Result;
+}
+
+// Reads .first and then .second, in that order.
+template <typename first_type, typename second_type>
+std::pair<first_type, second_type>
+Deserialiser<std::pair<first_type, second_type> >::deserialise(
+    std::istream &Stream_) {
+  const first_type First = Deserialiser<first_type>::deserialise(Stream_);
+  const second_type Second = Deserialiser<second_type>::deserialise(Stream_);
+  return std::pair<first_type, second_type>(First, Second);
+}
+
+// Reads a length byte, then that many value bytes (most significant first).
+// Lengths above sizeof(std::size_t) are rejected; such shifts are undefined.
+std::size_t Deserialiser<std::size_t>::deserialise(std::istream &Stream_) {
+  try {
+    std::size_t SerialisedType_ = 0;
+    unsigned char SerialisedTypeSize = Stream_.get();
+    if (!Stream_)
+      throw Exception::Deserialiser::not_Stream_good("can't deserialise size");
+    if (SerialisedTypeSize > sizeof(SerialisedType_))
+      throw Exception::Deserialiser::not_Stream_good("size is too large");
+    for (; SerialisedTypeSize != 0;) {
+      SerialisedType_ +=
+          static_cast<std::size_t>(Stream_.get())
+          << std::numeric_limits<unsigned char>::digits * --SerialisedTypeSize;
+      if (!Stream_)
+        throw Exception::Deserialiser::not_Stream_good(
+            "can't deserialise byte");
+    }
+    return SerialisedType_;
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::stringstream what_;
+    what_ << "can't deserialise std::size_t: " << basic_ExceptionType_.what();
+    throw Exception::Deserialiser::size_t_(what_);
+  }
+}
+
+// Reads an element count, then appends that many deserialised elements.
+template <typename value_type>
+std::vector<value_type>
+Deserialiser<std::vector<value_type> >::deserialise(std::istream &Stream_) {
+  std::size_t Remaining = Deserialiser<std::size_t>::deserialise(Stream_);
+  std::vector<value_type> Result;
+
+  while (Remaining != 0) {
+    Result.push_back(Deserialiser<value_type>::deserialise(Stream_));
+    --Remaining;
+  }
+  return Result;
+}
+
+// Reads a length byte, then that many value bytes (most significant first).
+// Accumulates in unsigned long: ISO C++ has no "unsigned wchar_t" type.
+wchar_t Deserialiser<wchar_t>::deserialise(std::istream &Stream_) {
+  try {
+    unsigned long SerialisedType_ = 0;
+    unsigned char SerialisedTypeSize = Stream_.get();
+    if (!Stream_)
+      throw Exception::Deserialiser::not_Stream_good("can't deserialise size");
+    if (SerialisedTypeSize > sizeof(SerialisedType_))
+      throw Exception::Deserialiser::not_Stream_good("size is too large");
+    for (; SerialisedTypeSize != 0;) {
+      SerialisedType_ +=
+          static_cast<unsigned long>(Stream_.get())
+          << std::numeric_limits<unsigned char>::digits * --SerialisedTypeSize;
+      if (!Stream_)
+        throw Exception::Deserialiser::not_Stream_good(
+            "can't deserialise byte");
+    }
+    return static_cast<wchar_t>(SerialisedType_);
+  } catch (const basic_ExceptionType &basic_ExceptionType_) {
+    std::stringstream what_;
+    what_ << "can't deserialise wchar_t: " << basic_ExceptionType_.what();
+    throw Exception::Deserialiser::wchar_t_(what_);
+  }
+}
+}
+
+#endif // DESERIALISER_H
Index: branches/apertium-tagger/apertium2/apertium/err_exception.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/err_exception.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/err_exception.h	(revision 69632)
@@ -0,0 +1,23 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef ERR_EXCEPTION_H
+#define ERR_EXCEPTION_H
+
+namespace Apertium {
+class err_Exception {}; // empty type: no members and no base class
+} // namespace Apertium
+
+#endif // ERR_EXCEPTION_H
Index: branches/apertium-tagger/apertium2/apertium/exception_type.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/exception_type.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/exception_type.cc	(revision 69632)
@@ -0,0 +1,32 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "exception_type.h"
+
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+// The constructors copy the message from a C string, a std::string or the
+// contents of a std::stringstream.
+ExceptionType::ExceptionType(const char *const what_) : what_(what_) {}
+ExceptionType::ExceptionType(const std::string &what_) : what_(what_) {}
+ExceptionType::ExceptionType(const std::stringstream &what_)
+    : what_(what_.str()) {}
+
+ExceptionType::~ExceptionType() throw() {} // body for the pure virtual dtor
+// Returns the stored message; the buffer belongs to this exception object.
+const char *ExceptionType::what() const throw() { return what_.c_str(); }
+} // namespace Apertium
Index: branches/apertium-tagger/apertium2/apertium/exception_type.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/exception_type.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/exception_type.h	(revision 69632)
@@ -0,0 +1,38 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef EXCEPTION_TYPE_H
+#define EXCEPTION_TYPE_H
+
+#include "basic_exception_type.h"
+
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+class ExceptionType : public basic_ExceptionType { // exception with a message
+public:
+  ExceptionType(const char *const what_);
+  ExceptionType(const std::string &what_);
+  ExceptionType(const std::stringstream &what_); // stores what_.str()
+  virtual ~ExceptionType() throw() = 0; // keeps the class abstract
+  const char *what() const throw(); // returns the stored message
+  // Message text, fixed at construction.
+protected:
+  const std::string what_;
+};
+} // namespace Apertium
+
+#endif // EXCEPTION_TYPE_H
Index: branches/apertium-tagger/apertium2/apertium/file_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/file_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/file_tagger.cc	(revision 69632)
@@ -0,0 +1,42 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "file_tagger.h"
+
+#include <apertium/tsx_reader.h>
+
+#include <cstdio>
+
+namespace Apertium {
+FILE_Tagger::FILE_Tagger() : debug(false), show_sf(false), null_flush(false) {}
+
+FILE_Tagger::~FILE_Tagger() {}
+// Option setters: each stores its argument in the matching member.
+void FILE_Tagger::set_debug(const bool &Debug) { debug = Debug; }
+
+void FILE_Tagger::set_show_sf(const bool &ShowSuperficial) {
+  show_sf = ShowSuperficial;
+}
+
+void FILE_Tagger::setNullFlush(const bool &NullFlush) {
+  null_flush = NullFlush;
+}
+// Reads the specification file with TSXReader, then loads its tagger data.
+void FILE_Tagger::deserialise(char *const TaggerSpecificationFilename) {
+  TSXReader TaggerSpecificationReader_;
+  TaggerSpecificationReader_.read(TaggerSpecificationFilename);
+  deserialise(TaggerSpecificationReader_.getTaggerData());
+}
+} // namespace Apertium
Index: branches/apertium-tagger/apertium2/apertium/i.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/i.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/i.cc	(revision 69632)
@@ -0,0 +1,50 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "i.h"
+
+#include "analysis.h"
+#include "exception.h"
+#include "morpheme.h"
+
+namespace Apertium {
+// Equality and ordering compare the tag vectors.
+bool operator==(const i &a_, const i &b_) { return a_.TheTags == b_.TheTags; }
+bool operator<(const i &a_, const i &b_) { return a_.TheTags < b_.TheTags; }
+
+i::i() {}
+
+// Copies the first morpheme's tags; throws if there is no morpheme or no tag.
+i::i(const Analysis &Analysis_) : TheTags() {
+  if (Analysis_.TheMorphemes.empty())
+    throw Exception::Analysis::TheMorphemes_empty(
+        "can't convert const Analysis & comprising empty Morpheme "
+        "std::vector to i");
+  const std::vector<Tag> &FirstTags = Analysis_.TheMorphemes.front().TheTags;
+  if (FirstTags.empty())
+    throw Exception::Morpheme::TheTags_empty(
+        "can't convert const Analysis & comprising Morpheme comprising empty "
+        "Tag std::vector to i");
+  TheTags = FirstTags;
+}
+
+// Copies Morpheme_'s tags; throws if it has none.
+i::i(const Morpheme &Morpheme_) : TheTags() {
+  if (Morpheme_.TheTags.empty())
+    throw Exception::Morpheme::TheTags_empty(
+        "can't convert const Morpheme & comprising empty Tag std::vector to i");
+  TheTags = Morpheme_.TheTags;
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/i.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/i.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/i.h	(revision 69632)
@@ -0,0 +1,38 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef I_H
+#define I_H
+
+#include "analysis.h"
+#include "morpheme.h"
+#include "tag.h"
+
+#include <vector>
+
+namespace Apertium {
+class i { // wraps a sequence of Tags
+  friend bool operator==(const i &a_, const i &b_); // true when TheTags match
+  friend bool operator<(const i &a_, const i &b_); // orders by TheTags
+
+public:
+  i(); // TheTags starts empty
+  i(const Analysis &Analysis_); // tags of Analysis_'s first Morpheme; throws if there are none
+  i(const Morpheme &Morpheme_); // tags of Morpheme_; throws if there are none
+  std::vector<Tag> TheTags;
+};
+}
+
+#endif // I_H
Index: branches/apertium-tagger/apertium2/apertium/lemma.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lemma.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lemma.cc	(revision 69632)
@@ -0,0 +1,55 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "lemma.h"
+
+#include "analysis.h"
+#include "exception.h"
+#include "morpheme.h"
+
+namespace Apertium {
+// Equality and ordering of Lemmas are those of the lemma strings they hold.
+bool operator==(const Lemma &a_, const Lemma &b_) {
+  return a_.TheLemma.compare(b_.TheLemma) == 0;
+}
+bool operator<(const Lemma &a_, const Lemma &b_) {
+  return a_.TheLemma.compare(b_.TheLemma) < 0;
+}
+
+Lemma::Lemma() : TheLemma() {}
+
+Lemma::Lemma(const Analysis &Analysis_) : TheLemma() {
+  if (Analysis_.TheMorphemes.empty())
+    throw Exception::Analysis::TheMorphemes_empty(
+        "can't convert const Analysis & comprising empty Morpheme std::vector "
+        "to Lemma");
+  // Only the first Morpheme of the Analysis supplies the lemma.
+  const std::wstring &FirstLemma_ = Analysis_.TheMorphemes.front().TheLemma;
+  if (FirstLemma_.empty())
+    throw Exception::Morpheme::TheLemma_empty(
+        "can't convert const Analysis & comprising Morpheme comprising empty "
+        "Lemma std::wstring to Lemma");
+  TheLemma = FirstLemma_;
+}
+
+Lemma::Lemma(const Morpheme &Morpheme_) : TheLemma(Morpheme_.TheLemma) {
+  // The lemma is copied up front; an empty copy is rejected just as an empty
+  // source would be, so construction still fails with the same exception.
+  if (TheLemma.empty())
+    throw Exception::Morpheme::TheLemma_empty(
+        "can't convert const Morpheme & comprising empty Lemma std::wstring to "
+        "Lemma");
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/lemma.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lemma.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lemma.h	(revision 69632)
@@ -0,0 +1,36 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef LEMMA_H
+#define LEMMA_H
+
+#include "analysis.h"
+#include "morpheme.h"
+
+#include <string>
+
+namespace Apertium {
+class Lemma { // wraps one lemma string
+public:
+  friend bool operator==(const Lemma &a_, const Lemma &b_); // true when the TheLemma strings match
+  friend bool operator<(const Lemma &a_, const Lemma &b_); // orders by TheLemma
+  Lemma(); // TheLemma starts empty
+  Lemma(const Analysis &Analysis_); // lemma of Analysis_'s first Morpheme; throws if there is none
+  Lemma(const Morpheme &Morpheme_); // lemma of Morpheme_; throws if it is empty
+  std::wstring TheLemma;
+};
+}
+
+#endif // LEMMA_H
Index: branches/apertium-tagger/apertium2/apertium/lexical_unit.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lexical_unit.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lexical_unit.h	(revision 69632)
@@ -0,0 +1,32 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef LEXICAL_UNIT_H
+#define LEXICAL_UNIT_H
+
+#include "analysis.h"
+
+#include <string>
+#include <vector>
+
+namespace Apertium {
+class LexicalUnit { // a surface form together with the analyses listed for it
+public:
+  std::wstring TheSurfaceForm;
+  std::vector<Analysis> TheAnalyses;
+};
+}
+
+#endif // LEXICAL_UNIT_H
Index: branches/apertium-tagger/apertium2/apertium/linebreak.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/linebreak.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/linebreak.cc	(revision 69632)
@@ -0,0 +1,94 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "linebreak.h"
+
+#include <string>
+
+namespace Apertium {
+// Re-wraps string_, whose first character is counted as column col: a line is
+// broken once col reaches 79, and each new line gets wrapmargin spaces.
+std::string linebreak::linebreak_(std::string string_,
+                                  std::string::size_type col,
+                                  const std::string::size_type &wrapmargin) {
+  std::string::size_type i_ = 0;
+
+  while (i_ != string_.size()) {
+    if (col < 79) {
+      if (string_.at(i_) == '\n') {
+        if (i_ + 1 == string_.size()) {
+          string_.erase(i_, 1); // a trailing newline is dropped
+          return string_;
+        }
+        string_.insert(i_ + 1, wrapmargin, ' '); // indent the line after '\n'
+        col = wrapmargin;
+        // Skip the '\n' and the margin, as the helper overload does: stepping
+        // by wrapmargin alone left i_ one short (stuck on '\n' for margin 0).
+        i_ += 1 /* '\n' */ + wrapmargin /* inserted spaces */;
+        continue;
+      }
+
+      ++col;
+      ++i_;
+      continue;
+    }
+
+    if (string_.at(i_) == ' ') {
+      std::string::size_type j_ = i_ + 1;
+
+      for (; i_ != 0; --i_) { // i_ -> first space of the run
+        if (string_.at(i_ - 1) != ' ')
+          break;
+      }
+
+      for (;; ++j_) { // j_ -> first non-space after the run
+        if (j_ == string_.size()) {
+          string_.erase(i_, j_ - i_); // trailing spaces are dropped
+          return string_;
+        }
+        if (string_.at(j_) != ' ')
+          break;
+      }
+
+      linebreak_(string_, col, wrapmargin, i_, j_);
+      continue;
+    }
+
+    // Mid-word: j_ backs up to the start of the word, i_ to the start of the
+    // spaces before it, and the break replaces those spaces.
+    std::string::size_type j_ = i_;
+    for (; j_ != 0; --j_) {
+      if (string_.at(j_ - 1) == ' ')
+        break;
+    }
+    for (i_ = j_; i_ != 0; --i_) {
+      if (string_.at(i_ - 1) != ' ')
+        break;
+    }
+
+    linebreak_(string_, col, wrapmargin, i_, j_);
+  }
+  return string_;
+}
+
+void linebreak::linebreak_(std::string &string_, std::string::size_type &col,
+                           const std::string::size_type &wrapmargin,
+                           std::string::size_type &i_,
+                           const std::string::size_type &j_) { // breaks the line at [i_, j_)
+  string_.replace(i_, j_ - i_, '\n' + std::string(wrapmargin, ' ')); // [i_, j_) becomes '\n' + margin
+  col = wrapmargin; // the column count restarts after the margin
+  i_ += 1 /* '\n' */ + wrapmargin /* std::string(wrapmargin, ' ') */; // i_ lands just past the margin
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/linebreak.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/linebreak.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/linebreak.h	(revision 69632)
@@ -0,0 +1,36 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef LINEBREAK_H
+#define LINEBREAK_H
+
+#include <string>
+
+namespace Apertium {
+class linebreak { // static-only helper that wraps a string at a column limit
+public:
+  static std::string linebreak_(std::string string_, // text to wrap, taken by value
+                                std::string::size_type col, // column count string_ starts from
+                                const std::string::size_type &wrapmargin); // spaces put after each break
+
+private:
+  static void linebreak_(std::string &string_, std::string::size_type &col, // replaces [i_, j_) with '\n' + margin
+                         const std::string::size_type &wrapmargin,
+                         std::string::size_type &i_, // updated to point just past the margin
+                         const std::string::size_type &j_);
+};
+}
+
+#endif // LINEBREAK_H
Index: branches/apertium-tagger/apertium2/apertium/morpheme.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/morpheme.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/morpheme.cc	(revision 69632)
@@ -0,0 +1,57 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "morpheme.h"
+
+#include "exception.h"
+#include "tag.h"
+
+#include <string>
+#include <vector>
+
+namespace Apertium {
+bool operator==(const Morpheme &a, const Morpheme &b) {
+  if (a.TheLemma != b.TheLemma)
+    return false;
+  return a.TheTags == b.TheTags;
+}
+
+bool operator<(const Morpheme &a, const Morpheme &b) {
+  // Lemmas decide the order; tags only break ties between equal lemmas.
+  const int LemmaOrder_ = a.TheLemma.compare(b.TheLemma);
+  return LemmaOrder_ != 0 ? LemmaOrder_ < 0 : a.TheTags < b.TheTags;
+}
+
+Morpheme::operator std::wstring() const {
+  // The tags are checked before the lemma, so a Morpheme lacking both is
+  // reported as lacking tags.
+  if (TheTags.empty())
+    throw Exception::Morpheme::TheTags_empty(
+        "can't convert Morpheme comprising empty Tag std::vector to "
+        "std::wstring");
+
+  if (TheLemma.empty())
+    throw Exception::Morpheme::TheLemma_empty(
+        "can't convert Morpheme comprising empty TheLemma std::wstring to "
+        "std::wstring");
+
+  // The result is the lemma followed by each tag's std::wstring conversion.
+  std::wstring Result_(TheLemma);
+  for (std::vector<Tag>::size_type n_ = 0; n_ != TheTags.size(); ++n_)
+    Result_ += static_cast<std::wstring>(TheTags[n_]);
+
+  return Result_;
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/morpheme.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/morpheme.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/morpheme.h	(revision 69632)
@@ -0,0 +1,35 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef MORPHEME_H
+#define MORPHEME_H
+
+#include "tag.h"
+
+#include <string>
+#include <vector>
+
+namespace Apertium {
+class Morpheme { // a lemma together with its tags
+public:
+  friend bool operator==(const Morpheme &a, const Morpheme &b); // lemma and tags must both match
+  friend bool operator<(const Morpheme &a, const Morpheme &b); // by lemma first, then by tags
+  operator std::wstring() const; // lemma followed by the tags; throws if either is empty
+  std::wstring TheLemma;
+  std::vector<Tag> TheTags;
+};
+}
+
+#endif // MORPHEME_H
Index: branches/apertium-tagger/apertium2/apertium/optional.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/optional.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/optional.h	(revision 69632)
@@ -0,0 +1,123 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef OPTIONAL_H
+#define OPTIONAL_H
+
+#include "exception.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <exception>
+#include <new>
+
+namespace Apertium {
+template <typename OptionalType> class Optional;
+
+template <typename OptionalType>
+void swap(Optional<OptionalType> &A, Optional<OptionalType> &B);
+
+// Holds either nothing or a privately owned, heap-allocated copy of one
+// OptionalType value.
+template <typename OptionalType> class Optional {
+public:
+  friend void swap<OptionalType>(Optional &A, Optional &B);
+  Optional();
+  Optional(const OptionalType &OptionalType_);
+  Optional(const Optional &Optional_);
+  Optional &operator=(Optional Optional_);
+  ~Optional();
+  const OptionalType &operator*() const;
+  OptionalType &operator*();
+  const OptionalType *operator->() const;
+  OptionalType *operator->();
+  operator bool() const;
+
+private:
+  OptionalType *TheOptionalTypePointer;
+};
+
+// Exchanges the held pointers; no OptionalType is copied or destroyed.
+template <typename OptionalType>
+void swap(Optional<OptionalType> &A, Optional<OptionalType> &B) {
+  using std::swap;
+  swap(A.TheOptionalTypePointer, B.TheOptionalTypePointer);
+}
+
+template <typename OptionalType>
+Optional<OptionalType>::Optional() : TheOptionalTypePointer(NULL) {}
+
+template <typename OptionalType>
+Optional<OptionalType>::Optional(const OptionalType &OptionalType_)
+    : TheOptionalTypePointer(new OptionalType(OptionalType_)) {}
+
+// A copy owns its own duplicate of the held value, if there is one.
+template <typename OptionalType>
+Optional<OptionalType>::Optional(const Optional &Optional_)
+    : TheOptionalTypePointer(
+          Optional_.TheOptionalTypePointer == NULL
+              ? static_cast<OptionalType *>(NULL)
+              : new OptionalType(*(Optional_.TheOptionalTypePointer))) {}
+
+// Optional_ is already a copy: take its pointer and let it free the old one.
+template <typename OptionalType>
+Optional<OptionalType> &Optional<OptionalType>::operator=(Optional Optional_) {
+  swap(*this, Optional_);
+  return *this;
+}
+
+template <typename OptionalType> Optional<OptionalType>::~Optional() {
+  if (TheOptionalTypePointer != NULL)
+    delete TheOptionalTypePointer;
+}
+
+template <typename OptionalType>
+const OptionalType &Optional<OptionalType>::operator*() const {
+  if (TheOptionalTypePointer != NULL)
+    return *TheOptionalTypePointer;
+
+  throw Exception::Optional::TheOptionalTypePointer_null(
+      "can't dereference Optional comprising null OptionalType pointer");
+}
+
+template <typename OptionalType>
+OptionalType &Optional<OptionalType>::operator*() {
+  // Reuse the const overload's null check, then drop the const it added.
+  const Optional &Const_ = *this;
+  return const_cast<OptionalType &>(*Const_);
+}
+
+template <typename OptionalType>
+const OptionalType *Optional<OptionalType>::operator->() const {
+  if (TheOptionalTypePointer != NULL)
+    return TheOptionalTypePointer;
+
+  throw Exception::Optional::TheOptionalTypePointer_null(
+      "can't dereference Optional comprising null OptionalType pointer");
+}
+
+template <typename OptionalType>
+OptionalType *Optional<OptionalType>::operator->() {
+  // Reuse the const overload's null check, then drop the const it added.
+  const Optional &Const_ = *this;
+  return const_cast<OptionalType *>(Const_.operator->());
+}
+
+template <typename OptionalType> Optional<OptionalType>::operator bool() const {
+  return !(TheOptionalTypePointer == NULL);
+}
+}
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/stream.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream.cc	(revision 69632)
@@ -0,0 +1,774 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream.h"
+
+#include "analysis.h"
+#include "basic_tagger.h"
+#include "streamed_type.h"
+#include "wchar_t_exception.h"
+
+#include <climits>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <istream>
+#include <ostream>
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+Stream::Stream(const basic_Tagger::Flags &Flags_) // reads from std::wcin
+    : TheCharacterStream(std::wcin), TheFilename(), TheLineNumber(1), TheLine(),
+      TheFlags(Flags_), private_flush_(false), ThePreviousCase() {} // no filename; line numbering starts at 1
+
+Stream::Stream(const basic_Tagger::Flags &Flags_,
+               std::wifstream &CharacterStream_, const char *const Filename_) // reads CharacterStream_; filename given as a C string
+    : TheCharacterStream(CharacterStream_), TheFilename(Filename_),
+      TheLineNumber(1), TheLine(), TheFlags(Flags_), private_flush_(false), // line numbering starts at 1
+      ThePreviousCase() {}
+
+Stream::Stream(const basic_Tagger::Flags &Flags_,
+               std::wifstream &CharacterStream_, const std::string &Filename_) // reads CharacterStream_; filename given as a std::string
+    : TheCharacterStream(CharacterStream_), TheFilename(Filename_),
+      TheLineNumber(1), TheLine(), TheFlags(Flags_), private_flush_(false), // line numbering starts at 1
+      ThePreviousCase() {}
+
+Stream::Stream(const basic_Tagger::Flags &Flags_,
+               std::wifstream &CharacterStream_, // the stream that is read
+               const std::stringstream &Filename_) // filename is taken from Filename_.str()
+    : TheCharacterStream(CharacterStream_), TheFilename(Filename_.str()),
+      TheLineNumber(1), TheLine(), TheFlags(Flags_), private_flush_(false), // line numbering starts at 1
+      ThePreviousCase() {}
+
+StreamedType Stream::get() {
+  StreamedType TheStreamedType;
+  std::wstring Lemma;
+  private_flush_ = false;
+
+  if (!is_eof_throw_if_not_TheCharacterStream_good()) {
+    while (true) {
+      const wchar_t Character_ = TheCharacterStream.get();
+
+      if (is_eof_throw_if_not_TheCharacterStream_good(TheStreamedType, Lemma,
+                                                      Character_))
+        break;
+
+      TheLine.push_back(Character_);
+
+      switch (Character_) {
+      case L'\\': // <\>  92,  Hex 5c,  Octal 134
+        case_0x5c(TheStreamedType, Lemma, Character_);
+        continue;
+      case L'[':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L']':
+          case L'$':
+            break;
+          default:
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_ << L"' following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '[' expected to follow ']' or '$'";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+        }
+
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        ThePreviousCase = PreviousCaseType(Character_);
+        continue;
+      case L']':
+        if (!ThePreviousCase) {
+          std::wstringstream Message;
+          Message << L"unexpected '" << Character_
+                  << L"', ']' expected to follow '['";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        switch (ThePreviousCase->ThePreviousCase) {
+        case L'[':
+          push_back_Character(TheStreamedType, Lemma, Character_);
+          ThePreviousCase = PreviousCaseType(Character_);
+          continue;
+        default:
+          std::wstringstream Message;
+          Message << L"unexpected '" << Character_ << L"' following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', ']' expected to follow '['";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        std::abort();
+      case L'^':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L'[':
+            push_back_Character(TheStreamedType, Lemma, Character_);
+            continue;
+          case L']':
+          case L'$':
+            break;
+          default:
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_ << L"' following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '^' expected to follow '[', ']', or '$'";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+        }
+
+        TheStreamedType.TheLexicalUnit = LexicalUnit();
+        ThePreviousCase = PreviousCaseType(Character_);
+        continue;
+      case L'/':
+        if (!ThePreviousCase) {
+          std::wstringstream Message;
+          Message
+              << L"unexpected '" << Character_
+              << L"', '/' expected to follow '[', to follow '>' immediately, "
+                 L"or to follow '^' or '#' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        switch (ThePreviousCase->ThePreviousCase) {
+        case L'[':
+          push_back_Character(TheStreamedType, Lemma, Character_);
+          continue;
+        case L'^':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_
+                    << L"' immediately following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '/' expected to follow '[', to follow '>' "
+                       L"immediately, or to follow '^' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          ThePreviousCase = PreviousCaseType(Character_);
+
+          {
+            const wchar_t Character_ = TheCharacterStream.get();
+
+            if (is_eof_throw_if_not_TheCharacterStream_good(
+                    TheStreamedType, Lemma, Character_)) {
+              std::wstringstream Message;
+              Message << L"unexpected end-of-file following '"
+                      << ThePreviousCase->ThePreviousCase
+                      << "', end-of-file expected to follow ']' or '$'";
+              throw wchar_t_Exception::Stream::UnexpectedEndOfFile(
+                  Message_what(Message));
+            }
+
+            TheLine.push_back(Character_);
+
+            switch (Character_) {
+            case L'\\':
+              TheStreamedType.TheLexicalUnit->TheAnalyses.push_back(Analysis());
+              TheStreamedType.TheLexicalUnit->TheAnalyses.back()
+                  .TheMorphemes.push_back(Morpheme());
+              case_0x5c(TheStreamedType, Lemma, Character_);
+              continue;
+            case L'*':
+              ThePreviousCase = PreviousCaseType(Character_);
+              continue;
+            case L'\n': {
+              std::wstringstream Message;
+              Message << L"unexpected newline following '"
+                      << ThePreviousCase->ThePreviousCase
+                      << "', newline expected to follow '[', ']', or '$'";
+              throw wchar_t_Exception::Stream::UnexpectedCharacter(
+                  Message_what(Message));
+            };
+            case L'[':
+            case L']':
+            case L'^':
+            case L'#':
+            case L'<':
+            case L'>':
+            case L'+':
+            case L'$': {
+              std::wstringstream Message;
+              Message << L"unexpected '" << Character_
+                      << L"' immediately following '"
+                      << ThePreviousCase->ThePreviousCase << L"', expected '*'";
+              throw wchar_t_Exception::Stream::UnexpectedPreviousCase(
+                  Message_what(Message));
+            }
+            default:
+              TheStreamedType.TheLexicalUnit->TheAnalyses.push_back(Analysis());
+              TheStreamedType.TheLexicalUnit->TheAnalyses.back()
+                  .TheMorphemes.push_back(Morpheme());
+              push_back_Character(TheStreamedType, Lemma, Character_);
+              continue;
+            }
+          }
+
+          continue;
+        case L'>':
+          if (!ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_
+                    << L"' not immediately following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '/' expected to follow '[', to follow '>' "
+                       L"immediately, or to follow '^' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        case L'#':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_
+                    << L"' immediately following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '/' expected to follow '[', to follow '>' "
+                       L"immediately, or to follow '^' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        default:
+          std::wstringstream Message;
+          Message << L"unexpected '" << Character_ << L"' following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '/' expected to follow '[', to follow '>' "
+                     L"immediately, or to follow '^' or '#' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        TheStreamedType.TheLexicalUnit->TheAnalyses.push_back(Analysis());
+        TheStreamedType.TheLexicalUnit->TheAnalyses.back()
+            .TheMorphemes.push_back(Morpheme());
+        ThePreviousCase = PreviousCaseType(Character_);
+        continue;
+      case L'*':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L'[':
+          case L']':
+          case L'$':
+            break;
+          default:
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_ << L"' following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '*' expected to follow '[', ']', or '$' or to follow "
+                   L"'/' immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+        }
+
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        continue;
+      case L'<':
+        if (!ThePreviousCase) {
+          std::wstringstream Message;
+          Message
+              << L"unexpected '" << Character_
+              << L"', '<' expected to follow '[', to follow '>' immediately, "
+                 L"or to follow '#', '/' or '+' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        switch (ThePreviousCase->ThePreviousCase) {
+        case L'[':
+          push_back_Character(TheStreamedType, Lemma, Character_);
+          continue;
+        case L'/':
+        case L'#':
+        case L'+':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_ << L"' immediately following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '<' expected to follow '[', to follow '>' immediately, "
+                   L"or to follow '#', '/' or '+' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        case L'>':
+          if (!ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_
+                << L"' not immediately following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '<' expected to follow '[', to follow '>' immediately, "
+                   L"or to follow '#', '/' or '+' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        default:
+          std::wstringstream Message;
+          Message
+              << L"unexpected '" << Character_ << L"' following '"
+              << ThePreviousCase->ThePreviousCase
+              << L"', '<' expected to follow '[', to follow '>' immediately, "
+                 L"or to follow '#', '/' or '+' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        TheStreamedType.TheLexicalUnit->TheAnalyses.back()
+            .TheMorphemes.back()
+            .TheTags.push_back(Tag());
+        ThePreviousCase = PreviousCaseType(Character_);
+        continue;
+      case L'>':
+        if (!ThePreviousCase) {
+          std::wstringstream Message;
+          Message << L"unexpected '" << Character_
+                  << L"', '>' expected to "
+                     L"follow '[' or to follow "
+                     L"'<' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        switch (ThePreviousCase->ThePreviousCase) {
+        case L'[':
+          push_back_Character(TheStreamedType, Lemma, Character_);
+          continue;
+        case L'<':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_
+                    << L"' immediately following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '>' expected to "
+                       L"follow '[' or to follow "
+                       L"'<' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          ThePreviousCase = PreviousCaseType(Character_);
+          continue;
+        default:
+          std::wstringstream Message;
+          Message << L"unexpected '" << Character_ << L"' following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '>' expected to "
+                     L"follow '[' or to follow "
+                     L"'<' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        std::abort();
+      case L'#':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L'[':
+          case L']':
+          case L'$':
+            push_back_Character(TheStreamedType, Lemma, Character_);
+            continue;
+          case L'/':
+            if (ThePreviousCase->isPreviousCharacter) {
+              std::wstringstream Message;
+              Message
+                  << L"unexpected '" << Character_
+                  << L"' immediately following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '#' expected to follow '[', ']', or '$', to follow "
+                     L"'>' immediately, or to follow '/' not immediately";
+              throw wchar_t_Exception::Stream::UnexpectedCase(
+                  Message_what(Message));
+            }
+
+            break;
+          case L'>':
+            if (!ThePreviousCase->isPreviousCharacter) {
+              std::wstringstream Message;
+              Message
+                  << L"unexpected '" << Character_
+                  << L"' not immediately following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '#' expected to follow '[', ']', or '$', to follow "
+                     L"'>' immediately, or to follow '/' not immediately";
+              throw wchar_t_Exception::Stream::UnexpectedCase(
+                  Message_what(Message));
+            }
+
+            break;
+          default:
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_ << L"' following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '#' expected to follow '[', ']', or '$', to follow "
+                       L"'>' immediately, or to follow '/' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          ThePreviousCase = PreviousCaseType(Character_);
+          continue;
+        }
+
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        continue;
+      case L'+':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L'[':
+          case L']':
+          case L'$':
+            push_back_Character(TheStreamedType, Lemma, Character_);
+            continue;
+          case L'>':
+            if (!ThePreviousCase->isPreviousCharacter) {
+              std::wstringstream Message;
+              Message
+                  << L"unexpected '" << Character_
+                  << L"' not immediately following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '+' expected to follow '[', ']', or '$', to follow "
+                     L"'>' "
+                     L"immediately, or to follow '#' not immediately";
+              throw wchar_t_Exception::Stream::UnexpectedCase(
+                  Message_what(Message));
+            }
+
+            break;
+          case L'#':
+            if (ThePreviousCase->isPreviousCharacter) {
+              std::wstringstream Message;
+              Message
+                  << L"unexpected '" << Character_
+                  << L"' immediately following '"
+                  << ThePreviousCase->ThePreviousCase
+                  << L"', '+' expected to follow '[', ']', or '$', to follow "
+                     L"'>' "
+                     L"immediately, or to follow '#' not immediately";
+              throw wchar_t_Exception::Stream::UnexpectedCase(
+                  Message_what(Message));
+            }
+
+            break;
+          default: {
+            std::wstringstream Message;
+            Message << L"unexpected '" << Character_ << L"' following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', '+' expected to follow '[', ']', or '$', to follow "
+                       L"'>' immediately, or to follow '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+          }
+
+          TheStreamedType.TheLexicalUnit->TheAnalyses.back()
+              .TheMorphemes.push_back(Morpheme());
+          ThePreviousCase = PreviousCaseType(Character_);
+          continue;
+        }
+
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        continue;
+      case L'$':
+        if (!ThePreviousCase) {
+          std::wstringstream Message;
+          Message
+              << L"unexpected '" << Character_
+              << L"', '$' expected to follow '[', to follow '>' immediately, "
+                 L"or to follow '*' or '#' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        switch (ThePreviousCase->ThePreviousCase) {
+        case L'[':
+          push_back_Character(TheStreamedType, Lemma, Character_);
+          continue;
+        case L'*':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_ << L"' immediately following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '$' expected to follow '[', to follow '>' immediately, "
+                   L"or to follow '*' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          if (TheFlags.getDebug()) {
+            if (Lemma != TheStreamedType.TheLexicalUnit->TheSurfaceForm)
+              std::wcerr << L"unexpected lemma \"" << Lemma
+                         << L"\", expected \""
+                         << TheStreamedType.TheLexicalUnit->TheSurfaceForm
+                         << L"\"\n";
+          }
+
+          ThePreviousCase = PreviousCaseType(Character_);
+          return TheStreamedType;
+        case L'>':
+          if (!ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_
+                << L"' not immediately following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '$' expected to follow '[', to follow '>' immediately, "
+                   L"or to follow '*' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        case L'#':
+          if (ThePreviousCase->isPreviousCharacter) {
+            std::wstringstream Message;
+            Message
+                << L"unexpected '" << Character_ << L"' immediately following '"
+                << ThePreviousCase->ThePreviousCase
+                << L"', '$' expected to follow '[', to follow '>' immediately, "
+                   L"or to follow '*' or '#' not immediately";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+
+          break;
+        default:
+          std::wstringstream Message;
+          Message
+              << L"unexpected '" << Character_ << L"' following '"
+              << ThePreviousCase->ThePreviousCase
+              << L"', '$' expected to follow '[', to follow '>' immediately, "
+                 L"or to follow '*' or '#' not immediately";
+          throw wchar_t_Exception::Stream::UnexpectedCase(
+              Message_what(Message));
+        }
+
+        ThePreviousCase = PreviousCaseType(Character_);
+        return TheStreamedType;
+      case L'\n':
+        if (ThePreviousCase) {
+          switch (ThePreviousCase->ThePreviousCase) {
+          case L'[':
+          case L']':
+          case L'$':
+            break;
+          default:
+            std::wstringstream Message;
+            Message << L"unexpected newline following '"
+                    << ThePreviousCase->ThePreviousCase
+                    << L"', newline expected to follow '[', ']', or '$'";
+            throw wchar_t_Exception::Stream::UnexpectedCase(
+                Message_what(Message));
+          }
+        }
+
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        ++TheLineNumber;
+        TheLine.clear();
+        continue;
+      default:
+        push_back_Character(TheStreamedType, Lemma, Character_);
+        continue;
+      }
+
+      std::abort();
+    }
+  }
+
+  if (ThePreviousCase) {
+    switch (ThePreviousCase->ThePreviousCase) {
+    case L']':
+    case L'$':
+      break;
+    default:
+      std::wstringstream Message;
+      Message << L"unexpected end-of-file following '"
+              << ThePreviousCase->ThePreviousCase
+              << L"', end-of-file expected to follow ']' "
+                 L"or '$'";
+      throw wchar_t_Exception::Stream::UnexpectedEndOfFile(
+          Message_what(Message));
+    }
+  }
+
+  return TheStreamedType;
+}
+
+// Returns private_flush_, the flag isTheCharacterStream_eof() raises when it
+// reads L'\0' while the null-flush flag is enabled.
+bool Stream::flush_() const {
+  return private_flush_;
+}
+
+// Remembers PreviousCase_ as the latest reserved character and flags it as
+// directly preceding whatever is read next; push_back_Character() clears the
+// flag once an ordinary character has been stored.
+Stream::PreviousCaseType::PreviousCaseType(const wchar_t &PreviousCase_)
+    : ThePreviousCase(PreviousCase_),
+      isPreviousCharacter(true) {
+}
+
+// Returns true when TheCharacterStream has reached end-of-file and false when
+// it is still readable. A stream that has failed without reaching end-of-file
+// is reported by throwing TheCharacterStream_not_good.
+bool Stream::is_eof_throw_if_not_TheCharacterStream_good() const {
+  const bool AtEndOfFile = TheCharacterStream.eof();
+
+  // End-of-file takes precedence: a failed read at end-of-file is not an error.
+  if (!AtEndOfFile && TheCharacterStream.fail()) {
+    std::wstringstream Description;
+    Description << L"can't get const wchar_t: TheCharacterStream not good";
+    throw wchar_t_Exception::Stream::TheCharacterStream_not_good(
+        Message_what(Description));
+  }
+
+  return AtEndOfFile;
+}
+
+// Builds the text carried by the exceptions this class throws:
+//
+//   [<filename>: ]<line number>:<column>: <Message>
+//   <current line>
+//   <spaces>^
+//
+// The filename prefix is only written when TheFilename is set. The column is
+// TheLine.size() and the caret is placed under the last character of TheLine.
+std::wstring Stream::Message_what(const std::wstringstream &Message) const {
+  std::wstringstream what_;
+
+  if (TheFilename)
+    what_ << std::wstring(TheFilename->begin(), TheFilename->end()) << L": ";
+
+  // TheLine is empty right after a newline has been consumed. In that state
+  // TheLine.size() - 1 would wrap around to the largest size_type and
+  // std::wstring(count, ch) would throw std::length_error, hiding the
+  // diagnostic being built here; put the caret in the first column instead.
+  const std::wstring::size_type CaretOffset =
+      TheLine.empty() ? 0 : TheLine.size() - 1;
+
+  what_ << TheLineNumber << L":" << TheLine.size() << L": " << Message.str()
+        << L'\n' << TheLine << L'\n' << std::wstring(CaretOffset, L' ')
+        << L'^';
+  return what_.str();
+}
+
+// Variant used while a StreamedType is being assembled. Returns true when
+// isTheCharacterStream_eof() reports the end of the input (real end-of-file or
+// a null flush), false when reading can continue, and throws
+// TheCharacterStream_not_good when the stream has failed otherwise.
+bool
+Stream::is_eof_throw_if_not_TheCharacterStream_good(StreamedType &StreamedType_,
+                                                    std::wstring &Lemma,
+                                                    const wchar_t &Character_) {
+  if (!isTheCharacterStream_eof(StreamedType_, Lemma, Character_)) {
+    if (TheCharacterStream.fail()) {
+      std::wstringstream Description;
+      Description << L"can't get const wchar_t: TheCharacterStream not good";
+      throw wchar_t_Exception::Stream::TheCharacterStream_not_good(
+          Message_what(Description));
+    }
+
+    return false;
+  }
+
+  return true;
+}
+
+// Returns true when the input is exhausted: either TheCharacterStream is at
+// end-of-file, or the null-flush flag is enabled and Character_ is L'\0'. In
+// the null-flush case the character is stored through push_back_Character()
+// and private_flush_ is raised before returning.
+bool Stream::isTheCharacterStream_eof(StreamedType &StreamedType_,
+                                      std::wstring &Lemma,
+                                      const wchar_t &Character_) {
+  if (TheCharacterStream.eof())
+    return true;
+
+  const bool isNullFlush = TheFlags.getNullFlush() && Character_ == L'\0';
+
+  if (!isNullFlush)
+    return false;
+
+  push_back_Character(StreamedType_, Lemma, Character_);
+  private_flush_ = true;
+  return true;
+}
+
+// Appends Character_ to whichever part of StreamedType_ (or to Lemma) is
+// currently being filled, as selected by the last reserved character seen:
+//
+//   none, '[', ']', '$'  -> StreamedType_.TheString
+//   '^'                  -> the lexical unit's surface form
+//   '/', '#', '+'        -> the lemma of the last morpheme of the last analysis
+//   '*'                  -> Lemma
+//   '<'                  -> the last tag of the last morpheme
+//
+// A character directly after '>' throws UnexpectedCharacter; any other
+// previous case throws UnexpectedPreviousCase. After a successful append the
+// previous case is no longer marked as immediately preceding.
+void Stream::push_back_Character(StreamedType &StreamedType_,
+                                 std::wstring &Lemma,
+                                 const wchar_t &Character_) {
+  // No reserved character seen yet: plain text outside any lexical unit.
+  if (!ThePreviousCase) {
+    StreamedType_.TheString += Character_;
+    return;
+  }
+
+  switch (ThePreviousCase->ThePreviousCase) {
+  case L'[':
+  case L']':
+  case L'$':
+    StreamedType_.TheString += Character_;
+    break;
+  case L'^':
+    StreamedType_.TheLexicalUnit->TheSurfaceForm += Character_;
+    break;
+  case L'/':
+  case L'#':
+  case L'+':
+    StreamedType_.TheLexicalUnit->TheAnalyses.back()
+        .TheMorphemes.back()
+        .TheLemma.push_back(Character_);
+    break;
+  case L'*':
+    Lemma += Character_;
+    break;
+  case L'<':
+    StreamedType_.TheLexicalUnit->TheAnalyses.back()
+        .TheMorphemes.back()
+        .TheTags.back()
+        .TheTag += Character_;
+    break;
+  case L'>': {
+    std::wstringstream Description;
+    Description << L"unexpected '" << Character_
+                << L"' immediately following '"
+                << ThePreviousCase->ThePreviousCase << L"'";
+    throw wchar_t_Exception::Stream::UnexpectedCharacter(
+        Message_what(Description));
+  }
+  default: {
+    std::wstringstream Description;
+    Description << L"unexpected previous reserved or special character '"
+                << ThePreviousCase->ThePreviousCase << L"'";
+    throw wchar_t_Exception::Stream::UnexpectedPreviousCase(
+        Message_what(Description));
+  }
+  }
+
+  ThePreviousCase->isPreviousCharacter = false;
+}
+
+// Handles an escape sequence: stores Character_ (the backslash, going by the
+// 0x5c in the name), then reads the following character and stores it verbatim
+// as well, bypassing reserved-character handling. The following character is
+// also appended to TheLine. Running out of input right after Character_ throws
+// UnexpectedEndOfFile.
+void Stream::case_0x5c(StreamedType &StreamedType_, std::wstring &Lemma,
+                       const wchar_t &Character_) {
+  push_back_Character(StreamedType_, Lemma, Character_);
+
+  const wchar_t EscapedCharacter = TheCharacterStream.get();
+
+  if (is_eof_throw_if_not_TheCharacterStream_good(StreamedType_, Lemma,
+                                                  EscapedCharacter)) {
+    std::wstringstream Description;
+    Description << L"unexpected end-of-file following '\\', end-of-file "
+                   L"expected to follow ']' or '$'";
+    throw wchar_t_Exception::Stream::UnexpectedEndOfFile(
+        Message_what(Description));
+  }
+
+  TheLine.push_back(EscapedCharacter);
+  push_back_Character(StreamedType_, Lemma, EscapedCharacter);
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/stream.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream.h	(revision 69632)
@@ -0,0 +1,69 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_H
+#define STREAM_H
+
+#include "basic_tagger.h"
+#include "optional.h"
+#include "streamed_type.h"
+
+#include <cstddef>
+#include <istream>
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+// Character-level reader over a std::wistream that hands out StreamedType
+// values and reports malformed input through wchar_t_Exception::Stream
+// exceptions whose text carries file, line and column context (see
+// Message_what()).
+class Stream {
+public:
+  // NOTE(review): the constructors are defined in stream.cc; the one without a
+  // stream argument presumably reads from standard input -- confirm there.
+  Stream(const basic_Tagger::Flags &Flags_);
+  Stream(const basic_Tagger::Flags &Flags_, std::wifstream &CharacterStream_,
+         const char *const Filename_);
+  Stream(const basic_Tagger::Flags &Flags_, std::wifstream &CharacterStream_,
+         const std::string &Filename_);
+  Stream(const basic_Tagger::Flags &Flags_, std::wifstream &CharacterStream_,
+         const std::stringstream &Filename_);
+  // Reads from TheCharacterStream and returns the next StreamedType.
+  StreamedType get();
+  // Returns private_flush_.
+  bool flush_() const;
+
+private:
+  // The last reserved character seen, plus whether it is still the character
+  // immediately before the one being read.
+  class PreviousCaseType {
+  public:
+    // Starts with isPreviousCharacter set to true.
+    PreviousCaseType(const wchar_t &PreviousCase_);
+    wchar_t ThePreviousCase;
+    // Cleared by push_back_Character() after an ordinary character is stored.
+    bool isPreviousCharacter : 1;
+  };
+  // True at end-of-file; throws TheCharacterStream_not_good when the stream
+  // has failed without reaching end-of-file; false otherwise.
+  bool is_eof_throw_if_not_TheCharacterStream_good() const;
+  // Formats Message with the filename (when set), TheLineNumber, the column,
+  // the current line and a caret line.
+  std::wstring Message_what(const std::wstringstream &Message) const;
+  // As the overload above, but end of input is decided by
+  // isTheCharacterStream_eof(), so a null flush also counts.
+  bool is_eof_throw_if_not_TheCharacterStream_good(StreamedType &StreamedType_,
+                                                   std::wstring &Lemma,
+                                                   const wchar_t &Character_);
+  // True at end-of-file, or when the null-flush flag is enabled and Character_
+  // is L'\0' (the character is then stored and private_flush_ is set).
+  bool isTheCharacterStream_eof(StreamedType &StreamedType_,
+                                std::wstring &Lemma, const wchar_t &Character_);
+  // Appends Character_ to the part of StreamedType_ (or to Lemma) selected by
+  // ThePreviousCase.
+  void push_back_Character(StreamedType &StreamedType_, std::wstring &Lemma,
+                           const wchar_t &Character_);
+  // Stores Character_ and the character that follows it in the stream, both
+  // verbatim.
+  void case_0x5c(StreamedType &StreamedType_, std::wstring &Lemma,
+                 const wchar_t &Character_);
+  std::wistream &TheCharacterStream;
+  // Used as a prefix in diagnostics when set.
+  Optional<std::string> TheFilename;
+  // Incremented each time a newline is consumed.
+  std::size_t TheLineNumber;
+  // Characters read on the current line; cleared when a newline is consumed.
+  std::wstring TheLine;
+  const basic_Tagger::Flags &TheFlags;
+  // Set once a null-flush character has been read.
+  bool private_flush_ : 1;
+  // Unset until the first reserved character has been seen.
+  Optional<PreviousCaseType> ThePreviousCase;
+};
+}
+
+#endif // STREAM_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.cc	(revision 69632)
@@ -0,0 +1,68 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream_5_3_1_tagger.h"
+
+#include "apertium_config.h"
+
+#include "analysis.h"
+#include "deserialiser.h"
+#include "lexical_unit.h"
+#include "stream.h"
+#include "streamed_type.h"
+
+#include <cstddef>
+#include <istream>
+#include <map>
+#include <ostream>
+
+#if ENABLE_DEBUG
+
+#include <sstream>
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+// Default-constructs the model base and forwards Flags_ to the stream tagger
+// base.
+Stream_5_3_1_Tagger::Stream_5_3_1_Tagger(const Flags &Flags_)
+    : basic_5_3_1_Tagger(),
+      basic_StreamTagger(Flags_) {
+}
+
+// Replaces Model with the analysis-to-count map read from
+// Serialised_basic_Tagger.
+void Stream_5_3_1_Tagger::deserialise(std::istream &Serialised_basic_Tagger) {
+  typedef std::map<Analysis, std::size_t> ModelType;
+
+  Model = Deserialiser<ModelType>::deserialise(Serialised_basic_Tagger);
+}
+
+// The score of an analysis is its token count as given by tokenCount_T().
+long double Stream_5_3_1_Tagger::score(const Analysis &Analysis_) const {
+  const long double Score_ = tokenCount_T(Analysis_);
+  return Score_;
+}
+
+// Returns one plus the count stored in Model for Analysis_, or one when
+// Analysis_ is not in Model.
+long double Stream_5_3_1_Tagger::tokenCount_T(const Analysis &Analysis_) const {
+  // Look the analysis up once and reuse the iterator instead of searching the
+  // map a second time to read the count.
+  const std::map<Analysis, std::size_t>::const_iterator Count_ =
+      Model.find(Analysis_);
+
+  if (Count_ == Model.end())
+    return 1;
+
+  return 1 + Count_->second;
+}
+
+#if ENABLE_DEBUG
+
+// Debug rendering of score(): the value of tokenCount_T() written through a
+// wide string stream.
+std::wstring Stream_5_3_1_Tagger::score_DEBUG(const Analysis &Analysis_) const {
+  std::wstringstream Formatted_;
+
+  Formatted_ << tokenCount_T(Analysis_);
+
+  return Formatted_.str();
+}
+
+#endif // ENABLE_DEBUG
+
+}
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger.h	(revision 69632)
@@ -0,0 +1,53 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_1_TAGGER_H
+#define STREAM_5_3_1_TAGGER_H
+
+#include "apertium_config.h"
+
+#include "analysis.h"
+#include "basic_5_3_1_tagger.h"
+#include "basic_stream_tagger.h"
+
+#include <istream>
+
+#if ENABLE_DEBUG
+
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+// Stream tagger that scores an analysis by one plus the count stored for it in
+// the Model inherited from basic_5_3_1_Tagger.
+class Stream_5_3_1_Tagger : private basic_5_3_1_Tagger,
+                            public basic_StreamTagger {
+public:
+  Stream_5_3_1_Tagger(const Flags &Flags_);
+  // Replaces Model with the map read from Serialised_basic_Tagger.
+  void deserialise(std::istream &Serialised_basic_Tagger);
+
+private:
+  // Returns tokenCount_T(Analysis_).
+  long double score(const Analysis &Analysis_) const;
+  // One plus the stored count of Analysis_; one when it is not in Model.
+  long double tokenCount_T(const Analysis &Analysis_) const;
+
+#if ENABLE_DEBUG
+
+  // tokenCount_T(Analysis_) formatted as a wide string.
+  std::wstring score_DEBUG(const Analysis &Analysis_) const;
+
+#endif // ENABLE_DEBUG
+
+};
+}
+
+#endif // STREAM_5_3_1_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_1_tagger_trainer.h	(revision 69632)
@@ -0,0 +1,41 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_1_TAGGER_TRAINER_H
+#define STREAM_5_3_1_TAGGER_TRAINER_H
+
+#include "basic_5_3_1_tagger.h"
+#include "basic_stream_tagger_trainer.h"
+
+#include "analysis.h"
+#include "stream.h"
+
+#include <ostream>
+
+namespace Apertium {
+// Trainer counterpart of Stream_5_3_1_Tagger, sharing the model type through
+// basic_5_3_1_Tagger.
+// NOTE(review): the member definitions live outside this header; the
+// per-member notes below are inferred from the names and should be confirmed
+// against the implementation.
+class Stream_5_3_1_TaggerTrainer : private basic_5_3_1_Tagger,
+                                   public basic_StreamTaggerTrainer {
+public:
+  Stream_5_3_1_TaggerTrainer(const Flags &Flags_);
+  // Presumably writes the trained model to Serialised_basic_Tagger -- confirm.
+  void serialise(std::ostream &Serialised_basic_Tagger) const;
+
+private:
+  // Presumably adds Coefficient_ occurrences of Analysis_ to the model --
+  // confirm.
+  void train_Analysis(const Analysis &Analysis_,
+                      const std::size_t &Coefficient_);
+  // Presumably scales every stored count by OccurrenceCoefficientMultiplier --
+  // confirm.
+  void multiplyModel(const std::size_t &OccurrenceCoefficientMultiplier);
+};
+}
+
+#endif // STREAM_5_3_1_TAGGER_TRAINER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.cc	(revision 69632)
@@ -0,0 +1,104 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream_5_3_2_tagger.h"
+
+#include "apertium_config.h"
+
+#include "a.h"
+#include "analysis.h"
+#include "deserialiser.h"
+#include "lemma.h"
+
+#include <cstddef>
+#include <istream>
+#include <map>
+
+#if ENABLE_DEBUG
+
+#include <sstream>
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+// Default-constructs the model base and forwards Flags_ to the stream tagger
+// base.
+Stream_5_3_2_Tagger::Stream_5_3_2_Tagger(const Flags &Flags_)
+    : basic_5_3_2_Tagger(),
+      basic_StreamTagger(Flags_) {
+}
+
+// Replaces Model with the map (a -> Lemma -> count) read from
+// Serialised_basic_Tagger.
+void Stream_5_3_2_Tagger::deserialise(std::istream &Serialised_basic_Tagger) {
+  typedef std::map<Lemma, std::size_t> LemmaCounts;
+  typedef std::map<a, LemmaCounts> ModelType;
+
+  Model = Deserialiser<ModelType>::deserialise(Serialised_basic_Tagger);
+}
+
+// score = (tokenCount_r_a * tokenCount_a) / (tokenCount_a + typeCount_a),
+// every term being evaluated for Analysis_.
+long double Stream_5_3_2_Tagger::score(const Analysis &Analysis_) const {
+  const long double Dividend_ =
+      tokenCount_r_a(Analysis_) * tokenCount_a(Analysis_);
+  const long double Divisor_ = tokenCount_a(Analysis_) + typeCount_a(Analysis_);
+
+  return Dividend_ / Divisor_;
+}
+
+// Returns one plus the count stored in Model for Lemma(Analysis_) under
+// a(Analysis_), or one when either key is missing.
+long double
+Stream_5_3_2_Tagger::tokenCount_r_a(const Analysis &Analysis_) const {
+  typedef std::map<Lemma, std::size_t> LemmaCounts;
+
+  // Build each key and search each map once; the iterators are reused rather
+  // than repeating the lookup (and the temporary key) for every access.
+  const std::map<a, LemmaCounts>::const_iterator a_ = Model.find(a(Analysis_));
+
+  if (a_ == Model.end())
+    return 1;
+
+  const LemmaCounts::const_iterator Lemma_ =
+      a_->second.find(Lemma(Analysis_));
+
+  if (Lemma_ == a_->second.end())
+    return 1;
+
+  return 1 + Lemma_->second;
+}
+
+// Returns one plus the sum of all lemma counts stored in Model under
+// a(Analysis_), or one when a(Analysis_) is not in Model.
+long double Stream_5_3_2_Tagger::tokenCount_a(const Analysis &Analysis_) const {
+  typedef std::map<Lemma, std::size_t> LemmaCounts;
+
+  // The lookup does not depend on the loop below, so do it once up front
+  // instead of re-searching Model in the loop condition on every iteration.
+  const std::map<a, LemmaCounts>::const_iterator a_ = Model.find(a(Analysis_));
+
+  if (a_ == Model.end())
+    return 1;
+
+  long double tokenCount_a_ = 1;
+
+  for (LemmaCounts::const_iterator Lemma_ = a_->second.begin();
+       Lemma_ != a_->second.end(); ++Lemma_) {
+    tokenCount_a_ += Lemma_->second;
+  }
+
+  return tokenCount_a_;
+}
+
+// Returns the number of distinct lemmas stored in Model under a(Analysis_),
+// plus one when Lemma(Analysis_) is not among them. Returns one when
+// a(Analysis_) is not in Model at all.
+long double Stream_5_3_2_Tagger::typeCount_a(const Analysis &Analysis_) const {
+  typedef std::map<Lemma, std::size_t> LemmaCounts;
+
+  // Search Model once and keep a reference to the inner map instead of
+  // repeating the lookup for every access.
+  const std::map<a, LemmaCounts>::const_iterator a_ = Model.find(a(Analysis_));
+
+  if (a_ == Model.end())
+    return 1;
+
+  const LemmaCounts &Lemmas_ = a_->second;
+
+  return (Lemmas_.find(Lemma(Analysis_)) == Lemmas_.end() ? 1 : 0) +
+         Lemmas_.size();
+}
+
+#if ENABLE_DEBUG
+
+// Debug rendering of score(): the same formula with each term's value
+// substituted, the divisor being written on a second, indented line.
+std::wstring Stream_5_3_2_Tagger::score_DEBUG(const Analysis &Analysis_) const {
+  std::wstringstream Formatted_;
+
+  // Dividend.
+  Formatted_ << L"(" << tokenCount_r_a(Analysis_);
+  Formatted_ << L" * " << tokenCount_a(Analysis_);
+  // Divisor.
+  Formatted_ << L") /\n    (" << tokenCount_a(Analysis_);
+  Formatted_ << L" + " << typeCount_a(Analysis_);
+  Formatted_ << L")";
+
+  return Formatted_.str();
+}
+
+#endif // ENABLE_DEBUG
+
+}
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger.h	(revision 69632)
@@ -0,0 +1,55 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_2_TAGGER_H
+#define STREAM_5_3_2_TAGGER_H
+
+#include "apertium_config.h"
+
+#include "analysis.h"
+#include "basic_5_3_2_tagger.h"
+#include "basic_stream_tagger.h"
+
+#include <istream>
+
+#if ENABLE_DEBUG
+
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+// Stream tagger that scores an analysis from the counts stored in the Model
+// inherited from basic_5_3_2_Tagger (a map from a to a map from Lemma to
+// count).
+class Stream_5_3_2_Tagger : private basic_5_3_2_Tagger,
+                            public basic_StreamTagger {
+public:
+  Stream_5_3_2_Tagger(const Flags &Flags_);
+  // Replaces Model with the map read from Serialised_basic_Tagger.
+  void deserialise(std::istream &Serialised_basic_Tagger);
+
+private:
+  // (tokenCount_r_a * tokenCount_a) / (tokenCount_a + typeCount_a).
+  long double score(const Analysis &Analysis_) const;
+  // One plus the count of Lemma(Analysis_) under a(Analysis_); one when either
+  // key is missing.
+  long double tokenCount_r_a(const Analysis &Analysis_) const;
+  // One plus the sum of all lemma counts under a(Analysis_); one when
+  // a(Analysis_) is missing.
+  long double tokenCount_a(const Analysis &Analysis_) const;
+  // Number of lemmas under a(Analysis_), plus one when Lemma(Analysis_) is not
+  // among them; one when a(Analysis_) is missing.
+  long double typeCount_a(const Analysis &Analysis_) const;
+
+#if ENABLE_DEBUG
+
+  // The score formula with each term's value substituted, as a wide string.
+  std::wstring score_DEBUG(const Analysis &Analysis_) const;
+
+#endif // ENABLE_DEBUG
+
+};
+}
+
+#endif // STREAM_5_3_2_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_2_tagger_trainer.h	(revision 69632)
@@ -0,0 +1,38 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_2_TAGGER_TRAINER_H
+#define STREAM_5_3_2_TAGGER_TRAINER_H
+
+#include "basic_5_3_2_tagger.h"
+#include "basic_stream_tagger_trainer.h"
+
+#include <ostream>
+
+namespace Apertium {
+// Trainer counterpart of Stream_5_3_2_Tagger, sharing the model type through
+// basic_5_3_2_Tagger.
+// NOTE(review): the member definitions live outside this header; the
+// per-member notes below are inferred from the names and should be confirmed
+// against the implementation.
+class Stream_5_3_2_TaggerTrainer : private basic_5_3_2_Tagger,
+                                   public basic_StreamTaggerTrainer {
+public:
+  Stream_5_3_2_TaggerTrainer(const Flags &Flags_);
+  // Presumably writes the trained model to Serialised_basic_Tagger -- confirm.
+  void serialise(std::ostream &Serialised_basic_Tagger) const;
+
+private:
+  // Presumably adds Coefficient_ occurrences of Analysis_ to the model --
+  // confirm.
+  void train_Analysis(const Analysis &Analysis_,
+                      const std::size_t &Coefficient_);
+  // Presumably scales every stored count by OccurrenceCoefficientMultiplier --
+  // confirm.
+  void multiplyModel(const std::size_t &OccurrenceCoefficientMultiplier);
+};
+}
+
+#endif // STREAM_5_3_2_TAGGER_TRAINER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.cc	(revision 69632)
@@ -0,0 +1,223 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "stream_5_3_3_tagger.h"
+
+#include "apertium_config.h"
+
+#include "analysis.h"
+#include "deserialiser.h"
+#include "i.h"
+#include "lemma.h"
+#include "morpheme.h"
+
+#include <vector>
+
+#if ENABLE_DEBUG
+
+#include <sstream>
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+Stream_5_3_3_Tagger::Stream_5_3_3_Tagger(const Flags &Flags_)
+    : basic_StreamTagger(Flags_) {} // only forwards Flags_ to the public base
+
+void Stream_5_3_3_Tagger::deserialise(std::istream &Serialised_basic_Tagger) {
+  // Model is a (first, (second.first, second.second)) pair of count maps.
+  typedef std::map<i, std::map<Lemma, std::size_t> > LemmasByI;
+  typedef std::map<Lemma, std::map<i, std::size_t> > IsByLemma;
+  typedef std::pair<LemmasByI, std::pair<LemmasByI, IsByLemma> > Model_t;
+  Model = Deserialiser<Model_t>::deserialise(Serialised_basic_Tagger);
+}
+
+long double Stream_5_3_3_Tagger::score(const Analysis &Analysis_) const {
+  // Numerator and divisor are accumulated separately and divided once.
+  typedef std::vector<Morpheme>::const_iterator MorphemeIterator;
+  long double Numerator_ = tokenCount_r_i(Analysis_) * tokenCount_i(Analysis_);
+  long double Divisor_ = tokenCount_i(Analysis_) + typeCount_i(Analysis_);
+  // From the 2nd morpheme on; assumes TheMorphemes is non-empty (TODO confirm).
+  for (MorphemeIterator Current_ = Analysis_.TheMorphemes.begin() + 1;
+       Current_ != Analysis_.TheMorphemes.end(); ++Current_) {
+    const MorphemeIterator Previous_ = Current_ - 1;
+    Numerator_ *= tokenCount_d_i_Morpheme(Lemma(*Current_), i(*Previous_)) *
+                  tokenCount_i_d_Morpheme(i(*Current_), Lemma(*Current_));
+    Divisor_ *= (tokenCount_i_Morpheme(i(*Previous_)) +
+                 typeCount_i_Morpheme(i(*Previous_), Lemma(*Current_))) *
+                (tokenCount_d_Morpheme(Lemma(*Current_)) +
+                 typeCount_d_Morpheme(Lemma(*Current_), i(*Current_)));
+  }
+  return Numerator_ / Divisor_;
+}
+
+long double
+Stream_5_3_3_Tagger::tokenCount_r_i(const Analysis &Analysis_) const {
+  // An absent i, or an absent Lemma under a present i, both yield 1.
+  if (Model.first.find(i(Analysis_)) == Model.first.end())
+    return 1;
+
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.first.find(i(Analysis_))->second;
+  const std::map<Lemma, std::size_t>::const_iterator Hit_ =
+      Lemmas_.find(Lemma(Analysis_));
+  return Hit_ == Lemmas_.end() ? 1 : 1 + Hit_->second;
+}
+
+long double Stream_5_3_3_Tagger::tokenCount_i(const Analysis &Analysis_) const {
+  // 1 + sum of all Lemma counts under i(Analysis_); 1 when that i is absent.
+  if (Model.first.find(i(Analysis_)) == Model.first.end())
+    return 1;
+
+  // Bind the inner map once so the loop does not repeat the outer lookup.
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.first.find(i(Analysis_))->second;
+  long double tokenCount_i_ = 1;
+  for (std::map<Lemma, std::size_t>::const_iterator Lemma_ = Lemmas_.begin();
+       Lemma_ != Lemmas_.end(); ++Lemma_)
+    tokenCount_i_ += Lemma_->second;
+  return tokenCount_i_;
+}
+
+long double Stream_5_3_3_Tagger::typeCount_i(const Analysis &Analysis_) const {
+  if (Model.first.find(i(Analysis_)) == Model.first.end())
+    return 1;
+
+  // Distinct Lemmas stored under this i, plus one more if this Lemma is new.
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.first.find(i(Analysis_))->second;
+  const bool IsUnseen_ = Lemmas_.find(Lemma(Analysis_)) == Lemmas_.end();
+  return (IsUnseen_ ? 1 : 0) + Lemmas_.size();
+}
+
+long double Stream_5_3_3_Tagger::tokenCount_d_i_Morpheme(const Lemma &Lemma_,
+                                                         const i &i_) const {
+  // 1 + the count stored for (i_, Lemma_) in Model.second.first; 1 if absent.
+  if (Model.second.first.find(i_) == Model.second.first.end())
+    return 1;
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.second.first.find(i_)->second;
+  typedef std::map<Lemma, std::size_t>::const_iterator LemmaCount;
+  const LemmaCount Hit_ = Lemmas_.find(Lemma_);
+  return Hit_ == Lemmas_.end() ? 1 : 1 + Hit_->second;
+}
+
+long double
+Stream_5_3_3_Tagger::tokenCount_i_d_Morpheme(const i &i_,
+                                             const Lemma &Lemma_) const {
+  // 1 + the count stored for (Lemma_, i_) in Model.second.second; 1 if absent.
+  if (Model.second.second.find(Lemma_) == Model.second.second.end())
+    return 1;
+
+  const std::map<i, std::size_t> &Counts_ =
+      Model.second.second.find(Lemma_)->second;
+  const std::map<i, std::size_t>::const_iterator Hit_ = Counts_.find(i_);
+  return Hit_ == Counts_.end() ? 1 : 1 + Hit_->second;
+}
+
+long double Stream_5_3_3_Tagger::tokenCount_i_Morpheme(const i &i_) const {
+  // 1 + sum of all Lemma counts under i_ in Model.second.first; 1 if absent.
+  if (Model.second.first.find(i_) == Model.second.first.end())
+    return 1;
+
+  // Bind the inner map once so the loop does not repeat the outer lookup.
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.second.first.find(i_)->second;
+  long double tokenCount_i_Morpheme_ = 1;
+  for (std::map<Lemma, std::size_t>::const_iterator Lemma_ = Lemmas_.begin();
+       Lemma_ != Lemmas_.end(); ++Lemma_)
+    tokenCount_i_Morpheme_ += Lemma_->second;
+  return tokenCount_i_Morpheme_;
+}
+
+long double
+Stream_5_3_3_Tagger::typeCount_i_Morpheme(const i &i_,
+                                          const Lemma &Lemma_) const {
+  if (Model.second.first.find(i_) == Model.second.first.end())
+    return 1;
+
+  // Distinct Lemmas stored under i_, plus one more if Lemma_ is not among them.
+  const std::map<Lemma, std::size_t> &Lemmas_ =
+      Model.second.first.find(i_)->second;
+  const bool IsUnseen_ = Lemmas_.find(Lemma_) == Lemmas_.end();
+  return (IsUnseen_ ? 1 : 0) + Lemmas_.size();
+}
+
+long double
+Stream_5_3_3_Tagger::tokenCount_d_Morpheme(const Lemma &Lemma_) const {
+  // 1 + sum of all i counts under Lemma_ in Model.second.second; 1 if absent.
+  if (Model.second.second.find(Lemma_) == Model.second.second.end())
+    return 1;
+
+  // Bind the inner map once so the loop does not repeat the outer lookup.
+  const std::map<i, std::size_t> &Counts_ =
+      Model.second.second.find(Lemma_)->second;
+  long double tokenCount_d_Morpheme_ = 1;
+  for (std::map<i, std::size_t>::const_iterator Count_ = Counts_.begin();
+       Count_ != Counts_.end(); ++Count_)
+    tokenCount_d_Morpheme_ += Count_->second;
+  return tokenCount_d_Morpheme_;
+}
+
+long double Stream_5_3_3_Tagger::typeCount_d_Morpheme(const Lemma &Lemma_,
+                                                      const i &i_) const {
+  if (Model.second.second.find(Lemma_) == Model.second.second.end())
+    return 1;
+
+  // Distinct i values stored under Lemma_, plus one more if i_ is not there.
+  const std::map<i, std::size_t> &Counts_ =
+      Model.second.second.find(Lemma_)->second;
+  const bool IsUnseen_ = Counts_.find(i_) == Counts_.end();
+  return (IsUnseen_ ? 1 : 0) + Counts_.size();
+}
+
+#if ENABLE_DEBUG
+
+std::wstring Stream_5_3_3_Tagger::score_DEBUG(const Analysis &Analysis_) const {
+  // Writes out the factors score() combines, as "(numerator) / [divisor]".
+  typedef std::vector<Morpheme>::const_iterator MorphemeIterator;
+  const MorphemeIterator End_ = Analysis_.TheMorphemes.end();
+  std::wstringstream Formula_;
+
+  // Numerator: whole-analysis factors, then two factors per morpheme pair.
+  Formula_ << L"(" << tokenCount_r_i(Analysis_);
+  Formula_ << L" * " << tokenCount_i(Analysis_);
+  for (MorphemeIterator Current_ = Analysis_.TheMorphemes.begin() + 1;
+       Current_ != End_; ++Current_) {
+    const MorphemeIterator Previous_ = Current_ - 1;
+    Formula_ << L" * "
+             << tokenCount_d_i_Morpheme(Lemma(*Current_), i(*Previous_));
+    Formula_ << L" * "
+             << tokenCount_i_d_Morpheme(i(*Current_), Lemma(*Current_));
+  }
+
+  // Divisor: one bracketed sum for the analysis, two per morpheme pair.
+  Formula_ << L") /\n    [(" << tokenCount_i(Analysis_) << L" + "
+           << typeCount_i(Analysis_) << L")";
+
+  for (MorphemeIterator Current_ = Analysis_.TheMorphemes.begin() + 1;
+       Current_ != End_; ++Current_) {
+    const MorphemeIterator Previous_ = Current_ - 1;
+    Formula_ << L" * (" << tokenCount_i_Morpheme(i(*Previous_)) << L" + "
+             << typeCount_i_Morpheme(i(*Previous_), Lemma(*Current_));
+    Formula_ << L") * (" << tokenCount_d_Morpheme(Lemma(*Current_)) << L" + "
+             << typeCount_d_Morpheme(Lemma(*Current_), i(*Current_)) << L")";
+  }
+  Formula_ << L"]";
+  return Formula_.str();
+}
+
+#endif // ENABLE_DEBUG
+}
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger.h	(revision 69632)
@@ -0,0 +1,62 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_3_TAGGER_H
+#define STREAM_5_3_3_TAGGER_H
+
+#include "apertium_config.h"
+
+#include "analysis.h"
+#include "basic_5_3_3_tagger.h"
+#include "basic_stream_tagger.h"
+#include "i.h"
+#include "lemma.h"
+
+#include <istream>
+
+#if ENABLE_DEBUG
+
+#include <string>
+
+#endif // ENABLE_DEBUG
+
+namespace Apertium {
+class Stream_5_3_3_Tagger : private basic_5_3_3_Tagger,
+                            public basic_StreamTagger {
+public: // construction from Flags, and loading Model from an input stream
+  Stream_5_3_3_Tagger(const Flags &Flags_);
+  void deserialise(std::istream &Serialised_basic_Tagger);
+
+private: // score() is a ratio of products of the count helpers listed below
+  long double score(const Analysis &Analysis_) const;
+  long double tokenCount_r_i(const Analysis &Analysis_) const;
+  long double tokenCount_i(const Analysis &Analysis_) const;
+  long double typeCount_i(const Analysis &Analysis_) const;
+  long double tokenCount_d_i_Morpheme(const Lemma &Lemma_, const i &i_) const;
+  long double tokenCount_i_d_Morpheme(const i &i_, const Lemma &Lemma_) const;
+  long double tokenCount_i_Morpheme(const i &i_) const;
+  long double typeCount_i_Morpheme(const i &i_, const Lemma &Lemma_) const;
+  long double tokenCount_d_Morpheme(const Lemma &Lemma_) const;
+  long double typeCount_d_Morpheme(const Lemma &Lemma_, const i &i_) const;
+
+#if ENABLE_DEBUG
+
+  std::wstring score_DEBUG(const Analysis &Analysis_) const; // formula as text
+
+#endif // ENABLE_DEBUG
+};
+}
+
+#endif // STREAM_5_3_3_TAGGER_H
Index: branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/stream_5_3_3_tagger_trainer.h	(revision 69632)
@@ -0,0 +1,39 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAM_5_3_3_TAGGER_TRAINER_H
+#define STREAM_5_3_3_TAGGER_TRAINER_H
+
+#include "analysis.h"
+#include "basic_5_3_3_tagger.h"
+#include "basic_stream_tagger_trainer.h"
+
+#include <ostream>
+
+namespace Apertium {
+class Stream_5_3_3_TaggerTrainer : private basic_5_3_3_Tagger,
+                                   public basic_StreamTaggerTrainer {
+public: // constructed from Flags; serialise() is given the output stream
+  Stream_5_3_3_TaggerTrainer(const Flags &Flags_);
+  void serialise(std::ostream &Serialised_basic_Tagger) const;
+
+private: // NOTE(review): presumably hooks called by the public base -- confirm
+  void train_Analysis(const Analysis &Analysis_,
+                      const std::size_t &Coefficient_);
+  void multiplyModel(const std::size_t &OccurrenceCoefficientMultiplier);
+};
+}
+
+#endif // STREAM_5_3_3_TAGGER_TRAINER_H
Index: branches/apertium-tagger/apertium2/apertium/streamed_type.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/streamed_type.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/streamed_type.h	(revision 69632)
@@ -0,0 +1,32 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef STREAMED_TYPE_H
+#define STREAMED_TYPE_H
+
+#include "lexical_unit.h"
+#include "optional.h"
+
+#include <string>
+
+namespace Apertium {
+class StreamedType {
+public:
+  std::wstring TheString; // wide-character text held alongside the unit
+  Optional<LexicalUnit> TheLexicalUnit; // presumably may be empty -- confirm
+};
+}
+
+#endif // STREAMED_TYPE_H
Index: branches/apertium-tagger/apertium2/apertium/tag.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tag.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tag.cc	(revision 69632)
@@ -0,0 +1,34 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "tag.h"
+
+#include "exception.h"
+
+#include <string>
+
+namespace Apertium {
+bool operator==(const Tag &a, const Tag &b) { return a.TheTag == b.TheTag; }
+// Both comparisons delegate to std::wstring's own operators on TheTag.
+bool operator<(const Tag &a, const Tag &b) { return a.TheTag < b.TheTag; }
+
+Tag::operator std::wstring() const {
+  // Renders the tag as "<TheTag>"; an empty TheTag is rejected with a throw.
+  if (!TheTag.empty())
+    return L"<" + TheTag + L">";
+  throw Exception::Tag::TheTags_empty("can't convert Tag comprising empty "
+                                      "TheTag std::wstring to std::wstring");
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/tag.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tag.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tag.h	(revision 69632)
@@ -0,0 +1,31 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef TAG_H
+#define TAG_H
+
+#include <string>
+
+namespace Apertium {
+class Tag {
+public:
+  friend bool operator==(const Tag &a, const Tag &b);
+  friend bool operator<(const Tag &a, const Tag &b);
+  operator std::wstring() const; // L"<" + TheTag + L">"; throws if TheTag empty
+  std::wstring TheTag; // the conversion above adds the angle brackets
+};
+}
+
+#endif // TAG_H
Index: branches/apertium-tagger/apertium2/apertium/wchar_t_exception.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/wchar_t_exception.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/wchar_t_exception.h	(revision 69632)
@@ -0,0 +1,53 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef WCHAR_T_EXCEPTION_H
+#define WCHAR_T_EXCEPTION_H
+
+#include "wchar_t_exception_type.h"
+
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+namespace wchar_t_Exception {
+// WCHAR_T_EXCEPTION(T) defines class T, a wchar_t_ExceptionType with 3 ctors.
+#define WCHAR_T_EXCEPTION(WCHAR_T_EXCEPTION_TYPE)                              \
+  class WCHAR_T_EXCEPTION_TYPE : public ::Apertium::wchar_t_ExceptionType {    \
+  public:                                                                      \
+    WCHAR_T_EXCEPTION_TYPE(const wchar_t *wchar_t_what_)                       \
+        : wchar_t_ExceptionType(wchar_t_what_) {}                              \
+    WCHAR_T_EXCEPTION_TYPE(const std::wstring &wchar_t_what_)                  \
+        : wchar_t_ExceptionType(wchar_t_what_) {}                              \
+    WCHAR_T_EXCEPTION_TYPE(const std::wstringstream &wchar_t_what_)            \
+        : wchar_t_ExceptionType(wchar_t_what_) {}                              \
+    ~WCHAR_T_EXCEPTION_TYPE() throw() {}                                       \
+  };
+// One exception class per Stream error, each generated by the macro.
+namespace Stream {
+WCHAR_T_EXCEPTION(TheCharacterStream_not_good)
+WCHAR_T_EXCEPTION(UnexpectedAnalysis)
+WCHAR_T_EXCEPTION(UnexpectedCase)
+WCHAR_T_EXCEPTION(UnexpectedCharacter)
+WCHAR_T_EXCEPTION(UnexpectedEndOfFile)
+WCHAR_T_EXCEPTION(UnexpectedLemma)
+WCHAR_T_EXCEPTION(UnexpectedPreviousCase)
+}
+
+#undef WCHAR_T_EXCEPTION
+}
+}
+
+#endif // WCHAR_T_EXCEPTION_H
Index: branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.cc	(revision 69632)
@@ -0,0 +1,90 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#include "wchar_t_exception_type.h"
+
+#include "exception.h"
+
+#include <algorithm>
+#include <cerrno>
+#include <cstdlib>
+#include <cstring>
+#include <cwchar>
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+void swap(wchar_t_ExceptionType &a, wchar_t_ExceptionType &b) {
+  // Exchanges only the two what_ pointers; no buffer is copied or freed.
+  char *const Held_ = a.what_;
+  a.what_ = b.what_;
+  b.what_ = Held_;
+}
+
+wchar_t_ExceptionType::wchar_t_ExceptionType(const wchar_t *wchar_t_what_)
+    : what_(new char[size(wchar_t_what_)]) { // size() = converted length + 1
+  constructor(wchar_t_what_); // NOTE(review): a throw here would leak what_
+}
+
+wchar_t_ExceptionType::wchar_t_ExceptionType(const std::wstring &wchar_t_what_)
+    : what_(new char[size(wchar_t_what_.c_str())]) {
+  constructor(wchar_t_what_.c_str()); // same steps, via the string's c_str()
+}
+
+wchar_t_ExceptionType::wchar_t_ExceptionType(
+    const std::wstringstream &wchar_t_what_)
+    : what_(new char[size(wchar_t_what_.str().c_str())]) {
+  constructor(wchar_t_what_.str().c_str()); // str() temporary lives to the ';'
+}
+
+wchar_t_ExceptionType::wchar_t_ExceptionType(
+    const wchar_t_ExceptionType &wchar_t_ExceptionType_)
+    : what_(new char[std::strlen(wchar_t_ExceptionType_.what_) + 1]) { // +NUL
+  std::strcpy(what_, wchar_t_ExceptionType_.what_); // copy into the new buffer
+}
+
+wchar_t_ExceptionType &wchar_t_ExceptionType::
+operator=(wchar_t_ExceptionType wchar_t_ExceptionType_) {
+  swap(*this, wchar_t_ExceptionType_); // take the by-value copy's buffer
+  return *this; // the parameter's destructor then deletes the old buffer
+}
+
+wchar_t_ExceptionType::~wchar_t_ExceptionType() throw() { delete[] what_; }
+// what() hands out what_ itself, which the destructor above deletes.
+const char *wchar_t_ExceptionType::what() const throw() { return what_; }
+
+std::size_t wchar_t_ExceptionType::size(const wchar_t *wchar_t_what_) {
+  // Dry run with a NULL destination to measure the multibyte form; the
+  // returned size adds one for the terminating null byte.
+  std::mbstate_t State_ = {0};
+  errno = 0;
+  const std::size_t Length_ = std::wcsrtombs(NULL, &wchar_t_what_, 0, &State_);
+  if (errno != EILSEQ)
+    return Length_ + 1;
+  throw Exception::wchar_t_ExceptionType::EILSEQ_(
+      "can't convert const wchar_t *wchar_t_what_ to char * : unexpected "
+      "wide character");
+}
+
+void wchar_t_ExceptionType::constructor(const wchar_t *wchar_t_what_) {
+  const std::size_t Capacity_ = size(wchar_t_what_); // bytes the ctor allocated
+  std::mbstate_t State_ = {0};
+  errno = 0;
+  std::wcsrtombs(what_, &wchar_t_what_, Capacity_, &State_);
+  if (errno == EILSEQ)
+    throw Exception::wchar_t_ExceptionType::EILSEQ_(
+        "can't convert const wchar_t *const wchar_t_what_ to char *what_: "
+        "unexpected wide character");
+}
+}
Index: branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/wchar_t_exception_type.h	(revision 69632)
@@ -0,0 +1,45 @@
+// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+//
+// This program is free software; you can redistribute it and/or
+// modify it under the terms of the GNU General Public License as
+// published by the Free Software Foundation; either version 2 of the
+// License, or (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful, but
+// WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+// General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+#ifndef WCHAR_T_EXCEPTION_TYPE_H
+#define WCHAR_T_EXCEPTION_TYPE_H
+
+#include "basic_exception_type.h"
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+
+namespace Apertium {
+class wchar_t_ExceptionType : public basic_ExceptionType {
+public:
+  friend void swap(wchar_t_ExceptionType &a, wchar_t_ExceptionType &b);
+  wchar_t_ExceptionType(const wchar_t *wchar_t_what_);
+  wchar_t_ExceptionType(const std::wstring &wchar_t_what_);
+  wchar_t_ExceptionType(const std::wstringstream &wchar_t_what_);
+  wchar_t_ExceptionType(const wchar_t_ExceptionType &wchar_t_ExceptionType_);
+  wchar_t_ExceptionType &
+  operator=(wchar_t_ExceptionType wchar_t_ExceptionType_);
+  virtual ~wchar_t_ExceptionType() throw();
+  const char *what() const throw(); // returns the what_ buffer
+
+private:
+  static std::size_t size(const wchar_t *wchar_t_what_); // converted length + 1
+  void constructor(const wchar_t *wchar_t_what_); // converts message into what_
+  char *what_; // allocated with new[] by the ctors, deleted in the destructor
+};
+}
+
+#endif // WCHAR_T_EXCEPTION_TYPE_H
Index: branches/apertium-tagger/apertium2/apertium/apertium-desmediawiki.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desmediawiki.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desmediawiki.1	(revision 69632)
@@ -0,0 +1,46 @@
+.TH apertium-desmediawiki 1 2009-08-30 "" ""
+.SH NAME
+apertium-desmediawiki \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-desmediawiki
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desmediawiki
+is a processor for mediawiki XML dumps (i.e., those produced using 
+Special:Export). Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of a text file and produces output suitable for
+processing with lt-proc. Format information (newlines, tabs, etc.) is enclosed in brackets so that lt-proc treats it as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "gener" | apertium-desmediawiki | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-deshtml\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated links - [[page|alternative text]], [[link]]s, etc. are not
+supported.
+.PP
+The mediawiki parser has special support for mixing literal apostrophes and 
+apostrophes used as formatting. This is not supported either.
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-header.sh	(revision 69632)
@@ -0,0 +1,660 @@
+# -*- sh-basic-offset: 2 -*-
+
+# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+
+# Print the command-line usage summary on stdout and terminate the script
+# with exit status 1.
+message ()
+{
+  cat <<EOF
+USAGE: $(basename $0) [-d datadir] [-f format] [-u] <direction> [in [out]]
+ -d datadir       directory of linguistic data
+ -f format        one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx,
+                  xpresstag, html-noent, latex, latex-raw
+ -a               display ambiguity
+ -u               don't display marks '*' for unknown words
+ -n               don't insert period before possible sentence-ends
+ -m memory.tmx    use a translation memory to recycle translations
+ -o direction     translation direction using the translation memory,
+                  by default 'direction' is used instead
+ -l               lists the available translation directions and exits
+ direction        typically, LANG1-LANG2, but see modes.xml in language data
+ in               input file (stdin by default)
+ out              output file (stdout by default)
+EOF
+  exit 1
+}
+
+# Print, indented by two blanks, the name of every *.mode file found in
+# $DATADIR/modes with its directory and ".mode" suffix removed.
+list_directions ()
+{
+  local modefile
+  for modefile in "$DATADIR"/modes/*.mode; do
+    printf '  %s\n' "$(basename "$modefile" .mode)"
+  done
+}
+
+# Export the first installed UTF-8 locale as LC_CTYPE; print an error and
+# exit with status 1 when the system has none.
+locale_utf8 ()
+{
+  # locale -a spells the codeset either "utf8" or "UTF-8" depending on the
+  # platform, so accept an optional hyphen.
+  LC_CTYPE=$(locale -a | grep -i -E "utf-?8" | head -1)
+  export LC_CTYPE
+  # The variable has to be expanded: the bare word LC_CTYPE is never equal
+  # to the empty string, which made this check unreachable.
+  if [ "$LC_CTYPE" = "" ]; then
+    echo "Error: Install an UTF-8 locale in your system";
+    exit 1;
+  fi
+}
+
+# Export the first installed locale whose name contains "8859-1" or "@euro"
+# as LC_CTYPE; print an error and exit with status 1 when there is none.
+locale_latin1 ()
+{
+  LC_CTYPE=$(locale -a | grep -i -e "8859-1" -e "@euro" | head -1)
+  export LC_CTYPE
+  # The variable has to be expanded: the bare word LC_CTYPE is never equal
+  # to the empty string, which made this check unreachable.
+  if [ "$LC_CTYPE" = "" ]; then
+    echo "Error: Install a Latin-1 locale in your system";
+    exit 1;
+  fi
+}
+
+test_zip ()
+{
+  if [ "$(which zip)" = "" ]; then
+    echo "Error: Install 'zip' command in your system";
+    exit 1;
+  fi
+
+  if [ "$(which unzip)" = "" ]; then
+    echo "Error: Install 'unzip' command in your system";
+    exit 1;
+  fi
+}
+
+test_gawk ()
+{
+  GAWK=$(which gawk)
+  if [ "$GAWK" = "" ]; then
+    echo "Error: Install 'gawk' in your system"
+    exit 1
+  fi
+}
+
+
+# Translate a LaTeX document.  The input is spooled to a temporary file when
+# it comes from stdin, then run through prelatex, fixlatex and deslatex, the
+# optional translation memory, the selected mode, and finally relatex and
+# postlatex-raw.  Output goes to stdout, or to $SALIDA when $REDIR is set.
+translate_latex()
+{
+  test_gawk
+
+  # Spool stdin into a file: the input is read both by file(1) below and by
+  # apertium-prelatex.
+  if [ "$INFILE" = ""  -o "$INFILE" = /dev/stdin ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+
+  # NOTE(review): UTF-8 input selects the Latin-1 locale and any other input
+  # the UTF-8 one; this looks inverted -- confirm the intent.
+  if [ "$(file -b --mime-encoding "$INFILE")" == "utf-8" ]; then
+    locale_latin1
+  else locale_utf8
+  fi
+
+  "$APERTIUM_PATH/apertium-prelatex" "$INFILE" | \
+    "$APERTIUM_PATH/apertium-utils-fixlatex" | \
+    "$APERTIUM_PATH/apertium-deslatex" ${FORMAT_OPTIONS} | \
+    if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+    then cat;
+    else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+    fi | \
+      if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+      # the mode file is not executable: run it through sh
+      sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+    else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+    fi | \
+      "$APERTIUM_PATH/apertium-relatex"| \
+      awk '{gsub("</CONTENTS-noeos>", "</CONTENTS>"); print;}' | \
+      if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-postlatex-raw"; else "$APERTIUM_PATH/apertium-postlatex-raw" > "$SALIDA"; fi
+
+    # remove the spooled copy of stdin, if one was made above
+    if [ "$BORRAFICHERO" = "true" ]; then
+      rm -Rf "$INFILE"
+    fi
+}
+
+
+# Translate a LaTeX document given in "raw" form.  The input is spooled to a
+# temporary file when it comes from stdin, then run through prelatex,
+# fixlatex and deslatex, the optional translation memory, the selected mode,
+# and finally relatex and postlatex-raw.  Output goes to stdout, or to
+# $SALIDA when $REDIR is set.
+translate_latex_raw()
+{
+  test_gawk
+
+  # Spool stdin into a file: the input is read both by file(1) below and by
+  # apertium-prelatex.
+  if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+
+  # NOTE(review): UTF-8 input selects the Latin-1 locale and any other input
+  # the UTF-8 one; this looks inverted -- confirm the intent.
+  if [ "$(file -b --mime-encoding "$INFILE")" = "utf-8" ]; then
+    locale_latin1
+  else locale_utf8
+  fi
+
+  "$APERTIUM_PATH/apertium-prelatex" "$INFILE" | \
+    "$APERTIUM_PATH/apertium-utils-fixlatex" | \
+    "$APERTIUM_PATH/apertium-deslatex" ${FORMAT_OPTIONS} | \
+    if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+    then cat;
+    else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+    fi | \
+      if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+      # the mode file is not executable: run it through sh
+      sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+    else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+    fi | \
+      "$APERTIUM_PATH/apertium-relatex"| \
+      awk '{gsub("</CONTENTS-noeos>", "</CONTENTS>"); print;}' | \
+      if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-postlatex-raw"; else "$APERTIUM_PATH/apertium-postlatex-raw" > "$SALIDA"; fi
+
+  # Remove the spooled copy of stdin, as translate_latex does; previously
+  # the temporary file was left behind.
+  if [ "$BORRAFICHERO" = "true" ]; then
+    rm -Rf "$INFILE"
+  fi
+}
+
+
+# Translate an OpenDocument text file.  The archive is unpacked into a
+# temporary directory, content.xml and styles.xml are translated in place,
+# and the directory is zipped again.  The resulting archive is written to
+# stdout, or to $SALIDA when $REDIR is set.
+translate_odt ()
+{
+  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
+
+  locale_utf8
+  test_zip
+
+  # INFILE defaults to /dev/stdin, so test for it as well as for the empty
+  # string: unzip is handed a regular file instead of a possibly piped
+  # stdin, and the spooled copy is removed below.
+  if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+  OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+
+  unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
+  # First awk: emit one line per XML part, prefixed with a <file name="..."/>
+  # marker.  Last awk: split each translated line on that marker and write
+  # the remainder back to the named file.
+  find "$INPUT_TMPDIR" | grep "content\\.xml\\|styles\\.xml" |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  "$APERTIUM_PATH/apertium-desodt" ${FORMAT_OPTIONS} |\
+  if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+  then cat;
+  else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+  fi | \
+    if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+    sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  fi | \
+    "$APERTIUM_PATH/apertium-reodt"|\
+  awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
+  VUELVE=$(pwd)
+  cd "$INPUT_TMPDIR"
+  rm -Rf ObjectReplacements
+  zip -q -r - . >"$OTRASALIDA"
+  cd "$VUELVE"
+  rm -Rf "$INPUT_TMPDIR"
+
+  if [ "$BORRAFICHERO" = "true" ]; then
+    rm -Rf "$INFILE";
+  fi
+
+  if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
+  rm -Rf "$OTRASALIDA"
+  rm -Rf "$TMCOMPFILE"
+}
+
+translate_docx ()
+{
+  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
+
+  locale_utf8
+  test_zip
+
+  if [ "$INFILE" = "" ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+  OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+
+  if [ "$UWORDS" = "no" ]; then
+    OPCIONU="-u";
+  else OPCIONU="";
+  fi
+
+  unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
+
+  for i in $(find "$INPUT_TMPDIR"|grep "xlsx$");
+  do LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX");
+    "$APERTIUM_PATH/apertium" -f xlsx -d "$DATADIR" "$OPCIONU" "$PAIR" <"$i" >"$LOCALTEMP";
+    cp "$LOCALTEMP" "$i";
+    rm "$LOCALTEMP";
+  done;
+
+  find "$INPUT_TMPDIR" | grep "xml" |\
+  grep -v -i \\\(settings\\\|theme\\\|styles\\\|font\\\|rels\\\|docProps\\\) |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  "$APERTIUM_PATH/apertium-deswxml" ${FORMAT_OPTIONS} |\
+  if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+  then cat;
+  else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+  fi | \
+    if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+    sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  fi | \
+    "$APERTIUM_PATH/apertium-rewxml"|\
+  awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
+  VUELVE=$(pwd)
+  cd "$INPUT_TMPDIR"
+  zip -q -r - . >"$OTRASALIDA"
+  cd "$VUELVE"
+  rm -Rf "$INPUT_TMPDIR"
+
+  if [ "$BORRAFICHERO" = "true" ]; then
+    rm -Rf "$INFILE";
+  fi
+
+  if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
+  rm -Rf "$OTRASALIDA"
+  rm -Rf "$TMCOMPFILE"
+}
+
+# Translate a .pptx file.  The archive is unpacked into a temporary
+# directory, embedded .xlsx files are translated by a nested apertium call,
+# the slides/slide*.xml parts are translated in place, and the directory is
+# zipped again.  The resulting archive is written to stdout, or to $SALIDA
+# when $REDIR is set.
+translate_pptx ()
+{
+  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
+
+  locale_utf8
+  test_zip
+
+  # INFILE defaults to /dev/stdin, so test for it as well as for the empty
+  # string: unzip is handed a regular file instead of a possibly piped
+  # stdin, and the spooled copy is removed below.
+  if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+  OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+
+  if [ "$UWORDS" = "no" ]; then
+    OPCIONU="-u";
+  else OPCIONU="";
+  fi
+
+  unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
+
+  # ${OPCIONU:+...} expands to nothing when the option is empty.  A quoted
+  # empty "$OPCIONU" would be passed as an empty positional argument and
+  # make the nested call take "$PAIR" as its input file.
+  for i in $(find "$INPUT_TMPDIR"|grep "xlsx$"); do
+    LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    "$APERTIUM_PATH/apertium" -f xlsx -d "$DATADIR" ${OPCIONU:+"$OPCIONU"} "$PAIR" <"$i" >"$LOCALTEMP";
+    cp "$LOCALTEMP" "$i"
+    rm "$LOCALTEMP"
+  done;
+
+  # First awk: emit one line per slide, prefixed with a <file name="..."/>
+  # marker.  Last awk: split each translated line on that marker and write
+  # the remainder back to the named file.
+  find "$INPUT_TMPDIR" | grep "xml$" |\
+  grep "slides\/slide" |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  "$APERTIUM_PATH/apertium-despptx" ${FORMAT_OPTIONS} |\
+  if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+  then cat;
+  else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+  fi | \
+    if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+    sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  fi | \
+    "$APERTIUM_PATH/apertium-repptx" |\
+  awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
+  VUELVE=$(pwd)
+  cd "$INPUT_TMPDIR"
+  zip -q -r - . >"$OTRASALIDA"
+  cd "$VUELVE"
+  rm -Rf "$INPUT_TMPDIR"
+
+  if [ "$BORRAFICHERO" = "true" ]; then
+    rm -Rf "$INFILE";
+  fi
+
+  if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
+  rm -Rf "$OTRASALIDA"
+  rm -Rf "$TMCOMPFILE"
+}
+
+
+# Translate an .xlsx file.  The archive is unpacked into a temporary
+# directory, sharedStrings.xml is translated in place, and the directory is
+# zipped again.  The resulting archive is written to stdout, or to $SALIDA
+# when $REDIR is set.
+translate_xlsx ()
+{
+  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
+
+  locale_utf8
+  test_zip
+
+  # INFILE defaults to /dev/stdin, so test for it as well as for the empty
+  # string: unzip is handed a regular file instead of a possibly piped
+  # stdin, and the spooled copy is removed below.
+  if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
+    INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+    cat > "$INFILE"
+    BORRAFICHERO="true"
+  fi
+  OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+
+  unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
+  # First awk: emit one line per matching part, prefixed with a
+  # <file name="..."/> marker.  Last awk: split each translated line on that
+  # marker and write the remainder back to the named file.
+  find "$INPUT_TMPDIR" | grep "sharedStrings.xml" |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  "$APERTIUM_PATH/apertium-desxlsx" ${FORMAT_OPTIONS} |\
+  if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+  then cat;
+  else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+  fi | \
+    if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+    sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  fi | \
+    "$APERTIUM_PATH/apertium-rexlsx" |\
+  awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
+  VUELVE=$(pwd)
+  cd "$INPUT_TMPDIR"
+  zip -q -r - . >"$OTRASALIDA"
+  cd "$VUELVE"
+  rm -Rf "$INPUT_TMPDIR"
+
+  if [ "$BORRAFICHERO" = "true" ]; then
+    rm -Rf "$INFILE";
+  fi
+
+  if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
+  rm -Rf "$OTRASALIDA"
+  rm -Rf "$TMCOMPFILE"
+}
+
+# Translate HTML using apertium-deshtml for deformatting and
+# apertium-rehtml-noent for reformatting.  Output goes to stdout, or to
+# $SALIDA when $REDIR is set.
+translate_htmlnoent ()
+{
+  "$APERTIUM_PATH/apertium-deshtml" ${FORMAT_OPTIONS} "$INFILE" | \
+    if [ "$TRANSLATION_MEMORY_FILE" = "" ]; then
+    cat
+  else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
+  fi | if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+    # the mode file is not executable: run it through sh
+    sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+  fi | if [ "$FORMAT" = "none" ]; then
+    # format "none": pass the mode output through without reformatting
+    if [ "$REDIR" == "" ]; then cat; else cat > "$SALIDA"; fi
+  else if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-rehtml-noent"; else "$APERTIUM_PATH/apertium-rehtml-noent" > "$SALIDA"; fi
+  fi
+
+  rm -Rf "$TMCOMPFILE"
+}
+
+
+
+
+
+##########################################################
+# Option and argument parsing, setting globals variables #
+##########################################################
+PATH="${APERTIUM_PATH}:${PATH}"
+[[ -z $TMPDIR ]] && TMPDIR=/tmp
+TMCOMPFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
+trap 'rm -Rf "$TMCOMPFILE"' EXIT
+
+# Default values, may be overridden below:
+PAIR=""
+INFILE="/dev/stdin"
+FORMAT="txt"
+DATADIR=$DEFAULT_DIRECTORY
+TRANSLATION_MEMORY_DIRECTION=$PAIR
+LIST_MODES_AND_EXIT=false
+FORMAT_OPTIONS=""
+
+# Skip (but store) non-option arguments that come before options, leaving
+# OPTIND at the first argument that starts with '-':
+declare -a ARGS_PREOPT
+declare -i OPTIND=1
+while [[ $OPTIND -le $# ]]; do
+  arg=${@:$OPTIND:1}
+  case $arg in
+    -*) break ;;
+    # Quoted so that an argument containing blanks or glob characters (for
+    # instance a file name) is stored as exactly one array element.
+    *) ARGS_PREOPT+=("$arg"); (( OPTIND++ )) ;;
+  esac
+done
+
+
+# Parse the options.  The leading ':' in the option string selects silent
+# error reporting, so unknown options and missing arguments reach the '\?'
+# and ':' arms below.
+while getopts ":uahlf:d:m:o:n" opt; do
+  case "$opt" in
+    f) FORMAT=$OPTARG ;;
+    d) DATADIR=$OPTARG ;;
+    m) TRANSLATION_MEMORY_FILE=$OPTARG ;;
+    o) TRANSLATION_MEMORY_DIRECTION=$OPTARG ;;
+    u) UWORDS="no" ;;
+    n) FORMAT_OPTIONS="-n" ;;
+    a) OPTION_TAGGER="-m" ;;
+    l) LIST_MODES_AND_EXIT=true ;;
+    h) message ;;
+    \?) echo "ERROR: Unknown option $OPTARG"; message ;;
+    :) echo "ERROR: $OPTARG requires an argument"; message ;;
+  esac
+done
+# Drop everything consumed so far (pre-option arguments and options).
+shift $(($OPTIND-1))
+
+# -l: print the available directions and stop.
+if [[ $LIST_MODES_AND_EXIT = true ]]; then
+  list_directions
+  exit 0
+fi
+
+# Restore non-option arguments that came before options back into arg list:
+set -- "${ARGS_PREOPT[@]}" "$@"
+
+# Positional arguments are: <direction> [in [out]]; anything else prints the
+# usage text and exits.
+if [[ $# -lt 1 || $# -gt 3 ]]; then
+  message
+fi
+
+PAIR=$1
+if [[ $# -ge 2 ]]; then
+  INFILE=$2
+  if [[ $# -eq 3 ]]; then
+    SALIDA=$3
+    REDIR=">"
+  fi
+  if [[ ! -e "$INFILE" ]]; then
+    echo "Error: file '$INFILE' not found."
+    message
+  fi
+fi
+
+
+# Compile the translation memory given with -m into $TMCOMPFILE.
+if [[ -n $TRANSLATION_MEMORY_FILE ]]; then
+  # Without -o, use the translation direction, as the usage text promises.
+  # The default assigned before option parsing is empty, because PAIR had
+  # not been read yet at that point.
+  if [[ -z $TRANSLATION_MEMORY_DIRECTION ]]; then
+    TRANSLATION_MEMORY_DIRECTION=$PAIR
+  fi
+  "$APERTIUM_PATH/lt-tmxcomp" "$TRANSLATION_MEMORY_DIRECTION" "$TRANSLATION_MEMORY_FILE" "$TMCOMPFILE" >/dev/null
+  if [ "$?" != "0" ]; then
+    echo "Error: Cannot compile TM '$TRANSLATION_MEMORY_FILE'"
+    # A blank is required after 'echo'; without it the shell looks for a
+    # command whose name starts with "echo   hint:".
+    echo "   hint: use -o parameter"
+    message
+  fi
+fi
+
+# The data directory must contain a 'modes' subdirectory.
+[[ -d "$DATADIR/modes" ]] || {
+  echo "Error: Directory '$DATADIR/modes' does not exist."
+  message
+}
+
+# The requested mode must exist; when the modes directory has any entries,
+# list the available directions as a hint.
+if [[ ! -e "$DATADIR/modes/$PAIR.mode" ]]; then
+  entries=$(find "$DATADIR/modes"|wc -l)
+  if [ "$entries" -le 1 ]; then
+    echo "Error: Mode $PAIR does not exist."
+  else
+    echo "Error: Mode $PAIR does not exist. Try one of:"
+    list_directions
+  fi
+  exit 1
+fi
+
+# Optional parameter; when absent, input is read from standard input (stdin)
+
+# Select the OPTION passed to the mode ("-n" when -u was given, "-g"
+# otherwise) and dispatch on the format.  Archive and LaTeX formats are
+# handled by their translate_* function and exit here; the remaining formats
+# fall through to the generic pipeline after the case statement.  The
+# formats ending in 'u' force OPTION="-n".
+case "$FORMAT" in
+  none)
+    if [ "$UWORDS" = "no" ]; then
+      OPTION="-n";
+    else OPTION="-g";
+    fi
+    ;;
+  txt|rtf|html|xpresstag|mediawiki)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    ;;
+  # NOTE(review): this arm can never match, because "rtf" is already listed
+  # in the pattern above; the locale setup below is therefore never run for
+  # -f rtf.  Confirm which behaviour is intended.
+  rtf)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    MILOCALE=$(locale -a|grep -i -v "utf\|^C$\|^POSIX$"|head -1);
+    if [ "$MILOCALE" = "" ]; then
+      echo "Error: Install a ISO-8859-1 compatible locale in your system";
+      exit 1;
+    fi
+    export LC_CTYPE=$MILOCALE
+    ;;
+
+  odt)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_odt
+    exit 0
+    ;;
+  latex)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_latex
+    exit 0
+    ;;
+  latex-raw)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_latex_raw
+    exit 0
+    ;;
+  
+  
+  docx)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_docx
+    exit 0
+    ;;
+  xlsx)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_xlsx
+    exit 0
+    ;;
+  pptx)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_pptx
+    exit 0
+    ;;
+  html-noent)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    translate_htmlnoent
+    exit 0
+    ;;
+  
+  wxml)
+    if [ "$UWORDS" = "no" ]; then OPTION="-n";
+    else OPTION="-g";
+    fi;
+    locale_utf8
+    ;;
+  
+  txtu)
+    FORMAT="txt";
+    OPTION="-n"
+    ;;
+  htmlu)
+    FORMAT="html";
+    OPTION="-n";
+    ;;
+  xpresstagu)
+    FORMAT="xpresstag";
+    OPTION="-n";
+    ;;
+  rtfu)
+    FORMAT="rtf";
+    OPTION="-n";
+    # pick the first locale that is neither UTF-8, C nor POSIX
+    MILOCALE=$(locale -a|grep -i -v "utf\|^C$\|^POSIX$"|head -1);
+    if [ "$MILOCALE" = "" ]; then
+      echo "Error: Install a ISO-8859-1 compatible locale in your system";
+      exit 1;
+    fi
+    export LC_CTYPE=$MILOCALE
+    ;;
+
+  odtu)
+    OPTION="-n"
+    translate_odt
+    exit 0
+    ;;
+
+  docxu)
+    OPTION="-n"
+    translate_docx
+    exit 0
+    ;;
+
+  xlsxu)
+    OPTION="-n"
+    translate_xlsx
+    exit 0
+    ;;
+
+  pptxu)
+    OPTION="-n"
+    translate_pptx
+    exit 0
+    ;;
+
+  wxmlu)
+    OPTION="-n";
+    locale_utf8
+    ;;
+
+
+
+  *) # By default we assume txt
+    FORMAT="txt"
+    OPTION="-g"
+    ;;
+esac
+
+# REF defaults to the selected format when it is not already set.
+if [ -z "$REF" ]
+then
+    REF=$FORMAT
+fi
+
+# From here on, abort on the first failing command or pipeline stage.
+set -e -o pipefail
+
+# Generic pipeline: deformatter -> optional translation memory -> mode ->
+# reformatter.  With format "none" the input and the mode output are passed
+# through cat instead of a deformatter/reformatter.  Output goes to stdout,
+# or to $SALIDA when $REDIR is set.
+if [ "$FORMAT" = "none" ]; then
+    cat "$INFILE"
+else
+  "$APERTIUM_PATH/apertium-des$FORMAT" ${FORMAT_OPTIONS} "$INFILE"
+fi | if [ "$TRANSLATION_MEMORY_FILE" = "" ];
+     then
+         cat
+     else
+       "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE"
+     fi | if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
+              # the mode file is not executable: run it through sh
+              sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+          else
+            "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
+          fi | if [ "$FORMAT" = "none" ]; then
+                   if [ "$REDIR" = "" ]; then
+                       cat
+                   else
+                     cat > "$SALIDA"
+                   fi
+               else
+                 if [ "$REDIR" = "" ]; then
+                     "$APERTIUM_PATH/apertium-re$FORMAT"
+                 else
+                   "$APERTIUM_PATH/apertium-re$FORMAT" > "$SALIDA"
+                 fi
+               fi
+
Index: branches/apertium-tagger/apertium2/apertium/postchunk.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/postchunk.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/postchunk.cc	(revision 69632)
@@ -0,0 +1,2074 @@
+/*
+ * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/postchunk.h>
+#include <apertium/trx_reader.h>
+#include <apertium/utf_converter.h>
+#include <lttoolbox/compression.h>
+#include <lttoolbox/xml_parse_util.h>
+
+#include <cctype>
+#include <cerrno>
+#include <iostream>
+#include <stack>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+using namespace Apertium;
+using namespace std;
+
+/**
+ * Release the rule matcher and the parsed rules document, resetting both
+ * members to NULL afterwards.
+ */
+void
+Postchunk::destroy()
+{
+  // deleting a null pointer is a no-op, so no guard is needed
+  delete me;
+  me = NULL;
+
+  if(doc != NULL)
+  {
+    xmlFreeDoc(doc);
+    doc = NULL;
+  }
+}
+
+/**
+ * Construct an empty processor: every buffer, counter and pointer starts at
+ * zero/NULL and all flags start cleared.
+ */
+Postchunk::Postchunk() :
+word(0), blank(0),
+lword(0), lblank(0),
+output(0),
+any_char(0), any_tag(0),
+nwords(0)
+{
+  // flags
+  inword = false;
+  null_flush = false;
+  internal_null_flush = false;
+
+  // pointers filled in later by read()
+  me = NULL;
+  doc = NULL;
+  root_element = NULL;
+  lastrule = NULL;
+}
+
+/**
+ * Destructor: releases everything destroy() releases.
+ */
+Postchunk::~Postchunk()
+{
+  destroy();
+}
+
+/**
+ * Load the compiled data from an already opened binary stream.  The
+ * sections are consumed in stream order: alphabet, transducer, final
+ * states, attribute patterns, variables, macros and lists.
+ *
+ * @param in stream positioned at the beginning of the compiled data
+ */
+void
+Postchunk::readData(FILE *in)
+{
+  alphabet.read(in);
+  any_char = alphabet(TRXReader::ANY_CHAR);
+  any_tag = alphabet(TRXReader::ANY_TAG);
+
+  Transducer transducer;
+  transducer.read(in, alphabet.size());
+
+  // final states: a count followed by (key, value) integer pairs
+  map<int, int> final_states;
+  int const n_finals = Compression::multibyte_read(in);
+  for(int n = 0; n != n_finals; ++n)
+  {
+    int const state = Compression::multibyte_read(in);
+    final_states[state] = Compression::multibyte_read(in);
+  }
+
+  me = new MatchExe(transducer, final_states);
+
+  // attribute patterns: when the PCRE version string stored in the stream
+  // differs from the one reported by pcre_version(), each pattern is
+  // compiled again from the string stored right after it
+  bool const must_recompile = Compression::string_read(in) != string(pcre_version());
+  int const n_attrs = Compression::multibyte_read(in);
+  for(int n = 0; n != n_attrs; ++n)
+  {
+    string const name = UtfConverter::toUtf8(Compression::wstring_read(in));
+    attr_items[name].read(in);
+    wstring const pattern_source = Compression::wstring_read(in);
+    if(must_recompile)
+    {
+      attr_items[name].compile(UtfConverter::toUtf8(pattern_source));
+    }
+  }
+
+  // variables: name -> initial value
+  int const n_vars = Compression::multibyte_read(in);
+  for(int n = 0; n != n_vars; ++n)
+  {
+    string const name = UtfConverter::toUtf8(Compression::wstring_read(in));
+    variables[name] = UtfConverter::toUtf8(Compression::wstring_read(in));
+  }
+
+  // macros: name -> integer
+  int const n_macros = Compression::multibyte_read(in);
+  for(int n = 0; n != n_macros; ++n)
+  {
+    string const name = UtfConverter::toUtf8(Compression::wstring_read(in));
+    macros[name] = Compression::multibyte_read(in);
+  }
+
+  // lists: each entry is stored as read and also in lower case
+  int const n_lists = Compression::multibyte_read(in);
+  for(int n = 0; n != n_lists; ++n)
+  {
+    string const name = UtfConverter::toUtf8(Compression::wstring_read(in));
+
+    int const n_entries = Compression::multibyte_read(in);
+    for(int m = 0; m != n_entries; ++m)
+    {
+      wstring const entry = Compression::wstring_read(in);
+      lists[name].insert(UtfConverter::toUtf8(entry));
+      listslow[name].insert(UtfConverter::toUtf8(StringUtils::tolower(entry)));
+    }
+  }
+}
+
+/**
+ * Load both inputs of the processor: the XML rules file and the compiled
+ * data file.  Exits the program when the data file cannot be opened.
+ *
+ * @param transferfile path of the XML rules file
+ * @param datafile     path of the compiled binary data file
+ */
+void
+Postchunk::read(string const &transferfile, string const &datafile)
+{
+  readPostchunk(transferfile);
+
+  FILE *data = fopen(datafile.c_str(), "rb");
+  if(data == NULL)
+  {
+    cerr << "Error: Could not open file '" << datafile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  readData(data);
+  fclose(data);
+}
+
+/**
+ * Parse the XML rules file and hand its macro and rule sections to
+ * collectMacros() and collectRules().  Exits the program when the file
+ * cannot be parsed.
+ *
+ * @param in path of the XML rules file
+ */
+void
+Postchunk::readPostchunk(string const &in)
+{
+  doc = xmlReadFile(in.c_str(), NULL, 0);
+  if(!doc)
+  {
+    cerr << "Error: Could not parse file '" << in << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  root_element = xmlDocGetRootElement(doc);
+
+  // search for macros & rules among the children of the root element
+  for(xmlNode *section = root_element->children; section != NULL; section = section->next)
+  {
+    if(section->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    if(xmlStrcmp(section->name, (const xmlChar *) "section-def-macros") == 0)
+    {
+      collectMacros(section);
+    }
+    else if(xmlStrcmp(section->name, (const xmlChar *) "section-rules") == 0)
+    {
+      collectRules(section);
+    }
+  }
+}
+
+/**
+ * Append to rule_map the "action" element of every element child of the
+ * given section, in document order.  Exits the program when a child has no
+ * "action" element.
+ *
+ * @param localroot the "section-rules" element
+ */
+void
+Postchunk::collectRules(xmlNode *localroot)
+{
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      bool found = false;
+      // The walk must stop at the end of the sibling list: without the NULL
+      // test a rule lacking an "action" child dereferenced a null pointer.
+      for(xmlNode *j = i->children; j != NULL; j = j->next)
+      {
+        if(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "action"))
+        {
+          rule_map.push_back(j);
+          found = true;
+          break;
+        }
+      }
+
+      if(!found)
+      {
+        // Skipping the rule silently would shift the position of every
+        // later entry in rule_map, so report the problem and stop.
+        cerr << "Error: rule without 'action' element at line " << i->line << "." << endl;
+        exit(EXIT_FAILURE);
+      }
+    }
+  }
+}
+
+/**
+ * Append every element child of the given section to macro_map, in
+ * document order.
+ *
+ * @param localroot the "section-def-macros" element
+ */
+void
+Postchunk::collectMacros(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    macro_map.push_back(child);
+  }
+}
+
+/**
+ * Check that a position does not exceed a limit, reporting the rules file
+ * and the line of the offending element on wcerr when it does.
+ *
+ * @param element element whose line number is reported
+ * @param index   position to check
+ * @param limit   largest accepted position
+ * @return true when index <= limit, false otherwise
+ */
+bool
+Postchunk::checkIndex(xmlNode *element, int index, int limit)
+{
+  bool const in_range = index <= limit;
+  if(!in_range)
+  {
+    wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) <<L": line " << element->line << endl;
+  }
+  return in_range;
+}
+
+
+/**
+ * Evaluate a string-valued expression element of the rules file.
+ *
+ * The first time a cacheable element (clip, lit-tag, lit, b, get-case-from,
+ * var, lu-count, case-of) is seen, a TransferInstr describing it is stored
+ * in evalStringCache and the function calls itself again, so that the
+ * actual evaluation always goes through the cached branch at the top.
+ * The composite elements concat, lu and mlu are not cached: they evaluate
+ * their children and return directly.  Any other element name prints an
+ * error and exits the program.
+ *
+ * @param element expression element to evaluate
+ * @return the resulting string; "" when a position check fails
+ */
+string 
+Postchunk::evalString(xmlNode *element)
+{
+  map<xmlNode *, TransferInstr>::iterator it;
+  it = evalStringCache.find(element);
+  // Fast path: the element was analysed before, evaluate the cached form.
+  if(it != evalStringCache.end())
+  {
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
+    {
+      case ti_clip_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
+        }
+        break;
+        
+      case ti_lu_count:
+        return StringUtils::itoa_string(tmpword.size());
+
+      case ti_var:
+        return variables[ti.getContent()];
+        
+      case ti_lit_tag:
+      case ti_lit:
+        return ti.getContent();
+        
+      case ti_b:
+        if(checkIndex(element, ti.getPos(), lblank))
+        {
+          // a negative position is stored for <b/> without attributes and
+          // yields a single blank
+          if(ti.getPos() >= 0)
+          {
+            return !blank?"":*(blank[ti.getPos()]);
+          }
+          return " ";
+        }
+        break;
+        
+      case ti_get_case_from:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
+                          evalString((xmlNode *) ti.getPointer()));
+        }
+        break;
+        
+      case ti_case_of_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return caseOf(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
+        }
+        break;
+        
+      default:
+        return "";
+    }
+    // reached when a position check above failed
+    return "";
+  }
+
+  // Slow path: analyse the element and fill the cache.
+  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *)i->children->content);
+      }
+    }
+
+    evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lit_tag, 
+                                             tags((const char *) element->properties->children->content), 0);                                            
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lit, string((char *) element->properties->children->content), 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
+  {
+    if(element->properties == NULL)
+    {
+      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
+    }
+    else
+    {
+      // the attribute value is 1-based; the cached position is 0-based
+      int pos = atoi((const char *) element->properties->children->content) - 1;
+      evalStringCache[element] = TransferInstr(ti_b, "", pos);
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
+  {
+    int pos = atoi((const char *) element->properties->children->content);
+    // the first element child is the expression whose case is changed
+    xmlNode *param = NULL;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+	param = i;
+	break;
+      }
+    }
+
+    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
+  {
+    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu-count"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lu_count, "", 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content);
+      }
+    }
+      
+    evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
+  { 
+    // concatenation of the values of all element children
+    string value;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+        value.append(evalString(i));
+      }
+    }
+    return value;
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lu"))
+  {
+    // a lexical unit: children concatenated and wrapped in ^...$, or ""
+    // when the children produce nothing
+    string myword;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+       if(i->type == XML_ELEMENT_NODE)
+       {
+         myword.append(evalString(i));
+       }
+    }
+    
+    if(myword != "")
+    {
+      return "^"+myword+"$";
+    }
+    else
+    {
+      return "";
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "mlu"))
+  {
+    // a multi-word unit: the non-empty parts are joined with '+' and the
+    // result is wrapped in ^...$
+    string value;
+     	  
+    bool first_time = true;
+    
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+        string myword;
+	 
+        for(xmlNode *j = i->children; j != NULL; j = j->next)
+        {
+          if(j->type == XML_ELEMENT_NODE)
+	  {
+            myword.append(evalString(j));
+	  }
+        }
+	      
+	if(!first_time)
+	{
+	  // no '+' is inserted in front of a part that begins with '#'
+	  if(myword != "" && myword[0] != '#')  //'+#' problem
+	  {
+	    value.append("+");
+          }
+	}
+	else
+	{
+	  if(myword != "")
+	  {
+	    first_time = false;
+          }
+	}
+	 
+	value.append(myword);
+      }
+    }
+
+    if(value != "")
+    {
+      return "^"+value+"$";
+    }
+    else
+    {
+      return "";
+    }
+  }
+  
+  else
+  {
+    cerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // the element is cached now: evaluate it through the fast path
+  return evalString(element);
+}
+
+/**
+ * Execute an "out" instruction: write the value of each element child to
+ * the output stream.  "lu" children are written as ^...$ when non-empty,
+ * "mlu" children as ^...$ with their non-empty parts joined by '+', and
+ * any other child is written as the plain result of evalString().
+ *
+ * @param localroot the "out" element
+ */
+void
+Postchunk::processOut(xmlNode *localroot)
+{
+  for(xmlNode *item = localroot->children; item != NULL; item = item->next)
+  {
+    if(item->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    if(xmlStrcmp(item->name, (const xmlChar *) "lu") == 0)
+    {
+      string unit;
+      for(xmlNode *part = item->children; part != NULL; part = part->next)
+      {
+        if(part->type == XML_ELEMENT_NODE)
+        {
+          unit.append(evalString(part));
+        }
+      }
+
+      // an empty lexical unit produces no output at all
+      if(unit != "")
+      {
+        fputwc_unlocked(L'^', output);
+        fputws_unlocked(UtfConverter::fromUtf8(unit).c_str(), output);
+        fputwc_unlocked(L'$', output);
+      }
+    }
+    else if(xmlStrcmp(item->name, (const xmlChar *) "mlu") == 0)
+    {
+      fputwc_unlocked(L'^', output);
+
+      // becomes true once the first non-empty part has been written
+      bool wrote_part = false;
+      for(xmlNode *lu = item->children; lu != NULL; lu = lu->next)
+      {
+        if(lu->type != XML_ELEMENT_NODE)
+        {
+          continue;
+        }
+
+        string unit;
+        for(xmlNode *part = lu->children; part != NULL; part = part->next)
+        {
+          if(part->type == XML_ELEMENT_NODE)
+          {
+            unit.append(evalString(part));
+          }
+        }
+
+        if(unit != "")
+        {
+          if(wrote_part)
+          {
+            fputwc_unlocked('+', output);
+          }
+          else
+          {
+            wrote_part = true;
+          }
+        }
+        fputws_unlocked(UtfConverter::fromUtf8(unit).c_str(), output);
+      }
+
+      fputwc_unlocked(L'$', output);
+    }
+    else // 'b'
+    {
+      fputws_unlocked(UtfConverter::fromUtf8(evalString(item)).c_str(), output);
+    }
+  }
+}
+
+/**
+ * Write to the output stream the value of every element found inside the
+ * "tag" children of the given element.
+ *
+ * @param localroot element whose "tag" children are processed
+ */
+void
+Postchunk::processTags(xmlNode *localroot)
+{
+  for(xmlNode *tag = localroot->children; tag != NULL; tag = tag->next)
+  {
+    if(tag->type != XML_ELEMENT_NODE || xmlStrcmp(tag->name, (xmlChar const *) "tag") != 0)
+    {
+      continue;
+    }
+
+    for(xmlNode *value = tag->children; value != NULL; value = value->next)
+    {
+      if(value->type != XML_ELEMENT_NODE)
+      {
+        continue;
+      }
+      fputws_unlocked(UtfConverter::fromUtf8(evalString(value)).c_str(), output);
+    }
+  }
+}
+
+/**
+ * Dispatches one instruction node to its handler according to the element
+ * name: choose, let, append, out, call-macro or modify-case.  Nodes with
+ * any other name are ignored.
+ * @param localroot the instruction element to execute
+ */
+void
+Postchunk::processInstruction(xmlNode *localroot)
+{
+  xmlChar const *tagname = localroot->name;
+
+  if(xmlStrcmp(tagname, (const xmlChar *) "choose") == 0)
+  {
+    processChoose(localroot);
+    return;
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "let") == 0)
+  {
+    processLet(localroot);
+    return;
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "append") == 0)
+  {
+    processAppend(localroot);
+    return;
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "out") == 0)
+  {
+    processOut(localroot);
+    return;
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "call-macro") == 0)
+  {
+    processCallMacro(localroot);
+    return;
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "modify-case") == 0)
+  {
+    processModifyCase(localroot);
+  }
+}
+
+/**
+ * Executes a "let" instruction: the first element child is the target,
+ * the second one is the expression whose evalString() value is assigned.
+ *
+ * A target that is already present in evalStringCache is handled from the
+ * cached TransferInstr (ti_var: variable assignment, ti_clip_tl: chunk part
+ * of a word; any other cached type does nothing).  Otherwise a "var" or
+ * "clip" target is decoded from its attributes, the assignment is done and
+ * the decoded instruction is stored in the cache.
+ * @param localroot the "let" element
+ */
+void
+Postchunk::processLet(xmlNode *localroot)
+{
+  xmlNode *leftSide = NULL;
+  xmlNode *rightSide = NULL;
+
+  // pick the first two element children, skipping any other node kind
+  for(xmlNode *child = localroot->children;
+      child != NULL && rightSide == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(leftSide == NULL)
+    {
+      leftSide = child;
+    }
+    else
+    {
+      rightSide = child;
+    }
+  }
+
+  map<xmlNode *, TransferInstr>::iterator cached = evalStringCache.find(leftSide);
+  if(cached != evalStringCache.end())
+  {
+    TransferInstr &instr = cached->second;
+    switch(instr.getType())
+    {
+      case ti_var:
+        variables[instr.getContent()] = evalString(rightSide);
+        break;
+
+      case ti_clip_tl:
+        word[instr.getPos()]->setChunkPart(attr_items[instr.getContent()], evalString(rightSide));
+        break;
+
+      default:
+        break;
+    }
+    return;
+  }
+
+  if(xmlStrcmp(leftSide->name, (const xmlChar *) "var") == 0)
+  {
+    // the variable name is the value of the first attribute
+    string const val = (const char *) leftSide->properties->children->content;
+    variables[val] = evalString(rightSide);
+    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
+    return;
+  }
+
+  if(xmlStrcmp(leftSide->name, (const xmlChar *) "clip") != 0)
+  {
+    return;
+  }
+
+  int pos = 0;
+  xmlChar *part = NULL;
+  for(xmlAttr *attr = leftSide->properties; attr != NULL; attr = attr->next)
+  {
+    if(xmlStrcmp(attr->name, (const xmlChar *) "part") == 0)
+    {
+      part = attr->children->content;
+    }
+    else if(xmlStrcmp(attr->name, (const xmlChar *) "pos") == 0)
+    {
+      pos = atoi((const char *) attr->children->content);
+    }
+  }
+
+  word[pos]->setChunkPart(attr_items[(const char *) part],
+                          evalString(rightSide));
+  evalStringCache[leftSide] = TransferInstr(ti_clip_tl, (const char *) part,
+                                            pos, NULL);
+}
+
+/**
+ * Executes an "append" instruction: the variable named by attribute "n"
+ * gets the evalString() value of every element child appended, in order.
+ * If no "n" attribute is present the empty name is used.
+ * @param localroot the "append" element
+ */
+void
+Postchunk::processAppend(xmlNode *localroot)
+{
+  string name;
+  xmlAttr *attr = localroot->properties;
+  while(attr != NULL)
+  {
+    if(xmlStrcmp(attr->name, (const xmlChar *) "n") == 0)
+    {
+      name = (char *) attr->children->content;
+      break;
+    }
+    attr = attr->next;
+  }
+
+  for(xmlNode *piece = localroot->children; piece != NULL; piece = piece->next)
+  {
+    if(piece->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    variables[name].append(evalString(piece));
+  }
+}
+
+/**
+ * Executes a "modify-case" instruction.  The first element child is the
+ * target ("clip" or "var"), the second supplies the case pattern: the
+ * target's current value is passed through copycase() together with the
+ * evaluated second child and the result is stored back in the target.
+ * Targets with any other element name are left untouched.
+ * @param localroot the "modify-case" element
+ */
+void
+Postchunk::processModifyCase(xmlNode *localroot)
+{
+  xmlNode *leftSide = NULL;
+  xmlNode *rightSide = NULL;
+
+  // pick the first two element children
+  for(xmlNode *child = localroot->children;
+      child != NULL && rightSide == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(leftSide == NULL)
+    {
+      leftSide = child;
+    }
+    else
+    {
+      rightSide = child;
+    }
+  }
+
+  if(xmlStrcmp(leftSide->name, (const xmlChar *) "var") == 0)
+  {
+    string const val = (const char *) leftSide->properties->children->content;
+    variables[val] = copycase(evalString(rightSide), variables[val]);
+    return;
+  }
+
+  if(xmlStrcmp(leftSide->name, (const xmlChar *) "clip") != 0)
+  {
+    return;
+  }
+
+  int pos = 0;
+  xmlChar *part = NULL;
+  for(xmlAttr *attr = leftSide->properties; attr != NULL; attr = attr->next)
+  {
+    if(xmlStrcmp(attr->name, (const xmlChar *) "part") == 0)
+    {
+      part = attr->children->content;
+    }
+    else if(xmlStrcmp(attr->name, (const xmlChar *) "pos") == 0)
+    {
+      pos = atoi((const char *) attr->children->content);
+    }
+  }
+
+  string const result = copycase(evalString(rightSide),
+                                 word[pos]->chunkPart(attr_items[(const char *) part]));
+  word[pos]->setChunkPart(attr_items[(const char *) part], result);
+}
+
+/**
+ * Executes a "call-macro" instruction.
+ *
+ * The macro name is taken from the first attribute of localroot and looked
+ * up through macros / macro_map.  The macro's "npar" attribute gives the
+ * number of parameters.  Temporary word and blank arrays are built from the
+ * positions named by the element children of localroot, swapped in for the
+ * members word, blank and lword while the macro body runs, and swapped back
+ * afterwards.
+ *
+ * At most npar element children are consumed; any further ones are ignored
+ * so that the temporary arrays (sized from npar) can never be overrun.
+ * Slots that are not filled are null pointers.
+ *
+ * @param localroot the "call-macro" element
+ * @throws const char* when the macro declares npar <= 0
+ */
+void
+Postchunk::processCallMacro(xmlNode *localroot)
+{
+  const char *n = (const char *) localroot->properties->children->content;
+  int npar = 0;
+
+  xmlNode *macro = macro_map[macros[n]];
+
+  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
+  {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
+    {
+      npar = atoi((const char *) i->children->content);
+      break;
+    }
+  }
+
+  if (npar <= 0)
+  {
+    throw "Postchunk::processCallMacro() assumes npar > 0, but got npar <= 0";
+  }
+
+  // npar > 0 is guaranteed from here on; value-initialise so that unused
+  // slots are NULL instead of indeterminate
+  InterchunkWord **myword = new InterchunkWord *[npar+1]();
+  string **myblank = new string *[npar]();
+
+  myword[0] = word[0];
+
+  int idx = 1;
+  int lastpos = 0;
+  // idx <= npar keeps writes inside myword[0..npar] and myblank[0..npar-1]
+  for(xmlNode *i = localroot->children; i != NULL && idx <= npar; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      int pos = atoi((const char *) i->properties->children->content);
+      if(!checkIndex(localroot, pos, lword)) {
+        pos=1; // for a rule to match, there has to be at least one word, so should be safe
+      }
+      myword[idx] = word[pos];
+      if(blank)
+      {
+        myblank[idx-1] = blank[lastpos];
+      }
+
+      idx++;
+      lastpos = pos;
+    }
+  }
+
+  // run the macro body against the parameter view
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+
+  for(xmlNode *i = macro->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      processInstruction(i);
+    }
+  }
+
+  // restore the caller's view; myword/myblank are the temporaries again
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+
+  delete[] myword;
+  delete[] myblank;
+}
+
+/**
+ * Executes a "choose" instruction.  Element children are visited in order.
+ * Inside a "when", a "test" child that fails abandons that "when"; a "test"
+ * that succeeds marks the branch as taken and the remaining children are
+ * run as instructions.  Once a "when" has been taken the function returns.
+ * The children of an "otherwise" are always run when it is reached.
+ * @param localroot the "choose" element
+ */
+void
+Postchunk::processChoose(xmlNode *localroot)
+{
+  for(xmlNode *branch = localroot->children; branch != NULL; branch = branch->next)
+  {
+    if(branch->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    if(xmlStrcmp(branch->name, (const xmlChar *) "when") == 0)
+    {
+      bool picked_option = false;
+
+      for(xmlNode *step = branch->children; step != NULL; step = step->next)
+      {
+        if(step->type != XML_ELEMENT_NODE)
+        {
+          continue;
+        }
+        if(xmlStrcmp(step->name, (const xmlChar *) "test") != 0)
+        {
+          processInstruction(step);
+          continue;
+        }
+        if(!processTest(step))
+        {
+          break;
+        }
+        picked_option = true;
+      }
+
+      if(picked_option)
+      {
+        return;
+      }
+    }
+    else if(xmlStrcmp(branch->name, (const xmlChar *) "otherwise") == 0)
+    {
+      for(xmlNode *step = branch->children; step != NULL; step = step->next)
+      {
+        if(step->type != XML_ELEMENT_NODE)
+        {
+          continue;
+        }
+        processInstruction(step);
+      }
+    }
+  }
+}
+
+/**
+ * Evaluates one condition element by dispatching on its name (equal,
+ * begins-with, begins-with-list, ends-with, ends-with-list,
+ * contains-substring, or, and, not, in).
+ * @param localroot the condition element
+ * @return the handler's result, or false for an unrecognised element name
+ */
+bool
+Postchunk::processLogical(xmlNode *localroot)
+{
+  xmlChar const *tagname = localroot->name;
+
+  if(xmlStrcmp(tagname, (const xmlChar *) "equal") == 0)
+  {
+    return processEqual(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "begins-with") == 0)
+  {
+    return processBeginsWith(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "begins-with-list") == 0)
+  {
+    return processBeginsWithList(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "ends-with") == 0)
+  {
+    return processEndsWith(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "ends-with-list") == 0)
+  {
+    return processEndsWithList(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "contains-substring") == 0)
+  {
+    return processContainsSubstring(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "or") == 0)
+  {
+    return processOr(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "and") == 0)
+  {
+    return processAnd(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "not") == 0)
+  {
+    return processNot(localroot);
+  }
+  if(xmlStrcmp(tagname, (const xmlChar *) "in") == 0)
+  {
+    return processIn(localroot);
+  }
+
+  return false;
+}
+
+/**
+ * Evaluates an "in" condition: the first element child is the value, the
+ * first attribute of the second element child names the list.  When the
+ * first attribute of localroot is "yes" the lower-cased value is looked up
+ * in listslow, otherwise the value is looked up unchanged in lists.
+ * @param localroot the "in" element
+ * @return true when the value is a member of the selected list
+ */
+bool
+Postchunk::processIn(xmlNode *localroot)
+{
+  xmlNode *value = NULL;
+  xmlChar *idlist = NULL;
+
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(value != NULL)
+    {
+      idlist = child->properties->children->content;
+      break;
+    }
+    value = child;
+  }
+
+  string sval = evalString(value);
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    set<string, Ltstr> &lowered = listslow[(const char *) idlist];
+    return lowered.find(tolower(sval)) != lowered.end();
+  }
+
+  set<string, Ltstr> &plain = lists[(const char *) idlist];
+  return plain.find(sval) != plain.end();
+}
+
+/**
+ * Evaluates a "test" element through its first element child.
+ * @param localroot the "test" element
+ * @return processLogical() of the first element child, false if none exists
+ */
+bool
+Postchunk::processTest(xmlNode *localroot)
+{
+  xmlNode *cond = localroot->children;
+  while(cond != NULL && cond->type != XML_ELEMENT_NODE)
+  {
+    cond = cond->next;
+  }
+
+  if(cond == NULL)
+  {
+    return false;
+  }
+  return processLogical(cond);
+}
+
+/**
+ * Evaluates an "and" condition with short-circuiting: element children are
+ * evaluated in order and evaluation stops at the first false one.
+ * @param localroot the "and" element
+ * @return true when every evaluated child is true (also when there is none)
+ */
+bool
+Postchunk::processAnd(xmlNode *localroot)
+{
+  for(xmlNode *operand = localroot->children; operand != NULL; operand = operand->next)
+  {
+    if(operand->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(!processLogical(operand))
+    {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+/**
+ * Evaluates an "or" condition with short-circuiting: element children are
+ * evaluated in order and evaluation stops at the first true one.
+ * @param localroot the "or" element
+ * @return true when some child is true, false otherwise (also when empty)
+ */
+bool
+Postchunk::processOr(xmlNode *localroot)
+{
+  for(xmlNode *operand = localroot->children; operand != NULL; operand = operand->next)
+  {
+    if(operand->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(processLogical(operand))
+    {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * Evaluates a "not" condition: negates the first element child.
+ * @param localroot the "not" element
+ * @return the negated result, or false when there is no element child
+ */
+bool
+Postchunk::processNot(xmlNode *localroot)
+{
+  xmlNode *operand = localroot->children;
+  while(operand != NULL && operand->type != XML_ELEMENT_NODE)
+  {
+    operand = operand->next;
+  }
+
+  if(operand == NULL)
+  {
+    return false;
+  }
+  return !processLogical(operand);
+}
+
+/**
+ * Evaluates an "equal" condition on its first two element children.
+ * When the first attribute of localroot is "yes" both values are
+ * lower-cased before comparison; otherwise they are compared as they are.
+ * @param localroot the "equal" element
+ * @return true when the two evaluated strings are equal
+ */
+bool
+Postchunk::processEqual(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    return tolower(evalString(first)) == tolower(evalString(second));
+  }
+  return evalString(first) == evalString(second);
+}
+
+/**
+ * Tells whether s1 starts with s2.
+ * @param s1 the string being inspected
+ * @param s2 the candidate prefix
+ * @return true when s2 is a prefix of s1 (an empty s2 always matches)
+ */
+bool
+Postchunk::beginsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    return false;
+  }
+
+  // compare the leading s2.size() characters of s1 against s2
+  return s1.compare(0, s2.size(), s2) == 0;
+}
+
+/**
+ * Tells whether s1 ends with s2.
+ * @param s1 the string being inspected
+ * @param s2 the candidate suffix
+ * @return true when s2 is a suffix of s1 (an empty s2 always matches)
+ */
+bool
+Postchunk::endsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    return false;
+  }
+
+  // compare the trailing s2.size() characters of s1 against s2
+  return s1.compare(s1.size() - s2.size(), s2.size(), s2) == 0;
+}
+
+
+/**
+ * Evaluates a "begins-with" condition on its first two element children.
+ * When the first attribute of localroot is "yes" both values are
+ * lower-cased first.
+ * @param localroot the "begins-with" element
+ * @return true when the first value starts with the second one
+ */
+bool
+Postchunk::processBeginsWith(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    return beginsWith(tolower(evalString(first)), tolower(evalString(second)));
+  }
+  return beginsWith(evalString(first), evalString(second));
+}
+
+/**
+ * Evaluates an "ends-with" condition on its first two element children.
+ * When the first attribute of localroot is "yes" both values are
+ * lower-cased first.
+ * @param localroot the "ends-with" element
+ * @return true when the first value ends with the second one
+ */
+bool
+Postchunk::processEndsWith(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    return endsWith(tolower(evalString(first)), tolower(evalString(second)));
+  }
+  return endsWith(evalString(first), evalString(second));
+}
+
+/**
+ * Evaluates a "begins-with-list" condition.  The first element child is
+ * the value; the first attribute of the second element child names a list.
+ * When the first attribute of localroot is "yes" the lower-cased value is
+ * tested against listslow, otherwise the value is tested against lists.
+ * @param localroot the "begins-with-list" element
+ * @return true when the value starts with some entry of the list
+ */
+bool
+Postchunk::processBeginsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  xmlChar *idlist = second->properties->children->content;
+  string needle = evalString(first);
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  set<string, Ltstr> &candidates = caseless ? listslow[(const char *) idlist]
+                                            : lists[(const char *) idlist];
+
+  for(set<string, Ltstr>::iterator entry = candidates.begin();
+      entry != candidates.end(); ++entry)
+  {
+    if(beginsWith(needle, *entry))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Evaluates an "ends-with-list" condition.  The first element child is the
+ * value; the first attribute of the second element child names a list.
+ * When the first attribute of localroot is "yes" the lower-cased value is
+ * tested against listslow, otherwise the value is tested against lists.
+ * @param localroot the "ends-with-list" element
+ * @return true when the value ends with some entry of the list
+ */
+bool
+Postchunk::processEndsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  xmlChar *idlist = second->properties->children->content;
+  string needle = evalString(first);
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  set<string, Ltstr> &candidates = caseless ? listslow[(const char *) idlist]
+                                            : lists[(const char *) idlist];
+
+  for(set<string, Ltstr>::iterator entry = candidates.begin();
+      entry != candidates.end(); ++entry)
+  {
+    if(endsWith(needle, *entry))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+
+/**
+ * Evaluates a "contains-substring" condition on its first two element
+ * children.  When the first attribute of localroot is "yes" both values
+ * are lower-cased first.
+ * @param localroot the "contains-substring" element
+ * @return true when the second value occurs inside the first one
+ */
+bool
+Postchunk::processContainsSubstring(xmlNode *localroot)
+{
+  xmlNode *first = NULL;
+  xmlNode *second = NULL;
+
+  for(xmlNode *child = localroot->children;
+      child != NULL && second == NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(first == NULL)
+    {
+      first = child;
+    }
+    else
+    {
+      second = child;
+    }
+  }
+
+  bool const caseless = localroot->properties != NULL &&
+                        xmlStrcmp(localroot->properties->children->content,
+                                  (const xmlChar *) "yes") == 0;
+  if(caseless)
+  {
+    return tolower(evalString(first)).find(tolower(evalString(second))) != string::npos;
+  }
+  return evalString(first).find(evalString(second)) != string::npos;
+}
+
+/**
+ * Applies the case pattern of source_word to target_word.
+ *
+ * The target is fully upper-cased only when the source has more than one
+ * character and both its first and last characters are upper case; in all
+ * other cases it is lower-cased.  Afterwards, if the source starts with an
+ * upper-case character, the first character of the result is upper-cased.
+ * @param source_word UTF-8 word providing the case pattern
+ * @param target_word UTF-8 word to be re-cased
+ * @return the re-cased target, UTF-8 encoded
+ */
+string
+Postchunk::copycase(string const &source_word, string const &target_word)
+{
+  wstring const s_word = UtfConverter::fromUtf8(source_word);
+  wstring const t_word = UtfConverter::fromUtf8(target_word);
+
+  bool const firstupper = iswupper(s_word[0]);
+  bool const uppercase = firstupper && iswupper(s_word[s_word.size()-1]);
+  bool const all_upper = uppercase && s_word.size() != 1;
+
+  wstring result = all_upper ? StringUtils::toupper(t_word)
+                             : StringUtils::tolower(t_word);
+
+  if(firstupper)
+  {
+    result[0] = towupper(result[0]);
+  }
+
+  return UtfConverter::toUtf8(result);
+}
+
+/**
+ * Classifies the case pattern of a UTF-8 string.
+ * @param str the string to classify
+ * @return "aa" when the string is empty or its first character is not
+ *         upper case; "Aa" when only one character is present or the last
+ *         character is not upper case; "AA" when both the first and last
+ *         characters are upper case
+ */
+string
+Postchunk::caseOf(string const &str)
+{
+  wstring const s = UtfConverter::fromUtf8(str);
+
+  if(s.empty() || !iswupper(s[0]))
+  {
+    return "aa";
+  }
+  if(s.size() == 1 || !iswupper(s[s.size()-1]))
+  {
+    return "Aa";
+  }
+  return "AA";
+}
+
+/**
+ * Classifies the case pattern of a wide string.
+ * @param str the string to classify
+ * @return L"aa" when the string is empty or its first character is not
+ *         upper case; L"Aa" when only one character is present or the last
+ *         character is not upper case; L"AA" when both the first and last
+ *         characters are upper case
+ */
+wstring
+Postchunk::caseOf(wstring const &str)
+{
+  if(str.empty() || !iswupper(str[0]))
+  {
+    return L"aa";
+  }
+  if(str.size() == 1 || !iswupper(str[str.size()-1]))
+  {
+    return L"Aa";
+  }
+  return L"AA";
+}
+
+/**
+ * Lower-cases a UTF-8 string by converting it to wide characters, applying
+ * StringUtils::tolower() and converting the result back to UTF-8.
+ * @param str UTF-8 input
+ * @return the lower-cased string, UTF-8 encoded
+ */
+string
+Postchunk::tolower(string const &str) const
+{
+  wstring const wide = UtfConverter::fromUtf8(str);
+  wstring const lowered = StringUtils::tolower(wide);
+  return UtfConverter::toUtf8(lowered);
+}
+
+/**
+ * Turns a dot-separated tag list into angle-bracket form, e.g. "n.m.sg"
+ * becomes "<n><m><sg>".  An empty input yields "<>".
+ * @param str dot-separated tag names
+ * @return the same tags, each wrapped in '<' and '>'
+ */
+string
+Postchunk::tags(string const &str) const
+{
+  string result = "<";
+
+  for(string::const_iterator ch = str.begin(); ch != str.end(); ++ch)
+  {
+    if(*ch != '.')
+    {
+      result += *ch;
+      continue;
+    }
+    // a dot closes the current tag and opens the next one
+    result.append("><");
+  }
+
+  result += '>';
+
+  return result;
+}
+
+/**
+ * Runs every instruction of a rule, in document order.
+ * @param localroot the node holding the instructions; it is supposed to be
+ *                  an 'action' tag
+ */
+void
+Postchunk::processRule(xmlNode *localroot)
+{
+  xmlNode *instr = localroot->children;
+  while(instr != NULL)
+  {
+    if(instr->type == XML_ELEMENT_NODE)
+    {
+      processInstruction(instr);
+    }
+    instr = instr->next;
+  }
+}
+
+/**
+ * Returns the next token of the input.
+ *
+ * If input_buffer still holds tokens, the next buffered one is returned.
+ * Otherwise characters are read from in and accumulated until a token
+ * boundary is met; the new token is added to input_buffer and returned:
+ *  - '^' ends a blank (tt_blank) and switches to word mode;
+ *  - '$' in word mode ends a word (tt_word);
+ *  - end of input, or a NUL character while internal_null_flush is set,
+ *    yields tt_eof carrying whatever was accumulated.
+ * A backslash keeps the following character verbatim.  "[...]" blocks are
+ * copied whole; in word mode "{...}" is copied up to the '}' that is
+ * directly followed by '$'.
+ *
+ * End of input inside an unterminated "[...]" or "{...}" block also yields
+ * tt_eof instead of reading forever.
+ *
+ * @param in stream to read wide characters from
+ * @return reference to the token stored in input_buffer
+ */
+TransferToken &
+Postchunk::readToken(FILE *in)
+{
+  if(!input_buffer.isEmpty())
+  {
+    return input_buffer.next();
+  }
+
+  wstring content;
+  while(true)
+  {
+    int val = fgetwc_unlocked(in);
+    if(feof(in) || (internal_null_flush && val == 0))
+    {
+      return input_buffer.add(TransferToken(content, tt_eof));
+    }
+    if(val == L'\\')
+    {
+      content += L'\\';
+      content += wchar_t(fgetwc_unlocked(in));
+    }
+    else if(val == L'[')
+    {
+      content += L'[';
+      while(true)
+      {
+        int val2 = fgetwc_unlocked(in);
+        if(feof(in))
+        {
+          // unterminated "[...": stop instead of looping on WEOF
+          return input_buffer.add(TransferToken(content, tt_eof));
+        }
+        if(val2 == L'\\')
+        {
+          content += L'\\';
+          content += wchar_t(fgetwc_unlocked(in));
+        }
+        else if(val2 == L']')
+        {
+          content += L']';
+          break;
+        }
+        else
+        {
+          content += wchar_t(val2);
+        }
+      }
+    }
+    else if(inword && val == L'{')
+    {
+      content += L'{';
+      while(true)
+      {
+        int val2 = fgetwc_unlocked(in);
+        if(feof(in))
+        {
+          // unterminated "{...": stop instead of looping on WEOF
+          return input_buffer.add(TransferToken(content, tt_eof));
+        }
+        if(val2 == L'\\')
+        {
+          content += L'\\';
+          content += wchar_t(fgetwc_unlocked(in));
+        }
+        else if(val2 == L'}')
+        {
+          // peek at the next character: only "}$" closes the block
+          int val3 = wchar_t(fgetwc_unlocked(in));
+          ungetwc(val3, in);
+
+          content += L'}';
+          if(val3 == L'$')
+          {
+            break;
+          }
+        }
+        else
+        {
+          content += wchar_t(val2);
+        }
+      }
+    }
+    else if(inword && val == L'$')
+    {
+      inword = false;
+      return input_buffer.add(TransferToken(content, tt_word));
+    }
+    else if(val == L'^')
+    {
+      inword = true;
+      return input_buffer.add(TransferToken(content, tt_blank));
+    }
+    else
+    {
+      content += wchar_t(val);
+    }
+  }
+}
+
+/**
+ * @return the current value of the null_flush flag
+ */
+bool
+Postchunk::getNullFlush()
+{
+  return this->null_flush;
+}
+
+/**
+ * Stores a new value for the null_flush flag.
+ * @param null_flush the value to store
+ */
+void
+Postchunk::setNullFlush(bool null_flush)
+{
+  (*this).null_flush = null_flush;
+}
+
+/**
+ * Runs postchunk() repeatedly until in reaches end of file, writing a NUL
+ * character and flushing out after every run.  While it runs, null_flush
+ * is cleared and internal_null_flush is set; both are put back to
+ * true / false respectively on exit.  A failed flush is reported on wcerr
+ * together with errno.
+ * @param in  input stream
+ * @param out output stream
+ */
+void
+Postchunk::postchunk_wrapper_null_flush(FILE *in, FILE *out)
+{
+  null_flush = false;
+  internal_null_flush = true;
+
+  for(; !feof(in); )
+  {
+    postchunk(in, out);
+    fputwc_unlocked(L'\0', out);
+    if(fflush(out) != 0)
+    {
+      wcerr << L"Could not flush output " << errno << endl;
+    }
+  }
+
+  internal_null_flush = false;
+  null_flush = true;
+}
+
+/**
+ * Main loop: reads tokens from in, feeds them to the match state ms and
+ * writes the result to out.
+ *
+ * Whenever ms has no live state left, either the last rule that matched is
+ * applied (applyRule) or, if no rule matched, the first pending word is
+ * written through unchunk() / the first pending blank is written as is,
+ * and matching restarts from the initial state.
+ *
+ * @param in  input stream
+ * @param out output stream; also stored in the member output
+ */
+void
+Postchunk::postchunk(FILE *in, FILE *out)
+{
+  // NOTE(review): there is no return after the wrapper call, so control
+  // falls through into the loop below once the wrapper has finished.
+  if(getNullFlush())
+  {
+    postchunk_wrapper_null_flush(in, out);
+  }
+  
+  // input_buffer position to rewind to after a rule / word is consumed
+  int last = 0;
+
+  output = out;
+  ms.init(me->getInitial());
+  
+  while(true)
+  {
+    if(ms.size() == 0)
+    {
+      if(lastrule != NULL)
+      {
+	applyRule();
+	input_buffer.setPos(last);
+      }
+      else
+      {
+	if(tmpword.size() != 0)
+	{
+	  // no rule matched: emit the first pending word and step past it
+	  unchunk(*tmpword[0], output);
+	  tmpword.clear();
+	  input_buffer.setPos(last);
+	  input_buffer.next();       
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+	else if(tmpblank.size() != 0)
+	{
+	  // only blanks are pending: copy the first one to the output
+	  fputws_unlocked(tmpblank[0]->c_str(), output);
+	  tmpblank.clear();
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+      }
+    }
+    // remember the rule (1-based value from classifyFinals) and where it ended
+    int val = ms.classifyFinals(me->getFinals());
+    if(val != -1)
+    {
+      lastrule = rule_map[val-1];      
+      last = input_buffer.getPos();
+    }
+
+    TransferToken &current = readToken(in);
+   
+    switch(current.getType())
+    {
+      case tt_word:
+	applyWord(current.getContent());
+        tmpword.push_back(&current.getContent());
+	break;
+
+      case tt_blank:
+	ms.step(L' ');
+	tmpblank.push_back(&current.getContent());
+	break;
+
+      case tt_eof:
+	if(tmpword.size() != 0)
+	{
+	  // words are still pending: clear ms so the branch above handles them
+	  tmpblank.push_back(&current.getContent());
+	  ms.clear();
+	}
+	else
+	{
+	  fputws_unlocked(current.getContent().c_str(), output);
+	  return;
+	}
+	break;
+
+      default:
+	cerr << "Error: Unknown input token." << endl;
+	return;
+    }
+  }
+}
+
+/**
+ * Applies lastrule to the chunk held in tmpword[0].
+ *
+ * The chunk is split into words and blanks (splitWordsAndBlanks appends to
+ * tmpword / tmpblank), the members word, blank, lword and lblank are built
+ * from them (word[0] is made from wordzero(chunk)), the rule is run with
+ * processRule(), and finally everything allocated here is released and the
+ * match state is re-initialised.
+ */
+void
+Postchunk::applyRule()
+{
+  // copy the chunk first: tmpword is cleared and refilled right below
+  wstring const chunk = *tmpword[0];
+  tmpword.clear();
+  splitWordsAndBlanks(chunk, tmpword, tmpblank);
+
+  word = new InterchunkWord *[tmpword.size()+1];
+  lword = tmpword.size();
+  word[0] = new InterchunkWord(UtfConverter::toUtf8(wordzero(chunk)));
+
+  for(unsigned int i = 1, limit = tmpword.size()+1; i != limit; i++)
+  {
+    if(i == 1)
+    {
+      // first iteration: size the blank array (one blank between two words)
+      if(limit != 2)
+      {
+        blank = new string *[limit - 2];
+        lblank = limit - 3;
+      }
+      else
+      {
+        blank = NULL;
+        lblank = 0;
+      }
+    }
+    else
+    {
+      // tmpblank[0] is skipped here; blank[k] is built from tmpblank[k+1]
+      blank[i-2] = new string(UtfConverter::toUtf8(*tmpblank[i-1]));
+    }
+    
+    word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i-1]));
+  }
+
+  processRule(lastrule);
+  lastrule = NULL;
+
+  if(word)
+  {
+    for(unsigned int i = 0, limit = tmpword.size() + 1; i != limit; i++)
+    {
+      delete word[i];
+    }
+    delete[] word;
+  }
+  if(blank)
+  {
+    for(unsigned int i = 0, limit = tmpword.size() - 1; i != limit; i++)
+    {
+      delete blank[i];
+    }
+    delete[] blank;
+  }
+  word = NULL;
+  blank = NULL;
+
+  // free the strings created by splitWordsAndBlanks; tmpblank[0] is not
+  // deleted here (presumably it is not owned by this function -- confirm)
+  for(unsigned int i = 0, limit = tmpword.size(); i != limit; i++)
+  {
+    if(i != 0)
+    {
+      delete tmpblank[i];
+    }
+    delete tmpword[i];
+  }
+  tmpword.clear();
+  tmpblank.clear();
+  ms.init(me->getInitial());
+}
+
+/**
+ * Feeds one word to the match state: '^', then every character
+ * lower-cased (with any_char as alternative symbol), then '$'.
+ * A backslash is skipped and the character after it is fed instead.
+ * Feeding stops at the first unescaped '<' or '{': tag matching is
+ * disabled here and the unmodifiable part of the chunk is ignored.
+ * @param word_str the word to feed
+ */
+void
+Postchunk::applyWord(wstring const &word_str)
+{
+  ms.step(L'^');
+
+  unsigned int pos = 0;
+  unsigned int const limit = word_str.size();
+  while(pos < limit)
+  {
+    wchar_t const current = word_str[pos];
+    if(current == L'<' || current == L'{')
+    {
+      // nothing beyond this point takes part in the match
+      break;
+    }
+    if(current == L'\\')
+    {
+      // escaped character: feed what follows the backslash
+      ++pos;
+    }
+    ms.step(towlower(word_str[pos]), any_char);
+    ++pos;
+  }
+
+  ms.step(L'$');
+}
+
+/**
+ * Collects the tags ("<...>") that appear in a chunk before its first
+ * unescaped '{'.  A backslash skips the character that follows it.
+ * Scanning never goes past the end of the chunk: a trailing backslash ends
+ * the scan, and an unterminated '<' is dropped.
+ * @param chunk the chunk text
+ * @return the tags in order of appearance, each including '<' and '>'
+ */
+vector<wstring>
+Postchunk::getVecTags(wstring const &chunk)
+{
+  vector<wstring> vectags;
+
+  // '<' rather than '!=': the backslash branch can step i over limit
+  for(int i = 0, limit = chunk.size(); i < limit; i++)
+  {
+    if(chunk[i] == L'\\')
+    {
+      i++;
+    }
+    else if(chunk[i] == L'<')
+    {
+      wstring mytag;
+      do
+      {
+        mytag += chunk[i++];
+      }
+      while(i < limit && chunk[i] != L'>');
+
+      if(i >= limit)
+      {
+        // no closing '>' before the end of the chunk
+        break;
+      }
+      vectags.push_back(mytag + L'>');
+    }
+    else if(chunk[i] == L'{')
+    {
+      break;
+    }
+  }
+  return vectags;
+}
+
+/**
+ * Finds where the chunk content starts.
+ * A backslash skips the character that follows it; a trailing backslash
+ * ends the scan instead of running past the end of the string.
+ * @param chunk the chunk text
+ * @return the index just after the first unescaped '{', or chunk.size()
+ *         when there is none
+ */
+int
+Postchunk::beginChunk(wstring const &chunk)
+{
+  // '<' rather than '!=': the backslash branch can step i over limit
+  for(int i = 0, limit = chunk.size(); i < limit; i++)
+  {
+    if(chunk[i] == L'\\')
+    {
+      i++;
+    }
+    else if(chunk[i] == L'{')
+    {
+      return i + 1;
+    }
+  }
+  return chunk.size();
+}
+
+/**
+ * Gives the end limit used when scanning the chunk content.
+ * @param chunk the chunk text
+ * @return chunk.size() minus two
+ */
+int
+Postchunk::endChunk(wstring const &chunk)
+{
+  int const limit = chunk.size()-2;
+  return limit;
+}
+
+/**
+ * Extracts the part of a chunk that precedes its first unescaped '{'.
+ * A backslash skips the character that follows it; a trailing backslash
+ * ends the scan instead of running past the end of the string.
+ * @param chunk the chunk text
+ * @return everything before the first unescaped '{', or an empty string
+ *         when the chunk has none
+ */
+wstring
+Postchunk::wordzero(wstring const &chunk)
+{
+  // '<' rather than '!=': the backslash branch can step i over limit
+  for(unsigned int i = 0, limit = chunk.size(); i < limit; i++)
+  {
+    if(chunk[i] == L'\\')
+    {
+      i++;
+    }
+    else if(chunk[i] == L'{')
+    {
+      return chunk.substr(0, i);
+    }
+  }
+
+  return L"";
+}
+
+/**
+ * Extracts the part of a chunk that precedes its first unescaped '<' or
+ * '{'.  A backslash skips the character that follows it; a trailing
+ * backslash ends the scan instead of running past the end of the string.
+ * @param chunk the chunk text
+ * @return everything before the first unescaped '<' or '{', or an empty
+ *         string when the chunk has neither
+ */
+wstring
+Postchunk::pseudolemma(wstring const &chunk)
+{
+  // '<' rather than '!=': the backslash branch can step i over limit
+  for(unsigned int i = 0, limit = chunk.size(); i < limit; i++)
+  {
+    if(chunk[i] == L'\\')
+    {
+      i++;
+    }
+    else if(chunk[i] == L'<' || chunk[i] == L'{')
+    {
+      return chunk.substr(0, i);
+    }
+  }
+
+  return L"";
+}
+
+/**
+ * Writes the content of a chunk (the span from beginChunk() up to
+ * endChunk()) to output.
+ *
+ * Inside "^...$" words, a tag whose first character is a digit ("<N>") is
+ * replaced by the N-th tag of the chunk (from getVecTags(), 1-based); other
+ * tags are copied.  Remaining word characters are re-cased according to
+ * caseOf(pseudolemma(chunk)): "AA" upper-cases every character, "Aa"
+ * upper-cases only the first alphanumeric one.  "[...]" blocks and all
+ * other characters are copied unchanged; a backslash always copies the
+ * following character verbatim.
+ *
+ * NOTE(review): the inner while loops look for '$', '>' and ']' without
+ * checking the chunk length -- presumably the input is always well formed.
+ *
+ * @param chunk  the chunk text
+ * @param output stream to write to
+ */
+void
+Postchunk::unchunk(wstring const &chunk, FILE *output)
+{
+  vector<wstring> vectags = getVecTags(chunk);
+  wstring case_info = caseOf(pseudolemma(chunk));
+  bool uppercase_all = false;
+  bool uppercase_first = false;
+ 
+  if(case_info == L"AA")
+  {
+    uppercase_all = true;
+  }  
+  else if(case_info == L"Aa")
+  {
+    uppercase_first = true;
+  }
+
+  for(int i = beginChunk(chunk), limit = endChunk(chunk); i < limit; i++)
+  {
+    if(chunk[i] == L'\\')
+    {
+      fputwc_unlocked(L'\\', output);
+      fputwc_unlocked(chunk[++i], output);
+    }
+    else if(chunk[i] == L'^')
+    {
+      // a word: copy it up to the closing '$'
+      fputwc_unlocked(L'^', output);
+      while(chunk[++i] != L'$')
+      {
+        if(chunk[i] == L'\\')
+        {
+          fputwc_unlocked(L'\\', output);
+          fputwc_unlocked(chunk[++i], output);
+        }
+        else if(chunk[i] == L'<')
+        {
+          if(iswdigit(chunk[i+1]))
+          {
+            // replace tag
+            unsigned long value = wcstoul(chunk.c_str()+i+1, 
+					  NULL, 0) - 1;
+            //atoi(chunk.c_str()+i+1)-1;
+            // out-of-range references are dropped silently
+            if(vectags.size() > value)
+            {
+              fputws_unlocked(vectags[value].c_str(), output);
+            }
+            while(chunk[++i] != L'>');
+          }
+          else
+          {
+            fputwc_unlocked(L'<', output);
+	    while(chunk[++i] != L'>') fputwc_unlocked(chunk[i], output);
+            fputwc_unlocked(L'>', output);
+          }
+        }
+        else
+        {
+          if(uppercase_all)
+          {
+            fputwc_unlocked(towupper(chunk[i]), output);
+          }
+          else if(uppercase_first)
+          {
+            // only the first alphanumeric character of the chunk is raised
+	    if(iswalnum(chunk[i]))
+	    {
+	      fputwc_unlocked(towupper(chunk[i]), output);
+	      uppercase_first = false;
+	    }
+            else
+	    {
+	      fputwc_unlocked(chunk[i], output);
+	    }
+          }
+          else
+          {
+            fputwc_unlocked(chunk[i], output);
+          }
+        }
+      }
+      fputwc_unlocked(L'$', output);
+    }
+    else if(chunk[i] == L'[')
+    {
+      // a "[...]" block: copied verbatim, escapes included
+      fputwc_unlocked(L'[', output);
+      while(chunk[++i] != L']')
+      {
+        if(chunk[i] == L'\\')
+        {
+          fputwc_unlocked(L'\\', output);
+          fputwc_unlocked(chunk[++i], output);
+        }
+        else
+        {
+          fputwc_unlocked(chunk[i], output);
+        }
+      }
+      fputwc_unlocked(L']', output);
+    }
+    else
+    {
+      fputwc_unlocked(chunk[i], output);
+    }
+  }
+}
+
+
+/**
+ * Splits the content of a chunk (the span from beginChunk() up to
+ * endChunk()) into words and blanks.
+ *
+ * Every "^...$" word becomes a newly allocated wstring appended to words,
+ * with "<N>" tags replaced and case applied in the same way as unchunk().
+ * Text between words ("[...]" blocks and any other characters) is
+ * collected into wstrings appended to blanks; an empty blank is inserted
+ * between two adjacent words.  The strings are allocated with new and
+ * handed to the caller through the vectors.
+ *
+ * NOTE(review): lastblank starts as true, so blank text that comes before
+ * the first word is appended to blanks.back(); this presumably relies on
+ * blanks being non-empty on entry -- confirm against the caller.
+ *
+ * @param chunk  the chunk text
+ * @param words  receives one new wstring per word
+ * @param blanks receives the blanks found between words
+ */
+void
+Postchunk::splitWordsAndBlanks(wstring const &chunk, vector<wstring *> &words,
+                               vector<wstring *> &blanks)
+{
+  vector<wstring> vectags = getVecTags(chunk);
+  wstring case_info = caseOf(pseudolemma(chunk));
+  bool uppercase_all = false;
+  bool uppercase_first = false;
+  // true while the most recent item seen was a blank (or nothing yet)
+  bool lastblank = true;
+ 
+  if(case_info == L"AA")
+  {
+    uppercase_all = true;
+  }  
+  else if(case_info == L"Aa")
+  {
+    uppercase_first = true;
+  }
+  
+  for(int i = beginChunk(chunk), limit = endChunk(chunk); i < limit; i++)
+  {
+    if(chunk[i] == L'^')
+    {
+      // two words in a row: keep words and blanks aligned with an empty blank
+      if(!lastblank)
+      {
+        blanks.push_back(new wstring(L""));
+      }
+      lastblank = false;
+      wstring *myword = new wstring();
+      wstring &ref = *myword;
+    
+      while(chunk[++i] != L'$')
+      {
+        if(chunk[i] == L'\\')
+        {
+          ref += L'\\';
+          ref += chunk[++i];
+        }
+        else if(chunk[i] == L'<')
+        {
+          if(iswdigit(chunk[i+1]))
+          {
+            // replace tag
+            unsigned long value = wcstoul(chunk.c_str()+i+1, 
+                                          NULL, 0) - 1;
+            // out-of-range references are dropped silently
+            if(vectags.size() > value)
+            {
+              ref.append(vectags[value]);
+            }
+            while(chunk[++i] != L'>');
+          }
+          else
+          {
+            ref += L'<';
+            while(chunk[++i] != L'>') ref += chunk[i];
+            ref += L'>';
+          }
+        }
+        else
+        {
+          if(uppercase_all)
+          {
+            ref += towupper(chunk[i]);
+          }
+          else if(uppercase_first)
+          {
+            // only the first alphanumeric character of the chunk is raised
+            if(iswalnum(chunk[i]))
+            {
+              ref += towupper(chunk[i]);
+              uppercase_first = false;
+            }
+            else
+            {
+              ref += chunk[i];
+            }
+          }
+          else
+          {
+            ref += chunk[i];
+          }
+        }
+      }
+
+      words.push_back(myword);
+    }
+    else if(chunk[i] == L'[')
+    {
+      // start a new blank unless the previous item already was one
+      if (!(lastblank && blanks.back())) 
+      {
+        blanks.push_back(new wstring());
+      }
+      wstring &ref = *(blanks.back());
+      ref += L'[';
+      while(chunk[++i] != L']')
+      {
+        if(chunk[i] == L'\\')
+        {
+          ref += L'\\';
+          ref += chunk[++i];
+        }
+        else
+        {
+          ref += chunk[i];
+        }
+      }
+      ref += chunk[i];
+
+      lastblank = true;
+    }
+    else
+    {
+      // any other character belongs to the current blank
+      if (!lastblank)
+      {
+        wstring *myblank = new wstring(L"");
+        blanks.push_back(myblank);
+      }
+      wstring &ref = *(blanks.back());
+      if(chunk[i] == L'\\')
+      {
+        ref += L'\\';
+        ref += chunk[++i];
+      }
+      else
+      {
+        ref += chunk[i];
+      }
+      lastblank = true;
+    }
+  }
+}
+
Index: branches/apertium-tagger/apertium2/apertium/unlocked_cstdio.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/unlocked_cstdio.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/unlocked_cstdio.h	(revision 69632)
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _APERTIUM_UNLOCKED_CSTDIO_
+#define _APERTIUM_UNLOCKED_CSTDIO_
+
+#include <cstdio>
+// Fallbacks: whenever a HAVE_DECL_*_UNLOCKED macro is undefined or 0, the *_unlocked name is aliased to the ordinary stdio function.
+#if !HAVE_DECL_FPUTS_UNLOCKED
+#define fputs_unlocked fputs
+#endif
+
+#if !HAVE_DECL_FGETC_UNLOCKED 
+#define fgetc_unlocked fgetc
+#endif
+
+#if !HAVE_DECL_FPUTC_UNLOCKED
+#define fputc_unlocked fputc
+#endif
+
+#if !HAVE_DECL_FWRITE_UNLOCKED
+#define fwrite_unlocked fwrite
+#endif
+
+#if !HAVE_DECL_FREAD_UNLOCKED
+#define fread_unlocked fread
+#endif
+
+#if !HAVE_DECL_FGETWC_UNLOCKED
+#define fgetwc_unlocked fgetwc
+#endif
+
+#if !HAVE_DECL_FPUTWC_UNLOCKED
+#define fputwc_unlocked fputwc
+#endif
+
+#if !HAVE_DECL_FPUTWS_UNLOCKED
+#define fputws_unlocked fputws
+#endif
+// When HAVE_MBTOWC is undefined or 0, wctomb/mbtowc are supplied as thin wrappers over the restartable <cwchar> calls (NULL state argument).
+#if !HAVE_MBTOWC
+#include <cwchar>
+inline int wctomb(char *s, wchar_t wc) { return wcrtomb(s,wc,NULL); }
+inline int mbtowc(wchar_t *pwc, const char *s, size_t n) { return mbrtowc(pwc, s, n, NULL); }
+#endif // NOTE(review): mbrtowc may return (size_t)-2 for an incomplete sequence, which becomes -2 here rather than -1 - confirm callers test for < 0
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lextor.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor.cc	(revision 69632)
@@ -0,0 +1,1045 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/lextor.h>
+#include <apertium/string_utils.h>
+
+#include <algorithm>
+#include <string>
+#include <cmath>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+
+#define PI 3.14159265358979323846264338327950288 // used by estimate_winner_lch_cosine to turn acos() radians into degrees
+
+bool LexTor::debug; // static member: when true the routines in this file trace to wcerr (the training ones also block on getchar())
+double LexTor::angleth; // static member: threshold that estimate_winner_lch_cosine compares diff_angle against
+
+LexTor::LexTor() : fstpbil(0)
+{
+  // Nothing is attached yet; the set_*() methods supply the models later.
+  tlmodel = NULL;
+  lextor_data = NULL;
+}
+  
+LexTor::LexTor(const LexTor& lt) : fstpbil(0)
+{
+  // Shares the source's model pointers; the dictionary pointer is reset, not copied.
+  tlmodel = lt.tlmodel;
+  lextor_data = lt.lextor_data;
+}
+  
+LexTor::~LexTor() // empty on purpose: no member is deleted here
+{}
+
+// Stores (without copying) the model that trainwrd/trainlch/lexical_selector require.
+void LexTor::set_lextor_data(LexTorData* ltd) {
+  this->lextor_data = ltd;
+}
+
+// Stores (without copying) the target-language model that estimate_winner_lch_votingtl requires.
+void LexTor::set_tlmodel(LexTorData* tlm) {
+  this->tlmodel = tlm;
+}
+
+// Stores the bilingual-dictionary processor that estimate_winner_lch_votingtl uses to translate window words.
+void LexTor::set_bildic(FSTProcessor *fstp) {
+  this->fstpbil = fstp;
+}
+
+void // Trains the word co-occurrence model: for every word of lextor_data it accumulates distance-weighted counts of its context words
+LexTor::trainwrd(wistream& is, int left, int right, double weigth_exponent) { // is: corpus (rewound first); left/right: context words per side; weigth_exponent: exponent of the 1/|distance| decay
+  if (lextor_data==NULL) { //Training writes into lextor_data, so it must have been attached; otherwise the process exits
+    wcerr<<L"Error in LexTor::trainwrd, you must call set_lextor_data before training\n";
+    exit(EXIT_FAILURE);
+  }
+
+  lextor_data->ensure_stopwords_ok();
+
+  wcerr<<L"Number of words to take into account on the left side: "<<left<<L"\n";
+  wcerr<<L"Number of words to take into account on the right side: "<<right<<L"\n";
+
+  set<wstring> words2workwith=lextor_data->get_words();
+  set<wstring>::iterator itword;
+
+  map<wstring, COUNT_DATA_TYPE> wordsum; //Occurrences of every non-stopword seen in the corpus
+
+  wcerr<<L"Words to work with:\n";
+  for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+    wcerr<<*itword<<L"\n";
+  }
+  wcerr<<L"\n";
+
+  is.clear();
+  is.seekg(0, ios::beg); //Rewind: offset 0 from the start (ios::beg is a direction, not a position)
+
+  int nw=0;
+
+  map<wstring, map<wstring, COUNT_DATA_TYPE> > context; //context[word][neighbour] = accumulated weighted count
+  deque<wstring> buffer; //Sliding window over the reduced, non-stopword corpus words
+  unsigned word_index=(unsigned)left; //Slot of the word under study inside a full window
+
+  unsigned buffer_max_size=(unsigned)(left+1+right);
+  
+  LexTorWord *ltword;
+  ltword=LexTorWord::next_word(is);
+  while(ltword!=NULL) {
+    if ((++nw%250000)==0)
+      wcerr<<nw<<L" words processed\n";
+      
+    if(debug) {
+      wcerr<<L"Word read from corpus: "<<ltword->get_word_string()
+	  <<L", reduced: "<<lextor_data->reduce(ltword->get_word_string())<<flush;
+      getchar();
+    }
+
+    wstring reduced_word=lextor_data->reduce(ltword->get_word_string());
+
+    if (!lextor_data->is_stopword(reduced_word)) { //Stopwords neither enter the window nor get counted
+      if (buffer.size()>=buffer_max_size) {
+	buffer.pop_front();
+      }
+      buffer.push_back(reduced_word);
+
+      wordsum[reduced_word]+=1.0;
+
+      //Only a complete window (left + 1 + right words) is scored
+      if (buffer.size()==buffer_max_size) {
+	for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+	  if (buffer[word_index]==(*itword)) {
+	    if(debug) {
+	      wcerr<<L"WINDOW: ";
+	      for (unsigned i=0; i<buffer.size(); i++) {
+		if(i==word_index)
+		  wcerr<<L"[>>>>"<<buffer[i]<<L"<<<<] ";
+		else
+		  wcerr<<L"["<<buffer[i]<<L"] ";
+	      }
+	      wcerr<<L"\n";
+	    }
+
+	    int distance=(-1)*left; //Signed offset of slot i from the centre; the centre itself (offset 0) is skipped below
+	    for(unsigned i=0; i<buffer.size(); i++) {
+	      if ((i!=word_index) && (buffer[i]!=(*itword))) { //Repetitions of the centre word do not vote for themselves
+		if (debug) {
+		  wcerr<<L"   WORD: ["<<buffer[i]<<L"] ";
+		  wcerr<<L"   DISTANCE: "<<distance<<L" ";
+		  wcerr<<L"   ADDED COUNT: "<<1.0/pow(fabs((double)distance),weigth_exponent)<<L" ";
+		  wcerr<<L"   TO ["<<*itword<<L"]\n";
+		}
+		context[*itword][buffer[i]]+=1.0/pow(fabs((double)distance),weigth_exponent);
+	      }
+	      distance++;
+	    }
+	    if (debug)
+	      getchar();
+	    break; //Set elements are unique, so no other entry can match the centre word
+	  }
+	}
+      }
+    }
+
+    delete ltword;
+    ltword=LexTorWord::next_word(is);
+  }
+
+  wcerr<<L"Corpus has "<<nw<<L" words\n";
+
+  //Set the count of each word
+  map<wstring, COUNT_DATA_TYPE>::iterator itws;
+  for(itws=wordsum.begin(); itws!=wordsum.end(); itws++) {
+    lextor_data->set_wordcount(itws->first,itws->second);
+    //if(debug) {
+    wcerr<<L"wordcount("<<itws->first<<L") = "<<itws->second<<L"\n";
+    //}
+  }
+
+  //All co-occurrences have been collected. Each word's context is handed over sorted
+  //with PairStringCountComparer; no truncation to the n most frequent happens here
+  for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+    PairStringCountComparer comparer;
+    vector<pair<wstring, COUNT_DATA_TYPE> > context_v;
+    map<wstring, COUNT_DATA_TYPE>::iterator itm;
+
+    while(context[*itword].size()>0) { //Moves the entries out of the map one at a time, leaving it empty
+      itm=context[*itword].begin();
+      context_v.push_back(*itm);
+      context[*itword].erase(itm);
+    }
+
+    sort(context_v.begin(), context_v.end(), comparer);
+    wstring w=*itword;
+    lextor_data->set_cooccurrence_context(w, context_v);
+    lextor_data->set_lexchoice_sum(w, wordsum[w]);
+
+    //if (debug) {
+    wcerr<<L"lexchoice_sum("<<w<<L") = "<<wordsum[w]<<L"\n";
+    //}
+  }
+}
+
+void // Trains the lexical-choice model: target-language co-occurrence counts are projected onto each source lexical choice through the bilingual dictionary
+LexTor::trainlch(wistream& is, int left, int right, LexTorData& tlwordmodel, 
+                 FSTProcessor& dic, FSTProcessor& bildic, double weigth_exponent) { // dic: passed to next_word when reading the corpus; bildic: used for every translation
+  if (lextor_data==NULL) { //Training writes into lextor_data, so it must have been attached; otherwise the process exits
+    wcerr<<L"Error in LexTor::trainlch, you must call set_lextor_data before training\n";
+    exit(EXIT_FAILURE);
+  }
+
+  lextor_data->ensure_stopwords_ok();
+
+  wcerr<<L"Number of words to take into account on the left side: "<<left<<L"\n";
+  wcerr<<L"Number of words to take into account on the right side: "<<right<<L"\n";
+
+  set<wstring> words2workwith=lextor_data->get_words();
+  set<wstring>::iterator itword;
+
+  map<wstring, COUNT_DATA_TYPE> wordsum; //Occurrences of every non-stopword seen in the corpus
+  map<wstring, COUNT_DATA_TYPE> lechsum; //Estimated (fractional) number of times each lexical choice was seen
+
+  wcerr<<L"Words to work with:\n";
+  for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+    wcerr<<*itword<<L"\n";
+  }
+  wcerr<<L"\n";
+
+  //For a given lexical choice it stores its translation
+  map<wstring, wstring> lexchoice_translation;
+  map<wstring, set<wstring> > lexical_choices_of_word;
+
+  wcerr<<L"Lexical choices:\n";
+  for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+    set<wstring> lexical_choices=lextor_data->get_lexical_choices(*itword);
+    lexical_choices_of_word[*itword]=lexical_choices;
+    set<wstring>::iterator itlch;
+    for(itlch=lexical_choices.begin(); itlch!=lexical_choices.end(); itlch++) {
+      lexchoice_translation[*itlch]=tlwordmodel.reduce(bildic.biltrans(*itlch,false));
+      wcerr<<*itlch<<L", tr:"<<lexchoice_translation[*itlch]<<L"\n";
+    }
+  }
+  wcerr<<L"\n";
+
+
+  is.clear();
+  is.seekg(0, ios::beg); //Rewind: offset 0 from the start (ios::beg is a direction, not a position)
+
+  int nw=0;
+
+  map<wstring, map<wstring, COUNT_DATA_TYPE> > context; //context[lexical choice][context word] = accumulated weighted count
+  deque<LexTorWord> buffer; //Sliding window over the non-stopword corpus words
+
+  int word_index=left; //Slot of the word under study inside a full window
+  unsigned buffer_max_size=left+right+1;
+
+  LexTorWord *ltword;
+  ltword=LexTorWord::next_word(is,&dic);
+  while(ltword!=NULL) {
+    if (debug) {
+      wcerr<<L"Word read from corpus: "<<ltword->get_word_string()<<L", reduce: "<<lextor_data->reduce(ltword->get_word_string());
+      getchar();
+    }
+    if ((++nw%250000)==0)
+      wcerr<<nw<<L" words processed\n";
+
+    wstring reduced_word=lextor_data->reduce(ltword->get_word_string());
+
+    if (!lextor_data->is_stopword(reduced_word)) {	
+      if (buffer.size()>=buffer_max_size) {
+	buffer.pop_front();
+      }
+      buffer.push_back(*ltword);
+
+      wordsum[reduced_word]+=1.0;
+
+      //Only a complete window (left + 1 + right words) is scored
+      if (buffer.size()==buffer_max_size) {
+
+	wstring reduced_buffer_word=lextor_data->reduce(buffer[word_index].get_word_string());
+
+        for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+	  if (reduced_buffer_word==(*itword)) {
+	    //We translate each word in the context
+	    //Note: Words in the context can also be ambiguous (with more than one lexical choice)
+	    //In that case the count will come from all the possible
+	    //translations 
+        vector <vector<wstring> > translation_buffer(buffer_max_size);
+		vector <wstring> reduced_buffer(buffer_max_size);
+
+	    for (int i=0; i<(int)buffer_max_size; i++) {
+	      reduced_buffer[i]=lextor_data->reduce(buffer[i].get_word_string());	      
+	    }
+
+	    if(debug) {
+	      wcerr<<L"WINDOW: ";
+	      for (unsigned i=0; i<buffer.size(); i++) {
+		if(i==(unsigned)word_index)
+		  wcerr<<L"[>>>>"<<reduced_buffer[i]<<L"<<<<] ";
+		else
+		  wcerr<<L"["<<reduced_buffer[i]<<L"] ";
+	      }
+	      wcerr<<L"\n";
+	      wcerr<<L"TRANSLATED: ";
+	    }
+
+	    for (int i=0; i<(int)buffer_max_size; i++) {
+	      wstring str_translations=L"";
+	      for(int j=0; j<buffer[i].n_lexical_choices(); j++) {
+		wstring aux_tr=buffer[i].translate(bildic,j);
+		if (aux_tr.length()>0) {
+		  wstring tr=tlwordmodel.reduce(aux_tr);
+		  translation_buffer[i].push_back(tr);
+		  str_translations+=tr+L"/";
+		} else { //Empty translations are dropped, so translation_buffer[i] may end up empty
+		  wcerr<<L"Warning in LexTor::trainlch: translation of ["<<buffer[i].get_word_string()
+		      <<L"] is empty\n";
+		}
+	      }
+	      if (debug) {
+		if (i==word_index)
+		  wcerr<<L"[>>>>"<<str_translations<<L"<<<<] ";
+		else
+		  wcerr<<L"["<<str_translations<<L"] ";
+	      }
+	    }
+
+	    if(debug)
+	      wcerr<<L"\n";
+
+	    set<wstring> lexical_choices=lexical_choices_of_word[*itword];
+	    set<wstring>::iterator itlch;
+
+	    map<wstring, map<wstring, COUNT_DATA_TYPE> > local_context; //Votes of this window only, per lexical choice and context word
+	    map<wstring, COUNT_DATA_TYPE> sumvotes_context; //Per context word: votes summed over all lexical choices
+
+	    //For each lexical choice the counts from the TL are collected
+	    for(itlch=lexical_choices.begin(); itlch!=lexical_choices.end(); itlch++) {
+	      for (int i=0; i<(int)buffer_max_size; i++) {
+		if ((i!=word_index)&&(reduced_buffer[i]!=(*itword))) {
+		  COUNT_DATA_TYPE target_vote=0;
+
+		  //The counts of the TL co-occurrence model are transferred to the SL. If the SL word is ambiguous
+		  //it will have more than one translation into TL, so we need to normalize using the frequency of words
+		  //in the TL
+		  vector <double> translation_weighs(translation_buffer[i].size());
+		  double sum=0.0;
+		  if (translation_buffer[i].size()>1) {
+		    for(int j=0; j<(int)translation_buffer[i].size(); j++) {
+		      translation_weighs[j]=tlwordmodel.get_lexchoice_sum(translation_buffer[i][j]);
+		      sum+=translation_weighs[j];
+
+		      //!!!!! To make it ignore the polysemous words of the context
+		      ///////translation_weighs[j]=0;
+		      //!!!!!
+
+		      if (debug) {
+			wcerr<<L"Frequency of translation ["<<translation_buffer[i][j]<<L"] ="
+			    <<translation_weighs[j]<<L"\n";
+		      }
+		    }
+		  } else {
+		    if (!translation_weighs.empty()) translation_weighs[0]=1; //Guard: the vector is empty when every translation was dropped
+		    sum=1;
+		  }
+
+		  for(int j=0; j<(int)translation_buffer[i].size(); j++) {
+		    translation_weighs[j]=translation_weighs[j]/sum;
+		    if (debug) {
+		      wcerr<<L"Weight of translation ["<<translation_buffer[i][j]<<L"] ="
+			  <<translation_weighs[j]<<L"\n";
+		    }
+		  }
+
+		  for(int j=0; j<(int)translation_buffer[i].size(); j++) {
+		    if (lexchoice_translation[*itlch].length()==0) {
+		      wcerr<<L"Error: Translation of lexical choice '"<<*itlch<<L"' is empty\n";
+		    }
+
+		    double aux_vote=0;
+		    //aux_vote=tlwordmodel.vote_from_word(lexchoice_translation[*itlch], 
+		    //				    translation_buffer[i][j])*translation_weighs[j];
+		    if (tlwordmodel.get_wordcount(lexchoice_translation[*itlch])>0) {
+		      aux_vote=(tlwordmodel.vote_from_word(lexchoice_translation[*itlch],translation_buffer[i][j])/
+				tlwordmodel.get_wordcount(lexchoice_translation[*itlch]))*translation_weighs[j];
+		      if (debug) {
+			wcerr<<L"C("<<lexchoice_translation[*itlch]<<L", "<<translation_buffer[i][j]<<L") = "
+			    <<tlwordmodel.vote_from_word(lexchoice_translation[*itlch],translation_buffer[i][j])<<L"\n";
+			wcerr<<L"C("<<lexchoice_translation[*itlch]<<L") = "<<tlwordmodel.get_wordcount(lexchoice_translation[*itlch])<<L"\n";
+		      }
+		    } else {
+		      if (tlwordmodel.vote_from_word(lexchoice_translation[*itlch],translation_buffer[i][j])>0) {
+			wcerr<<L"Error in LexTor::trainlch: TL vote is not null, but its word count is null.\n";
+			wcerr<<L"lexchoice_translation: "<<lexchoice_translation[*itlch]<<L"\n";
+			wcerr<<L"translation_buffer: "<<translation_buffer[i][j]<<L"\n";
+		      }
+		    }
+		    target_vote+=aux_vote;
+
+		    if(debug) {
+		      wcerr<<L"Target vote for ["<<lexchoice_translation[*itlch]
+			  <<L"] from ["<<translation_buffer[i][j]<<L"] = "<<aux_vote<<L"\n";
+		    }
+		  }
+
+		  if (target_vote>0) {
+		    local_context[*itlch][reduced_buffer[i]]+=target_vote;
+		    sumvotes_context[reduced_buffer[i]]+=target_vote;
+		  }
+		}
+	      }
+	    }
+
+	    if (debug) {
+	      wcerr<<L"COUNTS NORMALIZATION\n";
+	    }
+
+	    //Now we normalize the counts and estimate the number of
+	    //times each lexical choice has been seen.
+	    map<wstring, COUNT_DATA_TYPE> local_lexsum;
+	    double local_lexsumsum=0.0;
+	    for(itlch=lexical_choices.begin(); itlch!=lexical_choices.end(); itlch++) {
+	      int distance=(-1)*left; //Signed offset of slot i from the centre; the centre itself (offset 0) is skipped below
+	      for (int i=0; i<(int)buffer_max_size; i++) { 
+		if ((i!=word_index)&&(reduced_buffer[i]!=(*itword))) {
+		  if (local_context[*itlch][reduced_buffer[i]]>0) { //A positive entry implies sumvotes_context is positive too, so the division is safe
+		    double cc=local_context[*itlch][reduced_buffer[i]]/sumvotes_context[reduced_buffer[i]];
+		    double count_to_apply=cc/pow(fabs((double)distance),weigth_exponent);
+		    context[*itlch][reduced_buffer[i]]+=count_to_apply;
+		    if (debug) {
+		      wcerr<<L"Lexical choice: ["<<*itlch
+                          <<L"], context word: ["<<reduced_buffer[i]<<L"], "
+			  <<L"normalize count: "<<cc<<L"\n";
+		      wcerr<<L"Distance: "<<distance<<L", count to apply: "
+			  <<count_to_apply<<L"\n";
+
+		    }
+
+		    local_lexsum[*itlch]+=count_to_apply;
+		    local_lexsumsum+=count_to_apply;
+
+		    if (debug) {
+		      wcerr<<L"local_lexsum["<<*itlch<<L"] = "<<local_lexsum[*itlch]<<L"\n";
+		      wcerr<<L"local_lexsumsum = "<<local_lexsumsum<<L"\n";
+		    }
+
+		  }
+		}
+		distance++;
+	      }
+	    }
+
+	    for(itlch=lexical_choices.begin(); itlch!=lexical_choices.end(); itlch++) { //Each window adds the share of the mass won by every lexical choice
+	      if ((local_lexsum[*itlch]>0) && (local_lexsumsum>0))
+		lechsum[*itlch]+=local_lexsum[*itlch]/local_lexsumsum;
+	      if (debug) {
+		wcerr<<L"lechsum["<<*itlch<<L"] = "<<lechsum[*itlch]<<L"\n";
+	      }
+	    }
+	    
+
+	    if(debug) {
+	      wcerr<<L"\n";
+	      getchar();
+	    }
+
+	    break; //Set elements are unique, so no other entry can match the centre word
+	  }
+	}
+      }
+    } 
+
+    delete ltword;
+    ltword=LexTorWord::next_word(is,&dic);
+  }
+  
+  wcerr<<L"Corpus has "<<nw<<L" words\n";
+
+  //Set the count of each word
+  map<wstring, COUNT_DATA_TYPE>::iterator itws;
+  for(itws=wordsum.begin(); itws!=wordsum.end(); itws++) {
+    lextor_data->set_wordcount(itws->first,itws->second);
+    //if(debug) {
+    wcerr<<L"wordcount("<<itws->first<<L") = "<<itws->second<<L"\n";
+    //}
+  }
+
+  //All co-occurrences have been collected. Each lexical choice's context is handed over
+  //sorted with PairStringCountComparer; no truncation to the n most frequent happens here
+  for(itword=words2workwith.begin(); itword!=words2workwith.end(); itword++) {
+    set<wstring> lexical_choices=lexical_choices_of_word[*itword];
+    set<wstring>::iterator itlch;
+    for(itlch=lexical_choices.begin(); itlch!=lexical_choices.end(); itlch++) {
+      PairStringCountComparer comparer;
+      vector<pair<wstring, COUNT_DATA_TYPE> > context_v;
+      map<wstring, COUNT_DATA_TYPE>::iterator itm;
+
+      while(context[*itlch].size()>0) { //Moves the entries out of the map one at a time, leaving it empty
+	itm=context[*itlch].begin();
+	//wcerr<<itm->first<<L" "<<itm->second<<L"\n";
+	context_v.push_back(*itm);
+	context[*itlch].erase(itm);
+      }
+    
+      sort(context_v.begin(), context_v.end(), comparer);
+      wstring lch=*itlch;
+      lextor_data->set_cooccurrence_context(lch, context_v);
+      //lextor_data->set_lexchoice_sum(lch, tlwordmodel.get_lexchoice_sum(lexchoice_translation[lch]));
+
+      //wcerr<<L"lexchoice_sum("<<lch<<L") = lexchoice_sum_target("<<lexchoice_translation[lch]<<L") ="
+      //    <<tlwordmodel.get_lexchoice_sum(lexchoice_translation[lch])<<L"\n";
+    }
+  } 
+
+  //Set the count of each lexical choice
+  map<wstring, COUNT_DATA_TYPE>::iterator itlcs;
+  for(itlcs=lechsum.begin(); itlcs!=lechsum.end(); itlcs++) {
+    lextor_data->set_lexchoice_sum(itlcs->first,itlcs->second);
+    //if(debug) {
+    wcerr<<L"lexchoice_sum("<<itlcs->first<<L") = "<<itlcs->second<<L"\n";
+    //}
+  }
+
+
+  wcerr<<L"Training done\n"; 
+}
+
+void // Reads words from `is`, writes each one to wcout with a chosen lexical choice, and optionally reports every decision to `lteval`
+LexTor::lexical_selector(wistream& is, FSTProcessor &fstp, int left, int right, double weigth_exponent, LexTorEval* lteval) {
+  if (lextor_data==NULL) { //Selection reads lextor_data, so it must have been attached; otherwise the process exits
+    wcerr<<L"Error in LexTor::lexical_selector, you must call set_lextor_data before\n";
+    exit(EXIT_FAILURE);
+  }
+
+  deque<LexTorWord> buffer; //Words held back, in input order, while an earlier ambiguous word is still undecided
+  deque<LexTorWord> window; //Context window of left+1+right non-stopwords; slot `left` is the word being decided
+
+  LexTorWord  nullword(L"NULLWORD", &fstp);
+
+  for(int i=0; i<(left+right+1); i++) //The window starts full of NULLWORD placeholders
+    window.push_back(nullword);
+
+  int retain=0; //Number of ambiguous words that entered the window and are not decided yet
+
+  LexTorWord* ltword=NULL;
+  ltword=LexTorWord::next_word(is, &fstp);
+
+  while(ltword) {
+    //wcerr<<L"Word read: "<<ltword->get_word_string()
+    //<<L", reduced: "<<lextor_data->reduce(ltword->get_word_string())<<L" ";
+    //wcerr<<L"# lexical choices: "<<ltword->n_lexical_choices()<<L"\n";
+
+    if (!lextor_data->is_stopword(lextor_data->reduce(ltword->get_word_string()))) { 
+      if (window.size()>=(unsigned)(left+1+right)) 
+	window.pop_front();
+      
+      window.push_back(*ltword);
+
+      if (ltword->n_lexical_choices()>1) { //Ambiguous word: it is only buffered if another ambiguous word is already pending
+	retain++;
+	if (retain>1)
+	  buffer.push_back(*ltword);
+      } else {
+	if (retain>0) 
+	  buffer.push_back(*ltword);
+	else { //Nothing pending: the word is written straight away with choice index -1
+	  wcout<<ltword->get_lexical_choice(-1,true);
+	  if (lteval) 
+	    lteval->evalword(*ltword, -1, lextor_data);
+	}
+      }
+
+      if (window[left].n_lexical_choices()>1) { //The ambiguous word has reached the centre slot: decide it now
+
+	if (debug) {
+	  wcerr<<L"WINDOW: ";
+	  for(int i=0; i<(int)window.size(); i++) {
+	    if(i==left)
+	      wcerr<<L"[>>>>"<<window[i].get_word_string()<<L"<<<<] ";
+	    else
+	      wcerr<<L"["<<window[i].get_word_string()<<L"] ";
+	  }
+	  wcerr<<L"\n";
+	  wcerr<<L"BUFFER: ";
+	  for(int i=0; i<(int)buffer.size(); i++)
+	    wcerr<<L"["<<buffer[i].get_word_string()<<L"] ";
+	  wcerr<<L"\n\n";
+  
+	}
+
+	int winner=estimate_winner_lch(window, left, weigth_exponent);
+
+	wcout<<window[left].get_lexical_choice(winner,true);
+	if (lteval) 
+	  lteval->evalword(window[left], winner, lextor_data);
+	
+	//For debug
+	/*
+	  cout<<L" | ";
+	  for(int j=0; j<window[left].n_lexical_choices(); j++) {
+	  if (j>0)
+	  cout<<L"|";
+	  cout<<window[left].get_lexical_choice(j,false);
+	  }
+	  cout<<L"\n";
+	*/
+
+	//Now those words that were retained must be released
+	if(retain>0) {
+	  while ((buffer.size()>0)&&(buffer[0].n_lexical_choices()==1)) { //Flush buffered words up to the next ambiguous one
+	    wcout<<buffer[0].get_lexical_choice(-1,true);
+	    if (lteval) 
+	      lteval->evalword(buffer[0], -1, lextor_data);
+	    buffer.pop_front();
+	  }
+	  if ((buffer.size()>0)&&(buffer[0].n_lexical_choices()>1)) //That ambiguous word is dropped from the buffer without output here
+	    buffer.pop_front(); 
+
+	  retain--;
+	}
+      } 
+    } else { //It's a stopword
+      if (retain>0) 
+	buffer.push_back(*ltword);
+      else {
+	wcout<<ltword->get_lexical_choice(-1,true);
+	if (lteval) 
+	  lteval->evalword(*ltword, -1, lextor_data);
+      }
+    }
+
+    delete ltword;
+    ltword=LexTorWord::next_word(is, &fstp);
+  }
+
+  if (retain>0) { //End of input: decide the ambiguous words that sit to the right of the centre slot
+    for(unsigned i=left+1; i<window.size(); i++) {
+      if (window[i].n_lexical_choices()>1) {
+	int winner=estimate_winner_lch(window, i, weigth_exponent);
+
+	wcout<<window[i].get_lexical_choice(winner,true);
+	if (lteval)
+	  lteval->evalword(window[i], winner, lextor_data);
+
+	//For debug
+	/*
+	  cout<<L" | ";
+	  for(int j=0; j<window[i].n_lexical_choices(); j++) {
+	  if (j>0)
+	  cout<<L"|";
+	  cout<<window[i].get_lexical_choice(j,false);
+	  }
+	  cout<<L"\n";
+	*/
+
+	//Now those words that were retained must be released
+	if(retain>0) {
+	  while ((buffer.size()>0)&&(buffer[0].n_lexical_choices()==1)) {
+	    wcout<<buffer[0].get_lexical_choice(-1,true);
+	    if (lteval) 
+	      lteval->evalword(buffer[0], -1, lextor_data);
+	    buffer.pop_front();
+	  }
+	  if ((buffer.size()>0)&&(buffer[0].n_lexical_choices()>1))
+	    buffer.pop_front(); 
+
+	  retain--;
+	}
+
+      }
+    }
+  }
+
+  //wcerr<<L"retain: "<<retain<<L"\n";
+}
+
+// Strategy switch: forwards to the voting estimator. The alternatives
+// (_cosine, _mostprob, _votingtl, or a constant -1) are not compiled in.
+int LexTor::estimate_winner_lch(deque<LexTorWord>& window, int word_index,
+                                double weigth_exponent)
+{
+  const int chosen = estimate_winner_lch_voting(window, word_index, weigth_exponent);
+  return chosen;
+}
+
+int // Returns the index of the lexical choice of window[word_index] with the highest positive score, or -1 when no score is positive
+LexTor::estimate_winner_lch_voting(deque<LexTorWord>& window, int word_index, double weigth_exponent) {
+  vector <double> lexchoices_count(window[word_index].n_lexical_choices()); //One score per lexical choice
+
+  if (debug) {
+    wcerr<<L"WINDOW: ";
+    for(unsigned i=0; i<window.size(); i++) {
+      if (i==(unsigned)word_index)
+	wcerr<<L"[>>>>"<<lextor_data->reduce(window[i].get_word_string())<<L"<<<<] ";
+      else
+	wcerr<<L"["<<lextor_data->reduce(window[i].get_word_string())<<L"] ";
+    }
+    wcerr<<L"\n";
+  }
+
+  //Total lexchoice_sum over all the choices of the target word, and the count of that word
+  double sum_lexchoices=0.0;
+  for(int i=0; i<window[word_index].n_lexical_choices(); i++) {
+    double aux_lexchoice_sum=lextor_data->get_lexchoice_sum(lextor_data->reduce_lexical_choice(window[word_index].get_lexical_choice(i,false)));
+    sum_lexchoices+=aux_lexchoice_sum;
+    if (debug) {
+      wcerr<<L"lexchoice_sum("<<lextor_data->reduce_lexical_choice(window[word_index].get_lexical_choice(i,false))<<L") = "<<aux_lexchoice_sum<<L"\n";
+    }
+  }
+  double wordcount=lextor_data->get_wordcount(lextor_data->reduce(window[word_index].get_word_string()));
+  if (debug) {
+    wcerr<<L"wordcount("<<lextor_data->reduce(window[word_index].get_word_string())<<L") = "<<wordcount<<L"\n";
+  }
+  //Score every lexical choice with the distance-weighted votes of the other window words
+
+  for(int i=0; i<window[word_index].n_lexical_choices(); i++) {
+    lexchoices_count[i]=0;
+    wstring reduced_lexchoice=lextor_data->reduce_lexical_choice(window[word_index].get_lexical_choice(i,false));
+    if (debug) {
+      wcerr<<L"lexical choice: "<<window[word_index].get_lexical_choice(i)<<L" reduced: "<<reduced_lexchoice<<L"\n";
+    }
+
+    int distance=(-1)*(word_index); //Signed offset of slot j from the target; the target itself (offset 0) is skipped below
+    for(int j=0; j<(int)window.size(); j++) { 
+      //For all words in the context window
+      if(j!=word_index) {
+	COUNT_DATA_TYPE vote=0;
+
+	wstring reduced_word=lextor_data->reduce(window[j].get_word_string());
+
+	if (lextor_data->get_wordcount(reduced_word)>0) { //Only context words known to the model vote
+	  vote=lextor_data->vote_from_word(reduced_lexchoice, reduced_word)/
+	    (((lextor_data->get_lexchoice_sum(reduced_lexchoice))/sum_lexchoices)*wordcount); // NOTE(review): the divisor is 0 or NaN if sum_lexchoices, this lexchoice_sum or wordcount is 0 - confirm that cannot happen
+
+	  lexchoices_count[i]+=vote/pow(fabs((double)distance),weigth_exponent);
+	}
+
+	if (debug) {
+	  wcerr<<L"Count for "<<reduced_lexchoice<<L" from "<<reduced_word<<L" is "<<vote<<L"\n";
+	  wcerr<<L"Vote: "<<lextor_data->vote_from_word(reduced_lexchoice, reduced_word)<<L" word count: "
+	      <<lextor_data->get_wordcount(reduced_word)<<L"\n";
+	  wcerr<<L"["<<reduced_word<<L"] DISTANCE: "<<distance<<L", ";
+	  wcerr<<L" Count to apply: "<<vote/pow(fabs((double)distance),weigth_exponent)<<L"\n";
+	}
+      }
+      distance++;
+    }
+  }
+
+  //Now the winner is calculated
+  int winner=-1; //This will make the default one to be used if unchanged
+  COUNT_DATA_TYPE winner_vote=-100000000;
+  for(int i=0; i<window[word_index].n_lexical_choices(); i++) {
+    if ((lexchoices_count[i]>0) && (lexchoices_count[i]>winner_vote)) { //Strict comparison: on a tie the earliest choice is kept
+      winner_vote=lexchoices_count[i];
+      winner=i;
+    } 
+    /*
+      else if ((lexchoices_count[i]>0) && (lexchoices_count[i]==winner_vote)) {
+      //Take the most probable one, the one with the highest sum
+      COUNT_DATA_TYPE sum_i=lextor_data->get_lexchoice_sum(lextor_data->reduce(window[word_index].get_lexical_choice(i)));
+      COUNT_DATA_TYPE sum_win=lextor_data->get_lexchoice_sum(lextor_data->reduce(window[word_index].get_lexical_choice(winner)));
+      if (sum_i>sum_win)
+      winner=i;
+      }
+    */
+  }
+  
+  if (debug) {
+    wcerr<<L"WINNER: "<<winner<<L" "<<window[word_index].get_lexical_choice(winner)<<L"\n";
+  }
+  return winner;
+}
+
+// Picks the lexical choice of window[word_index] with the largest
+// lexchoice_sum, ignoring the context; weigth_exponent is not used.
+// Returns -1 if no sum beat the initial -1 or if the best sum is exactly 0.
+int LexTor::estimate_winner_lch_mostprob(deque<LexTorWord>& window, int word_index,  double weigth_exponent) {
+  LexTorWord& target = window[word_index];
+  int best = -1;
+  double best_sum = -1;
+
+  for (int k = 0; k < target.n_lexical_choices(); ++k) {
+    wstring choice = lextor_data->reduce_lexical_choice(target.get_lexical_choice(k, false));
+    double choice_sum = lextor_data->get_lexchoice_sum(choice);
+
+    if (debug)
+      wcerr<<L"sum("<<choice<<L") = "<<choice_sum<<L"\n";
+
+    if (choice_sum > best_sum) {
+      best = k;
+      best_sum = choice_sum;
+    }
+  }
+
+  if (best_sum == 0)
+    best = -1;
+  if (debug)
+    wcerr<<L"WINNER: "<<best<<L" "<<target.get_lexical_choice(best)<<L"\n";
+  return best;
+}
+
+int // Returns the index of the lexical choice whose vector forms the smallest angle with the context vector, or -1 when diff_angle <= angleth
+LexTor::estimate_winner_lch_cosine(deque<LexTorWord>& window, int word_index, double weigth_exponent) {
+  map<wstring, double> vcontext; //Context vector: reduced context word -> sum of 1/|distance|^weigth_exponent
+
+  int distance=(-1)*(word_index); //Signed offset of slot i from the target; the target itself (offset 0) is skipped below
+  for(int i=0; i<(int)window.size(); i++) {
+    if (i!=word_index) {
+      wstring reduced_word=lextor_data->reduce(window[i].get_word_string());
+      vcontext[reduced_word]+=1.0/pow(fabs((double)distance),weigth_exponent);
+    }
+    distance++;
+  }
+
+  if (debug) {
+    wcerr<<L"CONTEXT VECTOR\n-------------------\n";
+    map<wstring, double>::iterator it;
+    for(it=vcontext.begin(); it!=vcontext.end(); it++)
+      wcerr<<it->first<<L", "<<it->second<<L"\n";
+  }
+
+  ////double max_cosine=-2;
+  double min_angle=360; //360 doubles as the "no angle accepted yet" marker
+  int winner=-1;
+  double diff_angle=-1; //-1 doubles as the "no difference computed yet" marker
+  for(int i=0; i<window[word_index].n_lexical_choices(); i++) {
+    wstring reduced_lexchoice=lextor_data->reduce_lexical_choice(window[word_index].get_lexical_choice(i,false));
+
+    double aux_cosine=cosine(vcontext, reduced_lexchoice); //cosine() returns -2 on failure, which makes acos() below yield NaN
+    double aux_angle=(acos(aux_cosine)*180)/PI; //Angle in degrees
+    if (debug) {
+      wcerr<<L"cos("<<lextor_data->reduce(window[word_index].get_word_string())<<L", "
+	  <<reduced_lexchoice<<L") = "<<aux_cosine<<L"; ang = "<<aux_angle<<L" grades\n";
+    }
+
+    if (aux_angle<min_angle) { //New best: remember how far it is from the previous best, if there was one
+      if (min_angle!=360) {
+	diff_angle=min_angle-aux_angle;
+      }
+      min_angle=aux_angle;
+      winner=i;
+    } else if ((min_angle!=360)&&(diff_angle==-1)) { //Not better, and no difference recorded yet: record the gap to the current best
+      diff_angle=fabs(min_angle-aux_angle);
+    }
+
+
+    /*
+      if (aux_cosine>max_cosine) {
+      diff_angle=abs(min_angle-aux_angle);
+      winner=i;
+      max_cosine=aux_cosine;
+      min_angle=aux_angle;
+      }
+    */
+  }
+
+  if (debug) {
+    wcerr<<L"DIFF ANGLE: "<<diff_angle<<L"\n";
+  }
+  if (diff_angle<=angleth) //The recorded gap does not exceed the threshold: no winner is reported
+    winner=-1;
+
+  if (debug) 
+    wcerr<<L"WINNER: "<<winner<<L" "<<window[word_index].get_lexical_choice(winner)<<L"\n";
+  
+  return winner;
+}
+
+int // Returns the index of the lexical choice of window[word_index] whose translation collects the most target-language votes, or -1 when no score is positive
+LexTor::estimate_winner_lch_votingtl(deque<LexTorWord>& window, int word_index, double weigth_exponent) { // weigth_exponent is not used by this estimator
+  if (tlmodel==NULL) { //The target-language model is mandatory here; the process exits without it
+    wcerr<<L"Error in LexTor::estimate_winner_lch_votingtl: you must call LexTor::set_tlmodel first.\n";
+    exit(EXIT_FAILURE);
+  }  
+
+  vector <double> lexchoices_count(window[word_index].n_lexical_choices()); //One score per lexical choice
+  vector <vector <wstring> > translation_window (window.size()); //translation_window[i][j]: reduced translation of choice j of window word i
+  vector <wstring> reduced_window(window.size());
+
+  for (unsigned i=0; i<window.size(); i++) 
+    reduced_window[i]=lextor_data->reduce(window[i].get_word_string());	      
+  
+  if(debug) {
+    wcerr<<L"WINDOW: ";
+    for (unsigned i=0; i<window.size(); i++) {
+      if(i==(unsigned)word_index)
+	wcerr<<L"[>>>>"<<reduced_window[i]<<L"<<<<] ";
+      else
+	wcerr<<L"["<<reduced_window[i]<<L"] ";
+    }
+    wcerr<<L"\n";
+    wcerr<<L"TRANSLATED: ";
+  }
+
+  //We translate each word in the context
+  //Note: Words in the context can also be ambiguous (with more than one lexical choice)
+  for (unsigned i=0; i<window.size(); i++) {
+    wstring str_translations=L"";
+    for(int j=0; j<window[i].n_lexical_choices(); j++) {
+      wstring tr=tlmodel->reduce(window[i].translate(*fstpbil,j));
+      translation_window[i].push_back(tr);
+      str_translations+=tr+L"/";
+    }
+    if (debug) {
+      if (i==(unsigned)word_index)
+	wcerr<<L"[>>>>"<<str_translations<<L"<<<<] ";
+      else
+	wcerr<<L"["<<str_translations<<L"] ";
+    }
+  }
+
+  if(debug)
+    wcerr<<L"\n";
+
+  //For each lexical choice the counts from the TL are collected
+  for(unsigned i=0; i<(unsigned)window[word_index].n_lexical_choices(); i++) {
+    lexchoices_count[i]=0;
+
+    for (unsigned k=0; k<window.size(); k++) {
+      if ((k!=(unsigned)word_index)&&(reduced_window[k]!=reduced_window[word_index])) {
+	COUNT_DATA_TYPE target_vote=0;
+
+	//If the SL word is ambiguous it will have more than one
+	//translation into TL, so we need to normalize using the
+	//frequency of words in the TL
+    vector <double> translation_weighs(translation_window[k].size());
+	double sum=0.0;
+	if (translation_window[k].size()>1) {
+	  for(unsigned j=0; j<translation_window[k].size(); j++) {
+	    translation_weighs[j]=tlmodel->get_lexchoice_sum(translation_window[k][j]);
+	    sum+=translation_weighs[j];
+
+	    //!!!!! To make it ignore the polysemous
+	    //!!!!! words of the context
+	    ///////translation_weighs[j]=0;
+	    //!!!!!
+	    //!!!!!
+
+	    if (debug) {
+	      wcerr<<L"Frequency of translation ["<<translation_window[k][j]<<L"] ="
+		  <<translation_weighs[j]<<L"\n";
+	    }
+	  }
+	} else {
+	  if (!translation_weighs.empty()) translation_weighs[0]=1; //Guard: the vector is empty when the word has no lexical choices
+	  sum=1;
+	}
+
+	for(unsigned j=0; j<translation_window[k].size(); j++) {
+	  translation_weighs[j]=translation_weighs[j]/sum;
+	  if (debug) {
+	    wcerr<<L"Weight of translation ["<<translation_window[k][j]<<L"] ="
+		<<translation_weighs[j]<<L"\n";
+	  }
+	}
+
+	for(unsigned j=0; j<translation_window[k].size(); j++) {
+	  double aux_vote=0;
+	  //aux_vote=tlwordmodel.vote_from_word(lexchoice_translation[*itlch], 
+	  //				    translation_buffer[i][j])*translation_weighs[j];
+	  if(debug) 
+	    wcerr<<translation_window[word_index][i]<<L" "<<translation_window[k][j]<<L" "
+		<<tlmodel->vote_from_word(translation_window[word_index][i],translation_window[k][j])<<L" "
+		<<tlmodel->get_wordcount(translation_window[k][j])<<L" "<<translation_weighs[j]<<L"\n";
+	
+	  if (tlmodel->get_wordcount(translation_window[k][j])>0) { //Votes are normalised by the count of the context translation
+	    aux_vote=(tlmodel->vote_from_word(translation_window[word_index][i],translation_window[k][j])/
+		      tlmodel->get_wordcount(translation_window[k][j]))*translation_weighs[j];
+	  } 
+	  target_vote+=aux_vote;
+
+	  if(debug) {
+	    wcerr<<L"Target vote for ["<<translation_window[word_index][i]
+		<<L"] from ["<<translation_window[k][j]<<L"] = "<<aux_vote<<L"\n";
+	  }
+	}
+
+	lexchoices_count[i]+=target_vote;
+      }
+    }
+  }
+
+
+  if(debug) {
+    for(int i=0; i<window[word_index].n_lexical_choices(); i++) 
+      wcerr<<L"lexchoicecount["<<i<<L"] = "<<lexchoices_count[i]<<L"\n";
+    //getchar();
+  }
+
+  //Now the winner is calculated
+  int winner=-1; //This will make the default one to be used if unchanged
+  COUNT_DATA_TYPE winner_vote=-100000000;
+  for(int i=0; i<window[word_index].n_lexical_choices(); i++) {
+    if ((lexchoices_count[i]>0) && (lexchoices_count[i]>winner_vote)) { //Strict comparison: on a tie the earliest choice is kept
+      winner_vote=lexchoices_count[i];
+      winner=i;
+    } 
+  }
+
+  if (debug) 
+    wcerr<<L"WINNER: "<<winner<<L" "<<window[word_index].get_lexical_choice(winner)<<L"\n";
+
+  return winner;
+}
+
+// Cosine between the context vector `vcontext` and the co-occurrence vector
+// that lextor_data holds for `reduced_lexchoice`.
+// Returns -2 (outside the [-1, 1] range of a cosine) when either vector has
+// zero length; only the zero-length context case is reported unconditionally.
+double LexTor::cosine(map<wstring, double>& vcontext, const wstring& reduced_lexchoice) {
+  typedef map<wstring, double>::iterator entry_it;
+
+  double dot=0;            // scalar product of both vectors
+  double ctx_sq_norm=0;    // squared length of vcontext
+
+  // Both sums walk vcontext in the same order, so one pass yields the same
+  // values as two separate passes.
+  for(entry_it e=vcontext.begin(); e!=vcontext.end(); ++e) {
+    dot+=(e->second)*(lextor_data->vote_from_word(reduced_lexchoice, e->first));
+    ctx_sq_norm+=(e->second)*(e->second);
+  }
+
+  const double ctx_norm=sqrt(ctx_sq_norm);
+  // Length of the lexical-choice vector, as reported by lextor_data
+  const double lch_norm=lextor_data->get_module_lexchoice_vector(reduced_lexchoice);
+
+  if (ctx_norm==0) {
+    wcerr<<L"Error in LexTor::vectors_cosine: module_vcontext is equal to zero.\n"
+	<<L"The cosine cannot be computed\n";
+    if (debug) {
+      wcerr<<L"CONTEXT VECTOR\n-------------------\n";
+      for(entry_it e=vcontext.begin(); e!=vcontext.end(); ++e)
+	wcerr<<e->first<<L", "<<e->second<<L"\n";
+    }
+    return -2;
+  }
+
+  if (lch_norm==0) {
+    // Silent unless debugging
+    if (debug) {
+      wcerr<<L"Warning in LexTor::vectors_cosine: module_lexchoice_vector is equal to zero.\n"
+	  <<L"The cosine cannot be computed\n";
+      wcerr<<L"reduced lexical choice: "<<reduced_lexchoice<<L"\n";
+    }
+    return -2;
+  }
+
+  // Regular case: both lengths are non-zero
+  return dot/(ctx_norm*lch_norm);
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium-prelatex.l
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-prelatex.l	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-prelatex.l	(revision 69632)
@@ -0,0 +1,476 @@
+
+
+%{
+
+
+
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include <apertium/latex_accentsmap.h>
+
+extern "C" {
+#if !defined(__STDC__)
+# define __STDC__ 1
+#endif
+#include <regex.h>
+}
+
+#include <string>
+#include <lttoolbox/lt_locale.h>
+#include <lttoolbox/ltstr.h>
+#ifndef GENFORMAT
+#include "apertium_config.h"
+#endif
+#include <apertium/unlocked_cstdio.h>
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace std;
+
+AccentsMap accentsMap(false);
+wstring closesym = L"";
+string memconv = "";
+//For german babel detection
+bool ngermanbabel = false;
+
+// Converts `length` bytes of `multibyte` to a wide string in the current locale.
+// Bytes that do not convert yet are kept in `memconv` and retried, together
+// with the new input, on the next call; L"" is returned in that case.
+wstring convertir(string const &multibyte, int const length)
+{
+  // Fix: never read past the end of `multibyte` (some callers pass a length
+  // one larger than the text they hand over) and ignore negative lengths.
+  size_t usable = (length > 0) ? static_cast<size_t>(length) : 0;
+  if(usable > multibyte.size())
+    usable = multibyte.size();
+  memconv.append(multibyte, 0, usable);
+
+  vector<wchar_t> wide(memconv.size() + 1); // owns the buffer on every path
+  size_t const converted = mbstowcs(&wide[0], memconv.c_str(), memconv.size());
+  if(converted == ((size_t) -1))
+  {
+    if(memconv.size() >= 4) // presumably: longer than any pending multibyte sequence
+    {
+      wcerr << L"Warning: wrong encoding" << endl;
+    }
+    return L"";
+  }
+  memconv = "";
+  return wstring(&wide[0], converted);
+}
+
+
+
+
+%}
+
+
+%option nounput
+%option noyywrap
+%option stack
+
+%x mathenv
+%x readbrackets
+
+%%
+
+
+
+
+
+\\t\{..\}	{ // tie over two bytes: only the two bracketed bytes are written, the tie itself is lost
+	fputws(convertir(yytext+3,yyleng-4).c_str(),yyout);
+}
+\\l	{ // l with stroke
+	fputws(L"ł", yyout);
+}
+
+\"[oOaAuUsS]	{ // German shorthand; expanded only after ngermanbabel has been set by the usepackage rule
+	if(!ngermanbabel)
+		fputws(convertir(yytext,yyleng).c_str(),yyout);
+	else {
+		switch(yytext[1]){
+			case 'o': fputws(L"ö", yyout); break;
+			case 'O': fputws(L"Ö", yyout); break;
+			case 'a': fputws(L"ä", yyout); break;
+			case 'A': fputws(L"Ä", yyout); break;
+			case 'u': fputws(L"ü", yyout); break;
+			case 'U': fputws(L"Ü", yyout); break;
+			case 's': fputws(L"ß", yyout); break;
+			case 'S': fputws(L"ß", yyout); break; // same output as the lowercase case
+		}
+	}
+}
+
+
+
+\\[\^\"\'`]((\{\\[ij]\})|(\\[ij]))	{
+	// Accent placed over a dotless i or j, written braced, as in \^{\i},
+	// or bare, as in \^\i. The accent character is yytext[1] and selects
+	// the precomposed letter that is written.
+	// Fix: the letter was always read from offset 4, which is only right for
+	// the braced form; in the bare form it is at offset 3, so every bare \i
+	// was treated as a j.
+	bool const is_i = (yytext[(yytext[2]=='{') ? 4 : 3]=='i');
+
+	switch(yytext[1]){
+	case '^':
+		fputws(is_i ? L"î" : L"ĵ", yyout);
+		break;
+
+	case '\"':
+		fputws(is_i ? L"ï" : L"j", yyout); //should actually be j with umlaut
+		break;
+
+	case '\'':
+		fputws(is_i ? L"í" : L"j", yyout); //should actually be j with acute accent
+		break;
+
+	case '`':
+		// Fix: the j case used to emit a k here.
+		fputws(is_i ? L"ì" : L"j", yyout); //should actually be j with grave accent
+		break;
+	}
+}
+
+\{\\oe\}	{
+	fputws(L"œ",yyout);
+}
+
+\{\\OE\}	{
+	fputws(L"Œ",yyout);
+}
+
+\{\\ae\}	{
+	fputws(L"æ",yyout);
+}
+
+\{\\AE\}	{
+	fputws(L"Æ",yyout);
+}
+
+\{\\aa\}	{
+	fputws(L"å",yyout);
+}
+
+\{\\AA\}	{
+	fputws(L"Å",yyout);
+}
+
+\{\\o\}	{
+	fputws(L"ø",yyout);
+}
+
+\{\\O\}	{
+	fputws(L"Ø",yyout);
+}
+
+\{\\ss\}	{
+	fputws(L"ß",yyout);
+}
+
+\\#[0-9]+	{
+        fputws((wstring(L"<HASH_")+convertir(yytext+2,yyleng-2)+wstring(L"/>")).c_str(),yyout);
+}
+
+\\#		{
+        fputws(L"<HASH/>", yyout);
+}
+
+\\[`'\^\"H~ck=b.druv]((\{.\})|(.))	{
+	// Accent command applied to one character, with or without braces.
+	wstring const ws = convertir(yytext,yyleng);
+	size_t const letter = (yyleng==3) ? 2 : 3; // index of the accented character
+	// Fix: convertir returns an empty string while bytes are still pending in
+	// its buffer; taking substr of that result used to throw std::out_of_range.
+	if(ws.size() > letter)
+	{
+	  wstring const result = accentsMap.get(ws.substr(1,1)+ws.substr(letter,1));
+	  if(result == L"")
+	    // Unknown combination: emit the command as a tag. Fix: reuse ws instead
+	    // of converting yytext+1 with a length that also covered its terminator.
+	    fputws((wstring(L"<")+ws.substr(1)+wstring(L"/>")).c_str(),yyout);
+	  else
+	    fputws(result.c_str(), yyout);
+	}
+}
+
+\\\\	{
+	fputws(L"<BR/>",yyout);
+}
+
+\%.*	{
+	if(yytext[yyleng-1]=='\r')
+		fputws((wstring(L"<COMMENT>")+convertir(yytext+1,yyleng-2)+wstring(L"</COMMENT>\r")).c_str(),yyout);
+	else
+		fputws((wstring(L"<COMMENT>")+convertir(yytext+1,yyleng-1)+wstring(L"</COMMENT>")).c_str(),yyout);
+}
+
+\\usepackage\[[^\]]*\]	{ // package options: yytext+12 skips the command name and the opening bracket
+	wstring ws = convertir(yytext+12,yyleng-13);
+	fputws((wstring(L"<usepackage/><PARAM>")+ws+wstring(L"</PARAM>")).c_str(), yyout);
+	if(ws.find(L"ngerman") != wstring::npos) // switches on the German shorthand rule for the rest of the input
+		ngermanbabel = true;
+}
+
+\[[^\]]*\]	{ // any other bracketed text is wrapped in a PARAM element, brackets removed
+	fputws((wstring(L"<PARAM>")+convertir(yytext+1,yyleng-2)+wstring(L"</PARAM>")).c_str(), yyout);
+}
+
+\\begin[^a-zA-Z0-9_]	{ // opening of an environment: its name is read in the readbrackets state
+	BEGIN(readbrackets);
+	closesym = L""; // opening tag, no slash
+}
+
+\\end[^a-zA-Z0-9_]	{ // closing of an environment: same state, but the tag gets a slash
+	BEGIN(readbrackets);
+	closesym = L"/";
+}
+
+
+
+<readbrackets>[ \n\r\t]*\{?[ \n\r\t]*	{ // whitespace around the optional opening brace is kept, the brace is dropped
+	wstring ws = convertir(yytext,yyleng);
+	int i = ws.find(L'{'); //remove it
+	if(i>=0) // find returned a position; npos becomes negative once stored in an int
+		ws = ws.substr(0,i)+ws.substr(i+1);
+	fputws(ws.c_str(),yyout);
+}
+
+<readbrackets>[a-zA-Z0-9]+\*	{ // starred environment name: the star is replaced by the suffix _STAR
+	fputws((wstring(L"<")+closesym+convertir(yytext,yyleng-1)+wstring(L"_STAR>")).c_str(),yyout);
+}
+
+<readbrackets>[a-zA-Z0-9]+	{ // plain environment name becomes the tag name
+	fputws((wstring(L"<")+closesym+convertir(yytext,yyleng)+wstring(L">")).c_str(),yyout);
+}
+
+<readbrackets>[ \n\r\t]*\}[ \n\r\t]*	{ // closing brace: back to the initial state, brace dropped, whitespace kept
+	BEGIN(0);
+	wstring ws = convertir(yytext,yyleng);
+	int i = ws.find(L'}'); //remove it
+	if(i>=0)
+		ws = ws.substr(0,i)+ws.substr(i+1);
+	fputws(ws.c_str(),yyout);
+}
+
+
+\\[A-Za-z]+\*	{
+	fputws((wstring(L"<")+convertir(yytext+1,yyleng-2)+wstring(L"_STAR/>")).c_str(),yyout);
+}
+
+\\[A-Za-z]+	{ // any other command becomes an empty element named after it
+	fputws((wstring(L"<")+convertir(yytext+1,yyleng-1)+wstring(L"/>")).c_str(),yyout); // fix: length now excludes the skipped backslash
+}
+
+\\\{	{
+        fputws(L"<LEFTESCAPEDBRACE/>", yyout);
+        }
+
+\\\}	{ // fix: the pattern repeated the left brace rule above, so this action could never run
+        fputws(L"<RIGHTESCAPEDBRACE/>", yyout);
+        }
+
+\\\%	{
+        fputws(L"<ESCAPEDPERCENT/>", yyout);
+        }
+
+\{	{
+	fputws(L"<CONTENTS>",yyout);
+}
+
+\}	{
+	fputws((wstring(L"</CONTENTS>")).c_str(),yyout);
+}
+
+~	{
+	fputws(L"&NBSP;",yyout);
+}
+
+\$\$	{
+	BEGIN(mathenv);
+	fputws(L"<MATH_DOLLARS>",yyout);
+}
+
+<mathenv>\$\$	{
+	fputws(L"</MATH_DOLLARS>",yyout);
+	BEGIN(0);
+}
+
+\$	{
+	BEGIN(mathenv);
+	fputws(L"<MATH_DOLLAR>",yyout);
+}
+
+<mathenv>\$	{
+	fputws(L"</MATH_DOLLAR>",yyout);
+	BEGIN(0);
+}
+
+\\verb[|][^|]+[|]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\verb[!][^!]+[!]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\verb[?][^?]+[?]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\verb[/][^/]+[/]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\verb[#][^#]+[#]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\verb[+][^+]+[+]	{
+        fputws(L"<VERB>",yyout);
+        wstring ws = convertir(yytext, yyleng);
+        fputws(ws.substr(5, ws.size()-5).c_str(), yyout);
+        fputws(L"</VERB>", yyout);
+}
+
+\\\(	{
+	fputws(L"<MATH_PAR>",yyout);
+}
+
+\\\)	{
+	fputws(L"</MATH_PAR>",yyout);
+}
+
+\\\[	{
+	fputws(L"<MATH_BRA>",yyout);
+}
+
+\\\]	{
+	fputws(L"</MATH_BRA>",yyout);
+}
+
+\?`	{
+	fputws(L"¿",yyout);
+}
+
+!`	{
+	fputws(L"¡",yyout);
+}
+
+\"	{
+	fputws(L"&quot;",yyout);
+}
+\'	{
+	fputws(L"&apos;",yyout);
+}
+\<	{
+	fputws(L"&lt;",yyout);
+}
+\>	{
+	fputws(L"&gt;",yyout);
+}
+\\\&	{
+	fputws(L"&amp;",yyout);
+}
+\&	{
+	fputws(L"<AMP/>",yyout);
+}
+
+
+
+
+
+(.|\n|\r)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+<mathenv>(.|\n)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+
+<<EOF>>	{
+	return 0;
+}
+%%
+
+
+
+// Prints the command-line synopsis on stderr and terminates the process.
+// Note that the exit status is EXIT_SUCCESS even when called after an error.
+void usage(string const &progname)
+{
+  cerr << "USAGE: " << progname << " [input_file [output_file]]" << endl
+       << "LaTeX format preprocessor " << endl;
+  exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[]) // runs the scanner from yyin to yyout; arguments: [-i] [input_file [output_file]]
+{
+  LtLocale::tryToSetLocale();
+  size_t base = 0; // number of leading option arguments to skip over
+
+  if(argc >= 2 && !strcmp(argv[1],"-i")) // "-i" is accepted; its only effect here is to shift the file arguments
+  {
+    base++;
+  }
+  // NOTE(review): the limit is 4 although usage() documents at most two file arguments
+ if((argc-base) > 4)
+  {
+    usage(argv[0]);
+  }
+  // argc-base counts the program name plus the remaining arguments
+  switch(argc-base)
+  {
+    case 3:
+      yyout = fopen(argv[2+base], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      } // no break: the input file is opened as well
+    case 2:
+      yyin = fopen(argv[1+base], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default: // no file arguments (or more than two): yyin and yyout are left untouched
+      break;
+  }
+
+#ifdef _WIN32
+  _setmode(_fileno(yyin), _O_U8TEXT);
+  _setmode(_fileno(yyout), _O_U8TEXT);
+#endif
+  // prevent warning message: presumably these calls exist only so the generated stack functions are referenced
+  yy_push_state(1);
+  yy_top_state();
+  yy_pop_state();
+
+  yylex();
+
+  fclose(yyin);
+  fclose(yyout);
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_re.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_re.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_re.cc	(revision 69632)
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/apertium_re.h>
+#include <lttoolbox/compression.h>
+#include <iostream>
+#include <cstdlib>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+using namespace std;
+
+// Builds an empty regexp object; nothing is compiled or loaded yet.
+ApertiumRE::ApertiumRE() : re(NULL)
+{
+  empty = true;
+}
+
+// Releases the compiled pattern, if one is held, and marks the object empty.
+ApertiumRE::~ApertiumRE()
+{
+  bool const owns_pattern = !empty;
+  empty = true;
+  if(owns_pattern)
+    pcre_free(re);
+}
+
+// Loads a precompiled pattern (byte count, then pcre image) or exits on failure.
+void
+ApertiumRE::read(FILE *input)
+{
+  unsigned int size = Compression::multibyte_read(input);
+  if(!empty) pcre_free(re); // fix: a pattern already held used to be leaked
+  re = static_cast<pcre *>(pcre_malloc(size));
+  if(re == NULL || size != fread(re, 1, size, input)) { // fix: NULL reached fread()
+    wcerr << L"Error reading regexp" << endl;
+    exit(EXIT_FAILURE);
+  }
+  empty = false;
+}
+
+// Compiles `str` and keeps the result; exits the process if pcre rejects it.
+void
+ApertiumRE::compile(string const &str)
+{
+  const char *error;
+  int erroroffset;
+  if(!empty) pcre_free(re); // fix: recompiling after read() leaked the old pattern
+  re = pcre_compile(str.c_str(), PCRE_DOTALL|PCRE_CASELESS|PCRE_EXTENDED|PCRE_UTF8,
+	            &error, &erroroffset, NULL);
+  if(re == NULL) {
+    wcerr << L"Error: pcre_compile ";
+    cerr << error << endl;
+    exit(EXIT_FAILURE);
+  }
+  empty = false;
+}
+
+// Writes the compiled pattern as a multibyte-encoded byte count followed by the
+// raw pcre image. Exits the process if there is no pattern or a step fails.
+void 
+ApertiumRE::write(FILE *output) const
+{
+  if(empty)
+  {
+    // Fix: a wide literal sent to the narrow cerr did not print the message.
+    wcerr << L"Error, cannot write empty regexp" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  size_t size;
+  if(pcre_fullinfo(re, NULL, PCRE_INFO_SIZE, &size) < 0)
+  {
+    wcerr << L"Error calling pcre_fullinfo()\n" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  Compression::multibyte_write(size, output);
+  if(fwrite(re, 1, size, output) != size)
+  {
+    wcerr << L"Error writing precompiled regex\n" << endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
+// Returns the part of `str` matched by the pattern, or "" when the object is
+// empty or nothing matches. Any other pcre error terminates the process.
+string
+ApertiumRE::match(string const &str) const
+{
+  if(empty)
+  {
+    return "";
+  }
+
+  int const ovector_size = 3;
+  int const workspace_size = 4096;
+  int ovector[ovector_size];   // ovector[0], ovector[1]: bounds of the match
+  int workspace[workspace_size];
+  int const rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK,
+                               ovector, ovector_size, workspace, workspace_size);
+  if(rc == PCRE_ERROR_NOMATCH)
+  {
+    return "";
+  }
+  if(rc < 0)
+  {
+    wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  return str.substr(ovector[0], ovector[1]-ovector[0]);
+}
+
+// Replaces the text matched by the pattern in `str` with `value`, in place.
+// `str` is left untouched when the object is empty or nothing matches; any
+// other pcre error terminates the process.
+void
+ApertiumRE::replace(string &str, string const &value) const
+{
+  if(empty)
+  {
+    return;
+  }
+
+  int const ovector_size = 3;
+  int const workspace_size = 4096;
+  int ovector[ovector_size];
+  int workspace[workspace_size];
+  int const rc = pcre_dfa_exec(re, NULL, str.c_str(), str.size(), 0, PCRE_NO_UTF8_CHECK,
+                               ovector, ovector_size, workspace, workspace_size);
+  if(rc == PCRE_ERROR_NOMATCH)
+  {
+    return;
+  }
+  if(rc < 0)
+  {
+    wcerr << L"Error: Unknown error matching regexp (code " << rc << L")" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // text before the match + replacement + text after the match
+  str = str.substr(0, ovector[0]) + value + str.substr(ovector[1]);
+}
Index: branches/apertium-tagger/apertium2/apertium/interchunk.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk.cc	(revision 69632)
@@ -0,0 +1,1603 @@
+/*
+ * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/interchunk.h>
+#include <apertium/trx_reader.h>
+#include <apertium/utf_converter.h>
+#include <lttoolbox/compression.h>
+#include <lttoolbox/xml_parse_util.h>
+
+#include <cctype>
+#include <cerrno>
+#include <iostream>
+#include <stack>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+using namespace Apertium;
+using namespace std;
+
+// Frees the matcher and the parsed rules document; safe to call repeatedly.
+void
+Interchunk::destroy()
+{
+  delete me;
+  me = NULL;
+  if(doc != NULL)
+  {
+    xmlFreeDoc(doc);
+    doc = NULL;
+  }
+}
+
+// Sets every member to its idle value; no rules or data are loaded until
+// read() is called.
+Interchunk::Interchunk() :
+word(0), blank(0),
+lword(0), lblank(0),
+output(0),
+any_char(0), any_tag(0),
+nwords(0)
+{
+  me = NULL;
+  doc = NULL;
+  root_element = NULL;
+  lastrule = NULL;
+
+  inword = false;
+  null_flush = false;
+  internal_null_flush = false;
+  trace = false;
+  emptyblank = "";
+}
+
+Interchunk::~Interchunk()
+{
+  destroy(); // frees the matcher and the parsed rules document
+}
+
+void 
+Interchunk::readData(FILE *in) // loads the compiled data from `in`; the sections are read in exactly the order below
+{
+  alphabet.read(in);
+  any_char = alphabet(TRXReader::ANY_CHAR);
+  any_tag = alphabet(TRXReader::ANY_TAG);
+  // transducer, read against the size of the alphabet just loaded
+  Transducer t;
+  t.read(in, alphabet.size());
+  
+  map<int, int> finals;  
+  
+  // finals: a count followed by that many (key, value) integer pairs
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    int key = Compression::multibyte_read(in);
+    finals[key] = Compression::multibyte_read(in);
+  }  
+  
+  me = new MatchExe(t, finals);
+  // the stored string is compared with the version of the pcre library in use
+  // attr_items: per entry a name, a precompiled pattern and the pattern source
+  bool recompile_attrs = Compression::string_read(in) != string(pcre_version());
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    attr_items[cad_k].read(in);
+    wstring fallback = Compression::wstring_read(in); // always consumed, used only on version mismatch
+    if(recompile_attrs) {
+      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
+    }
+  }
+
+  // variables: name and initial value
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
+  }
+
+  // macros: name and an integer (used as an index into macro_map by processCallMacro)
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    macros[cad_k] = Compression::multibyte_read(in);
+  }
+
+  // lists: name, then a count and that many values
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    // every value is stored twice: as read in lists, lowercased in listslow
+    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
+    {
+      wstring const cad_v = Compression::wstring_read(in);
+      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
+      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
+    }  
+  }
+}
+
+// Loads the rules (XML, `transferfile`) and then the compiled data
+// (`datafile`); exits the process if the data file cannot be opened.
+void
+Interchunk::read(string const &transferfile, string const &datafile)
+{
+  readInterchunk(transferfile);
+
+  FILE *data = fopen(datafile.c_str(), "rb");
+  if(data == NULL)
+  {
+    cerr << "Error: Could not open file '" << datafile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  readData(data);
+  fclose(data);
+}
+
+// Parses the rules file and hands its macro and rule sections to
+// collectMacros() and collectRules(); exits if the file cannot be parsed.
+void
+Interchunk::readInterchunk(string const &in)
+{
+  doc = xmlReadFile(in.c_str(), NULL, 0);
+  if(doc == NULL)
+  {
+    cerr << "Error: Could not parse file '" << in << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  root_element = xmlDocGetRootElement(doc);
+  // search for macros & rules
+  for(xmlNode *section = root_element->children; section != NULL; section = section->next)
+  {
+    if(section->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    if(xmlStrcmp(section->name, (const xmlChar *) "section-def-macros") == 0)
+    {
+      collectMacros(section);
+    }
+    else if(xmlStrcmp(section->name, (const xmlChar *) "section-rules") == 0)
+    {
+      collectRules(section);
+    }
+  }
+}
+
+// Records the <action> child of every rule under `localroot`, in file order.
+void
+Interchunk::collectRules(xmlNode *localroot)
+{
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type != XML_ELEMENT_NODE) continue;
+    xmlNode *j = i->children;
+    while(j != NULL && !(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "action")))
+      j = j->next;
+    if(j == NULL) // fix: this case used to run off the end of the child list
+    {
+      cerr << "Error: rule without an <action> element (line " << i->line << ")." << endl;
+      exit(EXIT_FAILURE);
+    }
+    rule_map.push_back(j);
+  }
+}
+
+// Registers every element child of `localroot` as a macro, in document order.
+void
+Interchunk::collectMacros(xmlNode *localroot)
+{
+  xmlNode *child = localroot->children;
+  while(child != NULL)
+  {
+    if(child->type == XML_ELEMENT_NODE) macro_map.push_back(child);
+    child = child->next;
+  }
+}
+
+// True when `index` is below `limit`; otherwise reports the file and line of
+// `element` on wcerr and returns false. Negative indexes are not rejected.
+bool
+Interchunk::checkIndex(xmlNode *element, int index, int limit)
+{
+  if(index < limit) return true;
+
+  wcerr << L"Error in " << UtfConverter::fromUtf8((char *) doc->URL) <<L": line " << element->line << endl;
+  return false;
+}
+
+
+string 
+Interchunk::evalString(xmlNode *element)
+{ // Returns the string value of an rvalue element; throws a string literal when `element` is NULL.
+  if (element == 0)
+  {
+    throw "Interchunk::evalString() was passed a NULL element";
+  }
+  // Nodes seen before are evaluated from the TransferInstr cached for them.
+  map<xmlNode *, TransferInstr>::iterator it;
+  it = evalStringCache.find(element);
+  if(it != evalStringCache.end())
+  {
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
+    {
+      case ti_clip_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          if(ti.getContent() == "content") // jacob's new 'part'
+          { 
+            string wf = word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
+            return wf.substr(1, wf.length()-2); // trim away the { and }  
+          }
+          else
+          {
+            return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
+          }
+        }
+        break;
+        
+      case ti_var:
+        return variables[ti.getContent()];
+        
+      case ti_lit_tag:
+      case ti_lit:
+        return ti.getContent();
+        
+      case ti_b:
+        if(checkIndex(element, ti.getPos(), lblank))
+        {
+          if(ti.getPos() >= 0)
+          {
+            return !blank?"":*(blank[ti.getPos()]); // empty when there is no blank array
+          }
+          return " "; // position -1 is what a <b> without attributes is cached with
+        }
+        break;
+            
+      case ti_get_case_from:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
+                          evalString((xmlNode *) ti.getPointer()));
+        }
+        break;
+      
+      case ti_case_of_tl:
+        if(checkIndex(element, ti.getPos(), lword))
+        {
+          return caseOf(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
+        }
+        break;
+      
+      default:
+        return "";
+    }
+    return ""; // reached when an index check above failed
+  }
+  // First visit: cache a TransferInstr for the node; the final return then evaluates it through the cache.
+  if(!xmlStrcmp(element->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *)i->children->content) - 1; // stored zero-based
+      }
+    }
+
+    evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit-tag"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lit_tag, 
+                                             tags((const char *) element->properties->children->content), 0);                                            
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "lit"))
+  {
+    evalStringCache[element] = TransferInstr(ti_lit, ((const char *) element->properties->children->content), 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "b"))
+  {
+    if(element->properties == NULL)
+    {
+      evalStringCache[element] = TransferInstr(ti_b, " ", -1);
+    }
+    else
+    {
+      int pos = atoi((const char *) element->properties->children->content) - 1;
+      evalStringCache[element] = TransferInstr(ti_b, "", pos);
+    }
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from"))
+  {
+    int pos = atoi((const char *) element->properties->children->content) - 1;
+    xmlNode *param = NULL;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+	param = i;
+	break;
+      }
+    }
+    // the case is always taken from the "lem" part of the word at `pos`
+    evalStringCache[element] = TransferInstr(ti_get_case_from, "lem", pos, param);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "var"))
+  {
+    evalStringCache[element] = TransferInstr(ti_var, (const char *) element->properties->children->content, 0);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "case-of"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = element->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+    }
+      
+    evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos);
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "concat"))
+  { 
+    string value;
+    for(xmlNode *i = element->children; i != NULL; i = i->next)
+    {
+      if(i->type == XML_ELEMENT_NODE)
+      {
+        value.append(evalString(i));
+      }
+    }
+    return value; // concat is evaluated on every call and never cached
+  }
+  else if(!xmlStrcmp(element->name, (const xmlChar *) "chunk"))
+  {
+    return processChunk(element); // likewise not cached
+  }  
+  else
+  {
+    cerr << "Error: unexpected rvalue expression '" << element->name << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  return evalString(element); // second pass: served from the cache entry created above
+}
+
+// Writes the result of every element child of `localroot` to the output
+// stream, converted from UTF-8 to wide characters.
+void
+Interchunk::processOut(xmlNode *localroot)
+{
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    // <chunk> is built directly; anything else ('b') goes through evalString()
+    bool const is_chunk = !xmlStrcmp(i->name, (const xmlChar *) "chunk");
+    string const text = is_chunk ? processChunk(i) : evalString(i);
+    fputws_unlocked(UtfConverter::fromUtf8(text).c_str(), output);
+  }
+}
+
+// Builds a chunk: the results of all element children wrapped in ^ ... $.
+string
+Interchunk::processChunk(xmlNode *localroot)
+{
+  string chunk = "^";
+  for(xmlNode *part = localroot->children; part != NULL; part = part->next)
+  {
+    if(part->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+    chunk += evalString(part);
+  }
+
+  chunk += "$";
+  return chunk;
+}
+
+// Runs one instruction node by dispatching on its element name; elements
+// with any other name are ignored.
+void
+Interchunk::processInstruction(xmlNode *localroot)
+{
+  typedef void (Interchunk::*Handler)(xmlNode *);
+  struct Entry
+  {
+    const char *name;
+    Handler handler;
+  };
+  static Entry const table[] = {
+    { "choose",      &Interchunk::processChoose },
+    { "let",         &Interchunk::processLet },
+    { "append",      &Interchunk::processAppend },
+    { "out",         &Interchunk::processOut },
+    { "call-macro",  &Interchunk::processCallMacro },
+    { "modify-case", &Interchunk::processModifyCase }
+  };
+  for(size_t k = 0; k != sizeof(table)/sizeof(table[0]); k++)
+  {
+    if(!xmlStrcmp(localroot->name, (const xmlChar *) table[k].name))
+    {
+      (this->*table[k].handler)(localroot);
+      return;
+    }
+  }
+}
+
+void
+Interchunk::processLet(xmlNode *localroot)
+{ // Assigns the value of the second element child to the target named by the first one (a var or a clip).
+  xmlNode *leftSide = NULL, *rightSide = NULL;
+  // first element child = target, second = value; further children are ignored
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(leftSide == NULL)
+      {
+	leftSide = i;
+      }
+      else
+      {
+	rightSide = i;
+	break;
+      }
+    }
+  }
+  // Fast path: the target was decoded before (here or in evalString, which shares the cache).
+  map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
+  if(it != evalStringCache.end())
+  {
+    TransferInstr &ti = it->second;
+    switch(ti.getType())
+    {
+      case ti_var:
+        variables[ti.getContent()] = evalString(rightSide);
+        return;
+        
+      case ti_clip_tl:
+        word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide)); // NOTE(review): no checkIndex() here, unlike evalString()
+        return;      
+        
+      default: // any other cached instruction type is not assignable: nothing happens
+        return;
+    }
+  }
+  if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var")) // NOTE(review): leftSide is NULL here when there is no element child
+  {
+    string const val = (const char *) leftSide->properties->children->content;
+    variables[val] = evalString(rightSide);
+    evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
+  }
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1;
+      }
+    }
+    // perform the assignment, then cache the decoded target for later executions
+
+    word[pos]->setChunkPart(attr_items[(const char *) part], 
+			    evalString(rightSide));
+    evalStringCache[leftSide] = TransferInstr(ti_clip_tl, 
+					      (const char *) part, 
+					      pos, NULL);
+  }
+}
+
+// Appends the results of all element children to the variable named by the
+// "n" attribute (the variable with the empty name if there is no such attribute).
+void
+Interchunk::processAppend(xmlNode *localroot)
+{
+  string name;
+  for(xmlAttr *attr = localroot->properties; attr != NULL; attr = attr->next)
+  {
+    if(xmlStrcmp(attr->name, (const xmlChar *) "n") == 0)
+    {
+      name = (char *) attr->children->content; 
+      break;
+    }
+  }
+
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE) continue;
+    variables[name].append(evalString(child));
+  }
+}
+
+void
+Interchunk::processModifyCase(xmlNode *localroot)
+{ // Rewrites the target (first element child, a clip or a var) through copycase() with the value of the second.
+  xmlNode *leftSide = NULL, *rightSide = NULL;
+  // first element child = target, second = value supplying the case
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(leftSide == NULL)
+      {
+	leftSide = i;
+      }
+      else
+      {
+	rightSide = i;
+	break;
+      }
+    }
+  }
+  // NOTE(review): leftSide is dereferenced without a NULL check
+  if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
+  {
+    int pos = 0;
+    xmlChar *part = NULL;
+
+    for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
+      {
+	part = i->children->content;
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
+      {
+	pos = atoi((const char *) i->children->content) - 1; // stored zero-based
+      }
+    }
+    // copycase(value, current part) replaces the part of the word at `pos`
+    string const result = copycase(evalString(rightSide), 
+				   word[pos]->chunkPart(attr_items[(const char *) part]));
+    word[pos]->setChunkPart(attr_items[(const char *) part], result);
+  }
+  else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
+  {
+    string const val = (const char *) leftSide->properties->children->content;
+    variables[val] = copycase(evalString(rightSide), variables[val]);
+  }
+}
+
+// Runs the macro named by the first attribute of `localroot`. The words at the
+// positions given by the element children, and the blanks between them,
+// stand in for `word`, `blank` and `lword` while the macro body is executed.
+void
+Interchunk::processCallMacro(xmlNode *localroot)
+{
+  const char *n = (const char *) localroot->properties->children->content;
+  int npar = 0;
+
+  xmlNode *macro = macro_map[macros[n]];
+
+  for(xmlAttr *i = macro->properties; i != NULL; i = i->next)
+  {
+    if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
+    {
+      npar = atoi((const char *) i->children->content);
+      break;
+    }
+  }
+
+  // ToDo: Is it at all valid if npar <= 0 ?
+
+  // Fix: every slot starts out defined, also when fewer parameters are passed
+  InterchunkWord **myword = NULL;
+  string **myblank = NULL;
+  if(npar > 0)
+  {
+    myword = new InterchunkWord *[npar]();
+    myblank = new string *[npar];
+    for(int k = 0; k != npar; k++)
+      myblank[k] = &emptyblank;
+  }
+
+  int idx = 0;
+  int lastpos = 0;
+  // Fix: stop at npar; surplus parameters used to overflow both arrays
+  for(xmlNode *i = localroot->children; idx < npar && i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      int pos = atoi((const char *) i->properties->children->content)-1;
+      myword[idx] = word[pos];
+      if(idx > 0) myblank[idx-1] = blank[lastpos];
+      idx++;
+      lastpos = pos;
+    }
+  }
+
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+
+  for(xmlNode *i = macro->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      processInstruction(i);
+    }
+  }
+
+  swap(myword, word);
+  swap(myblank, blank);
+  swap(npar, lword);
+
+  delete[] myword;
+  delete[] myblank;
+}
+
+void
+Interchunk::processChoose(xmlNode *localroot)
+{ // Executes the first <when> whose <test> succeeds; an <otherwise> child runs when it is reached.
+  for(xmlNode *i = localroot->children; i != NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      if(!xmlStrcmp(i->name, (const xmlChar *) "when"))
+      {
+        bool picked_option = false;
+        // children are visited in order, so instructions placed before the <test> would run regardless
+	for(xmlNode *j = i->children; j != NULL; j = j->next)
+	{
+	  if(j->type == XML_ELEMENT_NODE)
+	  {
+	    if(!xmlStrcmp(j->name, (const xmlChar *) "test"))
+	    {
+	      if(!processTest(j))
+	      {
+		break; // test failed: skip the rest of this <when>
+	      }
+	      else
+	      {
+	        picked_option = true;
+              }
+	    }
+	    else
+	    {
+	      processInstruction(j);
+	    }
+	  }
+	}
+        if(picked_option)
+        {
+          return; // no later <when> or <otherwise> is considered
+        }	
+      }
+      else if(!xmlStrcmp(i->name, (const xmlChar *) "otherwise"))
+      {
+	for(xmlNode *j = i->children; j != NULL; j = j->next)
+	{
+	  if(j->type == XML_ELEMENT_NODE)
+	  {
+	    processInstruction(j);
+	  }
+	}
+      } // the outer loop continues after <otherwise>; it does not return here
+    }
+  }
+}
+
+// Evaluates one condition node by element name; unknown names yield false.
+bool
+Interchunk::processLogical(xmlNode *localroot)
+{
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "equal") == 0)
+  {
+    return processEqual(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "begins-with") == 0)
+  {
+    return processBeginsWith(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "begins-with-list") == 0)
+  {
+    return processBeginsWithList(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "ends-with") == 0)
+  {
+    return processEndsWith(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "ends-with-list") == 0)
+  {
+    return processEndsWithList(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "contains-substring") == 0)
+  {
+    return processContainsSubstring(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "or") == 0)
+  {
+    return processOr(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "and") == 0)
+  {
+    return processAnd(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "not") == 0)
+  {
+    return processNot(localroot);
+  }
+  if(xmlStrcmp(localroot->name, (const xmlChar *) "in") == 0)
+  {
+    return processIn(localroot);
+  }
+  return false;
+}
+
+// Evaluates an <in> test. The first element child supplies the value and the
+// first attribute of the second element child names the list to search.
+// When the first attribute of <in> itself is "yes", the lowercased value is
+// looked up in the lowercased copy of the list (listslow) instead.
+bool
+Interchunk::processIn(xmlNode *localroot)
+{
+  xmlNode *value = NULL;
+  xmlChar *idlist = NULL;
+
+  // Pick the first two element children: value first, list reference second.
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type != XML_ELEMENT_NODE)
+    {
+      continue;
+    }
+
+    if(value != NULL)
+    {
+      idlist = child->properties->children->content;
+      break;
+    }
+    value = child;
+  }
+
+  // NOTE(review): with fewer than two element children idlist stays NULL and
+  // is still used as a map key below.
+  string sval = evalString(value);
+
+  // Was the case-insensitive lookup requested?
+  bool caseless = false;
+  if(localroot->properties != NULL)
+  {
+    caseless = !xmlStrcmp(localroot->properties->children->content,
+                          (const xmlChar *) "yes");
+  }
+
+  if(caseless)
+  {
+    set<string, Ltstr> &lowered = listslow[(const char *) idlist];
+    bool const found = lowered.find(tolower(sval)) != lowered.end();
+    return found;
+  }
+
+  // Otherwise the value must appear in the list exactly as it was loaded.
+  // The lookup key is the value as returned by evalString().
+  set<string, Ltstr> &exact = lists[(const char *) idlist];
+  bool const found = exact.find(sval) != exact.end();
+  return found;
+}
+
+// Evaluates a <test>: the result of its first element child, or false if
+// there is none.
+bool
+Interchunk::processTest(xmlNode *localroot)
+{
+  xmlNode *cond = localroot->children;
+  while(cond != NULL && cond->type != XML_ELEMENT_NODE)
+  {
+    cond = cond->next;
+  }
+  return cond != NULL && processLogical(cond);
+}
+
+// Logical AND over the element children, evaluated left to right and
+// stopping at the first false one.  True when there are no element children.
+bool
+Interchunk::processAnd(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type == XML_ELEMENT_NODE && !processLogical(child))
+    {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Logical OR over the element children, evaluated left to right and
+// stopping at the first true one.  False when there are no element children.
+bool
+Interchunk::processOr(xmlNode *localroot)
+{
+  for(xmlNode *child = localroot->children; child != NULL; child = child->next)
+  {
+    if(child->type == XML_ELEMENT_NODE && processLogical(child))
+    {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Negates the result of the first element child.  Returns false when there
+// is no element child to negate.
+bool
+Interchunk::processNot(xmlNode *localroot)
+{
+  xmlNode *operand = localroot->children;
+  while(operand != NULL && operand->type != XML_ELEMENT_NODE)
+  {
+    operand = operand->next;
+  }
+  return operand != NULL && !processLogical(operand);
+}
+
+// Evaluates an <equal> condition.
+//
+// The values of the first two element children are compared for exact
+// equality.  If the <equal> element has attributes and the value of the
+// first one is "yes", both values are lower-cased before the comparison.
+bool
+Interchunk::processEqual(xmlNode *localroot)
+{
+  // Collect the first two element children; a missing one stays NULL and is
+  // handed to evalString() as such.
+  xmlNode *operand[2] = {NULL, NULL};
+  int found = 0;
+  for(xmlNode *i = localroot->children; i != NULL && found != 2; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      operand[found] = i;
+      found++;
+    }
+  }
+
+  bool caseless = false;
+  if(localroot->properties != NULL)
+  {
+    caseless = !xmlStrcmp(localroot->properties->children->content,
+                          (const xmlChar *) "yes");
+  }
+
+  string const lhs = evalString(operand[0]);
+  string const rhs = evalString(operand[1]);
+
+  if(caseless)
+  {
+    return tolower(lhs) == tolower(rhs);
+  }
+
+  return lhs == rhs;
+}
+
+// Reports whether s1 begins with s2.
+//
+//   s1  string to inspect
+//   s2  candidate prefix
+//
+// The comparison is byte-wise.  An empty s2 is a prefix of every string and
+// an s2 longer than s1 never matches.
+bool
+Interchunk::beginsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    // The candidate prefix cannot fit.
+    return false;
+  }
+
+  // Compare the first s2.size() bytes of s1 against s2.
+  return s1.compare(0, s2.size(), s2) == 0;
+}
+
+// Reports whether s1 ends with s2.
+//
+//   s1  string to inspect
+//   s2  candidate suffix
+//
+// The comparison is byte-wise.  An empty s2 is a suffix of every string and
+// an s2 longer than s1 never matches.
+bool
+Interchunk::endsWith(string const &s1, string const &s2) const
+{
+  if(s1.size() < s2.size())
+  {
+    // The candidate suffix cannot fit.
+    return false;
+  }
+
+  // Compare the last s2.size() bytes of s1 against s2.
+  return s1.compare(s1.size() - s2.size(), s2.size(), s2) == 0;
+}
+
+
+// Evaluates a <begins-with> condition.
+//
+// True when the value of the first element child begins with the value of
+// the second one.  If the condition element has attributes and the value of
+// the first one is "yes", both values are lower-cased before the test.
+bool
+Interchunk::processBeginsWith(xmlNode *localroot)
+{
+  // Collect the first two element children; a missing one stays NULL and is
+  // handed to evalString() as such.
+  xmlNode *operand[2] = {NULL, NULL};
+  int found = 0;
+  for(xmlNode *i = localroot->children; i != NULL && found != 2; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      operand[found] = i;
+      found++;
+    }
+  }
+
+  bool caseless = false;
+  if(localroot->properties != NULL)
+  {
+    caseless = !xmlStrcmp(localroot->properties->children->content,
+                          (const xmlChar *) "yes");
+  }
+
+  string const text = evalString(operand[0]);
+  string const prefix = evalString(operand[1]);
+
+  if(caseless)
+  {
+    return beginsWith(tolower(text), tolower(prefix));
+  }
+
+  return beginsWith(text, prefix);
+}
+
+// Evaluates an <ends-with> condition.
+//
+// True when the value of the first element child ends with the value of
+// the second one.  If the condition element has attributes and the value of
+// the first one is "yes", both values are lower-cased before the test.
+bool
+Interchunk::processEndsWith(xmlNode *localroot)
+{
+  // Collect the first two element children; a missing one stays NULL and is
+  // handed to evalString() as such.
+  xmlNode *operand[2] = {NULL, NULL};
+  int found = 0;
+  for(xmlNode *i = localroot->children; i != NULL && found != 2; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      operand[found] = i;
+      found++;
+    }
+  }
+
+  bool caseless = false;
+  if(localroot->properties != NULL)
+  {
+    caseless = !xmlStrcmp(localroot->properties->children->content,
+                          (const xmlChar *) "yes");
+  }
+
+  string const text = evalString(operand[0]);
+  string const suffix = evalString(operand[1]);
+
+  if(caseless)
+  {
+    return endsWith(tolower(text), tolower(suffix));
+  }
+
+  return endsWith(text, suffix);
+}
+
+// Evaluates <begins-with-list>: true when the value of the first child
+// element begins with some entry of the list named by the first attribute
+// of the second child.  If the condition's own first attribute is "yes" the
+// value is lower-cased and matched against listslow instead of lists.
+// Fix: a missing child/attribute yields false rather than a NULL
+// dereference, and find() replaces operator[] so unknown names are not added.
+bool
+Interchunk::processBeginsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL, *second = NULL;
+  for(xmlNode *i = localroot->children; i != NULL && second == NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      (first == NULL ? first : second) = i;
+    }
+  }
+
+  if(first == NULL || second == NULL || second->properties == NULL ||
+     second->properties->children == NULL || second->properties->children->content == NULL)
+  {
+    return false;
+  }
+
+  xmlChar const *idlist = second->properties->children->content;
+  string needle = evalString(first);
+  bool const caseless = localroot->properties != NULL &&
+    !xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes");
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  map<string, set<string, Ltstr>, Ltstr> const &table = caseless ? listslow : lists;
+  map<string, set<string, Ltstr>, Ltstr>::const_iterator const entry = table.find((const char *) idlist);
+  if(entry != table.end())
+  {
+    for(set<string, Ltstr>::const_iterator it = entry->second.begin(); it != entry->second.end(); ++it)
+    {
+      if(beginsWith(needle, *it))
+      {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Evaluates <ends-with-list>: true when the value of the first child
+// element ends with some entry of the list named by the first attribute
+// of the second child.  If the condition's own first attribute is "yes" the
+// value is lower-cased and matched against listslow instead of lists.
+// Fix: a missing child/attribute yields false rather than a NULL
+// dereference, and find() replaces operator[] so unknown names are not added.
+bool
+Interchunk::processEndsWithList(xmlNode *localroot)
+{
+  xmlNode *first = NULL, *second = NULL;
+  for(xmlNode *i = localroot->children; i != NULL && second == NULL; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      (first == NULL ? first : second) = i;
+    }
+  }
+
+  if(first == NULL || second == NULL || second->properties == NULL ||
+     second->properties->children == NULL || second->properties->children->content == NULL)
+  {
+    return false;
+  }
+
+  xmlChar const *idlist = second->properties->children->content;
+  string needle = evalString(first);
+  bool const caseless = localroot->properties != NULL &&
+    !xmlStrcmp(localroot->properties->children->content, (const xmlChar *) "yes");
+  if(caseless)
+  {
+    needle = tolower(needle);
+  }
+
+  map<string, set<string, Ltstr>, Ltstr> const &table = caseless ? listslow : lists;
+  map<string, set<string, Ltstr>, Ltstr>::const_iterator const entry = table.find((const char *) idlist);
+  if(entry != table.end())
+  {
+    for(set<string, Ltstr>::const_iterator it = entry->second.begin(); it != entry->second.end(); ++it)
+    {
+      if(endsWith(needle, *it))
+      {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+// Evaluates a <contains-substring> condition.
+//
+// True when the value of the second element child occurs somewhere inside
+// the value of the first one.  If the condition element has attributes and
+// the value of the first one is "yes", both values are lower-cased first.
+bool
+Interchunk::processContainsSubstring(xmlNode *localroot)
+{
+  // Collect the first two element children; a missing one stays NULL and is
+  // handed to evalString() as such.
+  xmlNode *operand[2] = {NULL, NULL};
+  int found = 0;
+  for(xmlNode *i = localroot->children; i != NULL && found != 2; i = i->next)
+  {
+    if(i->type == XML_ELEMENT_NODE)
+    {
+      operand[found] = i;
+      found++;
+    }
+  }
+
+  bool caseless = false;
+  if(localroot->properties != NULL)
+  {
+    caseless = !xmlStrcmp(localroot->properties->children->content,
+                          (const xmlChar *) "yes");
+  }
+
+  string const haystack = evalString(operand[0]);
+  string const needle = evalString(operand[1]);
+
+  if(caseless)
+  {
+    return tolower(haystack).find(tolower(needle)) != string::npos;
+  }
+
+  return haystack.find(needle) != string::npos;
+}
+
+// Transfers the capitalisation pattern of source_word onto target_word.
+//
+// target_word is upper-cased when source_word has more than one character
+// and both its first and last characters are upper case; otherwise it is
+// lower-cased.  In either case the first character of the result is then
+// upper-cased if source_word starts with an upper-case character.
+string
+Interchunk::copycase(string const &source_word, string const &target_word)
+{
+  wstring const pattern = UtfConverter::fromUtf8(source_word);
+  wstring const target = UtfConverter::fromUtf8(target_word);
+
+  bool const starts_upper = iswupper(pattern[0]);
+  bool const all_upper = starts_upper && pattern.size() != 1 &&
+                         iswupper(pattern[pattern.size() - 1]);
+
+  // Upper-case the whole word only for multi-character all-caps patterns.
+  wstring result = all_upper ? StringUtils::toupper(target)
+                             : StringUtils::tolower(target);
+
+  if(starts_upper)
+  {
+    result[0] = towupper(result[0]);
+  }
+
+  return UtfConverter::toUtf8(result);
+}
+
+// Classifies the capitalisation of str and returns one of three codes:
+//
+//   "aa"  the string is empty or its first character is not upper case
+//   "Aa"  the first character is upper case and the string either has a
+//         single character or its last character is not upper case
+//   "AA"  first and last characters are both upper case (the string has
+//         at least two characters)
+//
+// Only the first and last characters are examined; str is converted with
+// UtfConverter::fromUtf8() before the characters are inspected.
+string
+Interchunk::caseOf(string const &str)
+{
+  wstring const s = UtfConverter::fromUtf8(str);
+  wstring::size_type const length = s.size();
+
+  // The first character separates the lower-case code from the two
+  // capitalised ones; an empty string counts as lower case.
+  bool const first_upper = length != 0 && iswupper(s[0]);
+  if(!first_upper)
+  {
+    return "aa";
+  }
+
+  // The last character only matters for strings of two or more
+  // characters; a single upper-case character is reported as "Aa".
+  bool const last_upper = length > 1 && iswupper(s[length - 1]);
+  if(last_upper)
+  {
+    return "AA";
+  }
+  else
+  {
+    return "Aa";
+  }
+}
+
+// Lower-cased copy of a UTF-8 string (converted to a wide string and back).
+string Interchunk::tolower(string const &str) const
+{
+  return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str)));
+}
+
+// Converts a dot-separated tag list such as "n.m.sg" into "<n><m><sg>".
+// Each '.' becomes "><" and the whole result is wrapped in '<' ... '>', so
+// an empty input yields "<>".
+string
+Interchunk::tags(string const &str) const
+{
+  string result(1, '<');
+
+  for(string::const_iterator c = str.begin(); c != str.end(); ++c)
+  {
+    if(*c != '.')
+    {
+      result.push_back(*c);
+      continue;
+    }
+    result += "><";
+  }
+
+  result.push_back('>');
+  return result;
+}
+
+// Executes a rule body.  localroot is expected to be the rule's 'action'
+// element; each of its element children is run as an instruction, in order.
+void
+Interchunk::processRule(xmlNode *localroot)
+{
+  xmlNode *node = localroot->children;
+  while(node != NULL)
+  {
+    if(node->type == XML_ELEMENT_NODE) { processInstruction(node); }
+    node = node->next;
+  }
+}
+
+// Returns the next input token: the next unread token of input_buffer if
+// there is one (after a rewind), otherwise a token read from `in` and
+// appended to the buffer.  '^' ends a blank (tt_blank) and enters a word;
+// '$' inside a word ends it (tt_word); end of input, or a NUL while
+// internal_null_flush is set, yields tt_eof.  A backslash copies the next
+// character verbatim, "[...]" is copied whole and, inside a word, "{...}"
+// is copied up to the '}' that is directly followed by '$'.
+//
+// Fix: the "[...]" and "{...}" scanners never tested for end of input, so
+// an unterminated block looped forever; they now return tt_eof instead.
+TransferToken &
+Interchunk::readToken(FILE *in)
+{
+  if(!input_buffer.isEmpty())
+  {
+    return input_buffer.next();
+  }
+
+  wstring content;
+  while(true)
+  {
+    int val = fgetwc_unlocked(in);
+    if(feof(in) || (internal_null_flush && val == 0))
+    {
+      return input_buffer.add(TransferToken(content, tt_eof));
+    }
+    if(val == L'\\')
+    {
+      content += L'\\';
+      content += wchar_t(fgetwc_unlocked(in));
+    }
+    else if(val == L'[' || (inword && val == L'{'))
+    {
+      bool const braces = (val == L'{');
+      wchar_t const closing = braces ? L'}' : L']';
+      content += wchar_t(val);
+      while(true)
+      {
+        int val2 = fgetwc_unlocked(in);
+        if(feof(in))
+        {
+          // Unterminated block: stop instead of looping at end of input.
+          return input_buffer.add(TransferToken(content, tt_eof));
+        }
+        if(val2 == L'\\')
+        {
+          content += L'\\';
+          content += wchar_t(fgetwc_unlocked(in));
+        }
+        else if(val2 != closing)
+        {
+          content += wchar_t(val2);
+        }
+        else if(!braces)
+        {
+          content += L']';
+          break;
+        }
+        else
+        {
+          // A '}' ends the block only when '$' follows: peek, then push back.
+          wint_t val3 = wchar_t(fgetwc_unlocked(in));
+          ungetwc(val3, in);
+          content += L'}';
+          if(val3 == L'$')
+          {
+            break;
+          }
+        }
+      }
+    }
+    else if(inword && val == L'$')
+    {
+      inword = false;
+      return input_buffer.add(TransferToken(content, tt_word));
+    }
+    else if(val == L'^')
+    {
+      inword = true;
+      return input_buffer.add(TransferToken(content, tt_blank));
+    }
+    else
+    {
+      content += wchar_t(val);
+    }
+  }
+}
+
+// Tells whether null-flush mode is switched on (see interchunk()).
+bool Interchunk::getNullFlush(void)
+{
+  return this->null_flush;
+}
+
+// Switches null-flush mode on or off; interchunk() checks it on entry.
+void Interchunk::setNullFlush(bool null_flush)
+{
+  this->null_flush = null_flush;
+}
+
+// Enables or disables the per-rule trace that interchunk() prints to wcerr.
+void Interchunk::setTrace(bool trace)
+{
+  this->trace = trace;
+}
+
+// Runs interchunk() until the input is exhausted, writing a NUL and flushing
+// out after each run; null_flush is cleared meanwhile and restored at the end.
+void
+Interchunk::interchunk_wrapper_null_flush(FILE *in, FILE *out)
+{
+  null_flush = false;
+  internal_null_flush = true;
+  while(feof(in) == 0)
+  {
+    interchunk(in, out);
+    fputwc_unlocked(L'\0', out);
+    if(fflush(out) != 0)
+    {
+      wcerr << L"Could not flush output " << errno << endl;
+    }
+  }
+  internal_null_flush = false;
+  null_flush = true;
+}
+
+// Main loop: reads tokens from in, matches them against the rules and writes the result to out.
+void
+Interchunk::interchunk(FILE *in, FILE *out)
+{
+  if(getNullFlush())
+  {
+    interchunk_wrapper_null_flush(in, out);
+  }
+  // NOTE(review): no return follows the wrapper call, so the loop below also runs after it - confirm intended.
+  int last = 0; // input_buffer position to go back to after a rule or a word has been handled
+
+  output = out;
+  ms.init(me->getInitial());
+  
+  while(true)
+  {
+    if(ms.size() == 0) // the matcher has no state left
+    {
+      if(lastrule != NULL)
+      {
+	applyRule();
+	input_buffer.setPos(last); // rewind to where the match was recorded
+      }
+      else
+      {
+	if(tmpword.size() != 0) // no rule matched: copy the first pending word through unchanged
+	{
+          fputwc_unlocked(L'^', output);
+          fputws_unlocked(tmpword[0]->c_str(), output);
+          fputwc_unlocked(L'$', output);
+	  tmpword.clear();
+	  input_buffer.setPos(last);
+	  input_buffer.next(); // step past one token from the rewind position
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+	else if(tmpblank.size() != 0) // only blanks are pending: copy the first one through
+	{
+	  fputws_unlocked(tmpblank[0]->c_str(), output);
+	  tmpblank.clear();
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+      }
+    }
+    int val = ms.classifyFinals(me->getFinals()); // rule number, or -1
+    if(val != -1)
+    {
+      lastrule = rule_map[val-1]; // rule numbers are 1-based, rule_map is 0-based
+      last = input_buffer.getPos();
+
+      if(trace)
+      {
+        wcerr << endl << L"apertium-interchunk: Rule " << val << L" ";
+        for (unsigned int ind = 0; ind < tmpword.size(); ind++)
+        {
+          if (ind != 0)
+          {
+            wcerr << L" ";
+          }
+          wcerr << *tmpword[ind];
+        }
+        wcerr << endl;
+      }
+    }
+
+    TransferToken &current = readToken(in);
+    // The pending lists store pointers to the token contents, not copies.
+    switch(current.getType())
+    {
+      case tt_word:
+	applyWord(current.getContent());
+        tmpword.push_back(&current.getContent());
+	break;
+
+      case tt_blank:
+	ms.step(L' ');
+	tmpblank.push_back(&current.getContent());
+	break;
+
+      case tt_eof:
+	if(tmpword.size() != 0) // words are still pending: keep looping so they get handled first
+	{
+	  tmpblank.push_back(&current.getContent());
+	  ms.clear(); // presumably empties the matcher so the ms.size() == 0 branch runs next - confirm
+	}
+	else
+	{
+	  fputws_unlocked(current.getContent().c_str(), output);
+	  tmpblank.clear();
+	  return;
+	}
+	break;
+
+      default:
+	cerr << "Error: Unknown input token." << endl;
+	return;
+    }
+  }
+}
+
+// Applies lastrule to the words collected in tmpword.
+//
+// The collected words (and the blanks stored with them) are converted to
+// UTF-8 and exposed through word/blank and lword/lblank while the rule's
+// action runs; afterwards they are freed, the pending word/blank lists are
+// cleared and the matcher is reset to its initial state.
+void
+Interchunk::applyRule()
+{
+  unsigned int limit = tmpword.size();
+
+  // The arrays are only (re)allocated when there is at least one word.
+  if(limit != 0)
+  {
+    word = new InterchunkWord *[limit];
+    lword = limit;
+    blank = (limit != 1) ? new string *[limit - 1] : NULL;
+    lblank = limit - 1;
+  }
+
+  for(unsigned int i = 0; i != limit; i++)
+  {
+    if(i != 0)
+    {
+      // One blank is stored for every word after the first.
+      blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1]));
+    }
+    word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i]));
+  }
+
+  processRule(lastrule);
+  lastrule = NULL;
+
+  if(word != NULL)
+  {
+    for(unsigned int i = 0; i != limit; i++)
+    {
+      delete word[i];
+    }
+    delete[] word;
+    word = NULL;
+  }
+  if(blank != NULL)
+  {
+    for(unsigned int i = 0; i != limit - 1; i++)
+    {
+      delete blank[i];
+    }
+    delete[] blank;
+    blank = NULL;
+  }
+  tmpword.clear();
+  tmpblank.clear();
+  ms.init(me->getInitial());
+}
+
+// Feeds one word into the matcher, framed by '^' and '$'.  Ordinary and
+// backslash-escaped characters are stepped lower-cased with any_char as the
+// alternative; a "<tag>" is stepped as its alphabet symbol with any_tag as
+// the alternative, or as any_tag alone when the alphabet returns 0; a '{'
+// stops the scan.  A '<' that is never closed by '>' contributes nothing.
+void
+Interchunk::applyWord(wstring const &word_str)
+{
+  ms.step(L'^');
+  unsigned int const limit = word_str.size();
+  for(unsigned int i = 0; i < limit; i++)
+  {
+    wchar_t const c = word_str[i];
+    if(c == L'{')
+    {
+      // ignore the unmodifiable part of the chunk
+      break;
+    }
+    else if(c == L'<')
+    {
+      wstring::size_type const close = word_str.find(L'>', i + 1);
+      if(close != wstring::npos)
+      {
+        int const symbol = alphabet(word_str.substr(i, close - i + 1));
+        if(symbol)
+        {
+          ms.step(symbol, any_tag);
+        }
+        else
+        {
+          ms.step(any_tag);
+        }
+        i = close;
+      }
+    }
+    else
+    {
+      // For an escape, step the character that follows the backslash.
+      if(c == L'\\') { i++; }
+      ms.step(towlower(word_str[i]), any_char);
+    }
+  }
+  ms.step(L'$');
+}
Index: branches/apertium-tagger/apertium2/apertium/interchunk.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk.h	(revision 69632)
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _INTERCHUNK_
+#define _INTERCHUNK_
+
+#include <apertium/transfer_instr.h>
+#include <apertium/transfer_token.h>
+#include <apertium/interchunk_word.h>
+#include <apertium/apertium_re.h>
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/buffer.h>
+#include <lttoolbox/ltstr.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+
+#include <cstring>
+#include <cstdio>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+// Rule interpreter driven by interchunk(): reads tokens from a stream, matches them and runs rule actions.
+class Interchunk
+{
+private:
+  // --- matcher, rule data and per-rule working storage ---
+  Alphabet alphabet; // maps "<tag>" strings to symbols (used by applyWord())
+  MatchExe *me; // supplies the matcher's initial state and its final states
+  MatchState ms; // current matcher state, stepped by applyWord() and interchunk()
+  map<string, ApertiumRE, Ltstr> attr_items;
+  map<string, string, Ltstr> variables;
+  map<string, int, Ltstr> macros;
+  map<string, set<string, Ltstr>, Ltstr> lists; // named lists searched by processIn() and the *-with-list tests
+  map<string, set<string, Ltstr>, Ltstr> listslow; // searched instead of lists when a test is case-insensitive
+  vector<xmlNode *> macro_map;
+  vector<xmlNode *> rule_map; // indexed by rule number - 1 in interchunk()
+  xmlDoc *doc;
+  xmlNode *root_element;
+  InterchunkWord **word; // array built by applyRule() for the rule being applied, NULL otherwise
+  string **blank; // blanks stored with those words, NULL when there is a single word
+  int lword, lblank; // lengths of word and blank as set by applyRule()
+  Buffer<TransferToken> input_buffer; // tokens read so far; readToken() replays them after a rewind
+  vector<wstring *> tmpword; // contents of the words read since the last rule/word was handled
+  vector<wstring *> tmpblank; // contents of the blanks read in the same span
+
+  FILE *output; // set to the out stream at the start of interchunk()
+  int any_char; // alternative symbol passed to ms.step() for characters
+  int any_tag; // alternative symbol passed to ms.step() for tags
+
+  xmlNode *lastrule; // node of the last rule that matched, NULL when none is pending
+  unsigned int nwords;
+  
+  map<xmlNode *, TransferInstr> evalStringCache;
+  bool inword; // true while readToken() is between '^' and '$'
+  bool null_flush; // when set, interchunk() goes through interchunk_wrapper_null_flush()
+  bool internal_null_flush; // when set, readToken() treats a NUL character as end of input
+  bool trace; // when set, interchunk() reports each matched rule on wcerr
+  string emptyblank;
+  // --- loading and case helpers ---
+  void destroy();
+  void readData(FILE *input);
+  void readInterchunk(string const &input);
+  void collectMacros(xmlNode *localroot);
+  void collectRules(xmlNode *localroot);
+  string caseOf(string const &str);
+  string copycase(string const &source_word, string const &target_word);
+  // --- handlers for nodes of the parsed rules document ---
+  void processLet(xmlNode *localroot);
+  void processAppend(xmlNode *localroot);
+  void processOut(xmlNode *localroot);
+  void processCallMacro(xmlNode *localroot);
+  void processModifyCase(xmlNode *localroot);
+  bool processLogical(xmlNode *localroot); // dispatches on the tag name to the bool handlers below
+  bool processTest(xmlNode *localroot);
+  bool processAnd(xmlNode *localroot);
+  bool processOr(xmlNode *localroot);
+  bool processEqual(xmlNode *localroot);
+  bool processBeginsWith(xmlNode *localroot);
+  bool processBeginsWithList(xmlNode *localroot);
+  bool processEndsWith(xmlNode *localroot);
+  bool processEndsWithList(xmlNode *localroot);
+  bool processContainsSubstring(xmlNode *localroot);
+  bool processNot(xmlNode *localroot);
+  bool processIn(xmlNode *localroot);
+  void processRule(xmlNode *localroot); // runs every element child of a rule's action node
+  string evalString(xmlNode *localroot);
+  void processInstruction(xmlNode *localroot);
+  void processChoose(xmlNode *localroot);
+  string processChunk(xmlNode *localroot);
+  // --- string helpers, token input and rule application ---
+  bool beginsWith(string const &str1, string const &str2) const; // true when str1 starts with str2
+  bool endsWith(string const &str1, string const &str2) const; // true when str1 ends with str2
+  string tolower(string const &str) const;
+  string tags(string const &str) const; // "a.b" -> "<a><b>"
+  string readWord(FILE *in);
+  string readBlank(FILE *in);
+  string readUntil(FILE *in, int const symbol) const;
+  void applyWord(wstring const &word_str);
+  void applyRule();
+  TransferToken & readToken(FILE *in);
+  bool checkIndex(xmlNode *element, int index, int limit); 
+  void interchunk_wrapper_null_flush(FILE *in, FILE *out);
+
+public:
+  Interchunk();
+  ~Interchunk();
+  
+  void read(string const &transferfile, string const &datafile);
+  void interchunk(FILE *in, FILE *out); // processes in until end of input, writing to out
+  bool getNullFlush(void);
+  void setNullFlush(bool null_flush);
+  void setTrace(bool trace);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/postchunk.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/postchunk.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/postchunk.h	(revision 69632)
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _POSTCHUNK_
+#define _POSTCHUNK_
+
+#include <apertium/transfer_instr.h>
+#include <apertium/transfer_token.h>
+#include <apertium/interchunk_word.h>
+#include <apertium/apertium_re.h>
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/buffer.h>
+#include <lttoolbox/ltstr.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+
+#include <cstdio>
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+// Declaration of the Postchunk processor; the member functions are defined in a separate source file.
+class Postchunk
+{
+private:
+  // NOTE(review): the roles of the data members below are not visible from this header - confirm in the implementation.
+  Alphabet alphabet;
+  MatchExe *me;
+  MatchState ms;
+  map<string, ApertiumRE, Ltstr> attr_items;
+  map<string, string, Ltstr> variables;
+  map<string, int, Ltstr> macros;
+  map<string, set<string, Ltstr>, Ltstr> lists;
+  map<string, set<string, Ltstr>, Ltstr> listslow;
+  vector<xmlNode *> macro_map;
+  vector<xmlNode *> rule_map;
+  xmlDoc *doc;
+  xmlNode *root_element;
+  InterchunkWord **word;
+  string **blank;
+  int lword, lblank;
+  Buffer<TransferToken> input_buffer;
+  vector<wstring *> tmpword;
+  vector<wstring *> tmpblank;
+
+  FILE *output;
+  int any_char;
+  int any_tag;
+
+  xmlNode *lastrule;
+  unsigned int nwords;
+  
+  map<xmlNode *, TransferInstr> evalStringCache;
+
+  bool inword;
+  bool null_flush; // read and written through getNullFlush()/setNullFlush() - presumably; confirm
+  bool internal_null_flush;
+  // --- set-up and case helpers ---
+  void destroy();
+  void readData(FILE *input);
+  void readPostchunk(string const &input);
+  void collectMacros(xmlNode *localroot);
+  void collectRules(xmlNode *localroot);
+  static string caseOf(string const &str); // narrow-string overload
+  static wstring caseOf(wstring const &str); // wide-string overload
+  string copycase(string const &source_word, string const &target_word);
+  // --- handlers taking a node of the parsed rules document ---
+  void processLet(xmlNode *localroot);
+  void processAppend(xmlNode *localroot);
+  void processOut(xmlNode *localroot);
+  void processCallMacro(xmlNode *localroot);
+  void processModifyCase(xmlNode *localroot);
+  bool processLogical(xmlNode *localroot);
+  bool processTest(xmlNode *localroot);
+  bool processAnd(xmlNode *localroot);
+  bool processOr(xmlNode *localroot);
+  bool processEqual(xmlNode *localroot);
+  bool processBeginsWith(xmlNode *localroot);
+  bool processBeginsWithList(xmlNode *localroot);
+  bool processEndsWith(xmlNode *localroot);
+  bool processEndsWithList(xmlNode *localroot);
+  bool processContainsSubstring(xmlNode *localroot);
+  bool processNot(xmlNode *localroot);
+  bool processIn(xmlNode *localroot);
+  void processRule(xmlNode *localroot);
+  string evalString(xmlNode *localroot);
+  void processInstruction(xmlNode *localroot);
+  void processChoose(xmlNode *localroot);
+  void processTags(xmlNode *localroot);
+  bool beginsWith(string const &str1, string const &str2) const;
+  bool endsWith(string const &str1, string const &str2) const;
+  string tolower(string const &str) const;
+  string tags(string const &str) const;
+  string readWord(FILE *in);
+  string readBlank(FILE *in);
+  string readUntil(FILE *in, int const symbol) const;
+  void applyWord(wstring const &word_str);
+  void applyRule();
+  TransferToken & readToken(FILE *in);
+  static void unchunk(wstring const &chunk, FILE *output); // static helpers from here on take the chunk text explicitly
+  static vector<wstring> getVecTags(wstring const &chunk);
+  static int beginChunk(wstring const &chunk);
+  static int endChunk(wstring const &chunk);
+  static void splitWordsAndBlanks(wstring const &chunk, 
+				  vector<wstring *> &words,
+				  vector<wstring *> &blanks); // words and blanks are output parameters
+  static wstring pseudolemma(wstring const &chunk);
+  static wstring wordzero(wstring const &chunk);
+  bool checkIndex(xmlNode *element, int index, int limit);  
+  void postchunk_wrapper_null_flush(FILE *in, FILE *out);
+
+public:
+  Postchunk();
+  ~Postchunk();
+  
+  void read(string const &transferfile, string const &datafile);
+  void postchunk(FILE *in, FILE *out); // stream-to-stream entry point
+  bool getNullFlush(void);
+  void setNullFlush(bool null_flush);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.cc	(revision 69632)
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/hmm.h>
+#include <apertium/tagger_data_hmm.h>
+#include <lttoolbox/compression.h>
+#include <apertium/endian_double_util.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+// Frees a matrix allocated as an array of `rows` separately allocated rows
+// and resets the pointer to NULL.  Does nothing to a NULL matrix.
+static void
+tagger_data_hmm_free_matrix(double **&matrix, int const rows)
+{
+  if(matrix != NULL)
+  {
+    for(int i = 0; i != rows; i++)
+    {
+      delete [] matrix[i];
+    }
+    delete [] matrix;
+  }
+  matrix = NULL;
+}
+
+// Releases the a and b matrices (both have N rows) and zeroes N and M.
+void
+TaggerDataHMM::destroy()
+{
+  tagger_data_hmm_free_matrix(a, N);
+  tagger_data_hmm_free_matrix(b, N);
+
+  N = 0;
+  M = 0;
+}
+
+// Creates an empty model: no probability matrices are allocated and both
+// dimensions are zero.
+TaggerDataHMM::TaggerDataHMM()
+{
+  a = b = NULL;
+  N = M = 0;
+}
+
+TaggerDataHMM::~TaggerDataHMM() // releases the probability matrices
+{
+  destroy();
+}
+
+// Copy constructor: starts empty, copies the TaggerData part of o, then
+// deep-copies o's probability matrices via setProbabilities().
+TaggerDataHMM::TaggerDataHMM(TaggerDataHMM const &o)
+{
+  // setProbabilities() calls destroy(), so the members must be valid first.
+  a = b = NULL;
+  N = M = 0;
+  TaggerData::copy(o);
+  setProbabilities(o.N, o.M, o.a, o.b);
+}
+
+// Builds an HMM data object from generic tagger data: the TaggerData part
+// of o is copied and the probability matrices are left unallocated
+// (a == b == NULL, N == M == 0).
+TaggerDataHMM::TaggerDataHMM(TaggerData const &o)
+{
+  a = b = NULL;
+  N = M = 0;
+
+  TaggerData::copy(o);
+}
+
+// Assignment.  Self-assignment is a no-op; otherwise the current matrices
+// are released, the TaggerData part is copied and o's matrices deep-copied.
+TaggerDataHMM &
+TaggerDataHMM::operator =(TaggerDataHMM const &o)
+{
+  if(this == &o) { return *this; }
+  destroy();
+  TaggerData::copy(o);
+  setProbabilities(o.N, o.M, o.a, o.b);
+  return *this;
+}
+  
+// Replaces the model's dimensions and probability matrices.
+//
+//   myN, myM  new dimensions; if either is zero no matrix is allocated
+//   myA       N x N source matrix copied into a, or NULL to leave the
+//             newly allocated entries uninitialised
+//   myB       N x M source matrix copied into b, or NULL likewise
+//
+// The new matrices are built before the old ones are released, so the
+// call is also safe when myA/myB are this object's own matrices (the
+// previous code freed them first and then copied from freed memory).
+void
+TaggerDataHMM::setProbabilities(int const myN, int const myM, 
+                             double **myA, double **myB)
+{
+  double **newA = NULL;
+  double **newB = NULL;
+
+  if(myN != 0 && myM != 0)
+  {
+    // a: myN x myN matrix, b: myN x myM matrix
+    newA = new double * [myN];
+    newB = new double * [myN];
+    for(int i = 0; i != myN; i++)
+    {
+      newA[i] = new double[myN];
+      newB[i] = new double[myM];
+      for(int j = 0; myA != NULL && j != myN; j++)
+      {
+        newA[i][j] = myA[i][j];
+      }
+      for(int j = 0; myB != NULL && j != myM; j++)
+      {
+        newB[i][j] = myB[i][j];
+      }
+    }
+  }
+
+  // Only now drop the old matrices (destroy() also zeroes N and M).
+  this->destroy();
+  N = myN;
+  M = myM;
+  a = newA;
+  b = newB;
+}
+
+// Returns the object's own a matrix (not a copy); NULL when unallocated.
+double **TaggerDataHMM::getA()
+{
+  return this->a;
+}
+
+// Returns the object's own b matrix (not a copy); NULL when unallocated.
+double **TaggerDataHMM::getB()
+{
+  return this->b;
+}
+
+// Returns N: the number of rows of a and b, and the number of columns of a.
+int TaggerDataHMM::getN()
+{
+  return this->N;
+}
+
+// Returns M, the number of columns of the b matrix.
+int TaggerDataHMM::getM()
+{
+  return this->M;
+}
+// Loads the tagger data from a binary stream; the current matrices are released first.
+void
+TaggerDataHMM::read(FILE *in)
+{
+  destroy();
+
+  // open_class
+  int val = 0; // running total: each stored number is added to the previous value
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    val += Compression::multibyte_read(in);
+    open_class.insert(val);
+  }
+  
+  // forbid_rules
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    TForbidRule aux;
+    aux.tagi = Compression::multibyte_read(in);
+    aux.tagj = Compression::multibyte_read(in);
+    forbid_rules.push_back(aux);
+  }
+
+  
+  // array_tags
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    array_tags.push_back(Compression::wstring_read(in));
+  }
+  
+  // tag_index
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    wstring tmp = Compression::wstring_read(in);    
+    tag_index[tmp] = Compression::multibyte_read(in);
+  }
+
+  // enforce_rules  
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    TEnforceAfterRule aux;
+    aux.tagi = Compression::multibyte_read(in);
+    for(int j = Compression::multibyte_read(in); j != 0; j--)
+    {
+      aux.tagsj.push_back(Compression::multibyte_read(in));
+    }
+    enforce_rules.push_back(aux);
+  }
+
+  // prefer_rules
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    prefer_rules.push_back(Compression::wstring_read(in));
+  }
+
+  // constants
+  constants.read(in);
+
+  // output
+  output.read(in); 
+
+  // dimensions
+  N = Compression::multibyte_read(in);
+  M = Compression::multibyte_read(in);
+
+  // allocate a (N x N) and b (N x M)
+  a = new double * [N];
+  b = new double * [N];
+  for(int i = 0; i != N; i++)
+  {
+    a[i] = new double[N];
+    b[i] = new double[M];
+  }
+   
+  // read a
+  for(int i = 0; i != N; i++)
+  {
+    for(int j = 0; j != N; j++)
+    {
+      a[i][j] = EndianDoubleUtil::read(in);
+    }
+  }
+
+  // initializing b matrix
+  for(int i = 0 ; i != N; i++)
+  {
+    for(int j = 0; j != M; j++)
+    {
+      b[i][j] = ZERO;
+    }
+  }
+
+  // read nonZERO values of b
+  int nval = Compression::multibyte_read(in);
+  // NOTE(review): i and j below come straight from the file and are not checked against N and M before indexing b.
+  for(; nval != 0; --nval)
+  {
+    int i = Compression::multibyte_read(in);
+    int j = Compression::multibyte_read(in);
+    b[i][j] = EndianDoubleUtil::read(in);
+  }
+
+  // read pattern list
+  plist.read(in);
+    
+  // read discards on ambiguity
+  discard.clear();
+  // The discard list is optional: if the stream is at end of file after reading the count, return with it empty.
+  int limit = Compression::multibyte_read(in);  
+  if(feof(in))
+  {
+    return;
+  }
+  
+  for(int i = 0; i < limit; i++)
+  {
+    discard.push_back(Compression::wstring_read(in));
+  }
+}
+// Writes the tagger data to a binary stream, section by section in the order read() consumes them.
+void
+TaggerDataHMM::write(FILE *out)
+{
+  
+  // open_class
+  Compression::multibyte_write(open_class.size(), out);  
+  int val = 0; // previous element: each entry is written as the difference from it
+  for(set<TTag>::const_iterator it = open_class.begin(), limit = open_class.end();
+      it != limit; it++)
+  {
+    Compression::multibyte_write(*it-val, out);    
+    val = *it;
+  }
+  
+  // forbid_rules
+  Compression::multibyte_write(forbid_rules.size(), out);
+  for(unsigned int i = 0, limit = forbid_rules.size(); i != limit; i++)
+  {
+    Compression::multibyte_write(forbid_rules[i].tagi, out);
+    Compression::multibyte_write(forbid_rules[i].tagj, out);
+  }
+  
+  // array_tags
+  Compression::multibyte_write(array_tags.size(), out);
+  for(unsigned int i = 0, limit = array_tags.size(); i != limit; i++)
+  {
+    Compression::wstring_write(array_tags[i], out);
+  }
+
+  // tag_index
+  Compression::multibyte_write(tag_index.size(), out);
+  for(map<wstring, int, Ltstr>::iterator it = tag_index.begin(), limit = tag_index.end();
+      it != limit; it++)
+  {
+    Compression::wstring_write(it->first, out);
+    Compression::multibyte_write(it->second, out);
+  }
+  
+  // enforce_rules
+  Compression::multibyte_write(enforce_rules.size(), out);
+  for(unsigned int i = 0, limit = enforce_rules.size(); i != limit; i++)
+  {
+    Compression::multibyte_write(enforce_rules[i].tagi, out);
+    Compression::multibyte_write(enforce_rules[i].tagsj.size(), out);
+    for(unsigned int j = 0, limit2 = enforce_rules[i].tagsj.size(); j != limit2; j++)
+    {
+      Compression::multibyte_write(enforce_rules[i].tagsj[j], out);
+    }
+  }
+
+  // prefer_rules
+  Compression::multibyte_write(prefer_rules.size(), out);
+  for(unsigned int i = 0, limit = prefer_rules.size(); i != limit; i++)
+  {
+    Compression::wstring_write(prefer_rules[i], out);
+  }
+  
+  // constants
+  constants.write(out);  
+
+  // output
+  output.write(out);
+
+  // dimensions N and M, then the a matrix (N x N, row by row)
+  Compression::multibyte_write(N, out);
+  Compression::multibyte_write(M, out);
+  for(int i = 0; i != N; i++) // NOTE(review): a is indexed without a NULL check - confirm N != 0 implies a is allocated
+  {
+    for(int j = 0; j != N; j++)
+    {
+      EndianDoubleUtil::write(out, a[i][j]);
+    }
+  }
+
+  // b matrix, writing only useful values
+  // An entry (i, j) is written only when i is found in output[j]; count those entries first.
+  int nval = 0;
+  for(int i = 0; i != N; i++)
+  {
+    for(int j = 0; j != M; j++)
+    {
+      if(output[j].find(i) != output[j].end())
+      {
+        nval++;
+      }
+    }
+  }
+
+  Compression::multibyte_write(nval, out);
+  for(int i = 0; i != N; i++)
+  {
+    for(int j = 0; j != M; j++)
+    {
+      if(output[j].find(i) != output[j].end())
+      {
+        Compression::multibyte_write(i, out);
+        Compression::multibyte_write(j, out);
+        EndianDoubleUtil::write(out, b[i][j]);
+      }
+    }
+  }  
+  
+  // write pattern list
+  plist.write(out);
+  
+  // write discard list
+  // Nothing at all, not even a zero count, is written when the list is empty.
+  if(discard.size() != 0)
+  {
+    Compression::multibyte_write(discard.size(), out);
+    for(unsigned int i = 0, limit = discard.size(); i != limit; i++)
+    {
+      Compression::wstring_write(discard[i], out);
+    }
+  }  
+}
+
Index: branches/apertium-tagger/apertium2/apertium/tagger_word.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_word.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_word.cc	(revision 69632)
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/tagger_word.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+using namespace Apertium;
+
+bool TaggerWord::generate_marks=false; // when true, ambiguous words are opened with "^=" instead of "^"
+
+vector<wstring> TaggerWord::array_tags; // tag names indexed by TTag value (see get_string_tags)
+
+bool TaggerWord::show_ignored_string=true; // when true, the ignored string is prepended to the output
+
+map<wstring, ApertiumRE, Ltstr> TaggerWord::patterns; // compiled regular expressions cached by match()
+
+// Creates an empty word; prev_plus_cut is stored as previous_plus_cut.
+TaggerWord::TaggerWord(bool prev_plus_cut) : show_sf(false)
+{
+  ignored_string.clear();
+  previous_plus_cut = prev_plus_cut;
+  plus_cut = false;
+}
+
+TaggerWord::TaggerWord(const TaggerWord &w){ // copies the fields assigned below
+  show_sf = false; // note: reset here, not taken from w
+  superficial_form = w.superficial_form;
+  ignored_string = w.ignored_string;
+  lexical_forms = w.lexical_forms;
+  tags = w.tags;
+  previous_plus_cut = w.previous_plus_cut;
+  plus_cut = w.plus_cut;
+}
+
+TaggerWord::~TaggerWord()
+{ /* empty: no explicit cleanup is performed */ }
+
+// Sets the flag returned by get_show_sf().
+void TaggerWord::set_show_sf(bool sf) {
+  this->show_sf = sf;
+}
+
+// Returns the flag that makes get_lexical_form() write "superficial_form/".
+bool TaggerWord::get_show_sf() {
+  return this->show_sf;
+}
+
+// Replaces the stored superficial form with a copy of sf.
+void TaggerWord::set_superficial_form(const wstring &sf) {
+  this->superficial_form = sf;
+}
+
+// Returns a mutable reference to the stored superficial form.
+wstring& TaggerWord::get_superficial_form() {
+  return this->superficial_form;
+}
+
+// Returns true when matching s against pattern yields a non-empty result.
+// Every "<*>" in the pattern stands for one or more <...> tags; compiled
+// expressions are cached in TaggerWord::patterns, keyed by the pattern text.
+bool
+TaggerWord::match(wstring const &s, wstring const &pattern)
+{
+  string const utfs = UtfConverter::toUtf8(s);
+  map<wstring, ApertiumRE, Ltstr>::iterator it = patterns.find(pattern);
+
+  if(it != patterns.end())
+  {
+    return it->second.match(utfs) != "";
+  }
+
+  // First use of this pattern: expand the wildcard, then compile it once.
+  string utfpattern = UtfConverter::toUtf8(pattern);
+  size_t pos = utfpattern.find("<*>");
+  while(pos != string::npos)
+  {
+    utfpattern.replace(pos, 3, "(<[^>]+>)+");
+    pos = utfpattern.find("<*>");
+  }
+
+  // operator[] creates the cache entry; the reference avoids repeated lookups.
+  ApertiumRE &compiled = patterns[pattern];
+  compiled.compile(utfpattern);
+  return compiled.match(utfs) != "";
+}
+
+// Adds tag t with lexical form lf. If t is already present, lf replaces the
+// stored form only when it matches one of prefer_rules (checked in order).
+void
+TaggerWord::add_tag(TTag &t, const wstring &lf, vector<wstring> const &prefer_rules){
+  // insert().second tells whether t was new to the set
+  if (tags.insert(t).second) {
+    lexical_forms[t]=lf;
+    return;
+  }
+
+  // Same tag again: one word can carry several lexical forms for a single tag.
+  for(vector<wstring>::const_iterator rule=prefer_rules.begin(); rule!=prefer_rules.end(); ++rule)
+  {
+    if (match(lf, *rule))
+    {
+      lexical_forms[t]=lf;
+      return;
+    }
+  }
+}
+
+// Returns a mutable reference to the set of tags of this word.
+set<TTag>& TaggerWord::get_tags() {
+  return this->tags;
+}
+
+// A word is ambiguous when it carries more than one tag.
+bool TaggerWord::isAmbiguous() const
+{
+  return 1 < tags.size();
+}
+
+// Renders the tag set as "{a,b,...}", taking each name from array_tags.
+wstring
+TaggerWord::get_string_tags() {
+  wstring st = L"{";
+  bool first = true;
+
+  for(set<TTag>::iterator itag = tags.begin(); itag != tags.end(); ++itag) {
+    if (!first)
+      st += L',';
+    first = false;
+    st += array_tags[*itag];
+  }
+  st += L'}';
+  return st;
+}
+
+// Output for this word once tag t has been chosen. Layout:
+//   ignored string (only if show_ignored_string)
+//   "^", or "^=" for an ambiguous word when generate_marks is set
+//   "superficial_form/" when show_sf is on
+//   the lexical form stored for t, or "*superficial_form" for unknown words
+//   "$", or "+" when plus_cut is set
+// For t == TAG_kEOF only the ignored string part is returned.
+wstring
+TaggerWord::get_lexical_form(TTag &t, int const TAG_kEOF) {
+  wstring ret;
+
+  // Text collected through add_ignored_string() comes first.
+  if (show_ignored_string)
+    ret.append(ignored_string);
+
+  // The end-of-file tag produces nothing else.
+  if(t==TAG_kEOF)
+    return ret;
+
+  // Nothing is opened here when previous_plus_cut is set.
+  if (!previous_plus_cut){
+    // Opening delimiter, marked with '=' for ambiguous words on request.
+    bool const marked = TaggerWord::generate_marks && isAmbiguous();
+    ret.append(marked ? L"^=" : L"^");
+
+    if(get_show_sf()){   // append the superficial form
+      ret.append(superficial_form);
+      ret+=L'/';
+    }
+  }
+
+  // Unknown word: either no lexical form at all, or a guessed one, recognised
+  // by the '*' that starts the first stored lexical form.
+  bool const unknown = lexical_forms.size()==0
+                       || (*lexical_forms.begin()).second[0]==L'*';
+
+  if (unknown) {
+    ret += L'*';
+    ret.append(superficial_form);
+  } else {
+    // Ambiguous or not, the form stored for the chosen tag is written.
+    ret.append(lexical_forms[t]);
+  }
+
+  // Close the unit unless nothing besides the ignored string was produced.
+  if (ret != ignored_string) {
+    ret += plus_cut ? L'+' : L'$';
+  }
+
+  // Disabled code, comments translated from Spanish:
+  //if ((superficial_form.length()>0)&&(superficial_form[superficial_form.length()-1]=='\''))
+  //   //If the superficial form ends in an apostrophe we put a blank after the string '/$'
+  //   //or '/'. Otherwise two words with no blank between them would appear in the translation.
+  //   ret+=" "; //Perhaps this is not the right place to do this; it would belong in a module
+  //             //before the tagger or the anmor.
+
+  return ret;
+}
+
+// Variant of get_lexical_form() that lists every analysis of the word: after
+// the ignored string and the opening "^" (or "^=") it writes the superficial
+// form, "/" plus the lexical form stored for the chosen tag t, and then "/"
+// plus the lexical form of each other tag. Unknown words give "/*superficial_form".
+wstring
+TaggerWord::get_all_chosen_tag_first(TTag &t, int const TAG_kEOF) {
+  wstring ret;
+
+  if (show_ignored_string)
+    ret.append(ignored_string);
+
+  if(t==TAG_kEOF)
+    return ret;
+
+  if (!previous_plus_cut)
+  {
+    // "^=" flags an ambiguous word when generate_marks is set
+    bool const marked = TaggerWord::generate_marks && isAmbiguous();
+    ret.append(marked ? L"^=" : L"^");
+  }
+
+  ret.append(superficial_form);
+  ret+=L"/";
+
+  if (lexical_forms.size()==0) { // This is an UNKNOWN WORD
+    ret+=L"*";
+    ret.append(superficial_form);
+  } else {
+    // the chosen analysis comes first ...
+    ret.append(lexical_forms[t]);
+    // ... followed by the analyses of the remaining tags, if any
+    if (lexical_forms.size()>1) {
+      for (set<TTag>::iterator it=tags.begin(); it!=tags.end(); ++it) {
+        if (*it == t)
+          continue;
+        ret+=L"/";
+        ret.append(lexical_forms[*it]);
+      }
+    }
+  }
+
+  // '+' ends a cut word, '$' any other one
+  if (ret != ignored_string) {
+    if (plus_cut)
+      ret+=L"+";
+    else
+      ret+=L"$";
+  }
+
+  return ret;
+}
+
+//OBSOLETE
+// Lexical unit for tag t without the ignored string: "*^superficial_form" for
+// unknown or guessed words, "^lexical_form" otherwise, closed by '+' or '$'.
+// Returns an empty string for t == TAG_kEOF.
+wstring
+TaggerWord::get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF) {
+  wstring ret;
+
+  if(t==TAG_kEOF)
+    return ret;
+
+  // No lexical form, or a first stored form starting with '*' (a guessed word).
+  bool const unknown = lexical_forms.size()==0
+                       || (*lexical_forms.begin()).second[0]=='*';
+
+  if (unknown) {
+    ret.append(L"*^");
+    ret.append(superficial_form);
+  } else {
+    ret += L'^';
+    ret.append(lexical_forms[t]);
+  }
+
+  if (ret.length() != 0) {
+    ret += plus_cut ? L'+' : L'$';
+  }
+
+  return ret;
+}
+
+// Appends s to the ignored string accumulated for this word.
+void TaggerWord::add_ignored_string(wstring const &s) {
+  ignored_string += s;
+}
+
+// Sets plus_cut; when set, the output forms end in '+' instead of '$'.
+void TaggerWord::set_plus_cut(const bool &c) {
+  this->plus_cut = c;
+}
+
+// Returns the plus_cut flag.
+bool TaggerWord::get_plus_cut() {
+  return this->plus_cut;
+}
+
+// Debug output: the tag set of w followed by its superficial form.
+wostream& operator<< (wostream& os, TaggerWord &w) {
+  os << w.get_string_tags();
+  return os << L" \t Word: " << w.get_superficial_form();
+}
+
+// Installs the table of tag names (a static member shared by all words).
+void TaggerWord::setArrayTags(vector<wstring> const &at)
+{
+  TaggerWord::array_tags = at;
+}
+
+// Debug dump to wcout: "[#superficial# (tag lexical_form) ...]".
+void TaggerWord::print()
+{
+  wcout << L"[#" << superficial_form << L"# ";
+  for(set<TTag>::iterator it = tags.begin(); it != tags.end(); ++it)
+  {
+    wcout << L"(" << *it << L" " << lexical_forms[*it] << L") ";
+  }
+  wcout << L"\b]\n";
+}
+
+// Writes the word to output as "^superficial/form1/form2...$" followed by a
+// newline, skipping empty lexical forms. If the assembled text is empty it is
+// passed to fputws_unlocked as it is.
+void
+TaggerWord::outputOriginal(FILE *output) {
+  wstring s=superficial_form;
+
+  for(map<TTag, wstring>::iterator it=lexical_forms.begin(); it!=lexical_forms.end(); ++it) {
+    if (it->second.empty())
+      continue;
+    s+=L'/';
+    s.append(it->second);
+  }
+
+  if (!s.empty())
+  {
+    s=L"^"+s+L"$\n";
+  }
+
+  fputws_unlocked(s.c_str(), output);
+}
+
+// For an ambiguous word, removes every analysis whose lexical form matches the
+// pattern in tags (always keeping at least one) and then brings the tag set in
+// line with the analyses that are left.
+void
+TaggerWord::discardOnAmbiguity(wstring const &tags)
+{
+  if(!isAmbiguous())
+  {
+    return;
+  }
+
+  map<TTag, wstring>::iterator it = lexical_forms.begin();
+  while(it != lexical_forms.end() && lexical_forms.size() > 1)
+  {
+    if(match(it->second, tags))
+    {
+      // Step past the node before erasing it: nothing is skipped or revisited.
+      lexical_forms.erase(it++);
+    }
+    else
+    {
+      ++it;
+    }
+  }
+
+  set<TTag> newsettag;
+  for(it = lexical_forms.begin(); it != lexical_forms.end(); ++it)
+    newsettag.insert(it->first);
+
+  // The parameter shadows the member: the comparison must use this->tags.
+  if(this->tags != newsettag)
+    this->tags = newsettag;
+}
Index: branches/apertium-tagger/apertium2/apertium/tmx_align_parameters.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_align_parameters.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_align_parameters.h	(revision 69632)
@@ -0,0 +1,51 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGN_PARAMETERS_H
+#define __TMXALIGNER_ALIGN_PARAMETERS_H
+#include <string>
+
+class AlignParameters // plain bundle of public alignment settings; defaults are set by the constructor
+{
+public:
+  enum RealignType { NoRealign, ModelOneRealign, FineTranslationRealign };
+
+  bool justSentenceIds;
+  bool justBisentences;
+
+  bool cautiousMode;
+  RealignType realignType;
+  double qualityThreshold;
+
+  double postprocessTrailQualityThreshold;
+  double postprocessTrailStartAndEndQualityThreshold;
+  double postprocessTrailByTopologyQualityThreshold;
+
+  std::string handAlignFilename;
+
+  bool utfCharCountingMode;
+  
+  std::string autoDictionaryDumpFilename; // Empty string means do not dump.
+  // Defaults: justSentenceIds on, every other flag off, NoRealign, qualityThreshold -100000, postprocess thresholds -1, empty file names.
+AlignParameters() : justSentenceIds(true), 
+    justBisentences(false), cautiousMode(false),
+    realignType(NoRealign),
+    qualityThreshold(-100000),
+    postprocessTrailQualityThreshold(-1),
+    postprocessTrailStartAndEndQualityThreshold(-1),
+    postprocessTrailByTopologyQualityThreshold(-1),
+    utfCharCountingMode(false)
+      {}
+
+
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tmx_alignment.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_alignment.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_alignment.cc	(revision 69632)
@@ -0,0 +1,614 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_alignment.h>
+
+#include <apertium/tmx_words.h> // For SentenceList
+#include <apertium/tmx_dictionary.h> // For FrequencyMap
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <algorithm>
+
+// Copy-pasted (TODO: find it a proper place). On failure prints the expression to std::cerr and throws the C string "assert".
+#define massert(e) if (!(e)) { std::cerr << #e << " failed" << std::endl; throw "assert"; }
+
+// Prints a pair of ints as "first,second".
+std::ostream& operator<<( std::ostream& os, std::pair<int,int> p )
+{
+  return os << p.first << "," << p.second;
+}
+
+namespace TMXAligner
+{
+
+
+// Length-based fitness of pairing a (possibly two-sentence) segment with a
+// single sentence. Attention: the two-sentence length is the FIRST argument;
+// usually the Hungarian side comes first, but not here.
+// The bigger the better. For non-negative lengths the result never exceeds
+// bestScore (0.3), which is reached when both lengths are equal.
+double closeness( double twoSentenceLength, double oneSentenceLength )
+{
+  const double bestScore = 0.3;
+  const double quasiglobal_closenessMultiplier = 0.3;
+
+  // Ratio of the longer to the shorter length, both incremented by one first.
+  const bool firstIsLonger = twoSentenceLength>oneSentenceLength;
+  const double longer  = (firstIsLonger ? twoSentenceLength : oneSentenceLength) + 1;
+  const double shorter = (firstIsLonger ? oneSentenceLength : twoSentenceLength) + 1;
+
+  double ratio = longer/shorter;
+
+  // Keep only the excess over 1, i.e. 0 for equal lengths.
+  ratio -= 1.0;
+
+  // assert(ratio>=0);
+  return bestScore - quasiglobal_closenessMultiplier * ratio;
+}
+
+const unsigned char Diag = 1;       // one step back in both dimensions
+const unsigned char HuSkip = 2;     // one step back in hu only
+const unsigned char EnSkip = 3;     // one step back in en only
+const unsigned char HuHuEnSkip = 4; // two steps back in hu, one in en
+const unsigned char HuEnEnSkip = 5; // one step back in hu, two in en
+const unsigned char Dead = 6;       // no predecessor; also used as the size of the per-cell values[] array
+
+// Dynamic programming pass. For every cell of the quasidiagonal band, v gets the
+// best (smallest) cumulative score of aligning [0,huPos) with [0,enPos) and
+// trellis the step that produced it (Diag ... HuEnEnSkip, or Dead when none applies).
+void buildDynProgMatrix( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
+                         QuasiDiagonal<double>& v, TrelliMatrix& trellis )
+{
+  const int huBookSize = w.size();
+  int huPos,enPos;
+  // v[huPos][enPos] gives the similarity of the [0,huPos) and [0,enPos) intervals.
+  // The smaller value, the better similarity. (Unlike in the original similarity matrix w, where bigger is better.)
+  double infinity = 1e6;
+
+  for ( huPos=0; huPos<=huBookSize; ++huPos )
+  {
+    int rowStart = v.rowStart(huPos);
+    int rowEnd   = v.rowEnd(huPos);
+    for ( enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      double& val = v.cell(huPos,enPos);
+      unsigned char& trail = trellis.cell(huPos,enPos);
+      bool quasiglobal_knightsMoveAllowed = true;
+      if (quasiglobal_knightsMoveAllowed)
+      {
+        double lengthFitness(0);
+        bool quasiglobal_lengthFitnessApplied = true;
+        // The array is indexed by the step directions. The smaller value, the better.
+        double values[Dead];
+        int i;
+        for ( i=1; i<Dead; ++i )
+          values[i] = infinity;
+
+        if (huPos>0)
+        {
+          values[HuSkip] = v[huPos-1][enPos]   - skipScore;
+        }
+
+        if (enPos>0)
+        {
+          values[EnSkip] = v[huPos][enPos-1]   - skipScore;
+        }
+
+        if ((huPos>0) && (enPos>0))
+        {
+          if (quasiglobal_lengthFitnessApplied)
+          {
+            lengthFitness = closeness(huLength[huPos-1], enLength[enPos-1]);
+          }
+          else
+          {
+            lengthFitness = 0;
+          }
+
+          values[Diag] = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] - lengthFitness ;
+        }
+
+        const double dotLength = 2.0 ;
+
+        if ((huPos>1) && (enPos>0))
+        {
+          if (quasiglobal_lengthFitnessApplied)
+          {
+            lengthFitness = closeness(huLength[huPos-2]+huLength[huPos-1]+dotLength, enLength[enPos-1]);
+          }
+          else
+          {
+            lengthFitness = 0;
+          }
+
+          // Two hu sentences against one en sentence: the mirror image of the HuEnEnSkip case below.
+          const double& a = w[huPos-1][enPos-1] ;
+          const double& b = w[huPos-2][enPos-1] ;
+          values[HuHuEnSkip] = v[huPos-2][enPos-1] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square.
+        }
+
+        if ((huPos>0) && (enPos>1))
+        {
+          if (quasiglobal_lengthFitnessApplied)
+          {
+            // Attention, the two-sentence length is the first argument. Usually the Hungarian is the first argument, but not here.
+            lengthFitness = closeness(enLength[enPos-2]+enLength[enPos-1]+dotLength, huLength[huPos-1]);
+          }
+          else
+          {
+            lengthFitness = 0;
+          }
+
+          const double& a = w[huPos-1][enPos-1] ;
+          const double& b = w[huPos-1][enPos-2] ;
+          values[HuEnEnSkip] = v[huPos-1][enPos-2] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square.
+        }
+
+        unsigned char direction = Dead;
+        double bestValue = infinity;
+        for ( i=1; i<Dead; ++i )
+        {
+          if (values[i]<bestValue)
+          {
+            bestValue = values[i];
+            direction = i;
+          }
+        }
+
+        trail = direction;
+        if (direction==Dead)
+        {
+          val = 0;
+        }
+        else
+        {
+          val = bestValue;
+        }
+      }
+      else // (!quasiglobal_knightsMoveAllowed)
+      {
+        int borderCase = ( (huPos==0) ? 0 : 2 ) + ( (enPos==0) ? 0 : 1 ) ;
+
+        switch (borderCase)
+        {
+        case 0:
+          {
+            val = 0;
+            trail = Dead;
+            break;
+          }
+        case 1: // huPos==0
+          {
+            val = v[0][enPos-1] - skipScore ;
+            trail = EnSkip;
+            break;
+          }
+        case 2: // enPos==0
+          {
+            val = v[huPos-1][0] - skipScore ;
+            trail = HuSkip;
+            break;
+          }
+        case 3:
+          {
+            double x  = v[huPos-1][enPos]   - skipScore ;
+            double y  = v[huPos]  [enPos-1] - skipScore ;
+            double xy = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] ;
+
+            double best = xy;
+            trail = Diag;
+            if (x<best)
+            {
+              best = x;
+              trail = HuSkip;
+            }
+            if (y<best)
+            {
+              best = y;
+              trail = EnSkip;
+            }
+            val = best;
+            break;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Walks the trellis backwards from its bottom-right cell and collects the
+// visited cells in bestTrail, finally reversed so that the start comes first.
+// The walk ends at the first cell in row 0 or column 0, or at a Dead cell.
+// An unknown trellis value discards the walk: an error is printed and the
+// trail becomes the two points (0,0) and (huBookSize,enBookSize).
+void trelliToLadder( const TrelliMatrix& trellis, Trail& bestTrail )
+{
+  bestTrail.clear();
+
+  // The -1 is needed because the trellis matrix is one larger than the similarity matrix.
+  // This points to its downmost rightmost element.
+  const int huBookSize = trellis.size()-1;
+  const int enBookSize = trellis.otherSize()-1;
+
+  const bool logging = false;
+  if (logging) std::cerr << std::endl;
+
+  int huPos = huBookSize;
+  int enPos = enBookSize;
+  bestTrail.push_back(std::make_pair(huPos,enPos));
+
+  bool hopelesslyBadTrail = false;
+  for (;;)
+  {
+    const unsigned char step = trellis[huPos][enPos];
+
+    if ((huPos==0) || (enPos==0))
+      break;
+
+    // How far to go back on each side; Dead and unknown values end the walk.
+    int huBack = 0;
+    int enBack = 0;
+    bool finished = false;
+
+    switch (step)
+    {
+    case Diag :
+      huBack = 1;
+      enBack = 1;
+      break;
+
+    case HuSkip :
+      huBack = 1;
+      break;
+
+    case EnSkip :
+      enBack = 1;
+      break;
+
+    case HuHuEnSkip :
+      huBack = 2;
+      enBack = 1;
+      break;
+
+    case HuEnEnSkip :
+      huBack = 1;
+      enBack = 2;
+      break;
+
+    case Dead :
+      finished = true;
+      break;
+
+    default:
+      hopelesslyBadTrail = true;
+      finished = true;
+      break;
+    }
+
+    if (finished)
+      break;
+
+    huPos -= huBack;
+    enPos -= enBack;
+    bestTrail.push_back(std::make_pair(huPos,enPos));
+
+    if (logging)
+    {
+      std::cerr << huPos << " \t" << enPos << std::endl;
+    }
+  }
+
+  if (hopelesslyBadTrail)
+  {
+    bestTrail.clear();
+    bestTrail.push_back(std::make_pair(huBookSize,enBookSize));
+    bestTrail.push_back(std::make_pair(0,0));
+    std::cerr << "Error: hopelessly bad trail." << std::endl;
+  }
+
+  std::reverse(bestTrail.begin(),  bestTrail.end()  );
+}
+
+
+// Runs the whole alignment: checks that v is one larger than w in both
+// dimensions, fills v and a fresh trellis, and extracts the best trail.
+void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
+            Trail& bestTrail, AlignMatrix& v )
+{
+  const int rows  = w.size();
+  const int cols  = w.otherSize();
+  const int width = w.thickness();
+
+  massert(w.size()+1 == v.size());
+  massert(w.otherSize()+1 == v.otherSize());
+
+  // The trellis is one larger than w in both dimensions.
+  TrelliMatrix trellis( rows+1,cols+1,width, Dead );
+
+  buildDynProgMatrix( w, huLength, enLength, v, trellis );
+//  std::cerr << "Matrix built." << std::endl;
+  trelliToLadder( trellis, bestTrail );
+//  std::cerr << "Trail found." << std::endl;
+}
+
+
+// True when the trail advances by exactly one in both dimensions between
+// positions pos and pos+1.
+bool oneToOne( const Trail& bestTrail, int pos )
+{
+  const int huStep = bestTrail[pos+1].first  - bestTrail[pos].first;
+  const int enStep = bestTrail[pos+1].second - bestTrail[pos].second;
+  return (huStep == 1) && (enStep == 1);
+}
+
+
+// Counts the elements common to sx and sy in one merge-style pass.
+// NOTE(review): this presumably expects both trails sorted ascending -- confirm at the callers.
+int countIntersectionOfTrails( const Trail& sx, const Trail& sy )
+{
+  int inter = 0;
+
+  Trail::const_iterator sxt = sx.begin();
+  Trail::const_iterator syt = sy.begin();
+  const Trail::const_iterator sxe = sx.end();
+  const Trail::const_iterator sye = sy.end();
+
+  while ( sxt!=sxe && syt!=sye )
+  {
+    if ( *sxt < *syt ) { ++sxt; continue; }
+    if ( *sxt > *syt ) { ++syt; continue; }
+
+    // equal elements: count once and advance on both sides
+    ++inter;
+    ++sxt;
+    ++syt;
+  }
+  return inter;
+}
+
+
+// Compares an automatic trail (or bisentence list -- both are typedef'd to the
+// same structure) with the hand-made one, prints the figures to std::cerr and
+// returns the fraction of automatic items that are not in the hand-made list.
+double scoreTrailOrBisentenceList( const Trail& trailAuto, const Trail& trailHand )
+{
+  const int score = countIntersectionOfTrails( trailAuto, trailHand );
+
+  std::cerr << trailAuto.size()-score << " misaligned out of " << trailHand.size() << " correct items, "
+    << trailAuto.size() << " bets." << std::endl;
+  std::cerr << "Precision: " << 1.0*score/trailAuto.size()
+    << ", Recall: " << 1.0*score/trailHand.size() << std::endl;
+
+  return 1.0*(trailAuto.size()-score)/trailAuto.size();
+}
+
+
+// Keeps the starting point of every one-to-one step of bestTrail; the result
+// replaces the previous content of bisentenceList.
+void trailToBisentenceList( const Trail& bestTrail,
+                            BisentenceList& bisentenceList )
+{
+  bisentenceList.clear();
+
+  const int trailSize = bestTrail.size();
+  for ( int pos=0; pos<trailSize-1; ++pos )
+  {
+    if ( !oneToOne(bestTrail,pos) )
+      continue;
+    bisentenceList.push_back(bestTrail[pos]);
+  }
+}
+
+
+// Scores an automatic bisentence list against the bisentences derived from the
+// hand-made trail.
+double scoreBisentenceList( const BisentenceList& bisentenceListAuto, const Trail& trailHand )
+{
+  BisentenceList bisentenceListHand;
+  trailToBisentenceList( trailHand, bisentenceListHand );
+
+  return scoreTrailOrBisentenceList( bisentenceListAuto, bisentenceListHand );
+}
+
+// Scores an automatic trail against the hand-made one.
+double scoreTrail( const Trail& trailAuto, const Trail& trailHand ) {
+  return scoreTrailOrBisentenceList( trailAuto, trailHand );
+}
+
+
+// Sets every cell of m within radius of (huPos,enPos) to insideOfRadiusValue, clipped to the matrix bounds.
+void setBox( AlignMatrix& m, int huPos, int enPos, int radius, int insideOfRadiusValue )
+{
+  for ( int x=huPos-radius; x<=huPos+radius; ++x )
+  {
+    if ( (x<0) || (x>=m.size()) ) continue;
+    for ( int y=enPos-radius; y<=enPos+radius; ++y )
+    {
+      if ( (y<0) || (y>=m.otherSize()) ) continue;
+      m.cell(x,y) = insideOfRadiusValue ; // ToDo: Should this be (y,x) instead? Function has args y,x not x,y. Fix here or function
+    }
+  }
+}
+
+// Fills the complement of the radius of the trail with minus infties.
+// The return value true means success. Failure means that during the fill,
+// we intersected the outside of the quasidiagonal area.
+// In this case, the operation is not finished.
+bool borderDetailedAlignMatrix( AlignMatrix& alignMatrix, const Trail& trail, int radius )
+{
+  int huBookSize = alignMatrix.size();
+  // Step 1: mark every stored cell as outside the radius.
+  int huPos, enPos;
+  for ( huPos=0; huPos<huBookSize; ++huPos )
+  {
+    int rowStart = alignMatrix.rowStart(huPos);
+    int rowEnd   = alignMatrix.rowEnd(huPos);
+    for ( enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      alignMatrix.cell(huPos,enPos) = outsideOfRadiusValue;
+    }
+  }
+  // Step 2: mark the box around every trail point as inside the radius.
+  // We seriously use the fact that many-to-zero segments are subdivided into one-to-zero segments.
+  // Inside setBox, an exception is thrown if we try to write outside the quasidiagonal.
+  // If we catch such an exception, it means that the quasidiagonal is not thick enough.
+  // In this case, we abandon the whole align, just to be sure.
+  try
+  {
+    for ( size_t i=0; i<trail.size(); ++i )
+    {
+      setBox( alignMatrix, trail[i].first, trail[i].second, radius, insideOfRadiusValue );
+    }
+  }
+  catch ( const char* errorType )
+  {
+    massert( std::string(errorType) == "out of quasidiagonal" ) // no ';' here: the macro expands to a complete if statement
+    return false;
+  }
+  // Step 3 (always enabled): count the cells marked as inside and report the number on std::cerr.
+  bool verify = true;
+  if (verify)
+  {
+    int numberOfEvaluatedItems(0);
+    for ( huPos=0; huPos<huBookSize; ++huPos )
+    {
+      int rowStart = alignMatrix.rowStart(huPos);
+      int rowEnd   = alignMatrix.rowEnd(huPos);
+      for ( enPos=rowStart; enPos<rowEnd; ++enPos )
+      {
+        if (alignMatrix[huPos][enPos]==insideOfRadiusValue)
+        {
+          ++numberOfEvaluatedItems;
+        }
+      }
+    }
+
+    std::cerr << numberOfEvaluatedItems << " items inside the border." << std::endl;
+  }
+
+  return true;
+}
+
+// Prints alignMatrix to std::cout as a table: one line per row, each value
+// followed by a tab. Columns outside [rowStart,rowEnd) of a row are shown
+// as -1.
+template <class T>
+void dumpAlignMatrix( const QuasiDiagonal<T>& alignMatrix )
+{
+  const int huBookSize = alignMatrix.size();
+  const int enBookSize = alignMatrix.otherSize();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    for ( int enPos=0; enPos<enBookSize; ++enPos )
+    {
+      const int start = alignMatrix.rowStart(huPos);
+      const int end   = alignMatrix.rowEnd  (huPos);
+      const bool inBand = (enPos>=start) && (enPos<end);
+
+      if (inBand)
+        std::cout << alignMatrix[huPos][enPos] << "\t";
+      else
+        std::cout << "-1\t";
+    }
+
+    std::cout << std::endl;
+  }
+}
+
+// Prints an integer matrix to std::cout. In graphical mode each cell becomes
+// one character (0 ' ', 1 '.', 2 ':', 3 '|', 4 '+', anything else 'X') and
+// cells outside the band are blank; otherwise the values are tab-separated
+// and cells outside the band are printed as -1.
+void dumpAlignMatrix( const QuasiDiagonal<int>& alignMatrix, bool graphical )
+{
+  const int huBookSize = alignMatrix.size();
+  const int enBookSize = alignMatrix.otherSize();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    for ( int enPos=0; enPos<enBookSize; ++enPos )
+    {
+      const int start = alignMatrix.rowStart(huPos);
+      const int end   = alignMatrix.rowEnd  (huPos);
+
+      // column outside [start,end) of this row
+      if ( (enPos<start) || (enPos>=end) )
+      {
+        std::cout << ( graphical ? "   " : "-1\t" );
+        continue;
+      }
+
+      // plain mode: the number itself
+      const int value = alignMatrix[huPos][enPos];
+      if (!graphical)
+      {
+        std::cout << value << "\t";
+        continue;
+      }
+
+      // graphical mode: one symbol per value
+      char c(' ');
+      switch (value)
+      {
+        case 0: c=' '; break;
+        case 1: c='.'; break;
+        case 2: c=':'; break;
+        case 3: c='|'; break;
+        case 4: c='+'; break;
+        default: c='X'; break;
+      }
+
+      std::cout << c << " ";
+    }
+
+    std::cout << std::endl;
+  }
+}
+
+// Prints the trellis to std::cout with each step shown by name (Diag as "HuEn");
+// cells outside the band print as -1 and unknown step values as an empty field.
+void dumpTrelliMatrix( const TrelliMatrix& trellis )
+{
+  const int huBookSize = trellis.size();
+  const int enBookSize = trellis.otherSize();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    for ( int enPos=0; enPos<enBookSize; ++enPos )
+    {
+      const int start = trellis.rowStart(huPos);
+      const int end   = trellis.rowEnd  (huPos);
+      if ( (enPos<start) || (enPos>=end) )
+      {
+        std::cout << "-1\t";
+        continue;
+      }
+
+      const char* name = "";
+      switch (trellis[huPos][enPos])
+      {
+        case Diag:       name = "HuEn";   break;
+        case HuSkip:     name = "Hu";     break;
+        case EnSkip:     name = "En";     break;
+        case HuHuEnSkip: name = "HuHuEn"; break;
+        case HuEnEnSkip: name = "HuEnEn"; break;
+        case Dead:       name = "Dead";   break;
+      }
+      std::cout << name << "\t";
+    }
+    std::cout << std::endl;
+  }
+}
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/tmx_builder.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_builder.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_builder.cc	(revision 69632)
@@ -0,0 +1,1027 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/tmx_builder.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+#include <apertium/tmx_aligner_tool.h>
+#include <lttoolbox/ltstr.h>
+#include <lttoolbox/compression.h>
+
+
+#include <cmath>
+#include <cstdlib>
+#include <iostream>
+#include <iomanip>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace Apertium;
+using namespace std;
+
+// Stores the two language names and sets the default values of the parameters.
+TMXBuilder::TMXBuilder(wstring const &l1, wstring const &l2):
+low_limit(0)
+{
+  lang1 = l1;
+  lang2 = l2;
+
+  // default values of the parameters
+  max_edit = 50;
+  diagonal_width = 10;
+  window_size = 100;
+  step = 75;
+  percent = 0.85;
+  edit_distance_percent = 0.30;
+
+  freference = NULL;
+}
+
+TMXBuilder::~TMXBuilder()
+{ // empty: the destructor performs no explicit cleanup
+}
+
+// Reads the remainder of a blank whose opening '[' has already been consumed
+// and returns it including both brackets. A backslash keeps the character that
+// follows it verbatim. Returns an empty string if the input ends first.
+wstring 
+TMXBuilder::restOfBlank(FILE *input)
+{
+  wstring result = L"[";
+
+  for(;;)
+  {
+    wint_t val = fgetwc(input);
+    if(feof(input))
+    {
+      return L"";
+    }
+
+    if(val == L']')
+    {
+      result += L']';
+      return result;
+    }
+
+    if(val == L'\\')
+    {
+      // escaped character: copy the backslash and whatever follows it
+      result += L'\\';
+      val = fgetwc(input);
+      if(feof(input))
+      {
+        return L"";
+      }
+    }
+
+    result += static_cast<wchar_t>(val);
+  }
+}
+
+// Skips input up to the next unescaped '[' and returns the blank that starts
+// there (see restOfBlank). Returns an empty string at end of input.
+wstring 
+TMXBuilder::nextBlank(FILE *input)
+{
+  for(;;)
+  {
+    wint_t const val = fgetwc(input);
+    if(feof(input))
+    {
+      return L"";
+    }
+
+    if(val == L'\\')
+    {
+      // escaped character: read it and throw it away
+      fgetwc(input);
+    }
+    else if(val == L'[')
+    {
+      return restOfBlank(input);
+    }
+  }
+}
+
+// Reads both files blank by blank and checks that corresponding blanks agree:
+// exactly when lazy is false, within 5% of each other's length when lazy is
+// true. Stops as soon as either file reaches its end.
+bool
+TMXBuilder::compatible(FILE *f1, FILE *f2, bool lazy)
+{
+  wstring s1 = nextBlank(f1), s2 = nextBlank(f2);
+
+  while(!feof(f1) && !feof(f2))
+  {
+    bool mismatch;
+    if(lazy)
+    {
+      mismatch = s1.size() < s2.size()*(1-0.05) || s1.size() > s2.size()*(1+0.05);
+    }
+    else
+    {
+      mismatch = (s1 != s2);
+    }
+
+    if(mismatch)
+    {
+      return false;
+    }
+
+    s1 = nextBlank(f1);
+    s2 = nextBlank(f2);
+  }
+  return true;
+}
+
+// Opens both files and reports whether their blanks are compatible (see
+// compatible()). Prints an error to wcerr and returns false when a file cannot
+// be opened.
+bool
+TMXBuilder::check(string const &file1, string const &file2, bool lazy)
+{
+  FILE *f1 = fopen(file1.c_str(), "rb");
+  FILE *f2 = fopen(file2.c_str(), "rb");
+  if(!f1 && !f2)
+  {
+    wcerr << L"Error: Cannot access files '" << UtfConverter::fromUtf8(file1);
+    wcerr << L"' and '" << UtfConverter::fromUtf8(file2) << "'" << endl;
+    return false;
+  }
+  else if(!f1)
+  {
+    // Only the first file could not be opened, so that is the one to name.
+    wcerr << L"Error: Cannot access file '" << UtfConverter::fromUtf8(file1) << "'" << endl;
+    fclose(f2);
+    return false;
+  }
+  else if(!f2)
+  {
+    wcerr << L"Error: Cannot access file '" << UtfConverter::fromUtf8(file2) << "'" << endl;
+    fclose(f1);
+    return false;
+  }
+
+  bool retval = compatible(f1, f2, lazy);
+
+  fclose(f1);
+  fclose(f2);
+  return retval;
+}
+
+// Reads the next translation unit (roughly a sentence) from input. The unit
+// ends after '?' or '!', after a '.' that is followed by a blank or white
+// space, or at end of input. Blanks ("[...]") are replaced by "<ph/>", with a
+// space kept on the side where the blank had one; a backslash makes the
+// following character literal. Returns an empty string when nothing is left.
+wstring
+TMXBuilder::nextTU(FILE *input)
+{
+  wstring current_tu = L"";
+  wstring tmp;
+
+  while(true)
+  {
+    wint_t symbol = fgetwc_unlocked(input);
+    if(feof(input))
+    {
+      // Whatever was collected so far (possibly nothing) is the last unit.
+      return current_tu;
+    }
+
+    switch(symbol)
+    {
+      // Escaped character: taken literally, whatever it is.
+      case L'\\':
+        symbol = fgetwc_unlocked(input);
+        if(feof(input))
+        {
+          return current_tu;
+        }
+        // continued down
+      default:
+        current_tu += static_cast<wchar_t>(symbol);
+        break;
+
+      // Blank: replaced by a placeholder.
+      case L'[':
+        tmp = restOfBlank(input);
+        if(tmp.substr(0,2) == L"[ ")
+        {
+          current_tu.append(L" ");
+        }
+        current_tu.append(L"<ph/>");
+        // restOfBlank() returns an empty string when the input ends inside the
+        // blank; without the size check, tmp.size()-2 would wrap around and
+        // substr() would throw std::out_of_range.
+        if(tmp.size() >= 2 && tmp.substr(tmp.size()-2, 2) == L" ]")
+        {
+          current_tu.append(L" ");
+        }
+        break;
+
+      case L'.':
+        current_tu += L'.';
+        symbol = fgetwc_unlocked(input);
+
+        // The unit goes on unless the dot is followed by a blank or white space.
+        if(symbol != L'[' && !iswspace(symbol))
+        {
+          if(!feof(input))
+          {
+            ungetwc(symbol, input);
+          }
+        }
+        else
+        {
+          if(!feof(input))
+          {
+            ungetwc(symbol, input);
+          }
+
+          return current_tu;
+/*          size_t idx = current_tu.size()-1;
+          while(current_tu[idx] == L'.')
+          {
+            idx--;
+          }
+          return current_tu.substr(0, idx+1);*/
+        }
+        break;
+
+      // Sentence-final punctuation closes the unit.
+      case L'?':
+      case L'!':
+        current_tu += static_cast<wchar_t>(symbol);
+        return current_tu;
+    }
+  }
+
+  return current_tu;
+}
+
+wstring
+TMXBuilder::xmlize(wstring const &str)
+{ // Escapes str for use as XML text and trims placeholders, non-text characters and periods from its ends (steps below).
+  wstring result = L"";
+  // Step 1: escape '<', '>' and '&'; a literal "<ph/>" is copied through unescaped.
+  for(size_t i = 0, limit = str.size(); i < limit; i++)
+  {
+    switch(str[i])
+    {
+      case L'<':
+        if(i + 5 <= limit && str.substr(i,5)==L"<ph/>")
+        {
+          result.append(L"<ph/>");
+          i += 4;  // skip "ph/>"; the loop's own i++ then steps past the whole placeholder
+          break;
+        }
+        else
+        {
+          result.append(L"&lt;");
+        }
+        break;
+        
+      case L'>':
+        result.append(L"&gt;");
+        break;
+        
+      case L'&':
+        result.append(L"&amp;");
+        break;
+      
+      default:
+        result += str[i];
+        break;
+    }
+  }
+  
+  // Step 2: remove leading <ph/>'s and leading characters that are neither alphanumeric nor punctuation
+  
+  bool cambio = true;  // "cambio" = change; repeat until a full pass removes nothing
+  while(cambio == true)
+  {
+    cambio = false;
+    while(result.size() >= 5 && result.substr(0,5) == L"<ph/>")
+    {
+      result = result.substr(5);
+      cambio = true;
+    }
+    while(result.size() > 0 && !iswalnum(result[0]) && !iswpunct(result[0]))
+    {
+      result = result.substr(1);
+      cambio = true;
+    }
+  }
+  // Step 3: remove trailing <ph/>'s and trailing characters that are neither alphanumeric nor punctuation
+  
+  cambio = true;
+  while(cambio == true)
+  {
+    cambio = false;
+    while(result.size() > 5 && result.substr(result.size()-5) == L"<ph/>")
+    {
+      result = result.substr(0, result.size()-5);
+      cambio = true;
+    }
+    while(result.size() > 0 && !iswalnum(result[result.size()-1]) && !iswpunct(result[result.size()-1]))
+    {
+      result = result.substr(0, result.size()-1);
+      cambio = true;
+    }
+  }
+  
+  // Step 4: remove trailing punctuation (isRemovablePunct accepts only L'.')
+  // NOTE(review): the scan below cuts after the last non-removable character found at an index > 0;
+  // the while loop after it strips trailing periods in every case, so the scan looks redundant.
+  for(unsigned int i = result.size()-1; result.size() > 0 && i > 0; i--)
+  {
+    if(!isRemovablePunct(result[i]))
+    {
+      result = result.substr(0, i+1);
+      break;
+    }
+  }
+
+  while(result.size() > 0 && isRemovablePunct(result[result.size()-1]))
+  {
+    result = result.substr(0,result.size()-1);
+  }
+
+  return result;
+} 
+
+void 
+TMXBuilder::generate(string const &file1, string const &file2, 
+                     string const &outfile)
+{ // Writes the TMX built from file1 and file2 to outfile (stdout when outfile is empty); exits if any file cannot be opened.
+  FILE *output = stdout;
+  if(outfile != "")
+  {
+    output = fopen(outfile.c_str(), "w");
+    if(!output)
+    {
+      wcerr << L"Error: file '" << UtfConverter::fromUtf8(outfile);
+      wcerr << L"' cannot be opened for writing" << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+#ifdef _MSC_VER
+  _setmode(_fileno(output), _O_U8TEXT);
+#endif
+
+  FILE *f1 = fopen(file1.c_str(), "r");
+  if(!f1)
+  {
+    wcerr << L"Error: file '" << UtfConverter::fromUtf8(file1);
+    wcerr << L"' cannot be opened for reading" << endl;
+    exit(EXIT_FAILURE);
+  }
+  FILE *f2 = fopen(file2.c_str(), "r");
+  if(!f2)
+  {
+    wcerr << L"Error: file '" << UtfConverter::fromUtf8(file2);
+    wcerr << L"' cannot be opened for reading" << endl;
+    exit(EXIT_FAILURE);
+  }
+#ifdef _MSC_VER
+  _setmode(_fileno(f1), _O_U8TEXT);
+  _setmode(_fileno(f2), _O_U8TEXT);
+#endif   
+  generateTMX(f1, f2, output);  // f1 and f2 are closed downstream, in outputTU()
+  if(output != stdout)
+  {
+    fclose(output);  // the file opened above used to be left open
+  }
+}
+
+vector<wstring>
+TMXBuilder::reverseList(vector<wstring> const &v)
+{
+  // Copy of v with the element order reversed; v itself is left untouched.
+  vector<wstring> reversed;
+  reversed.reserve(v.size());
+  for(vector<wstring>::const_reverse_iterator it = v.rbegin(); it != v.rend(); ++it)
+  {
+    reversed.push_back(*it);
+  }
+  return reversed;
+}
+
+vector<wstring>
+TMXBuilder::sentenceList(FILE *file)
+{
+  // Collects the units produced by successive nextTU() calls. Reading stops
+  // as soon as a call leaves the stream at end-of-file; the unit returned
+  // by that last call is not stored.
+  // NOTE(review): text after the final terminator is therefore dropped --
+  // confirm this is intended.
+  vector<wstring> sentences;
+
+  for(wstring unit = nextTU(file); !feof(file); unit = nextTU(file))
+  {
+    sentences.push_back(unit);
+  }
+
+  return sentences;
+}  
+
+vector<wstring>
+TMXBuilder::extractFragment(vector<wstring> const &text, unsigned int base, unsigned int width)
+{
+  // Copies up to width consecutive elements of text starting at index base (fewer if text ends first).
+  vector<wstring> fragment;
+  unsigned int const stop = base + width;
+  for(unsigned int pos = base; pos < stop && pos < text.size(); ++pos)
+  {
+    fragment.push_back(text[pos]);
+  }
+  return fragment;
+}
+
+int
+TMXBuilder::argmin(int nw, int n, int w)
+{
+  // Returns the 1-based position of the smallest argument:
+  //   1 when nw is smallest,
+  //   2 when n is smallest,
+  //   3 when w is smallest.
+  // On ties the earlier argument wins (nw before n before w).
+
+  // nw is no larger than either of the others
+  if(nw <= n && nw <= w)
+  {
+    return 1;
+  }
+
+  // n beats nw and is no larger than w
+  if(nw > n && n <= w)
+  {
+    return 2;
+  }
+
+  return 3;
+}  
+
+void
+TMXBuilder::generateTMX(FILE *f1, FILE *f2, FILE *output)
+{ // Writes a complete TMX 1.4 document to output: fixed header, one body filled by outputTU(f1, f2, output), closing tags.
+  fprintf(output, "<?xml version=\"1.0\"?>\n");
+  fprintf(output, "<tmx version=\"1.4\">\n");
+  fprintf(output, "<header creationtool=\"Apertium TMX Builder\"\n");
+  fprintf(output, "        creationtoolversion=\"%s\"\n", PACKAGE_VERSION);
+  fprintf(output, "        segtype=\"sentence\"\n");
+  fprintf(output, "        srclang=\"%s\"\n", UtfConverter::toUtf8(lang1).c_str());
+  fprintf(output, "        adminlang=\"%s\"\n", UtfConverter::toUtf8(lang2).c_str());  // NOTE(review): adminlang receives lang2 -- confirm this is the intended attribute
+  fprintf(output, "        datatype=\"plaintext\"\n");
+  fprintf(output, "        o-tmf=\"none\">\n");
+  fprintf(output, "</header>\n");
+  fprintf(output, "<body>\n");
+  outputTU(f1, f2, output);  // emits the <tu> elements
+  fprintf(output, "</body>\n</tmx>\n");
+
+}
+
+void
+TMXBuilder::printTable(int *table, unsigned int nrows, unsigned int ncols)
+{
+  // Debug aid: writes the nrows x ncols table (stored row after row) to
+  // wcerr, one row per line, cells separated by a space and set to width 10.
+  for(unsigned int row = 0; row != nrows; ++row)
+  {
+    for(unsigned int col = 0; col != ncols; ++col)
+    {
+      if(col > 0)
+        wcerr << L" ";
+      wcerr << setw(10) << table[row*ncols + col];
+    }
+    wcerr << endl;
+  }
+}
+
+
+void
+TMXBuilder::printTUCond(FILE *output, wstring const &tu1, wstring const &tu2, bool secure_zone)
+{
+  // Prints the pair only when secure_zone is set and similar() accepts it.
+  if(!secure_zone || !similar(tu1, tu2))
+    return;
+  printTU(output, tu1, tu2);
+}
+
+void
+TMXBuilder::splitAndMove(FILE *f1, string const &filename)
+{ // Writes every unit that sentenceList() reads from f1 to filename, one unit per line.
+  FILE *stream = fopen(filename.c_str(), "w");
+  if(!stream)  // fail loudly, as generate() does, instead of writing through a NULL stream
+  {
+    wcerr << L"Error: file '" << UtfConverter::fromUtf8(filename) << L"' cannot be opened for writing" << endl;
+    exit(EXIT_FAILURE);
+  }
+  vector<wstring> const sentences = sentenceList(f1);
+  for(size_t i = 0; i < sentences.size(); i++) fputws_unlocked((sentences[i] + L"\n").c_str(), stream);
+  fclose(stream);
+}
+void
+TMXBuilder::outputTU(FILE *f1, FILE *f2, FILE *output)
+{ // Splits f1 and f2 into temporary one-unit-per-line files, runs the aligner on them and prints the resulting pairs via printTU(); closes f1 and f2.
+  string left = tmpnam(NULL);   // NOTE(review): tmpnam() names can be raced; consider mkstemp()
+  string right = tmpnam(NULL);
+  string out = tmpnam(NULL);
+  splitAndMove(f1, left);
+  fclose(f1);
+  splitAndMove(f2, right);
+  fclose(f2);
+  TMXAligner::DictionaryItems dict;
+  AlignParameters ap;
+  ap.justSentenceIds = false;
+  ap.utfCharCountingMode = false;
+  ap.realignType=AlignParameters::NoRealign;
+  TMXAligner::alignerToolWithFilenames(dict, left, right, ap, out);
+  FILE *stream = fopen(out.c_str(), "r");
+  int conta = 0;       // index of the tab-separated column being read
+  wstring partes[2];   // text gathered for the first two columns of the current line
+  while(stream != NULL)
+  {
+    wint_t val = fgetwc(stream);
+    if(val == WEOF)  // also covers read errors, which never set feof() and used to spin forever
+    {
+      break;
+    }
+    if(val == L'\t')
+    {
+      conta++;
+    }
+    else if(val == L'\n')
+    {
+      if(partes[0] != L"" && partes[1] != L"")
+      {
+        printTU(output, partes[0], partes[1]);
+      }
+      partes[0] = L"";
+      partes[1] = L"";
+      conta = 0;
+    }
+    if(conta < 2)
+    {
+      partes[conta] += static_cast<wchar_t>(val);
+    }
+  }
+  if(stream == NULL)
+  {
+    wcerr << L"Error: file '" << UtfConverter::fromUtf8(out) << L"' cannot be opened for reading" << endl;
+  }
+  else
+  {
+    fclose(stream);  // this handle used to be leaked
+  }
+  unlink(left.c_str());
+  unlink(right.c_str());
+  unlink(out.c_str());
+
+  /*
+
+
+  int base_i = 0, base_j = 0;
+
+  vector<wstring> lista1 = reverseList(sentenceList(f1)),
+                  lista2 = reverseList(sentenceList(f2)), lista3;
+
+  if(freference != NULL)
+  {
+    lista3 = reverseList(sentenceList(freference));
+  } 
+
+  while(true)
+  { 
+    vector<wstring> l1 = extractFragment(lista1, base_i, window_size);
+    vector<wstring> l2 = extractFragment(lista2, base_j, window_size) , l3;
+
+    if(lista3.size() != 0)
+    {
+      l3 = extractFragment(lista3, base_j, window_size);
+    }
+
+    int *table;
+    if(lista3.size() == 0)
+    {
+      table = levenshteinTable(l1, l2, diagonal_width, max_edit);
+    }
+    else
+    {
+      table = levenshteinTable(l1, l3, diagonal_width, max_edit);
+    }
+
+    unsigned int const nrows = l1.size() + 1;
+    unsigned int const ncols = l2.size() + 1;
+    unsigned int i = nrows - 1;
+    unsigned int j = ncols - 1;
+  
+  
+    //    printTable(table, nrows, ncols);
+  
+    bool newBase = false;
+  
+
+    while(true)
+    {
+      int v = argmin(table[(i-1)*ncols + j-1], // i-1, j-1
+                     table[(i-1)*ncols + j],  // i-j, j
+                     table[i*ncols + j-1]); // i, j-1
+      switch(v)
+      {
+        case 1:
+          i--;
+          j--;
+	  
+          if(l3.size() == 0)
+	  {
+            if((newBase || l1.size() < step) && similar(l1[i], l2[j]))
+  	    {
+  	      printTU(output, l1[i], l2[j]);
+	    }
+	  }
+	  else
+	  {
+            if((newBase || l1.size() < step) && similar(l1[i], l3[j]))
+  	    {
+  	      printTU(output, l1[i], l2[j]);
+	    }
+	  }	    
+          break;
+      
+        case 2: 
+          i--;
+          if(i > 2 && argmin(table[(i-1)*ncols + j-1],
+			     table[(i-1)*ncols + j],  
+			     table[i*ncols + j-1]) == 3 && 
+	              argmin(table[(i-1)*ncols + j-2],
+			     table[(i-1)*ncols + j-1],  
+			     table[i*ncols + j-2]) != 1)
+	    {
+	      if(l3.size() == 0)
+	      {
+		if((newBase || l1.size() < step) && similar(l1[i], l2[j]))
+		  {
+		      printTU(output, l1[i], l2[j]);
+		  }
+		}
+	      else
+		{
+		  if((newBase || l1.size() < step) && similar(l1[i], l3[j]))
+		    {
+		      printTU(output, l1[i], l2[j]);
+		    }
+		}	    
+	    } 
+
+	  //          wcerr << L"[" << i << L" " << j << L"]" << endl;
+         break;
+    
+        case 3:
+          j--;
+          if(j > 2 && argmin(table[(i-1)*ncols + j-1],
+			     table[(i-1)*ncols + j],  
+			     table[i*ncols + j-1]) == 1 && 
+	              argmin(table[(i-1)*ncols + j-2],
+			     table[(i-1)*ncols + j-1],  
+			     table[i*ncols + j-2]) != 3)
+	    {
+	      if(l3.size() == 0)
+	      {
+		if((newBase || l1.size() < step) && similar(l1[i], l2[j]))
+		  {
+		      printTU(output, l1[i], l2[j]);
+		  }
+		}
+	      else
+		{
+		  if((newBase || l1.size() < step) && similar(l1[i], l3[j]))
+		    {
+		      printTU(output, l1[i], l2[j]);
+		    }
+		}	    
+	    } 
+
+
+          break;
+    
+        default:
+          // error
+          break;
+      }
+  
+      if(i == step  && !newBase)
+      {
+         base_i += i;
+         base_j += j;
+         newBase = true;
+      }
+      
+      if(i == 0 || j == 0)
+      {
+        break;
+      }
+    }
+  
+    delete[] table;  
+    
+    if(l1.size() < window_size)
+    {
+      break;
+    }
+    }*/
+}
+
+int 
+TMXBuilder::weight(wstring const &s)
+{
+  return s.size()*2;  // twice the length of s; levenshteinTable() uses it as the cost of skipping s
+}
+
+int * 
+TMXBuilder::levenshteinTable(vector<wstring> &l1, vector<wstring> &l2, 
+			     unsigned int diagonal_width, unsigned int max_edit)
+{  // Builds the (l1.size()+1) x (l2.size()+1) alignment-cost table, row-major; the caller owns the new[] array and must delete[] it.
+  unsigned int const nrows = l1.size() + 1;
+  unsigned int const ncols = l2.size() + 1;
+  
+  int *table = new int[nrows * ncols];
+  
+  table[0] = 0;
+  // First column: cumulative weight of skipping l1[0..i-1].
+  for(unsigned int i = 1; i < nrows; i++)
+  {
+    table[i*ncols] = table[(i-1)*ncols] + weight(l1[i-1]);
+  }
+  // First row: cumulative weight of skipping l2[0..j-1].
+  for(unsigned int j = 1; j < ncols; j++)
+  {
+    table[j] = table[j-1] + weight(l2[j-1]);
+  }
+  // Interior cells: cheapest of pairing l1[i-1] with l2[j-1] or skipping one side.
+  for(unsigned int i = 1; i < nrows; i++)
+  {
+    for(unsigned int j = 1; j < ncols; j++)
+    {
+      int ed = 0;
+      // More than diagonal_width cells off the diagonal: no real distance is computed, the two border costs are summed instead.
+      if(i > (j + diagonal_width))
+      {
+        ed = table[i*ncols]+table[j];
+      }
+      else if(j > (i + diagonal_width))
+      {
+        ed = table[i*ncols]+table[j];
+      }
+      else
+      {
+        ed = editDistance(l1[i-1], l2[j-1], max_edit);
+      }
+      // NOTE(review): the step from row i-1 adds weight(l2[j-1]) and the step from column j-1 adds weight(l1[i-1]); the border loops above pair them the other way round -- confirm which is intended.
+      table[i*ncols+j] = min3(table[(i-1)*ncols + j-1] + ed,
+                              table[(i-1)*ncols + j] + weight(l2[j-1]),
+                              table[i*ncols + j-1] + weight(l1[i-1]));
+    }
+  }
+  
+  return table;
+}
+
+wstring
+TMXBuilder::filter(wstring const &tu)
+{
+  // Gate applied before a unit is written: the unit must contain a letter
+  // and more than two whitespace characters after its first letter,
+  // otherwise the empty string is returned. Accepted units are passed
+  // through xmlize().
+  bool seen_letter = false;
+  unsigned int blanks_after_letter = 0;
+
+  for(wstring::const_iterator it = tu.begin(); it != tu.end(); ++it)
+  {
+    if(iswalpha(*it))
+    {
+      seen_letter = true;
+    }
+    else if(seen_letter && iswspace(*it))
+    {
+      ++blanks_after_letter;
+    }
+  }
+
+  bool const rejected = tu.empty() || !seen_letter || blanks_after_letter <= 2;
+  return rejected ? L"" : xmlize(tu);
+}
+
+void
+TMXBuilder::printTU(FILE *output, wstring const &tu1, wstring const &tu2) const
+{
+  // Writes one <tu> element holding tu1 (tagged lang1) and tu2 (tagged lang2),
+  // converted to UTF-8; nothing is written unless both survive filter().
+  wstring const seg1 = filter(tu1);
+  wstring const seg2 = filter(tu2);
+  if(seg1.empty() || seg2.empty())
+  {
+    return;
+  }
+  fprintf(output, "<tu>\n  <tuv xml:lang=\"%s\"><seg>%s</seg></tuv>\n", 
+                  UtfConverter::toUtf8(lang1).c_str(), 
+                  UtfConverter::toUtf8(seg1).c_str());
+  fprintf(output, "  <tuv xml:lang=\"%s\"><seg>%s</seg></tuv>\n</tu>\n",
+                  UtfConverter::toUtf8(lang2).c_str(), 
+                  UtfConverter::toUtf8(seg2).c_str());  
+} 
+
+int
+TMXBuilder::min3(int i1, int i2, int i3)
+{
+  // Smallest of the three values.
+  //
+  // Starts from i1 and lets each later argument replace the running
+  // minimum only when it is strictly smaller.
+  int smallest = i1;
+
+  // second candidate
+  if(i2 < smallest)
+  {
+    smallest = i2;
+  }
+
+  // third candidate
+  if(i3 < smallest)
+  {
+    smallest = i3;
+  }
+
+  return smallest;
+}
+
+int
+TMXBuilder::min2(int i1, int i2)
+{
+  // Smaller of the two values: i2 replaces i1 only when it is
+  // strictly smaller.
+  int smallest = i1;
+  if(i2 < smallest)
+  {
+    smallest = i2;
+  }
+  return smallest;
+}
+
+int
+TMXBuilder::editDistance(wstring const &s1, wstring const &s2, unsigned int max_edit)
+{
+  // Weighted edit distance between s1 and s2: a substitution costs 1, an
+  // insertion or a deletion costs 2. The table is capped at max_edit rows
+  // and columns, so at most the first max_edit-1 characters of each
+  // string take part.
+  int const nrows = min2(s1.size() + 1, max_edit);
+  int const ncols = min2(s2.size() + 1, max_edit);
+  if(nrows < 1 || ncols < 1)
+  {
+    return 0;  // e.g. max_edit == 0: no cell exists (the old code read table[-1])
+  }
+
+  vector<int> table(nrows*ncols);  // zero-filled, released automatically
+  for(int i = 1; i < nrows; i++)
+  {
+    table[i*ncols] = i;
+  }
+  // First row: one entry per *column*. This loop used to run to nrows,
+  // overrunning the table or leaving cells unset when the sizes differ.
+  for(int j = 1; j < ncols; j++)
+  {
+    table[j] = j;
+  }
+
+  for(int i = 1; i < nrows; i++)
+  {
+    for(int j = 1; j < ncols; j++)
+    {
+      int const coste = (s1[i-1] != s2[j-1]) ? 1 : 0;
+      table[i*ncols+j] = min3(table[(i-1)*ncols+(j-1)]+coste,
+                              table[(i-1)*ncols+j] + 2,
+                              table[i*ncols+(j-1)] + 2);
+    }
+  }
+
+  return table[(nrows*ncols)-1];
+}
+
+void
+TMXBuilder::setMaxEdit(int me)
+{ // Stores me in the max_edit member.
+  max_edit = me;
+}
+
+void
+TMXBuilder::setDiagonalWidth(int dw)
+{ // Stores dw in the diagonal_width member.
+  diagonal_width = dw;
+}
+
+void
+TMXBuilder::setWindowSize(int ws)
+{ // Stores ws in the window_size member.
+  window_size = ws;
+}
+
+void
+TMXBuilder::setStep(int s)
+{ // Stores s in the step member.
+  step = s;
+}
+
+void
+TMXBuilder::setPercent(double p)
+{ // Stores p in percent, the minimum shorter/longer length ratio that similar() requires.
+  percent = p;
+}
+
+void
+TMXBuilder::setLowLimit(int l)
+{ // Stores l in low_limit; similar() accepts any pair whose lengths are both at most this value.
+  low_limit = l;
+}
+
+void
+TMXBuilder::setEditDistancePercent(double e)
+{ // Stores e in edit_distance_percent; similar() requires the edit distance to stay below e times the longer length.
+  edit_distance_percent = e;
+}
+
+bool
+TMXBuilder::isRemovablePunct(wchar_t const &c)
+{ // True only for the period; xmlize() strips such characters from the end of a unit.
+  return c == L'.';
+}
+
+bool
+TMXBuilder::similar(wstring const &s1, wstring const &s2)
+{
+  // Two strings are taken as similar when
+  //   - both are at most low_limit characters long, or
+  //   - their edit distance is below edit_distance_percent times the
+  //     longer length and the shorter/longer length ratio exceeds percent.
+  unsigned int const len1 = s1.size();
+  unsigned int const len2 = s2.size();
+
+  if((len1 <= low_limit) && (len2 <= low_limit))
+  {
+    return true;
+  }
+
+  int const longest = max(len1, len2);
+  int const shortest = min(len1, len2);
+  int const distance = editDistance(s1, s2, longest);
+
+  if(!(double(distance) < edit_distance_percent*double(longest)))
+  {
+    return false;
+  }
+
+  return double(shortest)/double(longest) > percent;
+}
+
+void
+TMXBuilder::setTranslation(string const &filename)
+{
+  // Opens filename for reading and stores the stream in freference. If the
+  // file cannot be opened an error is reported on wcerr and freference is
+  // left NULL (fopen's failure result).
+  freference = fopen(filename.c_str(), "r");
+  if(freference == NULL)
+  {
+    wcerr << L"Error: file '" << UtfConverter::fromUtf8(filename);
+    wcerr << L"' cannot be opened for reading" << endl;
+    return;
+  }
+
+#ifdef _MSC_VER
+  _setmode(_fileno(freference), _O_U8TEXT);
+#endif     
+}
Index: branches/apertium-tagger/apertium2/apertium/tmx_translate.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_translate.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_translate.cc	(revision 69632)
@@ -0,0 +1,362 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_translate.h>
+
+#include <apertium/tmx_words.h>
+#include <apertium/tmx_dictionary.h>
+#include <apertium/tmx_dic_tree.h>
+
+#include <algorithm>
+#include <fstream>
+
+namespace TMXAligner
+{
+
+void buildDumbDictionary( const DictionaryItems& dictionary, DumbDictionary& dumbDictionary )
+{
+  // Rebuilds dumbDictionary: every item whose second phrase is a single word
+  // maps that word to the item's first phrase; later items overwrite earlier.
+  dumbDictionary.clear();
+
+  for ( size_t item=0; item!=dictionary.size(); ++item )
+  {
+    const Phrase& source = dictionary[item].second;
+    if (source.size()!=1)
+    {
+      continue;
+    }
+    dumbDictionary[ source[0] ] = dictionary[item].first;
+  }
+}
+
+void buildDumbDictionaryUsingFrequencies( 
+       const DictionaryItems& dictionary, 
+       FrequencyMap& enFreq, 
+       DumbDictionary& dumbDictionary )
+{
+  dumbDictionary.clear();
+  // Like buildDumbDictionary(), but when a word already has a translation the new one replaces it
+  // only if it is shorter or, for two one-word translations, if its enFreq value is higher.
+  for (size_t i=0; i<dictionary.size(); ++i )
+  {
+    const Phrase& en = dictionary[i].first;
+    const Phrase& hu = dictionary[i].second;
+
+    if (hu.size()==1)  // only single-word second phrases become keys
+    {
+      Word originalWord = hu[0];
+      DumbDictionary::const_iterator ft = dumbDictionary.find(originalWord);
+      bool overWrite = false;
+      if (ft!=dumbDictionary.end())
+      {
+        // Phrases with both length of k>1 are incomparable.
+
+        const Phrase& oldTrans = ft->second;
+
+        // Shorter phrases are better than longer ones.
+        if (oldTrans.size()>en.size())
+        {
+          overWrite = true;
+        }
+
+        // More frequent words are better than less frequent ones.
+        if ( (oldTrans.size()==1) && (en.size()==1) )
+        {
+          if ( enFreq[oldTrans[0]] < enFreq[en[0]] )  // enFreq is taken by non-const reference and indexed with operator[]
+          {
+            overWrite = true;
+          }
+        }
+      }
+      else
+      {
+        overWrite = true;  // first translation seen for this word
+      }
+
+      if (overWrite)
+        dumbDictionary[originalWord] = en ;
+    }
+  }
+}
+
+void buildDumbDictionary( TMXAligner::DumbDictionary& dumbDictionary,
+                          const std::string& dictionaryFilename,
+                          const TMXAligner::SentenceList& enSentenceList
+                        )
+{
+  // Reads the dictionary file, then builds dumbDictionary from it; word frequencies from enSentenceList are used when it is non-empty.
+  TMXAligner::DictionaryItems dictionary;
+  {
+    std::ifstream is( dictionaryFilename.c_str() );
+    dictionary.read( is );
+    std::cerr << dictionary.size() << " dictionary items read." << std::endl;
+  }
+
+  if (enSentenceList.empty())
+  {
+    TMXAligner::buildDumbDictionary( dictionary, dumbDictionary );
+    return;
+  }
+
+  TMXAligner::FrequencyMap enFreq;
+  enFreq.build(enSentenceList);
+  TMXAligner::buildDumbDictionaryUsingFrequencies( dictionary, enFreq, dumbDictionary );
+}
+
+void trivialTranslateWord(
+                     const DumbDictionary& dumbDictionary,
+                     const Word& originalWord,
+                     Phrase& words
+                     )
+{ // Sets words to the dictionary translation of originalWord, or to the word itself when it has no entry (see the toggle below).
+  words.clear();
+
+  DumbDictionary::const_iterator ft = dumbDictionary.find(originalWord);
+  if (ft!=dumbDictionary.end())
+  {
+    words = ft->second;
+  }
+  else
+  {
+    bool leaveAsItis(false);
+
+    // This worsens the score for the 1984 corpus, most possibly because of the false cognates a(a), is(is), van(van).
+    bool alwaysLeaveAsItis = true;  // hard-wired on: the capital-letter and number tests below can never change the outcome
+    if (alwaysLeaveAsItis)
+    {
+      leaveAsItis = true;
+    }
+
+    if ( !leaveAsItis && (originalWord[0]>='A') && (originalWord[0]<='Z') )  // words starting with an ASCII capital would be kept
+    {
+      leaveAsItis = true;
+    }
+
+    if (!leaveAsItis)
+    {
+      bool isNumber(true);  // stays true only if every character is a digit or '.'
+      for ( size_t k=0; k<originalWord.size(); ++k )
+      {
+        char c = originalWord[k];
+        if ( (c!='.') && ( (c<'0') || (c>'9') ) )
+        {
+          isNumber = false;
+          break;
+        }
+      }
+
+      if (isNumber)
+      {
+        leaveAsItis = true;
+      }
+    }
+
+    if (leaveAsItis)
+    {
+      words.push_back(originalWord);
+    }
+  }
+}
+
+void trivialTranslate(
+                     const DumbDictionary& dumbDictionary,
+                     const Sentence& sentence,
+                           Sentence& translatedSentence
+                     )
+{ // Copies sentence.id and appends the trivialTranslateWord() result of every word to translatedSentence.words.
+  bool logging = false;  // hard-wired off: none of the "if (logging)" branches below run
+
+  std::ofstream* translateLogsPtr = 0;
+  if (logging)
+  {
+    translateLogsPtr = new std::ofstream( "/dev/null", std::ios::app );
+  }
+  std::ostream& logs = translateLogsPtr ? *translateLogsPtr : std::cout ;  // bound to std::cout while logging is off, but never written to
+
+  translatedSentence.id = sentence.id;
+  Phrase& words = translatedSentence.words;  // appended to, not cleared here
+
+  if (logging && !translatedSentence.id.empty())
+    logs << translatedSentence.id << "\t";
+
+  const Phrase& originalWords = sentence.words;
+
+  for ( size_t j=0; j<originalWords.size(); ++j )
+  {
+    Word originalWord = originalWords[j];
+
+    Phrase phrase;
+    trivialTranslateWord( dumbDictionary, originalWord, phrase );
+    // Append the translation of this word to the output sentence.
+    for (size_t k=0; k<phrase.size(); ++k )
+    {
+      words.push_back(phrase[k]);
+    }
+
+    if (logging)
+      logs << originalWord << "(";
+    for (size_t k=0; k<phrase.size(); ++k )  // second pass over phrase: logging only
+    {
+      if (logging)
+      {
+        logs << phrase[k];
+        if (k<phrase.size()-1)
+          logs << " ";
+      }
+    }
+    if (logging)
+      logs << ") ";
+  }
+
+  if (logging)
+    logs << "\n";
+
+  if (logging)
+  {
+    delete translateLogsPtr;
+  }
+}
+
+void trivialTranslateSentenceList(
+                     const DumbDictionary& dumbDictionary,
+                     const SentenceList& sentenceList,
+                           SentenceList& translatedSentenceList
+                     )
+{
+  // Replaces the contents of translatedSentenceList with the
+  // trivialTranslate() result of every sentence in sentenceList, in the
+  // same order.
+  //
+  // The std::ofstream on "/dev/null" that used to be opened and closed
+  // here was never written to, so it has been dropped.
+  translatedSentenceList.clear();
+
+  for ( size_t i=0; i<sentenceList.size(); ++i )
+  {
+    Sentence translatedSentence;
+    trivialTranslate( dumbDictionary, 
+                      sentenceList[i],
+                      translatedSentence
+                     );
+    translatedSentenceList.push_back(translatedSentence);
+  }
+}
+
+
+void naiveTranslate(
+                     const DictionaryItems& dictionary,
+                     const SentenceList& sentenceList,
+                           SentenceList& translatedSentenceList
+                     )
+{
+  // For every input sentence, asks SubsetLookup::lookup() which dictionary
+  // items apply to its words and concatenates the first phrases of those
+  // items, in ascending item number, into an output sentence with the
+  // same id. Items are registered under index+1, hence the -1 when the
+  // results are mapped back to dictionary positions.
+  translatedSentenceList.clear();
+
+  SubsetLookup<Word,int> subsetLookup;
+  for ( size_t item=0; item<dictionary.size(); ++item )
+  {
+    subsetLookup.add( dictionary[item].second, item+1 );
+  }
+  std::cerr << "Index tree built." << std::endl;
+
+  for ( size_t pos=0; pos<sentenceList.size(); ++pos )
+  {
+    std::set<int> hits;
+    subsetLookup.lookup( sentenceList[pos].words, hits );
+
+    Sentence translated;
+    translated.id = sentenceList[pos].id;
+    for ( std::set<int>::const_iterator hit=hits.begin(); hit!=hits.end(); ++hit )
+    {
+      const Phrase& phrase = dictionary[*hit-1].first;
+      for ( size_t w=0; w<phrase.size(); ++w )
+      {
+        translated.words.push_back(phrase[w]);
+      }
+    }
+
+    translatedSentenceList.push_back(translated);
+  }
+
+  std::cerr << "Analysis ready." << std::endl;
+}
+
+
+void buildDumbMultiDictionary( const DictionaryItems& dictionary, DumbMultiDictionary& dumbMultiDictionary, bool reverse )
+{
+  // Rebuilds dumbMultiDictionary: for every item whose key-side phrase is a
+  // single word, adds an entry from that word to the phrase on the other
+  // side. The key side is the item's second phrase, or its first phrase
+  // when reverse is set. Entries are added with insert(), one per
+  // qualifying item.
+  dumbMultiDictionary.clear();
+
+  for ( size_t item=0; item!=dictionary.size(); ++item )
+  {
+    const Phrase& first  = dictionary[item].first;
+    const Phrase& second = dictionary[item].second;
+
+    // Pick which side supplies the key and which the value.
+    const Phrase& key   = reverse ? first  : second;
+    const Phrase& value = reverse ? second : first;
+    if (key.size()!=1)
+    {
+      continue;
+    }
+
+    dumbMultiDictionary.insert( DumbMultiDictionary::value_type( key[0], value ) );
+  }
+}
+
+
+void sortNormalizeSentences( TMXAligner::SentenceList& sentenceList )
+{
+  // Sorts the words of every sentence in place with std::sort's default
+  // ordering; the sentences themselves keep their positions.
+  for ( size_t index=0; index!=sentenceList.size(); ++index )
+  {
+    TMXAligner::Phrase& words = sentenceList[index].words;
+    std::sort( words.begin(), words.end() );
+  }
+}
+
+
+void normalizeTextsForIdentity( const DictionaryItems& dictionary,
+                                const SentenceList& huSentenceListPretty,  const SentenceList& enSentenceListPretty,
+                                      SentenceList& huSentenceListGarbled,       SentenceList& enSentenceListGarbled )
+{
+  // Produces word-order-independent versions of both texts:
+  //   hu side: translated word by word through a frequency-aware
+  //            single-word dictionary, then each sentence's words sorted;
+  //   en side: copied as is, then each sentence's words sorted.
+  DumbDictionary dumbDictionary;
+
+  FrequencyMap enFreq;
+  enFreq.build(enSentenceListPretty);
+  buildDumbDictionaryUsingFrequencies( dictionary, enFreq, dumbDictionary );
+
+//  std::cerr << "Simplified dictionary ready." << std::endl;
+
+  trivialTranslateSentenceList( dumbDictionary, huSentenceListPretty, huSentenceListGarbled );
+
+//  std::cerr << "Rough translation ready." << std::endl;
+  sortNormalizeSentences(huSentenceListGarbled);
+  enSentenceListGarbled = enSentenceListPretty;
+  sortNormalizeSentences(enSentenceListGarbled);
+}
+
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/transfer_instr.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_instr.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_instr.h	(revision 69632)
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFERINSTR_
+#define _TRANSFERINSTR_
+
+#include<string>
+
+using namespace std;
+
+enum TransferInstrType  // Kind tag stored in TransferInstr::type; ti_clip_sl is the value a default-constructed TransferInstr gets.
+{
+  ti_clip_sl,
+  ti_clip_tl,
+  ti_var,
+  ti_lit_tag,
+  ti_lit,
+  ti_b,
+  ti_get_case_from,
+  ti_case_of_sl,
+  ti_case_of_tl,
+  ti_linkto_sl,
+  ti_linkto_tl,
+  ti_lu_count
+};
+
+class TransferInstr  // Value type bundling an instruction kind with its string content, a position, an opaque pointer and a flag.
+{
+private:
+  TransferInstrType type;  // kind of instruction
+  string content;
+  int pos;
+  void *pointer;           // untyped payload; what it points to is not visible in this header
+  bool condition;
+  // Helpers defined elsewhere; presumably shared by the copy operations and the destructor -- TODO confirm.
+  void copy(TransferInstr const &o);
+  void destroy();
+public:
+  TransferInstr() :  // default: ti_clip_sl, empty content, pos 0, null pointer, condition false
+  type(ti_clip_sl),
+  pos(0),
+  pointer(0),
+  condition(false)
+  {}
+  TransferInstr(TransferInstrType t, string const &c, int const p, 
+                void *ptr=NULL, bool cond = true);  // note: cond defaults to true here, unlike the default constructor
+  ~TransferInstr();
+  TransferInstr(TransferInstr const &o);
+  TransferInstr & operator =(TransferInstr const &o);
+  
+  // Read accessors for the members above (declared non-const).
+  TransferInstrType getType();
+  string const & getContent();
+  int getPos();
+  void * getPointer();
+  bool getCondition();
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_mult.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_mult.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_mult.cc	(revision 69632)
@@ -0,0 +1,514 @@
+/*
+ * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer_mult.h>
+#include <apertium/trx_reader.h>
+#include <lttoolbox/compression.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <apertium/utf_converter.h>
+#include <apertium/string_utils.h>
+
+#include <cctype>
+#include <iostream>
+#include <stack>
+
+using namespace std;
+
+/**
+ * Frees the pattern matcher held in `me` and resets the pointer so the
+ * call can safely be repeated.
+ */
+void
+TransferMult::destroy()
+{
+  // deleting a null pointer is a no-op, so no explicit test is needed
+  delete me;
+  me = NULL;
+}
+
+/**
+ * Creates an empty processor: no matcher loaded, no output stream, all
+ * symbol codes and counters at zero, no rule pending and `lu` as the
+ * default output type.  Initialisers are listed in member declaration order.
+ */
+TransferMult::TransferMult() :
+me(NULL),
+word(0),
+blank(0),
+output(0),
+any_char(0),
+any_tag(0),
+isRule(false),
+numwords(0),
+nwords(0),
+defaultAttrs(lu)
+{
+}
+
+/**
+ * Destructor: delegates all clean-up to destroy().
+ */
+TransferMult::~TransferMult()
+{
+  destroy();
+}
+
+/**
+ * Returns a copy of `str` in which every byte has been passed through the
+ * C library ::tolower().
+ *
+ * Each byte is converted to `unsigned char` first: calling ::tolower() with
+ * a negative `char` value (any byte >= 0x80 where `char` is signed, e.g.
+ * the bytes of a UTF-8 sequence) is undefined behaviour.
+ *
+ * @param str string to convert
+ * @return the converted copy; `str` itself is left untouched
+ */
+string
+TransferMult::tolower(string const &str) const
+{
+  string result = str;
+  for(string::size_type i = 0, limit = result.size(); i != limit; i++)
+  {
+    unsigned char const byte = static_cast<unsigned char>(result[i]);
+    result[i] = static_cast<char>(::tolower(byte));
+  }
+
+  return result;
+}
+
+/**
+ * Loads the compiled transfer data from `in`.  The sections are consumed
+ * strictly in the order they appear below: alphabet, pattern transducer,
+ * final states, attribute items, variables, macros and lists.
+ * @param in stream positioned at the beginning of the compiled data
+ */
+void 
+TransferMult::readData(FILE *in)
+{
+  alphabet.read(in);
+  // cache the codes of the two wildcard symbols used while matching
+  any_char = alphabet(TRXReader::ANY_CHAR);
+  any_tag = alphabet(TRXReader::ANY_TAG);
+
+  Transducer t;
+  t.read(in, alphabet.size());
+  
+  map<int, int> finals;  
+  
+  // finals: a count followed by that many (key, value) integer pairs
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    int key = Compression::multibyte_read(in);
+    finals[key] = Compression::multibyte_read(in);
+  }  
+  
+  me = new MatchExe(t, finals);
+ 
+  // attr_items: the stored string is compared with the running PCRE
+  // version; when they differ each expression is recompiled from the
+  // fallback text that follows it in the stream
+  bool recompile_attrs = Compression::string_read(in) != string(pcre_version());
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    attr_items[cad_k].read(in);
+    wstring fallback = Compression::wstring_read(in);
+    if(recompile_attrs) {
+      attr_items[cad_k].compile(UtfConverter::toUtf8(fallback));
+    }
+  }
+
+  // variables: name followed by its stored value
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in));
+  }
+
+  // macros: name followed by an integer
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+    macros[cad_k] = Compression::multibyte_read(in);
+  }
+
+  // lists: name, item count, then the items; each item is stored both
+  // verbatim (lists) and lower-cased (listslow)
+  for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++)
+  {
+    string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in));
+
+    for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++)
+    {
+      wstring const cad_v = Compression::wstring_read(in);
+      lists[cad_k].insert(UtfConverter::toUtf8(cad_v));
+      listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v)));
+    }  
+  }
+}
+
+/**
+ * Loads the bilingual dictionary transducer from `fstfile` into `fstp` and
+ * prepares it for bilingual lookups.  Terminates the program with
+ * EXIT_FAILURE when the file cannot be opened.
+ * @param fstfile path of the compiled bilingual dictionary
+ */
+void
+TransferMult::readBil(string const &fstfile)
+{
+  // the file holds binary data: "rb" keeps platforms that distinguish text
+  // and binary streams from translating bytes while reading
+  FILE *in = fopen(fstfile.c_str(), "rb");
+  if(!in)
+  {
+    cerr << "Error: Could not open file '" << fstfile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  fstp.load(in);
+  fstp.initBiltrans();
+  fclose(in);
+}
+
+/**
+ * Loads everything the processor needs: the compiled transfer data and the
+ * bilingual dictionary.  Terminates the program with EXIT_FAILURE when
+ * `datafile` cannot be opened.
+ * @param datafile path of the compiled transfer data
+ * @param fstfile path of the compiled bilingual dictionary
+ */
+void
+TransferMult::read(string const &datafile, string const &fstfile)
+{
+  // datafile: binary content, so open it in binary mode ("rb") to avoid
+  // byte translation on platforms that distinguish text and binary streams
+  FILE *in = fopen(datafile.c_str(), "rb");
+  if(!in)
+  {
+    cerr << "Error: Could not open file '" << datafile << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+  readData(in);
+  fclose(in);
+
+  readBil(fstfile);
+}
+
+/**
+ * Returns the next token of the input.
+ *
+ * If the token buffer still holds tokens that have not been consumed
+ * (after a rewind) the next buffered one is returned.  Otherwise characters
+ * are read from `in` until a delimiter is found:
+ *   - `$` ends a word token (tt_word);
+ *   - `^` ends a blank token (tt_blank);
+ *   - end of input, or a read error, ends an end-of-file token (tt_eof).
+ * A backslash keeps the following character verbatim, and text enclosed in
+ * `[`...`]` is copied as is, so delimiters inside it are not interpreted.
+ *
+ * Every read is checked for end of input / error, including the ones inside
+ * a bracketed section and after a backslash; without those checks a
+ * truncated input would make the loop run forever.
+ *
+ * @param in stream to read from
+ * @return reference to the token stored in the internal buffer
+ */
+TransferToken &
+TransferMult::readToken(FILE *in)
+{
+  if(!input_buffer.isEmpty())
+  {
+    return input_buffer.next();
+  }
+
+  wstring content = L"";
+  while(true)
+  {
+    int val = fgetwc_unlocked(in);
+    if(feof(in) || ferror(in))
+    {
+      return input_buffer.add(TransferToken(content, tt_eof));
+    }
+    if(val == L'\\')
+    {
+      content += L'\\';
+      int escaped = fgetwc_unlocked(in);
+      if(feof(in) || ferror(in))
+      {
+        // dangling backslash at the end of the input
+        return input_buffer.add(TransferToken(content, tt_eof));
+      }
+      content += wchar_t(escaped);
+    }
+    else if(val == L'[')
+    {
+      content += L'[';
+      while(true)
+      {
+        int val2 = fgetwc_unlocked(in);
+        if(feof(in) || ferror(in))
+        {
+          // unterminated '[' section: stop instead of looping forever
+          return input_buffer.add(TransferToken(content, tt_eof));
+        }
+        if(val2 == L'\\')
+        {
+          content += L'\\';
+          int escaped2 = fgetwc_unlocked(in);
+          if(feof(in) || ferror(in))
+          {
+            return input_buffer.add(TransferToken(content, tt_eof));
+          }
+          content += wchar_t(escaped2);
+        }
+        else if(val2 == L']')
+        {
+          content += L']';
+          break;
+        }
+        else
+        {
+          content += wchar_t(val2);
+        }
+      }
+    }
+    else if(val == L'$')
+    {
+      return input_buffer.add(TransferToken(content, tt_word));
+    }
+    else if(val == L'^')
+    {
+      return input_buffer.add(TransferToken(content, tt_blank));
+    }
+    else
+    {
+      content += wchar_t(val);
+    }
+  }
+}
+
+/**
+ * Main processing loop: reads tokens from `in`, feeds them to the pattern
+ * matcher and writes the result to `out`.
+ *
+ * Words and blanks seen since the last emission are buffered in tmpword /
+ * tmpblank.  Whenever the matcher has no live state left, either the last
+ * rule that matched is applied, or the first buffered word (or blank) is
+ * emitted on its own and matching restarts after it.
+ *
+ * @param in input stream
+ * @param out output stream, stored in `output` for the helper methods
+ */
+void
+TransferMult::transfer(FILE *in, FILE *out)
+{
+  // buffer position to go back to after emitting something
+  int last = 0;
+
+  output = out;
+  ms.init(me->getInitial());
+  
+  while(true)
+  {
+    if(ms.size() == 0)
+    {
+      // the matcher died: emit something and restart
+      if(isRule)
+      {
+	// a rule matched earlier: apply it and re-read what followed it
+	applyRule();
+	isRule = false;
+	input_buffer.setPos(last);
+      }
+      else
+      {
+	if(tmpword.size() != 0)
+	{
+	  // no rule: translate just the first buffered word
+	  pair<wstring, int> tr = fstp.biltransWithQueue(*tmpword[0], false);
+	  if(tr.first.size() != 0)
+	  {
+	    vector<wstring> multiword = acceptions(tr.first);
+	    // several alternatives are wrapped as [{]^a$[|]^b$.[][}]
+	    if(multiword.size() > 1)
+	    {
+	      fputws_unlocked(L"[{]", output);
+	    }	    
+	    for(unsigned int i = 0, limit = multiword.size(); i != limit; i++)
+	    {
+	      if(i > 0)
+	      {
+	        fputws_unlocked(L"[|]", output);
+	      }
+	      fputwc_unlocked(L'^', output);
+	      fputws_unlocked(multiword[i].c_str(), output);
+	      fputwc_unlocked(L'$', output);
+	    }
+	    if(multiword.size() > 1)
+	    {
+	      fputws_unlocked(L".[][}]", output);
+            }
+	  }
+	  // rewind, step over one token and restart matching from there
+	  tmpword.clear();
+	  isRule = false;
+	  input_buffer.setPos(last);
+	  input_buffer.next();       
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+	else if(tmpblank.size() != 0)
+	{
+	  // only a blank is pending: copy it through unchanged
+	  fputws_unlocked(tmpblank[0]->c_str(), output);
+	  tmpblank.clear();
+	  last = input_buffer.getPos();
+	  ms.init(me->getInitial());
+	}
+      }
+    }
+    // remember the match (and where it ended) if a final state is alive
+    int val = ms.classifyFinals(me->getFinals());
+    if(val != -1)
+    {
+      isRule = true;
+      numwords = tmpword.size();
+      last = input_buffer.getPos();
+    }
+
+    TransferToken &current = readToken(in);
+   
+    switch(current.getType())
+    {
+      case tt_word:
+	applyWord(current.getContent());
+        tmpword.push_back(&current.getContent());
+	break;
+
+      case tt_blank:
+	ms.step(L' ');
+	tmpblank.push_back(&current.getContent());
+	break;
+
+      case tt_eof:
+	if(tmpword.size() != 0)
+	{
+	  // words are still pending: empty the matcher so that the next
+	  // iteration flushes them
+	  tmpblank.push_back(&current.getContent());
+	  ms.clear();
+	}
+	else
+	{
+	  // nothing pending: write the trailing text and finish
+	  fputws_unlocked(current.getContent().c_str(), output);
+	  return;
+	}
+	break;
+
+      default:
+	wcerr << L"Error: Unknown input token." << endl;
+	return;
+    }
+  }
+}
+
+/**
+ * Tells whether `str` carries the default-translation mark, i.e. contains
+ * the sequence " D<".
+ *
+ * The result of find() is compared against npos explicitly: converting the
+ * position itself to bool would report "not found" (npos, non-zero) as true
+ * and a match at index 0 as false.
+ *
+ * @param str candidate translation
+ * @return true if the mark occurs anywhere in `str`
+ */
+bool
+TransferMult::isDefaultWord(wstring const &str)
+{
+  return str.find(L" D<") != wstring::npos;
+}
+
+/**
+ * Splits the output of the bilingual dictionary into its alternatives.
+ *
+ * A leading '@' is dropped, then the string is cut at every unescaped '/'.
+ * An alternative recognised by isDefaultWord() is moved to the front of the
+ * result (when the result already holds enough entries, see the conditions
+ * below).  Finally, if at least two alternatives contain "__", only those
+ * are returned.
+ *
+ * The scan uses `i < limit` so that a backslash in the last position, which
+ * advances the index past the end, stops the loop instead of running beyond
+ * the string.
+ *
+ * @param str raw translation; taken by value because it is edited locally
+ * @return the list of alternatives, never empty
+ */
+vector<wstring>
+TransferMult::acceptions(wstring str)
+{
+  vector<wstring> result;
+  int low = 0;
+
+  // removing '@'
+  if(str[0] == L'@')
+  {
+    str = str.substr(1);
+  }
+
+  for(unsigned int i = 0, limit = str.size(); i < limit; i++)
+  {
+     if(str[i] == L'\\')
+     {
+       // skip the escaped character
+       i++;
+     }
+     else if(str[i] == L'/')
+     {
+       wstring new_word = str.substr(low, i-low);
+
+       if(result.size() > 1 && isDefaultWord(new_word))
+       {
+         result.push_back(result[0]);
+         result[0] = new_word;
+       }
+       else
+       {
+         result.push_back(new_word);
+       }
+       low = i + 1;
+     }
+  }
+
+  // whatever follows the last separator is the final alternative
+  wstring otherword = str.substr(low);
+  if(result.size() > 0 && isDefaultWord(otherword))
+  {
+    result.push_back(result[0]);
+    result[0] = otherword;
+  }
+  else
+  {
+    result.push_back(otherword);
+  }
+
+  // remove the alternatives that carry no sense mark
+  if(result.size() >= 2)
+  {
+    vector<wstring> result2;
+    for(unsigned int i = 0, limit = result.size(); i != limit; i++)
+    {
+      if(result[i].find(L"__") != wstring::npos)
+      {
+        result2.push_back(result[i]);
+      }
+    }
+    if(result2.size() >= 2)
+    {
+      return result2;
+    }
+  }
+
+  return result;
+}
+
+/**
+ * Recursively appends to `output_string` every combination obtained by
+ * choosing one alternative for each word in [itwords, limitwords).
+ *
+ * @param itwords current word (list of alternatives)
+ * @param itblanks blank that follows the current word, when there is one
+ * @param limitwords end of the word sequence
+ * @param acum text built so far for the combination being generated
+ * @param multiple true once a non-first alternative has been chosen; such
+ *        combinations are preceded by the "[|]" separator
+ */
+void
+TransferMult::writeMultiple(list<vector<wstring> >::iterator itwords,
+                            list<wstring>::iterator itblanks,
+                            list<vector<wstring> >::const_iterator limitwords,
+                            wstring acum , bool multiple)
+{
+  // end of the sequence: the combination is complete
+  if(itwords == limitwords)
+  {
+    if(multiple)
+    {
+      output_string.append(L"[|]");
+    }
+    output_string.append(acum);
+    return;
+  }
+
+  vector<wstring> &alternatives = *itwords;
+  itwords++;
+
+  // a blank is consumed only when another word follows this one
+  wstring separator;
+  if(itwords != limitwords)
+  {
+    separator = *itblanks;
+    itblanks++;
+  }
+
+  for(unsigned int k = 0, total = alternatives.size(); k != total; k++)
+  {
+    writeMultiple(itwords, itblanks, limitwords,
+                  acum + L"^" + alternatives[k] + L"$" + separator,
+                  multiple || (k > 0));
+  }
+}
+
+/**
+ * Emits the translation of the first `numwords` buffered words, i.e. the
+ * ones covered by the last rule that matched.  Every word is looked up in
+ * the bilingual dictionary and all combinations of their alternatives are
+ * written; more than one combination is wrapped in "[{]" ... ".[][}]".
+ * Afterwards the matcher is restarted and the buffers are emptied.
+ */
+void
+TransferMult::applyRule()
+{
+  list<wstring> separators;
+  list<vector<wstring> > candidates;
+
+  // the first word has no blank in front of it
+  pair<wstring, int> first_tr = fstp.biltransWithQueue(*tmpword[0], false);
+  candidates.push_back(acceptions(first_tr.first));
+
+  for(unsigned int idx = 1; idx != numwords; idx++)
+  {
+    separators.push_back(*tmpblank[idx-1]);
+    pair<wstring, int> next_tr = fstp.biltransWithQueue(*tmpword[idx], false);
+    candidates.push_back(acceptions(next_tr.first));
+  }
+
+  output_string = L"";
+  writeMultiple(candidates.begin(), separators.begin(), candidates.end());
+
+  // the "[|]" separator only shows up when there are several combinations
+  bool const several = output_string.find(L"[|]") != wstring::npos;
+  if(several)
+  {
+    fputws_unlocked(L"[{]", output);
+  }
+  fputws_unlocked(output_string.c_str(), output);
+  if(several)
+  {
+    fputws_unlocked(L".[][}]", output);
+  }
+
+  ms.init(me->getInitial());
+
+  tmpblank.clear();
+  tmpword.clear();
+  numwords = 0;
+}
+
+/**
+ * Feeds one word to the pattern matcher, framed by '^' and '$'.
+ * Plain characters are stepped lower-cased with `any_char` as alternative;
+ * a backslash makes the following character be treated as plain; a
+ * "<...>" group is stepped as a single tag symbol with `any_tag` as
+ * alternative (or as `any_tag` alone when the alphabet does not know it).
+ * A '<' without a closing '>' produces no step.
+ * @param word_str the word, without its '^' and '$' delimiters
+ */
+void
+TransferMult::applyWord(wstring const &word_str)
+{
+  ms.step(L'^');
+
+  unsigned int const limit = word_str.size();
+  for(unsigned int i = 0; i < limit; i++)
+  {
+    if(word_str[i] == L'\\')
+    {
+      // escaped character: step the one after the backslash
+      i++;
+      ms.step(towlower(word_str[i]), any_char);
+    }
+    else if(word_str[i] == L'<')
+    {
+      wstring::size_type const closing = word_str.find(L'>', i+1);
+      if(closing != wstring::npos)
+      {
+        int symbol = alphabet(word_str.substr(i, closing-i+1));
+        if(symbol)
+        {
+          ms.step(symbol, any_tag);
+        }
+        else
+        {
+          ms.step(any_tag);
+        }
+        // resume right after the closing '>'
+        i = closing;
+      }
+    }
+    else
+    {
+      ms.step(towlower(word_str[i]), any_char);
+    }
+  }
+
+  ms.step(L'$');
+}
Index: branches/apertium-tagger/apertium2/apertium/transfer_mult.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_mult.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_mult.h	(revision 69632)
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFER_MULT_
+#define _TRANSFER_MULT_
+
+#include <apertium/transfer_instr.h>
+#include <apertium/transfer_token.h>
+#include <apertium/transfer_word.h>
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/buffer.h>
+#include <lttoolbox/fst_processor.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+
+#include <cstdio>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+/**
+ * Processor that matches transfer-rule patterns against a stream of words
+ * and blanks and writes out every alternative translation the bilingual
+ * dictionary offers for them.
+ * NOTE(review): this class uses std::list in a signature but the header
+ * does not include <list> itself; it presumably arrives through another
+ * include -- confirm.
+ */
+class TransferMult
+{
+private:
+  
+  Alphabet alphabet;                  // symbols of the pattern transducer
+  MatchExe *me;                       // pattern matcher, owned (see destroy())
+  MatchState ms;                      // current state of the match
+  // tables loaded by readData()
+  map<string, ApertiumRE, Ltstr> attr_items;
+  map<string, string, Ltstr> variables;
+  map<string, int, Ltstr> macros;
+  map<string, set<string, Ltstr>, Ltstr> lists;
+  map<string, set<string, Ltstr>, Ltstr> listslow;  // lower-cased copy of `lists`
+  TransferWord **word;
+  string **blank;
+  Buffer<TransferToken> input_buffer; // tokens read so far, allows rewinding
+  vector<wstring *> tmpword;          // words buffered since the last emission
+  vector<wstring *> tmpblank;         // blanks buffered since the last emission
+  wstring output_string;  
+
+  FSTProcessor fstp;                  // bilingual dictionary
+  FILE *output;                       // stream the results are written to
+  int any_char;                       // alphabet code of TRXReader::ANY_CHAR
+  int any_tag;                        // alphabet code of TRXReader::ANY_TAG
+  bool isRule;                        // true while a matched rule is pending
+  unsigned int numwords;              // words covered by the pending rule
+  
+  unsigned int nwords;
+  
+  enum OutputType{lu,chunk};
+  
+  OutputType defaultAttrs;
+  
+  void destroy();
+  void readData(FILE *input);
+  void readBil(string const &filename);
+  // NOTE(review): the next nine members are declared here but have no
+  // definition in transfer_mult.cc as added by this change -- confirm
+  // whether they are still needed.
+  string caseOf(string const &str);
+  string copycase(string const &source_word, string const &target_word);
+
+  bool beginsWith(string const &str1, string const &str2) const;
+  bool endsWith(string const &str1, string const &str2) const;
+  string tolower(string const &str) const;
+  string tags(string const &str) const;
+  wstring readWord(FILE *in);
+  wstring readBlank(FILE *in);
+  wstring readUntil(FILE *in, int const symbol) const;
+  void applyWord(wstring const &word_str);
+  void applyRule();
+  TransferToken & readToken(FILE *in);
+  void writeMultiple(list<vector<wstring> >::iterator itwords,
+                     list<wstring>::iterator itblanks, 
+                     list<vector<wstring> >::const_iterator limitwords, 
+                     wstring acum = L"", bool multiple = false);
+  vector<wstring> acceptions(wstring str);
+  bool isDefaultWord(wstring const &str);
+public:
+  TransferMult();
+  ~TransferMult();
+  
+  /** Loads the compiled transfer data and the bilingual dictionary. */
+  void read(string const &datafile, string const &fstfile);
+  /** Processes `in` and writes the result to `out`. */
+  void transfer(FILE *in, FILE *out);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_token.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_token.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_token.cc	(revision 69632)
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer_token.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+/**
+ * Copies both fields (content and type) of `o` into this token.
+ */
+void
+TransferToken::copy(TransferToken const &o)
+{
+  content = o.content;
+  type = o.type;
+}
+
+/**
+ * Clean-up hook used by the destructor and by assignment; there is nothing
+ * to release, so it is intentionally empty.
+ */
+void
+TransferToken::destroy()
+{
+}
+
+/**
+ * Default constructor: a token of type tt_eof with empty content.
+ */
+TransferToken::TransferToken() :
+type(tt_eof)
+{
+}
+
+/**
+ * Builds a token from explicit values.
+ * @param content text of the token
+ * @param type kind of the token
+ */
+TransferToken::TransferToken(wstring const &content,
+                             TransferTokenType type)
+{
+  // the parameters shadow the members, hence the explicit this->
+  this->type = type;
+  this->content = content;
+}
+
+/**
+ * Destructor: delegates to destroy().
+ */
+TransferToken::~TransferToken()
+{
+  destroy();
+}
+
+/**
+ * Copy constructor: duplicates the type and content of `o` through copy().
+ */
+TransferToken::TransferToken(TransferToken const &o)
+{
+  copy(o);
+}
+
+/**
+ * Assignment: replaces this token's fields with those of `o`.
+ * @return *this
+ */
+TransferToken &
+TransferToken::operator =(TransferToken const &o)
+{
+  // self-assignment leaves the object untouched
+  if(this == &o)
+  {
+    return *this;
+  }
+
+  destroy();
+  copy(o);
+  return *this;
+}
+
+/**
+ * @return the kind of this token
+ */
+TransferTokenType
+TransferToken::getType()
+{
+  return type;
+}
+
+/**
+ * @return a mutable reference to the text stored in this token; it stays
+ *         valid only as long as the token itself
+ */
+wstring & 
+TransferToken::getContent()
+{
+  return content;
+}
+
+/**
+ * Replaces the kind of this token.
+ * @param type new kind
+ */
+void 
+TransferToken::setType(TransferTokenType type)
+{
+  this->type = type;
+}
+
+/**
+ * Replaces the text of this token.
+ * @param content new text
+ */
+void 
+TransferToken::setContent(wstring const &content)
+{
+  this->content = content;
+}
+
Index: branches/apertium-tagger/apertium2/apertium/transfer_word.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_word.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_word.cc	(revision 69632)
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/transfer_word.h>
+#include <iostream>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+/**
+ * Copies the source string, the target string and the queue length of `o`
+ * into this word.
+ */
+void
+TransferWord::copy(TransferWord const &o)
+{
+  queue_length = o.queue_length;
+  t_str = o.t_str;
+  s_str = o.s_str;
+}
+
+/**
+ * Clean-up hook used by the destructor and by assignment; there is nothing
+ * to release, so it is intentionally empty.
+ */
+void
+TransferWord::destroy()
+{
+}
+
+/**
+ * Default constructor: empty source and target strings, queue length 0.
+ */
+TransferWord::TransferWord() :
+queue_length(0)
+{
+}
+
+/**
+ * Builds a word from explicit values.
+ * @param src source-side string
+ * @param tgt target-side string
+ * @param queue number of trailing characters regarded as the queue
+ */
+TransferWord::TransferWord(string const &src, string const &tgt, int queue)
+{
+  queue_length = queue;
+  init(src, tgt);
+}
+
+/**
+ * Destructor: delegates to destroy().
+ */
+TransferWord::~TransferWord()
+{
+  destroy();
+}
+
+/**
+ * Copy constructor: duplicates the fields of `o` through copy().
+ */
+TransferWord::TransferWord(TransferWord const &o)
+{
+  copy(o);
+}
+
+/**
+ * Assignment: replaces this word's fields with those of `o`.
+ * @return *this
+ */
+TransferWord &
+TransferWord::operator =(TransferWord const &o)
+{
+  // self-assignment leaves the object untouched
+  if(this == &o)
+  {
+    return *this;
+  }
+
+  destroy();
+  copy(o);
+  return *this;
+}
+
+/**
+ * Sets the source and target strings; the queue length is not modified.
+ * @param src source-side string
+ * @param tgt target-side string
+ */
+void
+TransferWord::init(string const &src, string const &tgt)
+{
+  s_str = src;
+  t_str = tgt;
+}
+
+/**
+ * Extracts from the source string the part matched by `part`.
+ * @param part expression selecting the part
+ * @param with_queue when false, the last `queue_length` characters of the
+ *        source string are left out of the search
+ * @return whatever part.match() returns for the string searched
+ */
+string
+TransferWord::source(ApertiumRE const &part, bool with_queue)
+{
+  if(!with_queue)
+  {
+    return part.match(s_str.substr(0, s_str.size() - queue_length));
+  }
+
+  return part.match(s_str);
+}
+
+/**
+ * Extracts from the target string the part matched by `part`.
+ * @param part expression selecting the part
+ * @param with_queue when false, the last `queue_length` characters of the
+ *        target string are left out of the search
+ * @return whatever part.match() returns for the string searched
+ */
+string
+TransferWord::target(ApertiumRE const &part, bool with_queue)
+{
+  if(!with_queue)
+  {
+    return part.match(t_str.substr(0, t_str.size() - queue_length));
+  }
+
+  return part.match(t_str);
+}
+
+/**
+ * Replaces in the source string the part matched by `part` with `value`.
+ * @param part expression selecting the part to replace
+ * @param value replacement text
+ * @param with_queue when false, the last `queue_length` characters are
+ *        protected: the replacement is applied to the text before them and
+ *        they are re-attached afterwards
+ */
+void
+TransferWord::setSource(ApertiumRE const &part, string const &value,
+                        bool with_queue)
+{
+  if(with_queue)
+  {
+    part.replace(s_str, value);
+    return;
+  }
+
+  // replace within the head only, then glue the untouched queue back
+  string head = s_str.substr(0, s_str.size() - queue_length);
+  part.replace(head, value);
+  string const queue = s_str.substr(s_str.size() - queue_length);
+  s_str = head + queue;
+}
+
+/**
+ * Replaces in the target string the part matched by `part` with `value`.
+ * @param part expression selecting the part to replace
+ * @param value replacement text
+ * @param with_queue when false, the last `queue_length` characters are
+ *        protected: the replacement is applied to the text before them and
+ *        they are re-attached afterwards
+ */
+void
+TransferWord::setTarget(ApertiumRE const &part, string const &value,
+                        bool with_queue)
+{
+  if(with_queue)
+  {
+    part.replace(t_str, value);
+    return;
+  }
+
+  // replace within the head only, then glue the untouched queue back
+  string head = t_str.substr(0, t_str.size() - queue_length);
+  part.replace(head, value);
+  string const queue = t_str.substr(t_str.size() - queue_length);
+  t_str = head + queue;
+}
Index: branches/apertium-tagger/apertium2/apertium/trx_reader.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/trx_reader.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/trx_reader.cc	(revision 69632)
@@ -0,0 +1,631 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/trx_reader.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <lttoolbox/compression.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Name of the wildcard symbol inserted by insertTags() for a '*' in a tag
+// sequence.
+wstring const
+TRXReader::ANY_TAG = L"<ANY_TAG>";
+
+// Name of the wildcard symbol inserted by insertLemma() for an empty lemma
+// or a '*' in a lemma.
+wstring const
+TRXReader::ANY_CHAR = L"<ANY_CHAR>";
+
+/**
+ * Releases the libxml2 text reader.
+ */
+void
+TRXReader::destroy()
+{
+  xmlFreeTextReader(reader);
+}
+
+/**
+ * Creates a reader with no file open and registers the two wildcard
+ * symbols (ANY_TAG, ANY_CHAR) in the alphabet of the transfer data.
+ */
+TRXReader::TRXReader() :
+reader(0),
+type(0)
+{
+  td.getAlphabet().includeSymbol(ANY_TAG);
+  td.getAlphabet().includeSymbol(ANY_CHAR);
+}
+
+/**
+ * Destructor: delegates to destroy().
+ */
+TRXReader::~TRXReader()
+{
+  destroy();
+}
+
+/**
+ * Moves to the next XML node and caches its name and node type in the
+ * `name` and `type` members.  Reports a parse error ("unexpected EOF")
+ * when the reader cannot advance.
+ */
+void
+TRXReader::step()
+{
+  if(xmlTextReaderRead(reader) != 1)
+  {
+    parseError(L"unexpected EOF");
+  }
+
+  type = xmlTextReaderNodeType(reader);
+  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+}
+
+/**
+ * Looks up an attribute of the current XML node.
+ * @param name attribute name
+ * @return the value provided by XMLParseUtil::attrib() for that attribute
+ */
+wstring
+TRXReader::attrib(wstring const &name)
+{
+  return XMLParseUtil::attrib(reader, name);
+} 
+
+/**
+ * Prints "Error: (<line>): <message>." on the wide error stream, where
+ * <line> is the parser's current line number, and terminates the program
+ * with EXIT_FAILURE.
+ * @param message description of the problem, without the final period
+ */
+void
+TRXReader::parseError(wstring const &message)
+{
+  wcerr << L"Error: (" << xmlTextReaderGetParserLineNumber(reader)
+        << L"): " << message << L"." << endl;
+  exit(EXIT_FAILURE);
+}
+
+/**
+ * Adds to the pattern transducer the path that recognises `lemma`,
+ * starting at state `base`.
+ *
+ *  - An empty lemma accepts any non-empty sequence of characters,
+ *    including escaped ones.
+ *  - '*' accepts one or more arbitrary characters.
+ *  - A backslash is inserted literally, followed by the character it
+ *    escapes.
+ *  - Any other character is inserted as is.
+ *
+ * The loop is bounded with `i < limit` and the escaped character is only
+ * read when it exists, so a lemma ending in a lone backslash no longer
+ * sends the index past the end of the string.
+ *
+ * @param base state the path starts from
+ * @param lemma lemma pattern
+ * @return the state reached at the end of the path
+ */
+int
+TRXReader::insertLemma(int const base, wstring const &lemma)
+{
+  int retval = base;
+  static int const any_char = td.getAlphabet()(ANY_CHAR);
+  if(lemma == L"")
+  {
+    retval = td.getTransducer().insertSingleTransduction(any_char, retval);
+    td.getTransducer().linkStates(retval, retval, any_char);
+    int another = td.getTransducer().insertSingleTransduction(L'\\', retval);
+    td.getTransducer().linkStates(another, retval, any_char);
+  }
+  else
+  {
+    for(unsigned int i = 0, limit = lemma.size(); i < limit; i++)
+    {
+      if(lemma[i] == L'\\')
+      {
+        retval = td.getTransducer().insertSingleTransduction(L'\\', retval);
+        i++;
+        if(i < limit)
+        {
+          retval = td.getTransducer().insertSingleTransduction(int(lemma[i]),
+                                                               retval);
+        }
+      }
+      else if(lemma[i] == L'*')
+      {
+        retval = td.getTransducer().insertSingleTransduction(any_char, retval);
+        td.getTransducer().linkStates(retval, retval, any_char);
+      }
+      else
+      {
+        retval = td.getTransducer().insertSingleTransduction(int(lemma[i]),
+                                                             retval);
+      }
+    }
+  }
+
+  return retval;
+}
+
+/**
+ * Adds to the pattern transducer the path that recognises the tag sequence
+ * `tags` (tag names separated by '.'), starting at state `base`.
+ * A '*' becomes one or more ANY_TAG symbols; every other tag name is
+ * wrapped in '<' '>', registered in the alphabet and inserted as a single
+ * symbol.
+ * @param base state the path starts from
+ * @param tags tag pattern; when empty nothing is inserted
+ * @return the state reached at the end of the path (`base` if `tags` is empty)
+ */
+int
+TRXReader::insertTags(int const base, wstring const &tags)
+{
+  int retval = base;
+  static int const any_tag = td.getAlphabet()(ANY_TAG);
+  if(tags.size() != 0)
+  {
+    for(unsigned int i = 0, limit = tags.size(); i < limit; i++)
+    {
+      if(tags[i] == L'*')
+      {
+        retval = td.getTransducer().insertSingleTransduction(any_tag, retval);
+        td.getTransducer().linkStates(retval, retval, any_tag);
+        // together with the loop increment this skips the character
+        // after the '*'
+        i++;
+      }  
+      else
+      {
+        wstring symbol = L"<";
+        // take the text up to the next '.', if there is one
+        for(unsigned int j = i; j != limit; j++)
+        {
+          if(tags[j] == L'.')
+          {  
+            symbol.append(tags.substr(i, j-i));
+            i = j;
+            break;
+          }
+        }
+        
+        // symbol still bare: no '.' was found (or the name was empty), so
+        // the rest of the string is taken as the last tag
+        if(symbol == L"<")
+        {
+          symbol.append(tags.substr(i));
+          i = limit;
+        }
+        symbol += L'>';
+        td.getAlphabet().includeSymbol(symbol);
+        retval = td.getTransducer().insertSingleTransduction(td.getAlphabet()(symbol), retval);
+      }
+    }
+  }
+  else
+  {
+    return base; // new line
+  }
+  
+  return retval;
+}
+
+/**
+ * Parses the rules file `filename`.  The category definitions are read
+ * first; then each of the remaining sections (attrs, vars, lists, macros,
+ * rules) is processed if it is the next element found, in that fixed
+ * order.  Text and comment nodes between sections are skipped.
+ * Terminates the program with EXIT_FAILURE when the file cannot be opened.
+ * @param filename path of the XML rules file
+ */
+void
+TRXReader::read(string const &filename)
+{
+  reader = xmlReaderForFile(filename.c_str(), NULL, 0);
+  if(reader == NULL)
+  {
+    cerr << "Error: Cannot open '" << filename << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  procDefCats();
+  // move on to the next element, ignoring text and comments
+  step();
+  while(name == L"#text" || name == L"#comment")
+  {
+    step();
+  }
+ 
+  if(name == L"section-def-attrs")
+  {
+    procDefAttrs();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+  
+  if(name == L"section-def-vars")
+  {
+    procDefVars();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+
+  if(name == L"section-def-lists")
+  {
+    procDefLists();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+
+  if(name == L"section-def-macros")
+  {
+    procDefMacros();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+
+  if(name == L"section-rules")
+  {
+    procRules();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }  
+}
+
+/**
+ * Processes the <section-rules> element up to its closing tag.
+ * Each <pattern> is turned into paths of the pattern transducer, one word
+ * per <pattern-item>; the states reached when the pattern closes become
+ * final and are associated with the number of the rule.  <let> elements
+ * are only inspected to warn about assignments to the 'sl' side.
+ */
+void
+TRXReader::procRules()
+{
+  int count = 0;           // number of the rule being read (first one is 1)
+  set<int> alive_states;   // states reached by the pattern read so far
+  
+  while(type != XML_READER_TYPE_END_ELEMENT || 
+	name != L"section-rules")
+  {
+    step();
+    if(name == L"rule")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	count++;
+      }
+    }
+    else if(name == L"pattern")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        // opening tag: every pattern starts at the initial state
+        alive_states.clear();
+        alive_states.insert(td.getTransducer().getInitial());
+      }
+      else
+      {
+        // closing tag: the states reached are the finals of this rule
+        for(set<int>::iterator it = alive_states.begin(), limit = alive_states.end();
+            it != limit; it++)
+        {
+          td.getTransducer().setFinal(*it);
+          if(td.getFinals().find(*it) == td.getFinals().end())
+          {
+            td.getFinals()[*it] = count;
+          }       
+          else
+          {
+            // the state already belongs to an earlier rule, which is kept
+            wcerr << L"Warning (" << xmlTextReaderGetParserLineNumber(reader);
+            wcerr << L"): "
+              << L"Paths to rule " << count << " blocked by rule " << td.getFinals()[*it]
+              << L"." << endl;
+
+          }
+        }
+      }
+    }
+    else if(name == L"pattern-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        pair<multimap<wstring, LemmaTags, Ltstr>::iterator,
+             multimap<wstring, LemmaTags, Ltstr>::iterator> range;
+
+        // all the cat-items defined for the referenced category
+        range = cat_items.equal_range(attrib(L"n"));
+      
+        if(range.first == range.second)
+        {
+          parseError(L"Undefined cat-item '" + attrib(L"n"));
+        }
+
+// new code
+      
+        set<int> alive_states_new;
+        
+        // extend every live state with every cat-item of the category
+        for(; range.first != range.second; range.first++)
+        {
+          for(set<int>::iterator it = alive_states.begin(), limit = alive_states.end();
+              it != limit; it++)
+          {
+            // mark of begin of word
+            int tmp = td.getTransducer().insertSingleTransduction(L'^', *it);
+            if(*it != td.getTransducer().getInitial())
+            {
+              // insert optional blank between two words
+              int alt = td.getTransducer().insertSingleTransduction(L' ', *it);
+              td.getTransducer().linkStates(alt, tmp, L'^');
+            }
+            
+            // insert word
+            tmp = insertLemma(tmp, range.first->second.lemma);
+            tmp = insertTags(tmp, range.first->second.tags);
+            
+            // insert mark of end of word
+            tmp = td.getTransducer().insertSingleTransduction(L'$', tmp);
+            
+            // set as alive_state
+            alive_states_new.insert(tmp);
+          }
+        } 
+        
+        // copy new alive states on alive_states set
+        alive_states = alive_states_new;      
+      }
+    }
+    else if(name == L"let")
+    {
+      // this `count` is local to the branch and shadows the rule counter;
+      // the loop stops at the first element node found inside <let>
+      int count = 0;
+      int lineno = xmlTextReaderGetParserLineNumber(reader); 
+      while(name != L"let" || type != XML_READER_TYPE_END_ELEMENT)
+      {
+        step();
+        if(type == XML_ELEMENT_NODE)
+        {
+          count++;
+          
+          if(name == L"clip" && attrib(L"side") == L"sl")
+          {
+            wcerr << L"Warning (" << lineno;
+            wcerr << L"): assignment to 'sl' side has no effect." << endl;
+          }    
+        }
+        
+        if(count != 0)
+        {
+          break;
+        }
+      }
+      
+    }
+  }
+}
+
+/**
+ * Writes the transfer data gathered so far to `filename`, opened in binary
+ * mode.  Terminates the program with EXIT_FAILURE when the file cannot be
+ * opened for writing.
+ * @param filename path of the output file
+ */
+void
+TRXReader::write(string const &filename)
+{
+  FILE *out = fopen(filename.c_str(), "wb");
+  if(!out)
+  {
+    cerr << "Error: cannot open '" << filename;
+    cerr << "' for writing" << endl;
+    exit(EXIT_FAILURE);
+  }
+  
+  td.write(out);
+
+  fclose(out);
+}
+
+/**
+ * Processes the <section-def-attrs> element up to its closing tag.
+ * Every <attr-item> adds its "tags" value to the attribute named by the
+ * enclosing <def-attr>; when a <def-attr> closes, the text collected for it
+ * is wrapped in parentheses.  Any other element is a parse error.
+ */
+void
+TRXReader::procDefAttrs()
+{
+  wstring attrname;  // name of the <def-attr> being read
+
+  while(type != XML_READER_TYPE_END_ELEMENT || 
+	name != L"section-def-attrs")
+  {
+    step();
+    if(name == L"attr-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        insertAttrItem(attrname, attrib(L"tags"));
+      }
+    }
+    else if(name == L"def-attr")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        attrname = attrib(L"n");
+      }
+      else
+      {
+        // closing tag: enclose the alternatives in a group
+        wstring all = td.getAttrItems()[attrname];
+        td.getAttrItems()[attrname] = L"(" + all + L")";
+        attrname = L"";
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"section-def-attrs")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"Unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+/**
+ * Skips the document prologue and processes the <section-def-cats>
+ * element up to its closing tag, storing every <cat-item> under the name
+ * of the enclosing <def-cat>.  Unexpected elements are parse errors.
+ */
+void 
+TRXReader::procDefCats()
+{
+  // advance to the root element (transfer, interchunk or postchunk);
+  // only text, comments and section-def-cats may be seen on the way
+  while(type == XML_READER_TYPE_END_ELEMENT || !(name == L"transfer" || name == L"interchunk" || name == L"postchunk"))
+  {
+    step();
+    if(name != L"#text" && name != L"transfer" &&  name != L"interchunk" &&
+       name != L"postchunk" && name != L"section-def-cats" && name != L"#comment")
+    {
+      parseError(L"'<" + name + L">' tag unexpected");
+    }
+  }
+  
+  wstring catname;  // name of the <def-cat> being read
+
+  while(type != XML_READER_TYPE_END_ELEMENT || 
+	name != L"section-def-cats")
+  {
+    step();
+    if(name == L"cat-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        if(attrib(L"tags") != L"")
+        {
+          insertCatItem(catname, attrib(L"lemma"), attrib(L"tags"));
+        }
+        else
+        {
+          // no tags given: the "name" attribute is stored as the lemma
+          insertCatItem(catname, attrib(L"name"), L"");
+        }
+      }
+    }
+    else if(name == L"def-cat")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        catname = attrib(L"n");
+      }
+      else
+      {
+        catname = L"";
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"section-def-cats")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"Unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+/**
+ * Processes the <section-def-vars> element up to its closing tag, creating
+ * one variable per <def-var> from its "n" (name) and "v" (initial value)
+ * attributes.  Any other element is a parse error.
+ */
+void
+TRXReader::procDefVars()
+{
+  while(type != XML_READER_TYPE_END_ELEMENT || 
+	name != L"section-def-vars")
+  {
+    step();
+    if(name == L"def-var")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        createVar(attrib(L"n"), attrib(L"v"));
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"section-def-vars")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"Unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+/**
+ * Processes the <section-def-lists> element up to its closing tag, adding
+ * the "v" value of every <list-item> to the list named by the enclosing
+ * <def-list>.  Any other element is a parse error.
+ */
+void
+TRXReader::procDefLists()
+{
+  wstring listname;  // name of the <def-list> being read
+
+  while(type != XML_READER_TYPE_END_ELEMENT || 
+	name != L"section-def-lists")
+  {
+    step();
+    if(name == L"list-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        insertListItem(listname, attrib(L"v"));
+      }
+    }
+    else if(name == L"def-list")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        listname = attrib(L"n");
+      }
+      else
+      {
+        listname = L"";
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"section-def-lists")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"Unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+/**
+ * Processes the <section-def-macros> element up to its closing tag and
+ * registers every <def-macro> under consecutive numbers starting at 0.
+ * The contents of the macros are skipped here.
+ */
+void
+TRXReader::procDefMacros()
+{
+  int next_id = 0;
+  while(!(type == XML_READER_TYPE_END_ELEMENT &&
+          name == L"section-def-macros"))
+  {
+    step();
+    // only opening <def-macro> tags are of interest
+    if(name == L"def-macro" && type != XML_READER_TYPE_END_ELEMENT)
+    {
+      createMacro(attrib(L"n"), next_id++);
+    }
+  }
+}
+
+/**
+ * Registers the macro `name` with number `value`; a name that is already
+ * registered is reported as a parse error.
+ * @param name macro name
+ * @param value number assigned to the macro
+ */
+void
+TRXReader::createMacro(wstring const &name, int const value)
+{
+  bool const already_defined = td.getMacros().find(name) != td.getMacros().end();
+  if(already_defined)
+  {
+    parseError(L"Macro '" + name + L"' defined at least twice");
+  }
+  td.getMacros()[name] = value;
+}
+
+/**
+ * Adds `value` to the list called `name` in the transfer data.
+ * @param name list name
+ * @param value item to add
+ */
+void
+TRXReader::insertListItem(wstring const &name, wstring const &value)
+{
+  td.getLists()[name].insert(value);
+}
+
+/**
+ * Stores the variable `name` with `initial_value` in the transfer data,
+ * overwriting any value previously stored under that name.
+ * @param name variable name
+ * @param initial_value value assigned to the variable
+ */
+void
+TRXReader::createVar(wstring const &name, wstring const &initial_value)
+{
+  td.getVariables()[name] = initial_value;
+}
+
+/**
+ * Records one more (lemma, tags) item for the category `name`; a category
+ * may hold any number of items.
+ * @param name category name
+ * @param lemma lemma pattern of the item
+ * @param tags tag pattern of the item
+ */
+void
+TRXReader::insertCatItem(wstring const &name, wstring const &lemma,
+                         wstring const &tags)
+{
+  LemmaTags item;
+  item.tags = tags;
+  item.lemma = lemma;
+  cat_items.insert(multimap<wstring, LemmaTags, Ltstr>::value_type(name, item));
+}
+
+/**
+ * Appends one alternative to the text stored for the attribute `name`.
+ * The dot-separated `tags` are rewritten as "<t1><t2>..." and, when the
+ * attribute already holds text, a '|' is placed in front.
+ * @param name attribute name
+ * @param tags dot-separated tag sequence
+ */
+void
+TRXReader::insertAttrItem(wstring const &name, wstring const &tags)
+{
+  // build the new piece locally, then append it in one go
+  wstring piece;
+  if(td.getAttrItems()[name].size() != 0)
+  {
+    piece += L'|';
+  }
+
+  piece += L'<';
+  for(unsigned int k = 0, total = tags.size(); k != total; k++)
+  {
+    if(tags[k] != L'.')
+    {
+      piece += tags[k];
+    }
+    else
+    {
+      // a dot closes one tag and opens the next
+      piece.append(L"><");
+    }
+  }
+  piece += L'>';
+
+  td.getAttrItems()[name].append(piece);
+}
Index: branches/apertium-tagger/apertium2/apertium/trx_reader.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/trx_reader.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/trx_reader.h	(revision 69632)
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRXREADER_
+#define _TRXREADER_
+
+#include <apertium/transfer_data.h>
+#include <lttoolbox/ltstr.h>
+
+#include <libxml/xmlreader.h>
+#include <map>
+#include <string>
+
+using namespace std;
+
+class TRXReader
+{
+private:
+  struct LemmaTags // one category item: a lemma and a tag string, built by insertCatItem
+  {
+    wstring lemma;
+    wstring tags;
+  };
+
+  xmlTextReaderPtr reader;  // libxml2 xmlTextReader handle
+
+  int type; // node type of the current node; compared against XML_READER_TYPE_* constants
+  wstring name; // name of the current node; compared against element names such as L"def-macro"
+
+  multimap<wstring, LemmaTags, Ltstr> cat_items; // category name -> items, filled by insertCatItem
+  TransferData td; // receives macros, lists, variables and attribute items
+
+  wstring attrib(wstring const &name);
+
+  void parseError(wstring const &message);
+  void destroy();
+  void clearTagIndex();
+  
+  void step();
+  void procTransfer();
+  void procDefCats();
+  void procDefAttrs();
+  void procDefVars();
+  void procDefLists();
+  void procDefMacros();
+  void procRules();
+
+  void insertCatItem(wstring const &name, wstring const &lemma, 
+		     wstring const &tags);
+  void insertAttrItem(wstring const &name, wstring const &tags);
+  void createVar(wstring const &name, wstring const &initial_value);
+  void insertListItem(wstring const &name, wstring const &value);
+  void createMacro(wstring const &name, int const val);
+
+  int insertLemma(int const base, wstring const &lemma);
+  int insertTags(int const base, wstring const &tags);
+  
+public:
+  static wstring const ANY_TAG;
+  static wstring const ANY_CHAR;
+
+
+  TRXReader();
+  ~TRXReader();
+
+  void read(string const &filename);
+  void write(string const &filename);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tsx_reader.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tsx_reader.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tsx_reader.cc	(revision 69632)
@@ -0,0 +1,596 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/tsx_reader.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <lttoolbox/compression.h>
+#include <apertium/string_utils.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+void
+TSXReader::copy(TSXReader const &o)
+{ // No-op: nothing is copied from o.
+}
+
+void
+TSXReader::destroy()
+{ // No-op: nothing is released here.
+}
+
+TSXReader::TSXReader() :
+reader(0),
+type(0)
+{ // Caches pointers to the containers owned by tdata so the parsing code can use them directly.
+  constants = &(tdata.getConstants());
+  plist = &(tdata.getPatternList());
+  prefer_rules = &(tdata.getPreferRules());
+  enforce_rules = &(tdata.getEnforceRules());
+  forbid_rules = &(tdata.getForbidRules());
+  open_class = &(tdata.getOpenClass());
+  array_tags = &(tdata.getArrayTags());
+  tag_index = &(tdata.getTagIndex());
+}
+
+TSXReader::~TSXReader()
+{ // Delegates to destroy().
+  destroy();
+}
+
+TSXReader::TSXReader(TSXReader const &o)
+{ // NOTE(review): there is no initializer list and copy() is empty, so reader, type and every cached pointer stay uninitialized here.
+  copy(o);
+}
+
+
+void
+TSXReader::clearTagIndex()
+{
+  // Built-in labels, registered in this order right after both tables are emptied.
+  static wchar_t const * const builtin[] =
+    { L"LPAR", L"RPAR", L"LQUEST", L"CM", L"SENT", L"kEOF", L"kUNDEF" };
+  array_tags->clear();
+  tag_index->clear();
+  for(unsigned int i = 0; i != sizeof(builtin) / sizeof(builtin[0]); i++)
+  {
+    newTagIndex(builtin[i]);
+  }
+}
+
+void
+TSXReader::step()
+{
+  // Advance to the next node; any result other than 1 is reported as an unexpected EOF.
+  if(xmlTextReaderRead(reader) != 1)
+  {
+    parseError(L"unexpected EOF");
+  }
+  type = xmlTextReaderNodeType(reader);
+  name = XMLParseUtil::towstring(xmlTextReaderConstName(reader));
+}
+
+TSXReader & TSXReader::operator =(TSXReader const &o)
+{
+  if(this == &o)
+  {
+    return *this; // self-assignment: leave the object untouched
+  }
+  destroy();
+  copy(o);
+  return *this;
+}
+
+wstring
+TSXReader::attrib(wstring const &name)
+{ // Forwards to XMLParseUtil::attrib for attribute `name` on the reader's current node.
+  return XMLParseUtil::attrib(reader, name);
+} 
+
+void
+TSXReader::parseError(wstring const &message)
+{ // Prints "Error: (<parser line>): <message>." on wcerr, then terminates with EXIT_FAILURE.
+  wcerr << L"Error: (" << xmlTextReaderGetParserLineNumber(reader)
+        << L"): " << message << L"." << endl;
+  exit(EXIT_FAILURE);
+}
+
+void
+TSXReader::newTagIndex(wstring const &tag)
+{
+  wstring const key = L"TAG_" + tag; // used both as index key and as the stored tag name
+  if(tag_index->find(key) != tag_index->end())
+  {
+    parseError(L"'" + tag + L"' already defined");
+  }
+  array_tags->push_back(key);
+  (*tag_index)[key] = array_tags->size() - 1; // position of the entry just appended
+}
+
+void
+TSXReader::newDefTag(wstring const &tag)
+{
+  wstring const key = L"TAG_" + tag; // index key only; array_tags keeps the unprefixed name
+  if(tag_index->find(key) != tag_index->end())
+  {
+    parseError(L"'" + tag + L"' already defined");
+  }
+  array_tags->push_back(tag);
+  (*tag_index)[key] = array_tags->size() - 1; // position of the entry just appended
+}
+
+void
+TSXReader::newConstant(wstring const &constant)
+{ // Binds `constant` to the position it is about to take at the end of array_tags, then appends it.
+  constants->setConstant(constant, array_tags->size());
+  array_tags->push_back(constant);
+}
+
+void
+TSXReader::procDiscardOnAmbiguity()
+{ // Consumes nodes up to </discard-on-ambiguity>, handing each <discard> "tags" attribute to tdata.addDiscard.
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"discard-on-ambiguity")
+  {
+    step();
+
+    if(name == L"discard")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        tdata.addDiscard(L"<" + StringUtils::substitute(attrib(L"tags"), L".", L"><") + L">");
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"discard-on-ambiguity")
+    {
+      if(type == XML_READER_TYPE_END_ELEMENT)
+      {
+	break;
+      }
+      else
+      {
+	parseError(L"Unexpected 'discard-on-ambiguity' open tag"); // a nested opening tag is an error
+      }
+    }
+    else
+    {
+      parseError(L"unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+void
+TSXReader::procDefLabel()
+{ // Registers the label named by the "name" attribute and feeds its <tags-item> children to plist.
+  wstring name_attr = attrib(L"name");
+  wstring closed_attr = attrib(L"closed");
+  newDefTag(name_attr);
+
+  if(closed_attr != L"true") // any value other than "true" puts the label in open_class
+  {
+    open_class->insert((*tag_index)[L"TAG_"+name_attr]);
+  }
+
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"def-label")
+  {
+    step();
+
+    if(name == L"tags-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	plist->insert((*tag_index)[L"TAG_"+name_attr], attrib(L"lemma"),
+		     attrib(L"tags"));
+      }
+    }
+    else if(name == L"def-label")
+    {
+      return; // returns on any def-label node; the node type is not checked here
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+void
+TSXReader::procDefMult()
+{ // Registers the label named by the "name" attribute and feeds each <sequence> child to plist.
+  wstring name_attr = attrib(L"name");
+  wstring closed_attr = attrib(L"closed");
+  newDefTag(name_attr);
+  if(closed_attr != L"true") // any value other than "true" puts the label in open_class
+  {
+    open_class->insert((*tag_index)[L"TAG_"+name_attr]);
+  }
+
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"def-mult")
+  {
+    step();
+    if(name == L"sequence")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	plist->beginSequence();
+	while(type != XML_READER_TYPE_END_ELEMENT || name != L"sequence")
+	{
+	  step();
+	  if(name == L"label-item")
+	  {
+	    if(type != XML_READER_TYPE_END_ELEMENT)
+	    {
+	      plist->insert((*tag_index)[L"TAG_"+name_attr],
+                            (*tag_index)[L"TAG_"+attrib(L"label")]);
+	    }
+	  }
+	  else if(name == L"tags-item")
+	  {
+	    if(type != XML_READER_TYPE_END_ELEMENT)
+	    {
+	      plist->insert((*tag_index)[L"TAG_"+name_attr],
+			    attrib(L"lemma"), attrib(L"tags"));
+	    }
+	  }
+	  else if(name == L"sequence")
+	  {
+	    break;
+	  }
+	  else if(name == L"#text")
+	  {
+	    // do nothing
+	  }
+	  else if(name == L"#comment")
+	  {
+	    // do nothing
+          }
+	} // note: unlike the outer loop, other node names inside <sequence> are ignored without an error
+	plist->endSequence();
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"def-mult")
+    { 
+      // do nothing
+    }
+    else
+    {
+      parseError(L"unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+void
+TSXReader::procTagset()
+{ // Phase 1 advances to the opening <tagset>; phase 2 dispatches its <def-label>/<def-mult> children.
+  while(type == XML_READER_TYPE_END_ELEMENT || name != L"tagset")
+  {
+    step();
+    if(name != L"#text" && name != L"tagger" && name != L"tagset") // note: "#comment" is rejected here too
+    {
+      parseError(L"'<" + name + L">' tag unexpected");
+    }
+  }
+  
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"tagset")
+  {
+    step();
+    if(name == L"def-label")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	procDefLabel();
+      }
+    }
+    else if(name == L"def-mult")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        procDefMult();
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"tagset")
+    {
+      // do nothing
+    }
+    else
+    {
+      parseError(L"Unexpected '<" + name + L">' tag");
+    }
+  }
+}
+
+
+void
+TSXReader::procLabelSequence()
+{ // Reads the next two <label-item> nodes (skipping text and comments) and records them as one forbid rule.
+  TForbidRule forbid_rule;
+
+  step();
+  while(name == L"#text" || name == L"#comment")
+  {
+    step();
+  }
+  if(name != L"label-item")
+  {
+    parseError(L"<label-item> tag expected");
+  }
+  // NOTE(review): map::operator[] inserts a value-initialized TTag for a label that was never defined, with no error.
+  forbid_rule.tagi = (*tag_index)[L"TAG_" + attrib(L"label")];
+
+  step();
+  while(name == L"#text" || name == L"#comment")
+  {
+    step();
+  }
+  if(name != L"label-item")
+  {
+    parseError(L"<label-item> tag expected");
+  }
+  forbid_rule.tagj = (*tag_index)[L"TAG_" + attrib(L"label")];
+  
+  forbid_rules->push_back(forbid_rule);
+}
+
+void
+TSXReader::procForbid()
+{ // Consumes nodes up to </forbid>, calling procLabelSequence for every opening <label-sequence>.
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"forbid")
+  {
+    step();
+    if(name == L"label-sequence")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	procLabelSequence();
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"forbid")
+    {
+      if(type == XML_READER_TYPE_END_ELEMENT)
+      {
+	break;
+      }
+      else
+      {
+	parseError(L"Unexpected '" + name + L"' open tag"); // a nested opening <forbid> is an error
+      }
+    }
+    else
+    {
+      parseError(L"Unexpected '" + name + L"' tag");
+    }
+  }  
+}
+
+void
+TSXReader::procEnforce()
+{ // Consumes nodes up to </enforce-rules>, building one TEnforceAfterRule per <enforce-after> element.
+  TEnforceAfterRule aux; // rule under construction, reused across <enforce-after> elements
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"enforce-rules")
+  {
+    step();
+    if(name == L"enforce-after")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	aux.tagi = (*tag_index)[L"TAG_" + attrib(L"label")];
+      }
+      else
+      {
+	enforce_rules->push_back(aux); // the rule is stored when its closing tag is seen
+	aux.tagsj.clear();
+      }
+    }
+    else if(name == L"label-set")
+    {
+      // do nothing
+    }
+    else if(name == L"label-item")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+	aux.tagsj.push_back((*tag_index)[L"TAG_" + attrib(L"label")]);
+      }
+    }
+    else if(name == L"#text")
+    {
+      // do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"enforce-rules")
+    {
+      if(type == XML_READER_TYPE_END_ELEMENT)
+      {
+	break;
+      }
+      else
+      {
+	parseError(L"Unexpected 'enforce-rules' open tag");
+      }
+    }
+    else
+    {
+      parseError(L"Unexpected '" + name + L"' tag");
+    }
+  }
+}
+
+void
+TSXReader::procPreferences()
+{ // Consumes nodes up to </preferences>, appending one entry to prefer_rules per opening <prefer>.
+  while(type != XML_READER_TYPE_END_ELEMENT || name != L"preferences")
+  {
+    step();
+    if(name == L"prefer")
+    {
+      if(type != XML_READER_TYPE_END_ELEMENT)
+      {
+        wstring const tags = L"<" + StringUtils::substitute(attrib(L"tags"), L".", L"><") + L">";
+	prefer_rules->push_back(tags);
+      }
+    }
+    else if(name == L"#text")
+    {
+      //do nothing
+    }
+    else if(name == L"#comment")
+    {
+      // do nothing
+    }
+    else if(name == L"preferences")
+    {
+      if(type == XML_READER_TYPE_END_ELEMENT)
+      {
+	break;
+      }
+      else
+      {
+	parseError(L"Unexpected 'preferences' open tag"); // a nested opening <preferences> is an error
+      }
+    }
+    else
+    {
+      parseError(L"Unexpected '" + name + L"' tag");
+    }
+  }
+}
+
+void
+TSXReader::read(string const &filename)
+{ // Parses `filename` into tdata: the tagset, then optional forbid, enforce-rules, preferences and discard-on-ambiguity sections, in that order.
+  reader = xmlReaderForFile(filename.c_str(), NULL, 0);
+  if(reader == NULL)
+  {
+    cerr << "Error: Cannot open '" << filename << "'." << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  open_class->clear();
+  forbid_rules->clear();
+  clearTagIndex();
+  enforce_rules->clear(); // NOTE(review): prefer_rules is not cleared here, unlike the three containers above — confirm whether read() may be called twice
+
+  procTagset();
+
+  step();
+  while(name == L"#text" || name == L"#comment")
+  {
+    step();
+  }
+  if(name == L"forbid")
+  {
+    procForbid();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+  if(name == L"enforce-rules")
+  {
+    procEnforce();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+  if(name == L"preferences")
+  {
+    procPreferences();
+    step();
+    while(name == L"#text" || name == L"#comment")
+    {
+      step();
+    }
+  }
+  if(name == L"discard-on-ambiguity")
+  {
+    if(type != XML_READER_TYPE_END_ELEMENT)
+    {
+      procDiscardOnAmbiguity();
+    }
+  }
+
+  xmlFreeTextReader(reader); // NOTE(review): `reader` keeps the freed pointer after this line
+  xmlCleanupParser(); // NOTE(review): libxml2 documents this as a process-wide cleanup — confirm it is safe to call from here
+
+  newConstant(L"kMOT"); // constants are appended after every tag read from the file
+  newConstant(L"kDOLLAR");
+  newConstant(L"kBARRA");
+  newConstant(L"kMAS");
+  newConstant(L"kIGNORAR");
+  newConstant(L"kBEGIN");
+  newConstant(L"kUNKNOWN");
+  
+  plist->insert((*tag_index)[L"TAG_LPAR"], L"", L"lpar"); // patterns for the built-in labels registered by clearTagIndex()
+  plist->insert((*tag_index)[L"TAG_RPAR"], L"", L"rpar");
+  plist->insert((*tag_index)[L"TAG_LQUEST"], L"", L"lquest");
+  plist->insert((*tag_index)[L"TAG_CM"], L"", L"cm");
+  plist->insert((*tag_index)[L"TAG_SENT"], L"", L"sent");
+//  plist->insert((*tag_index)[L"TAG_kMAS"], L"+", L"");
+  plist->buildTransducer();
+}
+
+TaggerData &
+TSXReader::getTaggerData()
+{ // Returns the internal TaggerData by non-const reference (no copy is made).
+  return tdata;
+}
Index: branches/apertium-tagger/apertium2/apertium/tsx_reader.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tsx_reader.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tsx_reader.h	(revision 69632)
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TSXREADER_
+#define _TSXREADER_
+
+#include <apertium/constant_manager.h>
+#include <apertium/tagger_data.h>
+#include <apertium/ttag.h>
+#include <lttoolbox/pattern_list.h>
+#include <lttoolbox/ltstr.h>
+
+#include <libxml/xmlreader.h>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+class TSXReader
+{
+private:
+  xmlTextReaderPtr reader;  // libxml2 xmlTextReader handle, opened and freed inside read()
+  set<TTag> *open_class; // this and the pointers below are set by the constructor to containers owned by tdata
+  vector<TForbidRule> *forbid_rules;
+  map<wstring, TTag, Ltstr> *tag_index; // "TAG_"-prefixed label -> position in array_tags
+  vector<wstring> *array_tags;
+  vector<TEnforceAfterRule> *enforce_rules;
+  vector<wstring> *prefer_rules;
+  PatternList *plist;
+  ConstantManager *constants;
+  TaggerData tdata; // filled by read(), exposed by getTaggerData()
+
+  int type; // node type of the current XML node, refreshed by step()
+  wstring name; // name of the current XML node, refreshed by step()
+
+  wstring attrib(wstring const &name);
+
+  void parseError(wstring const &message);
+  void newTagIndex(wstring const &tag);
+  void newDefTag(wstring const &tag);
+  void newConstant(wstring const &constant);
+  void procDefLabel();
+  void procDefMult();
+  void procDiscardOnAmbiguity();
+  void procTagset();
+  void procForbid();
+  void procLabelSequence();
+  void procEnforce();
+  void procPreferences();
+  void destroy();
+  void clearTagIndex();
+
+  void step();
+public:
+  TSXReader();
+  ~TSXReader();
+
+  void read(string const &filename);
+  TaggerData & getTaggerData();
+
+private:
+  void copy(TSXReader const &o); // copy construction and assignment are private: code outside the class cannot copy a TSXReader
+  TSXReader(TSXReader const &o);
+  TSXReader & operator =(TSXReader const &o);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/apertium.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium.1	(revision 69632)
@@ -0,0 +1,115 @@
+.TH apertium 1 2006-03-08 "" ""
+.SH NAME
+apertium \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.sf.net\fR.
+.SH SYNOPSIS
+.B apertium
+[\-d datadir] [\-f format] [\-u] [\-a] {language-pair} [infile [outfile]]
+.SH DESCRIPTION
+.BR apertium 
+is the application that most people will be using as it simplifies the
+use of apertium/lt-toolbox tools for machine translation
+purposes. 
+.PP
+This tool tries to ease the use of \fIlt-toolbox\fR (which contains
+all the lexical processing modules and tools) and \fIapertium\fR
+(which contains the rest of the engine) by providing a unique
+front-end to the end-user.
+.PP
+The different modules behind the apertium machine translation
+architecture are in order:
+.RS
+\(bu \fIde-formatter:\fR Separates the text to be translated from the
+format information.
+.PP
+\(bu \fImorphological-analyser:\fR Tokenizes the text into surface forms.
+.PP
+\(bu \fIpart-of-speech tagger:\fR Chooses one surface form among
+homographs.
+.PP
+\(bu \fIlexical transfer module:\fR Reads each source-language lexical
+form and delivers a corresponding target-language lexical form.
+.PP
+\(bu \fIstructural transfer module:\fR Detects fixed-length patterns
+of lexical forms (chunks or phrases) needing special processing due to
+grammatical divergences between the two languages and performs the
+corresponding transformations.
+.PP
+\(bu \fImorphological generator:\fR Delivers a target-language surface
+form for each target-language lexical form, by suitably inflecting it.
+.PP
+\(bu \fIpost-generator:\fR Performs orthographical operations such as
+contractions and apostrophations.
+.PP
+\(bu \fIre-formatter:\fR Restores the format information encapsulated
+by the de-formatter into the translated text and removes the
+encapsulation sequences used to protect certain characters in the
+source text.
+.RE
+.SH OPTIONS
+.PP
+.B -d datadir
+The directory holding the linguistic data.  By default it will use the
+expected installation path.
+.PP
+.B language-pair
+The language pair: LANG1-LANG2 (for instance \fIes-ca\fR or \fIca-es\fR).
+.PP
+.B -f format
+Specifies the format of the input and output files which can have
+these values:
+.RS
+\(bu \fItxt\fR \fB(default value)\fR Input and output files are in
+text format.
+.PP
+\(bu \fIhtml\fR Input and output files are in "html" format. This
+"html" is the one accepted by the vast majority of web browsers.
+.PP
+\(bu \fIhtml-noent\fR Input and output files are in "html" format, but
+preserving native encoding characters rather than using HTML text
+entities.
+.PP
+\(bu \fIrtf\fR Input and output files are in "rtf" format. The
+accepted "rtf" is the one generated by \fBMicrosoft WordPad (C)\fR and
+\fBMicrosoft Office (C)\fR up to and including \fBOffice-97\fR.
+.RE
+.PP
+.B -u
+Disable marking of unknown words with the '*' character.
+.PP
+.B -a
+Enable marking of disambiguated words with the '=' character.
+.RS
+.SH FILES
+These are the two files that can be used with this command:
+.PP
+.B -m memory.tmx
+use a translation memory to recycle translations
+.PP
+.B -o direction 
+translation direction to use with the translation memory; by default the main translation direction is used
+.PP
+.B -l
+lists the available translation directions and exits. A direction is
+typically LANG1-LANG2, but see modes.xml in language data.
+.PP
+.B infile
+Input file (stdin by default).
+.PP
+.B outfile
+Output file (stdout by default).
+.PP
+.SH SEE ALSO
+.I lt-proc\fR(1),
+.I lt-comp\fR(1),
+.I lt-expand\fR(1),
+.I apertium-tagger\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.cc	(revision 69632)
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer_mult.h>
+#include <lttoolbox/lt_locale.h>
+#include <apertium/apertium_config.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <libgen.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#ifdef WIN32
+#if defined(__MINGW32__)
+#define __MSVCRT_VERSION__  0x0800
+#endif
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace std;
+
+void message(char *progname)
+{ // Prints the usage text on cerr and exits with EXIT_FAILURE.
+  cerr << "USAGE: " << basename(progname) << " preproc biltrans [input [output]]" << endl
+       << "  preproc    result of preprocess trules file" << endl
+       << "  biltrans   bilingual letter transducer file" << endl
+       << "  input      input file, standard input by default" << endl
+       << "  output     output file, standard output by default" << endl;
+  exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])
+{ // Usage: <prog> preproc biltrans [input [output]]; runs TransferMult over input, writing to output.
+  LtLocale::tryToSetLocale();
+  
+  if(argc > 5 || argc <3) // two to four arguments are accepted
+  {
+    message(argv[0]);
+  }
+
+  for(unsigned int i = 1; i < 3; i++) // the two data files (argv[1], argv[2]) must exist
+  {
+    struct stat mybuf;
+    if(stat(argv[i], &mybuf) == -1)
+    {
+      cerr << "Error: can't stat file '";
+      cerr << argv[i] << "'." << endl;
+      exit(EXIT_FAILURE);
+    }
+  }
+  
+  FILE *input = stdin, *output = stdout; // defaults when the optional arguments are absent
+  if(argc >= 4)
+  {
+    input = fopen(argv[3], "r");
+    if(!input)
+    {
+      cerr << "Error: can't open input file '" << argv[3] << "'." << endl;
+      exit(EXIT_FAILURE);
+    }
+    if(argc == 5)
+    {
+      output = fopen(argv[4], "w");
+      if(!output)
+      {
+	cerr << "Error: can't open output file '";
+	cerr << argv[4] << "'." << endl;
+	exit(EXIT_FAILURE);
+      }
+    }
+  }
+#ifdef WIN32
+  _setmode(_fileno(input), _O_U8TEXT);
+  _setmode(_fileno(output), _O_U8TEXT);
+#endif 
+
+  TransferMult t;
+  t.read(argv[1], argv[2]);
+
+  t.transfer(input, output);
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_filter_ambiguity.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_filter_ambiguity.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_filter_ambiguity.cc	(revision 69632)
@@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/tsx_reader.h>
+#include <lttoolbox/compression.h>
+#include <lttoolbox/lt_locale.h>
+#include <apertium/hmm.h>
+#include <apertium/tagger_data_hmm.h>
+#include <apertium/tagger_word.h>
+#include <apertium/string_utils.h>
+
+#include <cstdlib>
+#include <iostream>
+#include <cstdio>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#ifdef _MSC_VER
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace Apertium;
+using namespace std;
+
+FILE * open_file(char const *filename, char const *mode)
+{
+  // Opens filename with the given fopen() mode; prints a message and exits
+  // with EXIT_FAILURE on any error, so the returned stream is never NULL.
+  // The existence check only applies to read modes: an output file opened
+  // with "w" may legitimately not exist yet and is created by fopen().
+  struct stat var;
+  if(mode[0] == 'r' && stat(filename, &var))
+  {
+    cerr << "Can't stat '" << filename << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  FILE *retval = fopen(filename, mode);
+  if(!retval)
+  {
+    cerr << "Can't open '" << filename << "'" << endl;
+    exit(EXIT_FAILURE);
+  }
+#ifdef _MSC_VER
+  _setmode(_fileno(retval), _O_U8TEXT);
+#endif
+  return retval;
+}
+
+int main(int argc, char *argv[])
+{ // Usage: <prog> tsx_file [input [output]]; reads the tagger specification and filters ambiguity classes.
+  LtLocale::tryToSetLocale();
+  
+  if(argc < 2 || argc > 4)
+  {
+    cerr << "USAGE: " << basename(argv[0]) << " tsx_file [input [output]]" << endl; 
+    exit(EXIT_FAILURE);
+  }
+
+  FILE *input = stdin, *output = stdout;  // defaults when the optional arguments are absent
+  switch(argc)
+  {
+    case 4:
+      output = open_file(argv[3], "w");
+      // no break
+    case 3:      
+      input = open_file(argv[2], "r");
+      // no break
+    case 2:
+    default:
+      break;
+  }   
+  
+  TSXReader reader;
+  reader.read(argv[1]);
+
+  TaggerWord::setArrayTags(reader.getTaggerData().getArrayTags());
+
+  TaggerDataHMM tdhmm(reader.getTaggerData());  
+  HMM hmm(&tdhmm);
+  hmm.filter_ambiguity_classes(input, output);
+  
+  return EXIT_SUCCESS;  
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium_re.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_re.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_re.h	(revision 69632)
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _APERTIUM_RE_
+#define _APERTIUM_RE_
+
+#include <pcre.h>
+#include <cstdio>
+#include <string>
+
+using namespace std;
+
+class ApertiumRE // Holds a PCRE pattern handle (pcre *); the member functions are defined outside this header.
+{
+private:
+  bool empty; // NOTE(review): presumably true while no pattern has been compiled or read — confirm in the implementation
+  pcre *re;
+public:
+  ApertiumRE();
+  ~ApertiumRE();
+  void read(FILE *);
+  void write(FILE *) const;
+  string match(string const &str) const;
+  void replace(string &str, string const &value) const;
+  void compile(string const &str);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/collection.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/collection.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/collection.cc	(revision 69632)
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <lttoolbox/compression.h>
+#include <apertium/collection.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+int
+Collection::size()
+{ // Number of entries in the element vector, converted to int.
+  return element.size();
+}
+
+bool 
+Collection::has_not(const set<int> &t)
+{ // True when t is not a key of the index map.
+  return index.count(t) == 0;
+}
+
+const set<int> &
+Collection::operator[](int n)
+{ // No bounds check: n must be a valid position in the element vector.
+  return *element[n];
+}
+
+int &
+Collection::operator[](const set<int> &t)
+{ // Returns the position stored for t, appending t first when it is not in the collection yet.
+  if(has_not(t))
+  { // Fix: "index[t] = index.size()-1" depended on whether size() ran before or after index[t] inserted t.
+    int const position = index.size(); // read before inserting: number of elements indexed so far
+    element.push_back(&(index.insert(make_pair(t, position)).first->first));
+  }
+  return index[t];
+}
+
+int &
+Collection::add(const set<int> &t)
+{ // Appends t without a presence check; t is inserted into index first so size() deterministically counts it.
+  map<set<int>, int>::iterator const entry = index.insert(make_pair(t, 0)).first;
+  element.push_back(&(entry->first));
+  return entry->second = index.size() - 1; // was "index[t] = index.size()-1": result depended on evaluation order
+}
+
+void
+Collection::write(FILE *output)
+{
+  // Layout: element count, then for every element its size followed by its members.
+  Compression::multibyte_write(element.size(), output);
+  for(vector<const set<int> *>::const_iterator e = element.begin(); e != element.end(); e++)
+  {
+    set<int> const &members = **e;
+    Compression::multibyte_write(members.size(), output);
+    for(set<int>::const_iterator it = members.begin(); it != members.end(); it++)
+    {
+      Compression::multibyte_write(*it, output);
+    }
+  }
+}
+
+void
+Collection::read(FILE *input)
+{
+  // Reads a set count, then for each set its size and members, adding every set with add().
+  int pending_sets = Compression::multibyte_read(input);
+  while(pending_sets-- != 0)
+  {
+    set<int> members;
+    int pending_members = Compression::multibyte_read(input);
+    while(pending_members-- != 0)
+    {
+      members.insert(Compression::multibyte_read(input));
+    }
+    add(members);
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/collection.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/collection.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/collection.h	(revision 69632)
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __COLLECTION_H
+#define __COLLECTION_H
+
+#include <cstdio>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace std;
+
+/** Collection
+ *  An indexed set of integer sets: every stored set<int> has a position.
+ */ 
+class Collection {
+  map <set<int>, int> index; // stored set -> its position
+  vector <const set<int> *> element; // position -> pointer to the key held in index
+public:
+  /** @return the number of entries held in the position vector.
+   */
+  int size (void);
+
+  /** Tests for the ABSENCE of an element (note the negated sense of
+   *  the name).
+   *  @param t the element to look for
+   *  @return true if t is not in the collection
+   */
+  bool has_not (const set<int>& t);
+
+  /** @param n position in the collection (not range-checked)
+   *  @return the element at the n-th position
+   */
+  const set<int>& operator[] (int n);
+
+  /** If the element received as a parameter does not appear in the
+   *  collection, it is added at the end.
+   *  @param t an element
+   *  @return a reference to the position stored for t in the collection.
+   */
+  int& operator[] (const set<int>& t);
+
+  /** Adds an element to the collection without first checking for its presence.
+   *  @param t the element to be added  @return a reference to its stored position
+   */  
+  int& add(const set<int>& t);
+
+  /** 
+   *  Writes the collection contents to a C stdio stream.
+   *  @param output the stream to write to
+   */
+  void write(FILE *output);
+
+  /**
+   *  Reads sets from a C stdio stream and adds each of them to the collection.
+   *  @param input the stream to read from
+   */
+  void read(FILE *input);
+};
+
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/constant_manager.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/constant_manager.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/constant_manager.cc	(revision 69632)
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/constant_manager.h>
+#include <lttoolbox/compression.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+void
+ConstantManager::copy(ConstantManager const &o)
+{ // The constant table is the only state that has to be duplicated.
+  this->constants = o.constants;
+}
+
+void
+ConstantManager::destroy()
+{ // Intentionally empty: no explicit cleanup is performed here.
+}
+
+ConstantManager::ConstantManager()
+{ // Nothing to set up: the constant table starts out default-constructed.
+}
+
+ConstantManager::~ConstantManager()
+{ // All teardown is delegated to destroy().
+  this->destroy();
+}
+
+ConstantManager::ConstantManager(ConstantManager const &o)
+  : constants(o.constants)  // copy-constructs the table; same result as default-constructing and then calling copy(o)
+{
+}
+
+ConstantManager &
+ConstantManager::operator =(ConstantManager const &o)
+{ // Self-assignment is a no-op; otherwise destroy() runs first and then o's state is copied.
+  if(this == &o)
+    return *this;
+
+  destroy();
+  copy(o);
+  return *this;
+}
+void 
+ConstantManager::setConstant(wstring const &constant, int const value)
+{ // Stores value under the given name, creating the entry or overwriting an existing one.
+  constants[constant] = value;
+}
+
+int 
+ConstantManager::getConstant(wstring const &constant)
+{ // Returns the value stored under the name, or 0 when it is unknown. Looks up without inserting: operator[] added a zero entry for every unknown name, which write() then serialised.
+  return constants.count(constant) != 0 ? constants.find(constant)->second : 0;
+}  
+
+void
+ConstantManager::write(FILE *output)
+{
+  // Layout: number of constants, then each name followed by its value.
+  Compression::multibyte_write(constants.size(), output);
+
+  for(map<wstring, int>::const_iterator entry = constants.begin(); entry != constants.end(); ++entry)
+  {
+    Compression::wstring_write(entry->first, output);
+    Compression::multibyte_write(entry->second, output);
+  }
+}
+
+void
+ConstantManager::read(FILE *input)
+{ // Replaces the current table with the entries read from input: a count, then name/value pairs.
+  constants.clear();
+  for(int done = 0, total = Compression::multibyte_read(input); done != total; done++)
+  {
+    wstring const name = Compression::wstring_read(input);
+    int const value = Compression::multibyte_read(input);
+    constants[name] = value;
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/constant_manager.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/constant_manager.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/constant_manager.h	(revision 69632)
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _CONSTANTMANAGER_
+#define _CONSTANTMANAGER_
+
+#include <cstdio>
+#include <map>
+#include <string>
+
+using namespace std;
+
+class ConstantManager // Table of named integer constants that can be written to and read from a FILE stream.
+{
+private:
+  map<wstring, int> constants; // constant name -> value
+
+  void copy(ConstantManager const &o); // helper used by the copy constructor and by operator=
+  void destroy(); // helper used by the destructor and by operator=
+public:
+  ConstantManager();
+  ~ConstantManager();
+  ConstantManager(ConstantManager const &o);
+  ConstantManager & operator =(ConstantManager const &o);
+  
+  void setConstant(wstring const &constant, int const value); // creates or overwrites the entry for `constant`
+  int getConstant(wstring const &constant); // value stored for `constant`; 0 for a name never set
+  void write(FILE *output); // serialises the table to output
+  void read(FILE *input); // clears the table, then loads it from input
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/deformat.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/deformat.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/deformat.xsl	(revision 69632)
@@ -0,0 +1,870 @@
+<?xml version="1.0" encoding="ISO-8859-1"?> <!-- -*- nxml -*- -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="text" encoding="UTF-8"/>
+
+<xsl:param name="mode"/>
+
+<xsl:template name="replaceString"> <!-- Outputs $haystack with every occurrence of $needle replaced by $replacement. -->
+  <xsl:param name="haystack"/>
+  <xsl:param name="needle"/>
+  <xsl:param name="replacement"/>
+  <xsl:choose>
+    <xsl:when test="contains($haystack, $needle)"> <!-- emit the text before the first match and the replacement, then recurse on the rest -->
+      <xsl:value-of select="substring-before($haystack, $needle)"/>
+      <xsl:value-of select="$replacement"/>
+      <xsl:call-template name="replaceString">
+	<xsl:with-param name="haystack"
+			select="substring-after($haystack, $needle)"/>
+	<xsl:with-param name="needle" select="$needle"/>
+	<xsl:with-param name="replacement" select="$replacement"/>
+      </xsl:call-template>
+    </xsl:when>
+    <xsl:otherwise> <!-- no match left: emit the remainder unchanged -->
+      <xsl:value-of select="$haystack"/>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+
+<xsl:template name="format-rule-apertium"> <!-- Emits the flex rule for the current format-rule element (apertium mode). -->
+  <xsl:choose>
+    <xsl:when test="count(./begin) = 0"> <!-- rule without a begin child: the text matched by tag/@regexp is appended to the buffer -->
+      <xsl:value-of select="./tag/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+	<xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  bufferAppend(buffer, yytext);&#xA;}&#xA;')"/>
+    </xsl:when>
+    <xsl:otherwise> <!-- rule with a begin child: buffer the match and push start condition C<n> -->
+      <xsl:variable name="thisnode" select="."/>
+      <xsl:value-of select="./begin/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+	<xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  bufferAppend(buffer, yytext);&#xA;  yy_push_state(C')"/>
+      <xsl:for-each select="/format/rules/format-rule/begin "> <!-- n is the position of the begin element whose regexp equals this rule's -->
+	<xsl:if test="./@regexp = $thisnode/begin/@regexp">
+          <xsl:value-of select="position()"/>
+	</xsl:if>
+      </xsl:for-each>
+      <xsl:value-of select="string(');&#xA;}&#xA;')"/>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+<xsl:template name="format-rule-matxin"> <!-- Emits the flex rule for the current format-rule element (matxin mode), selected by its type attribute. -->
+  <xsl:choose>
+    <xsl:when test="./@type = 'open'"> <!-- opening tag: flush the buffer, then record the tag text with its offset and order -->
+      <xsl:value-of select="./tag/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+        <xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  printBuffer();&#xA;')"/>
+
+      <xsl:value-of select="string('  if (hasWrite_white) {&#xA;    fputws(L&quot; &quot;, yyout);&#xA;')"/>
+      <xsl:value-of select="string('    offset++;&#xA;    hasWrite_white = false;&#xA;  }&#xA;')"/>
+
+      <xsl:value-of select="string('  current++;&#xA;  orders.push_back(current);&#xA;')"/>
+      <xsl:value-of select="string('  last=&quot;open_tag&quot;;&#xA;  offsets.push_back(offset);&#xA;')"/>
+      <xsl:value-of select="string('  wchar_t* symbol = new wchar_t[strlen(yytext) + 1];&#xA;')"/>
+      <xsl:value-of select="string('  mbstowcs(symbol, yytext, strlen(yytext));&#xA;')"/>
+      <xsl:value-of select="string('  symbol[strlen(yytext)] = (char) 0;&#xA;')"/>
+      <xsl:value-of select="string('  tags.push_back(symbol);&#xA;  delete[] symbol;&#xA;}&#xA;')"/>
+    </xsl:when>
+    <xsl:when test="./@type = 'close'"> <!-- closing tag: find the matching recorded tag and hand both to printBuffer -->
+      <xsl:value-of select="./tag/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+        <xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  int ind=get_index(yytext);&#xA;')"/>
+      <xsl:value-of select="string('  printBuffer(ind, yytext);&#xA;}&#xA;')"/>
+    </xsl:when>
+    <xsl:when test="./@type = 'comment'"> <!-- comment start: buffer the match and push start condition C<n> -->
+      <xsl:variable name="thisnode" select="."/>
+      <xsl:value-of select="./begin/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+        <xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  last = &quot;buffer&quot;;&#xA;')"/>
+      <xsl:value-of select="string('  bufferAppend(buffer, yytext);&#xA;  yy_push_state(C')"/>
+      <xsl:for-each select="/format/rules/format-rule[@type='comment']"> <!-- n is the position of the comment rule with the same begin regexp -->
+        <xsl:if test="./begin/@regexp = $thisnode/begin/@regexp">
+          <xsl:value-of select="position()"/>
+        </xsl:if>
+      </xsl:for-each>
+      <xsl:value-of select="string(');&#xA;}&#xA;')"/>
+    </xsl:when>
+    <xsl:otherwise> <!-- any other type: the matched text is simply appended to the buffer -->
+      <xsl:value-of select="./tag/@regexp"/>
+      <xsl:value-of select="string('&#x9;{&#xA;')"/>
+      <xsl:if test="./@eos = string('yes')">
+        <xsl:value-of select="string('  isDot = true;&#xA;')"/>
+      </xsl:if>
+      <xsl:value-of select="string('  last = &quot;buffer&quot;;&#xA;')"/>
+      <xsl:value-of select="string('  bufferAppend(buffer, yytext);&#xA;}&#xA;')"/>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+<xsl:template match="format">
+
+%{
+
+#include &lt;cstdlib&gt;
+#include &lt;iostream&gt;
+#include &lt;map&gt;
+#include &lt;string&gt;
+#include &lt;vector&gt;
+
+extern "C" {
+#if !defined(__STDC__)
+# define __STDC__ 1
+#endif
+#include &lt;regex.h&gt;
+}
+
+#include &lt;string&gt;
+#include &lt;lttoolbox/lt_locale.h&gt;
+#include &lt;lttoolbox/ltstr.h&gt;
+#ifndef GENFORMAT
+#include "apertium_config.h"
+#endif
+#include &lt;apertium/unlocked_cstdio.h&gt;
+#ifdef _WIN32
+#include &lt;io.h&gt;
+#include &lt;fcntl.h&gt;
+#endif
+
+using namespace std;
+
+wstring buffer;
+string symbuf;
+bool isDot, hasWrite_dot, hasWrite_white;
+bool eosIncond;
+bool noDot;
+FILE *formatfile;
+string last;
+int current;
+long int offset;
+
+
+vector&lt;long int&gt; offsets;
+vector&lt;wstring&gt; tags;
+vector&lt;int&gt; orders;
+
+regex_t escape_chars;
+regex_t names_regexp;
+
+void bufferAppend(wstring &amp;buf, string const &amp;str)
+{ // Decodes str from multibyte to wide characters and appends it to buf; undecoded trailing bytes stay in the global symbuf for the next call.
+  symbuf.append(str);
+
+  for(size_t i = 0, limit = symbuf.size(); i &lt; limit;)
+  {
+    wchar_t symbol;
+    int gap = mbtowc(&amp;symbol, symbuf.c_str() + i, MB_CUR_MAX);
+    if(gap == -1)
+    {
+      if(i + MB_CUR_MAX &lt; limit)
+      { // more than MB_CUR_MAX bytes remain, so the sequence is treated as invalid: emit a question mark and skip one byte
+        buf += L'?';
+        gap = 1;
+      }
+      else
+      { // near the end of the data the sequence may just be incomplete: keep the tail pending and stop
+        symbuf = symbuf.substr(i);
+        return;
+      }
+    }
+    else
+    {
+      buf += symbol;
+    }
+
+    i += gap;
+  }
+
+  symbuf = "";
+  return;
+}
+
+
+void init_escape()
+{ // Compiles /format/options/escape-chars/@regexp (backslashes doubled for the C string literal) into escape_chars; exits on failure.
+  if(regcomp(&amp;escape_chars, "<xsl:call-template name="replaceString">
+      <xsl:with-param name="haystack"
+		      select="/format/options/escape-chars/@regexp"/>
+      <xsl:with-param name="needle" select="string('\')"/>
+      <xsl:with-param name="replacement" select="string('\\')"/>
+    </xsl:call-template>", REG_EXTENDED))
+  {
+    cerr &lt;&lt; "ERROR: Illegal regular expression for escape characters" &lt;&lt; endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
+void init_tagNames()
+{ // Compiles /format/options/tag-name/@regexp (backslashes doubled for the C string literal) into names_regexp; exits on failure.
+  if(regcomp(&amp;names_regexp, "<xsl:call-template name="replaceString">
+      <xsl:with-param name="haystack"
+		      select="/format/options/tag-name/@regexp"/>
+      <xsl:with-param name="needle" select="string('\')"/>
+      <xsl:with-param name="replacement" select="string('\\')"/>
+    </xsl:call-template>", REG_EXTENDED))
+  {
+    cerr &lt;&lt; "ERROR: Illegal regular expression for tag-names" &lt;&lt; endl;
+    exit(EXIT_FAILURE);
+  }
+}
+
+string backslash(string const &amp;str)
+{
+  // Returns a copy of str in which every backslash is doubled;
+  // all other characters are copied through unchanged.
+  string doubled;
+
+  for(string::const_iterator ch = str.begin(); ch != str.end(); ++ch)
+  {
+    // A backslash is appended twice, anything else once.
+    size_t const copies = (*ch == '\\') ? 2 : 1;
+    doubled.append(copies, *ch);
+  }
+
+  return doubled;
+}
+
+
+wstring escape(string const &amp;str)
+{ // Returns str as a wide string where each match of escape_chars is replaced by a backslash plus the first character of the match.
+  regmatch_t pmatch;
+
+  char const *mystring = str.c_str();
+  int base = 0; // offset in str at which the next search starts
+  wstring result;
+
+  while(!regexec(&amp;escape_chars, mystring + base, 1, &amp;pmatch, 0))
+  {
+    bufferAppend(result, str.substr(base, pmatch.rm_so)); // text before the match; rm_so is relative to base
+    result += L'\\';
+    wchar_t micaracter; // the character being escaped
+    int pos = mbtowc(&amp;micaracter, str.c_str() + base + pmatch.rm_so, MB_CUR_MAX);
+    if(pos == -1)
+    {
+      wcerr &lt;&lt; L"Uno" &lt;&lt; endl;
+      wcerr &lt;&lt; L"Encoding error." &lt;&lt; endl;
+      exit(EXIT_FAILURE);
+    }
+
+    result += micaracter;
+    base += pmatch.rm_eo; // continue after the end of the match
+  }
+
+  bufferAppend(result, str.substr(base)); // remainder after the last match
+  return result;
+}
+
+wstring escape(wstring const &amp;str)
+{ // Wide-string overload: converts str to a multibyte string and delegates to escape(string).
+  string dest;
+
+  for(size_t i = 0, limit = str.size(); i &lt; limit; i++)
+  {
+#ifdef __GNUC__
+    char symbol[MB_CUR_MAX+1]; // variable-length array, used only when compiling with GNU C++
+#else
+    std::string _symbol(MB_CUR_MAX+1, 0);
+    char *symbol = &amp;_symbol[0];
+#endif
+    int pos = wctomb(symbol, str[i]);
+    if(pos == -1)
+    { // character not representable in the current encoding: substitute a question mark
+      symbol[0]='?';
+      pos = 1;
+    }
+    symbol[pos] = 0;
+    dest.append(symbol);
+  }
+  return escape(dest);
+}
+
+string get_tagName(string tag){
+  // Returns the first substring of tag matched by names_regexp,
+  // or an empty string when the expression does not match.
+  regmatch_t span;
+
+  if(regexec(&amp;names_regexp, tag.c_str(), 1, &amp;span, 0) != 0)
+  {
+    return "";
+  }
+
+  string const name = tag.substr(span.rm_so, span.rm_eo - span.rm_so);
+  return name;
+}
+
+
+<xsl:for-each select="./rules/replacement-rule"> <!-- For the n-th replacement-rule, emits a map named S<n>_substitution and an S<n>_init() function that fills it from the replace children (source to target). -->
+  <xsl:variable name="varname"
+		select="concat(concat(string('S'),position()),string('_substitution'))"/>
+  <xsl:value-of select="string('map&lt;string, wstring, Ltstr&gt; S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_substitution;&#xA;&#xA;void S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_init()&#xA;{')"/>
+
+  <xsl:for-each select="./replace"> <!-- one assignment per replace child -->
+    <xsl:value-of select="string('&#xA;  ')"/>
+    <xsl:value-of select="$varname"/>
+    <xsl:value-of select="string('[&quot;')"/>
+    <xsl:value-of select="./@source"/>
+    <xsl:value-of select="string('&quot;] = L&quot;')"/>
+    <xsl:value-of select="./@target"/>
+    <xsl:value-of select="string('&quot;;')"/>
+  </xsl:for-each>
+
+  <xsl:value-of select="string('&#xA;}&#xA;')"/>
+</xsl:for-each>
+
+<xsl:if test="$mode=string('matxin')">
+int get_index(string end_tag){
+  // Scans `tags` from the last entry backwards and returns the index of the
+  // first one whose tag name (per get_tagName) equals that of end_tag, or -1.
+  size_t pos;
+
+  for (int i=tags.size()-1; i >= 0; i--) {
+    // Room for every wide character at its largest multibyte size plus a
+    // terminator.  The old `new char (n)` allocated ONE char, so wcstombs
+    // wrote past it and delete[] did not match the allocation.
+    vector&lt;char&gt; tmp((tags[i].size()+1) * MB_CUR_MAX, '\0');
+
+    pos = wcstombs(&amp;tmp[0], tags[i].c_str(), tmp.size() - 1); // last byte stays 0, so the result is always terminated
+    if (pos == (size_t)-1)
+    {
+      wcerr &lt;&lt; L"Encoding error." &lt;&lt; endl;
+      exit(EXIT_FAILURE);
+    }
+    string const new_end_tag(&amp;tmp[0]);
+
+    if (get_tagName(end_tag) == get_tagName(new_end_tag))
+      return i;
+  }
+
+  return -1;
+}
+
+void print_emptyTags() { // Writes one format-tag element to formatfile for every entry still held in `tags`.
+  wchar_t tag[250];
+
+  for (size_t i=0; i &lt; tags.size(); i++) {
+    swprintf(tag, 250, L"&lt;format-tag offset=\"%ld\" order= \"%d\"&gt;&lt;![CDATA[", offsets[i], orders[i]); // offsets holds long int values, so the conversion is %ld (it was %d)
+    fputws(tag, formatfile);
+    fputws(tags[i].c_str(), formatfile);
+    fputwc(L']', formatfile); // first bracket of the CDATA terminator; the rest follows below
+    swprintf(tag, 250, L"]&gt;&lt;/format-tag&gt;\n");
+    fputws(tag, formatfile);
+  }
+}
+</xsl:if>
+
+<xsl:choose>
+  <xsl:when test="$mode=string('matxin')">
+void printBuffer(int ind=-1, string end_tag="")
+{ // Handles the pending format text in `buffer`; ind is the index in `tags` of the opening tag matching end_tag, or -1 when there is none. Offsets are long int and are printed with %ld (they were printed with %d).
+  wchar_t tag[250];
+  wstring etiketa;
+  wstring wend_tag;
+  size_t pos;
+  int num;
+  wchar_t result[end_tag.size() + 1];
+
+  // Convert end_tag to wstring
+  pos = mbstowcs(result, end_tag.c_str(), end_tag.size());
+  if (pos == (size_t) -1)
+  {
+    wcerr &lt;&lt; L"Encoding error." &lt;&lt; endl;
+    exit(EXIT_FAILURE);
+  }
+  result[pos] = L'\0';
+  wend_tag = result;
+
+  if (ind != -1 &amp;&amp; ind == tags.size()-1 &amp;&amp;
+      offsets[ind] == offset &amp;&amp; orders[ind] == current)
+  { // the matching opening tag is the last one recorded and nothing was output since: fold tag, buffer and end tag back into the buffer
+    last = "buffer";
+    buffer = tags.back() + buffer + wend_tag;
+    tags.pop_back();
+    offsets.pop_back();
+    orders.pop_back();
+  }
+  else if (ind == -1 &amp;&amp; wend_tag != L"")
+  { // closing tag without a recorded opening tag: keep it as plain format text
+    last = "buffer";
+    buffer = buffer + wend_tag;
+  }
+  else
+  {
+    if (hasWrite_dot &amp;&amp; isDot)
+    {
+      swprintf(tag, 250, L"&lt;empty-tag offset=\"%ld\"/&gt;\n", offset+1);
+      fputws(tag, formatfile);
+
+      fputws(L" .\n", yyout);
+      offset += 2;
+      hasWrite_dot = false;
+    }
+
+    isDot = false;
+
+    if ((buffer.size() == 1 &amp;&amp; buffer[0] != ' ') || buffer.size() &gt; 1)
+    { // real format content: record it in formatfile as a format-tag element
+      if (hasWrite_white)
+      {
+        fputws(L" ", yyout);
+        offset++;
+        hasWrite_white = false;
+      }
+
+      current++;
+
+      swprintf(tag, 250, L"&lt;format-tag offset=\"%ld\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
+      fputws(tag, formatfile);
+      while ((pos = buffer.find(L"]]&gt;")) != wstring::npos) // a CDATA terminator inside the data would end the section early
+        buffer.replace(pos, 3, L"\\]\\]\\&gt;");
+      fputws(buffer.c_str(), formatfile);
+      swprintf(tag, 250, L"]]&gt;&lt;/format-tag&gt;\n");
+      fputws(tag, formatfile);
+    }
+    else
+    { // empty buffer or a single blank: copy it to the text output
+      fputws(buffer.c_str(), yyout);
+      offset += buffer.size();
+    }
+
+
+    if (ind != -1)
+    { // a matching opening tag exists: write the pair as an open-close-tag element and forget the opening tag
+      if (hasWrite_white)
+      {
+        fputws(L" ", yyout);
+        offset++;
+        hasWrite_white = false;
+      }
+
+      num = swprintf(tag, 250, L"&lt;open-close-tag&gt;\n");
+      swprintf(tag + num, 250 - num, L"&lt;open-tag offset=\"%ld\" order=\"%d\"&gt;&lt;![CDATA[", offsets[ind], orders[ind]);
+      fputws(tag, formatfile);
+      etiketa = tags[ind];
+      while ((pos = etiketa.find(L"]]&gt;")) != wstring::npos)
+        etiketa.replace(pos, 3, L"\\]\\]\\&gt;");
+      fputws(etiketa.c_str(), formatfile);
+
+      current++;
+
+      num = swprintf(tag, 250, L"]]&gt;&lt;/open-tag&gt;\n");
+      swprintf(tag + num, 250 - num, L"&lt;close-tag offset=\"%ld\" order=\"%d\"&gt;&lt;![CDATA[", offset, current);
+      fputws(tag, formatfile);
+      while ((pos = wend_tag.find(L"]]&gt;")) != wstring::npos)
+        wend_tag.replace(pos, 3, L"\\]\\]\\&gt;");
+      fputws(wend_tag.c_str(), formatfile);
+      num = swprintf(tag, 250, L"]]&gt;&lt;/close-tag&gt;\n");
+      swprintf(tag + num, 250 - num, L"&lt;/open-close-tag&gt;\n");
+      fputws(tag, formatfile);
+
+      tags.erase(tags.begin() + ind);
+      offsets.erase(offsets.begin() + ind);
+      orders.erase(orders.begin() + ind);
+    }
+
+
+    last = "buffer";
+    buffer = L"";
+  }
+
+}
+  </xsl:when>
+  <xsl:otherwise>
+
+void preDot()
+{
+  // Does nothing unless eosIncond is set.  When it is, writes an
+  // empty bracket pair to yyout, preceded by a dot unless noDot
+  // is set.
+  if(!eosIncond)
+  {
+    return;
+  }
+
+  wchar_t const *marker = noDot ? L"[]" : L".[]";
+
+  fputws_unlocked(marker, yyout);
+}
+
+void printBuffer()
+{ // Writes the pending format text in `buffer` to yyout (bracketed unless it is empty or a single blank) and then empties the buffer.
+  if(isDot &amp;&amp; !eosIncond)
+  {
+    if(noDot)
+    {
+      fputws_unlocked(L"[]", yyout);
+    }
+    else
+    {
+      fputws_unlocked(L".[]", yyout);
+    }
+    isDot = false;
+  }
+  if(buffer.size() &gt; <xsl:value-of select="/format/options/largeblocks/@size"/>)
+  { // block above the configured largeblocks size: store it in a temporary file and emit a bracketed at-sign plus the file name instead
+    string filename = tmpnam(NULL);
+    FILE *largeblock = fopen(filename.c_str(), "w"); // NOTE(review): the result of fopen is not checked
+    fputws_unlocked(buffer.c_str(), largeblock);
+    fclose(largeblock);
+    preDot();
+    fputwc_unlocked(L'[', yyout);
+    fputwc_unlocked(L'@', yyout);
+    wchar_t cad[filename.size() + 1]; // one extra element for the terminator stored below; the array used to be one short
+    size_t pos = mbstowcs(cad, filename.c_str(), filename.size());
+    if(pos == (size_t) -1)
+    {
+      wcerr &lt;&lt; L"Tres" &lt;&lt; endl;
+
+      wcerr &lt;&lt; L"Encoding error." &lt;&lt; endl;
+      exit(EXIT_FAILURE);
+    }
+    cad[pos] = 0;
+    fputws_unlocked(cad, yyout);
+    fputwc_unlocked(L']', yyout);
+  }
+  else if(buffer.size() &gt; 1)
+  {
+    preDot();
+    fputwc_unlocked(L'[', yyout);
+    wstring const tmp = escape(buffer);
+    if(tmp[0] == L'@')
+    { // escape a leading at-sign so the block is not read as a file reference
+      fputwc_unlocked(L'\\', yyout);
+    }
+    fputws_unlocked(tmp.c_str(), yyout);
+    fputwc_unlocked(L']', yyout);
+  }
+  else if(buffer.size() == 1 &amp;&amp; buffer[0] != L' ')
+  {
+    preDot();
+    fputwc_unlocked(L'[', yyout);
+    wstring const tmp = escape(buffer);
+    if(tmp[0] == L'@')
+    {
+      fputwc_unlocked(L'\\', yyout);
+    }
+    fputws_unlocked(tmp.c_str(), yyout);
+
+    fputwc_unlocked(L']', yyout);
+  }
+  else
+  { // empty buffer or a single blank: written as it is, without brackets
+    fputws_unlocked(buffer.c_str(), yyout);
+  }
+
+  buffer = L"";
+}
+  </xsl:otherwise>
+</xsl:choose>
+%}
+
+<xsl:if test="count(./rules/format-rule[@type='comment']) &gt; 0"> <!-- Declares the exclusive start conditions C1..Cn, one per comment rule. The test was "greater than 1", which left C1 undeclared for a format with exactly one comment rule although the generated rules still reference it. -->
+<xsl:value-of select="string('%x')"/>
+<xsl:for-each select="./rules/format-rule[@type='comment']">
+  <xsl:value-of select="string(' C')"/>
+  <xsl:value-of select="position()"/>
+</xsl:for-each>
+</xsl:if>
+%option nounput
+%option noyywrap<xsl:if test="./options/case-sensitive/@value=string('no')">
+%option caseless</xsl:if>
+%option stack
+
+%%
+
+<xsl:for-each select="./rules/format-rule[@type='comment']"> <!-- For the n-th comment rule, emits the rule set that is active inside start condition C<n>. -->
+  <xsl:variable name="sc"
+                select="concat(string('C'), position())"/>
+  <xsl:variable name="thisnode" select="."/>
+&lt;<xsl:value-of select="$sc"/>&gt;{
+
+<xsl:for-each select="/format/rules/format-rule"> <!-- only rules whose priority is numerically lower than this comment rule's are repeated inside the condition -->
+  <xsl:sort select="./@priority" data-type="number" order="ascending"/>
+  <xsl:choose>
+    <xsl:when test="$thisnode/@priority &gt; ./@priority">
+      <xsl:value-of select="string('&#x9;')"/>
+      <xsl:choose>
+        <xsl:when test="$mode=string('matxin')">
+          <xsl:call-template name="format-rule-matxin"/>
+        </xsl:when>
+        <xsl:otherwise>
+          <xsl:call-template name="format-rule-apertium"/>
+        </xsl:otherwise>
+      </xsl:choose>
+    </xsl:when>
+    <xsl:otherwise/>
+  </xsl:choose>
+</xsl:for-each>
+
+<xsl:value-of select="string('&#x9;')"/><xsl:value-of select="./end/@regexp"/>&#x9;{
+  last = "buffer";
+  bufferAppend(buffer, yytext);
+  yy_pop_state();
+}
+
+&#x9;\n|.&#x9;{
+  last = "buffer";
+  bufferAppend(buffer, yytext);
+}
+
+}
+</xsl:for-each>
+
+<xsl:for-each select="./rules/format-rule"> <!-- Emits one top-level flex rule per format-rule, using the template that matches $mode. -->
+  <xsl:choose>
+    <xsl:when test="$mode=string('matxin')">
+      <xsl:call-template name="format-rule-matxin"/>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:call-template name="format-rule-apertium"/>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:for-each>
+
+
+<xsl:for-each select="./rules/replacement-rule"> <!-- Emits one flex rule per replacement-rule: text found in the S<n>_substitution map is flushed and written as its replacement; anything else is appended to the buffer. -->
+  <xsl:variable name="varname"
+		select="concat(concat(string('S'),position()),string('_substitution'))"/>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:value-of select="./@regexp"/>
+  <xsl:value-of select="string('&#x9;{&#xA;  if(')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('.find(yytext) != ')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('.end())&#xA;  {&#xA;    printBuffer();&#xA;    fputws_unlocked(')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('[yytext].c_str(), yyout);&#xA;    offset+=')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('[yytext].size();&#xA;')"/>
+  <xsl:value-of select="string('    hasWrite_dot = hasWrite_white = true;&#xA;  }&#xA;  else&#xA;  {&#xA;')"/>
+  <xsl:value-of select="string('    last=&quot;buffer&quot;;&#xA;    bufferAppend(buffer, yytext);&#xA;  }&#xA;}&#xA;')"/>
+</xsl:for-each>
+
+<xsl:value-of select="./options/space-chars/@regexp"/>&#x9;{
+  if (last == "open_tag") /* space right after a recorded opening tag is kept with that tag */
+    bufferAppend(tags.back(), yytext);
+  else /* otherwise it joins the pending format text */
+    bufferAppend(buffer, yytext);
+
+}
+
+<xsl:value-of select="./options/escape-chars/@regexp"/>&#x9;{
+  printBuffer(); /* flush pending format text before writing the escaped character */
+  fputwc_unlocked(L'\\', yyout);
+  offset++;
+  wchar_t symbol;
+  int pos = mbtowc(&amp;symbol, yytext, MB_CUR_MAX); /* decode the matched character */
+  if(pos == -1)
+  {
+      wcerr &lt;&lt; L"Cuatro" &lt;&lt; endl;
+
+    wcerr &lt;&lt; L"Encoding error." &lt;&lt; endl;
+    exit(EXIT_FAILURE);
+  }
+
+  fputwc_unlocked(symbol, yyout);
+  offset++;
+  hasWrite_dot = hasWrite_white = true;
+
+}
+
+.&#x9;{
+  printBuffer(); /* flush pending format text first */
+  symbuf += yytext; /* bytes are collected in symbuf until they decode to a character */
+  wchar_t symbol;
+  int pos = mbtowc(&amp;symbol, symbuf.c_str(), MB_CUR_MAX);
+  if(pos == -1)
+  {
+    if(symbuf.size() > (size_t) MB_CUR_MAX)
+    {
+      // unknown character
+      symbuf = "";
+      fputwc_unlocked(L'?', yyout);
+      offset++;
+      hasWrite_dot = hasWrite_white = true;
+    }
+  }
+  else
+  { /* a complete character was decoded: write it and reset the byte buffer */
+    symbuf = "";
+    fputwc_unlocked(symbol, yyout);
+    offset++;
+    hasWrite_dot = hasWrite_white = true;
+  }
+}
+
+&lt;&lt;EOF&gt;&gt;&#x9;{
+  isDot = true; /* end of input is treated as an end of sentence */
+
+  preDot();
+  printBuffer(); /* flush whatever format text is still pending */
+  return 0; /* ends yylex */
+}
+%%
+
+
+
+void usage(string const &amp;progname)
+{ // Prints the command-line synopsis for the current mode and the format name to cerr, then terminates the program.
+<xsl:choose>
+  <xsl:when test="$mode=string('matxin')">
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " format_file [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  </xsl:when>
+  <xsl:otherwise>
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [ -h | -i | -n ] [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  </xsl:otherwise>
+</xsl:choose>
+  cerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
+  exit(EXIT_SUCCESS); // NOTE(review): also reached when a file cannot be opened, yet the exit status is success; confirm this is intended
+}
+
+int main(int argc, char *argv[])
+{ // Entry point of the generated deformatter: parses an optional flag and the file operands, initialises the state and runs the lexer.
+  LtLocale::tryToSetLocale();
+  size_t base = 0; // number of leading option arguments consumed (0 or 1)
+  eosIncond = false;
+
+  if(argc &gt;= 2)
+  {
+    if(!strcmp(argv[1],"-i"))
+    {
+      eosIncond = true;
+      base++;
+    }
+    else if(!strcmp(argv[1],"-n"))
+    {
+      noDot = true;
+      base++;
+    }
+  }
+<xsl:choose>
+  <xsl:when test="$mode=string('matxin')">
+  if(argc &gt; 4 || argc &lt; 2)
+  {
+    usage(argv[0]);
+  }
+
+  switch(argc-base)
+  {
+    case 4:
+      yyout = fopen(argv[3+base], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      } // no break: falls through to open the input file as well
+    case 3:
+      yyin = fopen(argv[2+base], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      } // no break: falls through to open the format file as well
+    case 2:
+      formatfile = fopen(argv[1+base], "w");
+      if(!formatfile)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default:
+      break;
+  }
+  </xsl:when>
+  <xsl:otherwise>
+ if((argc-base) &gt; 4)
+  {
+    usage(argv[0]);
+  }
+
+  switch(argc-base) // NOTE(review): a value of 4 passes the check above but matches no case, so the operands are ignored
+  {
+    case 3:
+      yyout = fopen(argv[2+base], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      } // no break: falls through to open the input file as well
+    case 2:
+      yyin = fopen(argv[1+base], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default:
+      break;
+  }
+  </xsl:otherwise>
+</xsl:choose>
+#ifdef _WIN32
+  _setmode(_fileno(yyin), _O_U8TEXT);
+  _setmode(_fileno(yyout), _O_U8TEXT);
+#endif
+  // prevent warning message
+  yy_push_state(1);
+  yy_top_state();
+  yy_pop_state();
+
+<xsl:for-each select="./rules/replacement-rule">
+  <xsl:value-of select="string('  S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_init();&#xA;')"/>
+</xsl:for-each>
+
+<xsl:if test="$mode=string('matxin')">
+  fputws(L"&lt;?xml version=\&quot;1.0\&quot; encoding=\&quot;UTF-8\&quot; ?>\n", formatfile);
+  fputws(L"&lt;format&gt;\n", formatfile);
+</xsl:if>
+
+  last = "";
+  buffer = L"";
+  isDot = hasWrite_dot = hasWrite_white = false;
+  current=0;
+  offset = 0;
+  init_escape();
+  init_tagNames();
+  yylex();
+
+<xsl:if test="$mode=string('matxin')">
+  print_emptyTags(); // tags still recorded after lexing are written out before the format file is closed
+  fputws(L"&lt;/format&gt;", formatfile);
+  fclose(formatfile);
+</xsl:if>
+  fclose(yyin);
+  fclose(yyout);
+}
+</xsl:template>
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/endian_double_util.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/endian_double_util.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/endian_double_util.cc	(revision 69632)
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <cstdio>
+#include <iostream>
+#include <apertium/endian_double_util.h>
+#include <apertium/apertium_config.h>
+#include <apertium/unlocked_cstdio.h>
+
+using namespace std;
+
+double 
+EndianDoubleUtil::read(FILE *input) // reads one double from input; byte order is reversed unless WORDS_BIGENDIAN is defined
+{
+  double retval; // NOTE(review): not initialized; stays indeterminate if the WORDS_BIGENDIAN fread below reads nothing
+#ifdef WORDS_BIGENDIAN
+  fread_unlocked(&retval, sizeof(double), 1, input); // raw copy without byte swapping; the return value is not checked
+#else
+  char *s = reinterpret_cast<char *>(&retval); // byte view of retval
+
+  for(int i = sizeof(double)-1; i != -1; i--) // fill retval from its last byte down to its first
+  {
+    if(fread_unlocked(&(s[i]), 1, 1, input)==0)
+    {
+      return 0; // short read: give up and report 0
+    }
+  } 
+#endif
+  return retval;
+}
+
+double
+EndianDoubleUtil::read(istream &is) // C++ stream variant: reads one double from is
+{
+  double retval; // NOTE(review): never initialized and no read below is checked, so a short stream yields an indeterminate value
+#ifdef WORDS_BIGENDIAN
+  is.read((char *) &retval, sizeof(double)); // raw copy without byte swapping
+#else
+  char *s = reinterpret_cast<char *>(&retval); // byte view of retval
+
+  for(int i = sizeof(double)-1; i != -1; i--) // fill retval from its last byte down to its first
+  {
+    is.read(&(s[i]), sizeof(char));
+  } 
+#endif
+  return retval;    
+}
+  
+void 
+EndianDoubleUtil::write(FILE *output, double const &val) // writes val to output; byte order is reversed unless WORDS_BIGENDIAN is defined
+{
+  double val2 = val; // local copy so the bytes can be addressed through a non-const pointer
+#ifdef WORDS_BIGENDIAN
+  fwrite(&val2, sizeof(double), 1, output); // raw copy without byte swapping; the return value is not checked
+#else
+  char *s = reinterpret_cast<char *>(&val2); // byte view of the copy
+    
+  for(int i = sizeof(double)-1; i != -1; i--) // emit the last byte first, the first byte last
+  {
+    fwrite(&(s[i]), 1, 1, output); // NOTE(review): write errors are ignored here
+  }
+#endif
+}
+
+void 
+EndianDoubleUtil::write(ostream &os, double const &val) // C++ stream variant: writes val to os
+{
+  double val2 = val; // local copy so the bytes can be addressed through a non-const pointer
+#ifdef WORDS_BIGENDIAN
+  os.write(reinterpret_cast<char *>(&val2), sizeof(double)); // raw copy without byte swapping
+#else
+  char *s = reinterpret_cast<char *>(&val2); // byte view of the copy
+    
+  for(int i = sizeof(double)-1; i != -1; i--) // emit the last byte first, the first byte last
+  {
+    os.write(&(s[i]), sizeof(char));
+  }
+#endif
+}
Index: branches/apertium-tagger/apertium2/apertium/endian_double_util.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/endian_double_util.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/endian_double_util.h	(revision 69632)
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _ENDIANDOUBLEUTIL_
+#define _ENDIANDOUBLEUTIL_
+
+#include <cctype>
+#include <cstdio>
+#include <iostream>
+
+using namespace std;
+
+/**
+ * Static helpers that read and write double values in an endian-aware way
+ */
+class EndianDoubleUtil
+{
+public:
+  /**
+   * Read procedure.
+   * @param input the stream to read from.
+   * @returns the double read from the current position of the stream
+   */
+  static double read(FILE *input);
+
+  /**
+   * Read procedure, C++ I/O version.
+   * @param is the stream to read from.
+   * @returns the double read from the current position of the stream
+   */
+  static double read(istream &is);
+  
+  /**
+   * Write procedure.
+   * @param output the stream to write to
+   * @param val the double value to write to the stream
+   */
+  static void write(FILE *output, double const &val);
+  
+  /**
+   * Write procedure, C++ I/O version.
+   * @param os the stream to write to
+   * @param val the double value to write to the stream
+   */
+  static void write(ostream &os, double const &val);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/format.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/format.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/format.dtd	(revision 69632)
@@ -0,0 +1,141 @@
+<!--
+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+      DTD for the format specification files
+      Sergio Ortiz  2005.05.13
+-->
+
+
+<!ELEMENT format (options,rules)> 
+<!ATTLIST format name CDATA #REQUIRED> 
+<!--
+      'format' is the root element containing the whole format specification
+      file.  The attribute 'name' specifies the name of the format
+-->
+
+<!ELEMENT options (largeblocks,input,output, tag-name, escape-chars,space-chars,case-sensitive)>
+<!--
+      General options of the format 
+-->
+		
+<!ELEMENT largeblocks EMPTY>
+<!ATTLIST largeblocks size CDATA #REQUIRED>
+<!-- 
+      The attribute size is used to define the maximal size in bytes of
+      inline format blocks
+-->
+		
+<!ELEMENT input EMPTY>
+<!ATTLIST input zip-path CDATA #IMPLIED>
+<!ATTLIST input encoding CDATA #REQUIRED>
+<!--  
+      Reserved for future extensions
+-->
+		
+<!ELEMENT output EMPTY>
+<!ATTLIST output zip-path CDATA #IMPLIED>
+<!ATTLIST output encoding CDATA #REQUIRED>
+<!--
+      Reserved for future extensions
+-->
+
+<!ELEMENT tag-name EMPTY>
+<!ATTLIST tag-name regexp CDATA #REQUIRED>
+<!--
+      The attribute regexp defines (with a _flex_ regular expression) how
+      to extract a tag name from a whole tag.
+-->
+
+<!ELEMENT escape-chars EMPTY>
+<!ATTLIST escape-chars regexp CDATA #REQUIRED>
+<!--
+      The attribute regexp defines (with a _flex_ regular expression) the
+      set of characters to be escaped with a preceding backslash '\'
+-->
+
+<!ELEMENT space-chars EMPTY>
+<!ATTLIST space-chars regexp CDATA #REQUIRED>
+<!--
+      Define the space characters (in regexp) with a _flex_ regular 
+      expression
+-->
+
+<!ELEMENT case-sensitive EMPTY>
+<!ATTLIST case-sensitive value (yes|no) #REQUIRED>
+<!--
+      The attribute 'value' is set to 'yes' if the case is relevant in the 
+      specification of the format.  Otherwise is set to 'no'
+-->
+
+		
+<!ELEMENT rules (format-rule|replacement-rule)+>
+<!--
+      Group the rules of processing format and the rules of substitute 
+      expressions by characters that are part of the text
+-->
+
+<!ELEMENT format-rule (tag|(begin,end))>
+<!ATTLIST format-rule type (comment|empty|open|close) #IMPLIED>
+<!ATTLIST format-rule eos (yes|no) #IMPLIED>
+<!ATTLIST format-rule priority CDATA #REQUIRED>
+<!--
+      Format rule parent element.  It may include a 'tag' element or
+      a couple of elements 'begin', 'end'.  In the first case, this element is 
+      considered to be part of the format.  In the second case, the 'begin' and
+      'end' elements are considered to enclose format.  The attribute
+      'eos' (end of sentence) is set to 'yes' if that rule defines a dot in
+      the text being processed (is no by default).  The attribute 'priority' 
+      marks the order of precedence of the rule
+-->
+      
+<!ELEMENT tag EMPTY>
+<!ATTLIST tag regexp CDATA #REQUIRED>
+<!--
+      Define an element that is part of the format by the pattern specified
+      as a value for the regexp attribute
+-->
+
+<!ELEMENT begin EMPTY>
+<!ATTLIST begin regexp CDATA #REQUIRED>
+<!--
+      The attribute 'regexp' is the regular expression that detects the
+      beginning delimiter of a block of format
+-->
+
+<!ELEMENT end EMPTY>
+<!ATTLIST end regexp CDATA #REQUIRED>
+<!--
+      The attribute 'regexp' is the regular expression that detects the
+      ending delimiter of a block of format
+-->
+
+<!ELEMENT replacement-rule (replace+)>
+<!ATTLIST replacement-rule regexp CDATA #REQUIRED>
+<!-- 
+      Root element for a replacement rule.  The attribute 'regexp' is the
+      general expression to detect the elements to replace
+-->
+
+<!ELEMENT replace EMPTY>
+<!ATTLIST replace source CDATA #REQUIRED>
+<!ATTLIST replace target CDATA #REQUIRED>
+<!ATTLIST replace prefer (yes|no) #IMPLIED>
+<!--  
+      Replacement rule.  The 'source' is a string of one or more characters.
+      The 'target' MUST be a single character.  The 'prefer' attribute, when 
+      set to 'yes' defines the preferred reverse translation of the 
+      replacement.
+-->
Index: branches/apertium-tagger/apertium2/apertium/interchunk.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk.dtd	(revision 69632)
@@ -0,0 +1,442 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- 
+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    Draft of DTD for the structural transfer rule files 
+     
+    Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada,
+    2005.07.29. 
+-->    
+
+<!ENTITY % condition "(and|or|not|equal|begins-with|begins-with-list|ends-with|ends-with-list|contains-substring|in)">
+<!ENTITY % container "(var|clip)">
+<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append)">
+<!ENTITY % value "(b|clip|lit|lit-tag|var|get-case-from|case-of|concat|chunk)">
+<!ENTITY % stringvalue "(clip|lit|var|get-case-from|case-of)">
+
+<!ELEMENT interchunk (section-def-cats, section-def-attrs, section-def-vars, section-def-lists?, section-def-macros?, section-rules)>
+<!-- 
+     'interchunk' is the root element containing the whole structural
+     interchunk rule file.  
+-->
+
+<!ELEMENT section-def-cats (def-cat+)>
+<!-- 
+     The 'def-cats' section defines the categories used to build the
+patterns used in rules
+ -->
+
+<!ELEMENT def-cat (cat-item+)>
+<!ATTLIST def-cat  n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each 'def-cat' defines one category in terms of a list of
+     category items and has a unique name 'n', which is mandatory
+-->
+
+<!ELEMENT cat-item EMPTY>
+<!ATTLIST cat-item lemma CDATA #IMPLIED 
+                   tags CDATA #REQUIRED 
+                   c CDATA #IMPLIED> 
+<!-- 
+          Each 'cat-item' (category item) represents a set of lexical forms
+     and has a mandatory attribute 'tags' whose value is a sequence of
+     dot-separated tag names; this sequence is a subsequence of the
+     tag sequence defining each possible lexical form. For example,
+     tags="n.f" would match all lexical forms containing this tag
+     sequence, such as "^casa<n><f><pl>$".
+
+     In addition, an optional attribute, "lemma", may be used to
+     define lexical forms having a particular substring in their lemma
+-->
+ 
+<!ELEMENT section-def-attrs (def-attr+)>
+
+<!-- 
+     The 'def-attrs' section defines the attributes that will be
+     identified in matched lexical forms 
+-->
+
+<!ELEMENT def-attr (attr-item+)>
+<!ATTLIST def-attr n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each def-attr defines one attribute in terms of a list of
+     attribute items and has a mandatory unique name n 
+-->
+
+<!ELEMENT attr-item EMPTY>
+<!ATTLIST attr-item tags CDATA #IMPLIED
+                    c CDATA #IMPLIED>
+<!-- 
+     Each 'attr-item' specifies a subsequence of the tags in
+     that lexical form (attribute 'tags')
+-->
+
+<!ELEMENT section-def-vars (def-var+)>
+<!-- 
+     The 'def-vars' section defines the global variables
+     that will be used to transfer information between rules
+-->
+
+<!ELEMENT def-var EMPTY>
+<!ATTLIST def-var n ID #REQUIRED
+                  v CDATA #IMPLIED
+                  c CDATA #IMPLIED>
+<!-- 
+     The definition of a global variable has a mandatory unique name 'n' that
+     will be used to refer to it. An initial value can also be specified
+     by means of the 'v' attribute.  The default initial value is the
+     empty string.
+-->
+
+<!ELEMENT section-def-lists (def-list)+>
+<!--
+     Element 'section-def-lists' encloses a set of list definitions
+-->
+
+<!ELEMENT def-list (list-item+)>
+<!ATTLIST def-list n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!--
+     The 'def-list' element defines a named list to search with the 'in' 
+     element.  Attribute 'n' sets the name of the list
+-->
+
+<!ELEMENT list-item EMPTY>
+<!ATTLIST list-item v CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!--
+     Attribute 'v' of 'list-item' element contains the value to be added to 
+     the list being defined     
+-->
+
+<!ELEMENT section-def-macros (def-macro)+>
+<!-- 
+
+     The 'def-macros' section defines macros containing portions of
+     code frequently used in the action part of rules
+
+-->
+
+<!ELEMENT def-macro (%sentence;)+>
+<!ATTLIST def-macro n ID #REQUIRED>
+<!ATTLIST def-macro npar CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!-- 
+     Macro definition:
+     
+     A macro has a mandatory name (the value of 'n'), a number of parameters
+     (the value of 'npar') and a body containing arguments and statements.  
+-->
+
+<!ELEMENT section-rules (rule+)>
+<!-- 
+     The rules section contains a sequence of one or more rules
+-->
+
+<!ELEMENT rule (pattern, action)>
+<!ATTLIST rule comment CDATA #IMPLIED>
+<!-- 
+      Each rule has a pattern and an action 
+      * attribute 'comment' allows to put in comments about the purpose of
+        the rule being defined
+-->
+
+<!ELEMENT pattern (pattern-item+)>
+<!-- 
+The pattern is specified in terms of pattern items, each one
+representing a lexical form in the matched pattern 
+-->
+
+<!ELEMENT pattern-item EMPTY>
+<!ATTLIST pattern-item n IDREF #REQUIRED>
+<!-- 
+       Each pattern item refers, by its name, to a category defined in the def-cats section
+-->
+
+<!ELEMENT action (%sentence;)*>
+<!ATTLIST action c CDATA #IMPLIED>
+<!-- 
+       Encloses the procedural part of a rule
+-->
+
+<!ELEMENT choose (when+,otherwise?)>
+<!ATTLIST choose c CDATA #IMPLIED>
+<!-- 
+     The choose statement is a selection statement (similar to a case
+     statement) composed of one or more tested cases and an optional
+     otherwise 
+-->
+
+<!ELEMENT when (test,(%sentence;)*)>
+<!ATTLIST when c CDATA #IMPLIED>
+<!-- 
+     Each tested case is a block of zero or more statements 
+-->
+
+<!ELEMENT otherwise (%sentence;)+>
+<!ATTLIST otherwise c CDATA #IMPLIED>
+<!-- 
+     The otherwise case is also a block of one or more statements 
+-->
+
+<!ELEMENT test (%condition;)>
+<!ATTLIST test c CDATA #IMPLIED>
+<!-- 
+     The test in a tested case may be a conjunction, a disjunction, or
+     a negation of simpler tests, as well as a simple equality test
+-->
+
+<!ELEMENT and ((%condition;),(%condition;)+)>
+<!--  
+     Each conjunction test contains two or more simpler tests
+-->
+
+<!ELEMENT or ((%condition;),(%condition;)+)>
+<!-- 
+     Each disjunction test contains two or more simpler tests 
+-->
+
+<!ELEMENT not (%condition;)>
+<!-- 
+     The negation of a simpler test is a test itself 
+-->
+
+<!ELEMENT equal (%value;,%value;)> 
+<!ATTLIST equal caseless (no|yes) #IMPLIED>
+<!-- 
+      The simplest test is an equality test. The right part and the
+      left part of the equality may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with (%value;,%value;)> 
+<!ATTLIST begins-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the beginning.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT ends-with (%value;,%value;)> 
+<!ATTLIST ends-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the end.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with-list (%value;,list)> 
+<!ATTLIST begins-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the beginning.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+<!ELEMENT ends-with-list (%value;,list)> 
+<!ATTLIST ends-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the end.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+<!ELEMENT contains-substring (%value;,%value;)> 
+<!ATTLIST contains-substring caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+
+
+<!ELEMENT in (%value;, list)>
+<!ATTLIST in caseless (no|yes) #IMPLIED>
+<!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+-->
+
+<!ELEMENT list EMPTY>
+<!ATTLIST list n IDREF #REQUIRED>
+<!--
+    'list' refers, by the name in attribute 'n', to a list defined before in
+    the 'section-def-lists' section
+-->
+
+<!ELEMENT let (%container;, %value;)>
+<!-- 
+      An assignment statement ('let') assigns the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip
+-->
+
+<!ELEMENT append (%value;)+>
+<!ATTLIST append n IDREF #REQUIRED>
+<!-- 
+      This instruction appends the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip, identified by the "n" attribute
+-->
+
+
+<!ELEMENT out (b|chunk|var)+>
+<!ATTLIST out c CDATA #IMPLIED>
+<!-- 
+      'out' is an output statement; it may output blanks, chunks or variables
+-->
+
+<!ELEMENT modify-case (%container;, %stringvalue;)>
+<!--
+      The first argument of 'modify-case' copies the case of the second
+      argument.
+--> 
+
+<!ELEMENT call-macro (with-param)*>
+<!ATTLIST call-macro n IDREF #REQUIRED>
+<!-- 
+      A macro may be called anywhere by name with zero or more
+      arguments
+-->
+
+<!ELEMENT with-param EMPTY>
+<!ATTLIST with-param pos CDATA #REQUIRED>
+<!-- 
+      The attribute pos in each argument is used to refer to a lexical
+      form in the current rule. For example, if a 2-parameter macro
+      has been defined to perform noun-adjective agreement operations,
+      it may be used with arguments 1 and 2 in a noun-adjective rule,
+      with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+      arguments 1 and 3 in a noun-adverb-adjective rule, and with
+      arguments 2 and 1 in an adjective-noun rule 
+-->
+
+<!ELEMENT clip EMPTY>
+<!ATTLIST clip pos CDATA #REQUIRED
+               part CDATA #REQUIRED
+               c CDATA #IMPLIED>
+<!-- 
+      A 'clip' is a substring of a source-language or target-language
+      lexical form, extracted according to an attribute:
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+
+-->
+
+<!ELEMENT lit EMPTY>
+<!ATTLIST lit v CDATA #REQUIRED>
+<!-- 
+      A literal string value: the value of the literal is the value of
+      the 'v' attribute
+-->
+
+<!ELEMENT lit-tag EMPTY>
+<!ATTLIST lit-tag v CDATA #REQUIRED>
+<!-- 
+      A literal tag value: the value of the literal is the value of
+      the 'v' attribute
+-->
+
+
+<!ELEMENT var EMPTY>
+<!ATTLIST var n IDREF #REQUIRED>
+<!-- 
+     Each 'var' is a variable identifier: the attribute n is the name
+     of the variable. When it is in an 'out', a 'test', or the right
+     part of a 'let', it represents the value of the variable; when in
+     the left part of a 'let' it represents the reference of the
+     variable. 
+-->
+
+<!ELEMENT get-case-from (clip|lit|var)> 
+<!ATTLIST get-case-from pos CDATA #REQUIRED>
+<!-- Attention: all the comments that involve get-case-from still need
+to be updated -->
+
+<!ELEMENT case-of EMPTY>
+<!ATTLIST case-of pos CDATA #REQUIRED
+               part CDATA #REQUIRED>
+<!--
+      A 'case-of' is a value representing the case of a "clip".  This value
+      will be "aa" (all lowercase), "Aa" (first uppercase) or "AA"
+      (all uppercase).
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+-->
+
+
+<!ELEMENT concat (%value;)+>
+<!-- Concatenates a sequence of values -->
+
+<!ELEMENT chunk (%value;)+>
+<!-- 
+     Encloses a chunk      
+-->
+
+<!ELEMENT pseudolemma (%value;)>
+
+<!ELEMENT b EMPTY>
+<!ATTLIST b pos CDATA #IMPLIED>
+<!-- 
+     'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+     with pos="2" refers to the [super]blanks (including format data
+     encapsulated by the de-formatter) between lexical form 2 and
+     lexical form 3. Managing [super]blanks explicitly allows for the
+     correct placement of format when the result of structural
+     transfer has more or less lexical items than the original or has
+     been reordered in some way.  If attribute "pos" is not specified, then
+     a single blank (ASCII 32) is generated.
+-->
Index: branches/apertium-tagger/apertium2/apertium/interchunk_word.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk_word.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk_word.cc	(revision 69632)
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/interchunk_word.h>
+#include <iostream>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+void InterchunkWord::copy(InterchunkWord const &o) // member-wise copy of o's state into *this
+{
+  this->chunk = o.chunk; // chunk name and tags
+  this->queue = o.queue; // chunk content; was previously omitted, so copies lost their queue
+}
+
+void
+InterchunkWord::destroy() // intentionally empty: both data members are strings that clean up after themselves
+{
+}
+
+InterchunkWord::InterchunkWord() // default construction leaves chunk and queue as empty strings
+{
+}
+
+InterchunkWord::InterchunkWord(string const &chunk) // builds a word directly from a chunk string
+{
+  init(chunk); // splits the text into this->chunk and this->queue
+}
+
+InterchunkWord::~InterchunkWord()
+{
+  destroy(); // cleanup hook; its body is currently empty
+}
+
+InterchunkWord::InterchunkWord(InterchunkWord const &o) // copy constructor
+{
+  copy(o); // all copying logic lives in copy()
+}
+
+InterchunkWord &
+InterchunkWord::operator =(InterchunkWord const &o) // returns *this so assignments can be chained
+{
+  if(this != &o) // guard against self-assignment
+  {
+    destroy(); // release the current state first
+    copy(o); // then take over o's state
+  }
+  return *this;
+}
+
+void
+InterchunkWord::init(string const &chunk) // splits the text at the first unescaped '{' into this->chunk and this->queue
+{
+  for(size_t i = 0; i < chunk.size(); i++)
+  {
+    if(chunk[i] == '\\') // a backslash escapes the following character
+    {
+      i++; // skip the escaped character so an escaped '{' is not treated as a separator
+    }
+    else if(chunk[i] == '{') // first unescaped '{' marks where the queue starts
+    {
+      this->chunk = chunk.substr(0, i); // everything before the '{'
+      this->queue = chunk.substr(i); // from the '{' (inclusive) to the end
+      return;
+    }
+  }
+  this->chunk = chunk; // no unescaped '{' found: the whole text is the chunk
+  this->queue = ""; // and the queue is empty
+}
+
+string
+InterchunkWord::chunkPart(ApertiumRE const &part) // returns (by value) what part matches in this word
+{
+  string result = part.match(chunk); // try the chunk first; presumably match() yields the matched text or "" - confirm in ApertiumRE
+  if(result.size() == 0) // nothing found in chunk: fall back to the queue
+  {
+    result = part.match(queue);
+    if(result.size() != queue.size()) // only a result as long as the whole queue is accepted
+    {
+      return "";
+    }
+    else
+    {
+      return result;
+    }
+  }
+  else if(result.size() == chunk.size()) // result is as long as the whole chunk: match again over chunk+queue
+  {
+    return part.match(chunk+queue);
+  }
+  else
+  {
+    return result; // partial result within chunk
+  }
+}
+
+void
+InterchunkWord::setChunkPart(ApertiumRE const &part, string const &value) // rewrites the part of chunk selected by part
+{
+  part.replace(chunk, value); // only chunk is passed in; queue is left untouched
+}
Index: branches/apertium-tagger/apertium2/apertium/interchunk_word.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/interchunk_word.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/interchunk_word.h	(revision 69632)
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _INTERCHUNKWORD_
+#define _INTERCHUNKWORD_
+
+#include <apertium/apertium_re.h>
+#include <map>
+#include <string>
+
+using namespace std;
+
+/**
+ * Word type for transfer modules: one chunk, stored as two strings
+ */
+class InterchunkWord
+{
+private:
+  /**
+   * Target language chunk name and tags (text before the first unescaped '{')
+   */
+  string chunk;
+  
+  /**
+   * Target language chunk content (from the first unescaped '{' to the end)
+   */
+  string queue;   
+     
+  /**
+   * Copy method, shared by the copy constructor and the assignment operator
+   * @param o the object to be copied
+   */
+  void copy(InterchunkWord const &o);
+  
+  /**
+   * Destroy method, called by the destructor and before assignment
+   */
+  void destroy();
+  
+public:
+  /**
+   * Default constructor
+   */
+  InterchunkWord();
+  /**
+   * Destructor
+   */
+  ~InterchunkWord();
+  
+  /**
+   * Copy constructor
+   * @param o the object to be copied
+   */
+  InterchunkWord(InterchunkWord const &o);
+  
+  /**
+   * Parametric constructor calling init()
+   * @param chunk the full chunk text
+   */
+  InterchunkWord(string const &chunk);
+  
+  /**
+   * Assignment operator
+   * @param o the object to be assigned
+   * @return reference to the left-hand side of the assignment
+   */
+  InterchunkWord & operator =(InterchunkWord const &o);
+
+  /**
+   * Sets the chunk, splitting it at the first unescaped '{'
+   * @param chunk the full chunk text
+   */
+  void init(string const &chunk);
+  
+  /**
+   * Gets a chunk part
+   * @param part regular expression to match
+   * @returns a copy of the part of the string matched
+   */ 
+  string chunkPart(ApertiumRE const &part);
+
+  /**
+   * Sets a value for a chunk part
+   * @param part regular expression to match
+   * @param value the new value for the given part
+   */
+  void setChunkPart(ApertiumRE const &part, string const &value);
+
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/latex_accentsmap.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/latex_accentsmap.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/latex_accentsmap.cc	(revision 69632)
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/latex_accentsmap.h>
+
+using namespace std;
+
+
+// Fills the table for the requested direction (true: character -> accent).
+AccentsMap::AccentsMap(bool char2latex) {
+	// init_acmap() itself starts by calling init_camap().
+	if(!char2latex) { init_acmap(); return; }
+	init_camap();
+}
+
+// Nothing to release explicitly.
+AccentsMap::~AccentsMap(){}
+
+// Adds, for every character -> accent entry, the inverse accent -> character one.
+void AccentsMap::init_acmap() {
+	init_camap();
+	// Walk a snapshot of the forward entries so that the loop does not
+	// iterate over the container it is inserting into.
+	const acmap forward(map);
+	for (acmap::const_iterator i = forward.begin(); i != forward.end(); ++i)
+		map[i->second] = i->first;
+}
+
+void AccentsMap::init_camap() {
+	// Character -> accent sequence: the accent mark followed by the base letter.
+	map[L"à"] = L"`a"; // Grave accent
+	map[L"è"] = L"`e";
+	map[L"ì"] = L"`\\i";
+	map[L"ò"] = L"`o";
+	map[L"ù"] = L"`u";
+	map[L"ỳ"] = L"`y";
+	map[L"À"] = L"`A";
+	map[L"È"] = L"`E";
+	map[L"Ì"] = L"`I";
+	map[L"Ò"] = L"`O";
+	map[L"Ù"] = L"`U";
+	map[L"Ỳ"] = L"`Y";
+	map[L"á"] = L"'a"; // Acute accent
+	map[L"é"] = L"'e";
+	map[L"í"] = L"'\\i";
+	map[L"ó"] = L"'o";
+	map[L"ú"] = L"'u";
+	map[L"ý"] = L"'y";
+	map[L"Á"] = L"'A";
+	map[L"É"] = L"'E";
+	map[L"Í"] = L"'I";
+	map[L"Ó"] = L"'O";
+	map[L"Ú"] = L"'U";
+	map[L"Ý"] = L"'Y";
+	map[L"â"] = L"^a"; // Circumflex
+	map[L"ê"] = L"^e";
+	map[L"î"] = L"^\\i";
+	map[L"ô"] = L"^o";
+	map[L"û"] = L"^u";
+	map[L"ŷ"] = L"^y";
+	map[L"Â"] = L"^A";
+	map[L"Ê"] = L"^E";
+	map[L"Î"] = L"^I";
+	map[L"Ô"] = L"^O";
+	map[L"Û"] = L"^U";
+	map[L"Ŷ"] = L"^Y";
+	map[L"ä"] = L"\"a";    // Umlaut or dieresis
+	map[L"ë"] = L"\"e";
+	map[L"ï"] = L"\"\\i";
+	map[L"ö"] = L"\"o";
+	map[L"ü"] = L"\"u";
+	map[L"ÿ"] = L"\"y";
+	map[L"Ä"] = L"\"A";
+	map[L"Ë"] = L"\"E";
+	map[L"Ï"] = L"\"I";
+	map[L"Ö"] = L"\"O";
+	map[L"Ü"] = L"\"U";
+	map[L"Ÿ"] = L"\"Y";
+	// Tilde
+	map[L"ñ"] = L"~n";
+	map[L"Ñ"] = L"~N";
+  
+	map[L"ç"] = L"cc";   // Cedilla
+	map[L"Ç"] = L"cC";
+	// Note: the sequences are stored without a leading backslash; only the
+	// lower-case i entries carry one, inside the sequence (e.g. `\i).
+}
+
+// Looks up input; yields the mapped string, or an empty one when absent.
+wstring AccentsMap::get(wstring input){
+	it = map.find(input); // the result is kept in the member iterator
+	if(it != map.end())
+		return it->second;
+	return L"";
+}
+
+// Optional: adopts the environment's locale for the C library and for wcout.
+void AccentsMap::init_locale(){
+	const char *name = setlocale(LC_ALL, "");
+	// setlocale() returns a null pointer on failure and std::locale(NULL) throws:
+	// in that case wcout keeps its current locale.
+	if(name) wcout.imbue(std::locale(name));
+}
+
+
+
+/*latexAccents = [
+	map[L"à"] = L"\\`a"; # Grave accent
+	map[L"è"] = L"\\`e";
+	map[L"ì"] = L"\\`\\i";
+	map[L"ò"] = L"\\`o";
+	map[L"ù"] = L"\\`u";
+	map[L"ỳ"] = L"\\`y";
+	map[L"À"] = L"\\`A";
+	map[L"È"] = L"\\`E";
+	map[L"Ì"] = L"\\`\\I";
+	map[L"Ò"] = L"\\`O";
+	map[L"Ù"] = L"\\`U";
+	map[L"Ỳ"] = L"\\`Y";
+	map[L"á"] = L"\\'a"; # Acute accent
+	map[L"é"] = L"\\'e";
+	map[L"í"] = L"\\'\\i";
+	map[L"ó"] = L"\\'o";
+	map[L"ú"] = L"\\'u";
+	map[L"ý"] = L"\\'y";
+	map[L"Á"] = L"\\'A";
+	map[L"É"] = L"\\'E";
+	map[L"Í"] = L"\\'\\I";
+	map[L"Ó"] = L"\\'O";
+	map[L"Ú"] = L"\\'U";
+	map[L"Ý"] = L"\\'Y";
+	map[L"â"] = L"\\^a"; # Circumflex
+	map[L"ê"] = L"\\^e";
+	map[L"î"] = L"\\^\\i";
+	map[L"ô"] = L"\\^o";
+	map[L"û"] = L"\\^u";
+	map[L"ŷ"] = L"\\^y";
+	map[L"Â"] = L"\\^A";
+	map[L"Ê"] = L"\\^E";
+	map[L"Î"] = L"\\^\\I";
+	map[L"Ô"] = L"\\^O";
+	map[L"Û"] = L"\\^U";
+	map[L"Ŷ"] = L"\\^Y";
+	map[L"ä"] = L"\\\"a";    # Umlaut or dieresis
+	map[L"ë"] = L"\\\"e";
+	map[L"ï"] = L"\\\"\\i";
+	map[L"ö"] = L"\\\"o";
+	map[L"ü"] = L"\\\"u";
+	map[L"ÿ"] = L"\\\"y";
+	map[L"Ä"] = L"\\\"A";
+	map[L"Ë"] = L"\\\"E";
+	map[L"Ï"] = L"\\\"\\I";
+	map[L"Ö"] = L"\\\"O";
+	map[L"Ü"] = L"\\\"U";
+	map[L"Ÿ"] = L"\\\"Y";
+	map[L"ç"] = L"\\c{c}";   # Cedilla
+	map[L"Ç"] = L"\\c{C}";
+	map[L"œ"] = L"{\\oe}";   # Ligatures
+	map[L"Œ"] = L"{\\OE}";
+	map[L"æ"] = L"{\\ae}";
+	map[L"Æ"] = L"{\\AE}";
+	map[L"å"] = L"{\\aa}";
+	map[L"Å"] = L"{\\AA}";
+	map[L"–"] = L"--";   # Dashes
+	map[L"—"] = L"---";
+	map[L"ø"] = L"{\\o}";    # Misc latin-1 letters
+	map[L"Ø"] = L"{\\O}";
+	map[L"ß"] = L"{\\ss}";
+	map[L"¡"] = L"{!`}";
+	map[L"¿"] = L"{?`}";
+	map[L"\\"] = L"\\\\";    # Characters that should be quoted
+	map[L"~"] = L"\\~";
+	map[L"&"] = L"\\&";
+	map[L"$"] = L"\\$";
+	map[L"{"] = L"\\{";
+	map[L"}"] = L"\\}";
+	map[L"%"] = L"\\%";
+	map[L"#"] = L"\\#";
+	map[L"_"] = L"\\_";
+	map[L"≥"] = L"$\\ge$";   # Math operators
+	map[L"≤"] = L"$\\le$";
+	map[L"≠"] = L"$\\neq$";
+	map[L"©"] = L"\copyright"; # Misc
+	map[L"ı"] = L"{\\i}";
+	map[L"µ"] = L"$\\mu$";
+	map[L"°"] = L"$\\deg$";
+	map[L"‘"] = L"`";    #Quotes
+	map[L"’"] = L"'";
+	map[L"“"] = L"``";
+	map[L"”"] = L"''";
+	map[L"‚"] = L",";
+	map[L"„"] = L",,";
+]*/
+
+
+
+
+
+
Index: branches/apertium-tagger/apertium2/apertium/latex_accentsmap.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/latex_accentsmap.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/latex_accentsmap.h	(revision 69632)
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <map>
+#include <iostream>
+#include <cwchar>
+#include <string>
+#include <cstring>
+#include <locale>
+#include <lttoolbox/ltstr.h>
+
+using namespace std;
+
+/*struct Ltstr // Already in lttoolbox/ltstr.h
+{
+  bool operator()(wstring const &s1, wstring const &s2) const
+  {
+    return wcscmp(s1.c_str(), s2.c_str()) < 0;
+  }
+};
+*/
+#ifndef APERTIUM_LATEX_ACCENTSMAP_H
+#define APERTIUM_LATEX_ACCENTSMAP_H
+// Lookup table between accented characters and accent sequences.
+class AccentsMap {
+	typedef std::map<wstring, wstring, Ltstr> acmap;
+	private:
+		acmap           map; // The lookup table served by get()
+		acmap::iterator it;  // Iterator for searching
+		void init_acmap(); // fills the table for the accent -> character direction
+		void init_camap(); // fills the table for the character -> accent direction
+	public:
+		// char2accent selects the direction: true = character -> accent.
+		AccentsMap(bool char2accent);
+		~AccentsMap();
+		// Optionally
+		void init_locale();
+		// The getter for both directions depending on init.
+		wstring get(wstring input);
+};
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lexchoice.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lexchoice.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lexchoice.xsl	(revision 69632)
@@ -0,0 +1,172 @@
+<?xml version="1.0" encoding="ISO-8859-1"?> <!-- -*- nxml -*- -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="xml" encoding="ISO-8859-1"/>
+<xsl:param name="r2l"/>
+<!-- Simple elements are rebuilt one to one; l and r contribute only their content -->
+<xsl:template match="s">
+  <s n="{@n}"/>
+</xsl:template>
+
+<xsl:template match="b">
+  <b/>
+</xsl:template>
+
+<xsl:template match="g">
+  <g><xsl:apply-templates select="node()"/></g>
+</xsl:template>
+
+<xsl:template match="a">
+  <a/>
+</xsl:template>
+
+<xsl:template match="j">
+  <j/>
+</xsl:template>
+
+<xsl:template match="l">
+  <xsl:apply-templates select="*|text()"/>
+</xsl:template>
+
+<xsl:template match="r">
+  <xsl:apply-templates select="*|text()"/>
+</xsl:template>
+
+<xsl:template match="par">
+  <par n="{@n}"/>
+</xsl:template>
+
+<xsl:template match="re">
+  <re><xsl:apply-templates select="node()"/></re>
+</xsl:template>
+<!-- p: builds both l and r from one side of the pair: the r side unless $r2l is 'yes', else the l side. When the parent has srl (slr if $r2l is 'yes'), r gets "__" plus that value before its s elements -->
+<xsl:template match="p">
+  <p>
+  <xsl:choose>
+    <xsl:when test="not($r2l=string('yes'))">
+      <l><xsl:apply-templates select="./r/*|./r/text()"/></l>
+      <xsl:choose>
+	<xsl:when test="not($r2l=string('yes')) and not(count(../@srl)=0)">
+	  <r><xsl:apply-templates select="./r/text()|./r/*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@srl"/><xsl:apply-templates select="./r/*[name(.)=string('s')]"/></r>
+	</xsl:when>
+	<xsl:otherwise>
+	  <r><xsl:apply-templates select="./r/*|./r/text()"/></r>
+	</xsl:otherwise>
+      </xsl:choose>
+    </xsl:when>
+    <xsl:otherwise>
+      <l><xsl:apply-templates select="./l/*|./l/text()"/></l>
+      <xsl:choose>
+      <xsl:when test="$r2l=string('yes') and not(count(../@slr)=0)">
+	<r><xsl:apply-templates select="./l/text()|./l/*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@slr"/><xsl:apply-templates select="./l/*[name(.)=string('s')]"/></r>
+      </xsl:when>
+      <xsl:otherwise>
+	<r><xsl:apply-templates select="./l/*|./l/text()"/></r>
+      </xsl:otherwise>
+    </xsl:choose>
+
+    </xsl:otherwise>
+  </xsl:choose>
+  </p>
+</xsl:template>
+<!-- i: expanded into a p whose l and r both come from the content of i. When the parent has srl (slr if $r2l is 'yes'), r gets "__" plus that value before its s elements -->
+<xsl:template match="i">
+ <!-- <i><xsl:apply-templates/></i> -->
+
+  <p>
+    <xsl:choose>
+    <xsl:when test="not($r2l=string('yes'))">
+      <l><xsl:apply-templates select="*|text()"/></l>
+      <xsl:choose>
+	<xsl:when test="not($r2l=string('yes')) and not(count(../@srl)=0)">
+	  <r><xsl:apply-templates select="text()|*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@srl"/><xsl:apply-templates select="*[name(.)=string('s')]"/></r>
+	</xsl:when>
+	<xsl:otherwise>
+	  <r><xsl:apply-templates select="*|text()"/></r>
+	</xsl:otherwise>
+      </xsl:choose>
+    </xsl:when>
+    <xsl:otherwise>
+      <l><xsl:apply-templates select="*|text()"/></l>
+      <xsl:choose>
+      <xsl:when test="$r2l=string('yes') and not(count(../@slr)=0)">
+	<r><xsl:apply-templates select="text()|*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@slr"/><xsl:apply-templates select="*[name(.)=string('s')]"/></r>
+      </xsl:when>
+      <xsl:otherwise>
+	<r><xsl:apply-templates select="*|text()"/></r>
+      </xsl:otherwise>
+    </xsl:choose>
+
+    </xsl:otherwise>
+  </xsl:choose> 
+  </p> 
+</xsl:template>
+
+
+<!-- TO DO: support for i tags -->
+<!-- e: an entry is dropped when its r attribute is "RL" and $r2l is 'yes', or "LR" and $r2l is not 'yes'; otherwise it is rebuilt from its child elements -->
+<xsl:template match="e">
+  <xsl:choose>
+    <xsl:when test="$r2l='yes'">
+      <xsl:if test="not(@r='RL')">
+        <e><xsl:apply-templates select="*"/></e>
+      </xsl:if>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:if test="not(@r='LR')">
+        <e><xsl:apply-templates select="*"/></e>
+      </xsl:if>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+<!-- dictionary: copies alphabet and sdefs verbatim, then re-emits every pardef (if there are any) and every section with their contents run through the templates; line feeds are written between the parts -->
+<xsl:template match="dictionary">
+<dictionary>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:copy-of select="./alphabet"/>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:copy-of select="./sdefs"/>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:if test="not(count(./pardefs/pardef)=0)">
+    <pardefs>
+  <xsl:value-of select="string('&#xA;')"/>
+
+      <xsl:for-each select="./pardefs/pardef">
+  <xsl:value-of select="string('&#xA;')"/>
+
+	<pardef n="{./@n}">
+	  <xsl:apply-templates/>
+	</pardef>
+      </xsl:for-each>
+  <xsl:value-of select="string('&#xA;')"/>
+
+    </pardefs>
+  </xsl:if>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:for-each select="./section">
+    <section id="{./@id}" type="{./@type}">
+      <xsl:apply-templates/>
+    </section>
+  </xsl:for-each>
+</dictionary>
+
+</xsl:template>
+
+
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/lexchoicebil.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lexchoicebil.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lexchoicebil.xsl	(revision 69632)
@@ -0,0 +1,169 @@
+<?xml version="1.0" encoding="ISO-8859-1"?> <!-- -*- nxml -*- -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="xml" encoding="ISO-8859-1"/>
+<xsl:param name="r2l"/>
+<!-- Simple elements are rebuilt one to one; l and r contribute only their content -->
+<xsl:template match="s">
+  <s n="{@n}"/>
+</xsl:template>
+
+<xsl:template match="b">
+  <b/>
+</xsl:template>
+
+<xsl:template match="g">
+  <g><xsl:apply-templates select="node()"/></g>
+</xsl:template>
+
+<xsl:template match="a">
+  <a/>
+</xsl:template>
+
+<xsl:template match="j">
+  <j/>
+</xsl:template>
+
+<xsl:template match="l">
+  <xsl:apply-templates select="*|text()"/>
+</xsl:template>
+
+<xsl:template match="r">
+  <xsl:apply-templates select="*|text()"/>
+</xsl:template>
+
+<xsl:template match="par">
+  <par n="{@n}"/>
+</xsl:template>
+
+<xsl:template match="re">
+  <re><xsl:apply-templates select="node()"/></re>
+</xsl:template>
+<!-- p: unless $r2l is 'yes' the sides are swapped (l is built from r, r from l); otherwise they are kept. When the parent has srl (slr if $r2l is 'yes'), l gets "__" plus that value before its s elements -->
+<xsl:template match="p">
+  <p>
+  <xsl:choose>
+    <xsl:when test="not($r2l=string('yes'))">
+      <xsl:choose>
+	<xsl:when test="not($r2l=string('yes')) and not(count(../@srl)=0)">
+	  <l><xsl:apply-templates select="./r/text()|./r/*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@srl"/><xsl:apply-templates select="./r/*[name(.)=string('s')]"/></l>
+	</xsl:when>
+	<xsl:otherwise>
+	  <l><xsl:apply-templates select="./r/*|./r/text()"/></l>
+	</xsl:otherwise>
+      </xsl:choose>
+      <r><xsl:apply-templates select="./l/*|./l/text()"/></r>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:choose>
+        <xsl:when test="($r2l=string('yes')) and not(count(../@slr)=0)">
+          <l><xsl:apply-templates select="./l/text()|./l/*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@slr"/><xsl:apply-templates select="./l/*[name(.)=string('s')]"/></l>
+        </xsl:when>
+        <xsl:otherwise>
+	  <l><xsl:apply-templates select="./l/*|./l/text()"/></l>
+        </xsl:otherwise>
+      </xsl:choose>
+      <r><xsl:apply-templates select="./r/*|./r/text()"/></r>
+    </xsl:otherwise>
+  </xsl:choose>
+  </p>
+</xsl:template>
+
+<!-- i: expanded into a p whose l and r both come from the content of i. When the parent has srl (slr if $r2l is 'yes'), l gets "__" plus that value before its s elements -->
+<xsl:template match="i">
+  <p>
+  <xsl:choose>
+    <xsl:when test="not($r2l=string('yes'))">
+      <xsl:choose>
+	<xsl:when test="not($r2l=string('yes')) and not(count(../@srl)=0)">
+	  <l><xsl:apply-templates select="text()|*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@srl"/><xsl:apply-templates select="*[name(.)=string('s')]"/></l>
+	</xsl:when>
+	<xsl:otherwise>
+	  <l><xsl:apply-templates select="*|text()"/></l>
+	</xsl:otherwise>
+      </xsl:choose>
+      <r><xsl:apply-templates select="*|text()"/></r>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:choose>
+        <xsl:when test="($r2l=string('yes')) and not(count(../@slr)=0)">
+          <l><xsl:apply-templates select="text()|*[not(name(.)=string('s'))]"/>__<xsl:apply-templates select="../@slr"/><xsl:apply-templates select="*[name(.)=string('s')]"/></l>
+        </xsl:when>
+        <xsl:otherwise>
+	  <l><xsl:apply-templates select="*|text()"/></l>
+        </xsl:otherwise>
+      </xsl:choose>
+      <r><xsl:apply-templates select="*|text()"/></r>
+    </xsl:otherwise>
+  </xsl:choose>
+  </p>
+</xsl:template>
+
+<!-- e: entries with i="yes" are dropped; so are those whose r attribute is "RL" when $r2l is 'yes', or "LR" when it is not; the rest are rebuilt from their child elements -->
+<xsl:template match="e">
+  <xsl:choose>
+    <xsl:when test="@i='yes'">
+    </xsl:when>
+    <xsl:when test="$r2l='yes'">
+      <xsl:if test="not(@r='RL')">
+        <e><xsl:apply-templates select="*"/></e>
+      </xsl:if>
+    </xsl:when>
+    <xsl:otherwise>
+      <xsl:if test="not(@r='LR')">
+        <e><xsl:apply-templates select="*"/></e>
+      </xsl:if>
+    </xsl:otherwise>
+  </xsl:choose>
+</xsl:template>
+
+<!-- dictionary: copies alphabet and sdefs verbatim, then re-emits every pardef (if there are any) and every section with their contents run through the templates; line feeds are written between the parts -->
+<xsl:template match="dictionary">
+<dictionary>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:copy-of select="./alphabet"/>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:copy-of select="./sdefs"/>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:if test="not(count(./pardefs/pardef)=0)">
+    <pardefs>
+  <xsl:value-of select="string('&#xA;')"/>
+
+      <xsl:for-each select="./pardefs/pardef">
+  <xsl:value-of select="string('&#xA;')"/>
+
+	<pardef n="{./@n}">
+	  <xsl:apply-templates/>
+	</pardef>
+      </xsl:for-each>
+  <xsl:value-of select="string('&#xA;')"/>
+
+    </pardefs>
+  </xsl:if>
+  <xsl:value-of select="string('&#xA;')"/>
+  <xsl:for-each select="./section">
+    <section id="{./@id}" type="{./@type}">
+      <xsl:apply-templates/>
+    </section>
+  </xsl:for-each>
+</dictionary>
+
+</xsl:template>
+
+
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/lextor.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor.h	(revision 69632)
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __LEXTOR_H
+#define __LEXTOR_H
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <deque>
+
+#include <apertium/lextor_data.h>
+#include <apertium/lextor_word.h>
+#include <apertium/lextor_eval.h>
+
+using namespace std;
+
+/** Class LexTor (Lexical Selector class): declares the training entry points
+ *  (trainwrd, trainlch) and lexical_selector().
+ */
+class LexTor {
+private:
+  LexTorData *lextor_data;
+
+  //For use when tl information is used to perform lexical selection
+  LexTorData *tlmodel;
+  FSTProcessor *fstpbil;
+  //NOTE(review): each estimator presumably returns the index of the winning lexical choice -- confirm in lextor.cc
+  int estimate_winner_lch(deque<LexTorWord>& window, int word_index,  double weigth_exponent);
+  int estimate_winner_lch_voting(deque<LexTorWord>& window, int word_index,  double weigth_exponent);
+  int estimate_winner_lch_cosine(deque<LexTorWord>& window, int word_index,  double weigth_exponent);
+  int estimate_winner_lch_mostprob(deque<LexTorWord>& window, int word_index,  double weigth_exponent);
+  int estimate_winner_lch_votingtl(deque<LexTorWord>& window, int word_index,  double weigth_exponent);
+
+  double cosine(map<wstring, double>& vcontext, const wstring& reduced_lexchoice);
+public:
+
+  static bool debug;
+  static double angleth;
+
+  LexTor();
+  
+  LexTor(const LexTor& lt);
+  
+  ~LexTor();
+
+  void set_lextor_data(LexTorData* ltd);
+
+  //Use to set the tlmodel to be used when tl information is used to
+  //perform lexical selection
+  void set_tlmodel(LexTorData* tlm);
+  void set_bildic(FSTProcessor *fstp);
+
+  void trainwrd(wistream& wis, int left, int right, double weigth_exponent=0);
+
+  void trainlch(wistream& wis, int left, int right, LexTorData& wordmodel, 
+                FSTProcessor& dic, FSTProcessor& bildic, double weigth_exponent=0);
+
+  void lexical_selector(wistream& wis, FSTProcessor &fstp, int left, int right, 
+                        double weigth_exponent=0, LexTorEval* lteval=NULL);
+
+  /** NOTE on the weigth_exponent parameter: This parameter is used to
+      change the influence of surrounding words on the decision to
+      take on an ambiguous word (word with more than one lexical
+      choice). For example, if a decision is being taken on word w_i,
+      then the weight of the surrounding words is: 
+      Score(w_i-2) = count(w_i-2)/pow(2,weigth_exponent), 
+      Score(w_i-1) = count(w_i-1)/pow(1,weigth_exponent), 
+      Score(w_i+1) = count(w_i+1)/pow(1,weigth_exponent), 
+      Score(w_i+2) = count(w_i+2)/pow(2,weigth_exponent).
+  */
+};
+
+// Comparison functor over (string, count) pairs: true if e1>e2, where the
+// count is compared first and the string only breaks ties.
+class PairStringCountComparer {
+public:
+  bool operator()(const pair<wstring, COUNT_DATA_TYPE>& e1,
+                  const pair<wstring, COUNT_DATA_TYPE>& e2) const {
+    if (e1.second != e2.second)
+      return e1.second > e2.second;
+
+    //Same count: fall back to comparing the strings
+    return e1.first > e2.first;
+  }
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lextor_data.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_data.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_data.cc	(revision 69632)
@@ -0,0 +1,527 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/lextor_data.h>
+#include <apertium/string_utils.h>
+#include <apertium/lextor_word.h>
+#include <apertium/lextor.h>
+
+#include <lttoolbox/compression.h>
+#include <apertium/endian_double_util.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Creates an empty model in which only the NULLWORD placeholder is registered.
+LexTorData::LexTorData() {
+  n_set=0;
+  n_words_per_set=0;
+  n_stopwords=0;
+  // NULLWORD takes index 0 and is already counted as the first word.
+  word2index[NULLWORD]=0;
+  index2word.push_back(NULLWORD);
+  n_words=1;
+}
+  
+// Copy constructor: duplicates every counter and table of ltd.
+LexTorData::LexTorData(const LexTorData& ltd) {
+  n_stopwords=ltd.n_stopwords;
+  n_words=ltd.n_words;
+  n_words_per_set=ltd.n_words_per_set;
+  n_set=ltd.n_set;
+
+  word2index=ltd.word2index;
+  index2word=ltd.index2word;
+  // wordcount has to be copied as well: get_wordcount() and write() read it.
+  wordcount=ltd.wordcount;
+  lexchoice_set=ltd.lexchoice_set;
+  lexchoice_sum=ltd.lexchoice_sum;
+  stopwords=ltd.stopwords;
+  words=ltd.words;
+  lexical_choices=ltd.lexical_choices;
+  reduced_lexical_choices=ltd.reduced_lexical_choices;
+}
+  
+// Nothing to release explicitly.
+LexTorData::~LexTorData() {}
+
+// Returns the count stored for word in the set of lexical_choice, or 0 if none.
+COUNT_DATA_TYPE
+LexTorData::vote_from_word(const wstring& lexical_choice, const wstring& word) {
+  WORD_DATA_TYPE ind_lexchoice=word2index[StringUtils::tolower(lexical_choice)];
+  WORD_DATA_TYPE ind_word=word2index[StringUtils::tolower(word)];
+  // Look up with find(): lexchoice_set[ind_lexchoice] would insert an empty set
+  // for an unseen lexical choice, and write() emits every set it finds there.
+  map<WORD_DATA_TYPE, map<WORD_DATA_TYPE, COUNT_DATA_TYPE> >::iterator it_set=lexchoice_set.find(ind_lexchoice);
+  if (it_set==lexchoice_set.end() || it_set->second.find(ind_word)==it_set->second.end()) return 0;
+  return it_set->second[ind_word];
+}
+
+//double 
+//LexTorData::get_lexchoice_prob(const string& lexical_choice) {
+//  return lexchoice_prob[word2index[lexical_choice]];
+//}
+
+
+// Stores c as the count of word (the word is lower-cased before the lookup).
+void LexTorData::set_wordcount(const wstring& word, COUNT_DATA_TYPE c) {
+  const wstring key=StringUtils::tolower(word);
+  wordcount[word2index[key]]=c;
+}
+
+// Returns the count stored for word, or 0 when wordcount has no entry for it.
+COUNT_DATA_TYPE 
+LexTorData::get_wordcount(const wstring& word) {
+  const wstring key=StringUtils::tolower(word);
+  WORD_DATA_TYPE idx=word2index[key];
+  if (wordcount.find(idx)!=wordcount.end())
+    return wordcount[idx];
+  return 0;
+}
+// Returns the sum stored for lexical_choice (looked up in lower case).
+COUNT_DATA_TYPE LexTorData::get_lexchoice_sum(const wstring& lexical_choice) {
+  WORD_DATA_TYPE idx=word2index[StringUtils::tolower(lexical_choice)];
+  return lexchoice_sum[idx];
+}
+// Stores sum for lexical_choice (looked up in lower case).
+void LexTorData::set_lexchoice_sum(const wstring& lexical_choice, COUNT_DATA_TYPE sum) {
+  WORD_DATA_TYPE idx=word2index[StringUtils::tolower(lexical_choice)];
+  lexchoice_sum[idx]=sum;
+}
+// True when the lower-cased word is in the stopword set.
+bool LexTorData::is_stopword(const wstring& word) {
+  const wstring key=StringUtils::tolower(word);
+  return stopwords.count(key)>0;
+}
+//Loads a model from is; the fields are read in the same order in which write() emits them
+void 
+LexTorData::read(FILE *is) {
+  //cerr<<"LexTorData::read------------------------------------\n";
+  n_stopwords=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+  n_words=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+  n_words_per_set=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+  n_set=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+
+  //cerr<<n_stopwords<<"\n";
+  //cerr<<n_words<<"\n";
+  //cerr<<n_words_per_set<<"\n";
+  //cerr<<n_set<<"\n";
+  //Nothing is cleared below: what is read is added to the current contents
+  //NOTE(review): presumably meant to be called on a freshly constructed object
+  //Read the set of stopwords
+  //cerr<<"stopwords--------------------------------------------\n";
+  for (unsigned int i=0; i<n_stopwords; i++) {
+    stopwords.insert(Compression::wstring_read(is));
+    //cerr<<"len: "<<len<<" str: "<<str<<"\n";
+  }
+
+  //Read the list of words
+  //cerr<<"list of words----------------------------------------\n";
+  for(unsigned int i=1; i<n_words; i++) { //index 0 is NULLWORD, registered by the constructor
+    wstring str = Compression::wstring_read(is);
+    index2word.push_back(str);
+    word2index[str]=i;
+    wordcount[i]=EndianDoubleUtil::read(is);
+    //cerr<<"len: "<<len<<" str: "<<str<<" index: "<<i<<" word_count: "<<wordcount[i]<<"\n";
+  }
+
+  //Read data of each set associate to each lexical choice (or word)
+  for(unsigned int i=0; i<n_set; i++) {
+    WORD_DATA_TYPE lexchoice;
+    COUNT_DATA_TYPE sum;
+    //double prob;
+
+    lexchoice=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+    sum=EndianDoubleUtil::read(is);
+
+    //cerr<<"lexchoice: "<<lexchoice<<" sum: "<<sum<<" "<<index2word[lexchoice]<<"\n";
+    //NOTE(review): lexchoice comes from the file and indexes index2word unchecked
+    reduced_lexical_choices.insert(index2word[lexchoice]);
+
+    lexchoice_sum[lexchoice]=sum;
+    //lexchoice_prob[lexchoice]=prob;
+
+    /////lexical_choices.insert(index2word[lexchoice]);
+
+    for(unsigned int j=0; j<n_words_per_set; j++) { //every set holds exactly n_words_per_set entries
+      WORD_DATA_TYPE word;
+      COUNT_DATA_TYPE count;
+
+      word=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+      count=EndianDoubleUtil::read(is);
+      //cerr<<"     word: "<<word<<" count: "<<count<<"\n";
+      lexchoice_set[lexchoice][word]=count;
+    }
+  }
+
+  //First we read the number of words to take into account
+  WORD_DATA_TYPE nwords2workwith;
+
+  nwords2workwith=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+  for (unsigned int i=0; i<nwords2workwith; i++) {
+    WORD_DATA_TYPE word;
+
+    word=(WORD_DATA_TYPE)Compression::multibyte_read(is);
+    words.insert(index2word[word]); //the file stores word indices, not strings
+    //cerr<<"word: "<<index2word[word]<<"\n";
+  }
+}
+//Writes the model to os: the four counters, the stopwords, the word list with counts, every co-occurrence set and the indices of the words to work with
+void 
+LexTorData::write(FILE *os) {
+  //cerr<<"LexTorData::write------------------------------------\n";
+  //cerr<<n_stopwords<<"\n";
+  //cerr<<n_words<<"\n";
+  //cerr<<n_words_per_set<<"\n";
+  //cerr<<n_set<<"\n";
+  Compression::multibyte_write(n_stopwords, os);
+  Compression::multibyte_write(n_words, os);
+  Compression::multibyte_write(n_words_per_set, os);
+  Compression::multibyte_write(n_set, os);
+  //NOTE(review): the loops below walk the containers themselves, so the counters written above must agree with their sizes
+  //Write the set of stopwords
+  //cerr<<"stopwords--------------------------------------------\n";
+  set<wstring>::iterator it;
+  for (it=stopwords.begin(); it!=stopwords.end(); it++) {
+    Compression::wstring_write(*it, os);
+  }
+
+  //Write the list of words
+  //cerr<<"list of words----------------------------------------\n";
+  for(unsigned int i=1; i<index2word.size(); i++) { //index 0 (NULLWORD) is not stored
+    Compression::wstring_write(index2word[i], os);
+    EndianDoubleUtil::write(os, wordcount[i]);
+  }
+
+  //Write data of each set associate to each lexical choice (or word)
+  map<WORD_DATA_TYPE, map<WORD_DATA_TYPE, COUNT_DATA_TYPE> >::iterator it_lch_set;
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE>::iterator it_w_lch_set;
+  //map<WORD_DATA_TYPE, double>::iterator it_lch_prob;
+
+  for(it_lch_set=lexchoice_set.begin(); it_lch_set!=lexchoice_set.end(); it_lch_set++) {
+    WORD_DATA_TYPE lexchoice=it_lch_set->first;
+    COUNT_DATA_TYPE sum=lexchoice_sum[lexchoice];
+    //double prob=lexchoice_prob[lexchoice];
+
+    //cerr<<"lexchoice: "<<lexchoice<<" sum: "<<sum<<" "<<index2word[lexchoice]<<"\n";
+    Compression::multibyte_write(lexchoice, os);    
+    //os.write(reinterpret_cast<char * const> (&prob), sizeof(double));
+    EndianDoubleUtil::write(os, sum);
+
+    int nwritten_words=0;
+    for(it_w_lch_set=it_lch_set->second.begin(); 
+        it_w_lch_set!=it_lch_set->second.end(); 
+        it_w_lch_set++) {
+      WORD_DATA_TYPE word=it_w_lch_set->first;
+      COUNT_DATA_TYPE count=it_w_lch_set->second;
+      //cerr<<"     word: "<<word<<" count: "<<count<<"\n";
+      Compression::multibyte_write(word, os);
+      EndianDoubleUtil::write(os, count);
+      nwritten_words++;
+    }
+
+    //If there were less written words than expected
+    while (nwritten_words<n_words_per_set){ //pad with (NULLWORD, 0) so that read() finds n_words_per_set entries
+      WORD_DATA_TYPE word=word2index[NULLWORD];
+      COUNT_DATA_TYPE count=0;
+      //cerr<<"     word: "<<word<<" count: "<<count<<"\n";
+      Compression::multibyte_write(word, os);
+      EndianDoubleUtil::write(os, count);
+      nwritten_words++;
+    }
+  }
+
+  //First we write the number of words to take into account
+  WORD_DATA_TYPE nwords2workwith=words.size();
+  Compression::multibyte_write(nwords2workwith, os);
+
+  set<wstring>::iterator sit;
+  for(sit=words.begin(); sit!=words.end(); sit++) {
+    WORD_DATA_TYPE word=word2index[*sit]; //words are stored as indices into the word list
+    Compression::multibyte_write(word, os);
+    //cerr<<"word: "<<*sit<<"\n";
+  }
+}
+
+// Reads one stopword per line from is (lower-cased, empty lines skipped).
+void 
+LexTorData::read_stopwords(wistream& is) {
+  // Loop on the read itself: testing only eof() never ends on a failed stream.
+  for (wstring line; getline(is,line); ) {
+    wstring w=StringUtils::tolower(line);
+    if (w.length()>0) {
+      stopwords.insert(w);
+      wcerr<<L"stopword: "<<w<<L"\n";
+    }
+  }
+  n_stopwords=stopwords.size();
+  wcerr<<L"# stopwords read from file: "<<n_stopwords<<L"\n";
+}
+
+// Reads one word per line from is (lower-cased, empty lines skipped) and registers it.
+void 
+LexTorData::read_words(wistream& is) {
+  // Loop on the read itself: testing only eof() never ends on a failed stream.
+  for (wstring line; getline(is,line); ) {
+    wstring w=StringUtils::tolower(line);
+    if (w.length()>0) {
+      words.insert(w);
+      new_word_register(w);
+    }
+  }
+  n_set=words.size();
+  wcerr<<L"# words: "<<n_set<<L"\n";
+}
+
+// Collects the lexical choices of every word in `words` using fstp and
+// sets n_set to their total number.
+void 
+LexTorData::read_lexical_choices(FSTProcessor& fstp) {
+  int total=0;
+  set<wstring>::iterator w=words.begin();
+  while(w!=words.end()) {
+    LexTorWord candidate(*w, &fstp);
+    total+=candidate.n_lexical_choices();
+    int k=0;
+    while(k<candidate.n_lexical_choices()) {
+      lexical_choices[*w].insert(candidate.get_lexical_choice(k,false));
+      k++;
+    }
+    w++;
+  }
+  n_set=total;
+  wcerr<<L"# lexical choices: "<<n_set<<L"\n";
+}
+// Returns a copy of the `words` set.
+set<wstring> LexTorData::get_words() {
+  set<wstring> snapshot(words);
+  return snapshot;
+}
+// Returns a copy of the lexical choices stored for the lower-cased word.
+set<wstring> LexTorData::get_lexical_choices(const wstring& word) {
+  const wstring key=StringUtils::tolower(word);
+  return lexical_choices[key];
+}
+// Sets n_words_per_set to i and reports the new value on wcerr.
+void LexTorData::set_nwords_per_set(int i){
+  n_words_per_set=i;
+  wcerr<<L"# words per co-ocurrence model: ";
+  wcerr<<n_words_per_set<<L"\n";
+}
+
+// Stores the co-occurrence model of `lexical_choice`.
+// `context` holds (word, count) pairs; at most n_words_per_set of them, taken
+// from the front of the vector, are registered and written into
+// lexchoice_set. An empty context produces a warning and decrements n_set.
+// The model is echoed on wcerr while it is stored.
+void
+LexTorData::set_cooccurrence_context(const wstring& lexical_choice,
+                                     const vector<pair<wstring, COUNT_DATA_TYPE> >& context) {
+  wcerr<<L"Co-occurrence model for lexical_choice/word: "<<lexical_choice<<L"\n";
+
+  if (context.empty()) {
+    wcerr<<L"Warning: co-occurrence model for lexical_choice/word: "<<lexical_choice<<L" is empty\n";
+    wcerr<<L"It seems that training corpus is too small or thematically homogeneous\n";
+    n_set--;
+  }
+
+  new_word_register(lexical_choice);
+
+  // Lower-cased key of the lexical choice; it is the same for every entry.
+  const wstring choice_key=StringUtils::tolower(lexical_choice);
+
+  // Number of entries to keep: min(n_words_per_set, context.size()).
+  vector<pair<wstring, COUNT_DATA_TYPE> >::size_type limit=context.size();
+  if (limit>n_words_per_set)
+    limit=n_words_per_set;
+
+  for (unsigned int k=0; k<limit; k++) {
+    const pair<wstring, COUNT_DATA_TYPE>& entry=context[k];
+    wcerr<<entry.first<<L" "<<entry.second<<L"\n";
+
+    new_word_register(entry.first);
+
+    lexchoice_set[word2index[choice_key]][word2index[StringUtils::tolower(entry.first)]]=entry.second;
+  }
+}
+
+// Removes from `stopwords` every stopword that is a prefix of one of the
+// entries in `words`, warning on wcerr for each removal, then updates
+// n_stopwords and reports how many stopwords were discarded and how many
+// remain.
+void
+LexTorData::ensure_stopwords_ok() {
+  set<wstring> kept;
+
+  //Notice that stopwords consist of lemma and first tag while words
+  //consist of lemma and one (the first one) or more tags
+
+  for(set<wstring>::iterator sw=stopwords.begin(); sw!=stopwords.end(); ++sw) {
+    // Look for the first word that starts with this stopword.
+    set<wstring>::iterator clash=words.begin();
+    while ((clash!=words.end()) && (clash->find(*sw)!=0))
+      ++clash;
+
+    if (clash==words.end()) {
+      kept.insert(*sw);
+    } else {
+      wcerr<<L"Warning: Word '"<<*clash<<L"' for which co-ocurrence models will"
+           <<L" be estimated is also a stopword. ";
+      wcerr<<L"Removing it from the stopwords list\n";
+    }
+  }
+
+  stopwords=kept;
+
+  wcerr<<n_stopwords-stopwords.size()<<L" stopwords were discarded\n";
+
+  n_stopwords=stopwords.size();
+
+  wcerr<<L"# stopwords finally taken into account: "<<n_stopwords<<L"\n";
+}
+
+// Reduces a word to the form used as a key by this class.
+//
+// The input is lower-cased and, when it is wrapped as "^...$", both
+// delimiters are removed. If the result starts with one of the entries of
+// `words`, that entry is returned. Otherwise the text up to and including the
+// first '>' is returned; when there is no '>' (the word may be unknown) the
+// whole string is returned, without a leading '*' if present.
+wstring
+LexTorData::reduce(const wstring& s) {
+  wstring str;
+
+  // substr takes (position, count): dropping both the leading '^' and the
+  // trailing '$' means keeping length()-2 characters, not length()-1.
+  if ((s.length()>1) && (s[0]==L'^') && (s[s.length()-1]==L'$'))
+    str=StringUtils::tolower(s.substr(1, s.length()-2));
+  else
+    str=StringUtils::tolower(s);
+
+  for(set<wstring>::iterator it=words.begin(); it!=words.end(); ++it) {
+    if (str.find(*it)==0)
+      return (*it);
+  }
+
+  // Half-open range [begin, end) of str to return. Positions are kept as
+  // size_type so that they compare correctly with wstring::npos.
+  wstring::size_type begin=0;
+  wstring::size_type end=str.find(L'>');
+  if (end==wstring::npos) { //s could correspond to an unknown word
+    end=str.length();
+    if ((str.length()>0) && (str[0]==L'*'))
+      begin=1; // to remove the star (unknown word mark)
+  }
+  else
+    end++; // keep the '>' itself
+
+  if (begin>=end) {
+    wcerr<<L"Warning in LexTorData::reduce: input string: '"<<s<<L"', string after operation: '"<<str<<L"'\n";
+    wcerr<<L"begin index: "<<begin<<", end index: "<<end<<L"\n";
+    begin=0;
+  }
+
+  return str.substr(begin, end-begin);
+}
+
+// Reduces a lexical choice to the form used as a key by this class.
+//
+// The input is lower-cased and, when it is wrapped as "^...$", both
+// delimiters are removed. If the result starts with one of the entries of
+// `reduced_lexical_choices`, that entry is returned; otherwise the lower-cased
+// string itself is returned.
+wstring
+LexTorData::reduce_lexical_choice(const wstring& s) {
+  wstring str;
+
+  // substr takes (position, count): dropping both the leading '^' and the
+  // trailing '$' means keeping length()-2 characters, not length()-1.
+  if ((s.length()>1) && (s[0]==L'^') && (s[s.length()-1]==L'$'))
+    str=StringUtils::tolower(s.substr(1, s.length()-2));
+  else
+    str=StringUtils::tolower(s);
+
+  for(set<wstring>::iterator it=reduced_lexical_choices.begin();
+      it!=reduced_lexical_choices.end(); ++it) {
+    if (str.find(*it)==0)
+      return (*it);
+  }
+
+  //return StringUtils::substitute(str," d<", " D<");
+
+  return str;
+}
+
+// Registers `word` (lower-cased) in the word/index tables if it is not there
+// yet: it is appended to index2word, mapped to its position in word2index,
+// its wordcount is set to 0 and n_words is refreshed. The program exits with
+// an error when the new index exceeds MAX_WORD_INDEX.
+// NOTE(review): n_words is a WORD_DATA_TYPE; it looks like it would wrap when
+// index2word reaches MAX_WORD_INDEX+1 entries -- confirm.
+void
+LexTorData::new_word_register(const wstring& word) {
+  const wstring key=StringUtils::tolower(word);
+
+  if (word2index.find(key)!=word2index.end())
+    return; // already registered
+
+  index2word.push_back(key);
+  const int ind=index2word.size()-1;
+  if (ind>MAX_WORD_INDEX) {
+    wcerr<<L"Error: The number of words to be considered is greater that the maximum allowed by\n";
+    wcerr<<L"the data type used to store words\n";
+    wcerr<<L"Edit source file LexTorData.H and change the WORD_DATA_TYPE define\n";
+    exit(EXIT_FAILURE);
+  }
+
+  const WORD_DATA_TYPE idx=(WORD_DATA_TYPE)ind;
+  word2index[key]=idx;
+  n_words=index2word.size();
+  wordcount[idx]=0;
+}
+
+/*
+vector<pair<WORD_DATA_TYPE, double> > 
+LexTorData::get_cooccurrence_vector(const string& lexical_choice) {
+  vector<pair<WORD_DATA_TYPE, double> > v;
+  WORD_DATA_TYPE ind_lexchoice=word2index[StringUtils::tolower(lexical_choice)];
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE>::iterator it;
+
+  for(it=lexchoice_set[ind_lexchoice].begin(); it!= lexchoice_set[ind_lexchoice].end(); it++) 
+    v.push_back(*it);
+  
+  return v;
+}
+*/
+
+
+// Returns the Euclidean norm of the co-occurrence vector stored in
+// lexchoice_set for `lexical_choice` (looked up in lower case): the square
+// root of the sum of its squared counts.
+double
+LexTorData::get_module_lexchoice_vector(const wstring& lexical_choice) {
+  const WORD_DATA_TYPE idx=word2index[StringUtils::tolower(lexical_choice)];
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE>& counts=lexchoice_set[idx];
+
+  double sum_of_squares=0;
+  for(map<WORD_DATA_TYPE, COUNT_DATA_TYPE>::iterator c=counts.begin(); c!=counts.end(); ++c)
+    sum_of_squares+=(c->second)*(c->second);
+
+  return sqrt(sum_of_squares);
+}
+
+// Returns the cosine between the co-occurrence vectors of two reduced lexical
+// choices: their scalar product divided by the product of their norms.
+// Returns -2 when either vector has norm zero; in that case a warning is
+// written to wcerr if LexTor::debug is set.
+double
+LexTorData::cosine(const wstring& reduced_lexch1, const wstring& reduced_lexch2) {
+  const WORD_DATA_TYPE idx1=word2index[StringUtils::tolower(reduced_lexch1)];
+  const WORD_DATA_TYPE idx2=word2index[StringUtils::tolower(reduced_lexch2)];
+
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE>& vec1=lexchoice_set[idx1];
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE>& vec2=lexchoice_set[idx2];
+
+  //Scalar product: only the words present in both vectors contribute
+  double dot=0;
+  for(map<WORD_DATA_TYPE, COUNT_DATA_TYPE>::iterator a=vec1.begin(); a!=vec1.end(); ++a) {
+    map<WORD_DATA_TYPE, COUNT_DATA_TYPE>::iterator b=vec2.find(a->first);
+    if (b!=vec2.end())
+      dot+=(a->second)*(b->second);
+  }
+
+  //Norms of both vectors, ||lexchoice vector||
+  const double norm1=get_module_lexchoice_vector(reduced_lexch1);
+  const double norm2=get_module_lexchoice_vector(reduced_lexch2);
+
+  if (norm1==0) {
+    if (LexTor::debug) {
+      wcerr<<L"Warning in LexTorData::cosine: module_lexch1_vector is equal zero.\n"
+           <<L"The cosine cannot be compute\n";
+      wcerr<<L"reduced lexical choice: "<<reduced_lexch1<<L"\n";
+    }
+    return -2;
+  }
+
+  if (norm2==0) {
+    if (LexTor::debug) {
+      wcerr<<L"Warning in LexTorData::cosine: module_lexch2_vector is equal zero.\n"
+           <<L"The cosine cannot be compute\n";
+      wcerr<<L"reduced lexical choice: "<<reduced_lexch2<<L"\n";
+    }
+    return -2;
+  }
+
+  return dot/(norm1*norm2);
+}
Index: branches/apertium-tagger/apertium2/apertium/lextor_data.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_data.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_data.h	(revision 69632)
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __LEXTORDATA_H
+#define __LEXTORDATA_H
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <cmath>
+#include <cstdio>
+
+#include <lttoolbox/fst_processor.h>
+
+#define WORD_DATA_TYPE unsigned short
+#define MAX_WORD_INDEX (pow(2.0,(double)(sizeof(WORD_DATA_TYPE)*8))-1)
+
+#define COUNT_DATA_TYPE double
+
+#define NULLWORD L"NULLWORD"
+
+using namespace std;
+
+/** Class LexTorData. (Lexical Selector Data class)
+ *
+ *  Holds the data used by the lexical selector: the stopword list, the set of
+ *  words to work with, their lexical choices, and the co-occurrence counts
+ *  stored for each lexical choice. Words are identified internally by an
+ *  index of type WORD_DATA_TYPE.
+ */
+
+class LexTorData{
+private:
+
+  //Number of stopwords (size of the 'stopwords' set)
+  WORD_DATA_TYPE n_stopwords;
+  //Number of registered words (size of 'index2word')
+  WORD_DATA_TYPE n_words;
+  //Maximum number of context words stored per co-occurrence model
+  WORD_DATA_TYPE n_words_per_set;
+  //Set by read_words (number of words) and read_lexical_choices (number of
+  //lexical choices); decremented when an empty co-occurrence model is stored
+  WORD_DATA_TYPE n_set;
+
+  //For a given word (or lexical choice) its index is returned and vice versa
+  map<wstring, WORD_DATA_TYPE> word2index;
+  vector<wstring> index2word;
+
+  //Count stored for each word index; initialised to 0 when a word is registered
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE> wordcount;
+
+  //For a given lexical choice it contains the set of words it appears
+  //with, and for each co-appearing word, the number of times they
+  //co-appear
+  map<WORD_DATA_TYPE, map<WORD_DATA_TYPE, COUNT_DATA_TYPE> > lexchoice_set;
+
+  //For a given lexical choice it contains the sum of all co-appearing words
+  map<WORD_DATA_TYPE, COUNT_DATA_TYPE> lexchoice_sum;
+
+  //For a given lexical choice it contains its probability
+  //map<WORD_DATA_TYPE, double> lexchoice_prob;
+
+  //Set of stopwords
+  set<wstring> stopwords;
+
+  //Set of words to work with
+  set<wstring> words;
+
+  //For a given word it contains its set of lexical-choices (when available)
+  map<wstring, set<wstring> > lexical_choices;
+
+  //Prefixes against which reduce_lexical_choice matches its argument
+  set<wstring> reduced_lexical_choices;
+
+  //Registers a word (in lower case) in word2index/index2word if not present
+  void new_word_register(const wstring& w);
+public:
+
+  LexTorData();
+
+  LexTorData(const LexTorData& ltd);
+
+  ~LexTorData();
+
+  COUNT_DATA_TYPE vote_from_word(const wstring& lexical_choice, const wstring& word);
+
+  //double get_lexchoice_prob(const string& lexical_choice);
+
+  COUNT_DATA_TYPE get_lexchoice_sum(const wstring& lexical_choice);
+
+  void set_wordcount(const wstring& word, COUNT_DATA_TYPE c);
+  COUNT_DATA_TYPE get_wordcount(const wstring& word);
+
+  void set_lexchoice_sum(const wstring& lexical_choice, COUNT_DATA_TYPE sum);
+
+  bool is_stopword(const wstring& word);
+
+  void read(FILE *is);
+
+  void write(FILE *os);
+
+  //Reads the stopwords, one per line, from the given stream
+  void read_stopwords(wistream& is);
+
+  //Reads the words to work with, one per line, from the given stream
+  void read_words(wistream& is);
+
+  //Fills the lexical choices of every word to work with using 'fstp'
+  void read_lexical_choices(FSTProcessor& fstp);
+
+  void set_nwords_per_set(int i);
+
+  //Stores (at most n_words_per_set entries of) the co-occurrence model of
+  //a lexical choice; 'context' holds (word, count) pairs
+  void set_cooccurrence_context(const wstring& lexical_choice,
+                                const vector<pair<wstring, COUNT_DATA_TYPE> >& context);
+
+  //vector<pair<WORD_DATA_TYPE, double> >
+  //get_cooccurrence_vector(const string& lexical_choice);
+
+  //Euclidean norm of the co-occurrence vector of a lexical choice
+  double get_module_lexchoice_vector(const wstring& lexical_choice);
+
+  //Cosine between the co-occurrence vectors of two lexical choices;
+  //-2 is returned when one of the vectors has norm zero
+  double cosine(const wstring& reduced_lexch1, const wstring& reduced_lexch2);
+
+  set<wstring> get_words();
+
+  set<wstring> get_lexical_choices(const wstring& word);
+
+  //Used to ensure that none of the stopwords are in the set
+  //of words from which co-occurrence models are being estimated
+  void ensure_stopwords_ok();
+
+  //Given a word in the apertium format the lemma and the first tag
+  //are returned (both in lower case) if possible
+  wstring reduce(const wstring& s);
+
+  wstring reduce_lexical_choice(const wstring& s);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lextor_eval.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_eval.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_eval.cc	(revision 69632)
@@ -0,0 +1,144 @@
+/*
+ * Copyright (C) 2004-2006 Felipe Sánchez-Martínez
+ * Copyright (C) 2006 Universitat d'Alacant
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <iostream>
+#include <cmath>
+#include <apertium/lextor_eval.h>
+#include <apertium/lextor.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Creates an evaluator that reads the reference corpus from `iref`. All
+// counters start at zero and words2ignore is filled with a fixed list of
+// entries.
+LexTorEval::LexTorEval(wistream* iref)
+  : nwords(0), nignored(0), npol(0), nerrors_pol(0), ndefault(0), refer(iref) {
+  static const wchar_t* const ignored_entries[]={
+    L"as<cnjadv>",
+    L"at<pr>",
+    L"before<pr>",
+    L"but<cnjcoo>",
+    L"by<pr>",
+    L"for<pr>",
+    L"how<adv>",
+    L"in<pr>",
+    L"just<adv>",
+    L"off<pr>",
+    L"on<pr>",
+    L"over<pr>",
+    L"right<adv>",
+    L"since<cnjadv>",
+    L"whether<cnjadv>"
+  };
+  const unsigned int n_entries=sizeof(ignored_entries)/sizeof(ignored_entries[0]);
+
+  for(unsigned int k=0; k<n_entries; k++)
+    words2ignore.insert(ignored_entries[k]);
+}
+
+// Nothing to release here: the body is empty, so `refer` is not deleted.
+LexTorEval::~LexTorEval() {}
+
+// Returns 100*part/total, or 0 when `total` is zero, so that an evaluation
+// over an empty corpus (or one without polysemous words) does not print the
+// result of a division by zero.
+static double
+lextor_eval_percentage(double part, double total) {
+  return (total!=0.0) ? (part/total)*100.0 : 0.0;
+}
+
+// Writes the global evaluation figures (counts and percentages) to wcerr.
+// The per-word report that followed an unconditional `return` in the previous
+// version could never run and has been dropped; the per-word counters are
+// still updated by evalword().
+void
+LexTorEval::print_evaluation() {
+  wcerr<<L"# of words.......................................... "<<nwords<<L"\n"
+       <<L"# of ignored words.................................. "<<nignored<<L"\n"
+       <<L"# of polisemous words............................... "<<npol<<L"\n"
+       <<L"# of errors due to polisemous words................. "<<nerrors_pol<<L"\n"
+       <<L"# of times context does not discriminate (NODIS).... "<<ndefault<<L"\n"
+       <<L"% of polysemous words............................... "<<lextor_eval_percentage(npol, nwords)<<L" %\n"
+       <<L"% of error over polisemous words ................... "<<lextor_eval_percentage(nerrors_pol, npol)<<L" %\n"
+       <<L"% of error over all words .......................... "<<lextor_eval_percentage(nerrors_pol, nwords)<<L" %\n"
+       <<L"% of NODIS.......................................... "<<lextor_eval_percentage(ndefault, npol)<<L" %\n";
+  wcerr<<L"NOTE: # ignored words ARE NOT included in the rest of counts\n";
+}
+
+// Evaluates the lexical choice made for one word against the reference
+// corpus.
+//
+// ltword      word being evaluated.
+// winner      index of the selected lexical choice; a negative value is
+//             counted as a default choice (context did not discriminate).
+// lextor_data used to reduce the selected choice, the word and the reference.
+//
+// One line is read from the reference stream per call. A reference line
+// containing ">__IGNORE" only increments the ignored counter. Otherwise the
+// word is counted; for words with more than one lexical choice the
+// polysemous, default and error counters (global and per word) are updated,
+// and for the rest a mismatch with the reference aborts the program because
+// the corpora are not aligned.
+void
+LexTorEval::evalword(LexTorWord& ltword, int winner, LexTorData* lextor_data) {
+  wstring reduced_w=lextor_data->reduce(ltword.get_lexical_choice(winner,false));
+  wstring word=lextor_data->reduce(ltword.get_word_string());
+  wstring wref;
+
+  getline(*refer,wref);
+
+  //  if (words2ignore.find(word)!=words2ignore.end()) {
+  //  return;
+  //}
+
+  if (wref.find(L">__IGNORE") != wstring::npos) {
+    nignored+=1.0;
+    if (LexTor::debug) {
+      // Must be wcerr: a wide literal sent to the narrow cerr is not printed
+      // as text (its address is printed instead).
+      wcerr<<L"========= IGNORED\n";
+    }
+    return;
+  }
+
+  nwords+=1.0;
+  wstring reduced_wref=lextor_data->reduce(wref);
+
+  if (ltword.n_lexical_choices()>1) {
+    npol+=1.0;
+    nwords_per_word[word]+=1.0;
+    if (winner<0) {
+      ndefault+=1.0;
+      ndefault_per_word[word]+=1.0;
+    }
+    if (reduced_w!=reduced_wref) {
+      nerrors_pol+=1.0;
+      nerrors_per_word[word]+=1.0;
+      if (LexTor::debug) {
+        wcerr<<L"========= ERROR\n";
+      }
+    } else {
+      if (LexTor::debug) {
+        wcerr<<L"========= OK\n";
+      }
+    }
+  } else if (reduced_wref!=reduced_w) {
+    // A non-ambiguous word must be identical in both corpora.
+    wcerr<<L"Error: Input and reference corpora are not aligned\n";
+    wcerr<<L"word="<<reduced_w<<L" ref. word="<<reduced_wref<<L"\n";
+    wcerr<<L"Number of words: "<<nwords+nignored<<L"\n";
+    exit(EXIT_FAILURE);
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/lextor_eval.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_eval.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_eval.h	(revision 69632)
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2004-2006 Felipe Sánchez-Martínez
+ * Copyright (C) 2006 Universitat d'Alacant
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __LEXTOR_EVAL_H
+#define __LEXTOR_EVAL_H
+
+#include <string>
+#include <vector>
+#include <set>
+#include <map>
+#include <istream>
+#include <apertium/lextor_data.h>
+#include <apertium/lextor_word.h>
+
+using namespace std;
+
+// Accumulates evaluation statistics for the lexical selector by comparing the
+// choices made for each word with a reference corpus read from a stream.
+class LexTorEval {
+private:
+
+  // Counters are kept as double; they are incremented by 1.0 in evalword().
+  double nwords;        // evaluated (non-ignored) words
+  //double nunknown;
+  double nignored;      // words whose reference line contains ">__IGNORE"
+  double npol;          // words with more than one lexical choice
+  //double nerrors_nopol;
+  double nerrors_pol;   // polysemous words whose choice differs from the reference
+  //double nerrors_unk;
+
+  double ndefault;      // polysemous words evaluated with a negative winner index
+
+  // Per-word versions of the npol, nerrors_pol and ndefault counters,
+  // keyed by the reduced word.
+  map<wstring, double> nwords_per_word;
+  map<wstring, double> nerrors_per_word;
+  map<wstring, double> ndefault_per_word;
+
+  // Reference corpus; one line is consumed per evaluated word.
+  wistream* refer;
+
+  // Filled by the constructor with a fixed list of entries.
+  set<wstring> words2ignore;
+public:
+
+  LexTorEval(wistream *iref);
+
+  ~LexTorEval();
+
+  // Evaluates the choice 'winner' made for 'ltword' against the next
+  // line of the reference corpus and updates the counters.
+  void evalword(LexTorWord& ltword, int winner, LexTorData* lextor_data);
+
+  // Writes the evaluation figures to wcerr.
+  void print_evaluation();
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/lextor_word.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_word.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_word.cc	(revision 69632)
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/lextor_word.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Creates an empty word: no word string, no ignored string, no lexical
+// choices, default choice 0.
+LexTorWord::LexTorWord()
+  : word(L""), ignored_string(L""), default_choice(0) {
+}
+  
+// Copy constructor: copies the word, the ignored string, the lexical choices
+// and the default choice of `ltw`.
+LexTorWord::LexTorWord(const LexTorWord& ltw)
+  : word(ltw.word),
+    ignored_string(ltw.ignored_string),
+    lexical_choices(ltw.lexical_choices),
+    default_choice(ltw.default_choice) {
+}
+
+// Creates a word from `str` with an empty ignored string and obtains its
+// lexical choices (and default choice) from `fstp`.
+LexTorWord::LexTorWord(const wstring &str, FSTProcessor *fstp)
+  : word(str), ignored_string(L"") {
+  extract_lexical_choices(fstp);
+}
+  
+// Empty destructor body.
+LexTorWord::~LexTorWord() {}
+
+// Returns a copy of the word string.
+wstring
+LexTorWord::get_word_string() {
+  return this->word;
+}
+
+// Returns the number of lexical choices stored for this word.
+int
+LexTorWord::n_lexical_choices() {
+  return static_cast<int>(lexical_choices.size());
+}
+
+// Returns the lexical choice at position `choice`.
+//
+// A negative `choice` selects the default choice; a position beyond the
+// available choices is reported on wcerr and also replaced by the default.
+// With `include_ignored` the result is the ignored string followed by the
+// choice wrapped as "^...$"; without it, the bare choice. When the word
+// string is empty the result is just the ignored string (or an empty string
+// if `include_ignored` is false).
+// NOTE(review): if lexical_choices is empty while the word is not, the final
+// lookup indexes an empty vector -- confirm callers never reach that state.
+wstring
+LexTorWord::get_lexical_choice(int choice, bool include_ignored) {
+  if (word.empty())
+    return include_ignored ? ignored_string : wstring();
+
+  int pos=(choice<0) ? default_choice : choice;
+
+  if (pos>=(int)lexical_choices.size()) {
+    wcerr<<L"Error in LexTorWord::get_lexical_choice, choice position given is "
+         <<L"greater than the number of choices available\n";
+    wcerr<<L"position requested: "<<pos<<"\n";
+    wcerr<<L"number of lexical choices: "<<lexical_choices.size()<<"\n";
+    wcerr<<L"Returning default choice\n";
+    pos=default_choice;
+  }
+
+  const wstring& selected=lexical_choices[pos];
+  if (!include_ignored)
+    return selected;
+
+  return ignored_string+L"^"+selected+L"$";
+}
+
+// Returns the result of passing the lexical choice at position `lexchoice`
+// through `bildic`. A negative position selects the default choice; a
+// position beyond the available choices is reported on wcerr and replaced by
+// the default choice.
+wstring
+LexTorWord::translate(FSTProcessor& bildic, int lexchoice) {
+  if (lexchoice<0) {
+    lexchoice=default_choice;
+  } else if (lexchoice>=(int)lexical_choices.size()) {
+    wcerr<<L"Error in LexTorWord::translate, choice position given is "
+         <<L"greater than the number of choices available\n";
+    wcerr<<L"position requested: "<<lexchoice<<"\n";
+    wcerr<<L"number of lexical choices: "<<lexical_choices.size()<<"\n";
+    wcerr<<L"Returning default choice\n";
+    lexchoice=default_choice;
+  }
+
+  const wstring translation=bildic.biltrans(lexical_choices[lexchoice], false);
+  return translation;
+}
+
+// Fills lexical_choices with the '/'-separated parts of the string that
+// `fstp` returns for this word and selects the default choice.
+//
+// The default choice is 0 unless the word has more than one choice and some
+// of them contain the mark " D<"; in that case it is the last choice that
+// contains it.
+void
+LexTorWord::extract_lexical_choices(FSTProcessor *fstp) {
+  lexical_choices=StringUtils::split_wstring(fstp->biltrans(word,false), L"/");
+  default_choice=0;
+
+  if (lexical_choices.size()<=1) // not lexically ambiguous
+    return;
+
+  for(vector<wstring>::size_type i=0; i<lexical_choices.size(); i++) {
+    // The result of find() is compared directly with wstring::npos, without
+    // narrowing it to unsigned int. A successful match of " D<" already
+    // guarantees that a '<' follows the 'D', so the extra check the previous
+    // version made on that character could never fail and is not repeated.
+    if (lexical_choices[i].find(L" D<")!=wstring::npos)
+      default_choice=static_cast<int>(i);
+  }
+}
+
+// Reads the next word from `is` and returns it as a newly allocated
+// LexTorWord, or NULL when nothing could be read.
+//
+// Characters found before an unescaped '^' are stored as the ignored string;
+// characters between that '^' and the next unescaped '$' form the word. A
+// character is considered escaped when the previous one was a backslash. If
+// the stream fails in the middle of a word the program exits with an error.
+// When `fstp` is not NULL the lexical choices of the word are extracted.
+LexTorWord*
+LexTorWord::next_word(wistream& is, FSTProcessor *fstp) {
+  LexTorWord w;
+  wchar_t c, prev_c=L' ';
+  bool reading_word=false;
+
+  for(;;) {
+    is>>c;
+
+    if (is.fail()) {
+      if (reading_word) {
+        wcerr<<L"Error in LexTorWord::next_word while reading input word\n";
+        wcerr<<L"Malformed input string, at '"<<c<<L"'\n";
+        exit(EXIT_FAILURE);
+      }
+
+      // Input exhausted outside a word: return whatever was collected.
+      if ((w.word.length()==0)&&(w.ignored_string.length()==0))
+        return NULL;
+      if(fstp!=NULL)
+        w.extract_lexical_choices(fstp);
+      return new LexTorWord(w);
+    }
+
+    const bool escaped=(prev_c==L'\\');
+    if ((c==L'^') && !escaped && !reading_word) {
+      reading_word=true;
+    } else if ((c==L'$') && !escaped && reading_word) {
+      break; // end of the word
+    } else if (reading_word) {
+      w.word+=c;
+    } else {
+      w.ignored_string+=c;
+    }
+    prev_c=c;
+  }
+
+  if ((w.word.length()==0) && (w.ignored_string.length()==0))
+    return NULL;
+
+  if(fstp!=NULL)
+    w.extract_lexical_choices(fstp);
+
+  return new LexTorWord(w);
+}
+
Index: branches/apertium-tagger/apertium2/apertium/lextor_word.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/lextor_word.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/lextor_word.h	(revision 69632)
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __LEXTORWORD_H
+#define __LEXTORWORD_H
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <lttoolbox/fst_processor.h>
+#include <apertium/lextor_data.h>
+
+using namespace std;
+
+/** Class LexTorWord. (Lexical Selector Word)
+ *
+ *  A word read from the input together with the text that preceded it
+ *  (the ignored string) and the lexical choices available for it.
+ */
+
+class LexTorWord{
+private:
+  // Text found between '^' and '$' in the input
+  wstring word;
+  // Text found before the word in the input
+  wstring ignored_string;
+  // Lexical choices of the word, as obtained from an FSTProcessor
+  vector<wstring> lexical_choices;
+  // Position in lexical_choices of the choice used by default
+  int default_choice;
+
+  // Fills lexical_choices and default_choice using 'fstp'
+  void extract_lexical_choices(FSTProcessor *fstp);
+public:
+
+  LexTorWord();
+
+  LexTorWord(const LexTorWord& ltw);
+
+  /** Builds the word 'str' and extracts its lexical choices from 'fstp'
+   */
+  LexTorWord(const wstring& str, FSTProcessor *fstp);
+
+  ~LexTorWord();
+
+  /** Return the lexical choice at position 'choice', if 'choice' is not
+   *  given the default one is returned. If 'include_ignored' is true the
+   *  ignored string is prepended and the choice is wrapped as "^...$".
+   */
+  wstring get_lexical_choice(int choice=-1, bool include_ignored=true);
+
+  /** Returns the number of lexical choices for this word
+   */
+  int n_lexical_choices();
+
+  wstring get_word_string();
+
+  /** Returns the result of passing the lexical choice at position
+   *  'choice' (the default one if not given) through 'bildic'.
+   */
+  wstring translate(FSTProcessor& bildic, int choice=-1);
+
+
+  /** When calling this method the set of lexical choice for each word
+   *  will be extracted from the FSTProcessor object if present.
+   *  Moreover the input stream (is) is supposed to be in the
+   *  intermediate format used by the apertium MT system.
+   *  The returned object is allocated with new; NULL is returned when
+   *  nothing could be read.
+   */
+  static LexTorWord* next_word(wistream& is, FSTProcessor *fstp=NULL);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/morpho_stream.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/morpho_stream.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/morpho_stream.cc	(revision 69632)
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/** 
+ *  Word class and MorphoStream class definitions
+ *
+ *  @author	Felipe Sánchez-Martínez
+ */
+
+#include <apertium/morpho_stream.h>
+#include <apertium/constant_manager.h>
+#include <vector>
+#include <apertium/string_utils.h>
+#include "apertium_config.h"
+#include <apertium/unlocked_cstdio.h>
+
+using namespace Apertium;
+// Builds a stream that reads from `ftxt` using the tagger data `t`; `d`
+// enables debug warnings. The constructor caches the symbols, constants and
+// tag indexes it obtains from `t` and creates the match executor `me`, which
+// the destructor deletes.
+MorphoStream::MorphoStream(FILE *ftxt, bool d, TaggerData *t)
+{
+  // Plain state
+  input = ftxt;
+  debug = d;
+  td = t;
+  foundEOF = false;
+  end_of_file = false;
+  null_flush = false;
+
+  // Pattern matching machinery and the "any" symbols of its alphabet
+  me = td->getPatternList().newMatchExe();
+  alphabet = td->getPatternList().getAlphabet();
+  ca_any_char = alphabet(PatternList::ANY_CHAR);
+  ca_any_tag = alphabet(PatternList::ANY_TAG);
+
+  // Named constants
+  ConstantManager &constants = td->getConstants();
+  ca_kignorar = constants.getConstant(L"kIGNORAR");
+  ca_kbarra = constants.getConstant(L"kBARRA");
+  ca_kdollar = constants.getConstant(L"kDOLLAR");
+  ca_kbegin = constants.getConstant(L"kBEGIN");
+  ca_kmot = constants.getConstant(L"kMOT");
+  ca_kmas = constants.getConstant(L"kMAS");
+  ca_kunknown = constants.getConstant(L"kUNKNOWN");
+
+  // Indexes of the end-of-file and undefined tags
+  map<wstring, int, Ltstr> &tag_index = td->getTagIndex();
+  ca_tag_keof = tag_index[L"TAG_kEOF"];
+  ca_tag_kundef = tag_index[L"TAG_kUNDEF"];
+}
+
+// Releases the match executor created in the constructor.
+MorphoStream::~MorphoStream()
+{
+  delete this->me;
+}
+
+// Returns the next word of the stream, or NULL when the input is exhausted.
+//
+// Words already queued in vwords are served first (applying the discard rules
+// to ambiguous ones). Otherwise a new TaggerWord is queued and the input is
+// read: text found before a '^' is attached to it as ignored string
+// (backslash escapes are kept), and once the '^' is found the rest of the
+// word is read by readRestOfWord(). The function then calls itself to return
+// the front of the queue. End of input (or a '\0' when null_flush is set)
+// adds the end-of-file tag to the queued word.
+TaggerWord *
+MorphoStream::get_next_word()
+{
+  // Serve a queued word if there is one
+  if(vwords.size() != 0)
+  {
+    TaggerWord* word=vwords.front();
+    vwords.erase(vwords.begin());
+    
+    if(word->isAmbiguous())
+    {
+      vector<wstring> &ref = td->getDiscardRules();
+      for(unsigned int i = 0; i < ref.size(); i++)
+      {
+        word->discardOnAmbiguity(ref[i]);
+      }
+    }
+//    cout << *word << endl;
+    return word;
+  }
+
+  if(feof(input))
+  {
+    return NULL;
+  }
+  
+  // Index in vwords of the word being filled
+  int ivwords = 0;
+  vwords.push_back(new TaggerWord());
+
+  while(true)
+  {
+    int symbol = fgetwc_unlocked(input);
+    if(feof(input) || (null_flush && symbol == L'\0'))
+    {
+      end_of_file = true;
+      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      return get_next_word();
+    }
+    if(symbol == L'^')
+    {
+      readRestOfWord(ivwords);
+      return get_next_word();
+    }
+    else
+    {
+      // Text outside a word: accumulate it until the next '^'
+      wstring str = L"";
+      if(symbol == L'\\')
+      {
+        // Escaped character: keep the backslash and the character after it
+        symbol = fgetwc_unlocked(input);
+        str += L'\\';
+        str += static_cast<wchar_t>(symbol);
+        // Reset symbol so an escaped '^' does not end the loop below
+        symbol = L'\\';
+      }
+      else
+      {
+        str += static_cast<wchar_t>(symbol);
+      }
+      
+      while(symbol != L'^')
+      {
+	symbol = fgetwc_unlocked(input);
+	if(feof(input) || (null_flush && symbol == L'\0'))
+	{
+	  end_of_file = true;
+	  vwords[ivwords]->add_ignored_string(str);
+          vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+	  return get_next_word();
+	}
+	else if(symbol == L'\\')
+	{
+	  str += L'\\';
+          symbol = fgetwc_unlocked(input);
+	  if(feof(input) || (null_flush && symbol == L'\0'))
+	  {
+	    end_of_file = true;
+	    vwords[ivwords]->add_ignored_string(str);
+            vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+	    return get_next_word();
+	  }
+	  str += static_cast<wchar_t>(symbol);
+	  // Reset symbol so an escaped '^' does not end the loop
+	  symbol = L'\\';
+	}
+	else if(symbol == L'^')
+	{
+	  // Start of a word: flush the collected text and read the word
+	  if(str.size() > 0)
+	  {
+	    vwords[ivwords]->add_ignored_string(str);
+          }
+	  readRestOfWord(ivwords);
+	  return get_next_word();
+	}
+        else
+	{
+	  str += static_cast<wchar_t>(symbol);
+	}
+      }
+    }
+  }
+}
+
+// Classifies the lexical form `str` and adds the resulting tag(s) to the
+// word(s) in vwords starting at position `ivwords`.
+//
+// The string is fed to the match state `ms` symbol by symbol: ordinary
+// characters are lower-cased, and each "<...>" tag is fed as a single symbol.
+// Whenever a '+' is reached and the state read so far is final, its position
+// and type are remembered. If the state later dies (or the string ends in a
+// non-final state), the remembered prefix is tagged and, when it is followed
+// by '+', a new TaggerWord is started for the rest and `ivwords` is advanced.
+// When nothing was remembered, the remainder is tagged as undefined.
+void
+MorphoStream::lrlmClassify(wstring const &str, int &ivwords)
+{
+  int floor = 0;       // start of the part of str not tagged yet
+  int last_type = -1;  // type of the last final state seen at a '+'
+  int last_pos = 0;    // position just before that '+'
+
+  ms.init(me->getInitial());
+  for(int i = 0, limit = str.size(); i != limit; i++)
+  {
+    if(str[i] != L'<')
+    {
+      if(str[i] == L'+')
+      {
+        // Remember the prefix ending before this '+' if it is a final state
+        int val = ms.classifyFinals(me->getFinals());
+        if(val != -1)
+        {
+          last_pos = i-1;
+          last_type = val;
+        }
+      }
+      ms.step(towlower(str[i]), ca_any_char);
+    }
+    else
+    {
+      // Extract the tag "<...>" starting at i; a character following a
+      // backslash is skipped while looking for the closing '>'
+      wstring tag = L"";
+      for(int j = i+1; j != limit; j++)
+      {
+        if(str[j] == L'\\')
+        {
+ 	  j++;
+        }
+        else if(str[j] == L'>')
+        {
+ 	  tag = str.substr(i, j-i+1);
+	  i = j;
+          break;
+        }
+      }
+
+      // Step with the tag's own symbol when the alphabet returns a non-zero
+      // symbol for it, otherwise with the generic "any tag" symbol only
+      int symbol = alphabet(tag);
+      if(symbol)
+      {
+        ms.step(symbol, ca_any_tag);
+      }
+      else
+      {
+        ms.step(ca_any_tag);
+      }
+    }
+
+    if(ms.size() == 0)
+    {
+      // The match state is empty: fall back to the remembered prefix, if any
+      if(last_pos != floor)
+      {
+        vwords[ivwords]->add_tag(last_type, 
+                                 str.substr(floor, last_pos - floor + 1),
+                                 td->getPreferRules());
+	if(str[last_pos+1] == L'+' && last_pos+1 < limit )
+	{	
+	  // Cut at the '+': the rest goes to the next word in vwords
+	  floor = last_pos + 1;
+	  last_pos = floor;
+          vwords[ivwords]->set_plus_cut(true); 
+          if (((int)vwords.size())<=((int)(ivwords+1)))
+            vwords.push_back(new TaggerWord(true));
+          ivwords++;
+	  ms.init(me->getInitial());
+	}
+	// Move i back to floor and advance floor by one
+	i = floor++;
+      }
+      else
+      {
+        if (debug)
+        {
+	  wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
+          wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+	}
+        vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
+	return;
+      }
+    }
+    else if(i == limit - 1)
+    {
+      // Last character reached in a non-final state: same fallback as above
+      if(ms.classifyFinals(me->getFinals()) == -1)
+      {
+	if(last_pos != floor)
+	{
+	  vwords[ivwords]->add_tag(last_type, 
+                                   str.substr(floor, last_pos - floor + 1),
+                                   td->getPreferRules());
+          if(str[last_pos+1] == L'+' && last_pos+1 < limit )
+          {	
+            floor = last_pos + 1;
+	    last_pos = floor;
+            vwords[ivwords]->set_plus_cut(true); 
+            if (((int)vwords.size())<=((int)(ivwords+1)))
+              vwords.push_back(new TaggerWord(true));
+            ivwords++;
+            ms.init(me->getInitial());
+	  }
+	  i = floor++;
+        }
+        else
+        {
+          if (debug)
+          {
+	    wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
+            wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+	  }
+          vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
+	  return;
+        }
+      }
+    }
+  }
+  
+  // Tag what remains from floor; undefined if the final state has no type
+  int val = ms.classifyFinals(me->getFinals());
+  if(val == -1)
+  {
+    val = ca_tag_kundef;
+    if (debug)
+    {
+      wcerr<<L"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<L"'\n";
+      wcerr<<L"         This is because of an incomplete tagset definition or a dictionary error\n";
+    }
+
+  }    
+  vwords[ivwords]->add_tag(val, str.substr(floor), td->getPreferRules());
+}
+
+void  // Read the rest of the current word from `input` into vwords[ivwords]:
+MorphoStream::readRestOfWord(int &ivwords)  // first the superficial form, then the '/'-separated analyses up to '$'
+{
+  // first we have the superficial form
+  wstring  str = L"";
+  
+  while(true)
+  {
+    int symbol = fgetwc_unlocked(input);
+    if(feof(input) || ferror(input) || (null_flush && symbol == L'\0'))  // a read/encoding error also ends the input; it never sets EOF, so the loop would otherwise not terminate
+    {
+      end_of_file = true;
+      if(str.size() > 0)
+      {
+        vwords[ivwords]->add_ignored_string(str);
+        wcerr<<L"Warning (internal): kIGNORE was returned while reading a word\n";
+        wcerr<<L"Word being read: "<<vwords[ivwords]->get_superficial_form()<<L"\n";
+        wcerr<<L"Debug: "<< str <<L"\n";
+      }
+      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      return;
+    }
+    else if(symbol == L'\\')
+    {
+      symbol = fgetwc_unlocked(input);  // keep the backslash and the escaped character verbatim
+      str += L'\\';
+      str += static_cast<wchar_t>(symbol);
+    }
+    else if(symbol == L'/')
+    {
+      vwords[ivwords]->set_superficial_form(str); 
+      str = L"";
+      break;
+    }
+    else if(symbol == L'$')
+    {
+      vwords[ivwords]->set_superficial_form(str);
+      vwords[ivwords]->add_ignored_string(L"$");
+      break;  // NOTE(review): falls through to the analyses loop although the word already ended -- confirm intended
+    }
+    else
+    {
+      str += static_cast<wchar_t>(symbol);
+    }
+  }
+
+  // then we read the analyses ("acceptions"), separated by '/' and closed by '$'
+
+  while(true)
+  {
+    int symbol = fgetwc_unlocked(input);
+    if(feof(input) || ferror(input) || (null_flush && symbol == L'\0'))  // same stop condition as in the first loop
+    {
+      end_of_file = true;
+      if(str.size() > 0)
+      {
+        vwords[ivwords]->add_ignored_string(str);
+        wcerr<<L"Warning (internal): kIGNORE was returned while reading a word\n";
+        wcerr<<L"Word being read: "<<vwords[ivwords]->get_superficial_form()<<L"\n";
+        wcerr<<L"Debug: "<< str <<L"\n";
+      }
+      vwords[ivwords]->add_tag(ca_tag_keof, L"", td->getPreferRules());
+      return;
+    }
+    else if(symbol == L'\\')
+    {
+      symbol = fgetwc_unlocked(input);
+      str += L'\\';
+      str += static_cast<wchar_t>(symbol);
+      symbol = L'\\';  // to prevent exiting with '\$'
+    }
+    else if(symbol == L'/')
+    {
+      lrlmClassify(str, ivwords);  // one complete analysis has been read
+      str = L"";
+      ivwords = 0;  // the next analysis starts again at the first buffered word
+      continue;
+    }
+    else if(symbol == L'$')
+    {
+      if(str[0] != L'*')// do nothing with unknown words 
+      {
+	lrlmClassify(str, ivwords);
+      }
+      return;
+    }
+    else
+    {
+      str += static_cast<wchar_t>(symbol);
+    }    
+  }
+}
+
+void
+MorphoStream::setNullFlush(bool nf)
+{
+  this->null_flush = nf;  // when set, a '\0' read from the input is handled like end of file
+}
+
+bool
+MorphoStream::getEndOfFile(void)
+{
+  return this->end_of_file;  // current value of the end-of-input flag
+}
+
+void
+MorphoStream::setEndOfFile(bool eof)
+{
+  this->end_of_file = eof;  // overwrite the end-of-input flag with the caller's value
+}
Index: branches/apertium-tagger/apertium2/apertium/morpho_stream.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/morpho_stream.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/morpho_stream.h	(revision 69632)
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+/** 
+ *  Word class and MorphoStream class definitions
+ *
+ *  @author	Felipe Sánchez-Martínez 
+ */
+
+#ifndef __MORPHOSTREAM_H
+#define __MORPHOSTREAM_H
+
+#include <apertium/constant_manager.h>
+#include <lttoolbox/match_exe.h>
+#include <lttoolbox/match_state.h>
+#include <apertium/tagger_data.h>
+#include <apertium/tagger_word.h>
+
+#include <cstdio>
+#include <deque>
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+/** Class MorphoStream.  
+ *  This class processes the output of class  yyFlexLexer (lex.yy.cc), and 
+ *  builds the TaggerWord objects managed by the tagger 
+ */
+class MorphoStream {
+private:
+  bool foundEOF;  // NOTE(review): not used by the member functions reviewed here -- confirm purpose
+  wstring last_string_tag;  // NOTE(review): not used by the member functions reviewed here -- confirm purpose
+  bool debug;  // when true, lrlmClassify prints warnings for forms that get no coarse tag
+  FILE *input;  // stream the words are read from
+  int ca_any_char;  // passed to ms.step() together with each plain character
+  int ca_any_tag;  // passed to ms.step() for each <tag>
+  int ca_kignorar;
+  int ca_kbarra;
+  int ca_kdollar;
+  int ca_kbegin;
+  int ca_kmot;
+  int ca_kmas;
+  int ca_kunknown;
+  int ca_tag_keof;  // tag added to a word when the end of the input is reached
+  int ca_tag_kundef;  // tag added when a form cannot be classified
+
+  vector<TaggerWord *> vwords; // Buffer of words being built; it has more than
+                             // one entry for ambiguous multiword units
+
+  MatchExe *me;  // supplies the initial and final states used by ms
+  TaggerData *td;  // supplies the prefer rules passed to TaggerWord::add_tag
+  Alphabet alphabet;  // maps a "<tag>" string to its symbol (0 is handled as "no symbol")
+  MatchState ms;  // matcher state advanced by lrlmClassify
+
+  bool null_flush;  // if true, '\0' in the input is handled like end of file
+  bool end_of_file;  // set once end of file (or '\0' with null_flush) has been read
+
+  void readRestOfWord(int &ivwords);
+  void lrlmClassify(wstring const &str, int &ivwords);
+public:
+
+   /** Constructor. @param ftxt the input stream.
+    *  @param d presumably the debug flag, @param t presumably the tagger data -- confirm in the .cc
+    */
+   MorphoStream(FILE *ftxt, bool d, TaggerData *t);
+  
+   /** 
+    *  Destructor 
+    */
+   ~MorphoStream();
+  
+   /** Get next word in the input stream
+    *  @return  A pointer to the next word in the input stream 
+    */
+   TaggerWord* get_next_word();  
+   
+   /** 
+    * Set up the flag to detect '\0' characters
+    * @param nf the null_flush value
+    */
+   void setNullFlush(bool nf);
+   
+   /**
+    * Return true if the last reading is end of file or '\0' when null_flush 
+    * is true
+    * @returns the value of end_of_file
+    */
+   bool getEndOfFile(void);
+   
+   /**
+    * Sets a new value for the end_of_file flag
+    * @param eof the new value for end_of_file
+    */
+   void setEndOfFile(bool eof);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/new2old.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/new2old.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/new2old.xsl	(revision 69632)
@@ -0,0 +1,151 @@
+<?xml version="1.0" encoding="ISO-8859-1"?><!-- -*- xml-*- -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<!--
+==========================================================================
+| File..........: new2old.xsl (originally convert.xsl)
+| Author........: Marco A. Montava
+| Date..........: 29-Jul-2006
+| Description...: Converter of dictionaries with polysemy marks to the simple (non-polysemous) treatment
+==========================================================================
+-->
+
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<xsl:output method="xml" encoding="ISO-8859-1" indent="no"/>
+
+<xsl:template match="/"> <!-- entry point: emits a newline, then processes the dictionary element -->
+  <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:apply-templates select="dictionary"/>
+</xsl:template>
+
+<xsl:template match="dictionary"> <!-- rebuilds dictionary: alphabet, sdefs and pardefs are copied as they are, sections are processed by the templates -->
+  <dictionary><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:copy-of select="alphabet"/> <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:copy-of select="sdefs"/> <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:copy-of select="pardefs"/> <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:apply-templates select="section"/>
+  </dictionary><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+</xsl:template>
+
+<xsl:template match="section"> <!-- recreates the section with its id and type attributes and processes its children -->
+  <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <section id='{@id}' type='{@type}'><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+  <xsl:apply-templates />
+  </section><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+</xsl:template>
+
+
+
+
+<xsl:template match="e[@slr|@srl]"> <!-- entries with polysemy marks (slr and/or srl attribute) -->
+  <xsl:choose>
+     <!-- the entry has both an L-R and an R-L conflict -->
+     <xsl:when test="./@slr!='' and ./@srl!='' ">
+         <xsl:choose>
+         <!-- it is the Default (value ends in ' D') for both R-L and L-R -->
+         <xsl:when test="substring(./@srl,(string-length(./@srl)-1),2)=' D' and substring(./@slr,(string-length(./@slr)-1),2)=' D' ">
+            <e><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+         <!-- it is the Default for L-R only -->
+         <xsl:when test="substring(./@slr,(string-length(./@slr)-1),2)=' D'">
+            <e r="LR"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+         <!-- it is the Default for R-L only -->
+         <xsl:when test="substring(./@srl,(string-length(./@srl)-1),2)=' D'">
+            <e r="RL"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+         <!-- **** if it is the default for neither, the entry is dropped (ignored) **** -->
+          </xsl:choose>
+      </xsl:when>
+      <!-- the entry only has an R-L conflict -->
+      <xsl:when test="./@srl!='' ">
+         <xsl:choose>
+         <!-- it is the Default R-L solution and has the RL restriction -->
+         <xsl:when test="substring(./@srl,(string-length(./@srl)-1),2)=' D' and ./@r='RL'">
+            <e r="RL"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+        <!-- it is the Default R-L solution -->
+         <xsl:when test="substring(./@srl,(string-length(./@srl)-1),2)=' D' ">
+            <e><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+         <!-- it is not the Default R-L solution and has the RL restriction -->
+         <xsl:when test="@r='RL' ">
+            <!-- THE ENTRY IS DROPPED -->
+         </xsl:when>
+         <!-- it is not the Default R-L solution -->
+         <xsl:otherwise>
+            <e r="LR"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:otherwise>
+         </xsl:choose>
+      </xsl:when>
+      <!-- the entry only has an L-R conflict -->
+      <xsl:when test="./@slr!=''">
+         <xsl:choose>
+         <!-- it is the Default L-R solution and has the LR restriction -->
+         <xsl:when test="substring(./@slr,(string-length(./@slr)-1),2)=' D' and ./@r='LR'">
+            <e r="LR"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+        <!-- it is the Default L-R solution -->
+         <xsl:when test="substring(./@slr,(string-length(./@slr)-1),2)=' D' ">
+            <e><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:when>
+         <!-- it is not the Default L-R solution and has the LR restriction -->
+         <xsl:when test="@r='LR' ">
+            <!-- THE ENTRY IS DROPPED -->
+         </xsl:when>
+         <!-- it is not the Default L-R solution -->
+         <xsl:otherwise>
+            <e r="RL"><xsl:value-of select="string('&#xa;')"/><!-- \n -->
+               <xsl:copy-of select="*"/>
+            <xsl:value-of select="string('&#xa;')"/><!-- \n -->
+            </e>
+         </xsl:otherwise>
+         </xsl:choose>
+      </xsl:when>
+  </xsl:choose>    
+</xsl:template>
+
+<xsl:template match="e[not(@slr|@srl)]">  <!-- entries without polysemy marks: copied unchanged -->
+  <xsl:copy-of select ="."/>
+</xsl:template>
+
+
+</xsl:stylesheet>
\ No newline at end of file
Index: branches/apertium-tagger/apertium2/apertium/postchunk.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/postchunk.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/postchunk.dtd	(revision 69632)
@@ -0,0 +1,434 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!-- 
+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+    Draft of DTD for the structural transfer rule files 
+     
+    Sergio Ortiz, Gema Ramírez-Sánchez, Mireia Ginestí, Mikel L. Forcada, 
+    2005.07.29. 
+-->    
+
+<!ENTITY % condition "(and|or|not|equal|begins-with|begins-with-list|ends-with|ends-with-list|contains-substring|in)">
+<!ENTITY % container "(var|clip)">
+<!ENTITY % sentence "(let|out|choose|modify-case|call-macro|append)">
+<!ENTITY % value "(b|clip|lit|lit-tag|var|get-case-from|case-of|concat|lu-count|lu|mlu)">
+<!ENTITY % stringvalue "(clip|lit|var|get-case-from|case-of|lu-count)">
+
+<!ELEMENT postchunk (section-def-cats, section-def-attrs, section-def-vars, section-def-lists?, section-def-macros?, section-rules)>
+<!-- 
+     'postchunk' is the root element containing the whole structural
+     postchunk rule file.  
+-->
+
+<!ELEMENT section-def-cats (def-cat+)>
+<!-- 
+     The 'def-cats' section defines the categories used to build the
+patterns used in rules
+ -->
+
+<!ELEMENT def-cat (cat-item+)>
+<!ATTLIST def-cat  n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each 'def-cat' defines one category in terms of a list of
+     category items and has a unique name 'n', which is mandatory
+-->
+
+<!ELEMENT cat-item EMPTY>
+<!ATTLIST cat-item name CDATA #REQUIRED> 
+<!-- 
+     In addition, a required attribute, "name", is used to specify 
+     which chunk name is detected by this cat-item
+-->
+ 
+<!ELEMENT section-def-attrs (def-attr+)>
+
+<!-- 
+     The 'def-attrs' section defines the attributes that will be
+     identified in matched lexical forms 
+-->
+
+<!ELEMENT def-attr (attr-item+)>
+<!ATTLIST def-attr n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!-- 
+     Each def-attr defines one attribute in terms of a list of
+     attribute items and has a mandatory unique name n 
+-->
+
+<!ELEMENT attr-item EMPTY>
+<!ATTLIST attr-item tags CDATA #IMPLIED
+                    c CDATA #IMPLIED>
+<!-- 
+     Each 'attr-item' specifies a subsequence of the tags in
+     that lexical form (attribute 'tags')
+-->
+
+<!ELEMENT section-def-vars (def-var+)>
+<!-- 
+     The 'def-vars' section defines the global variables
+     that will be used to transfer information between rules
+-->
+
+<!ELEMENT def-var EMPTY>
+<!ATTLIST def-var n ID #REQUIRED
+                  v CDATA #IMPLIED
+                  c CDATA #IMPLIED>
+<!-- 
+     The definition of a global variable has a mandatory unique name 'n' that
+     will be used to refer to it. An initial value can also be specified
+     by means of the 'v' attribute.  The default initial value is the
+     empty string.
+-->
+
+<!ELEMENT section-def-lists (def-list)+>
+<!--
+     Element 'section-def-lists' encloses a set of list definitions
+-->
+
+<!ELEMENT def-list (list-item+)>
+<!ATTLIST def-list n ID #REQUIRED
+                   c CDATA #IMPLIED>
+<!--
+     The 'def-list' element defines a named list to search with the 'in' 
+     element.  Attribute 'n' sets the name of the list
+-->
+
+<!ELEMENT list-item EMPTY>
+<!ATTLIST list-item v CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!--
+     Attribute 'v' of 'list-item' element contains the value to be added to 
+     the list being defined     
+-->
+
+<!ELEMENT section-def-macros (def-macro)+>
+<!-- 
+
+     The 'def-macros' section defines macros containing portions of
+     code frequently used in the action part of rules
+
+-->
+
+<!ELEMENT def-macro (%sentence;)+>
+<!ATTLIST def-macro n ID #REQUIRED>
+<!ATTLIST def-macro npar CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+<!-- 
+     Macro definition:
+     
+     A macro has a mandatory name (the value of 'n'), a number of parameters
+     (the value of 'npar') and a body containing arguments and statements.  
+-->
+
+<!ELEMENT section-rules (rule+)>
+<!-- 
+     The rules section contains a sequence of one or more rules
+-->
+
+<!ELEMENT rule (pattern, action)>
+<!ATTLIST rule comment CDATA #IMPLIED>
+<!-- 
+      Each rule has a pattern and an action 
+      * Attribute 'comment' allows a comment to be included with the rule
+-->
+
+<!ELEMENT pattern (pattern-item)>
+<!-- 
+The pattern is specified in terms of pattern items, each one
+representing a lexical form in the matched pattern 
+-->
+
+<!ELEMENT pattern-item EMPTY>
+<!ATTLIST pattern-item n IDREF #REQUIRED>
+<!-- 
+       Each attribute to be activated is referred to by its name in the def-cats section 
+-->
+
+<!ELEMENT action (%sentence;)*>
+<!ATTLIST action c CDATA #IMPLIED>
+<!-- 
+       Encloses the procedural part of a rule
+-->
+
+<!ELEMENT choose (when+,otherwise?)>
+<!ATTLIST choose c CDATA #IMPLIED>
+<!-- 
+     The choose statement is a selection statement (similar to a case
+     statement) composed of one or more tested cases and an optional
+     otherwise 
+-->
+
+<!ELEMENT when (test,(%sentence;)*)>
+<!ATTLIST when c CDATA #IMPLIED>
+<!-- 
+     Each tested case is a block of zero or more statements 
+-->
+
+<!ELEMENT otherwise (%sentence;)+>
+<!ATTLIST otherwise c CDATA #IMPLIED>
+<!-- 
+     The otherwise case is also a block of one or more statements 
+-->
+
+<!ELEMENT test (%condition;)>
+<!ATTLIST test c CDATA #IMPLIED>
+<!-- 
+     The test in a tested case may be a conjunction, a disjunction, or
+     a negation of simpler tests, as well as a simple equality test
+-->
+
+<!ELEMENT and ((%condition;),(%condition;)+)>
+<!--  
+     Each conjunction test contains two or more simpler tests 
+-->
+
+<!ELEMENT or ((%condition;),(%condition;)+)>
+<!-- 
+     Each disjunction test contains two or more simpler tests 
+-->
+
+<!ELEMENT not (%condition;)>
+<!-- 
+     The negation of a simpler test is a test itself 
+-->
+
+<!ELEMENT equal (%value;,%value;)> 
+<!ATTLIST equal caseless (no|yes) #IMPLIED>
+<!-- 
+      The simplest test is an equality test. The right part and the
+      left part of the equality may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with (%value;,%value;)> 
+<!ATTLIST begins-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the beginning.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT ends-with (%value;,%value;)> 
+<!ATTLIST ends-with caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part at the end.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT begins-with-list (%value;,list)> 
+<!ATTLIST begins-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part begins with any of the items of the list.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT ends-with-list (%value;,list)> 
+<!ATTLIST ends-with-list caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part ends with any of the items of the list.
+      The first part of the test may be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section. The second part
+      must always be a list.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+<!ELEMENT contains-substring (%value;,%value;)> 
+<!ATTLIST contains-substring caseless (no|yes) #IMPLIED>
+<!-- 
+      Tests if the left part contains the right part.
+      Both parts of the test may both be a clip (see below), a
+      literal string ('lit'), a literal tag ('lit-tag') or the value of 
+      a variable ('var') defined in the def-vars section.  When the attribute
+      'caseless' is set to 'yes', the comparison is made without attending
+      to the case.
+-->
+
+
+
+
+<!ELEMENT in (%value;, list)>
+<!ATTLIST in caseless (no|yes) #IMPLIED>
+<!--
+    'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+    this search is performed without attending to the case
+-->
+
+<!ELEMENT list EMPTY>
+<!ATTLIST list n IDREF #REQUIRED>
+<!--
+    'list' refers, by the name in attribute 'n', to a list defined earlier in
+    the 'section-def-lists' section
+-->
+
+<!ELEMENT let (%container;, %value;)>
+<!-- 
+      An assignment statement ('let') assigns the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip
+-->
+
+<!ELEMENT append (%value;)+>
+<!ATTLIST append n IDREF #REQUIRED>
+<!-- 
+      This instruction appends the value of a clip (see
+      below), a literal string ('lit'), a literal tag('lit-tag') or the 
+      value of a global variable ('var') to either a global variable ('var') 
+      or a clip, identified by the "n" attribute
+-->
+
+
+<!ELEMENT out (b|lu|mlu|var)+>
+<!ATTLIST out c CDATA #IMPLIED>
+<!-- 
+      'out' is an output statement; it may output blanks ('b'), words ('lu'), multiwords ('mlu') or variables ('var')
+-->
+
+<!ELEMENT modify-case (%container;, %stringvalue;)>
+<!--
+      The first argument of 'modify-case' takes (copies) the case of the 
+      second argument.
+--> 
+
+<!ELEMENT call-macro (with-param)*>
+<!ATTLIST call-macro n IDREF #REQUIRED>
+<!-- 
+      A macro may be called anywhere by name with zero or more
+      arguments ('with-param' elements)
+-->
+
+<!ELEMENT with-param EMPTY>
+<!ATTLIST with-param pos CDATA #REQUIRED>
+<!-- 
+      The attribute pos in each argument is used to refer to a lexical
+      form in the current rule. For example, if a 2-parameter macro
+      has been defined to perform noun-adjective agreement operations,
+      it may be used with arguments 1 and 2 in a noun-adjective rule,
+      with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+      arguments 1 and 3 in a noun-adverb-adjective rule, and with
+      arguments 2 and 1 in an adjective-noun rule 
+-->
+
+<!ELEMENT clip EMPTY>
+<!ATTLIST clip pos CDATA #REQUIRED
+               part CDATA #REQUIRED
+               c CDATA #IMPLIED>
+<!-- 
+      A 'clip' is a substring of a source-language or target-language
+      lexical form, extracted according to an attribute:
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+
+-->
+
+<!ELEMENT lit EMPTY>
+<!ATTLIST lit v CDATA #REQUIRED>
+<!-- 
+      A literal string value: the value of the literal is the value of
+      the 'v' attribute
+-->
+
+<!ELEMENT lit-tag EMPTY>
+<!ATTLIST lit-tag v CDATA #REQUIRED>
+<!-- 
+      A literal tag value: the value of the tag is the value of
+      the 'v' attribute
+-->
+
+
+<!ELEMENT var EMPTY>
+<!ATTLIST var n IDREF #REQUIRED>
+<!-- 
+     Each 'var' is a variable identifier: the attribute n is the name
+     of the variable. When it is in an 'out', a 'test', or the right
+     part of a 'let', it represents the value of the variable; when in
+     the left part of a 'let' it represents the reference of the
+     variable. 
+-->
+
+<!ELEMENT get-case-from (clip|lit|var)> 
+<!ATTLIST get-case-from pos CDATA #REQUIRED>
+<!-- Warning: every comment that mentions get-case-from still needs
+to be updated -->
+
+<!ELEMENT case-of EMPTY>
+<!ATTLIST case-of pos CDATA #REQUIRED
+               part CDATA #REQUIRED>
+<!--
+      A 'case-of' is a value representing the case of a "clip".  This value 
+      will be "aa" (all lowercase), "Aa" (first uppercase) and "AA",
+      (all uppercase).
+
+      * 'pos' is an index (1, 2, 3...) used to select a lexical form
+         inside the rule;
+   
+      * the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+-->
+
+
+<!ELEMENT concat (%value;)+>
+<!-- Concatenates a sequence of values -->
+
+<!ELEMENT mlu (lu+)>
+<!-- Encloses a multiword -->
+
+<!ELEMENT lu (%value;)+>
+<!-- Encloses a word -->
+
+<!ELEMENT b EMPTY>
+<!ATTLIST b pos CDATA #IMPLIED>
+<!-- 
+     'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+     with pos="2" refers to the [super]blanks (including format data
+     encapsulated by the de-formatter) between lexical form 2 and
+     lexical form 3. Managing [super]blanks explicitly allows for the
+     correct placement of format when the result of structural
+     transfer has more or less lexical items than the original or has
+     been reordered in some way.  If attribute "pos" is not specified, then
+     a single blank (ASCII 32) is generated.
+-->
+
+<!ELEMENT lu-count EMPTY>
+<!--
+     Number of lexical units (words inside the chunk) in the rule
+--> 
Index: branches/apertium-tagger/apertium2/apertium/reformat.xsl
===================================================================
--- branches/apertium-tagger/apertium2/apertium/reformat.xsl	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/reformat.xsl	(revision 69632)
@@ -0,0 +1,237 @@
+<?xml version="1.0" encoding="ISO-8859-1"?> <!-- -*- nxml -*- -->
+<!--
+ Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, see <http://www.gnu.org/licenses/>.
+-->
+<xsl:stylesheet version="1.0"
+                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+  <xsl:output method="text" encoding="UTF-8"/>
+
+<xsl:template match="format">
+
+%{
+
+#ifndef GENFORMAT
+#include "apertium_config.h"
+#endif
+#include &lt;apertium/unlocked_cstdio.h&gt;
+
+#include &lt;cstdlib&gt;
+#include &lt;iostream&gt;
+#include &lt;libgen.h&gt;
+#include &lt;map&gt;
+#include &lt;string&gt;
+#include &lt;unistd.h&gt;
+#include &lt;lttoolbox/lt_locale.h&gt;
+#include &lt;lttoolbox/ltstr.h&gt;
+#include &lt;wchar.h&gt;
+#ifdef _WIN32
+#include &lt;io.h&gt;
+#include &lt;fcntl.h&gt;
+#endif
+
+using namespace std;
+
+<xsl:for-each select="./rules/replacement-rule">
+  <!-- For the n-th replacement-rule this emits a global
+       map&lt;wstring, wstring, Ltstr&gt; called Sn_substitution and a
+       function Sn_init() that fills it.  Only 'replace' children with
+       prefer="yes" contribute an entry; the entry is keyed by the
+       'target' attribute and holds the 'source' attribute. -->
+  <xsl:variable name="varname"
+		select="concat(concat(string('S'),position()),string('_substitution'))"/>
+  <xsl:value-of select="string('map&lt;wstring, wstring, Ltstr&gt; S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_substitution;&#xA;&#xA;void S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_init()&#xA;{')"/>
+
+  <xsl:for-each select="./replace">
+    <xsl:if test="./@prefer = string('yes')">
+      <xsl:value-of select="string('&#xA;  ')"/>
+      <xsl:value-of select="$varname"/>
+      <xsl:value-of select="string('[L&quot;')"/>
+      <xsl:value-of select="./@target"/>
+      <xsl:value-of select="string('&quot;] = L&quot;')"/>
+      <xsl:value-of select="./@source"/>
+      <xsl:value-of select="string('&quot;;')"/>
+    </xsl:if>
+  </xsl:for-each>
+
+  <xsl:value-of select="string('&#xA;}&#xA;')"/>
+</xsl:for-each>
+
+// Bytes received so far that could not be converted yet
+string memconv;
+
+// Appends the given bytes to memconv and tries to convert the whole
+// buffer to a wide string with mbstowcs.  On success the buffer is
+// emptied and the converted text returned; on failure the bytes stay
+// buffered for the next call and an empty string is returned.
+wstring convertir(char const *multibyte, int const length)
+{
+  memconv.append(multibyte, length);
+  if(memconv.empty())
+  {
+    return L"";
+  }
+
+  size_t const pending = memconv.size();
+  // one extra slot so the result can always be terminated
+  wstring wide(pending + 1, L'\0');
+  size_t const converted = mbstowcs(&amp;wide[0], memconv.c_str(), pending);
+
+  if(converted != static_cast&lt;size_t&gt;(-1))
+  {
+    memconv = "";
+    wide.resize(converted);
+    return wide;
+  }
+
+  // conversion failed: only complain once at least 4 bytes have piled up
+  if(pending &gt;= 4)
+  {
+    wcerr &lt;&lt; L"Warning: wrong encoding" &lt;&lt; endl;
+  }
+  return L"";
+}
+
+%}
+
+%option nounput
+%option noyywrap<xsl:if test="./options/case-sensitive/@value=string('no')">
+%option caseless</xsl:if>
+
+%%
+
+"["|"]"&#x9;{
+  // do nothing
+}
+
+"[@"[^]]+"]"&#x9;{
+  // The matched token wraps a file name between its two leading and one
+  // trailing delimiter characters.  The content of that file is copied
+  // to yyout and the file is then removed.
+  string filename = yytext;
+  filename = filename.substr(2, filename.size()-3);
+  FILE *temp = fopen(filename.c_str(), "r");
+  wint_t mychar;
+
+  // The handle must be validated before anything uses it: the Windows
+  // only _fileno call below would otherwise receive a null pointer and
+  // the error message would never be reached.
+  if(!temp)
+  {
+    cerr &lt;&lt; "ERROR: File '" &lt;&lt; filename &lt;&lt;"' not found." &lt;&lt; endl;
+    exit(EXIT_FAILURE);
+  }
+#ifdef _WIN32
+  _setmode(_fileno(temp), _O_U8TEXT);
+#endif
+
+  while(static_cast&lt;int&gt;(mychar = fgetwc_unlocked(temp)) != EOF)
+  {
+    fputwc_unlocked(mychar, yyout);
+  }
+  fclose(temp);
+  unlink(filename.c_str());
+}
+
+"[\\@"&#x9;{
+  fputwc_unlocked(L'@', yyout);
+}
+
+".[]"&#x9;{
+  // do nothing
+}
+
+"\\"<xsl:value-of select="/format/options/escape-chars/@regexp"/>&#x9;{
+  fputws_unlocked(convertir(yytext+1, yyleng-1).c_str(), yyout);
+}
+
+
+
+.|\n&#x9;{<!-- Catch-all rule: converts the matched text to a wide string and writes either its replacement (first substitution map that contains it) or the text itself -->
+  wstring yytext_conv = convertir(yytext, yyleng);
+<xsl:for-each select="./rules/replacement-rule">
+  <!-- One "if" (or "else if" after the first) per replacement-rule:
+       when Sn_substitution holds yytext_conv, the mapped value is
+       written to yyout. -->
+  <xsl:variable name="varname"
+		select="concat(concat(string('S'),position()),string('_substitution'))"/>
+
+  <xsl:value-of select="string('  ')"/>
+  <xsl:if test="not(position()=1)">
+    <xsl:value-of select="string('else ')"/>
+  </xsl:if>
+  <xsl:value-of select="string('if(')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('.find(yytext_conv) != ')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('.end())&#xA;  {&#xA;')"/>
+  <xsl:value-of select="string('    fputws_unlocked(')"/>
+  <xsl:value-of select="$varname"/>
+  <xsl:value-of select="string('[yytext_conv].c_str(), yyout);')"/>
+  <xsl:value-of select="string('&#xA;  }&#xA;')"/>
+</xsl:for-each>
+
+<xsl:if test="not(count(./rules/replacement-rule)=0)">
+  <!-- The plain write below is wrapped in a final "else" only when at
+       least one replacement-rule produced an "if" above. -->
+  <xsl:value-of select="string('  else&#xA;  {&#xA;  ')"/>
+</xsl:if>
+<xsl:value-of select="string('  fputws_unlocked(yytext_conv.c_str(), yyout);&#xA;')"/>
+<xsl:if test="not(count(./rules/replacement-rule)=0)">
+  <xsl:value-of select="string('  }')"/>
+</xsl:if>
+}
+
+&lt;&lt;EOF&gt;&gt;&#x9;{
+  return 0;
+}
+
+%%
+
+void usage(string const &amp;progname)
+{<!-- Prints the command line synopsis and the format name to cerr, then terminates the process.  NOTE(review): the exit status is EXIT_SUCCESS although every call visible in this template is an error path (too many arguments, fopen failure); confirm whether callers rely on that before changing it -->
+  cerr &lt;&lt; "USAGE: " &lt;&lt; progname &lt;&lt; " [input_file [output_file]" &lt;&lt; ']' &lt;&lt; endl;
+  cerr &lt;&lt; "<xsl:value-of select="./@name"/> format processor " &lt;&lt; endl;
+  exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[])
+{<!-- Entry point of the generated scanner: argv[1], when given, is opened as yyin and argv[2], when given, as yyout; with no arguments both are left untouched.  More than two arguments or a failed fopen ends in usage() -->
+  LtLocale::tryToSetLocale();
+
+  if(argc &gt; 3)
+  {
+    usage(argv[0]);
+  }
+
+  switch(argc)
+  {
+    case 3:
+      yyout = fopen(argv[2], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      }<!-- no break here: case 3 deliberately continues into case 2 so the input file is opened as well -->
+    case 2:
+      yyin = fopen(argv[1], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default:
+      break;
+  }
+#ifdef _WIN32
+  _setmode(_fileno(yyin), _O_U8TEXT);
+  _setmode(_fileno(yyout), _O_U8TEXT);
+#endif
+
+<xsl:for-each select="./rules/replacement-rule">
+  <!-- Emits one Sn_init() call per replacement-rule so every
+       substitution map is filled before scanning starts. -->
+  <xsl:value-of select="string('  S')"/>
+  <xsl:value-of select="position()"/>
+  <xsl:value-of select="string('_init();&#xA;')"/>
+</xsl:for-each>
+
+  yylex();
+  fclose(yyin);
+  fclose(yyout);
+}
+</xsl:template>
+</xsl:stylesheet>
Index: branches/apertium-tagger/apertium2/apertium/string_utils.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/string_utils.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/string_utils.cc	(revision 69632)
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/string_utils.h>
+#include <lttoolbox/xml_parse_util.h>
+#include <iostream>
+#include <cstring>
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+// Returns a copy of str without its leading and trailing white space
+// (as classified by iswspace).  A blank-only or empty input yields L"".
+wstring
+StringUtils::trim(wstring const &str)
+{
+  if(str.empty())
+  {
+    return L"";
+  }
+
+  int first = 0;
+  int last = str.size() - 1;
+
+  // move both inclusive bounds inwards, never letting them cross
+  for(; first < last && iswspace(str[first]); ++first)
+  {
+  }
+  for(; last > first && iswspace(str[last]); --last)
+  {
+  }
+
+  // 'last' is inclusive when it sits on a kept character; if it still
+  // sits on white space the whole input was blank and nothing is kept
+  int const length = iswspace(str[last]) ? last - first : last - first + 1;
+
+  return str.substr(first, length);
+}
+
+// Splits input at every occurrence of delimiter and returns the pieces
+// in order.  Empty pieces (two adjacent delimiters, or a delimiter at
+// the very start) are reported on wcerr and left out of the result.
+// An empty delimiter cannot split anything: the whole input is then
+// returned as the only piece (or no piece at all if input is empty).
+vector<wstring>
+StringUtils::split_wstring(wstring const &input, wstring const &delimiter)
+{
+  vector<wstring> result;
+
+  if(delimiter.empty())
+  {
+    // find() matches an empty pattern at the current position without
+    // consuming anything, which used to make the loop below spin forever
+    if(!input.empty())
+    {
+      result.push_back(input);
+    }
+    return result;
+  }
+
+  wstring::size_type pos = 0;
+  while(pos < input.size())
+  {
+    wstring::size_type new_pos = input.find(delimiter, pos);
+    if(new_pos == wstring::npos)
+    {
+      // no further delimiter: the last piece runs to the end of input
+      new_pos = input.size();
+    }
+
+    wstring const s = input.substr(pos, new_pos - pos);
+    if(s.empty())
+    {
+      wcerr<<L"Warning in StringUtils::split_wstring: After splitting there is an empty string\n";
+      wcerr<<L"Skipping this empty string\n";
+    }
+    else
+    {
+      result.push_back(s);
+    }
+    pos = new_pos + delimiter.size();
+  }
+
+  return result;
+}
+
+// Joins the elements of v into one string, separated by a single blank
+wstring
+StringUtils::vector2wstring(vector<wstring> const &v)
+{
+  wstring joined;
+  for(vector<wstring>::const_iterator it = v.begin(); it != v.end(); ++it)
+  {
+    if(it != v.begin())
+    {
+      joined += L' ';
+    }
+    joined += *it;
+  }
+  return joined;
+}
+
+// Returns a copy of source in which every occurrence of olds has been
+// replaced by news.  Text inserted by a replacement is never rescanned.
+// An empty olds matches nothing, so source is returned unchanged.
+wstring
+StringUtils::substitute(wstring const &source, wstring const &olds, wstring const &news) {
+  wstring s = source;
+
+  if(olds.empty())
+  {
+    // find() would match the empty pattern at every position and the
+    // loop below would insert news forever
+    return s;
+  }
+
+  // size_type keeps npos intact; the former unsigned int truncated it
+  wstring::size_type p = s.find(olds, 0);
+  while(p != wstring::npos)
+  {
+    s.replace(p, olds.length(), news);
+    // resume after the text just inserted
+    p = s.find(olds, p + news.length());
+  }
+
+  return s;
+}
+
+// Decimal representation of n as a wide string
+wstring
+StringUtils::itoa(int n)
+{
+  string const digits = itoa_string(n);
+  return XMLParseUtil::stows(digits);
+}
+
+// Decimal representation of n as a narrow string ("%d" formatting)
+string
+StringUtils::itoa_string(int n)
+{
+  char buffer[256];
+  snprintf(buffer, 256, "%d", n);
+  return string(buffer);
+}
+
+// Fixed-notation ("%f") representation of f as a wide string.
+wstring
+StringUtils::ftoa(double f)
+{
+  // "%f" prints every integral digit: the largest finite double needs
+  // 309 digits plus sign, point and six decimals, which overflowed the
+  // former 256 byte buffer.  512 bytes hold any double, and the bounded
+  // call guards against anything unexpected.
+  char str[512];
+  snprintf(str, sizeof(str), "%f", f);
+  return XMLParseUtil::stows(str);
+}
+
+// Returns a copy of s with every character passed through towlower
+wstring
+StringUtils::tolower(wstring const &s)
+{
+  wstring lowered = s;
+  for(wstring::iterator it = lowered.begin(); it != lowered.end(); ++it)
+  {
+    *it = static_cast<wchar_t>(towlower(*it));
+  }
+  return lowered;
+}
+
+// Returns a copy of s with every character passed through towupper
+wstring
+StringUtils::toupper(wstring const &s) {
+  wstring raised = s;
+  for(wstring::iterator it = raised.begin(); it != raised.end(); ++it)
+  {
+    *it = static_cast<wchar_t>(towupper(*it));
+  }
+  return raised;
+}
+
+// Equality and inequality between std::string and C strings.  All six
+// overloads compare the NUL-terminated character data with strcmp.
+
+bool Apertium::operator==(string const &s1, string const &s2)
+{
+  char const *left = s1.c_str();
+  char const *right = s2.c_str();
+  return strcmp(left, right) == 0;
+}
+
+bool Apertium::operator==(string const &s1, char const *s2)
+{
+  char const *left = s1.c_str();
+  return strcmp(left, s2) == 0;
+}
+
+bool Apertium::operator==(char const *s1, string const &s2)
+{
+  char const *right = s2.c_str();
+  return strcmp(s1, right) == 0;
+}
+
+bool Apertium::operator!=(string const &s1, string const &s2)
+{
+  char const *left = s1.c_str();
+  char const *right = s2.c_str();
+  return strcmp(left, right) != 0;
+}
+
+bool Apertium::operator!=(string const &s1, char const *s2)
+{
+  char const *left = s1.c_str();
+  return strcmp(left, s2) != 0;
+}
+
+bool Apertium::operator!=(char const *s1, string const &s2)
+{
+  char const *right = s2.c_str();
+  return strcmp(s1, right) != 0;
+}
Index: branches/apertium-tagger/apertium2/apertium/string_utils.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/string_utils.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/string_utils.h	(revision 69632)
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2006 Universitat d'Alacant / Universidad de Alicante
+ * author: Felipe Sánchez-Martínez
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __STRINGUTILS_H_
+#define __STRINGUTILS_H_
+
+#include <string>
+#include <cstring>
+#include <vector>
+
+using namespace std;
+
+// Comparison operators between std::string and C strings.  They only
+// take part in overload resolution where namespace Apertium is visible
+// (for instance after "using namespace Apertium;").
+namespace Apertium
+{
+  // true when both operands hold the same NUL-terminated text
+  bool operator==(string const &s1, string const &s2);
+  bool operator==(string const &s1, char const *s2);
+  bool operator==(char const *s1, string const &s2);
+  // true when the NUL-terminated texts differ
+  bool operator!=(string const &s1, string const &s2);
+  bool operator!=(string const &s1, char const *s2);
+  bool operator!=(char const *s1, string const &s2);
+}
+
+// Collection of static helpers for wide-string handling
+class StringUtils {
+  public:
+  
+  // Remove white space from both ends of str
+  static wstring trim(wstring const &str);
+
+  // Split input at each occurrence of delimiter
+  static vector<wstring> split_wstring(wstring const &input, wstring const &delimiter);
+
+  // Join the elements of v, separated by a blank
+  static wstring vector2wstring(vector<wstring> const &v);
+
+  //Replace each occurrence of the string 'olds' by the string 'news' in string 'source'
+  static wstring substitute(const wstring &source, const wstring &olds, const wstring &news);
+
+  // Decimal text of n as a wide string
+  static wstring itoa(int n);
+  
+  // Decimal text of n as a narrow string
+  static string itoa_string(int n);
+  
+  // Text of f as a wide string
+  static wstring ftoa(double f);
+
+  // Lower-case copy of s
+  static wstring tolower(wstring const &s);
+
+  // Upper-case copy of s
+  static wstring toupper(wstring const &s);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tagger.dtd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger.dtd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger.dtd	(revision 69632)
@@ -0,0 +1,157 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+
+<!-- 
+   Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+  
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+  
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+  
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+     DTD for the tagset and the rules to enforce the state to state
+     transition probabilities used by the part-of-speech tagger. 
+     2005.07.29.
+-->
+
+<!ELEMENT tagger (tagset,forbid?,enforce-rules?,preferences?,discard-on-ambiguity?)>
+<!ATTLIST tagger name CDATA #REQUIRED>
+<!--
+	'tagger' is the root element containing the whole tagset for a given
+    language specified through the mandatory attribute 'name'
+-->
+
+<!ELEMENT tagset (def-label+,def-mult*)>
+
+<!-- 
+	The 'tagset' section defines the correspondence between simple 
+    or multiple morphological categories defining a lexical form and the coarser 
+    ones with which the part-of-speech tagger works
+-->
+
+<!ELEMENT def-label (tags-item+)>
+<!ATTLIST def-label name CDATA #REQUIRED
+                    c CDATA #IMPLIED
+                    closed CDATA #IMPLIED>
+
+<!-- 
+	Each 'def-label' defines one coarse tag in terms of a list of fine tags 
+    and has a mandatory unique name. The optional attribute 'closed="true"' may be used
+    to specify if the defined fine tags belong to a closed list.
+    c is for comments and is ignored
+-->
+
+<!ELEMENT tags-item EMPTY>
+<!ATTLIST tags-item tags CDATA #REQUIRED
+	            lemma CDATA #IMPLIED>
+
+<!--
+	Each 'tags-item' may be a dot-separated subsequence of the morphological tags
+    corresponding to a coarse tag optionally in association with a given lemma 
+-->
+<!ELEMENT def-mult (sequence+)>
+<!ATTLIST def-mult name CDATA #REQUIRED
+                    c CDATA #IMPLIED
+                   closed CDATA #IMPLIED>
+
+<!--
+	Each 'def-mult' defines one coarse tag in terms of a sequence of coarse
+    tags previously defined as 'def-labels' or a sequence of fine tags. A mandatory 
+    name is required for each 'def-mult', which may also have an optional attribute 
+    'closed="true"' if it belongs to a closed list
+    c is for comments and is ignored
+-->
+
+<!ELEMENT sequence ((tags-item|label-item)+)>
+
+<!--
+	Element 'sequence' encloses a set of tags or labels which defines 
+    a unit with more than one label
+-->
+
+<!ELEMENT label-item EMPTY>
+<!ATTLIST label-item label CDATA #REQUIRED
+                    c CDATA #IMPLIED>
+
+<!--
+	Each 'label' of the 'label-item' corresponds to a coarse tag previously 
+    defined as a 'def-label' by a name.
+    c is for comments and is ignored
+-->
+
+<!ELEMENT forbid (label-sequence+)>
+
+<!--   
+	Element 'forbid' contains sequences of morphological categories that are not 
+    allowed in a given language
+-->
+
+<!ELEMENT label-sequence (label-item+)>
+<!ATTLIST label-sequence c CDATA #IMPLIED>
+
+<!--	
+	Each 'label-sequence' is restricted to two 'label-items' 
+    c is for comments and is ignored
+-->
+
+<!ELEMENT enforce-rules (enforce-after+)>
+
+<!--
+        Element 'enforce-rules' defines sets of coarse tags that must follow specified ones
+-->
+
+<!ELEMENT enforce-after (label-set)>
+<!ATTLIST enforce-after label CDATA #REQUIRED
+                        c CDATA #IMPLIED>
+
+<!-- 
+	Each 'enforce-after' encloses the set of coarse tags ('label-set') that must follow 
+    the one defined in 'label', as a mandatory attribute
+    c is for comments and is ignored
+-->
+
+<!ELEMENT label-set (label-item+)>
+<!ATTLIST label-set c CDATA #IMPLIED>
+<!--
+	The set of 'label-items' enforced after a 'label' are enclosed inside element 'label-set'  
+    c is for comments and is ignored
+-->
+
+<!ELEMENT preferences (prefer+)>
+
+<!-- 	
+	Element 'preferences' makes it possible to choose among two or more fine tag sequences 
+    which are grouped in the same coarse tag. 
+-->
+
+<!ELEMENT prefer EMPTY>
+<!ATTLIST prefer tags CDATA #REQUIRED
+                 c CDATA #IMPLIED>
+
+<!-- 
+	Each 'prefer' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+    c is for comments and is ignored
+-->
+
+<!ELEMENT discard-on-ambiguity (discard+)>
+
+<!--
+        List of label-item or tags-item to be discarded when an ambiguity
+	occurs inside a word
+-->
+
+<!ELEMENT discard EMPTY>
+<!ATTLIST discard tags CDATA #REQUIRED
+                  c CDATA #IMPLIED>
+
+<!-- 
+	Each 'discard' element has a mandatory attribute 'tags' made of a sequence of fine tags 
+    c is for comments and is ignored
+-->
Index: branches/apertium-tagger/apertium2/apertium/tagger_data.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data.cc	(revision 69632)
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/tagger_data.h>
+#include <lttoolbox/compression.h>
+#include <apertium/endian_double_util.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+// Copies every data member of o into this object.  Used by the copy
+// constructor and the assignment operator (and by derived classes).
+void
+TaggerData::copy(TaggerData const &o)
+{
+  open_class = o.open_class;
+  forbid_rules = o.forbid_rules;
+  tag_index = o.tag_index;
+  array_tags = o.array_tags;
+  enforce_rules = o.enforce_rules;
+  prefer_rules = o.prefer_rules;
+  constants = o.constants;
+  output = o.output;
+  plist = o.plist;
+  // 'discard' used to be left out, so copied or assigned objects lost
+  // their discard-on-ambiguity list
+  discard = o.discard;
+}
+
+// Default constructor: no explicit work, all members are default-constructed
+TaggerData::TaggerData()
+{
+}
+
+// Destructor: empty body, nothing is released explicitly here
+TaggerData::~TaggerData()
+{
+}
+
+// Copy constructor: delegates to copy()
+TaggerData::TaggerData(TaggerData const &o)
+{
+  this->copy(o);
+}
+
+// Assignment: copies o through copy() unless o is this very object
+TaggerData &
+TaggerData::operator =(TaggerData const &o)
+{
+  if(this == &o)
+  {
+    return *this;
+  }
+
+  copy(o);
+  return *this;
+}
+
+// Mutable reference to the stored open_class tag set
+set<TTag> &
+TaggerData::getOpenClass()
+{
+  return this->open_class;
+}
+
+// Replaces the stored open_class tag set with a copy of oc
+void
+TaggerData::setOpenClass(set<TTag> const &oc)
+{
+  this->open_class = oc;
+}
+
+// Mutable reference to the stored forbid rules
+vector<TForbidRule> &
+TaggerData::getForbidRules()
+{
+  return this->forbid_rules;
+}
+
+// Replaces the stored forbid rules with a copy of fr
+void
+TaggerData::setForbidRules(vector<TForbidRule> &fr)
+{
+  this->forbid_rules = fr;
+}
+
+// Mutable reference to the map from tag name to TTag
+map<wstring, TTag, Ltstr> &
+TaggerData::getTagIndex()
+{
+  return this->tag_index;
+}
+
+// Replaces the stored tag index with a copy of ti
+void
+TaggerData::setTagIndex(map<wstring, TTag, Ltstr> const &ti)
+{
+  this->tag_index = ti;
+}
+  
+// Mutable reference to the stored array of tag names
+vector<wstring> &
+TaggerData::getArrayTags()
+{
+  return this->array_tags;
+}
+
+// Replaces the stored array of tag names with a copy of at
+void
+TaggerData::setArrayTags(vector<wstring> const &at)
+{
+  this->array_tags = at;
+}
+
+// Mutable reference to the stored enforce-after rules
+vector<TEnforceAfterRule> &
+TaggerData::getEnforceRules()
+{
+  return this->enforce_rules;
+}
+
+// Replaces the stored enforce-after rules with a copy of tear
+void
+TaggerData::setEnforceRules(vector<TEnforceAfterRule> const &tear)
+{
+  this->enforce_rules = tear;
+}
+
+// Mutable reference to the stored prefer rules
+vector<wstring> &
+TaggerData::getPreferRules()
+{
+  return this->prefer_rules;
+}
+
+// Replaces the stored prefer rules with a copy of pr
+void
+TaggerData::setPreferRules(vector<wstring> const &pr)
+{
+  this->prefer_rules = pr;
+}
+
+// Mutable reference to the stored discard list
+vector<wstring> &
+TaggerData::getDiscardRules()
+{
+  return this->discard;
+}
+
+// Replaces the stored discard list with a copy of v
+void
+TaggerData::setDiscardRules(vector<wstring> const &v)
+{
+  this->discard = v;
+}
+
+// Mutable reference to the stored ConstantManager
+ConstantManager &
+TaggerData::getConstants()
+{
+  return this->constants;
+}
+
+// Replaces the stored ConstantManager with a copy of c
+void
+TaggerData::setConstants(ConstantManager const &c)
+{
+  this->constants = c;
+}
+
+// Mutable reference to the stored output Collection
+Collection &
+TaggerData::getOutput()
+{
+  return this->output;
+}
+
+// Replaces the stored output Collection with a copy of c
+void
+TaggerData::setOutput(Collection const &c)
+{
+  this->output = c;
+}
+
+// Mutable reference to the stored PatternList
+PatternList &
+TaggerData::getPatternList()
+{
+  return this->plist;
+}
+
+// Replaces the stored PatternList with a copy of pl
+void
+TaggerData::setPatternList(PatternList const &pl)
+{
+  this->plist = pl;
+}
+
+// Appends one entry to the end of the discard list
+void
+TaggerData::addDiscard(wstring const &tags)
+{
+  this->discard.push_back(tags);
+}
Index: branches/apertium-tagger/apertium2/apertium/tagger_data.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data.h	(revision 69632)
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TAGGERDATA_
+#define _TAGGERDATA_
+
+#include <apertium/constant_manager.h>
+#include <apertium/ttag.h>
+#include <apertium/collection.h>
+#include <lttoolbox/pattern_list.h>
+#include <lttoolbox/ltstr.h>
+
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// Holds the tagger data shared by the concrete tagger data classes
+// (TaggerDataHMM and TaggerDataLSW derive from it): tag tables, rule
+// lists, constants, output collection and pattern list.  Getters hand
+// out mutable references to the members; setters store a copy.
+class TaggerData
+{
+protected:
+  set<TTag> open_class;                    // set of open-class tags
+  vector<TForbidRule> forbid_rules;        // forbid rules
+  map<wstring, TTag, Ltstr> tag_index;     // tag name -> tag number
+  vector<wstring> array_tags;              // tag names; presumably indexed by tag number, TODO confirm
+  vector<TEnforceAfterRule> enforce_rules; // enforce-after rules
+  vector<wstring> prefer_rules;            // prefer rules
+  ConstantManager constants;
+  Collection output;
+  PatternList plist;
+
+  vector<wstring> discard;                 // discard-on-ambiguity entries
+  
+  // Copies the state of o into this object (see tagger_data.cc)
+  void copy(TaggerData const &o);
+public:
+  TaggerData();
+  virtual ~TaggerData();
+  TaggerData(TaggerData const &o);
+  TaggerData & operator =(TaggerData const &o);
+  
+  set<TTag> & getOpenClass();
+  void setOpenClass(set<TTag> const &oc);
+
+  vector<TForbidRule> & getForbidRules();
+  // NOTE(review): takes a non-const reference, unlike the other setters
+  void setForbidRules(vector<TForbidRule> &fr);
+  
+  map<wstring, TTag, Ltstr> & getTagIndex();
+  void setTagIndex(map<wstring, TTag, Ltstr> const &ti);
+  
+  vector<wstring> & getArrayTags();
+  void setArrayTags(vector<wstring> const &at);
+
+  vector<TEnforceAfterRule> & getEnforceRules();
+  void setEnforceRules(vector<TEnforceAfterRule> const &tear);
+
+  vector<wstring> & getPreferRules();
+  void setPreferRules(vector<wstring> const &pr);
+  
+  vector<wstring> & getDiscardRules();
+  void setDiscardRules(vector<wstring> const &dr);
+
+  ConstantManager & getConstants();
+  void setConstants(ConstantManager const &c);
+  
+  virtual Collection & getOutput();
+  void setOutput(Collection const &c);
+ 
+  void setPatternList(PatternList const &pl);
+  // Appends one entry to the discard list
+  void addDiscard(wstring const &tags);
+  PatternList & getPatternList();
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data_hmm.h	(revision 69632)
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TAGGERDATAHMM_
+#define _TAGGERDATAHMM_
+
+#include <apertium/tagger_data.h>
+
+// TaggerData extended with two dimensions (N, M) and two matrices of
+// doubles (a, b).  The implementation is not part of this header.
+// NOTE(review): presumably N is the number of HMM states, M the number
+// of observable classes, a the transition and b the emission
+// probabilities; confirm against the implementation.
+class TaggerDataHMM : public TaggerData
+{
+private:
+  int N;
+  int M;
+  double **a;
+  double **b;
+
+  // Releases the storage owned by this object (implemented elsewhere)
+  void destroy();
+public:
+  TaggerDataHMM();
+  virtual ~TaggerDataHMM();
+  TaggerDataHMM(TaggerDataHMM const &o);
+  // Builds an HMM data object from plain tagger data
+  TaggerDataHMM(TaggerData const &o);
+  TaggerDataHMM & operator =(TaggerDataHMM const &o);
+ 
+  // Sets the dimensions and, optionally, the two matrices; both matrix
+  // arguments default to NULL
+  virtual void setProbabilities(int const myN, int const myM, 
+                        double **myA = NULL, double **myB = NULL);
+
+  virtual double ** getA();
+  virtual double ** getB();
+  virtual int getN();
+  virtual int getM();
+  
+  // Serialisation from / to an already opened stream
+  virtual void read(FILE *in);
+  virtual void write(FILE *out);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.cc	(revision 69632)
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/lswpost.h>
+#include <apertium/tagger_data_lsw.h>
+#include <lttoolbox/compression.h>
+#include <apertium/endian_double_util.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+// Frees the N x N x N cube d (if any) and resets d and N to the empty state
+void
+TaggerDataLSW::destroy()
+{
+  if(d != NULL)
+  {
+    for(int i = 0; i < N; ++i)
+    {
+      double **plane = d[i];
+      for(int j = 0; j < N; ++j)
+      {
+        delete [] plane[j];
+      }
+      delete [] plane;
+    }
+    delete [] d;
+    d = NULL;
+  }
+
+  N = 0;
+}
+
+// Default constructor: starts without a probability cube
+TaggerDataLSW::TaggerDataLSW()
+  : N(0), d(NULL)
+{
+}
+
+// Destructor: releases the probability cube
+TaggerDataLSW::~TaggerDataLSW()
+{
+  this->destroy();
+}
+
+// Copy constructor: copies the base data, then duplicates the cube of o
+TaggerDataLSW::TaggerDataLSW(TaggerDataLSW const &o)
+  : N(0), d(NULL)
+{
+  TaggerData::copy(o);
+  setProbabilities(o.N, o.d);
+}
+
+// Conversion from plain tagger data: copies the base data only, the
+// new object has no probability cube
+TaggerDataLSW::TaggerDataLSW(TaggerData const &o)
+  : N(0), d(NULL)
+{
+  TaggerData::copy(o);
+}
+
+// Assignment: drops the current cube, copies the base data and then
+// duplicates the cube of o.  Self-assignment is a no-op.
+TaggerDataLSW &
+TaggerDataLSW::operator =(TaggerDataLSW const &o)
+{
+  if(this == &o)
+  {
+    return *this;
+  }
+
+  destroy();
+  TaggerData::copy(o);
+  setProbabilities(o.N, o.d);
+  return *this;
+}
+
+// Discards the current cube and allocates a new myN x myN x myN one.
+//  * myN: size of each dimension; 0 leaves the object without a cube.
+//  * myD: optional source cube of the same size whose values are
+//         copied.  When it is NULL every cell starts at 0.0 (cells used
+//         to be left uninitialised, so reading them before filling them
+//         in was undefined).
+void
+TaggerDataLSW::setProbabilities(int const myN, double ***myD) {
+   this->destroy();
+   N = myN;
+   if(N != 0) {
+     d = new double ** [N];
+     for (int i = 0; i < N; ++i) {
+       d[i] = new double * [N];
+       for (int j = 0; j < N; ++j) {
+           // the trailing () value-initialises the row to 0.0
+           d[i][j] = new double [N]();
+           if (myD != NULL) {
+              for (int k = 0; k < N; ++k) {
+                d[i][j][k] = myD[i][j][k];
+              }
+           }
+       }
+     }
+   } else {
+     d = NULL;
+   }
+}
+
+// The probability cube itself (not a copy); NULL when none is allocated
+double ***
+TaggerDataLSW::getD() {
+  return this->d;
+}
+
+// Size of each dimension of the cube
+int
+TaggerDataLSW::getN()
+{
+  return this->N;
+}
+
+// Loads the tagger data from an already opened stream.  The sections
+// are read in exactly the order write() emits them: open_class,
+// forbid_rules, array_tags, tag_index, enforce_rules, prefer_rules,
+// constants, output, the dimension N, the sparse list of cube cells,
+// the pattern list and finally the optional discard list.
+// NOTE(review): apart from the cube (freed by destroy()) and 'discard',
+// the containers are not cleared first, so reading into an object that
+// already holds data appends to it.
+void
+TaggerDataLSW::read(FILE *in)
+{
+  destroy();
+
+  // open_class
+  // values are stored as differences to the previous one, hence the running sum
+  int val = 0;
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    val += Compression::multibyte_read(in);
+    open_class.insert(val);
+  }
+  
+  // forbid_rules
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    TForbidRule aux;
+    aux.tagi = Compression::multibyte_read(in);
+    aux.tagj = Compression::multibyte_read(in);
+    forbid_rules.push_back(aux);
+  }
+
+  
+  // array_tags
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    array_tags.push_back(Compression::wstring_read(in));
+  }
+  
+  // tag_index
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    wstring tmp = Compression::wstring_read(in);    
+    tag_index[tmp] = Compression::multibyte_read(in);
+  }
+
+  // enforce_rules  
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    TEnforceAfterRule aux;
+    aux.tagi = Compression::multibyte_read(in);
+    for(int j = Compression::multibyte_read(in); j != 0; j--)
+    {
+      aux.tagsj.push_back(Compression::multibyte_read(in));
+    }
+    enforce_rules.push_back(aux);
+  }
+
+  // prefer_rules
+  for(int i = Compression::multibyte_read(in); i != 0; i--)
+  {
+    prefer_rules.push_back(Compression::wstring_read(in));
+  }
+
+  // constants
+  constants.read(in);
+
+  // output
+  output.read(in); 
+
+  // dimensions
+  N = Compression::multibyte_read(in);
+
+  d = new double ** [N];
+  for ( int i = 0; i < N; ++i) {
+    d[i] = new double * [N];
+    for (int j = 0; j < N; ++j) {
+      d[i][j] = new double [N];
+    }
+  }
+
+  // initializing d matrix
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      for (int k = 0; k < N; ++k) {
+        d[i][j][k] = 0;
+      }
+    }
+  }
+
+  // only the cells listed in the file are set; all others stay 0
+  // NOTE(review): i, j and k come straight from the file and are not
+  // checked against N before being used as indices
+  int nval = Compression::multibyte_read(in);
+  for(; nval != 0; nval--) {
+    int i = Compression::multibyte_read(in);
+    int j = Compression::multibyte_read(in);
+    int k = Compression::multibyte_read(in);
+    d[i][j][k] = EndianDoubleUtil::read(in);
+  }
+   
+  // read pattern list
+  plist.read(in);
+    
+  // read discards on ambiguity
+  discard.clear();
+
+  // write() omits the discard section when the list is empty, so
+  // hitting end of file here means there is nothing to load
+  int limit = Compression::multibyte_read(in);  
+  if(feof(in))
+  {
+    return;
+  }
+  
+  for(int i = 0; i < limit; i++)
+  {
+    discard.push_back(Compression::wstring_read(in));
+  }
+}
+
+// Stores the tagger data to an already opened stream.  Every section
+// starts with its element count; the order of the sections is the file
+// format that read() expects, so it must not be changed.
+void
+TaggerDataLSW::write(FILE *out)
+{
+  
+  // open_class
+  // each value is written as the difference to the previous one
+  Compression::multibyte_write(open_class.size(), out);  
+  int val = 0;
+  for(set<TTag>::const_iterator it = open_class.begin(), limit = open_class.end();
+      it != limit; it++)
+  {
+    Compression::multibyte_write(*it-val, out);    
+    val = *it;
+  }
+  
+  // forbid_rules
+  Compression::multibyte_write(forbid_rules.size(), out);
+  for(unsigned int i = 0, limit = forbid_rules.size(); i != limit; i++)
+  {
+    Compression::multibyte_write(forbid_rules[i].tagi, out);
+    Compression::multibyte_write(forbid_rules[i].tagj, out);
+  }
+  
+  // array_tags
+  Compression::multibyte_write(array_tags.size(), out);
+  for(unsigned int i = 0, limit = array_tags.size(); i != limit; i++)
+  {
+    Compression::wstring_write(array_tags[i], out);
+  }
+
+  // tag_index
+  Compression::multibyte_write(tag_index.size(), out);
+  for(map<wstring, int, Ltstr>::iterator it = tag_index.begin(), limit = tag_index.end();
+      it != limit; it++)
+  {
+    Compression::wstring_write(it->first, out);
+    Compression::multibyte_write(it->second, out);
+  }
+  
+  // enforce_rules
+  Compression::multibyte_write(enforce_rules.size(), out);
+  for(unsigned int i = 0, limit = enforce_rules.size(); i != limit; i++)
+  {
+    Compression::multibyte_write(enforce_rules[i].tagi, out);
+    Compression::multibyte_write(enforce_rules[i].tagsj.size(), out);
+    for(unsigned int j = 0, limit2 = enforce_rules[i].tagsj.size(); j != limit2; j++)
+    {
+      Compression::multibyte_write(enforce_rules[i].tagsj[j], out);
+    }
+  }
+
+  // prefer_rules
+  Compression::multibyte_write(prefer_rules.size(), out);
+  for(unsigned int i = 0, limit = prefer_rules.size(); i != limit; i++)
+  {
+    Compression::wstring_write(prefer_rules[i], out);
+  }
+  
+  // constants
+  constants.write(out);  
+
+  // output
+  output.write(out);
+
+  // d matrix
+  Compression::multibyte_write(N, out);
+
+  // first pass: count the cells above ZERO, because the count has to
+  // precede the cells in the file (ZERO is defined outside this file)
+  int nval = 0;
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      for (int k = 0; k < N; ++k) {
+        if (d[i][j][k] > ZERO) {
+          ++nval;
+        }
+      }
+    }
+  }
+  Compression::multibyte_write(nval, out);
+
+  // second pass: write index triple and value of exactly those cells
+  for (int i = 0; i < N; ++i) {
+    for (int j = 0; j < N; ++j) {
+      for (int k = 0; k < N; ++k) {
+        if (d[i][j][k] > ZERO) {
+          Compression::multibyte_write(i, out);
+          Compression::multibyte_write(j, out);
+          Compression::multibyte_write(k, out);
+          EndianDoubleUtil::write(out, d[i][j][k]);
+        }
+      }
+    }
+  }
+  
+  // write pattern list
+  plist.write(out);
+  
+  // write discard list
+  // an empty list is not written at all, not even its count; read()
+  // detects that case through end of file
+  
+  if(discard.size() != 0)
+  {
+    Compression::multibyte_write(discard.size(), out);
+    for(unsigned int i = 0, limit = discard.size(); i != limit; i++)
+    {
+      Compression::wstring_write(discard[i], out);
+    }
+  }  
+}
+
Index: branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_data_lsw.h	(revision 69632)
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TAGGERDATALSW_
+#define _TAGGERDATALSW_
+
+#include <apertium/tagger_data.h>
+
+class TaggerDataLSW : public TaggerData  // TaggerData plus a 3-level table of doubles
+{
+private:
+  int N;        // extent of each of the three index levels of d
+  double ***d;  // accessed as d[i][j][k] with every index in [0, N)
+  
+  void destroy();
+
+public:
+  TaggerDataLSW();
+  virtual ~TaggerDataLSW();
+  TaggerDataLSW(TaggerDataLSW const &o);
+  TaggerDataLSW(TaggerData const &o);  // builds an object from base-class data
+  TaggerDataLSW & operator =(TaggerDataLSW const &o);
+  
+  void setProbabilities(int const myN, double ***myD = NULL);  // myD is optional
+
+  virtual double *** getD();
+  virtual int getN();
+  
+  void read(FILE *in);
+  void write(FILE *out);  // NOTE(review): the .cc writes only d entries > ZERO — confirm read() mirrors it
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tagger_word.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tagger_word.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tagger_word.h	(revision 69632)
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __TAGGERWORD_H
+#define __TAGGERWORD_H
+
+#include <iostream>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+
+#include <lttoolbox/ltstr.h>
+#include <apertium/ttag.h>
+#include <apertium/apertium_re.h>
+
+using namespace std;
+
+/** Class TaggerWord.
+ *  It stores the superficial form and all possible tags that it can receive.
+ *  It has the fine tags delivered by the morphological analyzer and the coarse
+ *  ones used by the PoS tagger.
+ */  
+class TaggerWord{
+private:
+  wstring superficial_form; 
+  
+  set<TTag> tags;  //Set of all possible tags
+  map<TTag, wstring> lexical_forms;  //For a given coarse tag it stores the fine tag 
+                                    //delivered by the morphological analyzer
+  wstring ignored_string;  //Text collected through add_ignored_string()
+  
+  bool plus_cut; //Flag to distinguish the way in which the word was ended.
+                  //If it was done by '$' its value should be false
+                  //If it was done by '+' its value should be true
+  bool previous_plus_cut; //Flag to distinguish the way in which the
+			  //previous word was ended. It has the same
+			  //plus_cut meaning
+  bool show_sf; // Show the superficial form in the output
+  static map<wstring, ApertiumRE, Ltstr> patterns;
+
+  bool match(wstring const &s, wstring const &pattern);
+public:
+  static bool generate_marks;
+  static vector<wstring> array_tags;
+
+  static bool show_ignored_string;
+
+   /** 
+    * Constructor 
+    */
+   TaggerWord(bool prev_plus_cut=false);
+  
+   /** 
+    * Copy constructor
+    */
+   TaggerWord(const TaggerWord &w);
+  
+   /** 
+    * Destructor 
+    */
+   virtual ~TaggerWord();
+  
+   /** Set the superficial form of the word.
+    *  @param s the superficial form
+    */
+   void set_superficial_form(const wstring &s);
+  
+   /** Get the superficial form of the word
+    *  @return a non-const reference to the superficial form
+    */
+   wstring& get_superficial_form();
+  
+   /** Add a new tag to the set of all possible tags of the word.
+    *  @param t the coarse tag
+    *  @param lf the lexical form (fine tag)
+    */
+   virtual void add_tag(TTag &t, const wstring &lf, vector<wstring> const &prefer_rules);
+
+   /** Get the set of tags of this word.
+    *  @return  set of tags.
+    */  
+   virtual set<TTag>& get_tags();
+  
+   /** Get a wstring with the set of tags
+    */
+   virtual wstring get_string_tags();
+   
+  /** Get the lexical form (fine tag) for a given tag (coarse one)
+   *  @param  t the tag
+   *  @return the lexical form of tag t
+   */
+  virtual wstring get_lexical_form(TTag &t, int const TAG_kEOF); 
+  // NOTE(review): undocumented; takes the same parameters as get_lexical_form()
+  wstring get_all_chosen_tag_first(TTag &t, int const TAG_kEOF);
+  
+  /** Get the lexical form (fine tag) for a given tag (coarse one)
+   *  @param  t the tag
+   *  @return the lexical form of tag t without other text that
+   *          is ignored.
+   */  
+  wstring get_lexical_form_without_ignored_string(TTag &t, int const TAG_kEOF); 
+  
+  /** Add text to the ignored string
+   *  @param s the text to add
+   */   
+  void add_ignored_string(wstring const &s);
+  
+  /** Set the flag plus_cut to a certain value. If this flag is set to true it
+   *  means that there was a '+' between this word and the next one
+   */
+   void set_plus_cut(const bool &c);
+
+  /**
+   * Get and set the "show superficial form" flag
+   */
+  void set_show_sf(bool sf);
+  bool get_show_sf();
+
+  /** Get the value of the plus_cut flag */
+  bool get_plus_cut();
+   
+  /** Output operator
+   */
+  friend wostream& operator<< (wostream& os, TaggerWord &w);
+  
+  static void setArrayTags(vector<wstring> const &at);
+
+  void print();
+  
+  void outputOriginal(FILE *output);
+  
+  bool isAmbiguous() const;  // CAUTION: unknown words are not considered to 
+                             // be ambiguous by this method
+  
+  void discardOnAmbiguity(wstring const &tags);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/tmx_builder.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_builder.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_builder.h	(revision 69632)
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TMXBUILDER_
+#define _TMXBUILDER_
+
+#include <apertium/transfer_data.h>
+#include <string>
+#include <cstdio>
+
+using namespace std;
+
+class TMXBuilder
+{
+private:
+  wstring lang1;  // NOTE(review): presumably set from the constructor's l1 — confirm in the .cc
+  wstring lang2;  // NOTE(review): presumably set from the constructor's l2 — confirm in the .cc
+  unsigned int max_edit;
+  unsigned int diagonal_width;
+  unsigned int window_size;
+  unsigned int step;
+  double percent;
+  double edit_distance_percent;
+  unsigned int low_limit;
+  FILE *freference;  // NOTE(review): ownership/closing is not visible in this header — confirm
+
+  static wstring nextTU(FILE *input);
+  static wstring restOfBlank(FILE *input);
+  static wstring nextBlank(FILE *input);
+  static wstring xmlize(wstring const &str);
+  static bool compatible(FILE *input, FILE *output, bool lazy = false);
+  void generateTMX(FILE *f1, FILE *f2, FILE *output);
+  void outputTU(FILE *f1, FILE *f2, FILE *output);
+  static vector<wstring> reverseList(vector<wstring> const &v);
+  static vector<wstring> sentenceList(FILE *file);
+  static int argmin(int nw, int n, int w);
+  static int * levenshteinTable(vector<wstring> &l1, vector<wstring> &l2, 
+				unsigned int diagonal_width, unsigned int max_edit);
+  void printTU(FILE *output, wstring const &tu1, wstring const &tu2) const;
+  static wstring filter(wstring const &s);
+  static int weight(wstring const &s);  
+  static void printTable(int *table, unsigned int nrows, unsigned int ncols);
+  static int editDistance(wstring const &s1, wstring const &s2, unsigned int max_edit);
+  static int min3(int i1, int i2, int i3);
+  static int min2(int i1, int i2);
+  void printTUCond(FILE *output, wstring const &s1, wstring const &s2, bool secure_zone);
+  static vector<wstring> extractFragment(vector<wstring> const &text, unsigned int base, 
+					 unsigned int width);
+
+  static bool isRemovablePunct(wchar_t const &c);
+  bool similar(wstring const &s1, wstring const &s2);
+
+  void splitAndMove(FILE *file, string const &filename);
+public:
+  TMXBuilder(wstring const &l1, wstring const &l2);
+  ~TMXBuilder();
+  static bool check(string const &file1, string const &file2, bool lazy = false);
+  void generate(string const &file1, string const &file2, 
+                string const &outfile="");
+  // The setters below take signed values while the matching fields are unsigned int.
+  void setMaxEdit(int me);
+  void setDiagonalWidth(int dw);
+  void setWindowSize(int ws);
+  void setStep(int s);
+  void setPercent(double p);
+  void setLowLimit(int l);
+  void setEditDistancePercent(double e);
+  void setTranslation(string const &filename);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_data.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_data.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_data.cc	(revision 69632)
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <apertium/transfer_data.h>
+#include <lttoolbox/compression.h>
+#include <apertium/utf_converter.h>
+#include <apertium/apertium_re.h>
+#include <iostream>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+using namespace std;
+
+// Member-wise copy: assigns each data member of o to the same member of *this.
+void TransferData::copy(TransferData const &o)
+{
+  this->alphabet = o.alphabet;
+  this->transducer = o.transducer;
+  this->finals = o.finals;
+  this->attr_items = o.attr_items;
+  this->macros = o.macros;
+  this->lists = o.lists;
+  this->variables = o.variables;
+}
+
+// No-op: the body is empty.
+void TransferData::destroy()
+{
+}
+
+TransferData::TransferData()  // pre-registers the fixed attr_items patterns
+{
+  map<wstring, wstring, Ltstr> &fixed = attr_items;
+  fixed[L"lem"] = L"(([^<]|\"\\<\")+)";
+  fixed[L"lemq"] = L"\\#[- _][^<]+";
+  fixed[L"lemh"] = L"(([^<#]|\"\\<\"|\"\\#\")+)";
+  fixed[L"whole"] = L"(.+)";
+  fixed[L"tags"] = L"((<[^>]+>)+)";
+  fixed[L"chname"] = L"({([^/]+)\\/)"; // includes delimiters { and / !!!
+  fixed[L"chcontent"] = L"(\\{.+)";
+  fixed[L"content"] = L"(\\{.+)";
+}
+
+TransferData::~TransferData()  // cleanup is delegated to destroy()
+{
+  this->destroy();
+}
+
+TransferData::TransferData(TransferData const &o)  // duplicates o through copy()
+{
+  this->copy(o);
+}
+
+// Copy assignment operator.
+// Self-assignment is detected by address and skipped.
+TransferData & TransferData::operator =(TransferData const &o)
+{
+  if(this == &o) { return *this; }
+  // Release the current state, then duplicate o's members.
+  destroy();
+  copy(o);
+  return *this;
+}
+
+// Gives callers direct, mutable access to the stored alphabet.
+Alphabet & TransferData::getAlphabet()
+{
+  return this->alphabet;
+}
+
+// Gives callers direct, mutable access to the stored transducer.
+Transducer & TransferData::getTransducer()
+{
+  return this->transducer;
+}
+
+// Gives callers direct, mutable access to the finals map.
+map<int, int> & TransferData::getFinals()
+{
+  return this->finals;
+}
+
+// Gives callers direct, mutable access to the attr_items map.
+map<wstring, wstring, Ltstr> & TransferData::getAttrItems()
+{
+  return this->attr_items;
+}
+
+// Gives callers direct, mutable access to the macros map.
+map<wstring, int, Ltstr> & TransferData::getMacros()
+{
+  return this->macros;
+}
+
+// Gives callers direct, mutable access to the lists map.
+map<wstring, set<wstring, Ltstr>, Ltstr> & TransferData::getLists()
+{
+  return this->lists;
+}
+
+// Gives callers direct, mutable access to the variables map.
+map<wstring, wstring, Ltstr> & TransferData::getVariables()
+{
+  return this->variables;
+}
+
+// Serialises this object to output in a fixed order: alphabet, transducer,
+// finals, attr_items (via writeRegexps), variables, macros and lists.
+// Each map is preceded by its element count.
+void TransferData::write(FILE *output)
+{
+  // The transducer writer is handed the size of the alphabet.
+  alphabet.write(output);
+  transducer.write(output, alphabet.size());
+
+  // finals: each entry is written as its key followed by its value
+  typedef map<int, int>::const_iterator FinalIt;
+  Compression::multibyte_write(finals.size(), output);
+  for(FinalIt cur = finals.begin(), stop = finals.end(); cur != stop; ++cur)
+  {
+    Compression::multibyte_write(cur->first, output);
+    Compression::multibyte_write(cur->second, output);
+  }
+
+  // attr_items, including their precompiled regexps
+  writeRegexps(output);
+
+  // variables: key string, then value string
+  typedef map<wstring, wstring, Ltstr>::const_iterator VarIt;
+  Compression::multibyte_write(variables.size(), output);
+  for(VarIt cur = variables.begin(), stop = variables.end(); cur != stop; ++cur)
+  {
+    Compression::wstring_write(cur->first, output);
+    Compression::wstring_write(cur->second, output);
+  }
+
+  // macros: key string, then integer value
+  typedef map<wstring, int, Ltstr>::const_iterator MacroIt;
+  Compression::multibyte_write(macros.size(), output);
+  for(MacroIt cur = macros.begin(), stop = macros.end(); cur != stop; ++cur)
+  {
+    Compression::wstring_write(cur->first, output);
+    Compression::multibyte_write(cur->second, output);
+  }
+
+  // lists: for every list its name, its size and then each member
+  typedef map<wstring, set<wstring, Ltstr>, Ltstr>::const_iterator ListIt;
+  typedef set<wstring, Ltstr>::const_iterator MemberIt;
+  Compression::multibyte_write(lists.size(), output);
+  for(ListIt cur = lists.begin(), stop = lists.end(); cur != stop; ++cur)
+  {
+    set<wstring, Ltstr> const &members = cur->second;
+    Compression::wstring_write(cur->first, output);
+    Compression::multibyte_write(members.size(), output);
+
+    for(MemberIt m = members.begin(), last = members.end(); m != last; ++m)
+    {
+      Compression::wstring_write(*m, output);
+    }
+  }
+}
+
+// Writes the PCRE version string, the number of attr_items and, for each item,
+// its name, its compiled regular expression and the original pattern text.
+void TransferData::writeRegexps(FILE *output)
+{
+  Compression::string_write(string(pcre_version()), output);
+  Compression::multibyte_write(attr_items.size(), output);
+  typedef map<wstring, wstring, Ltstr>::iterator AttrIt;
+  for(AttrIt cur = attr_items.begin(), stop = attr_items.end(); cur != stop; ++cur)
+  {
+    Compression::wstring_write(cur->first, output);
+    ApertiumRE compiled;
+    compiled.compile(UtfConverter::toUtf8(cur->second));
+    compiled.write(output);
+    Compression::wstring_write(cur->second, output);
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/transfer_data.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_data.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_data.h	(revision 69632)
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFERDATA_
+#define _TRANSFERDATA_
+
+#include <lttoolbox/alphabet.h>
+#include <lttoolbox/ltstr.h>
+#include <lttoolbox/transducer.h>
+
+#include <map>
+#include <set>
+
+using namespace std;
+
+class TransferData
+{
+private:
+  void copy(TransferData const &o);
+  void destroy();
+  
+  map<wstring, wstring, Ltstr> attr_items;  // attribute name -> regular expression text
+  map<wstring, int, Ltstr> macros;  // string key -> int value
+  map<wstring, set<wstring, Ltstr>, Ltstr> lists;  // list name -> set of strings
+  map<wstring, wstring, Ltstr> variables;  // string key -> string value
+  
+  Alphabet alphabet;
+  Transducer transducer;
+  map<int, int> finals;
+
+  void writeRegexps(FILE *output);  // serialises attr_items; called from write()
+ public:
+  TransferData();
+  ~TransferData();
+  TransferData(TransferData const &o);
+  TransferData & operator =(TransferData const &o);
+  
+  Alphabet & getAlphabet();
+  Transducer & getTransducer();
+  map<int, int> & getFinals();
+  map<wstring, wstring, Ltstr> & getAttrItems();  
+
+  map<wstring, int, Ltstr> & getMacros();
+  map<wstring, set<wstring, Ltstr>, Ltstr> & getLists();
+  map<wstring, wstring, Ltstr> & getVariables();
+  // All getters above return non-const references; no read() is declared here.
+  void write(FILE *output);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_instr.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_instr.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_instr.cc	(revision 69632)
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer_instr.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Member-wise copy of o's type, content, pos, pointer and condition.
+void TransferInstr::copy(TransferInstr const &o)
+{
+  this->type = o.type;
+  this->content = o.content;
+  this->pos = o.pos;
+  this->pointer = o.pointer;  // the pointer value itself is copied
+  this->condition = o.condition;
+}
+
+// No-op: the body is empty.
+void TransferInstr::destroy()
+{
+}
+
+// Stores the given type, content, position, pointer and condition flag.
+TransferInstr::TransferInstr(TransferInstrType t, string const &c, int const p, void *ptr, bool cond)
+{
+  this->type = t;
+  this->content = c;
+  this->pos = p;
+  this->pointer = ptr;
+  this->condition = cond;
+}
+
+TransferInstr::~TransferInstr()  // cleanup is delegated to destroy()
+{
+  this->destroy();
+}
+
+TransferInstr::TransferInstr(TransferInstr const &o)  // duplicates o through copy()
+{
+  this->copy(o);
+}
+
+// Copy assignment operator.
+// Self-assignment is detected by address and skipped.
+TransferInstr & TransferInstr::operator =(TransferInstr const &o)
+{
+  if(this == &o) { return *this; }
+  // Release the current state, then duplicate o's members.
+  destroy();
+  copy(o);
+  return *this;
+}
+
+// Returns the instruction type by value.
+TransferInstrType TransferInstr::getType()
+{
+  return this->type;
+}
+
+// Returns a const reference to the stored content string.
+string const & TransferInstr::getContent()
+{
+  return this->content;
+}
+
+// Returns the stored position by value.
+int TransferInstr::getPos()
+{
+  return this->pos;
+}
+
+// Returns the stored untyped pointer as given to the constructor or copied.
+void * TransferInstr::getPointer()
+{
+  return this->pointer;
+}
+
+// Returns the stored condition flag.
+bool TransferInstr::getCondition()
+{
+  return this->condition;
+}
Index: branches/apertium-tagger/apertium2/apertium/transfer_token.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_token.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_token.h	(revision 69632)
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFERTOKEN_
+#define _TRANSFERTOKEN_
+
+#include <string>
+
+using namespace std;
+
+enum TransferTokenType  // token kinds; enumerators take the values 0, 1, 2 in order
+{
+  tt_eof,    // NOTE(review): presumably marks end of input — confirm against the reader
+  tt_word,   // NOTE(review): presumably a word token — confirm
+  tt_blank   // NOTE(review): presumably text between words — confirm
+};
+
+
+class TransferToken
+{
+private:
+  TransferTokenType type;  // kind of this token
+  wstring content;         // text carried by this token
+
+  void copy(TransferToken const &o);
+  void destroy();
+public:
+  TransferToken();
+  TransferToken(wstring const &content, TransferTokenType type);
+  ~TransferToken();
+  TransferToken(TransferToken const &o);
+  TransferToken & operator =(TransferToken const &o);
+  TransferTokenType getType();
+  wstring & getContent();  // returns a non-const reference
+  void setType(TransferTokenType type);
+  void setContent(wstring const &content);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_word.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_word.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_word.h	(revision 69632)
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TRANSFERWORD_
+#define _TRANSFERWORD_
+
+#include <map>
+#include <apertium/apertium_re.h>
+#include <string>
+#include <cstdlib>
+
+using namespace std;
+
+/**
+ * Word type for transfer modules
+ */
+class TransferWord
+{
+private:
+  /**
+   * Source language word
+   */
+  string s_str;
+  
+  /**
+   * Target language word
+   */
+  string t_str;
+  
+  /**
+   * Queue length
+   */
+  int queue_length;
+
+  /**
+   * Copy method
+   * @param o the object to be copied
+   */
+  void copy(TransferWord const &o);
+  
+  /**
+   * Destroy method
+   */
+  void destroy();
+  
+  /**
+   * Accesses the source/target side of a word using the specified part
+   * @param str typically s_str or t_str
+   * @param part regular expression to match/access
+   * @return the matched/accessed string (returned by value)
+   */
+  string access(string const &str, ApertiumRE const &part);
+
+  /**
+   * Assigns a value to the source/target side of a word using the
+   * specified part 
+   * @param str typically s_str or t_str 
+   * @param part regular expression to match/access 
+   * @param value the string to be assigned
+   */
+  void assign(string &str, ApertiumRE const &part, string const &value);
+
+public:
+  /**
+   * Non-parametric constructor
+   */
+  TransferWord();
+  /**
+   * Destructor
+   */
+  ~TransferWord();
+  
+  /**
+   * Copy constructor
+   * @param o the object to be copied
+   */
+  TransferWord(TransferWord const &o);
+  
+  /**
+   * Parametric constructor calling init()
+   * @param src source word
+   * @param tgt target word
+   * @param queue queue length
+   */
+  TransferWord(string const &src, string const &tgt, int queue = 0);
+  
+  /**
+   * Assignment operator
+   * @param o the object to be assigned
+   * @return reference to left part of assignment
+   */
+  TransferWord & operator =(TransferWord const &o);
+
+  /**
+   * Sets a bi-word (a source language word and its counterpart in target
+   * language)
+   * @param src source word
+   * @param tgt target word
+   */
+  void init(string const &src, string const &tgt);
+  
+  /**
+   * Get a source language word part
+   * @param part regular expression to match
+   * @param with_queue access taking into account the queue
+   * @returns the part of the string matched (returned by value)
+   */ 
+  string source(ApertiumRE const &part, bool with_queue = true);
+
+  /**
+   * Get a target language word part
+   * @param part regular expression to match
+   * @param with_queue access taking into account the queue
+   * @returns the part of the string matched (returned by value)
+   */ 
+  string target(ApertiumRE const &part, bool with_queue = true);
+
+  /**
+   * Sets a value for a source language word part
+   * @param part regular expression to match
+   * @param value the new value for the given part
+   * @param with_queue access taking or not into account the queue
+   */
+  void setSource(ApertiumRE const &part, string const &value, 
+		 bool with_queue = true);
+
+  /**
+   * Sets a value for a target language word part
+   * @param part regular expression to match
+   * @param value the new value for the given part
+   * @param with_queue access taking or not into account the queue
+   */
+  void setTarget(ApertiumRE const &part, string const &value, 
+		 bool with_queue = true);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transfer_word_list.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_word_list.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_word_list.cc	(revision 69632)
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/transfer_word_list.h>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+// Copies both word sets from o.
+void TransferWordList::copy(TransferWordList const &o)
+{
+  this->casefull_set = o.casefull_set;
+  this->caseless_set = o.caseless_set;
+}
+
+// No-op: the body is empty.
+void TransferWordList::destroy()
+{
+}
+
+TransferWordList::TransferWordList()  // no explicit initialisation is performed
+{
+}
+
+TransferWordList::~TransferWordList()  // cleanup is delegated to destroy()
+{
+  this->destroy();
+}
+
+TransferWordList::TransferWordList(TransferWordList const &o)  // duplicates o through copy()
+{
+  this->copy(o);
+}
+
+// Copy assignment operator.
+// Self-assignment is detected by address and skipped.
+TransferWordList & TransferWordList::operator =(TransferWordList const &o)
+{
+  if(this == &o) { return *this; }
+  // Release the current state, then duplicate o's members.
+  destroy();
+  copy(o);
+  return *this;
+}
+
+// Reports whether cad is present; the caseless flag selects which of the two
+// internal sets is consulted.
+bool TransferWordList::search(string const &cad, bool caseless)
+{
+  // Lookup in the case-sensitive set unless the caller asked for caseless.
+  if(!caseless)
+  {
+    return casefull_set.count(cad) != 0;
+  }
+  // Lookup in the set kept for caseless queries.
+  return caseless_set.count(cad) != 0;
+}
+
+// Inserts cad into both internal sets.
+void TransferWordList::addWord(string const &cad)
+{
+  this->casefull_set.insert(cad);
+  this->caseless_set.insert(cad);
+}
Index: branches/apertium-tagger/apertium2/apertium/transfer_word_list.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transfer_word_list.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transfer_word_list.h	(revision 69632)
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TRANSFERWORDLIST_
+#define _TRANSFERWORDLIST_
+
+#include <cstring>
+#include <set>
+#include <string>
+#ifdef _MSC_VER
+#define strcasecmp _stricmp
+#endif
+
+using namespace std;
+
+struct ltstr  // comparator: true when s1 orders before s2 (case-sensitive)
+{
+  bool operator()(string const &s1, string const &s2) const
+  {
+    return s1.compare(s2) < 0;
+  }
+};
+
+struct ltstri  // comparator: true when strcasecmp orders s1 before s2
+{
+  bool operator()(string const &s1, string const &s2) const
+  {
+    return 0 > strcasecmp(s1.c_str(), s2.c_str());
+  }
+};
+
+class TransferWordList
+{
+private:
+  set<string, ltstr> casefull_set;   // ordered with ltstr (case-sensitive comparison)
+  set<string, ltstri> caseless_set;  // ordered with ltstri (strcasecmp-based comparison)
+
+  void copy(TransferWordList const &o);
+  void destroy();
+public:
+  TransferWordList();
+  ~TransferWordList();
+  TransferWordList(TransferWordList const &o);
+  TransferWordList & operator =(TransferWordList const &o);
+
+  bool search(string const &cad, bool caseless = false);  // caseless defaults to false
+  void addWord(string const &cad);
+};
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/transferpp.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transferpp.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transferpp.cc	(revision 69632)
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/trx_reader.h>
+#include <lttoolbox/lt_locale.h>
+#include <cstdlib>
+#include <iostream>
+#include <apertium/string_utils.h>
+#include <libgen.h>
+
+using namespace Apertium;
+using namespace std;
+
+int main(int argc, char *argv[])
+{
+  LtLocale::tryToSetLocale();
+  // Exactly two arguments are expected after the program name.
+  if(3 != argc)
+  {
+    cerr << "USAGE: " << basename(argv[0]) << " rules_file transfer_file" << endl;
+    exit(EXIT_FAILURE);
+  }
+
+  // argv[1] is read and argv[2] is written, matching the usage message.
+  TRXReader reader;
+  reader.read(argv[1]);
+  reader.write(argv[2]);
+  return EXIT_SUCCESS;
+}
Index: branches/apertium-tagger/apertium2/apertium/ttag.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/ttag.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/ttag.h	(revision 69632)
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TTAG_
+#define _TTAG_
+
+#include <vector>
+
+using namespace std;
+
+typedef int TTag;  // a tag is represented as a plain int
+
+struct TForbidRule
+{
+    TTag tagi;  // NOTE(review): presumably the first tag of the forbidden pair — confirm
+    TTag tagj;  // NOTE(review): presumably the tag forbidden after tagi — confirm
+};
+
+class TEnforceAfterRule
+{
+public:
+    TTag tagi;           // NOTE(review): presumably the tag that triggers the rule — confirm
+    vector<TTag> tagsj;  // NOTE(review): presumably the tags allowed after tagi — confirm
+};
+
+
+
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/utf_converter.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/utf_converter.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/utf_converter.cc	(revision 69632)
@@ -0,0 +1,613 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <apertium/utf_converter.h>
+#include <iostream>
+#include <cstdlib>
+#include <apertium/string_utils.h>
+
+using namespace Apertium;
+
+#define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD /* substituted for values that cannot be converted; UTF32 is the typedef inside namespace UtfConverter, so these macros only expand correctly there */
+#define UNI_MAX_BMP (UTF32)0x0000FFFF /* largest value stored as a single UTF-16 unit */
+#define UNI_MAX_UTF16 (UTF32)0x0010FFFF /* largest value ConvertUTF8toUTF16 encodes as a surrogate pair */
+#define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF /* not referenced anywhere in this file */
+#define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF /* values above this are treated as illegal by the converters */
+#define UNI_SUR_HIGH_START  (UTF32)0xD800 /* first high surrogate */
+#define UNI_SUR_HIGH_END    (UTF32)0xDBFF /* last high surrogate */
+#define UNI_SUR_LOW_START   (UTF32)0xDC00 /* first low surrogate */
+#define UNI_SUR_LOW_END     (UTF32)0xDFFF /* last low surrogate */
+
+using namespace std;
+
+namespace UtfConverter
+{
+
+  typedef unsigned int	 UTF32;	/* at least 32 bits */
+  typedef unsigned short UTF16;	/* at least 16 bits */
+  typedef unsigned char	 UTF8;	/* typically 8 bits */
+
+  /* Result codes, flags and constants shared by the Convert* routines below */
+
+  typedef enum {
+    conversionOK, 	/* conversion successful */
+    sourceExhausted,	/* partial character in source, but hit end */
+    targetExhausted,	/* insufficient room in target for conversion */
+    sourceIllegal	/* source sequence is illegal/malformed */
+  } ConversionResult;
+
+  typedef enum {
+    strictConversion = 0, /* a surrogate code point in the input stops the conversion with sourceIllegal */
+    lenientConversion /* surrogate code points are tolerated: replaced with U+FFFD or passed through, depending on the routine */
+  } ConversionFlags;
+
+  static const int halfShift  = 10; /* used for shifting by 10 bits */
+
+  static const UTF32 halfBase = 0x0010000UL; /* subtracted before splitting into a surrogate pair, added back when joining one */
+  static const UTF32 halfMask = 0x3FFUL; /* low 10 bits, the part carried by the low surrogate */
+
+  
+  void conversionError() /* Prints a fixed error message on wcerr and terminates the process with EXIT_FAILURE. */
+  {
+    wcerr << L"Error: conversion error" << endl;
+    exit(EXIT_FAILURE); /* does not return to the caller */
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF32toUTF16 ( /* Converts UTF-32 in [*sourceStart, sourceEnd) to UTF-16 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+					const UTF32** sourceStart, const UTF32* sourceEnd, 
+					UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch;
+      if (target >= targetEnd) { /* no room left for even a single UTF-16 unit */
+	result = targetExhausted; break;
+      }
+      ch = *source++;
+      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+	/* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+	  if (flags == strictConversion) {
+	    --source; /* return to the illegal value itself */
+	    result = sourceIllegal;
+	    break;
+	  } else {
+	    *target++ = UNI_REPLACEMENT_CHAR; /* lenient: substitute U+FFFD (truncated to UTF16 on store) */
+	  }
+	} else {
+	  *target++ = (UTF16)ch; /* normal case */
+	}
+      } else if (ch > UNI_MAX_LEGAL_UTF32) {
+	if (flags == strictConversion) {
+	  result = sourceIllegal; /* NOTE(review): no break and no --source here, unlike the surrogate case above, so the loop keeps converting -- confirm this is intended */
+	} else {
+	  *target++ = UNI_REPLACEMENT_CHAR;
+	}
+      } else {
+	/* target is a character in range 0xFFFF - 0x10FFFF. */
+	if (target + 1 >= targetEnd) { /* a surrogate pair needs two UTF-16 units */
+	  --source; /* Back up source pointer! */
+	  result = targetExhausted; break;
+	}
+	ch -= halfBase;
+	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); /* high surrogate: upper 10 bits */
+	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); /* low surrogate: lower 10 bits */
+      }
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF16toUTF32 ( /* Converts UTF-16 in [*sourceStart, sourceEnd) to UTF-32 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+					const UTF16** sourceStart, const UTF16* sourceEnd, 
+					UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF32* target = *targetStart;
+    UTF32 ch, ch2;
+    while (source < sourceEnd) {
+      const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
+      ch = *source++;
+      /* If we have a surrogate pair, convert to UTF32 first. */
+      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+	/* If the 16 bits following the high surrogate are in the source buffer... */
+	if (source < sourceEnd) {
+	  ch2 = *source; /* peek only; consumed below once it proves to be a low surrogate */
+	  /* If it's a low surrogate, convert to UTF32. */
+	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
+	    ++source;
+	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+	    --source; /* return to the illegal value itself */
+	    result = sourceIllegal;
+	    break;
+	  }
+	} else { /* We don't have the 16 bits following the high surrogate. */
+	  --source; /* return to the high surrogate */
+	  result = sourceExhausted;
+	  break;
+	}
+      } else if (flags == strictConversion) {
+	/* UTF-16 surrogate values are illegal in UTF-32 */
+	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+	  --source; /* return to the illegal value itself */
+	  result = sourceIllegal;
+	  break;
+	}
+      }
+      if (target >= targetEnd) {
+	source = oldSource; /* Back up source pointer! */
+	result = targetExhausted; break;
+      }
+      *target++ = ch; /* in lenient mode an unpaired surrogate reaches this point and is stored unchanged */
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Index into the table below with the first byte of a UTF-8 sequence to
+   * get the number of trailing bytes that are supposed to follow it.
+   * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
+   * left as-is for anyone who may want to do such conversion, which was
+   * allowed in earlier algorithms.
+   */
+  static const char trailingBytesForUTF8[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x00-0x1F */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x20-0x3F */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x40-0x5F */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x60-0x7F */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0x80-0x9F: also 0 */
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 0xA0-0xBF: also 0 */
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* 0xC0-0xDF */
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 /* 0xE0-0xEF: 2, 0xF0-0xF7: 3, 0xF8-0xFB: 4, 0xFC-0xFF: 5 */
+  };
+
+  /*
+   * Magic values subtracted from a buffer value during UTF8 conversion.
+   * This table contains as many values as there might be trailing bytes
+   * in a UTF-8 sequence.
+   */
+  static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
+					    0x03C82080UL, 0xFA082080UL, 0x82082080UL }; /* indexed by the number of trailing bytes (0..5) */
+
+  /*
+   * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
+   * into the first byte, depending on how many bytes follow.  There are
+   * as many entries in this table as there are UTF-8 sequence types.
+   * (I.e., one byte sequence, two byte... etc.). Remember that sequences
+   * for *legal* UTF-8 will be 4 or fewer bytes total.
+   */
+  static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; /* indexed by total byte count; this file only uses indices 1..4 */
+
+  /* --------------------------------------------------------------------- */
+
+  /* The interface converts a whole buffer to avoid function-call overhead.
+   * Constants have been gathered. Loops & conditionals have been removed as
+   * much as possible for efficiency, in favor of drop-through switches.
+   * (The "Note A" with equivalent code that this refers to is not included in this file.)
+   * If your compiler supports it, the "isLegalUTF8" call can be turned
+   * into an inline function.
+   */
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF16toUTF8 ( /* Converts UTF-16 in [*sourceStart, sourceEnd) to UTF-8 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+				       const UTF16** sourceStart, const UTF16* sourceEnd, 
+				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF16* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch;
+      unsigned short bytesToWrite = 0;
+      const UTF32 byteMask = 0xBF; /* AND-ed in to clear bit 6 of a trailing byte */
+      const UTF32 byteMark = 0x80; 
+      const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
+      ch = *source++;
+      /* If we have a surrogate pair, convert to UTF32 first. */
+      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
+	/* If the 16 bits following the high surrogate are in the source buffer... */
+	if (source < sourceEnd) {
+	  UTF32 ch2 = *source; /* peek only; consumed below once it proves to be a low surrogate */
+	  /* If it's a low surrogate, convert to UTF32. */
+	  if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
+	    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
+	      + (ch2 - UNI_SUR_LOW_START) + halfBase;
+	    ++source;
+	  } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
+	    --source; /* return to the illegal value itself */
+	    result = sourceIllegal;
+	    break;
+	  }
+	} else { /* We don't have the 16 bits following the high surrogate. */
+	  --source; /* return to the high surrogate */
+	  result = sourceExhausted;
+	  break;
+	}
+      } else if (flags == strictConversion) {
+	/* UTF-16 surrogate values are illegal in UTF-32 */
+	if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
+	  --source; /* return to the illegal value itself */
+	  result = sourceIllegal;
+	  break;
+	}
+      }
+      /* Figure out how many bytes the result will require */
+      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
+      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+      } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
+      } else {			    bytesToWrite = 3; /* out of range: emit U+FFFD, which takes 3 bytes */
+      ch = UNI_REPLACEMENT_CHAR;
+      }
+
+      target += bytesToWrite; /* jump to the end of the slot: the bytes are stored back-to-front below */
+      if (target > targetEnd) {
+	source = oldSource; /* Back up source pointer! */
+	target -= bytesToWrite; result = targetExhausted; break;
+      }
+      switch (bytesToWrite) { /* note: everything falls through. */
+      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* trailing byte 10xxxxxx holding the low 6 bits */
+      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+      case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]); /* lead byte: remaining bits plus the length marker */
+      }
+      target += bytesToWrite; /* the switch left target at the start of the slot; move past it */
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Utility routine to tell whether a sequence of bytes is legal UTF-8.
+   * This must be called with the length pre-determined by the first byte.
+   * If not calling this from ConvertUTF8to*, then the length can be set by:
+   *  length = trailingBytesForUTF8[*source]+1;
+   * and the sequence is illegal right away if there aren't that many bytes
+   * available.
+   * If presented with a length > 4, this returns false.  The Unicode
+   * definition of UTF-8 goes up to 4-byte sequences.
+   */
+
+  static bool isLegalUTF8(const UTF8 *source, int length) { /* true iff the `length` bytes at source pass the checks below; bytes are examined from the last one back to the first */
+    UTF8 a;
+    const UTF8 *srcptr = source+length; /* one past the last byte of the sequence */
+    switch (length) {
+    default: return false; /* any length other than 1..4 */
+      /* Everything else falls through when "true"... */
+    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; /* 4th byte must be in 0x80..0xBF */
+    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false; /* 3rd byte must be in 0x80..0xBF */
+    case 2: if ((a = (*--srcptr)) > 0xBF) return false; /* 2nd byte: upper bound here, lower bound depends on the lead byte (inner switch) */
+
+      switch (*source) {
+	/* no fall-through in this inner switch */
+      case 0xE0: if (a < 0xA0) return false; break; /* E0 80..9F would encode a value below 0x800 (overlong) */
+      case 0xED: if (a > 0x9F) return false; break; /* ED A0..BF would encode a surrogate (0xD800..0xDFFF) */
+      case 0xF0: if (a < 0x90) return false; break; /* F0 80..8F would encode a value below 0x10000 (overlong) */
+      case 0xF4: if (a > 0x8F) return false; break; /* F4 90..BF would encode a value above 0x10FFFF */
+      default:   if (a < 0x80) return false; /* ordinary lower bound for the 2nd byte */
+      }
+
+    case 1: if (*source >= 0x80 && *source < 0xC2) return false; /* lead byte may not be 0x80..0xC1 */
+    }
+    if (*source > 0xF4) return false; /* lead bytes above 0xF4 are never accepted */
+    return true;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  /*
+   * Exported function to return whether a UTF-8 sequence is legal or not.
+   * This is not used here; it's just exported.
+   */
+  bool isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { /* NOTE(review): *source is read before any bounds check, so callers must pass source < sourceEnd */
+    int length = trailingBytesForUTF8[*source]+1; /* total length implied by the lead byte */
+    if (source+length > sourceEnd) { /* the sequence would run past the end of the buffer */
+      return false;
+    }
+    return isLegalUTF8(source, length);
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF8toUTF16 ( /* Converts UTF-8 in [*sourceStart, sourceEnd) to UTF-16 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+				       const UTF8** sourceStart, const UTF8* sourceEnd, 
+				       UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF16* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch = 0;
+      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+      if (source + extraBytesToRead >= sourceEnd) { /* the last trailing byte would lie outside the buffer */
+	result = sourceExhausted; break;
+      }
+      /* Do this check whether lenient or strict */
+      if (! isLegalUTF8(source, extraBytesToRead+1)) {
+	result = sourceIllegal;
+	break;
+      }
+      /*
+       * The cases all fall through. See "Note A" below.
+       */
+      switch (extraBytesToRead) {
+      case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+      case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
+      case 3: ch += *source++; ch <<= 6;
+      case 2: ch += *source++; ch <<= 6;
+      case 1: ch += *source++; ch <<= 6;
+      case 0: ch += *source++;
+      }
+      ch -= offsetsFromUTF8[extraBytesToRead]; /* subtract the per-length magic value from the accumulated bytes */
+
+      if (target >= targetEnd) {
+	source -= (extraBytesToRead+1); /* Back up source pointer! */
+	result = targetExhausted; break;
+      }
+      if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
+	/* UTF-16 surrogate values are illegal in UTF-32 */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+	  if (flags == strictConversion) {
+	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+	    result = sourceIllegal;
+	    break;
+	  } else {
+	    *target++ = UNI_REPLACEMENT_CHAR;
+	  }
+	} else {
+	  *target++ = (UTF16)ch; /* normal case */
+	}
+      } else if (ch > UNI_MAX_UTF16) {
+	if (flags == strictConversion) {
+	  result = sourceIllegal;
+	  source -= (extraBytesToRead+1); /* return to the start */
+	  break; /* Bail out; shouldn't continue */
+	} else {
+	  *target++ = UNI_REPLACEMENT_CHAR;
+	}
+      } else {
+	/* target is a character in range 0xFFFF - 0x10FFFF. */
+	if (target + 1 >= targetEnd) { /* a surrogate pair needs two UTF-16 units */
+	  source -= (extraBytesToRead+1); /* Back up source pointer! */
+	  result = targetExhausted; break;
+	}
+	ch -= halfBase;
+	*target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START); /* high surrogate: upper 10 bits */
+	*target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START); /* low surrogate: lower 10 bits */
+      }
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF32toUTF8 ( /* Converts UTF-32 in [*sourceStart, sourceEnd) to UTF-8 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+				       const UTF32** sourceStart, const UTF32* sourceEnd, 
+				       UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF32* source = *sourceStart;
+    UTF8* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch;
+      unsigned short bytesToWrite = 0;
+      const UTF32 byteMask = 0xBF; /* AND-ed in to clear bit 6 of a trailing byte */
+      const UTF32 byteMark = 0x80; 
+      ch = *source++;
+      if (flags == strictConversion ) {
+	/* UTF-16 surrogate values are illegal in UTF-32 */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+	  --source; /* return to the illegal value itself */
+	  result = sourceIllegal;
+	  break;
+	}
+      }
+      /*
+       * Figure out how many bytes the result will require. Turn any
+       * illegally large UTF32 things (> Plane 17) into replacement chars.
+       */
+      if (ch < (UTF32)0x80) {	     bytesToWrite = 1;
+      } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
+      } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
+      } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
+      } else {			    bytesToWrite = 3; /* U+FFFD takes 3 bytes */
+      ch = UNI_REPLACEMENT_CHAR;
+      result = sourceIllegal; /* recorded regardless of flags; the loop still writes U+FFFD and continues */
+      }
+	
+      target += bytesToWrite; /* jump to the end of the slot: the bytes are stored back-to-front below */
+      if (target > targetEnd) {
+	--source; /* Back up source pointer! */
+	target -= bytesToWrite; result = targetExhausted; break;
+      }
+      switch (bytesToWrite) { /* note: everything falls through. */
+      case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* trailing byte 10xxxxxx holding the low 6 bits */
+      case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+      case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
+      case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]); /* lead byte: remaining bits plus the length marker */
+      }
+      target += bytesToWrite; /* the switch left target at the start of the slot; move past it */
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+    return result;
+  }
+
+  /* --------------------------------------------------------------------- */
+
+  ConversionResult ConvertUTF8toUTF32 ( /* Converts UTF-8 in [*sourceStart, sourceEnd) to UTF-32 in [*targetStart, targetEnd); both start pointers are advanced past what was consumed/written. */
+				       const UTF8** sourceStart, const UTF8* sourceEnd, 
+				       UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
+    ConversionResult result = conversionOK;
+    const UTF8* source = *sourceStart;
+    UTF32* target = *targetStart;
+    while (source < sourceEnd) {
+      UTF32 ch = 0;
+      unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
+      if (source + extraBytesToRead >= sourceEnd) { /* the last trailing byte would lie outside the buffer */
+	result = sourceExhausted; break;
+      }
+      /* Do this check whether lenient or strict */
+      if (! isLegalUTF8(source, extraBytesToRead+1)) {
+	result = sourceIllegal;
+	break;
+      }
+      /*
+       * The cases all fall through. See "Note A" below.
+       */
+      switch (extraBytesToRead) {
+      case 5: ch += *source++; ch <<= 6;
+      case 4: ch += *source++; ch <<= 6;
+      case 3: ch += *source++; ch <<= 6;
+      case 2: ch += *source++; ch <<= 6;
+      case 1: ch += *source++; ch <<= 6;
+      case 0: ch += *source++;
+      }
+      ch -= offsetsFromUTF8[extraBytesToRead]; /* subtract the per-length magic value from the accumulated bytes */
+
+      if (target >= targetEnd) {
+	source -= (extraBytesToRead+1); /* Back up the source pointer! */
+	result = targetExhausted; break;
+      }
+      if (ch <= UNI_MAX_LEGAL_UTF32) {
+	/*
+	 * UTF-16 surrogate values are illegal in UTF-32, and anything
+	 * over Plane 17 (> 0x10FFFF) is illegal.
+	 */
+	if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
+	  if (flags == strictConversion) {
+	    source -= (extraBytesToRead+1); /* return to the illegal value itself */
+	    result = sourceIllegal;
+	    break;
+	  } else {
+	    *target++ = UNI_REPLACEMENT_CHAR;
+	  }
+	} else {
+	  *target++ = ch; /* normal case: store the decoded value */
+	}
+      } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
+	result = sourceIllegal; /* recorded regardless of flags; the loop still writes U+FFFD and continues */
+	*target++ = UNI_REPLACEMENT_CHAR;
+      }
+    }
+    *sourceStart = source; /* report progress back to the caller */
+    *targetStart = target;
+    return result;
+  }
+
+  wstring fromUtf8(string const & utf8string) /* Converts a UTF-8 string to a wide string (UTF-16 or UTF-32 depending on sizeof(wchar_t)); calls conversionError(), which exits, on any failure. */
+  {
+    size_t widesize = utf8string.length(); /* upper bound on output units: no UTF-8 sequence yields more units than it has bytes */
+    if (sizeof(wchar_t) == 2)
+      {
+	wstring resultstring;
+	resultstring.resize(widesize+1, L'\0'); /* +1 leaves room for the terminator written below */
+	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
+	const UTF8* sourceend = sourcestart + widesize;
+	UTF16* targetstart = reinterpret_cast<UTF16*>(&resultstring[0]);
+	UTF16* targetend = targetstart + widesize;
+	ConversionResult res = ConvertUTF8toUTF16(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
+	if (res != conversionOK)
+	  {
+	    conversionError();
+	  }
+	*targetstart = 0; /* targetstart now points just past the last unit written */
+	return resultstring.substr(0, wcslen(resultstring.c_str())); /* NOTE(review): wcslen stops at the first L'\0', so input with an embedded NUL is truncated there */
+      }
+    else if (sizeof(wchar_t) == 4)
+      {
+	wstring resultstring;
+	resultstring.resize(widesize+1, L'\0'); /* +1 leaves room for the terminator written below */
+	const UTF8* sourcestart = reinterpret_cast<const UTF8*>(utf8string.c_str());
+	const UTF8* sourceend = sourcestart + widesize;
+	UTF32* targetstart = reinterpret_cast<UTF32*>(&resultstring[0]);
+	UTF32* targetend = targetstart + widesize;
+	ConversionResult res = ConvertUTF8toUTF32(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
+	if (res != conversionOK)
+	  {
+	    conversionError();
+	  }
+	*targetstart = 0; /* targetstart now points just past the last unit written */
+	return resultstring.substr(0,wcslen(resultstring.c_str())); /* NOTE(review): same embedded-NUL truncation as in the 2-byte branch */
+      }
+    else
+      {
+	conversionError(); /* wchar_t is neither 2 nor 4 bytes wide */
+      }
+    return L""; /* only reached if conversionError() returns, which it does not in this file */
+  }
+
+  string toUtf8(wstring const &widestring) /* Converts a wide string (UTF-16 or UTF-32 depending on sizeof(wchar_t)) to UTF-8; calls conversionError(), which exits, on any failure. */
+  {
+    size_t widesize = widestring.length();
+
+    if (sizeof(wchar_t) == 2)
+      {
+	size_t utf8size = 3 * widesize + 1; /* at most 3 bytes per UTF-16 unit (a surrogate pair is 2 units -> 4 bytes), plus terminator */
+	string resultstring;
+	resultstring.resize(utf8size, '\0');
+	const UTF16* sourcestart = reinterpret_cast<const UTF16*>(widestring.c_str());
+	const UTF16* sourceend = sourcestart + widesize;
+	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
+	UTF8* targetend = targetstart + utf8size;
+	ConversionResult res = ConvertUTF16toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
+	if (res != conversionOK)
+	  {
+	    conversionError();
+	  }
+	*targetstart = 0; /* targetstart now points just past the last byte written */
+	return resultstring.substr(0, strlen(resultstring.c_str())); /* NOTE(review): strlen stops at the first '\0', so input with an embedded NUL is truncated there */
+      }
+    else if (sizeof(wchar_t) == 4)
+      {
+	size_t utf8size = 4 * widesize + 1; /* at most 4 bytes per UTF-32 unit, plus terminator */
+	string resultstring;
+	resultstring.resize(utf8size, '\0');
+	const UTF32* sourcestart = reinterpret_cast<const UTF32*>(widestring.c_str());
+	const UTF32* sourceend = reinterpret_cast<const UTF32*>(widestring.c_str() + widesize);
+	UTF8* targetstart = reinterpret_cast<UTF8*>(&resultstring[0]);
+	UTF8* targetend = targetstart + utf8size;
+	ConversionResult res = ConvertUTF32toUTF8(&sourcestart, sourceend, &targetstart, targetend, strictConversion);
+	if (res != conversionOK)
+	  {
+	    conversionError();
+	  }
+	*targetstart = 0; /* targetstart now points just past the last byte written */
+	return resultstring.substr(0, strlen(resultstring.c_str())); /* NOTE(review): same embedded-NUL truncation as in the 2-byte branch */
+      }
+    else
+      {
+	conversionError(); /* wchar_t is neither 2 nor 4 bytes wide */
+      }
+    return ""; /* only reached if conversionError() returns, which it does not in this file */
+  }
+}
Index: branches/apertium-tagger/apertium2/apertium/utf_converter.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/utf_converter.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/utf_converter.h	(revision 69632)
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _UTFCONVERTER_
+#define _UTFCONVERTER_
+
+#include <string>
+
+using namespace std;
+
+namespace UtfConverter // UTF-8 <-> wide-string conversion; the implementation in utf_converter.cc exits the process on a conversion failure
+{
+    wstring fromUtf8(string const &utf8string); // UTF-8 encoded string -> wide string
+    string toUtf8(wstring const &widestring); // wide string -> UTF-8 encoded string
+}
+
+#endif
Index: branches/apertium-tagger/apertium2/apertium/apertium-unformat.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-unformat.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-unformat.1	(revision 69632)
@@ -0,0 +1,45 @@
+.TH apertium-unformat 1 2006-03-08 "" ""
+.SH NAME
+apertium-unformat \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.sf.net\fR.
+.SH SYNOPSIS
+.B apertium-unformat
+[\-f format] [infile [outfile]]
+.SH DESCRIPTION
+.BR apertium-unformat
+is the application that extracts unformatted text from documents.
+.RE
+.SH OPTIONS
+.PP
+.B -f format
+Specifies the format of the input and output files which can have
+these values:
+.RS
+\(bu \fItxt\fR \fB(default value)\fR Input and output files are in
+text format.
+.PP
+\(bu \fIhtml\fR Input and output files are in "html" format. This
+"html" is the one accepted by the vast majority of web browsers.
+.PP
+\(bu \fIrtf\fR Input and output files are in "rtf" format. The
+accepted "rtf" is the one generated by \fBMicrosoft WordPad (C)\fR and
+\fBMicrosoft Office (C)\fR up to and including \fBOffice-97\fR.
+.RE
+.PP
+.B infile
+Input file (stdin by default).
+.PP
+.B outfile
+Output file (stdout by default).
+.PP
+.SH SEE ALSO
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-deshtml.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-deshtml.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-deshtml.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-deshtml 1 2006-03-21 "" ""
+.SH NAME
+apertium-deshtml \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-deshtml
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-deshtml 
+is an HTML format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an HTML document and produces output suitable for
+processing with lt-proc. HTML tags and other format information are  enclosed in brackets so that lt-proc treats them as whitespace between words.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<b>gener</b>" | apertium-deshtml | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-deslatex.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-deslatex.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-deslatex.1	(revision 69632)
@@ -0,0 +1,50 @@
+.TH apertium-deslatex 1 2012-02-29 "" ""
+.SH NAME
+apertium-deslatex \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-deslatex
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-deslatex
+is a filter that preprocesses apertium-prelatex output into a deformatted 'XMLish'
+LaTeX custom format. The output is suitable for
+processing with lt-proc. Format information (newlines, tabs, etc.) is enclosed in brackets so that lt-proc treats it as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<textit/><CONTENTS>gener</CONTENTS>" | apertium-deslatex | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-prelatex\fR(1),
+.I apertium-postlatex\fR(1),
+.I apertium-relatex\fR(1),
+.I apertium-postlatex-raw\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-desodt.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desodt.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desodt.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-desodt 1 2006-03-21 "" ""
+.SH NAME
+apertium-desodt \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-desodt
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desodt 
+is an ODT format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an ODT document and produces output suitable for
+processing with lt-proc. ODT tags and other format information are  enclosed in brackets so that lt-proc treats them as whitespace between words.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<b>gener</b>" | apertium-desodt | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-despptx.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-despptx.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-despptx.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-despptx 1 2006-03-21 "" ""
+.SH NAME
+apertium-despptx \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-despptx
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-despptx 
+is a PPTX format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of a PPTX document and produces output suitable for
+processing with lt-proc. PPTX tags and other format information are  enclosed in brackets so that lt-proc treats them as whitespace between words.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<b>gener</b>" | apertium-despptx | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-desrtf.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desrtf.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desrtf.1	(revision 69632)
@@ -0,0 +1,47 @@
+.TH apertium-desrtf 1 2006-03-21 "" ""
+.SH NAME
+apertium-desrtf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-desrtf
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desrtf 
+is an RTF format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an RTF document and produces output suitable for 
+processing with lt-proc. RTF commands and other format information are enclosed in brackets so that lt-proc treats them as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the input document is analysed:
+.TP
+cat <input.rtf> | apertium-desrtf | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-deshtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-destxt.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-destxt.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-destxt.1	(revision 69632)
@@ -0,0 +1,46 @@
+.TH apertium-destxt 1 2006-03-21 "" ""
+.SH NAME
+apertium-destxt \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-destxt
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-destxt 
+is a text format processor. Data should be passed through this
+processor before being piped to lt-proc. The program takes input
+in the form of a text file and produces output suitable for
+processing with lt-proc. Format information (newlines, tabs, etc.) is enclosed in brackets so that lt-proc treats it as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "gener" | apertium-destxt | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-deshtml\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-deswxml.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-deswxml.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-deswxml.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-deswxml 1 2006-03-21 "" ""
+.SH NAME
+apertium-deswxml \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-deswxml
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-deswxml 
+is a WXML format processor. Data should be passed through this
+processor before being piped to lt-proc. The program takes input
+in the form of a WXML document and produces output suitable for
+processing with lt-proc. WXML tags and other format information are enclosed in brackets so that lt-proc treats them as whitespace between words.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<b>gener</b>" | apertium-deswxml | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-desxlsx.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desxlsx.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desxlsx.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-desxlsx 1 2006-03-21 "" ""
+.SH NAME
+apertium-desxlsx \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-desxlsx
+[ \-h ] [ \-i ] [ \-n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desxlsx 
+is an XLSX format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an XLSX document and produces output suitable for
+processing with lt-proc. XLSX tags and other format information are enclosed in brackets so that lt-proc treats them as whitespace between words.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.B \-i
+Makes the addition of trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the word "gener" is analysed: 
+.TP
+echo "<b>gener</b>" | apertium-desxlsx | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-desrtf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-pretransfer.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-pretransfer.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-pretransfer.1	(revision 69632)
@@ -0,0 +1,40 @@
+.TH apertium-pretransfer 1 2006-03-21 "" ""
+.SH NAME
+apertium-pretransfer \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-pretransfer
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-pretransfer 
+module applies some changes to multiwords (such as moving the lemma queue of 
+a multiword with inner inflection just after the lemma head). If 
+the input is not a multiword, it does not affect the output.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the expression "trobant-lo a faltar" is analysed: 
+.TP
+echo "trobant-lo a faltar" | apertium-destxt | lt-proc ca-es.automorf.bin |./ca-es.tagger \-\-tagger ca-es | apertium-pretransfer
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-transfer\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-tagger-apply-new-rules.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-tagger-apply-new-rules.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-tagger-apply-new-rules.1	(revision 69632)
@@ -0,0 +1,40 @@
+.TH apertium-tagger-apply-new-rules 1 2007-03-24 "" ""
+.SH NAME
+apertium-tagger-apply-new-rules \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-tagger-apply-new-rules 
+\-\-filein [ <input file> ] \-\-fileout [ <output file> ] \-\-tsxfile [ <rule file> ]
+
+.PP
+.SH DESCRIPTION
+.BR apertium-tagger-apply-new-rules
+is used to forbid and enforce rules which are applied to the given HMM parameters.
+
+Note that the TSX file provided with \-\-tsxfile *must* be equal, in terms of label definitions, to the one used when training the HMM parameters that are to be modified.
+
+.SH OPTIONS
+.TP
+.B \-i, \-\-filein
+Specify the file with the HMM parameters to process.
+.TP
+.B \-o, \-\-fileout
+Specify the file to which the HMM parameters will be written.
+.TP
+.B \-x, \-\-tsxfile 
+File containing the rules to apply
+.PP
+.SH SEE ALSO
+.I apertium-tagger\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005 -- 2007, Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-tagger.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-tagger.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-tagger.1	(revision 69632)
@@ -0,0 +1,103 @@
+.TH apertium-tagger 1 2006-08-30 "" ""
+.SH NAME
+apertium-tagger \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation
+architecture: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-tagger \-\-train|\-t 
+{n} DIC CRP TSX PROB [\-\-debug|\-d]\fR 
+.PP
+.B apertium-tagger \-\-supervised|\-s 
+{n} DIC CRP TSX PROB HTAG UNTAG [\-\-debug|\-d]\fR 
+.PP
+.B apertium-tagger \-\-retrain|\-r 
+{n} CRP PROB [\-\-debug|\-d] \fR
+.PP
+.B apertium-tagger \-\-tagger|\-g 
+[\-\-first|\-f] PROB [\-\-debug|\-d] [INPUT [OUTPUT]] \fR
+.PP
+.SH DESCRIPTION
+.BR apertium-tagger 
+is the application responsible for the apertium part-of-speech tagger
+training or tagging, depending on the calling options.  This command
+only reads from the standard input if the option \fB\-\-tagger\fR or
+\fB\-g\fR is used.
+.SH OPTIONS
+.TP
+.B \-t {n}, \-\-train {n}
+Initializes parameters through Kupiec's method (unsupervised),
+then performs \fBn\fR iterations of the Baum-Welch training algorithm
+(unsupervised).
+.TP
+.B \-s {n}, \-\-supervised {n}
+Initializes parameters against a hand-tagged text (supervised) through
+the maximum likelihood estimate method, then performs \fBn\fR
+iterations of the Baum-Welch training algorithm (unsupervised).
+.TP
+.B \-r {n}, \-\-retrain {n}
+Retrains the model with \fBn\fR additional Baum-Welch iterations
+(unsupervised).
+.TP
+.B \-g, \-\-tagger
+Tags input text by means of Viterbi algorithm.
+.TP
+.B \-p, \-\-show\-superficial
+Prints the superficial form of the word alongside the lexical form
+in the output stream.
+.TP
+.B \-f, \-\-first
+Used in conjunction with \-g (\-\-tagger), makes the tagger
+give all lexical forms of each word, with the chosen
+one in the first place (after the lemma).
+.TP
+.B \-d, \-\-debug
+Print error (if any) or debug messages while operating.
+.TP
+.B \-m, \-\-mark
+Mark disambiguated words.
+.TP
+.B \-h, \-\-help
+Display a help message.
+.SH FILES
+These are the kinds of files used with each option:
+.PP
+.B DIC
+Full expanded dictionary file
+.PP
+.B CRP
+Training text corpus file
+.PP
+.B TSX
+Tagger specification file, in XML format
+.PP
+.B PROB 
+Tagger data file, built in the training and used while tagging
+.PP
+.B  HTAG        
+Hand-tagged text corpus
+.PP
+.B UNTAG       
+Untagged text corpus, morphological analysis of HTAG corpus to use
+both jointly with \-s option
+.PP
+.B INPUT       
+Input file, stdin by default
+.PP
+.B OUTPUT      
+Output file, stdout by default
+.PP
+.SH SEE ALSO
+.I lt-proc\fR(1),
+.I lt-comp\fR(1),
+.I lt-expand\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.cc	(revision 69632)
@@ -0,0 +1,744 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_aligner_tool.h>
+
+namespace TMXAligner
+{
+
+extern std::string hunglishDictionaryHome;
+extern std::string hunglishExperimentsHome;
+
+void readTrailOrBisentenceList( std::istream& is, Trail& trail )
+{ // Replaces the contents of `trail` with the integer pairs read from `is`, one "<int> <int>\n" record per line.
+  trail.clear();
+  while ( is.peek() != -1 ) // Loop until peek() reports -1 (presumably the stream's end-of-file value).
+  {
+    int huPos, enPos;
+    // First field: it must be followed by a single space.
+    is >> huPos;
+    if (is.peek()!=' ')
+    {
+      std::cerr << "no space in line" << std::endl;
+      throw "data error"; // Thrown as a string literal (const char*), not as a std::exception.
+    }
+    is.ignore();
+    // Second field: it must be followed immediately by a newline, so a last line without one is rejected too.
+    is >> enPos;
+    if (is.peek()!='\n')
+    {
+      std::cerr << "too much data in line" << std::endl;
+      throw "data error";
+    }
+    is.ignore();
+    // The pair is stored only after the whole line has been validated.
+    trail.push_back(std::make_pair(huPos,enPos));
+  }
+}
+
+void scoreBisentenceListByFile( const BisentenceList& bisentenceList, const std::string& handAlignFile )
+{ // Scores `bisentenceList` against the hand-made alignment stored in the file named `handAlignFile`.
+  Trail trailHand;
+  std::ifstream is( handAlignFile.c_str() );
+  readTrailOrBisentenceList( is, trailHand ); // NOTE(review): the open is not checked; on failure trailHand presumably stays empty.
+  // The comparison itself is delegated to scoreBisentenceList, which is defined elsewhere.
+  scoreBisentenceList( bisentenceList, trailHand );
+}
+
+void scoreTrailByFile( const Trail& bestTrail, const std::string& handAlignFile )
+{ // Scores `bestTrail` against the hand-made alignment stored in the file named `handAlignFile`.
+  Trail trailHand;
+  std::ifstream is( handAlignFile.c_str() );
+  readTrailOrBisentenceList( is, trailHand ); // NOTE(review): the open is not checked; on failure trailHand presumably stays empty.
+  // The comparison itself is delegated to scoreTrail, which is defined elsewhere.
+  scoreTrail( bestTrail, trailHand );
+}
+
+// TEMP TEMP: temporary forward declaration; the definition is not in this section of the file.
+void logLexiconCoverageOfBicorpus( SentenceList& huSentenceList, SentenceList& enSentenceList,
+                                   const DictionaryItems& dictionaryItems );
+
+
+// Returns the TrailScoresInterval value taken over the whole trail, i.e. from index 0 to trail.size()-1.
+// Original notes: the <p> scores should not be counted, which causes some complications; otherwise this is
+// just the average score of segments. Currently this does not like segment lengths of more than two.
+double globalScoreOfTrail( const Trail& trail, const AlignMatrix& dynMatrix,
+                         const SentenceList& huSentenceListGarbled, const SentenceList& enSentenceListGarbled )
+{
+  TrailScoresInterval trailScoresInterval( trail, dynMatrix, huSentenceListGarbled, enSentenceListGarbled );
+  // NOTE(review): if size() is unsigned, trail.size()-1 wraps around for an empty trail; confirm callers never pass one.
+  return trailScoresInterval(0,trail.size()-1);
+}
+
+
+void collectBisentences( const Trail& bestTrail, const AlignMatrix& dynMatrix,
+                         const SentenceList& huSentenceListPretty, const SentenceList& enSentenceListPretty,
+                         SentenceList& huBisentences, SentenceList& enBisentences,
+                         double qualityThreshold )
+{ // Fills huBisentences/enBisentences with the sentence pairs that trailToBisentenceList selects from bestTrail.
+  huBisentences.clear();
+  enBisentences.clear();
+  // Both output lists are emptied first, so earlier contents are discarded.
+  BisentenceList bisentenceList;
+  // The selection (driven by trailScores and qualityThreshold) is done by trailToBisentenceList, defined elsewhere.
+  TrailScores trailScores( bestTrail, dynMatrix );
+  trailToBisentenceList( bestTrail, trailScores, qualityThreshold, bisentenceList );
+  // Each entry holds an index into the hu list (.first) and into the en list (.second).
+  for (size_t i=0; i<bisentenceList.size(); ++i )
+  {
+    huBisentences.push_back( huSentenceListPretty[ bisentenceList[i].first  ] );
+    enBisentences.push_back( enSentenceListPretty[ bisentenceList[i].second ] );
+  }
+
+//  std::cerr << huBisentences.size() << " bisentences collected." << std::endl;
+
+}
+
+
+void temporaryDumpOfAlignMatrix( std::ostream& os, const AlignMatrix& alignMatrix )
+{ // Writes every stored cell of alignMatrix to os, one output line per matrix row.
+  for ( int huPos=0; huPos<alignMatrix.size(); ++huPos )
+  {
+    int rowStart = alignMatrix.rowStart(huPos); // Only the columns in [rowStart, rowEnd) of this row are visited.
+    int rowEnd   = alignMatrix.rowEnd(huPos);
+    for ( int enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      bool numeric = true; // Hard-wired, so the symbolic branch below is never taken.
+      if (numeric)
+      {
+        os << alignMatrix[huPos][enPos] << "\t" ; // Raw cell value, tab-separated.
+      }
+      else
+      {
+        if (alignMatrix[huPos][enPos]<0) // Symbolic form: "." for negative, the value itself below 10, "X" otherwise.
+        {
+          os << ". " ;
+        }
+        else if (alignMatrix[huPos][enPos]<10)
+        {
+          os << alignMatrix[huPos][enPos] << " " ;
+        }
+        else
+        {
+          os << "X " ;
+        }
+      }
+    }
+    os << std::endl; // Ends the row and flushes the stream.
+  }
+}
+
+
+double alignerToolWithObjects( const DictionaryItems& dictionary,
+                 SentenceList& huSentenceListPretty,
+                 SentenceList& enSentenceList,
+                 const AlignParameters& alignParameters,
+                 std::ostream& os )
+{ // Aligns the two sentence lists, writes the resulting alignment to `os` and returns the global score of the trail.
+  int huBookSize = huSentenceListPretty.size();
+  int enBookSize = enSentenceList.size();
+
+  SentenceValues huLength,enLength;
+  setSentenceValues( huSentenceListPretty, huLength, alignParameters.utfCharCountingMode ); // Here we use the most originalest Hungarian text.
+  setSentenceValues( enSentenceList,       enLength, alignParameters.utfCharCountingMode );
+
+  bool quasiglobal_stopwordRemoval = false; // Hard-wired off, so removeStopwords below is never called.
+//  std::cerr << "quasiglobal_stopwordRemoval is set to " << quasiglobal_stopwordRemoval << std::endl;
+  if (quasiglobal_stopwordRemoval)
+  {
+    removeStopwords( huSentenceListPretty, enSentenceList );
+//    std::cerr << "Stopwords removed." << std::endl;
+  }
+
+  SentenceList huSentenceListGarbled, enSentenceListGarbled;
+
+  normalizeTextsForIdentity( dictionary,
+                             huSentenceListPretty,  enSentenceList,
+                             huSentenceListGarbled, enSentenceListGarbled );
+
+  const int minimalThickness = 500;
+
+  const double quasiglobal_maximalSizeInMegabytes = 4000;
+  // The huBookSize divisor below is clamped to 1 so that an empty source list cannot cause a division by zero.
+  const int maximalThickness = (int) (
+    quasiglobal_maximalSizeInMegabytes
+    * 1024*1024 /*bytes*/
+    / ( 2*sizeof(double)+sizeof(char) ) /* for the similarity, dynprog and trelli matrices */
+    / (double)( huBookSize>0 ? huBookSize : 1 ) /* the memory consumption of alignMatrix( huBookSize, enBookSize, thickness ) is huBookSize*thickness. */
+    / 2.4 /* unexplained empirically observed factor. linux top is weird. :) */
+    ) ;
+
+  // Note that thickness is not a radius, it's a diameter.
+  const double thicknessRatio = 10.0;
+
+  int thickness = (int) ( (double)( huBookSize>enBookSize ? huBookSize : enBookSize ) / thicknessRatio ) ;
+
+  thickness = ( thickness>minimalThickness ? thickness : minimalThickness ) ;
+
+  if (thickness>maximalThickness)
+  {
+//    std::cerr << "WARNING: Downgrading planned thickness " << thickness << " to " << maximalThickness ;
+//    std::cerr << " to obey memory constraint of " << quasiglobal_maximalSizeInMegabytes << " megabytes " << std::endl;
+//    std::cerr << "You should recompile if you have much more physical RAM than that. People of the near-future, forgive me for the inconvenience." << std::endl;
+
+    thickness = maximalThickness;
+  }
+
+  AlignMatrix similarityMatrix( huBookSize, enBookSize, thickness, outsideOfRadiusValue );
+
+  sentenceListsToAlignMatrixIdentity( huSentenceListGarbled, enSentenceListGarbled, similarityMatrix );
+//  std::cerr << std::endl;
+//  std::cerr << "Rough translation-based similarity matrix ready." << std::endl;
+
+  Trail bestTrail;
+  AlignMatrix dynMatrix( huBookSize+1, enBookSize+1, thickness, 1e30 );
+
+  align( similarityMatrix, huLength, enLength, bestTrail, dynMatrix );
+//  std::cerr << "Align ready." << std::endl;
+
+  double globalQuality;
+  globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
+                                      huSentenceListGarbled, enSentenceListGarbled );
+
+  //  std::cerr << "Global quality of unfiltered align " << globalQuality << std::endl;
+
+  if (alignParameters.realignType==AlignParameters::NoRealign)
+  {
+  }
+  else
+  {
+    AlignMatrix similarityMatrixDetailed( huBookSize, enBookSize, thickness, outsideOfRadiusValue );
+
+    bool success = borderDetailedAlignMatrix( similarityMatrixDetailed, bestTrail, 5/*radius*/ );
+
+    if (!success)
+    {
+//      std::cerr << "Realign zone too close to quasidiagonal border. Abandoning realign. The align itself is suspicious." << std::endl;
+    }
+    else
+    {
+//      std::cerr << "Border of realign zone determined." << std::endl;
+
+      switch (alignParameters.realignType)
+      {
+      case AlignParameters::ModelOneRealign:
+        {
+          IBMModelOne modelOne;
+
+          SentenceList huBisentences,enBisentences;
+
+          throw "unimplemented"; // NOTE: the remaining statements of this case are unreachable.
+//          std::cerr << "Plausible bisentences filtered." << std::endl;
+
+          modelOne.build(huBisentences,enBisentences);
+//          std::cerr << "IBM Model I ready." << std::endl;
+
+          sentenceListsToAlignMatrixIBMModelOne( huSentenceListPretty, enSentenceList, modelOne, similarityMatrixDetailed );
+//          std::cerr << "IBM Model I based similarity matrix ready." << std::endl;
+          break;
+        }
+      case AlignParameters::FineTranslationRealign:
+        {
+          TransLex transLex;
+          transLex.build(dictionary);
+//          std::cerr << "Hashtable for dictionary ready." << std::endl;
+
+          sentenceListsToAlignMatrixTranslation( huSentenceListPretty, enSentenceList, transLex, similarityMatrixDetailed );
+
+//          std::cerr << "Fine translation-based similarity matrix ready." << std::endl;
+          break;
+        }
+
+      case AlignParameters::NoRealign:
+      default:
+	{
+	  break;
+	}
+      }
+
+      Trail bestTrailDetailed;
+      AlignMatrix dynMatrixDetailed( huBookSize+1, enBookSize+1, thickness, 1e30 );
+      align( similarityMatrixDetailed, huLength, enLength, bestTrailDetailed, dynMatrixDetailed );
+//      std::cerr << "Detail realign ready." << std::endl;
+      // The realigned trail and matrix replace the first-pass results from here on.
+      bestTrail = bestTrailDetailed;
+      dynMatrix = dynMatrixDetailed;
+
+      globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
+                                          huSentenceListGarbled, enSentenceListGarbled );
+
+      //      std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
+    }
+  }
+
+  TrailScoresInterval trailScoresInterval( bestTrail, dynMatrix, huSentenceListGarbled, enSentenceListGarbled );
+  // Each of the three postprocessing steps below is skipped when its threshold equals -1.
+  if ( alignParameters.postprocessTrailQualityThreshold != -1 )
+  {
+    postprocessTrail( bestTrail, trailScoresInterval, alignParameters.postprocessTrailQualityThreshold );
+//    std::cerr << "Trail start and end postprocessed by score." << std::endl;
+  }
+
+  if ( alignParameters.postprocessTrailStartAndEndQualityThreshold != -1 )
+  {
+    postprocessTrailStartAndEnd( bestTrail, trailScoresInterval, alignParameters.postprocessTrailStartAndEndQualityThreshold );
+//    std::cerr << "Trail start and end postprocessed by score." << std::endl;
+  }
+
+  if ( alignParameters.postprocessTrailByTopologyQualityThreshold != -1 )
+  {
+    postprocessTrailByTopology( bestTrail, alignParameters.postprocessTrailByTopologyQualityThreshold );
+//    std::cerr << "Trail postprocessed by topology." << std::endl;
+  }
+
+  bool quasiglobal_spaceOutBySentenceLength = true; // Hard-wired on, so spaceOutBySentenceLength always runs.
+//  std::cerr << "quasiglobal_spaceOutBySentenceLength is set to " << quasiglobal_spaceOutBySentenceLength << std::endl;
+  if (quasiglobal_spaceOutBySentenceLength)
+  {
+    spaceOutBySentenceLength( bestTrail, huSentenceListPretty, enSentenceList, alignParameters.utfCharCountingMode );
+//    std::cerr << "Trail spaced out by sentence length." << std::endl;
+  }
+
+  // In cautious mode, auto-aligned rundles are thrown away if
+  // their left or right neighbour holes are not one-to-one.
+  if (alignParameters.cautiousMode)
+  {
+    cautiouslyFilterTrail( bestTrail );
+//    std::cerr << "Trail filtered by topology." << std::endl;
+  }
+
+  globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
+                                    huSentenceListGarbled, enSentenceListGarbled );
+
+  //  std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
+
+  bool textual = ! alignParameters.justSentenceIds ; // true: print sentence text; false: print sentence indices.
+
+  if (alignParameters.justBisentences)
+  {
+    BisentenceList bisentenceList;
+    trailToBisentenceList( bestTrail, bisentenceList );
+
+    filterBisentenceListByQuality( bisentenceList, dynMatrix, alignParameters.qualityThreshold );
+
+    BisentenceListScores bisentenceListScores(bisentenceList, dynMatrix);
+    // Output format: one line per bisentence, "<hu>\t<en>\t<score>".
+    for ( size_t i=0; i<bisentenceList.size(); ++i )
+    {
+      int huPos = bisentenceList[i].first;
+      int enPos = bisentenceList[i].second;
+
+      if (textual)
+      {
+        os << huSentenceListPretty[huPos].words;
+      }
+      else
+      {
+        os << huPos ;
+      }
+
+      os << "\t" ;
+
+      if (textual)
+      {
+        os << enSentenceList[enPos].words;
+      }
+      else
+      {
+        os << enPos ;
+      }
+
+      os << "\t" << bisentenceListScores(i);
+
+      os << std::endl;
+    }
+
+    if (! alignParameters.handAlignFilename.empty())
+    {
+      scoreBisentenceListByFile( bisentenceList, alignParameters.handAlignFilename );
+    }
+  }
+  else
+  {
+    filterTrailByQuality( bestTrail, trailScoresInterval, alignParameters.qualityThreshold );
+    // i+1<size() rather than i<size()-1: the unsigned subtraction would wrap around for an empty trail.
+    for ( size_t i=0; i+1<bestTrail.size(); ++i )
+    {
+      // The [huPos, nexthuPos) interval corresponds to the [enPos, nextenPos) interval.
+      int huPos = bestTrail[i].first;
+      int enPos = bestTrail[i].second;
+      int nexthuPos = bestTrail[i+1].first;
+      int nextenPos = bestTrail[i+1].second;
+
+      if (textual)
+      {
+        int j;
+        for ( j=huPos; j<nexthuPos; ++j )
+        {
+            os << huSentenceListPretty[j].words;
+
+            if (j+1<nexthuPos)
+              os << " "; // os << " ~~~ ";
+        }
+
+        os << "\t" ;
+
+        for ( j=enPos; j<nextenPos; ++j )
+        {
+          os << enSentenceList[j].words;
+          if (j+1<nextenPos)
+          {
+            os << " "; // os << " ~~~ ";
+          }
+        }
+      }
+      else // (!textual)
+      {
+        os << huPos << "\t" << enPos ;
+      }
+
+      os << "\t" << trailScoresInterval(i);
+
+      os << std::endl;
+    }
+
+    if (! alignParameters.handAlignFilename.empty())
+    {
+      scoreTrailByFile( bestTrail, alignParameters.handAlignFilename );
+    }
+  }
+  // globalQuality was last computed above, before the output-time quality filtering.
+  return globalQuality;
+}
+
+
+void alignerToolWithFilenames( const DictionaryItems& dictionary,
+                 const std::string& huFilename, const std::string& enFilename,
+                 const AlignParameters& alignParameters,
+                 const std::string& outputFilename)
+{ // Reads both sentence files, runs alignerToolWithObjects and writes to outputFilename, or to std::cout if it is empty.
+  std::ifstream hus(huFilename.c_str());
+  SentenceList huSentenceListPretty;
+  huSentenceListPretty.readNoIds( hus );
+//  std::cerr << huSentenceListPretty.size() << " hungarian sentences read." << std::endl;
+
+  std::ifstream ens(enFilename.c_str());
+  SentenceList enSentenceList;
+  enSentenceList.readNoIds( ens );
+//  std::cerr << enSentenceList.size() << " english sentences read." << std::endl;
+  // Give up silently when one list is shorter than a fifth (integer division) of the other.
+  if ( (enSentenceList.      size() < huSentenceListPretty.size()/5) ||
+       (huSentenceListPretty.size() < enSentenceList.      size()/5) )
+  {
+//    std::cerr << "Sizes differing too much. Ignoring files to avoid a rare loop bug." << std::endl;
+    return;
+  }
+
+  if (outputFilename.empty())
+  {
+    /* double globalQuality = */alignerToolWithObjects
+     ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, std::cout );
+
+//    std::cerr << "Quality " << globalQuality << std::endl ;
+      
+  }
+  else
+  {
+    std::ofstream os(outputFilename.c_str());
+    /*double globalQuality = */ alignerToolWithObjects
+     ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, os );
+    // The returned quality value is discarded in both branches.
+    // If you want to collect global quality information in batch mode, grep "^Quality" of stderr must do.
+//    std::cerr << "Quality\t" << outputFilename << "\t" << globalQuality << std::endl ;
+  }
+
+}
+
+void fillPercentParameter( Arguments& args, const std::string& argName, double& value )
+{ // Stores the integer argument `argName` in `value`, divided by 100; `value` is left untouched if getNumericParam returns false.
+  int valueInt;
+  if ( args.getNumericParam(argName, valueInt))
+  {
+    value = 1.0 * valueInt / 100 ; // The 1.0 factor forces floating-point division, so 50 becomes 0.5.
+  }
+}
+
+void main_alignerToolUsage()
+{ // Prints the command-line help text of the aligner tool to std::cerr.
+  std::cerr << "Usage (either):\n\
+    alignerTool [ common_arguments ] [ -hand=hand_align_file ] dictionary_file source_text target_text\n\
+\n\
+or:\n\
+    alignerTool [ common_arguments ] -batch dictionary_file batch_file\n\
+\n\
+where\n\
+common_arguments ::= [ -text ] [ -bisent ] [ -utf ] [ -cautious ] [ -realign [ -autodict=filename ] ]\n\
+    [ -thresh=n ] [ -ppthresh=n ] [ -headerthresh=n ] [ -topothresh=n ]\n\
+\n\
+Arguments:\n\
+\n\
+-text\n\
+	The output should be in text format, rather than the default (numeric) ladder format.\n\
+\n\
+-bisent\n\
+	Only bisentences (one-to-one alignment segments) are printed. In non-text mode, their\n\
+	starting rung is printed.\n\
+\n\
+-cautious\n\
+	In -bisent mode, only bisentences for which both the preceding and the following\n\
+	segments are one-to-one are printed. In the default non-bisent mode, only rungs\n\
+	for which both the preceding and the following segments are one-to-one are printed.\n\
+\n\
+-hand=file\n\
+	When this argument is given, the precision and recall of the alignment is calculated\n\
+	based on the manually built ladder file. Information like the following is written\n\
+	on the standard error: \n\
+	53 misaligned out of 6446 correct items, 6035 bets.\n\
+	Precision: 0.991218, Recall: 0.928017\n\
+	\n\
+        Note that by default, 'item' means rung. The switch -bisent also changes the semantics\n\
+	of the scoring from rung-based to bisentence-based and in this case 'item' means bisentences.\n\
+	See File formats about the format of this input align file.\n\
+\n\
+-autodict=filename\n\
+	The dictionary built during realign is saved to this file. By default, it is not saved.\n\
+\n\
+-utf\n\
+	The system uses the character counts of the sentences as information for the\n\
+	pairing of sentences. By default, it assumes one-byte character encoding such\n\
+	as ISO Latin-1 when calculating these counts. If our text is in UTF-8 format,\n\
+	byte counts and character counts are different, and we must use the -utf switch\n\
+	to force the system to properly calculate character counts.\n\
+	Note: UTF-16 input is not supported.\n\
+\n\
+Postfiltering options:\n\
+There are various postprocessors which remove implausible rungs based on various heuristics.\n\
+\n\
+-thresh=n\n\
+	Don't print out segments with score lower than n/100.\n\
+\n\
+-ppthresh=n\n\
+	Filter rungs with less than n/100 average score in their vicinity.\n\
+\n\
+-headerthresh=n\n\
+	Filter all rungs at the start and end of texts until finding a reliably\n\
+	plausible region.\n\
+\n\
+-topothresh=n\n\
+	Filter rungs with less than n percent of one-to-one segments in their vicinity.\n\
+\n\
+";
+}
+
+// Command-line driver of the aligner tool.
+//
+// Parses the switches described by main_alignerToolUsage(), reads the
+// dictionary named by the first positional argument and then either aligns one
+// text pair (non-batch mode: dictionary, source text, target text) or every
+// job listed in a batch file (-batch: dictionary, batch file). Each batch line
+// must consist of exactly three tab-separated fields: source text, target text
+// and output file. A job that fails is reported and the batch continues; a
+// malformed batch line aborts the run.
+//
+// Returns 0 on success. Unless _DEBUG is defined, every exception is reported
+// on standard error and converted into a return value of -1.
+int main_alignerTool(int argC, char* argV[])
+{
+#ifndef _DEBUG
+  try
+#endif
+  {
+    if (argC<4)
+    {
+      main_alignerToolUsage();
+      throw "";
+    }
+
+    Arguments args;
+    std::vector<const char*> remains;
+    args.read( argC, argV, remains );
+
+    AlignParameters alignParameters;
+
+    if (args.getSwitchCompact("text"))
+    {
+      alignParameters.justSentenceIds = false;
+    }
+
+    if (args.getSwitchCompact("bisent"))
+    {
+      alignParameters.justBisentences = true;
+    }
+
+    if (args.getSwitchCompact("cautious"))
+    {
+      alignParameters.cautiousMode = true;
+    }
+
+    alignParameters.utfCharCountingMode = args.getSwitchCompact("utf");
+
+    // Thresholds are given as integer percentages and stored as fractions.
+    fillPercentParameter( args, "thresh", alignParameters.qualityThreshold );
+
+    fillPercentParameter( args, "ppthresh", alignParameters.postprocessTrailQualityThreshold );
+
+    fillPercentParameter( args, "headerthresh", alignParameters.postprocessTrailStartAndEndQualityThreshold );
+
+    fillPercentParameter( args, "topothresh", alignParameters.postprocessTrailByTopologyQualityThreshold );
+
+    bool batchMode = args.getSwitchCompact("batch") ;
+
+    if (batchMode && (remains.size()!=2) )
+    {
+      std::cerr << "Batch mode requires exactly two file arguments." << std::endl;
+      std::cerr << std::endl;
+
+      main_alignerToolUsage();
+      throw "argument error";
+    }
+
+    // -hand=file: only meaningful for a single text pair.
+    std::string handArgumentname = "hand";
+    if (args.find(handArgumentname)!=args.end())
+    {
+      if (batchMode)
+      {
+        std::cerr << "-batch and -" << handArgumentname << " are incompatible switches." << std::endl;
+        throw "argument error";
+      }
+      else
+      {
+        alignParameters.handAlignFilename = args[handArgumentname].dString ;
+        args.erase(handArgumentname);
+
+        if (alignParameters.handAlignFilename.empty())
+        {
+          std::cerr << "-" << handArgumentname << " switch requires a filename value." << std::endl;
+          throw "argument error";
+        }
+      }
+    }
+
+    // -autodict=file: only meaningful for a single text pair.
+    std::string autoDictDumpArgumentname = "autodict";
+    if (args.find(autoDictDumpArgumentname)!=args.end())
+    {
+      if (batchMode)
+      {
+        std::cerr << "-batch and -" << autoDictDumpArgumentname << " are incompatible switches." << std::endl;
+        throw "argument error";
+      }
+      else
+      {
+        alignParameters.autoDictionaryDumpFilename = args[autoDictDumpArgumentname].dString ;
+        args.erase(autoDictDumpArgumentname);
+
+        if (alignParameters.autoDictionaryDumpFilename.empty())
+        {
+          std::cerr << "-" << autoDictDumpArgumentname << " switch requires a filename value." << std::endl;
+          throw "argument error";
+        }
+      }
+    }
+
+    if (!batchMode && (remains.size()!=3) )
+    {
+      std::cerr << "Nonbatch mode requires exactly three file arguments." << std::endl;
+      std::cerr << std::endl;
+
+      main_alignerToolUsage();
+      throw "argument error";
+    }
+
+    // Whatever checkEmptyArgs() throws is replaced by the usage text plus a
+    // uniform "argument error".
+    try
+    {
+      args.checkEmptyArgs();
+    }
+    catch (...)
+    {
+      std::cerr << std::endl;
+
+      main_alignerToolUsage();
+      throw "argument error";
+    }
+
+//    std::cerr << "Reading dictionary..." << std::endl;
+    const char* dicFilename = remains[0] ;
+    DictionaryItems dictionary;
+    std::ifstream dis(dicFilename);
+    dictionary.read(dis);
+
+    if (batchMode)
+    {
+      const char* batchFilename = remains[1] ;
+      std::ifstream bis(batchFilename);
+
+      // Use the result of getline as the loop condition: testing good()/eof()
+      // before reading yields one phantom empty line when the file ends with a
+      // newline, which used to abort the run with a format error after all
+      // jobs had already been processed.
+      std::string line;
+      while (std::getline(bis,line))
+      {
+        std::vector<std::string> words;
+        split( line, words, '\t' );
+
+        if (words.size()!=3)
+        {
+          std::cerr << "Batch file has incorrect format." << std::endl;
+          throw "data error";
+        }
+
+        std::string huFilename, enFilename, outFilename;
+        huFilename  = words[0];
+        enFilename  = words[1];
+        outFilename = words[2];
+
+//        std::cerr << "Processing " << outFilename << std::endl;
+        // A failing job must not stop the remaining ones: report and go on.
+        bool failed = false;
+        try
+        {
+          alignerToolWithFilenames( dictionary, huFilename, enFilename, alignParameters, outFilename );
+        }
+        catch ( const char* errorType )
+        {
+          std::cerr << errorType << std::endl;
+          failed = true;
+        }
+        catch ( std::exception& e )
+        {
+          std::cerr << "some failed assertion:" << e.what() << std::endl;
+          failed = true;
+        }
+        catch ( ... )
+        {
+          std::cerr << "some unknown failed assertion..." << std::endl;
+          failed = true;
+        }
+
+        if (failed)
+        {
+          std::cerr << "Align failed for " << outFilename << std::endl;
+        }
+      }
+    }
+    else
+    {
+      const char* huFilename  = remains[1] ;
+      const char* enFilename  = remains[2] ;
+
+      alignerToolWithFilenames( dictionary, huFilename, enFilename, alignParameters );
+    }
+  }
+#ifndef _DEBUG
+  catch ( const char* errorType )
+  {
+    std::cerr << errorType << std::endl;
+    return -1;
+  }
+  catch ( std::exception& e )
+  {
+    std::cerr << "some failed assertion:" << e.what() << std::endl;
+    return -1;
+  }
+  catch ( ... )
+  {
+    std::cerr << "some unknown failed assertion..." << std::endl;
+    return -1;
+  }
+#endif
+  return 0;
+}
+
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1250.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1250.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1250.1	(revision 69632)
@@ -0,0 +1,47 @@
+.TH apertium-desrtf 1 2006-03-21 "" ""
+.SH NAME
+apertium-desrtf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://apertium.sf.net\fR.
+.SH SYNOPSIS
+.B apertium-desrtf
+[ -h ] [ -i ] [ -n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desrtf 
+is an RTF format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an RTF document and produces output suitable for 
+processing with lt-proc. RTF commands and other format information are enclosed in brackets so that lt-proc treats them as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.TP
+.B \-i
+Makes the addition of a trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.TP
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the input document  is analysed: 
+.TP
+cat <input.rtf> | apertium-desrtf | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-deshtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1251.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1251.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-desrtf-cp1251.1	(revision 69632)
@@ -0,0 +1,47 @@
+.TH apertium-desrtf 1 2006-03-21 "" ""
+.SH NAME
+apertium-desrtf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://apertium.sf.net\fR.
+.SH SYNOPSIS
+.B apertium-desrtf
+[ -h ] [ -i ] [ -n ]
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-desrtf 
+is an RTF format processor. Data should be passed through this 
+processor before being piped to lt-proc. The program takes input
+in the form of an RTF document and produces output suitable for 
+processing with lt-proc. RTF commands and other format information are enclosed in brackets so that lt-proc treats them as whitespace between words.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.TP
+.B \-i
+Makes the addition of a trailing sentence terminator (".") unconditional, often
+leading to duplicates.
+.TP
+.B \-n
+Suppresses the addition of a trailing sentence terminator.
+.PP
+.SH EXAMPLE
+.TP
+You could write the following to show how the input document  is analysed: 
+.TP
+cat <input.rtf> | apertium-desrtf | lt-proc ca-es.automorf.bin
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-deshtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.l
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.l	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.l	(revision 69632)
@@ -0,0 +1,306 @@
+
+
+%{
+
+
+
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include <apertium/latex_accentsmap.h>
+
+extern "C" {
+#if !defined(__STDC__)
+# define __STDC__ 1
+#endif
+#include <regex.h>
+}
+
+#include <string>
+#include <lttoolbox/lt_locale.h>
+#include <lttoolbox/ltstr.h>
+#ifndef GENFORMAT
+#include "apertium_config.h"
+#endif
+#include <apertium/unlocked_cstdio.h>
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace std;
+
+AccentsMap accentsMap(true);
+wstring closesym = L"";
+string memconv = "";
+
+// Converts the first `length` bytes of `multibyte` from the locale's
+// multibyte encoding to a wide string.
+//
+// The bytes are appended to the global `memconv` and the whole of `memconv`
+// is converted. When the conversion fails, the bytes stay in `memconv` so
+// that they are retried together with the input of the next call, and an
+// empty string is returned. This lets a character that arrives one byte per
+// call be converted once its last byte is seen.
+//
+// If the conversion still fails with 4 or more bytes pending, a warning is
+// printed and the pending bytes are discarded. Without discarding them, the
+// invalid prefix would make every later call fail as well, and all remaining
+// output would be lost.
+wstring convertir(string const &multibyte, int const length)
+{
+  memconv.append(multibyte.c_str(), length);
+
+  // The wide result never has more characters than the input has bytes;
+  // one extra slot keeps room for a terminator.
+  vector<wchar_t> buffer(memconv.size() + 1);
+  size_t const converted = mbstowcs(&buffer[0], memconv.c_str(), memconv.size());
+
+  if(converted == static_cast<size_t>(-1))
+  {
+    if(memconv.size() >= 4)
+    {
+      wcerr << L"Warning: wrong encoding" << endl;
+      // Give up on these bytes so that the following input can be converted.
+      memconv.clear();
+    }
+    return L"";
+  }
+
+  memconv.clear();
+  return wstring(&buffer[0], converted);
+}
+
+
+
+
+%}
+
+
+%option nounput
+%option noyywrap
+%option stack
+
+%x mathenv
+%x readbrackets
+
+%%
+
+
+
+&quot;	{
+	fputws(L"\"",yyout);
+}
+&apos;	{
+	fputws(L"\'",yyout);
+}
+&lt;	{
+	fputws(L"<",yyout);
+}
+&gt;	{
+	fputws(L">",yyout);
+}
+&amp;	{
+	fputws(L"\\&",yyout);
+}
+\<AMP\/\>	{
+	fputws(L"&",yyout);
+}
+
+\<LEFTESCAPEDBRACE\/\>	{
+        fputws(L"\\{", yyout);
+}
+
+\<RIGHTESCAPEDBRACE\/\>	{
+        fputws(L"\\}", yyout);
+}
+
+\<ESCAPEDPERCENT\/\>	{
+        fputws(L"\\%", yyout);
+}
+
+
+
+¿	{
+	fputws(L"?`",yyout);
+}
+
+¡	{
+	fputws(L"!`",yyout);
+}
+
+
+
+\<MATH_DOLLARS\>	{
+	BEGIN(mathenv);
+	fputws(L"$$",yyout);
+}
+
+<mathenv>\<\/MATH_DOLLARS\>	{
+	fputws(L"$$",yyout);
+	BEGIN(0);
+}
+
+
+\<MATH_DOLLAR\>	{
+	BEGIN(mathenv);
+	fputws(L"$",yyout);
+}
+
+<mathenv>\<\/MATH_DOLLAR\>	{
+	fputws(L"$",yyout);
+	BEGIN(0);
+}
+
+\<MATH_PAR\>	{
+	fputws(L"\\(",yyout);
+}
+
+\<\/MATH_PAR\>	{
+	fputws(L"\\)",yyout);
+}
+
+\<MATH_BRA\>	{
+	fputws(L"\\[",yyout);
+}
+
+\<\/MATH_BRA\>	{
+	fputws(L"\\]",yyout);
+}
+
+
+\<CONTENTS\>	{
+	fputws(L"{",yyout);
+}
+
+\<\/CONTENTS\>	{
+	fputws(L"}",yyout);
+}
+
+&NBSP;	{
+	fputws(L"~",yyout);
+}
+
+
+
+\<BR\/\>	{
+	fputws(L"\\\\",yyout);
+}
+
+\<COMMENT\>[^\<]*	{
+	/* "<COMMENT>" is 9 bytes long: skip it and emit '%' followed by the
+	   text matched up to (not including) the next '<'. */
+	fputws((wstring(L"\%")+convertir(yytext+9,yyleng-9)).c_str(),yyout);
+}
+
+\<\/COMMENT\>	{
+	/* The closing tag produces no output. */
+}
+
+
+\<PARAM\>[^\<]*	{
+	/* "<PARAM>" is 7 bytes long: skip it and emit '[' followed by the
+	   text matched up to (not including) the next '<'. */
+	fputws((wstring(L"[")+convertir(yytext+7,yyleng-7)).c_str(),yyout);
+}
+\<\/PARAM\>	{
+	/* Closes the bracket opened by the <PARAM> rule. */
+	fputws(L"]", yyout);
+}
+
+\<VERB\>	{
+        fputws(L"\\verb", yyout);
+}
+
+\<\/VERB\>	{
+        ;
+}
+
+
+\<[a-zA-Z0-9]+\>	{
+	/* <name>  ->  \begin{name}  (drops '<' and '>'). */
+	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-2)+wstring(L"}")).c_str(),yyout);
+}
+
+\<[a-zA-Z0-9]+_STAR\>	{
+	/* <name_STAR>  ->  \begin{name*}  (drops '<' and the 6 bytes "_STAR>"). */
+	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-7)+wstring(L"*}")).c_str(),yyout);
+}
+
+\<\/[a-zA-Z0-9]+\>	{
+	/* </name>  ->  \end{name}  (drops "</" and '>'). */
+	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-3)+wstring(L"}")).c_str(),yyout);
+}
+
+\<\/[a-zA-Z0-9]+_STAR\>	{
+	/* </name_STAR>  ->  \end{name*}  (drops "</" and "_STAR>"). */
+	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-8)+wstring(L"*}")).c_str(),yyout);
+}
+
+\<[a-zA-Z0-9]+\/\>	{
+	/* <name/>  ->  \name  (drops '<' and "/>"). */
+	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-3)).c_str(),yyout);
+}
+
+\<[a-zA-Z0-9]+_STAR\/\>	{
+	/* <name_STAR/>  ->  \name*  (drops '<' and the 7 bytes "_STAR/>"). */
+	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-8)+wstring(L"*")).c_str(),yyout);
+}
+
+\#	{
+        fputws(L"\\#", yyout);
+}
+
+
+(.|\n)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+<mathenv>(.|\n)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+
+<<EOF>>	{
+	return 0;
+}
+%%
+
+
+
+void usage(string const &progname)
+{
+
+  cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl;
+
+  cerr << "LaTeX format postprocessor " << endl;
+  exit(EXIT_SUCCESS);
+}
+
+// Entry point of the postprocessor: runs the scanner from yyin to yyout.
+//
+// Usage: prog [-i] [input_file [output_file]]. A leading "-i" is accepted and
+// only shifts the positional arguments. Files that are not named keep the
+// scanner's default streams. Any other argument count, or a file that cannot
+// be opened, ends in usage().
+int main(int argc, char *argv[])
+{
+  LtLocale::tryToSetLocale();
+  int base = 0;
+
+  if(argc >= 2 && !strcmp(argv[1],"-i"))
+  {
+    base++;
+  }
+
+  // Program name plus at most two file names. The former limit of 4 let a
+  // third positional argument through, and all of them were then ignored.
+  int const remaining = argc - base;
+  if(remaining > 3)
+  {
+    usage(argv[0]);
+  }
+
+  switch(remaining)
+  {
+    case 3:
+      yyout = fopen(argv[2+base], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      }
+      // fall through: an output file implies that an input file was given
+    case 2:
+      yyin = fopen(argv[1+base], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default:
+      break;
+  }
+
+#ifdef _WIN32
+  _setmode(_fileno(yyin), _O_U8TEXT);
+  _setmode(_fileno(yyout), _O_U8TEXT);
+#endif
+  // prevent warning message
+  yy_push_state(1);
+  yy_top_state();
+  yy_pop_state();
+
+  yylex();
+
+  fclose(yyin);
+  fclose(yyout);
+  return 0;
+}
Index: branches/apertium-tagger/apertium2/apertium/apertium-postlatex.l
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-postlatex.l	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-postlatex.l	(revision 69632)
@@ -0,0 +1,364 @@
+
+
+%{
+
+
+
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <string>
+#include <vector>
+#include <apertium/latex_accentsmap.h>
+
+extern "C" {
+#if !defined(__STDC__)
+# define __STDC__ 1
+#endif
+#include <regex.h>
+}
+
+#include <string>
+#include <lttoolbox/lt_locale.h>
+#include <lttoolbox/ltstr.h>
+#ifndef GENFORMAT
+#include "apertium_config.h"
+#endif
+#include <apertium/unlocked_cstdio.h>
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+using namespace std;
+
+AccentsMap accentsMap(true);
+wstring closesym = L"";
+string memconv = "";
+
+// Converts the first `length` bytes of `multibyte` from the locale's
+// multibyte encoding to a wide string.
+//
+// The bytes are appended to the global `memconv` and the whole of `memconv`
+// is converted. When the conversion fails, the bytes stay in `memconv` so
+// that they are retried together with the input of the next call, and an
+// empty string is returned. This lets a character that arrives one byte per
+// call be converted once its last byte is seen.
+//
+// If the conversion still fails with 4 or more bytes pending, a warning is
+// printed and the pending bytes are discarded. Without discarding them, the
+// invalid prefix would make every later call fail as well, and all remaining
+// output would be lost.
+wstring convertir(string const &multibyte, int const length)
+{
+  memconv.append(multibyte.c_str(), length);
+
+  // The wide result never has more characters than the input has bytes;
+  // one extra slot keeps room for a terminator.
+  vector<wchar_t> buffer(memconv.size() + 1);
+  size_t const converted = mbstowcs(&buffer[0], memconv.c_str(), memconv.size());
+
+  if(converted == static_cast<size_t>(-1))
+  {
+    if(memconv.size() >= 4)
+    {
+      wcerr << L"Warning: wrong encoding" << endl;
+      // Give up on these bytes so that the following input can be converted.
+      memconv.clear();
+    }
+    return L"";
+  }
+
+  memconv.clear();
+  return wstring(&buffer[0], converted);
+}
+
+
+
+
+%}
+
+
+%option nounput
+%option noyywrap
+%option stack
+
+%x mathenv
+%x readbrackets
+
+%%
+
+
+
+&quot;	{
+	fputws(L"\"",yyout);
+}
+&apos;	{
+	fputws(L"\'",yyout);
+}
+&lt;	{
+	fputws(L"<",yyout);
+}
+&gt;	{
+	fputws(L">",yyout);
+}
+&amp;	{
+	fputws(L"\\&",yyout);
+}
+\<AMP\/\>	{
+	fputws(L"&",yyout);
+}
+
+\<LEFTESCAPEDBRACE\/\>	{
+        fputws(L"\\{", yyout);
+}
+
+\<RIGHTESCAPEDBRACE\/\>	{
+        fputws(L"\\}", yyout);
+}
+
+\<ESCAPEDPERCENT\/\>	{
+        fputws(L"\\%", yyout);
+}
+
+¿	{
+	fputws(L"?`",yyout);
+}
+
+¡	{
+	fputws(L"!`",yyout);
+}
+
+
+
+\<MATH_DOLLARS\>	{
+	BEGIN(mathenv);
+	fputws(L"$$",yyout);
+}
+
+<mathenv>\<\/MATH_DOLLARS\>	{
+	fputws(L"$$",yyout);
+	BEGIN(0);
+}
+
+
+\<MATH_DOLLAR\>	{
+	BEGIN(mathenv);
+	fputws(L"$",yyout);
+}
+
+<mathenv>\<\/MATH_DOLLAR\>	{
+	fputws(L"$",yyout);
+	BEGIN(0);
+}
+
+\<MATH_PAR\>	{
+	fputws(L"\\(",yyout);
+}
+
+\<\/MATH_PAR\>	{
+	fputws(L"\\)",yyout);
+}
+
+\<MATH_BRA\>	{
+	fputws(L"\\[",yyout);
+}
+
+\<\/MATH_BRA\>	{
+	fputws(L"\\]",yyout);
+}
+
+
+\<CONTENTS\>	{
+	fputws(L"{",yyout);
+}
+
+\<\/CONTENTS\>	{
+	fputws(L"}",yyout);
+}
+
+&NBSP;	{
+	fputws(L"~",yyout);
+}
+
+
+
+\<BR\/\>	{
+	fputws(L"\\\\",yyout);
+}
+
+\<COMMENT\>[^\<]*	{
+	/* "<COMMENT>" is 9 bytes long: skip it and emit '%' followed by the
+	   text matched up to (not including) the next '<'. */
+	fputws((wstring(L"\%")+convertir(yytext+9,yyleng-9)).c_str(),yyout);
+}
+
+\<\/COMMENT\>	{
+	/* The closing tag produces no output. */
+}
+
+
+\<PARAM\>[^\<]*	{
+	/* "<PARAM>" is 7 bytes long: skip it and emit '[' followed by the
+	   text matched up to (not including) the next '<'. */
+	fputws((wstring(L"[")+convertir(yytext+7,yyleng-7)).c_str(),yyout);
+}
+\<\/PARAM\>	{
+	/* Closes the bracket opened by the <PARAM> rule. */
+	fputws(L"]", yyout);
+}
+
+\<VERB\>	{
+        fputws(L"\\verb", yyout);
+}
+
+\<\/VERB\>	{
+        ;
+}
+
+
+
+ł	{
+	fputws(L"\\l", yyout);
+}
+
+
+œ	{
+	fputws(L"{\\oe}",yyout);
+}
+
+Œ	{
+	fputws(L"{\\OE}",yyout);
+}
+
+æ	{
+	fputws(L"{\\ae}",yyout);
+}
+
+Æ	{
+	fputws(L"{\\AE}",yyout);
+}
+
+å	{
+	fputws(L"{\\aa}",yyout);
+}
+
+Å	{
+	fputws(L"{\\AA}",yyout);
+}
+
+ø	{
+	fputws(L"{\\o}",yyout);
+}
+
+Ø	{
+	fputws(L"{\\O}",yyout);
+}
+
+ß	{
+	fputws(L"{\\ss}",yyout);
+}
+
+\<[a-zA-Z0-9]+\>	{
+	/* <name>  ->  \begin{name}  (drops '<' and '>'). */
+	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-2)+wstring(L"}")).c_str(),yyout);
+}
+
+\<HASH_[0-9]+\/\>	{
+	/* <HASH_digits/>  ->  \#digits  (drops the 6 bytes "<HASH_" and "/>"). */
+	fputws((wstring(L"\\#")+convertir(yytext+6,yyleng-8)).c_str(),yyout);
+}
+
+\<HASH\/\>		{
+        /* <HASH/>  ->  \#  */
+        fputws(L"\\#", yyout);
+}
+
+\<[a-zA-Z0-9]+_STAR\>	{
+	/* <name_STAR>  ->  \begin{name*}  (drops '<' and the 6 bytes "_STAR>"). */
+	fputws((wstring(L"\\begin{")+convertir(yytext+1,yyleng-7)+wstring(L"*}")).c_str(),yyout);
+}
+
+\<\/[a-zA-Z0-9]+\>	{
+	/* </name>  ->  \end{name}  (drops "</" and '>'). */
+	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-3)+wstring(L"}")).c_str(),yyout);
+}
+
+\<\/[a-zA-Z0-9]+_STAR\>	{
+	/* </name_STAR>  ->  \end{name*}  (drops "</" and "_STAR>"). */
+	fputws((wstring(L"\\end{")+convertir(yytext+2,yyleng-8)+wstring(L"*}")).c_str(),yyout);
+}
+
+\<[a-zA-Z0-9]+\/\>	{
+	/* <name/>  ->  \name  (drops '<' and "/>"). */
+	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-3)).c_str(),yyout);
+}
+
+\<[a-zA-Z0-9]+_STAR\/\>	{
+	/* <name_STAR/>  ->  \name*  (drops '<' and the 7 bytes "_STAR/>"). */
+	fputws((wstring(L"\\")+convertir(yytext+1,yyleng-8)+wstring(L"*")).c_str(),yyout);
+}
+ /* I DO NOT UNDERSTAND THIS RULE
+ \#	{
+        fputws(L"\\#", yyout);
+ }*/
+
+
+[^A-Za-z\n]	{
+	/* Any single non-letter, non-newline byte: convert it and look the
+	   result up in accentsMap. An empty lookup result means the text is
+	   written unchanged. Otherwise the output is a backslash, the first
+	   character of the lookup result, and the rest of the result in
+	   braces (presumably a LaTeX accent command and its argument;
+	   confirm against AccentsMap). */
+	wstring wt = convertir(yytext,yyleng);
+	wstring wa = accentsMap.get(wt);
+	if( wa == L"" )
+		fputws(wt.c_str(),yyout);
+ 	else
+		fputws(wstring(L"\\"+wa.substr(0,1)+L"{"+wa.substr(1)+L"}").c_str(),yyout);
+}
+
+
+(.|\n)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+<mathenv>(.|\n)	{
+	fputws(convertir(yytext,yyleng).c_str(),yyout);
+}
+
+
+<<EOF>>	{
+	return 0;
+}
+%%
+
+
+
+void usage(string const &progname)
+{
+
+  cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl;
+
+  cerr << "LaTeX format postprocessor " << endl;
+  exit(EXIT_SUCCESS);
+}
+
+// Entry point of the postprocessor: runs the scanner from yyin to yyout.
+//
+// Usage: prog [-i] [input_file [output_file]]. A leading "-i" is accepted and
+// only shifts the positional arguments. Files that are not named keep the
+// scanner's default streams. Any other argument count, or a file that cannot
+// be opened, ends in usage().
+int main(int argc, char *argv[])
+{
+  LtLocale::tryToSetLocale();
+  int base = 0;
+
+  if(argc >= 2 && !strcmp(argv[1],"-i"))
+  {
+    base++;
+  }
+
+  // Program name plus at most two file names. The former limit of 4 let a
+  // third positional argument through, and all of them were then ignored.
+  int const remaining = argc - base;
+  if(remaining > 3)
+  {
+    usage(argv[0]);
+  }
+
+  switch(remaining)
+  {
+    case 3:
+      yyout = fopen(argv[2+base], "w");
+      if(!yyout)
+      {
+        usage(argv[0]);
+      }
+      // fall through: an output file implies that an input file was given
+    case 2:
+      yyin = fopen(argv[1+base], "r");
+      if(!yyin)
+      {
+        usage(argv[0]);
+      }
+      break;
+    default:
+      break;
+  }
+
+#ifdef _WIN32
+  _setmode(_fileno(yyin), _O_U8TEXT);
+  _setmode(_fileno(yyout), _O_U8TEXT);
+#endif
+  // prevent warning message
+  yy_push_state(1);
+  yy_top_state();
+  yy_pop_state();
+
+  yylex();
+
+  fclose(yyin);
+  fclose(yyout);
+  return 0;
+}
Index: branches/apertium-tagger/apertium2/apertium/xsd/transfer.xsd
===================================================================
--- branches/apertium-tagger/apertium2/apertium/xsd/transfer.xsd	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/xsd/transfer.xsd	(revision 69632)
@@ -0,0 +1,1049 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
+
+  <!-- Elements grouped by their "supertype" -->
+  <xs:group name="condition">
+    <xs:choice>
+      <xs:element ref="and"/>
+      <xs:element ref="or"/>
+      <xs:element ref="not"/>
+      <xs:element ref="equal"/>
+      <xs:element ref="begins-with"/>
+      <xs:element ref="begins-with-list"/>
+      <xs:element ref="ends-with"/>
+      <xs:element ref="ends-with-list"/>
+      <xs:element ref="contains-substring"/>
+      <xs:element ref="in"/>
+    </xs:choice>
+  </xs:group>
+  <xs:group name="container">
+    <xs:choice>
+      <xs:element ref="var"/>
+      <xs:element ref="clip"/>
+    </xs:choice>
+  </xs:group>
+  <xs:group name="sentence">
+    <xs:choice>
+      <xs:element ref="let"/>
+      <xs:element ref="out"/>
+      <xs:element ref="choose"/>
+      <xs:element ref="modify-case"/>
+      <xs:element ref="call-macro"/>
+      <xs:element ref="append"/>
+    </xs:choice>
+  </xs:group>
+  <xs:group name="value">
+    <xs:choice>
+      <xs:element ref="b"/>
+      <xs:element ref="clip"/>
+      <xs:element ref="lit"/>
+      <xs:element ref="lit-tag"/>
+      <xs:element ref="var"/>
+      <xs:element ref="get-case-from"/>
+      <xs:element ref="case-of"/>
+      <xs:element ref="concat"/>
+      <xs:element ref="lu"/>
+      <xs:element ref="mlu"/>
+      <xs:element ref="chunk"/>
+    </xs:choice>
+  </xs:group>
+  <xs:group name="stringvalue">
+    <xs:choice>
+      <xs:element ref="clip"/>
+      <xs:element ref="lit"/>
+      <xs:element ref="var"/>
+      <xs:element ref="get-case-from"/>
+      <xs:element ref="case-of"/>
+    </xs:choice>
+  </xs:group>
+  <xs:group name="outputable">
+    <xs:choice>
+      <xs:element ref="mlu"/>
+      <xs:element ref="lu"/>
+      <xs:element ref="b"/>
+      <xs:element ref="chunk"/>
+      <xs:element ref="var"/>
+    </xs:choice>
+  </xs:group>
+
+  <xs:element name="transfer" type="transfer_t"> <!-- the transfer_t is added for XSD code generation -->
+    <xs:keyref name="category_name_reference" refer="category_name">
+      <xs:selector xpath=".//pattern-item" />
+      <xs:field xpath="@n" />
+    </xs:keyref>
+    <xs:keyref name="list_name_reference" refer="list_name">
+      <xs:selector xpath=".//list" />
+      <xs:field xpath="@n" />
+    </xs:keyref>
+    <xs:keyref name="macro_name_reference" refer="macro_name">
+      <xs:selector xpath=".//call-macro" />
+      <xs:field xpath="@n" />
+    </xs:keyref>
+    <!--
+	TODO: So far can't think of a way to express this constraint, since in
+	addition to a reference to a section-def-attrs/def-attr/@n the attribute can also be
+	"lemma","lemh","lemq","whole".
+    -->   
+    <!--xs:keyref name="attribute_name_reference" refer="attribute_name">
+      <xs:selector xpath=".//clip" />
+      <xs:field xpath="@part" />
+    </xs:keyref-->
+    <xs:keyref name="variable_name_reference" refer="variable_name">
+      <xs:selector xpath=".//var" />
+      <xs:field xpath="@n" />
+    </xs:keyref>
+  </xs:element>
+  <xs:annotation>
+    <xs:documentation>
+      'transfer' is the root element containing the whole structural
+      transfer rule file.  Attribute 'default' specifies if
+      unmatched words have to be written as lexical units ("lu", this is
+      the default value) or as chunks ("chunk").
+    </xs:documentation>
+  </xs:annotation>    
+  <xs:complexType name="transfer_t">
+    <xs:sequence>
+      <xs:element ref="section-def-cats"/>
+      <xs:element ref="section-def-attrs" minOccurs="0" />
+      <xs:element ref="section-def-vars" minOccurs="0" />
+      <xs:element ref="section-def-lists" minOccurs="0" />
+      <xs:element ref="section-def-macros" minOccurs="0" />
+      <xs:element ref="section-rules"/>
+    </xs:sequence>
+    <xs:attribute name="default" use="optional">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="lu"/>
+          <xs:enumeration value="chunk"/>
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:attribute>
+  </xs:complexType>
+  
+
+  <xs:element name="section-def-cats">
+    <xs:annotation>
+      <xs:documentation>
+	The 'def-cats' section defines the categories used to build the
+	patterns used in rules
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="def-cat" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+    <xs:unique name="category_name">
+      <xs:selector xpath="def-cat" />
+      <xs:field xpath="@n" />
+    </xs:unique>
+  </xs:element>
+
+  <xs:element name="def-cat">
+    <xs:annotation>
+      <xs:documentation>
+	Each 'def-cat' defines one category in terms of a list of
+	category items and has a unique name 'n', which is mandatory
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="cat-item" maxOccurs="unbounded"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="cat-item">
+    <xs:annotation>
+      <xs:documentation>
+	Each 'cat-item' (category item) represents a set of lexical forms
+	and has a mandatory attribute 'tags' whose value is a sequence of
+	dot-separated tag names; this sequence is a subsequence of the
+	tag sequence defining each possible lexical form. For example,
+	tags="n.f" would match all lexical forms containing this tag
+	sequence, such as "^casa&lt;n&gt;&lt;f&gt;&lt;pl&gt;$".
+	
+	In addition, an optional attribute, "lemma", may be used to
+	define lexical forms having a particular substring in their lemma	
+      </xs:documentation>      
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="lemma" type="xs:string" use="optional"/>
+      <xs:attribute name="tags" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="section-def-attrs">
+    <xs:annotation>
+      <xs:documentation>
+	The 'def-attrs' section defines the attributes that will be
+	identified in matched lexical forms 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="def-attr" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+    <xs:unique name="attribute_name">
+      <xs:selector xpath="def-attr" />
+      <xs:field xpath="@n" />
+    </xs:unique>
+  </xs:element>
+  
+  <xs:element name="def-attr">
+    <xs:annotation>
+      <xs:documentation>
+	Each def-attr defines one attribute in terms of a list of
+	attribute items and has a mandatory unique name n 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="attr-item" maxOccurs="unbounded"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="attr-item">
+    <xs:annotation>
+      <xs:documentation>
+	Each 'attr-item' specifies a subsequence of the tags in
+	that lexical form (attribute 'tags')
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="tags" type="xs:string" use="optional"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="section-def-vars">
+    <xs:annotation>
+      <xs:documentation>
+	The 'def-vars' section defines the global variables
+	that will be used to transfer information between rules
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="def-var" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+    <xs:unique name="variable_name">
+      <xs:selector xpath="def-var" />
+      <xs:field xpath="@n" />    
+    </xs:unique>
+  </xs:element>
+
+  <xs:element name="def-var">
+    <xs:annotation>
+      <xs:documentation>
+	The definition of a global variable has a mandatory unique name 'n' that
+	will be used to refer to it. An initial value can also be specified
+	by means of the 'v' attribute.  The default initial value is the
+	empty string.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="v" type="xs:string" use="optional"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="section-def-lists">
+    <xs:annotation>
+      <xs:documentation>
+	Element 'section-def-lists' encloses a set of list definitions
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence maxOccurs="unbounded">
+        <xs:element ref="def-list"/>
+      </xs:sequence>
+    </xs:complexType>
+    <xs:unique name="list_name">
+      <xs:selector xpath="def-list" />
+      <xs:field xpath="@n" />    
+    </xs:unique>
+  </xs:element>
+
+  <xs:element name="def-list">
+    <xs:annotation>
+      <xs:documentation>
+	The 'def-list' element defines a named list to search with the 'in' 
+	element.  Attribute 'n' sets the name of the list
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="list-item" maxOccurs="unbounded"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="list-item">
+    <xs:annotation>
+      <xs:documentation>
+	Attribute 'v' of 'list-item' element contains the value to be added to 
+	the list being defined     
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="v" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="section-def-macros">
+    <xs:annotation>
+      <xs:documentation>
+	The 'def-macros' section defines macros containing portions of
+	code frequently used in the action part of rules
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence maxOccurs="unbounded">
+        <xs:element ref="def-macro"/>
+      </xs:sequence>
+    </xs:complexType>
+    <xs:unique name="macro_name">
+      <xs:selector xpath="def-macro" />
+      <xs:field xpath="@n" />    
+    </xs:unique>
+  </xs:element>
+
+  <xs:element name="def-macro">
+    <xs:annotation>
+      <xs:documentation>
+	Macro definition:
+	
+	A macro has a mandatory name (the value of 'n'), a number of parameters
+	(the value of 'npar') and a body containing arguments and statements.  
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence minOccurs="1" maxOccurs="unbounded">
+        <xs:group ref="sentence"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+      <xs:attribute name="npar" type="xs:string" use="required"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="section-rules">
+    <xs:annotation>
+      <xs:documentation>
+	The rules section contains a sequence of one or more rules
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="rule" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="rule">
+    <xs:annotation>
+      <xs:documentation>
+	Each rule has a pattern and an action.
+	* The optional attribute 'comment' holds a comment describing the
+        purpose of the rule being defined.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="pattern"/>
+        <xs:element ref="action"/>
+      </xs:sequence>
+      <xs:attribute name="comment" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="pattern">
+    <xs:annotation>
+      <xs:documentation>
+	The pattern is specified in terms of pattern items, each one
+	representing a lexical form in the matched pattern 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="pattern-item" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="pattern-item">
+    <xs:annotation>
+      <xs:documentation>
+	Each attribute to be activated is referred to by its name in the def-cats section 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="action">
+    <xs:annotation>
+      <xs:documentation>
+	Encloses the procedural part of a rule
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:group ref="sentence"/>
+      </xs:sequence>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="choose">
+    <xs:annotation>
+      <xs:documentation>
+	The choose statement is a selection statement (similar to a case
+	statement) composed of one or more tested cases and an optional
+	otherwise 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="when" maxOccurs="unbounded"/>
+        <xs:element ref="otherwise" minOccurs="0" maxOccurs="1"/>
+      </xs:sequence>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="when">
+    <xs:annotation>
+      <xs:documentation>
+	Each tested case is a block of zero or more statements 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="test"/>
+        <xs:sequence minOccurs="0" maxOccurs="unbounded">
+          <xs:group ref="sentence"/>
+        </xs:sequence>
+      </xs:sequence>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="otherwise">
+    <xs:annotation>
+      <xs:documentation>
+	The otherwise case is also a block of one or more statements 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence minOccurs="1" maxOccurs="unbounded">
+        <xs:group ref="sentence"/>        
+      </xs:sequence>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="test">
+    <xs:annotation>
+      <xs:documentation>
+	The test in a tested case may be a conjunction, a disjunction, or
+	a negation of simpler tests, as well as a simple equality test
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:group ref="condition"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="and">
+    <xs:annotation>
+      <xs:documentation>
+	Each conjunction test contains two or more simpler tests
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+	<xs:group ref="condition"/>
+	<xs:group ref="condition" minOccurs="1" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="or">
+    <xs:annotation>
+      <xs:documentation>
+	Each disjunction test contains two or more simpler tests 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+	<xs:group ref="condition"/>
+	<xs:group ref="condition" minOccurs="1" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="not">
+    <xs:annotation>
+      <xs:documentation>
+	The negation of a simpler test is a test itself 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:group ref="condition"/>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="equal">
+    <xs:annotation>
+      <xs:documentation>
+	The simplest test is an equality test. The right part and the
+	left part of the equality may both be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of 
+	a variable ('var') defined in the def-vars section.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:group ref="value"/>
+      </xs:sequence>
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="begins-with">
+    <xs:annotation>
+      <xs:documentation>
+	Tests if the left part contains the right part at the beginning.
+	Both parts of the test may be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of 
+	a variable ('var') defined in the def-vars section.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:group ref="value"/>
+      </xs:sequence>
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="ends-with">
+    <xs:annotation>
+      <xs:documentation>
+	Tests if the left part contains the right part at the end.
+	Both parts of the test may be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of 
+	a variable ('var') defined in the def-vars section.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:group ref="value"/>
+      </xs:sequence>
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="begins-with-list">
+    <xs:annotation>
+      <xs:documentation>
+	Tests if the left part contains the right part at the beginning.
+	The first part of the test may be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of
+	a variable ('var') defined in the def-vars section. The second part
+	must always be a list.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:element ref="list"/>
+      </xs:sequence>
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="ends-with-list">
+    <xs:annotation>
+      <xs:documentation>
+	Tests if the left part contains the right part at the end.
+	The first part of the test may be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of
+	a variable ('var') defined in the def-vars section. The second part
+	must always be a list.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:element ref="list"/>
+      </xs:sequence>      
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="contains-substring">
+    <xs:annotation>
+      <xs:documentation>
+	Tests if the left part contains the right part.
+	Both parts of the test may be a clip (see below), a
+	literal string ('lit'), a literal tag ('lit-tag') or the value of 
+	a variable ('var') defined in the def-vars section.  When the attribute
+	'caseless' is set to 'yes', the comparison is made without attending
+	to the case.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:group ref="value"/>
+      </xs:sequence>        
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="in">
+    <xs:annotation>
+      <xs:documentation>
+	'in' performs a search of a value in a list.  If 'caseless' is set to yes,
+	this search is performed without attending to the case
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>    
+      <xs:sequence>
+        <xs:group ref="value"/>
+        <xs:element ref="list"/>      
+      </xs:sequence>
+      <xs:attribute name="caseless" use="optional">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="no"/>
+            <xs:enumeration value="yes"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+    </xs:complexType>
+  </xs:element>
+
+  <xs:element name="list">
+    <xs:annotation>
+      <xs:documentation>
+	'list' refers, by the name in attribute 'n', to a list defined earlier in
+	the 'section-def-list' section
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="let">
+    <xs:annotation>
+      <xs:documentation>
+	An assignment statement ('let') assigns the value of a clip (see
+	below), a literal string ('lit'), a literal tag('lit-tag') or the 
+	value of a global variable ('var') to either a global variable ('var') 
+	or a clip
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="container"/>
+        <xs:group ref="value"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="append">
+    <xs:annotation>
+      <xs:documentation>
+	This instruction appends the value of a clip (see
+	below), a literal string ('lit'), a literal tag('lit-tag') or the 
+	value of a global variable ('var') to either a global variable ('var') 
+	or a clip, identified by the "n" attribute
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="value" minOccurs="1" maxOccurs="unbounded"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="out">
+    <xs:annotation>
+      <xs:documentation>
+	'out' is an output statement; it may output any sequence of
+	clips, literal strings, literal tags, variables, and whitespace items 
+	(see below) 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence minOccurs="1" maxOccurs="unbounded">
+	<xs:group ref="outputable"/>
+      </xs:sequence>
+      <xs:attribute name="c" type="xs:string" use="optional"/>      
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="modify-case">
+    <xs:annotation>
+      <xs:documentation>
+	The first argument of 'modify-case' copies the case of the second
+	argument.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:group ref="container"/>
+        <xs:group ref="stringvalue"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="call-macro">
+    <xs:annotation>
+      <xs:documentation>
+	A macro may be called anywhere by name with zero or more
+	arguments (one 'with-param' element per argument)
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence minOccurs="0" maxOccurs="unbounded">
+        <xs:element ref="with-param"/>
+      </xs:sequence>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="with-param">
+    <xs:annotation>
+      <xs:documentation>
+	The attribute pos in each argument is used to refer to a lexical
+	form in the current rule. For example, if a 2-parameter macro
+	has been defined to perform noun-adjective agreement operations,
+	it may be used with arguments 1 and 2 in a noun-adjective rule,
+	with arguments 2, 3 and 1 in a determiner-noun-adjective rule, with
+	arguments 1 and 3 in a noun-adverb-adjective rule, and with
+	arguments 2 and 1 in an adjective-noun rule 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="pos" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="clip">
+    <xs:annotation>
+      <xs:documentation>
+	A 'clip' is a substring of a source-language or target-language
+	lexical form, extracted according to an attribute:
+	
+	* 'pos' is an index (1, 2, 3...) used to select a lexical form
+        inside the rule;
+	
+	* 'side' is used to select a source-language ('sl') or a
+        target-language ('tl') clip
+	
+	* the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+	
+	* the value of 'queue' may be 'no' or 'yes'.  'yes' is assumed  by 
+        default.
+	
+	* 'link-to' causes the other attributes to be ignored in clip evaluation
+        when using 'clip' as a right hand side element (as value), and 
+        returns its value.  When using as a left hand side (as reference), 
+        the value of the 'as' attribute is ignored.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="pos" type="xs:string" use="required"/>
+      <xs:attribute name="side" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="sl"/>
+            <xs:enumeration value="tl"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+      <xs:attribute name="part" type="xs:string" use="optional"/>
+      <xs:attribute name="queue" type="xs:string" use="optional"/>
+      <xs:attribute name="link-to" type="xs:string" use="optional"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="lit">
+    <xs:annotation>
+      <xs:documentation>
+	A literal string value: the value of the literal is the value of
+	the 'v' attribute
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="v" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="lit-tag">
+    <xs:annotation>
+      <xs:documentation>
+	A literal tag value: the value of the literal tag is the value of
+	the 'v' attribute
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="v" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="var">
+    <xs:annotation>
+      <xs:documentation>
+	Each 'var' is a variable identifier: the attribute n is the name
+	of the variable. When it is in an 'out', a 'test', or the right
+	part of a 'let', it represents the value of the variable; when in
+	the left part of a 'let' it represents the reference of the
+	variable. 
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="n" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="get-case-from">
+    <xs:annotation>
+      <xs:documentation>
+	TODO:
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:choice>
+        <xs:element ref="clip"/>
+        <xs:element ref="lit"/>
+        <xs:element ref="var"/>
+      </xs:choice>
+      <xs:attribute name="pos" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="case-of">
+    <xs:annotation>
+      <xs:documentation>
+	A 'case-of' is a value representing the case of a "clip".  This value
+	is "aa" (all lowercase), "Aa" (first letter uppercase) or "AA"
+	(all uppercase).
+
+	* 'pos' is an index (1, 2, 3...) used to select a lexical form
+        inside the rule;
+	
+	* 'side' is used to select a source-language ('sl') or a
+        target-language ('tl') clip
+	
+	* the value of 'part' is the name of an attribute defined in
+        def-attrs, but may take also the values 'lem' (referring to
+        the lemma of the lexical form), 'lemh' (lemma head), 'lemq'
+        (lemma queue) and 'whole' (referring to the whole lexical form).
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="pos" type="xs:string" use="required"/>
+      <xs:attribute name="side" use="required">
+        <xs:simpleType>
+          <xs:restriction base="xs:string">
+            <xs:enumeration value="sl"/>
+            <xs:enumeration value="tl"/>
+          </xs:restriction>
+        </xs:simpleType>
+      </xs:attribute>
+      <xs:attribute name="part" type="xs:string" use="required"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="concat">
+    <xs:annotation>
+      <xs:documentation>
+	Concatenates a sequence of values.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+	<xs:group ref="value" minOccurs="1" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="mlu">
+    <xs:annotation>
+      <xs:documentation>
+	Encloses a multiword.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="lu" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="lu">
+    <xs:annotation>
+      <xs:documentation>
+	Encloses a word inside an 'out' element.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+	<xs:group ref="value" minOccurs="1" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="chunk">
+    <xs:annotation>
+      <xs:documentation>
+	Encloses a chunk inside an 'out' element.      
+	* 'name' the pseudolemma of the chunk.
+	* 'namefrom' get the name from a variable.
+	* 'case' the variable to get the uppercase/lowercase policy
+        to apply it to the chunk name
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="tags"/>
+        <xs:choice maxOccurs="unbounded">
+          <xs:element ref="mlu"/>
+          <xs:element ref="lu"/>
+          <xs:element ref="b"/>
+          <xs:element ref="var"/>
+        </xs:choice>
+      </xs:sequence>
+      <xs:attribute name="name" type="xs:string" use="optional"/>
+      <xs:attribute name="namefrom" type="xs:string" use="optional"/>
+      <xs:attribute name="case" type="xs:string" use="optional"/>
+      <xs:attribute name="c" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="tags">
+    <xs:annotation>
+      <xs:documentation>
+	A sequence of tags for a lexical unit.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+        <xs:element ref="tag" maxOccurs="unbounded"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="tag">
+    <xs:annotation>
+      <xs:documentation>
+	A lexical unit tag.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:sequence>
+	<xs:group ref="value"/>
+      </xs:sequence>
+    </xs:complexType>
+  </xs:element>
+  
+  <xs:element name="b">
+    <xs:annotation>
+      <xs:documentation>
+	'b' is a [super]blanks item, indexed by pos; for example, a 'b'
+	with pos="2" refers to the [super]blanks (including format data
+	encapsulated by the de-formatter) between lexical form 2 and
+	lexical form 3. Managing [super]blanks explicitly allows for the
+	correct placement of format when the result of structural
+	transfer has more or fewer lexical items than the original or has
+	been reordered in some way.  If attribute "pos" is not specified, then
+	a single blank (ASCII 32) is generated.
+      </xs:documentation>
+    </xs:annotation>
+    <xs:complexType>
+      <xs:attribute name="pos" type="xs:string" use="optional"/>
+    </xs:complexType>
+  </xs:element>
+  
+</xs:schema>
Index: branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.h	(revision 69632)
@@ -0,0 +1,41 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef _ALIGNER_TOOL_H_
+#define _ALIGNER_TOOL_H_ // must match the #ifndef above, otherwise the guard never takes effect
+
+#include <apertium/tmx_alignment.h>
+
+#include <apertium/tmx_words.h>
+#include <apertium/tmx_book_to_matrix.h>
+#include <apertium/tmx_translate.h>
+#include <apertium/tmx_trail_postprocessors.h>
+
+#include <apertium/tmx_arguments_parser.h>
+#include <apertium/tmx_strings_and_streams.h>
+#include <apertium/tmx_serialize_impl.h>
+#include <apertium/tmx_align_parameters.h>
+
+
+#include <fstream>
+#include <iostream>
+
+#include <cmath>
+
+namespace TMXAligner{
+// Declaration only: takes a dictionary, two input file names, the align parameters and an optional output file name (default "").
+void alignerToolWithFilenames(const DictionaryItems& dictionary,
+			      const std::string& huFilename,
+			      const std::string& enFilename,
+			      const AlignParameters& alignParameters,
+			      const std::string& outputFilename = "" );
+}
+#endif // _ALIGNER_TOOL_H_

Property changes on: branches/apertium-tagger/apertium2/apertium/tmx_aligner_tool.h
___________________________________________________________________
Added: svn:mergeinfo
## -0,0 +0,0 ##
Index: branches/apertium-tagger/apertium2/apertium/tmx_alignment.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_alignment.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_alignment.h	(revision 69632)
@@ -0,0 +1,115 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_ALIGNMENT_H
+#define __TMXALIGNER_ALIGNMENT_ALIGNMENT_H
+
+#include <apertium/tmx_quasi_diagonal.h>
+
+#include <string>
+#include <set>
+
+namespace TMXAligner
+{
+
+// Simply double values for each sentence. Right now we store sentence lengths in them.
+typedef std::vector<double> SentenceValues;
+
+// See tmx_quasi_diagonal.h
+typedef QuasiDiagonal<double> AlignMatrix;
+
+// Contains directions, a bit like a force field.
+typedef QuasiDiagonal<unsigned char> TrelliMatrix;
+
+// A Rundle (x,y) cuts the bitext into two sub-bitexts:
+// [0,x)+[0,y) and [x,huSize)+[y,enSize).
+typedef std::pair<int,int> Rundle;
+
+// A Trail is a strictly ordered list of Rundles.
+// It cuts the bitext into small bitexts.
+// Such a small bitext is called a hole or segmentum.
+// A hole can contain zero Hungarian sentences,
+// or it can contain zero English sentences, but not both.
+// A Trail is sometimes referred to as a Ladder.
+typedef std::vector<Rundle> Trail;
+
+// A BisentenceList is formally identical to a Trail, but semantically very different.
+// It represents an ordered list of bisentences.
+// There are some functions which utilize the formal identity,
+// manipulating both structures.
+typedef std::vector< std::pair<int,int> > BisentenceList;
+
+// OBSOLETE:
+// TrailValues gives scores to the Rundles of a Trail (of the same size).
+// Conceptually TrailValues should be attached to Trails.
+// A TrailValues structure always accompanies a Trails list,
+// but their consistency must be maintained by hand, pre-OO-style. (TODO)
+// typedef std::vector<double> TrailValues;
+
+// OBSOLETE:
+// Has exactly the same relation to BisentenceList as
+// a TrailValues has to a Trail. But note that these 
+// scores mark the confidence in a bisentence. This is
+// very different from the confidence in a rundle.
+// typedef std::vector<double> BisentenceValues;
+
+double closeness( double twoSentenceLength, double oneSentenceLength );
+
+const double skipScore = -0.3;
+
+
+// The main align function.
+// Gets a confidence value for every sentence-pair,
+// and sentence lengths for each sentence (for a Gale-Church-like scoring).
+// Returns a trail with the best total score, and the computed dynMatrix matrix:
+// dynMatrix[huPos][enPos] gives the similarity of the [0,huPos) and [0,enPos) intervals.
+void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
+            Trail& bestTrail, AlignMatrix& dynMatrix );
+
+
+bool oneToOne( const Trail& bestTrail, int pos );
+
+// Collect bisentences.
+void trailToBisentenceList( const Trail& bestTrail,
+                            BisentenceList& bisentenceList );
+
+// Score precision-recall of a BisentenceList according to a hand-aligned bicorpus.
+// For best results, zero-to-many holes of the hand-alignment should be subdivided to zero-to-ones.
+// Builds the manual bisentencelist. The compared sets consist of Bisentences.
+double scoreBisentenceList( const BisentenceList& bisentenceList, const Trail& trailHand );
+
+// The same precision-recall calculation for Trails. The compared sets consist of Rundles.
+double scoreTrail         ( const Trail&          trailAuto,      const Trail& trailHand );
+
+
+const int outsideOfRadiusValue = -1000000;
+const int insideOfRadiusValue  = 0;
+
+// Fills the complement of the radius of the trail with minus infties.
+// The return value true means success. Failure means that during the fill,
+// we intersected the outside of the quasidiagonal area.
+// In this case, the operation is not finished.
+bool borderDetailedAlignMatrix( AlignMatrix& m, const Trail& trail, int radius );
+
+// What the name implies.
+void dumpAlignMatrix( const AlignMatrix& m, bool graphical );
+
+template <class T>
+void dumpAlignMatrix( const QuasiDiagonal<T>& alignMatrix );
+
+void dumpAlignMatrix( const QuasiDiagonal<int>& alignMatrix, bool graphical );
+
+void dumpTrelliMatrix( const TrelliMatrix& trellis );
+
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_ALIGNMENT_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.cc	(revision 69632)
@@ -0,0 +1,203 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_arguments_parser.h>
+#include <iostream>
+#include <stdlib.h>
+
+// True for ASCII letters and '_'; Arguments::read treats a run of such characters as the argument name.
+bool alphabetic( char c)
+{
+  return (c=='_') || (('A'<=c)&&(c<='Z')) || (('a'<=c)&&(c<='z'));
+}
+
+// Parses argv[1..argc-1] into this map. Every argument must have the form
+// -name, -name=value or -nameVALUE: the name is the leading run of
+// alphabetic() characters, and a '=' that ends the name is dropped.
+// A value that atoi() reads as non-zero, or that is exactly "0", is stored with
+// kind Int (dString still holds the raw text); any other value stays a String.
+// An argument that does not start with '-', or that is just "-", prints a
+// diagnostic and throws "argument error". Returns true whenever it returns.
+bool Arguments::read( int argc, char **argv )
+{
+  for ( int i=1; i<argc; ++i )
+  {
+    std::string p = argv[i];
+    if (p.empty() || p[0]!='-')
+    {
+      std::cerr << p << ": unable to parse argument\n";
+      throw "argument error";
+    }
+    p.erase(0,1); // strip the leading '-'
+
+    if (p.empty())
+    {
+      std::cerr << "Empty argument\n";
+      throw "argument error";
+    }
+
+    // Locate the end of the name; a '=' found there is removed from p.
+    size_t j = 0;
+    while ( (j<p.size()) && alphabetic(p[j]) ) ++j;
+    if ( (j<p.size()) && (p[j]=='=') )
+    {
+      p.erase(j,1);
+    }
+
+    const ArgName name = p.substr(0,j);
+    const std::string val = p.substr(j);
+    const int num = atoi(val.c_str());
+
+    AnyData anyData(val);
+    if ( (num!=0) || (val=="0") )
+    {
+      anyData.dInt = num;
+      anyData.kind = AnyData::Int;
+    }
+    operator[](name) = anyData;
+  }
+
+  return true;
+}
+
+// Like the two-argument read(), except that arguments which are empty or do
+// not start with '-' are not errors: their argv pointers are appended, in
+// order, to remains (which is cleared first). A lone "-" still prints
+// "Empty argument" and throws "argument error". Returns true whenever it returns.
+bool Arguments::read( int argc, char **argv, std::vector<const char*>& remains )
+{
+  remains.clear();
+
+  for ( int i=1; i<argc; ++i )
+  {
+    std::string p = argv[i];
+    if (p.empty() || p[0]!='-')
+    {
+      remains.push_back(argv[i]);
+      continue;
+    }
+
+    p.erase(0,1); // strip the leading '-'
+
+    if (p.empty())
+    {
+      std::cerr << "Empty argument\n";
+      throw "argument error";
+    }
+
+    // The name is the leading run of alphabetic() characters; a '=' ending it is dropped.
+    size_t j = 0;
+    while ( (j<p.size()) && alphabetic(p[j]) ) ++j;
+    if ( (j<p.size()) && (p[j]=='=') )
+    {
+      p.erase(j,1);
+    }
+
+    const ArgName name = p.substr(0,j);
+    const std::string val = p.substr(j);
+    const int num = atoi(val.c_str());
+
+    // Values atoi() reads as non-zero, or exactly "0", are tagged Int; dString keeps the text.
+    AnyData anyData(val);
+    if ( (num!=0) || (val=="0") )
+    {
+      anyData.dInt = num;
+      anyData.kind = AnyData::Int;
+    }
+    operator[](name) = anyData;
+  }
+
+  return true;
+}
+
+// Copies the Int stored under name into num and erases that entry; returns false
+// if name is absent. A non-Int entry prints a diagnostic and throws "argument error".
+bool Arguments::getNumericParam( const std::string& name, int& num )
+{
+  const const_iterator found = find(name);
+  const bool present = (found!=end());
+  if (present)
+  {
+    const AnyData& data = found->second;
+    if (data.kind != AnyData::Int)
+    {
+      std::cerr << "Argument -" << name << ": integer expected.\n";
+      throw "argument error";
+    }
+    num = data.dInt;
+    erase(name);
+  }
+  return present;
+}
+
+// Reports in sw whether the switch name is present, without modifying the map.
+// Returns false (after printing a diagnostic, sw untouched) when the switch
+// carries a value, i.e. its dString is not empty; returns true otherwise.
+bool Arguments::getSwitchConst( const ArgName& name, bool& sw ) const
+{
+  const const_iterator found = find(name);
+  const bool present = (found!=end());
+
+  // A switch must not have been given a value.
+  if ( present && !found->second.dString.empty() )
+  {
+    std::cerr << "Argument -" << name << ": value is not allowed.\n";
+    return false;
+  }
+
+  // An absent switch is reported as false; that is not an error.
+  sw = present;
+  return true;
+}
+
+// Like getSwitchConst(), but after a successful query the switch is erased from the map.
+bool Arguments::getSwitch( const ArgName& name, bool& sw )
+{
+  if (!getSwitchConst(name, sw))
+    return false;
+  erase(name);
+  return true;
+}
+
+// Returns whether the switch name is present and erases it from the map.
+// A switch that carries a value prints diagnostics and throws "argument error".
+bool Arguments::getSwitchCompact( const ArgName& name )
+{
+  bool sw = false;
+
+  if (!getSwitchConst(name, sw))
+  {
+    std::cerr << "No value is allowed for argument -" << name << ".\n";
+    throw "argument error";
+  }
+
+  erase(name);
+  return sw;
+}
+
+// If the map still holds entries, lists each one as "-name" or "-name=value" on
+// stderr and throws "argument error". Does nothing when the map is empty.
+void Arguments::checkEmptyArgs() const
+{
+  if (empty())
+    return;
+  std::cerr << "Invalid argument: ";
+  for ( const_iterator it=begin(); it!=end(); ++it )
+  {
+    const std::string& value = it->second.dString;
+    std::cerr << "-" << it->first;
+    if (!value.empty())
+      std::cerr << "=" << value;
+    std::cerr << " ";
+  }
+  std::cerr << std::endl;
+  throw "argument error";
+}
Index: branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_arguments_parser.h	(revision 69632)
@@ -0,0 +1,72 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __ARGUMENTSPARSER_H
+#define __ARGUMENTSPARSER_H
+
+#include <string>
+#include <map>
+#include <vector>
+
+// Current usage and limitations:
+// Every argument starts with a '-'.
+// It is a key/value pair. The delimiter
+// is either the first '=' (erased), or the
+// first nonalphabetic character (not erased).
+
+class AnyData
+{
+public:
+  // Tag recording which kind of value an AnyData holds.
+  enum Kind { Int, String, Float, Set };
+
+  // Default and string construction set dInt to -1; int construction leaves dString empty.
+  AnyData() : kind(String), dInt(-1), dString() {}
+  AnyData( const int& d ) : kind(Int), dInt(d), dString() {}
+  AnyData( const std::string& d ) : kind(String), dInt(-1), dString(d) {}
+
+  Kind kind;           // Int or String after construction
+  int dInt;            // integer payload; -1 unless constructed from an int
+  std::string dString; // textual payload; empty unless constructed from a string
+};
+
+typedef std::string ArgName;
+typedef std::map< ArgName, AnyData > ArgumentMap;
+
+class Arguments : public ArgumentMap
+{
+public:
+  // Parses argv[1..argc-1]; an argument that does not start with '-' is an error.
+  // Very important note: when read finds a numeric argument, it sets
+  // anyData.kind to Int, but it STILL fills anyData.dString with the raw text.
+  // So if the ArgumentMap was built by Arguments::read, the dString fields are all filled.
+  bool read( int argc, char **argv );
+
+  // As above, but remains is filled with the arguments not starting with '-'.
+  bool read( int argc, char **argv, std::vector<const char*>& remains );
+
+  // Returns false and leaves the map untouched if name is absent; erases the argument if it succeeds.
+  bool getNumericParam( const ArgName& name, int& num );
+
+  // sw is set to true if the switch is present and to false if it is absent; the switch is
+  // then erased. The function returns false if the argument value is not empty.
+  bool getSwitch( const ArgName& name, bool& sw );
+  // Same query as getSwitch, but the map is never modified.
+  bool getSwitchConst( const ArgName& name, bool& sw ) const;
+
+  // Returns true if the switch is present, and erases it. Prints an error message
+  // and throws if the argument value is not empty.
+  bool getSwitchCompact( const ArgName& name );
+  // Prints the arguments still in the map and throws; does nothing if the map is empty.
+  void checkEmptyArgs() const;
+};
+
+#endif // #define __ARGUMENTSPARSER_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.cc	(revision 69632)
@@ -0,0 +1,382 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_book_to_matrix.h>
+#include <apertium/tmx_alignment.h>
+#include <apertium/tmx_dictionary.h> // For IBMModelOne
+
+#include <iostream>
+#include <cmath>
+
+#include <fstream> // Just for similarityEvaluator, which should go anyway. TODO.
+
+namespace TMXAligner
+{
+
+
+// Number of words present in both sx and sy. (!!!) Both must be ordered sets of Word-s.
+int intersectionSize( const WordList& sx, const WordList& sy )
+{
+  WordList::const_iterator left = sx.begin();
+  WordList::const_iterator right = sy.begin();
+  const WordList::const_iterator leftEnd = sx.end();
+  const WordList::const_iterator rightEnd = sy.end();
+  int common = 0;
+  while ( (left!=leftEnd) && (right!=rightEnd) )
+  {
+    if ( *left < *right )
+      ++left;
+    else if ( *left > *right )
+      ++right;
+    else
+    {
+      ++common;
+      ++left;
+      ++right;
+    }
+  }
+  return common;
+}
+
+// Returns true when every character of s is an ASCII digit '0'..'9'.
+// The empty string contains no offending character, so it also yields true.
+bool isNumber( const std::string& s )
+{
+  for ( std::string::const_iterator it=s.begin(); it!=s.end(); ++it )
+  {
+    const bool isDigit = ('0'<=*it) && (*it<='9');
+    if (!isDigit)
+      return false;
+  }
+  return true;
+}
+
+// Variant of intersectionSize() that additionally tracks numeric tokens.
+// (!!!) Precondition: sx and sy are ordered sets of Word-s.
+// Returns the number of common words, plus a bonus of 10 when at least one
+// number is shared and the count of unshared numbers seen during the merge
+// is at most a fifth (integer division) of the shared ones.
+int specializedIntersectionSize( const WordList& sx, const WordList& sy )
+{
+  WordList::const_iterator left = sx.begin();
+  WordList::const_iterator right = sy.begin();
+  const WordList::const_iterator leftEnd = sx.end();
+  const WordList::const_iterator rightEnd = sy.end();
+
+  int common = 0;
+  int sharedNumbers = 0;
+  int unsharedNumbers = 0;
+
+  while ( (left!=leftEnd) && (right!=rightEnd) )
+  {
+    if ( *left < *right )
+    {
+      // Word only on the sx side.
+      if (isNumber(*left))
+        ++unsharedNumbers;
+      ++left;
+    }
+    else if ( *left > *right )
+    {
+      // Word only on the sy side.
+      if (isNumber(*right))
+        ++unsharedNumbers;
+      ++right;
+    }
+    else
+    {
+      if (isNumber(*right))
+        ++sharedNumbers;
+      ++common;
+      ++left;
+      ++right;
+    }
+  }
+
+  // Words left over once either list is exhausted are not examined.
+  const bool numbersAgree = (sharedNumbers>0) && (unsharedNumbers <= sharedNumbers/5);
+  if (numbersAgree)
+    common += 10;
+
+  return common;
+}
+
+const std::string paragraphString = "<p>";
+// A phrase is a paragraph delimiter when it consists of exactly one word equal to paragraphString.
+bool isParagraph( const Phrase& phrase )
+{
+  return (phrase.size()==1) ? (phrase[0]==paragraphString) : false;
+}
+
+// Handles phrase pairs involving a paragraph delimiter. When at least one of
+// hu/en is a paragraph delimiter, stores scoreOfParagraphMatch (both are) or
+// scoreOfParagraphMisMatch (exactly one is) in score and returns true.
+// Otherwise returns false and leaves score untouched.
+bool exceptionalScoring( const Phrase& hu, const Phrase& en, double& score )
+{
+  const int paragraphSides = (isParagraph(hu) ? 1 : 0) + (isParagraph(en) ? 1 : 0);
+
+  switch (paragraphSides)
+  {
+    case 2:
+      score = scoreOfParagraphMatch;
+      return true;
+    case 1:
+      score = scoreOfParagraphMisMatch;
+      return true;
+    default:
+      return false;
+  }
+}
+
+
+// Scale factor applied to the normalised identity score.
+const double maximumScore = 3.0;
+
+// Similarity of two phrases based on the words they literally share.
+// Paragraph delimiters are scored by exceptionalScoring(); otherwise the result is
+//   specializedIntersectionSize(hu,en) / (min(|hu|,|en|) + 1) * maximumScore.
+// In theory, dividing by the longer phrase would be better at avoiding global
+// mistakes and dividing by the shorter one at avoiding local mistakes. What is
+// fact: dividing by the minimum gives higher scores to valid 2-to-1 segments,
+// but also to some invalid 1-to-1 segments like this:
+//   Kocogtam. -Like I said, I was out jogging-- -ObviousIy, you weren't jogging.
+//   Remember the day that they threw you out?
+// Hopefully Gale-Church scoring compensates for this; sometimes not enough.
+double scoreByIdentity( const Phrase& hu, const Phrase& en )
+{
+  double score = 0;
+  if ( exceptionalScoring( hu, en, score ) )
+    return score;
+  const size_t shorter = (hu.size()<en.size()) ? hu.size() : en.size();
+  score = specializedIntersectionSize( hu, en );
+  score /= ( shorter + 1 );
+  score *= maximumScore;
+  return score;
+}
+
+// Fills alignMatrix with scoreByIdentity() for every cell of the row ranges
+// the matrix exposes: for each row huPos in [0, huSentenceList.size()), the
+// columns enPos in [alignMatrix.rowStart(huPos), alignMatrix.rowEnd(huPos)).
+//
+// huSentenceList: sentences indexing the rows of the matrix.
+// enSentenceList: sentences indexing the columns; it must cover every column
+//                 the matrix reports for a row (no bounds check is done here).
+// alignMatrix:    receives the scores through setCell().
+//
+// Unlike the dictionary- and model-based fills, no cell is skipped.
+void sentenceListsToAlignMatrixIdentity( const SentenceList& huSentenceList, const SentenceList& enSentenceList, AlignMatrix& alignMatrix )
+{
+  const int huBookSize = huSentenceList.size();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    const Phrase& hu = huSentenceList[huPos].words;
+    const int rowStart = alignMatrix.rowStart(huPos);
+    const int rowEnd   = alignMatrix.rowEnd(huPos);
+
+    for ( int enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      alignMatrix.setCell( huPos, enPos, scoreByIdentity( hu, enSentenceList[enPos].words ) );
+    }
+  }
+}
+
+// Scores a phrase pair by counting hits over all (hu word, en word) pairs.
+// A pair counts when the two words are identical (except that identical "is"
+// and "a" do not count by this rule) or when transLex.isPresent(huWord,enWord)
+// holds. Paragraph delimiters are scored by exceptionalScoring() instead.
+double scoreByTranslation( const Phrase& hu, const Phrase& en, const TransLex& transLex )
+{
+  double score = 0;
+  if ( exceptionalScoring( hu, en, score ) )
+    return score;
+
+  // TODO This should be done with a combination of lookupLeftWord and intersection_size:
+  for ( size_t huPos=0; huPos<hu.size(); ++huPos )
+  {
+    const Word& huWord = hu[huPos];
+    const bool countsWhenIdentical = (huWord!="is") && (huWord!="a");
+    for ( size_t enPos=0; enPos<en.size(); ++enPos )
+    {
+      const Word& enWord = en[enPos];
+      const bool hit = ( (huWord==enWord) && countsWhenIdentical ) || transLex.isPresent(huWord,enWord);
+      if (hit)
+        ++score;
+    }
+  }
+
+  return score;
+}
+
+// Fills alignMatrix with scoreByTranslation() scores, i.e. instead of plain
+// word identity it uses the many-to-many dictionary transLex.
+//
+// This is much-much slower than the identity-based fill. For performance
+// reasons, by convention the similarity is not calculated for a cell that
+// currently contains outsideOfRadiusValue, a big negative number; such a
+// cell keeps its value.
+//
+// For each row huPos in [0, huSentenceList.size()) the columns enPos in
+// [alignMatrix.rowStart(huPos), alignMatrix.rowEnd(huPos)) are visited.
+//
+// huSentenceList: sentences indexing the rows of the matrix.
+// enSentenceList: sentences indexing the columns; it must cover every column
+//                 the matrix reports for a row (no bounds check is done here).
+// transLex:       dictionary handed on to scoreByTranslation().
+// alignMatrix:    read to find the cells to skip, written through setCell().
+void sentenceListsToAlignMatrixTranslation(
+                                           const SentenceList& huSentenceList, const SentenceList& enSentenceList,
+                                           const TransLex& transLex,
+                                           AlignMatrix& alignMatrix )
+{
+  const int huBookSize = huSentenceList.size();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    const Phrase& hu = huSentenceList[huPos].words;
+    const int rowStart = alignMatrix.rowStart(huPos);
+    const int rowEnd   = alignMatrix.rowEnd(huPos);
+
+    for ( int enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      // Cells marked as outside the radius are left untouched.
+      if (alignMatrix[huPos][enPos]==outsideOfRadiusValue)
+      {
+        continue;
+      }
+
+      const Phrase& en = enSentenceList[enPos].words;
+      alignMatrix.setCell( huPos, enPos, scoreByTranslation( hu, en, transLex ) );
+    }
+  }
+}
+
+// Phrase-pair score from IBM Model One: the negated modelOne.distance(hu,en),
+// unless exceptionalScoring() already settled the score for a paragraph delimiter.
+double scoreByModelOne( const Phrase& hu, const Phrase& en, const IBMModelOne& modelOne )
+{
+  double score = 0;
+  if ( exceptionalScoring( hu, en, score ) )
+    return score;
+
+  return - modelOne.distance(hu,en);
+}
+
+// Fills alignMatrix with scoreByModelOne() scores computed from modelOne.
+// For each row huPos in [0, huSentenceList.size()) the columns enPos in
+// [alignMatrix.rowStart(huPos), alignMatrix.rowEnd(huPos)) are visited; a cell
+// that currently contains outsideOfRadiusValue is skipped and keeps its value.
+//
+// enSentenceList must cover every column the matrix reports for a row (no
+// bounds check is done here). alignMatrix is read to find the cells to skip
+// and written through setCell().
+void sentenceListsToAlignMatrixIBMModelOne(
+                                           const SentenceList& huSentenceList, const SentenceList& enSentenceList,
+                                           const IBMModelOne& modelOne,
+                                           AlignMatrix& alignMatrix )
+{
+  const int huBookSize = huSentenceList.size();
+
+  for ( int huPos=0; huPos<huBookSize; ++huPos )
+  {
+    const Phrase& hu = huSentenceList[huPos].words;
+    const int rowStart = alignMatrix.rowStart(huPos);
+    const int rowEnd   = alignMatrix.rowEnd(huPos);
+
+    for ( int enPos=rowStart; enPos<rowEnd; ++enPos )
+    {
+      // Cells marked as outside the radius are left untouched.
+      if (alignMatrix[huPos][enPos]==outsideOfRadiusValue)
+      {
+        continue;
+      }
+
+      const Phrase& en = enSentenceList[enPos].words;
+      alignMatrix.setCell( huPos, enPos, scoreByModelOne( hu, en, modelOne ) );
+    }
+  }
+}
+
+const double paragraphDelimiterFictiveLength = 0.1973;
+
+// Length of a word. With utfCharCountingMode only the codes that start a UTF-8
+// byte sequence are counted, i.e. every code outside the [128,192) range (bit
+// pattern 10xxxxxx marks the codes that are skipped). Without it the result
+// is simply word.size().
+int characterLength( const Word& word, bool utfCharCountingMode )
+{
+  if (!utfCharCountingMode)
+  {
+    return word.size();
+  }
+
+  int length = 0;
+  for ( size_t i=0; i<word.size(); ++i )
+  {
+    const unsigned char code = word[i];
+    const bool continuation = ( (code & 0xC0) == 0x80 );
+    if (!continuation)
+      ++length;
+  }
+  return length;
+}
+
+// Summed characterLength() of the words of a phrase, each word increased by
+// spaceValue. A paragraph delimiter does not get its real length but the
+// fictive value paragraphDelimiterFictiveLength.
+double characterLength( const Phrase& words, bool utfCharCountingMode )
+{
+  // A space is worth this many letters:
+  const double spaceValue = 0; // 1.5;
+
+  if (isParagraph(words))
+    return paragraphDelimiterFictiveLength;
+
+  double total = 0;
+  for ( size_t i=0; i<words.size(); ++i )
+  {
+    total += characterLength( words[i], utfCharCountingMode ) + spaceValue ;
+  }
+  return total;
+}
+
+// Total character length of the sentences with index in [start, end). Each
+// sentence adds the length of its words plus sentenceValue; sentences that
+// are paragraph delimiters add nothing. They are recognised with isParagraph()
+// instead of comparing a double with the paragraphDelimiterFictiveLength sentinel.
+double characterLength( int start, int end,
+                 const SentenceList& sentenceList, bool utfCharCountingMode )
+{
+  // The end of a sentence is worth this many letters:
+  const double sentenceValue = 3 ;
+  double sum(0);
+  for ( int i=start; i<end; ++i )
+  {
+    const Phrase& words = sentenceList[i].words;
+    if ( !isParagraph(words) )
+      sum += characterLength( words, utfCharCountingMode ) + sentenceValue ;
+  }
+  return sum;
+}
+
+// Replaces the contents of lengths with the characterLength() of each sentence, in order.
+void setSentenceValues( const SentenceList& sentences, SentenceValues& lengths, bool utfCharCountingMode )
+{
+  lengths.clear();
+
+  const size_t sentenceCount = sentences.size();
+  for ( size_t i=0; i!=sentenceCount; ++i )
+    lengths.push_back( characterLength(sentences[i].words,utfCharCountingMode) );
+}
+
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_book_to_matrix.h	(revision 69632)
@@ -0,0 +1,64 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_BOOKTOMATRIX_H
+#define __TMXALIGNER_ALIGNMENT_BOOKTOMATRIX_H
+
+#include <apertium/tmx_words.h>
+#include <apertium/tmx_alignment.h>
+
+namespace TMXAligner
+{
+// Score given to a phrase pair when both phrases are paragraph delimiters.
+const double scoreOfParagraphMatch = 0.31;
+// Score given to a phrase pair when exactly one phrase is a paragraph delimiter.
+const double scoreOfParagraphMisMatch = -1.0;
+// True when the phrase consists of the single word "<p>".
+bool isParagraph( const Phrase& phrase );
+
+// Number of words common to sx and sy. (!!!) We assert that sx and sy are ordered sets of Word-s!
+int intersectionSize( const WordList& sx, const WordList& sy );
+// Fills the row ranges exposed by alignMatrix with scoreByIdentity() values.
+void sentenceListsToAlignMatrixIdentity( const SentenceList& huSentenceList, const SentenceList& enSentenceList, AlignMatrix& alignMatrix );
+
+class TransLex;
+// Similarity based on the words the two phrases literally share.
+double scoreByIdentity( const Phrase& hu, const Phrase& en );
+// Similarity counting word pairs that are identical or present in transLex.
+double scoreByTranslation( const Phrase& hu, const Phrase& en, const TransLex& transLex );
+
+// This is much-much slower, but instead of identity, uses a many-to-many dictionary.
+// For performance reasons, by convention does not calculate the similarity if the
+// alignMatrix element contains outsideOfRadiusValue, a big negative number.
+void sentenceListsToAlignMatrixTranslation(
+                                           const SentenceList& huSentenceListPretty, const SentenceList& enSentenceList,
+                                           const TransLex& transLex,
+                                           AlignMatrix& alignMatrixDetailed );
+
+class IBMModelOne;
+// Same fill using IBM Model One scores; cells containing outsideOfRadiusValue are skipped as well.
+void sentenceListsToAlignMatrixIBMModelOne(
+                                           const SentenceList& huSentenceList, const SentenceList& enSentenceList,
+                                           const IBMModelOne& modelOne,
+                                           AlignMatrix& alignMatrix );
+// Length of one word: its size, or the number of UTF-8 sequence starts in utfCharCountingMode.
+int characterLength( const Word& words, bool utfCharCountingMode=false );
+// Summed length of the words of a phrase; a paragraph delimiter gets a fixed fictive length.
+double characterLength( const Phrase& words, bool utfCharCountingMode=false );
+
+// Summed length of the sentences in [start, end); paragraph delimiters are not counted.
+double characterLength( int start, int end, const SentenceList& sentenceList, bool utfCharCountingMode=false );
+// Replaces lengths with the characterLength() of each sentence.
+void setSentenceValues( const SentenceList& sentences, SentenceValues& lengths, bool utfCharCountingMode );
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_BOOKTOMATRIX_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_dic_tree.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_dic_tree.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_dic_tree.h	(revision 69632)
@@ -0,0 +1,215 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_TEIREADER_DICTIONARIES_H
+#define __TMXALIGNER_TEIREADER_DICTIONARIES_H
+
+#include <map>
+#include <vector>
+#include <set>
+#include <iostream>
+
+namespace TMXAligner
+{
+
+// A simple tree class: a node holds an Identifier and its child nodes, keyed by Atom.
+// NOTE(review): the destructor deletes the children but no copy operations are declared; copying a non-empty tree looks unsafe -- confirm nobody copies.
+template <class Atom, class Identifier>
+class DicTree
+{
+public:
+  // Switches the "conflict in tree" warnings on or off. Gets its value a bit below, after the class.
+  static const bool WarnOnConflict;
+  // A default-constructed node has identifier 0; add() and dump() treat 0 as "no identifier".
+  DicTree() : id(0) {}
+  DicTree( const Identifier& id_ ) : id(id_) {}
+  // Deletes every child node.
+  ~DicTree();
+
+  const Identifier& getIdentifier() const { return id; }
+  void setIdentifier( const Identifier& id_) { id=id_; }
+  DicTree<Atom, Identifier>* lookup( const Atom& word ) const; // child for word, or null
+  DicTree& add( const Atom& word, const Identifier& id ); // child for word, created if missing
+  bool empty() const { return children.empty(); } // true when the node has no children
+  // Writes the subtree to os.
+  void dump( std::ostream& os ) const;
+
+private:
+  typedef std::map<Atom,DicTree*> DicTreeMap;
+  DicTreeMap children;
+  Identifier id;
+};
+
+template <class Atom, class Identifier>
+const bool DicTree<Atom,Identifier>::WarnOnConflict = false;
+
+// This structure stores a very sparse set-system of words.
+// (A dictionary of complex expressions.)
+//
+// It supports the following query:
+// It receives a set of words S. It gives back the sets
+// of the set system that are contained in this set S.
+//
+// For it to be effective, we must be careful during the building phase:
+// words in vector 'words' must be ordered by INCREASING frequency. Rare words first.
+
+template <class Atom, class Identifier>
+class SubsetLookup
+{
+public:
+
+  typedef std::vector<Atom> Atoms;
+  // Stores the word sequence words under id, one tree level per word.
+  void add( const Atoms& words, const Identifier& id );
+  // Clears results, then fills it with the identifiers of the stored sequences matched by words.
+  void lookup( const Atoms& words, std::set<Identifier>& results ) const;
+  // Writes the whole tree to os.
+  void dump( std::ostream& os ) const;
+
+private:
+  DicTree<Atom,Identifier> tree;
+};
+
+// Implementation. It lives in the header because template member functions must be visible wherever they are instantiated.
+
+// Frees the subtree: deleting a child runs this destructor on it in turn.
+template <class Atom, class Identifier>
+DicTree<Atom, Identifier>::~DicTree()
+{
+  typename DicTreeMap::iterator child = children.begin();
+  while ( child != children.end() )
+    delete (child++)->second;
+}
+
+// Returns the child reached through word, creating it (with the given id) if needed.
+// An already existing child keeps its id when id is zero; a non-zero id overwrites
+// the stored one, preceded by a warning on stderr when both ids are non-zero and
+// WarnOnConflict is set.
+template <class Atom, class Identifier>
+DicTree<Atom, Identifier>& DicTree<Atom, Identifier>::add( const Atom& word, const Identifier& id )
+{
+  DicTree* child = lookup(word);
+
+  if (child==0)
+  {
+    // First time word is seen below this node.
+    child = new DicTree<Atom, Identifier>();
+    child->id = id;
+    children[word] = child;
+    return *child;
+  }
+
+  if ( id != 0 )
+  {
+    if ( WarnOnConflict && ( child->id != 0 ) )
+      std::cerr << "warning: conflict in tree" << std::endl;
+    child->id = id;
+  }
+
+  return *child;
+}
+
+// Finds the direct child stored under word.
+// Returns a pointer to that child, or a null pointer when this node has no
+// child for word. Only the children of this node are searched, not deeper
+// levels, and the tree is not modified.
+template <class Atom, class Identifier>
+DicTree<Atom, Identifier>* DicTree<Atom, Identifier>::lookup( const Atom& word ) const
+{
+  const typename DicTreeMap::const_iterator hit = children.find(word);
+
+  if (hit==children.end())
+    return 0;
+
+  return hit->second;
+}
+
+// Writes the subtree to os: the node's id and a space (only when the id is non-zero),
+// "{" and a newline, then for each child its word, a space and the child's dump, then "}".
+template <class Atom, class Identifier>
+void DicTree<Atom, Identifier>::dump( std::ostream& os ) const
+{
+  if (id!=0)
+    os << id << " ";
+  os << "{" << std::endl;
+  for ( typename DicTreeMap::const_iterator child=children.begin(); child!=children.end(); ++child )
+  {
+    os << child->first << " ";
+    child->second->dump(os);
+  }
+  os << "}" << std::endl;
+}
+
+// Registers the word sequence words under id: walks/creates one tree level per
+// word, then labels the final node with id. If that node already carries a
+// non-zero identifier it is kept, and a warning is printed when
+// DicTree::WarnOnConflict is set.
+template <class Atom, class Identifier>
+void SubsetLookup<Atom, Identifier>::add( const Atoms& words, const Identifier& id )
+{
+  typedef DicTree<Atom, Identifier> Node;
+  Node* node = &tree;
+  for ( typename Atoms::const_iterator it=words.begin(); it!=words.end(); ++it )
+    node = &node->add(*it,0);
+
+  if ( node->getIdentifier() != 0 )
+  {
+    if (Node::WarnOnConflict)
+      std::cerr << "warning: conflict in tree" << std::endl;
+    return;
+  }
+  node->setIdentifier(id);
+}
+
+// Collects into results (cleared first) the identifiers of all stored word
+// sequences that are matched, in order, by a subsequence of words.
+// "Pebbles" mark the tree nodes reached so far, starting with the root. For
+// each word, every pebble tries to step to its child for that word: a child
+// with a non-zero identifier is reported, a child with children of its own
+// becomes a pebble for the following words.
+// New pebbles are staged and merged after the pass over the current word, so
+// the outcome cannot depend on where std::set orders the new node pointers.
+template <class Atom, class Identifier>
+void SubsetLookup<Atom, Identifier>::lookup( const Atoms& words, std::set<Identifier>& results ) const
+{
+  typedef std::set<const DicTree<Atom, Identifier>*> Pebbles;
+  Pebbles pebbles;
+  pebbles.insert(&tree);
+
+  results.clear();
+  for ( typename Atoms::const_iterator it=words.begin(); it!=words.end(); ++it )
+  {
+    Pebbles reached;
+    for ( typename Pebbles::const_iterator jt=pebbles.begin(); jt!=pebbles.end(); ++jt )
+    {
+      const DicTree<Atom, Identifier>* subTree = (*jt)->lookup(*it);
+      if (!subTree)
+        continue;
+
+      if (subTree->getIdentifier()!=0)
+        results.insert(subTree->getIdentifier());
+      if (!subTree->empty())
+        reached.insert(subTree);
+    }
+    pebbles.insert(reached.begin(), reached.end());
+  }
+}
+
+template <class Atom, class Identifier>
+void SubsetLookup<Atom, Identifier>::dump( std::ostream& os ) const
+{
+  tree.dump(os); // Delegates to DicTree::dump() on the root node.
+}
+
+} // namespace TMXAligner
+
+
+#endif // #define __TMXALIGNER_TEIREADER_DICTIONARIES_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_dictionary.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_dictionary.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_dictionary.cc	(revision 69632)
@@ -0,0 +1,671 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_dictionary.h>
+
+#include <apertium/tmx_serialize_impl.h>
+#include <apertium/tmx_strings_and_streams.h>
+
+#include <fstream>
+#include <iostream>
+#include <set>
+#include <cassert>
+#include <sstream>
+
+#include <cmath>
+// Always-on assert: logs the expression, throws "assert"; do/while(0) keeps it a single statement.
+#define massert(e) do { if (!(e)) { std::cerr << #e << " failed" << std::endl; throw "assert"; } } while (0)
+
+namespace TMXAligner
+{
+
+// Consumes leading spaces and tabs from `is`; any other character (newlines
+// included) stays unread. Stops as soon as peek() yields something else.
+void eatwhite( std::istream& is )
+{
+  for (;;) {
+    const char next = is.peek();
+    const bool blank = (next==' ') || (next=='\t');
+    if (!blank)
+      return;
+    is.ignore();
+    if (is.eof())
+      return;
+  }
+}
+// Reads whitespace-separated words from `is` into `ph` (cleared first), up to and including the next '\n'.
+void read( WordList& ph, std::istream& is )
+{
+  ph.clear();
+  // Ends at end of input, after consuming a '\n', or when an extraction yields an empty word.
+  while (true)
+  {
+    if (is.eof())
+    {
+      break;
+    }
+    if (is.peek()=='\r')  // drop a carriage return sitting at the read position
+    {
+      is.ignore();
+    }
+    if (is.peek()=='\n')
+    {
+      is.ignore();
+      break;
+    }
+
+    Word w;
+    is >> w;
+    // eatwhite() skips only spaces and tabs, so a following '\n' stays for the check above.
+    eatwhite(is);
+
+    if (w.empty())
+      break;
+
+    ph.push_back(w);
+  }
+}
+// Replaces the list with sentences parsed from lines of the form "id<TAB>word word ...".
+void SentenceList::read( std::istream& is )
+{
+  clear();
+
+  while (!is.eof())
+  {
+    Sentence sentence;
+
+    is >> sentence.id;
+    // An empty id (nothing left to extract) ends the read.
+    if (sentence.id.empty())
+      break;
+    // So does an id that is not directly followed by a tab; that sentence is not stored.
+    if (is.peek()!='\t')
+      break;
+    is.ignore();
+
+    TMXAligner::read( sentence.words, is );
+
+    push_back(sentence);
+  }
+}
+
+// Replaces the list with id-less sentences: TMXAligner::read() is called
+// once per pass for as long as the stream is good and not at end of input.
+// Each pass appends its sentence, even when no word was read.
+void SentenceList::readNoIds( std::istream& is )
+{
+  clear();
+
+  for ( ; is.good() && !is.eof(); ) {
+    Sentence current;
+    TMXAligner::read( current.words, is );
+    push_back(current);
+  }
+}
+
+// Writes one "id<TAB>words" line per sentence, then flushes `os`.
+void SentenceList::write( std::ostream& os ) const {
+  const size_t count = size();
+  for ( size_t n=0; n!=count; ++n ) {
+    const Sentence& current = (*this)[n];
+    os << current.id << "\t" << current.words << "\n";
+  }
+  os.flush();
+}
+
+// Writes the words of each sentence on a line of their own, then flushes `os`.
+void SentenceList::writeNoIds( std::ostream& os ) const {
+  const size_t count = size();
+  for ( size_t n=0; n!=count; ++n ) {
+    const Sentence& current = (*this)[n];
+    os << current.words << "\n";
+  }
+  os.flush();
+}
+
+// Reads a bicorpus in which each line holds two tab-separated sentences.
+// The first field is parsed into huSentenceList, the second into
+// enSentenceList; both lists are cleared beforehand. Reading stops at end of
+// input or at the first empty line. A line that does not split into exactly
+// two fields is reported on stderr and the C-string "data error" is thrown.
+void readBicorpus( std::istream& is, SentenceList& huSentenceList, SentenceList& enSentenceList)
+{
+  huSentenceList.clear();
+  enSentenceList.clear();
+
+  // Indexed by field number, so both halves share one parsing path.
+  SentenceList* const sides[2] = { &huSentenceList, &enSentenceList };
+
+  while ( is.good() && !is.eof() )
+  {
+    // One corpus line per pass.
+    std::string line;
+    std::vector<std::string> halfs;
+
+    std::getline(is,line,'\n');
+    if (line.empty())
+      break;
+
+    split( line, halfs );
+    if (halfs.size()!=2)
+    {
+      std::cerr << "Incorrect bicorpus file: " << halfs.size() << " records in line " << huSentenceList.size() << std::endl;
+      throw "data error";
+    }
+
+    // Parse each field as one sentence and append it to its list.
+    for ( size_t side=0; side<2; ++side )
+    {
+      std::istringstream fieldStream(halfs[side]);
+
+      Sentence sentence;
+      read( sentence.words, fieldStream );
+
+      sides[side]->push_back(sentence);
+    }
+  }
+}
+
+// Writes the two lists side by side, one "huWords<TAB>enWords" line per
+// index, then flushes `os`. The lists must have equal size (assert).
+void writeBicorpus( std::ostream& os, const SentenceList& huSentenceList, const SentenceList& enSentenceList)
+{
+  assert(huSentenceList.size()==enSentenceList.size());
+  const size_t pairCount = huSentenceList.size();
+  for ( size_t n=0; n!=pairCount; ++n )
+    os << huSentenceList[n].words << "\t" << enSentenceList[n].words << "\n";
+  os.flush();
+}
+
+// Replaces the contents with one WordList per input line that yields words.
+void HalfDictionary::read( std::istream& is )
+{
+  clear();
+  // good() rather than !eof(): a stream that failed without reaching end of
+  // input (e.g. a file that could not be opened) never sets eofbit, so the
+  // old condition spun forever on the empty lists read() returns for it.
+  while (is.good())
+  {
+    WordList ph;
+    TMXAligner::read(ph,is);
+    if (!ph.empty())
+      push_back(ph);
+  }
+}
+// Reads dictionary lines of the form "en words @ hu words" into (en,hu) pairs, replacing the old contents.
+void DictionaryItems::read( std::istream& is )
+{
+  clear();
+
+  while (!is.eof())
+  {
+    WordList hu;
+    WordList en;
+    Word delimiter;  // assigned below but never read in this function
+
+    bool engPart = true;  // words before the delimiter go to `en`, later ones to `hu`
+
+    while (true)
+    {
+      Word w;
+      is >> w;
+
+      if (w.empty())
+        break;
+
+      // We allow vonyo7's "@" delimiter, and vonyokornai's "@V", "@N" etc. delimiters.
+      if ( (w.size()<=2) && (w[0]=='@') )
+      {
+        engPart = false;
+        delimiter = w;
+      }
+      else if (engPart)
+      {
+        en.push_back(w);
+      }
+      else
+      {
+        hu.push_back(w);
+      }
+      // Skip spaces and carriage returns (not tabs) so a line end is seen next.
+      while ( (is.peek()==' ') || (is.peek()=='\r') )
+      {
+        is.ignore();
+      }
+      // A newline closes the current item.
+      if (is.peek()=='\n')
+      {
+        is.ignore();
+        break;
+      }
+    }
+    // An item with no words before the delimiter stops the whole read, not just this line.
+    if (en.empty())
+      break;
+
+    push_back(std::make_pair(en,hu));
+
+  }
+
+}
+
+
+// Not implemented: always throws the C-string "unimplemented".
+void Dictionary::read( const char* dictionaryFile ) {
+  throw "unimplemented";
+}
+
+// Not implemented: always throws the C-string "unimplemented".
+void Dictionary::build( const DictionaryItems& dictionaryItems ) {
+  throw "unimplemented";
+}
+
+// Not implemented: always throws the C-string "unimplemented".
+void Dictionary::reverse( const Dictionary& dic ) {
+  throw "unimplemented";
+}
+
+// Stub: performs no lookup, leaves `results` untouched, returns false.
+bool Dictionary::lookupWord( const Word& word, DictionaryItems& results ) const {
+  return false;
+}
+
+// Stub: performs no lookup, leaves `results` untouched, returns false.
+bool Dictionary::lookupWordSet( const WordList& words, DictionaryItems& results ) const {
+  return false;
+}
+
+// Adds one occurrence of `word`, creating its entry on first sight.
+void FrequencyMap::add( const Word& word ) {
+  ++(*this)[word];
+}
+
+// Takes one occurrence of `word` away; the entry stays even at zero or below.
+void FrequencyMap::remove( const Word& word ) {
+  --(*this)[word];
+}
+
+// Counts every word of `wordList` on top of the counts already present.
+void FrequencyMap::build( const WordList& wordList )
+{
+  const size_t wordCount = wordList.size();
+  for ( size_t n=0; n!=wordCount; ++n )
+    add(wordList[n]);
+}
+
+// Takes one occurrence away for every word of `wordList`.
+void FrequencyMap::remove( const WordList& wordList )
+{
+  const size_t wordCount = wordList.size();
+  for ( size_t n=0; n!=wordCount; ++n )
+    remove(wordList[n]);
+}
+
+// Counts every word of every sentence, on top of the counts already present.
+void FrequencyMap::build( const SentenceList& sentenceList )
+{
+  for ( size_t s=0; s<sentenceList.size(); ++s )
+  {
+    const WordList& sentenceWords = sentenceList[s].words;
+    for ( size_t w=0; w<sentenceWords.size(); ++w )
+      add(sentenceWords[w]);
+  }
+}
+
+// Returns the sum of all stored counts.
+int FrequencyMap::total() const
+{
+  int sum = 0;
+  const_iterator entry = begin();
+  while ( entry!=end() ) {
+    sum += entry->second;
+    ++entry;
+  }
+  return sum;
+}
+
+// Prints "count<TAB>word" lines from the highest count down, then flushes.
+// Output stops once `itemNum` lines are out; a non-positive itemNum never
+// triggers the stop, so everything is listed.
+void FrequencyMap::dump( std::ostream& os, int itemNum ) const
+{
+  FrequencyMap::ReFrequencyMap byCount;
+  reverseMap(byCount);
+  FrequencyMap::ReFrequencyMap::reverse_iterator entry = byCount.rbegin();
+  while ( entry!=byCount.rend() ) {
+    os << entry->first << "\t" << entry->second << "\n";
+    if (--itemNum==0)
+      break;
+    ++entry;
+  }
+  os.flush();
+}
+
+// Fills `allowedWords` (cleared first) with the most frequent words:
+// entries are visited from the highest count downwards and kept while the
+// running sum of counts, divided by total(), does not exceed `ratio`.
+void FrequencyMap::highPassFilter( WordList& allowedWords, double ratio ) const
+{
+  allowedWords.clear();
+
+  FrequencyMap::ReFrequencyMap byCount;
+  reverseMap(byCount);
+
+  const int grandTotal = total();
+  int runningSum = 0;
+
+  typedef FrequencyMap::ReFrequencyMap::reverse_iterator Cursor;
+  for ( Cursor entry=byCount.rbegin(); entry!=byCount.rend(); ++entry ) {
+    runningSum += entry->first;
+    if ( static_cast<double>(runningSum)/grandTotal > ratio )
+      break;
+    allowedWords.push_back(entry->second);
+  }
+}
+
+// Fills `allowedWords` (cleared first) with the least frequent words:
+// entries are visited from the lowest count upwards and kept while the
+// running sum of counts, divided by total(), does not exceed `ratio`.
+void FrequencyMap::lowPassFilter( WordList& allowedWords, double ratio ) const
+{
+  allowedWords.clear();
+
+  FrequencyMap::ReFrequencyMap byCount;
+  reverseMap(byCount);
+
+  const int grandTotal = total();
+  int runningSum = 0;
+
+  typedef FrequencyMap::ReFrequencyMap::iterator Cursor;
+  for ( Cursor entry=byCount.begin(); entry!=byCount.end(); ++entry ) {
+    runningSum += entry->first;
+
+    if ( static_cast<double>(runningSum)/grandTotal > ratio )
+      break;
+    allowedWords.push_back(entry->second);
+  }
+}
+
+// Rebuilds `reFrequencyMap` as the count -> word view of this map; words
+// sharing a count all survive because the target is a multimap.
+void FrequencyMap::reverseMap( FrequencyMap::ReFrequencyMap& reFrequencyMap ) const {
+  reFrequencyMap.clear();
+
+  typedef FrequencyMap::ReFrequencyMap::value_type CountAndWord;
+  for ( const_iterator entry=begin(); entry!=end(); ++entry )
+    reFrequencyMap.insert( CountAndWord(entry->second,entry->first) );
+}
+
+
+// Removes from every sentence of `sentenceList` each word that does not
+// occur in `words`; the order of the surviving words is unchanged.
+void filterSentences( SentenceList& sentenceList, const WordList& words )
+{
+  std::set<Word> wordSet;
+  for (size_t i=0; i<words.size(); ++i )
+    wordSet.insert(words[i]);
+
+  for (size_t i=0; i<sentenceList.size(); ++i )
+  {
+    WordList& wordList = sentenceList[i].words;
+    // The index advances only when the word is kept: after an erase the
+    // next candidate already sits at j. This mirrors the stopword loops and
+    // no longer steps an unsigned index below zero to compensate.
+    for ( size_t j=0; j<wordList.size(); )
+    {
+      if ( wordSet.find(wordList[j]) == wordSet.end() )
+        wordList.erase(wordList.begin()+j);
+      else
+        ++j;
+    }
+  }
+}
+
+
+// Fills `words` (cleared first) from an array of C strings that is
+// terminated by an empty string ""; the terminator itself is not inserted.
+void cStyleStringsToStringSet( const char** wordsPtr, std::set<Word>& words )
+{
+  words.clear();
+  for ( const char** entry=wordsPtr; (*entry)[0]!='\0'; ++entry )
+  {
+    words.insert(*entry);
+  }
+}
+// Deletes the hard-coded Hungarian stopwords below from every sentence of `huSentenceList`, in place.
+void removeHungarianStopwords( SentenceList& huSentenceList )
+{
+  const char* huStopwordsC[] =
+  {
+    "a", "az",
+
+    "egy",
+    // NOTE(review): this literal and the one after "van" hold bytes that are not valid UTF-8 in this copy
+    "�s",
+    // (presumably accented Hungarian words in a legacy 8-bit encoding) -- confirm they match the corpus encoding.
+    "nem", "ne",
+
+    "is",
+
+    "van",
+
+    "�",
+
+    "ha",
+    // The empty string ends the list for cStyleStringsToStringSet().
+    ""
+  };
+
+  std::set<Word> stopwords;
+  cStyleStringsToStringSet( huStopwordsC, stopwords );
+
+
+  for ( size_t i=0; i<huSentenceList.size(); ++i )
+  {
+
+    // j advances only when the word is kept; after an erase the next word already sits at j.
+    WordList& huWords = huSentenceList[i].words;
+    for ( size_t j=0; j<huWords.size(); )
+    {
+      if (stopwords.find(huWords[j])!=stopwords.end())
+      {
+        huWords.erase(huWords.begin()+j);
+      }
+      else
+      {
+        ++j;
+      }
+    }
+  }
+}
+
+// Deletes the hard-coded English stopwords below from every sentence of
+// `enSentenceList`, in place.
+void removeEnglishStopwords( SentenceList& enSentenceList )
+{
+  // With apologies to everyone for fixing this list in program code.
+  const char* enStopwordsC[] =
+  {
+    "the", "it",
+
+    "a", "an", "one",
+
+    "and",
+
+    "not", "no",
+
+    "too",
+
+    "is", "be", // The 1984 stemming maps "is" to "be".
+
+    "to",
+
+    "he", "she",
+
+    "if",
+
+    "of",
+
+    ""  // terminator expected by cStyleStringsToStringSet()
+  };
+
+  std::set<Word> stopwords;
+  cStyleStringsToStringSet( enStopwordsC, stopwords );
+
+  for (size_t s=0; s<enSentenceList.size(); ++s )
+  {
+    WordList& enWords = enSentenceList[s].words;
+
+    // Advance only past kept words: erasing shifts the next word into place.
+    size_t w = 0;
+    while ( w<enWords.size() )
+    {
+      const bool isStopword = ( stopwords.find(enWords[w])!=stopwords.end() );
+      if (isStopword)
+        enWords.erase(enWords.begin()+w);
+      else
+        ++w;
+    }
+  }
+}
+
+// Strips the built-in stopwords from both sides of a bicorpus, in place.
+void removeStopwords( SentenceList& huSentenceList, SentenceList& enSentenceList ) {
+  removeHungarianStopwords( huSentenceList );
+  removeEnglishStopwords  ( enSentenceList );
+}
+
+// Records the pair in both directions: hu -> en in `forward`, en -> hu in `backward`.
+void TransLex::add( const Word& huWord, const Word& enWord ) {
+  forward .insert( std::make_pair( huWord, enWord ) );
+  backward.insert( std::make_pair( enWord, huWord ) );
+}
+
+// Loads the single-word pairs of `dictionaryItems` through add(). Items
+// whose sides are not exactly one word each are skipped and only counted.
+void TransLex::build( const DictionaryItems& dictionaryItems )
+{
+  int added = 0, ignored = 0;
+  for ( size_t n=0; n<dictionaryItems.size(); ++n )
+  {
+    const DictionaryItem& item = dictionaryItems[n];
+    const bool oneToOne = (item.first.size()==1) && (item.second.size()==1);
+    if (!oneToOne) {
+      ++ignored;
+      continue;
+    }
+    add( item.first[0], item.second[0] );
+    ++added;
+  }
+  std::cerr << added << " items added to TransLex, " << ignored << " multiword items ignored." << std::endl;
+}
+
+// Returns the [first,second) range of `forward` entries keyed by `huWord`.
+TransLex::DictInterval TransLex::lookupLeftWord ( const Word& huWord ) const {
+  return forward.equal_range(huWord);
+}
+
+// Returns the [first,second) range of `backward` entries keyed by `enWord`.
+TransLex::DictInterval TransLex::lookupRightWord( const Word& enWord ) const {
+  return backward.equal_range(enWord);
+}
+
+// True when `enWord` is among the translations stored for `huWord` in the
+// forward map.
+bool TransLex::isPresent( const Word& huWord, const Word& enWord ) const
+{
+  const DictInterval candidates = lookupLeftWord(huWord);
+
+  // Linear scan over the entries sharing the key.
+  WordMultimapIt entry = candidates.first;
+  while ( (entry!=candidates.second) && !(entry->second == enWord) )
+    ++entry;
+
+  return entry!=candidates.second;
+}
+
+// Returns the stored translation probability for the (hu,en) pair, or 0
+// when the pair has no entry in transProbs.
+double IBMModelOne::lookup( const Word& hu, const Word& en ) const
+{
+  const WordPair key(hu,en);
+  const TransProbs::const_iterator hit = transProbs.find(key);
+
+  if (hit!=transProbs.end())
+    return hit->second;
+
+  // Unknown pairs count as probability zero.
+  return 0;
+}
+
+// Rebuilds transProbs from a sentence-aligned bicorpus (equal list sizes are
+// enforced by massert). For each sentence pair every (huWord,enWord)
+// combination receives weight 1/|hu sentence|, and the same weight is
+// accumulated per huWord; each pair is finally divided by its huWord's sum.
+void IBMModelOne::build( const SentenceList& huSentenceList, const SentenceList& enSentenceList )
+{
+  transProbs.clear();
+  massert( huSentenceList.size()==enSentenceList.size() );
+
+  std::map<Word,double> huWeight;
+
+  for ( size_t s=0; s<huSentenceList.size(); ++s )
+  {
+    const Phrase& huWords = huSentenceList[s].words;
+    const Phrase& enWords = enSentenceList[s].words;
+    const double share = 1.0 / huWords.size();
+
+    for ( size_t h=0; h<huWords.size(); ++h )
+    {
+      const Word& huWord = huWords[h];
+      huWeight[huWord] += share;
+      for ( size_t e=0; e<enWords.size(); ++e )
+        transProbs[ WordPair(huWord, enWords[e]) ] += share ;
+    }
+  }
+
+  // Normalise every pair by the accumulated weight of its hu word.
+  TransProbs::iterator entry = transProbs.begin();
+  while ( entry!=transProbs.end() ) {
+    entry->second /= huWeight[entry->first.first];
+    ++entry;
+  }
+}
+
+// Not implemented: always throws the C-string "unimplemented".
+void IBMModelOne::reestimate( const SentenceList& huSentenceList, const SentenceList& enSentenceList ) {
+  throw "unimplemented";
+}
+
+// Unfinished: accumulates a score in `val` but never returns it -- every call ends in a throw.
+double IBMModelOne::distance( const Phrase& hu, const Phrase& en ) const
+{
+  double val = log(1.0+hu.size()) / en.size() ;
+
+  for ( size_t enPos=0; enPos<en.size(); ++enPos )
+  {
+    double sum = 0;
+    const Word& enWord = en[enPos];
+    // Total probability of enWord over all words of `hu`.
+    for ( size_t huPos=0; huPos<hu.size(); ++huPos )
+    {
+      sum += lookup( hu[huPos], enWord );
+    }
+    // Throws "assert" when no word of `hu` has a stored probability for enWord.
+    massert( sum>0 );
+
+    val -= log(sum);
+  }
+  // Reached whenever the loop completes; `val` is discarded.
+  throw "unimplemented";
+}
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/tmx_dictionary.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_dictionary.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_dictionary.h	(revision 69632)
@@ -0,0 +1,131 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_DICTIONARY_H
+#define __TMXALIGNER_ALIGNMENT_DICTIONARY_H
+
+#include <apertium/tmx_words.h>
+
+#include <string>
+#include <vector>
+#include <map>
+#include <iosfwd>
+
+
+namespace TMXAligner
+{
+// An entry pairs two phrases; DictionaryItems::read() in tmx_dictionary.cc stores them as (en,hu).
+typedef std::pair<Phrase,Phrase> DictionaryItem;
+// Plain vector of entries with a text reader added.
+class DictionaryItems : public std::vector<DictionaryItem>
+{
+public:
+  void read( std::istream& is );
+};
+// Vector of word lists; read() in tmx_dictionary.cc stores one WordList per input line that yields words.
+class HalfDictionary : public std::vector<WordList>
+{
+public:
+  void read( std::istream& is );
+};
+
+
+// After reading, this dictionary cannot be altered.
+// Also, this is a strictly one-directional dictionary.
+// If the other direction is needed, build a second dictionary with reverse( const Dictionary& dic ).
+class Dictionary
+{
+public:
+  void read( const char* dictionaryFile );
+  void reverse( const Dictionary& dic );
+  void build( const DictionaryItems& dictionaryItems );
+  // NOTE: in tmx_dictionary.cc the three functions above throw "unimplemented" and the lookups return false.
+  bool lookupWord( const Word& word, DictionaryItems& results ) const;
+  bool lookupWordSet( const WordList& words, DictionaryItems& results ) const;
+
+private:
+  void buildWordLookupTable();  // NOTE(review): no definition in tmx_dictionary.cc -- confirm it is unused
+
+private:
+  DictionaryItems dictionaryItems;
+  // NOTE(review): this declares a type, not a data member -- confirm a table member was not intended.
+  typedef std::map<Word,int> wordLookupTable;
+};
+// Word -> occurrence count table built directly on std::map<Word,int>.
+class FrequencyMap : public std::map<Word,int>
+{
+public:
+  void add( const Word& word );     // increments the word's count
+  void remove( const Word& word );  // decrements the word's count; the entry is not erased
+  void build( const WordList& wordList );   // add() for every word
+  void remove( const WordList& wordList );  // remove() for every word
+  void build( const SentenceList& sentenceList ); // Just for convenience.
+  int  total() const;  // sum of all counts
+  void dump( std::ostream& os, int itemNum ) const;  // "count<TAB>word" lines, highest count first
+  void lowPassFilter( WordList& allowedWords, double ratio ) const;   // walks from the lowest count up
+  void highPassFilter( WordList& allowedWords, double ratio ) const;  // walks from the highest count down
+
+private:
+  typedef std::multimap<int,Word> ReFrequencyMap;  // count -> word view
+  void reverseMap( ReFrequencyMap& reFrequencyMap ) const;
+};
+
+// Keeps in each sentence only the words that occur in `words`.
+void filterSentences( SentenceList& sentenceList, const WordList& words );
+// The functions below delete the stopword lists hard-coded in tmx_dictionary.cc, in place.
+void removeHungarianStopwords( SentenceList& huSentenceList );
+void removeEnglishStopwords  ( SentenceList& enSentenceList );
+void removeStopwords  ( SentenceList& huSentenceList, SentenceList& enSentenceList );
+
+// A pair of words.
+typedef std::pair<Word,Word> WordPair;
+// Bidirectional word-to-word lexicon kept as two multimaps.
+class TransLex
+{
+public:
+
+  typedef std::multimap<Word,Word> WordMultimap;
+  typedef WordMultimap::const_iterator WordMultimapIt;
+  typedef std::pair<WordMultimapIt,WordMultimapIt> DictInterval;  // [first,second) range of matches
+
+  void add( const Word& huWord, const Word& enWord );
+  void build( const DictionaryItems& dictionaryItems );  // single-word pairs only; other items are skipped
+
+  DictInterval lookupLeftWord ( const Word& huWord ) const;  // entries of `forward` keyed by huWord
+  DictInterval lookupRightWord( const Word& enWord ) const;  // entries of `backward` keyed by enWord
+  bool isPresent( const Word& huWord, const Word& enWord ) const;
+
+private:
+  WordMultimap forward;   // huWord -> enWord
+  WordMultimap backward;  // enWord -> huWord
+};
+// Table of word-pair translation weights; see tmx_dictionary.cc for how build() fills it.
+class IBMModelOne
+{
+public:
+  double lookup( const Word& hu, const Word& en ) const;
+  // NOTE: the definition in tmx_dictionary.cc always throws; no value is ever returned.
+  double distance( const Phrase& hu, const Phrase& en ) const;
+
+  void build( const SentenceList& huSentenceList, const SentenceList& enSentenceList );
+  // NOTE: the definition in tmx_dictionary.cc throws "unimplemented".
+  void reestimate( const SentenceList& huSentenceList, const SentenceList& enSentenceList );
+
+public:
+  typedef std::pair<Word,Word> WordPair;
+  typedef std::map<WordPair,double> TransProbs;
+  // Keyed by (hu word, en word); public, so callers can read it directly.
+  TransProbs transProbs;
+};
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_DICTIONARY_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_quasi_diagonal.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_quasi_diagonal.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_quasi_diagonal.h	(revision 69632)
@@ -0,0 +1,171 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_QUASIDIAGONAL_H
+#define __TMXALIGNER_ALIGNMENT_QUASIDIAGONAL_H
+
+#include <vector>
+
+namespace TMXAligner
+{
+
+// Band ("quasi-diagonal") matrix: each row stores only `thickness` cells
+// placed around the main diagonal of a height x width rectangle. Cells
+// outside the band are not stored and read as a shared default value.
+template <class T>
+class QuasiDiagonal
+{
+public:
+  // One matrix row. Bounds are checked on every access, which makes it slow.
+  class QuasiDiagonalRow
+  {
+  public:
+    // QuasiDiagonalRow is similar to a vector of size size_. The difference is
+    // that only the [offset_,offset_+thickness) subinterval can be written.
+    // Reading from outside this interval yields outsideDefault_.
+    // Accessing anything outside the [0,size_) interval throws.
+    // It is NOT asserted that [offset_,offset_+thickness)
+    // should be a subset of [0,size_). Stored cells start out as T().
+    QuasiDiagonalRow( int size_=0, int offset_=0, int thickness=0, T outsideDefault_=T() )
+      : offset(offset_), size(size_), data(thickness,T()), outsideDefault(outsideDefault_) {}
+
+    enum ZoneType
+    {
+      DiagZone    = 1,  // inside [0,size) and inside the stored band
+      MatrixZone  = 2,  // inside [0,size) but outside the stored band
+      OutsideZone = 3   // outside [0,size)
+    };
+
+    // Classifies column k; the accessors below dispatch on the result.
+    ZoneType zone(int k) const
+    {
+      if ( (k<0) || (k>=size) )
+        return OutsideZone;
+      const int d = k-offset;
+      const bool stored = (d>=0) && (d<(int)data.size());
+      return stored ? DiagZone : MatrixZone;
+    }
+
+    // Read access: the stored cell inside the band, outsideDefault elsewhere
+    // in the row. Throws "out of matrix" for k outside [0,size).
+    const T& operator[](int k) const
+    {
+      switch (zone(k))
+      {
+        case DiagZone:
+          return data[k-offset];
+        case MatrixZone:
+          return outsideDefault;
+        default:
+          throw "out of matrix";
+      }
+    }
+
+    // Write access, only available inside the band. Throws "out of matrix"
+    // for k outside [0,size) and "out of quasidiagonal" for unstored cells.
+    T& cell(int k)
+    {
+      switch (zone(k))
+      {
+        case DiagZone:
+          return data[k-offset];
+        case MatrixZone:
+          throw "out of quasidiagonal";
+        default:
+          throw "out of matrix";
+      }
+    }
+
+  private:
+    int offset;           // column of the first stored cell (may be negative)
+    int size;             // logical row length
+    std::vector<T> data;  // the stored band, addressed as data[k-offset]
+    T   outsideDefault;   // value read outside the band
+  };
+
+  // Builds height_ rows of logical length width_, each holding thickness_
+  // cells starting at offset(row).
+  QuasiDiagonal( int height_, int width_, int thickness_, T outsideDefault_=T() )
+    : height(height_), width(width_), thicknes(thickness_)
+  {
+    // Reserve once so that appending does not reallocate and copy the rows
+    // (and their cell vectors) already built.
+    if (height>0)
+      rows.reserve(height);
+    for ( int i=0; i<height; ++i )
+    {
+      rows.push_back( QuasiDiagonalRow( width, offset(i), thicknes, outsideDefault_ ) );
+    }
+  }
+
+  // Column at which the stored band of `row` begins; negative near the top.
+  // Divides by height, so it must not be called on a matrix without rows.
+  int offset( int row ) const
+  {
+    return (row*width/height-thicknes/2);
+  }
+
+  // First stored column of `row` that lies inside the matrix.
+  int rowStart( int row ) const
+  {
+    const int s = offset(row);
+    return ( s>0 ? s : 0 );
+  }
+
+  // One past the last stored column of `row` that lies inside the matrix.
+  int rowEnd( int row ) const
+  {
+    const int e = offset(row)+thicknes;
+    return ( e<width ? e : width );
+  }
+
+  // The first coordinate is (somewhat atypically) the row.
+  // Unlike cell(), the row index is not range-checked here.
+  const QuasiDiagonalRow& operator[]( int y ) const
+  {
+    return rows[y];
+  }
+
+  // Writable reference to the cell at row y, column x. Throws
+  // "out of matrix" for a bad row; column errors come from the row itself.
+  T& cell( int y, int x )
+  {
+    if ((y<0)||(y>=height))
+    {
+      throw "out of matrix";
+    }
+    return rows[y].cell(x);
+  }
+
+  // Stores t at (y,x). Always returns true; failures surface as throws.
+  bool setCell( int y, int x, const T& t )
+  {
+    cell(y,x) = t;
+    return true;
+  }
+
+  int size() const { return height; }
+  // Yes, I know it's a stupid name. The reason is, I don't want to
+  // put width/height on the interface, because usually
+  // the first coord is the columns, but not here.
+  // This could lead to confusion.
+  int otherSize() const { return width; }
+
+  int thickness() const { return thicknes; }
+
+private:
+  std::vector<QuasiDiagonalRow> rows;
+  int height,width,thicknes;
+};
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_QUASIDIAGONAL_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_serialize_impl.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_serialize_impl.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_serialize_impl.h	(revision 69632)
@@ -0,0 +1,52 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_INCLUDE_SERIALIZEIMPL_H
+#define __TMXALIGNER_INCLUDE_SERIALIZEIMPL_H
+
+#include <iostream>
+#include <vector>
+#include <set>
+
+// Streams the elements of `v` separated by single spaces; returns `os`.
+template <class T>
+std::ostream& operator<<( std::ostream& os, const std::vector<T>& v ) {
+  typedef typename std::vector<T>::size_type Index;
+  for ( Index i=0; i<v.size(); ++i ) {
+    os << v[i] ;
+    if (i+1!=v.size())
+      os << " ";
+  }
+  return os;
+}
+
+// Streams the elements of `v` in set order, separated by single spaces and
+// without a trailing separator. An empty set writes nothing; the previous
+// loop dereferenced v.begin() unconditionally, which is undefined behaviour
+// when begin()==end(). Returns `os`.
+template <class T>
+std::ostream& operator<<( std::ostream& os, const std::set<T>& v )
+{
+  typename std::set<T>::const_iterator it=v.begin();
+  if (it == v.end())
+    return os;
+
+  os << *it ;
+  for ( ++it; it!=v.end(); ++it )
+  {
+    os << " ";
+    os << *it ;
+  }
+
+  return os;
+}
+
+#endif // #define __TMXALIGNER_INCLUDE_SERIALIZEIMPL_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.cc	(revision 69632)
@@ -0,0 +1,38 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_strings_and_streams.h>
+
+namespace TMXAligner
+{
+
+// Splits `line` at every occurrence of `delim`. Empty fields are kept, so the
+// result always holds (number of delimiters + 1) strings; an empty `line`
+// yields one empty string. Previous contents of `words` are discarded.
+void split( const std::string line, std::vector<std::string>& words, char delim /*='\t'*/ )
+{
+  words.clear();
+
+  std::string::size_type fieldStart = 0;
+  while (true)
+  {
+    const std::string::size_type fieldEnd = line.find(delim, fieldStart);
+    if (fieldEnd == std::string::npos)
+    {
+      words.push_back(line.substr(fieldStart));
+      break;
+    }
+    words.push_back(line.substr(fieldStart, fieldEnd-fieldStart));
+    fieldStart = fieldEnd+1;
+  }
+}
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_strings_and_streams.h	(revision 69632)
@@ -0,0 +1,25 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_INCLUDE_STRINGSANDSTREAMS_H
+#define __TMXALIGNER_INCLUDE_STRINGSANDSTREAMS_H
+
+#include <string>
+#include <vector>
+
+namespace TMXAligner
+{
+// Splits `line` at each `delim` (tab by default) into `words`; empty fields are kept.
+void split( const std::string line, std::vector<std::string>& words, char delim='\t' );
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_INCLUDE_STRINGSANDSTREAMS_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.cc
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.cc	(revision 69632)
@@ -0,0 +1,481 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#include <apertium/tmx_trail_postprocessors.h>
+
+#include <apertium/tmx_words.h>
+#include <apertium/tmx_book_to_matrix.h>
+
+#include <iostream>
+#include <cmath>
+
+namespace TMXAligner
+{
+
+const bool global_postprocessLogging = false; // Compile-time switch for the "Thrown away at position" traces that the postprocessTrail* functions write to std::cerr.
+
+TrailScores::TrailScores( const Trail& trail_, const AlignMatrix& dynMatrix_ ) : trail(trail_), dynMatrix(dynMatrix_) {} // Only binds the two reference members; no other work is done.
+
+// Score of the j-th hole: dynMatrix at rundle j minus dynMatrix at rundle j+1.
+double TrailScores::operator()( int j ) const
+{
+  const Rundle& from = trail[j];
+  const Rundle& to   = trail[j+1];
+  return dynMatrix[from.first][from.second] - dynMatrix[to.first][to.second];
+}
+
+BisentenceListScores::BisentenceListScores( const BisentenceList& bisentenceList_, const AlignMatrix& dynMatrix_ ) 
+  : bisentenceList(bisentenceList_), dynMatrix(dynMatrix_) {} // Only binds the two reference members; nothing is copied.
+
+// Score of the j-th bisentence: dynMatrix at its cell minus dynMatrix at the diagonally following cell.
+double BisentenceListScores::operator()( int j ) const
+{
+  const BisentenceList::value_type& cell = bisentenceList[j];
+  return dynMatrix[cell.first  ][cell.second  ]
+       - dynMatrix[cell.first+1][cell.second+1];
+}
+
+// Binds the trail, the score matrix and both sentence lists to the reference members of the same names; nothing is copied.
+TrailScoresInterval::TrailScoresInterval( const Trail& trail_, const AlignMatrix& dynMatrix_,
+    const SentenceList& huSentenceList_, const SentenceList& enSentenceList_ )
+    : trail(trail_), dynMatrix(dynMatrix_), huSentenceList(huSentenceList_), enSentenceList(enSentenceList_) {}
+
+// Average score of the segmentum that runs from rundle `start` to rundle `end`.
+// The bigger the better.
+//
+// The raw score is the decrease of dynMatrix between the two rundles. Sentences
+// for which isParagraph() holds are treated specially: the smaller of the two
+// per-language paragraph counts is assumed to be matched pairs (worth
+// scoreOfParagraphMatch each), the surplus is assumed to be skipped (worth
+// skipScore each), and this estimate is removed from the raw score. The rest is
+// divided by the longer of the two intervals, paragraphs excluded.
+double TrailScoresInterval::scoreSegmentum( const Rundle& start, const Rundle& end ) const
+{
+  const double rawScore =
+    dynMatrix[start.first][start.second] - dynMatrix[end.first][end.second];
+
+  // Paragraph delimiters inside the Hungarian interval.
+  int huParagraphs = 0;
+  for ( int hu=start.first; hu<end.first; ++hu )
+  {
+    if (isParagraph(huSentenceList[hu].words))
+    {
+      ++huParagraphs;
+    }
+  }
+
+  // Paragraph delimiters inside the English interval.
+  int enParagraphs = 0;
+  for ( int en=start.second; en<end.second; ++en )
+  {
+    if (isParagraph(enSentenceList[en].words))
+    {
+      ++enParagraphs;
+    }
+  }
+
+  // Split the counts into pairs assumed to have matched and a one-sided surplus.
+  int pairedParagraphs  = enParagraphs;
+  int surplusParagraphs = huParagraphs - enParagraphs;
+  if (huParagraphs<enParagraphs)
+  {
+    pairedParagraphs  = huParagraphs;
+    surplusParagraphs = enParagraphs - huParagraphs;
+  }
+  const double paragraphShare =
+    scoreOfParagraphMatch * pairedParagraphs + skipScore * surplusParagraphs;
+
+  // Interval lengths with the paragraph delimiters taken out.
+  const int huLength = ( end.first  - start.first  ) - huParagraphs;
+  const int enLength = ( end.second - start.second ) - enParagraphs;
+  const int longest  = huLength>enLength ? huLength : enLength;
+
+  if (longest==0)
+  {
+    return 0; // Guard against dividing by zero.
+  }
+
+  return ( rawScore - paragraphShare ) / longest;
+}
+
+// Score of the j-th segmentum, i.e. the stretch of the trail between
+// rundle j and rundle j+1, as computed by scoreSegmentum().
+// The bigger the better.
+double TrailScoresInterval::operator()( int j ) const
+{
+  // The two bounding rundles are all that scoreSegmentum() needs.
+  return scoreSegmentum( trail[j], trail[j+1] );
+}
+
+// Score of the union of segments running from rundle j to rundle k.
+// Only these two rundles enter the computation, not the ones between them.
+// The bigger the better.
+double TrailScoresInterval::operator()( int j, int k ) const
+{
+  return scoreSegmentum( trail[j], trail[k] );
+}
+
+// Removes from `trail` every rundle whose index appears in `rundlesToKill`;
+// the remaining rundles keep their relative order.
+void removeRundles( Trail& trail, const std::set<int>& rundlesToKill )
+{
+  // Not a speed bottleneck.
+  Trail survivors;
+  for ( size_t idx=0; idx<trail.size(); ++idx )
+  {
+    if (rundlesToKill.count(idx)==0)
+    {
+      survivors.push_back(trail[idx]);
+    }
+  }
+  trail = survivors;
+}
+
+
+// Cautious mode: a rundle survives only when the hole on its left and the hole
+// on its right are both one-to-one; the first rundle is always kept.
+// In terms of the resulting bisentences: a one-to-one bisentence is discarded
+// if one of its neighbours is not one-to-one. This dramatically improves
+// precision while slightly degrading recall.
+// NOTE(review): the loop stops before the last rundle, so it is never copied.
+void cautiouslyFilterTrail( Trail& bestTrail )
+{
+  Trail kept;
+  const int rundleCount = bestTrail.size();
+  for ( int pos=0; pos+1<rundleCount; ++pos )
+  {
+    // Position 0 has no left neighbour and is accepted as is.
+    bool keep = (pos==0);
+    if (!keep)
+    {
+      keep = oneToOne(bestTrail,pos-1) && oneToOne(bestTrail,pos);
+    }
+    if (keep)
+    {
+      kept.push_back(bestTrail[pos]);
+    }
+  }
+
+  bestTrail = kept;
+}
+
+
+// Plain absolute value for doubles. (Original remark, translated from Hungarian: "Oh, C++, may you come to grief.")
+inline double absValue( double x )
+{
+  return !( x>0 ) ? -x : x;
+}
+
+// Removes the rundle on one side of a zero-to-nonzero hole when deleting it brings
+// the Hungarian/English character-count ratio of the merged hole closer to one.
+// Of the left and the right rundle, the one whose removal improves more is erased.
+//
+// Merging is done even if it makes things slightly worse, since real zero-to-one holes
+// are rare (at least in novels). improvementSlack should really be domain-dependent.
+void spaceOutBySentenceLength( Trail& bestTrail, 
+                 const SentenceList& huSentenceListPretty,
+                 const SentenceList& enSentenceList,
+		 bool utfCharCountingMode )
+{
+  // Here i indexes a hole, not a rundle. The bound is spelled i+2<size() because size()-2 is unsigned and would wrap for a trail of fewer than two rundles.
+  for ( size_t i=1; i+2<bestTrail.size(); ) // Note: deliberately no ++i here.
+  {
+    bool huZero = (bestTrail[i].first == bestTrail[i+1].first);
+    bool enZero = (bestTrail[i].second== bestTrail[i+1].second);
+
+    bool huParagraph = ( (bestTrail[i].first +1 == bestTrail[i+1].first) && isParagraph(huSentenceListPretty[bestTrail[i].first].words) );
+    bool enParagraph = ( (bestTrail[i].second+1 == bestTrail[i+1].second) && isParagraph(enSentenceList[bestTrail[i].second].words) );
+
+    if (
+         ( huZero && enParagraph )
+         ||
+         ( enZero && huParagraph )
+       )
+    {
+      ++i;
+      continue;
+    }
+
+    // It is a zero-to-any, and the "any" is not a lonely paragraph-delimiter.
+    if (
+         ( huZero && !enParagraph )
+         ||
+         ( enZero && !huParagraph )
+       )
+    {
+      // continue is not allowed here, because of the ++i in the else branch.
+
+      double huRightBlock  = characterLength( bestTrail[i+1].first, bestTrail[i+2].first, huSentenceListPretty, utfCharCountingMode );
+      double huMiddleBlock = characterLength( bestTrail[i].first,   bestTrail[i+1].first, huSentenceListPretty, utfCharCountingMode );
+      double huLeftBlock   = characterLength( bestTrail[i-1].first, bestTrail[i].first,   huSentenceListPretty, utfCharCountingMode );
+
+      double enRightBlock  = characterLength( bestTrail[i+1].second, bestTrail[i+2].second, enSentenceList, utfCharCountingMode );
+      double enMiddleBlock = characterLength( bestTrail[i].second,   bestTrail[i+1].second, enSentenceList, utfCharCountingMode );
+      double enLeftBlock   = characterLength( bestTrail[i-1].second, bestTrail[i].second,   enSentenceList, utfCharCountingMode );
+
+      // One of the two middle blocks is zero.
+      double oldLeftRatio = (huLeftBlock+1)/(enLeftBlock+1);
+      double newLeftRatio = (huLeftBlock+huMiddleBlock+1)/(enLeftBlock+enMiddleBlock+1);
+
+      double oldRightRatio = (huRightBlock+1)/(enRightBlock+1);
+      double newRightRatio = (huRightBlock+huMiddleBlock+1)/(enRightBlock+enMiddleBlock+1);
+
+      double improvesLeft  = absValue(log(oldLeftRatio )) - absValue(log(newLeftRatio )) ;
+      double improvesRight = absValue(log(oldRightRatio)) - absValue(log(newRightRatio)) ;
+
+      const double improvementSlack = log(0.8); // Negative, so a merge that worsens the ratio slightly is still accepted.
+      if ( (improvesLeft>improvementSlack) || (improvesRight>improvementSlack) )
+      {
+        bool eraseLeft = (improvesLeft>improvesRight);
+
+        if (eraseLeft)
+        {
+          bestTrail.erase(bestTrail.begin()+i);
+        }
+        else
+        {
+          bestTrail.erase(bestTrail.begin()+i+1);
+        }
+
+      }
+      else
+      {
+        ++i;
+      }
+    }
+    else
+    {
+      ++i;
+    }
+  }
+}
+
+
+// Trims the beginning of bestTrail. A window of ten rundles slides forward from
+// position 1; the rundles of every window scoring below qualityThreshold are marked,
+// and the scan stops at the first window that passes. trailScoresInterval may read
+// bestTrail through a const reference, so bestTrail is only modified at the very end.
+void postprocessTrailStart( Trail& bestTrail,
+                            const TrailScoresInterval& trailScoresInterval,
+                            const double& qualityThreshold )
+{
+  const int window = 10;
+  const int stopPos = int(bestTrail.size()) - 1 - window;
+
+  std::set<int> doomed;
+
+  for ( int pos=1; pos<stopPos; ++pos )
+  {
+    const double avg = trailScoresInterval( pos, pos+window );
+
+    if (!(avg<qualityThreshold))
+    {
+      break; // First acceptable window: the rest of the trail is left alone.
+    }
+
+    if (global_postprocessLogging)
+    {
+      std::cerr << "Thrown away at position " << pos
+        << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
+    }
+
+    // Mark the rundles of this window, never reaching the final rundle of the trail.
+    const size_t windowEnd = size_t(pos+window);
+    for ( size_t j=pos; (j<windowEnd) && (j<bestTrail.size()-1); ++j )
+    {
+      doomed.insert(j);
+    }
+  }
+
+  removeRundles( bestTrail, doomed );
+}
+
+// Trims the end of bestTrail. A window of ten rundles slides backward from the
+// last full window; the rundles of every window scoring below qualityThreshold are
+// marked, and the scan stops at the first window that passes. trailScoresInterval may
+// read bestTrail through a const reference, so bestTrail is only modified at the very end.
+void postprocessTrailEnd( Trail& bestTrail,
+                            const TrailScoresInterval& trailScoresInterval,
+                            const double& qualityThreshold )
+{
+  const int window = 10;
+  const int firstPos = int(bestTrail.size()) - 1 - window - 1;
+
+  std::set<int> doomed;
+
+  for ( int pos=firstPos; pos>0; --pos )
+  {
+    const double avg = trailScoresInterval( pos, pos+window );
+
+    if (!(avg<qualityThreshold))
+    {
+      break; // First acceptable window: the rest of the trail is left alone.
+    }
+
+    if (global_postprocessLogging)
+    {
+      std::cerr << "Thrown away at position " << pos
+        << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
+    }
+
+    // Mark the rundles of this window, never reaching the final rundle of the trail.
+    const size_t windowEnd = size_t(pos+window);
+    for ( size_t j=pos; (j<windowEnd) && (j<bestTrail.size()-1); ++j )
+    {
+      doomed.insert(j);
+    }
+  }
+
+  removeRundles( bestTrail, doomed );
+}
+
+// Runs postprocessTrailStart() and then postprocessTrailEnd() on bestTrail with the same threshold.
+// NOTE(review): if trailScoresInterval refers to bestTrail itself (as the notes on the neighbouring functions
+// assume), the second pass is scored against the trail already shortened by the first — confirm this is intended.
+void postprocessTrailStartAndEnd( Trail& bestTrail, const TrailScoresInterval& trailScoresInterval, double qualityThreshold )
+{
+  postprocessTrailStart( bestTrail, trailScoresInterval, qualityThreshold );
+  postprocessTrailEnd  ( bestTrail, trailScoresInterval, qualityThreshold );
+}
+
+// Scans the whole trail with a window of ten rundles and removes the rundles of every
+// window whose score is below qualityThreshold. trailScoresInterval may read bestTrail
+// through a const reference, so bestTrail is only modified after all scores were read.
+void postprocessTrail( Trail& bestTrail, const TrailScoresInterval& trailScoresInterval, double qualityThreshold )
+{
+  const int window = 10;
+  const int stopPos = int(bestTrail.size()) - 1 - window;
+  std::set<int> doomed;
+
+  for ( int pos=1; pos<stopPos; ++pos )
+  {
+    const double avg = trailScoresInterval( pos, pos+window );
+    if (!(avg<qualityThreshold))
+    {
+      continue;
+    }
+
+    if (global_postprocessLogging)
+    {
+      std::cerr << "Thrown away at position " << pos
+        << ", avarage " << avg << ", threshold " << qualityThreshold << std::endl;
+    }
+
+    const size_t windowEnd = size_t(pos+window);
+    for ( size_t j=pos; (j<windowEnd) && (j<bestTrail.size()-1); ++j )
+    {
+      doomed.insert(j);
+    }
+  }
+
+  removeRundles( bestTrail, doomed );
+}
+
+
+// Erases rundles which are predominantly surrounded by not-one-to-one holes:
+// for every window of ten holes, if the fraction of holes for which oneToOne()
+// holds is below qualityThreshold, the rundles of that window are removed.
+void postprocessTrailByTopology( Trail& bestTrail, double qualityThreshold )
+{
+  const int window = 10;
+  const int stopPos = int(bestTrail.size()) - 1 - window;
+  std::set<int> doomed;
+
+  for ( int pos=1; pos<stopPos; ++pos )
+  {
+    int oneToOneHoles = 0;
+    for ( int hole=pos; hole<pos+window; ++hole )
+    {
+      oneToOneHoles += ( oneToOne(bestTrail,hole) ? 1 : 0 );
+    }
+    const double avg = double(oneToOneHoles) / window;
+
+    if (!(avg<qualityThreshold))
+    {
+      continue;
+    }
+
+    if (global_postprocessLogging)
+    {
+      std::cerr << "Thrown away at position " << pos
+        << ", avarage " << avg << std::endl;
+    }
+
+    for ( size_t j=pos; (j<size_t(pos+window)) && (j<bestTrail.size()-1); ++j )
+    {
+      doomed.insert(j);
+    }
+  }
+
+  removeRundles( bestTrail, doomed );
+}
+
+// Clears bisentenceList, then appends every rundle bestTrail[pos] (last rundle excluded) for
+// which oneToOne(bestTrail,pos) holds and trailScores(pos) is at least qualityThreshold.
+void trailToBisentenceList( const Trail& bestTrail, const TrailScores& trailScores, double qualityThreshold,
+                            BisentenceList& bisentenceList )
+{
+  bisentenceList.clear();
+  const int holeCount = int(bestTrail.size()) - 1;
+  for ( int pos=0; pos<holeCount; ++pos )
+  {
+    if ( !oneToOne(bestTrail,pos) || !(trailScores(pos)>=qualityThreshold) )
+    {
+      continue;
+    }
+    bisentenceList.push_back(bestTrail[pos]);
+  }
+}
+
+
+// Keeps the first and the last rundle unconditionally, and every inner rundle i whose
+// right-hand segment score, trailScoresInterval(i), is at least qualityThreshold.
+// As the original author noted, this is basically incorrect: only the right-hand
+// segment decides about the rundle. trailScoresInterval may read trail through a
+// const reference, so trail is only overwritten after all scores were read.
+// A trail with fewer than two rundles has no inner rundle and is left unchanged.
+void filterTrailByQuality( Trail& trail, const TrailScoresInterval& trailScoresInterval,
+                           const double& qualityThreshold )
+{
+  if (trail.size()<2) return; // Avoids front()/back() on an empty trail, size()-1 wrapping, and duplicating a lone rundle.
+
+  Trail newTrail;
+  newTrail.push_back(trail.front());
+  for ( size_t i=1; i+1<trail.size(); ++i )
+  {
+    if ( trailScoresInterval(i) >= qualityThreshold )
+    {
+      newTrail.push_back(trail[i]);
+    }
+  }
+  newTrail.push_back(trail.back());
+  trail = newTrail;
+}
+// Keeps only the bisentences whose BisentenceListScores value is at least qualityThreshold.
+void filterBisentenceListByQuality( BisentenceList& bisentenceList, const AlignMatrix& dynMatrix,
+                                    const double& qualityThreshold )
+{
+  // The scorer reads the input list by reference, so survivors are gathered separately.
+  BisentenceList kept;
+  BisentenceListScores scoreOf(bisentenceList,dynMatrix);
+
+  for ( size_t i=0; i<bisentenceList.size(); ++i )
+  {
+    if ( !(scoreOf(i) >= qualityThreshold) )
+    {
+      continue;
+    }
+    kept.push_back(bisentenceList[i]);
+  }
+  bisentenceList = kept;
+}
+
+} // namespace TMXAligner
Index: branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_trail_postprocessors.h	(revision 69632)
@@ -0,0 +1,142 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_TRAILPOSTPROCESSORS_H
+#define __TMXALIGNER_ALIGNMENT_TRAILPOSTPROCESSORS_H
+
+#include <apertium/tmx_alignment.h>
+
+namespace TMXAligner
+{
+
+// Helper class that calculates scores of holes. It holds references only, so the trail and the matrix must outlive it.
+class TrailScores
+{
+public:
+  TrailScores( const Trail& trail_, const AlignMatrix& dynMatrix_ );
+  // The score of the jth segmentum (between rundles j and j+1). The bigger the better.
+  double operator()( int j ) const;
+
+private:
+  const Trail& trail;           // not owned
+  const AlignMatrix& dynMatrix; // not owned
+};
+
+
+class SentenceList;
+
+
+// Helper class that calculates scores of segmentums. It holds references only, so all four constructor arguments must outlive it.
+class TrailScoresInterval
+{
+public:
+  TrailScoresInterval( const Trail& trail_, const AlignMatrix& dynMatrix_,
+    const SentenceList& huSentenceList_, const SentenceList& enSentenceList_ );
+
+  // The average score of the segmentum between start and end. The bigger the better.
+  // Division is by the maximum of the Hungarian and English intervals.
+  // This is a somewhat arbitrary decision, and goes very badly with the
+  // scoring of the knight's moves. But we really have no better choice.
+  // 
+  // Also, the method applies some very ugly hacks to avoid the effect of
+  // paragraph-delimiters. It strips both intervals of <p>s, and
+  // modifies the dynMatrix-based score assuming that all <p>s got paired,
+  // except surplus <p>s.
+  double scoreSegmentum( const Rundle& start, const Rundle& end ) const;
+
+  // The score of a segment identified by its index.
+  double operator()( int j ) const;
+  // The score of a union of segments identified by its start and end rundles' index.
+  // Both these methods rely on scoreSegmentum():
+  // This means an important thing: the score only depends
+  // on the start and end rundle, not the rundles in between.
+  double operator()( int j, int k ) const;
+
+private:
+  const Trail& trail;                 // not owned
+  const AlignMatrix& dynMatrix;       // not owned
+  const SentenceList& huSentenceList; // not owned; scanned for paragraph delimiters
+  const SentenceList& enSentenceList; // not owned; scanned for paragraph delimiters
+};
+
+// Helper class that calculates scores of one-to-one holes. It holds references only, so the list and the matrix must outlive it.
+class BisentenceListScores
+{
+public:
+  BisentenceListScores( const BisentenceList& bisentenceList_, const AlignMatrix& dynMatrix_ );
+  // The score of the jth bisentence. The bigger the better.
+  double operator()( int j ) const;
+
+private:
+  const BisentenceList& bisentenceList; // not owned
+  const AlignMatrix& dynMatrix;         // not owned
+};
+// Removes from trail the rundles whose indices are in rundlesToKill. NOTE(review): std::set is used here but <set> is not included by this header directly — presumably it arrives via tmx_alignment.h; confirm.
+void removeRundles( Trail& trail, const std::set<int>& rundlesToKill );
+
+// In cautious mode, auto-aligned rundles are thrown away if
+// their left or right neighbour holes are not one-to-one.
+// From the point of view of the resultant bisentences:
+// In cautious mode, one-to-one bisentences are thrown away if
+// they have left or right neighbours which are not one-to-one.
+// This of course dramatically improves precision while slightly degrading recall.
+void cautiouslyFilterTrail( Trail& bestTrail );
+// Erases a rundle next to a zero-to-nonzero hole when that brings the Hungarian/English character-count ratio of the merged hole closer to one.
+void spaceOutBySentenceLength( Trail& bestTrail, 
+                 const SentenceList& huSentenceListPretty,
+                 const SentenceList& enSentenceList,
+		 bool utfCharCountingMode );
+
+// The function gets a nonconst reference to bestTrail.
+// On the other hand, it gets a const reference to bestTrail, through trailScoresInterval.
+// Therefore, the function may only modify bestTrail after it finished reading trailScoresInterval.
+void postprocessTrailStart( Trail& bestTrail,
+                            const TrailScoresInterval& trailScoresInterval,
+                            const double& qualityThreshold );
+
+// Applies the start-trimming pass and then the end-trimming pass to bestTrail.
+// If trailScoresInterval refers to bestTrail itself (as the notes on the neighbouring functions
+// assume), the second pass is scored against the trail already shortened by the first one.
+void postprocessTrailStartAndEnd( Trail& bestTrail,
+                                  const TrailScoresInterval& trailScoresInterval,
+                                  double qualityThreshold );
+
+// The function gets a nonconst reference to bestTrail.
+// On the other hand, it gets a const reference to bestTrail, through trailScoresInterval.
+// Therefore, the function may only modify bestTrail after it finished reading trailScoresInterval.
+void postprocessTrail( Trail& bestTrail, 
+                       const TrailScoresInterval& trailScoresInterval, 
+                       double qualityThreshold );
+
+
+// Throws away rundles which are predominantly surrounded by not-one-to-one holes.
+void postprocessTrailByTopology( Trail& bestTrail, double qualityThreshold );
+
+
+// Only collect bisentences with score at least qualityThreshold.
+void trailToBisentenceList( const Trail& bestTrail, const TrailScores& trailScores, double qualityThreshold,
+                            BisentenceList& bisentenceList );
+
+// This is basically incorrect.
+// Here we use the score of the right-hand segment to decide about the rundle.
+//
+// The function gets a nonconst reference to bestTrail.
+// On the other hand, it gets a const reference to bestTrail, through trailScoresInterval.
+// Therefore, the function may only modify bestTrail after it finished reading trailScoresInterval.
+void filterTrailByQuality( Trail& trail, const TrailScoresInterval& trailScoresInterval,
+                           const double& qualityThreshold );
+
+void filterBisentenceListByQuality( BisentenceList& bisentenceList, const AlignMatrix& dynMatrix,
+                                    const double& qualityThreshold );
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_TRAILPOSTPROCESSORS_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_translate.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_translate.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_translate.h	(revision 69632)
@@ -0,0 +1,76 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_TRANSLATE_H
+#define __TMXALIGNER_ALIGNMENT_TRANSLATE_H
+
+#include <apertium/tmx_words.h>
+#include <apertium/tmx_dictionary.h>
+
+namespace TMXAligner
+{
+
+typedef std::map< std::string, Phrase > DumbDictionary;
+
+// This will become a class, with dictionary initialization, and a translate method.
+// It will have various implementations.
+
+void buildDumbDictionary( const DictionaryItems& dictionary, DumbDictionary& dumbDictionary );
+
+void buildDumbDictionaryUsingFrequencies( 
+       const DictionaryItems& dictionary, 
+       FrequencyMap& enFreq, 
+       DumbDictionary& dumbDictionary );
+
+void buildDumbDictionary( TMXAligner::DumbDictionary& dumbDictionary,
+                          const std::string& dictionaryFilename,
+                          const TMXAligner::SentenceList& enSentenceList = TMXAligner::SentenceList()
+                        );
+
+void trivialTranslateWord(
+                     const DumbDictionary& dumbDictionary,
+                     const Word& originalWord,
+                     Phrase& words
+                     );
+
+void trivialTranslate(
+                     const DumbDictionary& dumbDictionary,
+                     const Sentence& sentence,
+                           Sentence& translatedSentence
+                     );
+
+void trivialTranslateSentenceList(
+                     const DumbDictionary& dumbDictionary,
+                     const SentenceList& sentenceList,
+                           SentenceList& translatedSentenceList
+                     );
+
+void naiveTranslate(
+                     const DictionaryItems& dictionary,
+                     const SentenceList& sentenceList,
+                           SentenceList& translatedSentenceList
+                     );
+
+typedef std::multimap< std::string, Phrase > DumbMultiDictionary;
+
+void buildDumbMultiDictionary( const DictionaryItems& dictionary, DumbMultiDictionary& dumbMultiDictionary, bool reverse );
+
+void sortNormalizeSentences( TMXAligner::SentenceList& sentenceList );
+
+// This function preprocesses the sentences so that sentenceListsToAlignMatrixIdentity can be applied to them.
+// It does a rough translation and an alphabetic sort of words.
+void normalizeTextsForIdentity( const DictionaryItems& dictionary,
+                                const SentenceList& huSentenceListPretty,  const SentenceList& enSentenceListPretty,
+                                      SentenceList& huSentenceListGarbled,       SentenceList& enSentenceListGarbled );
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_TRANSLATE_H
Index: branches/apertium-tagger/apertium2/apertium/tmx_words.h
===================================================================
--- branches/apertium-tagger/apertium2/apertium/tmx_words.h	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/tmx_words.h	(revision 69632)
@@ -0,0 +1,55 @@
+/*************************************************************************
+*                                                                        *
+*  (C) Copyright 2004. Media Research Centre at the                      *
+*  Sociology and Communications Department of the                        *
+*  Budapest University of Technology and Economics.                      *
+*                                                                        *
+*  Developed by Daniel Varga.                                            *
+*                                                                        *
+*  From hunalign; for license see ../AUTHORS and ../COPYING.hunalign     *
+*                                                                        *
+*************************************************************************/
+#ifndef __TMXALIGNER_ALIGNMENT_WORDS_H
+#define __TMXALIGNER_ALIGNMENT_WORDS_H
+
+#include <string>
+#include <vector>
+#include <iosfwd>
+
+namespace TMXAligner
+{
+
+typedef std::string String;
+
+typedef String Word;
+
+typedef std::vector<Word> WordList;
+
+typedef WordList Phrase;
+
+typedef std::vector<Phrase> Book;
+
+struct Sentence
+{
+  WordList words;    // the words of the sentence
+  String   sentence; // presumably the sentence as one string — TODO confirm against the reader
+  String   id;       // presumably the identifier skipped by the *NoIds readers/writers — TODO confirm
+};
+
+// Implemented in dictionary.cpp (NOTE(review): file name as given by the hunalign original — confirm where the definitions live in this tree).
+class SentenceList : public std::vector<Sentence> // NOTE(review): std::vector has no virtual destructor; never delete a SentenceList through a std::vector<Sentence>*.
+{
+public:
+  void read ( std::istream& is );
+  void readNoIds( std::istream& is );
+  void write( std::ostream& os ) const;
+  void writeNoIds( std::ostream& os ) const;
+};
+
+// Implemented in dictionary.cpp
+void readBicorpus( std::istream& is, SentenceList& huSentenceList, SentenceList& enSentenceList);
+void writeBicorpus( std::ostream& os, const SentenceList& huSentenceList, const SentenceList& enSentenceList);
+
+} // namespace TMXAligner
+
+#endif // #define __TMXALIGNER_ALIGNMENT_WORDS_H
Index: branches/apertium-tagger/apertium2/apertium/html-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/html-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/html-format.xml	(revision 69632)
@@ -0,0 +1,378 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;script"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/script"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;apertium-notrans"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/apertium-notrans&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;style"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/style"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3">
+      <tag regexp='"&lt;br"(" "[^&gt;]*)?"&gt;"|"&lt;hr"(" "[^&gt;]*)?"&gt;"|"&lt;p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;li"(" "[^&gt;]*)?"&gt;"|"&lt;ul"(" "[^&gt;]*)?"&gt;"|"&lt;ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;tr"(" "[^&gt;]*)?"&gt;"|"&lt;td"(" "[^&gt;]*)?"&gt;"|"&lt;th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3">
+      <tag regexp='"&lt;/br"(" "[^&gt;]*)?"&gt;"|"&lt;/hr"(" "[^&gt;]*)?"&gt;"|"&lt;/p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/li"(" "[^&gt;]*)?"&gt;"|"&lt;/ul"(" "[^&gt;]*)?"&gt;"|"&lt;/ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/tr"(" "[^&gt;]*)?"&gt;"|"&lt;/td"(" "[^&gt;]*)?"&gt;"|"&lt;/th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+<!-- Some end-of-sentence tags are still missing. -->
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;title"(" "[^&gt;]*)?"&gt;"|"&lt;div"(" "[^&gt;]*)?"&gt;"|"&lt;option"(" "[^&gt;]*)?"&gt;"|"&lt;h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/title"(" "[^&gt;]*)?"&gt;"|"&lt;/div"(" "[^&gt;]*)?"&gt;"|"&lt;/option"(" "[^&gt;]*)?"&gt;"|"&lt;/h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+<!-- The DOCTYPE tag is not recognized. -->
+
+    <format-rule type="empty" eos="no" priority="4">
+      <tag regexp='"&lt;"("img"|"link")(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="5">
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+<!-- End (changes made by Gorka L.) -->
+
+    <format-rule type="open" eos="no" priority="5">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'>
+      <replace source="&amp;Agrave;" target="À" prefer="yes"/>
+      <replace source="&amp;#192;" target="À"/>
+      <replace source="&amp;#xC0;" target="À"/>
+      <replace source="&amp;#xc0;" target="À"/>
+      <replace source="&amp;Aacute;" target="Á" prefer="yes"/>
+      <replace source="&amp;#193;" target="Á"/>
+      <replace source="&amp;#xC1;" target="Á"/>
+      <replace source="&amp;#xc1;" target="Á"/>
+      <replace source="&amp;Acirc;" target="Â" prefer="yes"/>
+      <replace source="&amp;#194;" target="Â"/>
+      <replace source="&amp;#xC2;" target="Â"/>
+      <replace source="&amp;#xc2;" target="Â"/>
+      <replace source="&amp;Atilde;" target="Ã" prefer="yes"/>
+      <replace source="&amp;#195;" target="Ã"/>
+      <replace source="&amp;#xC3;" target="Ã"/>
+      <replace source="&amp;#xc3;" target="Ã"/>
+      <replace source="&amp;Auml;" target="Ä" prefer="yes"/>
+      <replace source="&amp;#196;" target="Ä"/>
+      <replace source="&amp;#xC4;" target="Ä"/>
+      <replace source="&amp;#xc4;" target="Ä"/>
+      <replace source="&amp;Aring;" target="Å" prefer="yes"/>
+      <replace source="&amp;#197;" target="Å"/>
+      <replace source="&amp;#xC5;" target="Å"/>
+      <replace source="&amp;#xc5;" target="Å"/>
+      <replace source="&amp;AElig;" target="Æ" prefer="yes"/>
+      <replace source="&amp;#198;" target="Æ"/>
+      <replace source="&amp;#xC6;" target="Æ"/>
+      <replace source="&amp;#xc6;" target="Æ"/>
+      <replace source="&amp;Ccedil;" target="Ç" prefer="yes"/>
+      <replace source="&amp;#199;" target="Ç"/>
+      <replace source="&amp;#xC7;" target="Ç"/>
+      <replace source="&amp;#xc7;" target="Ç"/>
+      <replace source="&amp;Egrave;" target="È" prefer="yes"/>
+      <replace source="&amp;#200;" target="È"/>
+      <replace source="&amp;#xC8;" target="È"/>
+      <replace source="&amp;#xc8;" target="È"/>
+      <replace source="&amp;Eacute;" target="É" prefer="yes"/>
+      <replace source="&amp;#201;" target="É"/>
+      <replace source="&amp;#xC9;" target="É"/>
+      <replace source="&amp;#xc9;" target="É"/>
+      <replace source="&amp;Ecirc;" target="Ê" prefer="yes"/>
+      <replace source="&amp;#202;" target="Ê"/>
+      <replace source="&amp;#xCA;" target="Ê"/>
+      <replace source="&amp;#xca;" target="Ê"/>
+      <replace source="&amp;Euml;" target="Ë" prefer="yes"/>
+      <replace source="&amp;#203;" target="Ë"/>
+      <replace source="&amp;#xCB;" target="Ë"/>
+      <replace source="&amp;#xcb;" target="Ë"/>
+      <replace source="&amp;Igrave;" target="Ì" prefer="yes"/>
+      <replace source="&amp;#204;" target="Ì"/>
+      <replace source="&amp;#xCC;" target="Ì"/>
+      <replace source="&amp;#xcc;" target="Ì"/>
+      <replace source="&amp;Iacute;" target="Í" prefer="yes"/>
+      <replace source="&amp;#205;" target="Í"/>
+      <replace source="&amp;#xCD;" target="Í"/>
+      <replace source="&amp;#xcd;" target="Í"/>
+      <replace source="&amp;Icirc;" target="Î" prefer="yes"/>
+      <replace source="&amp;#206;" target="Î"/>
+      <replace source="&amp;#xCE;" target="Î"/>
+      <replace source="&amp;#xce;" target="Î"/>
+      <replace source="&amp;Iuml;" target="Ï" prefer="yes"/>
+      <replace source="&amp;#207;" target="Ï"/>
+      <replace source="&amp;#xCF;" target="Ï"/>
+      <replace source="&amp;#xcf;" target="Ï"/>
+      <replace source="&amp;ETH;" target="Ð" prefer="yes"/>
+      <replace source="&amp;#208;" target="Ð"/>
+      <replace source="&amp;#xD0;" target="Ð"/>
+      <replace source="&amp;#xd0;" target="Ð"/>
+      <replace source="&amp;Ntilde;" target="Ñ" prefer="yes"/>
+      <replace source="&amp;#209;" target="Ñ"/>
+      <replace source="&amp;#xD1;" target="Ñ"/>
+      <replace source="&amp;#xd1;" target="Ñ"/>
+      <replace source="&amp;Ograve;" target="Ò" prefer="yes"/>
+      <replace source="&amp;#210;" target="Ò"/>
+      <replace source="&amp;#xD2;" target="Ò"/>
+      <replace source="&amp;#xd2;" target="Ò"/>
+      <replace source="&amp;Oacute;" target="Ó" prefer="yes"/>
+      <replace source="&amp;#211;" target="Ó"/>
+      <replace source="&amp;#xD3;" target="Ó"/>
+      <replace source="&amp;#xd3;" target="Ó"/>
+      <replace source="&amp;Ocirc;" target="Ô" prefer="yes"/>
+      <replace source="&amp;#212;" target="Ô"/>
+      <replace source="&amp;#xD4;" target="Ô"/>
+      <replace source="&amp;#xd4;" target="Ô"/>
+      <replace source="&amp;Otilde;" target="Õ" prefer="yes"/>
+      <replace source="&amp;#213;" target="Õ"/>
+      <replace source="&amp;#xD5;" target="Õ"/>
+      <replace source="&amp;#xd5;" target="Õ"/>
+      <replace source="&amp;Ouml;" target="Ö" prefer="yes"/>
+      <replace source="&amp;#214;" target="Ö"/>
+      <replace source="&amp;#xD6;" target="Ö"/>
+      <replace source="&amp;#xd6;" target="Ö"/>
+      <replace source="&amp;Oslash;" target="Ø" prefer="yes"/>
+      <replace source="&amp;#216;" target="Ø"/>
+      <replace source="&amp;#xD8;" target="Ø"/>
+      <replace source="&amp;#xd8;" target="Ø"/>
+      <replace source="&amp;Ugrave;" target="Ù" prefer="yes"/>
+      <replace source="&amp;#217;" target="Ù"/>
+      <replace source="&amp;#xD9;" target="Ù"/>
+      <replace source="&amp;#xd9;" target="Ù"/>
+      <replace source="&amp;Uacute;" target="Ú" prefer="yes"/>
+      <replace source="&amp;#218;" target="Ú"/>
+      <replace source="&amp;#xDA;" target="Ú"/>
+      <replace source="&amp;#xda;" target="Ú"/>
+      <replace source="&amp;Ucirc;" target="Û" prefer="yes"/>
+      <replace source="&amp;#219;" target="Û"/>
+      <replace source="&amp;#xDB;" target="Û"/>
+      <replace source="&amp;#xdb;" target="Û"/>
+      <replace source="&amp;Uuml;" target="Ü" prefer="yes"/>
+      <replace source="&amp;#220;" target="Ü"/>
+      <replace source="&amp;#xDC;" target="Ü"/>
+      <replace source="&amp;#xdc;" target="Ü"/>
+      <replace source="&amp;Yacute;" target="Ý" prefer="yes"/>
+      <replace source="&amp;#221;" target="Ý"/>
+      <replace source="&amp;#xDD;" target="Ý"/>
+      <replace source="&amp;#xdd;" target="Ý"/>
+      <replace source="&amp;THORN;" target="Þ" prefer="yes"/>
+      <replace source="&amp;#222;" target="Þ"/>
+      <replace source="&amp;#xDE;" target="Þ"/>
+      <replace source="&amp;#xde;" target="Þ"/>
+      <replace source="&amp;szlig;" target="ß" prefer="yes"/>
+      <replace source="&amp;#223;" target="ß"/>
+      <replace source="&amp;#xDF;" target="ß"/>
+      <replace source="&amp;#xdf;" target="ß"/>
+      <replace source="&amp;agrave;" target="à" prefer="yes"/>
+      <replace source="&amp;#224;" target="à"/>
+      <replace source="&amp;#xE0;" target="à"/>
+      <replace source="&amp;#xe0;" target="à"/>
+      <replace source="&amp;aacute;" target="á" prefer="yes"/>
+      <replace source="&amp;#225;" target="á"/>
+      <replace source="&amp;#xE1;" target="á"/>
+      <replace source="&amp;#xe1;" target="á"/>
+      <replace source="&amp;acirc;" target="â" prefer="yes"/>
+      <replace source="&amp;#226;" target="â"/>
+      <replace source="&amp;#xE2;" target="â"/>
+      <replace source="&amp;#xe2;" target="â"/>
+      <replace source="&amp;atilde;" target="ã" prefer="yes"/>
+      <replace source="&amp;#227;" target="ã"/>
+      <replace source="&amp;#xE3;" target="ã"/>
+      <replace source="&amp;#xe3;" target="ã"/>
+      <replace source="&amp;auml;" target="ä" prefer="yes"/>
+      <replace source="&amp;#228;" target="ä"/>
+      <replace source="&amp;#xE4;" target="ä"/>
+      <replace source="&amp;#xe4;" target="ä"/>
+      <replace source="&amp;aring;" target="å" prefer="yes"/>
+      <replace source="&amp;#229;" target="å"/>
+      <replace source="&amp;#xE5;" target="å"/>
+      <replace source="&amp;#xe5;" target="å"/>
+      <replace source="&amp;aelig;" target="æ" prefer="yes"/>
+      <replace source="&amp;#230;" target="æ"/>
+      <replace source="&amp;#xE6;" target="æ"/>
+      <replace source="&amp;#xe6;" target="æ"/>
+      <replace source="&amp;ccedil;" target="ç" prefer="yes"/>
+      <replace source="&amp;#231;" target="ç"/>
+      <replace source="&amp;#xE7;" target="ç"/>
+      <replace source="&amp;#xe7;" target="ç"/>
+      <replace source="&amp;egrave;" target="è" prefer="yes"/>
+      <replace source="&amp;#232;" target="è"/>
+      <replace source="&amp;#xE8;" target="è"/>
+      <replace source="&amp;#xe8;" target="è"/>
+      <replace source="&amp;eacute;" target="é" prefer="yes"/>
+      <replace source="&amp;#233;" target="é"/>
+      <replace source="&amp;#xE9;" target="é"/>
+      <replace source="&amp;#xe9;" target="é"/>
+      <replace source="&amp;ecirc;" target="ê" prefer="yes"/>
+      <replace source="&amp;#234;" target="ê"/>
+      <replace source="&amp;#xEA;" target="ê"/>
+      <replace source="&amp;#xea;" target="ê"/>
+      <replace source="&amp;euml;" target="ë" prefer="yes"/>
+      <replace source="&amp;#235;" target="ë"/>
+      <replace source="&amp;#xEB;" target="ë"/>
+      <replace source="&amp;#xeb;" target="ë"/>
+      <replace source="&amp;igrave;" target="ì" prefer="yes"/>
+      <replace source="&amp;#236;" target="ì"/>
+      <replace source="&amp;#xEC;" target="ì"/>
+      <replace source="&amp;#xec;" target="ì"/>
+      <replace source="&amp;iacute;" target="í" prefer="yes"/>
+      <replace source="&amp;#237;" target="í"/>
+      <replace source="&amp;#xED;" target="í"/>
+      <replace source="&amp;#xed;" target="í"/>
+      <replace source="&amp;icirc;" target="î" prefer="yes"/>
+      <replace source="&amp;#238;" target="î"/>
+      <replace source="&amp;#xEE;" target="î"/>
+      <replace source="&amp;#xee;" target="î"/>
+      <replace source="&amp;iuml;" target="ï" prefer="yes"/>
+      <replace source="&amp;#239;" target="ï"/>
+      <replace source="&amp;#xEF;" target="ï"/>
+      <replace source="&amp;#xef;" target="ï"/>
+      <replace source="&amp;eth;" target="ð" prefer="yes"/>
+      <replace source="&amp;#240;" target="ð"/>
+      <replace source="&amp;#xF0;" target="ð"/>
+      <replace source="&amp;#xf0;" target="ð"/>
+      <replace source="&amp;ntilde;" target="ñ" prefer="yes"/>
+      <replace source="&amp;#241;" target="ñ"/>
+      <replace source="&amp;#xF1;" target="ñ"/>
+      <replace source="&amp;#xf1;" target="ñ"/>
+      <replace source="&amp;ograve;" target="ò" prefer="yes"/>
+      <replace source="&amp;#242;" target="ò"/>
+      <replace source="&amp;#xF2;" target="ò"/>
+      <replace source="&amp;#xf2;" target="ò"/>
+      <replace source="&amp;oacute;" target="ó" prefer="yes"/>
+      <replace source="&amp;#243;" target="ó"/>
+      <replace source="&amp;#xF3;" target="ó"/>
+      <replace source="&amp;#xf3;" target="ó"/>
+      <replace source="&amp;ocirc;" target="ô" prefer="yes"/>
+      <replace source="&amp;#244;" target="ô"/>
+      <replace source="&amp;#xF4;" target="ô"/>
+      <replace source="&amp;#xf4;" target="ô"/>
+      <replace source="&amp;otilde;" target="õ" prefer="yes"/>
+      <replace source="&amp;#245;" target="õ"/>
+      <replace source="&amp;#xF5;" target="õ"/>
+      <replace source="&amp;#xf5;" target="õ"/>
+      <replace source="&amp;ouml;" target="ö" prefer="yes"/>
+      <replace source="&amp;#246;" target="ö"/>
+      <replace source="&amp;#xF6;" target="ö"/>
+      <replace source="&amp;#xf6;" target="ö"/>
+      <replace source="&amp;oslash;" target="ø" prefer="yes"/>
+      <replace source="&amp;#248;" target="ø"/>
+      <replace source="&amp;#xF8;" target="ø"/>
+      <replace source="&amp;#xf8;" target="ø"/>
+      <replace source="&amp;ugrave;" target="ù" prefer="yes"/>
+      <replace source="&amp;#249;" target="ù"/>
+      <replace source="&amp;#xF9;" target="ù"/>
+      <replace source="&amp;#xf9;" target="ù"/>
+      <replace source="&amp;uacute;" target="ú" prefer="yes"/>
+      <replace source="&amp;#250;" target="ú"/>
+      <replace source="&amp;#xFA;" target="ú"/>
+      <replace source="&amp;#xfa;" target="ú"/>
+      <replace source="&amp;ucirc;" target="û" prefer="yes"/>
+      <replace source="&amp;#251;" target="û"/>
+      <replace source="&amp;#xFB;" target="û"/>
+      <replace source="&amp;#xfb;" target="û"/>
+      <replace source="&amp;uuml;" target="ü" prefer="yes"/>
+      <replace source="&amp;#252;" target="ü"/>
+      <replace source="&amp;#xFC;" target="ü"/>
+      <replace source="&amp;#xfc;" target="ü"/>
+      <replace source="&amp;yacute;" target="ý" prefer="yes"/>
+      <replace source="&amp;#253;" target="ý"/>
+      <replace source="&amp;#xFD;" target="ý"/>
+      <replace source="&amp;#xfd;" target="ý"/>
+      <replace source="&amp;thorn;" target="þ" prefer="yes"/>
+      <replace source="&amp;#254;" target="þ"/>
+      <replace source="&amp;#xFE;" target="þ"/>
+      <replace source="&amp;#xfe;" target="þ"/>
+      <replace source="&amp;yuml;" target="ÿ" prefer="yes"/>
+      <replace source="&amp;#255;" target="ÿ"/>
+      <replace source="&amp;#xFF;" target="ÿ"/>
+      <replace source="&amp;#xff;" target="ÿ"/>
+      <replace source="&amp;middot;" target="·" prefer="yes"/>
+      <replace source="&amp;#183;" target="·"/>
+      <replace source="&amp;#xB7;" target="·"/>
+      <replace source="&amp;#xb7;" target="·"/>
+      <replace source="&amp;laquo;" target="«" prefer="yes"/>
+      <replace source="&amp;#171;" target="«"/>
+      <replace source="&amp;#xAB;" target="«"/>
+      <replace source="&amp;#xab;" target="«"/>
+      <replace source="&amp;raquo;" target="»" prefer="yes"/>
+      <replace source="&amp;#187;" target="»"/>
+      <replace source="&amp;#xBB;" target="»"/>
+      <replace source="&amp;#xbb;" target="»"/>
+
+      <replace source="&amp;rsquo;" target="'"/>
+
+
+      <!-- For Esperanto characters. Named entities like &circ; do not work in browsers, so follow the scheme of http://bertilow.com/html/esperantaj/html_uni.html -->
+      <replace source="&amp;#264;" target="Ĉ" prefer="yes"/>
+      <replace source="&amp;#265;" target="ĉ" prefer="yes"/>
+      <replace source="&amp;#284;" target="Ĝ" prefer="yes"/>
+      <replace source="&amp;#285;" target="ĝ" prefer="yes"/>
+      <replace source="&amp;#292;" target="Ĥ" prefer="yes"/>
+      <replace source="&amp;#293;" target="ĥ" prefer="yes"/>
+      <replace source="&amp;#308;" target="Ĵ" prefer="yes"/>
+      <replace source="&amp;#309;" target="ĵ" prefer="yes"/>
+      <replace source="&amp;#348;" target="Ŝ" prefer="yes"/>
+      <replace source="&amp;#349;" target="ŝ" prefer="yes"/>
+      <replace source="&amp;#364;" target="Ŭ" prefer="yes"/>
+      <replace source="&amp;#365;" target="ŭ" prefer="yes"/>
+
+      <!-- Replace &#39; with ' in the input (this makes, among other things, the English genitive work). -->
+      <replace source="&amp;#39;" target="'"/>
+      <replace source="'" target="'" prefer="yes"/>
+
+
+      
+<!--      <replace source="&amp;apos;" target="'"/>
+      <replace source="&amp;quot;" target="\&quot;"/> -->
+    </replacement-rule>  
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/html-noent-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/html-noent-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/html-noent-format.xml	(revision 69632)
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;script"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/script"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;apertium-notrans"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/apertium-notrans&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;style"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/style"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3">
+      <tag regexp='"&lt;br"(" "[^&gt;]*)?"&gt;"|"&lt;hr"(" "[^&gt;]*)?"&gt;"|"&lt;p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;li"(" "[^&gt;]*)?"&gt;"|"&lt;ul"(" "[^&gt;]*)?"&gt;"|"&lt;ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;tr"(" "[^&gt;]*)?"&gt;"|"&lt;td"(" "[^&gt;]*)?"&gt;"|"&lt;th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3">
+      <tag regexp='"&lt;/br"(" "[^&gt;]*)?"&gt;"|"&lt;/hr"(" "[^&gt;]*)?"&gt;"|"&lt;/p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/li"(" "[^&gt;]*)?"&gt;"|"&lt;/ul"(" "[^&gt;]*)?"&gt;"|"&lt;/ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/tr"(" "[^&gt;]*)?"&gt;"|"&lt;/td"(" "[^&gt;]*)?"&gt;"|"&lt;/th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+<!-- Some end-of-sentence tags are still missing. -->
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;title"(" "[^&gt;]*)?"&gt;"|"&lt;div"(" "[^&gt;]*)?"&gt;"|"&lt;option"(" "[^&gt;]*)?"&gt;"|"&lt;h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/title"(" "[^&gt;]*)?"&gt;"|"&lt;/div"(" "[^&gt;]*)?"&gt;"|"&lt;/option"(" "[^&gt;]*)?"&gt;"|"&lt;/h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+<!-- The DOCTYPE tag is not recognized. -->
+
+    <format-rule type="empty" eos="no" priority="4">
+      <tag regexp='"&lt;"("img"|"link")(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="5">
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+<!-- End (changes made by Gorka L.) -->
+
+    <format-rule type="open" eos="no" priority="5">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/mediawiki-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/mediawiki-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/mediawiki-format.xml	(revision 69632)
@@ -0,0 +1,161 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<format name="html">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="UTF-8"/>
+    <output encoding="UTF-8"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/$]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;{}~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&amp;lt;!--"'/>
+      <end regexp='"--&amp;gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;script"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/script"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;apertium-notrans"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/apertium-notrans&gt;"'/>
+    </format-rule>
+
+    <!-- Mediawiki XML export -->
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;siteinfo&gt;"'/>
+      <end regexp='"&lt;/siteinfo&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;comment&gt;"'/>
+      <end regexp='"&lt;/comment&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;id&gt;"'/>
+      <end regexp='"&lt;/id&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;contributor&gt;"'/>
+      <end regexp='"&lt;/contributor&gt;"'/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;timestamp&gt;"'/>
+      <end regexp='"&lt;/timestamp&gt;"'/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"{{"'/>
+      <end regexp='"}}"'/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&amp;lt;math&amp;gt;"'/>
+      <end regexp='"&amp;lt;/math&amp;gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;style"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/style"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3">
+      <tag regexp='"&lt;br"(" "[^&gt;]*)?"&gt;"|"&lt;hr"(" "[^&gt;]*)?"&gt;"|"&lt;p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;li"(" "[^&gt;]*)?"&gt;"|"&lt;ul"(" "[^&gt;]*)?"&gt;"|"&lt;ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;tr"(" "[^&gt;]*)?"&gt;"|"&lt;td"(" "[^&gt;]*)?"&gt;"|"&lt;th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3">
+      <tag regexp='"&lt;/br"(" "[^&gt;]*)?"&gt;"|"&lt;/hr"(" "[^&gt;]*)?"&gt;"|"&lt;/p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/li"(" "[^&gt;]*)?"&gt;"|"&lt;/ul"(" "[^&gt;]*)?"&gt;"|"&lt;/ol"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/tr"(" "[^&gt;]*)?"&gt;"|"&lt;/td"(" "[^&gt;]*)?"&gt;"|"&lt;/th"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="1">
+      <tag regexp='"["'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="1">
+      <tag regexp='"]"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="1">
+      <tag regexp='"&apos;"([&apos;]+)'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="1">
+      <tag regexp='^([\*=:;#]+)'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp='"="([=]+)'/>
+    </format-rule>
+
+
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp='"[["([a-z][a-z]|[a-z][a-z][a-z]|"image"|"file")":"([^\]]*)"]]"'/>
+    </format-rule>
+
+<!-- Some end-of-sentence tags are still missing. -->
+
+    <format-rule type="open" eos="yes" priority="3"> 
+      <tag regexp='"&lt;title"(" "[^&gt;]*)?"&gt;"|"&lt;div"(" "[^&gt;]*)?"&gt;"|"&lt;option"(" "[^&gt;]*)?"&gt;"|"&lt;h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> 
+      <tag regexp='"&lt;/title"(" "[^&gt;]*)?"&gt;"|"&lt;/div"(" "[^&gt;]*)?"&gt;"|"&lt;/option"(" "[^&gt;]*)?"&gt;"|"&lt;/h"[1-6](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+<!-- The DOCTYPE tag is not recognized. -->
+
+    <format-rule type="empty" eos="no" priority="4">
+      <tag regexp='"&lt;"("img"|"link")(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="no" priority="5">
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+<!-- End (changes made by Gorka L.) -->
+
+    <format-rule type="open" eos="no" priority="5">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="open" eos="no" priority="5">
+      <tag regexp='"&amp;lt;"[a-zA-Z][^&amp;]*"&amp;gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5">
+      <tag regexp='"&amp;lt;/"[a-zA-Z][^&amp;]*"&amp;gt;"'/>
+    </format-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/odt-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/odt-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/odt-format.xml	(revision 69632)
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/$]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;{}~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;math:"[a-zA-Z]+(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/math:"[a-zA-Z]+(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule> 
+    
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;math"(" xmlns="[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/math&gt;"'/>
+    </format-rule> 
+    
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&amp;lt;apertium-notrans"(" "[^&gt;]*)?"&amp;gt;"'/>
+      <end regexp='"&amp;lt;/apertium-notrans&amp;gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3">
+      <tag regexp='"&lt;text:"[hp](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3">
+      <tag regexp='"&lt;/text:"[hp](" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="empty" eos="no" priority="4">
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="open" eos="no" priority="4">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="4">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'>
+      <replace source="&amp;apos;" target="'"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/pptx-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/pptx-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/pptx-format.xml	(revision 69632)
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/$]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;{}~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="2">
+      <begin regexp='"&lt;pkg:binaryData"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/pkg:binaryData"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule> 
+    
+    <format-rule type="comment" eos="yes" priority="2">
+      <begin regexp='"&lt;p:timing"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/p:timing"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="3">
+      <tag regexp='"&lt;a:p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3">
+      <tag regexp='"&lt;/a:p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&amp;lt;apertium-notrans"(" "[^&gt;]*)?"&amp;gt;"'/>
+      <end regexp='"&amp;lt;/apertium-notrans&amp;gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="empty" eos="no" priority="4">
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="open" eos="no" priority="4">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="4">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'>
+      <replace source="&amp;apos;" target="'"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/wxml-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/wxml-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/wxml-format.xml	(revision 69632)
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html"> <!-- NOTE(review): named "html" although this file is wxml-format.xml; confirm this is intended -->
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1"> <!-- XML comment blocks -->
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="comment" eos="no" priority="2"> <!-- everything from an opening Properties tag to its closing tag -->
+      <begin regexp='"&lt;Properties"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/Properties"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="3"> <!-- pkg:binaryData regions; eos="yes" presumably marks a sentence boundary (confirm) -->
+      <begin regexp='"&lt;pkg:binaryData"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/pkg:binaryData"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule> 
+    
+    <format-rule type="comment" eos="yes" priority="3"> <!-- w:instrText regions -->
+      <begin regexp='"&lt;w:instrText"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/w:instrText"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>    
+
+    <format-rule type="comment" eos="no" priority="2"> <!-- entity-escaped apertium-notrans markers, i.e. literal &lt;apertium-notrans ...&gt; text in the document -->
+      <begin regexp='"&amp;lt;apertium-notrans"(" "[^&gt;]*)?"&amp;gt;"'/>
+      <end regexp='"&amp;lt;/apertium-notrans&amp;gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="comment" eos="yes" priority="3"> <!-- w:drawing regions -->
+      <begin regexp='"&lt;w:drawing"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/w:drawing"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>    
+
+
+    <format-rule type="open" eos="yes" priority="4"> <!-- opening w:p tag, with or without attributes -->
+      <tag regexp='"&lt;w:p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="4"> <!-- closing w:p tag -->
+      <tag regexp='"&lt;/w:p"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="empty" eos="no" priority="5"> <!-- tags starting with "<!" or "<?" followed by a letter -->
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="open" eos="no" priority="5"> <!-- fallback: any other opening tag -->
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5"> <!-- fallback: any other closing tag -->
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'> <!-- matches named, hex and decimal character references; only &apos; has a mapping below -->
+      <replace source="&amp;apos;" target="'"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/xlsx-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/xlsx-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/xlsx-format.xml	(revision 69632)
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="html"> <!-- NOTE(review): named "html" although this file is xlsx-format.xml; confirm this is intended -->
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1"> <!-- XML comment blocks -->
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="2"> <!-- everything from an opening f tag to its closing tag (presumably spreadsheet formulas; confirm) -->
+      <begin regexp='"&lt;f"(" "[^&gt;]*)?"&gt;"'/>
+      <end regexp='"&lt;/f"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule> 
+
+
+    <format-rule type="open" eos="yes" priority="3"> <!-- opening si tag, with or without attributes -->
+      <tag regexp='"&lt;si"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="3"> <!-- closing si tag -->
+      <tag regexp='"&lt;/si"(" "[^&gt;]*)?"&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="empty" eos="no" priority="4"> <!-- tags starting with "<!" or "<?" followed by a letter -->
+      <tag regexp='("&lt;!"|"&lt;?")[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2"> <!-- entity-escaped apertium-notrans markers, i.e. literal &lt;apertium-notrans ...&gt; text in the document -->
+      <begin regexp='"&amp;lt;apertium-notrans"(" "[^&gt;]*)?"&amp;gt;"'/>
+      <end regexp='"&amp;lt;/apertium-notrans&amp;gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="open" eos="no" priority="4"> <!-- fallback: any other opening tag -->
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="4"> <!-- fallback: any other closing tag -->
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'> <!-- matches named, hex and decimal character references; only &apos; has a mapping below -->
+      <replace source="&amp;apos;" target="'"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/latex-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/latex-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/latex-format.xml	(revision 69632)
@@ -0,0 +1,268 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="latex">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;!--"'/>
+      <end regexp='"--&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;COMMENT&gt;"'/>
+      <end regexp='"&lt;/COMMENT&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp='"&lt;VERB&gt;"'/>
+      <end regexp='"&lt;/VERB&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;cite"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>      
+      <end regexp='"&lt;/CONTENTS&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="no" priority="2">
+      <begin regexp='"&lt;ref"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>      
+      <end regexp='"&lt;/CONTENTS&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;part"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;part"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;chapter"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;chapter"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;section"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;section"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;title"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;title"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;mline"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;thanks"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;paragraph"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>   
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;subsubsubsubsubsection"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;frametitle"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;frametitle"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;PARAM&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;block&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;caption"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;footnote"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;framebox"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+     
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;parbox"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"[^\&lt;]+"&lt;/CONTENTS&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="2">
+      <tag regexp='"&lt;item"(_STAR)?"/&gt;&lt;PARAM&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="no" priority="2">
+      <tag regexp='"&lt;textit"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="close" eos="no" priority="2">
+      <tag regexp='"&lt;/CONTENTS-noeos&gt;"'/>  
+    </format-rule>
+    
+    <format-rule type="open" eos="no" priority="2">
+      <tag regexp='"&lt;emph"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="no" priority="2">
+      <tag regexp='"&lt;textbf"(_STAR)?"/&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="4">
+      <tag regexp='"&lt;CONTENTS&gt;"[ \t\n\r]*"&lt;sf/&gt;"'/>      
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="4">
+      <tag regexp='"&lt;CONTENTS&gt;"[ \t\n\r]*"&lt;bf/&gt;"'/>      
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="4">
+      <tag regexp='"&lt;CONTENTS&gt;"[ \t\n\r]*"&lt;em/&gt;"'/>      
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="4">
+      <tag regexp='"&lt;CONTENTS&gt;"[ \t\n\r]*"&lt;sc/&gt;"'/>      
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;CONTENTS&gt;"'/>      
+      <end regexp='"&lt;/CONTENTS&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="4">
+      <tag regexp='"&lt;/CONTENTS&gt;"'/>
+    </format-rule>
+        
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;PARAM&gt;"'/>
+      <end regexp='"&lt;/PARAM&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="4">
+      <tag regexp='"&lt;/PARAM&gt;"'/>
+    </format-rule>
+
+
+    <format-rule type="comment" eos="no" priority="4">
+      <begin regexp='"&lt;MATH_DOLLAR&gt;"'/>
+      <end regexp='"&lt;/MATH_DOLLAR&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;MATH_DOLLARS&gt;"'/>
+      <end regexp='"&lt;/MATH_DOLLARS&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;equation"(_STAR)?"&gt;"'/>
+      <end regexp='"&lt;/equation"(_STAR)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;thebibliography"(_STAR)?"&gt;"'/>
+      <end regexp='"&lt;/thebibliography"(_STAR)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="comment" eos="yes" priority="4">
+      <begin regexp='"&lt;eqnarray"(_STAR)?"&gt;"'/>
+      <end regexp='"&lt;/eqnarray"(_STAR)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="open" eos="yes" priority="4">
+      <tag regexp='"&lt;itemize"(_STAR)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="close" eos="yes" priority="4"> <!-- matches the closing itemize tag (optionally _STAR), so it is declared as a close rule like the other closing-tag rules -->
+      <tag regexp='"&lt;/itemize"(_STAR)?"&gt;"'/>
+    </format-rule>
+
+    <format-rule type="empty" eos="yes" priority="4">
+      <tag regexp='"&lt;item/&gt;"'/>
+    </format-rule>
+    
+    
+
+    <format-rule type="open" eos="no" priority="5">
+      <tag regexp='"&lt;"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+    <format-rule type="close" eos="no" priority="5">
+      <tag regexp='"&lt;/"[a-zA-Z][^&gt;]*"&gt;"'/>
+    </format-rule>
+    
+<!--    <format-rule type="empty" eos="no" priority="5">
+      <tag regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8})";"'/>
+    </format-rule> -->
+
+    <format-rule type="empty" eos="yes" priority="6">
+      <tag regexp='"&lt;/PARAM&gt;"[ \t\n\r]*"&lt;CONTENTS&gt;"'/>  
+    </format-rule>
+
+
+    <replacement-rule regexp='"&amp;"([a-zA-Z]+|"#x"[0-9a-fA-F]{1,4}|"#"[0-9]{1,8});'>
+      <replace source="&amp;apos;" target="'"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/rtf-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/rtf-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/rtf-format.xml	(revision 69632)
@@ -0,0 +1,473 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<format name="rtf"> <!-- version 1.0 -->
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="''"/>
+    <escape-chars regexp='(\\|[][&lt;&gt;@^$/{}])'/> 
+<!--    <escape-chars regexp='[]]^@&lt;&gt;/]'/>  -->
+
+    <space-chars regexp='[ \n\t\r$*~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    
+    <!-- Exceptions with priority 1  -->
+    <!-- Style Sheet names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\snext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\keycode&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Font names are also format: -->
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fcharset&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fnil&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\froman&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fswiss&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fmodern&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fscript&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fdecor&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\ftech&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fbidi&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\falt&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fontfile&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fn&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sbasedon&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\additive&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- File names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\file&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- List Table names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\listname&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\leveltext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\levelnumbers&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Pictures are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pict&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sn&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sv&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;{\\\*\\blipuid  &quot;[^ \n\r]+&quot;}&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Objects are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\object&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Document Variables are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\docvar&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Bookmarks are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkstart&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkend&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Index Entries are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\rxe&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Fields are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldinst&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldrslt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Other format tags which contain #CDATA: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pntxt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\colortbl&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Hexadecimal Data -->
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp="[a-f0-9]{20,}" />
+    </format-rule>
+    
+    
+    
+    
+
+    <!-- End of sentence tags: par, row, cell -->
+    <format-rule type="empty" eos="yes" priority="2">
+      <tag regexp="&quot;\\par&quot;|&quot;\\row&quot;|&quot;\\cell&quot;" />
+    </format-rule>
+    
+
+    <format-rule type="open" eos="no" priority="3">
+      <tag regexp="&quot;{&quot;[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+
+    <!-- General Tags Specification -->
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\[^r'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\r[^q][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\rq[^u][^ \n\r\\]*[ \n\r]*"/>      
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\rqu[^o][^ \n\r\\]*[ \n\r]*"/>      
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\rquo[^t][^ \n\r\\]*[ \n\r]*"/>      
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\rquot[^e][^ \n\r\\]*[ \n\r]*"/>      
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\rquote[^ \n\r\\]+[ \n\r]*"/>      
+    </format-rule>
+
+    <format-rule type="close" eos="no" priority="4">
+      <tag regexp="&quot;}&quot;"/>
+    </format-rule>
+    <format-rule type="open" eos="no" priority="4">
+      <tag regexp="&quot;{&quot;"/>
+    </format-rule>
+
+    <replacement-rule regexp="&quot;\\rquote &quot;"> <!-- maps the "\rquote " control word to a plain apostrophe -->
+      <replace source="\\rquote " target="'"/>
+      <replace source="'" target="'" prefer="yes"/> <!-- attribute is "prefer", as on every other replace element in this file -->
+    </replacement-rule>
+    
+    <replacement-rule regexp="&quot;\\'&quot;[0-9a-fA-F][0-9a-fA-F](\r|\n|&quot;\r\n&quot;)?"> 
+      <replace source="\\'c0" target="�" prefer="yes"/>
+      <replace source="\\'c1" target="�" prefer="yes"/>
+      <replace source="\\'c2" target="�" prefer="yes"/>
+      <replace source="\\'c3" target="�" prefer="yes"/>
+      <replace source="\\'c4" target="�" prefer="yes"/>
+      <replace source="\\'c5" target="�" prefer="yes"/>
+      <replace source="\\'c6" target="�" prefer="yes"/>
+      <replace source="\\'c7" target="�" prefer="yes"/>
+      <replace source="\\'c8" target="�" prefer="yes"/>
+      <replace source="\\'c9" target="�" prefer="yes"/>
+      <replace source="\\'ca" target="�" prefer="yes"/>
+      <replace source="\\'cb" target="�" prefer="yes"/>
+      <replace source="\\'cc" target="�" prefer="yes"/>
+      <replace source="\\'cd" target="�" prefer="yes"/>
+      <replace source="\\'ce" target="�" prefer="yes"/>
+      <replace source="\\'cf" target="�" prefer="yes"/>
+      <replace source="\\'d0" target="�" prefer="yes"/>
+      <replace source="\\'d1" target="�" prefer="yes"/>
+      <replace source="\\'d2" target="�" prefer="yes"/>
+      <replace source="\\'d3" target="�" prefer="yes"/>
+      <replace source="\\'d4" target="�" prefer="yes"/>
+      <replace source="\\'d5" target="�" prefer="yes"/>
+      <replace source="\\'d6" target="�" prefer="yes"/>
+      <replace source="\\'d8" target="�" prefer="yes"/>
+      <replace source="\\'d9" target="�" prefer="yes"/>
+      <replace source="\\'da" target="�" prefer="yes"/>
+      <replace source="\\'db" target="�" prefer="yes"/>
+      <replace source="\\'dc" target="�" prefer="yes"/>
+      <replace source="\\'dd" target="�" prefer="yes"/>
+      <replace source="\\'de" target="�" prefer="yes"/>
+      <replace source="\\'df" target="�" prefer="yes"/>
+      <replace source="\\'e0" target="�" prefer="yes"/>
+      <replace source="\\'e1" target="�" prefer="yes"/>
+      <replace source="\\'e2" target="�" prefer="yes"/>
+      <replace source="\\'e3" target="�" prefer="yes"/>
+      <replace source="\\'e4" target="�" prefer="yes"/>
+      <replace source="\\'e5" target="�" prefer="yes"/>
+      <replace source="\\'e6" target="�" prefer="yes"/>
+      <replace source="\\'e7" target="�" prefer="yes"/>
+      <replace source="\\'e8" target="�" prefer="yes"/>
+      <replace source="\\'e9" target="�" prefer="yes"/>
+      <replace source="\\'ea" target="�" prefer="yes"/>
+      <replace source="\\'eb" target="�" prefer="yes"/>
+      <replace source="\\'ec" target="�" prefer="yes"/>
+      <replace source="\\'ed" target="�" prefer="yes"/>
+      <replace source="\\'ee" target="�" prefer="yes"/>
+      <replace source="\\'ef" target="�" prefer="yes"/>
+      <replace source="\\'f0" target="�" prefer="yes"/>
+      <replace source="\\'f1" target="�" prefer="yes"/>
+      <replace source="\\'f2" target="�" prefer="yes"/>
+      <replace source="\\'f3" target="�" prefer="yes"/>
+      <replace source="\\'f4" target="�" prefer="yes"/>
+      <replace source="\\'f5" target="�" prefer="yes"/>
+      <replace source="\\'f6" target="�" prefer="yes"/>
+      <replace source="\\'f8" target="�" prefer="yes"/>
+      <replace source="\\'f9" target="�" prefer="yes"/>
+      <replace source="\\'fa" target="�" prefer="yes"/>
+      <replace source="\\'fb" target="�" prefer="yes"/>
+      <replace source="\\'fc" target="�" prefer="yes"/>
+      <replace source="\\'fd" target="�" prefer="yes"/>
+      <replace source="\\'fe" target="�" prefer="yes"/>
+      <replace source="\\'ff" target="�" prefer="yes"/>
+      <!-- The same characters followed by a \r\n: -->
+      <replace source="\\'c0\r\n" target="�"/>
+      <replace source="\\'c1\r\n" target="�"/>
+      <replace source="\\'c2\r\n" target="�"/>
+      <replace source="\\'c3\r\n" target="�"/>
+      <replace source="\\'c4\r\n" target="�"/>
+      <replace source="\\'c5\r\n" target="�"/>
+      <replace source="\\'c6\r\n" target="�"/>
+      <replace source="\\'c7\r\n" target="�"/>
+      <replace source="\\'c8\r\n" target="�"/>
+      <replace source="\\'c9\r\n" target="�"/>
+      <replace source="\\'ca\r\n" target="�"/>
+      <replace source="\\'cb\r\n" target="�"/>
+      <replace source="\\'cc\r\n" target="�"/>
+      <replace source="\\'cd\r\n" target="�"/>
+      <replace source="\\'ce\r\n" target="�"/>
+      <replace source="\\'cf\r\n" target="�"/>
+      <replace source="\\'d0\r\n" target="�"/>
+      <replace source="\\'d1\r\n" target="�"/>
+      <replace source="\\'d2\r\n" target="�"/>
+      <replace source="\\'d3\r\n" target="�"/>
+      <replace source="\\'d4\r\n" target="�"/>
+      <replace source="\\'d5\r\n" target="�"/>
+      <replace source="\\'d6\r\n" target="�"/>
+      <replace source="\\'d8\r\n" target="�"/>
+      <replace source="\\'d9\r\n" target="�"/>
+      <replace source="\\'da\r\n" target="�"/>
+      <replace source="\\'db\r\n" target="�"/>
+      <replace source="\\'dc\r\n" target="�"/>
+      <replace source="\\'dd\r\n" target="�"/>
+      <replace source="\\'de\r\n" target="�"/>
+      <replace source="\\'df\r\n" target="�"/>
+      <replace source="\\'e0\r\n" target="�"/>
+      <replace source="\\'e1\r\n" target="�"/>
+      <replace source="\\'e2\r\n" target="�"/>
+      <replace source="\\'e3\r\n" target="�"/>
+      <replace source="\\'e4\r\n" target="�"/>
+      <replace source="\\'e5\r\n" target="�"/>
+      <replace source="\\'e6\r\n" target="�"/>
+      <replace source="\\'e7\r\n" target="�"/>
+      <replace source="\\'e8\r\n" target="�"/>
+      <replace source="\\'e9\r\n" target="�"/>
+      <replace source="\\'ea\r\n" target="�"/>
+      <replace source="\\'eb\r\n" target="�"/>
+      <replace source="\\'ec\r\n" target="�"/>
+      <replace source="\\'ed\r\n" target="�"/>
+      <replace source="\\'ee\r\n" target="�"/>
+      <replace source="\\'ef\r\n" target="�"/>
+      <replace source="\\'f0\r\n" target="�"/>
+      <replace source="\\'f1\r\n" target="�"/>
+      <replace source="\\'f2\r\n" target="�"/>
+      <replace source="\\'f3\r\n" target="�"/>
+      <replace source="\\'f4\r\n" target="�"/>
+      <replace source="\\'f5\r\n" target="�"/>
+      <replace source="\\'f6\r\n" target="�"/>
+      <replace source="\\'f8\r\n" target="�"/>
+      <replace source="\\'f9\r\n" target="�"/>
+      <replace source="\\'fa\r\n" target="�"/>
+      <replace source="\\'fb\r\n" target="�"/>
+      <replace source="\\'fc\r\n" target="�"/>
+      <replace source="\\'fd\r\n" target="�"/>
+      <replace source="\\'fe\r\n" target="�"/>
+      <replace source="\\'ff\r\n" target="�"/>
+      <!-- The same characters followed by a \n: -->
+      <replace source="\\'c0\n" target="�"/>
+      <replace source="\\'c1\n" target="�"/>
+      <replace source="\\'c2\n" target="�"/>
+      <replace source="\\'c3\n" target="�"/>
+      <replace source="\\'c4\n" target="�"/>
+      <replace source="\\'c5\n" target="�"/>
+      <replace source="\\'c6\n" target="�"/>
+      <replace source="\\'c7\n" target="�"/>
+      <replace source="\\'c8\n" target="�"/>
+      <replace source="\\'c9\n" target="�"/>
+      <replace source="\\'ca\n" target="�"/>
+      <replace source="\\'cb\n" target="�"/>
+      <replace source="\\'cc\n" target="�"/>
+      <replace source="\\'cd\n" target="�"/>
+      <replace source="\\'ce\n" target="�"/>
+      <replace source="\\'cf\n" target="�"/>
+      <replace source="\\'d0\n" target="�"/>
+      <replace source="\\'d1\n" target="�"/>
+      <replace source="\\'d2\n" target="�"/>
+      <replace source="\\'d3\n" target="�"/>
+      <replace source="\\'d4\n" target="�"/>
+      <replace source="\\'d5\n" target="�"/>
+      <replace source="\\'d6\n" target="�"/>
+      <replace source="\\'d8\n" target="�"/>
+      <replace source="\\'d9\n" target="�"/>
+      <replace source="\\'da\n" target="�"/>
+      <replace source="\\'db\n" target="�"/>
+      <replace source="\\'dc\n" target="�"/>
+      <replace source="\\'dd\n" target="�"/>
+      <replace source="\\'de\n" target="�"/>
+      <replace source="\\'df\n" target="�"/>
+      <replace source="\\'e0\n" target="�"/>
+      <replace source="\\'e1\n" target="�"/>
+      <replace source="\\'e2\n" target="�"/>
+      <replace source="\\'e3\n" target="�"/>
+      <replace source="\\'e4\n" target="�"/>
+      <replace source="\\'e5\n" target="�"/>
+      <replace source="\\'e6\n" target="�"/>
+      <replace source="\\'e7\n" target="�"/>
+      <replace source="\\'e8\n" target="�"/>
+      <replace source="\\'e9\n" target="�"/>
+      <replace source="\\'ea\n" target="�"/>
+      <replace source="\\'eb\n" target="�"/>
+      <replace source="\\'ec\n" target="�"/>
+      <replace source="\\'ed\n" target="�"/>
+      <replace source="\\'ee\n" target="�"/>
+      <replace source="\\'ef\n" target="�"/>
+      <replace source="\\'f0\n" target="�"/>
+      <replace source="\\'f1\n" target="�"/>
+      <replace source="\\'f2\n" target="�"/>
+      <replace source="\\'f3\n" target="�"/>
+      <replace source="\\'f4\n" target="�"/>
+      <replace source="\\'f5\n" target="�"/>
+      <replace source="\\'f6\n" target="�"/>
+      <replace source="\\'f8\n" target="�"/>
+      <replace source="\\'f9\n" target="�"/>
+      <replace source="\\'fa\n" target="�"/>
+      <replace source="\\'fb\n" target="�"/>
+      <replace source="\\'fc\n" target="�"/>
+      <replace source="\\'fd\n" target="�"/>
+      <replace source="\\'fe\n" target="�"/>
+      <replace source="\\'ff\n" target="�"/>
+      <!-- The same characters followed by a \r: -->
+      <replace source="\\'c0\r" target="�"/>
+      <replace source="\\'c1\r" target="�"/>
+      <replace source="\\'c2\r" target="�"/>
+      <replace source="\\'c3\r" target="�"/>
+      <replace source="\\'c4\r" target="�"/>
+      <replace source="\\'c5\r" target="�"/>
+      <replace source="\\'c6\r" target="�"/>
+      <replace source="\\'c7\r" target="�"/>
+      <replace source="\\'c8\r" target="�"/>
+      <replace source="\\'c9\r" target="�"/>
+      <replace source="\\'ca\r" target="�"/>
+      <replace source="\\'cb\r" target="�"/>
+      <replace source="\\'cc\r" target="�"/>
+      <replace source="\\'cd\r" target="�"/>
+      <replace source="\\'ce\r" target="�"/>
+      <replace source="\\'cf\r" target="�"/>
+      <replace source="\\'d0\r" target="�"/>
+      <replace source="\\'d1\r" target="�"/>
+      <replace source="\\'d2\r" target="�"/>
+      <replace source="\\'d3\r" target="�"/>
+      <replace source="\\'d4\r" target="�"/>
+      <replace source="\\'d5\r" target="�"/>
+      <replace source="\\'d6\r" target="�"/>
+      <replace source="\\'d8\r" target="�"/>
+      <replace source="\\'d9\r" target="�"/>
+      <replace source="\\'da\r" target="�"/>
+      <replace source="\\'db\r" target="�"/>
+      <replace source="\\'dc\r" target="�"/>
+      <replace source="\\'dd\r" target="�"/>
+      <replace source="\\'de\r" target="�"/>
+      <replace source="\\'df\r" target="�"/>
+      <replace source="\\'e0\r" target="�"/>
+      <replace source="\\'e1\r" target="�"/>
+      <replace source="\\'e2\r" target="�"/>
+      <replace source="\\'e3\r" target="�"/>
+      <replace source="\\'e4\r" target="�"/>
+      <replace source="\\'e5\r" target="�"/>
+      <replace source="\\'e6\r" target="�"/>
+      <replace source="\\'e7\r" target="�"/>
+      <replace source="\\'e8\r" target="�"/>
+      <replace source="\\'e9\r" target="�"/>
+      <replace source="\\'ea\r" target="�"/>
+      <replace source="\\'eb\r" target="�"/>
+      <replace source="\\'ec\r" target="�"/>
+      <replace source="\\'ed\r" target="�"/>
+      <replace source="\\'ee\r" target="�"/>
+      <replace source="\\'ef\r" target="�"/>
+      <replace source="\\'f0\r" target="�"/>
+      <replace source="\\'f1\r" target="�"/>
+      <replace source="\\'f2\r" target="�"/>
+      <replace source="\\'f3\r" target="�"/>
+      <replace source="\\'f4\r" target="�"/>
+      <replace source="\\'f5\r" target="�"/>
+      <replace source="\\'f6\r" target="�"/>
+      <replace source="\\'f8\r" target="�"/>
+      <replace source="\\'f9\r" target="�"/>
+      <replace source="\\'fa\r" target="�"/>
+      <replace source="\\'fb\r" target="�"/>
+      <replace source="\\'fc\r" target="�"/>
+      <replace source="\\'fd\r" target="�"/>
+      <replace source="\\'fe\r" target="�"/>
+      <replace source="\\'ff\r" target="�"/>
+      <!-- With UTF-8 -->
+      <replace source="\\u237" target="�"/>
+      <replace source="\\u243" target="�"/>
+    </replacement-rule>  
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/txt-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/txt-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/txt-format.xml	(revision 69632)
@@ -0,0 +1,18 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<format name="txt">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="''"/>
+    <escape-chars regexp='[][\\/@&lt;&gt;^${}]'/>
+    <space-chars regexp='[ \n\t\r~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp='(("\n\n")|("\r\n\r\n"))+'/>
+    </format-rule>
+  </rules>
+</format>
Index: branches/apertium-tagger/apertium2/apertium/xpresstag-format.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/xpresstag-format.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/xpresstag-format.xml	(revision 69632)
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="xpresstag">
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="ISO-8859-1"/>
+    <output encoding="ISO-8859-1"/>
+    <tag-name regexp="[a-zA-Z]+"/>
+    <escape-chars regexp='[][^@\\/${}]'/>
+    <space-chars regexp='[ \n\t\r*&lt;&gt;~]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp="&quot;&lt;&quot;[^-&gt;]+&quot;-&gt;&quot;"/>
+    </format-rule>
+    <format-rule type="empty" eos="no" priority="2">
+      <tag regexp="&quot;&lt;&quot;[^&gt;]+&quot;&gt;&quot;"/>
+    </format-rule>
+    
+    <format-rule type="empty" eos="yes" priority="3">
+      <tag regexp="&quot;@&quot;[^&lt;:]+&quot;:&quot;"/>
+    </format-rule>
+    <format-rule type="empty" eos="yes" priority="3">
+      <tag regexp="&quot;@&quot;[^&lt;=]+&quot;=&quot;"/>
+    </format-rule>
+
+    <format-rule type="empty" eos="yes" priority="3">
+      <tag
+regexp="&quot;@&quot;[^&lt;=]+&quot;=[&quot;[^]]+&quot;]&quot;"/>
+    </format-rule>
+  </rules>
+</format>
\ No newline at end of file
Index: branches/apertium-tagger/apertium2/apertium/apertium-filter-ambiguity.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-filter-ambiguity.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-filter-ambiguity.1	(revision 69632)
@@ -0,0 +1,26 @@
+.TH apertium-filter-ambiguity 1 2006-03-21 "" ""
+.SH NAME
+apertium-filter-ambiguity \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-filter-ambiguity
+[input_file [output_file]]
+.PP
+.SH DESCRIPTION
+.BR apertium-filter-ambiguity 
+takes input from STDIN or input_file, gets tagger data, filters ambiguity 
+classes and outputs on STDOUT or output_file, in each case.
+.PP
+.SH SEE ALSO
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-deformat.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-deformat.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-deformat.1	(revision 69632)
@@ -0,0 +1,49 @@
+.TH apertium-gen-deformat 1 2006-03-21 "" ""
+.SH NAME
+apertium-gen-deformat \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-gen-deformat
+[
+.B \-a \fR| 
+.B \-A \fR|
+.B \-m \fR|
+.B \-M \fR
+] <input file> <output file>
+.PP
+.SH DESCRIPTION
+.BR apertium-gen-deformat 
+is a script which generates a C++ deformatter for a particular format. The
+deformatter reads in a format specification file in XML and outputs a C++ deformatter 
+using flex. 
+.SH OPTIONS
+.TP
+.B \-a
+Runs in apertium standard mode.
+.TP
+.B \-A
+Runs in apertium optimised mode (default)
+.TP
+.B \-m 
+Runs in matxin standard mode  (matxin is another open-source machine translation system: \fBhttp://www.sourceforge.org/matxin\fR)
+.TP
+.B \-M
+Runs in matxin optimised mode
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-gen-reformat\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-reformat.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-reformat.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-reformat.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-gen-reformat 1 2006-03-21 "" ""
+.SH NAME
+apertium-gen-reformat \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+architecture: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-gen-reformat
+[
+.B \-O \fR
+] <input file> <output file>
+.PP
+.SH DESCRIPTION
+.BR apertium-gen-reformat 
+is a script which generates a C++ reformatter for a particular format. The
+reformatter reads in a format file in XML and outputs a C++ reformatter 
+using flex.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-gen-deformat\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-multiple-translations.1	(revision 69632)
@@ -0,0 +1,43 @@
+.TH apertium-multiple-translations 1 2006-03-08 "" ""
+.SH NAME
+apertium-multiple-translations \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-multiple-translations
+preproc biltrans [input [output]]
+.SH DESCRIPTION
+.BR apertium-multiple-translations 
+is the program that outputs multiple translations of certain words in a text according to the
+different possible translations of the words in the bilingual dictionary (in a dictionary
+that supports it).  The place to put this program in the modes.xml file is 
+just after apertium-pretransfer.
+.PP
+.RE
+.SH FILES
+These are the four files that can be used with this command:
+.B preproc    
+Result of preprocess trules file
+.PP
+.B biltrans   
+Bilingual letter transducer file
+.PP
+.B infile
+Input file (stdin by default).
+.PP
+.B outfile
+Output file (stdout by default).
+.PP
+.SH SEE ALSO
+.I apertium-transfer\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005--2008 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-postlatex-raw.1	(revision 69632)
@@ -0,0 +1,37 @@
+.TH apertium-postlatex-raw 1 2012-02-29 "" ""
+.SH NAME
+apertium-postlatex-raw \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-postlatex-raw
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-postlatex-raw
+is a filter that generates LaTeX code from the output of the apertium-relatex
+command. Non-ASCII characters are emitted in their native encoding,
+depending on the locale of the running environment.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-deslatex\fR(1),
+.I apertium-prelatex\fR(1),
+.I apertium-relatex\fR(1),
+.I apertium-postlatex\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-postlatex.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-postlatex.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-postlatex.1	(revision 69632)
@@ -0,0 +1,37 @@
+.TH apertium-postlatex 1 2012-02-29 "" ""
+.SH NAME
+apertium-postlatex \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-postlatex
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-postlatex
+is a filter that generates LaTeX code from the output of the apertium-relatex
+command. Non-ASCII characters are transformed into ASCII-compatible LaTeX
+constructions rather than natively-encoded characters.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-deslatex\fR(1),
+.I apertium-prelatex\fR(1),
+.I apertium-relatex\fR(1),
+.I apertium-postlatex-raw\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-prelatex.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-prelatex.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-prelatex.1	(revision 69632)
@@ -0,0 +1,37 @@
+.TH apertium-prelatex 1 2012-02-29 "" ""
+.SH NAME
+apertium-prelatex \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-prelatex
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-prelatex
+is a filter that preprocesses LaTeX input, transforming it into a custom
+deformatted 'XMLish' LaTeX format. The output is suitable for processing with
+the apertium-deslatex deformatter.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-deslatex\fR(1),
+.I apertium-postlatex\fR(1),
+.I apertium-relatex\fR(1),
+.I apertium-postlatex-raw\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-preprocess-transfer.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-preprocess-transfer.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-preprocess-transfer.1	(revision 69632)
@@ -0,0 +1,37 @@
+.TH apertium-preprocess-transfer 1 2006-03-08 "" ""
+.SH NAME
+apertium-preprocess-transfer \- This application is part of (
+.B apertium
+)
+.PP
+This tool is part of the open-source apertium machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-preprocess-transfer
+rules_file transfer_file
+.SH DESCRIPTION
+.BR apertium-preprocess-transfer 
+is a structural transfer preprocessor which reads in a structural transfer 
+rule file and generates a file with precompiled patterns and indexes to the 
+actions of the rules of the structural transfer module specification.
+.PP
+.RE
+.SH FILES
+These are the two files that can be used with this command:
+.PP
+.B rules_file
+File with structural transfer rules
+.PP
+.B transfer_file
+File with precompiled patterns
+.PP
+.SH SEE ALSO
+.I apertium\fR(1),
+.I apertium-transfer\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rehtml.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rehtml.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rehtml.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rehtml 1 2006-03-21 "" ""
+.SH NAME
+apertium-rehtml \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rehtml
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rehtml 
+is an HTML format processor. It restores the original  HTML formatting
+the text had before being passed through the apertium-deshtml deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-relatex.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-relatex.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-relatex.1	(revision 69632)
@@ -0,0 +1,37 @@
+.TH apertium-relatex 1 2012-02-29 "" ""
+.SH NAME
+apertium-relatex \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-relatex
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-relatex
+is a filter that preprocesses the apertium generator output and removes
+superblank marks to allow subsequent processing in the apertium pipeline.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-prelatex\fR(1),
+.I apertium-postlatex\fR(1),
+.I apertium-deslatex\fR(1),
+.I apertium-postlatex-raw\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-remediawiki.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-remediawiki.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-remediawiki.1	(revision 69632)
@@ -0,0 +1,35 @@
+.TH apertium-remediawiki 1 2006-03-21 "" ""
+.SH NAME
+apertium-remediawiki \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-remediawiki
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-remediawiki 
+is a mediawiki format processor. It restores the original formatting
+the text had (newlines, tabs, etc.) before being passed through the apertium-desmediawiki deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rehtml\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-reodt.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-reodt.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-reodt.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-reodt 1 2006-03-21 "" ""
+.SH NAME
+apertium-reodt \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-reodt
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-reodt 
+is an ODT format processor. It restores the original  ODT formatting
+the text had before being passed through the apertium-desodt deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-repptx.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-repptx.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-repptx.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-repptx 1 2006-03-21 "" ""
+.SH NAME
+apertium-repptx \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-repptx
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-repptx 
+is a PPTX format processor. It restores the original PPTX formatting
+the text had before being passed through the apertium-despptx deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1250.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1250.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1250.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rertf 1 2006-03-21 "" ""
+.SH NAME
+apertium-rertf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rertf
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rertf 
+is an RTF format processor. It restores the original RTF formatting
+the text had before being passed through the apertium-desrtf deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rehtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1251.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1251.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rertf-cp1251.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rertf 1 2006-03-21 "" ""
+.SH NAME
+apertium-rertf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rertf
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rertf 
+is an RTF format processor. It restores the original RTF formatting
+the text had before being passed through the apertium-desrtf deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rehtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rertf.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rertf.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rertf.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rertf 1 2006-03-21 "" ""
+.SH NAME
+apertium-rertf \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rertf
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rertf 
+is an RTF format processor. It restores the original RTF formatting
+the text had before being passed through the apertium-desrtf deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rehtml\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-retxt.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-retxt.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-retxt.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-retxt 1 2006-03-21 "" ""
+.SH NAME
+apertium-retxt \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-retxt
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-retxt 
+is a text format processor. It restores the original formatting
+the text had (newlines, tabs, etc.) before being passed through the apertium-destxt deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-rehtml\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rewxml.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rewxml.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rewxml.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rewxml 1 2006-03-21 "" ""
+.SH NAME
+apertium-rewxml \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rewxml
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rewxml 
+is a WXML format processor. It restores the original WXML formatting
+the text had before being passed through the apertium-deswxml deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-rexlsx.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-rexlsx.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-rexlsx.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium-rexlsx 1 2006-03-21 "" ""
+.SH NAME
+apertium-rexlsx \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-rexlsx
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-rexlsx 
+is an XLSX format processor. It restores the original XLSX formatting
+the text had before being passed through the apertium-desxlsx deformatter.
+
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-retxt\fR(1),
+.I apertium-rertf\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-utils-fixlatex.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-utils-fixlatex.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-utils-fixlatex.1	(revision 69632)
@@ -0,0 +1,38 @@
+.TH apertium-utils-fixlatex 1 2012-02-29 "" ""
+.SH NAME
+apertium-utils-fixlatex \- This application is part of (
+.B apertium 
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-utils-fixlatex
+[ <input file> [ <output file> ] ]
+.PP
+.SH DESCRIPTION
+.BR apertium-utils-fixlatex
+is a gawk-based script that fixes some constructions in the 'XMLish' Apertium
+LaTeX format to get better translation results.
+.SH OPTIONS
+.TP
+.B \-h, \-\-help
+Display this help.
+.PP
+.SH SEE ALSO
+.I apertium-destxt\fR(1),
+.I apertium-prelatex\fR(1),
+.I apertium-relatex\fR(1),
+.I apertium-postlatex\fR(1),
+.I apertium-deslatex\fR(1),
+.I apertium-postlatex-raw\fR(1),
+.I lt-proc\fR(1),
+.I apertium\fR(1).
+.SH BUGS
+Complicated constructions in LaTeX (i.e. custom defined tags) are not (yet)
+supported.
+.PP
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-unformat-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-unformat-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-unformat-header.sh	(revision 69632)
@@ -0,0 +1,229 @@
+PAIR=""  # not referenced anywhere else in this script
+INPUT_FILE="/dev/stdin"  # input used when no positional argument is given
+OUTPUT_FILE="/dev/stdout"  # output used when no second positional argument is given
+
+[ -z "$TMPDIR" ] && TMPDIR=/tmp  # fall back to /tmp when TMPDIR is unset or empty
+
+
+message ()
+{  # Print the usage summary (every format the dispatch below handles) and exit with status 1.
+  echo "USAGE: $(basename "$0") [-f format] [in [out]]"
+  echo " -f format        one of: txt (default), html, html-noent, rtf, latex, odt, docx, wxml, xlsx, pptx"
+  echo " in               input file (stdin by default)"
+  echo " out              output file (stdout by default)"
+  exit 1;
+}
+
+locale_utf8 ()
+{  # Export LC_CTYPE as the first installed UTF-8 locale; exit with status 1 when none is found.
+  export LC_CTYPE=$(locale -a|grep -i "utf[-.]*8"|head -1);  # accepts both the "utf8" and the "UTF-8" spelling
+  if [ "$LC_CTYPE" = "" ]
+  then echo "Error: Install an UTF-8 locale in your system";
+       exit 1;
+  fi
+}
+
+test_zip ()
+{  # Exit with status 1 unless 'which' finds both the zip and the unzip commands.
+ if [ "$(which zip)" = "" ]
+  then echo "Error: Install 'zip' command in your system";
+       exit 1;
+  fi
+  
+  if [ "$(which unzip)" = "" ]
+  then echo "Error: Install 'unzip' command in your system";
+       exit 1;
+  fi 
+}
+
+test_gawk ()
+{  # Store the path reported by 'which gawk' in GAWK; exit with status 1 when it is empty.
+  GAWK=$(which gawk)
+  if [ "$GAWK" = "" ]
+  then echo "Error: Install 'gawk' in your system"
+       exit 1
+  fi
+}
+
+
+unformat_latex()
+{  # Deformat LaTeX: apertium-prelatex | apertium-utils-fixlatex | apertium-deslatex, written to $SALIDA.
+  test_gawk
+  
+  if [ "$FICHERO" = "" ]  # no input file name: spool standard input into a temporary file
+  then FICHERO=$(mktemp $TMPDIR/apertium.XXXXXXXX)
+       cat > $FICHERO
+       BORRAFICHERO="true"  # marks the temporary file for deletion below
+  fi
+
+  $APERTIUM_PATH/apertium-prelatex $FICHERO | \
+  $APERTIUM_PATH/apertium-utils-fixlatex | \
+  $APERTIUM_PATH/apertium-deslatex  >$SALIDA
+  
+  if [ "$BORRAFICHERO" = "true" ]
+  then rm -Rf $FICHERO
+  fi
+}
+
+
+unformat_odt ()
+{  # Unzip $FICHERO, wrap each path matching content.xml as '<file name="..."/>' plus its lines, and pipe that to apertium-desodt.
+  INPUT_TMPDIR=$(mktemp -d $TMPDIR/apertium.XXXXXXXX)  # scratch directory for the unpacked archive
+
+  locale_utf8
+  test_zip
+  
+  unzip -q -o -d $INPUT_TMPDIR $FICHERO
+  find $INPUT_TMPDIR | grep content\\\.xml |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  $APERTIUM_PATH/apertium-desodt >$SALIDA
+  rm -Rf $INPUT_TMPDIR  # the unpacked copy is discarded once the deformatter has run
+}
+
+unformat_docx ()
+{  # Unzip $FICHERO and pipe its XML parts (excluding settings/theme/styles/font/rels/docProps paths) to apertium-deswxml, writing to $SALIDA.
+  INPUT_TMPDIR=$(mktemp -d $TMPDIR/apertium.XXXXXXXX)  # scratch directory for the unpacked archive
+
+  locale_utf8
+  test_zip
+  
+  unzip -q -o -d $INPUT_TMPDIR $FICHERO
+  
+  for i in $(find $INPUT_TMPDIR|grep "xlsx$");  # every unpacked path ending in "xlsx" is rewritten in place
+  do LOCALTEMP=$(mktemp $TMPDIR/apertium.XXXXXXXX)
+     $APERTIUM_PATH/apertium -f xlsx -d $DIRECTORY $OPCIONU $PREFIJO <$i >$LOCALTEMP;  # NOTE(review): DIRECTORY, OPCIONU and PREFIJO are never set in this script -- confirm this call is intended here
+     cp $LOCALTEMP $i;
+     rm $LOCALTEMP;
+  done;
+  
+  find $INPUT_TMPDIR | grep "xml" |\
+  grep -v -i \\\(settings\\\|theme\\\|styles\\\|font\\\|rels\\\|docProps\\\) |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  $APERTIUM_PATH/apertium-deswxml >$SALIDA
+  rm -Rf $INPUT_TMPDIR  # the unpacked copy is discarded once the deformatter has run
+}
+
+unformat_pptx ()
+{  # Unzip $FICHERO and pipe the XML files whose path contains "slides/slide" to apertium-despptx, writing to $SALIDA.
+  INPUT_TMPDIR=$(mktemp -d $TMPDIR/apertium.XXXXXXXX)  # scratch directory for the unpacked archive
+
+  locale_utf8
+  test_zip
+    
+  unzip -q -o -d $INPUT_TMPDIR $FICHERO
+  
+  for i in $(find $INPUT_TMPDIR|grep "xlsx$");  # every unpacked path ending in "xlsx" is rewritten in place
+  do LOCALTEMP=$(mktemp $TMPDIR/apertium.XXXXXXXX)
+     $APERTIUM_PATH/apertium -f xlsx -d $DIRECTORY $OPCIONU $PREFIJO <$i >$LOCALTEMP  # NOTE(review): DIRECTORY, OPCIONU and PREFIJO are never set in this script -- confirm this call is intended here
+     cp $LOCALTEMP $i
+     rm $LOCALTEMP
+  done;
+  
+  find $INPUT_TMPDIR | grep "xml$" |\
+  grep "slides\/slide" |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  $APERTIUM_PATH/apertium-despptx >$SALIDA
+  rm -Rf $INPUT_TMPDIR  # the unpacked copy is discarded once the deformatter has run
+}
+
+
+unformat_xlsx ()
+{  # Unzip $FICHERO and pipe every path matching sharedStrings.xml to apertium-desxlsx, writing to $SALIDA.
+  INPUT_TMPDIR=$(mktemp -d $TMPDIR/apertium.XXXXXXXX)  # scratch directory for the unpacked archive
+
+  locale_utf8
+  test_zip
+  
+  unzip -q -o -d $INPUT_TMPDIR $FICHERO
+  find $INPUT_TMPDIR | grep "sharedStrings.xml" |\
+  awk '{printf "<file name=\"" $0 "\"/>"; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
+  $APERTIUM_PATH/apertium-desxlsx >$SALIDA
+  rm -Rf $INPUT_TMPDIR  # the unpacked copy is discarded once the deformatter has run
+
+}
+
+
+ARGS=$(getopt "f:" $*)  # let getopt normalise the command line; only -f (taking an argument) is declared
+set -- $ARGS
+for i
+do
+  case "$i" in 
+    -f) shift; FORMAT=$1; shift;;  # drop "-f", keep its argument as FORMAT, then drop the argument too
+    --) shift; break;;  # end of options: what remains are the positional file names
+  esac
+done
+
+case "$#" in 
+     2)  # both an input and an output file were given
+       OUTPUT_FILE=$2; 
+       INPUT_FILE=$1;
+       if [ ! -e $INPUT_FILE ];
+       then echo "Error: file '$INPUT_FILE' not found."
+            message;
+       fi
+       ;;
+     1)  # only an input file was given; output keeps its default
+       INPUT_FILE=$1;
+       if [ ! -e $INPUT_FILE ];
+       then echo "Error: file '$INPUT_FILE' not found."
+            message;
+       fi
+       ;;
+     0)  # no file names: keep the defaults for input and output
+       ;;
+     *)  # more than two positional arguments: print usage and exit
+       message 
+       ;;
+esac    
+
+if [ x$FORMAT = x ]; then FORMAT="txt"; fi  # no -f given: default to the txt format
+
+FORMATADOR=$FORMAT;  # format name, later appended to "apertium-des" to pick the deformatter
+FICHERO=$INPUT_FILE;  # input path read by the unformat_* functions
+SALIDA=$OUTPUT_FILE;  # output path written by the unformat_* functions
+
+
+case "$FORMATADOR" in 
+        rtf)  # switch LC_CTYPE to the first listed locale that is neither UTF nor C/POSIX
+		MILOCALE=$(locale -a|grep -i -v "utf\|^C$\|^POSIX$"|head -1);
+		if [ "$MILOCALE" = "" ]
+		then echo "Error: Install a ISO-8859-1 compatible locale in your system";
+	             exit 1;
+	        fi
+	        export LC_CTYPE=$MILOCALE
+		;;
+        html-noent)  # handled by the html deformatter
+        	FORMATADOR="html"
+        	;;
+        
+        latex)  # this and the archive formats below have dedicated functions and exit right after them
+                unformat_latex
+                exit 0
+                ;;
+                
+        odt)
+		unformat_odt
+		exit 0
+		;;
+	docx)
+		unformat_docx
+		exit 0
+		;;
+	xlsx)
+		unformat_xlsx
+		exit 0
+		;;
+	pptx)
+		unformat_pptx
+		exit 0
+		;;
+		
+	wxml)  # only needs a UTF-8 locale before the generic call below
+	        locale_utf8
+	        ;;
+	*)  # any other format goes straight to the generic call below
+	        ;;
+	        	
+esac
+
+$APERTIUM_PATH/apertium-des$FORMATADOR $FICHERO >$SALIDA  # generic path: run apertium-des<format> on the input file
Index: branches/apertium-tagger/apertium2/apertium/utils-fixlatex-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/utils-fixlatex-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/utils-fixlatex-header.sh	(revision 69632)
@@ -0,0 +1,52 @@
+INPUT_FILE=/dev/stdin
+OUTPUT_FILE=/dev/stdout
+
+cat $INPUT_FILE | \
+gawk '
+function is_inline_tag(str,                      aux, val)  # aux and val are locals; returns 1 if some inline tag followed by <CONTENTS> occurs exactly once in str, else 0
+{
+  for(val in INLINETAGS)
+  {
+    aux = INLINETAGS[val] "<CONTENTS>";
+    if(gsub(aux, aux, str) == 1)  # gsub replaces the match with itself, so it only serves as an occurrence counter
+    {
+      return 1;
+    }
+  }
+  
+  return 0;
+}
+
+BEGIN{
+  RS="</CONTENTS>";  
+  
+  INLINETAGS[1]="<textit/>";
+  INLINETAGS[2]="<textbf/>";
+  INLINETAGS[3]="<emph/>";
+}
+{
+  MYRECORD[++nline] = $0;  
+}
+END{
+  for(i=1; i < nline; i++)  # every record except the last one gets a closing tag written back
+  {
+    if(gsub("<CONTENTS>", "<CONTENTS>", MYRECORD[i]) == 1)  # record holds exactly one <CONTENTS> opener
+    {
+      if(is_inline_tag(MYRECORD[i]))
+      {
+        printf("%s</CONTENTS-noeos>", MYRECORD[i]);
+      }
+      else
+      {
+        printf("%s</CONTENTS>", MYRECORD[i]);
+      }
+    } 
+    else
+    {
+      printf("%s</CONTENTS>", MYRECORD[i]);
+    }
+  }
+  
+  printf("%s", MYRECORD[nline]);
+}' > $OUTPUT_FILE
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-interchunk.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-interchunk.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-interchunk.1	(revision 69632)
@@ -0,0 +1,54 @@
+.TH apertium\-interchunk 1 2007-03-11 "" ""
+.SH NAME
+apertium\-interchunk \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-interchunk 
+[\-tz] trules preproc [input [output]]
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is the second transfer module of the Apertium level 2 transfer model after
+\fIapertium-transfer\fR and before \fIapertium-postchunk\fR.
+.PP
+It takes care of interchunk processing operations such as chunk
+reordering, changes in the morphosyntactical features of chunks
+according to the information in neighboring chunks, or generating new
+chunks.
+.SH OPTIONS
+  \-t         trace mode
+  \-z         flush buffer on the null character
+.SH FILES
+These are the kinds of files that can be used with this command:
+.PP
+.B trules
+A rules file with extension \fI.t2x\fR.
+.PP
+.B preproc
+A file with extension \fI.t2x.bin\fR that holds the result of
+preprocessing the \fItrules\fR file with
+\fIapertium-preprocess-transfer\fR.
+.PP
+.B input, output
+Represent the input and output files. By default they are the standard
+input and standard output.
+.SH SEE ALSO
+.I apertium\-gen\-modes\fR(1),
+.I apertium\-postchunk\fR(1),
+.I apertium\-transfer\fR(1),
+.I apertium\-validate\-interchunk\fR(1),
+.I apertium\-validate\-modes\fR(1),
+.I apertium\-validate\-postchunk\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005-2007 Universitat d'Alacant / Universidad de
+Alicante. This is free software.  You may
+redistribute copies of it under the terms of the GNU General Public
+License <http://www.gnu.org/licenses/gpl.html>.
Index: branches/apertium-tagger/apertium2/apertium/apertium-postchunk.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-postchunk.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-postchunk.1	(revision 69632)
@@ -0,0 +1,52 @@
+.TH apertium\-postchunk 1 2007-03-11 "" ""
+.SH NAME
+apertium\-postchunk \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-postchunk 
+[\-z] trules preproc [input [output]]
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is the third transfer module of the Apertium level 2 transfer model
+after \fIapertium-transfer\fR and \fIapertium-interchunk\fR.
+.PP
+It generates lexical forms from the chunks generated by
+apertium-interchunk by effecting some finishing changes in their
+morphological information.
+.SH OPTIONS
+\-z         flush buffer on the null character
+.SH FILES
+These are the kinds of files that can be used with this command:
+.PP
+.B trules
+A rules file with extension \fI.t3x\fR.
+.PP
+.B preproc
+A file with extension \fI.t3x.bin\fR that holds the result of
+preprocessing the \fItrules\fR file with
+\fIapertium-preprocess-transfer\fR.
+.PP
+.B input, output
+Represent the input and output files. By default they are the standard
+input and standard output.
+.SH SEE ALSO
+.I apertium\-gen\-modes\fR(1),
+.I apertium\-interchunk\fR(1),
+.I apertium\-validate\-postchunk\fR(1),
+.I apertium\-validate\-interchunk\fR(1),
+.I apertium\-validate\-modes\fR(1),
+.I apertium\-transfer\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005-2007 Universitat d'Alacant / Universidad de
+Alicante. This is free software.  You may
+redistribute copies of it under the terms of the GNU General Public
+License <http://www.gnu.org/licenses/gpl.html>.
Index: branches/apertium-tagger/apertium2/apertium/apertium_config.h.cmake_in
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium_config.h.cmake_in	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium_config.h.cmake_in	(revision 69632)
@@ -0,0 +1,57 @@
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "${PACKAGE_BUGREPORT}"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "${PACKAGE_NAME}"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "${PACKAGE_STRING}"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "${PACKAGE_TARNAME}"
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "${PACKAGE_VERSION}"
+/* Each HAVE_DECL_* below is written by CMake as 1 when the variable of that name is true, 0 otherwise. */
+#cmakedefine01 HAVE_DECL_FPUTS_UNLOCKED
+#cmakedefine01 HAVE_DECL_FGETC_UNLOCKED
+#cmakedefine01 HAVE_DECL_FPUTC_UNLOCKED
+#cmakedefine01 HAVE_DECL_FWRITE_UNLOCKED
+#cmakedefine01 HAVE_DECL_FREAD_UNLOCKED
+#cmakedefine01 HAVE_DECL_FGETWC_UNLOCKED
+#cmakedefine01 HAVE_DECL_FPUTWC_UNLOCKED
+#cmakedefine01 HAVE_DECL_FPUTWS_UNLOCKED
+/* Same 0/1 convention for the _nolock counterparts. */
+#cmakedefine01 HAVE_DECL_FPUTS_NOLOCK
+#cmakedefine01 HAVE_DECL_FGETC_NOLOCK
+#cmakedefine01 HAVE_DECL_FPUTC_NOLOCK
+#cmakedefine01 HAVE_DECL_FWRITE_NOLOCK
+#cmakedefine01 HAVE_DECL_FREAD_NOLOCK
+#cmakedefine01 HAVE_DECL_FGETWC_NOLOCK
+#cmakedefine01 HAVE_DECL_FPUTWC_NOLOCK
+#cmakedefine01 HAVE_DECL_FPUTWS_NOLOCK
+/* Alias each *_unlocked name to its _*_nolock counterpart when only the latter is available; the macros are always defined, so test their value, not defined(). */
+#if !HAVE_DECL_FPUTS_UNLOCKED && HAVE_DECL_FPUTS_NOLOCK
+#define fputs_unlocked _fputs_nolock
+#endif
+#if !HAVE_DECL_FGETC_UNLOCKED && HAVE_DECL_FGETC_NOLOCK
+#define fgetc_unlocked _fgetc_nolock
+#endif
+#if !HAVE_DECL_FPUTC_UNLOCKED && HAVE_DECL_FPUTC_NOLOCK
+#define fputc_unlocked _fputc_nolock
+#endif
+#if !HAVE_DECL_FWRITE_UNLOCKED && HAVE_DECL_FWRITE_NOLOCK
+#define fwrite_unlocked _fwrite_nolock
+#endif
+#if !HAVE_DECL_FREAD_UNLOCKED && HAVE_DECL_FREAD_NOLOCK
+#define fread_unlocked _fread_nolock
+#endif
+#if !HAVE_DECL_FPUTWS_UNLOCKED && HAVE_DECL_FPUTWS_NOLOCK
+#define fputws_unlocked _fputws_nolock
+#endif
+#if !HAVE_DECL_FGETWC_UNLOCKED && HAVE_DECL_FGETWC_NOLOCK
+#define fgetwc_unlocked _fgetwc_nolock
+#endif
+#if !HAVE_DECL_FPUTWC_UNLOCKED && HAVE_DECL_FPUTWC_NOLOCK
+#define fputwc_unlocked _fputwc_nolock
+#endif
Index: branches/apertium-tagger/apertium2/apertium/rtf-format-cp1250.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/rtf-format-cp1250.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/rtf-format-cp1250.xml	(revision 69632)
@@ -0,0 +1,532 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="rtf"> <!-- version 1.0 -->
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="windows-1250"/>
+    <output encoding="UTF-8"/>
+    <tag-name regexp=""/>
+    <escape-chars regexp='\\|[][&lt;&gt;@^$/{}]'/> 
+<!--    <escape-chars regexp='[]]^@&lt;&gt;/]'/>  -->
+
+    <space-chars regexp='[ \n\t\r$*]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    
+    <!-- Exceptions with priority 1  -->
+    <!-- Style Sheet names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\snext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\keycode&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Font names are also format: -->
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fcharset&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fnil&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\froman&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fswiss&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fmodern&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fscript&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fdecor&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\ftech&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fbidi&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\falt&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fontfile&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fn&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sbasedon&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\additive&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- File names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\file&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- List Table names are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\listname&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\leveltext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\levelnumbers&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Pictures are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pict&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sn&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sv&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;{\\\*\\blipuid  &quot;[^ \n\r]+&quot;}&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Objects are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\object&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Document Variables are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\docvar&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Bookmarks are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkstart&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkend&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Index Entries are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\rxe&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Fields are also format: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldinst&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldrslt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Other format tags which contain #CDATA: -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pntxt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\colortbl&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Hexadecimal Data -->
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp="[a-f0-9]{20,}" />
+    </format-rule>
+    
+    
+    
+    
+
+    <!-- End of sentence tags: par, row, cell -->
+    <format-rule type="empty" eos="yes" priority="2">
+      <tag regexp="&quot;\\par&quot;|&quot;\\row&quot;|&quot;\\cell&quot;" />
+    </format-rule>
+    
+
+    <format-rule type="open" eos="no" priority="3">
+      <tag regexp="&quot;{&quot;[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+
+    <!-- General Tags Specification -->
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\[^'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+    <format-rule type="close" eos="no" priority="4">
+      <tag regexp="&quot;}&quot;"/>
+    </format-rule>
+    <format-rule type="open" eos="no" priority="4">
+      <tag regexp="&quot;{&quot;"/>
+    </format-rule>
+
+
+    <replacement-rule regexp="&quot;\\'&quot;[0-9a-fA-F][0-9a-fA-F](\r|\n|&quot;\r\n&quot;)?"> 
+      <replace source="\\'8a" target="Š" prefer="yes"/>
+      <replace source="\\'8c" target="Ś" prefer="yes"/>
+      <replace source="\\'8d" target="Ť" prefer="yes"/>
+      <replace source="\\'8e" target="Ž" prefer="yes"/>
+      <replace source="\\'8f" target="Ź" prefer="yes"/>
+      <replace source="\\'9a" target="š" prefer="yes"/>
+      <replace source="\\'9c" target="ś" prefer="yes"/>
+      <replace source="\\'9d" target="ť" prefer="yes"/>
+      <replace source="\\'9e" target="ž" prefer="yes"/>
+      <replace source="\\'9f" target="ź" prefer="yes"/>
+      <replace source="\\'a3" target="Ł" prefer="yes"/>
+      <replace source="\\'a5" target="Ą" prefer="yes"/>
+      <replace source="\\'aa" target="Ş" prefer="yes"/>
+      <replace source="\\'af" target="Ż" prefer="yes"/>
+      <replace source="\\'b3" target="ł" prefer="yes"/>
+      <replace source="\\'b5" target="µ" prefer="yes"/>
+      <replace source="\\'b9" target="ą" prefer="yes"/>
+      <replace source="\\'ba" target="ş" prefer="yes"/>
+      <replace source="\\'bc" target="Ľ" prefer="yes"/>
+      <replace source="\\'be" target="ľ" prefer="yes"/>
+      <replace source="\\'bf" target="ż" prefer="yes"/>
+      <replace source="\\'c0" target="Ŕ" prefer="yes"/>
+      <replace source="\\'c1" target="Á" prefer="yes"/>
+      <replace source="\\'c2" target="Â" prefer="yes"/>
+      <replace source="\\'c3" target="Ă" prefer="yes"/>
+      <replace source="\\'c4" target="Ä" prefer="yes"/>
+      <replace source="\\'c5" target="Ĺ" prefer="yes"/>
+      <replace source="\\'c6" target="Ć" prefer="yes"/>
+      <replace source="\\'c7" target="Ç" prefer="yes"/>
+      <replace source="\\'c8" target="Č" prefer="yes"/>
+      <replace source="\\'c9" target="É" prefer="yes"/>
+      <replace source="\\'ca" target="Ę" prefer="yes"/>
+      <replace source="\\'cb" target="Ë" prefer="yes"/>
+      <replace source="\\'cc" target="Ě" prefer="yes"/>
+      <replace source="\\'cd" target="Í" prefer="yes"/>
+      <replace source="\\'ce" target="Î" prefer="yes"/>
+      <replace source="\\'cf" target="Ď" prefer="yes"/>
+      <replace source="\\'d0" target="Ð" prefer="yes"/>
+      <replace source="\\'d1" target="Ń" prefer="yes"/>
+      <replace source="\\'d2" target="Ň" prefer="yes"/>
+      <replace source="\\'d3" target="Ó" prefer="yes"/>
+      <replace source="\\'d4" target="Ô" prefer="yes"/>
+      <replace source="\\'d5" target="Ő" prefer="yes"/>
+      <replace source="\\'d6" target="Ö" prefer="yes"/>
+      <replace source="\\'d8" target="Ř" prefer="yes"/>
+      <replace source="\\'d9" target="Ů" prefer="yes"/>
+      <replace source="\\'da" target="Ú" prefer="yes"/>
+      <replace source="\\'db" target="Ű" prefer="yes"/>
+      <replace source="\\'dc" target="Ü" prefer="yes"/>
+      <replace source="\\'dd" target="Ý" prefer="yes"/>
+      <replace source="\\'de" target="Ţ" prefer="yes"/>
+      <replace source="\\'df" target="ß" prefer="yes"/>
+      <replace source="\\'e0" target="ŕ" prefer="yes"/>
+      <replace source="\\'e1" target="á" prefer="yes"/>
+      <replace source="\\'e2" target="â" prefer="yes"/>
+      <replace source="\\'e3" target="ă" prefer="yes"/>
+      <replace source="\\'e4" target="ä" prefer="yes"/>
+      <replace source="\\'e5" target="ĺ" prefer="yes"/>
+      <replace source="\\'e6" target="ć" prefer="yes"/>
+      <replace source="\\'e7" target="ç" prefer="yes"/>
+      <replace source="\\'e8" target="č" prefer="yes"/>
+      <replace source="\\'e9" target="é" prefer="yes"/>
+      <replace source="\\'ea" target="ę" prefer="yes"/>
+      <replace source="\\'eb" target="ë" prefer="yes"/>
+      <replace source="\\'ec" target="ě" prefer="yes"/>
+      <replace source="\\'ed" target="í" prefer="yes"/>
+      <replace source="\\'ee" target="î" prefer="yes"/>
+      <replace source="\\'ef" target="ď" prefer="yes"/>
+      <replace source="\\'f0" target="đ" prefer="yes"/>
+      <replace source="\\'f1" target="ń" prefer="yes"/>
+      <replace source="\\'f2" target="ň" prefer="yes"/>
+      <replace source="\\'f3" target="ó" prefer="yes"/>
+      <replace source="\\'f4" target="ô" prefer="yes"/>
+      <replace source="\\'f5" target="ő" prefer="yes"/>
+      <replace source="\\'f6" target="ö" prefer="yes"/>
+      <replace source="\\'f8" target="ř" prefer="yes"/>
+      <replace source="\\'f9" target="ů" prefer="yes"/>
+      <replace source="\\'fa" target="ú" prefer="yes"/>
+      <replace source="\\'fb" target="ű" prefer="yes"/>
+      <replace source="\\'fc" target="ü" prefer="yes"/>
+      <replace source="\\'fd" target="ý" prefer="yes"/>
+      <replace source="\\'fe" target="ţ" prefer="yes"/>
+      <replace source="\\'ff" target="˙" prefer="yes"/>
+      
+     <!-- The same characters followed by a \r\n: -->
+      <replace source="\\'8a\r\n" target="Š"/>
+      <replace source="\\'8c\r\n" target="Ś"/>
+      <replace source="\\'8d\r\n" target="Ť"/>
+      <replace source="\\'8e\r\n" target="Ž"/>
+      <replace source="\\'8f\r\n" target="Ź"/>
+      <replace source="\\'9a\r\n" target="š"/>
+      <replace source="\\'9c\r\n" target="ś"/>
+      <replace source="\\'9d\r\n" target="ť"/>
+      <replace source="\\'9e\r\n" target="ž"/>
+      <replace source="\\'9f\r\n" target="ź"/>
+      <replace source="\\'a3\r\n" target="Ł"/>
+      <replace source="\\'a5\r\n" target="Ą"/>
+      <replace source="\\'aa\r\n" target="Ş"/>
+      <replace source="\\'af\r\n" target="Ż"/>
+      <replace source="\\'b3\r\n" target="ł"/>
+      <replace source="\\'b5\r\n" target="µ"/>
+      <replace source="\\'b9\r\n" target="ą"/>
+      <replace source="\\'ba\r\n" target="ş"/>
+      <replace source="\\'bc\r\n" target="Ľ"/>
+      <replace source="\\'be\r\n" target="ľ"/>
+      <replace source="\\'bf\r\n" target="ż"/>
+      <replace source="\\'c0\r\n" target="Ŕ"/>
+      <replace source="\\'c1\r\n" target="Á"/>
+      <replace source="\\'c2\r\n" target="Â"/>
+      <replace source="\\'c3\r\n" target="Ă"/>
+      <replace source="\\'c4\r\n" target="Ä"/>
+      <replace source="\\'c5\r\n" target="Ĺ"/>
+      <replace source="\\'c6\r\n" target="Ć"/>
+      <replace source="\\'c7\r\n" target="Ç"/>
+      <replace source="\\'c8\r\n" target="Č"/>
+      <replace source="\\'c9\r\n" target="É"/>
+      <replace source="\\'ca\r\n" target="Ę"/>
+      <replace source="\\'cb\r\n" target="Ë"/>
+      <replace source="\\'cc\r\n" target="Ě"/>
+      <replace source="\\'cd\r\n" target="Í"/>
+      <replace source="\\'ce\r\n" target="Î"/>
+      <replace source="\\'cf\r\n" target="Ď"/>
+      <replace source="\\'d0\r\n" target="Ð"/>
+      <replace source="\\'d1\r\n" target="Ń"/>
+      <replace source="\\'d2\r\n" target="Ň"/>
+      <replace source="\\'d3\r\n" target="Ó"/>
+      <replace source="\\'d4\r\n" target="Ô"/>
+      <replace source="\\'d5\r\n" target="Ő"/>
+      <replace source="\\'d6\r\n" target="Ö"/>
+      <replace source="\\'d8\r\n" target="Ř"/>
+      <replace source="\\'d9\r\n" target="Ů"/>
+      <replace source="\\'da\r\n" target="Ú"/>
+      <replace source="\\'db\r\n" target="Ű"/>
+      <replace source="\\'dc\r\n" target="Ü"/>
+      <replace source="\\'dd\r\n" target="Ý"/>
+      <replace source="\\'de\r\n" target="Ţ"/>
+      <replace source="\\'df\r\n" target="ß"/>
+      <replace source="\\'e0\r\n" target="ŕ"/>
+      <replace source="\\'e1\r\n" target="á"/>
+      <replace source="\\'e2\r\n" target="â"/>
+      <replace source="\\'e3\r\n" target="ă"/>
+      <replace source="\\'e4\r\n" target="ä"/>
+      <replace source="\\'e5\r\n" target="ĺ"/>
+      <replace source="\\'e6\r\n" target="ć"/>
+      <replace source="\\'e7\r\n" target="ç"/>
+      <replace source="\\'e8\r\n" target="č"/>
+      <replace source="\\'e9\r\n" target="é"/>
+      <replace source="\\'ea\r\n" target="ę"/>
+      <replace source="\\'eb\r\n" target="ë"/>
+      <replace source="\\'ec\r\n" target="ě"/>
+      <replace source="\\'ed\r\n" target="í"/>
+      <replace source="\\'ee\r\n" target="î"/>
+      <replace source="\\'ef\r\n" target="ď"/>
+      <replace source="\\'f0\r\n" target="đ"/>
+      <replace source="\\'f1\r\n" target="ń"/>
+      <replace source="\\'f2\r\n" target="ň"/>
+      <replace source="\\'f3\r\n" target="ó"/>
+      <replace source="\\'f4\r\n" target="ô"/>
+      <replace source="\\'f5\r\n" target="ő"/>
+      <replace source="\\'f6\r\n" target="ö"/>
+      <replace source="\\'f8\r\n" target="ř"/>
+      <replace source="\\'f9\r\n" target="ů"/>
+      <replace source="\\'fa\r\n" target="ú"/>
+      <replace source="\\'fb\r\n" target="ű"/>
+      <replace source="\\'fc\r\n" target="ü"/>
+      <replace source="\\'fd\r\n" target="ý"/>
+      <replace source="\\'fe\r\n" target="ţ"/>
+      <replace source="\\'ff\r\n" target="˙"/>
+      <!-- The same characters followed by a \n: -->
+      <replace source="\\'8a\n" target="Š"/>
+      <replace source="\\'8c\n" target="Ś"/>
+      <replace source="\\'8d\n" target="Ť"/>
+      <replace source="\\'8e\n" target="Ž"/>
+      <replace source="\\'8f\n" target="Ź"/>
+      <replace source="\\'9a\n" target="š"/>
+      <replace source="\\'9c\n" target="ś"/>
+      <replace source="\\'9d\n" target="ť"/>
+      <replace source="\\'9e\n" target="ž"/>
+      <replace source="\\'9f\n" target="ź"/>
+      <replace source="\\'a3\n" target="Ł"/>
+      <replace source="\\'a5\n" target="Ą"/>
+      <replace source="\\'aa\n" target="Ş"/>
+      <replace source="\\'af\n" target="Ż"/>
+      <replace source="\\'b3\n" target="ł"/>
+      <replace source="\\'b5\n" target="µ"/>
+      <replace source="\\'b9\n" target="ą"/>
+      <replace source="\\'ba\n" target="ş"/>
+      <replace source="\\'bc\n" target="Ľ"/>
+      <replace source="\\'be\n" target="ľ"/>
+      <replace source="\\'bf\n" target="ż"/>
+      <replace source="\\'c0\n" target="Ŕ"/>
+      <replace source="\\'c1\n" target="Á"/>
+      <replace source="\\'c2\n" target="Â"/>
+      <replace source="\\'c3\n" target="Ă"/>
+      <replace source="\\'c4\n" target="Ä"/>
+      <replace source="\\'c5\n" target="Ĺ"/>
+      <replace source="\\'c6\n" target="Ć"/>
+      <replace source="\\'c7\n" target="Ç"/>
+      <replace source="\\'c8\n" target="Č"/>
+      <replace source="\\'c9\n" target="É"/>
+      <replace source="\\'ca\n" target="Ę"/>
+      <replace source="\\'cb\n" target="Ë"/>
+      <replace source="\\'cc\n" target="Ě"/>
+      <replace source="\\'cd\n" target="Í"/>
+      <replace source="\\'ce\n" target="Î"/>
+      <replace source="\\'cf\n" target="Ď"/>
+      <replace source="\\'d0\n" target="Ð"/>
+      <replace source="\\'d1\n" target="Ń"/>
+      <replace source="\\'d2\n" target="Ň"/>
+      <replace source="\\'d3\n" target="Ó"/>
+      <replace source="\\'d4\n" target="Ô"/>
+      <replace source="\\'d5\n" target="Ő"/>
+      <replace source="\\'d6\n" target="Ö"/>
+      <replace source="\\'d8\n" target="Ř"/>
+      <replace source="\\'d9\n" target="Ů"/>
+      <replace source="\\'da\n" target="Ú"/>
+      <replace source="\\'db\n" target="Ű"/>
+      <replace source="\\'dc\n" target="Ü"/>
+      <replace source="\\'dd\n" target="Ý"/>
+      <replace source="\\'de\n" target="Ţ"/>
+      <replace source="\\'df\n" target="ß"/>
+      <replace source="\\'e0\n" target="ŕ"/>
+      <replace source="\\'e1\n" target="á"/>
+      <replace source="\\'e2\n" target="â"/>
+      <replace source="\\'e3\n" target="ă"/>
+      <replace source="\\'e4\n" target="ä"/>
+      <replace source="\\'e5\n" target="ĺ"/>
+      <replace source="\\'e6\n" target="ć"/>
+      <replace source="\\'e7\n" target="ç"/>
+      <replace source="\\'e8\n" target="č"/>
+      <replace source="\\'e9\n" target="é"/>
+      <replace source="\\'ea\n" target="ę"/>
+      <replace source="\\'eb\n" target="ë"/>
+      <replace source="\\'ec\n" target="ě"/>
+      <replace source="\\'ed\n" target="í"/>
+      <replace source="\\'ee\n" target="î"/>
+      <replace source="\\'ef\n" target="ď"/>
+      <replace source="\\'f0\n" target="đ"/>
+      <replace source="\\'f1\n" target="ń"/>
+      <replace source="\\'f2\n" target="ň"/>
+      <replace source="\\'f3\n" target="ó"/>
+      <replace source="\\'f4\n" target="ô"/>
+      <replace source="\\'f5\n" target="ő"/>
+      <replace source="\\'f6\n" target="ö"/>
+      <replace source="\\'f8\n" target="ř"/>
+      <replace source="\\'f9\n" target="ů"/>
+      <replace source="\\'fa\n" target="ú"/>
+      <replace source="\\'fb\n" target="ű"/>
+      <replace source="\\'fc\n" target="ü"/>
+      <replace source="\\'fd\n" target="ý"/>
+      <replace source="\\'fe\n" target="ţ"/>
+      <replace source="\\'ff\n" target="˙"/>
+      <!-- The same characters followed by a \r: -->
+      <replace source="\\'8a\r" target="Š"/>
+      <replace source="\\'8c\r" target="Ś"/>
+      <replace source="\\'8d\r" target="Ť"/>
+      <replace source="\\'8e\r" target="Ž"/>
+      <replace source="\\'8f\r" target="Ź"/>
+      <replace source="\\'9a\r" target="š"/>
+      <replace source="\\'9c\r" target="ś"/>
+      <replace source="\\'9d\r" target="ť"/>
+      <replace source="\\'9e\r" target="ž"/>
+      <replace source="\\'9f\r" target="ź"/>
+      <replace source="\\'a3\r" target="Ł"/>
+      <replace source="\\'a5\r" target="Ą"/>
+      <replace source="\\'aa\r" target="Ş"/>
+      <replace source="\\'af\r" target="Ż"/>
+      <replace source="\\'b3\r" target="ł"/>
+      <replace source="\\'b5\r" target="µ"/>
+      <replace source="\\'b9\r" target="ą"/>
+      <replace source="\\'ba\r" target="ş"/>
+      <replace source="\\'bc\r" target="Ľ"/>
+      <replace source="\\'be\r" target="ľ"/>
+      <replace source="\\'bf\r" target="ż"/>
+      <replace source="\\'c0\r" target="Ŕ"/>
+      <replace source="\\'c1\r" target="Á"/>
+      <replace source="\\'c2\r" target="Â"/>
+      <replace source="\\'c3\r" target="Ă"/>
+      <replace source="\\'c4\r" target="Ä"/>
+      <replace source="\\'c5\r" target="Ĺ"/>
+      <replace source="\\'c6\r" target="Ć"/>
+      <replace source="\\'c7\r" target="Ç"/>
+      <replace source="\\'c8\r" target="Č"/>
+      <replace source="\\'c9\r" target="É"/>
+      <replace source="\\'ca\r" target="Ę"/>
+      <replace source="\\'cb\r" target="Ë"/>
+      <replace source="\\'cc\r" target="Ě"/>
+      <replace source="\\'cd\r" target="Í"/>
+      <replace source="\\'ce\r" target="Î"/>
+      <replace source="\\'cf\r" target="Ď"/>
+      <replace source="\\'d0\r" target="Ð"/>
+      <replace source="\\'d1\r" target="Ń"/>
+      <replace source="\\'d2\r" target="Ň"/>
+      <replace source="\\'d3\r" target="Ó"/>
+      <replace source="\\'d4\r" target="Ô"/>
+      <replace source="\\'d5\r" target="Ő"/>
+      <replace source="\\'d6\r" target="Ö"/>
+      <replace source="\\'d8\r" target="Ř"/>
+      <replace source="\\'d9\r" target="Ů"/>
+      <replace source="\\'da\r" target="Ú"/>
+      <replace source="\\'db\r" target="Ű"/>
+      <replace source="\\'dc\r" target="Ü"/>
+      <replace source="\\'dd\r" target="Ý"/>
+      <replace source="\\'de\r" target="Ţ"/>
+      <replace source="\\'df\r" target="ß"/>
+      <replace source="\\'e0\r" target="ŕ"/>
+      <replace source="\\'e1\r" target="á"/>
+      <replace source="\\'e2\r" target="â"/>
+      <replace source="\\'e3\r" target="ă"/>
+      <replace source="\\'e4\r" target="ä"/>
+      <replace source="\\'e5\r" target="ĺ"/>
+      <replace source="\\'e6\r" target="ć"/>
+      <replace source="\\'e7\r" target="ç"/>
+      <replace source="\\'e8\r" target="č"/>
+      <replace source="\\'e9\r" target="é"/>
+      <replace source="\\'ea\r" target="ę"/>
+      <replace source="\\'eb\r" target="ë"/>
+      <replace source="\\'ec\r" target="ě"/>
+      <replace source="\\'ed\r" target="í"/>
+      <replace source="\\'ee\r" target="î"/>
+      <replace source="\\'ef\r" target="ď"/>
+      <replace source="\\'f0\r" target="đ"/>
+      <replace source="\\'f1\r" target="ń"/>
+      <replace source="\\'f2\r" target="ň"/>
+      <replace source="\\'f3\r" target="ó"/>
+      <replace source="\\'f4\r" target="ô"/>
+      <replace source="\\'f5\r" target="ő"/>
+      <replace source="\\'f6\r" target="ö"/>
+      <replace source="\\'f8\r" target="ř"/>
+      <replace source="\\'f9\r" target="ů"/>
+      <replace source="\\'fa\r" target="ú"/>
+      <replace source="\\'fb\r" target="ű"/>
+      <replace source="\\'fc\r" target="ü"/>
+      <replace source="\\'fd\r" target="ý"/>
+      <replace source="\\'fe\r" target="ţ"/>
+      <replace source="\\'ff\r" target="˙"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/rtf-format-cp1251.xml
===================================================================
--- branches/apertium-tagger/apertium2/apertium/rtf-format-cp1251.xml	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/rtf-format-cp1251.xml	(revision 69632)
@@ -0,0 +1,576 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<format name="rtf"> <!-- version 1.0 -->
+  <options>
+    <largeblocks size="8192"/>
+    <input encoding="windows-1251"/>
+    <output encoding="UTF-8"/>
+    <tag-name regexp=""/>
+    <escape-chars regexp='\\|[][&lt;&gt;@^$/{}]'/> 
+    <!-- Disabled alternative definition: <escape-chars regexp='[]]^@&lt;&gt;/]'/> -->
+    <!-- space-chars lists: blank, \n, \t, \r, '$' and '*'. -->
+    <space-chars regexp='[ \n\t\r$*]'/>
+    <case-sensitive value="no"/>
+  </options>
+
+  <rules>
+    
+    <!-- Priority 1 exceptions: type="comment" rules, each delimited by a <begin> and an <end> pattern. -->
+    <!-- Style sheet names are format too: '{', any backslash keywords (not \'hh), then \snext or \keycode, up to ';'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\snext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\keycode&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Font names are format too: '{', any backslash keywords (not \'hh), then one of the keywords below, up to ';'. \sbasedon and \additive are listed in this group as well. -->
+    <format-rule type="comment" eos="no" priority="1">
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fcharset&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule>
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fnil&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\froman&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fswiss&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fmodern&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fscript&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fdecor&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\ftech&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fbidi&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\falt&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fontfile&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fn&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sbasedon&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\additive&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- File names are format too: '{', any backslash keywords (not \'hh), then \file, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\file&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- List table names are format too: '{', any backslash keywords (not \'hh), then \listname, \leveltext or \levelnumbers, up to ';'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\listname&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\leveltext&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\levelnumbers&quot;"/>
+      <end   regexp="&quot;;&quot;"/>
+    </format-rule> 
+    <!-- Pictures are format too: '{', any backslash keywords (not \'hh), then \pict, \sn, \sv or a {\*\blipuid ...} group, up to '}'. NOTE(review): the \blipuid literal is followed by two blanks; confirm this matches real input. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pict&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sn&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\sv&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;{\\\*\\blipuid  &quot;[^ \n\r]+&quot;}&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Objects are format too: '{', any backslash keywords (not \'hh), then \object, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\object&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Document variables are format too: '{', any backslash keywords (not \'hh), then \docvar, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\docvar&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Bookmarks are format too: '{', any backslash keywords (not \'hh), then \bkmkstart or \bkmkend, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkstart&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\bkmkend&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Index entries are format too: '{', any backslash keywords (not \'hh), then \rxe, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\rxe&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Fields are format too: '{', any backslash keywords (not \'hh), then \fldinst or \fldrslt, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldinst&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\fldrslt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Other format tags which contain #CDATA: '{', any backslash keywords (not \'hh), then \pntxt or \colortbl, up to '}'. -->
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\pntxt&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <format-rule type="comment" eos="no" priority="1"> 
+      <begin regexp="&quot;{&quot;[ \n\r]*(\\[^'][^ \n\r\\]*[ \n\r]*)*&quot;\\colortbl&quot;"/>
+      <end   regexp="&quot;}&quot;"/>
+    </format-rule> 
+    <!-- Hexadecimal data: a run of 20 or more characters from [a-f0-9] is one empty tag (eos="yes", priority 1). -->
+    <format-rule type="empty" eos="yes" priority="1">
+      <tag regexp="[a-f0-9]{20,}" />
+    </format-rule>
+    
+    
+    
+    
+
+    <!-- End-of-sentence tags (eos="yes", priority 2): the literal keywords \par, \row and \cell. -->
+    <format-rule type="empty" eos="yes" priority="2">
+      <tag regexp="&quot;\\par&quot;|&quot;\\row&quot;|&quot;\\cell&quot;" />
+    </format-rule>
+    
+    <!-- Opening tag (priority 3): '{', optional whitespace, one backslash keyword (not a \'hh escape) and any trailing whitespace. -->
+    <format-rule type="open" eos="no" priority="3">
+      <tag regexp="&quot;{&quot;[ \n\r]*\\[^'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+
+    <!-- General tags: any backslash keyword except \'hh, with trailing whitespace (empty, priority 3); a lone '}' (close) and a lone '{' (open), both priority 4. -->
+    <format-rule type="empty" eos="no" priority="3">
+      <tag regexp="\\[^'][^ \n\r\\]*[ \n\r]*"/>
+    </format-rule>
+    <format-rule type="close" eos="no" priority="4">
+      <tag regexp="&quot;}&quot;"/>
+    </format-rule>
+    <format-rule type="open" eos="no" priority="4">
+      <tag regexp="&quot;{&quot;"/>
+    </format-rule>
+
+
+    <replacement-rule regexp="&quot;\\'&quot;[0-9a-fA-F][0-9a-fA-F](\r|\n|&quot;\r\n&quot;)?"> 
+      <replace source="\\'80" target="Ђ" prefer="yes"/>
+      <replace source="\\'81" target="Ѓ" prefer="yes"/>
+      <replace source="\\'83" target="ѓ" prefer="yes"/>
+      <replace source="\\'8a" target="Љ" prefer="yes"/>
+      <replace source="\\'8c" target="Њ" prefer="yes"/>
+      <replace source="\\'8d" target="Ќ" prefer="yes"/>
+      <replace source="\\'8e" target="Ћ" prefer="yes"/>
+      <replace source="\\'8f" target="Џ" prefer="yes"/>
+      <replace source="\\'90" target="ђ" prefer="yes"/>
+      <replace source="\\'9a" target="љ" prefer="yes"/>
+      <replace source="\\'9c" target="њ" prefer="yes"/>
+      <replace source="\\'9d" target="ќ" prefer="yes"/>
+      <replace source="\\'9e" target="ћ" prefer="yes"/>
+      <replace source="\\'9f" target="џ" prefer="yes"/>
+      <replace source="\\'a1" target="Ў" prefer="yes"/>
+      <replace source="\\'a2" target="ў" prefer="yes"/>
+      <replace source="\\'a3" target="Ј" prefer="yes"/>
+      <replace source="\\'a5" target="Ґ" prefer="yes"/>
+      <replace source="\\'a8" target="Ё" prefer="yes"/>
+      <replace source="\\'aa" target="Є" prefer="yes"/>
+      <replace source="\\'af" target="Ї" prefer="yes"/>
+      <replace source="\\'b3" target="і" prefer="yes"/>
+      <replace source="\\'b4" target="ґ" prefer="yes"/>
+      <replace source="\\'b5" target="µ" prefer="yes"/>
+      <replace source="\\'b8" target="ё" prefer="yes"/>
+      <replace source="\\'ba" target="є" prefer="yes"/>
+      <replace source="\\'bc" target="ј" prefer="yes"/>
+      <replace source="\\'bd" target="Ѕ" prefer="yes"/>
+      <replace source="\\'be" target="ѕ" prefer="yes"/>
+      <replace source="\\'bf" target="ї" prefer="yes"/>
+      <replace source="\\'c0" target="А" prefer="yes"/>
+      <replace source="\\'c1" target="Б" prefer="yes"/>
+      <replace source="\\'c2" target="В" prefer="yes"/>
+      <replace source="\\'c3" target="Г" prefer="yes"/>
+      <replace source="\\'c4" target="Д" prefer="yes"/>
+      <replace source="\\'c5" target="Е" prefer="yes"/>
+      <replace source="\\'c6" target="Ж" prefer="yes"/>
+      <replace source="\\'c7" target="З" prefer="yes"/>
+      <replace source="\\'c8" target="И" prefer="yes"/>
+      <replace source="\\'c9" target="Й" prefer="yes"/>
+      <replace source="\\'ca" target="К" prefer="yes"/>
+      <replace source="\\'cb" target="Л" prefer="yes"/>
+      <replace source="\\'cc" target="М" prefer="yes"/>
+      <replace source="\\'cd" target="Н" prefer="yes"/>
+      <replace source="\\'ce" target="О" prefer="yes"/>
+      <replace source="\\'cf" target="П" prefer="yes"/>
+      <replace source="\\'d0" target="Р" prefer="yes"/>
+      <replace source="\\'d1" target="С" prefer="yes"/>
+      <replace source="\\'d2" target="Т" prefer="yes"/>
+      <replace source="\\'d3" target="У" prefer="yes"/>
+      <replace source="\\'d4" target="Ф" prefer="yes"/>
+      <replace source="\\'d5" target="Х" prefer="yes"/>
+      <replace source="\\'d6" target="Ц" prefer="yes"/>
+      <replace source="\\'d7" target="Ч" prefer="yes"/>
+      <replace source="\\'d8" target="Ш" prefer="yes"/>
+      <replace source="\\'d9" target="Щ" prefer="yes"/>
+      <replace source="\\'da" target="Ъ" prefer="yes"/>
+      <replace source="\\'db" target="Ы" prefer="yes"/>
+      <replace source="\\'dc" target="Ь" prefer="yes"/>
+      <replace source="\\'dd" target="Э" prefer="yes"/>
+      <replace source="\\'de" target="Ю" prefer="yes"/>
+      <replace source="\\'df" target="Я" prefer="yes"/>
+      <replace source="\\'e0" target="а" prefer="yes"/>
+      <replace source="\\'e1" target="б" prefer="yes"/>
+      <replace source="\\'e2" target="в" prefer="yes"/>
+      <replace source="\\'e3" target="г" prefer="yes"/>
+      <replace source="\\'e4" target="д" prefer="yes"/>
+      <replace source="\\'e5" target="е" prefer="yes"/>
+      <replace source="\\'e6" target="ж" prefer="yes"/>
+      <replace source="\\'e7" target="з" prefer="yes"/>
+      <replace source="\\'e8" target="и" prefer="yes"/>
+      <replace source="\\'e9" target="й" prefer="yes"/>
+      <replace source="\\'ea" target="к" prefer="yes"/>
+      <replace source="\\'eb" target="л" prefer="yes"/>
+      <replace source="\\'ec" target="м" prefer="yes"/>
+      <replace source="\\'ed" target="н" prefer="yes"/>
+      <replace source="\\'ee" target="о" prefer="yes"/>
+      <replace source="\\'ef" target="п" prefer="yes"/>
+      <replace source="\\'f0" target="р" prefer="yes"/>
+      <replace source="\\'f1" target="с" prefer="yes"/>
+      <replace source="\\'f2" target="т" prefer="yes"/>
+      <replace source="\\'f3" target="у" prefer="yes"/>
+      <replace source="\\'f4" target="ф" prefer="yes"/>
+      <replace source="\\'f5" target="х" prefer="yes"/>
+      <replace source="\\'f6" target="ц" prefer="yes"/>
+      <replace source="\\'f7" target="ч" prefer="yes"/>
+      <replace source="\\'f8" target="ш" prefer="yes"/>
+      <replace source="\\'f9" target="щ" prefer="yes"/>
+      <replace source="\\'fa" target="ъ" prefer="yes"/>
+      <replace source="\\'fb" target="ы" prefer="yes"/>
+      <replace source="\\'fc" target="ь" prefer="yes"/>
+      <replace source="\\'fd" target="э" prefer="yes"/>
+      <replace source="\\'fe" target="ю" prefer="yes"/>
+      <replace source="\\'ff" target="я" prefer="yes"/>
+      
+     <!-- The same characters followed by a \r\n: -->
+      <replace source="\\'80\r\n" target="Ђ"/>
+      <replace source="\\'81\r\n" target="Ѓ"/>
+      <replace source="\\'83\r\n" target="ѓ"/>
+      <replace source="\\'8a\r\n" target="Љ"/>
+      <replace source="\\'8c\r\n" target="Њ"/>
+      <replace source="\\'8d\r\n" target="Ќ"/>
+      <replace source="\\'8e\r\n" target="Ћ"/>
+      <replace source="\\'8f\r\n" target="Џ"/>
+      <replace source="\\'90\r\n" target="ђ"/>
+      <replace source="\\'9a\r\n" target="љ"/>
+      <replace source="\\'9c\r\n" target="њ"/>
+      <replace source="\\'9d\r\n" target="ќ"/>
+      <replace source="\\'9e\r\n" target="ћ"/>
+      <replace source="\\'9f\r\n" target="џ"/>
+      <replace source="\\'a1\r\n" target="Ў"/>
+      <replace source="\\'a2\r\n" target="ў"/>
+      <replace source="\\'a3\r\n" target="Ј"/>
+      <replace source="\\'a5\r\n" target="Ґ"/>
+      <replace source="\\'a8\r\n" target="Ё"/>
+      <replace source="\\'aa\r\n" target="Є"/>
+      <replace source="\\'af\r\n" target="Ї"/>
+      <replace source="\\'b3\r\n" target="і"/>
+      <replace source="\\'b4\r\n" target="ґ"/>
+      <replace source="\\'b5\r\n" target="µ"/>
+      <replace source="\\'b8\r\n" target="ё"/>
+      <replace source="\\'ba\r\n" target="є"/>
+      <replace source="\\'bc\r\n" target="ј"/>
+      <replace source="\\'bd\r\n" target="Ѕ"/>
+      <replace source="\\'be\r\n" target="ѕ"/>
+      <replace source="\\'bf\r\n" target="ї"/>
+      <replace source="\\'c0\r\n" target="А"/>
+      <replace source="\\'c1\r\n" target="Б"/>
+      <replace source="\\'c2\r\n" target="В"/>
+      <replace source="\\'c3\r\n" target="Г"/>
+      <replace source="\\'c4\r\n" target="Д"/>
+      <replace source="\\'c5\r\n" target="Е"/>
+      <replace source="\\'c6\r\n" target="Ж"/>
+      <replace source="\\'c7\r\n" target="З"/>
+      <replace source="\\'c8\r\n" target="И"/>
+      <replace source="\\'c9\r\n" target="Й"/>
+      <replace source="\\'ca\r\n" target="К"/>
+      <replace source="\\'cb\r\n" target="Л"/>
+      <replace source="\\'cc\r\n" target="М"/>
+      <replace source="\\'cd\r\n" target="Н"/>
+      <replace source="\\'ce\r\n" target="О"/>
+      <replace source="\\'cf\r\n" target="П"/>
+      <replace source="\\'d0\r\n" target="Р"/>
+      <replace source="\\'d1\r\n" target="С"/>
+      <replace source="\\'d2\r\n" target="Т"/>
+      <replace source="\\'d3\r\n" target="У"/>
+      <replace source="\\'d4\r\n" target="Ф"/>
+      <replace source="\\'d5\r\n" target="Х"/>
+      <replace source="\\'d6\r\n" target="Ц"/>
+      <replace source="\\'d7\r\n" target="Ч"/>
+      <replace source="\\'d8\r\n" target="Ш"/>
+      <replace source="\\'d9\r\n" target="Щ"/>
+      <replace source="\\'da\r\n" target="Ъ"/>
+      <replace source="\\'db\r\n" target="Ы"/>
+      <replace source="\\'dc\r\n" target="Ь"/>
+      <replace source="\\'dd\r\n" target="Э"/>
+      <replace source="\\'de\r\n" target="Ю"/>
+      <replace source="\\'df\r\n" target="Я"/>
+      <replace source="\\'e0\r\n" target="а"/>
+      <replace source="\\'e1\r\n" target="б"/>
+      <replace source="\\'e2\r\n" target="в"/>
+      <replace source="\\'e3\r\n" target="г"/>
+      <replace source="\\'e4\r\n" target="д"/>
+      <replace source="\\'e5\r\n" target="е"/>
+      <replace source="\\'e6\r\n" target="ж"/>
+      <replace source="\\'e7\r\n" target="з"/>
+      <replace source="\\'e8\r\n" target="и"/>
+      <replace source="\\'e9\r\n" target="й"/>
+      <replace source="\\'ea\r\n" target="к"/>
+      <replace source="\\'eb\r\n" target="л"/>
+      <replace source="\\'ec\r\n" target="м"/>
+      <replace source="\\'ed\r\n" target="н"/>
+      <replace source="\\'ee\r\n" target="о"/>
+      <replace source="\\'ef\r\n" target="п"/>
+      <replace source="\\'f0\r\n" target="р"/>
+      <replace source="\\'f1\r\n" target="с"/>
+      <replace source="\\'f2\r\n" target="т"/>
+      <replace source="\\'f3\r\n" target="у"/>
+      <replace source="\\'f4\r\n" target="ф"/>
+      <replace source="\\'f5\r\n" target="х"/>
+      <replace source="\\'f6\r\n" target="ц"/>
+      <replace source="\\'f7\r\n" target="ч"/>
+      <replace source="\\'f8\r\n" target="ш"/>
+      <replace source="\\'f9\r\n" target="щ"/>
+      <replace source="\\'fa\r\n" target="ъ"/>
+      <replace source="\\'fb\r\n" target="ы"/>
+      <replace source="\\'fc\r\n" target="ь"/>
+      <replace source="\\'fd\r\n" target="э"/>
+      <replace source="\\'fe\r\n" target="ю"/>
+      <replace source="\\'ff\r\n" target="я"/>
+      <!-- The same characters followed by a \n: -->
+      <replace source="\\'80\n" target="Ђ"/>
+      <replace source="\\'81\n" target="Ѓ"/>
+      <replace source="\\'83\n" target="ѓ"/>
+      <replace source="\\'8a\n" target="Љ"/>
+      <replace source="\\'8c\n" target="Њ"/>
+      <replace source="\\'8d\n" target="Ќ"/>
+      <replace source="\\'8e\n" target="Ћ"/>
+      <replace source="\\'8f\n" target="Џ"/>
+      <replace source="\\'90\n" target="ђ"/>
+      <replace source="\\'9a\n" target="љ"/>
+      <replace source="\\'9c\n" target="њ"/>
+      <replace source="\\'9d\n" target="ќ"/>
+      <replace source="\\'9e\n" target="ћ"/>
+      <replace source="\\'9f\n" target="џ"/>
+      <replace source="\\'a1\n" target="Ў"/>
+      <replace source="\\'a2\n" target="ў"/>
+      <replace source="\\'a3\n" target="Ј"/>
+      <replace source="\\'a5\n" target="Ґ"/>
+      <replace source="\\'a8\n" target="Ё"/>
+      <replace source="\\'aa\n" target="Є"/>
+      <replace source="\\'af\n" target="Ї"/>
+      <replace source="\\'b3\n" target="і"/>
+      <replace source="\\'b4\n" target="ґ"/>
+      <replace source="\\'b5\n" target="µ"/>
+      <replace source="\\'b8\n" target="ё"/>
+      <replace source="\\'ba\n" target="є"/>
+      <replace source="\\'bc\n" target="ј"/>
+      <replace source="\\'bd\n" target="Ѕ"/>
+      <replace source="\\'be\n" target="ѕ"/>
+      <replace source="\\'bf\n" target="ї"/>
+      <replace source="\\'c0\n" target="А"/>
+      <replace source="\\'c1\n" target="Б"/>
+      <replace source="\\'c2\n" target="В"/>
+      <replace source="\\'c3\n" target="Г"/>
+      <replace source="\\'c4\n" target="Д"/>
+      <replace source="\\'c5\n" target="Е"/>
+      <replace source="\\'c6\n" target="Ж"/>
+      <replace source="\\'c7\n" target="З"/>
+      <replace source="\\'c8\n" target="И"/>
+      <replace source="\\'c9\n" target="Й"/>
+      <replace source="\\'ca\n" target="К"/>
+      <replace source="\\'cb\n" target="Л"/>
+      <replace source="\\'cc\n" target="М"/>
+      <replace source="\\'cd\n" target="Н"/>
+      <replace source="\\'ce\n" target="О"/>
+      <replace source="\\'cf\n" target="П"/>
+      <replace source="\\'d0\n" target="Р"/>
+      <replace source="\\'d1\n" target="С"/>
+      <replace source="\\'d2\n" target="Т"/>
+      <replace source="\\'d3\n" target="У"/>
+      <replace source="\\'d4\n" target="Ф"/>
+      <replace source="\\'d5\n" target="Х"/>
+      <replace source="\\'d6\n" target="Ц"/>
+      <replace source="\\'d7\n" target="Ч"/>
+      <replace source="\\'d8\n" target="Ш"/>
+      <replace source="\\'d9\n" target="Щ"/>
+      <replace source="\\'da\n" target="Ъ"/>
+      <replace source="\\'db\n" target="Ы"/>
+      <replace source="\\'dc\n" target="Ь"/>
+      <replace source="\\'dd\n" target="Э"/>
+      <replace source="\\'de\n" target="Ю"/>
+      <replace source="\\'df\n" target="Я"/>
+      <replace source="\\'e0\n" target="а"/>
+      <replace source="\\'e1\n" target="б"/>
+      <replace source="\\'e2\n" target="в"/>
+      <replace source="\\'e3\n" target="г"/>
+      <replace source="\\'e4\n" target="д"/>
+      <replace source="\\'e5\n" target="е"/>
+      <replace source="\\'e6\n" target="ж"/>
+      <replace source="\\'e7\n" target="з"/>
+      <replace source="\\'e8\n" target="и"/>
+      <replace source="\\'e9\n" target="й"/>
+      <replace source="\\'ea\n" target="к"/>
+      <replace source="\\'eb\n" target="л"/>
+      <replace source="\\'ec\n" target="м"/>
+      <replace source="\\'ed\n" target="н"/>
+      <replace source="\\'ee\n" target="о"/>
+      <replace source="\\'ef\n" target="п"/>
+      <replace source="\\'f0\n" target="р"/>
+      <replace source="\\'f1\n" target="с"/>
+      <replace source="\\'f2\n" target="т"/>
+      <replace source="\\'f3\n" target="у"/>
+      <replace source="\\'f4\n" target="ф"/>
+      <replace source="\\'f5\n" target="х"/>
+      <replace source="\\'f6\n" target="ц"/>
+      <replace source="\\'f7\n" target="ч"/>
+      <replace source="\\'f8\n" target="ш"/>
+      <replace source="\\'f9\n" target="щ"/>
+      <replace source="\\'fa\n" target="ъ"/>
+      <replace source="\\'fb\n" target="ы"/>
+      <replace source="\\'fc\n" target="ь"/>
+      <replace source="\\'fd\n" target="э"/>
+      <replace source="\\'fe\n" target="ю"/>
+      <replace source="\\'ff\n" target="я"/>
+      <!-- The same characters followed by a \r: -->
+      <replace source="\\'80\r" target="Ђ"/>
+      <replace source="\\'81\r" target="Ѓ"/>
+      <replace source="\\'83\r" target="ѓ"/>
+      <replace source="\\'8a\r" target="Љ"/>
+      <replace source="\\'8c\r" target="Њ"/>
+      <replace source="\\'8d\r" target="Ќ"/>
+      <replace source="\\'8e\r" target="Ћ"/>
+      <replace source="\\'8f\r" target="Џ"/>
+      <replace source="\\'90\r" target="ђ"/>
+      <replace source="\\'9a\r" target="љ"/>
+      <replace source="\\'9c\r" target="њ"/>
+      <replace source="\\'9d\r" target="ќ"/>
+      <replace source="\\'9e\r" target="ћ"/>
+      <replace source="\\'9f\r" target="џ"/>
+      <replace source="\\'a1\r" target="Ў"/>
+      <replace source="\\'a2\r" target="ў"/>
+      <replace source="\\'a3\r" target="Ј"/>
+      <replace source="\\'a5\r" target="Ґ"/>
+      <replace source="\\'a8\r" target="Ё"/>
+      <replace source="\\'aa\r" target="Є"/>
+      <replace source="\\'af\r" target="Ї"/>
+      <replace source="\\'b3\r" target="і"/>
+      <replace source="\\'b4\r" target="ґ"/>
+      <replace source="\\'b5\r" target="µ"/>
+      <replace source="\\'b8\r" target="ё"/>
+      <replace source="\\'ba\r" target="є"/>
+      <replace source="\\'bc\r" target="ј"/>
+      <replace source="\\'bd\r" target="Ѕ"/>
+      <replace source="\\'be\r" target="ѕ"/>
+      <replace source="\\'bf\r" target="ї"/>
+      <replace source="\\'c0\r" target="А"/>
+      <replace source="\\'c1\r" target="Б"/>
+      <replace source="\\'c2\r" target="В"/>
+      <replace source="\\'c3\r" target="Г"/>
+      <replace source="\\'c4\r" target="Д"/>
+      <replace source="\\'c5\r" target="Е"/>
+      <replace source="\\'c6\r" target="Ж"/>
+      <replace source="\\'c7\r" target="З"/>
+      <replace source="\\'c8\r" target="И"/>
+      <replace source="\\'c9\r" target="Й"/>
+      <replace source="\\'ca\r" target="К"/>
+      <replace source="\\'cb\r" target="Л"/>
+      <replace source="\\'cc\r" target="М"/>
+      <replace source="\\'cd\r" target="Н"/>
+      <replace source="\\'ce\r" target="О"/>
+      <replace source="\\'cf\r" target="П"/>
+      <replace source="\\'d0\r" target="Р"/>
+      <replace source="\\'d1\r" target="С"/>
+      <replace source="\\'d2\r" target="Т"/>
+      <replace source="\\'d3\r" target="У"/>
+      <replace source="\\'d4\r" target="Ф"/>
+      <replace source="\\'d5\r" target="Х"/>
+      <replace source="\\'d6\r" target="Ц"/>
+      <replace source="\\'d7\r" target="Ч"/>
+      <replace source="\\'d8\r" target="Ш"/>
+      <replace source="\\'d9\r" target="Щ"/>
+      <replace source="\\'da\r" target="Ъ"/>
+      <replace source="\\'db\r" target="Ы"/>
+      <replace source="\\'dc\r" target="Ь"/>
+      <replace source="\\'dd\r" target="Э"/>
+      <replace source="\\'de\r" target="Ю"/>
+      <replace source="\\'df\r" target="Я"/>
+      <replace source="\\'e0\r" target="а"/>
+      <replace source="\\'e1\r" target="б"/>
+      <replace source="\\'e2\r" target="в"/>
+      <replace source="\\'e3\r" target="г"/>
+      <replace source="\\'e4\r" target="д"/>
+      <replace source="\\'e5\r" target="е"/>
+      <replace source="\\'e6\r" target="ж"/>
+      <replace source="\\'e7\r" target="з"/>
+      <replace source="\\'e8\r" target="и"/>
+      <replace source="\\'e9\r" target="й"/>
+      <replace source="\\'ea\r" target="к"/>
+      <replace source="\\'eb\r" target="л"/>
+      <replace source="\\'ec\r" target="м"/>
+      <replace source="\\'ed\r" target="н"/>
+      <replace source="\\'ee\r" target="о"/>
+      <replace source="\\'ef\r" target="п"/>
+      <replace source="\\'f0\r" target="р"/>
+      <replace source="\\'f1\r" target="с"/>
+      <replace source="\\'f2\r" target="т"/>
+      <replace source="\\'f3\r" target="у"/>
+      <replace source="\\'f4\r" target="ф"/>
+      <replace source="\\'f5\r" target="х"/>
+      <replace source="\\'f6\r" target="ц"/>
+      <replace source="\\'f7\r" target="ч"/>
+      <replace source="\\'f8\r" target="ш"/>
+      <replace source="\\'f9\r" target="щ"/>
+      <replace source="\\'fa\r" target="ъ"/>
+      <replace source="\\'fb\r" target="ы"/>
+      <replace source="\\'fc\r" target="ь"/>
+      <replace source="\\'fd\r" target="э"/>
+      <replace source="\\'fe\r" target="ю"/>
+      <replace source="\\'ff\r" target="я"/>
+    </replacement-rule>
+  </rules>
+
+</format>
Index: branches/apertium-tagger/apertium2/apertium/apertium-lextor.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-lextor.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-lextor.1	(revision 69632)
@@ -0,0 +1,132 @@
+.TH apertium-lextor 1 2006-12-12 "" ""
+.SH NAME
+apertium-lextor \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-lextor
+.B \-\-trainwrd\fR stopwords words n left right corpus model
+[
+.B \-\-weightexp\fR w
+]
+[
+.B \-\-debug\fR
+]
+.PP
+.B apertium\-lextor
+.B \-\-trainlch\fR stopwords lexchoices n left right corpus wordmodel dic bildic model
+[
+.B \-\-weightexp\fR w
+]
+[
+.B \-\-debug\fR
+]
+.PP
+.B apertium\-lextor
+.B \-\-lextor\fR model dic left right
+[
+.B \-\-debug\fR
+]
+[
+.B \-\-weightexp\fR w
+]
+.PP
+.SH DESCRIPTION
+.BR apertium\-lextor 
+is the application responsible for training and usage of the \fIlexical
+selector module\fR.
+.SH OPTIONS
+.PP
+.B \-\-trainwrd | \-t\fR
+.br
+Train word co-occurrences model. It needs the following required parameters:
+.TP
+.I stopwords\fR file containing a list of stop words. Stop words are ignored.
+.TP
+.I words\fR file containing a list of words. For each word a co-occurrence model is built.
+.TP
+.I n\fR number of words per co\-occurrence model (for each model, the \fIn\fR most frequent words).
+.TP
+.I left\fR left\-side context to take into account (number of words).
+.TP
+.I right\fR right\-side context to take into account (number of words).
+.TP
+.I corpus\fR file containing the training corpus.
+.TP
+.I model\fR output file on which the co\-occurrence models are saved.
+.PP
+.B \-\-trainlch | \-r\fR
+.br
+Train lexical choices co\-occurrence models using a target language
+co\-occurrence model and a bilingual dictionary. It needs the
+following required parameters:
+.TP
+.I stopwords\fR file containing a list of stop words. Stop words are ignored.
+.TP
+.I lexchoices\fR file containing a list of lexical choices. For each lexical choice a co\-occurrence model is built.
+.TP
+.I n\fR number of words per co\-occurrence model (for each model, the n most frequent words).
+.TP
+.I left\fR left\-side context to take into account (number of words).
+.TP
+.I right\fR right\-side context to take into account (number of words).
+.TP
+.I corpus\fR file containing the training corpus.
+.TP 
+.I wordmodel\fR target\-language word co\-occurrence model (previously trained by means of the \fB\-\-trainwrd\fR option).
+.TP
+.I dic\fR the lexical-selection dictionary (binary format).
+.TP
+.I bildic\fR the bilingual dictionary (binary format).
+.TP
+.I model\fR output file on which the co\-occurrence models are saved.
+.PP
+.PP
+.B \-\-lextor | \-l
+.br
+Perform the lexical selection on the input stream. It needs the
+following required parameters:
+.TP
+.I model\fR  file containing the model to be used for the lexical selection.
+.TP
+.I dic\fR lexical\-selection dictionary (binary format).
+.TP
+.I left\fR left\-side context to take into account (number of words).
+.TP
+.I right\fR right\-side context to take into account (number of words).
+.PP
+.B \-\-weightexp w
+.br 
+Specify a weight value to change the influence of surrounding words
+while training or performing the lexical selection. The parameter
+\fIw\fR must be a positive value.
+.PP
+.B \-\-debug | \-d
+.br 
+Show debug information while working.
+.PP
+.B \-\-help | \-h
+.br
+Shows this help.
+.PP
+.B \-\-version | \-v
+.br
+Shows license information.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-gen\-lextormono\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-lextorbil.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-lextorbil.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-lextorbil.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-gen-lextorbil 1 2006-12-11 "" ""
+.SH NAME
+apertium-gen-lextorbil \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-lextorbil
+.I lr\fR\ |
+.I rl\fR
+input_file output_file
+.PP
+.SH DESCRIPTION
+.BR apertium\-gen\-lextorbil 
+is the application responsible for generating the bilingual dictionary
+used by the transfer module when apertium\-lextor is being used to
+perform lexical selection.
+.SH OPTIONS
+.TP
+.B lr\fR The bilingual dictionary to generate is for left to right translation.
+.PP
+.B rl\fR The bilingual dictionary to generate is for right to left translation.
+.SH FILES
+These are the kinds of files used with this tool:
+.PP
+.B input_file
+A bilingual dictionary.
+.PP
+.B output_file
+A bilingual dictionary in which each word has \fIonly\fR one
+translation equivalent.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\fR(1),
+.I apertium\-lextor\-eval\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-lextormono.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-lextormono.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-lextormono.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-gen-lextormono 1 2006-12-11 "" ""
+.SH NAME
+apertium-gen-lextormono \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-lextormono
+.I lr\fR\ |
+.I rl\fR
+input_file output_file
+.PP
+.SH DESCRIPTION
+.BR apertium\-gen\-lextormono 
+is the application responsible for generating the monolingual
+dictionary used by the lexical selection module to know about the
+translation sense marks of each source language word.
+.SH OPTIONS
+.TP
+.B lr\fR The monolingual dictionary to generate is for left to right translation.
+.PP
+.B rl\fR The monolingual dictionary to generate is for right to left translation.
+.SH FILES
+These are the kinds of files used with this tool:
+.PP
+.B input_file
+A bilingual dictionary.
+.PP
+.B output_file
+A monolingual dictionary that for each word gives its translation
+sense marks.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-modes.1	(revision 69632)
@@ -0,0 +1,43 @@
+.TH apertium\-gen\-modes 1 2007-03-11 "" ""
+.SH NAME
+apertium\-gen\-modes \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-modes
+modes-file [package name]
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It creates a series of \fI.mode\fR files from a \fImodes-file\fR.
+Files for modes marked as \fIinstall\fR are put into the current
+directory; files for modes that are not to be installed, for
+example debugging modes, are put into a \fImodes\fR directory
+inside the linguistic data.
+.SH OPTIONS
+If a package name is specified, it creates the modes with the apertium
+install prefix plus the package name. If you wish to install the modes, 
+you should specify the package name. If you don't want to install the modes,
+leave it off.
+.SH FILES
+.B modes-file
+An XML file that tells \fBapertium\-gen\-modes\fR which scripts must
+be created in the directory \fImodes\fR.	
+.SH SEE ALSO
+.I apertium\-interchunk\fR(1),
+.I apertium\-postchunk\fR(1),
+.I apertium\-validate\-interchunk\fR(1),
+.I apertium\-validate\-modes\fR(1),
+.I apertium\-validate\-postchunk\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005-2007 Universitat d'Alacant / Universidad de
+Alicante. This is free software.  You may
+redistribute copies of it under the terms of the GNU General Public
+License <http://www.gnu.org/licenses/gpl.html>.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-stopwords-lextor.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-stopwords-lextor.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-stopwords-lextor.1	(revision 69632)
@@ -0,0 +1,45 @@
+.TH apertium-gen-stopwords-lextor 1 2006-12-12 "" ""
+.SH NAME
+apertium-gen-stopwords-lextor \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-stopwords\-lextor
+n input_file output_file
+.PP
+.SH DESCRIPTION
+.BR apertium\-gen\-stopwords\-lextor 
+is the application responsible for generating the list of
+\fIstopwords\fR used by the lexical selection module
+(apertium\-lextor). Stopwords are ignored as they cannot have multiple
+translations.
+.SH OPTIONS
+.B n
+the desired number of stopwords.
+.SH FILES
+These are the kinds of parameters and files used with this tool:
+.PP
+.B input_file
+contains a large preprocessed corpus (see
+apertium\-preprocess\-corpus\-lextor).
+.PP
+.B output_file
+The file which gets the generated stopwords.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor-translation.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor-translation.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor-translation.1	(revision 69632)
@@ -0,0 +1,56 @@
+.TH apertium-gen-wlist-lextor-translation 1 2006-12-12 "" ""
+.SH NAME
+apertium-gen-wlist-lextor-translation \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-wlist\-lextor\-translation
+.B \-\-mono|\-m\fR dic.bin 
+.B \-\-bil|\-b\fR bildic.bin 
+.B \-\-wlist|\-w\fR wlistfile
+.PP
+.SH DESCRIPTION
+.BR apertium\-gen\-wlist\-lextor\-translation 
+is the application responsible for generating all the possible
+translations of polysemous words.
+.SH OPTIONS
+.TP
+.B \-\-mono|\-m\fR dic.bin
+.TP
+Specifies the monolingual lexical selection dictionary to use (see apertium\-gen\-lextormono).
+.TP
+.B \-\-bil|\-b\fR bildic.bin
+.TP
+Specifies the bilingual lexical selection dictionary to use (see apertium\-gen\-lextorbil).
+.TP 
+.B \-\-wlist|-w\fR wlistfile
+.TP
+Specifies the list of words to translate (see apertium\-gen\-wlist\-lextor).
+.TP 
+.B \-\-help|\-h\fR
+.TP
+Shows a brief usage help.
+.TP 
+.B \-\-version|\-v\fR
+.TP
+Shows the version string of this tool and its license.
+.SH FILES
+This tool uses no files apart from the ones associated to each option.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-gen-wlist-lextor.1	(revision 69632)
@@ -0,0 +1,42 @@
+.TH apertium-gen-wlist-lextor 1 2006-12-12 "" ""
+.SH NAME
+apertium-gen-wlist-lextor \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-gen\-wlist\-lextor
+input_file output_file
+.PP
+.SH DESCRIPTION
+.BR apertium\-gen\-wlist\-lextor 
+is the application responsible for generating the list of words used by
+apertium\-lextor.
+.SH OPTIONS
+This tool currently has no options.
+.SH FILES
+These are the kinds of files used with this tool:
+.PP
+.B input_file
+is a lextor monolingual dictionary file generated with
+\fIapertium\-gen\-lextormono\fR. These files usually have the extension \fI.dix\fR.
+.PP
+.B output_file
+The file which gets the generated list of words.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-lextor-eval.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-lextor-eval.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-lextor-eval.1	(revision 69632)
@@ -0,0 +1,62 @@
+.TH apertium-lextor-eval 1 2006-12-12 "" ""
+.SH NAME
+apertium-lextor-eval \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-lextor\-eval
+.B \-\-reference\fR reftext 
+.B \-\-parameters\fR | \fB\-p\fR model dic left right
+.PP
+.SH DESCRIPTION
+.BR apertium\-lextor\-eval 
+is the application used to evaluate the performance of lexical
+selection models that have been previously estimated with
+\fBapertium\-lextor\fR(1). To achieve this purpose a manually
+disambiguated corpus is used. This corpus is read from the standard
+input and must be in the intermediate format used by apertium.
+.SH OPTIONS
+.TP
+.B \-\-reference | \-r\fR
+.br
+Specifies the reference corpus used for evaluation (one word per line
+with the correct translation sense for those words with more than
+one).
+.TP
+.B \-\-parameters | \-p\fR
+.br
+Specifies the following required parameters:
+.TP
+.I model\fR the file containing the model to be used for the lexical selection.
+.TP
+.I dic\fR  the lexical\-selection dictionary in binary format.
+.TP
+.I left\fR left\-side context to take into account (number of words).
+.TP
+.I right\fR right\-side context to take into account (number of words).
+.PP
+.B \-\-help | \-h
+.br
+Shows this help.
+.PP
+.B \-\-version | \-v
+.br
+Shows license information.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-preprocess\-corpus\-lextor\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-preprocess-corpus-lextor.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-preprocess-corpus-lextor.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-preprocess-corpus-lextor.1	(revision 69632)
@@ -0,0 +1,48 @@
+.TH apertium-preprocess-corpus-lextor 1 2006-12-12 "" ""
+.SH NAME
+apertium-preprocess-corpus-lextor \- This application is part of
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium machine translation
+architecture: \fBhttp://apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-preprocess\-corpus\-lextor
+data_dir translation_dir input_file output_file
+.PP
+.SH DESCRIPTION
+.BR apertium\-preprocess\-corpus\-lextor 
+is the application responsible for preprocessing the training corpus
+for the lexical selector training.
+.SH OPTIONS
+This tool currently has no options.
+.SH FILES
+These are the kinds of files and directories used with this tool:
+.PP
+.B data_dir
+the path to the linguistic data to use.
+.PP
+.B translation_dir
+the translation direction to use.
+.PP
+.B input_file
+contains a large corpus in 
+.I raw\fR format.
+.PP
+.B output_file
+The file which gets the preprocessed corpus.
+.PP
+.SH SEE ALSO
+.I apertium\-gen\-lextorbil\fR(1),
+.I apertium\-gen\-lextormono\fR(1),
+.I apertium\-lextor\-eval\fR(1),
+.I apertium\-gen\-stopwords\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\fR(1),
+.I apertium\-gen\-wlist\-lextor\-translation\fR(1),
+.I apertium\-lextor\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+(c) 2005,2006 Universitat d'Alacant / Universidad de Alicante. All rights
+reserved.
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-acx.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-acx.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-acx.1	(revision 69632)
@@ -0,0 +1,40 @@
+.TH apertium\-validate\-acx 1 2006\-03\-11 "" ""
+.SH NAME
+apertium\-validate\-acx \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open\-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-validate\-acx 
+<input_file>
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is a script that validates a set of structural acx rules
+against the apertium structural acx rules RNG using the
+\fBxmllint\fR utility.
+.SH OPTIONS
+It has no options.
+.SH FILES
+.B input_file
+A \fIacx.xml\fR file
+.PP
+.B acx.rng
+The RELAX NG schema used to validate the input file.
+.SH SEE ALSO
+.I apertium\-gen\-modes\fR(1),
+.I apertium\-gen\-oldbil\fR(1),
+.I apertium\-interchunk\fR(1),
+.I apertium\-validate\-modes\fR(1),
+.I apertium\-validate\-interchunk\fR(1),
+.I apertium\-validate\-postchunk\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005-2007 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-dictionary.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-dictionary.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-dictionary.1	(revision 69632)
@@ -0,0 +1,23 @@
+.TH apertium-validate-dictionary 1 2006-03-21 "" ""
+.SH NAME
+apertium-validate-dictionary \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-validate-dictionary <input_file>
+.SH DESCRIPTION
+.BR apertium-validate-dictionary
+is a script that validates a dictionary file against
+the apertium DTD file for dictionaries using the xmllint utility.
+
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-interchunk.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-interchunk.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-interchunk.1	(revision 69632)
@@ -0,0 +1,34 @@
+.TH apertium\-validate\-interchunk 1 2006\-03\-11 "" ""
+.SH NAME
+apertium\-validate\-interchunk \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open\-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-validate\-interchunk 
+<input_file>
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is a script that validates a set of structural interchunk rules
+against the apertium structural interchunk rules DTD using the
+\fBxmllint\fR utility.
+.SH OPTIONS
+It has no options.
+.SH FILES
+.B input_file
+A \fIinterchunk.xml\fR file
+.PP
+.B interchunk.dtd
+The DTD used to validate the input file.
+.PP
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005-2007 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-modes.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-modes.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-modes.1	(revision 69632)
@@ -0,0 +1,39 @@
+.TH apertium\-validate\-modes 1 2006\-03\-11 "" ""
+.SH NAME
+apertium\-validate\-modes \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open\-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-validate\-modes 
+<input_file>
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is a script that validates a \fImodes.xml\fR file against the
+apertium structural modes DTD using the \fBxmllint\fR utility.
+.SH OPTIONS
+It has no options.
+.SH FILES
+.B input_file
+A \fImodes.xml\fR file
+.PP
+.B modes.dtd
+The DTD used to validate the input file.
+.SH SEE ALSO
+.I apertium\-gen\-modes\fR(1),
+.I apertium\-gen\-oldbil\fR(1),
+.I apertium\-interchunk\fR(1),
+.I apertium\-validate\-postchunk\fR(1),
+.I apertium\-validate\-interchunk\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005-2007 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-postchunk.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-postchunk.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-postchunk.1	(revision 69632)
@@ -0,0 +1,39 @@
+.TH apertium\-validate\-postchunk 1 2006\-03\-11 "" ""
+.SH NAME
+apertium\-validate\-postchunk \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open\-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium\-validate\-postchunk 
+<input_file>
+.SH DESCRIPTION
+This is an intermediate tool from the Apertium level 2 engine. You should
+never have to use it independently.
+.PP
+It is a script that validates a set of structural postchunk rules
+against the apertium structural postchunk rules DTD using the
+\fBxmllint\fR utility.
+.SH OPTIONS
+It has no options.
+.SH FILES
+.B input_file
+A \fIpostchunk.xml\fR file
+.PP
+.B postchunk.dtd
+The DTD used to validate the input file.
+.SH SEE ALSO
+.I apertium\-gen\-modes\fR(1),
+.I apertium\-gen\-oldbil\fR(1),
+.I apertium\-interchunk\fR(1),
+.I apertium\-validate\-modes\fR(1),
+.I apertium\-validate\-interchunk\fR(1).
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005-2007 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-tagger.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-tagger.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-tagger.1	(revision 69632)
@@ -0,0 +1,25 @@
+.TH apertium-validate-tagger 1 2006-03-21 "" ""
+.SH NAME
+apertium-validate-tagger \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation
+toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-validate-tagger <input_file>
+.SH DESCRIPTION
+.BR apertium-validate-tagger
+is a script that checks the validity of a set of rules which 
+enforce the state to state transition probabilities used by the 
+part-of-speech tagger. The script uses xmllint to validate
+against a DTD.
+
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/apertium-validate-transfer.1
===================================================================
--- branches/apertium-tagger/apertium2/apertium/apertium-validate-transfer.1	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/apertium-validate-transfer.1	(revision 69632)
@@ -0,0 +1,22 @@
+.TH apertium-validate-transfer 1 2006-03-21 "" ""
+.SH NAME
+apertium-validate-transfer \- This application is part of 
+(
+.B apertium
+)
+.PP
+This tool is part of the apertium open-source machine translation toolbox: \fBhttp://www.apertium.org\fR.
+.SH SYNOPSIS
+.B apertium-validate-transfer <input_file>
+.SH DESCRIPTION
+.BR apertium-validate-transfer
+is a script that validates a set of structural transfer rules against
+the apertium structural transfer rules DTD using the xmllint utility.
+
+.SH BUGS
+Lots of them...lurking in the dark and waiting for you!
+.SH AUTHOR
+Copyright (c) 2005, 2006 Universitat d'Alacant / Universidad de Alicante.
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+
Index: branches/apertium-tagger/apertium2/apertium/deformat-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/deformat-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/deformat-header.sh	(revision 69632)
@@ -0,0 +1,51 @@
+if [ $# != 2 ]
+then if [ $# != 3 ]
+     then echo "USAGE: $(basename $0) -[aAmM] <input_file> <output_file>";
+          echo "  -a: apertium standard mode";
+          echo "  -A: apertium optimized mode (default mode)";
+          echo "  -m: matxin standard mode";
+          echo "  -M: matxin optimized mode"; 
+          exit 1;
+     elif [ $1 != "-a" ] && [ $1 != "-A" ] && [ $1 != "-m" ] && [ $1 != "-M" ]
+     then echo "USAGE: $(basename $0) -[AaMm] <input file> <output_file>";
+          echo "  -a: apertium standard mode";
+          echo "  -A: apertium optimized mode (default mode)";
+          echo "  -m: matxin standard mode";
+          echo "  -M: matxin optimized mode"; 
+          exit 1;
+     fi
+fi
+
+FLEXOPTS=""
+FILE1=$1;
+FILE2=$2;
+
+if [ $# = 2 ]
+then if [ ! -e $1 ] 
+     then echo "ERROR: '$1' file not found";
+          exit 1;
+     fi 
+fi
+
+MODE="apertium" # default mode
+
+if [ $# = 3 ]
+then if [ ! -e $2 ]
+     then echo "ERROR: '$2' file not found";
+          exit 1;
+     fi
+
+     if [ $1 = "-a" ]
+     then FLEXOPTS="";
+          MODE="apertium";
+     elif [ $1 = "-m" ]
+     then FLEXOPTS="";
+          MODE="matxin";
+     elif [ $1 = "-M" ]
+     then FLEXOPTS="-Cfer";
+          MODE="matxin";
+     fi
+
+     FILE1=$2;
+     FILE2=$3;
+fi
Index: branches/apertium-tagger/apertium2/apertium/gen-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/gen-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/gen-header.sh	(revision 69632)
@@ -0,0 +1,30 @@
+if [ $# != 2 ]
+then if [ $# != 3 ]
+     then echo "USAGE: $(basename $0) [-O] <input_file> <output_file>";
+          exit 1;
+     elif [ $1 != "-O" ]
+     then echo "USAGE: $(basename $0) [-O] <input file> <output_file>";
+          exit 1;
+     fi
+fi
+
+FLEXOPTS=""
+FILE1=$1;
+FILE2=$2;
+
+if [ $# = 2 ]
+then if [ ! -e $1 ] 
+     then echo "ERROR: '$1' file not found";
+          exit 1;
+     fi 
+fi
+
+if [ $# = 3 ]
+then if [ ! -e $2 ]
+     then echo "ERROR: '$2' file not found";
+          exit 1;
+     fi
+     FLEXOPTS="-Cfer";
+     FILE1=$2;
+     FILE2=$3;
+fi
Index: branches/apertium-tagger/apertium2/apertium/gen-stopwords-lextor.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/gen-stopwords-lextor.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/gen-stopwords-lextor.sh	(revision 69632)
@@ -0,0 +1,45 @@
+
+if [ $# != 3 ]
+then echo "USAGE: $(basename $0) <n> <input_file> <output_file>" 1>&2
+     echo "where <n> is the desired number of stopwords" 1>&2
+     echo "      <input_file> contains a large preprocessed corpus" 1>&2
+     echo "      <output_file> is the file to which the list of stopwords is written" 1>&2
+     exit 1
+fi
+
+N=$1
+INFILE=$2
+OUTFILE=$3
+
+if [ ! -e $INFILE  ]
+then echo "ERROR: '$INFILE' file not found" 1>&2
+     exit 1
+fi
+
+cat $INFILE |\
+sed -re "s/(\^[0-9���������������������������������������������ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz <>�+.,;:_'#*%()?�!�-]+\\$)/\1\n/g" |\
+sed -re "s/^[ \t]+//g" |\
+sed -re "s/[ \t]+$//g" |\
+sed -re "s/^\^//g" |\
+sed -re "s/\\\$$//g" |\
+awk '{if (length($0)>0) print tolower($0)}' |\
+awk '{ #Only lemma and first tag; rest of tags, if present, are ignored
+  if (index($0,">")>0)
+    print substr($0,1,index($0,">"));
+  else
+    print $0;
+}' |\
+sort | uniq -c | sort -n -r |\
+head -n $N |\
+awk 'BEGIN{FS=" "}
+{
+  c="";
+  for(i=2; i<=NF; i++) {
+    if (length(c)>0)
+      c= c " "
+    c = c $i  
+  }
+  print c;
+}' > $OUTFILE
+
+exit 0
Index: branches/apertium-tagger/apertium2/apertium/gen-wlist-lextor-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/gen-wlist-lextor-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/gen-wlist-lextor-header.sh	(revision 69632)
@@ -0,0 +1,25 @@
+
+if [ $# != 2 ]
+then echo "USAGE: $(basename $0) <input_file> <output_file>" 1>&2
+     echo "where <input_file> is a lextor monolingual dictionary (.dix) file" 1>&2
+     echo "generated with apertium-gen-lextormono" 1>&2
+     exit 1
+fi
+
+if [ ! -e $1 ]
+then echo "ERROR: '$1' file not found" 1>&2
+     exit 1
+fi
+
+
+$LTTOOLBOX_PATH/lt-expand $1 | grep -v "__REGEXP__" |\
+awk 'BEGIN{FS=":"}{if(index($2,"__")>0) print $1}' |\
+sort | uniq > $2 #|\
+#awk '{ #Only lemma and first tag; rest of tags, if present, are ignored
+#  if (index($0,">")>0)
+#    print substr($0,1,index($0,">"));
+#  else
+#    print $0;
+#}' > $2
+
+exit 0
Index: branches/apertium-tagger/apertium2/apertium/preprocess-corpus-lextor.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/preprocess-corpus-lextor.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/preprocess-corpus-lextor.sh	(revision 69632)
@@ -0,0 +1,54 @@
+
+if [ $# != 4 ]  # exactly four arguments are required; otherwise print usage to stderr and fail
+then echo "USAGE: $(basename $0) <data_dir> <translation_dir> <input_file> <output_file>" 1>&2
+     echo "where <data_dir> is the path to the linguistic data to use" 1>&2
+     echo "      <translation_dir> is the translation direction to use" 1>&2
+     echo "      <input_file> contains a large corpus in raw format" 1>&2
+     echo "      <output_file> is the file to which the preprocessed corpus is written" 1>&2
+     exit 1
+fi
+
+DATA_DIR=$1
+TRANSLATION_DIR=$2
+INFILE=$3
+OUTFILE=$4
+
+if [ ! -e $INFILE  ]
+then echo "ERROR: '$INFILE' file not found" 1>&2
+     exit 1
+fi
+
+if [ ! -e $DATA_DIR/$TRANSLATION_DIR.automorf.bin  ]
+then echo "ERROR: '$DATA_DIR/$TRANSLATION_DIR.automorf.bin' file not found" 1>&2
+     exit 1
+fi
+
+if [ ! -e $DATA_DIR/$TRANSLATION_DIR.prob  ]
+then echo "ERROR: '$DATA_DIR/$TRANSLATION_DIR.prob' file not found" 1>&2
+     exit 1
+fi
+
+
+cat $INFILE | $APERTIUM_PATH/apertium-destxt |\
+$LTTOOLBOX_PATH/lt-proc -a $DATA_DIR/$TRANSLATION_DIR.automorf.bin |\
+$APERTIUM_PATH/apertium-tagger -g $DATA_DIR/$TRANSLATION_DIR.prob |\
+$APERTIUM_PATH/apertium-pretransfer |\
+$APERTIUM_PATH/apertium-retxt |\
+awk 'BEGIN{FS="\\$"} #Discards characters not belonging to apertium words
+{
+  c="";
+  for (j=1; j<=NF; j++) {
+    w=$j;
+    w=substr(w,index(w,"^"));
+            
+    if ((length(w)>0) && (index(w,"^")>0)) {                    
+      if (length(c)>0)
+        c = c " ";                                  
+      c = c w "$";
+    }
+  }
+  
+  print c;
+}' >  $OUTFILE
+
+exit 0
Index: branches/apertium-tagger/apertium2/apertium/script_header.sh.cmake_in
===================================================================
--- branches/apertium-tagger/apertium2/apertium/script_header.sh.cmake_in	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/script_header.sh.cmake_in	(revision 69632)
@@ -0,0 +1,7 @@
+#!${BASH}
+
+export APERTIUM_PATH=${APERTIUM_PATH}
+export LTTOOLBOX_PATH=${LTTOOLBOX_PATH}
+export DEFAULT_DIRECTORY=${DEFAULT_DIRECTORY}
+
+${SCRIPT}

Property changes on: branches/apertium-tagger/apertium2/apertium/script_header.sh.cmake_in
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: branches/apertium-tagger/apertium2/apertium/trans-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/trans-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/trans-header.sh	(revision 69632)
@@ -0,0 +1,107 @@
+case $# in
+  2)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=txt
+    ;;
+  3)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    ;;
+  4)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    FICHERO=$4
+    ;;
+  5)  
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    FICHERO=$4
+    SALIDA=$5
+    ;;
+  *)
+    echo "USAGE: $(basename $0) <datadir> <translation> [format [infile [outfile]]]"
+    echo " datadir          Directory of linguistic data"
+    echo " translation      LANG1-LANG2"
+    echo " format           one of: txt (default), txtu, html, htmlu, rtf, rtfu"
+    echo " infile           input file (stdin by default)"
+    echo " outfile          output file (stdout by default)"
+    exit 1;
+esac
+
+# Mandatory parameters
+PREFIJO=$2    # Translation direction, e.g. es-ca
+FORMATADOR=$3 # Format of the source to translate, e.g. txt
+
+DATOS=$1
+
+# Optional parameter; if absent, input is read from standard input (stdin)
+FICHERO=$4    # File containing the text to translate
+
+PATH=.:/usr/local/bin:$PATH
+AUTOMORF=$DATOS/$PREFIJO.automorf.bin
+AUTOBIL=$DATOS/$PREFIJO.autobil.bin
+#AUTOBIL=$DATOS/$PREFIJO.lextorbil.bin
+AUTOGEN=$DATOS/$PREFIJO.autogen.bin
+AUTOPGEN=$DATOS/$PREFIJO.autopgen.bin
+
+DEP="dep"
+
+TURL="cat" # Does nothing; it is introduced so that the
+           # pipeline does not have to be changed, because in some
+	   # cases the turl or ext-turl program is used as the last
+	   # link of the chain.
+REF=
+	      
+case "$FORMATADOR" in 
+	txt)
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -g"		
+		;;
+	txtu)
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -n"
+		;;
+	rtf)
+		FORMATADOR="rtf"
+		GENERADOR="lt-proc -g"		
+		;;
+	rtfu)
+		FORMATADOR="rtf"		
+		GENERADOR="lt-proc -n"
+		;;
+	html)
+		FORMATADOR="html"
+		GENERADOR="lt-proc -g"	
+		;;
+	htmlu)
+		FORMATADOR="html"
+		GENERADOR="lt-proc -n"		
+		;;
+	*) # Por defecto asumimos txt
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -g"
+		;;	
+esac
+
+if [ -z $REF ]
+then 
+        REF=$FORMATADOR
+fi
+
+$APERTIUM_PATH/apertium-des$FORMATADOR $FICHERO | \
+$LTTOOLBOX_PATH/lt-proc $AUTOMORF | \
+$APERTIUM_PATH/apertium-tagger -g $DATOS/$PREFIJO.prob | \
+$APERTIUM_PATH/apertium-pretransfer | \
+#$APERTIUM_PATH/apertium-lextor -l $DATOS/$PREFIJO.lextor $DATOS/$PREFIJO.lextormono.bin 3 3 | \
+$APERTIUM_PATH/apertium-transfer $DATOS/trules-$PREFIJO.xml $DATOS/trules-$PREFIJO.bin $AUTOBIL | \
+$LTTOOLBOX_PATH/$GENERADOR $AUTOGEN  | \
+$LTTOOLBOX_PATH/lt-proc -p $AUTOPGEN | \
+if [ x$SALIDA = x ]
+then $APERTIUM_PATH/apertium-re$FORMATADOR 
+else
+  $APERTIUM_PATH/apertium-re$FORMATADOR >$SALIDA
+fi
Index: branches/apertium-tagger/apertium2/apertium/trans-lextor-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/trans-lextor-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/trans-lextor-header.sh	(revision 69632)
@@ -0,0 +1,107 @@
+case $# in
+  2)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=txt
+    ;;
+  3)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    ;;
+  4)
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    FICHERO=$4
+    ;;
+  5)  
+    DATOS=$1
+    PREFIJO=$2
+    FORMATADOR=$3
+    FICHERO=$4
+    SALIDA=$5
+    ;;
+  *)
+    echo "USAGE: $(basename $0) <datadir> <translation> [format [infile [outfile]]]"
+    echo " datadir          Directory of linguistic data"
+    echo " translation      LANG1-LANG2"
+    echo " format           one of: txt (default), txtu, html, htmlu, rtf, rtfu"
+    echo " infile           input file (stdin by default)"
+    echo " outfile          output file (stdout by default)"
+    exit 1;
+esac
+
+# Mandatory parameters
+PREFIJO=$2    # Translation direction, e.g. es-ca
+FORMATADOR=$3 # Format of the source to translate, e.g. txt
+
+DATOS=$1
+
+# Optional parameter; if absent, input is read from standard input (stdin)
+FICHERO=$4    # File containing the text to translate
+
+PATH=.:/usr/local/bin:$PATH
+AUTOMORF=$DATOS/$PREFIJO.automorf.bin
+#AUTOBIL=$DATOS/$PREFIJO.autobil.bin
+AUTOBIL=$DATOS/$PREFIJO.lextorbil.bin
+AUTOGEN=$DATOS/$PREFIJO.autogen.bin
+AUTOPGEN=$DATOS/$PREFIJO.autopgen.bin
+
+DEP="dep"
+
+TURL="cat" # Does nothing; it is introduced so that the
+           # pipeline does not have to be changed, because in some
+	   # cases the turl or ext-turl program is used as the last
+	   # link of the chain.
+REF=
+	      
+case "$FORMATADOR" in 
+	txt)
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -g"		
+		;;
+	txtu)
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -n"
+		;;
+	rtf)
+		FORMATADOR="rtf"
+		GENERADOR="lt-proc -g"		
+		;;
+	rtfu)
+		FORMATADOR="rtf"		
+		GENERADOR="lt-proc -n"
+		;;
+	html)
+		FORMATADOR="html"
+		GENERADOR="lt-proc -g"	
+		;;
+	htmlu)
+		FORMATADOR="html"
+		GENERADOR="lt-proc -n"		
+		;;
+	*) # Por defecto asumimos txt
+		FORMATADOR="txt"
+		GENERADOR="lt-proc -g"
+		;;	
+esac
+
+if [ -z $REF ]
+then 
+        REF=$FORMATADOR
+fi
+
+$APERTIUM_PATH/apertium-des$FORMATADOR $FICHERO | \
+$LTTOOLBOX_PATH/lt-proc $AUTOMORF | \
+$APERTIUM_PATH/apertium-tagger -g $DATOS/$PREFIJO.prob | \
+$APERTIUM_PATH/apertium-pretransfer | \
+$APERTIUM_PATH/apertium-lextor -l $DATOS/$PREFIJO.lextor $DATOS/$PREFIJO.lextormono.bin 3 3 | \
+$APERTIUM_PATH/apertium-transfer $DATOS/trules-$PREFIJO.xml $DATOS/trules-$PREFIJO.bin $AUTOBIL | \
+$LTTOOLBOX_PATH/$GENERADOR $AUTOGEN  | \
+$LTTOOLBOX_PATH/lt-proc -p $AUTOPGEN | \
+if [ x$SALIDA = x ]
+then $APERTIUM_PATH/apertium-re$FORMATADOR 
+else
+  $APERTIUM_PATH/apertium-re$FORMATADOR >$SALIDA
+fi
Index: branches/apertium-tagger/apertium2/apertium/transformdic-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transformdic-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transformdic-header.sh	(revision 69632)
@@ -0,0 +1,20 @@
+if [ $# != 3 ]
+then echo "USAGE: $(basename $0) lr|rl <input_file> <output_file>";
+     exit 1;
+fi
+
+FILE1=$2;
+FILE2=$3;
+
+if [ ! -e "$2" ]  # $2 is the input file ($1 is the lr|rl direction), so report $2 when it is missing
+then echo "ERROR: '$2' file not found";
+     exit 1;
+fi
+
+if [ "$1" = "lr" ]  # $1 selects the xsltproc option set; quoted so an empty argument cannot break the test
+then xsltproc $XSLTPROC_OPTIONS_LR $STYLESHEET "$FILE1" >"$FILE2"  # option variables stay unquoted so they word-split
+elif [ "$1" = "rl" ]
+then xsltproc $XSLTPROC_OPTIONS_RL $STYLESHEET "$FILE1" >"$FILE2"
+else
+  echo "ERROR: $1 option invalid"; exit 1;  # an unknown direction is a failure, not a silent success
+fi
Index: branches/apertium-tagger/apertium2/apertium/transformdicbil-header.sh
===================================================================
--- branches/apertium-tagger/apertium2/apertium/transformdicbil-header.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium/transformdicbil-header.sh	(revision 69632)
@@ -0,0 +1,14 @@
+if [ $# != 2 ]
+then echo "USAGE: $(basename $0) <input_file> <output_file>";
+     exit 1;
+fi
+
+FILE1=$1;
+FILE2=$2;
+
+if [ ! -e $1 ] 
+then echo "ERROR: '$1' file not found";
+     exit 1;
+fi
+
+xsltproc $XSLTPROC_OPTIONS $STYLESHEET $FILE1 >$FILE2
Index: branches/apertium-tagger/apertium2/apertium
===================================================================
--- branches/apertium-tagger/apertium2/apertium	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium	(revision 69632)

Property changes on: branches/apertium-tagger/apertium2/apertium
___________________________________________________________________
Added: svn:ignore
## -0,0 +1,43 ##
+.deps
+.libs
+Makefile
+Makefile.in
+stamp-*
+apertium
+apertium-[dfgilprtv]*[!1]
+apertium_config.h
+apertium_config.h.in
+apertium_deshtml.cc
+apertium_deslatex.cc
+apertium_desmediawiki.cc
+apertium_desodt.cc
+apertium_despptx.cc
+apertium_desrtf.cc
+apertium_destxt.cc
+apertium_deswxml.cc
+apertium_desxlsx.cc
+apertium_desxpresstag.cc
+apertium-multiple-translations
+apertium_postlatex.cc
+apertium_postlatex_raw.cc
+apertium_prelatex.cc
+apertium_rehtml.cc
+apertium_rehtml_noent.cc
+apertium_relatex.cc
+apertium_remediawiki.cc
+apertium_reodt.cc
+apertium_repptx.cc
+apertium_rertf.cc
+apertium_retxt.cc
+apertium_rewxml.cc
+apertium_rexlsx.cc
+apertium_rexpresstag.cc
+apertium-unformat
+apertium-utils-fixlatex
+dix.rnc
+format.rnc
+interchunk.rnc
+modes.rnc
+postchunk.rnc
+tagger.rnc
+transfer.rnc
Index: branches/apertium-tagger/apertium2/tests/tagger/__init__.py
===================================================================
--- branches/apertium-tagger/apertium2/tests/tagger/__init__.py	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/tagger/__init__.py	(revision 69632)
@@ -0,0 +1,304 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import functools
+import unittest
+import tempfile
+from os.path import join as pjoin
+from os.path import abspath, dirname
+from subprocess import (check_call, check_output, Popen, PIPE, DEVNULL,
+                        TimeoutExpired, CalledProcessError)
+
+
+# Utilities
+def tmp(contents):  # write `contents` to a persistent (delete=False) temp file and return its path
+    with tempfile.NamedTemporaryFile(mode='w', delete=False) as t:
+        t.write(contents)  # leaving the with-block closes the file, so the data is flushed before use
+    return t.name
+
+
+def rel(fn):
+    return abspath(pjoin(dirname(abspath(__file__)), fn))
+
+
+APERTIUM_TAGGER = rel("../../apertium/apertium-tagger")
+
+
+def check_stderr(*popenargs, timeout=None, **kwargs):
+    # Essentially a copypasted version of check_output.
+    # Can be significantly abridged with Python 3.5's run(...)
+    if 'stderr' in kwargs:
+        raise ValueError('stderr argument not allowed, it will be overridden.')
+    if 'input' in kwargs:
+        if 'stdin' in kwargs:
+            raise ValueError('stdin and input arguments may not both be used.')
+        inputdata = kwargs['input']
+        del kwargs['input']
+        kwargs['stdin'] = PIPE
+    else:
+        inputdata = None
+    with Popen(*popenargs, stderr=PIPE, **kwargs) as process:
+        try:
+            unused_output, err = process.communicate(inputdata,
+                                                     timeout=timeout)
+        except TimeoutExpired:
+            process.kill()
+            unused_output, err = process.communicate()
+            raise TimeoutExpired(process.args, timeout, output=err)
+        except:
+            process.kill()
+            process.wait()
+            raise
+        retcode = process.poll()
+        if retcode:
+            raise CalledProcessError(retcode, process.args, output=err)
+    return err
+
+
+def trace_dec(f):
+    @functools.wraps(f)
+    def inner(*args, **kwargs):
+        if len(args) > 0:
+            print("run " + " ".join(args[0]))
+        return f(*args, **kwargs)
+    return inner
+
+
+def trace_plus_unicode(f):
+    return functools.partial(trace_dec(f), universal_newlines=True)
+
+check_call = trace_plus_unicode(check_call)
+check_output = trace_plus_unicode(check_output)
+check_stderr = trace_plus_unicode(check_stderr)
+
+# Test files
+DIC = """
+^the/the<det><def><sp>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^has/have<vbhaver><pres><p3><sg>$
+^booked/book<vblex><pp>/book<vblex><past>$
+^close/close<adj><sint>/close<n><sg>/close<vblex><inf>/close<vblex><pres>/close<vblex><imp>$
+^cat/cat<n><sg>$
+^room/room<n><sg>$
+^red/red<adj><sint>$
+^./.<sent>$
+""".strip()
+
+TSX = """
+<?xml version="1.0" encoding="utf-8"?>
+<tagger name="test">
+  <tagset>
+    <def-label name="DET" closed="true">
+      <tags-item tags="det.*"/>
+      <tags-item tags="det.*.*"/>
+    </def-label>
+    <def-label name="VERB">
+      <tags-item tags="vblex.*"/>
+      <tags-item tags="vbhaver.*"/>
+    </def-label>
+    <def-label name="NOUN">
+      <tags-item tags="n.*"/>
+    </def-label>
+    <def-label name="ADJ">
+      <tags-item tags="adj.*"/>
+      <tags-item tags="adj"/>
+    </def-label>
+  </tagset>
+</tagger>
+""".strip()
+
+TRAIN_NO_PROBLEM_UNTAGGED = """
+^The/the<det><def><sp>$
+^cat/cat<n><sg>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^red/red<adj><sint>$
+^cat/cat<n><sg>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^red/red<adj><sint>$
+^room/room<n><sg>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^red/red<adj><sint>$
+^cat/cat<n><sg>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+""".strip()
+
+TRAIN_NO_PROBLEM_TAGGED = """
+^The/the<det><def><sp>$
+^cat/cat<n><sg>$
+^books/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^red/red<adj><sint>$
+^cat/cat<n><sg>$
+^books/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^red/red<adj><sint>$
+^room/room<n><sg>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^red/red<adj><sint>$
+^cat/cat<n><sg>$
+^books/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+""".strip()
+
+TRAIN_CAT_TO_BE_A_VERB_UNTAGGED = """
+^The/The<det><def><sp>$
+^falling/fall<vblex><pprs>/fall<vblex><ger>/fall<vblex><subs>$
+^cat/cat<n><sg>$
+^has/have<vbhaver><pres><p3><sg>$
+^booked/book<vblex><pp>/book<vblex><past>$
+^books/book<n><pl>/book<vblex><pres><p3><sg>$
+^./.<sent>$
+
+^Close/close<adj><sint>/close<n><sg>/close<vblex><inf>/close<vblex><pres>/close<vblex><imp>$
+^the/the<det><def><sp>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^falling/fall<vblex><pprs>/fall<vblex><ger>/fall<vblex><subs>$
+^cat/cat<n><sg>$
+^has/have<vbhaver><pres><p3><sg>$
+^books/book<n><pl>/book<vblex><pres><p3><sg>$
+^./.<sent>$
+""".strip()
+
+TRAIN_CAT_TO_BE_A_VERB_TAGGED = """
+^The/The<det><def><sp>$
+^falling/fall<vblex><pprs>$
+^cat/cat<n><sg>$
+^has/have<vbhaver><pres><p3><sg>$
+^booked/book<vblex><pp>$
+^books/book<n><pl>$
+^./.<sent>$
+
+^Close/close<vblex><imp>$
+^the/the<det><def><sp>$
+^books/book<n><pl>$
+^./.<sent>$
+
+^The/the<det><def><sp>$
+^falling/fall<vblex><pprs>$
+^cat/cat<n><sg>$
+^has/have<vbhaver><pres><p3><sg>$
+^books/book<n><pl>$
+^./.<sent>$
+""".strip()
+
+TEST_SUCCESS = """
+^The/the<det><def><sp>$
+^cat/cat<n><sg>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+""".strip()
+
+TEST_NEW_AMBG_CLASS = """
+^The/the<det><def><sp>$
+^cat/cat<n><sg>/cat<adj>$
+^books/book<n><pl>/book<vblex><pri><p3><sg>$
+^the/the<det><def><sp>$
+^room/room<n><sg>$
+^./.<sent>$
+""".strip()
+
+# Expected strings
+EXPECTED_SUBST = """
+Error: A new ambiguity class was found.
+Retraining the tagger is necessary so as to take it into account.
+Word 'cat'.
+New ambiguity class: {NOUN,ADJ}
+""".strip().split("\n")
+
+
+# Tests
+class AmbiguityClassTest(unittest.TestCase):
+    def setUp(self):
+        self.tsx_fn = tmp(TSX)
+        self.dic_fn = tmp(DIC)
+
+    def changing_class_impl(self, flags, model_fn):
+        test1 = tmp(TEST_SUCCESS)
+        test2 = tmp(TEST_NEW_AMBG_CLASS)
+        success_stderr = check_stderr(
+            [APERTIUM_TAGGER, '-d'] + flags +
+            ['-g', model_fn, test1],
+            stdout=DEVNULL)
+        self.assertEqual(success_stderr.strip(), "")
+        subst_stderr = check_stderr(
+            [APERTIUM_TAGGER, '-d'] + flags +
+            ['-g', model_fn, test2],
+            stdout=DEVNULL)
+        subst_stderr = [line.strip()
+                        for line in subst_stderr.strip().split("\n")]
+        self.assertEqual(subst_stderr, EXPECTED_SUBST)
+        ambg_class = check_output(
+           [rel('test-find-similar-ambiguity-class'), model_fn],
+           input="NOUN ADJ\n")
+        substituted_class = set(ambg_class.split(" "))
+        # Should get open class
+        self.assertSetEqual(substituted_class, set(("VERB", "NOUN", "ADJ")))
+
+    def test_changing_class_hmm_sup(self):
+        model_fn = tmp("")
+        untagged = tmp(TRAIN_NO_PROBLEM_UNTAGGED)
+        tagged = tmp(TRAIN_NO_PROBLEM_TAGGED)
+        check_call(
+            [APERTIUM_TAGGER, '-s', '0', self.dic_fn, untagged, self.tsx_fn,
+             model_fn, tagged, untagged])
+        self.changing_class_impl([], model_fn)
+
+    def test_changing_class_hmm_unsup(self):
+        model_fn = tmp("")
+        untagged = tmp(TRAIN_NO_PROBLEM_UNTAGGED)
+        check_call(
+            [APERTIUM_TAGGER, '-t', '1', self.dic_fn, untagged, self.tsx_fn,
+             model_fn])
+        self.changing_class_impl([], model_fn)
+
+    def test_changing_class_sliding_window(self):
+        model_fn = tmp("")
+        untagged = tmp(TRAIN_NO_PROBLEM_UNTAGGED)
+        check_call(
+            [APERTIUM_TAGGER, '--sliding-window', '-t', '1', self.dic_fn,
+             untagged, self.tsx_fn, model_fn])
+        self.changing_class_impl(['--sliding-window'], model_fn)
+
+    def test_cat_is_a_verb(self):
+        model_fn = tmp("")
+        untagged = tmp(TRAIN_CAT_TO_BE_A_VERB_UNTAGGED)
+        tagged = tmp(TRAIN_CAT_TO_BE_A_VERB_TAGGED)
+        new_ambg_class = tmp(TEST_NEW_AMBG_CLASS)
+        check_call(
+            [APERTIUM_TAGGER, '-s', '0', self.dic_fn, untagged, self.tsx_fn,
+             model_fn, tagged, untagged])
+        subst_stdout = check_output(
+            [APERTIUM_TAGGER, '-d', '-g', model_fn, new_ambg_class],
+            stderr=DEVNULL)
+        acceptable = False
+        for line in subst_stdout.split("\n"):
+            if (line.startswith('^cat') and ('<adj>' in line or '<n>' in line)):
+                acceptable = True
+        self.assertTrue(
+            acceptable,
+            "'cat' must be output and tagged as an adjective or a noun.\n" +
+            "Actual output:\n{}".format(subst_stdout))

Property changes on: branches/apertium-tagger/apertium2/tests/tagger/__init__.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: branches/apertium-tagger/apertium2/tests/tagger/Makefile.am
===================================================================
--- branches/apertium-tagger/apertium2/tests/tagger/Makefile.am	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/tagger/Makefile.am	(revision 69632)
@@ -0,0 +1,14 @@
+library_includedir = $(includedir)/$(GENERIC_LIBRARY_NAME)-$(GENERIC_API_VERSION)/$(GENERIC_LIBRARY_NAME)
+
+bin_PROGRAMS = test-find-similar-ambiguity-class
+bin_SCRIPTS =  $(GENERATEDSCRIPTS)
+
+AM_CPPFLAGS = -I$(top_srcdir)
+
+apertiumdir = $(prefix)/share/apertium
+apertiuminclude = $(prefix)/include/apertium-$(GENERIC_API_VERSION)
+apertiumlib = $(prefix)/lib
+apertiumsysconf = $(prefix)/etc/apertium
+
+test_find_similar_ambiguity_class_SOURCES = test_find_similar_ambiguity_classes.cc
+test_find_similar_ambiguity_class_LDADD = -L$(top_srcdir)/$(GENERIC_LIBRARY_NAME)/.libs/ $(APERTIUM_LIBS) -l$(GENERIC_LIBRARY_NAME)$(GENERIC_MAJOR_VERSION)
Index: branches/apertium-tagger/apertium2/tests/tagger/test_find_similar_ambiguity_classes.cc
===================================================================
--- branches/apertium-tagger/apertium2/tests/tagger/test_find_similar_ambiguity_classes.cc	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/tagger/test_find_similar_ambiguity_classes.cc	(revision 69632)
@@ -0,0 +1,61 @@
+#include "apertium/utf_converter.h"
+#include "apertium/tagger_utils.h"
+#include "apertium/tagger_data_hmm.h"
+#include "apertium/tagger_data.h"
+#include <iostream>
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+
+void print_ambiguity_class(const vector<wstring> &array_tags, const set<TTag> &abgset)
+{
+  unsigned int j;
+  set<TTag>::const_iterator abgseti;
+  for (abgseti=abgset.begin(), j=0; abgseti!=abgset.end(); abgseti++, j++) {
+    wcout << array_tags[*abgseti];
+    if (j < abgset.size() - 1) {
+      wcout << " ";
+    }
+  }
+}
+
+void find_similar_ambiguity_class_io(TaggerData &td)
+{
+  vector<wstring> &array_tags = td.getArrayTags();
+  wstring line = L"";
+  getline(wcin, line, L'\n');
+
+  wstringstream line_stream(line);
+  set<TTag> ambiguity_class;
+  wstring tag_name;
+  while (line_stream >> tag_name) {
+    vector<wstring>::iterator it;
+    it = find(array_tags.begin(), array_tags.end(), tag_name);
+    if (it == array_tags.end()) {
+        wcerr << L"Tag not in model: " << tag_name << L'\n';
+        exit(-3);
+    }
+    ambiguity_class.insert(it - array_tags.begin());
+  }
+  set<TTag> similar_ambiguity_class = tagger_utils::find_similar_ambiguity_class(td, ambiguity_class);
+  print_ambiguity_class(array_tags, similar_ambiguity_class);
+}
+
+int main(int argc, char *argv[])  // usage: <probfile>; loads the model, then answers one query read from stdin
+{
+  if (argc < 2) {
+    cerr<<"Usage: "<<argv[0]<<" <probfile>\n";
+    exit(-1);
+  }
+  char* probfile = argv[1];
+  TaggerDataHMM tagger_data_hmm;
+  FILE* fin = fopen(probfile, "rb");  // binary mode: the model is consumed via a raw FILE* reader; no-op on POSIX
+  if (!fin) {
+    cerr<<"Error: cannot open file '"<<probfile<<"'\n";
+    exit(-2);
+  }
+  tagger_data_hmm.read(fin);
+  fclose(fin);
+
+  find_similar_ambiguity_class_io((TaggerData&)tagger_data_hmm);
+}
Index: branches/apertium-tagger/apertium2/tests/Makefile.am
===================================================================
--- branches/apertium-tagger/apertium2/tests/Makefile.am	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/Makefile.am	(revision 69632)
@@ -0,0 +1 @@
+SUBDIRS = tagger
Index: branches/apertium-tagger/apertium2/tests/run_tests.py
===================================================================
--- branches/apertium-tagger/apertium2/tests/run_tests.py	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/run_tests.py	(revision 69632)
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+sys.path.append(os.path.realpath("."))
+
+import unittest
+import pretransfer
+import tagger
+
+if __name__ == "__main__":
+    os.chdir(os.path.dirname(os.path.abspath(__file__)))  # abspath: dirname is '' when run as `python3 run_tests.py`
+    failures = 0
+    for module in [pretransfer, tagger]:
+        suite = unittest.TestLoader().loadTestsFromModule(module)
+        res = unittest.TextTestRunner(verbosity = 2).run(suite)
+        failures += len(res.failures) + len(res.errors)  # tests that raise (errors) must also fail the run
+    sys.exit(min(failures, 255))  # exit status is capped at 255

Property changes on: branches/apertium-tagger/apertium2/tests/run_tests.py
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: branches/apertium-tagger/apertium2/tests/pretransfer/__init__.py
===================================================================
--- branches/apertium-tagger/apertium2/tests/pretransfer/__init__.py	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/pretransfer/__init__.py	(revision 69632)
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import unittest
+
+import itertools
+from subprocess import Popen, PIPE, call
+from tempfile import mkdtemp
+from shutil import rmtree
+
+import signal
+class Alarm(Exception):
+    pass
+
+class PretransferTest(unittest.TestCase):
+    """Subclass and override inputs/expectedOutputs (and possibly other
+stuff) to create new pretransfer tests."""
+
+    flags = ["-z"]
+    inputs = [""]
+    expectedOutputs = [""]
+    expectedRetCodeFail = False
+
+    def alarmHandler(self, signum, frame):
+        raise Alarm
+
+    def withTimeout(self, seconds, cmd, *args, **kwds):
+        signal.signal(signal.SIGALRM, self.alarmHandler)
+        signal.alarm(seconds)
+        ret = cmd(*args, **kwds)
+        signal.alarm(0)         # reset the alarm
+        return ret
+
+    def communicateFlush(self, string):
+        self.proc.stdin.write(string.encode('utf-8'))
+        self.proc.stdin.write(b'\0')
+        self.proc.stdin.flush()
+
+        output = []
+        char = None
+        try:
+            char = self.withTimeout(2, self.proc.stdout.read, 1)
+        except Alarm:
+            pass
+        while char and char != b'\0':
+            output.append(char)
+            try:
+                char = self.withTimeout(2, self.proc.stdout.read, 1)
+            except Alarm:
+                break           # send what we got up till now
+
+        return b"".join(output).decode('utf-8')
+
+    def runTest(self):
+        try:
+            self.proc = Popen(["../apertium/apertium-pretransfer"] + self.flags,
+                              stdin=PIPE,
+                              stdout=PIPE,
+                              stderr=PIPE)
+
+            for inp, exp in zip(self.inputs, self.expectedOutputs):
+                self.assertEqual(self.communicateFlush(inp+"[][\n]"),
+                                 exp+"[][\n]")
+
+            self.proc.communicate() # let it terminate
+            self.proc.stdin.close()
+            self.proc.stdout.close()
+            self.proc.stderr.close()
+            retCode = self.proc.poll()
+            if self.expectedRetCodeFail:
+                self.assertNotEqual(retCode, 0)
+            else:
+                self.assertEqual(retCode, 0)
+
+        finally:
+            pass
+
+
+class BasicPretransferTest(PretransferTest):
+    inputs =          ["^a<n>$", "^a<n>+c<po>$",   "^a<vblex><pres># b$", "[<div>]^a<n>$", "[<div>]^a<vblex><pres># b$"]
+    expectedOutputs = ["^a<n>$", "^a<n>$ ^c<po>$", "^a# b<vblex><pres>$", "[<div>]^a<n>$", "[<div>]^a# b<vblex><pres>$"]
+
+class JoinGroupPretransferTest(PretransferTest):
+    inputs =          ["[<div>]^a<vblex><pres>+c<po># b$",   "[<div>]^a<vblex><pres>+c<po>+d<po># b$"]
+    expectedOutputs = ["[<div>]^a# b<vblex><pres>$ ^c<po>$", "[<div>]^a# b<vblex><pres>$ ^c<po>$ ^d<po>$"]
+
+
+# Proposed inline blank format (marked expectedFailure; presumably not implemented in pretransfer yet):
+class InlineBlankPretransferTest(PretransferTest):
+    inputs =          ["[{<i>}]^a<vblex><pres>+c<po># b$",          "[{<i>}]^a<vblex><pres>+c<po>+d<po># b$"]
+    expectedOutputs = ["[{<i>}]^a# b<vblex><pres>$ [{<i>}]^c<po>$", "[{<i>}]^a# b<vblex><pres>$ [{<i>}]^c<po>$ [{<i>}]^d<po>$"]
+    @unittest.expectedFailure
+    def runTest(self):
+        super().runTest()  # zero-arg super() is already bound; passing self again raised TypeError and hid the real result
Index: branches/apertium-tagger/apertium2/tests/README
===================================================================
--- branches/apertium-tagger/apertium2/tests/README	(nonexistent)
+++ branches/apertium-tagger/apertium2/tests/README	(revision 69632)
@@ -0,0 +1,7 @@
+Tests require python3, run like
+
+    python3 tests/run_tests.py
+
+You may have to do "(sudo) make install" once before running the tests.
+
+They should all pass.
Index: branches/apertium-tagger/apertium2/.gitignore
===================================================================
--- branches/apertium-tagger/apertium2/.gitignore	(nonexistent)
+++ branches/apertium-tagger/apertium2/.gitignore	(revision 69632)
@@ -0,0 +1,88 @@
+*.la
+*.lo
+*.o
+*.pyc
+
+**.deps/
+**.dirstamp
+
+# /
+/autom4te.cache
+
+/compile
+/config.guess
+/config.status
+/config.sub
+/configure
+/depcomp
+/install-sh
+/libtool
+/ltmain.sh
+/missing
+
+/aclocal.m4
+/config.log
+/INSTALL
+/Makefile
+/Makefile.in
+
+/*.pc
+
+# /apertium/wildcard
+/apertium/.libs
+
+/apertium/apertium
+/apertium/apertium-*[!1]
+
+!/apertium/apertium-createmodes.awk
+!/apertium/apertium-header.sh
+!/apertium/apertium-multiple-translations.cc
+!/apertium/apertium-unformat-header.sh
+
+
+/apertium/apertium_config.h
+/apertium/apertium_config.h.in
+/apertium/apertium_config.h.in~
+/apertium/apertium_deshtml.cc
+/apertium/apertium_deslatex.cc
+/apertium/apertium_desmediawiki.cc
+/apertium/apertium_desodt.cc
+/apertium/apertium_despptx.cc
+/apertium/apertium_desrtf.cc
+/apertium/apertium_destxt.cc
+/apertium/apertium_deswxml.cc
+/apertium/apertium_desxlsx.cc
+/apertium/apertium_desxpresstag.cc
+/apertium/apertium_postlatex.cc
+/apertium/apertium_postlatex_raw.cc
+/apertium/apertium_prelatex.cc
+/apertium/apertium_rehtml.cc
+/apertium/apertium_rehtml_noent.cc
+/apertium/apertium_relatex.cc
+/apertium/apertium_remediawiki.cc
+/apertium/apertium_reodt.cc
+/apertium/apertium_repptx.cc
+/apertium/apertium_rertf.cc
+/apertium/apertium_retxt.cc
+/apertium/apertium_rewxml.cc
+/apertium/apertium_rexlsx.cc
+/apertium/apertium_rexpresstag.cc
+
+/apertium/dix.rnc
+/apertium/format.rnc
+/apertium/interchunk.rnc
+/apertium/Makefile
+/apertium/Makefile.in
+/apertium/modes.rnc
+/apertium/postchunk.rnc
+/apertium/tagger.rnc
+/apertium/transfer.rnc
+
+/apertium/stamp-*
+
+# Tests
+/tests/**/Makefile.in
+/tests/**/Makefile
+/tests/**/.libs
+
+/tests/tagger/test-find-similar-ambiguity-class
Index: branches/apertium-tagger/apertium2/Makefile.am
===================================================================
--- branches/apertium-tagger/apertium2/Makefile.am	(nonexistent)
+++ branches/apertium-tagger/apertium2/Makefile.am	(revision 69632)
@@ -0,0 +1,19 @@
+SUBDIRS = $(GENERIC_LIBRARY_NAME) tests
+DIST_SUBDIRS = $(GENERIC_LIBRARY_NAME) tests
+
+modesdir=$(prefix)/share/apertium/modes
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = apertium.pc
+
+aclocaldir = $(datadir)/aclocal
+aclocal_DATA = apertium.m4
+
+EXTRA_DIST=autogen.sh README-MODES apertium.m4
+
+install-data-local:
+	mkdir -p $(DESTDIR)$(modesdir)
+	$(INSTALL_DATA) README-MODES  $(DESTDIR)$(modesdir)/README
+
+test: tests/run_tests.py
+	$<
Index: branches/apertium-tagger/apertium2/configure.ac
===================================================================
--- branches/apertium-tagger/apertium2/configure.ac	(nonexistent)
+++ branches/apertium-tagger/apertium2/configure.ac	(revision 69632)
@@ -0,0 +1,189 @@
+#                                               -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.52)
+
+m4_define([required_lttoolbox_version], [3.3.3])
+m4_define([required_libxml_version], [2.6.17])
+m4_define([required_libpcre_version], [6.4])
+#m4_define([required_pkg_config_version], [0.15])
+
+AC_INIT([apertium], [3.4.2], [sortiz@users.sourceforge.net])
+AC_CONFIG_HEADER([apertium/apertium_config.h])
+
+AC_CANONICAL_SYSTEM
+
+GENERIC_LIBRARY_NAME=apertium
+
+# Release versioning
+GENERIC_MAJOR_VERSION=3
+GENERIC_MINOR_VERSION=4
+GENERIC_MICRO_VERSION=0
+
+# API version (often = GENERIC_MAJOR_VERSION.GENERIC_MINOR_VERSION)
+GENERIC_API_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
+AC_SUBST(GENERIC_API_VERSION)
+AC_SUBST(GENERIC_MAJOR_VERSION)
+
+# Shared library versioning
+GENERIC_LIBRARY_VERSION=0:0:0
+#                       | | |
+#                +------+ | +---+
+#                |        |     |
+#             current:revision:age
+#                |        |     |
+#                |        |     +- increment if interfaces have been added
+#                |        |        set to zero if interfaces have been removed
+#                                  or changed
+#                |        +- increment if source code has changed
+#                |           set to zero if current is incremented
+#                +- increment if interfaces have been added, removed or changed
+
+AC_SUBST(GENERIC_LIBRARY_VERSION)
+PACKAGE=$GENERIC_LIBRARY_NAME
+AC_SUBST(GENERIC_LIBRARY_NAME)
+
+GENERIC_VERSION=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION.$GENERIC_MICRO_VERSION
+GENERIC_RELEASE=$GENERIC_MAJOR_VERSION.$GENERIC_MINOR_VERSION
+AC_SUBST(GENERIC_RELEASE)
+AC_SUBST(GENERIC_VERSION)
+
+VERSION=$GENERIC_VERSION
+
+AM_INIT_AUTOMAKE(no-define)
+
+AC_PROG_CXX
+AC_PROG_LIBTOOL
+AM_SANITY_CHECK
+AC_LANG_CPLUSPLUS
+
+CFLAGS="-Wall -Wextra $CFLAGS"
+CXXFLAGS="-Wall -Wextra $CXXFLAGS"
+
+AC_ARG_ENABLE(debug,
+              [  --enable-debug    Enable "-g -Wall" compiler options],
+              [CXXFLAGS="-g -Wall"; CFLAGS="-g -Wall"; AC_DEFINE([ENABLE_DEBUG], [1], [ENABLE_DEBUG])])
+
+AC_ARG_ENABLE(profile,
+              [  --enable-profile  Enable "-pg -g -Wall" compiler options],
+              [CXXFLAGS="-pg -g -Wall"; CFLAGS="-pg -g -Wall"; LDFLAGS="-pg"])
+
+
+AC_PATH_PROG(XMLLINT, xmllint, no)
+if test x$ac_cv_path_XMLLINT = x
+then
+  AC_MSG_ERROR([You don't have xmllint installed.])
+fi
+if test x$ac_cv_path_XMLLINT = xno
+then
+  AC_MSG_ERROR([You don't have xmllint installed.])
+fi
+
+  AC_PATH_PROG(XSLTPROC, xsltproc, no)
+  if test x$ac_cv_path_XSLTPROC = x
+  then
+    AC_MSG_ERROR([You don't have xsltproc installed.])
+  fi
+  if test x$ac_cv_path_XSLTPROC = xno
+  then
+    AC_MSG_ERROR([You don't have xsltproc installed.])
+  fi
+
+AC_PATH_PROG(BASH, bash, no)
+if test x$ac_cv_path_BASH = x
+then
+  AC_MSG_ERROR([You don't have bash installed.])
+fi
+if test x$ac_cv_path_BASH = xno
+then
+  AC_MSG_ERROR([You don't have bash installed.])
+fi
+
+AC_PATH_PROG(FLEX, flex, no)
+if test x$ac_cv_path_FLEX = x
+then
+  AC_MSG_ERROR([You don't have flex installed.])
+fi
+if test x$ac_cv_path_FLEX = xno
+then
+  AC_MSG_ERROR([You don't have flex installed.])
+fi
+
+AC_PATH_PROG(PKG_CONFIG, pkg-config, no)
+if test x$ac_cv_path_PKG_CONFIG = x
+then
+  AC_MSG_ERROR([You don't have pkg-config installed.])
+fi
+if test x$ac_cv_path_PKG_CONFIG = xno
+then
+  AC_MSG_ERROR([You don't have pkg-config installed.])
+fi
+
+AC_CHECK_FUNCS(strcasecmp)
+
+if test x$(uname) != xDarwin;
+then
+AC_CHECK_HEADER(pcreposix.h,
+  AC_CHECK_LIB(pcre, pcre_fullinfo,[
+    LIBS="$LIBS -lpcreposix -lpcre"
+    no_comp_check=yes],
+    AC_MSG_ERROR([*** unable to locate pcre library ***])),
+  AC_MSG_ERROR([*** unable to locate pcreposix.h include file ***]))
+
+AC_CHECK_HEADER(pcrecpp.h,
+  AC_CHECK_LIB(pcrecpp,pcre_compile,[
+  LIBS="$LIBS -lpcrecpp"
+  no_comp_check=yes],
+  AC_MSG_ERROR([*** unable to locate pcrecpp library ***])),
+  AC_MSG_ERROR([*** unable to locate pcrecpp.h include file ***]))
+fi
+
+
+PKG_CHECK_MODULES(APERTIUM, [
+  lttoolbox >= required_lttoolbox_version
+  libxml-2.0 >= required_libxml_version
+  libpcre >= required_libpcre_version], CPPFLAGS="$CPPFLAGS $APERTIUM_CFLAGS"; LIBS="$LIBS $APERTIUM_LIBS")
+
+# Check for wide strings
+AC_DEFUN([AC_CXX_WSTRING],[
+  AC_CACHE_CHECK(whether the compiler supports wide strings,
+  ac_cv_cxx_wstring,
+  [AC_LANG_SAVE
+   AC_LANG_CPLUSPLUS
+   AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <string>]],[[
+std::wstring test = L"test";
+   ]])],
+   [ac_cv_cxx_wstring=yes], [ac_cv_cxx_wstring=no])
+   AC_LANG_RESTORE
+  ])
+])
+
+AC_CXX_WSTRING
+AC_C_BIGENDIAN
+
+if test "$ac_cv_cxx_wstring" = no
+then
+  AC_MSG_ERROR([Missing wide string support])
+fi
+
+
+# Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([stdlib.h string.h unistd.h stddef.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_TYPE_SIZE_T
+
+# Checks for library functions.
+AC_FUNC_ERROR_AT_LINE
+
+AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked, getopt, getopt_long, fgetwc_unlocked, fputwc_unlocked, fgetws_unlocked, fputws_unlocked])
+AC_CHECK_FUNCS([setlocale strdup getopt snprintf mbtowc])
+AC_REPLACE_FUNCS(getopt_long)
+
+AM_CONDITIONAL([WINDOWS], [test x$version_type = xwindows])
+AS_IF([test x$version_type = xwindows], [AC_DEFINE(HAVE_GETOPT_LONG,0)], [])
+
+AC_OUTPUT([Makefile apertium.pc apertium/Makefile tests/Makefile tests/tagger/Makefile])
Index: branches/apertium-tagger/apertium2/Jenkinsfile
===================================================================
--- branches/apertium-tagger/apertium2/Jenkinsfile	(nonexistent)
+++ branches/apertium-tagger/apertium2/Jenkinsfile	(revision 69632)
@@ -0,0 +1,10 @@
+node {
+   stage 'Checkout'
+   checkout scm
+
+   stage 'Build'
+   sh "./autogen.sh && make clean && make"
+
+   stage 'Test'
+   sh "make test"
+}
Index: branches/apertium-tagger/apertium2/NEWS
===================================================================
--- branches/apertium-tagger/apertium2/NEWS	(nonexistent)
+++ branches/apertium-tagger/apertium2/NEWS	(revision 69632)
@@ -0,0 +1,128 @@
+
+===================
+ NEWS for apertium
+===================
+
+SVN
+---
+
+Version 3.4.2, 2016-05-15 (-r68437)
+---------------------------------
+
+* some bugfixes to apertium-tagger, e.g.
+  https://sourceforge.net/p/apertium/tickets/94/
+
+* bugfixes to modes: now accept dirs with spaces, and allow installing apertium
+  itself and language data to different prefixes, as well as auto-generating
+  debug modes
+
+* fix a crash when apertium-tagger is compiled with clang
+
+* new option -n to deformatters turns off dot-insertion
+  http://sourceforge.net/p/apertium/tickets/68
+
+* new transfer instruction <reject-current-rule shifting="yes|no"/>;
+  see transfer.dtd for details (not implemented for
+  interchunk/postchunk)
+
+* apertium-transfer-tools-generalisation-dev branch merged; outputs
+  extra trace information from transfer for generalising
+  corpus-generated transfer rules
+
+* apertium-tagger: supervised training and tagging for unigram models
+  based on http://coltekin.net/cagri/papers/trmorph-tools.pdf
+
+* fix some off-by-one/out-of-bounds segfaults in transfer
+  https://sourceforge.net/p/apertium/tickets/89/
+
+* various distribution-related fixes, static analysis fixes,
+  documentation
+
+Version 3.4.0, 2015-03-17 (-r59200)
+---------------------------------
+
+* transfer files now work even if they were compiled with a different
+  version of pcre
+
+* more explicit validation checks on .dix compilation
+
+* various fixes to driver script:
+
+  * `apertium -d . -l` behaves as expected now
+
+  * `>>` no longer empties out the destination file
+
+  * safer variable quoting
+
+* some Windows Unicode fixes
+
+* tagger now resets its state after a flush
+
+
+Version 3.3, 2014-08-20 (-r56825)
+---------------------------------
+
+* new Light Sliding Window Part-of-Speech Tagger (GsoC project merged)
+
+* new LaTeX format handler
+
+* new html-noent format handler (html without turning non-ASCII into entities)
+
+* bilingual lookup can now be separate from transfer
+
+  * see new -b options to lt-proc/apertium-transfer
+
+* apertium.m4 now available for language pairs to simplify build rules
+  and depend on monolingual data
+
+* some memory leaks and many minor bugs fixed
+
+  * pretransfer now allows '+' inside tags
+
+* --trace modes for transfer
+
+
+Version 3.2, 2010-09-21 (-r25741)
+---------------------------------
+
+* Fixed some bugs in pretransfer, allow '+' inside tags
+
+* Updated the DTDs to allow comments anywhere
+
+
+Version 3.1, 2008-09-29
+-----------------------
+
+
+Version 3.0, 2008-08-01
+-----------------------
+
+* Debian package
+
+
+Version 2.0, 2007-06-19
+-----------------------
+
+
+Version 1.9, 2006-12-15
+-----------------------
+
+
+Version 1.0, 2006-10-02
+-----------------------
+
+* Debian package
+
+
+Version 0.9, 2005-09-29
+-----------------------
+
+
+Version 0.8, 2005-08-01
+-----------------------
+
+
+
+# Local Variables:
+# mode: markdown
+# End:
Index: branches/apertium-tagger/apertium2/apertium.m4
===================================================================
--- branches/apertium-tagger/apertium2/apertium.m4	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium.m4	(revision 69632)
@@ -0,0 +1,155 @@
+# apertium.m4 - Macros to locate and utilise apertium libraries -*- Autoconf -*-
+# serial 1 (apertium-3.4.2)
+#
+# Copyright (C) 2013--2016 Universitat d'Alacant / Universidad de Alicante
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+
+# AP_CHECK_LING([ID], [MONOLINGUAL_PACKAGE])
+#
+# Check to see whether MONOLINGUAL_PACKAGE exists, and if so sets
+# AP_LIB[ID] and AP_SRC[ID].
+#
+# As an example, AP_CHECK_LING([1], [apertium-fie]) would check that
+# apertium-fie exists, and set AP_LIB1 and AP_SRC1 to the paths
+# containing the binaries and sources respectively of that monolingual
+# language package.
+#
+# Also sets up options --with-lang[ID] (e.g. --with-lang1) if the user
+# wants to use the source code checkout instead of installed files.
+# ------------------------------------------
+AC_DEFUN([AP_CHECK_LING],
+[
+  AC_ARG_VAR([AP_SRC][$1], [Path to $2 sources, same as AP_LIB$1 if --with-lang$1 set])
+  AC_ARG_VAR([AP_LIB][$1], [Path to $2 binaries, same as AP_SRC$1 if --with-lang$1 set])
+  AC_ARG_VAR([AP_SUBDIRS], [List of all --with-lang dirs; add it to SUBDIRS to make configure-specified dependencies recursively])
+  AC_ARG_WITH([lang][$1],
+    [dnl
+AS_HELP_STRING([--with-lang][$1],dnl
+[Uninstalled source directory for $2, defines AP_SRC$1 and AP_LIB$1 for Makefile, otherwise these are set to paths of installed files.])
+    ],
+    [
+      AP_LIB$1=$withval
+      AP_SRC$1=$withval
+      echo "Using $2 from $withval"
+      AP_SUBDIRS="$AP_SUBDIRS $withval"
+    ],
+    [
+      # TODO: PKG_CHECK_MODULES sets useless variables, while _EXISTS
+      # doesn't error if not found, should make a PKG_CHECK macro that
+      # errors but does not set _CFLAGS/_LIBS
+      PKG_CHECK_MODULES(m4_toupper(m4_bpatsubst($2, [-], [_])), [$2])
+      AP_LIB$1=`pkg-config --variable=dir $2`
+      AP_SRC$1=`pkg-config --variable=srcdir $2`
+    ])
+  if test -z "$AP_SRC$1" || ! test -d "$AP_SRC$1"; then
+    AC_MSG_ERROR([Could not find sources dir for $2 (AP_SRC$1="$AP_SRC$1")])
+  fi
+])
+
+
+# AP_MKINCLUDE()
+#
+# Creates the file ap_include.am and sets the variable ap_include to
+# point to this path. Now in your Makefile.am you can include
+# ap_include.am by writing @ap_include@ on a line by itself.
+#
+# The file defines a pattern rule for making modes files, and a goal
+# for installing the ones that have install="yes" in modes.xml. To
+# generate modes, include a line like
+#
+#     noinst_DATA=modes/$(PREFIX1).mode
+#
+# in your Makefile.am with _at most one mode_ (the others will be
+# created even if you list only one, listing several will lead to
+# trouble with parallel make).
+# 
+# Install the modes by making install-data-local dependent on
+# install-modes, ie.
+#
+#     install-data-local: install-modes
+#
+# Also defined is a goal for making the .deps folder. If you want some
+# file to be built in a folder named .deps, just make that goal
+# dependent on .deps/.d, e.g.
+#
+#     .deps/intermediate.dix: original.dix .deps/.d
+# 
+# ------------------------------------------
+AC_DEFUN([AP_MKINCLUDE],
+[
+  AC_SUBST_FILE(ap_include)
+  ap_include=$srcdir/ap_include.am
+
+  cat >$srcdir/ap_include.am <<EOF
+
+modes/%.mode: modes.xml
+	apertium-validate-modes modes.xml
+	apertium-gen-modes modes.xml
+	modes=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' modes.xml | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\
+		if test -n "\$\$modes"; then mv \$\$modes modes/; fi
+
+apertium_modesdir=\$(prefix)/share/apertium/modes/
+install-modes:
+	mv modes modes.bak
+	apertium-gen-modes -f modes.xml \$(prefix)/share/apertium/\$(BASENAME)
+	rm -rf modes
+	mv modes.bak modes
+	test -d \$(DESTDIR)\$(apertium_modesdir) || mkdir \$(DESTDIR)\$(apertium_modesdir)
+	modes=\`xmllint --xpath '//mode@<:@@install="yes"@:>@/@name' modes.xml | sed 's/ *name="\(@<:@^"@:>@*\)"/\1.mode /g'\`; \\
+		if test -n "\$\$modes"; then \\
+			\$(INSTALL_DATA) \$\$modes \$(DESTDIR)\$(apertium_modesdir); \\
+			rm \$\$modes; \\
+		fi
+
+.deps/.d:
+	test -d .deps || mkdir .deps
+	touch \$[]@
+
+.PRECIOUS: .deps/.d
+
+langs:
+	@fail=; \
+	if \$(am__make_keepgoing); then \
+	  failcom='fail=yes'; \
+	else \
+	  failcom='exit 1'; \
+	fi; \
+	dot_seen=no; \
+	list='\$(AP_SUBDIRS)'; \
+	for subdir in \$\$list; do \
+	  echo "Making \$\$subdir"; \
+	  (\$(am__cd) \$\$subdir && \$(MAKE) \$(AM_MAKEFLAGS) all-am) \
+	  || eval \$\$failcom; \
+	done; \
+	\$(MAKE) \$(AM_MAKEFLAGS) all-am || exit 1; \
+	test -z "\$\$fail"
+.PHONY: langs
+
+
+.deps/%.autobil.prefixes: %.autobil.bin .deps/.d
+	lt-print $< | sed 's/ /@_SPACE_@/g' > .deps/\@S|@*.autobil.att
+	hfst-txt2fst -e ε <  .deps/\@S|@*.autobil.att > .deps/\@S|@*.autobil.hfst
+	hfst-project -p upper .deps/\@S|@*.autobil.hfst > .deps/\@S|@*.autobil.upper                                   # bidix
+	echo ' @<:@ ? - %+ @:>@* ' | hfst-regexp2fst > .deps/\@S|@*.any-nonplus.hfst                                                        # [^+]*
+	hfst-concatenate -1 .deps/\@S|@*.autobil.upper -2 .deps/\@S|@*.any-nonplus.hfst -o .deps/\@S|@*.autobil.nonplussed    # bidix [^+]*
+	echo ' %+ ' | hfst-regexp2fst > .deps/\@S|@*.single-plus.hfst                                                                 # +
+	hfst-concatenate -1 .deps/\@S|@*.single-plus.hfst -2 .deps/\@S|@*.autobil.nonplussed -o .deps/\@S|@*.autobil.postplus # + bidix [^+]*
+	hfst-repeat -f0 -t3 -i .deps/\@S|@*.autobil.postplus -o .deps/\@S|@*.autobil.postplus.0,3                      # (+ bidix [^+]*){0,3} -- gives at most three +
+	hfst-concatenate -1 .deps/\@S|@*.autobil.nonplussed -2 .deps/\@S|@*.autobil.postplus.0,3 -o \@S|@@                 # bidix [^+]* (+ bidix [^+]*){0,3}
+
+EOF
+
+])
Index: branches/apertium-tagger/apertium2/apertium.pc.in
===================================================================
--- branches/apertium-tagger/apertium2/apertium.pc.in	(nonexistent)
+++ branches/apertium-tagger/apertium2/apertium.pc.in	(revision 69632)
@@ -0,0 +1,10 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: apertium
+Description: rule-based machine translation system
+Version: @VERSION@
+Libs: -L${libdir} -l@GENERIC_LIBRARY_NAME@@GENERIC_MAJOR_VERSION@ @APERTIUM_LIBS@
+Cflags: -I${includedir}/@GENERIC_LIBRARY_NAME@-@GENERIC_API_VERSION@ -I${libdir}/@GENERIC_LIBRARY_NAME@-@GENERIC_API_VERSION@/include @APERTIUM_CFLAGS@
Index: branches/apertium-tagger/apertium2/COPYING
===================================================================
--- branches/apertium-tagger/apertium2/COPYING	(nonexistent)
+++ branches/apertium-tagger/apertium2/COPYING	(revision 69632)
@@ -0,0 +1,339 @@
+                    GNU GENERAL PUBLIC LICENSE
+                       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                            NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
Index: branches/apertium-tagger/apertium2/COPYING.hunalign
===================================================================
--- branches/apertium-tagger/apertium2/COPYING.hunalign	(nonexistent)
+++ branches/apertium-tagger/apertium2/COPYING.hunalign	(revision 69632)
@@ -0,0 +1,502 @@
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard.  To achieve this, non-free programs must be
+allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at
+    least three years, to give the same user the materials
+    specified in Subsection 6a, above, for a charge no more
+    than the cost of performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded.  In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.  It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
Index: branches/apertium-tagger/apertium2/README
===================================================================
--- branches/apertium-tagger/apertium2/README	(nonexistent)
+++ branches/apertium-tagger/apertium2/README	(revision 69632)
@@ -0,0 +1,68 @@
+REQUIREMENTS
+
+* This package needs the package lttoolbox-3.3.1 installed in the
+system, as well as libxml and libpcre.
+
+See www.apertium.org for more information on installing.
+
+DESCRIPTION
+
+When building, this package generates, among others, the following
+modules:
+
+* apertium-deshtml, apertium-desrtf, apertium-destxt
+  Deformatters for html, rtf and txt document formats.
+
+* apertium-rehtml, apertium-rertf, apertium-retxt
+  Reformatters for html, rtf and txt document formats.
+
+* apertium
+  Translator program.  Execute without parameters to see the usage.
+
+QUICK START
+
+1) Download the lttoolbox-VERSION.tar.gz and apertium-VERSION.tar.gz
+   packages, as well as a linguistic data package.
+
+   Note: If you are using the translator from SVN, run ./autogen.sh before
+         running ./configure in all cases.
+
+2) Unpack lttoolbox and do ('#' means 'do that with root privileges'):
+   $ cd lttoolbox-VERSION
+   $ ./configure
+   $ make
+   # make install
+
+3) Unpack apertium and do:
+   $ cd apertium-VERSION
+   $ ./configure
+   $ make
+   # make install
+
+4) Unpack linguistic data (LING_DATA_DIR) and do:
+   $ cd LING_DATA_DIR
+   $ ./configure
+   $ make
+   and wait for a while (minutes).
+
+5) Use the translator
+
+   USAGE: apertium [-d datadir] [-f format] [-u] <direction> [in [out]]
+    -d datadir       directory of linguistic data
+    -f format        one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx,
+                     xpresstag, html-noent, latex, latex-raw
+    -a               display ambiguity
+    -u               don't display marks '*' for unknown words
+    -n               don't insert period before possible sentence-ends
+    -m memory.tmx    use a translation memory to recycle translations
+    -o direction     translation direction using the translation memory,
+                     by default 'direction' is used instead
+    -l               lists the available translation directions and exits
+    direction        typically, LANG1-LANG2, but see modes.xml in language data
+    in               input file (stdin by default)
+    out              output file (stdout by default)
+
+
+   Sample:
+
+   $ apertium -f txt es-ca <input >output
Index: branches/apertium-tagger/apertium2/ChangeLog
===================================================================
--- branches/apertium-tagger/apertium2/ChangeLog	(nonexistent)
+++ branches/apertium-tagger/apertium2/ChangeLog	(revision 69632)
@@ -0,0 +1,11 @@
+(See SVN for the actual ChangeLog.)
+
+
+
+Mon Jun  5 00:29:11 BST 2006
+
+Initial packaging.
+
+Wed Oct  3 07:12:19 BST 2007
+
+Packaging version 3.0.
Index: branches/apertium-tagger/apertium2/autogen.sh
===================================================================
--- branches/apertium-tagger/apertium2/autogen.sh	(nonexistent)
+++ branches/apertium-tagger/apertium2/autogen.sh	(revision 69632)
@@ -0,0 +1,35 @@
+#! /bin/sh
+# Regenerate the autotools files, then run ./configure with our arguments.
+# If the user specified a --prefix, take that, otherwise /usr/local/
+# is the default.
+PREFIX=/usr/local
+prefixnext=false
+for i in "$@"; do
+    case $i in
+        --prefix=*)     # equals separated: keep what follows the first '='
+            PREFIX="${i#*=}"
+            ;;
+        --prefix)       # space separated: the value is the next argument
+            prefixnext=true
+            ;;
+        *)
+            $prefixnext && PREFIX="$i" && prefixnext=false
+            ;;
+    esac
+done
+
+# Set the paths needed by libtool/pkg-config/aclocal etc. By inferring
+# them based on --prefix, users don't have to edit ~/.bashrc. We only
+# append, so if a user has some other preference, that will override.
+PATH="${PATH}:${PREFIX}/bin:/usr/local/bin"
+export PATH
+# ${VAR:+${VAR}:} avoids a leading ':' (an empty entry) when VAR is unset/empty.
+LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${PREFIX}/lib"
+export LD_LIBRARY_PATH
+PKG_CONFIG_PATH="${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}${PREFIX}/share/pkgconfig:${PREFIX}/lib/pkgconfig"
+export PKG_CONFIG_PATH
+ACLOCAL_PATH="${ACLOCAL_PATH:+${ACLOCAL_PATH}:}${PREFIX}/share/aclocal"
+export ACLOCAL_PATH
+
+# Pass on all args to configure
+autoreconf -fi && ./configure "$@"

Property changes on: branches/apertium-tagger/apertium2/autogen.sh
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property
Index: branches/apertium-tagger/apertium2/README-MODES
===================================================================
--- branches/apertium-tagger/apertium2/README-MODES	(nonexistent)
+++ branches/apertium-tagger/apertium2/README-MODES	(revision 69632)
@@ -0,0 +1,3 @@
+For information on modes, please see our Wiki:
+
+  http://wiki.apertium.org/wiki/Modes
Index: branches/apertium-tagger/apertium2/AUTHORS
===================================================================
--- branches/apertium-tagger/apertium2/AUTHORS	(nonexistent)
+++ branches/apertium-tagger/apertium2/AUTHORS	(revision 69632)
@@ -0,0 +1,11 @@
+(c) 2005-2007 Universitat d'Alacant / Universidad de Alicante.
+(c) 2007-2008 Prompsit Language Engineering S.L.
+
+Most of the files tmx_* are taken from the hunalign package:
+(C) Copyright 2004. Media Research Centre at the
+Sociology and Communications Department of the
+Budapest University of Technology and Economics.
+
+hunalign is licensed under the GNU Lesser GPL v. 2.1, see
+COPYING.hunalign for more details.
+
Index: branches/apertium-tagger/apertium2/cmake/CMakeUseFlex.cmake
===================================================================
--- branches/apertium-tagger/apertium2/cmake/CMakeUseFlex.cmake	(nonexistent)
+++ branches/apertium-tagger/apertium2/cmake/CMakeUseFlex.cmake	(revision 69632)
@@ -0,0 +1,42 @@
+# - Look for GNU flex, the lexer generator.
+# Defines the following:
+#  FLEX_EXECUTABLE - path to the flex executable
+#  FLEX(OUT_FILE SWITCHES IN_FILE) - macro: run flex with SWITCHES on IN_FILE
+#  FLEX_PREFIX_OUTPUTS - Defaults to FALSE. Meant to make FLEX produce outputs of
+#                        lex.${filename}.c, not lex.yy.c (-P); currently unused.
+
+IF(NOT DEFINED FLEX_PREFIX_OUTPUTS)  # default to FALSE unless already defined
+  SET(FLEX_PREFIX_OUTPUTS FALSE)
+ENDIF(NOT DEFINED FLEX_PREFIX_OUTPUTS)
+# Search for flex only when FLEX_EXECUTABLE is not already set.
+IF(NOT FLEX_EXECUTABLE)
+  FIND_PROGRAM(FLEX_EXECUTABLE flex)
+  IF (FLEX_EXECUTABLE)
+    MESSAGE (STATUS "Found flex -- ${FLEX_EXECUTABLE}")
+  ELSE (FLEX_EXECUTABLE)
+    MESSAGE (SEND_ERROR "flex not found")  # a bare ERROR is not a MESSAGE mode
+  ENDIF(FLEX_EXECUTABLE)
+ENDIF(NOT FLEX_EXECUTABLE)
+
+IF(FLEX_EXECUTABLE)
+  # FLEX(OUT_FILE SWITCHES IN_FILE): run flex with SWITCHES on IN_FILE (source dir); OUT_FILE goes to the mirrored build dir.
+  MACRO(FLEX OUT_FILE SWITCHES IN_FILE)
+    GET_FILENAME_COMPONENT(PATH "${IN_FILE}" PATH)
+    # PATH has no trailing slash and is empty when IN_FILE has no directory part.
+    SET(FULL_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}")
+    IF(NOT "${PATH}" STREQUAL "")
+      SET(FULL_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/${PATH}")
+    ENDIF(NOT "${PATH}" STREQUAL "")
+    FILE(MAKE_DIRECTORY "${FULL_OUT_DIR}")
+    SET(FULL_OUT_FILE "${FULL_OUT_DIR}/${OUT_FILE}")
+    SET(FULL_IN_FILE  "${CMAKE_CURRENT_SOURCE_DIR}/${IN_FILE}")
+    ADD_CUSTOM_COMMAND(
+      OUTPUT  "${FULL_OUT_FILE}"
+      COMMAND "${FLEX_EXECUTABLE}"
+      ARGS    "${SWITCHES}"
+              -o"${FULL_OUT_FILE}"
+              "${FULL_IN_FILE}"
+      DEPENDS "${FULL_IN_FILE}")
+    SET_SOURCE_FILES_PROPERTIES("${FULL_OUT_FILE}" PROPERTIES GENERATED TRUE)
+  ENDMACRO(FLEX)
+ENDIF(FLEX_EXECUTABLE)
Index: branches/apertium-tagger/apertium2/cmake/CMakeUseXsltproc.cmake
===================================================================
--- branches/apertium-tagger/apertium2/cmake/CMakeUseXsltproc.cmake	(nonexistent)
+++ branches/apertium-tagger/apertium2/cmake/CMakeUseXsltproc.cmake	(revision 69632)
@@ -0,0 +1,35 @@
+# - Look for xsltproc, the command-line XSLT processor.
+# Defines the following:
+#  XSLTPROC_EXECUTABLE - path to the xsltproc executable
+#  XSLTPROC(OUT_FILE XSL_FILE XML_FILE) - macro that adds a custom command
+#      applying stylesheet XSL_FILE to XML_FILE to generate OUT_FILE; all
+#      three paths are taken relative to CMAKE_CURRENT_SOURCE_DIR.
+
+IF(NOT XSLTPROC_EXECUTABLE)  # search only when it is not already set
+  FIND_PROGRAM(XSLTPROC_EXECUTABLE xsltproc)
+  IF (XSLTPROC_EXECUTABLE)
+    MESSAGE (STATUS "Found xsltproc -- ${XSLTPROC_EXECUTABLE}")
+  ELSE (XSLTPROC_EXECUTABLE)
+    MESSAGE (SEND_ERROR "xsltproc not found")  # a bare ERROR is not a MESSAGE mode
+  ENDIF(XSLTPROC_EXECUTABLE)
+ENDIF(NOT XSLTPROC_EXECUTABLE)
+
+IF(XSLTPROC_EXECUTABLE)
+  # XSLTPROC(OUT_FILE XSL_FILE XML_FILE): add a custom command applying XSL_FILE
+  # to XML_FILE to write OUT_FILE; all paths are relative to the source dir.
+  MACRO(XSLTPROC OUT_FILE XSL_FILE XML_FILE)
+    SET(FULL_OUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${OUT_FILE}")
+    SET(FULL_XSL_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${XSL_FILE}")
+    SET(FULL_XML_FILE "${CMAKE_CURRENT_SOURCE_DIR}/${XML_FILE}")
+
+    ADD_CUSTOM_COMMAND(
+      OUTPUT  "${FULL_OUT_FILE}"
+      COMMAND "${XSLTPROC_EXECUTABLE}"
+      ARGS    -o "${FULL_OUT_FILE}"
+              "${FULL_XSL_FILE}"
+              "${FULL_XML_FILE}"
+      DEPENDS "${FULL_XSL_FILE}" "${FULL_XML_FILE}")
+
+    SET_SOURCE_FILES_PROPERTIES("${FULL_OUT_FILE}" PROPERTIES GENERATED TRUE)
+  ENDMACRO(XSLTPROC)
+ENDIF(XSLTPROC_EXECUTABLE)
Index: branches/apertium-tagger/apertium2/cmake/FindLibPcre.cmake
===================================================================
--- branches/apertium-tagger/apertium2/cmake/FindLibPcre.cmake	(nonexistent)
+++ branches/apertium-tagger/apertium2/cmake/FindLibPcre.cmake	(revision 69632)
@@ -0,0 +1,38 @@
+IF (LIBPCRE_INCLUDE_DIR AND LIBPCRE_LIBRARIES)
+   # Both result variables are already set (in cache already): stay quiet.
+   SET(LibPcre_FIND_QUIETLY TRUE)
+ENDIF (LIBPCRE_INCLUDE_DIR AND LIBPCRE_LIBRARIES)
+
+IF (NOT WIN32)
+   # use pkg-config to get the directories and then use these values
+   # in the FIND_PATH() and FIND_LIBRARY() calls
+   INCLUDE(UsePkgConfig)
+   PKGCONFIG(libpcre LIBPCRE_INCLUDES LIBPCRE_LIB_DIR LIBPCRE_LDFLAGS LIBPCRE_CFLAGS)
+   SET(LIBPCRE_DEFINITIONS ${LIBPCRE_CFLAGS})
+ENDIF (NOT WIN32)
+# Find the directory holding pcre.h, also searching the pkg-config include dir.
+FIND_PATH(LIBPCRE_INCLUDE_DIR pcre.h
+          PATHS ${LIBPCRE_INCLUDES})
+# Find the pcre library, also searching the pkg-config library dir.
+FIND_LIBRARY(LIBPCRE_LIBRARIES
+             NAMES pcre libpcre
+             PATHS ${LIBPCRE_LIB_DIR})
+# LIBPCRE_FOUND is TRUE only when both the header dir and the library are set.
+IF (LIBPCRE_INCLUDE_DIR AND LIBPCRE_LIBRARIES)
+   SET(LIBPCRE_FOUND TRUE)
+ELSE (LIBPCRE_INCLUDE_DIR AND LIBPCRE_LIBRARIES)
+   SET(LIBPCRE_FOUND FALSE)
+ENDIF (LIBPCRE_INCLUDE_DIR AND LIBPCRE_LIBRARIES)
+# Report: a status line unless quiet; SEND_ERROR if LibPcre_FIND_REQUIRED is set.
+IF (LIBPCRE_FOUND)
+   IF (NOT LibPcre_FIND_QUIETLY)
+      MESSAGE(STATUS "Found LibPcre: ${LIBPCRE_LIBRARIES}")
+   ENDIF (NOT LibPcre_FIND_QUIETLY)
+ELSE (LIBPCRE_FOUND)
+   IF (LibPcre_FIND_REQUIRED)
+      MESSAGE(SEND_ERROR "Could NOT find LibPcre")
+   ENDIF (LibPcre_FIND_REQUIRED)
+ENDIF (LIBPCRE_FOUND)
+
+MARK_AS_ADVANCED(LIBPCRE_INCLUDE_DIR LIBPCRE_LIBRARIES)
+
Index: branches/apertium-tagger/apertium2/cmake/FindLibXml2.cmake
===================================================================
--- branches/apertium-tagger/apertium2/cmake/FindLibXml2.cmake	(nonexistent)
+++ branches/apertium-tagger/apertium2/cmake/FindLibXml2.cmake	(revision 69632)
@@ -0,0 +1,59 @@
+# - Try to find LibXml2
+# Once done this will define
+#
+#  LIBXML2_FOUND - system has LibXml2
+#  LIBXML2_INCLUDE_DIR - the LibXml2 include directory
+#  LIBXML2_LIBRARIES - the libraries needed to use LibXml2
+#  LIBXML2_DEFINITIONS - Compiler switches required for using LibXml2
+#
+# Copyright (c) 2006, Alexander Neundorf <neundorf@kde.org>
+# This code is available under the BSD license, see licenses/BSD for details.
+
+# Copyright (c) 2006, Alexander Neundorf, <neundorf@kde.org>
+#
+# Redistribution and use is allowed according to the terms of the BSD license.
+# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+
+
+IF (LIBXML2_INCLUDE_DIR AND LIBXML2_LIBRARIES)
+   # Both result variables are already set (in cache already): stay quiet.
+   SET(LibXml2_FIND_QUIETLY TRUE)
+ENDIF (LIBXML2_INCLUDE_DIR AND LIBXML2_LIBRARIES)
+
+IF (NOT WIN32)
+   # use pkg-config to get the directories and then use these values
+   # in the FIND_PATH() and FIND_LIBRARY() calls
+   INCLUDE(UsePkgConfig)
+   PKGCONFIG(libxml-2.0 _LibXml2IncDir _LibXml2LinkDir _LibXml2LinkFlags _LibXml2Cflags)
+   SET(LIBXML2_DEFINITIONS ${_LibXml2Cflags})
+ENDIF (NOT WIN32)
+# Find the directory holding libxml/xpath.h, also trying a libxml2/ suffix.
+FIND_PATH(LIBXML2_INCLUDE_DIR libxml/xpath.h
+   PATHS
+   ${_LibXml2IncDir}
+   PATH_SUFFIXES libxml2
+   )
+# Find the xml2 library, also searching the pkg-config link dir.
+FIND_LIBRARY(LIBXML2_LIBRARIES NAMES xml2 libxml2
+   PATHS
+   ${_LibXml2LinkDir}
+   )
+# LIBXML2_FOUND is TRUE only when both the header dir and the library are set.
+IF (LIBXML2_INCLUDE_DIR AND LIBXML2_LIBRARIES)
+   SET(LIBXML2_FOUND TRUE)
+ELSE (LIBXML2_INCLUDE_DIR AND LIBXML2_LIBRARIES)
+   SET(LIBXML2_FOUND FALSE)
+ENDIF (LIBXML2_INCLUDE_DIR AND LIBXML2_LIBRARIES)
+# Report: a status line unless quiet; SEND_ERROR if LibXml2_FIND_REQUIRED is set.
+IF (LIBXML2_FOUND)
+   IF (NOT LibXml2_FIND_QUIETLY)
+      MESSAGE(STATUS "Found LibXml2: ${LIBXML2_LIBRARIES}")
+   ENDIF (NOT LibXml2_FIND_QUIETLY)
+ELSE (LIBXML2_FOUND)
+   IF (LibXml2_FIND_REQUIRED)
+      MESSAGE(SEND_ERROR "Could NOT find LibXml2")
+   ENDIF (LibXml2_FIND_REQUIRED)
+ENDIF (LIBXML2_FOUND)
+
+MARK_AS_ADVANCED(LIBXML2_INCLUDE_DIR LIBXML2_LIBRARIES)
+
Index: branches/apertium-tagger/apertium2/cmake/FindLttoolbox3.cmake
===================================================================
--- branches/apertium-tagger/apertium2/cmake/FindLttoolbox3.cmake	(nonexistent)
+++ branches/apertium-tagger/apertium2/cmake/FindLttoolbox3.cmake	(revision 69632)
@@ -0,0 +1,57 @@
+# - Try to find Lttoolbox3
+# Once done this will define
+#
+#  LTTOOLBOX3_FOUND - system has Lttoolbox3
+#  LTTOOLBOX3_INCLUDE_DIR - the Lttoolbox3 include directory
+#  LTTOOLBOX3_LIBRARIES - the libraries needed to use Lttoolbox3
+#  LTTOOLBOX3_DEFINITIONS - Compiler switches required for using Lttoolbox3
+#
+# Copyright (c) 2006, Alexander Neundorf <neundorf@kde.org>
+# This code is available under the BSD license, see licenses/BSD for details.
+
+# Copyright (c) 2006, Alexander Neundorf, <neundorf@kde.org>
+#
+# Redistribution and use is allowed according to the terms of the BSD license.
+# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+#
+# This is derived from FindLibXml2.cmake
+
+IF (LTTOOLBOX3_INCLUDE_DIR AND LTTOOLBOX3_LIBRARIES)
+   # Both result variables are already set (in cache already): stay quiet.
+   SET(Lttoolbox3_FIND_QUIETLY TRUE)
+ENDIF (LTTOOLBOX3_INCLUDE_DIR AND LTTOOLBOX3_LIBRARIES)
+
+IF (NOT WIN32)
+   # use pkg-config to get the directories and then use these values
+   # in the FIND_PATH() and FIND_LIBRARY() calls
+   INCLUDE(UsePkgConfig)
+   PKGCONFIG(lttoolbox-3.0 LTTOOLBOX3_INCLUDES LTTOOLBOX3_LIB_DIR LTTOOLBOX3_LDFLAGS LTTOOLBOX3_CFLAGS)
+   SET(LTTOOLBOX3_DEFINITIONS ${LTTOOLBOX3_CFLAGS})
+ENDIF (NOT WIN32)
+# Find the directory holding lttoolbox/alphabet.h, also trying lttoolbox-3.0/.
+FIND_PATH(LTTOOLBOX3_INCLUDE_DIR lttoolbox/alphabet.h
+   PATHS ${LTTOOLBOX3_INCLUDES}
+   PATH_SUFFIXES lttoolbox-3.0)
+# Find the lttoolbox3 library, also searching the pkg-config library dir.
+FIND_LIBRARY(LTTOOLBOX3_LIBRARIES
+             NAMES lttoolbox3
+             PATHS ${LTTOOLBOX3_LIB_DIR})
+# LTTOOLBOX3_FOUND is TRUE only when both the header dir and the library are set.
+IF (LTTOOLBOX3_INCLUDE_DIR AND LTTOOLBOX3_LIBRARIES)
+   SET(LTTOOLBOX3_FOUND TRUE)
+ELSE (LTTOOLBOX3_INCLUDE_DIR AND LTTOOLBOX3_LIBRARIES)
+   SET(LTTOOLBOX3_FOUND FALSE)
+ENDIF (LTTOOLBOX3_INCLUDE_DIR AND LTTOOLBOX3_LIBRARIES)
+# Report: a status line unless quiet; SEND_ERROR if Lttoolbox3_FIND_REQUIRED is set.
+IF (LTTOOLBOX3_FOUND)
+   IF (NOT Lttoolbox3_FIND_QUIETLY)
+      MESSAGE(STATUS "Found Lttoolbox3: ${LTTOOLBOX3_LIBRARIES}")
+   ENDIF (NOT Lttoolbox3_FIND_QUIETLY)
+ELSE (LTTOOLBOX3_FOUND)
+   IF (Lttoolbox3_FIND_REQUIRED)
+      MESSAGE(SEND_ERROR "Could NOT find Lttoolbox3")
+   ENDIF (Lttoolbox3_FIND_REQUIRED)
+ENDIF (LTTOOLBOX3_FOUND)
+
+MARK_AS_ADVANCED(LTTOOLBOX3_INCLUDE_DIR LTTOOLBOX3_LIBRARIES)
+
Index: branches/apertium-tagger/apertium2
===================================================================
--- branches/apertium-tagger/apertium2	(nonexistent)
+++ branches/apertium-tagger/apertium2	(revision 69632)

Property changes on: branches/apertium-tagger/apertium2
___________________________________________________________________
Added: svn:ignore
## -0,0 +1,16 ##
+autom4te.cache
+Makefile
+Makefile.in
+missing
+configure
+config.sub
+config.status
+config.log
+config.guess
+aclocal.m4
+*.pc
+depcomp
+install-sh
+libtool
+ltmain.sh
+compile