commit 1cfbf775ef31e20ccdf150dd6b2013ec882f5425
Author: aboelhamd <aboelhamd.abotreka@gmail.com>
Date:   Wed May 8 23:53:02 2019 +0200

    Minor modifications, besides the addition of new programs and scripts

diff --git a/put-rules-ids.py b/put-rules-ids.py
index fd6485d..718a290 100644
--- a/put-rules-ids.py
+++ b/put-rules-ids.py
@@ -1,8 +1,9 @@
 import sys
 
 if (len(sys.argv) < 3) :
-	print('Usage: python put-ids.py <original transfer file name> <new transfer file name>');
+	# Wrong usage is an error: name the actual script and keep the non-zero exit status.
+	print('\nUsage: python put-rules-ids.py original-transfer-file-path new-transfer-file-path');
 	sys.exit(-1)
 
 oldfile = open(sys.argv[1], 'r')
 newfile = open(sys.argv[2], 'w')
diff --git a/rem-par-new-lines.py b/rem-par-new-lines.py
new file mode 100644
index 0000000..3a89966
--- /dev/null
+++ b/rem-par-new-lines.py
@@ -0,0 +1,23 @@
+"""Drop parallel sentence pairs in which either side is blank.
+
+Reads the source and target files line by line in lockstep, strips each line,
+and writes a pair to the new source/target files only when both stripped
+lines are non-empty, so the two outputs stay aligned with each other.
+"""
+import sys
+
+if len(sys.argv) != 5:
+    print('\nUsage: python3 rem-par-new-lines.py source-file-path target-file-path new-source-file-path new-target-file-path')
+    sys.exit(-1)
+
+# Inputs are opened first so a missing input does not truncate the outputs;
+# the single `with` closes all four files even if an error interrupts the copy.
+with open(sys.argv[1]) as file1, open(sys.argv[2]) as file2, \
+     open(sys.argv[3], 'w') as file3, open(sys.argv[4], 'w') as file4:
+    # zip stops at the shorter file, so unpaired trailing lines are dropped.
+    for line1, line2 in zip(file1, file2):
+        line1 = line1.strip()
+        line2 = line2.strip()
+        if line1 and line2:
+            file3.write(line1 + "\n")
+            file4.write(line2 + "\n")
diff --git a/score-sentences.py b/score-sentences.py
index c73a384..5930ef0 100644
--- a/score-sentences.py
+++ b/score-sentences.py
@@ -2,7 +2,7 @@ import sys
 import kenlm
 
 if (len(sys.argv) < 4) :
-	print('Usage: python score-sentences.py arpa_or_binary_LM_file target_lang_file weights_file');
+	print('\nUsage: python score-sentences.py arpa_or_binary_LM_file target_lang_file weights_file');
 	sys.exit(-1)
 
 targetfile = open(sys.argv[2], 'r')
diff --git a/sentenceTokenizer.rb b/sentenceTokenizer.rb
index 632058f..5f1f1f2 100644
--- a/sentenceTokenizer.rb
+++ b/sentenceTokenizer.rb
@@ -1,7 +1,7 @@
 require 'pragmatic_segmenter'
 
 if (ARGV.length < 3)
-  puts "Usage : ruby2.3 sentenceTokenizer.rb 639-1ISOlangCode textFilePath sentencesFilePath"
+  puts "\nUsage : ruby2.3 sentenceTokenizer.rb 639-1ISOlangCode textFilePath sentencesFilePath"
   exit
 end
 
diff --git a/spcCharsRem.rb b/spcCharsRem.rb
index 50e37d0..a275daa 100644
--- a/spcCharsRem.rb
+++ b/spcCharsRem.rb
@@ -1,13 +1,14 @@
 if (ARGV.length < 2)
-  puts "Usage : ruby2.3 spcCharsRem.rb oldFilePath newFilePath"
+  puts "\nUsage : ruby2.3 spcCharsRem.rb oldFilePath newFilePath"
   exit
 end
 
-File.open(ARGV[0]).each do |line1|
-	line1.delete! ('\\\(\)\[\]\{\}\<\>\|\$\/\'\"')
-    
-  File.open(ARGV[1], "a") do |line2|
-    line2.puts line1
+# Copy the old file to the new one, skipping empty and whitespace-only lines (special-character
+# stripping is disabled). Block forms close both files, even when an error interrupts the copy.
+File.open(ARGV[1], "w") do |out|
+  File.foreach(ARGV[0]) do |line|
+    next if line =~ /\A\s*\z/
+    out.puts line
   end
 end
 
diff --git a/src/CombAlign.cpp b/src/CombAlign.cpp
new file mode 100644
index 0000000..1aaa27d
--- /dev/null
+++ b/src/CombAlign.cpp
@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sstream>
+
+#include "../pugixml/pugixml.hpp"
+#include "RuleParser.h"
+#include "RuleExecution.h"
+#include "TranElemLiterals.h"
+#include "CLExec.h"
+
+using namespace std;
+using namespace pugi;
+using namespace elem;
+
+// Print the expected command-line arguments (shown when argc is wrong).
+static void
+printUsage ()
+{
+  cout << "Error in parameters !" << endl;
+  cout << "Parameters are : localeId transferFilePath lextorFilePath"
+      << " chunkerFilePath referenceFilePath newRefFilePath" << endl;
+  cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ"
+      << endl;
+  cout << "transferFilePath : Apertium transfer file of the language pair used."
+      << endl;
+  cout << "lextorFilePath : Apertium lextor file for the source language sentences."
+      << endl;
+  cout << "chunkerFilePath : chunker file path (output of this program and"
+      << " input for apertium interchunk)." << endl;
+  cout << "referenceFilePath : Reference parallel target translation file path."
+      << endl;
+  cout << "newRefFilePath : New aligned reference file path." << endl;
+}
+
+// Expands every lextor sentence into all of its rule-combination outputs.
+//
+// Each output is written to the chunker file and the sentence's reference
+// translation is written once per output to the new reference file, so the two
+// files stay line-aligned; a blank line closes each sentence's group in both.
+//
+// argv: localeId transferFilePath lextorFilePath chunkerFilePath
+//       referenceFilePath newRefFilePath
+// Returns 0 on success, -1 on wrong arguments, an unreadable transfer file or
+// a file that cannot be opened.
+int
+main (int argc, char **argv)
+{
+  // Wrong usage is an error: stop instead of running on built-in default paths.
+  if (argc != 7)
+    {
+      printUsage ();
+      return -1;
+    }
+
+  string localeId = argv[1];
+  string transferFilePath = argv[2];
+  string lextorFilePath = argv[3];
+  string chunkerFilePath = argv[4];
+  string referenceFilePath = argv[5];
+  string newRefFilePath = argv[6];
+
+  // load transfer file in an xml document object; done before the output files
+  // are opened so a bad transfer file does not truncate existing outputs
+  xml_document transferDoc;
+  xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
+  if (!result)
+    {
+      cout << "ERROR : " << result.description () << endl;
+      return -1;
+    }
+
+  ifstream lextorFile (lextorFilePath.c_str ());
+  ifstream referenceFile (referenceFilePath.c_str ());
+  ofstream chunkerFile (chunkerFilePath.c_str ());
+  ofstream newRefFile (newRefFilePath.c_str ());
+  if (!lextorFile.is_open () || !referenceFile.is_open () || !chunkerFile.is_open ()
+      || !newRefFile.is_open ())
+    {
+      cout << "ERROR in opening files!" << endl;
+      return -1;
+    }
+
+  // xml node of the parent node (transfer) in the transfer file
+  xml_node transfer = transferDoc.child ("transfer");
+
+  map<string, vector<vector<string> > > attrs = RuleParser::getAttrs (transfer);
+  map<string, string> vars = RuleParser::getVars (transfer);
+  map<string, vector<string> > lists = RuleParser::getLists (transfer);
+
+  unsigned i = 0;
+  string tokenizedSentence, refSent;
+  // stops at the end of the shorter of the two input files
+  while (getline (lextorFile, tokenizedSentence) && getline (referenceFile, refSent))
+    {
+      // progress: index of the sentence being processed
+      cout << i++ << endl;
+
+      // spaces after each token
+      vector<string> spaces;
+
+      // tokens in the sentence order
+      vector<string> slTokens, tlTokens;
+
+      // tags of tokens in order
+      vector<vector<string> > slTags, tlTags;
+
+      RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces,
+				     tokenizedSentence);
+
+      // map of tokens ids and their matched categories
+      map<unsigned, vector<string> > catsApplied;
+
+      RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
+
+      // map of matched rules and a pair of first token id and patterns number
+      map<xml_node, vector<pair<unsigned, unsigned> > > rulesApplied;
+
+      RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
+
+      // rule and (target) token map to specific output
+      // if rule has many patterns we will choose the first token only
+      map<unsigned, map<unsigned, string> > ruleOutputs;
+
+      // map (target) token to all matched rules ids and the number of pattern items of each rule
+      map<unsigned, vector<pair<unsigned, unsigned> > > tokenRules;
+
+      RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens,
+			       tlTags, rulesApplied, attrs, lists, &vars, spaces,
+			       localeId);
+      // final outs
+      vector<string> outs;
+      // number of possible combinations
+      unsigned compNum = 0;
+      // nodes for every token and rule
+      map<unsigned, vector<RuleExecution::Node*> > nodesPool;
+      // ambiguous informations
+      vector<RuleExecution::AmbigInfo*> ambigInfo;
+
+      // rules combinations
+      vector<vector<RuleExecution::Node*> > combNodes;
+
+      nodesPool = RuleExecution::getNodesPool (tokenRules);
+
+      RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
+      RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs,
+			      spaces);
+
+      // write the outs, repeating the reference once per output
+      for (unsigned j = 0; j < outs.size (); j++)
+	{
+	  chunkerFile << outs[j] << '\n';
+	  newRefFile << refSent << '\n';
+	}
+
+      // blank line closes this sentence's group
+      chunkerFile << '\n';
+      newRefFile << '\n';
+
+      // delete AmbigInfo pointers
+      for (unsigned j = 0; j < ambigInfo.size (); j++)
+	{
+	  // delete the dummy node pointers
+	  set<RuleExecution::Node*> dummies;
+	  for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++)
+	    dummies.insert (ambigInfo[j]->combinations[k][0]);
+	  for (set<RuleExecution::Node*>::iterator it = dummies.begin ();
+	      it != dummies.end (); it++)
+	    delete (*it);
+
+	  delete ambigInfo[j];
+	}
+      // delete Node pointers
+      for (map<unsigned, vector<RuleExecution::Node*> >::iterator it =
+	  nodesPool.begin (); it != nodesPool.end (); it++)
+	{
+	  for (unsigned j = 0; j < it->second.size (); j++)
+	    {
+	      delete it->second[j];
+	    }
+	}
+    }
+
+  // streams are flushed and closed by their destructors on return
+  cout << "CombAlign finished!" << endl;
+  return 0;
+}
diff --git a/src/ModelResult.cpp b/src/LangModAnalysis.cpp
similarity index 100%
rename from src/ModelResult.cpp
rename to src/LangModAnalysis.cpp
diff --git a/src/OrderAmbigSents.cpp b/src/OrderAmbigSents.cpp
index c939175..84896e3 100644
--- a/src/OrderAmbigSents.cpp
+++ b/src/OrderAmbigSents.cpp
@@ -115,12 +115,12 @@ main (int argc, char **argv)
       vector<string> orderedSources, orderedTargets;
       vector<unsigned> ambigCounts;
 
-//      unsigned i = 0;
+      unsigned i = 0;
       string tokenizedSentence, sourceSentence, targetSentence;
       while (getline (lextorFile, tokenizedSentence)
 	  && getline (sourceFile, sourceSentence) && getline (targetFile, targetSentence))
 	{
-//	  cout << i++ << endl;
+	  cout << i++ << endl;
 
 // spaces after each token
 	  vector<string> spaces;
@@ -195,8 +195,9 @@ main (int argc, char **argv)
 	}
 
       // write the ordered sentences
-      for (unsigned j = 0; j < orderedSources.size (); j++)
+      for (unsigned j = 0; j < 10000 && j < orderedSources.size (); j++) // keep the 10000 cap, but never index past the end
 	{
+	  cout << j << endl;
 	  orderedSrcFile << orderedSources[j] << endl;
 	  orderedTrgFile << orderedTargets[j] << endl;
 	}
diff --git a/src/RulesApplier.cpp b/src/RulesApplier.cpp
index 8e9623c..13476af 100644
--- a/src/RulesApplier.cpp
+++ b/src/RulesApplier.cpp
@@ -1,303 +1,303 @@
-//#include <stdio.h>
-//#include <iostream>
-//#include <fstream>
-//#include <stdexcept>
-//#include <unistd.h>
-//#include <stdlib.h>
-//#include <string>
-//#include <map>
-//#include <set>
-//#include <vector>
-//#include <string.h>
-//#include <sys/stat.h>
-//#include <time.h>
-//#include <sys/time.h>
-//#include <sstream>
-//
-//#include "../pugixml/pugixml.hpp"
-//#include "RuleParser.h"
-//#include "RuleExecution.h"
-//#include "TranElemLiterals.h"
-//#include "CLExec.h"
-//
-//using namespace std;
-//using namespace pugi;
-//using namespace elem;
-//
-//int
-//main (int argc, char **argv)
-//{
-//  string localeId, transferFilePath, lextorFilePath, interInFilePath;
-//
-//  if (argc == 5)
-//    {
-//      localeId = argv[1];
-//      transferFilePath = argv[2];
-//      lextorFilePath = argv[3];
-//      interInFilePath = argv[4];
-//    }
-//  else
-//    {
-////      localeId = "es_ES";
-////      transferFilePath = "transferFile.t1x";
-////      sentenceFilePath = "spa-test.txt";
-////      lextorFilePath = "spa-test.lextor";
-////      interInFilePath = "inter2.txt";
-//
-////      localeId = "kk_KZ";
-////      transferFilePath = "apertium-kaz-tur.kaz-tur.t1x";
-////      sentenceFilePath = "sample-sentences.txt";
-////      lextorFilePath = "sample-lextor.txt";
-////      interInFilePath = "sample-inter.txt";
-//
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <stdexcept>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sstream>
+
+#include "../pugixml/pugixml.hpp"
+#include "RuleParser.h"
+#include "RuleExecution.h"
+#include "TranElemLiterals.h"
+#include "CLExec.h"
+
+using namespace std;
+using namespace pugi;
+using namespace elem;
+
+int
+main (int argc, char **argv)
+{
+  string localeId, transferFilePath, lextorFilePath, interInFilePath;
+
+  if (argc == 5)
+    {
+      localeId = argv[1];
+      transferFilePath = argv[2];
+      lextorFilePath = argv[3];
+      interInFilePath = argv[4];
+    }
+  else
+    {
 //      localeId = "es_ES";
-//      transferFilePath =
-//	  "/home/aboelhamd/apertium-eng-spa-ambiguous-rules/apertium-eng-spa.spa-eng.t1x";
-//      lextorFilePath =
-//	  "/home/aboelhamd/eclipse-workspace/machinetranslation/test-lextor.txt";
-//      interInFilePath =
-//	  "/home/aboelhamd/eclipse-workspace/machinetranslation/test-chunker.txt";
-//
-//      cout << "Error in parameters !" << endl;
-//      cout << "Parameters are : localeId transferFilePath lextorFilePath interInFilePath"
-//	  << endl;
-//      cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ"
-//	  << endl;
-//      cout << "transferFilePath : Apertium transfer file of the language pair used."
-//	  << endl;
-//      cout << "lextorFilePath : Apertium lextor file for the source language sentences."
-//	  << endl;
-//      cout
-//	  << "interInFilePath : Output file name of this program which is the input for apertium interchunk."
-//	  << endl;
-////      return -1;
-//    }
-//
-//  ifstream lextorFile (lextorFilePath.c_str ());
-//  ofstream interInFile (interInFilePath.c_str ());
-//  ifstream refFile (
-//      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test.txt").c_str ());
-//  ofstream refInFile (
-//      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test-mul.txt").c_str ());
-//  ifstream errFile (
-//      string (
-//	  "/home/aboelhamd/Downloads/apertium-eval-translator-master/ambig_results.txt").c_str ());
-//  ofstream bestInFile (
-//      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/best-chunker.txt").c_str ());
-//  if (lextorFile.is_open () && interInFile.is_open ())
-//    {
-//      // load transfer file in an xml document object
-//      xml_document transferDoc;
-//      xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
-//
-//      if (string (result.description ()) != "No error")
-//	{
-//	  cout << "ERROR : " << result.description () << endl;
-//	  return -1;
-//	}
-//
-//      // xml node of the parent node (transfer) in the transfer file
-//      xml_node transfer = transferDoc.child ("transfer");
-//
-//      map<string, vector<vector<string> > > attrs = RuleParser::getAttrs (transfer);
-//      map<string, string> vars = RuleParser::getVars (transfer);
-//      map<string, vector<string> > lists = RuleParser::getLists (transfer);
-//
-//      unsigned i = 0;
-//      string tokenizedSentence, refSent;
-//      while (getline (lextorFile, tokenizedSentence) && getline (refFile, refSent))
-//	{
-//	  cout << i++ << endl;
-//
-//	  // spaces after each token
-//	  vector<string> spaces;
-//
-//	  // tokens in the sentence order
-//	  vector<string> slTokens, tlTokens;
-//
-//	  // tags of tokens in order
-//	  vector<vector<string> > slTags, tlTags;
-//
-//	  RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces,
-//					 tokenizedSentence);
-//
-//	  // map of tokens ids and their matched categories
-//	  map<unsigned, vector<string> > catsApplied;
-//
-//	  RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
-//
-//	  // map of matched rules and a pair of first token id and patterns number
-//	  map<xml_node, vector<pair<unsigned, unsigned> > > rulesApplied;
-//
-//	  RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
-//
-//	  // rule and (target) token map to specific output
-//	  // if rule has many patterns we will choose the first token only
-//	  map<unsigned, map<unsigned, string> > ruleOutputs;
-//
-//	  // map (target) token to all matched rules ids and the number of pattern items of each rule
-//	  map<unsigned, vector<pair<unsigned, unsigned> > > tokenRules;
-//
-//	  RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens,
-//				   tlTags, rulesApplied, attrs, lists, &vars, spaces,
-//				   localeId);
-//	  // final outs
-//	  vector<string> outs;
-//	  // number of possible combinations
-//	  unsigned compNum;
-//	  // nodes for every token and rule
-//	  map<unsigned, vector<RuleExecution::Node*> > nodesPool;
-//	  // ambiguous informations
-//	  vector<RuleExecution::AmbigInfo*> ambigInfo;
-//
-//	  // rules combinations
-//	  vector<vector<RuleExecution::Node*> > combNodes;
-//
-//	  nodesPool = RuleExecution::getNodesPool (tokenRules);
-//
-//	  RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
-//	  RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs,
-//				  spaces);
-//
-////	  for (unsigned j = 0; j < tlTokens.size (); j++)
-////	    {
-////	      cout << tlTokens[j] << endl;
-////	      vector<pair<unsigned, unsigned> > rulees = tokenRules[j];
-////	      for (unsigned k = 0; k < rulees.size (); k++)
-////		{
-////		  cout << rulees[k].first << " , " << rulees[k].second << endl;
-////		}
-////	      cout << endl;
-////	    }
-////
-////	  for (unsigned j = 0; j < ambigInfo.size (); j++)
-////	    {
-////	      cout << "firTokId = " << ambigInfo[j]->firTokId << "; maxPat = "
-////		  << ambigInfo[j]->maxPat << endl;
-////	      vector<vector<RuleExecution::Node*> > combinations =
-////		  ambigInfo[j]->combinations;
-////	      cout << endl;
-////	      for (unsigned k = 0; k < combinations.size (); k++)
-////		{
-////		  vector<RuleExecution::Node*> nodes = combinations[k];
-////		  for (unsigned l = 1; l < nodes.size (); l++)
-////		    {
-////		      cout << "tok=" << nodes[l]->tokenId << "; rul=" << nodes[l]->ruleId
-////			  << "; pat=" << nodes[l]->patNum << " - ";
-////		    }
-////		  cout << endl;
-////		}
-////	      cout << endl;
-////	    }
-////
-////	  for (map<unsigned, map<unsigned, string> >::iterator it = ruleOutputs.begin ();
-////	      it != ruleOutputs.end (); it++)
-////	    {
-////	      cout << "ruleId=" << it->first << endl;
-////	      map<unsigned, string> outs = it->second;
-////
-////	      for (map<unsigned, string>::iterator it2 = outs.begin ();
-////		  it2 != outs.end (); it2++)
-////		{
-////		  cout << "tokId=" << it2->first << " , out = " << it2->second << endl;
-////		}
-////	      cout << endl;
-////	    }
-////	  cout << endl;
-////
-////	  for (unsigned j = 0; j < tlTokens.size (); j++)
-////	    {
-////	      vector<RuleExecution::Node*> nodes = nodesPool[j];
-////	      cout << "tokId = " << j << " : " << tlTokens[j] << endl;
-////	      for (unsigned k = 0; k < nodes.size (); k++)
-////		{
-////		  cout << "ruleId = " << nodes[k]->ruleId << "; patNum = "
-////		      << nodes[k]->patNum << endl;
-////		}
-////	      cout << endl;
-////	    }
-////
-////	  for (unsigned j = 0; j < combNodes.size (); j++)
-////	    {
-////	      vector<RuleExecution::Node*> nodes = combNodes[j];
-////	      for (unsigned k = 0; k < nodes.size (); k++)
-////		{
-////		  cout << "tok=" << nodes[k]->tokenId << "; rul=" << nodes[k]->ruleId
-////		      << "; pat=" << nodes[k]->patNum << " - ";
-////		}
-////	      cout << endl;
-////	    }
-//
-////	  set<string> diffOuts (outs.begin (), outs.end ());
-////
-////	  // write the outs
-////	  for (set<string>::iterator it = diffOuts.begin (); it != diffOuts.end (); it++)
-////	    {
-////	      interInFile << *it << endl;
-////	      refInFile << refSent << endl;
-////	    }
-//
-//	  float min = 100000;
-//	  int minInd = -1;
-//	  string serr;
-//	  float err;
-//
-//	  // write the outs
-//	  for (unsigned j = 0; j < outs.size (); j++)
+//      transferFilePath = "transferFile.t1x";
+//      sentenceFilePath = "spa-test.txt";
+//      lextorFilePath = "spa-test.lextor";
+//      interInFilePath = "inter2.txt";
+
+//      localeId = "kk_KZ";
+//      transferFilePath = "apertium-kaz-tur.kaz-tur.t1x";
+//      sentenceFilePath = "sample-sentences.txt";
+//      lextorFilePath = "sample-lextor.txt";
+//      interInFilePath = "sample-inter.txt";
+
+      localeId = "es_ES";
+      transferFilePath =
+	  "/home/aboelhamd/apertium-eng-spa-ambiguous-rules/apertium-eng-spa.spa-eng.t1x";
+      lextorFilePath =
+	  "/home/aboelhamd/eclipse-workspace/machinetranslation/test-lextor.txt";
+      interInFilePath =
+	  "/home/aboelhamd/eclipse-workspace/machinetranslation/test-chunker.txt";
+
+      cout << "Error in parameters !" << endl;
+      cout << "Parameters are : localeId transferFilePath lextorFilePath interInFilePath"
+	  << endl;
+      cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ"
+	  << endl;
+      cout << "transferFilePath : Apertium transfer file of the language pair used."
+	  << endl;
+      cout << "lextorFilePath : Apertium lextor file for the source language sentences."
+	  << endl;
+      cout
+	  << "interInFilePath : Output file name of this program which is the input for apertium interchunk."
+	  << endl;
+//      return -1;
+    }
+
+  ifstream lextorFile (lextorFilePath.c_str ());
+  ofstream interInFile (interInFilePath.c_str ());
+  ifstream refFile (
+      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test.txt").c_str ());
+  ofstream refInFile (
+      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test-mul.txt").c_str ());
+  ifstream errFile (
+      string (
+	  "/home/aboelhamd/Downloads/apertium-eval-translator-master/ambig_results.txt").c_str ());
+  ofstream bestInFile (
+      string ("/home/aboelhamd/eclipse-workspace/machinetranslation/best-chunker.txt").c_str ());
+  if (lextorFile.is_open () && interInFile.is_open ())
+    {
+      // load transfer file in an xml document object
+      xml_document transferDoc;
+      xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
+
+      if (string (result.description ()) != "No error")
+	{
+	  cout << "ERROR : " << result.description () << endl;
+	  return -1;
+	}
+
+      // xml node of the parent node (transfer) in the transfer file
+      xml_node transfer = transferDoc.child ("transfer");
+
+      map<string, vector<vector<string> > > attrs = RuleParser::getAttrs (transfer);
+      map<string, string> vars = RuleParser::getVars (transfer);
+      map<string, vector<string> > lists = RuleParser::getLists (transfer);
+
+      unsigned i = 0;
+      string tokenizedSentence, refSent;
+      while (getline (lextorFile, tokenizedSentence) && getline (refFile, refSent))
+	{
+	  cout << i++ << endl;
+
+	  // spaces after each token
+	  vector<string> spaces;
+
+	  // tokens in the sentence order
+	  vector<string> slTokens, tlTokens;
+
+	  // tags of tokens in order
+	  vector<vector<string> > slTags, tlTags;
+
+	  RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces,
+					 tokenizedSentence);
+
+	  // map of tokens ids and their matched categories
+	  map<unsigned, vector<string> > catsApplied;
+
+	  RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
+
+	  // map of matched rules and a pair of first token id and patterns number
+	  map<xml_node, vector<pair<unsigned, unsigned> > > rulesApplied;
+
+	  RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
+
+	  // rule and (target) token map to specific output
+	  // if rule has many patterns we will choose the first token only
+	  map<unsigned, map<unsigned, string> > ruleOutputs;
+
+	  // map (target) token to all matched rules ids and the number of pattern items of each rule
+	  map<unsigned, vector<pair<unsigned, unsigned> > > tokenRules;
+
+	  RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens,
+				   tlTags, rulesApplied, attrs, lists, &vars, spaces,
+				   localeId);
+	  // final outs
+	  vector<string> outs;
+	  // number of possible combinations
+	  unsigned compNum;
+	  // nodes for every token and rule
+	  map<unsigned, vector<RuleExecution::Node*> > nodesPool;
+	  // ambiguous informations
+	  vector<RuleExecution::AmbigInfo*> ambigInfo;
+
+	  // rules combinations
+	  vector<vector<RuleExecution::Node*> > combNodes;
+
+	  nodesPool = RuleExecution::getNodesPool (tokenRules);
+
+	  RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
+	  RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs,
+				  spaces);
+
+//	  for (unsigned j = 0; j < tlTokens.size (); j++)
 //	    {
-//	      getline (errFile, serr);
-//	      err = strtof (serr.c_str (), NULL);
-//
-//	      if (err < min)
+//	      cout << tlTokens[j] << endl;
+//	      vector<pair<unsigned, unsigned> > rulees = tokenRules[j];
+//	      for (unsigned k = 0; k < rulees.size (); k++)
 //		{
-//		  min = err;
-//		  minInd = j;
+//		  cout << rulees[k].first << " , " << rulees[k].second << endl;
 //		}
-//
-//	      interInFile << outs[j] << endl;
-//	      refInFile << refSent << endl;
+//	      cout << endl;
 //	    }
-////	  cout << minInd << endl;
-//	  bestInFile << outs[minInd] << endl;
-//
-//	  interInFile << endl;
-//	  refInFile << endl;
 //
-//	  // delete AmbigInfo pointers
 //	  for (unsigned j = 0; j < ambigInfo.size (); j++)
 //	    {
-//	      // delete the dummy node pointers
-//	      set<RuleExecution::Node*> dummies;
-//	      for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++)
-//		dummies.insert (ambigInfo[j]->combinations[k][0]);
-//	      for (set<RuleExecution::Node*>::iterator it = dummies.begin ();
-//		  it != dummies.end (); it++)
-//		delete (*it);
+//	      cout << "firTokId = " << ambigInfo[j]->firTokId << "; maxPat = "
+//		  << ambigInfo[j]->maxPat << endl;
+//	      vector<vector<RuleExecution::Node*> > combinations =
+//		  ambigInfo[j]->combinations;
+//	      cout << endl;
+//	      for (unsigned k = 0; k < combinations.size (); k++)
+//		{
+//		  vector<RuleExecution::Node*> nodes = combinations[k];
+//		  for (unsigned l = 1; l < nodes.size (); l++)
+//		    {
+//		      cout << "tok=" << nodes[l]->tokenId << "; rul=" << nodes[l]->ruleId
+//			  << "; pat=" << nodes[l]->patNum << " - ";
+//		    }
+//		  cout << endl;
+//		}
+//	      cout << endl;
+//	    }
 //
-//	      delete ambigInfo[j];
+//	  for (map<unsigned, map<unsigned, string> >::iterator it = ruleOutputs.begin ();
+//	      it != ruleOutputs.end (); it++)
+//	    {
+//	      cout << "ruleId=" << it->first << endl;
+//	      map<unsigned, string> outs = it->second;
+//
+//	      for (map<unsigned, string>::iterator it2 = outs.begin ();
+//		  it2 != outs.end (); it2++)
+//		{
+//		  cout << "tokId=" << it2->first << " , out = " << it2->second << endl;
+//		}
+//	      cout << endl;
 //	    }
-//	  // delete Node pointers
-//	  for (map<unsigned, vector<RuleExecution::Node*> >::iterator it =
-//	      nodesPool.begin (); it != nodesPool.end (); it++)
+//	  cout << endl;
+//
+//	  for (unsigned j = 0; j < tlTokens.size (); j++)
 //	    {
-//	      for (unsigned j = 0; j < it->second.size (); j++)
+//	      vector<RuleExecution::Node*> nodes = nodesPool[j];
+//	      cout << "tokId = " << j << " : " << tlTokens[j] << endl;
+//	      for (unsigned k = 0; k < nodes.size (); k++)
 //		{
-//		  delete it->second[j];
+//		  cout << "ruleId = " << nodes[k]->ruleId << "; patNum = "
+//		      << nodes[k]->patNum << endl;
 //		}
+//	      cout << endl;
 //	    }
-//	}
 //
-//      lextorFile.close ();
-//      interInFile.close ();
-//      refFile.close ();
-//      refInFile.close ();
-//      bestInFile.close ();
-//      cout << "RulesApplier finished!";
-//    }
-//  else
-//    {
-//      cout << "ERROR in opening files!" << endl;
-//    }
+//	  for (unsigned j = 0; j < combNodes.size (); j++)
+//	    {
+//	      vector<RuleExecution::Node*> nodes = combNodes[j];
+//	      for (unsigned k = 0; k < nodes.size (); k++)
+//		{
+//		  cout << "tok=" << nodes[k]->tokenId << "; rul=" << nodes[k]->ruleId
+//		      << "; pat=" << nodes[k]->patNum << " - ";
+//		}
+//	      cout << endl;
+//	    }
+
+//	  set<string> diffOuts (outs.begin (), outs.end ());
 //
-//  return 0;
-//}
+//	  // write the outs
+//	  for (set<string>::iterator it = diffOuts.begin (); it != diffOuts.end (); it++)
+//	    {
+//	      interInFile << *it << endl;
+//	      refInFile << refSent << endl;
+//	    }
+
+	  // pick the output with the lowest error score read from errFile
+	  float min = 100000;
+	  int minInd = -1;
+	  string serr;
+	  float err;
+	  // write the outs
+	  for (unsigned j = 0; j < outs.size (); j++)
+	    {
+	      getline (errFile, serr);
+	      err = strtof (serr.c_str (), NULL);
+
+	      if (err < min)
+		{
+		  min = err;
+		  minInd = j;
+		}
+
+	      interInFile << outs[j] << endl;
+	      refInFile << refSent << endl;
+	    }
 //	  cout << minInd << endl;
+	  // minInd is -1 when nothing was scored: emit an empty line, not outs[-1]
+	  bestInFile << (minInd >= 0 ? outs[minInd] : string ()) << endl;
+	  interInFile << endl;
+	  refInFile << endl;
+
+	  // delete AmbigInfo pointers
+	  for (unsigned j = 0; j < ambigInfo.size (); j++)
+	    {
+	      // delete the dummy node pointers
+	      set<RuleExecution::Node*> dummies;
+	      for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++)
+		dummies.insert (ambigInfo[j]->combinations[k][0]);
+	      for (set<RuleExecution::Node*>::iterator it = dummies.begin ();
+		  it != dummies.end (); it++)
+		delete (*it);
+
+	      delete ambigInfo[j];
+	    }
+	  // delete Node pointers
+	  for (map<unsigned, vector<RuleExecution::Node*> >::iterator it =
+	      nodesPool.begin (); it != nodesPool.end (); it++)
+	    {
+	      for (unsigned j = 0; j < it->second.size (); j++)
+		{
+		  delete it->second[j];
+		}
+	    }
+	}
+
+      lextorFile.close ();
+      interInFile.close ();
+      refFile.close ();
+      refInFile.close ();
+      bestInFile.close ();
+      cout << "RulesApplier finished!";
+    }
+  else
+    {
+      cout << "ERROR in opening files!" << endl;
+    }
+
+  return 0;
+}