commit de5c00c7318bd8705feb6f18d5ccdc99437bb8de
Author: aboelhamd <aboelhamd.abotreka@gmail.com>
Date:   Sat May 25 02:59:58 2019 +0200

    Modify beam search to work with tag features

diff --git a/score-sentences.py b/score-sentences.py
index 21de0bf..3db521f 100644
--- a/score-sentences.py
+++ b/score-sentences.py
@@ -12,7 +12,7 @@ weightfile = open(sys.argv[3], 'w+')
 model = kenlm.LanguageModel(sys.argv[1])
 
 for sentence in targetfile:
-	weightfile.write('%f\n' % (1.0/model.score(sentence)))
+	weightfile.write('%f\n' % -(1.0/model.score(sentence)))
 
 targetfile.close()
 weightfile.close()
diff --git a/src/BeamSearch.cpp b/src/BeamSearch.cpp
index 9ca5738..2f0186c 100644
--- a/src/BeamSearch.cpp
+++ b/src/BeamSearch.cpp
@@ -26,28 +26,24 @@ using namespace std;
 using namespace pugi;
 using namespace elem;
 
-int
-main (int argc, char **argv)
-{
-  string lextorFilePath, interInFilePath, localeId, transferFilePath, modelsDest, k;
-
-  if (argc == 7)
-    {
-      localeId = argv[1];
-      transferFilePath = argv[2];
-      lextorFilePath = argv[3];
-      interInFilePath = argv[4];
-      modelsDest = argv[5];
-      k = argv[6];
-    }
-  else
-    {
-      localeId = "es_ES";
-      transferFilePath = "apertium-eng-spa.spa-eng.t1x";
-      lextorFilePath = "lextor.txt";
-      interInFilePath = "beaminter.txt";
-      modelsDest = "/home/aboelhamd/Downloads/models";
-      k = "8";
+int main(int argc, char **argv) {
+	string lextorFilePath, interInFilePath, localeId, transferFilePath,
+			modelsDest, k;
+
+	if (argc == 7) {
+		localeId = argv[1];
+		transferFilePath = argv[2];
+		lextorFilePath = argv[3];
+		interInFilePath = argv[4];
+		modelsDest = argv[5];
+		k = argv[6];
+	} else {
+		localeId = "es_ES";
+		transferFilePath = "apertium-eng-spa.spa-eng.t1x";
+		lextorFilePath = "lextor.txt";
+		interInFilePath = "beaminter.txt";
+		modelsDest = "/home/aboelhamd/Downloads/models";
+		k = "8";
 
 //      localeId = "kk_KZ";
 //      transferFilePath = "apertium-kaz-tur.kaz-tur.t1x";
@@ -57,131 +53,133 @@ main (int argc, char **argv)
 //      modelsDest = "./UntitledFolder/models";
 //      k = "8";
 
-      cout << "Error in parameters !" << endl;
-      cout
-	  << "Parameters are : localeId transferFilePath lextorFilePath interInFilePath modelsDest beamSize"
-	  << endl;
-      cout << "localeId : ICU locale ID for the source language. For Kazakh => kk_KZ"
-	  << endl;
-      cout << "transferFilePath : Apertium transfer file of the language pair used."
-	  << endl;
-      cout << "lextorFilePath : Apertium lextor file for the source language sentences."
-	  << endl;
-      cout
-	  << "interInFilePath : Output file of this program which is the input for apertium interchunk."
-	  << endl;
-      cout << "modelsDest : Yasmet models merged file destination." << endl;
-      cout << "beamSize : The size of beam in beam search algorithm." << endl;
-      return -1;
-    }
-
-  ifstream lextorFile (lextorFilePath.c_str ());
-  ofstream interInFile (interInFilePath.c_str ());
-  if (lextorFile.is_open () && interInFile.is_open ())
-    {
-      // load transfer file in an xml document object
-      xml_document transferDoc;
-      xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
-      if (string (result.description ()) != "No error")
-	{
-	  cout << "ERROR : " << result.description () << endl;
-	  return -1;
+		cout << "Error in parameters !" << endl;
+		cout
+				<< "Parameters are : localeId transferFilePath lextorFilePath interInFilePath modelsDest beamSize"
+				<< endl;
+		cout
+				<< "localeId : ICU locale ID for the source language. For Kazakh => kk_KZ"
+				<< endl;
+		cout
+				<< "transferFilePath : Apertium transfer file of the language pair used."
+				<< endl;
+		cout
+				<< "lextorFilePath : Apertium lextor file for the source language sentences."
+				<< endl;
+		cout
+				<< "interInFilePath : Output file of this program which is the input for apertium interchunk."
+				<< endl;
+		cout << "modelsDest : Yasmet models merged file destination." << endl;
+		cout << "beamSize : The size of beam in beam search algorithm." << endl;
+		return -1;
 	}
 
-      // xml node of the parent node (transfer) in the transfer file
-      xml_node transfer = transferDoc.child ("transfer");
-
-      map<string, vector<vector<string> > > attrs = RuleParser::getAttrs (transfer);
-      map<string, string> vars = RuleParser::getVars (transfer);
-      map<string, vector<string> > lists = RuleParser::getLists (transfer);
-      map<string, map<string, vector<float> > > classesWeights =
-	  CLExec::loadYasmetModels (modelsDest);
-
-      int beam;
-      stringstream buffer (k);
-      buffer >> beam;
+	ifstream lextorFile(lextorFilePath.c_str());
+	ofstream interInFile(interInFilePath.c_str());
+	if (lextorFile.is_open() && interInFile.is_open()) {
+		// load transfer file in an xml document object
+		xml_document transferDoc;
+		xml_parse_result result = transferDoc.load_file(
+				transferFilePath.c_str());
+		if (!result) {
+			cout << "ERROR : " << result.description() << endl;
+			return -1;
+		}
+
+		// xml node of the parent node (transfer) in the transfer file
+		xml_node transfer = transferDoc.child("transfer");
+
+		map<string, vector<vector<string> > > attrs = RuleParser::getAttrs(
+				transfer);
+		map<string, string> vars = RuleParser::getVars(transfer);
+		map<string, vector<string> > lists = RuleParser::getLists(transfer);
+		map<string, map<string, vector<float> > > classesWeights =
+				CLExec::loadYasmetModels(modelsDest);
+
+		int beam;
+		stringstream buffer(k);
+		buffer >> beam;
 
 //      unsigned i = 0;
-      string tokenizedSentence;
-      while (getline (lextorFile, tokenizedSentence))
-	{
+		string tokenizedSentence;
+		while (getline(lextorFile, tokenizedSentence)) {
 //	  cout << i << endl;
 
-	  // spaces after each token
-	  vector<string> spaces;
+			// spaces after each token
+			vector<string> spaces;
 
-	  // tokens in the sentence order
-	  vector<string> slTokens, tlTokens;
+			// tokens in the sentence order
+			vector<string> slTokens, tlTokens;
 
-	  // tags of tokens in order
-	  vector<vector<string> > slTags, tlTags;
+			// tags of tokens in order
+			vector<vector<string> > slTags, tlTags;
 
-	  RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces,
-					 tokenizedSentence);
+			RuleParser::sentenceTokenizer(&slTokens, &tlTokens, &slTags,
+					&tlTags, &spaces, tokenizedSentence);
 
-	  // map of tokens ids and their matched categories
-	  map<unsigned, vector<string> > catsApplied;
+			// map of tokens ids and their matched categories
+			map<unsigned, vector<string> > catsApplied;
 
-	  RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
+			RuleParser::matchCats(&catsApplied, slTokens, slTags, transfer);
 
-	  // map of matched rules and a pair of first token id and patterns number
-	  map<xml_node, vector<pair<unsigned, unsigned> > > rulesApplied;
+			// map of matched rules and a pair of first token id and patterns number
+			map<xml_node, vector<pair<unsigned, unsigned> > > rulesApplied;
 
-	  RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
+			RuleParser::matchRules(&rulesApplied, slTokens, catsApplied,
+					transfer);
 
-	  // rule and (target) token map to specific output
-	  // if rule has many patterns we will choose the first token only
-	  map<unsigned, map<unsigned, string> > ruleOutputs;
+			// rule and (target) token map to specific output
+			// if rule has many patterns we will choose the first token only
+			map<unsigned, map<unsigned, string> > ruleOutputs;
 
-	  // map (target) token to all matched rules ids and the number of pattern items of each rule
-	  map<unsigned, vector<pair<unsigned, unsigned> > > tokenRules;
+			// map (target) token to all matched rules ids and the number of pattern items of each rule
+			map<unsigned, vector<pair<unsigned, unsigned> > > tokenRules;
 
-	  RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens,
-				   tlTags, rulesApplied, attrs, lists, &vars, spaces,
-				   localeId);
+			RuleExecution::ruleOuts(&ruleOutputs, &tokenRules, slTokens, slTags,
+					tlTokens, tlTags, rulesApplied, attrs, lists, &vars, spaces,
+					localeId);
 
-	  // final outputs
-	  vector<string> outs;
-	  // number of generated combinations
-	  unsigned compNum;
-	  // nodes for every token and rule
-	  map<unsigned, vector<RuleExecution::Node*> > nodesPool;
-	  // ambiguous informations
-	  vector<RuleExecution::AmbigInfo*> ambigInfo;
-	  // beam tree
-	  vector<pair<vector<RuleExecution::Node*>, float> > beamTree;
-	  // rules combinations
-	  vector<vector<RuleExecution::Node*> > combNodes;
+			// final outputs
+			vector<string> outs;
+			// number of generated combinations
+			unsigned compNum;
+			// nodes for every token and rule
+			map<unsigned, vector<RuleExecution::Node*> > nodesPool;
+			// ambiguous informations
+			vector<RuleExecution::AmbigInfo*> ambigInfo;
+			// beam tree
+			vector<pair<vector<RuleExecution::Node*>, float> > beamTree;
+			// rules combinations
+			vector<vector<RuleExecution::Node*> > combNodes;
 
-	  nodesPool = RuleExecution::getNodesPool (tokenRules);
+			nodesPool = RuleExecution::getNodesPool(tokenRules);
 
-	  RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
+			RuleExecution::getAmbigInfo(tokenRules, nodesPool, &ambigInfo,
+					&compNum);
 
-	  vector<RuleExecution::AmbigInfo*> newAmbigInfo;
-	  for (unsigned j = 0; j < ambigInfo.size (); j++)
-	    if (ambigInfo[j]->combinations.size () > 1)
-	      newAmbigInfo.push_back (ambigInfo[j]);
+			vector<RuleExecution::AmbigInfo*> newAmbigInfo;
+			for (unsigned j = 0; j < ambigInfo.size(); j++)
+				if (ambigInfo[j]->combinations.size() > 1)
+					newAmbigInfo.push_back(ambigInfo[j]);
 
-	  CLExec::beamSearch (&beamTree, beam, slTokens, newAmbigInfo, classesWeights,
-			      localeId);
+			CLExec::beamSearch(&beamTree, beam, slTokens, slTags, newAmbigInfo,
+					classesWeights, localeId);
 
-	  // take the first sentence only
-	  beamTree.erase (beamTree.begin () + 1, beamTree.end ());
+			// take the first sentence only
+			beamTree.erase(beamTree.begin() + 1, beamTree.end());
 
-	  RuleExecution::getOuts (&outs, &combNodes, beamTree, nodesPool, ruleOutputs,
-				  spaces);
+			RuleExecution::getOuts(&outs, &combNodes, beamTree, nodesPool,
+					ruleOutputs, spaces);
 
-	  // write the outs
-	  for (unsigned j = 0; j < outs.size (); j++)
-	    interInFile << outs[j] << endl;
+			// write the outs
+			for (unsigned j = 0; j < outs.size(); j++)
+				interInFile << outs[j] << endl;
 
+		}
+		interInFile.close();
+		lextorFile.close();
+	} else {
+		cout << "ERROR in opening files!" << endl;
 	}
-      interInFile.close ();
-      lextorFile.close ();
-    }
-  else
-    {
-      cout << "ERROR in opening files!" << endl;
-    }
-  return 0;
+	return 0;
 }
diff --git a/src/BeamSearch.h b/src/BeamSearch.h
deleted file mode 100644
index 6793339..0000000
--- a/src/BeamSearch.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * BeamSearch.h
- *
- *  Created on: Mar 10, 2019
- *      Author: aboelhamd
- */
-
-#ifndef SRC_BEAMSEARCH_H_
-#define SRC_BEAMSEARCH_H_
-
-#include <iostream>
-
-using namespace std;
-
-class BeamSearch
-{
-public:
-  static void
-  transfer (string transferFilePath, string localeId, string modelsFileDest, string k,
-	    FILE* lextorFileFile, FILE* outFile);
-};
-#endif /* SRC_BEAMSEARCH_H_ */
diff --git a/src/CLExec.cpp b/src/CLExec.cpp
index e54475f..e2215db 100644
--- a/src/CLExec.cpp
+++ b/src/CLExec.cpp
@@ -33,118 +33,103 @@ using namespace std;
 using namespace pugi;
 using namespace elem;
 
-string
-exec (string cmd)
-{
-  string data;
-  FILE * stream;
-  const int max_buffer = 256;
-  char buffer[max_buffer];
-
-  stream = popen (cmd.c_str (), "r");
-  if (stream)
-    {
-      while (!feof (stream))
-	if (fgets (buffer, max_buffer, stream) != NULL)
-	  data.append (buffer);
-      pclose (stream);
-    }
-  return data;
+string exec(string cmd) {
+	string data;
+	FILE * stream;
+	const int max_buffer = 256;
+	char buffer[max_buffer];
+
+	stream = popen(cmd.c_str(), "r");
+	if (stream) {
+		// fgets returns NULL on both EOF and read error, so it bounds the loop
+		while (fgets(buffer, max_buffer, stream) != NULL)
+			data.append(buffer);
+		pclose(stream);
+	}
+	return data;
 }
 
-void
-CLExec::segmenter (string inFilePath, string outFilePath)
-{
-  // clear file before writing again
-  ofstream ofs;
-  ofs.open (outFilePath.c_str (), ofstream::out | ofstream::trunc);
-  exec (
-      string ("ruby2.3 kazSentenceTokenizer.rb ") + inFilePath + string (" ")
-	  + outFilePath);
+void CLExec::segmenter(string inFilePath, string outFilePath) {
+	// clear file before writing again
+	ofstream ofs;
+	ofs.open(outFilePath.c_str(), ofstream::out | ofstream::trunc);
+	exec(
+			string("ruby2.3 kazSentenceTokenizer.rb ") + inFilePath
+					+ string(" ") + outFilePath);
 }
 
-void
-CLExec::biltrans (string inFilePath, string outFilePath)
-{
-  // clear file before writing again
-  ofstream ofs;
-  ofs.open (outFilePath.c_str (), ofstream::out | ofstream::trunc);
-  exec (
-      string ("apertium -d $HOME/apertium-kaz-tur kaz-tur-biltrans ") + inFilePath
-	  + string (" ") + outFilePath);
+void CLExec::biltrans(string inFilePath, string outFilePath) {
+	// clear file before writing again
+	ofstream ofs;
+	ofs.open(outFilePath.c_str(), ofstream::out | ofstream::trunc);
+	exec(
+			string("apertium -d $HOME/apertium-kaz-tur kaz-tur-biltrans ")
+					+ inFilePath + string(" ") + outFilePath);
 }
 
-void
-CLExec::lextor (string inFilePath, string outFilePath)
-{
-  // clear file before writing again
-  ofstream ofs;
-  ofs.open (outFilePath.c_str (), ofstream::out | ofstream::trunc);
-  exec (
-      string ("lrx-proc -m $HOME/apertium-kaz-tur/kaz-tur.autolex.bin ") + inFilePath
-	  + string (" >") + outFilePath);
+void CLExec::lextor(string inFilePath, string outFilePath) {
+	// clear file before writing again
+	ofstream ofs;
+	ofs.open(outFilePath.c_str(), ofstream::out | ofstream::trunc);
+	exec(
+			string("lrx-proc -m $HOME/apertium-kaz-tur/kaz-tur.autolex.bin ")
+					+ inFilePath + string(" >") + outFilePath);
 }
 
-void
-CLExec::interchunk (string inFilePath, string outFilePath)
-{
-  exec (
-      string ("apertium-interchunk")
-	  + string (" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t2x")
-	  + string (" $HOME/apertium-kaz-tur/kaz-tur.t2x.bin ") + inFilePath
-	  + string (" ") + outFilePath);
+void CLExec::interchunk(string inFilePath, string outFilePath) {
+	exec(
+			string("apertium-interchunk")
+					+ string(
+							" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t2x")
+					+ string(" $HOME/apertium-kaz-tur/kaz-tur.t2x.bin ")
+					+ inFilePath + string(" ") + outFilePath);
 }
 
-void
-CLExec::postchunk (string inFilePath, string outFilePath)
-{
-  exec (
-      string ("apertium-postchunk")
-	  + string (" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t3x")
-	  + string (" $HOME/apertium-kaz-tur/kaz-tur.t3x.bin ") + inFilePath
-	  + string (" ") + outFilePath);
+void CLExec::postchunk(string inFilePath, string outFilePath) {
+	exec(
+			string("apertium-postchunk")
+					+ string(
+							" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t3x")
+					+ string(" $HOME/apertium-kaz-tur/kaz-tur.t3x.bin ")
+					+ inFilePath + string(" ") + outFilePath);
 }
 
-void
-CLExec::transfer (string inFilePath, string outFilePath)
-{
-  exec (
-      string ("apertium-transfer -n")
-	  + string (" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t4x")
-	  + string (" $HOME/apertium-kaz-tur/kaz-tur.t4x.bin ") + inFilePath
-	  + string (" | lt-proc -g $HOME/apertium-kaz-tur/kaz-tur.autogen.bin")
-	  + string (" | lt-proc -p $HOME/apertium-kaz-tur/kaz-tur.autopgen.bin")
-	  + string (" >") + outFilePath);
+void CLExec::transfer(string inFilePath, string outFilePath) {
+	exec(
+			string("apertium-transfer -n")
+					+ string(
+							" $HOME/apertium-kaz-tur/apertium-kaz-tur.kaz-tur.t4x")
+					+ string(" $HOME/apertium-kaz-tur/kaz-tur.t4x.bin ")
+					+ inFilePath
+					+ string(
+							" | lt-proc -g $HOME/apertium-kaz-tur/kaz-tur.autogen.bin")
+					+ string(
+							" | lt-proc -p $HOME/apertium-kaz-tur/kaz-tur.autopgen.bin")
+					+ string(" >") + outFilePath);
 }
 
-void
-CLExec::assignWeights (string inFilePath, string outFilePath)
-{
-  exec (
-      (string ("python3 $HOME/NormaliseK/exampleken.py <") + string (inFilePath)
-	  + string (">") + string (outFilePath)).c_str ());
+void CLExec::assignWeights(string inFilePath, string outFilePath) {
+	exec(
+			(string("python3 $HOME/NormaliseK/exampleken.py <")
+					+ string(inFilePath) + string(">") + string(outFilePath)).c_str());
 }
 
-vector<string>
-CLExec::getFilesInDir (string dir)
-{
-  vector<string> files;
-
-  DIR *pDIR;
-  struct dirent *entry;
-  if ((pDIR = opendir ((string ("./") + dir).c_str ())))
-    {
-      while ((entry = readdir (pDIR)))
-	{
-	  if (strcmp (entry->d_name, ".") != 0 && strcmp (entry->d_name, "..") != 0)
-	    {
-	      files.push_back (entry->d_name);
-	    }
+vector<string> CLExec::getFilesInDir(string dir) {
+	vector<string> files;
+
+	DIR *pDIR;
+	struct dirent *entry;
+	if ((pDIR = opendir((string("./") + dir).c_str()))) {
+		while ((entry = readdir(pDIR))) {
+			if (strcmp(entry->d_name, ".") != 0
+					&& strcmp(entry->d_name, "..") != 0) {
+				files.push_back(entry->d_name);
+			}
+		}
+		closedir(pDIR);
 	}
-      closedir (pDIR);
-    }
 
-  return files;
+	return files;
 }
 
 //void
@@ -162,63 +147,57 @@ CLExec::getFilesInDir (string dir)
 //    }
 //}
 
-map<string, map<string, vector<float> > >
-CLExec::loadYasmetModels (string modelsFilePath/*, string *localeid*/)
-{
-  // map with key yasmet model name and the value is
-  // another map with key word name and the value is
-  // vector of weights in order
-  map<string, map<string, vector<float> > > classWeights;
+map<string, map<string, vector<float> > > CLExec::loadYasmetModels(
+		string modelsFilePath/*, string *localeid*/) {
+	// map with key yasmet model name and the value is
+	// another map with key word name and the value is
+	// vector of weights in order
+	map<string, map<string, vector<float> > > classWeights;
 
-  ifstream modelsFile ((modelsFilePath).c_str ());
+	ifstream modelsFile((modelsFilePath).c_str());
 
-  if (modelsFile.is_open ())
-    {
-      string line, model, token, weight;
+	if (modelsFile.is_open()) {
+		string line, model, token, weight;
 
-      // localeid
+		// localeid
 //      getline (modelsFile, line);
 //      *localeid = line;
 
-      while (getline (modelsFile, line))
-	{
-	  // 0=>word , 1=>rule_num & 2=>wieght
-	  // we don't need rule number , because
-	  // the weights are already sorted
-
-	  char lineChar[line.size ()];
-	  strcpy (lineChar, line.c_str ());
-
-	  token = strtok (lineChar, ": ");
-	  if (token == "file")
-	    {
-	      model = strtok (NULL, ": ");
-	      continue;
-	    }
-	  // skip rule_num
-	  strtok (NULL, ": ");
+		while (getline(modelsFile, line)) {
+			// 0=>word , 1=>rule_num & 2=>weight ; the rule number is not
+			// needed because the weights are already sorted
+
+			// +1 leaves room for the terminating NUL that strcpy writes
+			char lineChar[line.size() + 1];
+			strcpy(lineChar, line.c_str());
+
+			token = strtok(lineChar, ": ");
+			if (token == "file") {
+				model = strtok(NULL, ": ");
+				continue;
+			}
+			// skip rule_num
+			strtok(NULL, ": ");
 //			cout << "rulenum= " << strtok(NULL, ": ") << endl;
 
-	  weight = strtok (NULL, ": ");
+			weight = strtok(NULL, ": ");
 //			cout << "weight= " << weight << endl;
 
-	  float w = strtof (weight.c_str (), NULL);
+			float w = strtof(weight.c_str(), NULL);
 //			cout << w << endl;
 //			if (w < 0)
 //				cout << w << endl;
-	  classWeights[model][token].push_back (w);
+			classWeights[model][token].push_back(w);
 //			if (classWeights[model][token][classWeights[model][token].size() - 1]
 //					< 0)
 //				cout << w << endl;
 //			cout
 //					<< classWeights[model][token][classWeights[model][token].size()
 //							- 1] << endl;
+		}
+	} else {
+		cout << "error in opening models file" << endl;
 	}
-    }
-  else
-    {
-      cout << "error in opening models file" << endl;
-    }
 //	for (map<string, map<string, vector<float> > >::iterator it =
 //			classWeights.begin(); it != classWeights.end(); it++) {
 //		cout << "model=" << it->first << endl;
@@ -232,84 +211,71 @@ CLExec::loadYasmetModels (string modelsFilePath/*, string *localeid*/)
 //			cout << endl;
 //		}
 //	}
-  return classWeights;
+	return classWeights;
 }
 
-string
-CLExec::toLowerCase (string word, string localeId)
-{
-  icu::UnicodeString uString (word.c_str ());
-  string lowWord;
-  uString.toLower (localeId.c_str ()).toUTF8String (lowWord);
-  return lowWord;
+string CLExec::toLowerCase(string word, string localeId) {
+	icu::UnicodeString uString(word.c_str());
+	string lowWord;
+	uString.toLower(localeId.c_str()).toUTF8String(lowWord);
+	return lowWord;
 }
 
-string
-CLExec::toUpperCase (string word, string localeId)
-{
-  icu::UnicodeString uString (word.c_str ());
-  string upWord;
-  uString.toUpper (localeId.c_str ()).toUTF8String (upWord);
-  return upWord;
+string CLExec::toUpperCase(string word, string localeId) {
+	icu::UnicodeString uString(word.c_str());
+	string upWord;
+	uString.toUpper(localeId.c_str()).toUTF8String(upWord);
+	return upWord;
 }
 
-string
-CLExec::FirLetUpperCase (string word, string localeId)
-{
-  icu::UnicodeString uString (word.c_str ());
-  uString.toLower (localeId.c_str ());
-  uString.setCharAt (
-      0, icu::UnicodeString (uString.charAt (0)).toUpper (localeId.c_str ()).charAt (0));
-
-  string upWord;
-  uString.toUTF8String (upWord);
-  return upWord;
+string CLExec::FirLetUpperCase(string word, string localeId) {
+	icu::UnicodeString uString(word.c_str());
+	uString.toLower(localeId.c_str());
+	uString.setCharAt(0,
+			icu::UnicodeString(uString.charAt(0)).toUpper(localeId.c_str()).charAt(
+					0));
+
+	string upWord;
+	uString.toUTF8String(upWord);
+	return upWord;
 }
 
 // The result of bitwise character comparison: 0 if this contains
 // the same characters as text, -1 if the characters in this are
 // bitwise less than the characters in text, +1 if the characters
 // in this are bitwise greater than the characters in text.
-int
-CLExec::compare (string word1, string word2)
-{
-  icu::UnicodeString uString1 (word1.c_str ());
-  icu::UnicodeString uString2 (word2.c_str ());
+int CLExec::compare(string word1, string word2) {
+	icu::UnicodeString uString1(word1.c_str());
+	icu::UnicodeString uString2(word2.c_str());
 
-  return uString1.compare (uString2);
+	return uString1.compare(uString2);
 }
 
-int
-CLExec::compareCaseless (string word1, string word2, string localeId)
-{
-  icu::UnicodeString uString1 (word1.c_str ());
-  uString1.toLower (localeId.c_str ());
-  icu::UnicodeString uString2 (word2.c_str ());
-  uString2.toLower (localeId.c_str ());
+int CLExec::compareCaseless(string word1, string word2, string localeId) {
+	icu::UnicodeString uString1(word1.c_str());
+	uString1.toLower(localeId.c_str());
+	icu::UnicodeString uString2(word2.c_str());
+	uString2.toLower(localeId.c_str());
 
-  return uString1.compare (uString2);
+	return uString1.compare(uString2);
 }
 
 // to sort translations from best to worth by their weight
-bool
-sortParameter (pair<vector<RuleExecution::Node*>, float> a,
-	       pair<vector<RuleExecution::Node*>, float> b)
-{
-  return (a.second > b.second);
+bool sortParameter(pair<vector<RuleExecution::Node*>, float> a,
+		pair<vector<RuleExecution::Node*>, float> b) {
+	return (a.second > b.second);
 }
 
-void
-CLExec::beamSearch (vector<pair<vector<RuleExecution::Node*>, float> > *beamTree,
-		    unsigned beam, vector<string> slTokens,
-		    vector<RuleExecution::AmbigInfo*> ambigInfo,
-		    map<string, map<string, vector<float> > > classesWeights,
-		    string localeId)
-{
-  // Initialization
-  (*beamTree).push_back (pair<vector<RuleExecution::Node*>, float> ());
-
-  for (unsigned i = 0; i < ambigInfo.size (); i++)
-    {
+void CLExec::beamSearch(
+		vector<pair<vector<RuleExecution::Node*>, float> > *beamTree,
+		unsigned beam, vector<string> slTokens, vector<vector<string> > slTags,
+		vector<RuleExecution::AmbigInfo*> ambigInfo,
+		map<string, map<string, vector<float> > > classesWeights,
+		string localeId) {
+	// Initialization
+	(*beamTree).push_back(pair<vector<RuleExecution::Node*>, float>());
+
+	for (unsigned i = 0; i < ambigInfo.size(); i++) {
 //      for (unsigned x = 0; x < beamTree->size (); x++)
 //	{
 //	  cout << "weight = " << (*beamTree)[x].second << endl;
@@ -321,142 +287,137 @@ CLExec::beamSearch (vector<pair<vector<RuleExecution::Node*>, float> > *beamTree
 //	    }
 //	}
 
-      RuleExecution::AmbigInfo* ambig = ambigInfo[i];
+		RuleExecution::AmbigInfo* ambig = ambigInfo[i];
 //      pair<pair<unsigned, unsigned>, pair<unsigned, vector<vector<unsigned> > > > p =
 //	  ambigInfo[i];
 //      pair<unsigned, unsigned> wordInd = p.first;
 //      vector<vector<unsigned> > ambigRules = p.second.second;
-      unsigned ambigRulesSize = ambig->combinations.size ();
-
-      // name of the file is the concatenation of rules ids
-      string rulesNums;
-      for (unsigned x = 0; x < ambigRulesSize; x++)
-	{
-	  // avoid dummy node
-	  for (unsigned y = 1; y < ambig->combinations[x].size (); y++)
-	    {
-	      stringstream ss;
-	      ss << ambig->combinations[x][y]->ruleId;
-	      rulesNums += ss.str ();
-
-	      if (y + 1 < ambig->combinations[x].size ())
-		rulesNums += "_";
-	    }
-	  rulesNums += "+";
-	}
+		unsigned ambigRulesSize = ambig->combinations.size();
+
+		// name of the file is the concatenation of rules ids
+		string rulesNums;
+		for (unsigned x = 0; x < ambigRulesSize; x++) {
+			// avoid dummy node
+			for (unsigned y = 1; y < ambig->combinations[x].size(); y++) {
+				stringstream ss;
+				ss << ambig->combinations[x][y]->ruleId;
+				rulesNums += ss.str();
+
+				if (y + 1 < ambig->combinations[x].size())
+					rulesNums += "_";
+			}
+			rulesNums += "+";
+		}
 
 //      cout << rulesNums << endl;
 
-      map<string, vector<float> > classWeights = classesWeights[(rulesNums + ".model")];
-
-      // build new tree for the new words
-      vector<pair<vector<RuleExecution::Node*>, float> > newTree;
-
-      // initialize the new tree
-      for (unsigned x = 0; x < ambigRulesSize; x++)
-	{
-	  newTree.push_back (
-	      pair<vector<RuleExecution::Node*>, float> (vector<RuleExecution::Node*> (),
-							 0));
-	}
-      // put rules
-      for (unsigned z = 0; z < ambigRulesSize; z++)
-	{
-	  for (unsigned y = 0; y < ambig->combinations[z].size (); y++)
-	    {
-	      newTree[z].first.push_back (ambig->combinations[z][y]);
-	    }
-	}
-
-      for (unsigned x = ambig->firTokId; x < ambig->firTokId + ambig->maxPat; x++)
-	{
-	  // word key is the word and it's order in the rule
-	  stringstream ss;
-	  ss << x - ambig->firTokId;
-	  string num = "_" + ss.str ();
-
-	  // handle the case of two lemmas separated by a space
-	  for (unsigned t = 0; t < slTokens[x].size (); t++)
-	    if (slTokens[x][t] == ' ')
-	      slTokens[x].replace (t, 1, "_");
-
-	  string word = toLowerCase (slTokens[x], localeId) + num;
-	  vector<float> wordWeights = classWeights[word];
-
-	  // put weights
-	  if (wordWeights.empty ())
-	    {
-	      for (unsigned z = 0; z < ambigRulesSize; z++)
-		newTree[z].second += 1;
-	      cout << "word : " << word << "  is not found in dataset : " << rulesNums
-		  << endl;
-	    }
-
-	  else
-	    for (unsigned z = 0; z < ambigRulesSize; z++)
-	      newTree[z].second += wordWeights[z];
-
-	}
-
-      // expand beamTree
-      unsigned initSize = beamTree->size ();
-      for (unsigned z = 0; z < ambigRulesSize - 1; z++)
-	{
-	  for (unsigned x = 0; x < initSize; x++)
-	    {
-	      beamTree->push_back (
-		  pair<vector<RuleExecution::Node*>, float> ((*beamTree)[x]));
-	    }
+		map<string, vector<float> > classWeights = classesWeights[(rulesNums
+				+ ".model")];
+
+		// build new tree for the new words
+		vector<pair<vector<RuleExecution::Node*>, float> > newTree;
+
+		// initialize the new tree
+		for (unsigned x = 0; x < ambigRulesSize; x++) {
+			newTree.push_back(
+					pair<vector<RuleExecution::Node*>, float>(
+							vector<RuleExecution::Node*>(), 0));
+		}
+		// put rules
+		for (unsigned z = 0; z < ambigRulesSize; z++) {
+			for (unsigned y = 0; y < ambig->combinations[z].size(); y++) {
+				newTree[z].first.push_back(ambig->combinations[z][y]);
+			}
+		}
+
+		for (unsigned x = ambig->firTokId; x < ambig->firTokId + ambig->maxPat;
+				x++) {
+			// word key is the word and its order in the rule
+			stringstream ss;
+			ss << x - ambig->firTokId;
+			string num = "_" + ss.str();
+
+			// handle the case of two lemmas separated by a space
+			for (unsigned t = 0; t < slTokens[x].size(); t++)
+				if (slTokens[x][t] == ' ')
+					slTokens[x].replace(t, 1, "_");
+
+			string word = toLowerCase(slTokens[x], localeId) + num;
+			vector<float> wordWeights = classWeights[word];
+
+			// put weights
+			if (wordWeights.empty()) {
+				for (unsigned z = 0; z < ambigRulesSize; z++)
+					newTree[z].second += 1;
+				cout << "word : " << word << "  is not found in dataset : "
+						<< rulesNums << endl;
+			}
+
+			else {
+				// add the weights of every tag of this token to the word's weights
+				for (unsigned t = 0; t < slTags[x].size(); t++) {
+					const vector<float> &tagWeights = classWeights[slTags[x][t] + num];
+					// guard against a tag entry holding more weights than the word entry
+					for (unsigned w = 0; w < tagWeights.size() && w < wordWeights.size(); w++)
+						wordWeights[w] += tagWeights[w];
+				}
+				for (unsigned z = 0; z < ambigRulesSize; z++)
+					newTree[z].second += wordWeights[z];
+			}
+
+		}
+
+		// expand beamTree
+		unsigned initSize = beamTree->size();
+		for (unsigned z = 0; z < ambigRulesSize - 1; z++) {
+			for (unsigned x = 0; x < initSize; x++) {
+				beamTree->push_back(
+						pair<vector<RuleExecution::Node*>, float>(
+								(*beamTree)[x]));
+			}
+		}
+
+		// merge the two trees
+		for (unsigned z = 0; z < ambigRulesSize; z++) {
+			for (unsigned x = initSize * z; x < initSize * (z + 1); x++) {
+				// put the new rules with the old
+				(*beamTree)[x].first.insert((*beamTree)[x].first.end(),
+						newTree[z].first.begin(), newTree[z].first.end());
+
+				// add their weights
+				(*beamTree)[x].second += newTree[z].second;
+			}
+		}
+
+		// sort beam tree
+		sort(beamTree->begin(), beamTree->end(), sortParameter);
+
+		// remove elements more than (beam)
+		if (beamTree->size() > beam)
+			beamTree->erase(beamTree->begin() + beam, beamTree->end());
 	}
-
-      // merge the two trees
-      for (unsigned z = 0; z < ambigRulesSize; z++)
-	{
-	  for (unsigned x = initSize * z; x < initSize * (z + 1); x++)
-	    {
-	      // put the new rules with the old
-	      (*beamTree)[x].first.insert ((*beamTree)[x].first.end (),
-					   newTree[z].first.begin (),
-					   newTree[z].first.end ());
-
-	      // add their wiehgts
-	      (*beamTree)[x].second += newTree[z].second;
-	    }
-	}
-
-      // sort beam tree
-      sort (beamTree->begin (), beamTree->end (), sortParameter);
-
-      // remove elements more than (beam)
-      if (beamTree->size () > beam)
-	beamTree->erase (beamTree->begin () + beam, beamTree->end ());
-    }
 }
 
-void
-CLExec::getTransInds (vector<pair<unsigned, float> > *transInds,
-		      vector<pair<vector<unsigned>, float> > beamTree,
-		      vector<vector<pair<unsigned, unsigned> > > rulesIds)
-{
-  for (unsigned i = 0; i < beamTree.size (); i++)
-    {
-      vector<unsigned> transInd = beamTree[i].first;
-      for (unsigned j = 0; j < rulesIds.size (); j++)
-	{
-	  vector<pair<unsigned, unsigned> > weigInd = rulesIds[j];
-
-	  unsigned count = 0;
-	  for (unsigned x = 0; x < weigInd.size () && count < transInd.size (); x++)
-	    {
-	      if (transInd[count] == weigInd[x].first)
-		count++;
-	    }
-
-	  if (count == transInd.size ())
-	    {
-	      transInds->push_back (pair<unsigned, float> (j, beamTree[i].second));
-	      break;
-	    }
+void CLExec::getTransInds(vector<pair<unsigned, float> > *transInds,
+		vector<pair<vector<unsigned>, float> > beamTree,
+		vector<vector<pair<unsigned, unsigned> > > rulesIds) {
+	for (unsigned i = 0; i < beamTree.size(); i++) {
+		vector<unsigned> transInd = beamTree[i].first;
+		for (unsigned j = 0; j < rulesIds.size(); j++) {
+			vector<pair<unsigned, unsigned> > weigInd = rulesIds[j];
+
+			unsigned count = 0;
+			for (unsigned x = 0; x < weigInd.size() && count < transInd.size();
+					x++) {
+				if (transInd[count] == weigInd[x].first)
+					count++;
+			}
+
+			if (count == transInd.size()) {
+				transInds->push_back(
+						pair<unsigned, float>(j, beamTree[i].second));
+				break;
+			}
+		}
 	}
-    }
 }
diff --git a/src/CLExec.h b/src/CLExec.h
index 16fbc7b..37f6dfa 100644
--- a/src/CLExec.h
+++ b/src/CLExec.h
@@ -17,67 +17,68 @@
 using namespace std;
 using namespace pugi;
 
-class CLExec
-{
+class CLExec {
 public:
 
-  static void
-  segmenter (string inFilePath, string outFilePath);
+	static void
+	segmenter(string inFilePath, string outFilePath);
 
-  static void
-  lextor (string inFilePath, string outFilePath);
+	static void
+	lextor(string inFilePath, string outFilePath);
 
-  static void
-  biltrans (string inFilePath, string outFilePath);
+	static void
+	biltrans(string inFilePath, string outFilePath);
 
-  static void
-  interchunk (string inFilePath, string outFilePath);
+	static void
+	interchunk(string inFilePath, string outFilePath);
 
-  static void
-  postchunk (string inFilePath, string outFilePath);
+	static void
+	postchunk(string inFilePath, string outFilePath);
 
-  static void
-  transfer (string inFilePath, string outFilePath);
+	static void
+	transfer(string inFilePath, string outFilePath);
 
-  static void
-  assignWeights (string inFilePath, string outFilePath);
+	static void
+	assignWeights(string inFilePath, string outFilePath);
 
-  static vector<string>
-  getFilesInDir (string dir);
+	static vector<string>
+	getFilesInDir(string dir);
 
 //  static void
 //  runYasmet ();
 
-  static map<string, map<string, vector<float> > >
-  loadYasmetModels (string modelsDest/*, string *localeid*/);
+	static map<string, map<string, vector<float> > >
+	loadYasmetModels(string modelsDest/*, string *localeid*/);
 
-  static void
-  handleDatasets ();
+	static void
+	handleDatasets();
 
-  static string
-  toLowerCase (string word, string localeId);
+	static string
+	toLowerCase(string word, string localeId);
 
-  static string
-  toUpperCase (string word, string localeId);
+	static string
+	toUpperCase(string word, string localeId);
 
-  static string
-  FirLetUpperCase (string word, string localeId);
+	static string
+	FirLetUpperCase(string word, string localeId);
 
-  static int
-  compare (string word1, string word2);
+	static int
+	compare(string word1, string word2);
 
-  static int
-  compareCaseless (string word1, string word2, string localeId);
+	static int
+	compareCaseless(string word1, string word2, string localeId);
 
-  static void
-  beamSearch (vector<pair<vector<RuleExecution::Node*>, float> > *beamTree, unsigned beam,
-	      vector<string> slTokens, vector<RuleExecution::AmbigInfo*> ambigInfo,
-	      map<string, map<string, vector<float> > > classesWeights, string localeId);
+	static void
+	beamSearch(vector<pair<vector<RuleExecution::Node*>, float> > *beamTree,
+			unsigned beam, vector<string> slTokens, vector<vector<string> > slTags,
+			vector<RuleExecution::AmbigInfo*> ambigInfo,
+			map<string, map<string, vector<float> > > classesWeights,
+			string localeId);
 
-  static void
-  getTransInds (vector<pair<unsigned, float> > *transInds,
-		vector<pair<vector<unsigned>, float> > beamTree,
-		vector<vector<pair<unsigned, unsigned> > > rulesIds);
+	static void
+	getTransInds(vector<pair<unsigned, float> > *transInds,
+			vector<pair<vector<unsigned>, float> > beamTree,
+			vector<vector<pair<unsigned, unsigned> > > rulesIds);
 };
 
 #endif /* SRC_CLEXEC_H_ */
diff --git a/src/RulesApplier.cpp b/src/RulesApplier.cpp
index 08eb485..ea36284 100644
--- a/src/RulesApplier.cpp
+++ b/src/RulesApplier.cpp
@@ -34,6 +34,9 @@ int main(int argc, char **argv) {
 		case 'u':
 			newLextorFilePath = optarg;
 			break;
+		case ':':
+			printf("option %c needs a value\n", optopt);
+			return -1;
 		case '?':
 			printf("unknown option: %c\n", optopt);
 			return -1;