commit 1cfbf775ef31e20ccdf150dd6b2013ec882f5425 Author: aboelhamd Date: Wed May 8 23:53:02 2019 +0200 Minor modifications, beside addition of new programs and scripts diff --git a/put-rules-ids.py b/put-rules-ids.py index fd6485d..718a290 100644 --- a/put-rules-ids.py +++ b/put-rules-ids.py @@ -1,8 +1,8 @@ import sys if (len(sys.argv) < 3) : - print('Usage: python put-ids.py '); - sys.exit(-1) + print('\nUsage: python put-ids.py original-transfer-file-path new-transfer-file-path'); + sys.exit() oldfile = open(sys.argv[1], 'r') newfile = open(sys.argv[2], 'w') diff --git a/rem-par-new-lines.py b/rem-par-new-lines.py new file mode 100644 index 0000000..3a89966 --- /dev/null +++ b/rem-par-new-lines.py @@ -0,0 +1,22 @@ +import sys + +if (len(sys.argv) != 5) : + print('\nUsage: python3 rem-par-new-lines source-file-path target-file-path new-source-file-path new-target-file-path'); + sys.exit() + +file3 = open(sys.argv[3], 'w+') +file4 = open(sys.argv[4], 'w+') + +with open(sys.argv[1]) as file1, open(sys.argv[2]) as file2: + for line1, line2 in zip(file1, file2): + line1 = line1.strip() + line2 = line2.strip() + if (len(line1)>0 and len(line2)>0): + file3.write(line1+"\n") + file4.write(line2+"\n") + + +file1.close() +file2.close() +file3.close() +file4.close() diff --git a/score-sentences.py b/score-sentences.py index c73a384..5930ef0 100644 --- a/score-sentences.py +++ b/score-sentences.py @@ -2,7 +2,7 @@ import sys import kenlm if (len(sys.argv) < 4) : - print('Usage: python score-sentences.py arpa_or_binary_LM_file target_lang_file weights_file'); + print('\nUsage: python score-sentences.py arpa_or_binary_LM_file target_lang_file weights_file'); sys.exit(-1) targetfile = open(sys.argv[2], 'r') diff --git a/sentenceTokenizer.rb b/sentenceTokenizer.rb index 632058f..5f1f1f2 100644 --- a/sentenceTokenizer.rb +++ b/sentenceTokenizer.rb @@ -1,7 +1,7 @@ require 'pragmatic_segmenter' if (ARGV.length < 3) - puts "Usage : ruby2.3 sentenceTokenizer.rb 639-1ISOlangCode textFilePath sentencesFilePath" + puts "\nUsage : ruby2.3 sentenceTokenizer.rb 639-1ISOlangCode textFilePath sentencesFilePath" exit end diff --git a/spcCharsRem.rb b/spcCharsRem.rb index 50e37d0..a275daa 100644 --- a/spcCharsRem.rb +++ b/spcCharsRem.rb @@ -1,13 +1,18 @@ if (ARGV.length < 2) - puts "Usage : ruby2.3 spcCharsRem.rb oldFilePath newFilePath" + puts "\nUsage : ruby2.3 spcCharsRem.rb oldFilePath newFilePath" exit end +file = File.open(ARGV[1], "w") + File.open(ARGV[0]).each do |line1| - line1.delete! ('\\\(\)\[\]\{\}\<\>\|\$\/\'\"') - - File.open(ARGV[1], "a") do |line2| - line2.puts line1 + #line1.delete! ('\\\(\)\[\]\{\}\<\>\|\$\/\'\"') + if ((line1 =~ /\s*\n/) == 0) + next end + + file.puts line1 end +file.close + diff --git a/src/CombAlign.cpp b/src/CombAlign.cpp new file mode 100644 index 0000000..1aaa27d --- /dev/null +++ b/src/CombAlign.cpp @@ -0,0 +1,211 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pugixml/pugixml.hpp" +#include "RuleParser.h" +#include "RuleExecution.h" +#include "TranElemLiterals.h" +#include "CLExec.h" + +using namespace std; +using namespace pugi; +using namespace elem; + +int +main (int argc, char **argv) +{ + string localeId, transferFilePath, lextorFilePath, chunkerFilePath, referenceFilePath, + newRefFilePath; + + if (argc == 7) + { + localeId = argv[1]; + transferFilePath = argv[2]; + lextorFilePath = argv[3]; + chunkerFilePath = argv[4]; + referenceFilePath = argv[5]; + newRefFilePath = argv[6]; + } + else + { +// localeId = "es_ES"; +// transferFilePath = "transferFile.t1x"; +// sentenceFilePath = "spa-test.txt"; +// lextorFilePath = "spa-test.lextor"; +// interInFilePath = "inter2.txt"; + +// localeId = "kk_KZ"; +// transferFilePath = "apertium-kaz-tur.kaz-tur.t1x"; +// sentenceFilePath = "sample-sentences.txt"; +// lextorFilePath = "sample-lextor.txt"; +// interInFilePath = "sample-inter.txt"; + + localeId = "es_ES"; + transferFilePath = + "/home/aboelhamd/apertium-eng-spa-ambiguous-rules/apertium-eng-spa.spa-eng.t1x"; + lextorFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/test-lextor.txt"; + chunkerFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/test-chunker.txt"; + referenceFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test.txt"; + newRefFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test-mul.txt"; + + cout << "Error in parameters !" << endl; + cout << "Parameters are : localeId transferFilePath lextorFilePath" + << " chunkerFilePath referenceFilePath newRefFilePath" << endl; + cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ" + << endl; + cout << "transferFilePath : Apertium transfer file of the language pair used." + << endl; + cout << "lextorFilePath : Apertium lextor file for the source language sentences." + << endl; + cout << "chunkerFilePath : chunker file path (output of this program and" + << " input for apertium interchunk)." << endl; + cout << "referenceFilePath : Reference parallel target translation file path." + << endl; + cout << "newRefFilePath : New aligned reference file path." << endl; +// return -1; + } + + ifstream lextorFile (lextorFilePath.c_str ()); + ofstream chunkerFile (chunkerFilePath.c_str ()); + ifstream referenceFile (referenceFilePath); + ofstream newRefFile (newRefFilePath); + if (lextorFile.is_open () && chunkerFile.is_open () && referenceFile.is_open () + && newRefFile.is_open ()) + { + // load transfer file in an xml document object + xml_document transferDoc; + xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ()); + + if (string (result.description ()) != "No error") + { + cout << "ERROR : " << result.description () << endl; + return -1; + } + + // xml node of the parent node (transfer) in the transfer file + xml_node transfer = transferDoc.child ("transfer"); + + map > > attrs = RuleParser::getAttrs (transfer); + map vars = RuleParser::getVars (transfer); + map > lists = RuleParser::getLists (transfer); + + unsigned i = 0; + string tokenizedSentence, refSent; + while (getline (lextorFile, tokenizedSentence) && getline (referenceFile, refSent)) + { + cout << i++ << endl; + + // spaces after each token + vector spaces; + + // tokens in the sentence order + vector slTokens, tlTokens; + + // tags of tokens in order + vector > slTags, tlTags; + + RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces, + tokenizedSentence); + + // map of tokens ids and their matched categories + map > catsApplied; + + RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer); + + // map of matched rules and a pair of first token id and patterns number + map > > rulesApplied; + + RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer); + + // rule and (target) token map to specific output + // if rule has many patterns we will choose the first token only + map > ruleOutputs; + + // map (target) token to all matched rules ids and the number of pattern items of each rule + map > > tokenRules; + + RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens, + tlTags, rulesApplied, attrs, lists, &vars, spaces, + localeId); + // final outs + vector outs; + // number of possible combinations + unsigned compNum; + // nodes for every token and rule + map > nodesPool; + // ambiguous informations + vector ambigInfo; + + // rules combinations + vector > combNodes; + + nodesPool = RuleExecution::getNodesPool (tokenRules); + + RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum); + RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs, + spaces); + + // write the outs + for (unsigned j = 0; j < outs.size (); j++) + { + chunkerFile << outs[j] << endl; + newRefFile << refSent << endl; + } + + chunkerFile << endl; + newRefFile << endl; + + // delete AmbigInfo pointers + for (unsigned j = 0; j < ambigInfo.size (); j++) + { + // delete the dummy node pointers + set dummies; + for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++) + dummies.insert (ambigInfo[j]->combinations[k][0]); + for (set::iterator it = dummies.begin (); + it != dummies.end (); it++) + delete (*it); + + delete ambigInfo[j]; + } + // delete Node pointers + for (map >::iterator it = + nodesPool.begin (); it != nodesPool.end (); it++) + { + for (unsigned j = 0; j < it->second.size (); j++) + { + delete it->second[j]; + } + } + } + + lextorFile.close (); + chunkerFile.close (); + referenceFile.close (); + newRefFile.close (); + cout << "CombAlign finished!"; + } + else + { + cout << "ERROR in opening files!" << endl; + } + + return 0; +} diff --git a/src/ModelResult.cpp b/src/LangModAnalysis.cpp similarity index 100% rename from src/ModelResult.cpp rename to src/LangModAnalysis.cpp diff --git a/src/OrderAmbigSents.cpp b/src/OrderAmbigSents.cpp index c939175..84896e3 100644 --- a/src/OrderAmbigSents.cpp +++ b/src/OrderAmbigSents.cpp @@ -115,12 +115,12 @@ main (int argc, char **argv) vector orderedSources, orderedTargets; vector ambigCounts; -// unsigned i = 0; + unsigned i = 0; string tokenizedSentence, sourceSentence, targetSentence; while (getline (lextorFile, tokenizedSentence) && getline (sourceFile, sourceSentence) && getline (targetFile, targetSentence)) { -// cout << i++ << endl; + cout << i++ << endl; // spaces after each token vector spaces; @@ -195,8 +195,9 @@ main (int argc, char **argv) } // write the ordered sentences - for (unsigned j = 0; j < orderedSources.size (); j++) + for (unsigned j = 0; j < 10000; j++) { + cout << j << endl; orderedSrcFile << orderedSources[j] << endl; orderedTrgFile << orderedTargets[j] << endl; } diff --git a/src/RulesApplier.cpp b/src/RulesApplier.cpp index 8e9623c..13476af 100644 --- a/src/RulesApplier.cpp +++ b/src/RulesApplier.cpp @@ -1,303 +1,303 @@ -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -//#include -// -//#include "../pugixml/pugixml.hpp" -//#include "RuleParser.h" -//#include "RuleExecution.h" -//#include "TranElemLiterals.h" -//#include "CLExec.h" -// -//using namespace std; -//using namespace pugi; -//using namespace elem; -// -//int -//main (int argc, char **argv) -//{ -// string localeId, transferFilePath, lextorFilePath, interInFilePath; -// -// if (argc == 5) -// { -// localeId = argv[1]; -// transferFilePath = argv[2]; -// lextorFilePath = argv[3]; -// interInFilePath = argv[4]; -// } -// else -// { -//// localeId = "es_ES"; -//// transferFilePath = "transferFile.t1x"; -//// sentenceFilePath = "spa-test.txt"; -//// lextorFilePath = "spa-test.lextor"; -//// interInFilePath = "inter2.txt"; -// -//// localeId = "kk_KZ"; -//// transferFilePath = "apertium-kaz-tur.kaz-tur.t1x"; -//// sentenceFilePath = "sample-sentences.txt"; -//// lextorFilePath = "sample-lextor.txt"; -//// interInFilePath = "sample-inter.txt"; -// +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../pugixml/pugixml.hpp" +#include "RuleParser.h" +#include "RuleExecution.h" +#include "TranElemLiterals.h" +#include "CLExec.h" + +using namespace std; +using namespace pugi; +using namespace elem; + +int +main (int argc, char **argv) +{ + string localeId, transferFilePath, lextorFilePath, interInFilePath; + + if (argc == 5) + { + localeId = argv[1]; + transferFilePath = argv[2]; + lextorFilePath = argv[3]; + interInFilePath = argv[4]; + } + else + { // localeId = "es_ES"; -// transferFilePath = -// "/home/aboelhamd/apertium-eng-spa-ambiguous-rules/apertium-eng-spa.spa-eng.t1x"; -// lextorFilePath = -// "/home/aboelhamd/eclipse-workspace/machinetranslation/test-lextor.txt"; -// interInFilePath = -// "/home/aboelhamd/eclipse-workspace/machinetranslation/test-chunker.txt"; -// -// cout << "Error in parameters !" << endl; -// cout << "Parameters are : localeId transferFilePath lextorFilePath interInFilePath" -// << endl; -// cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ" -// << endl; -// cout << "transferFilePath : Apertium transfer file of the language pair used." -// << endl; -// cout << "lextorFilePath : Apertium lextor file for the source language sentences." -// << endl; -// cout -// << "interInFilePath : Output file name of this program which is the input for apertium interchunk." -// << endl; -//// return -1; -// } -// -// ifstream lextorFile (lextorFilePath.c_str ()); -// ofstream interInFile (interInFilePath.c_str ()); -// ifstream refFile ( -// string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test.txt").c_str ()); -// ofstream refInFile ( -// string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test-mul.txt").c_str ()); -// ifstream errFile ( -// string ( -// "/home/aboelhamd/Downloads/apertium-eval-translator-master/ambig_results.txt").c_str ()); -// ofstream bestInFile ( -// string ("/home/aboelhamd/eclipse-workspace/machinetranslation/best-chunker.txt").c_str ()); -// if (lextorFile.is_open () && interInFile.is_open ()) -// { -// // load transfer file in an xml document object -// xml_document transferDoc; -// xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ()); -// -// if (string (result.description ()) != "No error") -// { -// cout << "ERROR : " << result.description () << endl; -// return -1; -// } -// -// // xml node of the parent node (transfer) in the transfer file -// xml_node transfer = transferDoc.child ("transfer"); -// -// map > > attrs = RuleParser::getAttrs (transfer); -// map vars = RuleParser::getVars (transfer); -// map > lists = RuleParser::getLists (transfer); -// -// unsigned i = 0; -// string tokenizedSentence, refSent; -// while (getline (lextorFile, tokenizedSentence) && getline (refFile, refSent)) -// { -// cout << i++ << endl; -// -// // spaces after each token -// vector spaces; -// -// // tokens in the sentence order -// vector slTokens, tlTokens; -// -// // tags of tokens in order -// vector > slTags, tlTags; -// -// RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces, -// tokenizedSentence); -// -// // map of tokens ids and their matched categories -// map > catsApplied; -// -// RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer); -// -// // map of matched rules and a pair of first token id and patterns number -// map > > rulesApplied; -// -// RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer); -// -// // rule and (target) token map to specific output -// // if rule has many patterns we will choose the first token only -// map > ruleOutputs; -// -// // map (target) token to all matched rules ids and the number of pattern items of each rule -// map > > tokenRules; -// -// RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens, -// tlTags, rulesApplied, attrs, lists, &vars, spaces, -// localeId); -// // final outs -// vector outs; -// // number of possible combinations -// unsigned compNum; -// // nodes for every token and rule -// map > nodesPool; -// // ambiguous informations -// vector ambigInfo; -// -// // rules combinations -// vector > combNodes; -// -// nodesPool = RuleExecution::getNodesPool (tokenRules); -// -// RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum); -// RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs, -// spaces); -// -//// for (unsigned j = 0; j < tlTokens.size (); j++) -//// { -//// cout << tlTokens[j] << endl; -//// vector > rulees = tokenRules[j]; -//// for (unsigned k = 0; k < rulees.size (); k++) -//// { -//// cout << rulees[k].first << " , " << rulees[k].second << endl; -//// } -//// cout << endl; -//// } -//// -//// for (unsigned j = 0; j < ambigInfo.size (); j++) -//// { -//// cout << "firTokId = " << ambigInfo[j]->firTokId << "; maxPat = " -//// << ambigInfo[j]->maxPat << endl; -//// vector > combinations = -//// ambigInfo[j]->combinations; -//// cout << endl; -//// for (unsigned k = 0; k < combinations.size (); k++) -//// { -//// vector nodes = combinations[k]; -//// for (unsigned l = 1; l < nodes.size (); l++) -//// { -//// cout << "tok=" << nodes[l]->tokenId << "; rul=" << nodes[l]->ruleId -//// << "; pat=" << nodes[l]->patNum << " - "; -//// } -//// cout << endl; -//// } -//// cout << endl; -//// } -//// -//// for (map >::iterator it = ruleOutputs.begin (); -//// it != ruleOutputs.end (); it++) -//// { -//// cout << "ruleId=" << it->first << endl; -//// map outs = it->second; -//// -//// for (map::iterator it2 = outs.begin (); -//// it2 != outs.end (); it2++) -//// { -//// cout << "tokId=" << it2->first << " , out = " << it2->second << endl; -//// } -//// cout << endl; -//// } -//// cout << endl; -//// -//// for (unsigned j = 0; j < tlTokens.size (); j++) -//// { -//// vector nodes = nodesPool[j]; -//// cout << "tokId = " << j << " : " << tlTokens[j] << endl; -//// for (unsigned k = 0; k < nodes.size (); k++) -//// { -//// cout << "ruleId = " << nodes[k]->ruleId << "; patNum = " -//// << nodes[k]->patNum << endl; -//// } -//// cout << endl; -//// } -//// -//// for (unsigned j = 0; j < combNodes.size (); j++) -//// { -//// vector nodes = combNodes[j]; -//// for (unsigned k = 0; k < nodes.size (); k++) -//// { -//// cout << "tok=" << nodes[k]->tokenId << "; rul=" << nodes[k]->ruleId -//// << "; pat=" << nodes[k]->patNum << " - "; -//// } -//// cout << endl; -//// } -// -//// set diffOuts (outs.begin (), outs.end ()); -//// -//// // write the outs -//// for (set::iterator it = diffOuts.begin (); it != diffOuts.end (); it++) -//// { -//// interInFile << *it << endl; -//// refInFile << refSent << endl; -//// } -// -// float min = 100000; -// int minInd = -1; -// string serr; -// float err; -// -// // write the outs -// for (unsigned j = 0; j < outs.size (); j++) +// transferFilePath = "transferFile.t1x"; +// sentenceFilePath = "spa-test.txt"; +// lextorFilePath = "spa-test.lextor"; +// interInFilePath = "inter2.txt"; + +// localeId = "kk_KZ"; +// transferFilePath = "apertium-kaz-tur.kaz-tur.t1x"; +// sentenceFilePath = "sample-sentences.txt"; +// lextorFilePath = "sample-lextor.txt"; +// interInFilePath = "sample-inter.txt"; + + localeId = "es_ES"; + transferFilePath = + "/home/aboelhamd/apertium-eng-spa-ambiguous-rules/apertium-eng-spa.spa-eng.t1x"; + lextorFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/test-lextor.txt"; + interInFilePath = + "/home/aboelhamd/eclipse-workspace/machinetranslation/test-chunker.txt"; + + cout << "Error in parameters !" << endl; + cout << "Parameters are : localeId transferFilePath lextorFilePath interInFilePath" + << endl; + cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ" + << endl; + cout << "transferFilePath : Apertium transfer file of the language pair used." + << endl; + cout << "lextorFilePath : Apertium lextor file for the source language sentences." + << endl; + cout + << "interInFilePath : Output file name of this program which is the input for apertium interchunk." + << endl; +// return -1; + } + + ifstream lextorFile (lextorFilePath.c_str ()); + ofstream interInFile (interInFilePath.c_str ()); + ifstream refFile ( + string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test.txt").c_str ()); + ofstream refInFile ( + string ("/home/aboelhamd/eclipse-workspace/machinetranslation/tgt-test-mul.txt").c_str ()); + ifstream errFile ( + string ( + "/home/aboelhamd/Downloads/apertium-eval-translator-master/ambig_results.txt").c_str ()); + ofstream bestInFile ( + string ("/home/aboelhamd/eclipse-workspace/machinetranslation/best-chunker.txt").c_str ()); + if (lextorFile.is_open () && interInFile.is_open ()) + { + // load transfer file in an xml document object + xml_document transferDoc; + xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ()); + + if (string (result.description ()) != "No error") + { + cout << "ERROR : " << result.description () << endl; + return -1; + } + + // xml node of the parent node (transfer) in the transfer file + xml_node transfer = transferDoc.child ("transfer"); + + map > > attrs = RuleParser::getAttrs (transfer); + map vars = RuleParser::getVars (transfer); + map > lists = RuleParser::getLists (transfer); + + unsigned i = 0; + string tokenizedSentence, refSent; + while (getline (lextorFile, tokenizedSentence) && getline (refFile, refSent)) + { + cout << i++ << endl; + + // spaces after each token + vector spaces; + + // tokens in the sentence order + vector slTokens, tlTokens; + + // tags of tokens in order + vector > slTags, tlTags; + + RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces, + tokenizedSentence); + + // map of tokens ids and their matched categories + map > catsApplied; + + RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer); + + // map of matched rules and a pair of first token id and patterns number + map > > rulesApplied; + + RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer); + + // rule and (target) token map to specific output + // if rule has many patterns we will choose the first token only + map > ruleOutputs; + + // map (target) token to all matched rules ids and the number of pattern items of each rule + map > > tokenRules; + + RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens, + tlTags, rulesApplied, attrs, lists, &vars, spaces, + localeId); + // final outs + vector outs; + // number of possible combinations + unsigned compNum; + // nodes for every token and rule + map > nodesPool; + // ambiguous informations + vector ambigInfo; + + // rules combinations + vector > combNodes; + + nodesPool = RuleExecution::getNodesPool (tokenRules); + + RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum); + RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs, + spaces); + +// for (unsigned j = 0; j < tlTokens.size (); j++) // { -// getline (errFile, serr); -// err = strtof (serr.c_str (), NULL); -// -// if (err < min) +// cout << tlTokens[j] << endl; +// vector > rulees = tokenRules[j]; +// for (unsigned k = 0; k < rulees.size (); k++) // { -// min = err; -// minInd = j; +// cout << rulees[k].first << " , " << rulees[k].second << endl; // } -// -// interInFile << outs[j] << endl; -// refInFile << refSent << endl; +// cout << endl; // } -//// cout << minInd << endl; -// bestInFile << outs[minInd] << endl; -// -// interInFile << endl; -// refInFile << endl; // -// // delete AmbigInfo pointers // for (unsigned j = 0; j < ambigInfo.size (); j++) // { -// // delete the dummy node pointers -// set dummies; -// for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++) -// dummies.insert (ambigInfo[j]->combinations[k][0]); -// for (set::iterator it = dummies.begin (); -// it != dummies.end (); it++) -// delete (*it); +// cout << "firTokId = " << ambigInfo[j]->firTokId << "; maxPat = " +// << ambigInfo[j]->maxPat << endl; +// vector > combinations = +// ambigInfo[j]->combinations; +// cout << endl; +// for (unsigned k = 0; k < combinations.size (); k++) +// { +// vector nodes = combinations[k]; +// for (unsigned l = 1; l < nodes.size (); l++) +// { +// cout << "tok=" << nodes[l]->tokenId << "; rul=" << nodes[l]->ruleId +// << "; pat=" << nodes[l]->patNum << " - "; +// } +// cout << endl; +// } +// cout << endl; +// } // -// delete ambigInfo[j]; +// for (map >::iterator it = ruleOutputs.begin (); +// it != ruleOutputs.end (); it++) +// { +// cout << "ruleId=" << it->first << endl; +// map outs = it->second; +// +// for (map::iterator it2 = outs.begin (); +// it2 != outs.end (); it2++) +// { +// cout << "tokId=" << it2->first << " , out = " << it2->second << endl; +// } +// cout << endl; // } -// // delete Node pointers -// for (map >::iterator it = -// nodesPool.begin (); it != nodesPool.end (); it++) +// cout << endl; +// +// for (unsigned j = 0; j < tlTokens.size (); j++) // { -// for (unsigned j = 0; j < it->second.size (); j++) +// vector nodes = nodesPool[j]; +// cout << "tokId = " << j << " : " << tlTokens[j] << endl; +// for (unsigned k = 0; k < nodes.size (); k++) // { -// delete it->second[j]; +// cout << "ruleId = " << nodes[k]->ruleId << "; patNum = " +// << nodes[k]->patNum << endl; // } +// cout << endl; // } -// } // -// lextorFile.close (); -// interInFile.close (); -// refFile.close (); -// refInFile.close (); -// bestInFile.close (); -// cout << "RulesApplier finished!"; -// } -// else -// { -// cout << "ERROR in opening files!" << endl; -// } +// for (unsigned j = 0; j < combNodes.size (); j++) +// { +// vector nodes = combNodes[j]; +// for (unsigned k = 0; k < nodes.size (); k++) +// { +// cout << "tok=" << nodes[k]->tokenId << "; rul=" << nodes[k]->ruleId +// << "; pat=" << nodes[k]->patNum << " - "; +// } +// cout << endl; +// } + +// set diffOuts (outs.begin (), outs.end ()); // -// return 0; -//} +// // write the outs +// for (set::iterator it = diffOuts.begin (); it != diffOuts.end (); it++) +// { +// interInFile << *it << endl; +// refInFile << refSent << endl; +// } + + float min = 100000; + int minInd = -1; + string serr; + float err; + + // write the outs + for (unsigned j = 0; j < outs.size (); j++) + { + getline (errFile, serr); + err = strtof (serr.c_str (), NULL); + + if (err < min) + { + min = err; + minInd = j; + } + + interInFile << outs[j] << endl; + refInFile << refSent << endl; + } +// cout << minInd << endl; + bestInFile << outs[minInd] << endl; + + interInFile << endl; + refInFile << endl; + + // delete AmbigInfo pointers + for (unsigned j = 0; j < ambigInfo.size (); j++) + { + // delete the dummy node pointers + set dummies; + for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++) + dummies.insert (ambigInfo[j]->combinations[k][0]); + for (set::iterator it = dummies.begin (); + it != dummies.end (); it++) + delete (*it); + + delete ambigInfo[j]; + } + // delete Node pointers + for (map >::iterator it = + nodesPool.begin (); it != nodesPool.end (); it++) + { + for (unsigned j = 0; j < it->second.size (); j++) + { + delete it->second[j]; + } + } + } + + lextorFile.close (); + interInFile.close (); + refFile.close (); + refInFile.close (); + bestInFile.close (); + cout << "RulesApplier finished!"; + } + else + { + cout << "ERROR in opening files!" << endl; + } + + return 0; +}