commit 58c2c22ebf56685b0046502688d9408ae097cfba
Author: aboelhamd
Date:   Sat May 18 15:52:51 2019 +0200

    Solved options bug, again

diff --git a/src/BeamResult.cpp b/src/BeamResult.cpp
deleted file mode 100644
index b873638..0000000
--- a/src/BeamResult.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//
-//#include "../pugixml/pugixml.hpp"
-//#include "RuleParser.h"
-//#include "RuleExecution.h"
-//#include "TranElemLiterals.h"
-//#include "CLExec.h"
-//
-//#include
-//
-//using namespace std;
-//using namespace pugi;
-//using namespace elem;
-//
-//int
-//main (int argc, char **argv)
-//{
-//  string sentenceFilePath, lextorFilePath, localeId, transferFilePath, modelsDest,
-//      beamSize, transferOutFilePath, beamOutFilePath;
-//
-//  if (argc == 9)
-//    {
-//      localeId = argv[1];
-//      transferFilePath = argv[2];
-//      sentenceFilePath = argv[3];
-//      lextorFilePath = argv[4];
-//
-//      transferOutFilePath = argv[5];
-//      beamOutFilePath = argv[6];
-//
-//      modelsDest = argv[7];
-//      beamSize = argv[8];
-//    }
-//  else
-//    {
-////      localeId = "es_ES";
-////      transferFilePath = "transferFile.t1x";
-////      sentenceFilePath = "spa-test.txt";
-////      lextorFilePath = "spa-test.lextor";
-////      interInFilePath = "beaminter.out";
-////      modelsDest = "modelstry";
-////      k = "8";
-//
-//      localeId = "kk_KZ";
-//      transferFilePath = "apertium-kaz-tur.kaz-tur.t1x";
-//      sentenceFilePath = "src.txt";
-//      lextorFilePath = "lextor.txt";
-//
-//      transferOutFilePath = "beam-transfer.txt";
-//      beamOutFilePath = "beamOutFile.txt";
-//
-//      modelsDest = "./UntitledFolder/models";
-//      beamSize = "8";
-//
-//      cout << "Error in parameters !" << endl;
-//      cout
-//          << "Parameters are : localeId transferFilePath sentenceFilePath lextorFilePath transferOutFilePath beamOutFilePath modelsDest beamSize"
-//          << endl;
-//      cout << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ"
-//          << endl;
-//      cout << "transferFilePath : Apertium transfer file of the language pair used."
-//          << endl;
-//      cout << "sentenceFilePath : Source language sentences file." << endl;
-//      cout << "lextorFilePath : Apertium lextor file for the source language sentences."
-//          << endl;
-//      cout
-//          << "transferOutFilePath : Output file of apertium transfer for the source language sentences."
-//          << endl;
-//      cout
-//          << "beamOutFilePath : Output file name of this program which is the best translations for the language sentences."
-//          << endl;
-//      cout << "modelsDest : Yasmet models destination." << endl;
-//      cout << "beamSize : The size of beam in beam search algorithm." << endl;
-//      return -1;
-//    }
-//
-//  // seed for randomness
-//  srand (time (NULL));
-//
-//  ifstream lextorFile (lextorFilePath.c_str ());
-//  ifstream inSentenceFile (sentenceFilePath.c_str ());
-//  if (lextorFile.is_open () && inSentenceFile.is_open ())
-//    {
-//      // load transfer file in an xml document object
-//      xml_document transferDoc;
-//      xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
-//
-//      if (string (result.description ()) != "No error")
-//        {
-//          cout << "ERROR : " << result.description () << endl;
-//          return -1;
-//        }
-//
-//      // xml node of the parent node (transfer) in the transfer file
-//      xml_node transfer = transferDoc.child ("transfer");
-//
-//      vector sourceSentences, tokenizedSentences;
-//
-//      string tokenizedSentence;
-//      while (getline (lextorFile, tokenizedSentence))
-//        {
-//          string sourceSentence;
-//          if (!getline (inSentenceFile, sourceSentence))
-//            sourceSentence = "No more sentences";
-//
-//          sourceSentences.push_back (sourceSentence);
-//          tokenizedSentences.push_back (tokenizedSentence);
-//        }
-//      lextorFile.close ();
-//      inSentenceFile.close ();
-//
-//      map > > attrs = RuleParser::getAttrs (transfer);
-//      map vars = RuleParser::getVars (transfer);
-//      map > lists = RuleParser::getLists (transfer);
-//
-//      map > > classesWeights =
-//          CLExec::loadYasmetModels (modelsDest);
-//
-////      vector > vouts;
-//
-//      int beam;
-//      stringstream buffer (beamSize);
-//      buffer >> beam;
-//
-//      // empty the output file
-//      ofstream beamFile (beamOutFilePath.c_str ());
-//      beamFile.close ();
-//
-//      ifstream transferOutFile (transferOutFilePath.c_str ());
-//
-//      if (transferOutFile.is_open ())
-//        for (unsigned i = 0; i < sourceSentences.size (); i++)
-//          {
-//            cout << i << endl;
-//
-//            string sourceSentence, tokenizedSentence;
-//            sourceSentence = sourceSentences[i];
-//            tokenizedSentence = tokenizedSentences[i];
-//
-//            // spaces after each token
-//            vector spaces;
-//
-//            // tokens in the sentence order
-//            vector slTokens, tlTokens;
-//
-//            // tags of tokens in order
-//            vector > slTags, tlTags;
-//
-//            RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags,
-//                                           &spaces, tokenizedSentence);
-//
-//            // map of tokens ids and their matched categories
-//            map > catsApplied;
-//
-//            RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
-//
-//            // map of matched rules and a pair of first token id and patterns number
-//            map > > rulesApplied;
-//
-//            RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
-//
-//            // rule and (target) token map to specific output
-//            // if rule has many patterns we will choose the first token only
-//            map > ruleOutputs;
-//
-//            // map (target) token to all matched rules ids and the number of pattern items of each rule
-//            map > > tokenRules;
-//
-//            RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags,
-//                                     tlTokens, tlTags, rulesApplied, attrs, lists, &vars,
-//                                     spaces, localeId);
-//
-//            // final outputs
-//            vector outs;
-//            // number of generated combinations
-//            unsigned compNum;
-//            // nodes for every token and rule
-//            map > nodesPool;
-//            // ambiguous informations
-//            vector ambigInfo;
-//            // beam tree
-//            vector, float> > beamTree;
-//            // rules combinations
-//            vector > combNodes;
-//
-//            nodesPool = RuleExecution::getNodesPool (tokenRules);
-//
-//            RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
-//
-//            vector newAmbigInfo;
-//            for (unsigned j = 0; j < ambigInfo.size (); j++)
-//              if (ambigInfo[j]->combinations.size () > 1)
-//                newAmbigInfo.push_back (ambigInfo[j]);
-//
-//            CLExec::beamSearch (&beamTree, beam, slTokens, newAmbigInfo, classesWeights,
-//                                localeId);
-//
-//            RuleExecution::getOuts (&outs, &combNodes, beamTree, nodesPool, ruleOutputs,
-//                                    spaces);
-//
-//            // read transfer
-//            string line;
-//            vector beamTransfers;
-//            for (unsigned j = 0; j < outs.size (); j++)
-//              {
-//                getline (transferOutFile, line);
-//                beamTransfers.push_back (line);
-//              }
-//
-//            // write beam results
-//            ofstream beamFile (beamOutFilePath.c_str (), ofstream::app);
-//            if (beamFile.is_open ())
-//              {
-//                beamFile << "source sentence (" << (i + 1) << ") : " << endl;
-//                beamFile << sourceSentence << endl << endl;
-//                // just take first best
-//                for (unsigned j = 0; j < /*outs.size ()*/1; j++)
-//                  {
-//                    beamFile << "target sentence " /*<< (j + 1)*/<< " : " << endl;
-//                    beamFile << beamTransfers[j] << endl;
-//                    beamFile << "weight = " << beamTree[j].second << endl;
-//                    beamFile << "rules : ";
-//                    for (unsigned k = 0; k < combNodes[j].size (); k++)
-//                      if (combNodes[j][k]->ruleId)
-//                        beamFile << combNodes[j][k]->ruleId << " ";
-//                    beamFile << endl << endl;
-//                    beamFile
-//                        << "------------------------------------------------------------------"
-//                        << endl << endl;
-//                  }
-//              }
-//            beamFile.close ();
-//          }
-//      else
-//        {
-//          cout << "ERROR in opening files!" << endl;
-//        }
-//      transferOutFile.close ();
-//    }
-//  else
-//    {
-//      cout << "ERROR in opening files!" << endl;
-//    }
-//  return 0;
-//}
diff --git a/src/BeamSearch2.cpp b/src/BeamSearch2.cpp
deleted file mode 100644
index 2225b71..0000000
--- a/src/BeamSearch2.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//#include
-//
-//#include "../pugixml/pugixml.hpp"
-//#include "RuleParser.h"
-//#include "RuleExecution.h"
-//#include "TranElemLiterals.h"
-//#include "CLExec.h"
-//#include "BeamSearch.h"
-//
-//#include
-//
-//using namespace std;
-//using namespace pugi;
-//using namespace elem;
-//
-//void
-//BeamSearch::transfer (string transferFilePath, string localeId, string modelsFileDest,
-//                      string k, FILE* lextorFileFile, FILE* outFile)
-//{
-//
-//  // load transfer file in an xml document object
-//  xml_document transferDoc;
-//  xml_parse_result result = transferDoc.load_file (transferFilePath.c_str ());
-//  if (string (result.description ()) != "No error")
-//    {
-//      cout << "ERROR : " << result.description () << endl;
-//      exit (EXIT_FAILURE);
-//    }
-//
-//  // xml node of the parent node (transfer) in the transfer file
-//  xml_node transfer = transferDoc.child ("transfer");
-//
-//  map > > attrs = RuleParser::getAttrs (transfer);
-//  map vars = RuleParser::getVars (transfer);
-//  map > lists = RuleParser::getLists (transfer);
-//
-//  map > > classesWeights = CLExec::loadYasmetModels (
-//      modelsFileDest);
-//
-//  int beam;
-//  stringstream buffer (k);
-//  buffer >> beam;
-//
-//  char buff[10240];
-//  string tokenizedSentence;
-//  while (fgets (buff, 10240, lextorFileFile))
-//    {
-//      tokenizedSentence = buff;
-//
-//      // spaces after each token
-//      vector spaces;
-//
-//      // tokens in the sentence order
-//      vector slTokens, tlTokens;
-//
-//      // tags of tokens in order
-//      vector > slTags, tlTags;
-//
-//      RuleParser::sentenceTokenizer (&slTokens, &tlTokens, &slTags, &tlTags, &spaces,
-//                                     tokenizedSentence);
-//
-//      // map of tokens ids and their matched categories
-//      map > catsApplied;
-//
-//      RuleParser::matchCats (&catsApplied, slTokens, slTags, transfer);
-//
-//      // map of matched rules and a pair of first token id and patterns number
-//      map > > rulesApplied;
-//
-//      RuleParser::matchRules (&rulesApplied, slTokens, catsApplied, transfer);
-//
-//      // rule and (target) token map to specific output
-//      // if rule has many patterns we will choose the first token only
-//      map > ruleOutputs;
-//
-//      // map (target) token to all matched rules ids and the number of pattern items of each rule
-//      map > > tokenRules;
-//
-//      RuleExecution::ruleOuts (&ruleOutputs, &tokenRules, slTokens, slTags, tlTokens,
-//                               tlTags, rulesApplied, attrs, lists, &vars, spaces,
-//                               localeId);
-//
-//      // final outputs
-//      vector outs;
-//      // number of generated combinations
-//      unsigned compNum;
-//      // nodes for every token and rule
-//      map > nodesPool;
-//      // ambiguous informations
-//      vector ambigInfo;
-//      // beam tree
-//      vector, float> > beamTree;
-//      // rules combinations
-//      vector > combNodes;
-//
-//      nodesPool = RuleExecution::getNodesPool (tokenRules);
-//
-//      RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum);
-//
-//      vector newAmbigInfo;
-//      for (unsigned j = 0; j < ambigInfo.size (); j++)
-//        if (ambigInfo[j]->combinations.size () > 1)
-//          newAmbigInfo.push_back (ambigInfo[j]);
-//
-//      CLExec::beamSearch (&beamTree, beam, slTokens, newAmbigInfo, classesWeights,
-//                          localeId);
-//
-//      RuleExecution::getOuts (&outs, &combNodes, beamTree, nodesPool, ruleOutputs,
-//                              spaces);
-//
-//      // write the outs
-//      for (unsigned j = 0; j < outs.size (); j++)
-//        {
-//          fputs (outs[j].c_str (), outFile);
-//        }
-//
-//      // delete AmbigInfo pointers
-//      for (unsigned j = 0; j < ambigInfo.size (); j++)
-//        {
-//          // delete the dummy node pointers
-//          set dummies;
-//          for (unsigned k = 0; k < ambigInfo[j]->combinations.size (); k++)
-//            dummies.insert (ambigInfo[j]->combinations[k][0]);
-//          for (set::iterator it = dummies.begin ();
-//              it != dummies.end (); it++)
-//            delete (*it);
-//
-//          delete ambigInfo[j];
-//        }
-//      // delete Node pointers
-//      for (map >::iterator it = nodesPool.begin ();
-//          it != nodesPool.end (); it++)
-//        {
-//          for (unsigned j = 0; j < it->second.size (); j++)
-//            {
-//              delete it->second[j];
-//            }
-//        }
-//
-//    }
-//
-//}
-//
-//FILE *
-//open_input (string const &filename)
-//{
-//  FILE *input = fopen (filename.c_str (), "r");
-//  if (!input)
-//    {
-//      wcerr << "Error: can't open input file '";
-//      wcerr << filename.c_str () << "'." << endl;
-//      exit (EXIT_FAILURE);
-//    }
-//
-//  return input;
-//}
-//
-//FILE *
-//open_output (string const &filename)
-//{
-//  FILE *output = fopen (filename.c_str (), "w");
-//  if (!output)
-//    {
-//      wcerr << "Error: can't open output file '";
-//      wcerr << filename.c_str () << "'." << endl;
-//      exit (EXIT_FAILURE);
-//    }
-//  return output;
-//}
-//
-////int main(int argc, char **argv) {
-////    string sentenceFilePath, lextorFilePath, interInFilePath, localeId,
-////            transferFilePath, modelsDest, k;
-////
-////    if (argc == 8) {
-////        localeId = argv[1];
-////        transferFilePath = argv[2];
-////        sentenceFilePath = argv[3];
-////        lextorFilePath = argv[4];
-////        interInFilePath = argv[5];
-////        modelsDest = argv[6];
-////        k = argv[7];
-////    } else {
-////        localeId = "es_ES";
-////        transferFilePath = "apertium-eng-spa.spa-eng.t1x";
-////        sentenceFilePath = "sentences.txt";
-////        lextorFilePath = "lextor.txt";
-////        interInFilePath = "beaminter.txt";
-////        modelsDest = "/home/aboelhamd/Downloads/models";
-////        k = "8";
-////
-//////        localeId = "kk_KZ";
-//////        transferFilePath = "apertium-kaz-tur.kaz-tur.t1x";
-//////        sentenceFilePath = "src.txt";
-//////        lextorFilePath = "lextor.txt";
-//////        interInFilePath = "beam-inter.txt";
-//////        modelsDest = "./UntitledFolder/models";
-//////        k = "8";
-////
-////        cout << "Error in parameters !" << endl;
-////        cout
-////                << "Parameters are : localeId transferFilePath sentenceFilePath lextorFilePath interInFilePath modelsDest beamSize"
-////                << endl;
-////        cout
-////                << "localeId : ICU locale ID for the source language. For Kazakh => kk-KZ"
-////                << endl;
-////        cout
-////                << "transferFilePath : Apertium transfer file of the language pair used."
-////                << endl;
-////        cout << "sentenceFilePath : Source language sentences file." << endl;
-////        cout
-////                << "lextorFilePath : Apertium lextor file for the source language sentences."
-////                << endl;
-////        cout
-////                << "interInFilePath : Output file of this program which is the input for apertium interchunk."
-////                << endl;
-////        cout << "modelsDest : Yasmet models destination." << endl;
-////        cout << "beamSize : The size of beam in beam search algorithm." << endl;
-//////        return -1;
-////    }
-////
-////    ifstream lextorFile(lextorFilePath.c_str());
-////    ifstream inSentenceFile(sentenceFilePath.c_str());
-////    if (lextorFile.is_open() && inSentenceFile.is_open()) {
-////        // load transfer file in an xml document object
-////        xml_document transferDoc;
-////        xml_parse_result result = transferDoc.load_file(
-////                transferFilePath.c_str());
-////        if (string(result.description()) != "No error") {
-////            cout << "ERROR : " << result.description() << endl;
-////            return -1;
-////        }
-////
-////        // xml node of the parent node (transfer) in the transfer file
-////        xml_node transfer = transferDoc.child("transfer");
-////
-////        vector sourceSentences, tokenizedSentences;
-////
-////        string tokenizedSentence;
-////        while (getline(lextorFile, tokenizedSentence)) {
-////            string sourceSentence;
-////            if (!getline(inSentenceFile, sourceSentence))
-////                sourceSentence = "No more sentences";
-////
-////            sourceSentences.push_back(sourceSentence);
-////            tokenizedSentences.push_back(tokenizedSentence);
-////        }
-////        lextorFile.close();
-////        inSentenceFile.close();
-////
-////        map > > attrs = RuleParser::getAttrs(
-////                transfer);
-////        map vars = RuleParser::getVars(transfer);
-////        map > lists = RuleParser::getLists(transfer);
-////
-////        map > > classesWeights =
-////                CLExec::loadYasmetModels(modelsDest);
-////
-////        vector > vouts;
-////
-////        int beam;
-////        stringstream buffer(k);
-////        buffer >> beam;
-////        for (unsigned i = 0; i < sourceSentences.size(); i++) {
-////            cout << i << endl;
-////
-////            string sourceSentence, tokenizedSentence;
-////            sourceSentence = sourceSentences[i];
-////            tokenizedSentence = tokenizedSentences[i];
-////
-////            // spaces after each token
-////            vector spaces;
-////
-////            // tokens in the sentence order
-////            vector slTokens, tlTokens;
-////
-////            // tags of tokens in order
-////            vector > slTags, tlTags;
-////
-////            RuleParser::sentenceTokenizer(&slTokens, &tlTokens, &slTags,
-////                    &tlTags, &spaces, tokenizedSentence);
-////
-////            // map of tokens ids and their matched categories
-////            map > catsApplied;
-////
-////            RuleParser::matchCats(&catsApplied, slTokens, slTags, transfer);
-////
-////            // map of matched rules and a pair of first token id and patterns number
-////            map > > rulesApplied;
-////
-////            RuleParser::matchRules(&rulesApplied, slTokens, catsApplied,
-////                    transfer);
-////
-////            // rule and (target) token map to specific output
-////            // if rule has many patterns we will choose the first token only
-////            map > ruleOutputs;
-////
-////            // map (target) token to all matched rules ids and the number of pattern items of each rule
-////            map > > tokenRules;
-////
-////            RuleExecution::ruleOuts(&ruleOutputs, &tokenRules, slTokens, slTags,
-////                    tlTokens, tlTags, rulesApplied, attrs, lists, &vars, spaces,
-////                    localeId);
-////
-////            // final outputs
-////            vector outs;
-////            // number of generated combinations
-////            unsigned compNum;
-////            // nodes for every token and rule
-////            map > nodesPool;
-////            // ambiguous informations
-////            vector ambigInfo;
-////            // beam tree
-////            vector, float> > beamTree;
-////            // rules combinations
-////            vector > combNodes;
-////
-////            nodesPool = RuleExecution::getNodesPool(tokenRules);
-////
-////            RuleExecution::getAmbigInfo(tokenRules, nodesPool, &ambigInfo,
-////                    &compNum);
-////
-////            vector newAmbigInfo;
-////            for (unsigned j = 0; j < ambigInfo.size(); j++)
-////                if (ambigInfo[j].combinations.size() > 1)
-////                    newAmbigInfo.push_back(ambigInfo[j]);
-////
-////            CLExec::beamSearch(&beamTree, beam, slTokens, newAmbigInfo,
-////                    classesWeights, localeId);
-////
-////            RuleExecution::getOuts(&outs, &combNodes, beamTree, nodesPool,
-////                    ruleOutputs, spaces);
-////
-////            vouts.push_back(outs);
-////        }
-////
-////        // write the outs
-////        ofstream interInFile(interInFilePath.c_str());
-////        if (interInFile.is_open())
-////            for (unsigned i = 0; i < vouts.size(); i++) {
-////                for (unsigned j = 0; j < vouts[i].size(); j++)
-////                    interInFile << vouts[i][j] << endl;
-////            }
-////        else
-////            cout << "ERROR in opening files!" << endl;
-////        interInFile.close();
-////
-////    } else {
-////        cout << "ERROR in opening files!" << endl;
-////    }
-////    return 0;
-////}
-//
-////int main(int argc, char *argv[]) {
-////
-////    string transferFilePath, localeId, modelsDest, k;
-////    FILE *input = stdin, *output = stdout;
-////
-////    if (argc == 7) {
-////        output = open_output(argv[argc - 1]);
-////        input = open_input(argv[argc - 2]);
-////        k = argv[argc - 3];
-////        modelsDest = argv[argc - 4];
-////        localeId = argv[argc - 5];
-////        transferFilePath = argv[argc - 6];
-////    } else if (argc == 6) {
-////        input = open_input(argv[argc - 1]);
-////        k = argv[argc - 2];
-////        modelsDest = argv[argc - 3];
-////        localeId = argv[argc - 4];
-////        transferFilePath = argv[argc - 5];
-////    } else if (argc == 5) {
-////        k = argv[argc - 1];
-////        modelsDest = argv[argc - 2];
-////        localeId = argv[argc - 3];
-////        transferFilePath = argv[argc - 4];
-////    }
-////
-////    BeamSearch::transfer(transferFilePath, localeId, modelsDest, k, input,
-////            output);
-////}
-//
-//int
-//main ()
-//{
-//  map > > models = CLExec::loadYasmetModels (
-//      "/home/aboelhamd/Downloads/newmodel2.model");
-////  cout << models["34+36_32+.model"][""];
-//
-////  vector weights =
-////      models["33_68+33_67+33_95+33_113+115_71+115_66+.model"]["muy_0"];
-////  for (unsigned i = 0; i < weights.size(); i++) {
-////    cout << weights[i] << endl;
-////  }
-////  cout << endl;
-//  for (map > >::iterator it = models.begin ();
-//      it != models.end (); it++)
-//    {
-//      cout << "model=" << it->first << endl;
-//      for (map >::iterator it2 = it->second.begin ();
-//          it2 != it->second.end (); it2++)
-//        {
-//          cout << "word= " << it2->first << endl;
-//          vector weights = it2->second;
-//          for (unsigned i = 0; i < weights.size (); i++)
-//            {
-//              cout << weights[i] << endl;
-//            }
-//          cout << endl;
-//        }
-//    }
-//}
diff --git a/src/LangModAnalysis.cpp b/src/LangModAnalysis.cpp
index 90e7f05..03a308a 100644
--- a/src/LangModAnalysis.cpp
+++ b/src/LangModAnalysis.cpp
@@ -54,12 +54,15 @@ int main(int argc, char **argv) {
     }
 
     if (argc - optind == 6) {
-        localeId = argv[optind - argc - 6];
-        transferFilePath = argv[optind - argc - 5];
-        sentenceFilePath = argv[optind - argc - 4];
-        lextorFilePath = argv[optind - argc - 3];
-        targetFilePath = argv[optind - argc - 2];
-        weightFilePath = argv[optind - argc - 1];
+        localeId = argv[argc - 6];
+        transferFilePath = argv[argc - 5];
+        sentenceFilePath = argv[argc - 4];
+        lextorFilePath = argv[argc - 3];
+        targetFilePath = argv[argc - 2];
+        weightFilePath = argv[argc - 1];
+        cout << localeId << " " << transferFilePath << " " << sentenceFilePath
+                << " " << lextorFilePath << " " << targetFilePath << " "
+                << weightFilePath << endl;
     } else {
 //        localeId = "es_ES";
 //        transferFilePath = "transferFile.t1x";
@@ -82,12 +85,12 @@ int main(int argc, char **argv) {
 //        randModFilePath = "randModFile.txt";
 
         localeId = "es_ES";
-        transferFilePath = "transferFile3.t1x";
-        sentenceFilePath = "spa-toknizer.txt";
-        lextorFilePath = "spa-lextor.txt";
+        transferFilePath = "test/apertium-kaz-tur.kaz-tur.t1x";
+        sentenceFilePath = "test/source.txt";
+        lextorFilePath = "test/lextor.txt";
 
-        targetFilePath = "spa-transfer.txt";
-        weightFilePath = "spa-weight.txt";
+        targetFilePath = "test/transfer.txt";
+        weightFilePath = "test/night-weights.txt";
 
         analysisFilePath = "outAnalysis.txt";
         bestModFilePath = "bestModFile.txt";
@@ -123,7 +126,7 @@ int main(int argc, char **argv) {
         cout
                 << "randModFilePath : Third output file name which is random translations from (language model) for the source language sentences."
                 << endl;
-        return -1;
+//        return -1;
     }
 
     // seed for randomness
@@ -182,7 +185,7 @@ int main(int argc, char **argv) {
 
     if (weightFile.is_open() && targetFile.is_open())
         for (unsigned i = 0; i < sourceSentences.size(); i++) {
-//            cout << i << endl;
+            cout << i << endl;
 
             string sourceSentence, tokenizedSentence;
             sourceSentence = sourceSentences[i];
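
The LangModAnalysis.cpp hunk above is the "options bug" the commit message refers to: after getopt() has consumed the option flags, optind points at the first positional argument, so optind < argc and the old index argv[optind - argc - 6] was always negative, an out-of-bounds read. The fix indexes the six positionals from the end of argv instead, which is safe because the guard argc - optind == 6 has already pinned their count. A minimal standalone sketch of the same getopt/optind pattern follows; the -b flag and the two positional names are illustrative, not this program's actual options:

    // Sketch of option parsing with getopt: flags first, then a fixed
    // number of positional arguments read relative to argc/optind.
    #include <unistd.h>
    #include <cstdio>
    #include <cstdlib>

    int main(int argc, char **argv) {
        bool binary = false;
        int opt;
        while ((opt = getopt(argc, argv, "b")) != -1) {
            switch (opt) {
            case 'b':
                binary = true;   // hypothetical flag, for illustration only
                break;
            default:
                fprintf(stderr, "Usage: %s [-b] input output\n", argv[0]);
                return EXIT_FAILURE;
            }
        }
        // Exactly two positionals must remain after the options.
        if (argc - optind != 2) {
            fprintf(stderr, "Usage: %s [-b] input output\n", argv[0]);
            return EXIT_FAILURE;
        }
        // Once the count is checked, indexing from the end (as the patch
        // does) and indexing from optind are equivalent:
        const char *input = argv[argc - 2];   // == argv[optind]
        const char *output = argv[argc - 1];  // == argv[optind + 1]
        printf("binary=%d input=%s output=%s\n", binary, input, output);
        return EXIT_SUCCESS;
    }

In the patched program the same equivalence holds: argv[argc - 6] through argv[argc - 1] match argv[optind] through argv[optind + 5] exactly because the branch is only entered when argc - optind == 6.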