commit 213f751be13e8c3cfdeb2d66107590842fec1353 Author: aboelhamd Date: Sat Apr 20 02:29:41 2019 +0200 No debug traces diff --git a/src/RuleExecution.cpp b/src/RuleExecution.cpp index 283509c..1caeb92 100644 --- a/src/RuleExecution.cpp +++ b/src/RuleExecution.cpp @@ -1,10 +1,3 @@ -/* - * RuleExecution.cpp - * - * Created on: May 5, 2018 - * Author: aboelhamd - */ - #include #include #include @@ -13,7 +6,8 @@ #include #include -#include "../pugixml/pugixml.hpp" +//#include "../pugixml/pugixml.hpp" +#include "pugixml.hpp" #include "TranElemLiterals.h" #include "CLExec.h" @@ -91,7 +85,7 @@ putOuts (vector outputs, vector nestedOutputs) newOutput += outputs[i]; newOutput += nestedOutputs[j]; // newOutput.push_back (spaces[tokenIndex]); - +// cout << "output : " << newOutput << endl; newOutputs.push_back (newOutput); } } @@ -214,6 +208,7 @@ RuleExecution::getOuts (vector* finalOuts, vector >* finalC ruleOutputs[combinations[k][l].ruleId][combinations[k][l].tokenId] + spaces[combinations[k][l].tokenId + combinations[k][l].patNum - 1]; +// cout << "ambigout : " << ambigOut << endl; // cout << i << " : " << j << " , " << ambigOut << endl; } ambigOuts.push_back (ambigOut); @@ -1002,13 +997,13 @@ pushDistinct (map > >* tokenRules, void printNodeAttrs (xml_node node) { - cout << node.name () << endl; - for (xml_node::attribute_iterator it = node.attributes_begin (); - it != node.attributes_end (); it++) - { - cout << it->name () << "=" << it->value () << "; "; - } - cout << endl << endl; +// cout << node.name () << endl; +// for (xml_node::attribute_iterator it = node.attributes_begin (); +// it != node.attributes_end (); it++) +// { +// cout << it->name () << "=" << it->value () << "; "; +// } +// cout << endl << endl; } void @@ -1517,7 +1512,7 @@ RuleExecution::equal (xml_node equal, vector >* slAnalysisTokens, // if (secondStr[i] != '<' && secondStr[i] != '>') // temp += secondStr[i]; // secondStr = temp; - cout << "firstStr=" << firstStr << " , secondStr=" << secondStr << endl; +// cout << "firstStr=" << firstStr << " , secondStr=" << secondStr << endl; xml_attribute caseless = equal.attribute (CASE_LESS); if (string (caseless.value ()) == "yes") @@ -1561,7 +1556,7 @@ RuleExecution::choose (xml_node chooseNode, vector >* slAnalysisT condition = true; } - cout << "condition=" << condition << endl; +// cout << "condition=" << condition << endl; if (condition) { for (xml_node inst = child.first_child (); inst; inst = inst.next_sibling ()) @@ -1920,7 +1915,7 @@ RuleExecution::var (xml_node var, map* vars) string varName = var.attribute (N).value (); string varValue = (*vars)[varName]; - cout << "varname=" << varName << " , value=" << (*vars)[varName] << endl; +// cout << "varname=" << varName << " , value=" << (*vars)[varName] << endl; return varValue; } @@ -1987,9 +1982,9 @@ RuleExecution::let (xml_node let, vector >* slAnalysisTokens, resultStr += secondResult[i]; string varName = firstChild.attribute (N).value (); - cout << "varname=" << varName << " , value=" << resultStr << endl; +// cout << "varname=" << varName << " , value=" << resultStr << endl; (*vars)[varName] = resultStr; - cout << "varname=" << varName << " , value=" << (*vars)[varName] << endl; +// cout << "varname=" << varName << " , value=" << (*vars)[varName] << endl; } else if (firstName == CLIP) { @@ -2066,9 +2061,9 @@ RuleExecution::clip (xml_node clip, vector >* slAnalysisTokens, xml_attribute linkTo = clip.attribute (LINK_TO); if (string (linkTo.name ()) == LINK_TO) { - pos = linkTo.as_uint () - 1; - result = tags[pos]; - +// pos = linkTo.as_uint () - 1; +// result = tags[pos]; + result.push_back ("<" + string (linkTo.value ()) + ">"); // for (unsigned i = 0; i < result.size (); i++) // result[i] = "<" + result[i] + ">"; @@ -2084,10 +2079,10 @@ RuleExecution::clip (xml_node clip, vector >* slAnalysisTokens, if (side == TL) analysisToken = (*tlAnalysisTokens)[pos]; - cout << "analysisToken = "; - for (unsigned i = 0; i < analysisToken.size (); i++) - cout << analysisToken[i] << " "; - cout << endl; +// cout << "analysisToken = "; +// for (unsigned i = 0; i < analysisToken.size (); i++) +// cout << analysisToken[i] << " "; +// cout << endl; if (part == WHOLE) { @@ -2110,8 +2105,8 @@ RuleExecution::clip (xml_node clip, vector >* slAnalysisTokens, // } // lem = newLem; size_t spaceInd = lem.find ('#'); -// if (spaceInd == string::npos) -// spaceInd = lem.find (' '); +// if (spaceInd == string::npos) +// spaceInd = lem.find (' '); if (spaceInd == string::npos) { @@ -2476,8 +2471,8 @@ RuleExecution::modifyCase (xml_node modifyCase, vector >* slAnaly // lem = newLem; size_t spaceInd = lem.find ('#'); -// if (spaceInd == string::npos) -// spaceInd = lem.find (' '); +// if (spaceInd == string::npos) +// spaceInd = lem.find (' '); if (spaceInd == string::npos) { if (Case == aa) diff --git a/src/RulesApplier.cpp b/src/RulesApplier.cpp index 11f1a96..3035cbe 100644 --- a/src/RulesApplier.cpp +++ b/src/RulesApplier.cpp @@ -26,15 +26,14 @@ using namespace elem; int main (int argc, char **argv) { - string localeId, transferFilePath, sentenceFilePath, lextorFilePath, interInFilePath; + string localeId, transferFilePath, lextorFilePath, interInFilePath; - if (argc == 6) + if (argc == 5) { localeId = argv[1]; transferFilePath = argv[2]; - sentenceFilePath = argv[3]; - lextorFilePath = argv[4]; - interInFilePath = argv[5]; + lextorFilePath = argv[3]; + interInFilePath = argv[4]; } else { @@ -52,7 +51,6 @@ main (int argc, char **argv) localeId = "es_ES"; transferFilePath = "./issues/apertium-eng-spa.spa-eng.t1x"; - sentenceFilePath = "./issues/sents.txt"; lextorFilePath = "./issues/lextor.txt"; interInFilePath = "./issues/interIn.txt"; @@ -64,18 +62,16 @@ main (int argc, char **argv) << endl; cout << "transferFilePath : Apertium transfer file of the language pair used." << endl; - cout << "sentenceFilePath : Source language sentences file." << endl; cout << "lextorFilePath : Apertium lextor file for the source language sentences." << endl; cout << "interInFilePath : Output file name of this program which is the input for apertium interchunk." << endl; -// return -1; + return -1; } ifstream lextorFile (lextorFilePath.c_str ()); - ifstream inSentenceFile (sentenceFilePath.c_str ()); - if (lextorFile.is_open () && inSentenceFile.is_open ()) + if (lextorFile.is_open ()) { // load transfer file in an xml document object xml_document transferDoc; @@ -90,20 +86,14 @@ main (int argc, char **argv) // xml node of the parent node (transfer) in the transfer file xml_node transfer = transferDoc.child ("transfer"); - vector sourceSentences, tokenizedSentences; + vector tokenizedSentences; string tokenizedSentence; while (getline (lextorFile, tokenizedSentence)) { - string sourceSentence; - if (!getline (inSentenceFile, sourceSentence)) - sourceSentence = "No more sentences"; - - sourceSentences.push_back (sourceSentence); tokenizedSentences.push_back (tokenizedSentence); } lextorFile.close (); - inSentenceFile.close (); map > > attrs = RuleParser::getAttrs (transfer); map vars = RuleParser::getVars (transfer); @@ -111,12 +101,11 @@ main (int argc, char **argv) ofstream interInFile (interInFilePath.c_str ()); if (interInFile.is_open ()) - for (unsigned i = 0; i < sourceSentences.size (); i++) + for (unsigned i = 0; i < tokenizedSentences.size (); i++) { - cout << i << endl; +// cout << i << endl; - string sourceSentence, tokenizedSentence; - sourceSentence = sourceSentences[i]; + string tokenizedSentence; tokenizedSentence = tokenizedSentences[i]; // spaces after each token @@ -164,48 +153,10 @@ main (int argc, char **argv) nodesPool = RuleExecution::getNodesPool (tokenRules); - for (map >::iterator it = - ruleOutputs.begin (); it != ruleOutputs.end (); it++) - { - cout << "ruleId=" << it->first << endl; - map outs = it->second; - - for (map::iterator it2 = outs.begin (); - it2 != outs.end (); it2++) - { - cout << "tokId=" << it2->first << " , out = " << it2->second << endl; - } - cout << endl; - } - cout << endl; - - for (unsigned j = 0; j < tlTokens.size (); j++) - { - vector nodes = nodesPool[j]; - cout << "tokId = " << j << " : " << tlTokens[j] << endl; - for (unsigned k = 0; k < nodes.size (); k++) - { - cout << "ruleId = " << nodes[k].ruleId << "; patNum = " - << nodes[k].patNum << endl; - } - cout << endl; - } - RuleExecution::getAmbigInfo (tokenRules, nodesPool, &ambigInfo, &compNum); RuleExecution::getOuts (&outs, &combNodes, ambigInfo, nodesPool, ruleOutputs, spaces); - for (unsigned j = 0; j < combNodes.size (); j++) - { - vector nodes = combNodes[j]; - for (unsigned k = 0; k < nodes.size (); k++) - { - cout << "tok=" << nodes[k].tokenId << "; rul=" << nodes[k].ruleId - << "; pat=" << nodes[k].patNum << " - "; - } - cout << endl; - } - // write the outs for (unsigned j = 0; j < outs.size (); j++) interInFile << outs[j] << endl; diff --git a/src/YasmetFormatter.cpp b/src/YasmetFormatter.cpp index 69fb2cb..91451e2 100644 --- a/src/YasmetFormatter.cpp +++ b/src/YasmetFormatter.cpp @@ -26,18 +26,16 @@ using namespace elem; int main (int argc, char **argv) { - string sentenceFilePath = "sentences.txt", lextorFilePath = "lextor.txt", - weightOutFilePath = "weights.txt", localeId = "kk_KZ", transferFilePath = - "transferFile.tx1", datasetsPath = "datasets"; + string lextorFilePath = "lextor.txt", weightOutFilePath = "weights.txt", localeId = + "kk_KZ", transferFilePath = "transferFile.tx1", datasetsPath = "datasets"; - if (argc == 7) + if (argc == 6) { localeId = argv[1]; transferFilePath = argv[2]; - sentenceFilePath = argv[3]; - lextorFilePath = argv[4]; - weightOutFilePath = argv[5]; - datasetsPath = argv[6]; + lextorFilePath = argv[3]; + weightOutFilePath = argv[4]; + datasetsPath = argv[5]; } else { @@ -50,10 +48,8 @@ main (int argc, char **argv) // outputFilePath = "output.out"; // datasetsPath = "datasetstry2"; -//./yasmet-formatter $localeId sentences.txt lextor.txt transfer.txt weights.txt $outputFile $datasets; localeId = "kk_KZ"; transferFilePath = "apertium-kaz-tur.kaz-tur.t1x"; - sentenceFilePath = "sample-sentences.txt"; lextorFilePath = "sample-lextor.txt"; weightOutFilePath = "norm-weights.txt"; datasetsPath = "datasetstry1234"; @@ -66,7 +62,6 @@ main (int argc, char **argv) << endl; cout << "transferFilePath : Apertium transfer file of the language pair used." << endl; - cout << "sentenceFilePath : Source language sentences file." << endl; cout << "lextorFilePath : Apertium lextor file for the source language sentences." << endl; cout @@ -78,8 +73,7 @@ main (int argc, char **argv) } ifstream lextorFile (lextorFilePath.c_str ()); - ifstream inSentenceFile (sentenceFilePath.c_str ()); - if (lextorFile.is_open () && inSentenceFile.is_open ()) + if (lextorFile.is_open ()) { // load transfer file in an xml document object xml_document* transferDoc = new xml_document (); @@ -94,44 +88,26 @@ main (int argc, char **argv) // xml node of the parent node (transfer) in the transfer file xml_node transfer = transferDoc->child ("transfer"); - vector *sourceSentences = new vector (), *tokenizedSentences = - new vector (); + vector *tokenizedSentences = new vector (); string tokenizedSentence; - //unsigned i = 0; while (getline (lextorFile, tokenizedSentence)) { - string sourceSentence; - if (!getline (inSentenceFile, sourceSentence)) - sourceSentence = "No more sentences"; - - sourceSentences->push_back (sourceSentence); tokenizedSentences->push_back (tokenizedSentence); - //if (i == 100) - // break; - //i++; } lextorFile.close (); - inSentenceFile.close (); map > > attrs = RuleParser::getAttrs (transfer); map vars = RuleParser::getVars (transfer); map > lists = RuleParser::getLists (transfer); -// vector >* vslTokens = new vector > (); -// vector vouts; -// vector >* vambigInfo = new vector< -// vector > (); -// vector > > vcompNodes; - ifstream weightOutFile (weightOutFilePath.c_str ()); if (weightOutFile.is_open ()) - for (unsigned i = 0; i < sourceSentences->size (); i++) + for (unsigned i = 0; i < tokenizedSentences->size (); i++) { // cout << i << endl; string sourceSentence, tokenizedSentence; - sourceSentence = (*sourceSentences)[i]; tokenizedSentence = (*tokenizedSentences)[i]; // spaces after each token