commit 5c3db17503ada22217e1d84cf838f5efa43a00e7 Author: Tanmai Khanna Date: Fri Jun 26 19:21:07 2020 +0530 blanks in output based on where lem/lemh is clipped from diff --git a/apertium/transfer.cc b/apertium/transfer.cc index e6030ae..fae09be 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -315,12 +315,16 @@ Transfer::evalString(xmlNode *element) { if(in_lu) { - secondary_tags.append(word[ti.getPos()]->source(attr_items["sectags"], ti.getCondition())); + secondary_tags.append(word[ti.getPos()]->source(attr_items["sectags"], ti.getCondition())); + wblank.append(word[ti.getPos()]->blank()); } else if(in_let_var) { string temp_sl_secondary_tags = word[ti.getPos()]->source(attr_items["sectags"], ti.getCondition()); var_secondary_tags[var_val].append(temp_sl_secondary_tags); + + string temp_wblank = word[ti.getPos()]->blank(); + var_wblank[var_val].append(temp_wblank); } } else if(ti.getContent().compare("lemq") == 0) @@ -351,11 +355,15 @@ Transfer::evalString(xmlNode *element) if(in_lu) { secondary_tags.append(word[ti.getPos()]->target(attr_items["sectags"], ti.getCondition())); + wblank.append(word[ti.getPos()]->blank()); } else if(in_let_var) { string temp_tl_secondary_tags = word[ti.getPos()]->target(attr_items["sectags"], ti.getCondition()); var_secondary_tags[var_val].append(temp_tl_secondary_tags); + + string temp_wblank = word[ti.getPos()]->blank(); + var_wblank[var_val].append(temp_wblank); } } else if(ti.getContent().compare("lemq") == 0) @@ -659,6 +667,7 @@ Transfer::evalString(xmlNode *element) { in_lu = true; secondary_tags.clear(); + wblank.clear(); string myword; for(xmlNode *i = element->children; i != NULL; i = i->next) @@ -674,7 +683,7 @@ Transfer::evalString(xmlNode *element) if(myword != "") { - return "^"+myword+"$"; + return wblank+"^"+myword+"$"; } else { @@ -761,6 +770,7 @@ Transfer::processOut(xmlNode *localroot) { in_lu = true; secondary_tags.clear(); + wblank.clear(); string myword; for(xmlNode *j = i->children; j != NULL; j = j->next) @@ -776,6 +786,7 @@ Transfer::processOut(xmlNode *localroot) myword.append(secondary_tags); if(myword != "") { + fputws_unlocked(UtfConverter::fromUtf8(wblank).c_str(), output); fputwc_unlocked(L'^', output); fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output); fputwc_unlocked(L'$', output); @@ -915,6 +926,7 @@ Transfer::processChunk(xmlNode *localroot) { in_lu = true; secondary_tags.clear(); + wblank.clear(); string myword; for(xmlNode *j = i->children; j != NULL; j = j->next) @@ -930,6 +942,7 @@ Transfer::processChunk(xmlNode *localroot) if(myword != "") { + result.append(wblank); result.append("^"); result.append(myword); result.append("$"); @@ -1101,6 +1114,7 @@ Transfer::processLet(xmlNode *localroot) var_val = ti.getContent(); var_secondary_tags[var_val].clear(); + var_wblank.clear(); var_has_lemq[var_val] = false; variables[ti.getContent()] = evalString(rightSide); @@ -1151,6 +1165,7 @@ Transfer::processLet(xmlNode *localroot) var_val = val; var_secondary_tags[var_val].clear(); + var_wblank.clear(); var_has_lemq[var_val] = false; variables[val] = evalString(rightSide); @@ -2072,7 +2087,8 @@ Transfer::readToken(FILE *in) } if(in_wblank) { - content += L"[["; + content = L"[["; + content+= wchar_t(val); while(true) { diff --git a/apertium/transfer.h b/apertium/transfer.h index 98a8881..d03fd30 100644 --- a/apertium/transfer.h +++ b/apertium/transfer.h @@ -71,6 +71,8 @@ private: map var_has_lemq; //map variable name to bool->true if variable clips lemq bool in_wblank; + string wblank; + map var_wblank; bool gettingLemmaFromWord(string attr); diff --git a/apertium/transfer_word.cc b/apertium/transfer_word.cc index 0bed7c3..86862fe 100644 --- a/apertium/transfer_word.cc +++ b/apertium/transfer_word.cc @@ -116,16 +116,9 @@ TransferWord::reference(ApertiumRE const &part, bool with_queue) } string -TransferWord::blank(ApertiumRE const &part, bool with_queue) +TransferWord::blank() { - if(with_queue) //TODO test removing - { - return part.match(b_str); - } - else - { - return part.match(b_str.substr(0, b_str.size() - queue_length)); - } + return b_str; } bool diff --git a/apertium/transfer_word.h b/apertium/transfer_word.h index 8755856..3ff35d8 100644 --- a/apertium/transfer_word.h +++ b/apertium/transfer_word.h @@ -153,11 +153,9 @@ public: /** * Reference the wordbound blank part - * @param part regular expression to match - * @param with_queue access taking into account the queue - * @returns reference to the part of string matched + * @returns reference to the wordbound blank */ - string blank(ApertiumRE const &part, bool with_queue = true); + string blank(); /** * Sets a value for a source language word part