commit 8f4dd5dd16e5f8df97a1d68e0a440914cb54181e Author: Tanmai Khanna Date: Mon May 11 17:07:18 2020 +0530 getting secondary tags in output from the word which provides the lemma | modifying regex in transfer_data for tags and stags diff --git a/apertium/transfer.cc b/apertium/transfer.cc index eff15a4..50a397d 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -304,6 +304,12 @@ Transfer::evalString(xmlNode *element) case ti_clip_sl: if(checkIndex(element, ti.getPos(), lword)) { + if(ti.getContent().compare("lem") == 0 || ti.getContent().compare("lemh") == 0) //only get secondary tags if we're getting the lemma from the word + { + stags = word[ti.getPos()]->source(attr_items["stags"], ti.getCondition()); + //cout << "\n##SLSTAGS::" <source(attr_items[ti.getContent()], ti.getCondition()); } break; @@ -311,6 +317,12 @@ Transfer::evalString(xmlNode *element) case ti_clip_tl: if(checkIndex(element, ti.getPos(), lword)) { + if(ti.getContent().compare("lem") == 0 || ti.getContent().compare("lemh") == 0) + { + stags = word[ti.getPos()]->target(attr_items["stags"], ti.getCondition()); + //cout << "\n##TLSTAGS::" <target(attr_items[ti.getContent()], ti.getCondition()); } break; @@ -651,7 +663,7 @@ Transfer::processOut(xmlNode *localroot) { if(i->type == XML_ELEMENT_NODE) { - if(defaultAttrs == lu) + if(defaultAttrs == lu) //stream_modification TODO { if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { @@ -793,8 +805,11 @@ Transfer::processChunk(xmlNode *localroot) result.append(processTags(i)); result.append("{"); } - else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) + else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) //Need to add secondary tags here! { + //cout << "\n%%OUTNOW%%\n"; + stags.clear(); + string myword; for(xmlNode *j = i->children; j != NULL; j = j->next) { @@ -803,6 +818,9 @@ Transfer::processChunk(xmlNode *localroot) myword.append(evalString(j)); } } + //cout << "\n###MYWORD###\n" << myword << "\n###MYWORD###\n"; + myword.append(stags); //from the LU that the lem or lemh has come from + //cout << "\n###WITHSTAGS###\n" << myword << "\n###WITHSTAGS###\n"; if(myword != "") { result.append("^"); @@ -810,7 +828,7 @@ Transfer::processChunk(xmlNode *localroot) result.append("$"); } } - else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) + else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) //stream modification TODO { bool first_time = true; string myword; diff --git a/apertium/transfer.h b/apertium/transfer.h index 2a1135f..7ee5722 100644 --- a/apertium/transfer.h +++ b/apertium/transfer.h @@ -61,7 +61,9 @@ private: Buffer input_buffer; vector tmpword; vector tmpblank; - + + string stags; //stores secondary tags of the LU that is being output + FSTProcessor fstp; FSTProcessor extended; bool isExtended; diff --git a/apertium/transfer_data.cc b/apertium/transfer_data.cc index 78015f3..adf8b8b 100644 --- a/apertium/transfer_data.cc +++ b/apertium/transfer_data.cc @@ -50,7 +50,8 @@ TransferData::TransferData() attr_items[L"lemq"] = L"\\#[- _][^<]+"; attr_items[L"lemh"] = L"^(([^<#]|\"\\<\"|\"\\#\")+)"; attr_items[L"whole"] = L"(.+)"; - attr_items[L"tags"] = L"((<[^>]+>)+)"; + attr_items[L"tags"] = L"((<[^:>]+>)+)"; //match all tags excluding secondary tags + attr_items[L"stags"] = L"((<[^>]+:[^>]+>)+)"; //match all secondary tags attr_items[L"chname"] = L"({([^/]+)\\/)"; // includes delimiters { and / !!! attr_items[L"chcontent"] = L"(\\{.+)"; attr_items[L"content"] = L"(\\{.+)";