commit 890357653f6556d64e54f3485edb280d80f12126 Author: Tanmai Khanna Date: Tue Aug 13 11:47:01 2019 +0530 Changes for Anaphora Resolution (#55) Handle new ref attribute. By Tanmai Khanna diff --git a/apertium/transfer.cc b/apertium/transfer.cc index 7941d94..f8950e3 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -313,6 +313,13 @@ Transfer::evalString(xmlNode *element) } break; + case ti_clip_ref: + if(checkIndex(element, ti.getPos(), lword)) + { + return word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()); + } + break; + case ti_linkto_sl: if(checkIndex(element, ti.getPos(), lword)) { @@ -341,6 +348,20 @@ Transfer::evalString(xmlNode *element) } break; + case ti_linkto_ref: + if(checkIndex(element, ti.getPos(), lword)) + { + if(word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()) != "") + { + return "<" + string((char *) ti.getPointer()) + ">"; + } + else + { + return ""; + } + } + break; + case ti_var: return variables[ti.getContent()]; @@ -380,6 +401,13 @@ Transfer::evalString(xmlNode *element) } break; + case ti_case_of_ref: + if(checkIndex(element, ti.getPos(), lword)) + { + return caseOf(word[ti.getPos()]->reference(attr_items[ti.getContent()])); + } + break; + default: return ""; } @@ -425,6 +453,10 @@ Transfer::evalString(xmlNode *element) { evalStringCache[element] = TransferInstr(ti_linkto_sl, (const char *) part, pos, (void *) as, queue); } + else if(!xmlStrcmp(side, (const xmlChar *) "ref")) + { + evalStringCache[element] = TransferInstr(ti_linkto_ref, (const char *) part, pos, (void *) as, queue); + } else { evalStringCache[element] = TransferInstr(ti_linkto_tl, (const char *) part, pos, (void *) as, queue); @@ -434,6 +466,10 @@ Transfer::evalString(xmlNode *element) { evalStringCache[element] = TransferInstr(ti_clip_sl, (const char *) part, pos, NULL, queue); } + else if(!xmlStrcmp(side, (const xmlChar *) "ref")) + { + evalStringCache[element] = TransferInstr(ti_clip_ref, (const char *) part, pos, NULL, queue); + } else { evalStringCache[element] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue); @@ -504,6 +540,10 @@ Transfer::evalString(xmlNode *element) { evalStringCache[element] = TransferInstr(ti_case_of_sl, (const char *) part, pos); } + else if(!xmlStrcmp(side, (const xmlChar *) "ref")) + { + evalStringCache[element] = TransferInstr(ti_case_of_ref, (const char *) part, pos); + } else { evalStringCache[element] = TransferInstr(ti_case_of_tl, (const char *) part, pos); @@ -939,6 +979,12 @@ Transfer::processLet(xmlNode *localroot) } return; + case ti_clip_ref: + if (checkIndex(leftSide, ti.getPos(), lword)) { + word[ti.getPos()]->setReference(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition()); + } + return; + default: return; } @@ -996,6 +1042,11 @@ Transfer::processLet(xmlNode *localroot) word[pos]->setTarget(attr_items[(const char *) part], evalString(rightSide), queue); evalStringCache[leftSide] = TransferInstr(ti_clip_tl, (const char *) part, pos, NULL, queue); } + else if(!xmlStrcmp(side, (const xmlChar *) "ref")) + { + word[pos]->setReference(attr_items[(const char *) part], evalString(rightSide), queue); + evalStringCache[leftSide] = TransferInstr(ti_clip_ref, (const char *) part, pos, NULL, queue); + } else { word[pos]->setSource(attr_items[(const char *) part], evalString(rightSide), queue); @@ -1086,6 +1137,12 @@ Transfer::processModifyCase(xmlNode *localroot) word[pos]->source(attr_items[(const char *) part], queue)); word[pos]->setSource(attr_items[(const char *) part], result); } + else if(!xmlStrcmp(side, (const xmlChar *) "ref")) + { + string const result = copycase(evalString(rightSide), + word[pos]->reference(attr_items[(const char *) part], queue)); + word[pos]->setReference(attr_items[(const char *) part], result); + } else { string const result = copycase(evalString(rightSide), @@ -2036,6 +2093,8 @@ Transfer::transfer(FILE *in, FILE *out) { wstring sl; wstring tl; + wstring ref; + int seenSlash = 0; for(wstring::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++) { @@ -2047,17 +2106,25 @@ Transfer::transfer(FILE *in, FILE *out) it++; sl.push_back(*it); } - else + else if(seenSlash == 1) { tl.push_back(*it); it++; tl.push_back(*it); } + else + { + ref.push_back(*it); + it++; + ref.push_back(*it); + } continue; } else if(*it == L'/') { seenSlash++; + + ref.clear(); //the word after the last slash is the ref continue; } if(seenSlash == 0) @@ -2068,9 +2135,9 @@ Transfer::transfer(FILE *in, FILE *out) { tl.push_back(*it); } - else if(seenSlash > 1) + else { - break; + ref.push_back(*it); } } //tmpword[0]->assign(sl); @@ -2216,14 +2283,18 @@ Transfer::applyRule() if(useBilingual && preBilingual == false) { tr = fstp.biltransWithQueue(*tmpword[i], false); + wstring refx; word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]), UtfConverter::toUtf8(tr.first), + UtfConverter::toUtf8(refx), tr.second); } else if(preBilingual) { wstring sl; wstring tl; + wstring ref; + int seenSlash = 0; for(wstring::const_iterator it = tmpword[i]->begin(); it != tmpword[i]->end(); it++) { @@ -2235,18 +2306,26 @@ Transfer::applyRule() it++; sl.push_back(*it); } - else + else if(seenSlash == 1) { tl.push_back(*it); it++; tl.push_back(*it); } + else + { + ref.push_back(*it); + it++; + ref.push_back(*it); + } continue; } if(*it == L'/') { seenSlash++; + + ref.clear(); //word after last slash is ref continue; } if(seenSlash == 0) @@ -2257,21 +2336,24 @@ Transfer::applyRule() { tl.push_back(*it); } - else if(seenSlash > 1) + else { - break; + ref.push_back(*it); } } tr = pair(tl, false); word[i] = new TransferWord(UtfConverter::toUtf8(sl), UtfConverter::toUtf8(tr.first), + UtfConverter::toUtf8(ref), tr.second); } else // neither useBilingual nor preBilingual (sl==tl) { tr = pair(*tmpword[i], false); + wstring refx; word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]), UtfConverter::toUtf8(tr.first), + UtfConverter::toUtf8(refx), tr.second); } } diff --git a/apertium/transfer.dtd b/apertium/transfer.dtd index 3107d08..48b93d2 100644 --- a/apertium/transfer.dtd +++ b/apertium/transfer.dtd @@ -364,7 +364,7 @@ representing a lexical form in the matched pattern