commit 8d420a7ea454a35308c4e8db917b5a5c626fcbd0 Author: Tanmai Khanna Date: Fri Aug 28 00:54:42 2020 +0530 Change blank handling to use a blank queue | position for <b/> has no meaning in transfer now | Added tests diff --git a/apertium/transfer.cc b/apertium/transfer.cc index 8109073..ac3e24b 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -51,7 +51,6 @@ Transfer::destroy() Transfer::Transfer() : word(0), -blank(0), lword(0), lblank(0), last_lword(0), @@ -72,7 +71,6 @@ nwords(0) internal_null_flush = false; trace = false; trace_att = false; - emptyblank = ""; in_lu = false; in_let_var = false; } @@ -472,11 +470,15 @@ Transfer::evalString(xmlNode *element) return ti.getContent(); case ti_b: - if(ti.getPos() >= 0 && checkIndex(element, ti.getPos(), lblank)) + if(!blank_queue.empty()) { - return !blank?"":*(blank[ti.getPos()]); + string retblank = blank_queue.front(); + blank_queue.pop(); + + return retblank; } - else { + else + { return " "; } break; @@ -588,15 +590,7 @@ Transfer::evalString(xmlNode *element) } else if(!xmlStrcmp(element->name, (const xmlChar *) "b")) { - if(element->properties == NULL) - { - evalStringCache[element] = TransferInstr(ti_b, " ", -1); - } - else - { - int pos = atoi((const char *) element->properties->children->content) - 1; - evalStringCache[element] = TransferInstr(ti_b, "", pos); - } + evalStringCache[element] = TransferInstr(ti_b, " ", -1); } else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from")) { @@ -887,6 +881,12 @@ Transfer::processOut(xmlNode *localroot) } } } + + while(!blank_queue.empty()) //flush remaining blanks + { + fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output); + blank_queue.pop(); + } } string @@ -1427,15 +1427,8 @@ Transfer::processCallMacro(xmlNode *localroot) myword = new TransferWord *[npar]; std::fill(myword, myword+npar, (TransferWord *)(0)); } - string **myblank = NULL; - if(npar > 0) - { - myblank = new string *[npar]; - myblank[npar-1] = &emptyblank; - } 
- + int idx = 0; - int lastpos = 0; for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1446,23 +1439,12 @@ Transfer::processCallMacro(xmlNode *localroot) } int pos = atoi((const char *) i->properties->children->content)-1; myword[idx] = word[pos]; - if(idx-1 >= 0) - { - if(lastpos + 1 > lblank) { // if a 1-pattern rule calls macro with same - noblank = ""; // param twice the blank array will be empty - myblank[idx-1] = &noblank; - } - else { - myblank[idx-1] = blank[lastpos]; - } - } + idx++; - lastpos = pos; } } swap(myword, word); - swap(myblank, blank); swap(npar, lword); for(xmlNode *i = macro->children; i != NULL; i = i->next) @@ -1474,11 +1456,9 @@ Transfer::processCallMacro(xmlNode *localroot) } swap(myword, word); - swap(myblank, blank); swap(npar, lword); delete[] myword; - delete[] myblank; } int @@ -2617,18 +2597,19 @@ Transfer::applyRule() lword = limit; if(limit != 1) { - blank = new string *[limit - 1]; lblank = limit - 1; } else { - blank = NULL; lblank = 0; } } else { - blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1])); + if(int(blank_queue.size()) + 1 < last_lword) + { + blank_queue.push(string(UtfConverter::toUtf8(*tmpblank[i-1]))); + } } pair tr; @@ -2767,17 +2748,7 @@ Transfer::applyRule() } delete[] word; } - if(blank) - { - for(unsigned int i = 0; i != limit - 1; i++) - { - delete blank[i]; - blank[i] = 0; - } - delete[] blank; - } word = NULL; - blank = NULL; tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); diff --git a/apertium/transfer.h b/apertium/transfer.h index 6a5b06a..68409a7 100644 --- a/apertium/transfer.h +++ b/apertium/transfer.h @@ -34,6 +34,7 @@ #include #include #include +#include using namespace std; @@ -55,10 +56,9 @@ private: xmlDoc *doc; xmlNode *root_element; TransferWord **word; - string **blank; + queue blank_queue; int lword, lblank; int last_lword; - string noblank = ""; Buffer input_buffer; vector tmpword; vector tmpblank; diff --git 
a/tests/data/apertium-nno-nob.nob-nno.t1x b/tests/data/apertium-nno-nob.nob-nno.t1x index 1de6011..e84d857 100644 --- a/tests/data/apertium-nno-nob.nob-nno.t1x +++ b/tests/data/apertium-nno-nob.nob-nno.t1x @@ -12,6 +12,12 @@ + + + + + + @@ -99,13 +105,12 @@ - - + @@ -113,7 +118,7 @@ - + @@ -144,5 +149,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/transfer/__init__.py b/tests/transfer/__init__.py index 805b0f1..d92b58d 100644 --- a/tests/transfer/__init__.py +++ b/tests/transfer/__init__.py @@ -97,12 +97,23 @@ class SlLemqTest(TransferTest): class WordboundBlankTest(TransferTest): inputs = ["[blank1] [[t:s:123456]]^worda/wordta$ ;[blank2]; [[t:b:xyz123; t:l:xyz347]]^wordb/wordtb$ [blank3]; [[t:i:abc123; t:s:abc123]]^hun/ho$"] - expectedOutputs = ["[blank1] ^prn{[[t:i:abc123; t:s:abc123]]^ho$ [[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ;[blank2]; ^det{[[t:s:123456; t:i:abc123; t:s:abc123]]^wordta+ho$}$ [blank3]; "] + expectedOutputs = ["[blank1] ^prn{[[t:i:abc123; t:s:abc123]]^ho$[[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ;[blank2]; ^det{[[t:s:123456; t:i:abc123; t:s:abc123]]^wordta+ho$}$ [blank3]; "] class SingleLUWordboundBlankTest(TransferTest): inputs = ["[blank1] [[t:s:123456]]^worda/wordta$ ;[blank2]; [[t:b:xyz123; t:l:xyz347]]^wordb/wordtb$ [blank3]; "] expectedOutputs = ["[blank1] ^nacr{[[t:s:123456]]^test$ [[t:s:123456]]^wordta$}$ ^nacr2{[[t:s:123456]]^testlem$ [[t:s:123456]]^wordta+postp$}$ ;[blank2]; ^nacr{[[t:b:xyz123; t:l:xyz347]]^test$ [[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ^nacr2{[[t:b:xyz123; t:l:xyz347]]^testlem$ [[t:b:xyz123; t:l:xyz347]]^wordtb+postp$}$ [blank3]; "] +class SuperblankTest(TransferTest): + inputs = [ "[blank1] ^worda/wordta$ ;[blank2]; ^wordb/wordtb$ [blank3]; ^hun/ho$ [blank4] ", #Rule: superblankrule1 -> No in rule output, should flush all blanks after rule output + "[blank1] ^worda/wordta$ ;[blank2]; ^wordb/wordtb$ [blank3]; ^hun/ho$ [blank4] ", #No rule 
matches, should print all blanks as is + "[blank1] ^wordb/wordtb$ ;[blank2]; ^worda/wordta$ [blank3]; ^hun/ho$ [blank4] ", #Rule: superblankrule2 -> One <b/> in rule output, should print one and flush the rest + "[blank1] ^hun/ho$ ;[blank2]; ^worda/wordta$ [blank3]; [blank4] ", #Rule: superblankrule3 -> Input rule has 1 blank, output has 3, should print input blank for the first and just spaces for the rest + "[blank1] ^hun/ho$ ;[blank2]; ^worda/wordta$ [blank3]; ^wordb/wordtb$ ;[blank4]; ^worda/wordta$ [blank5]; ^hun/ho$ [blank6] "] #Multiple rule matches + expectedOutputs = [ "[blank1] ^test1{^wordta$^wordtb$^ho$}$ ;[blank2]; [blank3]; [blank4] ", + "[blank1] ^default{^wordta$}$ ;[blank2]; ^default{^wordtb$}$ [blank3]; ^default{^ho$}$ [blank4] ", + "[blank1] ^test1{^wordta$ ;[blank2]; ^ho$}$ [blank3]; [blank4] ", + "[blank1] ^test1{^ho$ ;[blank2]; ^wordta$ ^ho$ ^wordta$}$ [blank3]; [blank4] ", + "[blank1] ^test1{^ho$ ;[blank2]; ^wordta$ ^ho$ ^wordta$}$ [blank3]; ^test1{^wordta$ ;[blank4]; ^ho$}$ [blank5]; [blank6] "] class BincompatTest(BasicTransferTest): bindata = "data/bincompat.t1x.bin"