commit 0698361fb3573215c50d0aa8926b74fca287e084 Author: Tanmai Khanna Date: Fri Aug 28 22:16:03 2020 +0530 blank as queue in postchunk | tests added | remove redundant variable in transfer diff --git a/apertium/postchunk.cc b/apertium/postchunk.cc index 1ac8110..ee18d08 100644 --- a/apertium/postchunk.cc +++ b/apertium/postchunk.cc @@ -48,9 +48,7 @@ Postchunk::destroy() Postchunk::Postchunk() : word(0), -blank(0), lword(0), -lblank(0), output(0), any_char(0), any_tag(0), @@ -347,11 +345,15 @@ Postchunk::evalString(xmlNode *element) return ti.getContent(); case ti_b: - if(ti.getPos() >= 0 && checkIndex(element, ti.getPos(), lblank)) + if(!blank_queue.empty()) { - return !blank?"":*(blank[ti.getPos()]); + string retblank = blank_queue.front(); + blank_queue.pop(); + + return retblank; } - else { + else + { return " "; } break; @@ -663,6 +665,12 @@ Postchunk::processOut(xmlNode *localroot) } } } + + while(!blank_queue.empty()) //flush remaining blanks + { + fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output); + blank_queue.pop(); + } } void @@ -915,17 +923,11 @@ Postchunk::processCallMacro(xmlNode *localroot) { myword = new InterchunkWord *[npar+1]; } - string **myblank = NULL; - if(npar > 0) - { - myblank = new string *[npar]; - } myword[0] = word[0]; bool indexesOK = true; int idx = 1; - int lastpos = 0; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -936,18 +938,11 @@ Postchunk::processCallMacro(xmlNode *localroot) pos = 1; } myword[idx] = word[pos]; - if(blank) - { - myblank[idx-1] = blank[lastpos]; - } - idx++; - lastpos = pos; } } swap(myword, word); - swap(myblank, blank); swap(npar, lword); if(indexesOK) { @@ -964,11 +959,9 @@ Postchunk::processCallMacro(xmlNode *localroot) } swap(myword, word); - swap(myblank, blank); swap(npar, lword); delete[] myword; - delete[] myblank; } void @@ -1850,22 +1843,9 @@ Postchunk::applyRule() for(unsigned int i = 1, limit = tmpword.size()+1; i != limit; i++) { - if(i == 1) + if(i != 1) { - if(limit != 2) - { - blank = new string *[limit - 2]; - lblank = limit - 3; - } - else - { - blank = NULL; - lblank = 0; - } - } - else - { - blank[i-2] = new string(UtfConverter::toUtf8(*tmpblank[i-1])); + blank_queue.push(string(UtfConverter::toUtf8(*tmpblank[i-1]))); } word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i-1])); @@ -1882,16 +1862,7 @@ Postchunk::applyRule() } delete[] word; } - if(blank) - { - for(unsigned int i = 0, limit = tmpword.size() - 1; i != limit; i++) - { - delete blank[i]; - } - delete[] blank; - } word = NULL; - blank = NULL; for(unsigned int i = 0, limit = tmpword.size(); i != limit; i++) { diff --git a/apertium/postchunk.h b/apertium/postchunk.h index 5a92234..f10d29f 100644 --- a/apertium/postchunk.h +++ b/apertium/postchunk.h @@ -33,6 +33,7 @@ #include #include #include +#include using namespace std; @@ -54,8 +55,8 @@ private: xmlDoc *doc; xmlNode *root_element; InterchunkWord **word; - string **blank; - int lword, lblank; + queue blank_queue; + int lword; Buffer input_buffer; vector tmpword; vector tmpblank; diff --git a/apertium/transfer.cc b/apertium/transfer.cc index ac3e24b..a391f5f 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -52,7 +52,6 @@ Transfer::destroy() Transfer::Transfer() : word(0), lword(0), -lblank(0), last_lword(0), output(0), any_char(0), @@ -2595,14 +2594,6 @@ Transfer::applyRule() word = new TransferWord *[limit]; std::fill(word, word+limit, (TransferWord *)(0)); lword = limit; - if(limit != 1) - { - lblank = limit - 1; - } - else - { - lblank = 0; - } } else { diff --git a/apertium/transfer.h b/apertium/transfer.h index 68409a7..9f6c026 100644 --- a/apertium/transfer.h +++ b/apertium/transfer.h @@ -57,7 +57,7 @@ private: xmlNode *root_element; TransferWord **word; queue blank_queue; - int lword, lblank; + int lword; int last_lword; Buffer input_buffer; vector tmpword; diff --git a/tests/data/apertium-nno-nob.nno-nob.t3x b/tests/data/apertium-nno-nob.nno-nob.t3x index df91750..7bc0d2b 100644 --- a/tests/data/apertium-nno-nob.nno-nob.t3x +++ b/tests/data/apertium-nno-nob.nno-nob.t3x @@ -17,6 +17,15 @@ + + + + + + + + + @@ -177,5 +186,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/postchunk/__init__.py b/tests/postchunk/__init__.py index e83de7f..69547cc 100644 --- a/tests/postchunk/__init__.py +++ b/tests/postchunk/__init__.py @@ -114,6 +114,26 @@ class WordboundBlankTest(PostchunkTest): class SingleLUWordboundBlankTest(PostchunkTest): inputs = ["^thing_wb{^[[t:i:xyzabc]]thing$}$ ^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$ [blanks] ^thing_wb{^[[t:i:xyzabc]]thing$}$ [blankx] ^vblex{[[t:b:123zbc]]^gå$}$^default{^.$}$ [blanks3] ^thing{^[[t:i:xyzabc]]thing$}$"] expectedOutputs = ["[[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [[t:s:xyzab12]]^wordb# xyz$ ;[testblank] [[t:b:123456]]^worda$ [[t:b:123456; t:s:xyzab12]]^worda+wordb# xyz$ [blanks] [[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [blankx] [[t:b:123zbc]]^gå$^.$ [blanks3] [[t:i:xyzabc]]^thing$"] + +class SuperblankTest(PostchunkTest): + inputs = [ "[blank1];; ^n_n{^worda$ ;[blank2] ^wordb# xyz$}$ ;[blank3]; ", + "[blank1];; ^n_k{^worda$ ;[blank2] ^wordb# xyz$}$ ;[blank3]; ", #Blanks when no rules match + "[blank1];; ^n_n2{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #When output rule has more than input blanks + "[blank1];; ^n_n3{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #Output rule has no + "[blank1];; ^n_n4{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #Output rule has one + "[blank1];; ^n_n{^worda$ ;[blank2] ^wordb# xyz$}$ ;[blank3]; ^n_n4{^worda$ ;[blank4] ^wordb# xyz$ ;[blank5]; ^wordc$}$ ;[blank6]; "] #Multiple matching rules + + expectedOutputs = [ "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ^worda+wordb# xyz$ ;[blank3]; ", + "[blank1];; ^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ", + "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ;[blank3]; ^worda+wordb# xyz$ ^wordc$ ;[blank4]; ", + "[blank1];; ^wordb# xyz$^worda$^worda+wordb# xyz$^wordc$ ;[blank2] ;[blank3]; ;[blank4]; ", + "[blank1];; ^wordb# xyz$^worda$ ;[blank2] ^worda+wordb# xyz$^wordc$ ;[blank3]; ;[blank4]; ", + "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ^worda+wordb# xyz$ ;[blank3]; ^wordb# xyz$^worda$ ;[blank4] ^worda+wordb# xyz$^wordc$ ;[blank5]; ;[blank6]; "] + + + + + class BincompatTest(SimplePostchunkTest): bindata = "data/bincompat.t3x.bin"