commit 15d83f8e020cb486dfb1a4eeee76c043f680dac7 Author: Tanmai Khanna Date: Mon Jul 20 12:04:07 2020 +0530 if input chunk in postchunk rule has one LU, blank outputs on all LUs in rule output | fix output blank in postchunk | add tests diff --git a/apertium/postchunk.cc b/apertium/postchunk.cc index 7e432da..13c35ef 100644 --- a/apertium/postchunk.cc +++ b/apertium/postchunk.cc @@ -489,7 +489,7 @@ Postchunk::evalString(xmlNode *element) if(lword == 1) { - out_wblank = word[0]->getBlank(); + out_wblank = word[1]->getBlank(); } if(myword != "") @@ -547,7 +547,7 @@ Postchunk::evalString(xmlNode *element) if(lword == 1) { - out_wblank = word[0]->getBlank(); + out_wblank = word[1]->getBlank(); } if(value != "") @@ -594,7 +594,7 @@ Postchunk::processOut(xmlNode *localroot) if(lword == 1) { - out_wblank = word[0]->getBlank(); + out_wblank = word[1]->getBlank(); } if(myword != "") @@ -649,7 +649,7 @@ Postchunk::processOut(xmlNode *localroot) if(lword == 1) { - out_wblank = word[0]->getBlank(); + out_wblank = word[1]->getBlank(); } fputws_unlocked(UtfConverter::fromUtf8(out_wblank).c_str(), output); diff --git a/apertium/postchunk.h b/apertium/postchunk.h index 5a92234..1a1e14d 100644 --- a/apertium/postchunk.h +++ b/apertium/postchunk.h @@ -56,6 +56,7 @@ private: InterchunkWord **word; string **blank; int lword, lblank; + int last_lword; Buffer input_buffer; vector tmpword; vector tmpblank; diff --git a/apertium/transfer.cc b/apertium/transfer.cc index ad6d8db..aceab04 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -54,6 +54,7 @@ word(0), blank(0), lword(0), lblank(0), +last_lword(0), output(0), any_char(0), any_tag(0), diff --git a/tests/data/apertium-nno-nob.nno-nob.t3x b/tests/data/apertium-nno-nob.nno-nob.t3x index a52713d..df91750 100644 --- a/tests/data/apertium-nno-nob.nno-nob.t3x +++ b/tests/data/apertium-nno-nob.nno-nob.t3x @@ -17,6 +17,9 @@ + + + @@ -155,5 +158,24 @@ + + + + + + + + + + + + + + + + + + + diff --git a/tests/postchunk/__init__.py b/tests/postchunk/__init__.py index 52c9ef3..e83de7f 100644 --- a/tests/postchunk/__init__.py +++ b/tests/postchunk/__init__.py @@ -111,6 +111,9 @@ class WordboundBlankTest(PostchunkTest): inputs = ["^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$"] expectedOutputs = ["[[t:s:xyzab12]]^wordb# xyz$ ;[testblank] [[t:b:123456]]^worda$ [[t:b:123456; t:s:xyzab12]]^worda+wordb# xyz$"] +class SingleLUWordboundBlankTest(PostchunkTest): + inputs = ["^thing_wb{^[[t:i:xyzabc]]thing$}$ ^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$ [blanks] ^thing_wb{^[[t:i:xyzabc]]thing$}$ [blankx] ^vblex{[[t:b:123zbc]]^gå$}$^default{^.$}$ [blanks3] ^thing{^[[t:i:xyzabc]]thing$}$"] + expectedOutputs = ["[[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [[t:s:xyzab12]]^wordb# xyz$ ;[testblank] [[t:b:123456]]^worda$ [[t:b:123456; t:s:xyzab12]]^worda+wordb# xyz$ [blanks] [[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [blankx] [[t:b:123zbc]]^gå$^.$ [blanks3] [[t:i:xyzabc]]^thing$"] class BincompatTest(SimplePostchunkTest): bindata = "data/bincompat.t3x.bin"