commit 9ea7f13cdb68457cabaeecf08cb66f034033b2c5 Author: Tanmai Khanna Date: Sat Aug 29 18:03:31 2020 +0530 Superblank ordering in transfer (#102) * Change blank handling to use a blank queue * Position for now has no meaning in transfer, postchunk, or interchunk * Add space to blank queue if input blank is empty string diff --git a/apertium/interchunk.cc b/apertium/interchunk.cc index f03b550..c8873b7 100644 --- a/apertium/interchunk.cc +++ b/apertium/interchunk.cc @@ -46,9 +46,8 @@ Interchunk::destroy() Interchunk::Interchunk() : word(0), -blank(0), lword(0), -lblank(0), +last_lword(0), output(0), any_char(0), any_tag(0), @@ -62,7 +61,6 @@ nwords(0) null_flush = false; internal_null_flush = false; trace = false; - emptyblank = ""; } Interchunk::~Interchunk() @@ -270,11 +268,15 @@ Interchunk::evalString(xmlNode *element) return ti.getContent(); case ti_b: - if(ti.getPos() >= 0 && checkIndex(element, ti.getPos(), lblank)) + if(!blank_queue.empty()) { - return !blank?"":*(blank[ti.getPos()]); + string retblank = blank_queue.front(); + blank_queue.pop(); + + return retblank; } - else { + else + { return " "; } break; @@ -420,6 +422,15 @@ Interchunk::processOut(xmlNode *localroot) } } } + + while(!blank_queue.empty()) //flush remaining blanks that are not spaces + { + if(blank_queue.front().compare(" ") != 0) + { + fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output); + } + blank_queue.pop(); + } } string @@ -649,38 +660,19 @@ Interchunk::processCallMacro(xmlNode *localroot) { myword = new InterchunkWord *[npar]; } - string **myblank = NULL; - if(npar > 0) - { - myblank = new string *[npar]; - myblank[npar-1] = &emptyblank; - } int idx = 0; - int lastpos = 0; for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) { int pos = atoi((const char *) i->properties->children->content)-1; myword[idx] = word[pos]; - if(idx-1 >= 0) - { - if(lastpos + 1 > lblank) { // if a 1-pattern rule calls macro with 
same - noblank = ""; // param twice the blank array will be empty - myblank[idx-1] = &noblank; - } - else { - myblank[idx-1] = blank[lastpos]; - } - } idx++; - lastpos = pos; } } swap(myword, word); - swap(myblank, blank); swap(npar, lword); for(xmlNode *i = macro->children; i != NULL; i = i->next) @@ -692,11 +684,9 @@ Interchunk::processCallMacro(xmlNode *localroot) } swap(myword, word); - swap(myblank, blank); swap(npar, lword); delete[] myword; - delete[] myblank; } void @@ -1481,6 +1471,8 @@ Interchunk::interchunk(FILE *in, FILE *out) size_t lastrule_line = rule_lines[val-1]; lastrule = rule_map[val-1]; last = input_buffer.getPos(); + + last_lword = tmpword.size(); if(trace) { @@ -1543,20 +1535,22 @@ Interchunk::applyRule() { word = new InterchunkWord *[limit]; lword = limit; - if(limit != 1) - { - blank = new string *[limit - 1]; - lblank = limit - 1; - } - else - { - blank = NULL; - lblank = 0; - } } else { - blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1])); + if(int(blank_queue.size()) < last_lword - 1) + { + string blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1])); + + if(!blank_to_add.empty()) + { + blank_queue.push(blank_to_add); + } + else + { + blank_queue.push(" "); + } + } } word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i])); @@ -1573,16 +1567,8 @@ Interchunk::applyRule() } delete[] word; } - if(blank) - { - for(unsigned int i = 0; i != limit - 1; i++) - { - delete blank[i]; - } - delete[] blank; - } + word = NULL; - blank = NULL; tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); diff --git a/apertium/interchunk.h b/apertium/interchunk.h index 02fea7c..49ee2a0 100644 --- a/apertium/interchunk.h +++ b/apertium/interchunk.h @@ -34,6 +34,7 @@ #include #include #include +#include using namespace std; @@ -55,9 +56,9 @@ private: xmlDoc *doc; xmlNode *root_element; InterchunkWord **word; - string **blank; - int lword, lblank; - string noblank = ""; + queue blank_queue; + int lword; + int last_lword; Buffer 
input_buffer; vector tmpword; vector tmpblank; diff --git a/apertium/postchunk.cc b/apertium/postchunk.cc index 1ac8110..32ce004 100644 --- a/apertium/postchunk.cc +++ b/apertium/postchunk.cc @@ -48,9 +48,7 @@ Postchunk::destroy() Postchunk::Postchunk() : word(0), -blank(0), lword(0), -lblank(0), output(0), any_char(0), any_tag(0), @@ -347,11 +345,15 @@ Postchunk::evalString(xmlNode *element) return ti.getContent(); case ti_b: - if(ti.getPos() >= 0 && checkIndex(element, ti.getPos(), lblank)) + if(!blank_queue.empty()) { - return !blank?"":*(blank[ti.getPos()]); + string retblank = blank_queue.front(); + blank_queue.pop(); + + return retblank; } - else { + else + { return " "; } break; @@ -663,6 +665,15 @@ Postchunk::processOut(xmlNode *localroot) } } } + + while(!blank_queue.empty()) //flush remaining blanks that are not spaces + { + if(blank_queue.front().compare(" ") != 0) + { + fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output); + } + blank_queue.pop(); + } } void @@ -915,17 +926,11 @@ Postchunk::processCallMacro(xmlNode *localroot) { myword = new InterchunkWord *[npar+1]; } - string **myblank = NULL; - if(npar > 0) - { - myblank = new string *[npar]; - } myword[0] = word[0]; bool indexesOK = true; int idx = 1; - int lastpos = 0; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -936,18 +941,11 @@ Postchunk::processCallMacro(xmlNode *localroot) pos = 1; } myword[idx] = word[pos]; - if(blank) - { - myblank[idx-1] = blank[lastpos]; - } - idx++; - lastpos = pos; } } swap(myword, word); - swap(myblank, blank); swap(npar, lword); if(indexesOK) { @@ -964,11 +962,9 @@ Postchunk::processCallMacro(xmlNode *localroot) } swap(myword, word); - swap(myblank, blank); swap(npar, lword); delete[] myword; - delete[] myblank; } void @@ -1850,23 +1846,19 @@ Postchunk::applyRule() for(unsigned int i = 1, limit = tmpword.size()+1; i != limit; i++) { - if(i == 1) + if(i != 1) { - if(limit != 2) + string 
blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1])); + + if(!blank_to_add.empty()) { - blank = new string *[limit - 2]; - lblank = limit - 3; + blank_queue.push(blank_to_add); } else { - blank = NULL; - lblank = 0; + blank_queue.push(" "); } } - else - { - blank[i-2] = new string(UtfConverter::toUtf8(*tmpblank[i-1])); - } word[i] = new InterchunkWord(UtfConverter::toUtf8(*tmpword[i-1])); } @@ -1882,16 +1874,7 @@ Postchunk::applyRule() } delete[] word; } - if(blank) - { - for(unsigned int i = 0, limit = tmpword.size() - 1; i != limit; i++) - { - delete blank[i]; - } - delete[] blank; - } word = NULL; - blank = NULL; for(unsigned int i = 0, limit = tmpword.size(); i != limit; i++) { diff --git a/apertium/postchunk.h b/apertium/postchunk.h index 5a92234..f10d29f 100644 --- a/apertium/postchunk.h +++ b/apertium/postchunk.h @@ -33,6 +33,7 @@ #include #include #include +#include using namespace std; @@ -54,8 +55,8 @@ private: xmlDoc *doc; xmlNode *root_element; InterchunkWord **word; - string **blank; - int lword, lblank; + queue blank_queue; + int lword; Buffer input_buffer; vector tmpword; vector tmpblank; diff --git a/apertium/transfer.cc b/apertium/transfer.cc index 8109073..f20af4b 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -51,9 +51,7 @@ Transfer::destroy() Transfer::Transfer() : word(0), -blank(0), lword(0), -lblank(0), last_lword(0), output(0), any_char(0), @@ -72,7 +70,6 @@ nwords(0) internal_null_flush = false; trace = false; trace_att = false; - emptyblank = ""; in_lu = false; in_let_var = false; } @@ -472,11 +469,15 @@ Transfer::evalString(xmlNode *element) return ti.getContent(); case ti_b: - if(ti.getPos() >= 0 && checkIndex(element, ti.getPos(), lblank)) + if(!blank_queue.empty()) { - return !blank?"":*(blank[ti.getPos()]); + string retblank = blank_queue.front(); + blank_queue.pop(); + + return retblank; } - else { + else + { return " "; } break; @@ -588,15 +589,7 @@ Transfer::evalString(xmlNode *element) } else 
if(!xmlStrcmp(element->name, (const xmlChar *) "b")) { - if(element->properties == NULL) - { - evalStringCache[element] = TransferInstr(ti_b, " ", -1); - } - else - { - int pos = atoi((const char *) element->properties->children->content) - 1; - evalStringCache[element] = TransferInstr(ti_b, "", pos); - } + evalStringCache[element] = TransferInstr(ti_b, " ", -1); } else if(!xmlStrcmp(element->name, (const xmlChar *) "get-case-from")) { @@ -887,6 +880,15 @@ Transfer::processOut(xmlNode *localroot) } } } + + while(!blank_queue.empty()) //flush remaining blanks that are not spaces + { + if(blank_queue.front().compare(" ") != 0) + { + fputws_unlocked(UtfConverter::fromUtf8(blank_queue.front()).c_str(), output); + } + blank_queue.pop(); + } } string @@ -1427,15 +1429,8 @@ Transfer::processCallMacro(xmlNode *localroot) myword = new TransferWord *[npar]; std::fill(myword, myword+npar, (TransferWord *)(0)); } - string **myblank = NULL; - if(npar > 0) - { - myblank = new string *[npar]; - myblank[npar-1] = &emptyblank; - } - + int idx = 0; - int lastpos = 0; for(xmlNode *i = localroot->children; npar && i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1446,23 +1441,12 @@ Transfer::processCallMacro(xmlNode *localroot) } int pos = atoi((const char *) i->properties->children->content)-1; myword[idx] = word[pos]; - if(idx-1 >= 0) - { - if(lastpos + 1 > lblank) { // if a 1-pattern rule calls macro with same - noblank = ""; // param twice the blank array will be empty - myblank[idx-1] = &noblank; - } - else { - myblank[idx-1] = blank[lastpos]; - } - } + idx++; - lastpos = pos; } } swap(myword, word); - swap(myblank, blank); swap(npar, lword); for(xmlNode *i = macro->children; i != NULL; i = i->next) @@ -1474,11 +1458,9 @@ Transfer::processCallMacro(xmlNode *localroot) } swap(myword, word); - swap(myblank, blank); swap(npar, lword); delete[] myword; - delete[] myblank; } int @@ -2615,20 +2597,22 @@ Transfer::applyRule() word = new TransferWord *[limit]; 
std::fill(word, word+limit, (TransferWord *)(0)); lword = limit; - if(limit != 1) - { - blank = new string *[limit - 1]; - lblank = limit - 1; - } - else - { - blank = NULL; - lblank = 0; - } } else { - blank[i-1] = new string(UtfConverter::toUtf8(*tmpblank[i-1])); + if(int(blank_queue.size()) < last_lword - 1) + { + string blank_to_add = string(UtfConverter::toUtf8(*tmpblank[i-1])); + + if(!blank_to_add.empty()) + { + blank_queue.push(blank_to_add); + } + else + { + blank_queue.push(" "); + } + } } pair tr; @@ -2767,17 +2751,7 @@ Transfer::applyRule() } delete[] word; } - if(blank) - { - for(unsigned int i = 0; i != limit - 1; i++) - { - delete blank[i]; - blank[i] = 0; - } - delete[] blank; - } word = NULL; - blank = NULL; tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); diff --git a/apertium/transfer.h b/apertium/transfer.h index 6a5b06a..9f6c026 100644 --- a/apertium/transfer.h +++ b/apertium/transfer.h @@ -34,6 +34,7 @@ #include #include #include +#include using namespace std; @@ -55,10 +56,9 @@ private: xmlDoc *doc; xmlNode *root_element; TransferWord **word; - string **blank; - int lword, lblank; + queue blank_queue; + int lword; int last_lword; - string noblank = ""; Buffer input_buffer; vector tmpword; vector tmpblank; diff --git a/tests/data/apertium-nno-nob.nno-nob.t2x b/tests/data/apertium-nno-nob.nno-nob.t2x new file mode 100644 index 0000000..c97adf2 --- /dev/null +++ b/tests/data/apertium-nno-nob.nno-nob.t2x @@ -0,0 +1,246 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/tests/data/apertium-nno-nob.nno-nob.t3x b/tests/data/apertium-nno-nob.nno-nob.t3x index df91750..7bc0d2b 100644 --- a/tests/data/apertium-nno-nob.nno-nob.t3x +++ b/tests/data/apertium-nno-nob.nno-nob.t3x @@ -17,6 +17,15 @@ + + + + + + + + + @@ -177,5 +186,63 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/apertium-nno-nob.nob-nno.t1x b/tests/data/apertium-nno-nob.nob-nno.t1x index 1de6011..e84d857 100644 --- a/tests/data/apertium-nno-nob.nob-nno.t1x +++ b/tests/data/apertium-nno-nob.nob-nno.t1x @@ -12,6 +12,12 @@ + + + + + + @@ -99,13 +105,12 @@ - - + @@ -113,7 +118,7 @@ - + @@ -144,5 +149,62 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/bincompat.t2x.bin b/tests/data/bincompat.t2x.bin new file mode 100644 index 0000000..5cde7a7 Binary files /dev/null and b/tests/data/bincompat.t2x.bin differ diff --git a/tests/data/nno-nob.t2x.bin b/tests/data/nno-nob.t2x.bin new file mode 100644 index 0000000..a437354 Binary files /dev/null and b/tests/data/nno-nob.t2x.bin differ diff --git a/tests/interchunk/__init__.py b/tests/interchunk/__init__.py new file mode 100644 index 0000000..14ff9c0 --- /dev/null +++ b/tests/interchunk/__init__.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import unittest + +from subprocess import Popen, PIPE, call + +import signal + + +class Alarm(Exception): + pass + + +class InterchunkTest(unittest.TestCase): + """Subclass and override inputs/expectedOutputs (and possibly other +stuff) to create new interchunk tests.""" + + bindata = "data/nno-nob.t2x.bin" + t2xdata = "data/apertium-nno-nob.nno-nob.t2x" + flags = ["-z"] + inputs = [""] + expectedOutputs = [""] + expectedRetCodeFail = False + + def alarmHandler(self, signum, frame): + raise Alarm + + def withTimeout(self, seconds, cmd, *args, **kwds): + signal.signal(signal.SIGALRM, 
self.alarmHandler) + signal.alarm(seconds) + ret = cmd(*args, **kwds) + signal.alarm(0) # reset the alarm + return ret + + def communicateFlush(self, string): + self.proc.stdin.write(string.encode('utf-8')) + self.proc.stdin.write(b'\0') + self.proc.stdin.flush() + + output = [] + char = None + try: + char = self.withTimeout(2, self.proc.stdout.read, 1) + except Alarm: + pass + while char and char != b'\0': + output.append(char) + try: + char = self.withTimeout(2, self.proc.stdout.read, 1) + except Alarm: + break # send what we got up till now + + return b"".join(output).decode('utf-8') + + def compile(self): + compileCmd = ["../apertium/apertium-preprocess-transfer", + self.t2xdata, + self.bindata] + self.assertEqual(call(compileCmd), + 0) + + def runTest(self): + self.compile() + try: + cmd = ["../apertium/apertium-interchunk"] \ + + self.flags \ + + [self.t2xdata, self.bindata] + self.proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE) + + for inp, exp in zip(self.inputs, self.expectedOutputs): + self.assertEqual(self.communicateFlush(inp+"[][\n]"), + exp+"[][\n]") + + self.proc.communicate() # let it terminate + self.proc.stdin.close() + self.proc.stdout.close() + self.proc.stderr.close() + retCode = self.proc.poll() + if self.expectedRetCodeFail: + self.assertNotEqual(retCode, 0) + else: + self.assertEqual(retCode, 0) + + finally: + pass + + +class SimpleInterchunkTest(InterchunkTest): + inputs = ["^prn{^ho$}$ ^prn2{^ho$}$", + "^vblex{^gå$}$^default{^.$}$"] + expectedOutputs = ["^prn2{^ho$}$^prn{^ho$}$", + "^vblex{^gå$}$^default{^.$}$"] + +class WordboundBlankTest(InterchunkTest): + inputs = ["^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$", + "^prn{[[t:b:abc823]]^ho$}$ ^prn2{[[t:i:poa023; t:span:12xas23]]^ho$}$"] + expectedOutputs = ["^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$", + "^prn2{[[t:i:poa023; t:span:12xas23]]^ho$}$^prn{[[t:b:abc823]]^ho$}$"] + +class SuperblankTest(InterchunkTest): + inputs = [ 
"[blank1];; ^test1{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];;", #superblankrule1 + "[blank1];; ^test1{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];;", #Blanks when no rules match + "[blank1];; ^test2{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];;", #superblankrule2 -> When output rule has more <b/> than input blanks, print all then spaces + "[blank1];; ^test3{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test1{^wordc# xyz$}$ [blank4];;", #superblankrule3 -> Output rule has no <b/>, flush all blanks after rule output + "[blank1];; ^test1{^worda$}$ ;[blank2] ^test3{^wordb# xyz$}$ ;[blank3]; ^test2{^wordc# xyz$}$ [blank4];;", #superblankrule4 -> Output rule has one <b/>, print one blank, then flush all after rule output + "[blank1];; ^test1{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];; ^test1{^worda$}$ ;[blank5] ^test3{^wordb# xyz$}$ ;[blank6]; ^test2{^wordc# xyz$}$ [blank7];;", #Multiple matching rules -> superblankrule1 & superblankrule4 + "[blank1];; ^test1{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test2x{^wordc# xyz$}$ [blank4];; ^test2{^wordb# xyz$}$ ;[blank5];"] #Rule followed by unknown + + expectedOutputs = [ "[blank1];; ^test2{^wordb# xyz$}$ ;[blank2] ^test1{^worda$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];;", + "[blank1];; ^test1{^worda$}$ ;[blank2] ^test2{^wordb# xyz$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];;", + "[blank1];; ^test2{^wordb# xyz$}$ ;[blank2] ^test2{^worda$}$ ;[blank3]; ^test2{^wordb# xyz$}$ ^test2{^worda$}$ ^test3{^wordc# xyz$}$ [blank4];;", + "[blank1];; ^test2{^wordb# xyz$}$^test3{^worda$}$^test1{^wordc# xyz$}$ ;[blank2] ;[blank3]; [blank4];;", + "[blank1];; ^test3{^wordb# xyz$}$^test1{^worda$}$ ;[blank2] ^test2{^wordc# xyz$}$ ;[blank3]; [blank4];;", + "[blank1];; ^test2{^wordb# xyz$}$ ;[blank2] ^test1{^worda$}$ ;[blank3]; ^test3{^wordc# xyz$}$ [blank4];; 
^test3{^wordb# xyz$}$^test1{^worda$}$ ;[blank5] ^test2{^wordc# xyz$}$ ;[blank6]; [blank7];;", + "[blank1];; ^test2{^wordb# xyz$}$ ;[blank2] ^test1{^worda$}$ ;[blank3]; ^test2x{^wordc# xyz$}$ [blank4];; ^test2{^wordb# xyz$}$ ;[blank5];"] + + +class BincompatTest(SimpleInterchunkTest): + bindata = "data/bincompat.t2x.bin" + + def compile(self): + pass diff --git a/tests/postchunk/__init__.py b/tests/postchunk/__init__.py index e83de7f..fa2e49d 100644 --- a/tests/postchunk/__init__.py +++ b/tests/postchunk/__init__.py @@ -114,6 +114,25 @@ class WordboundBlankTest(PostchunkTest): class SingleLUWordboundBlankTest(PostchunkTest): inputs = ["^thing_wb{^[[t:i:xyzabc]]thing$}$ ^n_n{[[t:b:123456]]^worda$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz$}$ [blanks] ^thing_wb{^[[t:i:xyzabc]]thing$}$ [blankx] ^vblex{[[t:b:123zbc]]^gå$}$^default{^.$}$ [blanks3] ^thing{^[[t:i:xyzabc]]thing$}$"] expectedOutputs = ["[[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [[t:s:xyzab12]]^wordb# xyz$ ;[testblank] [[t:b:123456]]^worda$ [[t:b:123456; t:s:xyzab12]]^worda+wordb# xyz$ [blanks] [[t:i:xyzabc]]^newthing$ [[t:i:xyzabc]]^thing$ [[t:i:xyzabc]]^thing+newpr$ [blankx] [[t:b:123zbc]]^gå$^.$ [blanks3] [[t:i:xyzabc]]^thing$"] + +class SuperblankTest(PostchunkTest): + inputs = [ "[blank1];; ^n_n{^worda$ ;[blank2] [blank2.1]; ^wordb# xyz$}$ ;[blank3]; ", + "[blank1];; ^n_k{^worda$ ;[blank2] ^wordb# xyz$}$ ;[blank3]; ", #Blanks when no rules match + "[blank1];; ^n_n2{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #superblank rule 1 -> When output rule has more <b/> than input blanks, print all then spaces + "[blank1];; ^n_n3{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #superblank rule 2 -> Output rule has no <b/>, flush all blanks after rule output + "[blank1];; ^n_n4{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ", #superblank rule 3 -> Output rule has one <b/>, print one blank, then flush all after rule output + "[blank1];; 
^n_n{^worda$ ;[blank2] ^wordb# xyz$}$ ;[blank3]; ^n_n4{^worda$ ;[blank4] ^wordb# xyz$ ;[blank5]; ^wordc$}$ ;[blank6]; ", #Multiple matching rules + "[blank1];; ^n_n2{^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ^wordc$}$ ;[blank4]; ^n_k{^worda$}$ ;[blank5]" ] #Matching rule followed by unknown word + + expectedOutputs = [ "[blank1];; ^wordb# xyz$ ;[blank2] [blank2.1]; ^worda$ ^worda+wordb# xyz$ ;[blank3]; ", + "[blank1];; ^worda$ ;[blank2] ^wordb# xyz$ ;[blank3]; ", + "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ;[blank3]; ^worda+wordb# xyz$ ^wordc$ ;[blank4]; ", + "[blank1];; ^wordb# xyz$^worda$^worda+wordb# xyz$^wordc$ ;[blank2] ;[blank3]; ;[blank4]; ", + "[blank1];; ^wordb# xyz$^worda$ ;[blank2] ^worda+wordb# xyz$^wordc$ ;[blank3]; ;[blank4]; ", + "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ^worda+wordb# xyz$ ;[blank3]; ^wordb# xyz$^worda$ ;[blank4] ^worda+wordb# xyz$^wordc$ ;[blank5]; ;[blank6]; ", + "[blank1];; ^wordb# xyz$ ;[blank2] ^worda$ ;[blank3]; ^worda+wordb# xyz$ ^wordc$ ;[blank4]; ^worda$ ;[blank5]" ] + + class BincompatTest(SimplePostchunkTest): bindata = "data/bincompat.t3x.bin" diff --git a/tests/run_tests.py b/tests/run_tests.py index 96fc24d..5930e63 100755 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -8,6 +8,7 @@ import unittest import tagger import pretransfer import transfer +import interchunk import postchunk import adaptdocx @@ -17,6 +18,7 @@ if __name__ == "__main__": for module in [tagger, pretransfer, transfer, + interchunk, postchunk, adaptdocx]: suite = unittest.TestLoader().loadTestsFromModule(module) diff --git a/tests/transfer/__init__.py b/tests/transfer/__init__.py index 805b0f1..d92b58d 100644 --- a/tests/transfer/__init__.py +++ b/tests/transfer/__init__.py @@ -97,12 +97,23 @@ class SlLemqTest(TransferTest): class WordboundBlankTest(TransferTest): inputs = ["[blank1] [[t:s:123456]]^worda/wordta$ ;[blank2]; [[t:b:xyz123; t:l:xyz347]]^wordb/wordtb$ [blank3]; [[t:i:abc123; t:s:abc123]]^hun/ho$"] - expectedOutputs = ["[blank1] 
^prn{[[t:i:abc123; t:s:abc123]]^ho$ [[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ;[blank2]; ^det{[[t:s:123456; t:i:abc123; t:s:abc123]]^wordta+ho$}$ [blank3]; "] + expectedOutputs = ["[blank1] ^prn{[[t:i:abc123; t:s:abc123]]^ho$[[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ;[blank2]; ^det{[[t:s:123456; t:i:abc123; t:s:abc123]]^wordta+ho$}$ [blank3]; "] class SingleLUWordboundBlankTest(TransferTest): inputs = ["[blank1] [[t:s:123456]]^worda/wordta$ ;[blank2]; [[t:b:xyz123; t:l:xyz347]]^wordb/wordtb$ [blank3]; "] expectedOutputs = ["[blank1] ^nacr{[[t:s:123456]]^test$ [[t:s:123456]]^wordta$}$ ^nacr2{[[t:s:123456]]^testlem$ [[t:s:123456]]^wordta+postp$}$ ;[blank2]; ^nacr{[[t:b:xyz123; t:l:xyz347]]^test$ [[t:b:xyz123; t:l:xyz347]]^wordtb$}$ ^nacr2{[[t:b:xyz123; t:l:xyz347]]^testlem$ [[t:b:xyz123; t:l:xyz347]]^wordtb+postp$}$ [blank3]; "] +class SuperblankTest(TransferTest): + inputs = [ "[blank1] ^worda/wordta$ ;[blank2]; ^wordb/wordtb$ [blank3]; ^hun/ho$ [blank4] ", #Rule: superblankrule1 -> No <b/> in rule output, should flush all blanks after rule output + "[blank1] ^worda/wordta$ ;[blank2]; ^wordb/wordtb$ [blank3]; ^hun/ho$ [blank4] ", #No rule matches, should print all blanks as is + "[blank1] ^wordb/wordtb$ ;[blank2]; ^worda/wordta$ [blank3]; ^hun/ho$ [blank4] ", #Rule: superblankrule2 -> One <b/> in rule output, should print one and flush the rest + "[blank1] ^hun/ho$ ;[blank2]; ^worda/wordta$ [blank3]; [blank4] ", #Rule: superblankrule3 -> Input rule has 1 blank, output has 3 <b/>, should print input blank for the first and just spaces for the rest + "[blank1] ^hun/ho$ ;[blank2]; ^worda/wordta$ [blank3]; ^wordb/wordtb$ ;[blank4]; ^worda/wordta$ [blank5]; ^hun/ho$ [blank6] "] #Multiple rule matches + expectedOutputs = [ "[blank1] ^test1{^wordta$^wordtb$^ho$}$ ;[blank2]; [blank3]; [blank4] ", + "[blank1] ^default{^wordta$}$ ;[blank2]; ^default{^wordtb$}$ [blank3]; ^default{^ho$}$ [blank4] ", + "[blank1] ^test1{^wordta$ ;[blank2]; ^ho$}$ [blank3]; [blank4] ", + "[blank1] ^test1{^ho$ ;[blank2]; 
^wordta$ ^ho$ ^wordta$}$ [blank3]; [blank4] ", + "[blank1] ^test1{^ho$ ;[blank2]; ^wordta$ ^ho$ ^wordta$}$ [blank3]; ^test1{^wordta$ ;[blank4]; ^ho$}$ [blank5]; [blank6] "] class BincompatTest(BasicTransferTest): bindata = "data/bincompat.t1x.bin"