commit 61d7c0d8f8e4ab4a5ad33f44db4dea60dc0a2422 Author: Tanmai Khanna Date: Wed Aug 26 17:05:17 2020 +0530 Fix wblank printing in postgeneration for when no rules match diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index 05a5dcd..9282cf1 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -2344,6 +2344,8 @@ FSTProcessor::postgeneration(FILE *input, FILE *output) if(lf == L"") { unsigned int mark = sf.size(); + unsigned int space_index = sf.size(); + for(unsigned int i = 1, limit = sf.size(); i < limit; i++) { if(sf[i] == L'~') @@ -2351,8 +2353,36 @@ FSTProcessor::postgeneration(FILE *input, FILE *output) mark = i; break; } + else if(sf[i] == L' ') + { + space_index = i; + } + } + + if(space_index != sf.size()) + { + fputws_unlocked(sf.substr(1, space_index-1).c_str(), output); + + if(need_end_wblank) + { + fputws_unlocked(L"[[/]]", output); + need_end_wblank = false; + fputwc_unlocked(sf[space_index], output); + flushWblanks(output); + } + else + { + fputwc_unlocked(sf[space_index], output); + } + + fputws_unlocked(sf.substr(space_index+1, mark-space_index-1).c_str(), output); + } + else + { + flushWblanks(output); + fputws_unlocked(sf.substr(1, mark-1).c_str(), output); } - fputws_unlocked(sf.substr(1, mark-1).c_str(), output); + if(mark == sf.size()) { input_buffer.back(1); diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py index 9e8db46..b7dd63d 100644 --- a/tests/lt_proc/__init__.py +++ b/tests/lt_proc/__init__.py @@ -199,7 +199,14 @@ class PostgenerationWordboundBlankEscapingTest(unittest.TestCase, ProcTest): procflags = ["-p", "-z"] inputs = [ "Systran ([[t:a:PJD9GA]]http:\/\/www.systran.de\/[[/]]).[] Systran (http:\/\/www.systran.de\/).[]"] - expectedOutputs = [ "Systran ([[t:a:PJD9GA]]http:\/\/www.systran.de\/[[/]]).[] Systran (http:\/\/www.systran.de\/).[]" ] - + expectedOutputs = [ "Systran ([[t:a:PJD9GA]]http:\/\/www.systran.de\/[[/]]).[] Systran (http:\/\/www.systran.de\/).[]"] + +class PostgenerationWordboundBlankNoRuleMatchTest(unittest.TestCase, ProcTest): + procdix = "data/postgen.dix" + procflags = ["-p", "-z"] + inputs = [ "[[t:span:HIIiRQ]]Complacer[[/]] [[t:span01:HIIiRQ]]~le[[/]] [[t:span02:HIIiRQ]]ayuda[[/]] [[11t:span:HIIiRQ; t:a:_IOHRg]]mejora[[/]] [[22t:span:HIIiRQ; t:a:_IOHRg]]~la[[/]] [[33t:span:HIIiRQ; t:a:_IOHRg]]prenda[[/]]"] + + expectedOutputs = [ "[[t:span:HIIiRQ]]Complacer[[/]] [[t:span01:HIIiRQ]]le[[/]] [[t:span02:HIIiRQ]]ayuda[[/]] [[11t:span:HIIiRQ; t:a:_IOHRg]]mejora[[/]] [[22t:span:HIIiRQ; t:a:_IOHRg]]la[[/]] [[33t:span:HIIiRQ; t:a:_IOHRg]]prenda[[/]]"] + # These fail on some systems: #from null_flush_invalid_stream_format import *