commit f5697bcd8704afc6ec571590fda82e5a76a1c909 Author: Tanmai Khanna Date: Wed Jul 15 17:30:00 2020 +0530 Fix wordbound blank merging | Add tests for wordbound blanks diff --git a/src/lsx_processor.cc b/src/lsx_processor.cc index 0163aa4..e50edba 100644 --- a/src/lsx_processor.cc +++ b/src/lsx_processor.cc @@ -293,27 +293,28 @@ LSXProcessor::processWord(FILE* input, FILE* output) if(wblank.empty()) { - for(size_t j = 0; j < out_lus.size(); j++) + bool first_bound_blank = true; + for(size_t j = 0; j < bound_blank_queue.size()-1; j++) //ignore last bound_blank as it belongs to word after matched pattern { if(bound_blank_queue[j].size() > 0) { - if(j == 0) + if(first_bound_blank) { wblank += L"[["; + first_bound_blank = false; } - else if(j > 0) + else { wblank += L"; "; } wblank += bound_blank_queue[j].c_str(); - - if(j == out_lus.size() - 1) - { - wblank += L"]]"; - } } } + if(!first_bound_blank) //if there were any wordbound blanks + { + wblank += L"]]"; + } fputws_unlocked(wblank.c_str(), output); } diff --git a/tests/lsx_proc/__init__.py b/tests/lsx_proc/__init__.py index 65fe3b2..d3acaf5 100644 --- a/tests/lsx_proc/__init__.py +++ b/tests/lsx_proc/__init__.py @@ -8,8 +8,24 @@ from proctest import ProcTest class nullFlushTest(unittest.TestCase, ProcTest): procdix = "data/short-example.dix" procflags = ["-z"] - inputs = ["^take$ ^it$ ^out$","^take$ ^me$ ^out$"] - expectedOutputs = ["^take# out$ ^it$","^take# out$ ^me$"] - + inputs = ["^take$ ^it$ ^out$", + "^take$ ^me$ ^out$", + "^the$ !!^Aragonese$ ;^take$ ;.^it$ !;^out$ ^a$", #out of LU characters test + "^the$ []^Aragonese$[] []^take$ []^it$ []^out$ []^a$[]", #normal blanks test + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123]]^take$ [[t:b:xyz567]]^it$ [[t:p:yui124]]^out$ [[t:b:uvw674]]^a$", #wordbound blank tests + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123; t:p:hgb650]]^take$ ^it$ [[t:p:yui124]]^out$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ ^take$ [[t:b:xyz567]]^it$ ^out$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ ^take$ ^it$ [[t:p:yui124]]^out$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123; t:p:hgb650]]^take$ ^it$ [[t:p:yui124; t:x:puhbj23]]^out$ [[t:b:uvw674]]^a$"] + + expectedOutputs = ["^take# out$ ^it$", + "^take# out$ ^me$", + "^the$ !!^Aragonese$ ;^take# out$ ;.^it$ !; ^a$", + "^the$ []^Aragonese$[] []^take# out$ []^it$ [] []^a$[]", + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123; t:b:xyz567; t:p:yui124]]^take# out$ [[t:s:abc123; t:b:xyz567; t:p:yui124]]^it$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123; t:p:hgb650; t:p:yui124]]^take# out$ [[t:s:abc123; t:p:hgb650; t:p:yui124]]^it$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ [[t:b:xyz567]]^take# out$ [[t:b:xyz567]]^it$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ [[t:p:yui124]]^take# out$ [[t:p:yui124]]^it$ [[t:b:uvw674]]^a$", + "^the$ [[t:b:123456]]^Aragonese$ [[t:s:abc123; t:p:hgb650; t:p:yui124; t:x:puhbj23]]^take# out$ [[t:s:abc123; t:p:hgb650; t:p:yui124; t:x:puhbj23]]^it$ [[t:b:uvw674]]^a$"] # These fail on some systems: #from null_flush_invalid_stream_format import *