Index: branches/apertium-separable/src/lsx_processor.cc =================================================================== --- branches/apertium-separable/src/lsx_processor.cc (revision 81014) +++ branches/apertium-separable/src/lsx_processor.cc (revision 81018) @@ -86,7 +86,9 @@ vector new_states; vector alive_states; - list blanks; + list blankqueue; + blankqueue.push_back(L""); + wstring blank; alive_states.push_back(*initial_state); @@ -96,7 +98,6 @@ bool outOfWord = true; bool isEscaped = false; bool finalFound = false; - bool leading = false; wstring in = L""; wstring out; @@ -109,7 +110,6 @@ alive_states.push_back(*initial_state); fputws(in.c_str(), output); in = L""; - leading = true; } else if(alive_states.size() == 0 && finalFound) { @@ -117,12 +117,21 @@ finalFound = false; } - if((val == L'^' && !isEscaped && outOfWord)) + if((val == L'^' && !isEscaped && outOfWord) || feof(input)) { outOfWord = false; + blankqueue.push_back(blank); + blank = L""; + fputws(blankqueue.front().c_str(), output); + blankqueue.pop_front(); in += val; continue; } + if(outOfWord) + { + blank += val; + continue; + } if((feof(input) || val == L'$') && !isEscaped && !outOfWord) { @@ -178,7 +187,8 @@ } else if(val > 0) { - s.step_override(val, alphabet(L""), val); // FIXME deal with cases! + int val_lowercase = towlower(val); + s.step_override(val_lowercase, alphabet(L""), val); // FIXME deal with cases! } if(s.size() > 0) @@ -190,7 +200,6 @@ { out = s.filterFinals(anfinals, alphabet, escaped_chars); new_states.push_back(*initial_state); - finalFound = true; for (int i=0; i < (int) out.size(); i++) @@ -209,10 +218,6 @@ // } } out = out.substr(0, out.length()-3); // remove extra trailing '$ ^' : '^ ' is excess, '$' will be added in the next loop with fputws(in,output) - /* FIXME another hack */ - // if(leading) { - // fputwc(L' ', output); - // } fputws(out.c_str(), output); } } @@ -234,13 +239,13 @@ // { // wstring blank = L""; // blank += static_cast(val); - // blanks.push_back(blank); + // blankqueue.push_back(blank); // // wcout << "b" << blank << "b"; // } // else if(val == L'[') // tag // { // wstring blank = readFullBlock(input, L'[', L']'); - // blanks.push_back(blank); + // blankqueue.push_back(blank); // wcout << "b"<< blank<<"B"; // } // FIXME anything between $ and ^ @@ -250,12 +255,12 @@ continue; // } - if(blanks.size() > 0) - { - // wcout << blanks.front(); - blanks.pop_front(); + // if(blankqueue.size() > 0) + // { + // // wcout << blankqueue.front(); + // blankqueue.pop_front(); + // } } - } else { wcerr << L"outOfWord error" << endl; @@ -262,5 +267,15 @@ } } + + // wcout << endl << endl << L"bq size: " << blankqueue.size() << endl; + // for (auto b : blankqueue) + // wcout << b << endl; + + /* flushing rest of the blanks here */ + for (wstring b : blankqueue) + { + fputws(b.c_str(), output); + } return 0; } \ No newline at end of file