Index: branches/apertium-separable/src/lsx_processor.cc =================================================================== --- branches/apertium-separable/src/lsx_processor.cc (revision 80979) +++ branches/apertium-separable/src/lsx_processor.cc (revision 80980) @@ -104,6 +104,27 @@ while(!feof(input)) { int val = fgetwc(input); + + if(outOfWord) + { + wstring blank = L""; + while(val != L'^' && !feof(input)) + { + blank += val; + val = fgetwc(input); + } + blanks.push_back(blank); + fputws(blanks.front().c_str(),output); + blanks.pop_front(); + } + + if((val == L'^' && !isEscaped && outOfWord)) + { + outOfWord = false; + in += val; + continue; + } + if(alive_states.size() == 0 && !finalFound) { alive_states.push_back(*initial_state); @@ -117,13 +138,6 @@ finalFound = false; } - if((val == L'^' && !isEscaped && outOfWord)) - { - outOfWord = false; - in += val; - continue; - } - if((feof(input) || val == L'$') && !isEscaped && !outOfWord) { new_states.clear(); @@ -208,7 +222,7 @@ // out[i+1] = L'^'; // } } - out = out.substr(0, out.length()-3); // remove extra trailing '$ ^' : '^ ' is excess, '$' will be added in the next loop with fputws(in,output) + // out = out.substr(0, out.length()-3); // remove extra trailing '$ ^' : '^ ' is excess, '$' will be added in the next loop with fputws(in,output) /* FIXME another hack */ // if(leading) { // fputwc(L' ', output); @@ -218,40 +232,41 @@ } alive_states.swap(new_states); } - else if(outOfWord) // FIXME need to deal with superblank stuff - { - // wcout << (wchar_t) val << endl; - if(val == L' ') - { - wstring blank = L""; - blank += static_cast(val); - blanks.push_back(blank); - // wcout << "b" << blank << "b"; - } - else if(val == L'[') // tag - { - wstring blank = readFullBlock(input, L'[', L']'); - blanks.push_back(blank); - wcout << "b"<< blank<<"B"; - } - // FIXME anything between $ and ^ + // else if(outOfWord) // FIXME need to deal with superblank stuff + // { + // + // // wcout << (wchar_t) val << endl; + // if(val == L' ') + // { + // wstring blank = L""; + // blank += static_cast(val); + // blanks.push_back(blank); + // // wcout << "b" << blank << "b"; + // } + // else if(val == L'[') // tag + // { + // wstring blank = readFullBlock(input, L'[', L']'); + // blanks.push_back(blank); + // wcout << "b"<< blank<<"B"; + // } + // // FIXME anything between $ and ^ + // // else + // // { + // // fputwc(val, output); + // // continue; + // // } + // + // if(blanks.size() > 0) + // { + // // wcout << blanks.front(); + // blanks.pop_front(); + // } + // } // else // { - // fputwc(val, output); - // continue; + // wcerr << L"outOfWord error" << endl; // } - - if(blanks.size() > 0) - { - // wcout << blanks.front(); - blanks.pop_front(); } - } - else - { - wcerr << L"outOfWord error" << endl; - } - } /* FIXME removed */ // if (!finalFound)