Index: branches/apertium-separable/src/lsx_FSTProcessor.cc =================================================================== --- branches/apertium-separable/src/lsx_FSTProcessor.cc (revision 81566) +++ branches/apertium-separable/src/lsx_FSTProcessor.cc (revision 81569) @@ -1198,16 +1198,12 @@ // wcout << L"SPACE"; // } - if((val == L'^' && isEscaped(val) && outOfWord) || feof(input)) { - outOfWord = false; blankqueue.push(blank); if(alive_states.size() == 0) { - if(!finalFound) - { if(blankqueue.size() > 0) { fputws(blankqueue.front().c_str(), output); @@ -1216,7 +1212,6 @@ } alive_states.push_back(*initial_state); -// wcerr << endl << L"" << in << L"" << endl; for(int i=0; i < (int) in.size(); i++) { if(in[i] == L'$' && in[i+1] == L'^' && blankqueue.size() > 0) @@ -1225,15 +1220,19 @@ blankqueue.pop(); } } + finalFound = false; fputws(in.c_str(), output); fflush(output); + in = L""; //****** } + else if(finalFound && alive_states.size() == 1) + { finalFound = false; - in = L""; } blank = L""; in += val; + outOfWord = false; continue; } @@ -1257,7 +1256,7 @@ if(s.size() > 0) { new_states.push_back(s); - } + } /* if(s.isFinal(all_finals)) { @@ -1265,17 +1264,15 @@ new_states.push_back(*initial_state); }*/ - if(s.isFinal(all_finals)) + if(s.isFinal(all_finals)) { new_states.clear(); out = s.filterFinals(all_finals, alphabet, escaped_chars); -// wcerr << endl << L"" << out << L"" << endl; -// wcerr << endl << L"" << s.size() << L"" << endl; new_states.push_back(*initial_state); - finalFound = true; - for (int i=0; i < (int) out.size(); i++) + int out_size = out.size(); + for (int i=0; i < out_size; i++) { wchar_t c = out[i]; if(c == L'/') @@ -1282,11 +1279,12 @@ { out[i] = L'^'; } - else if(c == L'$' && out[i-1] == L'<' && out[i+1] == L'>') + else if(c == L'$' && out[i-1] == L'<' && out[i+1] == L'>') // indexing { - // out.erase(i+1, 1); out[i+1] = L'^'; out.erase(i-1, 1); + out_size--; + i--; } } if(out[out.length()-1] == L'^') @@ -1293,9 +1291,9 @@ { out = out.substr(0, out.length()-1); // extra ^ at the end } - else + else // take# out ... of { - for(int i=out.length()-1; i>=0; i--) + for(int i=out.length()-1; i>=0; i--) // indexing { if(out[i] == L'$') { @@ -1305,28 +1303,45 @@ } out += L'$'; } + if(blankqueue.size() > 0) { fputws(blankqueue.front().c_str(), output); blankqueue.pop(); } - for(int i=0; i < (int) out.length(); i++) + + out_size = out.size(); + for(int i=0; i < out_size; i++) // indexing { if((out[i] == L'$' || out[i] == L'#') && blankqueue.size() > 0) { out.insert(i+1, blankqueue.front().c_str()); + out_size += blankqueue.front().size(); blankqueue.pop(); } + else if(out[i] == L' ' && blankqueue.size() > 0) + { + out.insert(i+1, blankqueue.front().c_str()); + out.erase(i,1); + out_size += (blankqueue.front().size() - 1); + blankqueue.pop(); } + } fputws(out.c_str(), output); flushBlanks(output); + finalFound = true; + out = L""; + in = L""; } } alive_states.swap(new_states); outOfWord = true; - in += val; + if(!finalFound) + { + in += val; //do not remove + } continue; } @@ -1335,12 +1350,12 @@ if(val == L'<') // tag { wstring tag = readFullBlock(input, L'<', L'>'); + in += tag; if(!alphabet.isSymbolDefined(tag)) { alphabet.includeSymbol(tag); } val = static_cast(alphabet(tag)); - in += tag; } else { @@ -1348,21 +1363,17 @@ } new_states.clear(); - wstring res = L""; for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) { - res = L""; State s = *it; if(val < 0) { - fflush(output); s.step_override(val, alphabet(L""), val); } else if(val > 0) { - fflush(output); int val_lowercase = towlower(val); - s.step_override(val_lowercase, alphabet(L""), val); // FIXME deal with cases! + s.step_override(val_lowercase, alphabet(L""), val); // FIXME deal with cases! in step_override } if(s.size() > 0) @@ -1378,7 +1389,6 @@ } - void FSTProcessor::tm_analysis(FILE *input, FILE *output) {