Index: branches/apertium-separable/src/lsx_processor.cc =================================================================== --- branches/apertium-separable/src/lsx_processor.cc (revision 80775) +++ branches/apertium-separable/src/lsx_processor.cc (revision 80776) @@ -1,19 +1,16 @@ -#include -#include -#include -#include -#include -// #include -#include -#include +// #include +// #include +// #include +// #include +// #include +// #include +// #include -#include #include -#include #include #include +#include #include -#include /* get the text between delim1 and delim2 */ wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); @@ -93,8 +90,7 @@ escaped_chars.insert(L'<'); escaped_chars.insert(L'>'); - State *initial_state; - initial_state = new State(); + State *initial_state = new State(); initial_state->init(transducer.getInitial()); anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end()); @@ -110,18 +106,23 @@ bool isEscaped = false; bool finalFound = false; - wstring out; - wstring in; + wstring in, out; while(!feof(input)) { + if(alive_states.size() == 0 && !finalFound) + { + alive_states.push_back(*initial_state); + fputws(in.c_str(), output); + in = L""; + } + int val = fgetwc(input); // read 1 wide char // wcout << L"| " << (wchar_t)val << L" | val: " << val << L" || as.size(): " << alive_states.size() << L" || out of word: " << outOfWord << endl; - if((val == L'^' && !isEscaped && outOfWord) /*|| val == L' '*/) + if((val == L'^' && !isEscaped && outOfWord)) { outOfWord = false; - // wcout << "| continue " << (wchar_t)val << endl; in += val; continue; } @@ -140,8 +141,7 @@ if(s.isFinal(anfinals)) { - wstring out = s.filterFinals(anfinals, alphabet, escaped_chars); - // cout << "FINAL: " << /*out <<*/ endl; + out += s.filterFinals(anfinals, alphabet, escaped_chars); new_states.push_back(*initial_state); } } @@ -161,7 +161,6 @@ } val = static_cast(alphabet(tag)); in += tag; - // wcout << tag << endl; // fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val); } @@ -196,22 +195,10 @@ { // cout << "finals size: " << s.size() << endl; out = s.filterFinals(anfinals, alphabet, escaped_chars); - // wcout << out << endl;s // wcerr << s.getReadableString(alphabet) << endl; new_states.push_back(*initial_state); finalFound = true; - } - } - alive_states.swap(new_states); - } - if(outOfWord) - { - continue; - } - } - if(finalFound) - { for (int i=0; i < (int) out.size(); i++) { wchar_t c = out[i]; @@ -226,16 +213,47 @@ out[i+1] = L'^'; } } - out = out.substr(0, out.length()-1); + out = out.substr(0, out.length()-2); // remove extra trailing ' ^ + fputwc(L' ', output); + fputws(out.c_str(), output); } - else + } + alive_states.swap(new_states); + } + + if(outOfWord) { - // out.assign(in); + continue; + } + } + + // if(finalFound) + // { + // for (int i=0; i < (int) out.size(); i++) + // { + // wchar_t c = out[i]; + // if(c == L'/') + // { + // out[i] = L'^'; + // } + // else if(c == L'$') + // { + // out[i-1] = '$'; + // out[i] = L' '; + // out[i+1] = L'^'; + // } + // } + // out = out.substr(0, out.length()-2); // remove extra trailing ' ^' + // } + // else + if (!finalFound) + { out = in; + // wcout << out << endl; // wcout << "equals? " << (out==in); } // wcout << out.c_str() << endl; - fputws(out.c_str(), output); + // fputws(out.c_str(), output); fputwc(L'\n', output); return 0; } \ No newline at end of file