Index: branches/apertium-separable/src/lsx_processor.cc =================================================================== --- branches/apertium-separable/src/lsx_processor.cc (revision 81010) +++ branches/apertium-separable/src/lsx_processor.cc (revision 81011) @@ -3,12 +3,9 @@ #include #include #include -#include +/* get the text between delim1 and delim2 */ wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); -// wchar_t readEscaped(FILE *input); -// void streamError(); -void flushBlanks(FILE *output); wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2) @@ -16,36 +13,16 @@ wstring result = L""; result += delim1; wchar_t c = delim1; + while(!feof(input) && c != delim2) { c = static_cast(fgetwc(input)); result += c; - if(c != L'\\') - { - continue; } - // else - // { - // result += static_cast(readEscaped(input)); - // } - } - // if(c != delim2) - // { - // streamError(); - // } + return result; } -// void -// flushBlanks(FILE *output) -// { -// for(unsigned int i = blankqueue.size(); i > 0; i--) -// { -// fputws_unlocked(blankqueue.front().c_str(), output); -// blankqueue.pop(); -// } -// } - int main (int argc, char** argv) { if(argc != 2) @@ -56,7 +33,6 @@ Alphabet alphabet; TransExe transducer; - FSTProcessor fstp; LtLocale::tryToSetLocale(); @@ -110,7 +86,7 @@ vector new_states; vector alive_states; - list blankqueue; + list blanks; alive_states.push_back(*initial_state); @@ -120,6 +96,7 @@ bool outOfWord = true; bool isEscaped = false; bool finalFound = false; + bool leading = false; wstring in = L""; wstring out; @@ -127,37 +104,12 @@ while(!feof(input)) { int val = fgetwc(input); - - if(outOfWord) - { - // wstring blank = L""; - // while(val != L'^' && !feof(input)) - // { - // blank += val; - // val = fgetwc(input); - // } - // blankqueue.push_back(blank); - // fputwc(val, output); - // fputws(blankqueue.front().c_str(),output); - // fflush(output); - // blankqueue.pop_front(); - // outOfWord = false; - // continue; - // fputwc(val,output); - } - if(val == L'^' && !isEscaped && outOfWord) - { - outOfWord = false; - in += val; - continue; - } - if(alive_states.size() == 0 && !finalFound) { alive_states.push_back(*initial_state); fputws(in.c_str(), output); - fflush(output); in = L""; + leading = true; } else if(alive_states.size() == 0 && finalFound) { @@ -164,15 +116,21 @@ in = L""; finalFound = false; } + + if((val == L'^' && !isEscaped && outOfWord)) + { + outOfWord = false; + in += val; + continue; + } + if((feof(input) || val == L'$') && !isEscaped && !outOfWord) { - // wcout <::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) { State s = *it; s.step(alphabet(L"<$>")); - if(s.size() > 0) { new_states.push_back(s); @@ -237,7 +195,7 @@ for (int i=0; i < (int) out.size(); i++) { - // wchar_t c = out[i]; + wchar_t c = out[i]; /* FIXME these hacks */ // if(c == L'/') // { @@ -250,34 +208,59 @@ // out[i+1] = L'^'; // } } - // out = out.substr(1, out.length()-1); + out = out.substr(0, out.length()-3); // remove extra trailing '$ ^' : '^ ' is excess, '$' will be added in the next loop with fputws(in,output) /* FIXME another hack */ + // if(leading) { + // fputwc(L' ', output); + // } fputws(out.c_str(), output); - fflush(output); } } alive_states.swap(new_states); } - else if(outOfWord) // FIXME need to deal with superblnk stuff + else if(outOfWord) // FIXME need to deal with superblank stuff { - fputwc(val, output); + // wcout << (wchar_t) val << endl; + // wstring blank = L""; - // fputws(blankqueue.front().c_str(),output); - // fflush(output); - // blankqueue.pop_front(); - // continue; + // while(val != L'^' || feof(input)) + // { + // blank += static_cast(val); + // val = fgetwc(input); + // } + // skip = true; - - // if(blankqueue.size() > 0) + // if(val == L' ') // { - // fputws(blankqueue.front().c_str(), output); - // blankqueue.pop_front(); + // wstring blank = L""; + // blank += static_cast(val); + // blanks.push_back(blank); + // // wcout << "b" << blank << "b"; // } - } + // else if(val == L'[') // tag + // { + // wstring blank = readFullBlock(input, L'[', L']'); + // blanks.push_back(blank); + // wcout << "b"<< blank<<"B"; + // } + // FIXME anything between $ and ^ // else // { - // wcerr << L"outOfWord error" << endl; + fputwc(val, output); + continue; // } + + if(blanks.size() > 0) + { + // wcout << blanks.front(); + blanks.pop_front(); } + } + else + { + wcerr << L"outOfWord error" << endl; + } + } + return 0; } \ No newline at end of file