Index: branches/apertium-separable/examples/new-example.dix =================================================================== --- branches/apertium-separable/examples/new-example.dix (revision 80644) +++ branches/apertium-separable/examples/new-example.dix (revision 80645) @@ -20,7 +20,7 @@ - + Index: branches/apertium-separable/src/lsx_compiler.cc =================================================================== --- branches/apertium-separable/src/lsx_compiler.cc (revision 80644) +++ branches/apertium-separable/src/lsx_compiler.cc (revision 80645) @@ -1,50 +1,20 @@ -#include #include #include -#include #include -#include -#include #include #include -#include - -#include #include -#include -#include -#include -#include -#include -wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); - -wstring -readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2) -{ - wstring result = L""; - result += delim1; - wchar_t c = delim1; - - while(!feof(input) && c != delim2) - { - c = static_cast(fgetwc(input)); - result += c; - } - - return result; -} - int main (int argc, char** argv) { if(argc != 3) { - wcout << L"lsx-comp " << endl; + wcout << L"./lsx-comp " << endl; exit(0); } - /* compile */ + LtLocale::tryToSetLocale(); Compiler c; c.parse(argv[1], L"lr"); @@ -58,154 +28,5 @@ c.write(fst); fclose(fst); - /* process */ - - Alphabet alphabet; - TransExe transducer; - - LtLocale::tryToSetLocale(); - - fst = fopen(argv[2], "r"); - - set alphabetic_chars; - int len = Compression::multibyte_read(fst); - while(len > 0) - { - alphabetic_chars.insert(static_cast(Compression::multibyte_read(fst))); - len--; - } - - alphabet.read(fst); - wcerr << L"alphabet_size: " << alphabet.size() << endl; - - len = Compression::multibyte_read(fst); - len = Compression::multibyte_read(fst); - - wcerr << len << endl; - wstring name = L""; - while(len > 0) - { - name += static_cast(Compression::multibyte_read(fst)); - len--; - } - wcerr << name << endl; - - transducer.read(fst, alphabet); - - FILE *input = stdin; - FILE *output = stdout; - - set anfinals; - set escaped_chars; - - escaped_chars.insert(L'['); - escaped_chars.insert(L']'); - escaped_chars.insert(L'{'); - escaped_chars.insert(L'}'); - escaped_chars.insert(L'^'); - escaped_chars.insert(L'$'); - escaped_chars.insert(L'/'); - escaped_chars.insert(L'\\'); - escaped_chars.insert(L'@'); - escaped_chars.insert(L'<'); - escaped_chars.insert(L'>'); - - State *initial_state; - initial_state = new State(); - initial_state->init(transducer.getInitial()); - anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end()); - - - vector new_states; - vector alive_states; - - alive_states.push_back(*initial_state); - - bool outOfWord = true; - bool isEscaped = false; - - while(!feof(input)) - { - int val = fgetwc(input); // read 1 wide char - - wcerr << L"| " << (wchar_t)val << L" | val: " << val << L" || s.size(): " << alive_states.size() << L" || " << outOfWord << endl; - - if(/*val == L'^' && */ !isEscaped && outOfWord) - { - outOfWord = false; - continue; - } - - if((feof(input) || val == L'$') && !isEscaped && !outOfWord) - { - new_states.clear(); - for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) - { - State s = *it; - s.step(alphabet(L"<$>")); - if(s.size() > 0) - { - new_states.push_back(s); - } - - if(s.isFinal(anfinals)) - { - wstring out = s.filterFinals(anfinals, alphabet, escaped_chars); - wcerr << "FINAL: " << out << endl; - new_states.push_back(*initial_state); - } - } - alive_states.swap(new_states); - - outOfWord = true; - continue; - } - - if(val == L'<' && !outOfWord) // if in tag, get the whole tag and modify if necessary - { - wstring tag = L""; - tag = readFullBlock(input, L'<', L'>'); - if(!alphabet.isSymbolDefined(tag)) - { - alphabet.includeSymbol(tag); - } - val = static_cast(alphabet(tag)); - - fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val); - } - - if(!outOfWord) - { - new_states.clear(); - wstring res = L""; - for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) - { - res = L""; - State s = *it; - if(val < 0) - { - s.step_override(val, alphabet(L""), val); - } - else if(val > 0) - { - s.step_override(val, alphabet(L""), val); // deal with cases! - } - if(s.size() > 0) - { - new_states.push_back(s); - } - wcerr << L"| | " << (wchar_t) val << L" " << L"size: " << s.size() << L" final: " << s.isFinal(anfinals) << endl; - wcerr << L"| | cur: " << s.getReadableString(alphabet) << endl; - } - alive_states.swap(new_states); - } - - if(outOfWord) - { - continue; - } - - } - return 0; }