Index: branches/apertium-separable/src/processor.cpp =================================================================== --- branches/apertium-separable/src/processor.cpp (revision 80279) +++ branches/apertium-separable/src/processor.cpp (revision 80280) @@ -3,7 +3,6 @@ #include #include #include -// #include #include #include @@ -29,7 +28,7 @@ while(!feof(input) && c != delim2) { - c = static_cast(fgetwc(input)); + c = static_cast(fgetwc(input)); //fget_unlocked result += c; } @@ -36,11 +35,9 @@ return result; } - /*** main ***/ - int main (int argc, char** argv) { Alphabet alphabet; @@ -47,8 +44,6 @@ TransExe transducer; LtLocale::tryToSetLocale(); - - FILE *fst = fopen(argv[1], "r"); set alphabetic_chars; @@ -63,7 +58,6 @@ wcout << L"alphabet_size: " << alphabet.size() << endl; len = Compression::multibyte_read(fst); - len = Compression::multibyte_read(fst); wcout << len << endl; wstring name = L""; @@ -79,59 +73,101 @@ FILE *input = stdin; FILE *output = stdout; - vector alive_states; - set anfinals; + /* preparing for processing */ + vector alive_states; //A set of alive states is maintained to compute all the possible ways to + set anfinals; //alive node finals ? set escaped_chars; - State *initial_state; - initial_state = new State(); - initial_state->init(transducer.getInitial()); + + State* initial_state = new State(); + initial_state->init(transducer.getInitial()); // getInitial() returns an int anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end()); - /* - processing - */ + set final_states = transducer.getFinals(); + for(auto final_state : final_states) { + final_state.init(transducer.getInitial()); //initialize + } - int line_number = 0; - bool accepted = true; + /* processing */ + vector new_states; - alive_states.push_back(*initial_state); + // TODO: insert the other states + // TODO: insert the final state - while(!feof(input)) + int line_number = 0; + bool accepted = true; + while(!feof(input)) // while true { - int orig_val = fgetwc(input); // read 1 wide char - int mod_val = orig_val; + //initialize conditions + int tag_count = 0; + State* current_state = initial_state; + bool in_lemma = false; + bool in_take = false; + bool in_out = false; - if(orig_val == L'<') // if in tag, get the whole tag and modify if necessary + while (alive_states.size() > 1 and !isFinal(current_state)) { + //get the next token + int val = fgetwc(input); // read 1 wide char + bool is_tag = false; + if(val == L'<') // if in tag, get the whole tag { + in_lemma = false; + is_tag = true; wstring tag = L""; tag = readFullBlock(input, L'<', L'>'); - orig_val = static_cast(alphabet(tag)); - mod_val = static_cast(alphabet(tag)); + val = static_cast(alphabet(tag)); - cout << "orig_val: " << orig_val << endl; + tag_count++; - if(orig_val == 0) // if subsequent_tag TODO:? + cout << "val before: " << val << endl; + cout << "tag_count: " << tag_count << endl; + + if(val == 0 && tag_count > 2) //TODO: val==0? { - mod_val = static_cast(alphabet(L"")); + val = static_cast(alphabet(L"")); } - fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), mod_val); + + cout << "val after: " << val << endl; + fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val); + + if (tag == '') { + accepted = true; } + } + else if(in_lemma && !in_take && !in_out) { + val == static_cast(alphabet(L"&")); + } - for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) - { + // if (current_state == initial_state && not eof) { + //successfully reached eof + //exit() + + if (current_state == initial_state && val != '\n') { + accepted = true; + break; + } else if (val == '\n') { //or sent + accepted = true; + } + + //step into the next state + for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) { //step //for every state in alive_states State s = *it; - s.step(mod_val); + + if (tag_count > 2) { + s.step(val, alphabet(L"")); + } else { + s.step(val) + } + if(s.size() > 0) { new_states.push_back(s); } - wcout << (wchar_t) orig_val << L" " << L"size: " << s.size() << L" final: " << s.isFinal(anfinals) << endl; + wcout << (wchar_t) val << L" " << L"size: " << s.size() << L" final: " << s.isFinal(anfinals) << endl; } alive_states.swap(new_states); } - return 0; -} + }