Index: branches/apertium-separable/src/processor3.cc =================================================================== --- branches/apertium-separable/src/processor3.cc (revision 80368) +++ branches/apertium-separable/src/processor3.cc (nonexistent) @@ -1,151 +0,0 @@ -#include -#include -#include -#include -#include -// #include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); - - -/* get the text between delim1 and delim2 */ -/* next_token() */ -wstring -readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2) -{ - wstring result = L""; - result += delim1; - wchar_t c = delim1; - - while(!feof(input) && c != delim2) - { - c = static_cast(fgetwc(input)); - result += c; - } - - return result; -} - - -/*** -main -***/ - -int main (int argc, char** argv) -{ - Alphabet alphabet; - TransExe transducer; - - LtLocale::tryToSetLocale(); - - - FILE *fst = fopen(argv[1], "r"); - - set alphabetic_chars; - int len = Compression::multibyte_read(fst); - while(len > 0) - { - alphabetic_chars.insert(static_cast(Compression::multibyte_read(fst))); - len--; - } - - alphabet.read(fst); - wcerr << L"alphabet_size: " << alphabet.size() << endl; - - len = Compression::multibyte_read(fst); - - len = Compression::multibyte_read(fst); - wcerr << len << endl; - wstring name = L""; - while(len > 0) - { - name += static_cast(Compression::multibyte_read(fst)); - len--; - } - wcerr << name << endl; - - transducer.read(fst, alphabet); - - FILE *input = stdin; - FILE *output = stdout; - - vector alive_states; - set anfinals; - set escaped_chars; - - escaped_chars.insert(L'['); - escaped_chars.insert(L']'); - escaped_chars.insert(L'{'); - escaped_chars.insert(L'}'); - escaped_chars.insert(L'^'); - escaped_chars.insert(L'$'); - escaped_chars.insert(L'/'); - escaped_chars.insert(L'\\'); - escaped_chars.insert(L'@'); - escaped_chars.insert(L'<'); - escaped_chars.insert(L'>'); - - State *initial_state; - initial_state = new State(); - initial_state->init(transducer.getInitial()); - anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end()); - - /* - processing - */ - - vector new_states; - - alive_states.push_back(*initial_state); - - bool outOfWord = true; - bool isEscaped = false; - - State s = *initial_state; - - s.step('t'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('a'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('k'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('e'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L""), alphabet(L"")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"<$>")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('o'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('u'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step('t'); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - s.step(alphabet(L"<$>")); - wcerr << s.size() << L" ||| " << s.getReadableString(alphabet) << L" ||| " << s.isFinal(anfinals) << endl; - - wstring out = s.filterFinals(anfinals, alphabet, escaped_chars); - - wcerr << L"FINAL: " << out << endl; - - - return 0; -} Index: branches/apertium-separable/src/lsx_compiler.cc =================================================================== --- branches/apertium-separable/src/lsx_compiler.cc (revision 80368) +++ branches/apertium-separable/src/lsx_compiler.cc (revision 80370) @@ -14,8 +14,8 @@ * You should have received a copy of the GNU General Public License * along with this program; if not, see . */ -#ifndef _MYCOMPILER_ -#define _MYCOMPILER_ +// #ifndef _MYCOMPILER_ +// #define _MYCOMPILER_ #include #include @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -77,15 +78,12 @@ wstring const Compiler::COMPILER_VL_ATTR = L"vl"; wstring const Compiler::COMPILER_VR_ATTR = L"vr"; -wstring const Compiler::COMPILER_ANYCHAR_ATTR = L"w"; -wstring const Compiler::COMPILER_ANYTAG_ATTR = L"t"; +/* add to header +wstring const Compiler::COMPILER_ANYCHAR_ELEM = L"w"; +wstring const Compiler::COMPILER_ANYTAG_ELEM = L"t"; wstring const Compiler::COMPILER_WB_ELEM = L"j"; +*/ - - = ANY_CHAR (loop) - = ANY_TAG (loop) - = <$> - Compiler::Compiler() : reader(0), verbose(false), @@ -106,7 +104,7 @@ reader = xmlReaderForFile(fichero.c_str(), NULL, 0); if(reader == NULL) { - wcerr << "Error: cannot open '" << fichero << "'." << endl; + cerr << "Error: cannot open '" << fichero << "'." << endl; exit(EXIT_FAILURE); } int ret = xmlTextReaderRead(reader); @@ -118,7 +116,7 @@ } } -void //FIXME +void Compiler::parse(string const &fichero, wstring const &dir) { direction = dir; @@ -125,7 +123,7 @@ reader = xmlReaderForFile(fichero.c_str(), NULL, 0); if(reader == NULL) { - wcerr << "Error: Cannot open '" << fichero << "'." << endl; + cerr << "Error: Cannot open '" << fichero << "'." << endl; exit(EXIT_FAILURE); } @@ -326,28 +324,27 @@ @param result: (referenced) empty list @param name: name of the node */ -void //FIXME +void Compiler::readString(list &result, wstring const &name) { - cout << "NAME" << name << endl; - // if(name == L"#text") - // { - // wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); //NOTE returns the (wstring) text value of the node, or NULL if unavailable - // for(unsigned int i = 0, limit = value.size(); i < limit; i++) //NOTE for every character - // { - // result.push_back(static_cast(value[i])); //NOTE add character to (list) result - // } - // } - // else if(name == COMPILER_BLANK_ELEM) - if(name == COMPILER_BLANK_ELEM) //NOTE COMPILER_BLANK_ELEM defined above = "b" + wcout << "NAME" << name << endl; + if(name == L"#text") { + wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); //NOTE returns the (wstring) text value of the node, or NULL if unavailable + for(unsigned int i = 0, limit = value.size(); i < limit; i++) + { + result.push_back(static_cast(value[i])); + } + } + else if(name == COMPILER_BLANK_ELEM) + { requireEmptyError(name); result.push_back(static_cast(L' ')); } - else if(name == COMPILER_WB_ELEM) //FIXME "j" + else if(name == L"j" /*COMPILER_WB_ELEM*/) //FIXME "j" { requireEmptyError(name); - result.push_back(static_cast(L'<$>')); + result.push_back(static_cast(L'$')); } else if(name == COMPILER_POSTGENERATOR_ELEM) { @@ -362,18 +359,26 @@ result.push_back(static_cast(L'#')); } } - else if(name == COMPILER_ANYCHAR_ATTR) //FIXME "w" + else if(name == L"w" /*COMPILER_ANYCHAR_ELEM*/) //FIXME "w" { - result.push_back(static_cast(name)); + wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); + for(unsigned int i = 0, limit = value.size(); i < limit; i++) + { + result.push_back(static_cast(value[i])); } - else if(name == COMPILER_ANYTAG_ATTR) //FIXME "t" + } + else if(name == L"t" /*COMPILER_ANYTAG_ELEM*/ ) //FIXME "t" { - result.push_back(static_cast(symbol)); + wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); + for(unsigned int i = 0, limit = value.size(); i < limit; i++) + { + result.push_back(static_cast(value[i])); } + } else if(name == COMPILER_S_ELEM) { requireEmptyError(name); - wstring symbol = L"<" + attrib(COMPILER_N_ATTR) + L">"; //NOTE the value of + wstring symbol = L"<" + attrib(COMPILER_N_ATTR) + L">"; //NOTE attrib from if(!alphabet.isSymbolDefined(symbol)) { @@ -970,9 +975,7 @@ } - - -int main (int argc, char** argv) { //FIXME +int main (int argc, char** argv) { Alphabet alphabet; Transducer t; @@ -997,8 +1000,6 @@ int det_sym = alphabet(L""); int prn_sym = alphabet(L""); int np_sym = alphabet(L""); - int adv_sym = alphabet(L""); - int pr_sym = alphabet(L""); int any_tag = alphabet(L""); int any_char = alphabet(L""); @@ -1094,9 +1095,6 @@ int after_adj = take_out; - /* no n */ - int from_non = take_out; //same as after_adj - /* lemma for the noun */ loop = after_adj; take_out = t.insertSingleTransduction(alphabet(any_char,any_char), loop); @@ -1116,8 +1114,6 @@ take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out); - int after_n = take_out; - /* out */ int before_out = take_out; Index: branches/apertium-separable/src/lsx_processor.cc =================================================================== --- branches/apertium-separable/src/lsx_processor.cc (revision 80368) +++ branches/apertium-separable/src/lsx_processor.cc (revision 80370) @@ -3,7 +3,6 @@ #include #include #include -// #include #include #include @@ -17,9 +16,6 @@ wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); - -/* get the text between delim1 and delim2 */ -/* next_token() */ wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2) { @@ -36,11 +32,6 @@ return result; } - -/*** -main -***/ - int main (int argc, char** argv) { Alphabet alphabet; @@ -48,7 +39,6 @@ LtLocale::tryToSetLocale(); - FILE *fst = fopen(argv[1], "r"); set alphabetic_chars; @@ -99,9 +89,6 @@ initial_state->init(transducer.getInitial()); anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end()); - /* - processing - */ vector new_states; vector alive_states; @@ -140,8 +127,8 @@ wstring out = s.filterFinals(anfinals, alphabet, escaped_chars); wcerr << "FINAL: " << out << endl; new_states.push_back(*initial_state); - } } + } alive_states.swap(new_states); outOfWord = true; @@ -152,7 +139,7 @@ { wstring tag = L""; tag = readFullBlock(input, L'<', L'>'); - if(!alphabet.isSymbolDefined(tag)) + if(!alphabet.isSymbolDefined(tag)) { alphabet.includeSymbol(tag); } @@ -160,8 +147,8 @@ fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val); } - - if(!outOfWord) + + if(!outOfWord) { new_states.clear(); wstring res = L""; @@ -169,11 +156,11 @@ { res = L""; State s = *it; - if(val < 0) + if(val < 0) { s.step_override(val, alphabet(L""), val); } - else if(val > 0) + else if(val > 0) { s.step_override(val, alphabet(L""), val); // deal with cases! } @@ -186,12 +173,12 @@ } alive_states.swap(new_states); } - + if(outOfWord) { continue; } - + } return 0;