Index: branches/apertium-separable/src/compiler_copy.cc =================================================================== --- branches/apertium-separable/src/compiler_copy.cc (revision 80509) +++ branches/apertium-separable/src/compiler_copy.cc (nonexistent) @@ -1,975 +0,0 @@ -/* - * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace std; - -wstring const Compiler::COMPILER_DICTIONARY_ELEM = L"dictionary"; -wstring const Compiler::COMPILER_ALPHABET_ELEM = L"alphabet"; -wstring const Compiler::COMPILER_SDEFS_ELEM = L"sdefs"; -wstring const Compiler::COMPILER_SDEF_ELEM = L"sdef"; -wstring const Compiler::COMPILER_N_ATTR = L"n"; -wstring const Compiler::COMPILER_PARDEFS_ELEM = L"pardefs"; -wstring const Compiler::COMPILER_PARDEF_ELEM = L"pardef"; -wstring const Compiler::COMPILER_PAR_ELEM = L"par"; -wstring const Compiler::COMPILER_ENTRY_ELEM = L"e"; -wstring const Compiler::COMPILER_RESTRICTION_ATTR = L"r"; -wstring const Compiler::COMPILER_RESTRICTION_LR_VAL = L"LR"; -wstring const Compiler::COMPILER_RESTRICTION_RL_VAL = L"RL"; -wstring const Compiler::COMPILER_PAIR_ELEM = L"p"; -wstring const Compiler::COMPILER_LEFT_ELEM = L"l"; -wstring const Compiler::COMPILER_RIGHT_ELEM = L"r"; -wstring const Compiler::COMPILER_S_ELEM = L"s"; -wstring const Compiler::COMPILER_REGEXP_ELEM = L"re"; -wstring const Compiler::COMPILER_SECTION_ELEM = L"section"; -wstring const Compiler::COMPILER_ID_ATTR = L"id"; -wstring const Compiler::COMPILER_TYPE_ATTR = L"type"; -wstring const Compiler::COMPILER_IDENTITY_ELEM = L"i"; -wstring const Compiler::COMPILER_JOIN_ELEM = L"j"; -wstring const Compiler::COMPILER_BLANK_ELEM = L"b"; -wstring const Compiler::COMPILER_POSTGENERATOR_ELEM = L"a"; -wstring const Compiler::COMPILER_GROUP_ELEM = L"g"; -wstring const Compiler::COMPILER_LEMMA_ATTR = L"lm"; -wstring const Compiler::COMPILER_IGNORE_ATTR = L"i"; -wstring const Compiler::COMPILER_IGNORE_YES_VAL = L"yes"; -wstring const Compiler::COMPILER_ALT_ATTR = L"alt"; -wstring const Compiler::COMPILER_V_ATTR = L"v"; -wstring const Compiler::COMPILER_VL_ATTR = L"vl"; -wstring const Compiler::COMPILER_VR_ATTR = L"vr"; - -wstring const Compiler::COMPILER_ANYTAG_ELEM = L"t"; -wstring const Compiler::COMPILER_ANYCHAR_ELEM = L"w"; -wstring const Compiler::COMPILER_WB_ELEM = L"j"; - -Compiler::Compiler() : -reader(0), -verbose(false), -first_element(false), -acx_current_char(0) -{ -} - -Compiler::~Compiler() -{ -} - -void -Compiler::parseACX(string const &fichero, wstring const &dir) -{ - if(dir == COMPILER_RESTRICTION_LR_VAL) - { - reader = xmlReaderForFile(fichero.c_str(), NULL, 0); - if(reader == NULL) - { - wcerr << "Error: cannot open '" << fichero << "'." << endl; - exit(EXIT_FAILURE); - } - int ret = xmlTextReaderRead(reader); - while(ret == 1) - { - procNodeACX(); - ret = xmlTextReaderRead(reader); - } - } -} - -void -Compiler::parse(string const &fichero, wstring const &dir) -{ - direction = dir; - wcout << direction << endl; //NOTE never prints - - reader = xmlReaderForFile(fichero.c_str(), NULL, 0); - if(reader == NULL) - { - wcerr << "Error: Cannot open '" << fichero << "'." << endl; - exit(EXIT_FAILURE); - } - - int ret = xmlTextReaderRead(reader); - - cout << ret << endl; //NOTE never prints - - while(ret == 1) - { - procNode(); - ret = xmlTextReaderRead(reader); - cout << ret << endl; //NOTE never prints - } - - if(ret != 0) - { - wcerr << L"Error: Parse error at the end of input." << endl; - } - - xmlFreeTextReader(reader); - xmlCleanupParser(); - - - // Minimize transducers - for(map::iterator it = sections.begin(), - limit = sections.end(); - it != limit; it++) - { - (it->second).minimize(); - } -} - - -void -Compiler::procAlphabet() -{ - int tipo=xmlTextReaderNodeType(reader); - - if(tipo != XML_READER_TYPE_END_ELEMENT) - { - int ret = xmlTextReaderRead(reader); - if(ret == 1) - { - xmlChar const *valor = xmlTextReaderConstValue(reader); - letters = XMLParseUtil::towstring(valor); - bool espai = true; - for(unsigned int i = 0; i < letters.length(); i++) - { - if(!isspace(letters.at(i))) - { - espai = false; - break; - } - } - if(espai == true) // libxml2 returns '\n' for , should be empty - { - letters = L""; - } - } - else - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Missing alphabet symbols." << endl; - exit(EXIT_FAILURE); - } - } -} - -void -Compiler::procSDef() -{ - alphabet.includeSymbol(L"<"+attrib(COMPILER_N_ATTR)+L">"); -} - -void -Compiler::procParDef() -{ - int tipo=xmlTextReaderNodeType(reader); - - if(tipo != XML_READER_TYPE_END_ELEMENT) - { - current_paradigm = attrib(COMPILER_N_ATTR); - } - else - { - if(!paradigms[current_paradigm].isEmpty()) - { - paradigms[current_paradigm].minimize(); - paradigms[current_paradigm].joinFinals(); - current_paradigm = L""; - } - } -} - -int -Compiler::matchTransduction(list const &pi, - list const &pd, - int estado, Transducer &t) -{ - list::const_iterator izqda, dcha, limizqda, limdcha; - - if(direction == COMPILER_RESTRICTION_LR_VAL) - { - izqda = pi.begin(); - dcha = pd.begin(); - limizqda = pi.end(); - limdcha = pd.end(); - } - else - { - izqda = pd.begin(); - dcha = pi.begin(); - limizqda = pd.end(); - limdcha = pi.end(); - } - - - if(pi.size() == 0 && pd.size() == 0) - { - estado = t.insertNewSingleTransduction(alphabet(0, 0), estado); - } - else - { - map >::iterator acx_map_ptr; - int rsymbol = 0; - - while(true) - { - int etiqueta; - - acx_map_ptr = acx_map.end(); - - if(izqda == limizqda && dcha == limdcha) - { - break; - } - else if(izqda == limizqda) - { - etiqueta = alphabet(0, *dcha); - dcha++; - } - else if(dcha == limdcha) - { - etiqueta = alphabet(*izqda, 0); - acx_map_ptr = acx_map.find(*izqda); - rsymbol = 0; - izqda++; - } - else - { - etiqueta = alphabet(*izqda, *dcha); - acx_map_ptr = acx_map.find(*izqda); - rsymbol = *dcha; - izqda++; - dcha++; - } - - int nuevo_estado = t.insertSingleTransduction(etiqueta, estado); - - if(acx_map_ptr != acx_map.end()) - { - for(set::iterator it = acx_map_ptr->second.begin(); - it != acx_map_ptr->second.end(); it++) - { - t.linkStates(estado, nuevo_estado, alphabet(*it ,rsymbol)); - } - } - estado = nuevo_estado; - } - } - - return estado; -} - - -void -Compiler::requireEmptyError(wstring const &name) -{ - if(!xmlTextReaderIsEmptyElement(reader)) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Non-empty element '<" << name << L">' should be empty." << endl; - exit(EXIT_FAILURE); - } -} - -bool -Compiler::allBlanks() -{ - bool flag = true; - wstring text = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); - - for(unsigned int i = 0, limit = text.size(); i < limit; i++) - { - flag = flag && iswspace(text[i]); - } - - return flag; -} - -void -Compiler::readString(list &result, wstring const &name) -{ - - wcout << name << endl; //NOTE never prints - - if(name == L"#text") - { - wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); - for(unsigned int i = 0, limit = value.size(); i < limit; i++) - { - result.push_back(static_cast(value[i])); - } - } - else if(name == COMPILER_BLANK_ELEM) - { - requireEmptyError(name); - result.push_back(static_cast(L' ')); - } - // else if(name == COMPILER_JOIN_ELEM) - // { - // requireEmptyError(name); - // result.push_back(static_cast(L'+')); - // } - else if(name == COMPILER_POSTGENERATOR_ELEM) - { - requireEmptyError(name); - result.push_back(static_cast(L'~')); - } - else if(name == COMPILER_GROUP_ELEM) - { - int tipo=xmlTextReaderNodeType(reader); - if(tipo != XML_READER_TYPE_END_ELEMENT) - { - result.push_back(static_cast(L'#')); - } - } - else if(name == COMPILER_S_ELEM) - { - requireEmptyError(name); - wstring symbol = L"<" + attrib(COMPILER_N_ATTR) + L">"; - - if(!alphabet.isSymbolDefined(symbol)) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Undefined symbol '" << symbol << L"'." << endl; - exit(EXIT_FAILURE); - } - - result.push_back(alphabet(symbol)); - } - - /* additions */ - else if(name == COMPILER_ANYTAG_ELEM) { - // wstring symbol = L"<" + name + L">"; - result.push_back(alphabet(L"")); - } - else if(name == COMPILER_ANYCHAR_ELEM) { - result.push_back(alphabet(L"")); - } - else if(name == COMPILER_WB_ELEM) { - requireEmptyError(name); - wstring symbol = L"<" + name + L">"; - result.push_back(alphabet(symbol)); - } - - else - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid specification of element '<" << name; - wcerr << L">' in this context." << endl; - wcerr << L"anytag_elem: " << COMPILER_ANYTAG_ELEM << endl; - exit(EXIT_FAILURE); - } -} - -void -Compiler::skipBlanks(wstring &name) -{ - while(name == L"#text" || name == L"#comment") - { - if(name != L"#comment") - { - if(!allBlanks()) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid construction." << endl; - exit(EXIT_FAILURE); - } - } - - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - } -} - -void -Compiler::skip(wstring &name, wstring const &elem) -{ - skip(name, elem, true); -} - -void -Compiler::skip(wstring &name, wstring const &elem, bool open) -{ - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - wstring slash; - - if(!open) - { - slash = L"/"; - } - - while(name == L"#text" || name == L"#comment") - { - if(name != L"#comment") - { - if(!allBlanks()) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid construction." << endl; - exit(EXIT_FAILURE); - } - } - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - } - - if(name != elem) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Expected '<" << slash << elem << L">'." << endl; - exit(EXIT_FAILURE); - } -} - -EntryToken -Compiler::procIdentity() -{ - list both_sides; - - if(!xmlTextReaderIsEmptyElement(reader)) - { - wstring name = L""; - - while(true) - { - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - if(name == COMPILER_IDENTITY_ELEM) - { - break; - } - readString(both_sides, name); - } - } - - if(verbose && first_element && (both_sides.front() == (int)L' ')) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Entry begins with space." << endl; - } - first_element = false; - EntryToken e; - e.setSingleTransduction(both_sides, both_sides); - return e; -} - -EntryToken -Compiler::procTransduction() -{ - list lhs, rhs; - wstring name; - - skip(name, COMPILER_LEFT_ELEM); - - if(!xmlTextReaderIsEmptyElement(reader)) - { - name = L""; - while(true) - { - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - if(name == COMPILER_LEFT_ELEM) - { - break; - } - readString(lhs, name); - } - } - - if(verbose && first_element && (lhs.front() == (int)L' ')) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Entry begins with space." << endl; - } - first_element = false; - - skip(name, COMPILER_RIGHT_ELEM); - - if(!xmlTextReaderIsEmptyElement(reader)) - { - name = L""; - while(true) - { - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - if(name == COMPILER_RIGHT_ELEM) - { - break; - } - readString(rhs, name); - } - } - - skip(name, COMPILER_PAIR_ELEM, false); - - EntryToken e; - e.setSingleTransduction(lhs, rhs); - return e; -} - -wstring -Compiler::attrib(wstring const &name) -{ - return XMLParseUtil::attrib(reader, name); -} - -EntryToken -Compiler::procPar() -{ - EntryToken e; - wstring nomparadigma = attrib(COMPILER_N_ATTR); - first_element = false; - - if(current_paradigm != L"" && nomparadigma == current_paradigm) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Paradigm refers to itself '" << nomparadigma << L"'." < const &elements) -{ - if(current_paradigm != L"") - { - // compilation of paradigms - Transducer &t = paradigms[current_paradigm]; - int e = t.getInitial(); - - for(unsigned int i = 0, limit = elements.size(); i < limit; i++) - { - if(elements[i].isParadigm()) - { - e = t.insertTransducer(e, paradigms[elements[i].paradigmName()]); - } - else if(elements[i].isSingleTransduction()) - { - e = matchTransduction(elements[i].left(), - elements[i].right(), e, t); - } - else if(elements[i].isRegexp()) - { - RegexpCompiler analyzer; - analyzer.initialize(&alphabet); - analyzer.compile(elements[i].regExp()); - e = t.insertTransducer(e, analyzer.getTransducer(), alphabet(0,0)); - } - else - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid entry token." << endl; - exit(EXIT_FAILURE); - } - } - t.setFinal(e); - } - else - { - // compilaci�n de dictionary - - Transducer &t = sections[current_section]; - int e = t.getInitial(); - - for(unsigned int i = 0, limit = elements.size(); i < limit; i++) - { - if(elements[i].isParadigm()) - { - if(i == elements.size()-1) - { - // paradigma sufijo - if(suffix_paradigms[current_section].find(elements[i].paradigmName()) != suffix_paradigms[current_section].end()) - { - t.linkStates(e, suffix_paradigms[current_section][elements[i].paradigmName()], 0); - e = postsuffix_paradigms[current_section][elements[i].paradigmName()]; - } - else - { - e = t.insertNewSingleTransduction(alphabet(0, 0), e); - suffix_paradigms[current_section][elements[i].paradigmName()] = e; - e = t.insertTransducer(e, paradigms[elements[i].paradigmName()]); - postsuffix_paradigms[current_section][elements[i].paradigmName()] = e; - } - } - else if(i == 0) - { - // paradigma prefijo - if(prefix_paradigms[current_section].find(elements[i].paradigmName()) != prefix_paradigms[current_section].end()) - { - e = prefix_paradigms[current_section][elements[i].paradigmName()]; - } - else - { - e = t.insertTransducer(e, paradigms[elements[i].paradigmName()]); - prefix_paradigms[current_section][elements[i].paradigmName()] = e; - } - } - else - { - // paradigma intermedio - e = t.insertTransducer(e, paradigms[elements[i].paradigmName()]); - } - } - else if(elements[i].isRegexp()) - { - RegexpCompiler analyzer; - analyzer.initialize(&alphabet); - analyzer.compile(elements[i].regExp()); - e = t.insertTransducer(e, analyzer.getTransducer(), alphabet(0,0)); - } - else - { - e = matchTransduction(elements[i].left(), elements[i].right(), e, t); - } - } - t.setFinal(e); - } -} - - -void -Compiler::requireAttribute(wstring const &value, wstring const &attrname, - wstring const &elemname) -{ - if(value == L"") - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): '<" << elemname; - wcerr << L"' element must specify non-void '"; - wcerr << attrname << L"' attribute." << endl; - exit(EXIT_FAILURE); - } -} - - -void -Compiler::procSection() -{ - int tipo=xmlTextReaderNodeType(reader); - - if(tipo != XML_READER_TYPE_END_ELEMENT) - { - wstring const &id = attrib(COMPILER_ID_ATTR); - wstring const &type = attrib(COMPILER_TYPE_ATTR); - requireAttribute(id, COMPILER_ID_ATTR, COMPILER_SECTION_ELEM); - requireAttribute(type, COMPILER_TYPE_ATTR, COMPILER_SECTION_ELEM); - - current_section = id; - current_section += L"@"; - current_section.append(type); - } - else - { - current_section = L""; - } -} - -void -Compiler::procEntry() -{ - wstring atributo=this->attrib(COMPILER_RESTRICTION_ATTR); - wstring ignore = this->attrib(COMPILER_IGNORE_ATTR); - wstring altval = this->attrib(COMPILER_ALT_ATTR); - wstring varval = this->attrib(COMPILER_V_ATTR); - wstring varl = this->attrib(COMPILER_VL_ATTR); - wstring varr = this->attrib(COMPILER_VR_ATTR); - - //�if entry is masked by a restriction of direction or an ignore mark - if((atributo != L"" && atributo != direction) - || ignore == COMPILER_IGNORE_YES_VAL - || (altval != L"" && altval != alt) - || (direction == COMPILER_RESTRICTION_RL_VAL && varval != L"" && varval != variant) - || (direction == COMPILER_RESTRICTION_RL_VAL && varl != L"" && varl != variant_left) - || (direction == COMPILER_RESTRICTION_LR_VAL && varr != L"" && varr != variant_right)) - { - // parse to the end of the entry - wstring name = L""; - - while(name != COMPILER_ENTRY_ELEM) - { - xmlTextReaderRead(reader); - name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - } - - return; - } - - vector elements; - - while(true) - { - int ret = xmlTextReaderRead(reader); - if(ret != 1) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Parse error." << endl; - exit(EXIT_FAILURE); - } - wstring name = XMLParseUtil::towstring(xmlTextReaderConstName(reader)); - skipBlanks(name); - - if(current_paradigm == L"" && verbose) - { - first_element = true; - } - - int tipo = xmlTextReaderNodeType(reader); - if(name == COMPILER_PAIR_ELEM) - { - elements.push_back(procTransduction()); - } - else if(name == COMPILER_IDENTITY_ELEM) - { - elements.push_back(procIdentity()); - } - else if(name == COMPILER_REGEXP_ELEM) - { - elements.push_back(procRegexp()); - } - else if(name == COMPILER_PAR_ELEM) - { - elements.push_back(procPar()); - - // detecci�n del uso de paradigmas no definidos - - wstring const &p = elements.rbegin()->paradigmName(); - - if(paradigms.find(p) == paradigms.end()) - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Undefined paradigm '" << p << L"'." <' into '<" << COMPILER_ENTRY_ELEM; - wcerr << L">'." << endl; - exit(EXIT_FAILURE); - } - } -} - -void Compiler::procTag() {} //TODO - -void Compiler::procChar() {} //TODO - -void Compiler::procWb() {} //TODO - -void -Compiler::procNodeACX() -{ - xmlChar const *xnombre = xmlTextReaderConstName(reader); - wstring nombre = XMLParseUtil::towstring(xnombre); - - if(nombre == L"#text") - { - /* ignore */ - } - else if(nombre == L"analysis-chars") - { - /* ignore */ - } - else if(nombre == L"char") - { - acx_current_char = static_cast(attrib(L"value")[0]); - } - else if(nombre == L"equiv-char") - { - acx_map[acx_current_char].insert(static_cast(attrib(L"value")[0])); - } - else if(nombre == L"#comment") - { - /* ignore */ - } - else - { - wcerr << L"Error in ACX file (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid node '<" << nombre << L">'." << endl; - exit(EXIT_FAILURE); - } -} - -void -Compiler::procNode() -{ - xmlChar const *xnombre = xmlTextReaderConstName(reader); - wstring nombre = XMLParseUtil::towstring(xnombre); - - // HACER: optimizar el orden de ejecuci�n de esta ristra de "ifs" - - if(nombre == L"#text") - { - /* ignorar */ - } - else if(nombre == COMPILER_DICTIONARY_ELEM) - { - /* ignorar */ - } - else if(nombre == COMPILER_ALPHABET_ELEM) - { - procAlphabet(); - } - else if(nombre == COMPILER_SDEFS_ELEM) - { - /* ignorar */ - } - else if(nombre == COMPILER_SDEF_ELEM) - { - procSDef(); - } - else if(nombre == COMPILER_PARDEFS_ELEM) - { - /* ignorar */ - } - else if(nombre == COMPILER_PARDEF_ELEM) - { - procParDef(); - } - else if(nombre == COMPILER_ENTRY_ELEM) - { - procEntry(); - } - else if(nombre == COMPILER_SECTION_ELEM) - { - procSection(); - } - else if(nombre == L"#comment") - { - /* ignorar */ - } - else if(nombre == COMPILER_ANYTAG_ELEM) { - procTag(); - } - else if(nombre == COMPILER_ANYCHAR_ELEM) { - procChar(); - } - else if(nombre == COMPILER_WB_ELEM) { - procWb(); - } - else - { - wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); - wcerr << L"): Invalid node '<" << nombre << L">'." << endl; - exit(EXIT_FAILURE); - } -} - -EntryToken -Compiler::procRegexp() -{ - EntryToken et; - xmlTextReaderRead(reader); - wstring re = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); - et.setRegexp(re); - xmlTextReaderRead(reader); - return et; -} - -void -Compiler::write(FILE *output) -{ - // letters - Compression::wstring_write(letters, output); - - // symbols - alphabet.write(output); - - // transducers - Compression::multibyte_write(sections.size(), output); - - int conta=0; - for(map::iterator it = sections.begin(), - limit = sections.end(); - it != limit; it++) - { - conta++; - wcout << it->first << " " << it->second.size(); - wcout << " " << it->second.numberOfTransitions() << endl; - Compression::wstring_write(it->first, output); - it->second.write(output); - } -} - -void -Compiler::setAltValue(string const &a) -{ - alt = XMLParseUtil::stows(a); -} - -void -Compiler::setVariantValue(string const &v) -{ - variant = XMLParseUtil::stows(v); -} - -void -Compiler::setVariantLeftValue(string const &v) -{ - variant_left = XMLParseUtil::stows(v); -} - -void -Compiler::setVariantRightValue(string const &v) -{ - variant_right = XMLParseUtil::stows(v); -} - -void -Compiler::setVerbose(bool verbosity) -{ - verbose = verbosity; -} Index: branches/apertium-separable/src/compiler_copy.h =================================================================== --- branches/apertium-separable/src/compiler_copy.h (revision 80509) +++ branches/apertium-separable/src/compiler_copy.h (nonexistent) @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ -#ifndef _MYCOMPILER_ -#define _MYCOMPILER_ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -using namespace std; - -/** - * A compiler of dictionaries to letter transducers - */ -class Compiler -{ -private: - /** - * The libxml2's XML reader - */ - xmlTextReaderPtr reader; - - /** - * The alt value - */ - wstring alt; - - /** - * The variant value (monodix) - */ - wstring variant; - - /** - * The variant value (left side of bidix) - */ - wstring variant_left; - - /** - * The variant value (right side of bidix) - */ - wstring variant_right; - - /** - * The paradigm being compiled - */ - wstring current_paradigm; - - /** - * The dictionary section being compiled - */ - wstring current_section; - - /** - * The direction of the compilation, 'lr' (left-to-right) or 'rl' - * (right-to-left) - */ - wstring direction; - - /** - * List of characters to be considered alphabetic - */ - wstring letters; - - /** - * Set verbose mode: warnings which may or may not be correct - */ - bool verbose; - - /** - * First element (of an entry) - */ - bool first_element; - - /** - * Identifier of all the symbols during the compilation - */ - Alphabet alphabet; - - /** - * List of named transducers-paradigms - */ - map paradigms; - - /** - * List of named dictionary sections - */ - map sections; - - /** - * List of named prefix copy of a paradigm - */ - map, Ltstr> prefix_paradigms; - - /** - * List of named suffix copy of a paradigm - */ - map, Ltstr> suffix_paradigms; - - /** - * List of named endings of a suffix copy of a paradgim - */ - map, Ltstr> postsuffix_paradigms; - - /** - * Mapping of aliases of characters specified in ACX files - */ - map > acx_map; - - /** - * Original char being mapped - */ - int acx_current_char; - - /* - static string range(char const a, char const b); - string readAlphabet(); - */ - - /** - * Method to parse an XML Node - */ - void procNode(); - - /** - * Method to parse an XML Node in ACX files - */ - void procNodeACX(); - - - /** - * Parse the <alphabet> element - */ - void procAlphabet(); - - /** - * Parse the <sdef< element - */ - void procSDef(); - - /** - * Parse the <pardef> element - */ - void procParDef(); - - /** - * Parse the <e> element - */ - void procEntry(); - - - /* - * added - */ - void procTag(); - void procChar(); - void procWb(); - - /** - * Parse the <re> element - * @return a list of tokens from the dictionary's entry - */ - EntryToken procRegexp(); - - /** - * Parse the <section> element - */ - void procSection(); - - /** - * Gets an attribute value with their name and the current context - * @param name the name of the attribute - * @return the value of the attribute - */ - wstring attrib(wstring const &name); - - /** - * Construct symbol pairs by align left side of both parts and insert - * them into a transducer - * @param lp left part of the transduction - * @param rp right part of the transduction - * @param state the state from wich insert the new transduction - * @param t the transducer - * @return the last state of the inserted transduction - */ - int matchTransduction(list const &lp, list const &rp, - int state, Transducer &t); - /** - * Parse the <p< element - * @return a list of tokens from the dictionary's entry - */ - EntryToken procTransduction(); - - /** - * Parse the <i< element - * @return a list of tokens from the dictionary's entry - */ - EntryToken procIdentity(); - - /** - * Parse the <par> element - * @return a list of tokens from the dictionary's entry - */ - EntryToken procPar(); - - /** - * Insert a list of tokens into the paradigm / section being processed - * @param elements the list - */ - void insertEntryTokens(vector const &elements); - - /** - * Skip all document #text nodes before "elem" - * @param name the name of the node - * @param elem the name of the expected node - */ - void skip(wstring &name, wstring const &elem); - - /** - * Skip all document #text nodes before "elem" - * @param name the name of the node - * @param elem the name of the expected node - * @param open true for open element, false for closed - */ - void skip(wstring &name, wstring const &elem, bool open); - - /** - * Skip all blank #text nodes before "name" - * @param name the name of the node - */ - void skipBlanks(wstring &name); - - - void readString(list &result, wstring const &name); - - /** - * Force an element to be empty, and check for it - * @param name the element - */ - void requireEmptyError(wstring const &name); - - /** - * Force an attribute to be specified, amd check for it - * @param value the value of the attribute - * @param attrname the name of the attribute - * @param elemname the parent of the attribute - */ - void requireAttribute(wstring const &value, wstring const &attrname, - wstring const &elemname); - - /** - * True if all the elements in the current node are blanks - * @return true if all are blanks - */ - bool allBlanks(); - -public: - - /* - * Constants to represent the element and the attributes of - * dictionaries - */ - static wstring const COMPILER_DICTIONARY_ELEM; - static wstring const COMPILER_ALPHABET_ELEM; - static wstring const COMPILER_SDEFS_ELEM; - static wstring const COMPILER_SDEF_ELEM; - static wstring const COMPILER_N_ATTR; - static wstring const COMPILER_PARDEFS_ELEM; - static wstring const COMPILER_PARDEF_ELEM; - static wstring const COMPILER_PAR_ELEM; - static wstring const COMPILER_ENTRY_ELEM; - static wstring const COMPILER_RESTRICTION_ATTR; - static wstring const COMPILER_RESTRICTION_LR_VAL; - static wstring const COMPILER_RESTRICTION_RL_VAL; - static wstring const COMPILER_PAIR_ELEM; - static wstring const COMPILER_LEFT_ELEM; - static wstring const COMPILER_RIGHT_ELEM; - static wstring const COMPILER_S_ELEM; - static wstring const COMPILER_REGEXP_ELEM; - static wstring const COMPILER_SECTION_ELEM; - static wstring const COMPILER_ID_ATTR; - static wstring const COMPILER_TYPE_ATTR; - static wstring const COMPILER_IDENTITY_ELEM; - static wstring const COMPILER_JOIN_ELEM; - static wstring const COMPILER_BLANK_ELEM; - static wstring const COMPILER_POSTGENERATOR_ELEM; - static wstring const COMPILER_GROUP_ELEM; - static wstring const COMPILER_LEMMA_ATTR; - static wstring const COMPILER_IGNORE_ATTR; - static wstring const COMPILER_IGNORE_YES_VAL; - static wstring const COMPILER_ALT_ATTR; - static wstring const COMPILER_V_ATTR; - static wstring const COMPILER_VL_ATTR; - static wstring const COMPILER_VR_ATTR; - - static wstring const COMPILER_ANYTAG_ELEM; - static wstring const COMPILER_ANYCHAR_ELEM; - static wstring const COMPILER_WB_ELEM; - - - /** - * Constructor - */ - Compiler(); - - /** - * Destructor - */ - ~Compiler(); - - /** - * Compile dictionary to letter transducers - * @param fichero file - * @param dir direction - */ - void parse(string const &fichero, wstring const &dir); - - /** - * Read ACX file - */ - void parseACX(string const &fichero, wstring const &dir); - - /** - * Write the result of compilation - * @param fd the stream where write the result - */ - void write(FILE *fd); - - /** - * Set verbose output - */ - void setVerbose(bool verbosity = false); - - /** - * Set the alt value to use in compilation - * @param a the value - */ - void setAltValue(string const &a); - - /** - * Set the variant value to use in compilation - * @param v the value - */ - void setVariantValue(string const &v); - - /** - * Set the variant_left value to use in compilation - * @param v the value - */ - void setVariantLeftValue(string const &v); - - /** - * Set the variant_right value to use in compilation - * @param v the value - */ - void setVariantRightValue(string const &v); -}; - - -#endif Index: branches/apertium-separable/src/compiler.cc =================================================================== --- branches/apertium-separable/src/compiler.cc (revision 80509) +++ branches/apertium-separable/src/compiler.cc (revision 80512) @@ -99,7 +99,6 @@ Compiler::parse(string const &fichero, wstring const &dir) { direction = dir; - wcout << direction << endl; //NOTE never prints reader = xmlReaderForFile(fichero.c_str(), NULL, 0); if(reader == NULL) @@ -110,13 +109,10 @@ int ret = xmlTextReaderRead(reader); - cout << ret << endl; //NOTE never prints - while(ret == 1) { procNode(); ret = xmlTextReaderRead(reader); - cout << ret << endl; //NOTE never prints } if(ret != 0) @@ -308,9 +304,6 @@ void Compiler::readString(list &result, wstring const &name) { - - wcout << name << endl; //NOTE never prints - if(name == L"#text") { wstring value = XMLParseUtil::towstring(xmlTextReaderConstValue(reader)); @@ -359,16 +352,17 @@ /* additions */ else if(name == COMPILER_ANYTAG_ELEM) { - // wstring symbol = L"<" + name + L">"; - result.push_back(alphabet(L"")); + result.push_back(alphabet(L"")); } else if(name == COMPILER_ANYCHAR_ELEM) { - result.push_back(alphabet(L"")); + result.push_back(alphabet(L"")); } else if(name == COMPILER_WB_ELEM) { requireEmptyError(name); - wstring symbol = L"<" + name + L">"; - result.push_back(alphabet(symbol)); + // wstring symbol = L"<" + name + L">"; + // result.push_back(alphabet(symbol)); + result.push_back(alphabet(L"")); + } else @@ -379,6 +373,9 @@ wcerr << L"anytag_elem: " << COMPILER_ANYTAG_ELEM << endl; exit(EXIT_FAILURE); } + // for (auto v : result) + // std::cout << v << " "; + // cout << endl; } void @@ -802,9 +799,9 @@ } } -void Compiler::procTag() {} //TODO +void Compiler::procAnytag() {} //TODO -void Compiler::procChar() {} //TODO +void Compiler::procAnychar() {} //TODO void Compiler::procWb() {} //TODO @@ -850,6 +847,7 @@ // HACER: optimizar el orden de ejecuci�n de esta ristra de "ifs" + // wcout << L"nombre: " << nombklsre << endl; if(nombre == L"#text") { /* ignorar */ @@ -890,15 +888,15 @@ { /* ignorar */ } - else if(nombre == COMPILER_ANYTAG_ELEM) { - procTag(); - } - else if(nombre == COMPILER_ANYCHAR_ELEM) { - procChar(); - } - else if(nombre == COMPILER_WB_ELEM) { - procWb(); - } + // else if(nombre == COMPILER_ANYTAG_ELEM) { + // procAnytag(); + // } + // else if(nombre == COMPILER_ANYCHAR_ELEM) { + // procAnychar(); + // } + // else if(nombre == COMPILER_WB_ELEM) { + // procWb(); + // } else { wcerr << L"Error (" << xmlTextReaderGetParserLineNumber(reader); Index: branches/apertium-separable/src/compiler.h =================================================================== --- branches/apertium-separable/src/compiler.h (revision 80509) +++ branches/apertium-separable/src/compiler.h (revision 80512) @@ -170,10 +170,18 @@ /* - * added + * Parse the Anytag element */ - void procTag(); - void procChar(); + void procAnytag(); + + /* + * Parse the Anychar element + */ + void procAnychar(); + + /* + * Parse the word boundary element + */ void procWb(); /** Index: branches/apertium-separable/src/lsx_compiler.cc =================================================================== --- branches/apertium-separable/src/lsx_compiler.cc (revision 80509) +++ branches/apertium-separable/src/lsx_compiler.cc (revision 80512) @@ -1,6 +1,3 @@ -// #ifndef _MYCOMPILER_ -// #define _MYCOMPILER_ - #include #include #include @@ -29,15 +26,9 @@ LtLocale::tryToSetLocale(); - cout << "1" << endl; - Compiler c; - c.parse(argv[1], L"lr"); + c.parse(argv[1], L"lr"); // writes to alphabet and transducer - - // xmlTextReaderPtr reader; - // reader = xmlReaderForFile("examples/new-example.dix", NULL, 0); - alphabet.includeSymbol(L""); alphabet.includeSymbol(L""); alphabet.includeSymbol(L""); Index: branches/apertium-separable/examples/new-example.dix =================================================================== --- branches/apertium-separable/examples/new-example.dix (revision 80509) +++ branches/apertium-separable/examples/new-example.dix (revision 80512) @@ -1,4 +1,8 @@ + + ÀÁÂÄÇÈÉÊËÌÍÎÏÑÒÓÔÖÙÚÛÜàáâäçèéêëìíîïñòóôöùúûüABCDEFGHIJKLMNOPQRSTUVW +XYZabcdefghijklmnopqrstuvwxyziǎšěřčžǔůýệướễăạảðÐđ + @@ -11,8 +15,10 @@ - + + + @@ -39,7 +45,7 @@ - +