commit a92163a7188eadd74dee286dea37efe89026a442 Author: Daniel Swanson Date: Fri Jun 11 11:36:05 2021 -0500 put perceptron code back to std::string diff --git a/apertium/feature_vec.h b/apertium/feature_vec.h index 18e848a..0df6b69 100644 --- a/apertium/feature_vec.h +++ b/apertium/feature_vec.h @@ -10,7 +10,7 @@ namespace Apertium { -typedef std::vector FeatureKey; +typedef std::vector FeatureKey; struct CompareFeatureKey { bool operator() (FeatureKey const& lhs, FeatureKey const& rhs) const; }; diff --git a/apertium/mtx_reader.cc b/apertium/mtx_reader.cc index 9281676..159e8cc 100644 --- a/apertium/mtx_reader.cc +++ b/apertium/mtx_reader.cc @@ -29,9 +29,6 @@ #include #include -typedef basic_istringstream uistringstream; -typedef basic_stringstream ustringstream; - // XML parsing function guideline // When control is pass to you, you might need to stepToTag // When delegating or returning control, step beyond yourself @@ -41,13 +38,13 @@ MTXReader::MTXReader(VM &spec) : spec(spec), in_global_defn(false), template_slot_counter(0), cur_feat(NULL) {} -size_t MTXReader::pushSetConst(UString &val) +size_t MTXReader::pushSetConst(std::string &val) { size_t set_idx = spec.set_consts.size(); - set s; - ustringstream val_ss(val); + set s; + std::stringstream val_ss(val); while (!val_ss.eof()) { - UString temp; + std::string temp; val_ss >> temp; s.insert(temp); } @@ -55,7 +52,7 @@ size_t MTXReader::pushSetConst(UString &val) return set_idx; } -size_t MTXReader::pushStrConst(UString &val) +size_t MTXReader::pushStrConst(std::string &val) { size_t str_idx = spec.str_consts.size(); spec.str_consts.push_back(val); @@ -109,15 +106,15 @@ void MTXReader::procCoarseTags() void MTXReader::procSetDef() { - UString name = attrib("name"_u); + std::string name = attrib_str("name"_u); stepToNextTag(); size_t set_idx = spec.set_consts.size(); spec.set_consts.push_back(VMSet()); VMSet &vm_set = spec.set_consts.back(); while (type != XML_READER_TYPE_END_ELEMENT) { - if (name == "set-member"_u) { - UString tag = attrib("tag"_u); - UString str = attrib("str"_u); + if (name == "set-member") { + std::string tag = attrib_str("tag"_u); + std::string str = attrib_str("str"_u); vm_set.insert(tag.empty() ? str : tag); } else { parseError("Expected set-member"_u); @@ -125,15 +122,15 @@ void MTXReader::procSetDef() stepToNextTag(); } set_names[name] = set_idx; - assert(name == "def-set"_u); + assert(name == "def-set"); stepToNextTag(); } void MTXReader::procStrDef() { - UString name = attrib("name"_u); - UString tag = attrib("tag"_u); - UString str = attrib("str"_u); + std::string name = attrib_str("name"_u); + std::string tag = attrib_str("tag"_u); + std::string str = attrib_str("str"_u); str_names[name] = pushStrConst(tag.empty() ? str : tag); stepPastSelfClosingTag("def-str"_u); } @@ -332,7 +329,7 @@ bool MTXReader::tryProcSlice(bool (MTXReader::*proc_inner)(bool)) bool MTXReader::tryProcArg(ExprType expr_type, bool allow_fail) { if (name == "var"_u) { - UString var_name = attrib("name"_u); + std::string var_name = attrib_str("name"_u); if (in_global_defn) { VarNVMap::const_iterator arg_name_it = template_arg_names.find(var_name); if (arg_name_it != template_arg_names.end()) { @@ -341,7 +338,7 @@ bool MTXReader::tryProcArg(ExprType expr_type, bool allow_fail) return true; } if (!allow_fail) { - parseError("No such argument "_u + var_name); + parseError("No such argument " + var_name); } } } @@ -351,12 +348,12 @@ bool MTXReader::tryProcArg(ExprType expr_type, bool allow_fail) bool MTXReader::tryProcVar(VM::StackValueType svt) { if (name == "var"_u) { - UString var_name = attrib("name"_u); + std::string var_name = attrib_str("name"_u); VarNVMap::const_iterator slot_names_it = slot_names.find(var_name); if (slot_names_it != slot_names.end()) { if (slot_types[slot_names_it->second] != svt) { - parseError("Variable "_u + var_name + " has the wrong type"_u); + parseError("Variable " + var_name + " has the wrong type"); } emitOpcode(VM::GETVAR); emitUInt(slot_names_it->second); @@ -364,17 +361,17 @@ bool MTXReader::tryProcVar(VM::StackValueType svt) return true; } - parseError("Variable "_u + var_name + " has not been set."_u); + parseError("Variable " + var_name + " has not been set."); } else if (!in_global_defn && name == "macro"_u) { // Get template data - UString var_name = attrib("name"_u); + std::string var_name = attrib_str("name"_u); VarNVMap::const_iterator template_name_it = template_slot_names.find(var_name); if (template_name_it == template_slot_names.end()) { - parseError("No such macro "_u + var_name); + parseError("No such macro " + var_name); } size_t templ_idx = template_name_it->second; if (template_slot_types[templ_idx] != svt) { - parseError("Macro "_u + var_name + " returns the wrong type"_u); + parseError("Macro " + var_name + " returns the wrong type"); } std::pair &templ_defn = template_defns[templ_idx]; // Get arg values @@ -676,19 +673,19 @@ MTXReader::getConstRef( const UString &lit_attr, const UString &what, VarNVMap &const_map, - size_t (MTXReader::*push_new)(UString&), + size_t (MTXReader::*push_new)(std::string&), bool& exists) { - UString const_name = attrib(ref_attr); + std::string const_name = attrib_str(ref_attr); if (!const_name.empty()) { exists = true; VarNVMap::iterator sit = const_map.find(const_name); if (sit == const_map.end()) { - parseError("No "_u + what + " named "_u + const_name); + parseError("No "_u + what + " named "_u + to_ustring(const_name.c_str())); } return sit->second; } - UString const_lit = attrib(lit_attr); + std::string const_lit = attrib_str(lit_attr); if (!const_lit.empty()) { exists = true; return (this->*push_new)(const_lit); @@ -732,13 +729,13 @@ MTXReader::getStrRef() } int -MTXReader::getInt(UString attr_name, bool& exists) +MTXReader::getInt(const UString& attr_name, bool& exists) { - UString int_lit = attrib(attr_name); + std::string int_lit = attrib_str(attr_name); if (!int_lit.empty()) { exists = true; int int_out; - ustringstream int_ss(int_lit); + stringstream int_ss(int_lit); int_ss >> int_out; return int_out; } @@ -753,12 +750,12 @@ MTXReader::getInt(bool& exists) } int -MTXReader::getInt(UString attr_name) +MTXReader::getInt(const UString& attr_name) { bool has_attr; int i = getInt(attr_name, has_attr); if (!has_attr) { - parseError("String required"_u); + parseError("String required"); } return i; } @@ -772,12 +769,12 @@ MTXReader::getInt() template void MTXReader::emitAttr( - UString what, GetT (MTXReader::*getter)(bool&), void (MTXReader::*emitter)(EmitT)) + std::string what, GetT (MTXReader::*getter)(bool&), void (MTXReader::*emitter)(EmitT)) { bool has_attr = false; GetT val = (this->*getter)(has_attr); if (!has_attr) { - parseError(what + " required"_u); + parseError(what + " required"); } (this->*emitter)(val); } @@ -785,19 +782,19 @@ MTXReader::emitAttr( void MTXReader::getAndEmitStrRef() { - emitAttr("String"_u, &MTXReader::getStrRef, &MTXReader::emitUInt); + emitAttr("String", &MTXReader::getStrRef, &MTXReader::emitUInt); } void MTXReader::getAndEmitSetRef() { - emitAttr("Set"_u, &MTXReader::getSetRef, &MTXReader::emitUInt); + emitAttr("Set", &MTXReader::getSetRef, &MTXReader::emitUInt); } void MTXReader::getAndEmitInt() { - emitAttr("Integer"_u, &MTXReader::getInt, &MTXReader::emitInt); + emitAttr("Integer", &MTXReader::getInt, &MTXReader::emitInt); } void @@ -805,7 +802,7 @@ MTXReader::procInst() { // XXX: There's no way to tell the difference between an empty and absent // attribute with the current lttoolbox xml code - UString op = attrib("opcode"_u); + std::string op = attrib_str("opcode"_u); std::transform(op.begin(), op.end(), op.begin(), ::toupper); emitOpcode(VM::opcode_values[op]); int val; @@ -974,8 +971,8 @@ MTXReader::procTypeExpr(ExprType expr_type) void MTXReader::procForEach(ExprType expr_type) { - UString var_name = attrib("as"_u); - if (var_name == ""_u) { + std::string var_name = attrib_str("as"_u); + if (var_name.empty()) { parseError("'as' attribute required for for-each."_u); } size_t slot_idx = slot_counter++; @@ -1057,20 +1054,20 @@ MTXReader::procDefMacro() cur_feat = &template_defns.back().first; cur_replacements = &template_defns.back().second; - UString var_name = attrib("as"_u); - if (var_name == ""_u) { + std::string var_name = attrib_str("as"_u); + if (var_name.empty()) { parseError("'as' attribute required for def-macro."_u); } template_slot_names[var_name] = template_slot_counter; template_arg_names.clear(); - UString args = attrib("args"_u); - uistringstream args_ss(args); + std::string args = attrib_str("args"_u); + std::istringstream args_ss(args); size_t arg_i = 0; for (; !args_ss.eof(); arg_i++) { - UString arg_name; + std::string arg_name; args_ss >> arg_name; - if (arg_name == ""_u) { + if (arg_name.empty()) { break; } template_arg_names[arg_name] = arg_i; @@ -1168,7 +1165,7 @@ MTXReader::parse() } if (name == "beam-width"_u) { size_t val; - uistringstream val_ss(attrib("val"_u)); + std::istringstream val_ss(attrib_str("val"_u)); val_ss >> val; spec.beam_width = val; } else { diff --git a/apertium/mtx_reader.h b/apertium/mtx_reader.h index b0e9dee..c500a26 100644 --- a/apertium/mtx_reader.h +++ b/apertium/mtx_reader.h @@ -43,7 +43,7 @@ class MTXReader : public XMLReader }; typedef PerceptronSpec VM; - typedef std::map VarNVMap; + typedef std::map VarNVMap; typedef std::vector > TemplateReplacements; typedef std::map >, size_t> InstanciationMap; typedef std::pair TemplateDefn; @@ -56,11 +56,11 @@ protected: virtual void parse(); private: - size_t pushSetConst(UString &val); - size_t pushStrConst(UString &val); - size_t getConstRef(const UString &ref_attr, const UString &lit_attr, - const UString &what, VarNVMap &const_map, - size_t (MTXReader::*push_new)(UString&), bool& exists); + size_t pushSetConst(std::string &val); + size_t pushStrConst(std::string &val); + size_t getConstRef(const UString& ref_attr, const UString& lit_attr, + const UString& what, VarNVMap &const_map, + size_t (MTXReader::*push_new)(std::string&), bool& exists); size_t getSetRef(bool& exists); size_t getSetRef(); size_t getStrRef(bool& exists); @@ -70,9 +70,9 @@ private: void pokeBytecode(size_t addr, VM::Bytecode bc); void emitInt(int val); void emitUInt(int val); - int getInt(UString attr_name, bool& exists); + int getInt(const UString& attr_name, bool& exists); int getInt(bool& exists); - int getInt(UString attr_name); + int getInt(const UString& attr_name); int getInt(); void procCoarseTags(); @@ -108,7 +108,7 @@ private: void procForEach(ExprType type); void procPred(); template void emitAttr( - UString what, GetT (MTXReader::*getter)(bool&), + std::string what, GetT (MTXReader::*getter)(bool&), void (MTXReader::*emitter)(EmitT)); void getAndEmitStrRef(); void getAndEmitSetRef(); diff --git a/apertium/perceptron_spec.cc b/apertium/perceptron_spec.cc index 7edeb66..378d22b 100644 --- a/apertium/perceptron_spec.cc +++ b/apertium/perceptron_spec.cc @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace Apertium { @@ -43,15 +45,15 @@ operator<<(std::ostream &out, PerceptronSpec const &ps) { return out; } -#define X(a) to_ustring(#a), -const UString PerceptronSpec::opcode_names[] = { +#define X(a) #a, +const std::string PerceptronSpec::opcode_names[] = { OPCODES }; #undef X -const UString PerceptronSpec::type_names[] = { - "integer"_u, "boolean"_u, "string"_u, "string array"_u, - "wordoid"_u, "wordoid array"_u +const std::string PerceptronSpec::type_names[] = { + "integer", "boolean", "string", "string array", + "wordoid", "wordoid array" }; static Morpheme make_sentinel_wordoid( @@ -93,7 +95,7 @@ PerceptronSpec::PerceptronSpec() { } untagged_sentinel = make_sentinel_wordoids("!UNTAGGED!"_u, "!UT!"_u); - token_wordoids_underflow = make_sentinel_token("!SURF_UNDERFLOW!"_u, "!TOK_UNDERFLOW!"_u, "!TUF!"_u); + token_wordoids_underflow = make_sentinel_token("!SURFNDERFLOW!"_u, "!TOKNDERFLOW!"_u, "!TUF!"_u); token_wordoids_overflow = make_sentinel_token("!SURF_OVERFLOW!"_u, "!TOK_OVERFLOW!"_u, "!TOF!"_u); static_constructed = true; @@ -102,7 +104,7 @@ PerceptronSpec::PerceptronSpec() { unsigned char PerceptronSpec::num_opcodes; bool PerceptronSpec::static_constructed = false; -std::map +std::map PerceptronSpec::opcode_values; std::vector PerceptronSpec::untagged_sentinel; LexicalUnit PerceptronSpec::token_wordoids_underflow; @@ -141,7 +143,7 @@ PerceptronSpec::get_features( feat_vec_delta.clear(); feat_vec_delta.push_back(FeatureKey()); FeatureKey &fk = feat_vec_delta.back(); - UString prg_id; + std::string prg_id; prg_id = i; fk.push_back(prg_id); // Each feature is tagged with the which created it to avoid collisions Machine machine( @@ -153,14 +155,16 @@ PerceptronSpec::get_features( } } -UString +std::string PerceptronSpec::coarsen(const Morpheme &wrd) const { - std::map::const_iterator it = coarsen_cache.find(wrd); + std::map::const_iterator it = coarsen_cache.find(wrd); if (it == coarsen_cache.end()) { UString coarse_tag = coarse_tags->coarsen(wrd); - coarsen_cache[wrd] = coarse_tag; - return coarse_tag; + std::string result; + utf8::utf16to8(coarse_tag.begin(), coarse_tag.end(), std::back_inserter(result)); + coarsen_cache[wrd] = result; + return result; } return it->second; } @@ -170,9 +174,9 @@ void PerceptronSpec::clearCache() const coarsen_cache.clear(); } -UString PerceptronSpec::dot = "."_u; +std::string PerceptronSpec::dot = "."; -const UString& +const std::string& PerceptronSpec::Machine::get_str_operand() { size_t idx = *(++bytecode_iter); if (idx == 255) { @@ -389,7 +393,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) loop_state.accumulator = StackValue(std::vector()); //std::cerr << "Wordoid array size " << loop_state.iterable.size() << "\n"; } else if (stack.top().type == STRVAL) { - loop_state.accumulator = StackValue(std::vector()); + loop_state.accumulator = StackValue(std::vector()); //std::cerr << "String array size " << loop_state.iterable.size() << "\n"; } else { throw 1; @@ -477,26 +481,30 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) } break; case EXTOKSURF: { UString surf = get_token(untagged).TheSurfaceForm; - stack.push(surf); + std::string temp; + utf8::utf16to8(surf.begin(), surf.end(), std::back_inserter(temp)); + stack.push(temp); } break; case EXWRDLEMMA: { UString lemma = stack.pop_off().wrd().TheLemma; - stack.push(lemma); + std::string temp; + utf8::utf16to8(lemma.begin(), lemma.end(), std::back_inserter(temp)); + stack.push(temp); } break; case EXWRDCOARSETAG: { assert(spec.coarse_tags); Morpheme &wrd = stack.top().wrd(); - UString coarse_tag = spec.coarsen(wrd); + std::string coarse_tag = spec.coarsen(wrd); stack.pop(); stack.push(coarse_tag); } break; case EXAMBGSET: { assert(spec.coarse_tags); - std::vector ambgset; + std::vector ambgset; const std::vector &analyses = get_token(untagged).TheAnalyses; std::vector::const_iterator analy_it; for (analy_it = analyses.begin(); analy_it != analyses.end(); analy_it++) { - ambgset.push_back(UString()); + ambgset.push_back(std::string()); const std::vector &wrds = analy_it->TheMorphemes; std::vector::const_iterator wrd_it = wrds.begin(); while (true) { @@ -519,7 +527,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) std::cerr << &(*it) << " " << it->TheTag << ", "; } std::cerr << "\n";*/ - std::vector *tags_str = new std::vector; + std::vector *tags_str = new std::vector; tags_str->resize(tags.size()); transform(tags.begin(), tags.end(), tags_str->begin(), get_tag); stack.pop(); @@ -531,7 +539,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) case SENTLENTAGGEDTOK: stack.push((int)tagged.size()); break; - case SENTLENWRD: unimplemented_opcode("SENTLENWRD"_u); break; // How can we know? + case SENTLENWRD: unimplemented_opcode("SENTLENWRD"); break; // How can we know? case TOKLENWRD: { int target_token_idx = stack.pop_off().intVal(); assert(0 <= target_token_idx && (size_t)target_token_idx < tagged.size()); @@ -568,20 +576,20 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) } break; case FILTERIN: { const VMSet& set_op = get_set_operand(); - std::vector &str_arr = stack.top().strArr(); + std::vector &str_arr = stack.top().strArr(); str_arr.erase(std::remove_if( str_arr.begin(), str_arr.end(), std::not1(In(set_op)))); } break; /* case SETHAS: { const VMSet& set_op = get_set_operand(); - UString str = stack.pop_off().str(); + std::string str = stack.pop_off().str(); stack.push(set_op.find(str) != set_op.end()); } break; */ case SETHASANY: { const VMSet& set_op = get_set_operand(); - std::vector str_arr = stack.pop_off().strArr(); + std::vector str_arr = stack.pop_off().strArr(); stack.push( std::find_if(str_arr.begin(), str_arr.end(), In(set_op)) != str_arr.end() @@ -589,23 +597,25 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) } break; case SETHASALL: { const VMSet& set_op = get_set_operand(); - std::vector str_arr = stack.pop_off().strArr(); + std::vector str_arr = stack.pop_off().strArr(); stack.push( std::find_if(str_arr.begin(), str_arr.end(), std::not1(In(set_op))) == str_arr.end() ); } break; case HASSUBSTR: { - UString haystack = stack.pop_off().str(); - UString needle = get_str_operand(); - stack.push(haystack.find(needle) != UString::npos); + std::string haystack = stack.pop_off().str(); + std::string needle = get_str_operand(); + stack.push(haystack.find(needle) != std::string::npos); } break; - case HASANYSUBSTR: unimplemented_opcode("HASANYSUBSTR"_u); break; - case CPYSTR: unimplemented_opcode("CPYSTR"_u); break; + case HASANYSUBSTR: unimplemented_opcode("HASANYSUBSTR"); break; + case CPYSTR: unimplemented_opcode("CPYSTR"); break; case LOWER: { - // XXX: Eek! Bad! No Unicode. ICU please. - UString &str = stack.top().str(); - std::transform(str.begin(), str.end(), str.begin(), ::tolower); + UString str = to_ustring(stack.pop_off().str().c_str()); + UString low = StringUtils::tolower(str); + std::string tmp; + utf8::utf16to8(low.begin(), low.end(), std::back_inserter(tmp)); + stack.push(tmp); } break; case SLICE: { int begin = get_int_operand(); @@ -627,7 +637,7 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) } } break; case STRLEN: { - UString str = stack.pop_off().str(); + std::string str = stack.pop_off().str(); stack.push((int)str.length()); } break; case ARRLEN: { @@ -635,9 +645,9 @@ PerceptronSpec::Machine::execCommonOp(Opcode op) stack.push(str_arr_len); } break; case JOIN: { - const UString &sep = get_str_operand(); - std::vector str_arr = stack.pop_off().strArr(); - UString ss; + const std::string &sep = get_str_operand(); + std::vector str_arr = stack.pop_off().strArr(); + std::string ss; for (auto& it : str_arr) { if (!ss.empty()) { ss.append(sep); @@ -669,14 +679,14 @@ PerceptronSpec::Machine::getFeature( } break; case FCATSTRARR: { - std::vector &str_arr = stack.top().strArr(); + std::vector &str_arr = stack.top().strArr(); if (str_arr.size() == 0) { feat_vec_out.clear(); return; } else { UnaryFeatureVec new_feat_vec; new_feat_vec.reserve(feat_vec_out.size() * str_arr.size()); - std::vector::const_iterator str_arr_it; + std::vector::const_iterator str_arr_it; for (str_arr_it = str_arr.begin(); str_arr_it != str_arr.end(); str_arr_it++) { UnaryFeatureVec::iterator append_begin_it = new_feat_vec.end(); std::copy(feat_vec_out.begin(), feat_vec_out.end(), @@ -689,20 +699,20 @@ PerceptronSpec::Machine::getFeature( stack.pop(); } break; case FCATSTR: { - UString &str = stack.top().str(); + std::string &str = stack.top().str(); appendStr(feat_vec_out, str); stack.pop(); } break; case FCATBOOL: { bool b = stack.top().boolVal(); - appendStr(feat_vec_out, b ? "t"_u : "f"_u); + appendStr(feat_vec_out, b ? "t" : "f"); stack.pop(); } break; case FCATINT: { int i = stack.top().intVal(); stringstream ss; ss << i; - appendStr(feat_vec_out, to_ustring(ss.str().c_str())); + appendStr(feat_vec_out, ss.str()); stack.pop(); } break; default: @@ -731,11 +741,10 @@ PerceptronSpec::Machine::getValue() } void -PerceptronSpec::Machine::unimplemented_opcode(UString opstr) { +PerceptronSpec::Machine::unimplemented_opcode(std::string opstr) { int bytecode_idx = bytecode_iter - feat.begin(); std::stringstream msg; - msg << "Unimplemented opcode: "; - ::operator<<(msg, opstr); // namespace issue + msg << "Unimplemented opcode: " << opstr; msg << " at " << (is_feature ? "feature" : "global") << " #" << feat_idx << " address #" << bytecode_idx; throw Apertium::Exception::apertium_tagger::UnimplementedOpcode(msg); } @@ -743,40 +752,42 @@ PerceptronSpec::Machine::unimplemented_opcode(UString opstr) { PerceptronSpec::In::In(const VMSet &haystack) : haystack(haystack) {}; bool -PerceptronSpec::In::operator() (const UString &needle) const { +PerceptronSpec::In::operator() (const std::string &needle) const { return haystack.find(needle) != haystack.end(); }; void PerceptronSpec::appendStr(UnaryFeatureVec &feat_vec, - const UString &tail_str) { + const std::string &tail_str) { appendStr(feat_vec.begin(), feat_vec.end(), tail_str); } void PerceptronSpec::appendStr(UnaryFeatureVec::iterator begin, UnaryFeatureVec::iterator end, - const UString &tail_str) { + const std::string &tail_str) { for (;begin != end; begin++) { begin->push_back(tail_str); } } -UString +std::string PerceptronSpec::Machine::get_tag(const Tag &in) { - return in.TheTag; + std::string result; + utf8::utf16to8(in.TheTag.begin(), in.TheTag.end(), std::back_inserter(result)); + return result; } void PerceptronSpec::serialiseFeatDefn( std::ostream &serialised, const FeatureDefn &defn) const { - Serialiser::serialise( - UString((UChar*)&(defn.front()), defn.size()), + Serialiser::serialise( + std::string((char*)&(defn.front()), defn.size()), serialised); } void PerceptronSpec::deserialiseFeatDefn( std::istream &serialised, FeatureDefn &feat) { - UString feat_str = Deserialiser::deserialise(serialised); + std::string feat_str = Deserialiser::deserialise(serialised); feat.reserve(feat_str.size()); - UString::iterator feat_str_it; + std::string::iterator feat_str_it; for (feat_str_it = feat_str.begin(); feat_str_it != feat_str.end(); feat_str_it++) { feat.push_back(*feat_str_it); } @@ -803,7 +814,7 @@ void PerceptronSpec::deserialiseFeatDefnVec( void PerceptronSpec::serialise(std::ostream &serialised) const { Serialiser::serialise(beam_width, serialised); - Serialiser >::serialise(str_consts, serialised); + Serialiser >::serialise(str_consts, serialised); Serialiser >::serialise(set_consts, serialised); serialiseFeatDefnVec(serialised, features); serialiseFeatDefnVec(serialised, global_defns); @@ -818,7 +829,7 @@ void PerceptronSpec::serialise(std::ostream &serialised) const { void PerceptronSpec::deserialise(std::istream &serialised) { beam_width = Deserialiser::deserialise(serialised); - str_consts = Deserialiser >::deserialise(serialised); + str_consts = Deserialiser >::deserialise(serialised); set_consts = Deserialiser >::deserialise(serialised); deserialiseFeatDefnVec(serialised, features); deserialiseFeatDefnVec(serialised, global_defns); diff --git a/apertium/perceptron_spec.h b/apertium/perceptron_spec.h index a52d8b3..5d67748 100644 --- a/apertium/perceptron_spec.h +++ b/apertium/perceptron_spec.h @@ -27,7 +27,7 @@ using namespace Apertium::SentenceStream; namespace Apertium { -typedef std::set VMSet; +typedef std::set VMSet; class PerceptronSpec { public: @@ -168,9 +168,9 @@ public: #undef X static bool static_constructed; static unsigned char num_opcodes; - static const UString opcode_names[]; - static const UString type_names[]; - static std::map opcode_values; + static const std::string opcode_names[]; + static const std::string type_names[]; + static std::map opcode_values; static std::vector untagged_sentinel; static LexicalUnit token_wordoids_underflow; static LexicalUnit token_wordoids_overflow; @@ -192,10 +192,9 @@ public: break; case STRARRVAL: { out << "["; - std::vector &str_arr = val.strArr(); - std::vector::const_iterator it = str_arr.begin(); - for (; it != str_arr.end(); it++) { - out << it->c_str(); + std::vector &str_arr = val.strArr(); + for (auto& it : str_arr) { + out << it; } out << "]"; } break; @@ -205,9 +204,8 @@ public: case WRDARRVAL: { out << "["; std::vector &wrd_arr = val.wrdArr(); - std::vector::const_iterator it = wrd_arr.begin(); - for (; it != wrd_arr.end(); it++) { - out << *it; + for (auto& it : wrd_arr) { + out << it; } out << "]"; } break; @@ -230,11 +228,11 @@ public: type = other.type; switch (type) { case STRVAL: - payload.strval = new UString(*other.payload.strval); + payload.strval = new std::string(*other.payload.strval); break; case STRARRVAL: payload.strarrval = - new std::vector(*other.payload.strarrval); + new std::vector(*other.payload.strarrval); break; case WRDVAL: payload.wrdval = new Morpheme(*other.payload.wrdval); @@ -260,12 +258,12 @@ public: payload.bval = bval; type = BVAL; } - StackValue(const UString &strval) { - payload.strval = new UString(strval); + StackValue(const std::string& strval) { + payload.strval = new std::string(strval); type = STRVAL; } - StackValue(const std::vector &strarrval) { - payload.strarrval = new std::vector(strarrval); + StackValue(const std::vector &strarrval) { + payload.strarrval = new std::vector(strarrval); type = STRARRVAL; } StackValue(const Morpheme &wordoid) { @@ -290,11 +288,11 @@ public: payload.wrdarrval = new std::vector(wordoids); type = WRDARRVAL; } - StackValue(UString *strval) { + StackValue(std::string* strval) { payload.strval = strval; type = STRVAL; } - StackValue(std::vector *strarrval) { + StackValue(std::vector* strarrval) { payload.strarrval = strarrval; type = STRARRVAL; } @@ -331,11 +329,11 @@ public: assert(type == BVAL); return payload.bval; } - UString& str() const { + std::string& str() const { assert(type == STRVAL); return *payload.strval; } - std::vector& strArr() const { + std::vector& strArr() const { assert(type == STRARRVAL); return *payload.strarrval; } @@ -366,8 +364,8 @@ public: union StackValueUnion { int intval; bool bval; - UString* strval; - std::vector* strarrval; + std::string* strval; + std::vector* strarrval; Morpheme* wrdval; std::vector* wrdarrval; } payload; @@ -379,8 +377,8 @@ public: signed char intbyte : 8; }; Optional coarse_tags; - static UString dot; - std::vector str_consts; + static std::string dot; + std::vector str_consts; std::vector set_consts; mutable std::vector global_results; std::vector global_defns; @@ -390,10 +388,10 @@ public: const TaggedSentence &tagged, const Sentence &untagged, int token_idx, int wordoid_idx, UnaryFeatureVec &feat_vec_out) const; - UString coarsen(const Morpheme &wrd) const; + std::string coarsen(const Morpheme &wrd) const; void clearCache() const; int beam_width; - mutable std::map coarsen_cache; + mutable std::map coarsen_cache; private: class MachineStack { std::deque data; @@ -454,15 +452,15 @@ private: }; std::deque loop_stack; std::vector slots; - void unimplemented_opcode(UString opstr); + void unimplemented_opcode(std::string opstr); const LexicalUnit& get_token(const Sentence &untagged); const std::vector& tagged_to_wordoids(const TaggedToken &tt); const Morpheme& get_wordoid(const TaggedSentence &tagged); const VMSet& get_set_operand(); int get_int_operand(); unsigned int get_uint_operand(); - const UString& get_str_operand(); - static UString get_tag(const Tag &in); + const std::string& get_str_operand(); + static std::string get_tag(const Tag &in); bool execCommonOp(Opcode op); public: void traceMachineState(); @@ -478,16 +476,16 @@ private: int token_idx, int wordoid_idx); }; - struct In : public std::unary_function { + struct In : public std::unary_function { const VMSet& haystack; In(const VMSet &haystack); - bool operator() (const UString &needle) const; + bool operator() (const std::string &needle) const; }; static void appendStr(UnaryFeatureVec &feat_vec, - const UString &tail_str); + const std::string &tail_str); static void appendStr(UnaryFeatureVec::iterator begin, UnaryFeatureVec::iterator end, - const UString &tail_str); + const std::string &tail_str); void serialiseFeatDefn( std::ostream &serialised, const FeatureDefn &defn) const; void deserialiseFeatDefn( diff --git a/apertium/xml_reader.cc b/apertium/xml_reader.cc index f0a542b..875a484 100644 --- a/apertium/xml_reader.cc +++ b/apertium/xml_reader.cc @@ -69,12 +69,27 @@ XMLReader::attrib(UString const &name) return XMLParseUtil::attrib(reader, name); } +std::string +XMLReader::attrib_str(const UString& name) +{ + return XMLParseUtil::attrib_str(reader, name); +} + void XMLReader::parseError(UString const &message) { cerr << "Error at line " << xmlTextReaderGetParserLineNumber(reader) - << ", column " << xmlTextReaderGetParserColumnNumber(reader) - << ": " << message << "." << endl; + << ", column " << xmlTextReaderGetParserColumnNumber(reader) + << ": " << message << "." << endl; + exit(EXIT_FAILURE); +} + +void +XMLReader::parseError(const std::string& message) +{ + cerr << "Error at line " << xmlTextReaderGetParserLineNumber(reader) + << ", column " << xmlTextReaderGetParserColumnNumber(reader) + << ": " << message << "." << endl; exit(EXIT_FAILURE); } diff --git a/apertium/xml_reader.h b/apertium/xml_reader.h index 7ff74d0..3ad28c9 100644 --- a/apertium/xml_reader.h +++ b/apertium/xml_reader.h @@ -35,7 +35,9 @@ protected: int type; UString name; UString attrib(UString const &name); + string attrib_str(const UString& name); void parseError(UString const &message); + void parseError(const string& message); void unexpectedTag(); void stepToTag(); void stepPastSelfClosingTag(UString const &tag);