commit c8fde4e6d07ef030e2dac797051f5829f447e8d2 Author: Ahmed Siam Date: Mon Jul 3 16:32:41 2023 +0300 i18n of lttoolbox patch 1 diff --git a/locales/root.txt b/locales/root.txt index 67d1a00..eb2bf09 100644 --- a/locales/root.txt +++ b/locales/root.txt @@ -26,5 +26,36 @@ root{ weight_classes_desc{"Output no more than N best weight classes (where analyses with equal weight constitute a class)"} compound_max_elements_desc{"Set compound max elements"} help_desc{"show this help"} - LTTB1000{"LTTB1000: Invalid or no argument for {option}"} + usage{"USAGE: "} + version{" version "} + + LTTB1000{"ERROR LTTB1000: Invalid or no argument for {option}"} + LTTB1001{"ERROR LTTB1001: In {node_doc_url} on line {line_number}: Missing value attribute."} + LTTB1002{"ERROR LTTB1002: In {node_doc_url} on line {line_number}: Expected a single character in value attribute, but found {value_size}."} + LTTB1003{"ERROR LTTB1003: In {node_doc_url} on line {line_number}: Expected <char> but found <{node_name}>."} + LTTB1004{"ERROR LTTB1004: In {node_doc_url} on line {line_number}: Expected <equiv-char> but found <{node_name}>."} + LTTB1005{"ERROR LTTB1005: Unable to access \"{file_name}\"."} + LTTB1006{"ERROR LTTB1006: Invalid format in file \"{file_name}\" on line {line_number}."} + LTTB1007{"WARNING LTTB1007: Multiple fsts in \"{file_name}\" will be disjuncted."} + LTTB1008{"ERROR LTTB1008: Transducer contains epsilon transition to a final state. Aborting."} + LTTB1009{"ERROR LTTB1009: Transducer contains initial epsilon loop. Aborting."} + LTTB1010{"ERROR LTTB1010: Cannot create empty buffer."} + LTTB1011{"ERROR LTTB1011: Parse error at the end of input."} + LTTB1012{"ERROR LTTB1012: Invalid dictionary (hint: the {side} side of an entry is empty)."} + LTTB1013{"ERROR LTTB1013: Invalid dictionary (hint: entry on the {side} beginning with whitespace)."} + LTTB1014{"ERROR LTTB1014: On line {line_number}: Missing alphabet symbols."} + LTTB1015{"WARNING LTTB1015: Cannot insert from empty input. Ignoring. 
(You probably want to specify exact tags when deleting a word.)"} + LTTB1016{"ERROR LTTB1016: On line {line_number}: Non-empty element \"<{name}>\" should be empty."} + LTTB1017{"ERROR LTTB1017: On line {line_number}: Undefined symbol \"{symbol}\"."} + LTTB1018{"ERROR LTTB1018: On line {line_number}: Invalid specification of element \"<{name}>\" in this context."} + LTTB1019{"ERROR LTTB1019: On line {line_number}: Invalid construction."} + LTTB1020{"ERROR LTTB1020: On line {line_number}: Expected \"<{slash_element}>\"."} + LTTB1021{"ERROR LTTB1021: On line {line_number}: Entry begins with space."} + LTTB1022{"ERROR LTTB1022: On line {line_number}: Paradigm refers to itself \"{paradigm_name}\"."} + LTTB1023{"ERROR LTTB1023: On line {line_number}: Undefined paradigm \"{paradigm_name}\"."} + LTTB1024{"ERROR LTTB1024: On line {line_number}: Invalid entry token."} + LTTB1025{"ERROR LTTB1025: On line {line_number}: \"<{element_name}>\" element must specify non-void \"{attr_name}\" attribute."} + LTTB1026{"ERROR LTTB1026: On line {line_number}: Parse error."} + LTTB1027{"ERROR LTTB1027: On line {line_number}: Invalid inclusion of \"<{element_name}>\" into \"<{compiler_entry_element}>\"."} + LTTB1028{"ERROR LTTB1028: On line {line_number}: Invalid node \"<{element_name}>\"."} } diff --git a/lttoolbox/acx.cc b/lttoolbox/acx.cc index 35fef11..4a46834 100644 --- a/lttoolbox/acx.cc +++ b/lttoolbox/acx.cc @@ -16,6 +16,9 @@ */ #include #include +#include +#include +#include const xmlChar* CHAR_NODE = (const xmlChar*)"char"; const xmlChar* EQUIV_NODE = (const xmlChar*)"equiv-char"; @@ -23,33 +26,49 @@ const char* VALUE_ATTR = "value"; int32_t get_val(xmlNode* node) { + I18n i18n {LOCALES_DATA}; UString s = getattr(node, VALUE_ATTR); if (s.empty()) { - error_and_die(node, "Missing value attribute."); + //error_and_die(node, i18n.format("LTTB1001")); + std::cerr << i18n.format("LTTB1001", {"node_doc_url", "line_number"}, + {(char*)node->doc->URL, node->line}) << std::endl; + 
exit(EXIT_FAILURE); } std::vector v; ustring_to_vec32(s, v); if (v.size() > 1) { - error_and_die(node, "Expected a single character in value attribute, but found %d.", v.size()); + //error_and_die(node, i18n.format("LTTB1002"), v.size()); + std::cerr << i18n.format("LTTB1002", {"node_doc_url", "line_number", "value_size"}, + {(char*)node->doc->URL, node->line, std::to_string(v.size()).c_str()}) << std::endl; + exit(EXIT_FAILURE); } return v[0]; } std::map> readACX(const char* file) { + I18n i18n {LOCALES_DATA}; std::map> acx; xmlNode* top_node = load_xml(file); for (auto char_node : children(top_node)) { if (!xmlStrEqual(char_node->name, CHAR_NODE)) { - error_and_die(char_node, "Expected but found <%s>.", - (const char*)char_node->name); + //error_and_die(char_node, i18n.format("LTTB1003"), + // (const char*)char_node->name); + std::cerr << i18n.format("LTTB1003", {"node_doc_url", "line_number", "node_name"}, + {(char*)char_node->doc->URL, char_node->line, (const char*)char_node->name}) + << std::endl; + exit(EXIT_FAILURE); } int32_t key = get_val(char_node); sorted_vector vec; for (auto equiv_node : children(char_node)) { if (!xmlStrEqual(equiv_node->name, EQUIV_NODE)) { - error_and_die(char_node, "Expected but found <%s>.", - (const char*)equiv_node->name); + //error_and_die(char_node, i18n.format("LTTB1004"), + // (const char*)equiv_node->name); + std::cerr << i18n.format("LTTB1004", {"node_doc_url", "line_number", "node_name"}, + {(char*)char_node->doc->URL, char_node->line, (const char*)equiv_node->name}) + << std::endl; + exit(EXIT_FAILURE); } vec.insert(get_val(equiv_node)); } diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc index db6e283..02643ec 100644 --- a/lttoolbox/att_compiler.cc +++ b/lttoolbox/att_compiler.cc @@ -27,10 +27,12 @@ #include #include #include +#include +#include using namespace icu; -AttCompiler::AttCompiler() +AttCompiler::AttCompiler(): i18n(LOCALES_DATA) {} AttCompiler::~AttCompiler() @@ -148,7 +150,7 @@ 
AttCompiler::parse(std::string const &file_name, bool read_rl) UFILE* infile = u_fopen(file_name.c_str(), "r", NULL, NULL); if (infile == NULL) { - std::cerr << "Error: unable to open '" << file_name << "' for reading." << std::endl; + std::cerr << i18n.format("LTTB1005", {"file_name"}, {file_name.c_str()}) << std::endl; } std::vector tokens; bool first_line_in_fst = true; // First line -- see below @@ -185,7 +187,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl) if (first_line_in_fst && tokens.size() == 1) { - std::cerr << "Error: invalid format in file '" << file_name << "' on line " << line_number << "." << std::endl; + std::cerr << i18n.format("LTTB1006", {"file_name", "line_number"}, {file_name.c_str(), line_number}) << std::endl; exit(EXIT_FAILURE); } @@ -193,7 +195,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl) { if (state_id_offset == 1) { // this is the first split we've seen - std::cerr << "Warning: Multiple fsts in '" << file_name << "' will be disjuncted." << std::endl; + std::cerr << i18n.format("LTTB1007", {"file_name"}, {file_name.c_str()}) << std::endl; multiple_transducers = true; } // Update the offset for the new FST @@ -421,7 +423,7 @@ TransducerType AttCompiler::classify_backwards(int state, std::set& path) { if(finals.find(state) != finals.end()) { - std::cerr << "ERROR: Transducer contains epsilon transition to a final state. Aborting." << std::endl; + std::cerr << i18n.format("LTTB1008") << std::endl; exit(EXIT_FAILURE); } AttNode* node = get_node(state); @@ -430,7 +432,7 @@ AttCompiler::classify_backwards(int state, std::set& path) if(t1.type != UNDECIDED) { type |= t1.type; } else if(path.find(t1.to) != path.end()) { - std::cerr << "ERROR: Transducer contains initial epsilon loop. Aborting." 
<< std::endl; + std::cerr << i18n.format("LTTB1009") << std::endl; exit(EXIT_FAILURE); } else { path.insert(t1.to); diff --git a/lttoolbox/att_compiler.h b/lttoolbox/att_compiler.h index 4d4af15..1a7e22d 100644 --- a/lttoolbox/att_compiler.h +++ b/lttoolbox/att_compiler.h @@ -29,6 +29,7 @@ #include #include +#include #define UNDECIDED 0 #define WORD 1 @@ -137,6 +138,8 @@ private: /** Stores the transducer graph. */ std::map graph; + I18n i18n; + /** Clears the data associated with the current transducer. */ void clear(); diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h index 6ea1f64..5a6c46b 100644 --- a/lttoolbox/buffer.h +++ b/lttoolbox/buffer.h @@ -20,7 +20,8 @@ #include #include #include - +#include +#include /** * Generic circular buffer class */ @@ -71,9 +72,10 @@ public: */ Buffer(unsigned int const buf_size=2048) { + I18n i18n {LOCALES_DATA}; if(buf_size == 0) { - std::cerr << "Error: Cannot create empty buffer." << std::endl; + std::cerr << i18n.format("LTTB1010") << std::endl; exit(EXIT_FAILURE); } buf = new T[buf_size]; diff --git a/lttoolbox/cli.cc b/lttoolbox/cli.cc index 28f347a..f5ee038 100644 --- a/lttoolbox/cli.cc +++ b/lttoolbox/cli.cc @@ -24,6 +24,7 @@ #include #include #include +#include CLI::CLI(icu::UnicodeString desc, std::string ver) { @@ -67,13 +68,14 @@ void CLI::set_epilog(std::string e) void CLI::print_usage(std::ostream& out) { + I18n i18n {LOCALES_DATA}; if (!prog_name.empty()) { out << prog_name; if (!version.empty()) { out << " v" << version; } out << ": " << description << std::endl; - out << "USAGE: " << prog_name; + out << i18n.format("usage") << prog_name; std::string bargs; std::string sargs; for (auto& it : options) { @@ -123,6 +125,7 @@ void CLI::print_usage(std::ostream& out) void CLI::parse_args(int argc, char* argv[]) { + I18n i18n {LOCALES_DATA}; prog_name = basename(argv[0]); std::string arg_str; #if HAVE_GETOPT_LONG @@ -153,7 +156,7 @@ void CLI::parse_args(int argc, char* argv[]) if (it.short_opt == cnt) { found = 
true; if (it.short_opt == 'v' && it.long_opt == "version") { - std::cout << prog_name << " version " << version << std::endl; + std::cout << prog_name << i18n.format("version") << version << std::endl; exit(EXIT_SUCCESS); } if (it.is_bool) { diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc index 20f75c9..f2f1515 100644 --- a/lttoolbox/compiler.cc +++ b/lttoolbox/compiler.cc @@ -23,8 +23,10 @@ #include #include +#include +#include -Compiler::Compiler() +Compiler::Compiler(): i18n(LOCALES_DATA) { } @@ -61,7 +63,7 @@ Compiler::parse(std::string const &file, UStringView dir) if(ret != 0) { - std::cerr << "Error: Parse error at the end of input." << std::endl; + std::cerr << i18n.format("LTTB1011") << std::endl; } xmlFreeTextReader(reader); @@ -109,6 +111,7 @@ Compiler::parse(std::string const &file, UStringView dir) bool Compiler::valid(UStringView dir) const { + I18n i18n {LOCALES_DATA}; const char* side = (dir == COMPILER_RESTRICTION_RL_VAL ? "right" : "left"); const std::set epsilonSymbols = alphabet.symbolsWhereLeftIs(0); const std::set spaceSymbols = alphabet.symbolsWhereLeftIs(' '); @@ -118,11 +121,11 @@ Compiler::valid(UStringView dir) const auto initial = fst.getInitial(); for(const auto i : fst.closure(initial, epsilonSymbols)) { if (finals.count(i)) { - std::cerr << "Error: Invalid dictionary (hint: the " << side << " side of an entry is empty)" << std::endl; + std::cerr << i18n.format("LTTB1012", {"side"}, {side}) << std::endl; return false; } if(fst.closure(i, spaceSymbols).size() > 1) { // >1 since closure always includes self - std::cerr << "Error: Invalid dictionary (hint: entry on the " << side << " beginning with whitespace)" << std::endl; + std::cerr << i18n.format("LTTB1013", {"side"}, {side}) << std::endl; return false; } } @@ -157,8 +160,7 @@ Compiler::procAlphabet() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Missing alphabet symbols." 
<< std::endl; + std::cerr << i18n.format("LTTB1014", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) << std::endl; exit(EXIT_FAILURE); } } @@ -273,7 +275,7 @@ Compiler::matchTransduction(std::vector const &pi, // rl compilation of a badly written rule // having an epsilon with wildcard output will produce // garbage output -- see https://github.com/apertium/apertium-separable/issues/8 - std::cerr << "Warning: Cannot insert from empty input. Ignoring. (You probably want to specify exact tags when deleting a word.)" << std::endl; + std::cerr << i18n.format("LTTB1015") << std::endl; } else if (tag == alphabet(any_tag, any_tag) || tag == alphabet(any_char, any_char) || tag == alphabet(any_tag, 0) || @@ -302,8 +304,8 @@ Compiler::requireEmptyError(UStringView name) { if(!xmlTextReaderIsEmptyElement(reader)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Non-empty element '<" << name << ">' should be empty." << std::endl; + std::cerr << i18n.format("LTTB1016", {"line_number", "name"}, {xmlTextReaderGetParserLineNumber(reader), /* pass the length explicitly: a UStringView is not guaranteed to be NUL-terminated */ icu::UnicodeString(name.data(), static_cast<int32_t>(name.size()))}) + << std::endl; exit(EXIT_FAILURE); } } @@ -359,8 +361,9 @@ Compiler::readString(std::vector &result, UStringView name) if(!alphabet.isSymbolDefined(symbol)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined symbol '" << symbol << "'." << std::endl; + std::cerr << i18n.format("LTTB1017", {"line_number", "symbol"}, + {xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(symbol.data())}) + << std::endl; exit(EXIT_FAILURE); } @@ -387,9 +390,8 @@ Compiler::readString(std::vector &result, UStringView name) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid specification of element '<" << name; - std::cerr << ">' in this context." 
<< std::endl; + std::cerr << i18n.format("LTTB1018", {"line_number", "name"}, {xmlTextReaderGetParserLineNumber(reader), /* pass the length explicitly: a UStringView is not guaranteed to be NUL-terminated */ icu::UnicodeString(name.data(), static_cast<int32_t>(name.size()))}) + << std::endl; exit(EXIT_FAILURE); } } @@ -403,8 +405,8 @@ Compiler::skipBlanks(UString &name) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; + std::cerr << i18n.format("LTTB1019", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) + << std::endl; exit(EXIT_FAILURE); } } @@ -432,8 +434,8 @@ Compiler::skip(UString &name, UStringView elem, bool open) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; + std::cerr << i18n.format("LTTB1019", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) + << std::endl; exit(EXIT_FAILURE); } } @@ -443,8 +445,10 @@ Compiler::skip(UString &name, UStringView elem, bool open) if(name != elem) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Expected '<" << slash << elem << ">'." << std::endl; + std::cerr << i18n.format("LTTB1020", {"line_number", "slash_element"}, + {xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(slash.data()) + + /* elem is a view: bound the read by its size instead of relying on a terminator */ icu::UnicodeString(elem.data(), static_cast<int32_t>(elem.size()))}) + << std::endl; exit(EXIT_FAILURE); } } @@ -472,8 +476,8 @@ Compiler::procIdentity(double const entry_weight, bool ig) if(verbose && first_element && (both_sides.front() == (int)' ')) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Entry begins with space." 
<< std::endl; + std::cerr << i18n.format("LTTB1021", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) + << std::endl; } first_element = false; EntryToken e; @@ -516,8 +520,7 @@ Compiler::procTransduction(double const entry_weight) if(verbose && first_element && (lhs.front() == (int)' ')) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Entry begins with space." << std::endl; + std::cerr << i18n.format("LTTB1021", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) << std::endl; } first_element = false; @@ -560,15 +563,19 @@ Compiler::procPar() if(!current_paradigm.empty() && paradigm_name == current_paradigm) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Paradigm refers to itself '" << paradigm_name << "'." << std::endl; + std::cerr << i18n.format("LTTB1022", {"line_number", "paradigm_name"}, + {xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(paradigm_name.data())}) + << std::endl; exit(EXIT_FAILURE); } if(paradigms.find(paradigm_name) == paradigms.end()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined paradigm '" << paradigm_name << "'." << std::endl; + std::cerr << i18n.format("LTTB1023", {"line_number", "paradigm_name"}, + {xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(paradigm_name.data())}) + << std::endl; exit(EXIT_FAILURE); } e.setParadigm(paradigm_name); @@ -604,8 +611,7 @@ Compiler::insertEntryTokens(std::vector const &elements) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid entry token." 
<< std::endl; + std::cerr << i18n.format("LTTB1024", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) << std::endl; exit(EXIT_FAILURE); } } @@ -679,10 +685,11 @@ Compiler::requireAttribute(UStringView value, UStringView attrname, UStringView { if(value.empty()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): '<" << elemname; - std::cerr << "' element must specify non-void '"; - std::cerr << attrname << "' attribute." << std::endl; + std::cerr << i18n.format("LTTB1025", {"line_number", "element_name", "attr_name"}, + {xmlTextReaderGetParserLineNumber(reader), /* both names are views: pass explicit lengths, they need not be NUL-terminated */ + icu::UnicodeString(elemname.data(), static_cast<int32_t>(elemname.size())), + icu::UnicodeString(attrname.data(), static_cast<int32_t>(attrname.size()))}) + << std::endl; exit(EXIT_FAILURE); } } @@ -869,8 +876,7 @@ Compiler::procEntry() int ret = xmlTextReaderRead(reader); if(ret != 1) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Parse error." << std::endl; + std::cerr << i18n.format("LTTB1026", {"line_number"}, {xmlTextReaderGetParserLineNumber(reader)}) << std::endl; exit(EXIT_FAILURE); } UString name = XMLParseUtil::readName(reader); @@ -909,8 +915,10 @@ Compiler::procEntry() auto it = paradigms.find(p); if(it == paradigms.end()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined paradigm '" << p << "'." << std::endl; + std::cerr << i18n.format("LTTB1023", {"line_number", "paradigm_name"}, + {xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(p.data())}) + << std::endl; exit(EXIT_FAILURE); } // discard entries with empty paradigms (by the directions, normally) @@ -936,9 +944,11 @@ Compiler::procEntry() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid inclusion of '<" << name << ">' into '<" << COMPILER_ENTRY_ELEM; - std::cerr << ">'." 
<< std::endl; + std::cerr << i18n.format("LTTB1027", {"line_number", "element_name", "compiler_entry_element"}, + {xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data()), + icu::UnicodeString(COMPILER_ENTRY_ELEM.data())}) + << std::endl; exit(EXIT_FAILURE); } } @@ -1013,8 +1023,10 @@ Compiler::procNode() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." << std::endl; + std::cerr << i18n.format("LTTB1028", {"line_number", "element_name"}, + {xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}) + << std::endl; exit(EXIT_FAILURE); } } diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h index 9b8d42f..8350a39 100644 --- a/lttoolbox/compiler.h +++ b/lttoolbox/compiler.h @@ -27,6 +27,8 @@ #include #include +#include + /** * A compiler of dictionaries to letter transducers */ @@ -177,6 +179,8 @@ private: int32_t word_boundary_s = 0; int32_t word_boundary_ns = 0; + I18n i18n; + /** * Method to parse an XML Node */