commit 596fea2c91fea3994f6dde281e319d61c6743adc Author: Ahmed Siam Date: Thu Aug 24 19:16:04 2023 +0300 i18n patch 1 diff --git a/.gitignore b/.gitignore index 741939a..23a56fd 100644 --- a/.gitignore +++ b/.gitignore @@ -19,3 +19,10 @@ install-sh missing stamp-h1 lexd.pc + +/.vscode/ +/locales/Makefile +/locales/Makefile.in +*.res +*.dat +/configure~ diff --git a/Makefile.am b/Makefile.am index f1e9f61..4db3c4f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,5 +1,5 @@ -SUBDIRS = src +SUBDIRS = src locales EXTRA_DIST=autogen.sh check_targets = check-plain check-flags check-minimize-tags check-tags check-minimize check-single @@ -16,3 +16,7 @@ check-clean: $(check_targets): check-%: all tests/feature + make -C tests/feature O=$* LEXD_TEST_FLAGS="$$(echo '$*' | grep -v plain | sed 's/^\|-/ --/g')" check + make -C tests/feature O=$* clean + +export LTTB_I18N_DATA=$(datadir)/lttoolbox/lttoolbox.dat +export LOCALES_DIR=$(datadir)/$(PACKAGE_NAME) +export LEXD_I18N_DATA=$(LOCALES_DIR)/lexd.dat diff --git a/configure.ac b/configure.ac index 112c975..8eb01e4 100644 --- a/configure.ac +++ b/configure.ac @@ -18,6 +18,7 @@ AC_ARG_ENABLE(debug, PKG_CHECK_MODULES([LTTOOLBOX], [lttoolbox >= 3.7.1]) PKG_CHECK_MODULES([ICU_UC], [icu-uc]) PKG_CHECK_MODULES([ICU_IO], [icu-io]) +PKG_CHECK_MODULES(I18N, [i18n], CPPFLAGS="$CPPFLAGS $I18N_CFLAGS"; LIBS="$LIBS $I18N_LIBS") AC_CHECK_FUNCS([getopt_long]) @@ -39,5 +40,6 @@ AC_CONFIG_FILES([ lexd.pc Makefile src/Makefile + locales/Makefile ]) AC_OUTPUT diff --git a/locales/Makefile.am b/locales/Makefile.am new file mode 100755 index 0000000..8434e03 --- /dev/null +++ b/locales/Makefile.am @@ -0,0 +1,7 @@ +lexd.dat: root.txt en.txt + genrb -d . root.txt en.txt + echo root.res en.res > package_list.txt + pkgdata -p lexd --mode archive -d . 
package_list.txt + +localesdir = $(LOCALES_DIR) +dist_locales_DATA = lexd.dat diff --git a/locales/en.txt b/locales/en.txt new file mode 100755 index 0000000..0bd10e4 --- /dev/null +++ b/locales/en.txt @@ -0,0 +1,2 @@ +en{ +} \ No newline at end of file diff --git a/locales/package_list.txt b/locales/package_list.txt new file mode 100755 index 0000000..d5134dd --- /dev/null +++ b/locales/package_list.txt @@ -0,0 +1 @@ +root.res en.res diff --git a/locales/root.txt b/locales/root.txt new file mode 100755 index 0000000..f52146e --- /dev/null +++ b/locales/root.txt @@ -0,0 +1,92 @@ +root{ + lexd_desc{"{program} v{version}: compile lexd files to transducers\n" + "USAGE: {program} [-abcfmtvxUV] [rule_file [output_file]]\n" + " -a, --align: align labels (prefer a:0 b:b to a:b b:0)\n" + " -b, --bin: output as Lttoolbox binary file (default is AT&T format)\n" + " -c, --compress: condense labels (prefer a:b to 0:b a:0 - sets --align)\n" + " -f, --flags: compile using flag diacritics\n" + " -m, --minimize: do hyperminimization (sets -f)\n" + " -t, --tags: compile tags and filters with flag diacritics (sets -f)\n" + " -v, --verbose: compile verbosely\n" + " -U, --no-combine: represent multi-codepoint glyphs as multiple transitions\n" + " -V, --version: print version string\n" + " -x, --statistics: print lexicon and pattern sizes to stderr\n"} + on_line{"On line {line}: "} + bad_tag{"Bad tag \"-{tag}\""} + + LEXD1000{"ERROR LEXD1000: Failed to create character iterator with code {code}"} + LEXD1001{"ERROR LEXD1001: Unable to access \"{file}\"."} + LEXD1002{"WARNING LEXD1002: Output is empty transducer."} + LEXD1003{"ERROR LEXD1003: Lexicon {lexicon} is empty."} + LEXD1004{"ERROR LEXD1004: Unnamed pattern or lexicon."} + LEXD1005{"ERROR LEXD1005: Lexicon/pattern names cannot contain character \"{char}\""} + LEXD1006{"ERROR LEXD1006: Cannot declare negative tag in lexicon"} + LEXD1007{"ERROR LEXD1007: Empty tag at char {char}"} + LEXD1008{"ERROR LEXD1008: Illegal tag filter."} + 
LEXD1009{"ERROR LEXD1009: Illegal negated operation."} + LEXD1010{"ERROR LEXD1010: Expected list of operands."} + LEXD1011{"ERROR LEXD1011: End of line in tag list, expected \"]\""} + LEXD1012{"ERROR LEXD1012: Multichar symbol didn't end; searching for {end}"} + LEXD1013{"ERROR LEXD1013: Quantifier {quantifier} may only be applied to parenthesized groups"} + LEXD1014{"ERROR LEXD1014: Regex contains mismatched ]"} + LEXD1015{"ERROR LEXD1015: Regex contains multiple colons"} + LEXD1016{"ERROR LEXD1016: Cannot process symbol range between multichar symbols"} + LEXD1017{"ERROR LEXD1017: First character in symbol range does not precede last"} + LEXD1018{"ERROR LEXD1018: Mismatched parentheses in regex"} + LEXD1019{"ERROR LEXD1019: Unterminated regex"} + LEXD1020{"ERROR LEXD1020: Expected {num1} parts, found {num2}"} + LEXD1021{"ERROR LEXD1021: Already provided tag list for this side."} + LEXD1022{"ERROR LEXD1022: Lexicon entry contains multiple colons"} + LEXD1023{"ERROR LEXD1023: Lexicon entry contains both regex and text"} + LEXD1024{"ERROR LEXD1024: Negative tag has no default to unset."} + LEXD1025{"ERROR LEXD1025: Symbol \"{symbol}\" without lexicon name at u16 {begin}-{end}"} + LEXD1026{"ERROR LEXD1026: Syntax error - non-numeric index in parentheses: {index}"} + LEXD1027{"ERROR LEXD1027: Syntax error - unmatched parenthesis"} + LEXD1028{"ERROR LEXD1028: Syntax error - missing index in parenthesis"} + LEXD1029{"ERROR LEXD1029: Syntax error - double colon"} + LEXD1030{"ERROR LEXD1030: Colon without lexicon or pattern name"} + LEXD1031{"ERROR LEXD1031: Unexpected symbol \"{symbol}\" at column {col}"} + LEXD1032{"ERROR LEXD1032: Syntax error - initial |"} + LEXD1033{"ERROR LEXD1033: Syntax error - multiple consecutive |"} + LEXD1034{"ERROR LEXD1034: Syntax error - sieve and alternation operators without intervening token"} + LEXD1035{"ERROR LEXD1035: Syntax error - cannot sieve backwards after forwards."} + LEXD1036{"ERROR LEXD1036: Backward sieve without token?"} + 
LEXD1037{"ERROR LEXD1037: Syntax error - multiple consecutive sieve operators"} + LEXD1038{"ERROR LEXD1038: Syntax error - alternation and sieve operators without intervening token"} + LEXD1039{"ERROR LEXD1039: Forward sieve without token?"} + LEXD1040{"ERROR LEXD1040: Missing closing ] for anonymous lexicon"} + LEXD1041{"ERROR LEXD1041: Missing closing ) for anonymous pattern"} + LEXD1042{"ERROR LEXD1042: Syntax error - unexpected modifier at u16 {start}-{end}"} + LEXD1043{"ERROR LEXD1043: Syntax error - trailing |"} + LEXD1044{"ERROR LEXD1044: Syntax error - trailing sieve (< or >)"} + LEXD1045{"ERROR LEXD1045: Trailing backslash"} + LEXD1046{"ERROR LEXD1046: The name \"{name}\" cannot be used for both LEXICONs and PATTERNs."} + LEXD1047{"ERROR LEXD1047: Expected start of default right tags \"[\" after \":\"."} + LEXD1048{"ERROR LEXD1048: Unexpected character \"{char}\" after default tags."} + LEXD1049{"ERROR LEXD1049: Unnamed lexicon"} + LEXD1050{"ERROR LEXD1050: The name \"{name}\" cannot be used for both LEXICONs and PATTERNs."} + LEXD1051{"ERROR LEXD1051: Multiple incompatible definitions for lexicon \"{lexicon}\"."} + LEXD1052{"ERROR LEXD1052: Expected \"ALIAS lexicon alt_name\""} + LEXD1053{"ERROR LEXD1053: Attempt to alias undefined lexicon \"{lexicon}\"."} + LEXD1054{"ERROR LEXD1054: Unexpected {symbol}"} + LEXD1055{"ERROR LEXD1055: Lexicon entry has \"{symbol}\" (found at u16 {num1}), more than {num2} components"} + LEXD1056{"ERROR LEXD1056: Expected \"PATTERNS\" or \"LEXICON\""} + LEXD1057{"ERROR LEXD1057: Cannot select part of pattern {pattern}"} + LEXD1058{"ERROR LEXD1058: Cannot collate pattern {pattern1} with {pattern2}"} + LEXD1059{"ERROR LEXD1059: Cannot select side of pattern {pattern}"} + LEXD1060{"ERROR LEXD1060: Cannot collate lexicon {lexicon} with pattern {pattern}"} + LEXD1061{"ERROR LEXD1061: Cannot collate pattern {pattern} with lexicon {lexicon}"} + LEXD1062{"ERROR LEXD1062: Lexicon or pattern \"{lex_pat}\" is not defined."} + 
LEXD1063{"ERROR LEXD1063: Cannot collate {left} with {right} - both appear in free variation earlier in the pattern."} + LEXD1064{"ERROR LEXD1064: Lexicon {lexicon} cannot be both optional and non-optional in a single pattern."} + LEXD1065{"ERROR LEXD1065: Cannot build collated pattern {pattern}"} + LEXD1066{"ERROR LEXD1066: Cannot compile self-recursive pattern \"{pattern}\""} + LEXD1067{"ERROR LEXD1067: {lexicon}({part}) - part is out of range"} + LEXD1068{"ERROR LEXD1068: Cannot collate {left} with {right} - differing numbers of entries"} + LEXD1069{"ERROR LEXD1069: Cannot use {lexicon} one-sided - it contains a regex"} + LEXD1070{"ERROR LEXD1070: Cannot collate {left} with {right} - {something} contains a regex"} + LEXD1071{"WARNING LEXD1071: One-sided tags are deprecated and will soon be removed (line {line})"} + LEXD1072{"WARNING LEXD1072: The tags of {pattern1} conflict with {pattern2} on line {line}."} + LEXD1073{"WARNING LEXD1073: {pattern} is empty."} + LEXD1074{"WARNING LEXD1074: No non-empty patterns found."} +} diff --git a/src/Makefile.am b/src/Makefile.am index 1c0143c..3199db1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,5 @@ AM_LDFLAGS=$(LIBS) +AM_CPPFLAGS = -DLEXD_I18N_DATA='"$(LEXD_I18N_DATA)"' -DLTTB_I18N_DATA='"$(LTTB_I18N_DATA)"' bin_PROGRAMS = lexd diff --git a/src/icu-iter.cc b/src/icu-iter.cc index f6550d5..a4bee66 100644 --- a/src/icu-iter.cc +++ b/src/icu-iter.cc @@ -2,6 +2,7 @@ #include #include #include +#include using namespace std; using namespace icu; @@ -11,8 +12,7 @@ charspan_iter::charspan_iter(const UnicodeString &s) it = BreakIterator::createCharacterInstance(Locale::getDefault(), _status); if(U_FAILURE(_status)) { - cerr << "Failed to create character iterator with code " << _status << endl; - exit(1); + I18n(LEXD_I18N_DATA, "lexd").error("LEXD1000", {"code"}, {_status}, true); } it->setText(s); _span.first = it->first(); diff --git a/src/lexd.cc b/src/lexd.cc index 2090b74..26a4ebb 100644 --- a/src/lexd.cc +++ 
b/src/lexd.cc @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -9,20 +10,10 @@ using namespace std; void endProgram(char *name) { + I18n i18n {LEXD_I18N_DATA, "lexd"}; if(name != NULL) { - cout << basename(name) << " v" << VERSION << ": compile lexd files to transducers" << endl; - cout << "USAGE: " << basename(name) << " [-abcfmtvxUV] [rule_file [output_file]]" << endl; - cout << " -a, --align: align labels (prefer a:0 b:b to a:b b:0)" << endl; - cout << " -b, --bin: output as Lttoolbox binary file (default is AT&T format)" << endl; - cout << " -c, --compress: condense labels (prefer a:b to 0:b a:0 - sets --align)" << endl; - cout << " -f, --flags: compile using flag diacritics" << endl; - cout << " -m, --minimize: do hyperminimization (sets -f)" << endl; - cout << " -t, --tags: compile tags and filters with flag diacritics (sets -f)" << endl; - cout << " -v, --verbose: compile verbosely" << endl; - cout << " -U, --no-combine: represent multi-codepoint glyphs as multiple transitions" << endl; - cout << " -V, --version: print version string" << endl; - cout << " -x, --statistics: print lexicon and pattern sizes to stderr" << endl; + cout << i18n.format("lexd_desc", {"program", "version"}, {basename(name), VERSION}); } exit(EXIT_FAILURE); } @@ -152,8 +143,7 @@ int main(int argc, char *argv[]) input = u_fopen(infile.c_str(), "rb", NULL, NULL); if(!input) { - cerr << "Error: Cannot open file '" << infile << "' for reading." << endl; - exit(EXIT_FAILURE); + I18n(LEXD_I18N_DATA, "lexd").error("LEXD1001", {"file"}, {infile.c_str()}, true); } } @@ -162,8 +152,7 @@ int main(int argc, char *argv[]) output = u_fopen(outfile.c_str(), "wb", NULL, NULL); if(!output) { - cerr << "Error: Cannot open file '" << outfile << "' for writing." 
<< endl; - exit(EXIT_FAILURE); + I18n(LEXD_I18N_DATA, "lexd").error("LEXD1001", {"file"}, {outfile.c_str()}, true); } } @@ -173,7 +162,7 @@ int main(int argc, char *argv[]) if(stats) comp.printStatistics(); if(!transducer) - cerr << "Warning: output is empty transducer." << endl; + I18n(LEXD_I18N_DATA, "lexd").error("LEXD1002", {}, {}, false); else if(bin) { // TODO: finish this! diff --git a/src/lexdcompiler.cc b/src/lexdcompiler.cc index c1971e7..13ca167 100644 --- a/src/lexdcompiler.cc +++ b/src/lexdcompiler.cc @@ -179,6 +179,13 @@ LexdCompiler::die(const char* msg, ...) u_fputc('\n', err_out); exit(EXIT_FAILURE); } +void +LexdCompiler::die(icu::UnicodeString msg) +{ + cerr << i18n.format("on_line", {"line"}, {lineNumber}) << endl + << msg << endl; + exit(EXIT_FAILURE); +} void LexdCompiler::appendLexicon(string_ref lexicon_id, const vector &to_append) { @@ -194,7 +201,7 @@ LexdCompiler::finishLexicon() if(inLex) { if (currentLexicon.size() == 0) { - die("Lexicon '%S' is empty.", err(name(currentLexiconId))); + die(i18n.format("LEXD1003", {"lexicon"}, {name(currentLexiconId)})); } appendLexicon(currentLexiconId, currentLexicon); @@ -221,11 +228,11 @@ LexdCompiler::checkName(UnicodeString& name) const static UString forbidden = u" :?|()<>[]*+"; name.trim(); int l = name.length(); - if(l == 0) die("Unnamed pattern or lexicon."); + if(l == 0) die(i18n.format("LEXD1004")); for(const auto &c: forbidden) { if(name.indexOf(c) != -1) { - die("Lexicon/pattern names cannot contain character '%C'", c); + die(i18n.format("LEXD1005", {"char"}, {c})); } } return internName(name); @@ -238,7 +245,7 @@ LexdCompiler::readTags(char_iter &iter, UnicodeString &line) if(filter.neg().empty() && filter.ops().empty()) return tags_t((set)filter.pos()); else - die("Cannot declare negative tag in lexicon"); + die(i18n.format("LEXD1006")); return tags_t(); } @@ -255,13 +262,13 @@ LexdCompiler::readTagFilter(char_iter& iter, UnicodeString& line) if(*iter == "]" || *iter == "," || *iter == " 
") { if(!tag_nonempty) - die("Empty tag at char %d", + iter.span().first); + die(i18n.format("LEXD1007", {"char"}, {iter.span().first})); UnicodeString s = line.tempSubStringBetween(tag_start.first, iter.span().first); if(!tag_filter.combine( negative ? tag_filter_t(neg_tag_filter_t {checkName(s)}) : tag_filter_t(pos_tag_filter_t {checkName(s)}) )) - die("Illegal tag filter."); + die(i18n.format("LEXD1008")); tag_nonempty = false; negative = false; if(*iter == "]") @@ -278,7 +285,7 @@ LexdCompiler::readTagFilter(char_iter& iter, UnicodeString& line) { const UnicodeString s = *iter; if(negative) - die("Illegal negated operation."); + die(i18n.format("LEXD1009")); *iter++; if (*iter == "[") { @@ -291,7 +298,7 @@ LexdCompiler::readTagFilter(char_iter& iter, UnicodeString& line) ops.push_back(op); } else - die("Expected list of operands."); + die(i18n.format("LEXD1010")); if(*iter == "]") { iter++; @@ -304,7 +311,7 @@ LexdCompiler::readTagFilter(char_iter& iter, UnicodeString& line) tag_start = iter.span(); } } - die("End of line in tag list, expected ']'"); + die(i18n.format("LEXD1011")); return tag_filter_t(); } @@ -339,7 +346,7 @@ LexdCompiler::readSymbol(char_iter& iter, UnicodeString& line, lex_token_t& tok) if (*iter == end) { tok.symbols.push_back(alphabet_lookup(line.tempSubStringBetween(i, iter.span().second))); } else { - die("Multichar symbol didn't end; searching for %S", err(end)); + die(i18n.format("LEXD1012", {"end"}, {end})); } } else { appendSymbol(*iter, tok); @@ -354,10 +361,10 @@ LexdCompiler::processRegexTokenSeq(char_iter& iter, UnicodeString& line, Transdu for (; !iter.at_end(); ++iter) { if (*iter == "(" || *iter == ")" || *iter == "|" || *iter == "/") break; else if (*iter == "?" 
|| *iter == "*" || *iter == "+") - die("Quantifier %S may only be applied to parenthesized groups", err(*iter)); - else if (*iter == "]") die("Regex contains mismatched ]"); + die(i18n.format("LEXD1013", {"quantifier"}, {*iter})); + else if (*iter == "]") die(i18n.format("LEXD1014")); else if (*iter == ":" && inleft) inleft = false; - else if (*iter == ":") die("Regex contains multiple colons"); + else if (*iter == ":") die(i18n.format("LEXD1015")); else if (*iter == "[") { ++iter; vector sym; @@ -379,11 +386,11 @@ LexdCompiler::processRegexTokenSeq(char_iter& iter, UnicodeString& line, Transdu // change the validity of the code -DGS 2022-05-17 if (start.symbols.size() != 1 || end.symbols.size() != 1 || (int)start.symbols[0] <= 0 || (int)end.symbols[0] <= 0) - die("Cannot process symbol range between multichar symbols"); + die(i18n.format("LEXD1016")); int i_start = (int)start.symbols[0]; int i_end = (int)end.symbols[0]; if (i_start > i_end) - die("First character in symbol range does not preceed last"); + die(i18n.format("LEXD1017")); for (int i = 1 + i_start; i <= i_end; i++) { lex_token_t mid; mid.symbols.push_back((trans_sym_t)i); @@ -501,9 +508,9 @@ LexdCompiler::processRegexGroup(char_iter& iter, UnicodeString& line, Transducer for (auto& it : option_ends) trans->linkStates(it, state, 0); if ((depth > 0 && *iter == "/") || (depth == 0 && *iter == ")")) - die("Mismatched parentheses in regex"); + die(i18n.format("LEXD1018")); if (iter.at_end()) - die("Unterminated regex"); + die(i18n.format("LEXD1019")); ++iter; if (depth > 0) { if (*iter == "?") { @@ -547,7 +554,8 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign seg.regex->setFinal(state); } if(iter.at_end() && seg.regex == nullptr && seg.left.symbols.size() == 0) - die("Expected %d parts, found %d", currentLexiconPartCount, part_count); + die(i18n.format("LEXD1020", {"num1", "num2"}, + {to_string(currentLexiconPartCount).c_str(), to_string(part_count).c_str()})); for(; 
!iter.at_end(); ++iter) { if((*iter).startsWith(" ") || *iter == ']') @@ -556,7 +564,7 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign { auto &tags_applied = inleft ? left_tags_applied : right_tags_applied; if(tags_applied) - die("Already provided tag list for this side."); + die(i18n.format("LEXD1021")); tags = readTagFilter(iter, line); --iter; tags_applied = true; @@ -566,7 +574,7 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign if(inleft) inleft = false; else - die("Lexicon entry contains multiple colons"); + die(i18n.format("LEXD1022")); if ((*iter).length() > 1) readSymbol(iter, line, seg.right); } else readSymbol(iter, line, (inleft ? seg.left : seg.right)); @@ -578,7 +586,7 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign if (seg.regex != nullptr && !(seg.left.symbols.empty() && seg.right.symbols.empty())) - die("Lexicon entry contains both regex and text"); + die(i18n.format("LEXD1023")); seg.tags = currentLexicon_tags; @@ -586,8 +594,8 @@ LexdCompiler::processLexiconSegment(char_iter& iter, UnicodeString& line, unsign { tags_t diff = subtractset(tags.neg(), seg.tags); for(string_ref t: diff) - cerr << "Bad tag '-" << to_ustring(name(t)) << "'" << endl; - die("Negative tag has no default to unset."); + cerr << i18n.format("bad_tag", {"tag"}, {name(t)}) << endl; + die(i18n.format("LEXD1024")); } return seg; @@ -605,7 +613,8 @@ LexdCompiler::readToken(char_iter& iter, UnicodeString& line) line.extract(begin_charspan.first, (iter.at_end() ? 
line.length() : iter.span().first) - begin_charspan.first, name); if(name.length() == 0) - die("Symbol '%S' without lexicon name at u16 %d-%d", err(*iter), iter.span().first, iter.span().second-1); + die(i18n.format("LEXD1025", {"symbol", "begin", "end"}, + {*iter, iter.span().first, iter.span().second-1})); bool optional = false; if(*iter == "?") { @@ -625,12 +634,12 @@ LexdCompiler::readToken(char_iter& iter, UnicodeString& line) for(; !iter.at_end() && (*iter).length() > 0 && *iter != ")"; iter++) { if((*iter).length() != 1 || !u_isdigit((*iter).charAt(0))) - die("Syntax error - non-numeric index in parentheses: %S", err(*iter)); + die(i18n.format("LEXD1026", {"index"}, {*iter})); } if(*iter != ")") - die("Syntax error - unmatched parenthesis"); + die(i18n.format("LEXD1027")); if(iter.span().first == begin_charspan.first) - die("Syntax error - missing index in parenthesis"); + die(i18n.format("LEXD1028")); part = (unsigned int)StringUtils::stoi(to_ustring(line.tempSubStringBetween(begin_charspan.first, iter.span().first))); ++iter; } @@ -670,15 +679,15 @@ LexdCompiler::readPatternElement(char_iter& iter, UnicodeString& line) if(boundary.indexOf(*iter) != -1) { if(*iter == ":") - die("Syntax error - double colon"); + die(i18n.format("LEXD1029")); else - die("Colon without lexicon or pattern name"); + die(i18n.format("LEXD1030")); } tok.right = readToken(iter, line); } else if(boundary.indexOf(*iter) != -1) { - die("Unexpected symbol '%S' at column %d", err(*iter), iter.span().first); + die(i18n.format("LEXD1031", {"symbol", "col"}, {*iter, iter.span().first})); } else { @@ -733,23 +742,23 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) else if(*iter == "|") { if(alternation.empty()) - die("Syntax error - initial |"); + die(i18n.format("LEXD1032")); if(!final_alternative) - die("Syntax error - multiple consecutive |"); + die(i18n.format("LEXD1033")); if(just_sieved) - die("Syntax error - sieve and alternation operators without intervening 
token"); + die(i18n.format("LEXD1034")); final_alternative = false; } else if(*iter == "<") { if(sieve_forward) - die("Syntax error - cannot sieve backwards after forwards."); + die(i18n.format("LEXD1035")); if(alternation.empty()) - die("Backward sieve without token?"); + die(i18n.format("LEXD1036")); if(just_sieved) - die("Syntax error - multiple consecutive sieve operators"); + die(i18n.format("LEXD1037")); if(!final_alternative) - die("Syntax error - alternation and sieve operators without intervening token"); + die(i18n.format("LEXD1038")); expand_alternation(pats_cur, alternation); expand_alternation(pats_cur, left_sieve_tok); alternation.clear(); @@ -759,11 +768,11 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) { sieve_forward = true; if(alternation.empty()) - die("Forward sieve without token?"); + die(i18n.format("LEXD1039")); if(just_sieved) - die("Syntax error - multiple consecutive sieve operators"); + die(i18n.format("LEXD1037")); if(!final_alternative) - die("Syntax error - alternation and sieve operators without intervening token"); + die(i18n.format("LEXD1038")); expand_alternation(pats_cur, alternation); expand_alternation(pats_cur, right_sieve_tok); alternation.clear(); @@ -779,7 +788,7 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) entry.push_back(processLexiconSegment(++iter, line, 0)); if(*iter == " ") iter++; if(*iter != "]") - die("Missing closing ] for anonymous lexicon"); + die(i18n.format("LEXD1040")); currentLexicon.push_back(entry); finishLexicon(); if(final_alternative && !alternation.empty()) @@ -807,7 +816,7 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) if(*iter == " ") *iter++; if(iter.at_end() || *iter != ")") - die("Missing closing ) for anonymous pattern"); + die(i18n.format("LEXD1041")); ++iter; tag_filter_t filter; if(*iter == "[") @@ -831,7 +840,7 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) } else if(*iter == "?" 
|| *iter == "*" || *iter == "+") { - die("Syntax error - unexpected modifier at u16 %d-%d", iter.span().first, iter.span().second); + die(i18n.format("LEXD1042", {"start", "end"}, {iter.span().first, iter.span().second})); } else { @@ -848,9 +857,9 @@ LexdCompiler::processPattern(char_iter& iter, UnicodeString& line) } } if(!final_alternative) - die("Syntax error - trailing |"); + die(i18n.format("LEXD1043")); if(just_sieved) - die("Syntax error - trailing sieve (< or >)"); + die(i18n.format("LEXD1044")); expand_alternation(pats_cur, alternation); for(const auto &pat : pats_cur) { @@ -901,7 +910,7 @@ LexdCompiler::processNextLine() lastWasSpace = space; } lineNumber++; - if(escape) die("Trailing backslash"); + if(escape) die(i18n.format("LEXD1045")); if(line.length() == 0) return; if(line == "PATTERNS" || line == "PATTERNS ") @@ -917,8 +926,7 @@ LexdCompiler::processNextLine() finishLexicon(); currentPatternId = checkName(name); if (lexicons.find(currentPatternId) != lexicons.end()) { - die("The name '%S' cannot be used for both LEXICONs and PATTERNs.", - err(name)); + die(i18n.format("LEXD1046", {"name"}, {name})); } inPat = true; } @@ -934,15 +942,15 @@ LexdCompiler::processNextLine() currentLexicon_tags = readTags(c, tags); if(c != c.end() && *c == ":") { - cerr << "WARNING: One-sided tags are deprecated and will soon be removed (line " << lineNumber << ")" << endl; + i18n.error("LEXD1071", {"line"}, {lineNumber}, false); ++c; if(*c == "[") unionset_inplace(currentLexicon_tags, readTags(c, tags)); else - die("Expected start of default right tags '[' after ':'."); + die(i18n.format("LEXD1047")); } if(c != c.end()) - die("Unexpected character '%C' after default tags.", (*c)[0]); + die(i18n.format("LEXD1048", {"char"}, {(*c)[0]})); name.retainBetween(0, name.indexOf('[')); } currentLexiconPartCount = 1; @@ -959,17 +967,16 @@ LexdCompiler::processNextLine() } else break; } - if(name.length() == 0) die("Unnamed lexicon"); + if(name.length() == 0) 
die(i18n.format("LEXD1049")); } currentLexiconId = checkName(name); if(lexicons.find(currentLexiconId) != lexicons.end()) { if(lexicons[currentLexiconId][0].size() != currentLexiconPartCount) { - die("Multiple incompatible definitions for lexicon '%S'.", err(name)); + die(i18n.format("LEXD1051", {"lexicon"}, {name})); } } if (patterns.find(currentLexiconId) != patterns.end()) { - die("The name '%S' cannot be used for both LEXICONs and PATTERNs.", - err(name)); + die(i18n.format("LEXD1050", {"name"}, {name})); } inLex = true; inPat = false; @@ -979,12 +986,12 @@ LexdCompiler::processNextLine() finishLexicon(); if(line.endsWith(' ')) line.retainBetween(0, line.length()-1); int loc = line.indexOf(" ", 6); - if(loc == -1) die("Expected 'ALIAS lexicon alt_name'"); + if(loc == -1) die(i18n.format("LEXD1052")); UnicodeString name = line.tempSubString(6, loc-6); UnicodeString alt = line.tempSubString(loc+1); string_ref altid = checkName(alt); string_ref lexid = checkName(name); - if(lexicons.find(lexid) == lexicons.end()) die("Attempt to alias undefined lexicon '%S'.", err(name)); + if(lexicons.find(lexid) == lexicons.end()) die(i18n.format("LEXD1053", {"lexicon"}, {name})); lexicons[altid] = lexicons[lexid]; inLex = false; inPat = false; @@ -994,7 +1001,7 @@ LexdCompiler::processNextLine() char_iter iter = char_iter(line); processPattern(iter, line); if(!iter.at_end() && (*iter).length() > 0) - die("Unexpected %S", err(*iter)); + die(i18n.format("LEXD1054", {"symbol"}, {*iter})); } else if(inLex) { @@ -1006,10 +1013,11 @@ LexdCompiler::processNextLine() } if(*iter == ' ') ++iter; if(!iter.at_end()) - die("Lexicon entry has '%S' (found at u16 %d), more than %d components", err(*iter), iter.span().first, currentLexiconPartCount); + die(i18n.format("LEXD1055", {"symbol", "num1", "num2"}, + {*iter, iter.span().first, to_string(currentLexiconPartCount).c_str()})); currentLexicon.push_back(entry); } - else die("Expected 'PATTERNS' or 'LEXICON'"); + else 
die(i18n.format("LEXD1056")); } bool @@ -1027,26 +1035,26 @@ LexdCompiler::isLexiconToken(const pattern_element_t& tok) { if(tok.left.part != 1 || tok.right.part != 1) { - die("Cannote select part of pattern %S", err(name(tok.right.name))); + die(i18n.format("LEXD1057", {"pattern"}, {name(tok.right.name)})); } return false; } // Any other scenario is an error, so we need to die() if(lpat && rpat) { - die("Cannot collate pattern %S with %S", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1058", {"pattern1", "pattern2"}, {name(tok.left.name), name(tok.right.name)})); } else if((lpat && tok.right.name.empty()) || (rpat && tok.left.name.empty())) { - die("Cannot select side of pattern %S", err(name(tok.left.name.valid() ? tok.left.name : tok.right.name))); + die(i18n.format("LEXD1059", {"pattern"}, {name(tok.left.name.valid() ? tok.left.name : tok.right.name)})); } else if(llex && rpat) { - die("Cannot collate lexicon %S with pattern %S", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1060", {"lexicon", "pattern"}, {name(tok.left.name), name(tok.right.name)})); } else if(lpat && rlex) { - die("Cannot collate pattern %S with lexicon %S", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1061", {"pattern", "lexicon"}, {name(tok.left.name), name(tok.right.name)})); } else { @@ -1058,7 +1066,7 @@ LexdCompiler::isLexiconToken(const pattern_element_t& tok) for(auto l: lexicons) cerr << to_ustring(name(l.first)) << " "; cerr << endl; - die("Lexicon or pattern '%S' is not defined.", err(name((llex || lpat) ? tok.right.name : tok.left.name))); + die(i18n.format("LEXD1062", {"lex_pat"}, {name((llex || lpat) ? 
tok.right.name : tok.left.name)})); } // we never reach this point, but the compiler doesn't understand die() // so we put a fake return value to keep it happy @@ -1125,7 +1133,7 @@ LexdCompiler::buildPattern(int state, Transducer* t, const pattern_t& pat, const if(tok.right.name.valid() && matchedParts.find(tok.right.name) == matchedParts.end()) matchedParts[tok.right.name] = matchedParts[tok.left.name]; if(tok.left.name.valid() && tok.right.name.valid() && matchedParts[tok.left.name] != matchedParts[tok.right.name]) - die("Cannot collate %S with %S - both appear in free variation earlier in the pattern.", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1063", {"left", "right"}, {name(tok.left.name), name(tok.right.name)})); Transducer* lex = getLexiconTransducer(pat[pos], matchedParts[tok.left.name || tok.right.name], false); if(lex) { @@ -1158,10 +1166,10 @@ LexdCompiler::determineFreedom(pattern_t& pat) { const pattern_element_t& t1 = pat[i]; if (is_optional.find(t1.left.name) != is_optional.end() && is_optional[t1.left.name] != t1.optional()) { - die("Lexicon %S cannot be both optional and non-optional in a single pattern.", err(name(t1.left.name))); + die(i18n.format("LEXD1064", {"lexicon"}, {name(t1.left.name)})); } if (is_optional.find(t1.right.name) != is_optional.end() && is_optional[t1.right.name] != t1.optional()) { - die("Lexicon %S cannot be both optional and non-optional in a single pattern.", err(name(t1.right.name))); + die(i18n.format("LEXD1064", {"lexicon"}, {name(t1.right.name)})); } if (t1.left.name.valid()) { is_optional[t1.left.name] = t1.optional(); @@ -1190,7 +1198,7 @@ Transducer* LexdCompiler::buildPattern(const pattern_element_t &tok) { if(tok.left.part != 1 || tok.right.part != 1) - die("Cannot build collated pattern %S", err(name(tok.left.name))); + die(i18n.format("LEXD1065", {"pattern"}, {name(tok.left.name)})); if(patternTransducers.find(tok) == patternTransducers.end()) { if (verbose) cerr << "Compiling 
" << to_ustring(printPattern(tok)) << endl; @@ -1210,18 +1218,16 @@ LexdCompiler::buildPattern(const pattern_element_t &tok) if(!pair.tag_filter.combine(tok.tag_filter.neg())) { taggable = false; if (verbose) { - cerr << "Warning: The tags of " << to_ustring(printPattern(tok)); - cerr << " conflict with " << to_ustring(printPattern(pat_untagged.second[j])); - cerr << " on line " << pat.first << "." << endl; + i18n.error("LEXD1072", {"pattern1", "pattern2", "line"}, + {printPattern(tok), printPattern(pat_untagged.second[j]), pat.first}, false); } } } if(!pat.second[i].tag_filter.combine(tok.tag_filter.pos())) { taggable = false; if (verbose) { - cerr << "Warning: The tags of " << to_ustring(printPattern(tok)); - cerr << " conflict with " << to_ustring(printPattern(pat_untagged.second[i])); - cerr << " on line " << pat.first << "." << endl; + i18n.error("LEXD1072", {"pattern1", "pattern2", "line"}, + {printPattern(tok), printPattern(pat_untagged.second[i]), pat.first}, false); } } if (!taggable) continue; @@ -1240,8 +1246,7 @@ LexdCompiler::buildPattern(const pattern_element_t &tok) t->minimize(); } else if (verbose) { - cerr << "Warning: " << to_ustring(printPattern(tok)); - cerr << " is empty." << endl; + i18n.error("LEXD1073", {"pattern"}, {printPattern(tok)}, false); } patternTransducers[tok] = t; if (verbose) { @@ -1253,7 +1258,7 @@ LexdCompiler::buildPattern(const pattern_element_t &tok) } else if(patternTransducers[tok] == NULL) { - die("Cannot compile self-recursive %S", err(printPattern(tok))); + die(i18n.format("LEXD1066", {"pattern"}, {printPattern(tok)})); } return patternTransducers[tok]; } @@ -1365,16 +1370,14 @@ LexdCompiler::buildPatternWithFlags(const pattern_element_t &tok, int pattern_st { if (i == idx && !cur.tag_filter.combine(tok.tag_filter.pos())) { if (verbose) { - cerr << "Warning: The tags of " << to_ustring(printPattern(tok)); - cerr << " conflict with " << to_ustring(printPattern(pat.second[i])); - cerr << " on line " << pat.first << "." 
<< endl; + i18n.error("LEXD1072", {"pattern1", "pattern2", "line"}, + {printPattern(tok), printPattern(pat.second[i]), pat.first}, false); } } if (!cur.tag_filter.combine(tok.tag_filter.neg())) { if (verbose) { - cerr << "Warning: The tags of " << to_ustring(printPattern(tok)); - cerr << " conflict with " << to_ustring(printPattern(pat.second[i])); - cerr << " on line " << pat.first << "." << endl; + i18n.error("LEXD1072", {"pattern1", "pattern2", "line"}, + {printPattern(tok), printPattern(pat.second[i]), pat.first}, false); } } } @@ -1545,7 +1548,7 @@ LexdCompiler::buildPatternWithFlags(const pattern_element_t &tok, int pattern_st } else if(patternTransducers[tok] == NULL) { - die("Cannot compile self-recursive pattern '%S'", err(name(tok.left.name))); + die(i18n.format("LEXD1066", {"pattern"}, {name(tok.left.name)})); } return patternTransducers[tok]; } @@ -1746,7 +1749,7 @@ LexdCompiler::buildPatternSingleLexicon(pattern_element_t tok, int start_state) } else { - die("Cannot compile self-recursive pattern '%S'", err(name(tok.left.name))); + die(i18n.format("LEXD1066", {"pattern"}, {name(tok.left.name)})); return 0; } } @@ -1798,7 +1801,7 @@ LexdCompiler::buildTransducerSingleLexicon() int end = buildPatternSingleLexicon(start_pat, 0); if(end == -1) { - cerr << "WARNING: No non-empty patterns found." 
<< endl; + i18n.error("LEXD1074", {}, {}, false); } else { hyperminTrans->setFinal(end); @@ -1971,12 +1974,12 @@ LexdCompiler::getLexiconTransducer(pattern_element_t tok, unsigned int entry_ind vector& lents = lexicons[tok.left.name]; if(tok.left.name.valid() && tok.left.part > lents[0].size()) - die("%S(%d) - part is out of range", err(name(tok.left.name)), tok.left.part); + die(i18n.format("LEXD1067", {"lexicon", "part"}, {name(tok.left.name), to_string(tok.left.part).c_str()})); vector& rents = lexicons[tok.right.name]; if(tok.right.name.valid() && tok.right.part > rents[0].size()) - die("%S(%d) - part is out of range", err(name(tok.right.name)), tok.right.part); + die(i18n.format("LEXD1067", {"lexicon", "part"}, {name(tok.right.name), to_string(tok.right.part).c_str()})); if(tok.left.name.valid() && tok.right.name.valid() && lents.size() != rents.size()) - die("Cannot collate %S with %S - differing numbers of entries", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1068", {"left", "right"}, {name(tok.left.name), name(tok.right.name)})); unsigned int count = (tok.left.name.valid() ? lents.size() : rents.size()); vector trans; if(free) @@ -1999,11 +2002,13 @@ LexdCompiler::getLexiconTransducer(pattern_element_t tok, unsigned int entry_ind Transducer* t = free ? trans[0] : new Transducer(); if (le.regex != nullptr || re.regex != nullptr) { if (tok.left.name.empty()) - die("Cannot use %S one-sided - it contains a regex", err(name(tok.right.name))); + die(i18n.format("LEXD1069", {"lexicon"}, {name(tok.right.name)})); if (tok.right.name.empty()) - die("Cannot use %S one-sided - it contains a regex", err(name(tok.left.name))); + die(i18n.format("LEXD1069", {"lexicon"}, {name(tok.left.name)})); if (tok.left.name != tok.right.name) - die("Cannot collate %S with %S - %S contains a regex", err(name(tok.left.name)), err(name(tok.right.name)), err(name((le.regex != nullptr ? 
tok.left.name : tok.right.name)))); + die(i18n.format("LEXD1070", {"left", "right", "something"}, + {name(tok.left.name), name(tok.right.name), + name((le.regex != nullptr ? tok.left.name : tok.right.name))})); } insertEntry(t, {.left=le.left, .right=re.right, .regex=le.regex, .tags=tags}); did_anything = true; @@ -2104,12 +2109,12 @@ LexdCompiler::getLexiconTransducerWithFlags(pattern_element_t& tok, bool free) // TODO: can this be abstracted from here and getLexiconTransducer()? vector& lents = lexicons[tok.left.name]; if(tok.left.name.valid() && tok.left.part > lents[0].size()) - die("%S(%d) - part is out of range", err(name(tok.left.name)), tok.left.part); + die(i18n.format("LEXD1067", {"lexicon", "part"}, {name(tok.left.name), to_string(tok.left.part).c_str()})); vector& rents = lexicons[tok.right.name]; if(tok.right.name.valid() && tok.right.part > rents[0].size()) - die("%S(%d) - part is out of range", err(name(tok.right.name)), tok.right.part); + die(i18n.format("LEXD1067", {"lexicon", "part"}, {name(tok.right.name), to_string(tok.right.part).c_str()})); if(tok.left.name.valid() && tok.right.name.valid() && lents.size() != rents.size()) - die("Cannot collate %S with %S - differing numbers of entries", err(name(tok.left.name)), err(name(tok.right.name))); + die(i18n.format("LEXD1068", {"left", "right"}, {name(tok.left.name), name(tok.right.name)})); unsigned int count = (tok.left.name.valid() ? 
lents.size() : rents.size()); Transducer* trans = new Transducer(); lex_seg_t empty; @@ -2127,11 +2132,13 @@ LexdCompiler::getLexiconTransducerWithFlags(pattern_element_t& tok, bool free) lex_seg_t seg; if (le.regex != nullptr || re.regex != nullptr) { if (tok.left.name.empty()) - die("Cannot use %S one-sided - it contains a regex", err(name(tok.right.name))); + die(i18n.format("LEXD1069", {"lexicon"}, {name(tok.right.name)})); if (tok.right.name.empty()) - die("Cannot use %S one-sided - it contains a regex", err(name(tok.left.name))); + die(i18n.format("LEXD1069", {"lexicon"}, {name(tok.left.name)})); if (tok.left.name != tok.right.name) - die("Cannot collate %S with %S - %S contains a regex", err(name(tok.left.name)), err(name(tok.right.name)), err(name((le.regex != nullptr ? tok.left.name : tok.right.name)))); + die(i18n.format("LEXD1070", {"left", "right", "something"}, + {name(tok.left.name), name(tok.right.name), + name((le.regex != nullptr ? tok.left.name : tok.right.name))})); seg.regex = le.regex; } if(!free && tok.left.name.valid()) diff --git a/src/lexdcompiler.h b/src/lexdcompiler.h index 1fd0d7e..f1e97ad 100644 --- a/src/lexdcompiler.h +++ b/src/lexdcompiler.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -351,6 +352,7 @@ private: bool doneReading = false; unsigned int anonymousCount = 0; unsigned int transitionCount = 0; + I18n i18n {LEXD_I18N_DATA, "lexd"}; Transducer* hyperminTrans; @@ -360,6 +362,7 @@ private: vector right_sieve_tok; void die(const char* msg, ...); + void die(icu::UnicodeString msg); UnicodeString printPattern(const pattern_element_t& pat); UnicodeString printFilter(const tag_filter_t& filter); void finishLexicon();