commit 2696021e46a1056eeee95ec918ce8f53d0097c95
Author: Ahmed Siam
Date:   Sat Jun 24 14:00:01 2023 +0300

    Include the i18n library and internationalize lt-proc

diff --git a/.gitignore b/.gitignore
index bda2353..5ef3437 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 *.pyc
 **/*.deps/
 /build/
+/.vscode/
 /.ccls-cache/
 /*.pc
 /aclocal.m4
@@ -86,6 +87,10 @@
 /python/lttoolbox.py
 /python/setup.py
 /python/build*
+/locales/Makefile
+/locales/Makefile.in
+*.res
+*.dat
 *.egg-info/
 *.egg
 **/.mypy_cache/
diff --git a/Makefile.am b/Makefile.am
index 13a7779..fc4a319 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,6 +1,6 @@
 ACLOCAL_AMFLAGS=-I m4
 
-SUBDIRS = $(PACKAGE_NAME)
+SUBDIRS = $(PACKAGE_NAME) locales
 DIST_SUBDIRS = $(PACKAGE_NAME)
 
 if HAVE_PYTHON_BINDINGS
@@ -15,3 +15,6 @@ EXTRA_DIST=autogen.sh tests
 # TODO: the below will use python3 if you run it on Arch Linux with no python2 installed
 test: tests/run_tests.py
 	$(PYTHON) $<
+
+export LOCALES_DIR=$(datadir)/$(PACKAGE_NAME)
+export LOCALES_DATA=$(LOCALES_DIR)/locales.dat
diff --git a/configure.ac b/configure.ac
index cd691dc..8bc54ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -39,6 +39,7 @@ AC_ARG_ENABLE(profile,
 
 PKG_CHECK_MODULES(LIBXML, [libxml-2.0 >= 2.6.17])
 PKG_CHECK_MODULES(ICU, [icu-i18n, icu-io, icu-uc])
+PKG_CHECK_MODULES(I18N, [i18n], CPPFLAGS="$CPPFLAGS $I18N_CFLAGS"; LIBS="$LIBS $I18N_LIBS")
 
 # Checks for libraries.
 AC_CHECK_LIB(xml2, xmlReaderForFile)
@@ -105,4 +106,4 @@
 then
   PYTHON_INSTALL_PARAMS="--prefix=\$(prefix) --root=\$(DESTDIR)/"
 fi
-AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile])
+AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile locales/Makefile])
diff --git a/locales/Makefile.am b/locales/Makefile.am
new file mode 100644
index 0000000..590e1cb
--- /dev/null
+++ b/locales/Makefile.am
@@ -0,0 +1,7 @@
+locales.dat: root.txt en.txt
+	genrb -d . root.txt en.txt
+	echo root.res en.res > package_list.txt
+	pkgdata -p locales --mode archive -d . package_list.txt
+
+localesdir = $(LOCALES_DIR)
+dist_locales_DATA = locales.dat
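
The rule above compiles the plain-text resource sources into binary bundles with genrb and packs root.res and en.res into a single locales.dat archive with pkgdata. As a minimal sketch (not part of this commit), such an archive could be queried with ICU's C++ ResourceBundle API roughly as follows; the hard-coded install path and the assumption that ICU resolves the package from the archive path without its ".dat" suffix are illustrative only:

    // Illustrative only: reads one description string out of the packaged bundle.
    #include <unicode/resbund.h>
    #include <unicode/unistr.h>
    #include <iostream>
    #include <string>

    int main() {
      UErrorCode status = U_ZERO_ERROR;
      // Assumption: the package name is the installed archive path minus ".dat";
      // ICU then picks en.res for an English locale and falls back to root.res.
      icu::ResourceBundle bundle("/usr/share/lttoolbox/locales",
                                 icu::Locale::getDefault(), status);
      if (U_FAILURE(status)) return 1;
      icu::UnicodeString desc = bundle.getStringEx("lt_proc_desc", status);
      std::string utf8;
      desc.toUTF8String(utf8);
      std::cout << utf8 << std::endl;  // "process a stream with a letter transducer"
      return 0;
    }
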
diff --git a/locales/en.txt b/locales/en.txt
new file mode 100644
index 0000000..0bd10e4
--- /dev/null
+++ b/locales/en.txt
@@ -0,0 +1,2 @@
+en{
+}
\ No newline at end of file
diff --git a/locales/package_list.txt b/locales/package_list.txt
new file mode 100644
index 0000000..d5134dd
--- /dev/null
+++ b/locales/package_list.txt
@@ -0,0 +1 @@
+root.res en.res
diff --git a/locales/root.txt b/locales/root.txt
new file mode 100644
index 0000000..67d1a00
--- /dev/null
+++ b/locales/root.txt
@@ -0,0 +1,30 @@
+root{
+    lt_proc_desc{"process a stream with a letter transducer"}
+    analysis_desc{"morphological analysis (default behavior)"}
+    bilingual_desc{"lexical transfer"}
+    case_sensitive_desc{"use the literal case of the incoming characters"}
+    debugged_gen_desc{"morph. generation with all the stuff"}
+    decompose_nouns_desc{"Try to decompound unknown words"}
+    generation_desc{"morphological generation"}
+    ignored_chars_desc{"specify file with characters to ignore"}
+    restore_chars_desc{"specify file with characters to diacritic restoration"}
+    tagged_gen_desc{"morphological generation keeping lexical forms"}
+    tagged_nm_gen_desc{"same as -l but without unknown word marks"}
+    non_marked_gen_desc{"morph. generation without unknown word marks"}
+    surf_bilingual_desc{"lexical transfer with surface forms"}
+    post_generation_desc{"post-generation"}
+    inter_generation_desc{"inter-generation"}
+    sao_desc{"SAO annotation system input processing"}
+    transliteration_desc{"apply transliteration dictionary"}
+    version_desc{"version"}
+    null_flush_desc{"flush output on the null character"}
+    dictionary_case_desc{"use dictionary case instead of surface"}
+    careful_case_desc{"use dictionary case if present, else surface"}
+    no_default_ignore_desc{"skips loading the default ignore characters"}
+    show_weights_desc{"Print final analysis weights (if any)"}
+    analyses_desc{"Output no more than N analyses (if the transducer is weighted, the N best analyses)"}
+    weight_classes_desc{"Output no more than N best weight classes (where analyses with equal weight constitute a class)"}
+    compound_max_elements_desc{"Set compound max elements"}
+    help_desc{"show this help"}
+    LTTB1000{"LTTB1000: Invalid or no argument for {option}"}
+}
diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am
index e4c1b65..5b9d003 100644
--- a/lttoolbox/Makefile.am
+++ b/lttoolbox/Makefile.am
@@ -29,6 +29,7 @@ lttoolboxlib = $(prefix)/lib
 lttoolbox_DATA = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd
 
 LDADD = liblttoolbox.la $(PTHREAD_LIBS)
+AM_CPPFLAGS = -DLOCALES_DATA='"$(LOCALES_DATA)"'
 AM_LDFLAGS = -llttoolbox $(LIBXML_LIBS) $(ICU_LIBS)
 
 lt_append_SOURCES = lt_append.cc
diff --git a/lttoolbox/cli.cc b/lttoolbox/cli.cc
index ef07949..28f347a 100644
--- a/lttoolbox/cli.cc
+++ b/lttoolbox/cli.cc
@@ -22,14 +22,16 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
-CLI::CLI(std::string desc, std::string ver)
+CLI::CLI(icu::UnicodeString desc, std::string ver)
 {
   description = desc;
   version = ver;
 }
 
-CLI::CLI(std::string desc)
+CLI::CLI(icu::UnicodeString desc)
 {
   description = desc;
 }
@@ -39,14 +41,14 @@ CLI::~CLI()
 }
 
 void CLI::add_str_arg(char short_flag, std::string long_flag,
-                      std::string desc, std::string arg)
+                      icu::UnicodeString desc, std::string arg)
 {
   options.push_back({.short_opt=short_flag, .long_opt=long_flag,
                      .desc=desc, .is_bool=false, .var=arg});
 }
 
 void CLI::add_bool_arg(char short_flag, std::string long_flag,
-                       std::string desc)
+                       icu::UnicodeString desc)
 {
   options.push_back({.short_opt=short_flag, .long_opt=long_flag,
                      .desc=desc, .is_bool=true, .var=""});
diff --git a/lttoolbox/cli.h b/lttoolbox/cli.h
index 4b1cdbf..4ca1cff 100644
--- a/lttoolbox/cli.h
+++ b/lttoolbox/cli.h
@@ -19,18 +19,20 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 class CLI {
 private:
   struct CLIOption {
     char short_opt;
     std::string long_opt;
-    std::string desc;
+    icu::UnicodeString desc;
     bool is_bool;
    std::string var;
  };
 
-  std::string description;
+  icu::UnicodeString description;
   std::string version;
   std::string epilog;
 
@@ -45,12 +47,12 @@
 private:
   std::string prog_name;
 public:
-  CLI(std::string desc, std::string version);
-  CLI(std::string desc);
+  CLI(icu::UnicodeString desc, std::string version);
+  CLI(icu::UnicodeString desc);
   ~CLI();
-  void add_str_arg(char short_flag, std::string long_flag, std::string desc,
-                   std::string arg);
-  void add_bool_arg(char short_flag, std::string long_flag, std::string desc);
+  void add_str_arg(char short_flag, std::string long_flag, icu::UnicodeString desc,
+                   std::string arg);
+  void add_bool_arg(char short_flag, std::string long_flag, icu::UnicodeString desc);
   void add_file_arg(std::string name, bool optional = true);
   void set_epilog(std::string e);
   void print_usage(std::ostream& out = std::cerr);
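
The CLI changes above only switch option descriptions to icu::UnicodeString; the I18n class itself comes from the external i18n package checked for in configure.ac, and its header is not part of this diff. The sketch below is a hypothetical approximation of the interface lt_proc.cc relies on (a plain key lookup, plus a lookup with named MessageFormat arguments), built directly on ICU; the class name I18nSketch and every member other than format() are assumptions, not the library's real API:

    #include <unicode/resbund.h>
    #include <unicode/msgfmt.h>
    #include <unicode/fmtable.h>
    #include <unicode/unistr.h>
    #include <vector>

    class I18nSketch {
    public:
      // bundle_path would be something like LOCALES_DATA from lttoolbox/Makefile.am.
      explicit I18nSketch(const char* bundle_path)
        : status_(U_ZERO_ERROR),
          bundle_(bundle_path, icu::Locale::getDefault(), status_) {}

      // Plain lookup, e.g. format("lt_proc_desc").
      icu::UnicodeString format(const char* key) {
        UErrorCode status = U_ZERO_ERROR;
        return bundle_.getStringEx(key, status);
      }

      // Lookup plus substitution of named arguments, matching calls such as
      // format("LTTB1000", {"option"}, {"analyses"}).
      icu::UnicodeString format(const char* key,
                                const std::vector<icu::UnicodeString>& arg_names,
                                const std::vector<icu::UnicodeString>& arg_values) {
        UErrorCode status = U_ZERO_ERROR;
        icu::UnicodeString pattern = bundle_.getStringEx(key, status);
        icu::MessageFormat msg(pattern, status);
        std::vector<icu::Formattable> values(arg_values.begin(), arg_values.end());
        icu::UnicodeString result;
        msg.format(arg_names.data(), values.data(),
                   static_cast<int32_t>(arg_names.size()), result, status);
        return result;
      }

    private:
      UErrorCode status_;
      icu::ResourceBundle bundle_;
    };
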
diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc
index a7f188f..c064f83 100644
--- a/lttoolbox/lt_proc.cc
+++ b/lttoolbox/lt_proc.cc
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 
 void checkValidity(FSTProcessor const &fstp)
 {
@@ -30,37 +31,38 @@ int main(int argc, char *argv[])
 {
   LtLocale::tryToSetLocale();
+  I18n i18n {LOCALES_DATA};
 
-  CLI cli("process a stream with a letter transducer", PACKAGE_VERSION);
+  CLI cli(i18n.format("lt_proc_desc"), PACKAGE_VERSION);
   cli.add_file_arg("fst_file", false);
   cli.add_file_arg("input_file");
   cli.add_file_arg("output_file");
 
-  cli.add_bool_arg('a', "analysis", "morphological analysis (default behavior)");
-  cli.add_bool_arg('b', "bilingual", "lexical transfer");
-  cli.add_bool_arg('c', "case-sensitive", "use the literal case of the incoming characters");
-  cli.add_bool_arg('d', "debugged-gen", "morph. generation with all the stuff");
-  cli.add_bool_arg('e', "decompose-nouns", "Try to decompound unknown words");
-  cli.add_bool_arg('g', "generation", "morphological generation");
-  cli.add_str_arg('i', "ignored-chars", "specify file with characters to ignore", "icx_file");
-  cli.add_str_arg('r', "restore-chars", "specify file with characters to diacritic restoration", "rcx_file");
-  cli.add_bool_arg('l', "tagged-gen", "morphological generation keeping lexical forms");
-  cli.add_bool_arg('m', "tagged-nm-gen", "same as -l but without unknown word marks");
-  cli.add_bool_arg('n', "non-marked-gen", "morph. generation without unknown word marks");
-  cli.add_bool_arg('o', "surf-bilingual", "lexical transfer with surface forms");
-  cli.add_bool_arg('p', "post-generation", "post-generation");
-  cli.add_bool_arg('x', "inter-generation", "inter-generation");
-  cli.add_bool_arg('s', "sao", "SAO annotation system input processing");
-  cli.add_bool_arg('t', "transliteration", "apply transliteration dictionary");
-  cli.add_bool_arg('v', "version", "version");
-  cli.add_bool_arg('z', "null-flush", "flush output on the null character");
-  cli.add_bool_arg('w', "dictionary-case", "use dictionary case instead of surface");
-  cli.add_bool_arg('C', "careful-case", "use dictionary case if present, else surface");
-  cli.add_bool_arg('I', "no-default-ignore", "skips loading the default ignore characters");
-  cli.add_bool_arg('W', "show-weights", "Print final analysis weights (if any)");
-  cli.add_str_arg('N', "analyses", "Output no more than N analyses (if the transducer is weighted, the N best analyses)", "N");
-  cli.add_str_arg('L', "weight-classes", "Output no more than N best weight classes (where analyses with equal weight constitute a class)", "N");
-  cli.add_str_arg('M', "compound-max-elements", "Set compound max elements", "N");
-  cli.add_bool_arg('h', "help", "show this help");
+  cli.add_bool_arg('a', "analysis", i18n.format("analysis_desc"));
+  cli.add_bool_arg('b', "bilingual", i18n.format("bilingual_desc"));
+  cli.add_bool_arg('c', "case-sensitive", i18n.format("case_sensitive_desc"));
+  cli.add_bool_arg('d', "debugged-gen", i18n.format("debugged_gen_desc"));
+  cli.add_bool_arg('e', "decompose-nouns", i18n.format("decompose_nouns_desc"));
+  cli.add_bool_arg('g', "generation", i18n.format("generation_desc"));
+  cli.add_str_arg('i', "ignored-chars", i18n.format("ignored_chars_desc"), "icx_file");
+  cli.add_str_arg('r', "restore-chars", i18n.format("restore_chars_desc"), "rcx_file");
+  cli.add_bool_arg('l', "tagged-gen", i18n.format("tagged_gen_desc"));
+  cli.add_bool_arg('m', "tagged-nm-gen", i18n.format("tagged_nm_gen_desc"));
i18n.format("tagged_nm_gen_desc")); + cli.add_bool_arg('n', "non-marked-gen", i18n.format("non_marked_gen_desc")); + cli.add_bool_arg('o', "surf-bilingual", i18n.format("surf_bilingual_desc")); + cli.add_bool_arg('p', "post-generation", i18n.format("post_generation_desc")); + cli.add_bool_arg('x', "inter-generation", i18n.format("inter_generation_desc")); + cli.add_bool_arg('s', "sao", i18n.format("sao_desc")); + cli.add_bool_arg('t', "transliteration", i18n.format("transliteration_desc")); + cli.add_bool_arg('v', "version", i18n.format("version_desc")); + cli.add_bool_arg('z', "null-flush", i18n.format("null_flush_desc")); + cli.add_bool_arg('w', "dictionary-case", i18n.format("dictionary_case_desc")); + cli.add_bool_arg('C', "careful-case", i18n.format("careful_case_desc")); + cli.add_bool_arg('I', "no-default-ignore", i18n.format("no_default_ignore_desc")); + cli.add_bool_arg('W', "show-weights", i18n.format("show_weights_desc")); + cli.add_str_arg('N', "analyses", i18n.format("analyses_desc"), "N"); + cli.add_str_arg('L', "weight-classes", i18n.format("weight_classes_desc"), "N"); + cli.add_str_arg('M', "compound-max-elements", i18n.format("compound_max_elements_desc"), "N"); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.parse_args(argc, argv); FSTProcessor fstp; @@ -145,7 +147,7 @@ int main(int argc, char *argv[]) if (strs.find("analyses") != strs.end()) { int n = atoi(strs["analyses"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for analyses count" << std::endl; + std::cerr << i18n.format("LTTB1000", {"option"}, {"analyses"}) << std::endl; exit(EXIT_FAILURE); } fstp.setMaxAnalysesValue(n); @@ -153,7 +155,7 @@ int main(int argc, char *argv[]) if (strs.find("weight-classes") != strs.end()) { int n = atoi(strs["weight-classes"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for weight class count" << std::endl; + std::cerr << i18n.format("LTTB1000", {"option"}, {"weight-classes"})<< std::endl; exit(EXIT_FAILURE); } fstp.setMaxWeightClassesValue(n); @@ -161,7 +163,7 @@ int main(int argc, char *argv[]) if (strs.find("compound-max-elements") != strs.end()) { // Test int n = atoi(strs["compound-max-elements"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for compound max elements" << std::endl; + std::cerr << i18n.format("LTTB1000", {"option"}, {"compound-max-elements"})<< std::endl; exit(EXIT_FAILURE); } fstp.setCompoundMaxElements(n);