commit 9ad8a65c11dd6935f4642d0045ce53968718bb61 Author: vaydheesh Date: Tue Jun 11 13:32:48 2019 +0000 Added lt_proc -w as module for linux .gitignore -> added pycharm apertium/analysis/__init__.py reset 3f38530 Analyzer._postproc_text -> staticmethod Added: apertium.destxt to list of analyzer_cmds Added: os.path.join() for abs_mode_path added: apertium/swig/linux/* swig generated added: apertium/lttoolbox calls required functions from lttoolbox wrapper modified: apertium/utils.py executes lt-proc -w as submodule diff --git a/.gitignore b/.gitignore index f6f3a83..56c23b7 100644 --- a/.gitignore +++ b/.gitignore @@ -101,3 +101,6 @@ ENV/ # mypy .mypy_cache/ + +# PyCharm +.idea \ No newline at end of file diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index 4f3497f..95f88ab 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -1,7 +1,5 @@ import os -import tempfile from streamparser import parse, LexicalUnit # noqa: F401 -import analysis import apertium from apertium.utils import to_alpha3_code, execute, parse_mode_file @@ -23,49 +21,25 @@ class Analyzer: lang (str) """ self.analyzer_cmds = {} # type: Dict[str, List[List[str]]] - self.analyzer_path = [] # type: List[str] self.lang = to_alpha3_code(lang) # type: str if self.lang not in apertium.analyzers: raise apertium.ModeNotInstalled(self.lang) else: self.path, self.mode = apertium.analyzers[self.lang] - def _get_path(self): # type: (Analyzer) -> List[str] + def _get_commands(self): # type: (Analyzer) -> List[List[str]] """ - Read mode file for automorf.bin path - Returns: - List[str] + List[List[str]] """ if self.lang not in self.analyzer_cmds: mode_path, mode = apertium.analyzers[self.lang] - mode_path = os.path.join(mode_path, 'modes', '{}.mode'.format(mode)) - self.analyzer_cmds[self.lang] = parse_mode_file(mode_path) - self.analyzer_path = [command[-1] for command in self.analyzer_cmds[self.lang]] - return self.analyzer_path + abs_mode_path = 
os.path.join(mode_path, 'modes', '{}.mode'.format(mode)) + self.analyzer_cmds[self.lang] = parse_mode_file(abs_mode_path) + return self.analyzer_cmds[self.lang] @staticmethod - def _lt_proc(input_text, automorf_path): # type: (str, str) -> str - """ - Reads formatted text from apertium-des and returns its analysis - - Args: - input_text (str) - automorf_path (str) - - Returns: - str - """ - with tempfile.NamedTemporaryFile('w') as input_file, tempfile.NamedTemporaryFile('r') as output_file: - input_file.write(input_text) - input_file.flush() - x = analysis.FST() - if not x.valid(): - raise ValueError('FST Invalid') - x.analyze(automorf_path, input_file.name, output_file.name) - return output_file.read() - - def _postproc_text(self, result): # type: (Analyzer, str) -> List[LexicalUnit] + def _postproc_text(result): # type: (str) -> List[LexicalUnit] """ Postprocesses the input @@ -89,8 +63,9 @@ class Analyzer: Returns: List[LexicalUnit] """ - apertium_des = execute(in_text, [['apertium-des{}'.format(formatting), '-n']]) - result = self._lt_proc(apertium_des, self._get_path()[0]) + self._get_commands() + self.analyzer_cmds[self.lang].insert(0, ['apertium-des{}'.format(formatting), '-n']) + result = execute(in_text, self.analyzer_cmds[self.lang]) return self._postproc_text(result) diff --git a/apertium/lttoolbox.py b/apertium/lttoolbox.py new file mode 100644 index 0000000..8154a6b --- /dev/null +++ b/apertium/lttoolbox.py @@ -0,0 +1,61 @@ +import platform +import tempfile + +if platform.system() == 'Linux': + from apertium.swig.linux import lttoolbox +elif platform.system() == 'Windows': + pass + + +class LtProc: + """ + Attributes: + arg_index (int) + path (str) + input_text (str) + output_text (str) + """ + + def __init__(self, input_text, arg_index, path): + """ + Args: + input_text (str) + arg_index (int) + path (str) + """ + self.arg_index = arg_index + self.path = path + self.input_text = input_text + self.output_text = '' + + def analyze(self): # type: 
(LtProc) -> None + """ + Reads formatted text from apertium-des and returns its analysed text + + Args: + self (LtProc) + Returns: + None + """ + with tempfile.NamedTemporaryFile('w') as input_file, tempfile.NamedTemporaryFile('r') as output_file: + input_file.write(self.input_text) + input_file.flush() + fst = lttoolbox.FST() + if not fst.valid(): + raise ValueError('FST Invalid') + fst.analyze(self.path, input_file.name, output_file.name) + self.output_text = output_file.read() + + def execute(self): + """ + Executes the required method, depending upon the argument for lt-proc + + Args: + self (LtProc) + + Returns: + (bytes) + """ + if self.arg_index == '-w': + self.analyze() + return self.output_text.encode() diff --git a/apertium/swig/linux/__init__.py b/apertium/swig/linux/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apertium/swig/linux/lttoolbox.py b/apertium/swig/linux/lttoolbox.py new file mode 100644 index 0000000..62064ec --- /dev/null +++ b/apertium/swig/linux/lttoolbox.py @@ -0,0 +1,282 @@ +# This file was automatically generated by SWIG (http://www.swig.org). +# Version 4.0.0 +# +# Do not make changes to this file unless you know what you are doing--modify +# the SWIG interface file instead. + +from sys import version_info as _swig_python_version_info +if _swig_python_version_info < (2, 7, 0): + raise RuntimeError('Python 2.7 or later required') + +# Import the low-level C/C++ module +if __package__ or '.' in __name__: + from . 
import _lttoolbox +else: + import _lttoolbox + +try: + import builtins as __builtin__ +except ImportError: + import __builtin__ + +def _swig_setattr_nondynamic(self, class_type, name, value, static=1): + if name == "thisown": + return self.this.own(value) + if name == "this": + if type(value).__name__ == 'SwigPyObject': + self.__dict__[name] = value + return + method = class_type.__swig_setmethods__.get(name, None) + if method: + return method(self, value) + if not static: + object.__setattr__(self, name, value) + else: + raise AttributeError("You cannot add attributes to %s" % self) + + +def _swig_setattr(self, class_type, name, value): + return _swig_setattr_nondynamic(self, class_type, name, value, 0) + + +def _swig_getattr(self, class_type, name): + if name == "thisown": + return self.this.own() + method = class_type.__swig_getmethods__.get(name, None) + if method: + return method(self) + raise AttributeError("'%s' object has no attribute '%s'" % (class_type.__name__, name)) + + +def _swig_repr(self): + try: + strthis = "proxy of " + self.this.__repr__() + except __builtin__.Exception: + strthis = "" + return "<%s.%s; %s >" % (self.__class__.__module__, self.__class__.__name__, strthis,) + + +def _swig_setattr_nondynamic_instance_variable(set): + def set_instance_attr(self, name, value): + if name == "thisown": + self.this.own(value) + elif name == "this": + set(self, name, value) + elif hasattr(self, name) and isinstance(getattr(type(self), name), property): + set(self, name, value) + else: + raise AttributeError("You cannot add instance attributes to %s" % self) + return set_instance_attr + + +def _swig_setattr_nondynamic_class_variable(set): + def set_class_attr(cls, name, value): + if hasattr(cls, name) and not isinstance(getattr(cls, name), property): + set(cls, name, value) + else: + raise AttributeError("You cannot add class attributes to %s" % cls) + return set_class_attr + + +def _swig_add_metaclass(metaclass): + """Class decorator for adding a 
metaclass to a SWIG wrapped class - a slimmed down version of six.add_metaclass""" + def wrapper(cls): + return metaclass(cls.__name__, cls.__bases__, cls.__dict__.copy()) + return wrapper + + +class _SwigNonDynamicMeta(type): + """Meta class to enforce nondynamic attributes (no new attributes) for a class""" + __setattr__ = _swig_setattr_nondynamic_class_variable(type.__setattr__) + + +gm_clean = _lttoolbox.gm_clean +gm_unknown = _lttoolbox.gm_unknown +gm_all = _lttoolbox.gm_all +gm_tagged = _lttoolbox.gm_tagged +gm_tagged_nm = _lttoolbox.gm_tagged_nm +gm_carefulcase = _lttoolbox.gm_carefulcase +class FSTProcessor(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc='The membership flag') + __repr__ = _swig_repr + + def __init__(self): + _lttoolbox.FSTProcessor_swiginit(self, _lttoolbox.new_FSTProcessor()) + + def initAnalysis(self): + return _lttoolbox.FSTProcessor_initAnalysis(self) + + def initTMAnalysis(self): + return _lttoolbox.FSTProcessor_initTMAnalysis(self) + + def initSAO(self): + return _lttoolbox.FSTProcessor_initSAO(self) + + def initGeneration(self): + return _lttoolbox.FSTProcessor_initGeneration(self) + + def initPostgeneration(self): + return _lttoolbox.FSTProcessor_initPostgeneration(self) + + def initBiltrans(self): + return _lttoolbox.FSTProcessor_initBiltrans(self) + + def initDecomposition(self): + return _lttoolbox.FSTProcessor_initDecomposition(self) + + def analysis(self, *args): + return _lttoolbox.FSTProcessor_analysis(self, *args) + + def tm_analysis(self, *args): + return _lttoolbox.FSTProcessor_tm_analysis(self, *args) + + def generation(self, *args): + return _lttoolbox.FSTProcessor_generation(self, *args) + + def postgeneration(self, *args): + return _lttoolbox.FSTProcessor_postgeneration(self, *args) + + def intergeneration(self, *args): + return _lttoolbox.FSTProcessor_intergeneration(self, *args) + + def transliteration(self, *args): + return _lttoolbox.FSTProcessor_transliteration(self, *args) + 
+ def biltrans(self, input_word, with_delim=True): + return _lttoolbox.FSTProcessor_biltrans(self, input_word, with_delim) + + def biltransfull(self, input_word, with_delim=True): + return _lttoolbox.FSTProcessor_biltransfull(self, input_word, with_delim) + + def bilingual(self, *args): + return _lttoolbox.FSTProcessor_bilingual(self, *args) + + def biltransWithQueue(self, input_word, with_delim=True): + return _lttoolbox.FSTProcessor_biltransWithQueue(self, input_word, with_delim) + + def biltransWithoutQueue(self, input_word, with_delim=True): + return _lttoolbox.FSTProcessor_biltransWithoutQueue(self, input_word, with_delim) + + def SAO(self, *args): + return _lttoolbox.FSTProcessor_SAO(self, *args) + + def parseICX(self, file): + return _lttoolbox.FSTProcessor_parseICX(self, file) + + def parseRCX(self, file): + return _lttoolbox.FSTProcessor_parseRCX(self, file) + + def load(self, input): + return _lttoolbox.FSTProcessor_load(self, input) + + def lsx(self, input, output): + return _lttoolbox.FSTProcessor_lsx(self, input, output) + + def valid(self): + return _lttoolbox.FSTProcessor_valid(self) + + def setCaseSensitiveMode(self, value): + return _lttoolbox.FSTProcessor_setCaseSensitiveMode(self, value) + + def setDictionaryCaseMode(self, value): + return _lttoolbox.FSTProcessor_setDictionaryCaseMode(self, value) + + def setBiltransSurfaceForms(self, value): + return _lttoolbox.FSTProcessor_setBiltransSurfaceForms(self, value) + + def setIgnoredChars(self, value): + return _lttoolbox.FSTProcessor_setIgnoredChars(self, value) + + def setRestoreChars(self, value): + return _lttoolbox.FSTProcessor_setRestoreChars(self, value) + + def setNullFlush(self, value): + return _lttoolbox.FSTProcessor_setNullFlush(self, value) + + def setUseDefaultIgnoredChars(self, value): + return _lttoolbox.FSTProcessor_setUseDefaultIgnoredChars(self, value) + + def setDisplayWeightsMode(self, value): + return _lttoolbox.FSTProcessor_setDisplayWeightsMode(self, value) + + def 
setMaxAnalysesValue(self, value): + return _lttoolbox.FSTProcessor_setMaxAnalysesValue(self, value) + + def setMaxWeightClassesValue(self, value): + return _lttoolbox.FSTProcessor_setMaxWeightClassesValue(self, value) + + def getNullFlush(self): + return _lttoolbox.FSTProcessor_getNullFlush(self) + + def getDecompoundingMode(self): + return _lttoolbox.FSTProcessor_getDecompoundingMode(self) + __swig_destroy__ = _lttoolbox.delete_FSTProcessor + +# Register FSTProcessor in _lttoolbox: +_lttoolbox.FSTProcessor_swigregister(FSTProcessor) + +HAVE_DECL_FGETC_UNLOCKED = _lttoolbox.HAVE_DECL_FGETC_UNLOCKED +HAVE_DECL_FGETWC_UNLOCKED = _lttoolbox.HAVE_DECL_FGETWC_UNLOCKED +HAVE_DECL_FPUTS_UNLOCKED = _lttoolbox.HAVE_DECL_FPUTS_UNLOCKED +HAVE_DECL_FPUTWC_UNLOCKED = _lttoolbox.HAVE_DECL_FPUTWC_UNLOCKED +HAVE_DECL_FPUTWS_UNLOCKED = _lttoolbox.HAVE_DECL_FPUTWS_UNLOCKED +HAVE_DECL_FREAD_UNLOCKED = _lttoolbox.HAVE_DECL_FREAD_UNLOCKED +HAVE_DECL_FWRITE_UNLOCKED = _lttoolbox.HAVE_DECL_FWRITE_UNLOCKED +HAVE_DLFCN_H = _lttoolbox.HAVE_DLFCN_H +HAVE_GETOPT_LONG = _lttoolbox.HAVE_GETOPT_LONG +HAVE_INTTYPES_H = _lttoolbox.HAVE_INTTYPES_H +HAVE_LIBXML2 = _lttoolbox.HAVE_LIBXML2 +HAVE_MEMORY_H = _lttoolbox.HAVE_MEMORY_H +HAVE_PYTHON = _lttoolbox.HAVE_PYTHON +HAVE_SETLOCALE = _lttoolbox.HAVE_SETLOCALE +HAVE_STDDEF_H = _lttoolbox.HAVE_STDDEF_H +HAVE_STDINT_H = _lttoolbox.HAVE_STDINT_H +HAVE_STDLIB_H = _lttoolbox.HAVE_STDLIB_H +HAVE_STRDUP = _lttoolbox.HAVE_STRDUP +HAVE_STRINGS_H = _lttoolbox.HAVE_STRINGS_H +HAVE_STRING_H = _lttoolbox.HAVE_STRING_H +HAVE_SYS_STAT_H = _lttoolbox.HAVE_SYS_STAT_H +HAVE_SYS_TYPES_H = _lttoolbox.HAVE_SYS_TYPES_H +HAVE_UNISTD_H = _lttoolbox.HAVE_UNISTD_H +LT_OBJDIR = _lttoolbox.LT_OBJDIR +PACKAGE_BUGREPORT = _lttoolbox.PACKAGE_BUGREPORT +PACKAGE_NAME = _lttoolbox.PACKAGE_NAME +PACKAGE_STRING = _lttoolbox.PACKAGE_STRING +PACKAGE_TARNAME = _lttoolbox.PACKAGE_TARNAME +PACKAGE_URL = _lttoolbox.PACKAGE_URL +PACKAGE_VERSION = _lttoolbox.PACKAGE_VERSION +STDC_HEADERS = 
_lttoolbox.STDC_HEADERS +class LtLocale(object): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc='The membership flag') + __repr__ = _swig_repr + + @staticmethod + def tryToSetLocale(): + return _lttoolbox.LtLocale_tryToSetLocale() + + def __init__(self): + _lttoolbox.LtLocale_swiginit(self, _lttoolbox.new_LtLocale()) + __swig_destroy__ = _lttoolbox.delete_LtLocale + +# Register LtLocale in _lttoolbox: +_lttoolbox.LtLocale_swigregister(LtLocale) + +def LtLocale_tryToSetLocale(): + return _lttoolbox.LtLocale_tryToSetLocale() + +class FST(FSTProcessor): + thisown = property(lambda x: x.this.own(), lambda x, v: x.this.own(v), doc='The membership flag') + __repr__ = _swig_repr + + def analyze(self, automorf_path, input_path, output_path): + return _lttoolbox.FST_analyze(self, automorf_path, input_path, output_path) + + def __init__(self): + _lttoolbox.FST_swiginit(self, _lttoolbox.new_FST()) + __swig_destroy__ = _lttoolbox.delete_FST + +# Register FST in _lttoolbox: +_lttoolbox.FST_swigregister(FST) + + + diff --git a/apertium/utils.py b/apertium/utils.py index c22f387..6274cf6 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -6,7 +6,7 @@ if False: import apertium # noqa: F401 from apertium.iso639 import iso_639_codes # noqa: F401 - +from apertium import lttoolbox iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} @@ -38,6 +38,13 @@ def execute(inp, commands): # type: (str, List[List[str]]) -> str procs = [] end = inp.encode() for i, command in enumerate(commands): + if 'lt-proc' and '-w' in command: + arg_index = command.index('-w') + automorf_path = command[-1] + ltp = lttoolbox.LtProc(end.decode(), command[arg_index], automorf_path) + end = ltp.execute() + continue + procs.append( subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE), )