commit 630de487603c5c2d02750a6fcb9728b91a9bbb02 Author: Lokendra Singh Date: Tue Jun 18 18:32:03 2019 +0000 Swig wrapper for lt-proc (#42) * Switched to lttoolbox wrapper for analysis/__init__ analysis needs to be installed from lttoolbox use NamedTempFile and pass its path to FST.analyze() * Fixed: flake8 q000 Bad quotes Replaced double quote with single quote * Added lt_proc -w as module for linux .gitignore -> added pycharm apertium/analysis/__init__.py reset 3f38530 Analyzer._postproc_text -> staticmethod Added: apertium.destxt to list of analyzer_cmds Added: os.path.join() for abs_mode_path added: apertium/swig/linux/* swig generated added: apertium/lttoolbox calls required functions from lttoolbox wrapper modified: apertium/utils.py executes lt-proc -w as submodule * Added TypeAnnotation and flake8 ignore apertium/swig * Added noqa for imports and Annotation * Modified: TypeAnnotation * Implemented changes suggested .gitignore: Fixed IDE specific ignore apertium/analysis/__init__.py: Fixed multiple command insertion lttoolbox.py: Modified TypeAnnotation * Fixed: Variable TypeAnnotation compatibility issue * Added: shared libraries for various python version * Added: Script to build lttoolbox wrapper for travis * Implemented changes suggested apertium/utils.py: execute() -> execute_pipeline() Fixed: usage of 'and' for 'lt-proc' & '-w' Added: Proper if-else * Added: 'lt-proc -g' generate rename: BuildSwigWrapper -> build-swig-wrapper * Fixed: Case of output for generation * Added: lt-proc -a, -b, -p Removed: lttoolbox.py Call wrapper directly from utils.py Modified: utils.py Removed: '-z' argument for every command Added: logs when calling subprocess Fixed: ResourceWarning when opening mode files * Implemented Changes Suggested Fixed log Updated travis build script * Added: file path and line no in logs diff --git a/.travis.yml b/.travis.yml index 048e7f6..014be6f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ python: install: - pip install pipenv 
- travis_retry pipenv install --dev --system + - ./build-swig-wrapper.sh before_script: - wget http://apertium.projectjj.com/apt/install-nightly.sh -O - | sudo bash - sudo apt-get -f --allow-unauthenticated install apertium-all-dev diff --git a/apertium/__init__.py b/apertium/__init__.py index cebf6ee..c462f04 100644 --- a/apertium/__init__.py +++ b/apertium/__init__.py @@ -1,3 +1,4 @@ +import logging import os import platform from typing import Dict, Tuple @@ -69,3 +70,5 @@ for pair_path in pair_paths: _update_modes(pair_path) append_pair_path_windows() update_path_windows() +logging.basicConfig(format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s', level=logging.WARNING) +logger = logging.getLogger() diff --git a/apertium/analysis/__init__.py b/apertium/analysis/__init__.py index a47d8e0..1e95ff7 100644 --- a/apertium/analysis/__init__.py +++ b/apertium/analysis/__init__.py @@ -1,9 +1,10 @@ +import os from typing import Dict, List from streamparser import LexicalUnit, parse import apertium -from apertium.utils import execute, parse_mode_file, to_alpha3_code +from apertium.utils import execute_pipeline, parse_mode_file, to_alpha3_code class Analyzer: @@ -25,17 +26,20 @@ class Analyzer: else: self.path, self.mode = apertium.analyzers[self.lang] - def _get_commands(self): # type: (Analyzer) -> List[List[str]] + def _get_commands(self) -> List[List[str]]: """ Returns: List[List[str]] """ if self.lang not in self.analyzer_cmds: mode_path, mode = apertium.analyzers[self.lang] - self.analyzer_cmds[self.lang] = parse_mode_file(mode_path + '/modes/' + mode + '.mode') + abs_mode_path = os.path.join(mode_path, 'modes', '{}.mode'.format(mode)) + self.analyzer_cmds[self.lang] = parse_mode_file(abs_mode_path) + return self.analyzer_cmds[self.lang] - def _postproc_text(self, result): # type: (Analyzer, str) -> List[LexicalUnit] + @staticmethod + def _postproc_text(result: str) -> List[LexicalUnit]: """ Postprocesses the input @@ -48,7 +52,7 @@ class 
Analyzer: lexical_units = list(parse(result)) return lexical_units - def analyze(self, in_text, formatting='txt'): # type: (Analyzer, str, str) -> List[LexicalUnit] + def analyze(self, in_text: str, formatting: str = 'txt') -> List[LexicalUnit]: """ Runs apertium to analyze the input @@ -59,12 +63,15 @@ class Analyzer: Returns: List[LexicalUnit] """ - apertium_des = execute(in_text, [['apertium-des{}'.format(formatting), '-n']]) - result = execute(apertium_des, self._get_commands()) + self._get_commands() + deformatter = ['apertium-des{}'.format(formatting), '-n'] + if deformatter not in self.analyzer_cmds[self.lang]: + self.analyzer_cmds[self.lang].insert(0, deformatter) + result = execute_pipeline(in_text, self.analyzer_cmds[self.lang]) return self._postproc_text(result) -def analyze(lang, in_text, formatting='txt'): # type: (str, str, str) -> List[LexicalUnit] +def analyze(lang: str, in_text: str, formatting: str = 'txt') -> List[LexicalUnit]: """ Args: lang (str) diff --git a/apertium/generation/__init__.py b/apertium/generation/__init__.py index 2707ee9..c40a99c 100644 --- a/apertium/generation/__init__.py +++ b/apertium/generation/__init__.py @@ -1,7 +1,7 @@ from typing import Dict, List, Union import apertium -from apertium.utils import execute, parse_mode_file, to_alpha3_code +from apertium.utils import execute_pipeline, parse_mode_file, to_alpha3_code class Generator: @@ -42,7 +42,7 @@ class Generator: if self.lang in apertium.generators: commands = list(self._get_commands()) - result = execute(in_text, commands) + result = execute_pipeline(in_text, commands) return result.rstrip('\x00') else: raise apertium.ModeNotInstalled(self.lang) diff --git a/apertium/translation/__init__.py b/apertium/translation/__init__.py index de9346e..a31ac24 100644 --- a/apertium/translation/__init__.py +++ b/apertium/translation/__init__.py @@ -3,7 +3,7 @@ from subprocess import CalledProcessError, PIPE, Popen from typing import Dict, List, Optional, Tuple, Union import 
apertium # noqa: E402 -from apertium.utils import execute, parse_mode_file, to_alpha3_code # noqa: E402 +from apertium.utils import execute_pipeline, parse_mode_file, to_alpha3_code # noqa: E402 class Translator: @@ -24,7 +24,7 @@ class Translator: self.l1 = l1 self.l2 = l2 - def _get_commands(self, l1, l2): # type: (Translator, str, str) -> List[List[str]] + def _get_commands(self, l1: str, l2: str) -> List[List[str]]: """ Args: l1 (str) @@ -59,7 +59,7 @@ class Translator: return deformat, reformat - def _check_ret_code(self, proc): # type: (Translator, Popen) -> None + def _check_ret_code(self, proc: Popen) -> None: """ Args: proc (Popen) @@ -67,7 +67,7 @@ class Translator: if proc.returncode != 0: raise CalledProcessError() # type: ignore - def _validate_formatters(self, deformat, reformat): # type: (Translator, Optional[str], Optional[str]) -> Tuple[Union[str, object], Union[str, object]] + def _validate_formatters(self, deformat: Optional[str], reformat: Optional[str]) -> Tuple[Union[str, object], Union[str, object]]: """ Args: deformat (Optional[str]) @@ -76,7 +76,7 @@ class Translator: Returns: Tuple[Union[str, object], Union[str, object]] """ - def valid1(elt, lst): # type: (Optional[str], List[object]) -> Union[str, object] + def valid1(elt: Optional[str], lst: List[object]) -> Union[str, object]: """ Args: elt (Optional[str]) @@ -105,7 +105,7 @@ class Translator: ] return valid1(deformat, deformatters), valid1(reformat, reformatters) - def _get_deformat(self, deformat, text): # type: (Translator, str, str) -> str + def _get_deformat(self, deformat: str, text: str) -> str: """ Args: deformat (str) @@ -125,7 +125,7 @@ class Translator: res = str(deformatted) return res - def _get_reformat(self, reformat, text): # type: (Translator, str, str) -> str + def _get_reformat(self, reformat: str, text: str) -> str: """ Args: reformat (str) @@ -166,7 +166,7 @@ class Translator: unsafe_deformat, unsafe_reformat = self._get_format(formatting, deformat, reformat) 
deformater, reformater = self._validate_formatters(unsafe_deformat, unsafe_reformat) deformatted = self._get_deformat(str(deformater), text) - output = execute(deformatted, cmds) + output = execute_pipeline(deformatted, cmds) result = self._get_reformat(str(reformater), output).strip() return result.decode() # type: ignore diff --git a/apertium/utils.py b/apertium/utils.py index d05e802..d55ee95 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -1,11 +1,12 @@ -import re import subprocess +import tempfile from typing import List +import lttoolbox + import apertium # noqa: F401 from apertium.iso639 import iso_639_codes - iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} @@ -24,7 +25,7 @@ def to_alpha3_code(code: str) -> str: return iso639_codes_inverse[code] if code in iso639_codes_inverse else code -def execute(inp: str, commands: List[List[str]]) -> str: +def execute_pipeline(inp: str, commands: List[List[str]]) -> str: """ Args: inp (str) @@ -33,13 +34,25 @@ def execute(inp: str, commands: List[List[str]]) -> str: Returns: str """ - procs = [] end = inp.encode() - for i, command in enumerate(commands): - procs.append( - subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE), - ) - end, _ = procs[i].communicate(end) + for command in commands: + if 'lt-proc' == command[0]: + arg = command[1][1] if len(command) == 3 else '' + path = command[-1] + with tempfile.NamedTemporaryFile('w') as input_file, tempfile.NamedTemporaryFile('r') as output_file: + text = end.decode() + input_file.write(text) + input_file.flush() + lttoolbox.LtLocale.tryToSetLocale() + fst = lttoolbox.FST() + if not fst.valid(): + raise ValueError('FST Invalid') + fst.lt_proc(arg, path, input_file.name, output_file.name) + end = output_file.read().encode() + else: + apertium.logger.warning('Calling subprocess %s', command[0]) + proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + end, _ = proc.communicate(end) return end.decode() @@ 
-51,7 +64,8 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: Returns: List[List[str]] """ - mode_str = open(mode_path, 'r').read().strip() + with open(mode_path) as mode_file: + mode_str = mode_file.read().strip() if mode_str: commands = [] for cmd in mode_str.strip().split('|'): @@ -59,7 +73,6 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: # modes.xml instead; this is brittle (what if a path # has | or ' in it?) cmd = cmd.replace('$2', '').replace('$1', '-g') - cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) commands.append([c.strip("'") for c in cmd.split()]) return commands else: diff --git a/build-swig-wrapper.sh b/build-swig-wrapper.sh new file mode 100755 index 0000000..6ef4354 --- /dev/null +++ b/build-swig-wrapper.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +sudo apt-get install -y swig build-essential python3-setuptools +git clone -b swig_wrapper https://github.com/Vaydheesh/lttoolbox.git +cd lttoolbox || set -e +./autogen.sh --enable-python-bindings && make +cd python || set -e +python3 setup.py install \ No newline at end of file