commit b0c2f137b1a5e4ea272287cb976ab097f04ac40c Author: vaydheesh Date: Sun Jun 16 18:31:31 2019 +0000 Added: lt-proc -a, -b, -p Removed: lttoolbox.py Call wrapper directly from utils.py Modified: utils.py Removed: '-z' argument for every command Added: logs when calling subprocess Fixed: ResourceWarning when opening mode files diff --git a/apertium/lttoolbox.py b/apertium/lttoolbox.py deleted file mode 100644 index dabee9a..0000000 --- a/apertium/lttoolbox.py +++ /dev/null @@ -1,51 +0,0 @@ -import tempfile -from typing import ByteString, List # noqa: F401 - -import lttoolbox - - -class LtProc: - """ - Attributes: - command (List) - path (str) - input_text (str) - output_text (str) - """ - - def __init__(self, command: List, input_text: str) -> None: - """ - Args: - command (List) - input_text (str) - """ - self.command = command - self.path = command[-1] - self.input_text = input_text - self.output_text = '' - - def execute(self) -> ByteString: - """ - Executes the required method, depending upon the argument for lt-proc - - Args: - self (LtProc) - - Returns: - (ByteString) - """ - with tempfile.NamedTemporaryFile('w') as input_file, tempfile.NamedTemporaryFile('r') as output_file: - input_file.write(self.input_text) - input_file.flush() - lttoolbox.LtLocale.tryToSetLocale() - fst = lttoolbox.FST() - if not fst.valid(): - raise ValueError('FST Invalid') - if '-w' in self.command: - fst.setDictionaryCaseMode(True) - fst.analyze(self.path, input_file.name, output_file.name) - elif '-g' in self.command: - fst.generate(self.path, input_file.name, output_file.name) - - self.output_text = output_file.read() - return self.output_text.encode() diff --git a/apertium/utils.py b/apertium/utils.py index 93faefc..f62e8f9 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -1,9 +1,11 @@ -import re +import logging import subprocess +import tempfile from typing import List +import lttoolbox + import apertium # noqa: F401 -from apertium import lttoolbox from apertium.iso639 import iso_639_codes iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} @@ -34,11 +36,27 @@ def execute_pipeline(inp: str, commands: List[List[str]]) -> str: str """ end = inp.encode() + logging.basicConfig(format='%(asctime)s %(message)s', level=logging.DEBUG) + logger = logging.getLogger() + logger.setLevel(logging.DEBUG) for command in commands: - if 'lt-proc' in command and ('-w' in command or '-g' in command): - ltp = lttoolbox.LtProc(command, end.decode()) - end = ltp.execute() + if 'lt-proc' == command[0]: + arg = '' + if len(command) == 3: + arg = command[1][1] + path = command[-1] + with tempfile.NamedTemporaryFile('w') as input_file, tempfile.NamedTemporaryFile('r') as output_file: + text = end.decode() + input_file.write(text) + input_file.flush() + lttoolbox.LtLocale.tryToSetLocale() + fst = lttoolbox.FST() + if not fst.valid(): + raise ValueError('FST Invalid') + fst.lt_proc(arg, path, input_file.name, output_file.name) + end = output_file.read().encode() else: + logger.info('Calling subprocess %s', command[0]) proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) end, _ = proc.communicate(end) return end.decode() @@ -52,7 +70,8 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: Returns: List[List[str]] """ - mode_str = open(mode_path, 'r').read().strip() + with open(mode_path) as mode_file: + mode_str = mode_file.read().strip() if mode_str: commands = [] for cmd in mode_str.strip().split('|'): @@ -60,7 +79,6 @@ def parse_mode_file(mode_path: str) -> List[List[str]]: # modes.xml instead; this is brittle (what if a path # has | or ' in it?) cmd = cmd.replace('$2', '').replace('$1', '-g') - cmd = re.sub(r'^\s*(\S*)', r'\g<1> -z', cmd) commands.append([c.strip("'") for c in cmd.split()]) return commands else: