commit 9e24d31968302e85d8279fbe425027677a88b7d5 Author: vaydheesh Date: Sun Aug 18 13:52:47 2019 +0530 utils.py: continue to keep the nesting minimal diff --git a/apertium/utils.py b/apertium/utils.py index c1ad560..4ab4c79 100644 --- a/apertium/utils.py +++ b/apertium/utils.py @@ -21,6 +21,8 @@ import apertium # noqa: F401 from apertium.iso639 import iso_639_codes iso639_codes_inverse = {v: k for k, v in iso_639_codes.items()} +escape_chars = b'[]{}?^$@\\' +special_chars_map = {i: '\\' + chr(i) for i in escape_chars} def to_alpha3_code(code: str) -> str: @@ -43,9 +45,7 @@ def deformatter(text: str) -> str: This function is a text format processor. Data should be passed through this processor before being piped to lt-proc. """ - escape_chars = b'[]{}?^$@\\' - _special_chars_map = {i: '\\' + chr(i) for i in escape_chars} - return '{}[][\n]'.format(text.translate(_special_chars_map)) + return '{}[][\n]'.format(text.translate(special_chars_map)) def execute_pipeline(inp: str, commands: List[List[str]]) -> str: @@ -63,66 +63,63 @@ def execute_pipeline(inp: str, commands: List[List[str]]) -> str: used_wrapper = True if wrappers_available: if 'apertium-destxt' == command[0]: - used_wrapper = True output = deformatter(end.decode()) end = output.encode() + continue + input_file = tempfile.NamedTemporaryFile(delete=False, mode='w') + output_file = tempfile.NamedTemporaryFile(delete=False) + + text = end.decode() + input_file.write(text) + input_file.close() + + if 'lt-proc' == command[0]: + dictionary_path = command[-1] + lttoolbox.LtLocale.tryToSetLocale() + fst = lttoolbox.FST(dictionary_path) + if not fst.valid(): + raise ValueError('FST Invalid') + lt_proc_command = command[:-1] + fst.lt_proc(len(lt_proc_command), lt_proc_command, input_file.name, output_file.name) + elif 'lrx-proc' == command[0]: + dictionary_path = command[-1] + lextools.LtLocale.tryToSetLocale() + lrx = lextools.LRXProc(dictionary_path) + lrx_proc_command = command[:-1] + lrx.lrx_proc(len(lrx_proc_command), lrx_proc_command, input_file.name, output_file.name) + elif 'apertium-transfer' == command[0]: + transfer = apertium_core.ApertiumTransfer(command[-2], command[-1]) + transfer_command = command[:-2] + transfer.transfer_text(len(transfer_command), transfer_command, input_file.name, output_file.name) + elif 'apertium-interchunk' == command[0]: + interchunk = apertium_core.ApertiumInterchunk(command[-2], command[-1]) + interchunk_command = command[:-2] + interchunk.interchunk_text(len(interchunk_command), interchunk_command, input_file.name, output_file.name) + elif 'apertium-postchunk' == command[0]: + postchunk = apertium_core.ApertiumPostchunk(command[-2], command[-1]) + postchunk_command = command[:-2] + postchunk.postchunk_text(len(postchunk_command), postchunk_command, input_file.name, output_file.name) + elif 'apertium-pretransfer' == command[0]: + command = ['apertium-pretransfer', ''] + apertium_core.pretransfer(len(command), command, input_file.name, output_file.name) + elif 'apertium-tagger' == command[0]: + command += [input_file.name, output_file.name] + apertium_core.ApertiumTagger(len(command), command) + elif 'cg-proc' == command[0]: + dictionary_path = command[-1] + cg = constraint_grammar.CGProc(dictionary_path) + cg_proc_command = command[:-1] + cg.cg_proc(len(cg_proc_command), cg_proc_command, input_file.name, output_file.name) else: - input_file = tempfile.NamedTemporaryFile(delete=False, mode='w') - output_file = tempfile.NamedTemporaryFile(delete=False) - input_file_name, output_file_name = input_file.name, output_file.name - - arg = command[1][1] if len(command) >= 3 else '' - text = end.decode() - input_file.write(text) - input_file.close() - - if 'lt-proc' == command[0]: - dictionary_path = command[-1] - lttoolbox.LtLocale.tryToSetLocale() - fst = lttoolbox.FST(dictionary_path) - if not fst.valid(): - raise ValueError('FST Invalid') - lt_proc_command = command[:-1] - fst.lt_proc(len(lt_proc_command), lt_proc_command, input_file.name, output_file.name) - elif 'lrx-proc' == command[0]: - dictionary_path = command[-1] - lextools.LtLocale.tryToSetLocale() - lrx = lextools.LRXProc(dictionary_path) - lrx_proc_command = command[:-1] - lrx.lrx_proc(len(lrx_proc_command), lrx_proc_command, input_file.name, output_file.name) - elif 'apertium-transfer' == command[0]: - transfer = apertium_core.ApertiumTransfer(command[-2], command[-1]) - transfer_command = command[:-2] - transfer.transfer_text(len(transfer_command), transfer_command, input_file.name, output_file.name) - elif 'apertium-interchunk' == command[0]: - interchunk = apertium_core.ApertiumInterchunk(command[-2], command[-1]) - interchunk_command = command[:-2] - interchunk.interchunk_text(len(interchunk_command), interchunk_command, input_file.name, output_file.name) - elif 'apertium-postchunk' == command[0]: - postchunk = apertium_core.ApertiumPostchunk(command[-2], command[-1]) - postchunk_command = command[:-2] - postchunk.postchunk_text(len(postchunk_command), postchunk_command, input_file.name, output_file.name) - elif 'apertium-pretransfer' == command[0]: - command = ['apertium-pretransfer', ''] - apertium_core.pretransfer(len(command), command, input_file.name, output_file.name) - elif 'apertium-tagger' == command[0]: - command += [input_file.name, output_file.name] - apertium_core.ApertiumTagger(len(command), command) - elif 'cg-proc' == command[0]: - dictionary_path = command[-1] - cg = constraint_grammar.CGProc(dictionary_path) - cg_proc_command = command[:-1] - cg.cg_proc(len(cg_proc_command), cg_proc_command, input_file.name, output_file.name) - else: - used_wrapper = False - - if used_wrapper: - output_file.seek(0) - end = output_file.read() - output_file.close() - - os.remove(input_file.name) - os.remove(output_file.name) + used_wrapper = False + + if used_wrapper: + output_file.seek(0) + end = output_file.read() + output_file.close() + + os.remove(input_file.name) + os.remove(output_file.name) if not wrappers_available or not used_wrapper: apertium.logger.warning('Calling subprocess %s', command[0]) proc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)