#!/usr/bin/env python3

from collections import defaultdict
from functools import partial
from http import HTTPStatus
import http.server
import json
import os
import shlex
import socketserver
import subprocess
import sys
import urllib.parse
import xml.etree.ElementTree

# Corpus definitions keyed by corpus name; filled in by load_corpora().
CORPORA = {}


class Step:
    """A single program invocation inside a mode's pipeline.

    Attributes:
        prog: name of the executable (first word of the element's ``name``).
        args: list of command-line arguments passed to ``prog``.
        name: short label used to build the step's output file name.
    """

    # Labels for programs whose step name does not depend on their flags.
    prognames = {
        'cg-proc': 'disam',
        'apertium-tagger': 'tagger',
        'apertium-pretransfer': 'pretransfer',
        'lrx-proc': 'lex',
        'apertium-transfer': 'chunker',
        'apertium-interchunk': 'interchunk',
        'apertium-postchunk': 'postchunk',
        'lsx-proc': 'autoseq',
        'rtx-proc': 'transfer',
        'apertium-anaphora': 'anaph'
    }
    # Labels for lt-proc/hfst-proc, selected by the flag found in the args.
    morphmodes = {
        '-b': 'biltrans',
        '-p': 'postgen',
        '-g': 'generator'
    }
    # Programs whose label defaults to 'morph' unless a morphmodes flag is set.
    _morph_progs = ('lt-proc', 'hfst-proc')

    def __init__(self, xml):
        """Build a step from an XML element describing one program.

        Args:
            xml: an ElementTree element. Its ``name`` attribute is shell-split
                into the program and its leading arguments. Children tagged
                ``arg`` contribute their shell-split ``name`` attribute; any
                other child contributes its ``name`` attribute as one
                argument. An optional ``debug-suff`` attribute overrides the
                derived step name.

        Raises:
            KeyError: if the element or one of its children has no ``name``.
            IndexError: if the ``name`` attribute is empty.
        """
        pr = shlex.split(xml.attrib['name'])
        self.prog = pr[0]
        self.args = pr[1:]
        for child in xml:
            if child.tag == 'arg':
                self.args += shlex.split(child.attrib['name'])
            else:
                self.args.append(child.attrib['name'])
        # Replace the '$1'/'$2' placeholders with '-g'.  The list has to be
        # rebuilt: assigning to the loop variable (as was done before) leaves
        # self.args untouched, so the placeholder reached the command line and
        # the name detection below never saw '-g'.
        self.args = ['-g' if a in ('$1', '$2') else a for a in self.args]
        self.name = xml.attrib.get('debug-suff', 'unknown')
        if self.name == 'unknown':
            if self.prog in Step.prognames:
                self.name = Step.prognames[self.prog]
            elif self.prog in Step._morph_progs:
                self.name = 'morph'
                # No break: when several flags are present the last entry of
                # morphmodes that matches wins, as before.
                for flag, label in Step.morphmodes.items():
                    if flag in self.args:
                        self.name = label

    def run(self, in_name, out_name):
        """Run the program on ``in_name`` and write its output to ``out_name``.

        A NUL character is inserted after every newline of the input, and the
        program's output is split on NUL again, each piece being stripped and
        written on its own line.  For the known tools ``-z`` is appended to
        the command (presumably their NUL-flush option - confirm).

        Args:
            in_name: path of the UTF-8 text file fed to the program's stdin.
            out_name: path of the UTF-8 text file receiving the output.
        """
        cmd = [self.prog] + self.args
        print('running', cmd)
        if self.prog in Step.prognames or self.prog in Step._morph_progs:
            cmd.append('-z')
        # Files are opened as UTF-8 explicitly so they agree with the UTF-8
        # encoding used on the pipe regardless of the locale.
        with open(in_name, 'r', encoding='utf-8') as fin:
            # TODO: non-\n separators
            txt = fin.read().replace('\n', '\n\0')
        proc = subprocess.run(cmd, input=txt.encode('utf-8'),
                              stdout=subprocess.PIPE)
        with open(out_name, 'w', encoding='utf-8') as fout:
            for line in proc.stdout.decode('utf-8').split('\0'):
                fout.write(line.strip() + '\n')
class Mode:
    """A named pipeline of steps read from one element of modes.xml.

    Every instance registers itself in ``Mode.all_modes`` under its name.
    """

    # Registry of all constructed modes, keyed by mode name.
    all_modes = {}

    def __init__(self, xml):
        """Build the mode from an XML element.

        Args:
            xml: an ElementTree element with a ``name`` attribute whose first
                child holds one element per step (presumably ``<pipeline>``
                containing ``<program>`` elements - confirm against
                modes.xml).
        """
        self.name = xml.attrib['name']
        self.steps = [Step(s) for s in xml[0]]
        # Make step names unique within the mode: the second and later steps
        # sharing a name get their occurrence number appended (x, x2, x3...).
        seen = defaultdict(int)
        for step in self.steps:
            base = step.name
            seen[base] += 1
            if seen[base] > 1:
                step.name = base + str(seen[base])
        Mode.all_modes[self.name] = self

    def run(self, corpusname, filename):
        """Run every step in order, chaining each output into the next input.

        Args:
            corpusname: prefix for the generated ``test/<corpus>-<step>-output.txt``
                files.
            filename: input file name, relative to the ``test`` directory.
        """
        fin = 'test/' + filename
        for step in self.steps:
            fout = 'test/%s-%s-output.txt' % (corpusname, step.name)
            step.run(fin, fout)
            fin = fout
        # Publish the last step's output (or the input itself when there are
        # no steps) under a fixed name.  Copied as bytes so the content is not
        # re-interpreted in the locale encoding.
        with open(fin, 'rb') as src:
            with open('test/%s-all-output.txt' % corpusname, 'wb') as dst:
                dst.write(src.read())

    def get_commands(self):
        """Return the list of step names in pipeline order."""
        return [s.name for s in self.steps]


def load_modes():
    """Parse modes.xml in the current directory and register every mode.

    Prints a diagnostic and exits with status 1 when the file is missing or
    cannot be parsed.
    """
    try:
        root = xml.etree.ElementTree.parse('modes.xml').getroot()
    except FileNotFoundError:
        print('modes.xml not found.')
        print('Please ensure that apertium-regtest is being run in an Apertium directory.')
        sys.exit(1)
    except xml.etree.ElementTree.ParseError as e:
        print('Unable to parse modes.xml.')
        print('Parser message: %s' % e.msg)
        sys.exit(1)
    for m in root:
        # Constructing a Mode registers it in Mode.all_modes.
        Mode(m)


def get_url(remote):
    """Return the URL of git remote ``remote``, or '' if git reports failure."""
    proc = subprocess.run(['git', 'remote', 'get-url', remote],
                          stdout=subprocess.PIPE)
    if proc.returncode != 0:
        return ''
    return proc.stdout.decode('utf-8').strip()


def yes_no(msg):
    """Ask a yes/no question on the console until a usable answer is given.

    Any non-empty, case-insensitive prefix of "yes" or "no" is accepted.  An
    empty reply is rejected and the question is asked again: ``'yes'``
    starts with the empty string, so without the explicit check simply
    pressing Enter would be taken as "yes".

    Args:
        msg: the question to display; ' (yes/no) ' is appended.

    Returns:
        True for yes, False for no.
    """
    ans = input(msg + ' (yes/no) ')
    while True:
        reply = ans.strip().lower()
        if reply and 'yes'.startswith(reply):
            return True
        if reply and 'no'.startswith(reply):
            return False
        ans = input('unable to interpret reply - please type yes or no: ')
def check_git():
    """Offer to clone an external test corpus next to the current repository.

    The clone URL is guessed from a git remote of the current directory
    ('origin' first, then any other remote that has a URL) by replacing
    '/apertium-' with '/test-'; the user may override it.

    Returns:
        True if a repository was cloned into ``test``; False if there is no
        usable remote or the user declined.  Exits with status 1 when the
        clone itself fails.
    """
    # look for an external git repo
    # return True if we end up cloning it
    proc = subprocess.run(['git', 'remote'], stdout=subprocess.PIPE)
    if proc.returncode != 0:
        return False
    all_remotes = proc.stdout.decode('utf-8').strip().split()
    if not all_remotes:
        return False
    url = ''
    if 'origin' in all_remotes:
        url = get_url('origin')
    if not url:
        for remote in all_remotes:
            # Query each remote in turn; asking for 'origin' again here would
            # make the fallback pointless.
            url = get_url(remote)
            if url:
                break
    if not url:
        return False
    url = url.replace('/apertium-', '/test-')
    ans = yes_no('Test corpora not found. Clone external test corpus?')
    if not ans:
        return False
    inurl = input('remote url (default %s): ' % url).strip()
    if not inurl:
        inurl = url
    proc = subprocess.run(['git', 'clone', inurl, 'test'])
    if proc.returncode == 0:
        return True
    print('Cloning failed. Please check the remote url and try again.')
    sys.exit(1)


def load_corpora():
    """Load test/tests.json into the module-level ``CORPORA`` dictionary.

    When the file is missing and the current directory is a git checkout,
    the user is offered the chance to clone the corpus (see check_git) and
    loading is retried.  Otherwise, or when the file is not valid JSON, a
    diagnostic is printed and the process exits with status 1.
    """
    global CORPORA
    if not os.path.isdir('test') or not os.path.isfile('test/tests.json'):
        if os.path.isdir('.git'):
            if check_git():
                load_corpora()
                return
        print('Test corpora not found. Please create test/tests.json')
        print('as described at https://wiki.apertium.org/wiki/User:Popcorndude/Regression-Testing')
        sys.exit(1)
    with open('test/tests.json', encoding='utf-8') as ts:
        try:
            CORPORA = json.load(ts)
        except json.JSONDecodeError as e:
            # The exception class is JSONDecodeError; the misspelt
            # 'JSONDecoderError' raised AttributeError instead of reporting
            # the problem.
            print('test/tests.json is not a valid JSON document. First error on line %s' % e.lineno)
            sys.exit(1)
def test_run(corpora):
    """Run the configured pipeline for each requested corpus.

    Args:
        corpora: list of corpus names; if it contains '*', every corpus in
            ``CORPORA`` is run.

    Returns:
        A ``(good, output)`` tuple.  ``good`` is True with an empty ``output``
        when every requested corpus was processed.  It is False, with a
        message in ``output``, as soon as a corpus name is unknown or its
        entry names an unknown mode or lacks an ``input`` file; these cases
        used to escape as KeyError.  Corpora without a 'mode' key are
        skipped, as before.
    """
    names = list(CORPORA) if '*' in corpora else list(corpora)
    for name in names:
        if name not in CORPORA:
            return False, 'Unknown corpus: %s' % name
        corp = CORPORA[name]
        # TODO: more error checking, start-step, command
        if 'mode' in corp:
            mode = Mode.all_modes.get(corp['mode'])
            if mode is None:
                return False, 'Corpus %s uses unknown mode: %s' % (name, corp['mode'])
            if 'input' not in corp:
                return False, 'Corpus %s has no input file' % name
            mode.run(name, corp['input'])
    return True, ''


def cb_load(page):
    """Build the state dictionary returned for the 'load' request.

    Args:
        page: page identifier supplied by the client; currently unused.

    Returns:
        ``{'state': {...}}`` where the state holds the bookkeeping keys
        ``_step``, ``_count`` and ``_ordered`` plus, per corpus, a ``cmds``
        list with the step names of that corpus's mode.

    Raises:
        KeyError: if a corpus has no 'mode' or names an unregistered mode.
    """
    state = {
        '_step': 25,  # TODO
        '_count': 0,
        '_ordered': []
    }
    for name, corpus in CORPORA.items():
        state[name] = {
            'cmds': Mode.all_modes[corpus['mode']].get_commands()
        }
    return {'state': state}
def start_server(port=3000):
    """Serve the static front end and the callback API until interrupted.

    Files are served from the ``static/`` directory located next to this
    script; requests are handled by CallbackRequestHandler.

    Args:
        port: TCP port to listen on, on all interfaces.  Defaults to 3000,
            the value that was previously hard-coded.
    """
    static_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'static/')
    handle = partial(CallbackRequestHandler, directory=static_dir)
    with socketserver.TCPServer(('', port), handle) as httpd:
        httpd.serve_forever()


if __name__ == '__main__':
    # Modes and corpora must be loaded before any request can be answered.
    load_modes()
    load_corpora()
    start_server()