commit ded6d15e3dfeaaa38294f85453be5561d7233b58 Author: vivekvardhanadepu Date: Sat May 29 16:18:02 2021 +0530 minor changes in check_config.py, added automated test script for check_config diff --git a/README.md b/README.md index 0550221..5b55fe8 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,10 @@ The procedure for lexical selection training is a bit messy, with various script for more, read https://wiki.apertium.org/wiki/Ideas_for_Google_Summer_of_Code/User-friendly_lexical_selection_training +## tests + +This folder contains scripts for automated testing of the helper scripts + ## coding challenges *(using for testing)* In directory coding_challenges, diff --git a/check_config.py b/check_config.py index 1954399..957517f 100644 --- a/check_config.py +++ b/check_config.py @@ -11,7 +11,8 @@ langs_url = "https://wiki.apertium.org/wiki/List_of_language_pairs" apertium_url = "https://wiki.apertium.org/wiki/Installation" yasmet_url = "https://wiki.apertium.org/wiki/Using_weights_for_ambiguous_rules" -def parse_config(filename='config.toml'): +def check_config(filename='config.toml'): + misconfigured = False with open(filename) as config_file: config_toml = config_file.read() config = parse(config_toml) @@ -20,16 +21,19 @@ def parse_config(filename='config.toml'): assert config_toml == dumps(config) if not os.path.isfile(config['CORPUS_SL']): - print(config['CORPUS_SL'], "is not a file, provide a valid file or \nto download, look", corpora_url) - exit(-1) + print("'"+config['CORPUS_SL']+"'(CORPUS_SL)","is not a file, provide a valid"+ \ + " file or \nto download, look", corpora_url, '\n') + misconfigured = True if not os.path.isfile(config['CORPUS_TL']): - print(config['CORPUS_TL'], "is not a file, provide a valid file or \nto download, look", corpora_url) - exit(-1) + print("'"+config['CORPUS_TL']+"'(CORPUS_TL)", "is not a file, provide a valid "+ \ + "file or \nto download, look", corpora_url, '\n') + misconfigured = True if not os.path.isdir(config['LEX_TOOLS']): - print(config['LEX_TOOLS'], "is not a directory, provide a valid directory or \nto install, follow", lex_tools_url) - exit(-1) + print("'"+config['LEX_TOOLS']+"'(LEX_TOOLS)", "is not a directory, provide a valid "+ \ + "directory or \nto install, follow", lex_tools_url, '\n') + misconfigured = True else: # scripts = ['process-tagger-output', 'extract-sentences.py', 'extract-freq-lexicon.py', \ # 'ngram-count-patterns-maxent2.py', 'merge-ngrams-lambdas.py', 'lambdas-to-rules.py', \ @@ -39,31 +43,35 @@ def parse_config(filename='config.toml'): # assuming scripts are intact if 'process-tagger-output' not in os.listdir(config['LEX_TOOLS']): - print("process-tagger-output is not in", config['LEX_TOOLS'] + ",","provide a valid directory or \nto install, follow", lex_tools_url) - exit(-1) + print("'process-tagger-output' is not in", "'"+config['LEX_TOOLS']+"'(LEX_TOOLS),", \ + "provide a valid directory or \nto install, follow", lex_tools_url, '\n') + misconfigured = True if not os.path.isdir(config['FAST_ALIGN']): - print(config['FAST_ALIGN'], "is not a directory, provide a valid directory or \nto install, follow", fast_align_url) - exit(-1) + print("'"+config['FAST_ALIGN']+"'(FAST_ALIGN)", "is not a directory, provide"+ \ + " a valid directory or \nto install, follow", fast_align_url, '\n') + misconfigured = True else: if 'fast_align' not in os.listdir(config['FAST_ALIGN']): - print("fast_align is not present in", config['FAST_ALIGN']+ ",", "provide a valid directory or \nto install, follow", fast_align_url) - exit(-1) + print("fast_align is not present in", "'"+config['FAST_ALIGN']+"'(FAST_ALIGN),", \ + "provide a valid directory or \nto install, follow", fast_align_url, '\n') + misconfigured = True if not os.path.isdir(config['LANG_DATA']): - print(config['LANG_DATA'], "is not a directory, provide a valid directory or \nto install, follow", langs_url) - exit(-1) + print("'"+config['LANG_DATA']+"'(DATA)", "is not a directory, provide a valid "+ \ + "directory or \nto install, follow", langs_url, '\n') + misconfigured = True else: sl_tl_autobil = config['SL'] + '-' + config['TL'] + '.autobil.bin' tl_sl_autobil = config['TL'] + '-' + config['SL'] + '.autobil.bin' - if sl_tl_autobil not in os.listdir(config['LANG_DATA']): - print(sl_tl_autobil, "is not in", config['LANG_DATA']+ ",", "provide a valid directory or \nto install, follow", langs_url) - exit(-1) - + print("'"+sl_tl_autobil+"'", "is not in", "'"+config['LANG_DATA']+ "'(DATA),", \ + "provide a valid directory or \nto install, follow", langs_url, '\n') + misconfigured = True if tl_sl_autobil not in os.listdir(config['LANG_DATA']): - print(tl_sl_autobil, "is not in", config['LANG_DATA']+ ",", "provide a valid directory or \nto install, follow", langs_url) - exit(-1) + print("'"+tl_sl_autobil+"'", "is not in", "'"+config['LANG_DATA']+ "'(DATA),", \ + "provide a valid directory or \nto install, follow", langs_url, '\n') + misconfigured = True apertium_present = False for path in os.environ["PATH"].split(os.pathsep): @@ -72,7 +80,8 @@ def parse_config(filename='config.toml'): break if not apertium_present: - print("apertium is either not installed or not added to path, see", apertium_url) + print("apertium is either not installed or not added to path, see", apertium_url, '\n') + misconfigured = True yasmet_present = False for path in os.environ["PATH"].split(os.pathsep): @@ -81,7 +90,11 @@ def parse_config(filename='config.toml'): break if not yasmet_present: - print("yasmet is either not installed or not added to path, see", yasmet_url) + print("yasmet is either not installed or not added to path, see", yasmet_url, '\n') + misconfigured = True + + if misconfigured: + exit(1) return config diff --git a/config.toml b/config.toml index 22158dd..99fbf7b 100644 --- a/config.toml +++ b/config.toml @@ -11,10 +11,10 @@ SL = "eng" TL = "spa" # source corpus -CORPUS_SL = "europarl-v7.eng-spa.eng" +CORPUS_SL = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.eng" # target corpus -CORPUS_TL = "europarl-v7.eng-spa.spa" +CORPUS_TL = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.spa" # apertium-lex-tools scripts LEX_TOOLS = "/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts" diff --git a/lexical_training.py b/lexical_training.py index 72fdfa0..cb105b8 100644 --- a/lexical_training.py +++ b/lexical_training.py @@ -1,8 +1,8 @@ # lexical training script -from check_config import parse_config +from check_config import check_config def main(): - config = parse_config() + config = check_config() print("parsing complete") if __name__ == '__main__': diff --git a/tests/check_config_test.log b/tests/check_config_test.log new file mode 100644 index 0000000..a9156d8 --- /dev/null +++ b/tests/check_config_test.log @@ -0,0 +1,43 @@ +Test 1 : wrong paths +--------------------- +'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.engabc'(CORPUS_SL) is not a file, provide a valid file or +to download, look https://wiki.apertium.org/wiki/Corpora + +'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.spaabc'(CORPUS_TL) is not a file, provide a valid file or +to download, look https://wiki.apertium.org/wiki/Corpora + +'/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scriptsabc'(LEX_TOOLS) is not a directory, provide a valid directory or +to install, follow https://wiki.apertium.org/wiki/Install_Apertium_core_by_compiling + +'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/buildabc'(FAST_ALIGN) is not a directory, provide a valid directory or +to install, follow https://github.com/clab/fast_align + +'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spaabc'(DATA) is not a directory, provide a valid directory or +to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs + +Test 1 : wrong paths +--------------------- +Test 2 : partial/no installations +---------------------------------- +'process-tagger-output' is not in '/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts'(LEX_TOOLS), provide a valid directory or +to install, follow https://wiki.apertium.org/wiki/Install_Apertium_core_by_compiling + +fast_align is not present in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build'(FAST_ALIGN), provide a valid directory or +to install, follow https://github.com/clab/fast_align + +'engabc-spa.autobil.bin' is not in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa'(DATA), provide a valid directory or +to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs + +'spa-engabc.autobil.bin' is not in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa'(DATA), provide a valid directory or +to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs + +apertium is either not installed or not added to path, see https://wiki.apertium.org/wiki/Installation + +yasmet is either not installed or not added to path, see https://wiki.apertium.org/wiki/Using_weights_for_ambiguous_rules + +Test 1 : wrong paths +--------------------- +Test 2 : partial/no installations +---------------------------------- +Test 3 : correct installations +------------------------------- diff --git a/tests/check_config_test.py b/tests/check_config_test.py new file mode 100644 index 0000000..979e581 --- /dev/null +++ b/tests/check_config_test.py @@ -0,0 +1,100 @@ +# tests check_config.py +import sys +from tomlkit import parse, dumps +import os +import shutil + +sys.path.append('../') + +from check_config import check_config + +def main(argc, argv): + + # Test 1 + config_file = open('../config.toml', 'r') + config_toml = config_file.read() + config = parse(config_toml) + config_file.close() + + print("Test 1 : wrong paths") + print("---------------------") + + for key in config: + config[key]+="abc" + + if os.fork() == 0: + with open('check_config_test.toml', 'w') as test_file: + test_file.write(dumps(config)) + check_config('check_config_test.toml') + exit(0) + + _, _ = os.wait() + + # print() + + # Test 2 + config_file = open('../config.toml', 'r') + config_toml = config_file.read() + config = parse(config_toml) + config_file.close() + + print("Test 2 : partial/no installations") + print("----------------------------------") + + config['SL']+="abc" + + if os.path.isfile(os.path.join(config['LEX_TOOLS'], 'process-tagger-output')): + shutil.move(os.path.join(config['LEX_TOOLS'], 'process-tagger-output'), os.path.join(config['LEX_TOOLS'], 'process-tagger-output'+'abc')) + + if os.path.isfile(os.path.join(config['FAST_ALIGN'], 'fast_align')): + shutil.move(os.path.join(config['FAST_ALIGN'], 'fast_align'), os.path.join(config['FAST_ALIGN'], 'fast_align'+'abc')) + + for path in os.environ["PATH"].split(os.pathsep): + if os.path.isfile(os.path.join(path, 'apertium')): + shutil.move(os.path.join(path, 'apertium'), os.path.join(path, 'apertium'+'abc')) + break + + for path in os.environ["PATH"].split(os.pathsep): + if os.path.isfile(os.path.join(path, 'yasmet')): + shutil.move(os.path.join(path, 'yasmet'), os.path.join(path, 'yasmet'+'abc')) + break + + if os.fork() == 0: + with open('check_config_test.toml', 'w') as test_file: + test_file.write(dumps(config)) + check_config('check_config_test.toml') + exit(0) + + _, _ = os.wait() + + shutil.move(os.path.join(config['LEX_TOOLS'], 'process-tagger-output'+'abc'), os.path.join(config['LEX_TOOLS'], 'process-tagger-output')) + + shutil.move(os.path.join(config['FAST_ALIGN'], 'fast_align'+'abc'), os.path.join(config['FAST_ALIGN'], 'fast_align')) + + for path in os.environ["PATH"].split(os.pathsep): + if os.path.isfile(os.path.join(path, 'apertium'+'abc')): + shutil.move(os.path.join(path, 'apertium'+'abc'), os.path.join(path, 'apertium')) + break + + for path in os.environ["PATH"].split(os.pathsep): + if os.path.isfile(os.path.join(path, 'yasmet'+'abc')): + shutil.move(os.path.join(path, 'yasmet'+'abc'), os.path.join(path, 'yasmet')) + break + + # print() + + # Test 3 + config_file = open('../config.toml', 'r') + config_toml = config_file.read() + config = parse(config_toml) + config_file.close() + + print("Test 3 : correct installations") + print("-------------------------------") + + with open('check_config_test.toml', 'w') as test_file: + test_file.write(dumps(config)) + check_config('check_config_test.toml') + +if __name__ == '__main__': + main(len(sys.argv), sys.argv) \ No newline at end of file diff --git a/tests/check_config_test.toml b/tests/check_config_test.toml new file mode 100644 index 0000000..99fbf7b --- /dev/null +++ b/tests/check_config_test.toml @@ -0,0 +1,26 @@ +# configuration for lexical training +# Note: pass absolute paths + +# corpus name +CORPUS = "europarl-v7" + +# source language +SL = "eng" + +# target language +TL = "spa" + +# source corpus +CORPUS_SL = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.eng" + +# target corpus +CORPUS_TL = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/europarl-v7.eng-spa.spa" + +# apertium-lex-tools scripts +LEX_TOOLS = "/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts" + +# fast align build folder +FAST_ALIGN = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build" + +# apertium language data +LANG_DATA = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa"