commit 19b0e2a10258e57b1f909d094f0b53f9495644fb Author: vivekvardhanadepu Date: Mon May 24 20:48:43 2021 +0530 check_config added diff --git a/README.md b/README.md index 7984ab5..0550221 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ The procedure for lexical selection training is a bit messy, with various script for more, read https://wiki.apertium.org/wiki/Ideas_for_Google_Summer_of_Code/User-friendly_lexical_selection_training -## coding challenges +## coding challenges *(using for testing)* In directory coding_challenges, diff --git a/check_config.py b/check_config.py new file mode 100644 index 0000000..864d000 --- /dev/null +++ b/check_config.py @@ -0,0 +1,40 @@ +# parses the config, check if the tools are present + +from tomlkit import parse, dumps +import os + +# urls of the required tools and data +corpora_url = "" +lex_tools_url = "" +fast_align_url = "" +langs_url = "" + +def parse_config(filename='config.toml'): + with open(filename) as config_file: + config_toml = config_file.read() + config = parse(config_toml) + + # gives error if not parsed well + assert config_toml == dumps(config) + + if not os.path.isfile(config['CORPUS_SL']): + print(config['CORPUS_SL'], "is not a file. Provide a valid file or to download,\n look", corpora_url) + + if not os.path.isfile(config['CORPUS_TL']): + print(config['CORPUS_TL'], "is not a file. Provide a valid file or to download,\n look", corpora_url) + + if not os.path.isdir(config['LEX_TOOLS']): + print(config['LEX_TOOLS'], "is not a directory. Provide a valid directory or to install,\n follow", lex_tools_url) + + if not os.path.isdir(config['FAST_ALIGN']): + print(config['FAST_ALIGN'], "is not a directory. Provide a valid directory or to install,\n follow", fast_align_url) + + if not os.path.isdir(config['LANG_DATA']): + print(config['LANG_DATA'], "is not a directory. Provide a valid directory or to install,\n follow", langs_url) + + + + return config + +if __name__ == '__main__': + parse_config() \ No newline at end of file diff --git a/config.toml b/config.toml index 9ba35b5..8809d38 100644 --- a/config.toml +++ b/config.toml @@ -1,25 +1,26 @@ # configuration for lexical training +# Note: pass absolute paths # corpus name -CORPUS = +CORPUS = "europarl-v7" # source language -SL = +SL = "eng" # target language -TL = +TL = "spa" # source corpus -CORPUS_SL = +CORPUS_SL = "europarl-v7.eng-spa.eng" # target corpus -CORPUS_TL = +CORPUS_TL = "europarl-v7.eng-spa.spa" # apertium-lex-tools scripts -LEX_TOOLS = +LEX_TOOLS = "/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts" # apertium language data -DATA = +LANG_DATA = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa" # fast align build folder -FAST_ALIGN = \ No newline at end of file +FAST_ALIGN = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build" \ No newline at end of file diff --git a/config_parser.py b/config_parser.py deleted file mode 100644 index fb544d8..0000000 --- a/config_parser.py +++ /dev/null @@ -1,14 +0,0 @@ -from tomlkit import parse, dumps - -def parse_config(filename='config.toml'): - with open(filename) as config_file: - config_toml = config_file.read() - config = parse(config_toml) - - # gives error if not parsed well - assert config_toml == dumps(config) - - return config - -if __name__ == '__main__': - parse_config() \ No newline at end of file diff --git a/lexical_training.log b/lexical_training.log index e69de29..2356a59 100644 --- a/lexical_training.log +++ b/lexical_training.log @@ -0,0 +1 @@ +{'CORPUS': 'europarl-v7', 'SL': 'eng', 'TL': 'spa', 'CORPUS_SL': 'europarl-v7.eng-spa.eng', 'CORPUS_TL': 'europarl-v7.eng-spa.spa', 'LEX_TOOLS': '/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts', 'DATA': '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa', 'FAST_ALIGN': '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build'} diff --git a/lexical_training.py b/lexical_training.py index 1208a96..ace48b3 100644 --- a/lexical_training.py +++ b/lexical_training.py @@ -1,9 +1,8 @@ # lexical training script -from config_parser import parse_config +from check_config import parse_config def main(): config = parse_config() - print(config) if __name__ == '__main__': main() \ No newline at end of file