commit ccfe020961832bd42050cbefefee68f810688096
Author: vivekvardhanadepu <vivekvicky839@gmail.com>
Date:   Fri Jun 4 09:43:04 2021 +0530

    moving repo to apertium-lexical-training

diff --git a/clean_corpus.py b/clean_corpus.py
deleted file mode 100644
index b131bf8..0000000
--- a/clean_corpus.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# removes lines above and below the empty lines including the empty lines in each corpus
-# removes lines containing only ° and *
-# stripping trailing and leading spaces
-
-
-import sys
-
-
-def main(argc, argv):
-    if argc != 3:
-        print('usage: clean_corpus.py <corpus 1> <corpus 2>')
-        exit(-1)
-
-    lines1 = []
-    lines2 = []
-    lines_to_remove = set()
-
-    with open(argv[1], 'r+') as l1, open(argv[2], 'r+') as l2:
-        lines1 = l1.readlines()
-        lines2 = l2.readlines()
-        assert len(lines1) == len(lines2)
-        # print(lines1, lines2)
-        i = 0
-        for i in range(len(lines1)):
-            if (not lines1[i].strip()) or (not lines2[i].strip()):
-                lines_to_remove.update([i-1, i, i+1])
-                continue
-            
-            # removing lines only with '°', '*' and '.'
-            if (not lines1[i].replace('°', '').replace('*', '').replace('.','').strip()) and \
-                        (not lines2[i].replace('°', '').replace('*', '').replace('.', '').strip()):
-                lines_to_remove.add(i)
-            # print(lines1, lines2)
-            
-        print(lines_to_remove)
-
-        l1.seek(0)
-        # l1.write(''.join(lines1))
-        l1.write('')
-        l1.truncate()
-
-        l2.seek(0)
-        l2.write('')
-        l2.truncate()
-
-    with open(argv[1], 'a') as l1, open(argv[2], 'a') as l2:
-        lines_to_keep = set()
-        lines_to_keep.update([i for i in range(len(lines1))])
-        lines_to_keep = lines_to_keep - lines_to_remove
-        
-        for i in sorted(lines_to_keep):
-            # also removing leading and trailing spaces
-            l1.write(lines1[i].strip() + '\n')
-            l2.write(lines2[i].strip() + '\n')
-        
-        l1.truncate()
-        l2.truncate()
-
-if __name__ == '__main__':
-    main(len(sys.argv), sys.argv)
\ No newline at end of file
diff --git a/config.toml b/config.toml
deleted file mode 100644
index df7c7a3..0000000
--- a/config.toml
+++ /dev/null
@@ -1,25 +0,0 @@
-# configuration for lexical training
-
-# corpus name
-CORPUS = "europarl-v7"
-
-# source language
-SL = "eng"
-
-# target language
-TL = "spa"
-
-# source corpus
-CORPUS_SL = "europarl-v7.eng-spa.eng"
-
-# target corpus
-CORPUS_TL = "europarl-v7.eng-spa.spa"
-
-# apertium-lex-tools scripts
-LEX_TOOLS = "../apertium-lex-tools/scripts"
-
-# fast align build folder
-FAST_ALIGN = "coding_challenges/fast_align/build"
-
-# apertium language data
-LANG_DATA = "coding_challenges/apertium-eng-spa"
diff --git a/config.toml.example b/config.toml.example
deleted file mode 100644
index 5949a1f..0000000
--- a/config.toml.example
+++ /dev/null
@@ -1,25 +0,0 @@
-# configuration for lexical training
-
-# corpus name
-CORPUS = "europarl-v7"
-
-# source language
-SL = "eng"
-
-# target language
-TL = "spa"
-
-# source corpus
-CORPUS_SL = "europarl-v7.eng-spa.eng"
-
-# target corpus
-CORPUS_TL = "europarl-v7.eng-spa.spa"
-
-# apertium-lex-tools scripts
-LEX_TOOLS = "/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts"
-
-# fast align build folder
-FAST_ALIGN = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build"
-
-# apertium language data
-LANG_DATA = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/apertium-eng-spa"
diff --git a/lexical_training.py b/lexical_training.py
deleted file mode 100644
index cb105b8..0000000
--- a/lexical_training.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# lexical training script
-from check_config import check_config
-
-def main():
-    config = check_config()
-    print("parsing complete")
-    
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/tests/check_config_test.log b/tests/check_config_test.log
deleted file mode 100644
index 0654e3c..0000000
--- a/tests/check_config_test.log
+++ /dev/null
@@ -1,43 +0,0 @@
-Test 1 : wrong paths
----------------------
-'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../europarl-v7.eng-spa.engabc'(CORPUS_SL) is not a file, provide a valid file or 
-to download, look https://wiki.apertium.org/wiki/Corpora 
-
-'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../europarl-v7.eng-spa.spaabc'(CORPUS_TL) is not a file, provide a valid file or 
-to download, look https://wiki.apertium.org/wiki/Corpora 
-
-'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../../apertium-lex-tools/scriptsabc'(LEX_TOOLS) is not a directory, provide a valid directory or 
-to install, follow https://wiki.apertium.org/wiki/Install_Apertium_core_by_compiling 
-
-'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../coding_challenges/fast_align/buildabc'(FAST_ALIGN) is not a directory, provide a valid directory or 
-to install, follow https://github.com/clab/fast_align 
-
-'/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../coding_challenges/apertium-eng-spaabc'(LANG_DATA) is not a directory, provide a valid directory or 
-to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs 
-
-Test 1 : wrong paths
----------------------
-Test 2 : partial/no installations
-----------------------------------
-'process-tagger-output' is not in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../../apertium-lex-tools/scripts'(LEX_TOOLS), provide a valid directory or 
-to install, follow https://wiki.apertium.org/wiki/Install_Apertium_core_by_compiling 
-
-fast_align is not present in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../coding_challenges/fast_align/build'(FAST_ALIGN), provide a valid directory or 
-to install, follow https://github.com/clab/fast_align 
-
-'engabc-spa.autobil.bin' is not in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../coding_challenges/apertium-eng-spa'(LANG_DATA), provide a valid directory or 
-to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs 
-
-'spa-engabc.autobil.bin' is not in '/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/tests/../coding_challenges/apertium-eng-spa'(LANG_DATA), provide a valid directory or 
-to install, follow https://wiki.apertium.org/wiki/List_of_language_pairs 
-
-apertium is either not installed or not added to path, see https://wiki.apertium.org/wiki/Installation 
-
-yasmet is either not installed or not added to path, see https://wiki.apertium.org/wiki/Using_weights_for_ambiguous_rules 
-
-Test 1 : wrong paths
----------------------
-Test 2 : partial/no installations
-----------------------------------
-Test 3 : correct installations
--------------------------------
diff --git a/tests/check_config_test.py b/tests/check_config_test.py
deleted file mode 100644
index 08753b6..0000000
--- a/tests/check_config_test.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# tests check_config.py
-import sys
-from tomlkit import parse, dumps
-import os
-import shutil
-
-sys.path.append('../')
-
-from check_config import check_config
-
-def main(argc, argv):
-    
-    # Test 1
-    config_file = open('config_test.toml', 'r')
-    config_toml = config_file.read()
-    config = parse(config_toml)
-    config_file.close()
-
-    print("Test 1 : wrong paths")
-    print("---------------------")
-
-    for key in config:
-        config[key]+="abc"
-
-    if os.fork() == 0:
-        with open('check_config_test.toml', 'w') as test_file:
-            test_file.write(dumps(config))
-        check_config('check_config_test.toml')
-        exit(0)
-
-    _, _ = os.wait()
-
-    # Test 2
-    config_file = open('config_test.toml', 'r')
-    config_toml = config_file.read()
-    config = parse(config_toml)
-    config_file.close()
-
-    print("Test 2 : partial/no installations")
-    print("----------------------------------")
-
-    config['SL']+="abc"
-
-    for path in os.environ["PATH"].split(os.pathsep):
-        if os.path.isfile(os.path.join(path, 'apertium')):
-            shutil.move(os.path.join(path, 'apertium'), os.path.join(path, 'apertium'+'abc'))
-            break
-    
-    for path in os.environ["PATH"].split(os.pathsep):
-        if os.path.isfile(os.path.join(path, 'yasmet')):
-            shutil.move(os.path.join(path, 'yasmet'), os.path.join(path, 'yasmet'+'abc'))
-            break
-
-    if os.path.isfile(os.path.join(config['LEX_TOOLS'], 'process-tagger-output')):
-        shutil.move(os.path.join(config['LEX_TOOLS'], 'process-tagger-output'), os.path.join(config['LEX_TOOLS'], 'process-tagger-output'+'abc'))
-
-    if os.path.isfile(os.path.join(config['FAST_ALIGN'], 'fast_align')):
-        shutil.move(os.path.join(config['FAST_ALIGN'], 'fast_align'), os.path.join(config['FAST_ALIGN'], 'fast_align'+'abc'))
-
-    if os.fork() == 0:
-        with open('check_config_test.toml', 'w') as test_file:
-            test_file.write(dumps(config))
-        check_config('check_config_test.toml')
-        exit(0)
-
-    _, _ = os.wait()
-
-    shutil.move(os.path.join(config['LEX_TOOLS'], 'process-tagger-output'+'abc'), os.path.join(config['LEX_TOOLS'], 'process-tagger-output'))
-
-    shutil.move(os.path.join(config['FAST_ALIGN'], 'fast_align'+'abc'), os.path.join(config['FAST_ALIGN'], 'fast_align'))
-
-    for path in os.environ["PATH"].split(os.pathsep):
-        if os.path.isfile(os.path.join(path, 'apertium'+'abc')):
-            shutil.move(os.path.join(path, 'apertium'+'abc'), os.path.join(path, 'apertium'))
-            break
-    
-    for path in os.environ["PATH"].split(os.pathsep):
-        if os.path.isfile(os.path.join(path, 'yasmet'+'abc')):
-            shutil.move(os.path.join(path, 'yasmet'+'abc'), os.path.join(path, 'yasmet'))
-            break
-    
-    # Test 3
-    config_file = open('config_test.toml', 'r')
-    config_toml = config_file.read()
-    config = parse(config_toml)
-    config_file.close()
-
-    print("Test 3 : correct installations")
-    print("-------------------------------")
-
-    with open('check_config_test.toml', 'w') as test_file:
-        test_file.write(dumps(config))
-    check_config('check_config_test.toml')
-
-    os.remove('check_config_test.toml')
-
-if __name__ == '__main__':
-    main(len(sys.argv), sys.argv)
\ No newline at end of file
diff --git a/tests/config_test.toml b/tests/config_test.toml
deleted file mode 100644
index 0d67a99..0000000
--- a/tests/config_test.toml
+++ /dev/null
@@ -1,25 +0,0 @@
-# configuration for lexical training
-
-# corpus name
-CORPUS = "europarl-v7"
-
-# source language
-SL = "eng"
-
-# target language
-TL = "spa"
-
-# source corpus
-CORPUS_SL = "../europarl-v7.eng-spa.eng"
-
-# target corpus
-CORPUS_TL = "../europarl-v7.eng-spa.spa"
-
-# apertium-lex-tools scripts
-LEX_TOOLS = "../../apertium-lex-tools/scripts"
-
-# fast align build folder
-FAST_ALIGN = "../coding_challenges/fast_align/build"
-
-# apertium language data
-LANG_DATA = "../coding_challenges/apertium-eng-spa"