commit c7490dc40d6507cd725ac1554b7fb82d619ca4ad
Author: vivekvardhanadepu <vivekvicky839@gmail.com>
Date:   Thu Jul 29 01:23:18 2021 +0530

    init non-parallel corpora training

diff --git a/.github/workflows/training.yml b/.github/workflows/training.yml
index 2552d9b..2c70d5c 100644
--- a/.github/workflows/training.yml
+++ b/.github/workflows/training.yml
@@ -19,7 +19,7 @@ jobs:
           pip3 install -r requirements.txt
           
       - name: run
-        run: python3 check_config.py tests/training/config.toml
+        run: "! python3 check_config.py tests/training/config.toml"
 
   training:
     name: lexical selection training
@@ -72,4 +72,4 @@ jobs:
           pip3 install -r requirements.txt
           
       - name: Training
-        run: python3 lexical_training.py tests/training/config.toml
+        run: python3 lexical_selection_training.py tests/training/config.toml
diff --git a/.gitignore b/.gitignore
index 8a3b797..ec53a12 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,4 +12,7 @@ __pycache__/
 /*.toml
 
 # corpora
-europarl*
\ No newline at end of file
+europarl*
+
+# bash files
+/*.sh
\ No newline at end of file
diff --git a/README.md b/README.md
index 4601445..4809bb0 100644
--- a/README.md
+++ b/README.md
@@ -6,12 +6,22 @@ for more, read https://wiki.apertium.org/wiki/Ideas_for_Google_Summer_of_Code/Us
 
 ## requirements
 
+**parallel corpora:**
+
 - [parallel corpus](https://wiki.apertium.org/wiki/Corpora)
 - [apertium-core](https://wiki.apertium.org/wiki/Installation) (install apertium-lex-tools with yasmet)
 - [fast_align](https://github.com/clab/fast_align)
 - [language pair](https://wiki.apertium.org/wiki/List_of_language_pairs) (install locally)
 - python dependencies in [requirements.txt](requirements.txt)
 
+**non-parallel corpora:**
+- [non-parallel corpus](https://wiki.apertium.org/wiki/Corpora)
+- [apertium-core](https://wiki.apertium.org/wiki/Installation)
+- [language pair](https://wiki.apertium.org/wiki/List_of_language_pairs) (install locally)
+- [IRSTLM](https://wiki.apertium.org/wiki/IRSTLM)
+- python dependencies in [requirements.txt](requirements.txt)
+
+
 ## how to use
 
 - install the requirements and download or clone this repo (`git clone https://github.com/vivekvardhanadepu/apertium-lexical-training.git`)
@@ -34,3 +44,6 @@ This folder contains scripts and data for automated testing of the training scri
 Philipp Koehn.
 *Europarl: A Parallel Corpus for Statistical Machine Translation.*
 MT Summit 2005.
+
+<a id="2">[2]</a>
+YASMET: Yet Another Small MaxEnt Toolkit. https://www-i6.informatik.rwth-aachen.de/web/Software/YASMET.html
diff --git a/config.toml.example b/config.toml.example
index 9c9d15c..ed7ea17 100644
--- a/config.toml.example
+++ b/config.toml.example
@@ -18,7 +18,7 @@ CORPUS_TL = "europarl-v7.eng-spa.spa"
 # apertium-lex-tools scripts
 # LEX_TOOLS = "/home/vivek/Documents/FOSS/apertium/apertium-lex-tools/scripts"
 
-# fast align build folder
+# fast align build folder [not required for non-parallel training]
 FAST_ALIGN = "/home/vivek/Documents/FOSS/apertium/user-friendly-lexical-training/coding_challenges/fast_align/build"
 
 # apertium language data
diff --git a/lexical_training.py b/lexical_selection_training.py
similarity index 99%
rename from lexical_training.py
rename to lexical_selection_training.py
index d91916e..eb6ec33 100644
--- a/lexical_training.py
+++ b/lexical_selection_training.py
@@ -252,7 +252,7 @@ def training(config, cache_dir, log):
                 f0.seek(0)
                 f1.truncate(0)
                 # print(l)
-                cmds = [['grep', f'^{l}'], ['cut', '-f', '2'], ['head', '-1']]
+                cmds = [['grep', f'^{l}'], ['head', '-1'], ['cut', '-f', '2']]
                 pipe(cmds, f0, f1, log).wait()
                 f0.seek(0)