commit 3e09b53d4d186c9a4f5a57b386d0958e0481c49a
Author: vivekvardhanadepu <vivekvicky839@gmail.com>
Date:   Sun May 23 18:18:33 2021 +0530

    lexical training script init

diff --git a/README.md b/README.md
index 21463b6..7984ab5 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ pre-training: preProcessing.sh
 
 lang-models: make_lang_model.sh
 
-alignment: alignment.sh[using fast_align, [Chris Dyer](http://www.cs.cmu.edu/~cdyer), [Victor Chahuneau](http://victor.chahuneau.fr), and [Noah A. Smith](http://www.cs.cmu.edu/~nasmith). (2013). [A Simple, Fast, and Effective Reparameterization of IBM Model 2](http://www.ark.cs.cmu.edu/cdyer/fast_valign.pdf). In *Proc. of NAACL*.
+alignment: alignment.sh[using fast_align, [Chris Dyer](http://www.cs.cmu.edu/~cdyer), [Victor Chahuneau](http://victor.chahuneau.fr), and [Noah A. Smith](http://www.cs.cmu.edu/~nasmith). (2013). [A Simple, Fast, and Effective Reparameterization of IBM Model 2](http://www.ark.cs.cmu.edu/cdyer/fast_valign.pdf). In *Proc. of NAACL*.]
 
 rule-extraction: rule_extraction.sh
 
diff --git a/coding_challenges/parser_test.out b/coding_challenges/parser_test.out
deleted file mode 100644
index a351e5b..0000000
--- a/coding_challenges/parser_test.out
+++ /dev/null
@@ -1 +0,0 @@
-{'title': 'TOML Example', 'owner': {'name': 'Tom Preston-Werner', 'dob': DateTime(1979, 5, 27, 7, 32, tzinfo=datetime.timezone(datetime.timedelta(days=-1, seconds=57600), '-08:00'))}, 'database': {'server': '192.168.1.1', 'ports': [8000, 8001, 8002], 'connection_max': 5000, 'enabled': True}, 'servers': {'alpha': {'ip': '10.0.0.1', 'dc': 'eqdc10'}, 'beta': {'ip': '10.0.0.2', 'dc': 'eqdc10'}}, 'clients': {'data': [['gamma', 'delta'], [1, 2]], 'hosts': ['alpha', 'omega'], 'str1': 'Roses are red\nViolets are blue', 'str2': 'The quick brown fox jumps over the lazy dog.', 'float4': 5e+22, 'float5': 1000000.0, 'float6': -0.02, 'float7': 6.626e-34, 'float8': 224617.445991228, 'infinite1': inf, 'infinite2': inf, 'infinite3': -inf, 'not1': nan, 'not2': nan, 're': '\\d{2} apps is t[wo]o many', 'lines': 'The first newline is\ntrimmed in raw strings.\nAll other whitespace\nis preserved.\n'}}
diff --git a/coding_challenges/parser_test.toml b/coding_challenges/parser_test.toml
deleted file mode 100644
index 9f39c18..0000000
--- a/coding_challenges/parser_test.toml
+++ /dev/null
@@ -1,71 +0,0 @@
-# This is a TOML document.
-
-title = "TOML Example"
-
-[owner]
-name = "Tom Preston-Werner"
-dob = 1979-05-27T07:32:00-08:00 # First class dates
-
-[database]
-server = "192.168.1.1"
-ports = [ 8000, 8001, 8002 ]
-connection_max = 5000
-enabled = true
-
-[servers]
-
-  # Indentation (tabs and/or spaces) is allowed but not required
-  [servers.alpha]
-  ip = "10.0.0.1"
-  dc = "eqdc10"
-
-  [servers.beta]
-  ip = "10.0.0.2"
-  dc = "eqdc10"
-
-[clients]
-data = [ ["gamma", "delta"], [1, 2] ]
-
-# Line breaks are OK when inside arrays
-hosts = [
-  "alpha",
-  "omega"
-]
-
-str1 = """
-Roses are red
-Violets are blue"""
-
-str2 = """\
-  The quick brown \
-  fox jumps over \
-  the lazy dog.\
-  """
-
-  # exponent
-float4 = 5e+22
-float5 = 1e06
-float6 = -2E-2
-
-# both
-float7 = 6.626e-34
-
-# separators
-float8 = 224_617.445_991_228
-
-# infinity
-infinite1 = inf # positive infinity
-infinite2 = +inf # positive infinity
-infinite3 = -inf # negative infinity
-
-# not a number
-not1 = nan
-not2 = +nan
-
-re = '''\d{2} apps is t[wo]o many'''
-lines = '''
-The first newline is
-trimmed in raw strings.
-All other whitespace
-is preserved.
-'''
\ No newline at end of file
diff --git a/coding_challenges/toml_parser.py b/coding_challenges/toml_parser.py
deleted file mode 100644
index af4a056..0000000
--- a/coding_challenges/toml_parser.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from tomlkit import parse, dumps
-
-def main():
-    with open('parser_test.toml') as test_file:
-        data_toml = test_file.read()
-        data_json = parse(data_toml)
-
-    # gives error if not parsed well
-    assert data_toml == dumps(data_json)
-
-    # outputting as a dictionary
-    with open('parser_test.out', 'w') as json_file:
-            print(data_json, file=json_file)
-
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/config.toml b/config.toml
new file mode 100644
index 0000000..9ba35b5
--- /dev/null
+++ b/config.toml
@@ -0,0 +1,25 @@
+# configuration for lexical training (fill in every value before running)
+
+# corpus name
+CORPUS = ""
+
+# source language
+SL = ""
+
+# target language
+TL = ""
+
+# source corpus
+CORPUS_SL = ""
+
+# target corpus
+CORPUS_TL = ""
+
+# apertium-lex-tools scripts
+LEX_TOOLS = ""
+
+# apertium language data
+DATA = ""
+
+# fast align build folder
+FAST_ALIGN = ""
\ No newline at end of file
diff --git a/config_parser.py b/config_parser.py
new file mode 100644
index 0000000..fb544d8
--- /dev/null
+++ b/config_parser.py
@@ -0,0 +1,30 @@
+from tomlkit import parse, dumps
+
+def parse_config(filename='config.toml'):
+    """Read a TOML configuration file and return the parsed document.
+
+    Args:
+        filename: path of the TOML file to load (default 'config.toml').
+
+    Returns:
+        The document object produced by tomlkit.parse().
+
+    Raises:
+        ValueError: if dumping the parsed document does not reproduce the
+            file text exactly.
+    """
+    with open(filename, encoding='utf-8') as config_file:
+        config_toml = config_file.read()
+
+    config = parse(config_toml)
+
+    # Round-trip sanity check. Raised explicitly rather than asserted,
+    # because assert statements are stripped when Python runs with -O.
+    if config_toml != dumps(config):
+        raise ValueError(
+            '{}: parsed config does not round-trip to the same text'.format(filename))
+
+    return config
+
+if __name__ == '__main__':
+    parse_config()
\ No newline at end of file
diff --git a/lexical_training.log b/lexical_training.log
new file mode 100644
index 0000000..e69de29
diff --git a/lexical_training.py b/lexical_training.py
new file mode 100644
index 0000000..1208a96
--- /dev/null
+++ b/lexical_training.py
@@ -0,0 +1,10 @@
+# entry point of the lexical training script
+from config_parser import parse_config
+
+def main():
+    """Load the configuration through parse_config() and print it."""
+    # no filename is passed, so parse_config() falls back to its own default
+    print(parse_config())
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file