commit b98bd6ed4e576d471968fa01677662b44ff4e040
Author: vivekvardhanadepu <vivekvicky839@gmail.com>
Date:   Wed Aug 18 00:40:36 2021 +0530

    Scripts cleanup

diff --git a/scripts/extract-freq-lexicon.py b/scripts/extract-freq-lexicon.py
index 0aeb2df..a0f7a1c 100755
--- a/scripts/extract-freq-lexicon.py
+++ b/scripts/extract-freq-lexicon.py
@@ -20,25 +20,17 @@ import traceback
 # 5 	0-0 4-2 5-3 8-1 9-5 10-6 12-7 13-8 14-9 15-10
 # -------------------------------------------------------------------------------
 
-
-def wrap(x):
-    return '^' + x + '$'
-
-
 def extract_freq_lexicon(canditates):
-    # MAX_NGRAMS = 3
 
     cur_line = 0
     lineno = 0
     sl_tl = {}
-    # ngrams = {}
 
     cur_sl_row = []
     cur_tl_row = []
     cur_bt_row = []
     cur_al_row = []
 
-    # for line in open(sys.argv[1]).readlines():
     with open(canditates) as infile:
         for line in infile:
             line = line.strip()
@@ -130,10 +122,10 @@ def extract_freq_lexicon(canditates):
                 continue
 
             if first:
-                print(sl_tl[sl][tl], wrap(sl), wrap(tl), '@')
+                print(sl_tl[sl][tl], common.wrap(sl), common.wrap(tl), '@')
                 first = False
             else:
-                print(sl_tl[sl][tl], wrap(sl), wrap(tl))
+                print(sl_tl[sl][tl], common.wrap(sl), common.wrap(tl))
 
 
 if __name__ == '__main__':
diff --git a/scripts/extract-sentences.py b/scripts/extract-sentences.py
index fc55a63..03392fe 100755
--- a/scripts/extract-sentences.py
+++ b/scripts/extract-sentences.py
@@ -8,14 +8,11 @@ import common
 
 def ambiguous(bt):
     # legislation<n><sg>/legislación<n><f><sg>/ordenamiento<n><m><sg>
-
-    ambig = False
     for token in bt:
-        tls = token['tls']
-        if len(tls) > 1:
+        if len(token['tls']) > 1:
             return True
 
-    return ambig
+    return False
 
 
 def extract_sentences(phrase_table_file, biltrans_out_file):
diff --git a/scripts/ngram-count-patterns.py b/scripts/ngram-count-patterns.py
index 34ade71..341be3c 100755
--- a/scripts/ngram-count-patterns.py
+++ b/scripts/ngram-count-patterns.py
@@ -19,11 +19,6 @@ import common
 # 5 	0-0 4-2 5-3 8-1 9-5 10-6 12-7 13-8 14-9 15-10
 # -------------------------------------------------------------------------------
 
-
-def wrap(x):
-    return '^' + x + '$'
-
-
 def ngram_count_patterns(freq_lexicon, candidates, crisphold, max_rules):
     MAX_NGRAMS = 2
     cur_line = 0
@@ -41,8 +36,8 @@ def ngram_count_patterns(freq_lexicon, candidates, crisphold, max_rules):
             continue
 
         row = common.tokenise_tagger_line(line)
-        sl = wrap(row[0])
-        tl = wrap(row[1])
+        sl = common.wrap(row[0])
+        tl = common.wrap(row[1])
         if tl[1] == '*':
             tl = tl[:-3] + '$'
         if line.count('@') > 0:
@@ -81,19 +76,18 @@ def ngram_count_patterns(freq_lexicon, candidates, crisphold, max_rules):
                         if al_sl != i:
                             continue
 
-                        tlword = wrap(cur_tl_row[al_tl])
-                        slword = wrap(slword)
+                        tlword = common.wrap(cur_tl_row[al_tl])
+                        slword = common.wrap(slword)
 
                         if slword not in sl_tl_defaults:
                             print('!', file=sys.stderr)
                             continue
 
                         for j in range(1, MAX_NGRAMS):
-
-                            pregram = ' '.join(map(wrap, cur_sl_row[i-j:i+1]))
-                            postgram = ' '.join(map(wrap, cur_sl_row[i:i+j+1]))
+                            pregram = ' '.join(map(common.wrap, cur_sl_row[i-j:i+1]))
+                            postgram = ' '.join(map(common.wrap, cur_sl_row[i:i+j+1]))
                             roundgram = ' '.join(
-                                map(wrap, cur_sl_row[i-j:i+j+1]))
+                                map(common.wrap, cur_sl_row[i-j:i+j+1]))
 
                             if slword not in ngrams:
                                 ngrams[slword] = {}
@@ -120,10 +114,6 @@ def ngram_count_patterns(freq_lexicon, candidates, crisphold, max_rules):
                             ngrams[slword][postgram][tlword] = ngrams[slword][postgram][tlword] + 1
                             ngrams[slword][roundgram][tlword] = ngrams[slword][roundgram][tlword] + 1
 
-    #				for j in range(0, MAX_NGRAMS):
-    #					print cur_sl_row[i-j:i+1]
-    #					print cur_sl_row[i:i+j]
-
                 i = i + 1
 
             cur_line = 0