commit 893b7366f3095f3065cfe226cf1d4ca3fc17a35d
Author: vivekvardhanadepu <vivekvicky839@gmail.com>
Date:   Thu Jun 24 20:03:33 2021 +0530

    Scripts fixup

diff --git a/scripts/extract-sentences.py b/scripts/extract-sentences.py
index 01aedf7..8d7ee06 100755
--- a/scripts/extract-sentences.py
+++ b/scripts/extract-sentences.py
@@ -22,75 +22,76 @@ def ambiguous(bt):  # {
 # }
 
 
-def extract_sentences(phrase_table, biltrans_out):
+def extract_sentences(phrase_table_file, biltrans_out_file):
     reading = True
     lineno = 0
     total_valid = 0
     total_errors = 0
 
     not_ambiguous = []
+    with open(phrase_table_file) as phrase_table, open(biltrans_out_file) as biltrans_out:
+        while reading:  # {
+            try:
+                lineno = lineno + 1
+                pt_line = phrase_table.readline().strip()
+                bt_line = biltrans_out.readline().strip()
+
+                if not bt_line.strip() and not pt_line.strip():  # {
+                    reading = False
+                    break
+                elif not bt_line.strip() or not pt_line.strip():  # {
+                    continue
 
-    while reading:  # {
-        try:
-            lineno = lineno + 1
-            pt_line = phrase_table.readline().strip()
-            bt_line = biltrans_out.readline().strip()
-
-            if not bt_line.strip() and not pt_line.strip():  # {
-                reading = False
-                break
-            elif not bt_line.strip() or not pt_line.strip():  # {
-                continue
-
-            # }
-            row = pt_line.split('|||')
-            bt = common.tokenise_biltrans_line(bt_line.strip())
-            sl = common.tokenise_tagger_line(row[1].strip())
-            tl = common.tokenise_tagger_line(row[0].strip())
-
-            if not ambiguous(bt):  # {
-                not_ambiguous.append(str(lineno))
-                if len(not_ambiguous) >= 10:  # {
-                    print("not ambiguous:", ' '.join(
-                        not_ambiguous), file=sys.stderr)
-                    not_ambiguous = []
                 # }
-                continue
-            # }
-            if len(sl) < 2 and len(tl) < 2:  # {
-                continue
-            # }
+                row = pt_line.split('|||')
+                bt = common.tokenise_biltrans_line(bt_line.strip())
+                sl = common.tokenise_tagger_line(row[1].strip())
+                tl = common.tokenise_tagger_line(row[0].strip())
+
+                if not ambiguous(bt):  # {
+                    not_ambiguous.append(str(lineno))
+                    if len(not_ambiguous) >= 10:  # {
+                        print("not ambiguous:", ' '.join(
+                            not_ambiguous), file=sys.stderr)
+                        not_ambiguous = []
+                    # }
+                    continue
+                # }
+                if len(sl) < 2 and len(tl) < 2:  # {
+                    continue
+                # }
 
-            # Check that the number of words in the lexical transfer, and in the phrasetable matches up
-            if len(sl) != len(bt):  # {
-                print("Error in line", lineno,
-                      ": len(sl) != len(bt)", file=sys.stderr)
-                continue
-            # }
+                # Check that the number of words in the lexical transfer, and in the phrasetable matches up
+                if len(sl) != len(bt):  # {
+                    print("Error in line", lineno,
+                          ": len(sl) != len(bt)", file=sys.stderr)
+                    continue
+                # }
 
-            # cheking if the alignments are empty
-            if not row[2].strip():
-                print("In line", lineno, ", alignments are empty", file=sys.stderr)
+                # checking if the alignments are empty
+                if not row[2].strip():
+                    print("In line", lineno,
+                          ", alignments are empty", file=sys.stderr)
+                    continue
+
+                # Resumption<n> of<pr> the<def><def> session<n>
+                # Resumption<n><sg>/Reanudación<n><f><sg> of<pr>/de<pr> the<det><def><sp>/el<det><def><GD><ND> session<n><sg>/sesión<n><f><sg>
+                # Reanudación<n> de<pr> el<det><def> periodo<n> de<pr> sesión<n>
+                # 0-0 1-1 2-2 5-3
+
+                print(lineno, '\t' + row[1])
+                print(lineno, '\t' + bt_line)
+                print(lineno, '\t' + row[0])
+                print(lineno, '\t' + row[2])
+                print(
+                    '-------------------------------------------------------------------------------')
+                total_valid += 1
+            except Exception as e:
+                print("Error in line", lineno, ": ", e, file=sys.stderr)
+                total_errors += 1
                 continue
 
-            # Resumption<n> of<pr> the<def><def> session<n>
-            # Resumption<n><sg>/Reanudación<n><f><sg> of<pr>/de<pr> the<det><def><sp>/el<det><def><GD><ND> session<n><sg>/sesión<n><f><sg>
-            # Reanudación<n> de<pr> el<det><def> periodo<n> de<pr> sesión<n>
-            # 0-0 1-1 2-2 5-3
-
-            print(lineno, '\t' + row[1])
-            print(lineno, '\t' + bt_line)
-            print(lineno, '\t' + row[0])
-            print(lineno, '\t' + row[2])
-            print(
-                '-------------------------------------------------------------------------------')
-            total_valid += 1
-        except Exception as e:
-            print("Error in line", lineno, ": ", e, file=sys.stderr)
-            total_errors += 1
-            continue
-
-    # }
+        # }
 
     print('total:', lineno, file=sys.stderr)
     print('valid:', total_valid,
@@ -104,5 +105,5 @@ if __name__ == '__main__':
         print('extact-sentences.py <phrasetable> <biltrans>')
         exit(1)
     # }
-    with open(sys.argv[1]) as phrase_table, open(sys.argv[2]) as biltrans_out:
-        extract_sentences(phrase_table, biltrans_out)
+
+    extract_sentences(sys.argv[1], sys.argv[2])
diff --git a/scripts/ngram-count-patterns-maxent2.py b/scripts/ngram-count-patterns-maxent2.py
index 463bc3f..693fc82 100755
--- a/scripts/ngram-count-patterns-maxent2.py
+++ b/scripts/ngram-count-patterns-maxent2.py
@@ -247,7 +247,8 @@ def ngram_count_patterns(freq_lexicon, candidates):
         print(features[feature], '\t', feature, file=sys.stderr)
     # }
 
-    exit(1)
+    # exit(1)
+    return
 
     for slword in meevents:  # {
         if len(sl_tl[slword]) < 2:  # {