commit 5cec4d05c34c50288b9dab215be5bf69c5195aee Author: vivekvardhanadepu Date: Sat Jul 31 12:40:41 2021 +0530 Script fixup: fixing positional args error diff --git a/scripts/biltrans-extract-frac-freq.py b/scripts/biltrans-extract-frac-freq.py index d17f20d..a203ef2 100644 --- a/scripts/biltrans-extract-frac-freq.py +++ b/scripts/biltrans-extract-frac-freq.py @@ -5,6 +5,7 @@ import sys from collections import defaultdict import biltrans_count_common as BCC +import common # Input: # a) Biltrans output diff --git a/scripts/biltrans_count_common.py b/scripts/biltrans_count_common.py index 0aa751c..3b14b9d 100644 --- a/scripts/biltrans_count_common.py +++ b/scripts/biltrans_count_common.py @@ -47,7 +47,7 @@ class BiltransCounter: self.dm_line = None self.dm_row = None self.dm_id = None - self.am_linenum = 0 + self.dm_linenum = 0 self.clear_ngrams() @@ -79,6 +79,7 @@ class BiltransCounter: self.dm_id, self.dm_row = None, [] self.reading = False return + ls = self.dm_line.split('\t') if self.line_ids: self.dm_id = int(self.dm_line.split('.[][')[1].split()[0]) if self.tokenizer == 'regex': @@ -93,13 +94,13 @@ class BiltransCounter: def check_rows(self): if len(self.am_row) != len(self.dm_row): print('Mismatch in number of LUs between analysis and training', file=sys.stderr) - print('\t' + am_line, file=sys.stderr) - print('\t' + dm_line, file=sys.stderr) + print('\t' + self.am_line, file=sys.stderr) + print('\t' + self.dm_line, file=sys.stderr) print('...skipping', file=sys.stderr) return False return True - def read_files_multi_dm(am_fname, dm_fname): + def read_files_multi_dm(self, am_fname, dm_fname): self.next_dm_line() while self.reading: self.next_am_line() @@ -113,7 +114,7 @@ class BiltransCounter: if self.am_linenum % 1000 == 0: print('=> %d SL and %d TL lines read' % (self.am_linenum, self.dm_linenum), file=sys.stderr) - def read_files(am_fname, dm_fname): + def read_files(self, am_fname, dm_fname): self.am_file = open(am_fname) self.dm_file = open(dm_fname) self.reading = True