commit 4e77669be261084363605c9779901bad44748f8c
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Mon Jul 12 22:08:35 2021 -0500

    ongoing script cleanup

diff --git a/scripts/biltrans-extract-freq.py b/scripts/biltrans-extract-freq.py
index 49fd928..b5e9d9a 100644
--- a/scripts/biltrans-extract-freq.py
+++ b/scripts/biltrans-extract-freq.py
@@ -18,13 +18,9 @@ class Counter(BCC.BiltransCounter):
 	tokenizer = 'biltrans'
 	line_ids = False
 
-	def processs_row(self, frac_count=0):
+	def process_lu(self, sl, tl, idx, cur_sl_row, frac_count=0):
 		global sl_tl
-		for i in range(len(self.am_row)):
-			if self.am_row[i].count('/') > 1:
-				sl = BCC.strip_tags(self.am_row[i], 'sl')
-				tl = BCC.strip_tags(self.dm_row[i], 'tl')
-				sl_tl[sl][tl] += 1
+		sl_tl[sl][tl] += 1
 
 c = Counter()
 c.read_files(sys.argv[1], # File with ambiguous biltrans output
diff --git a/scripts/biltrans-line-only-pos-ambig.py b/scripts/biltrans-line-only-pos-ambig.py
index cef4f50..d0f37d5 100755
--- a/scripts/biltrans-line-only-pos-ambig.py
+++ b/scripts/biltrans-line-only-pos-ambig.py
@@ -2,61 +2,41 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys;
-
-pos = ["<n>", "<vblex>", "<adj>"];
-
-def process_line(l): #{
-	global pos;
-	w = '';
-	in_word = False;
-	escaped = False;
-	for c in l: #{
-		if c == '\\': #{
-			escaped = True;
-			continue;
-		#}
-		if c == '^' and escaped == False: #{
-			in_word = True;
-		#}
-		if c == '$' and escaped == False: #{
-			word_in_pos = False;
-			for p in pos: #{
-				if w.count(p) > 0: #{
-					word_in_pos = True;
-				#}
-			#}
-
-			if w.count('/') > 1 and word_in_pos == True: #{
-				return True;
-			#}
-
-			w = '';
-			in_word = False;
-		#}
-		if in_word == True: #{
-			w = w + c;
-		#}
-		escaped = False;
-	#}
-	return False;
-#}
-
-output = False;
-
-infile = sys.stdin ;
-
-if len(sys.argv) > 1: #{
-	infile = open(sys.argv[1]);
-#}
-
-for line in infile.readlines(): #{
-
-	output = process_line(line);
-
-	if output == True: #{
-		print(line.strip());
-	#}
+import sys
+
+pos = ["<n>", "<vblex>", "<adj>"]
+
+def process_line(l):
+	"""Return True if a ^...$ unit in `l` has more than one '/' and contains a tag from `pos`."""
+	unit = ''
+	inside = False
+	after_backslash = False
+	for ch in l:
+		if ch == '\\':
+			after_backslash = True
+			continue
+		if not after_backslash and ch == '^':
+			inside = True
+		if not after_backslash and ch == '$':
+			if unit.count('/') > 1 and any(tag in unit for tag in pos):
+				return True
+			unit = ''
+			inside = False
+		if inside:
+			unit += ch
+		after_backslash = False
+	return False
+
+output = False
+
+infile = sys.stdin
+
+if len(sys.argv) > 1:
+	infile = open(sys.argv[1])
+
+for line in infile.readlines():
+	output = process_line(line)
+	if output == True:
+		print(line.strip())
 #	else:
-#		print(line.strip(), file=sys.stderr);
-#}
+#		print(line.strip(), file=sys.stderr)
diff --git a/scripts/biltrans-no-retained.py b/scripts/biltrans-no-retained.py
index 7460a71..e8eea5b 100644
--- a/scripts/biltrans-no-retained.py
+++ b/scripts/biltrans-no-retained.py
@@ -2,58 +2,53 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys, codecs, copy;
+import sys
 
-lines = [] ;
-for line in open(sys.argv[2]).readlines(): #{
-	lines.append(int(line.strip()));
-#}
-print(sys.argv, len(lines), file=sys.stderr);
+# Line numbers to discard; a set keeps each `lineno in lines` test O(1).
+with open(sys.argv[2]) as f:
+	lines = {int(ln) for ln in f if ln.strip()}  # blank lines are skipped
 
-lineno = 1;
+print(sys.argv, len(lines), file=sys.stderr)
 
-inf = open(sys.argv[1]);
-buf = inf.readline();
-while buf != '': #{
-	if lineno not in lines: #{
-		print(buf.strip());
-	elif lineno in lines: #{
-		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-	else: #{
-		print('Something weird happened.', file=sys.stderr);
-	#}
+lineno = 1
 
-	lineno = lineno + 1;
-	buf = inf.readline();
-#}
+inf = open(sys.argv[1])
+buf = inf.readline()
+while buf != '':
+	if lineno in lines:
+		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+	else:
+		print(buf.strip())
+	lineno += 1
+	buf = inf.readline()
 
 #
-#c = inf.read(1);
-#buf = '';
-#while c: #{
-#	if c == '\n': #{
-#		if lineno in lines: #{
-#			print(buf.strip());
-#		elif lineno not in lines: #{
-#			print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-#		else: #{
-#			print('Something weird happened.', file=sys.stderr);
-#		#}
-#		lineno = lineno + 1;
-#		buf = '';
-#	#}
+#c = inf.read(1)
+#buf = ''
+#while c:
+#	if c == '\n':
+#		if lineno in lines:
+#			print(buf.strip())
+#		elif lineno not in lines:
+#			print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+#		else:
+#			print('Something weird happened.', file=sys.stderr)
+#
+#		lineno = lineno + 1
+#		buf = ''
+#
+#
+#	buf = buf + c
+#	c = inf.read(1)
 #
-#	buf = buf + c;
-#	c = inf.read(1);
-##}
 ##
-#for line in open(sys.argv[1]).readlines(): #{
-#	if lineno in lines: #{
-#		print(line.strip());
-#	elif lineno not in lines: #{
-#		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-#	else: #{
-#		print('Something weird happened.', file=sys.stderr);
-#	#}
-#	lineno = lineno + 1;
-##}
+#for line in open(sys.argv[1]).readlines():
+#	if lineno in lines:
+#		print(line.strip())
+#	elif lineno not in lines:
+#		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+#	else:
+#		print('Something weird happened.', file=sys.stderr)
+#
+#	lineno = lineno + 1
+#
diff --git a/scripts/biltrans-only-retained.py b/scripts/biltrans-only-retained.py
index 7508da6..f1b41f1 100644
--- a/scripts/biltrans-only-retained.py
+++ b/scripts/biltrans-only-retained.py
@@ -2,58 +2,53 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys, codecs, copy;
+import sys
 
-lines = [] ;
-for line in open(sys.argv[2]).readlines(): #{
-	lines.append(int(line.strip()));
-#}
-print(sys.argv, len(lines), file=sys.stderr);
+# Line numbers to keep; a set keeps each `lineno in lines` test O(1).
+with open(sys.argv[2]) as f:
+	lines = {int(ln) for ln in f if ln.strip()}  # blank lines are skipped
 
-lineno = 1;
+print(sys.argv, len(lines), file=sys.stderr)
 
-inf = open(sys.argv[1]);
-buf = inf.readline();
-while buf != '': #{
-	if lineno in lines: #{
-		print(buf.strip());
-	elif lineno not in lines: #{
-		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-	else: #{
-		print('Something weird happened.', file=sys.stderr);
-	#}
+lineno = 1
 
-	lineno = lineno + 1;
-	buf = inf.readline();
-#}
+inf = open(sys.argv[1])
+buf = inf.readline()
+while buf != '':
+	if lineno in lines:
+		print(buf.strip())
+	else:
+		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+	lineno += 1
+	buf = inf.readline()
 
 #
-#c = inf.read(1);
-#buf = '';
-#while c: #{
-#	if c == '\n': #{
-#		if lineno in lines: #{
-#			print(buf.strip());
-#		elif lineno not in lines: #{
-#			print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-#		else: #{
-#			print('Something weird happened.', file=sys.stderr);
-#		#}
-#		lineno = lineno + 1;
-#		buf = '';
-#	#}
+#c = inf.read(1)
+#buf = ''
+#while c:
+#	if c == '\n':
+#		if lineno in lines:
+#			print(buf.strip())
+#		elif lineno not in lines:
+#			print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+#		else:
+#			print('Something weird happened.', file=sys.stderr)
+#
+#		lineno = lineno + 1
+#		buf = ''
+#
+#
+#	buf = buf + c
+#	c = inf.read(1)
 #
-#	buf = buf + c;
-#	c = inf.read(1);
-##}
 ##
-#for line in open(sys.argv[1]).readlines(): #{
-#	if lineno in lines: #{
-#		print(line.strip());
-#	elif lineno not in lines: #{
-#		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr);
-#	else: #{
-#		print('Something weird happened.', file=sys.stderr);
-#	#}
-#	lineno = lineno + 1;
-##}
+#for line in open(sys.argv[1]).readlines():
+#	if lineno in lines:
+#		print(line.strip())
+#	elif lineno not in lines:
+#		print('Line ' + str(lineno) + ' discarded.', file=sys.stderr)
+#	else:
+#		print('Something weird happened.', file=sys.stderr)
+#
+#	lineno = lineno + 1
+#
diff --git a/scripts/biltrans-to-multitrans-line-recursive.py b/scripts/biltrans-to-multitrans-line-recursive.py
index f613529..a15a7fb 100755
--- a/scripts/biltrans-to-multitrans-line-recursive.py
+++ b/scripts/biltrans-to-multitrans-line-recursive.py
@@ -2,98 +2,53 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys, codecs, copy, commands;
+import sys
 from operator import mul
 
-sys.stdin  = codecs.getreader('utf-8')(sys.stdin);
-sys.stdout = codecs.getwriter('utf-8')(sys.stdout);
-sys.stderr = codecs.getwriter('utf-8')(sys.stderr);
-
 t = 0
 lineno = 0
 
-def process_biltrans_unit(lu): #{
-
-	state = 0;
-	sl = '';
-	tl = [];
-	for c in lu[1:-1]: #{
-		#^worth<n><sg>/valor<n><m><sg>$ ^\$<mon>/\$<mon>$^20<num>/20<num>$^*m/*m$
-		#print c , sl , tl;
-		if c == '/': #{
-			state = state + 1;
-			tl.append(sl)
-		#}
-		if state == 0: #{
-			sl = sl + c;
-		#}
-		if state >= 1: #{
-			tl[state-1] = tl[state-1] + c;
-		#}
-	#}
-	return (sl, tl);
-#}
-
 def parse_input(line):
 	sentence = []
-	escaped = False;
-	reading_word = False;
+	escaped = False
+	reading_word = False
 	lu = ''
 
-	for c in line: #{
-		if c == '\\': #{
-			escaped = True;
-			lu = lu + c;
-			c = sys.stdin.read(1);
-		#}
-		if c == '^': #{
-			reading_word = True;
-		#}
-		if c == '$' and escaped == False: #{
-			lu = lu + c;
-			reading_word = False;
-			(sl, tl) = process_biltrans_unit(lu)
-			sentence.append(tl)
-			lu = '';
-		#}
-		if c != '\\' and escaped == True: #{
-			escaped = False;
-		#}
-		if c.isspace(): #{
-			if reading_word == False: #{
-				continue;
-			#}
-		#}
-		if reading_word: #{
-			lu = lu + c;
-		#}
-	#}sys.stdout.writesys.stdout.write
+	for c in line:
+		if escaped:
+			escaped = False  # a backslash escapes only the next character
+			lu += c if reading_word else ''
+		elif c == '\\':
+			if reading_word:
+				lu += c
+			escaped = True
+		elif c == '^':
+			reading_word = True
+		elif c == '$':
+			sentence.append(lu.split('/')[1:])
+			reading_word = False
+			lu = ''
+		elif reading_word:
+			lu += c
+
 	return sentence
-#}
-def process(sentence, start, out): #{
+
+def process(sentence, start, out):
 	global t
 	global lineno
-	if start >= len(sentence): #{
-		sys.stdout.write ('.[][' + str(lineno) + " " + str(t) + '].[]\t')
-		for s in out:
-			sys.stdout.write("^" + s + "$ ");
-		print ''
+	if start >= len(sentence):
+		sen = ' '.join('^'+s+'$' for s in out)
+		print('.[][%d %d].[]\t%s' % (lineno, t, sen))
 		t += 1
-		return;
-	#}
-	tokens = sentence[start]
-	for token in tokens: #{
-		out.append(token)
-		process(sentence, start + 1, out);
-		del out[-1]
-	#}
-#}
+		return
+
+	for token in sentence[start]:
+		process(sentence, start + 1, out + [token])
 
-while True: #{
-	lineno
-	string = sys.stdin.readline().rstrip();
+while True:
+	string = sys.stdin.readline().rstrip()
 	if string == "":
-		break;
+		break
 
 	tokens = parse_input(string)
 	# print map(len, tokens)
@@ -102,4 +57,3 @@ while True: #{
 	process(tokens, 0, [])
 	lineno += 1
 	t = 0
-#}
diff --git a/scripts/biltrans-to-multitrans.py b/scripts/biltrans-to-multitrans.py
index e92113f..ecb24be 100644
--- a/scripts/biltrans-to-multitrans.py
+++ b/scripts/biltrans-to-multitrans.py
@@ -2,115 +2,65 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys, codecs, copy, commands;
+import sys
 
-sys.stdin  = codecs.getreader('utf-8')(sys.stdin);
-sys.stdout = codecs.getwriter('utf-8')(sys.stdout);
-sys.stderr = codecs.getwriter('utf-8')(sys.stderr);
+def process_biltrans_unit(lu, sents):
+	new_paths = {}
 
+	state = 0
+	tl = {}
+	ls = lu[1:-1].split('/')
+	sl = ls[0]
+	for i in range(1, len(ls)):
+		tl[i] = '/' + ls[i]
 
-output_sentences = {};
-output_sentences[''] = '';
-reading_word = False;
-lineno = 1;
-lu = '';
-c = sys.stdin.read(1);
+	if len(tl) == 0:
+		print('ERROR:', lu, file=sys.stderr)
+	elif len(tl) > 1:
+		for tid, trad in tl.items():
+			for path, sent in sents.items():
+				new_paths[path + trad] = sent + '^' + sl + trad + '$'
+	else:
+		for path in sents:
+			new_paths[path] = sents[path] + '^' + sl + tl[1] + '$'
+	return new_paths
 
-def process_biltrans_unit(lu, sents): #{
-	new_paths = {};
+def process_line(line):
+	"""Expand one 'ID ^sl/tl1/tl2$ ...' line; return (ID, {translation path: sentence})."""
+	in_word = False
+	cur_id = (line.split() or [''])[0]  # blank line -> empty ID instead of IndexError
+	idx = len(cur_id) + 1  # skip ID and one separator; assumes the ID starts at column 0
+	lu = ''
+	output_sentences = {'':''}
+	while idx < len(line):
+		c = line[idx]
+		if c == '\\':
+			if in_word:
+				lu += c
+				idx += 1
+				lu += line[idx:idx + 1]  # slice: no IndexError on a trailing backslash
+			else:
+				idx += 1
+		elif c == '^':
+			in_word = True
+		elif c == '$':
+			in_word = False
+			# process_biltrans_unit() expects the whole unit, '^' and '$' included.
+			output_sentences = process_biltrans_unit('^' + lu + '$', output_sentences)
+			lu = ''
+		elif in_word:
+			lu += c
+		elif c.isspace() and c != '\n':
+			for s in output_sentences:
+				output_sentences[s] += c
+		idx += 1
+	return cur_id, output_sentences
 
-	state = 0;
-	sl = '';
-	tl = {};
-	for c in lu[1:-1]: #{
-		#^worth<n><sg>/valor<n><m><sg>$ ^\$<mon>/\$<mon>$^20<num>/20<num>$^*m/*m$
-		#print c , sl , tl;
-		if c == '/': #{
-			state = state + 1;
-			if state not in tl: #{
-				tl[state] = '';
-			#}
-		#}
-		if state == 0: #{
-			sl = sl + c;
-		#}
-		if state >= 1: #{
-			tl[state] = tl[state] + c;
-		#}
-	#}
-
-	if len(tl) > 1: #{
-		for trad in tl: #{
-			for path in sents: #{
-				new_paths[path + tl[trad]] = sents[path] + '^' + sl + tl[trad] + '$';
-			#}
-		#}
-	else: #{
-		for path in sents: #{
-			if state not in tl: #{
-				print >> sys.stderr, 'ERROR: ';
-				print >> sys.stderr, sl ;
-				print >> sys.stderr, tl ;
-			#}
-			new_paths[path] = sents[path] + '^' + sl + tl[state] + '$';
-		#}
-	#}
-
-
-	return new_paths;
-#}
-
-escaped = False;
-seen_newline = True;
-cur_id = '';
-while c: #{
-	if c == '\\': #{
-		escaped = True;
-		lu = lu + c;
-		c = sys.stdin.read(1);
-	#}
-	if c == '^': #{
-		reading_word = True;
-	#}
-	if c == '$' and escaped == False: #{
-		lu = lu + c;
-		new_paths = process_biltrans_unit(lu, output_sentences);
-		del output_sentences;
-		output_sentences = new_paths;
-		reading_word = False;
-		lu = '';
-	#}
-	if c != '\\' and escaped == True: #{
-		escaped = False;
-	#}
-	if c.isspace(): #{
-		seen_newline = False;
-		if c == '\n': #{
-			print >> sys.stderr, 'output_sentences: ', len(output_sentences);
-			i = 0;
-			for sentence in output_sentences: #{
-				#print '.[][' + str(lineno) + ' ' + str(i) + ' ' + cur_id +'].[]\t' , output_sentences[sentence];
-				print '.[][' + cur_id + ' ' + str(i) + '].[]\t' , output_sentences[sentence];
-				i = i + 1;
-			#}
-			lineno = lineno + 1;
-
-			output_sentences = {};
-			output_sentences[''] = '';
-			seen_newline = True;
-			cur_id = '';
-
-		elif reading_word == False: #{
-			for sentence in output_sentences: #{
-				output_sentences[sentence] = output_sentences[sentence] + c;
-			#}
-		#}
-	#}
-	if reading_word: #{
-		lu = lu + c;
-	#}
-	if seen_newline and c != '\n': #{
-		cur_id = cur_id + c;
-	#}
-	c = sys.stdin.read(1);
-#}
+while True:
+	ln = sys.stdin.readline()
+	if not ln:
+		break
+	cur_id, sentences = process_line(ln)
+	print('output_sentences:', len(sentences), file=sys.stderr)
+	for i, sent in enumerate(sentences.values()):
+		print('.[][%s %s].[]\t%s' % (cur_id, i, sent))
diff --git a/scripts/biltrans-trim-uncovered.py b/scripts/biltrans-trim-uncovered.py
index 0c3fa23..f6fcb9f 100644
--- a/scripts/biltrans-trim-uncovered.py
+++ b/scripts/biltrans-trim-uncovered.py
@@ -2,28 +2,23 @@
 # coding=utf-8
 # -*- encoding: utf-8 -*-
 
-import sys;
+import sys
 
-pos = ["<n>", "<vblex>", "<adj>"];
+pos = ["<n>", "<vblex>", "<adj>"]
 
-output = False;
+output = False
 
-infile = sys.stdin ;
+infile = sys.stdin
 
-if len(sys.argv) > 1: #{
-	infile = open(sys.argv[1]);
-#}
+if len(sys.argv) > 1:
+	infile = open(sys.argv[1])
 
-lineno = 0;
-for line in infile.readlines(): #{
-	lineno = lineno + 1;
-	num_lu = float(line.count('$'));
-	num_unk = float(line.count('*'))  / 2.0;
-	cov = 100.0 - ((num_unk / num_lu) * 100.0);
+lineno = 0
+for line in infile.readlines():
+	lineno = lineno + 1
+	num_lu = float(line.count('$'))
+	num_unk = float(line.count('*'))  / 2.0
+	cov = (100.0 - (num_unk / num_lu) * 100.0) if num_lu else 0.0  # no '$' on the line: drop it rather than raise ZeroDivisionError
 
-	if cov >= 90.0: #{
-		print(line.strip());
-	#}
-
-
-#}
+	if cov >= 90.0:
+		print(line.strip())