commit 702a31a5739928975d6f5861d1965b422ecef3fe
Author: aboelhamd
Date:   Sat May 11 22:17:25 2019 +0200

    Script to pick min wer and per sentences

diff --git a/choose-best-sents.py b/choose-best-sents.py
new file mode 100644
index 0000000..7dac6ab
--- /dev/null
+++ b/choose-best-sents.py
@@ -0,0 +1,57 @@
+"""Select the best-scoring sentence of every group of candidates.
+
+Reads a scores file and a combinations (sentences) file in lockstep. Each
+non-blank scores line holds integer scores, the first being wer and the
+second per, for the sentence on the same line of the combinations file; a
+blank scores line ends a group. For every group the first sentence with the
+lowest wer, the lowest per and the lowest wer+per is written to the minwer,
+minper and minwerper file respectively.
+"""
+import sys
+
+if (len(sys.argv) != 6) :
+    print('\nUsage: python3 choose-best-sents.py scores-file combinations-file minwer-file minper-file minwerper-file');
+    sys.exit()
+
+
+def empty_group():
+    """Return the state of a group with no candidates: one (score, sentence) slot per metric."""
+    return [(float('inf'), None)] * 3
+
+
+def write_group(best, outputs):
+    """Write each metric's best sentence to its output file; do nothing for an empty group."""
+    if best[0][1] is None:
+        return
+    for (_, sentence), output in zip(best, outputs):
+        # The sentence keeps its own line ending, so the extra "\n" leaves a
+        # blank line after every sentence written.
+        output.write(sentence + "\n")
+
+
+# Output files are opened first, so they are created (and truncated) even when
+# an input file turns out to be missing.
+with open(sys.argv[3], 'w+') as minwerFile, \
+        open(sys.argv[4], 'w+') as minperFile, \
+        open(sys.argv[5], 'w+') as minwerperFile, \
+        open(sys.argv[1]) as scoresFile, \
+        open(sys.argv[2]) as combFile:
+    outputs = (minwerFile, minperFile, minwerperFile)
+    best = empty_group()
+
+    for scores, sent in zip(scoresFile, combFile):
+        print(scores.strip())
+        if scores.strip():
+            scoresArr = list(map(int, scores.split()))
+            wer = scoresArr[0]
+            per = scoresArr[1]
+            # Strict "<" keeps the earliest sentence when scores tie.
+            best = [(score, sent) if score < kept[0] else kept
+                    for score, kept in zip((wer, per, wer + per), best)]
+        else:
+            # Blank scores line: the group is complete.
+            write_group(best, outputs)
+            best = empty_group()
+
+    # The last group is not followed by a blank line when the scores file
+    # does not end with one, so flush whatever is still pending.
+    write_group(best, outputs)
diff --git a/merge-models.py b/merge-models.py
index 44cbbc2..2d8121d 100644
--- a/merge-models.py
+++ b/merge-models.py
@@ -3,15 +3,12 @@ from os.path import isfile, join
 import sys
 
 if (len(sys.argv) != 3) :
-    print('Usage: python merge-models.py modelsdest newfile');
-    sys.exit(-1)
+    print('\nUsage: python merge-models.py modelsdest newfile');
+    sys.exit()
 
-newfile = open(sys.argv[2], 'w')
-#localeid = sys.argv[2]
 modelsdest = sys.argv[1]
+newfile = open(sys.argv[2], 'w+')
 
-# localeid
-#newfile.write("%s\n" % localeid)
 
 models = [f for f in listdir(modelsdest) if isfile(join(modelsdest, f))]
 
diff --git a/put-rules-ids.py b/put-rules-ids.py
index 718a290..cdcce17 100644
--- a/put-rules-ids.py
+++ b/put-rules-ids.py
@@ -1,11 +1,11 @@
 import sys
 
-if (len(sys.argv) < 3) :
+if (len(sys.argv) != 3) :
     print('\nUsage: python put-ids.py original-transfer-file-path new-transfer-file-path');
     sys.exit()
 
 oldfile = open(sys.argv[1], 'r')
-newfile = open(sys.argv[2], 'w')
+newfile = open(sys.argv[2], 'w+')
 
 id = 0
 
diff --git a/score-sentences.py b/score-sentences.py
index 5930ef0..21de0bf 100644
--- a/score-sentences.py
+++ b/score-sentences.py
@@ -1,12 +1,12 @@
 import sys
 import kenlm
 
-if (len(sys.argv) < 4) :
+if (len(sys.argv) != 4) :
     print('\nUsage: python score-sentences.py arpa_or_binary_LM_file target_lang_file weights_file');
-    sys.exit(-1)
+    sys.exit()
 
 targetfile = open(sys.argv[2], 'r')
-weightfile = open(sys.argv[3], 'w')
+weightfile = open(sys.argv[3], 'w+')
 
 # Load the language model
 model = kenlm.LanguageModel(sys.argv[1])