commit 93ed1926a460edd6494ad09c2b9fb1393122b8f6
Author: aboelhamd
Date:   Mon Jun 3 00:04:07 2019 +0200

    Make removing bad sentences parallel

diff --git a/rem-bad-sents.py b/rem-bad-sents.py
index 9a942e7..9c2a7ab 100644
--- a/rem-bad-sents.py
+++ b/rem-bad-sents.py
@@ -1,21 +1,24 @@
 import sys
 
 
-if (len(sys.argv) != 4) :
-    print('\nUsage: python3 rem-bad-sents.py source-file ambig-target-file(with new lines) new-source-file');
+if (len(sys.argv) != 6) :
+    print('\nUsage: python3 rem-bad-sents.py source-file parallel-target-file ambigous-target-file(with new lines) new-source-file new-parallel-target-file');
     sys.exit()
 
 srcFile = open(sys.argv[1], 'r')
-ambigTarFile = open(sys.argv[2], 'r')
-newSrcFile = open(sys.argv[3], 'w+')
+trgFile = open(sys.argv[2], 'r')
+ambigTrgFile = open(sys.argv[3], 'r')
+newSrcFile = open(sys.argv[4], 'w+')
+newTrgFile = open(sys.argv[5], 'w+')
 
 sents = []
 
-for sent in ambigTarFile:
+for sent in ambigTrgFile:
     if (sent.strip()) :
         sents.append(sent)
     else :
         src = srcFile.readline()
+        trg = trgFile.readline()
         bad = False
         for sent in sents :
             if (sent.find("*") > -1 or sent.find("#") > -1 or sent.find("@") > -1) :
@@ -23,8 +26,11 @@ for sent in ambigTarFile:
                 break
         if (not bad) :
             newSrcFile.write(src)
+            newTrgFile.write(trg)
         sents.clear()
 
 srcFile.close()
-ambigTarFile.close()
+trgFile.close()
+ambigTrgFile.close()
 newSrcFile.close()
+newTrgFile.close()