commit d53245afea7ba84389449a8498915cb0e7738cc9 Author: vivekvardhanadepu Date: Sat May 29 16:25:46 2021 +0530 removing lines with only fullstops('.') diff --git a/clean_corpus.py b/clean_corpus.py index f1b85d3..6e6f5fb 100644 --- a/clean_corpus.py +++ b/clean_corpus.py @@ -42,8 +42,9 @@ def main(argc, argv): lines_to_remove.update([i-1, i, i+1]) continue - # removing lines only with '°' and '*' - if (not lines1[i].replace('°', ' ').replace('*', ' ').strip()) and (not lines2[i].replace('°', ' ').replace('*', ' ').strip()): + # removing lines only with '°', '*' and '.' + if (not lines1[i].replace('°', '').replace('*', '').replace('.','').strip()) and \ + (not lines2[i].replace('°', '').replace('*', '').replace('.', '').strip()): lines_to_remove.add(i) # print(lines1, lines2)