commit 31202405071d58c8f0e72fe4ead3c5e32373639c
Author: Tanmai Khanna
Date:   Fri Jul 17 11:27:25 2020 +0530

    Parse wordbound blanks as part of the Lexical Unit

diff --git a/streamparser.py b/streamparser.py
index f7f28af..986e407 100755
--- a/streamparser.py
+++ b/streamparser.py
@@ -150,6 +150,7 @@ class LexicalUnit:
     Attributes:
         lexical_unit (str): The lexical unit in Apertium stream format.
         wordform (str): The word form (surface form) of the lexical unit.
+        wordbound_blank (str): The wordbound blank of the lexical unit.
         readings (List[List[:class:`SReading`]]): The analyses of the lexical unit with sublists containing all subreadings.
         knownness (:class:`Knownness`): The level of knowledge of the lexical unit.
     """
@@ -158,7 +159,14 @@ class LexicalUnit:
         self.lexical_unit = lexical_unit
         cohort = re.split(r'(?
[... text lost in extraction: the remainder of this hunk and the beginning of the next hunk header ...] Iterator[U
                 buffer += char
         else:
             if char == '[':
-                in_superblank = True
-                text_buffer += char
+                next_char = next(stream)
+                if next_char == '[':
+                    in_lexical_unit = True
+                else:
+                    in_superblank = True
+                    text_buffer += char
+                    if next_char == ']':
+                        in_superblank = False
+                        text_buffer += next_char
+                    elif char == '\\':
+                        text_buffer += next_char
+                        text_buffer += next(stream)
+                    else:
+                        text_buffer += next_char
+
             elif char == '^':
                 in_lexical_unit = True
             elif char == '\\':
@@ -266,3 +287,5 @@ def main(): # type: () -> None

 if __name__ == '__main__':
     main()
+
+