commit 5e695022b26250e24f1235e33386a1e27e5c16e3 Author: Tanmai Khanna Date: Sun May 17 20:51:31 2020 +0530 Ignoring secondary tags during generation (#83) Ignore secondary tags for pattern matching; Escape characters in secondary tags; Deal with multiwords that have an invariable part diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index 6348141..84d9f79 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -516,17 +516,73 @@ FSTProcessor::readGeneration(FILE *input, FILE *output) { wstring cad = L""; cad += static_cast(val); + + bool isSecondaryTag = false; + while((val = fgetwc_unlocked(input)) != L'>') { if(feof(input)) { streamError(); } + if(val == L':') + { + isSecondaryTag = true; + break; + } cad += static_cast(val); } cad += static_cast(val); - - return alphabet(cad); + + if(isSecondaryTag) + { + while(true) + { + val = fgetwc_unlocked(input); + + if(feof(input)) + { + streamError(); + } + + if(val == L'\\') + { + val = fgetwc_unlocked(input); + continue; + } + + if(isSecondaryTag) + { + if(val == L'>') + { + isSecondaryTag = false; + } + } + else + { + if(val == L'<') + { + isSecondaryTag = true; + } + else if(val == L'$') + { + break; + } + else + { + return static_cast(val); + } + } + } + + outOfWord = true; + return static_cast(L'$'); + } + else + { + return alphabet(cad); + } + } else if(val == L'[') {