commit c3ed60bc06a60cbe9b0870d9bf8fefc4c7bacddb Author: Tanmai Khanna Date: Thu Jul 16 00:03:59 2020 +0530 read wblanks properly for generation diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index d47870a..ab27e85 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -238,6 +238,42 @@ FSTProcessor::readFullBlock(FILE *input, wchar_t const delim1, wchar_t const del return result; } +wstring +FSTProcessor::readWblank(FILE *input) +{ + wstring result = L""; + result += L"[["; + wchar_t c; + + while(!feof(input)) + { + c = static_cast(fgetwc_unlocked(input)); + result += c; + + if(c == L'\\') + { + result += static_cast(readEscaped(input)); + } + else if(c == L']') + { + c = static_cast(fgetwc_unlocked(input)); + result += c; + + if(c == L']') + { + break; + } + } + } + + if(c != L']') + { + streamError(); + } + + return result; +} + int FSTProcessor::readAnalysis(FILE *input) { @@ -531,7 +567,17 @@ FSTProcessor::readGeneration(FILE *input, FILE *output) } else if(val == L'[') { - fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output); + val = fgetwc_unlocked(input); + if(val == L'[') + { + fputws_unlocked(readWblank(input).c_str(), output); + } + else + { + ungetc(val, input); + fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output); + } + return readGeneration(input, output); } else diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h index ec95de8..3276151 100644 --- a/lttoolbox/fst_processor.h +++ b/lttoolbox/fst_processor.h @@ -251,6 +251,12 @@ private: * @param delim1 the delimiter of the end of the sequence */ wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); + + /** + * Reads a wordbound blank from the stream input + * @param input the stream being read + */ + wstring readWblank(FILE *input); /** * Returns true if the character code is identified as alphabetic