commit 944ed2556c38f058a5118ab5e481b3412aa3e3d8
Author: Amr Keleg
Date:   Sat May 11 01:30:26 2019 +0200

    Fix the out of alphabet token handling in analyses generation

    Solves #45
    Consider alphanumeric characters to be part of the vocabulary.

diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc
index 2be326a..d3d029a 100644
--- a/lttoolbox/fst_processor.cc
+++ b/lttoolbox/fst_processor.cc
@@ -837,7 +837,7 @@ FSTProcessor::isEscaped(wchar_t const c) const
 bool
 FSTProcessor::isAlphabetic(wchar_t const c) const
 {
-  return alphabetic_chars.find(c) != alphabetic_chars.end();
+  return (bool)std::iswalnum(c) || alphabetic_chars.find(c) != alphabetic_chars.end();
 }
 
 void