commit 8517e219105b8f7c0cb421543f755cc4c7c31934 Author: Daniel Swanson Date: Wed Aug 4 17:37:33 2021 -0500 yet more helper functions diff --git a/lttoolbox/alphabet_exe.cc b/lttoolbox/alphabet_exe.cc index 542a666..40d27e5 100644 --- a/lttoolbox/alphabet_exe.cc +++ b/lttoolbox/alphabet_exe.cc @@ -78,7 +78,7 @@ AlphabetExe::init(void* ptr) } int32_t -AlphabetExe::operator()(UString_view sv) +AlphabetExe::operator()(UString_view sv) const { auto it = symbol_map.find(sv); if (it != symbol_map.end()) { diff --git a/lttoolbox/alphabet_exe.h b/lttoolbox/alphabet_exe.h index 52ccdb9..e57a1c8 100644 --- a/lttoolbox/alphabet_exe.h +++ b/lttoolbox/alphabet_exe.h @@ -36,7 +36,7 @@ public: ~AlphabetExe(); void read(FILE* in, bool mmap); void* init(void* ptr); - int32_t operator()(UString_view sv); + int32_t operator()(UString_view sv) const; void getSymbol(UString& result, int32_t symbol, bool uppercase = false) const; bool isTag(const int32_t symbol) const; void clearSymbol(const int32_t symbol); diff --git a/lttoolbox/match_state2.cc b/lttoolbox/match_state2.cc index 3738fd3..ddf2550 100644 --- a/lttoolbox/match_state2.cc +++ b/lttoolbox/match_state2.cc @@ -100,9 +100,49 @@ MatchState2::step(const int32_t input, const int32_t alt) } void -MatchState2::step(UString_view input, const Alphabet& alpha, bool foldcase) +MatchState2::step(const int32_t input, const int32_t alt1, int32_t alt2) { - // TODO + uint16_t temp_last = last; + for (uint16_t i = first; i != temp_last; i = (i+1)%BUF_LIMIT) { + applySymbol(buffer[i], input); + applySymbol(buffer[i], alt1); + applySymbol(buffer[i], alt2); + } + first = temp_last; +} + +void +MatchState2::step(UString_view input, const AlphabetExe& alpha, bool foldcase) +{ + int32_t any_char = alpha(""_u); + int32_t any_tag = alpha(""_u); + for (uint64_t i = 0; i < input.size(); i++) { + if (input[i] == '<') { + for (uint64_t j = i+1; j < input.size(); j++) { + if (input[j] == '\\') { + j++; + } else if (input[j] == '>') { + int32_t sym = alpha(input.substr(i, j-i+1)); + if (sym) { + step(sym, any_tag); + } else { + step(any_tag); + } + i = j; + break; + } + } + } else { + if (input[i] == '\\') { + i++; + } + if (foldcase && u_isupper(input[i])) { + step(input[i], u_tolower(input[i]), any_char); + } else { + step(input[i], any_char); + } + } + } } int diff --git a/lttoolbox/match_state2.h b/lttoolbox/match_state2.h index 1542c05..4b0ca28 100644 --- a/lttoolbox/match_state2.h +++ b/lttoolbox/match_state2.h @@ -18,7 +18,7 @@ #ifndef _LT_MATCH_STATE_ #define _LT_MATCH_STATE_ -#include +#include #include #include @@ -44,7 +44,8 @@ public: bool empty() const; void step(const int32_t input); void step(const int32_t input, const int32_t alt); - void step(UString_view input, const Alphabet& alpha, bool foldcase = true); + void step(const int32_t input, const int32_t alt1, const int32_t alt2); + void step(UString_view input, const AlphabetExe& alpha, bool foldcase = true); int classifyFinals(const std::map& finals, const std::set& banned_rules) const; int classifyFinals(const std::map& finals) const; diff --git a/lttoolbox/string_utils.cc b/lttoolbox/string_utils.cc index 411380d..4b57d55 100644 --- a/lttoolbox/string_utils.cc +++ b/lttoolbox/string_utils.cc @@ -66,6 +66,27 @@ StringUtils::split(const UString& str, const UString& delim) return result; } +std::vector +StringUtils::split_escape(UString_view str, const UChar delim) +{ + std::vector ret; + size_t last = 0; + for (size_t i = 0; i < str.size(); i++) { + if (str[i] == '\\') { + i++; + } else if (str[i] == delim) { + if (i > last) { + ret.push_back(str.substr(last, i-last)); + } + last = i+1; + } + } + if (str.size() > last) { + ret.push_back(str.substr(last)); + } + return ret; +} + UString StringUtils::join(const std::vector& vec, const UString& delim) { diff --git a/lttoolbox/string_utils.h b/lttoolbox/string_utils.h index 79aeadf..d8510f9 100644 --- a/lttoolbox/string_utils.h +++ b/lttoolbox/string_utils.h @@ -11,6 +11,8 @@ public: // split string on delimiter static std::vector split(const UString& str, const UString& delim); + // split, but respect \ escapes + static std::vector split_escape(UString_view str, const UChar delim); // inverse of split static UString join(const std::vector& vec, const UString& delim);