commit f9d54e1890ebf9edc73f1f2db3f07e1535fc5cf0 Author: Daniel Swanson Date: Mon Aug 2 20:19:11 2021 -0500 lsx-proc needs to add symbols at runtime, so support that diff --git a/lttoolbox/alphabet_exe.cc b/lttoolbox/alphabet_exe.cc index 0c682f0..542a666 100644 --- a/lttoolbox/alphabet_exe.cc +++ b/lttoolbox/alphabet_exe.cc @@ -94,7 +94,12 @@ AlphabetExe::getSymbol(UString& result, int32_t symbol, bool uppercase) const if (symbol == 0) { return; } else if (symbol < 0) { - result.append(sw->get(tags[-symbol-1])); + int idx = -symbol-1; + if (idx < tag_count) { + result.append(sw->get(tags[idx])); + } else { + result.append(dynamic_symbols[idx-tag_count]); + } } else if (uppercase) { result += u_toupper(static_cast(symbol)); } else { @@ -116,3 +121,35 @@ AlphabetExe::clearSymbol(const int32_t symbol) tags[-symbol-1].count = 0; } } + +int32_t +AlphabetExe::lookupDynamic(const UString& symbol) +{ + int32_t ret; + auto it = symbol_map.find(symbol); + if (it == symbol_map.end()) { + if (dynamic_symbols.empty()) { + // should be able to usually avoid reindexing with this + dynamic_symbols.reserve(32); + } + ret = -tag_count -dynamic_symbols.size() -1; + bool rebuild = (dynamic_symbols.size() == dynamic_symbols.capacity()); + dynamic_symbols.push_back(symbol); + symbol_map[dynamic_symbols.back()] = ret; + if (rebuild) { + // moderately horrible, but that's what we get for invalidating + // all the views when dynamic_symbols gets reallocated + symbol_map.clear(); + for (uint64_t i = 0; i < tag_count; i++) { + symbol_map[sw->get(tags[i])] = -static_cast(i) - 1; + } + int32_t n = -tag_count-1; + for (auto& ds : dynamic_symbols) { + symbol_map[ds] = n--; + } + } + } else { + ret = it->second; + } + return ret; +} diff --git a/lttoolbox/alphabet_exe.h b/lttoolbox/alphabet_exe.h index 29dcdbe..52ccdb9 100644 --- a/lttoolbox/alphabet_exe.h +++ b/lttoolbox/alphabet_exe.h @@ -20,6 +20,7 @@ #include #include +#include class AlphabetExe { private: @@ -28,6 +29,8 @@ private: StringRef* tags; std::map symbol_map; bool mmapping = false; + // tags added at runtime - used by apertium-separable + std::vector dynamic_symbols; public: AlphabetExe(StringWriter* sw_); ~AlphabetExe(); @@ -37,6 +40,8 @@ public: void getSymbol(UString& result, int32_t symbol, bool uppercase = false) const; bool isTag(const int32_t symbol) const; void clearSymbol(const int32_t symbol); + // like operator() but add symbol to dynamic_symbols if not found + int32_t lookupDynamic(const UString& symbol); }; #endif