commit 1ccb1182ef88cf81856398dd13002febebb9c9a1
Author: Daniel Swanson
Date:   Wed Jul 7 14:35:01 2021 -0500

    escape special characters in regexes and compile them at build time

diff --git a/apertium/transfer_regex.cc b/apertium/transfer_regex.cc
index bc72409..642caad 100644
--- a/apertium/transfer_regex.cc
+++ b/apertium/transfer_regex.cc
@@ -67,11 +67,14 @@ unbuildTrie(TrieNode* root)
     }
   }
   UString ret;
+  if (root->c == '+' || root->c == '*' || root->c == '?' || root->c == '.') {
+    ret += '\\';
+  }
   ret += root->c;
   if (groups.empty()) {
     return ret;
   } else if (groups.size() == 1) {
-    if (root->end && groups[0][0] != '(') {
+    if (root->end && groups[0].size() > 1 && groups[0][0] != '(') {
       ret += '('; ret += '?'; ret += ':';
       ret += groups[0];
       ret += ')';
diff --git a/apertium/trx_reader.cc b/apertium/trx_reader.cc
index e400cc0..c742d86 100644
--- a/apertium/trx_reader.cc
+++ b/apertium/trx_reader.cc
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 
 UString const TRXReader::ANY_TAG = ""_u;
 UString const TRXReader::ANY_CHAR = ""_u;
@@ -343,6 +344,9 @@ TRXReader::procDefAttrs()
       else
       {
         td.getAttrItems()[attrname] = optimize_regex(items);
+        // compile it now to check for errors
+        ApertiumRE r;
+        r.compile(td.getAttrItems()[attrname]);
         items.clear();
         attrname.clear();
       }