Bug Summary

File: GrammarApplicator_runRules.cpp
Warning: line 2885, column 5
Value stored to 'c' is never read
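
This is a dead-store finding from the analyzer's deadcode.DeadStores checker (enabled via -analyzer-checker=deadcode in the invocation below): an assignment whose value is overwritten or discarded before it is ever read. As a minimal, hypothetical illustration of the flagged pattern (not code from GrammarApplicator_runRules.cpp):

    int f(int x) {
        int c;
        c = x;      // flagged: "Value stored to 'c' is never read"
        c = x + 1;  // only this store is actually used
        return c;
    }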

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name GrammarApplicator_runRules.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -resource-dir /usr/lib/llvm-16/lib/clang/16 -D BOOST_NO_CXX98_FUNCTION_BASE=1 -D HAS_FS -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D _POSIX_C_SOURCE=200112 -D cg3_EXPORTS -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include/posix -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -I /usr/local/include -D NDEBUG -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -std=c++2b -fdebug-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/cg3/scan-build/2024-09-11-161008-13503-1 -x c++ /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src/GrammarApplicator_runRules.cpp
1/*
2* Copyright (C) 2007-2024, GrammarSoft ApS
3* Developed by Tino Didriksen <mail@tinodidriksen.com>
4* Design by Eckhard Bick <eckhard.bick@mail.dk>, Tino Didriksen <mail@tinodidriksen.com>
5*
6* This program is free software: you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation, either version 3 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program. If not, see <https://www.gnu.org/licenses/>.
18*/
19
20#include "GrammarApplicator.hpp"
21#include "Strings.hpp"
22#include "Tag.hpp"
23#include "Grammar.hpp"
24#include "Window.hpp"
25#include "SingleWindow.hpp"
26#include "Reading.hpp"
27#include "ContextualTest.hpp"
28#include "version.hpp"
29#include "process.hpp"
30
31namespace CG3 {
32
33enum {
34 RV_NOTHING = 1,
35 RV_SOMETHING = 2,
36 RV_DELIMITED = 4,
37 RV_TRACERULE = 8,
38};
39
40bool GrammarApplicator::doesWordformsMatch(const Tag* cword, const Tag* rword) {
41 if (rword && rword != cword) {
42 if (rword->type & T_REGEXP) {
43 if (!doesTagMatchRegexp(cword->hash, *rword)) {
44 return false;
45 }
46 }
47 else if (rword->type & T_CASE_INSENSITIVE) {
48 if (!doesTagMatchIcase(cword->hash, *rword)) {
49 return false;
50 }
51 }
52 else {
53 return false;
54 }
55 }
56 return true;
57}
58
59bool GrammarApplicator::updateRuleToCohorts(Cohort& c, const uint32_t& rsit) {
60 // Check whether this rule is in the allowed rule list from cmdline flag --rule(s)
61 if (!valid_rules.empty() && !valid_rules.contains(rsit)) {
62 return false;
63 }
64 SingleWindow* current = c.parent;
65 const Rule* r = grammar->rule_by_number[rsit];
66 if (!doesWordformsMatch(c.wordform, r->wordform)) {
67 return false;
68 }
69 if (current->rule_to_cohorts.size() < rsit+1) {
70 indexSingleWindow(*current);
71 }
72 CohortSet& cohortset = current->rule_to_cohorts[rsit];
73 std::vector<size_t> csi;
74 for (size_t i = 0; i < cohortsets.size(); ++i) {
75 if (cohortsets[i] != &cohortset) {
76 continue;
77 }
78 csi.push_back(i);
79 }
80 if (!csi.empty()) {
81 auto cap = cohortset.capacity();
82 std::vector<CohortSet::const_iterator*> ends;
83 std::vector<std::pair<CohortSet::const_iterator*,Cohort*>> chs;
84 for (size_t i = 0; i < csi.size(); ++i) {
85 if (*rocits[csi[i]] == cohortset.end()) {
86 ends.push_back(rocits[csi[i]]);
87 }
88 else {
89 chs.push_back(std::pair(rocits[csi[i]], **rocits[csi[i]]));
90 }
91 }
92 cohortset.insert(&c);
93 for (auto it : ends) {
94 *it = cohortset.end();
95 }
96 if (cap != cohortset.capacity()) {
97 for (auto& it : chs) {
98 *it.first = cohortset.find(it.second);
99 }
100 }
101 }
102 else {
103 cohortset.insert(&c);
104 }
105 return current->valid_rules.insert(rsit);
106}
107
108bool GrammarApplicator::updateValidRules(const uint32IntervalVector& rules, uint32IntervalVector& intersects, const uint32_t& hash, Reading& reading) {
109 size_t os = intersects.size();
110 auto it = grammar->rules_by_tag.find(hash);
111 if (it != grammar->rules_by_tag.end()) {
112 Cohort& c = *(reading.parent);
113 for (auto rsit : (it->second)) {
114 if (updateRuleToCohorts(c, rsit) && rules.contains(rsit)) {
115 intersects.insert(rsit);
116 }
117 }
118 }
119 return (os != intersects.size());
120}
121
122void GrammarApplicator::indexSingleWindow(SingleWindow& current) {
123 current.valid_rules.clear();
124 current.rule_to_cohorts.resize(grammar->rule_by_number.size());
125 for (auto& cs : current.rule_to_cohorts) {
126 cs.clear();
127 }
128
129 for (auto c : current.cohorts) {
130 for (uint32_t psit = 0; psit < c->possible_sets.size(); ++psit) {
131 if (c->possible_sets.test(psit) == false) {
132 continue;
133 }
134 auto rules_it = grammar->rules_by_set.find(psit);
135 if (rules_it == grammar->rules_by_set.end()) {
136 continue;
137 }
138 for (auto rsit : rules_it->second) {
139 updateRuleToCohorts(*c, rsit);
140 }
141 }
142 }
143}
144
145TagList GrammarApplicator::getTagList(const Set& theSet, bool unif_mode) const {
146 TagList theTags;
147 getTagList(theSet, theTags, unif_mode);
148 return theTags;
149}
150
151void GrammarApplicator::getTagList(const Set& theSet, TagList& theTags, bool unif_mode) const {
152 if (theSet.type & ST_SET_UNIFY) {
153 const auto& usets = (*context_stack.back().unif_sets)[theSet.number];
154 const Set& pSet = *(grammar->sets_list[theSet.sets[0]]);
155 for (auto iter : pSet.sets) {
156 if (usets.count(iter)) {
157 getTagList(*(grammar->sets_list[iter]), theTags);
158 }
159 }
160 }
161 else if (theSet.type & ST_TAG_UNIFY) {
162 for (auto iter : theSet.sets) {
163 getTagList(*(grammar->sets_list[iter]), theTags, true);
164 }
165 }
166 else if (!theSet.sets.empty()) {
167 for (auto iter : theSet.sets) {
168 getTagList(*(grammar->sets_list[iter]), theTags, unif_mode);
169 }
170 }
171 else if (unif_mode) {
172 auto unif_tags = context_stack.back().unif_tags;
173 auto iter = unif_tags->find(theSet.number);
174 if (iter != unif_tags->end()) {
175 trie_getTagList(theSet.trie, theTags, iter->second);
176 trie_getTagList(theSet.trie_special, theTags, iter->second);
177 }
178 }
179 else {
180 trie_getTagList(theSet.trie, theTags);
181 trie_getTagList(theSet.trie_special, theTags);
182 }
183 // Eliminate consecutive duplicates. Not all duplicates, since AddCohort and Append may have multiple readings with repeated tags
184 for (auto ot = theTags.begin(); theTags.size() > 1 && ot != theTags.end(); ++ot) {
185 auto it = ot;
186 ++it;
187 for (; it != theTags.end() && std::distance(ot, it) == 1;) {
188 if (*ot == *it) {
189 it = theTags.erase(it);
190 }
191 else {
192 ++it;
193 }
194 }
195 }
196}
197
198Reading* GrammarApplicator::get_sub_reading(Reading* tr, int sub_reading) {
199 if (sub_reading == 0) {
200 return tr;
201 }
202
203 if (sub_reading == GSR_ANY) {
204 // If there aren't any sub-readings, the primary reading is the same as the amalgamation of all readings
205 if (tr->next == nullptr) {
206 return tr;
207 }
208
209 subs_any.emplace_back(Reading());
210 Reading* reading = &subs_any.back();
211 *reading = *tr;
212 reading->next = nullptr;
213 while (tr->next) {
214 tr = tr->next;
215 reading->tags_list.push_back(0);
216 reading->tags_list.insert(reading->tags_list.end(), tr->tags_list.begin(), tr->tags_list.end());
217 for (auto tag : tr->tags) {
218 reading->tags.insert(tag);
219 reading->tags_bloom.insert(tag);
220 }
221 for (auto tag : tr->tags_plain) {
222 reading->tags_plain.insert(tag);
223 reading->tags_plain_bloom.insert(tag);
224 }
225 for (auto tag : tr->tags_textual) {
226 reading->tags_textual.insert(tag);
227 reading->tags_textual_bloom.insert(tag);
228 }
229 reading->tags_numerical.insert(tr->tags_numerical.begin(), tr->tags_numerical.end());
230 if (tr->mapped) {
231 reading->mapped = true;
232 }
233 if (tr->mapping) {
234 reading->mapping = tr->mapping;
235 }
236 if (tr->matched_target) {
237 reading->matched_target = true;
238 }
239 if (tr->matched_tests) {
240 reading->matched_tests = true;
241 }
242 }
243 reading->rehash();
244 return reading;
245 }
246
247 if (sub_reading > 0) {
248 for (int i = 0; i < sub_reading && tr; ++i) {
249 tr = tr->next;
250 }
251 }
252 else if (sub_reading < 0) {
253 int ntr = 0;
254 Reading* ttr = tr;
255 while (ttr) {
256 ttr = ttr->next;
257 --ntr;
258 }
259 if (!tr->next) {
260 tr = nullptr;
261 }
262 for (auto i = ntr; i < sub_reading && tr; ++i) {
263 tr = tr->next;
264 }
265 }
266 return tr;
267}
268
269#define TRACE \
270 do { \
271 get_apply_to().subreading->hit_by.push_back(rule->number); \
272 if (rule->sub_reading == 32767) { \
273 get_apply_to().reading->hit_by.push_back(rule->number); \
274 } \
275 } while (0)
276
277#define FILL_TAG_LIST(taglist) \
278 do { \
279 Reading& reading = *get_apply_to().subreading; \
280 for (auto it = (taglist)->begin(); it != (taglist)->end();) { \
281 if (reading.tags.find((*it)->hash) == reading.tags.end()) { \
282 auto tt = *it; \
283 it = (taglist)->erase(it); \
284 if (tt->type & T_SPECIAL) { \
285 if (context_stack.back().regexgrps == nullptr) { \
286 context_stack.back().regexgrps = &regexgrps_store[used_regex]; \
287 } \
288 auto stag = doesTagMatchReading(reading, *tt, false, true); \
289 if (stag) { \
290 (taglist)->insert(it, grammar->single_tags.find(stag)->second); \
291 } \
292 } \
293 continue; \
294 } \
295 ++it; \
296 } \
297 } while (0)
298
299#define FILL_TAG_LIST_RAW(taglist) \
300 do { \
301 Reading& reading = *get_apply_to().subreading; \
302 for (auto& tt : *(taglist)) { \
303 if (tt->type & T_SPECIAL) { \
304 if (context_stack.back().regexgrps == nullptr) { \
305 context_stack.back().regexgrps = &regexgrps_store[used_regex]; \
306 } \
307 auto stag = doesTagMatchReading(reading, *tt, false, true); \
308 if (stag) { \
309 tt = grammar->single_tags.find(stag)->second; \
310 } \
311 } \
312 } \
313 } while (0)
314
315#define APPEND_TAGLIST_TO_READING(taglist, reading) \
316 do { \
317 for (auto tter : (taglist)) { \
318 while (tter->type & T_VARSTRING) { \
319 tter = generateVarstringTag(tter); \
320 } \
321 auto hash = tter->hash; \
322 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) { \
323 mappings->push_back(tter); \
324 } \
325 else { \
326 hash = addTagToReading((reading), tter); \
327 } \
328 if (updateValidRules(rules, intersects, hash, reading)) { \
329 iter_rules = intersects.find(rule->number); \
330 iter_rules_end = intersects.end(); \
331 } \
332 } \
333 } while (0)
334
335#define VARSTRINGIFY(tag) \
336 do { \
337 while ((tag)->type & T_VARSTRING) { \
338 (tag) = generateVarstringTag((tag)); \
339 } \
340 } \
341 while (0)
342
343
344bool GrammarApplicator::runSingleRule(SingleWindow& current, const Rule& rule, RuleCallback reading_cb, RuleCallback cohort_cb) {
345 finish_cohort_loop = true;
346 bool anything_changed = false;
347 KEYWORDS type = rule.type;
348 const Set& set = *(grammar->sets_list[rule.target]);
349 CohortSet* cohortset = &current.rule_to_cohorts[rule.number];
350
351 auto override_cohortset = [&]() {
352 if (in_nested) {
353 if (!current.nested_rule_to_cohorts) {
354 current.nested_rule_to_cohorts.reset(new CohortSet());
355 }
356 cohortset = current.nested_rule_to_cohorts.get();
357 cohortset->clear();
358 cohortset->insert(get_apply_to().cohort);
359 for (auto& t : set.trie_special) {
360 if (t.first->type & T_CONTEXT && t.first->context_ref_pos <= context_stack.back().context.size()) {
361 cohortset->insert(context_stack.back().context[t.first->context_ref_pos - 1]);
362 }
363 }
364 }
365 };
366 override_cohortset();
367 cohortsets.push_back(cohortset);
368 rocits.push_back(nullptr);
369
370 scope_guard popper([&]() {
371 cohortsets.pop_back();
372 rocits.pop_back();
373 });
374
375 if (debug_level > 1) {
376 std::cerr << "DEBUG: " << cohortset->size() << "/" << current.cohorts.size() << " = " << double(cohortset->size()) / double(current.cohorts.size()) << std::endl;
377 }
378 for (auto rocit = cohortset->cbegin(); (!cohortset->empty()) && (rocit != cohortset->cend());) {
379 rocits.back() = &rocit;
380 Cohort* cohort = *rocit;
381 ++rocit;
382
383 finish_reading_loop = true;
384
385 if (debug_level > 1) {
386 std::cerr << "DEBUG: Trying cohort " << cohort->global_number << ":" << cohort->local_number << std::endl;
387 }
388
389 // If the current cohort is the initial >>> one, skip it.
390 if (cohort->local_number == 0) {
391 continue;
392 }
393 // If the cohort is removed, skip it...
394 // Removed cohorts are still in the precalculated rule_to_cohorts map,
395 // and it would take time to go through the whole map searching for the cohort.
396 // Haven't tested whether it is worth it...
397 if (cohort->type & CT_REMOVED) {
398 continue;
399 }
400
401 uint32_t c = cohort->local_number;
402 // If the cohort is temporarily unavailable due to parentheses, skip it.
403 if ((cohort->type & CT_ENCLOSED) || cohort->parent != &current) {
404 continue;
405 }
406 // If there are no readings, skip it.
407 // This is unlikely to happen as all cohorts will get a magic reading during input,
408 // and not many use the unsafe Remove rules.
409 if (cohort->readings.empty()) {
410 continue;
411 }
412 // If there's no reason to even attempt to restore, just skip it.
413 if (rule.type == K_RESTORE) {
414 if ((rule.flags & RF_DELAYED) && cohort->delayed.empty()) {
415 continue;
416 }
417 else if ((rule.flags & RF_IGNORED) && cohort->ignored.empty()) {
418 continue;
419 }
420 else if (!(rule.flags & (RF_DELAYED|RF_IGNORED)) && cohort->deleted.empty()) {
421 continue;
422 }
423 }
424 // If there is not even a remote chance the target set might match this cohort, skip it.
425 if (rule.sub_reading == 0 && (rule.target >= cohort->possible_sets.size() || !cohort->possible_sets.test(rule.target))) {
426 continue;
427 }
428
429 // If there is only 1 reading left and it is a Select or safe Remove rule, skip it.
430 if (cohort->readings.size() == 1) {
431 if (type == K_SELECT) {
432 continue;
433 }
434 if (type == K_REMOVE || type == K_IFF) {
435 if (cohort->readings.front()->noprint) {
436 continue;
437 }
438 if ((!unsafe || (rule.flags & RF_SAFE)) && !(rule.flags & RF_UNSAFE)) {
439 continue;
440 }
441 }
442 }
443 else if (type == K_UNMAP && rule.flags & RF_SAFE) {
444 continue;
445 }
446 // If it's a Delimit rule and we're at the final cohort, skip it.
447 if (type == K_DELIMIT && c == current.cohorts.size() - 1) {
448 continue;
449 }
450
451 // If the rule is only supposed to run inside parentheses, check if cohort is.
452 if (rule.flags & RF_ENCL_INNER) {
453 if (!par_left_pos) {
454 continue;
455 }
456 if (cohort->local_number < par_left_pos || cohort->local_number > par_right_pos) {
457 continue;
458 }
459 }
460 // ...and if the rule should only run outside parentheses, check if cohort is.
461 else if (rule.flags & RF_ENCL_OUTER) {
462 if (par_left_pos && cohort->local_number >= par_left_pos && cohort->local_number <= par_right_pos) {
463 continue;
464 }
465 }
466
467 // If this is SETPARENT SAFE and there's already a parent, skip it.
468 if (type == K_SETPARENT && (rule.flags & RF_SAFE) && cohort->dep_parent != DEP_NO_PARENT) {
469 continue;
470 }
471 if ((rule.flags & RF_NOPARENT) && cohort->dep_parent != DEP_NO_PARENT) {
472 continue;
473 }
474
475 // Check if on previous runs the rule did not match this cohort, and skip if that is the case.
476 // This cache is cleared if any rule causes any state change in the window.
477 uint32_t ih = hash_value(rule.number, cohort->global_number);
478 if (index_ruleCohort_no.contains(ih)) {
479 continue;
480 }
481 index_ruleCohort_no.insert(ih);
482
483 size_t num_active = 0;
484 size_t num_iff = 0;
485
486 std::vector<Rule_Context> reading_contexts;
487 reading_contexts.reserve(cohort->readings.size());
488
489 // Assume that Iff rules are really Remove rules, until proven otherwise.
490 if (rule.type == K_IFF) {
491 type = K_REMOVE;
492 }
493
494 bool did_test = false;
495 bool test_good = false;
496 bool matched_target = false;
497
498 clear(readings_plain);
499 clear(subs_any);
500
501 // Varstring capture groups exist on a per-cohort basis, since we may need them for mapping later.
502 clear(regexgrps_z);
503 clear(regexgrps_c);
504 clear(unif_tags_rs);
505 clear(unif_sets_rs);
506
507 used_regex = 0;
508 regexgrps_store.resize(std::max(regexgrps_store.size(), cohort->readings.size()));
509 regexgrps_z.reserve(std::max(regexgrps_z.size(), cohort->readings.size()));
510 regexgrps_c.reserve(std::max(regexgrps_c.size(), cohort->readings.size()));
511
512 size_t used_unif = 0;
513 unif_tags_store.resize(std::max(unif_tags_store.size(), cohort->readings.size() + 1));
514 unif_sets_store.resize(std::max(unif_sets_store.size(), cohort->readings.size() + 1));
515
516 {
517 Rule_Context context;
518 context.target.cohort = cohort;
519 context_stack.push_back(std::move(context));
520 }
521
522 auto reset_cohorts = [&]() {
523 cohortset = &current.rule_to_cohorts[rule.number];
524 override_cohortset();
525 cohortsets.back() = cohortset;
526 if (get_apply_to().cohort->type & CT_REMOVED) {
527 rocit = cohortset->lower_bound(current.cohorts[get_apply_to().cohort->local_number]);
528 }
529 else {
530 rocit = cohortset->find(current.cohorts[get_apply_to().cohort->local_number]);
531 if (rocit != cohortset->end()) {
532 ++rocit;
533 }
534 }
535 };
536
537 // Remember the current state so we can compare later to see if anything has changed
538 const size_t state_num_readings = cohort->readings.size();
539 const size_t state_num_removed = cohort->deleted.size();
540 const size_t state_num_delayed = cohort->delayed.size();
541 const size_t state_num_ignored = cohort->ignored.size();
542
543 // This loop figures out which readings, if any, are valid targets for the current rule
544 // The criterion for validity is that the reading must match both the target and all contextual tests
545 for (size_t i = 0; i < cohort->readings.size(); ++i) {
546 // ToDo: Switch sub-readings so that they build up a passed in vector<Reading*>
547 Reading* reading = get_sub_reading(cohort->readings[i], rule.sub_reading);
548 if (!reading) {
549 cohort->readings[i]->matched_target = false;
550 cohort->readings[i]->matched_tests = false;
551 continue;
552 }
553 context_stack.back().target.reading = cohort->readings[i];
554 context_stack.back().target.subreading = reading;
555
556 // The state is stored in the readings themselves, so clear the old states
557 reading->matched_target = false;
558 reading->matched_tests = false;
559
560 if (reading->mapped && (rule.type == K_MAP || rule.type == K_ADD || rule.type == K_REPLACE)) {
561 continue;
562 }
563 if (reading->mapped && (rule.flags & RF_NOMAPPED)) {
564 continue;
565 }
566 if (reading->noprint && !allow_magic_readings) {
567 continue;
568 }
569 if (reading->immutable && rule.type != K_UNPROTECT) {
570 if (type == K_SELECT) {
571 reading->matched_target = true;
572 reading->matched_tests = true;
573 reading_contexts.push_back(context_stack.back());
574 }
575 ++num_active;
576 ++num_iff;
577 continue;
578 }
579
580 // Check if any previous reading of this cohort had the same plain signature, and if so just copy their results
581 // This cache is cleared on a per-cohort basis
582 did_test = false;
583 if (!(set.type & (ST_SPECIAL | ST_MAPPING | ST_CHILD_UNIFY)) && !readings_plain.empty()) {
584 auto rpit = readings_plain.find(reading->hash_plain);
585 if (rpit != readings_plain.end()) {
586 reading->matched_target = rpit->second->matched_target;
587 reading->matched_tests = rpit->second->matched_tests;
588 if (reading->matched_tests) {
589 ++num_active;
590 }
591 if (regexgrps_c.count(rpit->second->number)) {
592 regexgrps_c[reading->number];
593 regexgrps_c[reading->number] = regexgrps_c[rpit->second->number];
594 regexgrps_z[reading->number];
595 regexgrps_z[reading->number] = regexgrps_z[rpit->second->number];
596
597 context_stack.back().regexgrp_ct = regexgrps_z[reading->number];
598 context_stack.back().regexgrps = regexgrps_c[reading->number];
599 }
600 context_stack.back().unif_tags = unif_tags_rs[reading->hash_plain];
601 context_stack.back().unif_sets = unif_sets_rs[reading->hash_plain];
602 did_test = true;
603 test_good = rpit->second->matched_tests;
604 reading_contexts.push_back(context_stack.back());
605 continue;
606 }
607 }
608
609 // Regex capture is done on a per-reading basis, so clear all captured state.
610 context_stack.back().regexgrp_ct = 0;
611 context_stack.back().regexgrps = &regexgrps_store[used_regex];
612
613 // Unification is done on a per-reading basis, so clear all unification state.
614 context_stack.back().unif_tags = &unif_tags_store[used_unif];
615 context_stack.back().unif_sets = &unif_sets_store[used_unif];
616 unif_tags_rs[reading->hash_plain] = context_stack.back().unif_tags;
617 unif_sets_rs[reading->hash_plain] = context_stack.back().unif_sets;
618 unif_tags_rs[reading->hash] = context_stack.back().unif_tags;
619 unif_sets_rs[reading->hash] = context_stack.back().unif_sets;
620 ++used_unif;
621
622 context_stack.back().unif_tags->clear();
623 context_stack.back().unif_sets->clear();
624
625 unif_last_wordform = 0;
626 unif_last_baseform = 0;
627 unif_last_textual = 0;
628
629 same_basic = reading->hash_plain;
630 rule_target = context_target = nullptr;
631 if (context_stack.size() > 1) {
632 Cohort* m = context_stack[context_stack.size()-2].mark;
633 if (m) set_mark(m);
634 else set_mark(cohort);
635 }
636 else {
637 set_mark(cohort);
638 }
639 uint8_t orz = context_stack.back().regexgrp_ct;
640 for (auto r = cohort->readings[i]; r; r = r->next) {
641 r->active = true;
642 }
643 if (rule.line == 2746) {
644 cohort = cohort;
645 }
646 rule_target = cohort;
647 // Actually check if the reading is a valid target. First check if rule target matches...
648 if (rule.target && doesSetMatchReading(*reading, rule.target, (set.type & (ST_CHILD_UNIFY | ST_SPECIAL)) != 0)) {
649 if (rule.line == 2746) {
650 cohort = cohort;
651 }
652 bool regex_prop = true;
653 if (orz != context_stack.back().regexgrp_ct) {
654 did_test = false;
655 regex_prop = false;
656 }
657 rule_target = context_target = cohort;
658 reading->matched_target = true;
659 matched_target = true;
660 bool good = true;
661 // If we didn't already run the contextual tests, run them now.
662 if (!did_test) {
663 context_stack.back().context.clear();
664 foreach (it, rule.tests) {
665 ContextualTest* test = *it;
666 if (rule.flags & RF_RESETX || !(rule.flags & RF_REMEMBERX)) {
667 set_mark(cohort);
668 }
669 seen_barrier = false;
670 // Keeps track of where we have been, to prevent infinite recursion in trees with loops
671 dep_deep_seen.clear();
672 // Reset the counters for which types of CohortIterator we have in play
673 std::fill(ci_depths.begin(), ci_depths.end(), UI32(0));
674 tmpl_cntx.clear();
675 // Run the contextual test...
676 Cohort* next_test = nullptr;
677 Cohort* result = nullptr;
678 Cohort** deep = nullptr;
679 if (rule.type == K_WITH) {
680 deep = &result;
681 merge_with = nullptr;
682 }
683 if (!(test->pos & POS_PASS_ORIGIN) && (no_pass_origin || (test->pos & POS_NO_PASS_ORIGIN))) {
684 next_test = runContextualTest(&current, c, test, deep, cohort);
685 }
686 else {
687 next_test = runContextualTest(&current, c, test, deep);
688 }
689 context_stack.back().context.push_back(merge_with ? merge_with : result);
690 test_good = (next_test != nullptr);
691
692 profileRuleContext(test_good, &rule, test);
693
694 if (!test_good) {
695 good = test_good;
696 if (it != rule.tests.begin() && !(rule.flags & RF_KEEPORDER)) {
697 rule.tests.erase(it);
698 rule.tests.push_front(test);
699 }
700 break;
701 }
702 did_test = ((set.type & (ST_CHILD_UNIFY | ST_SPECIAL)) == 0 && context_stack.back().unif_tags->empty() && context_stack.back().unif_sets->empty());
703 }
704 }
705 else {
706 good = test_good;
707 }
708 if (good) {
709 // We've found a match, so Iff should be treated as Select instead of Remove
710 if (rule.type == K_IFF && type != K_SELECT) {
711 type = K_SELECT;
712 if (grammar->has_protect) {
713 for (size_t j = 0; j < i; ++j) {
714 Reading* reading = get_sub_reading(cohort->readings[j], rule.sub_reading);
715 if (reading && reading->immutable) {
716 reading->matched_target = true;
717 reading->matched_tests = true;
718 ++num_active;
719 ++num_iff;
720 }
721 }
722 }
723 }
724 reading->matched_tests = true;
725 ++num_active;
726 if (profiler) {
727 Profiler::Key k{ET_RULE, rule.number + 1 };
728 auto& r = profiler->entries[k];
729 ++r.num_match;
730 if (!r.example_window) {
731 addProfilingExample(r);
732 }
733 }
734 if (!debug_rules.empty() && debug_rules.contains(rule.line)) {
735 printDebugRule(rule);
736 }
737
738 if (regex_prop && i && !regexgrps_c.empty()) {
739 for (auto z = i; z > 0; --z) {
740 auto it = regexgrps_c.find(cohort->readings[z - 1]->number);
741 if (it != regexgrps_c.end()) {
742 regexgrps_c.insert(std::make_pair(reading->number, it->second));
743 regexgrps_z.insert(std::make_pair(reading->number, regexgrps_z.find(cohort->readings[z - 1]->number)->second));
744 break;
745 }
746 }
747 }
748 }
749 else {
750 context_stack.back().regexgrp_ct = orz;
751 if (!debug_rules.empty() && debug_rules.contains(rule.line)) {
752 printDebugRule(rule, true, false);
753 }
754 }
755 ++num_iff;
756 }
757 else {
758 context_stack.back().regexgrp_ct = orz;
759 if (profiler) {
760 Profiler::Key k{ ET_RULE, rule.number + 1 };
761 ++profiler->entries[k].num_fail;
762 }
763 if (!debug_rules.empty() && debug_rules.contains(rule.line)) {
764 printDebugRule(rule, false, false);
765 }
766 }
767 readings_plain.insert(std::make_pair(reading->hash_plain, reading));
768 for (auto r = cohort->readings[i]; r; r = r->next) {
769 r->active = false;
770 }
771
772 if (reading != cohort->readings[i]) {
773 cohort->readings[i]->matched_target = reading->matched_target;
774 cohort->readings[i]->matched_tests = reading->matched_tests;
775 }
776 if (context_stack.back().regexgrp_ct) {
777 regexgrps_c[reading->number] = context_stack.back().regexgrps;
778 regexgrps_z[reading->number] = context_stack.back().regexgrp_ct;
779 ++used_regex;
780 }
781 reading_contexts.push_back(context_stack.back());
782 }
783
784 if (state_num_readings != cohort->readings.size() || state_num_removed != cohort->deleted.size() || state_num_delayed != cohort->delayed.size() || state_num_ignored != cohort->ignored.size()) {
785 anything_changed = true;
786 cohort->type &= ~CT_NUM_CURRENT;
787 }
788
789 // If none of the readings were valid targets, remove this cohort from the rule's possible cohorts.
790 if (num_active == 0 && (num_iff == 0 || rule.type != K_IFF)) {
791 if (!matched_target) {
792 --rocit; // We have already incremented rocit earlier, so take one step back...
793 rocit = cohortset->erase(rocit); // ...and one step forward again
794 }
795 context_stack.pop_back();
796 continue;
797 }
798
799 // All readings were valid targets, which means there is nothing to do for Select or safe Remove rules.
800 if (num_active == cohort->readings.size()) {
801 if (type == K_SELECT) {
802 context_stack.pop_back();
803 continue;
804 }
805 if (type == K_REMOVE && (!unsafe || (rule.flags & RF_SAFE)) && !(rule.flags & RF_UNSAFE)) {
806 context_stack.pop_back();
807 continue;
808 }
809 }
810
811 for (auto& ctx : reading_contexts) {
812 if (!ctx.target.subreading->matched_target) {
813 continue;
814 }
815 if (!ctx.target.subreading->matched_tests && rule.type != K_IFF) {
816 continue;
817 }
818 context_stack.back() = ctx;
819 reset_cohorts_for_loop = false;
820 reading_cb();
821 if (!finish_cohort_loop) {
822 context_stack.pop_back();
823 return anything_changed;
824 }
825 if (reset_cohorts_for_loop) {
826 reset_cohorts();
827 break;
828 }
829 if (!finish_reading_loop) {
830 break;
831 }
832 }
833
834 reset_cohorts_for_loop = false;
835 cohort_cb();
836 if (!finish_cohort_loop) {
837 context_stack.pop_back();
838 return anything_changed;
839 }
840 if (reset_cohorts_for_loop) {
841 reset_cohorts();
842 }
843 context_stack.pop_back();
844 }
845 return anything_changed;
846}
847
848/**
849 * Applies the passed rules to the passed SingleWindow.
850 *
851 * This function is called at least N*M times, where N is the number of sections in the grammar and M is the number of windows in the input.
852 * Possibly many more times, since if a section changes the state of the window the section is run again.
853 * Only when no further changes are caused at a level does it progress to the next level.
854 *
855 * The loops in this function are increasingly explosive, despite efforts to contain them.
856 * In the https://visl.sdu.dk/cg3_performance.html test data, this function is called 1015 times.
857 * The first loop (rules) is executed 3101728 times.
858 * The second loop (cohorts) is executed 11087278 times.
859 * The third loop (finding readings) is executed 11738927 times; of these, 1164585 (10%) match the rule target.
860 * The fourth loop (contextual test) is executed 1184009 times; of those, 1156322 (97%) fail their contexts.
861 * The fifth loop (acting on readings) is executed 41540 times.
862 *
863 * @param[in,out] current The window to apply rules on
864 * @param[in] rules The rules to apply
865 */
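// (Editorial sketch, not part of the analyzed source: assuming the structure of
// runRulesOnSingleWindow and runSingleRule shown in this file, the five loops
// counted above nest roughly as follows.)
//
//   for each rule in intersects                       // loop 1: rules (runRulesOnSingleWindow)
//       for each cohort in rule_to_cohorts[rule]      // loop 2: cohorts (runSingleRule)
//           for each reading of the cohort            // loop 3: finding target readings
//               for each contextual test of the rule  // loop 4: runContextualTest
//           for each matching context in reading_contexts  // loop 5: acting on readings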
866uint32_t GrammarApplicator::runRulesOnSingleWindow(SingleWindow& current, const uint32IntervalVector& rules) {
867 uint32_t retval = RV_NOTHING;
868 bool section_did_something = false;
869 bool delimited = false;
870
871 // ToDo: Now that numbering is used, can't this be made a normal max? Hm, maybe not since --sections can still force another order...but if we're smart, then we re-enumerate rules based on --sections
872 uint32IntervalVector intersects = current.valid_rules.intersect(rules);
873 ReadingList removed;
874 ReadingList selected;
875
876 if (debug_level > 1) {
877 std::cerr << "DEBUG: Trying window " << current.number << std::endl;
878 }
879
880 current.parent->cohort_map[0] = current.cohorts.front();
881
882 foreach (iter_rules, intersects) {
883 // Conditionally re-sort the rule-to-cohort mapping when the current rule is finished, regardless of how it finishes
884 struct Sorter {
885 SingleWindow& current;
886 bool do_sort = false;
887
888 Sorter(SingleWindow& current)
889 : current(current)
890 {}
891
892 ~Sorter() {
893 if (do_sort) {
894 for (auto& cs : current.rule_to_cohorts) {
895 cs.sort();
896 }
897 }
898 }
899 } sorter(current);
900
901 repeat_rule:
902 bool rule_did_something = false;
903 uint32_t j = (*iter_rules);
904
905 // Check whether this rule is in the allowed rule list from cmdline flag --rule(s)
906 if (!valid_rules.empty() && !valid_rules.contains(j)) {
907 continue;
908 }
909
910 current_rule = grammar->rule_by_number[j];
911 Rule* rule = grammar->rule_by_number[j];
912 if (rule->type == K_IGNORE) {
913 continue;
914 }
915 if (debug_level > 1) {
916 std::cerr << "DEBUG: Trying rule " << rule->line << std::endl;
917 }
918
919 if (!apply_mappings && (rule->type == K_MAP || rule->type == K_ADD || rule->type == K_REPLACE)) {
920 continue;
921 }
922 if (!apply_corrections && (rule->type == K_SUBSTITUTE || rule->type == K_APPEND)) {
923 continue;
924 }
925 // If there are parentheses and the rule is marked as only run on the final pass, skip if this is not it.
926 if (current.has_enclosures) {
927 if ((rule->flags & RF_ENCL_FINAL) && !did_final_enclosure) {
928 continue;
929 }
930 if (did_final_enclosure && !(rule->flags & RF_ENCL_FINAL)) {
931 continue;
932 }
933 }
934
935 bool readings_changed = false;
936 bool should_repeat = false;
937 bool should_bail = false;
938
939 auto reindex = [&](SingleWindow* which = nullptr) {
940 if (!which) {
941 which = &current;
942 }
943 foreach (iter, which->cohorts) {
944 (*iter)->local_number = UI32(std::distance(which->cohorts.begin(), iter));
945 }
946 gWindow->rebuildCohortLinks();
947 };
948
949 auto collect_subtree = [&](CohortSet& cs, Cohort* head, uint32_t cset) {
950 if (cset) {
951 for (auto iter : current.cohorts) {
952 // Always consider the initial cohort a match
953 if (iter->global_number == head->global_number) {
954 cs.insert(iter);
955 }
956 else if (iter->dep_parent == head->global_number && doesSetMatchCohortNormal(*iter, cset)) {
957 cs.insert(iter);
958 }
959 }
960 CohortSet more;
961 for (auto iter : current.cohorts) {
962 for (auto cht : cs) {
963 // Do not grab the whole tree from the root, in case WithChild is not (*)
964 if (cht->global_number == head->global_number) {
965 continue;
966 }
967 if (isChildOf(iter, cht)) {
968 more.insert(iter);
969 }
970 }
971 }
972 cs.insert(more.begin(), more.end());
973 }
974 else {
975 cs.insert(head);
976 }
977 };
978
979 auto add_cohort = [&](Cohort* cohort, size_t& spacesInAddedWf) {
980 Cohort* cCohort = alloc_cohort(&current);
981 cCohort->global_number = gWindow->cohort_counter++;
982
983 Tag* wf = nullptr;
984 std::vector<TagList> readings;
985 auto theTags = ss_taglist.get();
986 getTagList(*rule->maplist, theTags);
987
988 for (auto& tter : *theTags) {
989 if (tter->type & T_VSTR) {
990 VARSTRINGIFY(tter);
991 }
992 }
993
994 for (auto tter : *theTags) {
995 if(tter->type & T_WORDFORM) {
996 spacesInAddedWf = std::count_if(tter->tag.begin(), tter->tag.end(), [](UChar c){ return c == ' '; });
997 }
998 VARSTRINGIFY(tter);
999 if (tter->type & T_WORDFORM) {
1000 cCohort->wordform = tter;
1001 wf = tter;
1002 continue;
1003 }
1004 if (!wf) {
1005 u_fprintf(ux_stderr, "Error: There must be a wordform before any other tags in ADDCOHORT/MERGECOHORTS on line %u before input line %u.\n", rule->line, numLines);
1006 CG3Quit(1);
1007 }
1008 if (tter->type & T_BASEFORM) {
1009 readings.resize(readings.size() + 1);
1010 readings.back().push_back(wf);
1011 }
1012 if (readings.empty()) {
1013 u_fprintf(ux_stderr, "Error: There must be a baseform after the wordform in ADDCOHORT/MERGECOHORTS on line %u before input line %u.\n", rule->line, numLines);
1014 CG3Quit(1);
1015 }
1016 readings.back().push_back(tter);
1017 }
1018
1019 for (auto& tags : readings) {
1020 for (size_t i = 0; i < tags.size(); ++i) {
1021 if (tags[i]->hash == grammar->tag_any) {
1022 auto& nt = cohort->readings.front()->tags_list;
1023 if (nt.size() <= 2) {
1024 continue;
1025 }
1026 tags.reserve(tags.size() + nt.size() - 2);
1027 tags[i] = grammar->single_tags[nt[2]];
1028 for (size_t j = 3, k = 1; j < nt.size(); ++j) {
1029 if (grammar->single_tags[nt[j]]->type & T_DEPENDENCY) {
1030 continue;
1031 }
1032 tags.insert(tags.begin() + i + k, grammar->single_tags[nt[j]]);
1033 ++k;
1034 }
1035 }
1036 }
1037 }
1038
1039 for (auto& rit : readings) {
1040 Reading* cReading = alloc_reading(cCohort);
1041 ++numReadings;
1042 insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
1043 cReading->hit_by.push_back(rule->number);
1044 cReading->noprint = false;
1045 TagList mappings;
1046 for (auto tter : rit) {
1047 uint32_t hash = tter->hash;
1048 VARSTRINGIFY(tter);
1049 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
1050 mappings.push_back(tter);
1051 }
1052 else {
1053 hash = addTagToReading(*cReading, hash);
1054 }
1055 if (updateValidRules(rules, intersects, hash, *cReading)) {
1056 iter_rules = intersects.find(rule->number);
1057 iter_rules_end = intersects.end();
1058 }
1059 }
1060 if (!mappings.empty()) {
1061 splitMappings(mappings, *cCohort, *cReading);
1062 }
1063 cCohort->appendReading(cReading);
1064 }
1065
1066 current.parent->cohort_map[cCohort->global_number] = cCohort;
1067 current.parent->dep_window[cCohort->global_number] = cCohort;
1068 if (grammar->addcohort_attach && (rule->type == K_ADDCOHORT_BEFORE || rule->type == K_ADDCOHORT_AFTER)) {
1069 attachParentChild(*cohort, *cCohort);
1070 }
1071
1072 if (cCohort->readings.empty()) {
1073 initEmptyCohort(*cCohort);
1074 if (trace) {
1075 auto r = cCohort->readings.front();
1076 r->hit_by.push_back(rule->number);
1077 r->noprint = false;
1078 }
1079 }
1080
1081 CohortSet cohorts;
1082 collect_subtree(cohorts, cohort, rule->childset1);
1083
1084 if (rule->type == K_ADDCOHORT_BEFORE) {
1085 current.cohorts.insert(current.cohorts.begin() + cohorts.front()->local_number, cCohort);
1086 current.all_cohorts.insert(std::find(current.all_cohorts.begin() + cohorts.front()->local_number, current.all_cohorts.end(), cohorts.front()), cCohort);
1087 }
1088 else {
1089 current.cohorts.insert(current.cohorts.begin() + cohorts.back()->local_number + 1, cCohort);
1090 current.all_cohorts.insert(std::find(current.all_cohorts.begin() + cohorts.back()->local_number, current.all_cohorts.end(), cohorts.back()) + 1, cCohort);
1091 }
1092
1093 foreach (iter, current.cohorts) {
1094 (*iter)->local_number = UI32(std::distance(current.cohorts.begin(), iter));
1095 }
1096 gWindow->rebuildCohortLinks();
1097
1098 return cCohort;
1099 };
1100
1101 auto rem_cohort = [&](Cohort* cohort) {
1102 auto& current = *cohort->parent;
1103 for (auto iter : cohort->readings) {
1104 iter->hit_by.push_back(rule->number);
1105 iter->deleted = true;
1106 if (trace) {
1107 iter->noprint = false;
1108 }
1109 }
1110 // Remove the cohort from all rules
1111 for (auto& cs : current.rule_to_cohorts) {
1112 cs.erase(cohort);
1113 }
1114 // Forward all children of this cohort to the parent of this cohort
1115 // ToDo: Named relations must be erased
1116 while (!cohort->dep_children.empty()) {
1117 uint32_t ch = cohort->dep_children.back();
1118 if (cohort->dep_parent == DEP_NO_PARENT) {
1119 attachParentChild(*gWindow->cohort_map[0], *gWindow->cohort_map[ch], true, true);
1120 }
1121 else {
1122 attachParentChild(*gWindow->cohort_map[cohort->dep_parent], *gWindow->cohort_map[ch], true, true);
1123 }
1124 cohort->dep_children.erase(ch);
1125 }
1126 cohort->type |= CT_REMOVED;
1127 cohort->detach();
1128 for (auto& cm : gWindow->cohort_map) {
1129 cm.second->dep_children.erase(cohort->dep_self);
1130 }
1131 gWindow->cohort_map.erase(cohort->global_number);
1132 current.cohorts.erase(current.cohorts.begin() + cohort->local_number);
1133 foreach (iter, current.cohorts) {
1134 (*iter)->local_number = UI32(std::distance(current.cohorts.begin(), iter));
1135 }
1136
1137 if (current.cohorts.size() == 1 && &current != gWindow->current) {
1138 // This window is now empty, so remove it entirely from consideration so rules can look past it
1139 cohort = current.cohorts[0];
1140
1141 // Remove the cohort from all rules
1142 for (auto& cs : current.rule_to_cohorts) {
1143 cs.erase(cohort);
1144 }
1145 cohort->detach();
1146 for (auto& cm : gWindow->cohort_map) {
1147 cm.second->dep_children.erase(cohort->dep_self);
1148 }
1149 gWindow->cohort_map.erase(cohort->global_number);
1150 free_cohort(cohort);
1151
1152 if (current.previous) {
1153 current.previous->text += current.text + current.text_post;
1154 current.previous->all_cohorts.insert(current.previous->all_cohorts.end(), current.all_cohorts.begin() + 1, current.all_cohorts.end());
1155 }
1156 else if (current.next) {
1157 current.next->text = current.text_post + current.next->text;
1158 current.next->all_cohorts.insert(current.previous->all_cohorts.begin() + 1, current.all_cohorts.begin() + 1, current.all_cohorts.end());
1159 }
1160 current.all_cohorts.clear();
1161
1162 for (size_t i = 0; i < gWindow->previous.size(); ++i) {
1163 if (gWindow->previous[i] == &current) {
1164 free_swindow(gWindow->previous[i]);
1165 gWindow->previous.erase(gWindow->previous.begin() + i);
1166 break;
1167 }
1168 }
1169 for (size_t i = 0; i < gWindow->next.size(); ++i) {
1170 if (gWindow->next[i] == &current) {
1171 free_swindow(gWindow->next[i]);
1172 gWindow->next.erase(gWindow->next.begin() + i);
1173 break;
1174 }
1175 }
1176
1177 gWindow->rebuildSingleWindowLinks();
1178 }
1179
1180 gWindow->rebuildCohortLinks();
1181 };
1182
1183 auto ignore_cohort = [&](Cohort* cohort) {
1184 auto& current = *cohort->parent;
1185 for (auto iter : cohort->readings) {
1186 iter->hit_by.push_back(rule->number);
1187 }
1188 for (auto& cs : current.rule_to_cohorts) {
1189 cs.erase(cohort);
1190 }
1191 cohort->type |= CT_IGNORED;
1192 cohort->detach();
1193 gWindow->cohort_map.erase(cohort->global_number);
1194 current.cohorts.erase(current.cohorts.begin() + cohort->local_number);
1195 };
1196
1197 auto make_relation_rtag = [&](Tag* tag, uint32_t id) {
1198 UChar tmp[256] = { 0 };
1199 u_sprintf(tmp, "R:%S:%u", tag->tag.data(), id);
1200 auto nt = addTag(tmp);
1201 return nt;
1202 };
1203
1204 auto add_relation_rtag = [&](Cohort* cohort, Tag* tag, uint32_t id) {
1205 auto nt = make_relation_rtag(tag, id);
1206 for (auto& r : cohort->readings) {
1207 addTagToReading(*r, nt);
1208 }
1209 };
1210
1211 auto set_relation_rtag = [&](Cohort* cohort, Tag* tag, uint32_t id) {
1212 auto nt = make_relation_rtag(tag, id);
1213 for (auto& r : cohort->readings) {
1214 for (auto it = r->tags_list.begin(); it != r->tags_list.end();) {
1215 const auto& utag = grammar->single_tags[*it]->tag;
1216 if (utag[0] == 'R' && utag[1] == ':' && utag.size() > 2 + tag->tag.size() && utag[2 + tag->tag.size()] == ':' && utag.compare(2, tag->tag.size(), tag->tag) == 0) {
1217 r->tags.erase(*it);
1218 r->tags_textual.erase(*it);
1219 r->tags_numerical.erase(*it);
1220 r->tags_plain.erase(*it);
1221 it = r->tags_list.erase(it);
1222 }
1223 else {
1224 ++it;
1225 }
1226 }
1227 addTagToReading(*r, nt);
1228 }
1229 };
1230
1231 auto rem_relation_rtag = [&](Cohort* cohort, Tag* tag, uint32_t id) {
1232 auto nt = make_relation_rtag(tag, id);
1233 for (auto& r : cohort->readings) {
1234 delTagFromReading(*r, nt);
1235 }
1236 };
1237
1238 auto insert_taglist_to_reading = [&](auto& iter, auto& taglist, auto& reading, auto& mappings) {
1239 for (auto tag : taglist) {
1240 if (tag->type & T_VARSTRING) {
1241 tag = generateVarstringTag(tag);
1242 }
1243 if (tag->hash == grammar->tag_any) {
1244 break;
1245 }
1246 if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
1247 mappings->push_back(tag);
1248 }
1249 else {
1250 iter = reading.tags_list.insert(iter, tag->hash);
1251 ++iter;
1252 }
1253 if (updateValidRules(rules, intersects, tag->hash, reading)) {
1254 iter_rules = intersects.find(rule->number);
1255 iter_rules_end = intersects.end();
1256 }
1257 }
1258 reflowReading(reading);
1259 };
1260
1261 auto cohort_cb = [&]() {
1262 if (rule->type == K_SELECT || (rule->type == K_IFF && !selected.empty())) {
1263 Cohort* target = get_apply_to().cohort;
1264 if (selected.size() < target->readings.size() && !selected.empty()) {
1265 ReadingList drop;
1266 size_t si = 0;
1267 for (size_t ri = 0; ri < target->readings.size(); ri++) {
1268 // Manually trace, since reading_cb doesn't get called on non-matching readings
1269 Reading* rd = target->readings[ri];
1270 if (rule->sub_reading != 32767) {
1271 rd = get_sub_reading(rd, rule->sub_reading);
1272 }
1273 if (rd) {
1274 rd->hit_by.push_back(rule->number);
1275 }
1276 if (si < selected.size() && target->readings[ri] == selected[si]) {
1277 si++;
1278 }
1279 else {
1280 target->readings[ri]->deleted = true;
1281 drop.push_back(target->readings[ri]);
1282 }
1283 }
1284 target->readings.swap(selected);
1285 if (rule->flags & RF_DELAYED) {
1286 target->delayed.insert(target->delayed.end(), drop.begin(), drop.end());
1287 }
1288 else if (rule->flags & RF_IGNORED) {
1289 target->ignored.insert(target->ignored.end(), drop.begin(), drop.end());
1290 }
1291 else {
1292 target->deleted.insert(target->deleted.end(), drop.begin(), drop.end());
1293 }
1294 readings_changed = true;
1295 }
1296 selected.clear();
1297 }
1298 else if (rule->type == K_REMOVE || rule->type == K_IFF) {
1299 if (!removed.empty() && (removed.size() < get_apply_to().cohort->readings.size() || (unsafe && !(rule->flags & RF_SAFE)) || (rule->flags & RF_UNSAFE))) {
1300 if (rule->flags & RF_DELAYED) {
1301 get_apply_to().cohort->delayed.insert(get_apply_to().cohort->delayed.end(), removed.begin(), removed.end());
1302 }
1303 else if (rule->flags & RF_IGNORED) {
1304 get_apply_to().cohort->ignored.insert(get_apply_to().cohort->ignored.end(), removed.begin(), removed.end());
1305 }
1306 else {
1307 get_apply_to().cohort->deleted.insert(get_apply_to().cohort->deleted.end(), removed.begin(), removed.end());
1308 }
1309 size_t oz = get_apply_to().cohort->readings.size();
1310 while (!removed.empty()) {
1311 removed.back()->deleted = true;
1312 for (size_t i = 0; i < oz; ++i) {
1313 if (get_apply_to().cohort->readings[i] == removed.back()) {
1314 --oz;
1315 std::swap(get_apply_to().cohort->readings[i], get_apply_to().cohort->readings[oz]);
1316 }
1317 }
1318 removed.pop_back();
1319 }
1320 get_apply_to().cohort->readings.resize(oz);
1321 if (debug_level > 0) {
1322 std::cerr << "DEBUG: Rule " << rule->line << " hit cohort " << get_apply_to().cohort->local_number << std::endl;
1323 }
1324 readings_changed = true;
1325 }
1326 if (get_apply_to().cohort->readings.empty()) {
1327 initEmptyCohort(*get_apply_to().cohort);
1328 }
1329 selected.clear();
1330 }
1331 else if (rule->type == K_JUMP) {
1332 auto to = getTagList(*rule->maplist).front();
1333 VARSTRINGIFY(to);
1334 auto it = grammar->anchors.find(to->hash);
1335 if (it == grammar->anchors.end()) {
1336 u_fprintf(ux_stderr, "Warning: JUMP on line %u could not find anchor '%S'.\n", rule->line, to->tag.data());
1337 }
1338 else {
1339 iter_rules = intersects.lower_bound(it->second);
1340 finish_cohort_loop = false;
1341 should_repeat = true;
1342 }
1343 }
1344 else if (rule->type == K_REMVARIABLE) {
1345 auto names = getTagList(*rule->maplist);
1346 for (auto tag : names) {
1347 VARSTRINGIFY(tag);
1348 auto it = variables.begin();
1349 if (tag->type & T_REGEXP) {
1350 it = std::find_if(it, variables.end(), [&](auto& kv) { return doesTagMatchRegexp(kv.first, *tag); });
1351 }
1352 else if (tag->type & T_CASE_INSENSITIVE) {
1353 it = std::find_if(it, variables.end(), [&](auto& kv) { return doesTagMatchIcase(kv.first, *tag); });
1354 }
1355 else {
1356 it = variables.find(tag->hash);
1357 }
1358 if (it != variables.end()) {
1359 if (rule->flags & RF_OUTPUT) {
1360 current.variables_output.insert(it->first);
1361 }
1362 variables.erase(it);
1363 //u_fprintf(ux_stderr, "Info: RemVariable fired for %S.\n", tag->tag.data());
1364 }
1365 }
1366 }
1367 else if (rule->type == K_SETVARIABLE) {
1368 auto names = getTagList(*rule->maplist);
1369 auto values = getTagList(*rule->sublist);
1370 VARSTRINGIFY(names.front());
1371 VARSTRINGIFY(values.front());
1372 variables[names.front()->hash] = values.front()->hash;
1373 if (rule->flags & RF_OUTPUT) {
1374 current.variables_output.insert(names.front()->hash);
1375 }
1376 //u_fprintf(ux_stderr, "Info: SetVariable fired for %S.\n", names.front()->tag.data());
1377 }
1378 else if (rule->type == K_DELIMIT) {
1379 auto cohort = get_apply_to().cohort;
1380 if (cohort->parent->cohorts.size() > cohort->local_number + 1) {
1381 delimitAt(current, cohort);
1382 delimited = true;
1383 readings_changed = true;
1384 }
1385 }
1386 else if (rule->type == K_EXTERNAL_ONCE || rule->type == K_EXTERNAL_ALWAYS) {
1387 if (rule->type == K_EXTERNAL_ONCE && !current.hit_external.insert(rule->line).second) {
1388 return;
1389 }
1390
1391 auto ei = externals.find(rule->varname);
1392 if (ei == externals.end()) {
1393 Tag* ext = grammar->single_tags.find(rule->varname)->second;
1394 UErrorCode err = U_ZERO_ERROR;
1395 u_strToUTF8(&cbuffers[0][0], SI32(CG3_BUFFER_SIZE - 1), nullptr, ext->tag.data(), SI32(ext->tag.size()), &err);
1396
1397 Process& es = externals[rule->varname];
1398 try {
1399 es.start(&cbuffers[0][0]);
1400 writeRaw(es, CG3_EXTERNAL_PROTOCOL);
1401 }
1402 catch (std::exception& e) {
1403 u_fprintf(ux_stderr, "Error: External on line %u resulted in error: %s\n", rule->line, e.what());
1404 CG3Quit(1);
1405 }
1406 ei = externals.find(rule->varname);
1407 }
1408
1409 pipeOutSingleWindow(current, ei->second);
1410 pipeInSingleWindow(current, ei->second);
1411
1412 indexSingleWindow(current);
1413 readings_changed = true;
1414 index_ruleCohort_no.clear();
1415 intersects = current.valid_rules.intersect(rules);
1416 iter_rules = intersects.find(rule->number);
1417 iter_rules_end = intersects.end();
1418 reset_cohorts_for_loop = true;
1419 }
1420 else if (rule->type == K_REMCOHORT) {
1421 // REMCOHORT-IGNORED
1422 if (rule->flags & RF_IGNORED) {
1423 CohortSet cohorts;
1424 collect_subtree(cohorts, get_apply_to().cohort, rule->childset1);
1425 for (auto c : reversed(cohorts)) {
1426 ignore_cohort(c);
1427 }
1428 reindex();
1429 reflowDependencyWindow();
1430 }
1431 else {
1432 rem_cohort(get_apply_to().cohort);
1433 }
1434
1435 // If we just removed the last cohort, add <<< to the new last cohort
1436 if (get_apply_to().cohort->readings.front()->tags.count(endtag)) {
1437 for (auto r : current.cohorts.back()->readings) {
1438 addTagToReading(*r, endtag);
1439 if (updateValidRules(rules, intersects, endtag, *r)) {
1440 iter_rules = intersects.find(rule->number);
1441 iter_rules_end = intersects.end();
1442 }
1443 }
1444 index_ruleCohort_no.clear();
1445 }
1446 readings_changed = true;
1447 reset_cohorts_for_loop = true;
1448 }
1449 };
1450
1451 RuleCallback reading_cb = [&]() {
1452 if (rule->type == K_SELECT || (rule->type == K_IFF && get_apply_to().subreading->matched_tests)) {
1453 selected.push_back(get_apply_to().reading);
1454 index_ruleCohort_no.clear();
1455 }
1456 else if (rule->type == K_REMOVE || rule->type == K_IFF) {
1457 if (rule->type == K_REMOVE && (rule->flags & RF_UNMAPLAST) && removed.size() == get_apply_to().cohort->readings.size() - 1) {
1458 if (unmapReading(*get_apply_to().subreading, rule->number)) {
1459 readings_changed = true;
1460 }
1461 }
1462 else {
1463 TRACE;
1464 removed.push_back(get_apply_to().reading);
1465 }
1466 index_ruleCohort_no.clear();
1467 }
1468 else if (rule->type == K_PROTECT) {
1469 TRACE;
1470 get_apply_to().subreading->immutable = true;
1471 }
1472 else if (rule->type == K_UNPROTECT) {
1473 TRACE;
1474 get_apply_to().subreading->immutable = false;
1475 }
1476 else if (rule->type == K_UNMAP) {
1477 if (unmapReading(*get_apply_to().subreading, rule->number)) {
1478 index_ruleCohort_no.clear();
1479 readings_changed = true;
1480 }
1481 }
1482 else if (rule->type == K_ADDCOHORT_AFTER || rule->type == K_ADDCOHORT_BEFORE) {
1483 index_ruleCohort_no.clear();
1484 TRACE;
1485
1486 size_t spacesInAddedWf = 0; // not used here
1487 auto cCohort = add_cohort(get_apply_to().cohort, spacesInAddedWf);
1488
1489 // If the new cohort is now the last cohort, add <<< to it and remove <<< from previous last cohort
1490 if (current.cohorts.back() == cCohort) {
1491 for (auto r : current.cohorts[current.cohorts.size() - 2]->readings) {
1492 delTagFromReading(*r, endtag);
1493 }
1494 for (auto r : current.cohorts.back()->readings) {
1495 addTagToReading(*r, endtag);
1496 if (updateValidRules(rules, intersects, endtag, *r)) {
1497 iter_rules = intersects.find(rule->number);
1498 iter_rules_end = intersects.end();
1499 }
1500 }
1501 }
1502 indexSingleWindow(current);
1503 readings_changed = true;
1504
1505 reset_cohorts_for_loop = true;
1506 }
1507 else if (rule->type == K_SPLITCOHORT) {
1508 index_ruleCohort_no.clear();
1509
1510 std::vector<std::pair<Cohort*, std::vector<TagList>>> cohorts;
1511
1512 auto theTags = ss_taglist.get();
1513 getTagList(*rule->maplist, theTags);
1514
1515 for (auto& tter : *theTags) {
1516 if (tter->type & T_VSTR) {
1517 VARSTRINGIFY(tter);
1518 }
1519 }
1520
1521 Tag* wf = nullptr;
1522 for (auto tter : *theTags) {
1523 if (tter->type & T_WORDFORM) {
1524 cohorts.resize(cohorts.size() + 1);
1525 cohorts.back().first = alloc_cohort(&current);
1526 cohorts.back().first->global_number = gWindow->cohort_counter++;
1527 wf = tter;
1528 VARSTRINGIFY(wf);
1529 cohorts.back().first->wordform = wf;
1530 continue;
1531 }
1532 if (!wf) {
1533 u_fprintf(ux_stderr, "Error: There must be a wordform before any other tags in SPLITCOHORT on line %u before input line %u.\n", rule->line, numLines);
1534 CG3Quit(1);
1535 }
1536 }
1537
1538 uint32_t rel_trg = DEP_NO_PARENT;
1539 std::vector<std::pair<uint32_t, uint32_t>> cohort_dep(cohorts.size());
1540 cohort_dep.front().second = DEP_NO_PARENT;
1541 cohort_dep.back().first = DEP_NO_PARENT;
1542 cohort_dep.back().second = UI32(cohort_dep.size() - 1);
1543 for (size_t i = 1; i < cohort_dep.size() - 1; ++i) {
1544 cohort_dep[i].second = UI32(i);
1545 }
1546
1547 size_t i = 0;
1548 std::vector<TagList>* readings = &cohorts.front().second;
1549 Tag* bf = nullptr;
1550 for (auto tter : *theTags) {
1551 if (tter->type & T_WORDFORM) {
1552 ++i;
1553 bf = nullptr;
1554 continue;
1555 }
1556 if (tter->type & T_BASEFORM) {
1557 readings = &cohorts[i - 1].second;
1558 readings->resize(readings->size() + 1);
1559 readings->back().push_back(cohorts[i - 1].first->wordform);
1560 bf = tter;
1561 }
1562 if (!bf) {
1563 u_fprintf(ux_stderr, "Error: There must be a baseform after the wordform in SPLITCOHORT on line %u before input line %u.\n", rule->line, numLines);
1564 CG3Quit(1);
1565 }
1566
1567 UChar dep_self[12] = {};
1568 UChar dep_parent[12] = {};
1569 if (u_sscanf(tter->tag.data(), "%[0-9cd]->%[0-9pm]", &dep_self, &dep_parent) == 2) {
1570 if (dep_self[0] == 'c' || dep_self[0] == 'd') {
1571 cohort_dep[i - 1].first = DEP_NO_PARENT;
1572 if (rel_trg == DEP_NO_PARENT) {
1573 rel_trg = UI32(i - 1);
1574 }
1575 }
1576 else if (u_sscanf(dep_self, "%i", &cohort_dep[i - 1].first) != 1) {
1577 u_fprintf(ux_stderr, "Error: SPLITCOHORT dependency mapping dep_self was not valid on line %u before input line %u.\n", rule->line, numLines);
1578 CG3Quit(1);
1579 }
1580 if (dep_parent[0] == 'p' || dep_parent[0] == 'm') {
1581 cohort_dep[i - 1].second = DEP_NO_PARENT;
1582 }
1583 else if (u_sscanf(dep_parent, "%i", &cohort_dep[i - 1].second) != 1) {
1584 u_fprintf(ux_stderr, "Error: SPLITCOHORT dependency mapping dep_parent was not valid on line %u before input line %u.\n", rule->line, numLines);
1585 CG3Quit(1);
1586 }
1587 continue;
1588 }
1589 if (tter->tag.size() == 3 && tter->tag[0] == 'R' && tter->tag[1] == ':' && tter->tag[2] == '*') {
1590 rel_trg = UI32(i - 1);
1591 continue;
1592 }
1593 readings->back().push_back(tter);
1594 }
1595
1596 if (rel_trg == DEP_NO_PARENT) {
1597 rel_trg = UI32(cohorts.size() - 1);
1598 }
1599
1600 for (size_t i = 0; i < cohorts.size(); ++i) {
1601 Cohort* cCohort = cohorts[i].first;
1602 readings = &cohorts[i].second;
1603
1604 for (auto tags : *readings) {
1605 Reading* cReading = alloc_reading(cCohort);
1606 ++numReadings;
1607 insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
1608 cReading->hit_by.push_back(rule->number);
1609 cReading->noprint = false;
1610 TagList mappings;
1611
1612 for (size_t i = 0; i < tags.size(); ++i) {
1613 if (tags[i]->hash == grammar->tag_any) {
1614 uint32Vector& nt = get_apply_to().cohort->readings.front()->tags_list;
1615 if (nt.size() <= 2) {
1616 continue;
1617 }
1618 tags.reserve(tags.size() + nt.size() - 2);
1619 tags[i] = grammar->single_tags[nt[2]];
1620 for (size_t j = 3, k = 1; j < nt.size(); ++j) {
1621 if (grammar->single_tags[nt[j]]->type & T_DEPENDENCY) {
1622 continue;
1623 }
1624 tags.insert(tags.begin() + i + k, grammar->single_tags[nt[j]]);
1625 ++k;
1626 }
1627 }
1628 }
1629
1630 for (auto tter : tags) {
1631 uint32_t hash = tter->hash;
1632 VARSTRINGIFY(tter);
1633 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
1634 mappings.push_back(tter);
1635 }
1636 else {
1637 hash = addTagToReading(*cReading, hash);
1638 }
1639 if (updateValidRules(rules, intersects, hash, *cReading)) {
1640 iter_rules = intersects.find(rule->number);
1641 iter_rules_end = intersects.end();
1642 }
1643 }
1644 if (!mappings.empty()) {
1645 splitMappings(mappings, *cCohort, *cReading);
1646 }
1647 cCohort->appendReading(cReading);
1648 }
1649
1650 if (cCohort->readings.empty()) {
1651 initEmptyCohort(*cCohort);
1652 }
1653
1654 current.parent->dep_window[cCohort->global_number] = cCohort;
1655 current.parent->cohort_map[cCohort->global_number] = cCohort;
1656
1657 current.cohorts.insert(current.cohorts.begin() + get_apply_to().cohort->local_number + i + 1, cCohort);
1658 current.all_cohorts.insert(std::find(current.all_cohorts.begin() + get_apply_to().cohort->local_number, current.all_cohorts.end(), get_apply_to().cohort) + i + 1, cCohort);
1659 }
1660
1661 // Move text from the to-be-deleted cohort to the last new cohort
1662 std::swap(cohorts.back().first->text, get_apply_to().cohort->text);
1663
1664 for (size_t i = 0; i < cohorts.size(); ++i) {
1665 Cohort* cCohort = cohorts[i].first;
1666
1667 if (cohort_dep[i].first == DEP_NO_PARENT) {
1668 while (!get_apply_to().cohort->dep_children.empty()) {
1669 uint32_t ch = get_apply_to().cohort->dep_children.back();
1670 attachParentChild(*cCohort, *current.parent->cohort_map[ch], true, true);
1671 get_apply_to().cohort->dep_children.erase(ch); // Just in case the attachment can't be made for some reason
1672 }
1673 }
1674
1675 if (cohort_dep[i].second == DEP_NO_PARENT) {
1676 if (current.parent->cohort_map.count(get_apply_to().cohort->dep_parent)) {
1677 attachParentChild(*current.parent->cohort_map[get_apply_to().cohort->dep_parent], *cCohort, true, true);
1678 }
1679 }
1680 else {
1681 attachParentChild(*current.parent->cohort_map[cohorts.front().first->global_number + cohort_dep[i].second - 1], *cCohort, true, true);
1682 }
1683
1684 // Re-attach all named relations to the dependency tail or R:* cohort
1685 if (rel_trg == i && (get_apply_to().cohort->type & CT_RELATED)) {
1686 cCohort->setRelated();
1687 cCohort->relations.swap(get_apply_to().cohort->relations);
1688
1689 std::pair<SingleWindow**, size_t> swss[3] = {
1690 std::make_pair(&gWindow->previous[0], gWindow->previous.size()),
1691 std::make_pair(&gWindow->current, static_cast<size_t>(1)),
1692 std::make_pair(&gWindow->next[0], gWindow->next.size()),
1693 };
1694 for (auto sws : swss) {
1695 for (size_t sw = 0; sw < sws.second; ++sw) {
1696 for (auto ch : sws.first[sw]->cohorts) {
1697 for (auto& rel : ch->relations) {
1698 if (rel.second.count(get_apply_to().cohort->global_number)) {
1699 rel.second.erase(get_apply_to().cohort->global_number);
1700 rel.second.insert(cCohort->global_number);
1701 }
1702 }
1703 }
1704 }
1705 }
1706 }
1707 }
1708
1709 // Remove the source cohort
1710 for (auto iter : get_apply_to().cohort->readings) {
1711 iter->hit_by.push_back(rule->number);
1712 iter->deleted = true;
1713 }
1714 get_apply_to().cohort->type |= CT_REMOVED;
1715 get_apply_to().cohort->detach();
1716 for (auto& cm : current.parent->cohort_map) {
1717 cm.second->dep_children.erase(get_apply_to().cohort->dep_self);
1718 }
1719 current.parent->cohort_map.erase(get_apply_to().cohort->global_number);
1720 current.cohorts.erase(current.cohorts.begin() + get_apply_to().cohort->local_number);
1721
1722 reindex();
1723 indexSingleWindow(current);
1724 readings_changed = true;
1725
1726 reset_cohorts_for_loop = true;
1727 }
1728 else if (rule->type == K_ADD || rule->type == K_MAP) {
1729 TRACE;
1730 auto state_hash = get_apply_to().subreading->hash;
1731 index_ruleCohort_no.clear();
1732 auto& reading = *(get_apply_to().subreading);
1733 reading.noprint = false;
1734 auto mappings = ss_taglist.get();
1735 auto theTags = ss_taglist.get();
1736 getTagList(*rule->maplist, theTags);
1737
1738 bool did_insert = false;
1739 if (rule->childset1) {
1740 bool found_spot = false;
1741 auto spot_tags = ss_taglist.get();
1742 getTagList(*grammar->sets_list[rule->childset1], spot_tags);
1743 FILL_TAG_LIST(spot_tags);
1744 auto it = reading.tags_list.begin();
1745 for (; it != reading.tags_list.end(); ++it) {
1746 bool found = true;
1747 auto tmp = it;
1748 for (auto tag : *spot_tags) {
1749 if (*tmp != tag->hash) {
1750 found = false;
1751 break;
1752 }
1753 ++tmp;
1754 }
1755 if (found) {
1756 found_spot = true;
1757 break;
1758 }
1759 }
1760 if (found_spot) {
1761 if (rule->flags & RF_AFTER) {
1762 std::advance(it, spot_tags->size());
1763 }
1764 if (it != reading.tags_list.end()) {
1765 insert_taglist_to_reading(it, *theTags, reading, mappings);
1766 did_insert = true;
1767 }
1768 }
1769 }
1770
1771 if (!did_insert) {
1772 APPEND_TAGLIST_TO_READING(*theTags, reading);
1773 }
1774 if (!mappings->empty()) {
1775 splitMappings(mappings, *get_apply_to().cohort, reading, rule->type == K_MAP);
1776 }
1777 if (rule->type == K_MAP) {
1778 reading.mapped = true;
1779 }
1780 if (reading.hash != state_hash) {
1781 readings_changed = true;
1782 }
1783 }
1784 else if (rule->type == K_RESTORE) {
1785 bool did_restore = false;
1786 auto move_rs = [&](ReadingList& rl) {
1787 for (size_t i = 0; i < rl.size();) {
1788 if (doesSetMatchReading(*rl[i], rule->maplist->number)) {
1789 rl[i]->deleted = false;
1790 rl[i]->hit_by.push_back(rule->number);
1791 get_apply_to().cohort->readings.push_back(rl[i]);
1792 rl.erase(rl.begin() + i);
1793 did_restore = true;
1794 }
1795 else {
1796 ++i;
1797 }
1798 }
1799 };
1800
1801 if (rule->flags & RF_DELAYED) {
1802 move_rs(get_apply_to().cohort->delayed);
1803 }
1804 else if (rule->flags & RF_IGNORED) {
1805 move_rs(get_apply_to().cohort->ignored);
1806 }
1807 else {
1808 move_rs(get_apply_to().cohort->deleted);
1809 }
1810
1811 if (did_restore) {
1812 TRACE;
1813 }
1814 finish_reading_loop = false;
1815 }
1816 else if (rule->type == K_REPLACE) {
1817 auto state_hash = get_apply_to().subreading->hash;
1818 index_ruleCohort_no.clear();
1819 TRACE;
1820 get_apply_to().subreading->noprint = false;
1821 get_apply_to().subreading->tags_list.clear();
1822 get_apply_to().subreading->tags_list.push_back(get_apply_to().cohort->wordform->hash);
1823 get_apply_to().subreading->tags_list.push_back(get_apply_to().subreading->baseform);
1824 reflowReading(*get_apply_to().subreading);
1825 auto mappings = ss_taglist.get();
1826 auto theTags = ss_taglist.get();
1827 getTagList(*rule->maplist, theTags);
1828
1829 APPEND_TAGLIST_TO_READING(*theTags, *get_apply_to().subreading);
1830
1831 if (!mappings->empty()) {
1832 splitMappings(mappings, *get_apply_to().cohort, *get_apply_to().subreading, true);
1833 }
1834 if (get_apply_to().subreading->hash != state_hash) {
1835 readings_changed = true;
1836 }
1837 }
1838 else if (rule->type == K_SUBSTITUTE) {
1839 // ToDo: Check whether this substitution will do nothing at all to the end result
1840 // ToDo: Not actually...instead, test whether any reading in the cohort already is the end result
1841
1842 auto state_hash = get_apply_to().subreading->hash;
1843 auto theTags = ss_taglist.get();
1844 getTagList(*rule->sublist, theTags);
1845
1846 // Modify the list of tags to remove to be the actual list of tags present, including matching regex and icase tags
1847 FILL_TAG_LIST(theTags);
1848
1849 // Perform the tag removal, remembering the position of the final removed tag for use as insertion spot
1850 size_t tpos = std::numeric_limits<size_t>::max();
1851 bool plain = true;
1852 for (size_t i = 0; i < get_apply_to().subreading->tags_list.size();) {
1853 auto& remter = get_apply_to().subreading->tags_list[i];
1854
1855 if (plain && remter == (*theTags->begin())->hash) {
1856 if (get_apply_to().subreading->baseform == remter) {
1857 get_apply_to().subreading->baseform = 0;
1858 }
1859 remter = substtag;
1860 tpos = i;
1861 for (size_t j = 1; j < theTags->size() && i < get_apply_to().subreading->tags_list.size(); ++j, ++i) {
1862 auto& remter = get_apply_to().subreading->tags_list[i];
1863 auto tter = (*theTags)[j]->hash;
1864 if (remter != tter) {
1865 plain = false;
1866 break;
1867 }
1868 get_apply_to().subreading->tags_list.erase(get_apply_to().subreading->tags_list.begin() + i);
1869 get_apply_to().subreading->tags.erase(tter);
1870 if (get_apply_to().subreading->baseform == tter) {
1871 get_apply_to().subreading->baseform = 0;
1872 }
1873 }
1874 continue;
1875 }
1876
1877 for (auto tter : *theTags) {
1878 if (remter != tter->hash) {
1879 continue;
1880 }
1881 tpos = i;
1882 remter = substtag;
1883 get_apply_to().subreading->tags.erase(tter->hash);
1884 if (get_apply_to().subreading->baseform == tter->hash) {
1885 get_apply_to().subreading->baseform = 0;
1886 }
1887 }
1888
1889 ++i;
1890 }
1891
1892 // Should Substitute really do nothing if no tags were removed? 2013-10-21, Eckhard says this is expected behavior.
1893 if (tpos != std::numeric_limits<size_t>::max()) {
1894 if (!plain) {
1895 for (size_t i = 0; i < get_apply_to().subreading->tags_list.size() && i < tpos;) {
1896 if (get_apply_to().subreading->tags_list[i] == substtag) {
1897 get_apply_to().subreading->tags_list.erase(get_apply_to().subreading->tags_list.begin() + i);
1898 --tpos;
1899 }
1900 else {
1901 ++i;
1902 }
1903 }
1904 }
1905
1906 Tag* wf = nullptr;
1907 index_ruleCohort_no.clear();
1908 TRACE;
1909 get_apply_to().subreading->noprint = false;
1910 if (tpos >= get_apply_to().subreading->tags_list.size()) {
1911 tpos = get_apply_to().subreading->tags_list.size() - 1;
1912 }
1913 ++tpos;
1914 auto mappings = ss_taglist.get();
1915 auto theTags = ss_taglist.get();
1916 getTagList(*rule->maplist, theTags);
1917
1918 for (size_t i = 0; i < get_apply_to().subreading->tags_list.size();) {
1919 if (get_apply_to().subreading->tags_list[i] == substtag) {
1920 get_apply_to().subreading->tags_list.erase(get_apply_to().subreading->tags_list.begin() + i);
1921 tpos = i;
1922
1923 for (auto tag : *theTags) {
1924 if (tag->type & T_VARSTRING) {
1925 tag = generateVarstringTag(tag);
1926 }
1927 if (tag->hash == grammar->tag_any) {
1928 break;
1929 }
1930 if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
1931 mappings->push_back(tag);
1932 }
1933 else {
1934 if (tag->type & T_WORDFORM) {
1935 wf = tag;
1936 }
1937 get_apply_to().subreading->tags_list.insert(get_apply_to().subreading->tags_list.begin() + tpos, tag->hash);
1938 ++tpos;
1939 }
1940 if (updateValidRules(rules, intersects, tag->hash, *get_apply_to().subreading)) {
1941 iter_rules = intersects.find(rule->number);
1942 iter_rules_end = intersects.end();
1943 }
1944 }
1945 }
1946 else {
1947 ++i;
1948 }
1949 }
1950 reflowReading(*get_apply_to().subreading);
1951
1952 if (!mappings->empty()) {
1953 splitMappings(mappings, *get_apply_to().cohort, *get_apply_to().subreading, true);
1954 }
1955 if (wf && wf != get_apply_to().subreading->parent->wordform) {
1956 for (auto r : get_apply_to().subreading->parent->readings) {
1957 delTagFromReading(*r, get_apply_to().subreading->parent->wordform);
1958 addTagToReading(*r, wf);
1959 }
1960 for (auto r : get_apply_to().subreading->parent->deleted) {
1961 delTagFromReading(*r, get_apply_to().subreading->parent->wordform);
1962 addTagToReading(*r, wf);
1963 }
1964 for (auto r : get_apply_to().subreading->parent->delayed) {
1965 delTagFromReading(*r, get_apply_to().subreading->parent->wordform);
1966 addTagToReading(*r, wf);
1967 }
1968 get_apply_to().subreading->parent->wordform = wf;
1969 for (auto r : grammar->wf_rules) {
1970 if (doesWordformsMatch(wf, r->wordform)) {
1971 current.rule_to_cohorts[r->number].insert(get_apply_to().cohort);
1972 intersects.insert(r->number);
1973 }
1974 else {
1975 current.rule_to_cohorts[r->number].erase(get_apply_to().cohort);
1976 }
1977 }
1978 updateValidRules(rules, intersects, wf->hash, *get_apply_to().subreading);
1979 iter_rules = intersects.find(rule->number);
1980 iter_rules_end = intersects.end();
1981 }
1982 }
1983 if (get_apply_to().subreading->hash != state_hash) {
1984 readings_changed = true;
1985 }
1986 }
1987 else if (rule->type == K_APPEND) {
1988 index_ruleCohort_no.clear();
1989 TRACE;
1990
1991 Tag* bf = nullptr;
1992 std::vector<TagList> readings;
1993 auto theTags = ss_taglist.get();
1994 getTagList(*rule->maplist, theTags);
1995
1996 for (auto& tter : *theTags) {
1997 if (tter->type & T_VSTR) {
1998 VARSTRINGIFY(tter);
1999 }
2000 }
2001
2002 for (auto tter : *theTags) {
2003 VARSTRINGIFY(tter);
2004 if (tter->type & T_BASEFORM) {
2005 bf = tter;
2006 readings.resize(readings.size() + 1);
2007 }
2008 if (bf == nullptr) {
2009 u_fprintf(ux_stderr, "Error: There must be a baseform before any other tags in APPEND on line %u.\n", rule->line);
2010 CG3Quit(1);
2011 }
2012 readings.back().push_back(tter);
2013 }
2014
2015 for (const auto& rit : readings) {
2016 Reading* cReading = alloc_reading(get_apply_to().cohort);
2017 ++numReadings;
2018 insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
2019 addTagToReading(*cReading, get_apply_to().cohort->wordform);
2020 cReading->hit_by.push_back(rule->number);
2021 cReading->noprint = false;
2022 TagList mappings;
2023 for (auto tter : rit) {
2024 uint32_t hash = tter->hash;
2025 VARSTRINGIFY(tter);
2026 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
2027 mappings.push_back(tter);
2028 }
2029 else {
2030 hash = addTagToReading(*cReading, tter);
2031 }
2032 if (updateValidRules(rules, intersects, hash, *cReading)) {
2033 iter_rules = intersects.find(rule->number);
2034 iter_rules_end = intersects.end();
2035 }
2036 }
2037 if (!mappings.empty()) {
2038 splitMappings(mappings, *get_apply_to().cohort, *cReading);
2039 }
2040 get_apply_to().cohort->appendReading(cReading);
2041 }
2042
2043 if (get_apply_to().cohort->readings.size() > 1) {
2044 foreach (rit, get_apply_to().cohort->readings) {
2045 if ((*rit)->noprint) {
2046 free_reading(*rit);
2047 rit = get_apply_to().cohort->readings.erase(rit);
2048 rit_end = get_apply_to().cohort->readings.end();
2049 }
2050 }
2051 }
2052
2053 readings_changed = true;
2054 finish_reading_loop = false;
2055 }
2056 else if (rule->type == K_COPY) {
2057 // ToDo: Maybe just goto Substitute directly?
2058 Reading* cReading = get_apply_to().cohort->allocateAppendReading(*get_apply_to().reading);
2059 ++numReadings;
2060 index_ruleCohort_no.clear();
2061 TRACE;
2062 cReading->hit_by.push_back(rule->number);
2063 cReading->noprint = false;
2064
2065 if (rule->sublist) {
2066 auto excepts = ss_taglist.get();
2067 getTagList(*rule->sublist, excepts);
2068 FILL_TAG_LIST_RAW(excepts);
2069 for (auto r = cReading; r; r = r->next) {
2070 for (auto tter : *excepts) {
2071 delTagFromReading(*r, tter);
2072 }
2073 }
2074 }
2075
2076 auto mappings = ss_taglist.get();
2077 auto theTags = ss_taglist.get();
2078 getTagList(*rule->maplist, theTags);
2079
2080 bool did_insert = false;
2081 if (rule->childset1) {
2082 auto spot_tags = ss_taglist.get();
2083 getTagList(*grammar->sets_list[rule->childset1], spot_tags);
2084 FILL_TAG_LIST(spot_tags);
2085 auto it = cReading->tags_list.begin();
2086 for (; it != cReading->tags_list.end(); ++it) {
2087 bool found = true;
2088 auto tmp = it;
2089 for (auto tag : *spot_tags) {
2090 if (*tmp != tag->hash) {
2091 found = false;
2092 break;
2093 }
2094 ++tmp;
2095 }
2096 if (found) {
2097 break;
2098 }
2099 }
2100 if (rule->flags & RF_AFTER) {
2101 std::advance(it, spot_tags->size());
2102 }
2103 if (it != cReading->tags_list.end()) {
2104 insert_taglist_to_reading(it, *theTags, *cReading, mappings);
2105 did_insert = true;
2106 }
2107 }
2108
2109 if (!did_insert) {
2110 APPEND_TAGLIST_TO_READING(*theTags, *cReading);
2111 }
2112 if (!mappings->empty()) {
2113 splitMappings(mappings, *get_apply_to().cohort, *cReading, true);
2114 }
2115 readings_changed = true;
2116 reflowReading(*cReading);
2117 }
2118 else if (rule->type == K_MERGECOHORTS) {
2119 index_ruleCohort_no.clear();
2120
2121 CohortSet withs;
2122 Cohort* target = get_apply_to().cohort;
2123 withs.insert(target);
2124 Cohort* merge_at = target;
2125 for (auto it : rule->dep_tests) {
2126 auto& at = context_stack.back().attach_to;
2127 at.cohort = nullptr;
2128 at.reading = nullptr;
2129 at.subreading = nullptr;
2130 merge_with = nullptr;
2131 set_mark(target);
2132 dep_deep_seen.clear();
2133 tmpl_cntx.clear();
2134 Cohort* attach = nullptr;
2135 bool test_good = (runContextualTest(target->parent, target->local_number, it, &attach) && attach);
2136
2137 profileRuleContext(test_good, rule, it);
2138
2139 if (!test_good) {
2140 finish_reading_loop = false;
2141 return;
2142 }
2143 if (get_attach_to().cohort) {
2144 merge_at = get_attach_to().cohort;
2145 if (merge_with) {
2146 withs.insert(merge_with);
2147 }
2148 }
2149 else if (merge_with) {
2150 withs.insert(merge_with);
2151 }
2152 else {
2153 withs.insert(attach);
2154 }
2155 }
2156
2157 size_t spacesInAddedWf = 0;
2158 context_stack.back().target.cohort = add_cohort(merge_at, spacesInAddedWf);
2159
2160 for (auto c : withs) {
2161 size_t foundSpace = c->text.find_first_of(' ');
2162 while(spacesInAddedWf && foundSpace != std::string::npos) {
2163 c->text.erase(foundSpace, 1);
2164 foundSpace = c->text.find_first_of(' ');
2165 spacesInAddedWf--;
2166 }
2167 rem_cohort(c);
2168 }
2169
2170 // If the last cohort was removed or inserted after, add <<< to the new end
2171 if (current.cohorts.back()->readings.front()->tags.count(endtag) == 0) {
2172 for (auto r : current.cohorts[current.cohorts.size() - 2]->readings) {
2173 delTagFromReading(*r, endtag);
2174 }
2175 for (auto r : current.cohorts.back()->readings) {
2176 addTagToReading(*r, endtag);
2177 if (updateValidRules(rules, intersects, endtag, *r)) {
2178 iter_rules = intersects.find(rule->number);
2179 iter_rules_end = intersects.end();
2180 }
2181 }
2182 }
2183 indexSingleWindow(current);
2184 readings_changed = true;
2185
2186 reset_cohorts_for_loop = true;
2187 }
2188 else if (rule->type == K_COPYCOHORT) {
2189 Cohort* attach = nullptr;
2190 Cohort* cohort = context_stack.back().target.cohort;
2191 uint32_t c = cohort->local_number;
2192 dep_deep_seen.clear();
2193 tmpl_cntx.clear();
2194 context_stack.back().attach_to.cohort = nullptr;
2195 context_stack.back().attach_to.reading = nullptr;
2196 context_stack.back().attach_to.subreading = nullptr;
2197 if (runContextualTest(&current, c, rule->dep_target, &attach) && attach) {
2198 profileRuleContext(true, rule, rule->dep_target);
2199
2200 if (get_attach_to().cohort) {
2201 attach = get_attach_to().cohort;
2202 }
2203 context_target = attach;
2204 bool good = true;
2205 for (auto it : rule->dep_tests) {
2206 context_stack.back().mark = attach;
2207 dep_deep_seen.clear();
2208 tmpl_cntx.clear();
2209 bool test_good = (runContextualTest(attach->parent, attach->local_number, it) != nullptr);
2210
2211 profileRuleContext(test_good, rule, it);
2212
2213 if (!test_good) {
2214 good = test_good;
2215 break;
2216 }
2217 }
2218
2219 if (!good || cohort == attach || cohort->local_number == 0) {
2220 return;
2221 }
2222
2223 auto childset = rule->childset2;
2224 if (rule->flags & RF_REVERSE) {
2225 std::swap(cohort, attach);
2226 childset = rule->childset1;
2227 }
2228
2229 Cohort* cCohort = alloc_cohort(attach->parent);
2230 cCohort->global_number = gWindow->cohort_counter++;
2231 cCohort->wordform = cohort->wordform;
2232 insert_if_exists(cCohort->possible_sets, grammar->sets_any);
2233
2234 auto theTags = ss_taglist.get();
2235 getTagList(*rule->maplist, theTags);
2236
2237 for (auto& tter : *theTags) {
2238 if (tter->type & T_VSTR) {
2239 VARSTRINGIFY(tter);
2240 }
2241 }
2242
2243 auto excepts = ss_taglist.get();
2244 if (rule->sublist) {
2245 getTagList(*rule->sublist, excepts);
2246 FILL_TAG_LIST_RAW(excepts);
2247 }
2248
2249 std::vector<Reading*> rs;
2250 for (auto r : cohort->readings) {
2251 rs.clear();
2252 for (; r; r = r->next) {
2253 auto cReading = alloc_reading(cCohort);
2254 ++numReadings;
2255 cReading->hit_by.push_back(rule->number);
2256 cReading->noprint = false;
2257 TagList mappings;
2258 for (auto hash : r->tags_list) {
2259 auto tter = grammar->single_tags[hash];
2260 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
2261 mappings.push_back(tter);
2262 }
2263 else {
2264 hash = addTagToReading(*cReading, hash);
2265 }
2266 if (updateValidRules(rules, intersects, hash, *cReading)) {
2267 iter_rules = intersects.find(rule->number);
2268 iter_rules_end = intersects.end();
2269 }
2270 }
2271 for (auto tter : *theTags) {
2272 auto hash = tter->hash;
2273 if (hash == grammar->tag_any) {
2274 continue;
2275 }
2276 if (tter->type & T_MAPPING || tter->tag[0] == grammar->mapping_prefix) {
2277 mappings.push_back(tter);
2278 }
2279 else {
2280 hash = addTagToReading(*cReading, hash);
2281 }
2282 if (updateValidRules(rules, intersects, hash, *cReading)) {
2283 iter_rules = intersects.find(rule->number);
2284 iter_rules_end = intersects.end();
2285 }
2286 }
2287 if (!mappings.empty()) {
2288 splitMappings(mappings, *cCohort, *cReading);
2289 }
2290 rs.push_back(cReading);
2291 }
2292 auto rn = rs.front();
2293 for (size_t j = 1; j < rs.size(); ++j) {
2294 rn->next = rs[j];
2295 rn = rn->next;
2296 }
2297 cCohort->appendReading(rs.front());
2298 }
2299
2300 if (cCohort->readings.empty()) {
2301 initEmptyCohort(*cCohort);
2302 if (trace) {
2303 auto r = cCohort->readings.front();
2304 r->hit_by.push_back(rule->number);
2305 r->noprint = false;
2306 }
2307 }
2308
2309 for (auto r : cCohort->readings) {
2310 for (; r; r = r->next) {
2311 for (auto tter : *excepts) {
2312 delTagFromReading(*r, tter);
2313 }
2314 }
2315 }
2316
2317 if (cohort->wread) {
2318 cCohort->wread = alloc_reading(cCohort);
2319 for (auto hash : cohort->wread->tags_list) {
2320 hash = addTagToReading(*cCohort->wread, hash);
2321 if (updateValidRules(rules, intersects, hash, *cCohort->wread)) {
2322 iter_rules = intersects.find(rule->number);
2323 iter_rules_end = intersects.end();
2324 }
2325 }
2326 }
2327
2328 current.parent->cohort_map[cCohort->global_number] = cCohort;
2329 current.parent->dep_window[cCohort->global_number] = cCohort;
2330
2331 CohortSet edges;
2332 collect_subtree(edges, attach, childset);
2333
2334 if (rule->flags & RF_BEFORE) {
2335 attach->parent->cohorts.insert(attach->parent->cohorts.begin() + edges.front()->local_number, cCohort);
2336 attach->parent->all_cohorts.insert(std::find(attach->parent->all_cohorts.begin() + edges.front()->local_number, attach->parent->all_cohorts.end(), edges.front()), cCohort);
2337 attachParentChild(*edges.front(), *cCohort);
2338 }
2339 else {
2340 attach->parent->cohorts.insert(attach->parent->cohorts.begin() + edges.back()->local_number + 1, cCohort);
2341 attach->parent->all_cohorts.insert(std::find(attach->parent->all_cohorts.begin() + edges.back()->local_number, attach->parent->all_cohorts.end(), edges.back()) + 1, cCohort);
2342 attachParentChild(*edges.back(), *cCohort);
2343 }
2344
2345 reindex(attach->parent);
2346 indexSingleWindow(*attach->parent);
2347 readings_changed = true;
2348 reset_cohorts_for_loop = true;
2349 }
2350 }
2351 else if (rule->type == K_SETPARENT || rule->type == K_SETCHILD || rule->type == K_ADDRELATION || rule->type == K_SETRELATION || rule->type == K_REMRELATION || rule->type == K_ADDRELATIONS || rule->type == K_SETRELATIONS || rule->type == K_REMRELATIONS) {
2352 auto dep_target_cb = [&]() -> bool {
2353 Cohort* target = context_stack.back().target.cohort;
2354 Cohort* attach = context_stack.back().attach_to.cohort;
2355 swapper<Cohort*> sw((rule->flags & RF_REVERSE) != 0, target, attach);
2356 if (rule->type == K_SETPARENT || rule->type == K_SETCHILD) {
2357 bool attached = false;
2358 if (rule->type == K_SETPARENT) {
2359 attached = attachParentChild(*attach, *target, (rule->flags & RF_ALLOWLOOP) != 0, (rule->flags & RF_ALLOWCROSS) != 0);
2360 }
2361 else {
2362 attached = attachParentChild(*target, *attach, (rule->flags & RF_ALLOWLOOP) != 0, (rule->flags & RF_ALLOWCROSS) != 0);
2363 }
2364 if (attached) {
2365 index_ruleCohort_no.clear();
2366 // force TRACE to use target
2367 Cohort* at_was = context_stack.back().attach_to.cohort;
2368 context_stack.back().attach_to.cohort = nullptr;
2369 TRACE;
2370 context_stack.back().attach_to.cohort = at_was;
2371 context_stack.back().target.subreading->noprint = false;
2372 has_dep = true;
2373 readings_changed = true;
2374 }
2375 return attached;
2376 }
2377 else if (rule->type == K_ADDRELATION || rule->type == K_SETRELATION || rule->type == K_REMRELATION) {
2378 bool rel_did_anything = false;
2379 auto theTags = ss_taglist.get();
2380 getTagList(*rule->maplist, theTags);
2381 for (auto tter : *theTags) {
2382 VARSTRINGIFY(tter);
2383 if (rule->type == K_ADDRELATION) {
2384 attach->setRelated();
2385 target->setRelated();
2386 rel_did_anything |= target->addRelation(tter->hash, attach->global_number);
2387 add_relation_rtag(target, tter, attach->global_number);
2388 }
2389 else if (rule->type == K_SETRELATION) {
2390 attach->setRelated();
2391 target->setRelated();
2392 rel_did_anything |= target->setRelation(tter->hash, attach->global_number);
2393 set_relation_rtag(target, tter, attach->global_number);
2394 }
2395 else {
2396 rel_did_anything |= target->remRelation(tter->hash, attach->global_number);
2397 rem_relation_rtag(target, tter, attach->global_number);
2398 }
2399 }
2400 if (rel_did_anything) {
2401 index_ruleCohort_no.clear();
2402 // force TRACE to use target
2403 Cohort* at_was = context_stack.back().attach_to.cohort;
2404 context_stack.back().attach_to.cohort = nullptr;
2405 TRACE;
2406 context_stack.back().attach_to.cohort = at_was;
2407 context_stack.back().target.subreading->noprint = false;
2408 readings_changed = true;
2409 }
2410 // don't scan onwards if failed
2411 return true;
2412 }
2413 else if (rule->type == K_ADDRELATIONS || rule->type == K_SETRELATIONS || rule->type == K_REMRELATIONS) {
2414 bool rel_did_anything = false;
2415
2416 auto sublist = ss_taglist.get();
2417 getTagList(*rule->sublist, sublist);
2418
2419 auto maplist = ss_taglist.get();
2420 getTagList(*rule->maplist, maplist);
2421
2422 for (auto tter : *maplist) {
2423 VARSTRINGIFY(tter);
2424 if (rule->type == K_ADDRELATIONS) {
2425 target->setRelated();
2426 rel_did_anything |= target->addRelation(tter->hash, attach->global_number);
2427 add_relation_rtag(target, tter, attach->global_number);
2428 }
2429 else if (rule->type == K_SETRELATIONS) {
2430 target->setRelated();
2431 rel_did_anything |= target->setRelation(tter->hash, attach->global_number);
2432 set_relation_rtag(target, tter, attach->global_number);
2433 }
2434 else {
2435 rel_did_anything |= target->remRelation(tter->hash, attach->global_number);
2436 rem_relation_rtag(target, tter, attach->global_number);
2437 }
2438 }
2439 for (auto tter : *sublist) {
2440 VARSTRINGIFY(tter);
2441 if (rule->type == K_ADDRELATIONS) {
2442 attach->setRelated();
2443 rel_did_anything |= attach->addRelation(tter->hash, target->global_number);
2444 add_relation_rtag(attach, tter, target->global_number);
2445 }
2446 else if (rule->type == K_SETRELATIONS) {
2447 attach->setRelated();
2448 rel_did_anything |= attach->setRelation(tter->hash, target->global_number);
2449 set_relation_rtag(attach, tter, target->global_number);
2450 }
2451 else {
2452 rel_did_anything |= attach->remRelation(tter->hash, target->global_number);
2453 rem_relation_rtag(attach, tter, target->global_number);
2454 }
2455 }
2456 if (rel_did_anything) {
2457 index_ruleCohort_no.clear();
2458 // force TRACE to use target
2459 Cohort* at_was = context_stack.back().attach_to.cohort;
2460 context_stack.back().attach_to.cohort = nullptr;
2461 TRACE;
2462 context_stack.back().attach_to.cohort = at_was;
2463 context_stack.back().target.subreading->noprint = false;
2464 readings_changed = true;
2465 }
2466 // don't scan onwards if failed
2467 return true;
2468 }
2469 return true;
2470 };
2471 int32_t orgoffset = rule->dep_target->offset;
2472 auto seen_targets = ss_u32sv.get();
2473
2474 ReadingSpec orgtarget = context_stack.back().target;
2475 while (true) {
2476 auto utags = ss_utags.get();
2477 auto usets = ss_usets.get();
2478 *utags = *context_stack.back().unif_tags;
2479 *usets = *context_stack.back().unif_sets;
2480
2481 Cohort* attach = nullptr;
2482 Cohort* target = context_stack.back().target.cohort;
2483 seen_targets->insert(target->global_number);
2484 dep_deep_seen.clear();
2485 tmpl_cntx.clear();
2486 context_stack.back().attach_to.cohort = nullptr;
2487 context_stack.back().attach_to.reading = nullptr;
2488 context_stack.back().attach_to.subreading = nullptr;
2489 seen_barrier = false;
2490 if (runContextualTest(target->parent, target->local_number, rule->dep_target, &attach) && attach) {
2491 profileRuleContext(true, rule, rule->dep_target);
2492
2493 bool break_after = seen_barrier || (rule->flags & RF_NEAREST);
2494 if (get_attach_to().cohort) {
2495 attach = get_attach_to().cohort;
2496 }
2497 context_target = attach;
2498 bool good = true;
2499 for (auto it : rule->dep_tests) {
2500 context_stack.back().mark = attach;
2501 dep_deep_seen.clear();
2502 tmpl_cntx.clear();
2503 bool test_good = (runContextualTest(attach->parent, attach->local_number, it) != nullptr);
2504
2505 profileRuleContext(test_good, rule, it);
2506
2507 if (!test_good) {
2508 good = test_good;
2509 break;
2510 }
2511 }
2512 if (!get_attach_to().cohort) {
2513 context_stack.back().attach_to.cohort = attach;
2514 }
2515 if (good) {
2516 ReadingSpec temp = context_stack.back().target;
2517 context_stack.back().target = orgtarget;
2518 bool attached = dep_target_cb();
2519 if (attached) {
2520 break;
2521 }
2522 else {
2523 context_stack.back().target = temp;
2524 }
2525 }
2526 if (break_after) {
2527 break;
2528 }
2529 if (seen_targets->count(attach->global_number)) {
2530 // We've found a cohort we have seen before...
2531 // We assume running the test again would result in the same, so don't bother.
2532 break;
2533 }
2534 // Did not successfully attach due to loop restrictions; look onwards from here
2535 context_stack.back().target = context_stack.back().attach_to;
2536 context_stack.back().unif_tags->swap(utags);
2537 context_stack.back().unif_sets->swap(usets);
2538 if (rule->dep_target->offset != 0) {
2539 // Temporarily set offset to +/- 1
2540 rule->dep_target->offset = ((rule->dep_target->offset < 0) ? -1 : 1);
2541 }
2542 }
2543 else {
2544 break;
2545 }
2546 }
2547 rule->dep_target->offset = orgoffset;
2548 finish_reading_loop = false;
2549 }
2550 else if (rule->type == K_MOVE_AFTER || rule->type == K_MOVE_BEFORE || rule->type == K_SWITCH) {
2551 // this is a per-cohort rule
2552 finish_reading_loop = false;
2553 // Calculate hash of current state to later compare whether this move/switch actually did anything
2554 uint32_t phash = 0;
2555 uint32_t chash = 0;
2556 for (const auto& c : current.cohorts) {
2557 phash = hash_value(c->global_number, phash);
2558 chash = hash_value(c->readings[0]->hash, chash);
2559 }
2560
2561 // ToDo: ** tests will not correctly work for MOVE/SWITCH; cannot move cohorts between windows
2562 Cohort* attach = nullptr;
2563 Cohort* cohort = context_stack.back().target.cohort;
2564 uint32_t c = cohort->local_number;
2565 dep_deep_seen.clear();
2566 tmpl_cntx.clear();
2567 context_stack.back().attach_to.cohort = nullptr;
2568 context_stack.back().attach_to.reading = nullptr;
2569 context_stack.back().attach_to.subreading = nullptr;
2570 if (runContextualTest(&current, c, rule->dep_target, &attach) && attach && cohort->parent == attach->parent) {
2571 profileRuleContext(true, rule, rule->dep_target);
2572
2573 if (get_attach_to().cohort) {
2574 attach = get_attach_to().cohort;
2575 }
2576 context_target = attach;
2577 bool good = true;
2578 for (auto it : rule->dep_tests) {
2579 context_stack.back().mark = attach;
2580 dep_deep_seen.clear();
2581 tmpl_cntx.clear();
2582 bool test_good = (runContextualTest(attach->parent, attach->local_number, it) != nullptr);
2583
2584 profileRuleContext(test_good, rule, it);
2585
2586 if (!test_good) {
2587 good = test_good;
2588 break;
2589 }
2590 }
2591
2592 if (!good || cohort == attach || cohort->local_number == 0) {
2593 return;
2594 }
2595
2596 swapper<Cohort*> sw((rule->flags & RF_REVERSE) != 0, attach, cohort);
2597 CohortSet cohorts;
2598
2599 if (rule->type == K_SWITCH) {
2600 if (attach->local_number == 0) {
2601 return;
2602 }
2603 current.cohorts[cohort->local_number] = attach;
2604 current.cohorts[attach->local_number] = cohort;
2605 cohorts.insert(attach);
2606 cohorts.insert(cohort);
2607 auto ac_c = std::find(current.all_cohorts.begin() + cohort->local_number, current.all_cohorts.end(), cohort);
2608 auto ac_a = std::find(current.all_cohorts.begin() + attach->local_number, current.all_cohorts.end(), attach);
2609 *ac_c = attach;
2610 *ac_a = cohort;
2611 }
2612 else {
2613 CohortSet edges;
2614 collect_subtree(edges, attach, rule->childset2);
2615 collect_subtree(cohorts, cohort, rule->childset1);
2616
2617 bool need_clean = false;
2618 for (auto iter : cohorts) {
2619 if (edges.count(iter)) {
2620 need_clean = true;
2621 break;
2622 }
2623 }
2624
2625 if (need_clean) {
2626 if (isChildOf(cohort, attach)) {
2627 edges.erase(cohorts.rbegin(), cohorts.rend());
2628 }
2629 else /* if (isChildOf(attach, cohort)) */ {
2630 cohorts.erase(edges.rbegin(), edges.rend());
2631 }
2632 }
2633 if (cohorts.empty() || edges.empty()) {
2634 finish_reading_loop = false;
2635 return;
2636 }
2637
2638 for (auto c : reversed(cohorts)) {
2639 current.cohorts.erase(current.cohorts.begin() + c->local_number);
2640 current.all_cohorts.erase(std::find(current.all_cohorts.begin() + c->local_number, current.all_cohorts.end(), c));
2641 }
2642
2643 foreach (iter, current.cohorts) {
2644 (*iter)->local_number = UI32(std::distance(current.cohorts.begin(), iter));
2645 }
2646
2647 for (auto iter : edges) {
2648 if (iter->parent != get_apply_to().cohort->parent) {
2649 u_fprintf(ux_stderr, "Error: Move/Switch on line %u tried to move across window boundaries.\n", rule->line);
2650 CG3Quit(1);
2651 }
2652 for (auto cohort : cohorts) {
2653 if (iter == cohort) {
2654 u_fprintf(ux_stderr, "Error: Move/Switch on line %u tried to move to a removed position.\n", rule->line);
2655 CG3Quit(1);
2656 }
2657 }
2658 }
2659
2660 uint32_t spot = 0;
2661 auto ac_spot = current.all_cohorts.begin();
2662 if (rule->type == K_MOVE_BEFORE) {
2663 spot = edges.front()->local_number;
2664 if (spot == 0) {
2665 spot = 1;
2666 }
2667 ac_spot = std::find(current.all_cohorts.begin() + edges.front()->local_number, current.all_cohorts.end(), edges.front());
2668 if ((*ac_spot)->local_number == 0) {
2669 ++ac_spot;
2670 }
2671 }
2672 else if (rule->type == K_MOVE_AFTER) {
2673 spot = edges.back()->local_number + 1;
2674 ac_spot = std::find(current.all_cohorts.begin() + edges.front()->local_number, current.all_cohorts.end(), edges.back());
2675 ++ac_spot;
2676 }
2677
2678 if (spot > current.cohorts.size()) {
2679 u_fprintf(ux_stderr, "Error: Move/Switch on line %u tried to move out of bounds.\n", rule->line);
2680 CG3Quit(1);
2681 }
2682
2683 for (auto c : reversed(cohorts)) {
2684 current.cohorts.insert(current.cohorts.begin() + spot, c);
2685 current.all_cohorts.insert(ac_spot, c);
2686 }
2687 }
2688 reindex();
2689
2690 // Compare whether this move/switch actually did anything
2691 uint32_t phash_n = 0;
2692 uint32_t chash_n = 0;
2693 for (const auto& c : current.cohorts) {
2694 phash_n = hash_value(c->global_number, phash_n);
2695 chash_n = hash_value(c->readings[0]->hash, chash_n);
2696 }
2697
2698 if (phash != phash_n || chash != chash_n) {
2699 if (++rule_hits[rule->number] > current.cohorts.size() * 100) {
2700 u_fprintf(ux_stderr, "Warning: Move/Switch endless loop detected for rule on line %u around input line %u - bailing out!\n", rule->line, get_apply_to().cohort->line_number);
2701 should_bail = true;
2702 finish_cohort_loop = false;
2703 return;
2704 }
2705
2706 for (auto c : cohorts) {
2707 for (auto iter : c->readings) {
2708 iter->hit_by.push_back(rule->number);
2709 }
2710 }
2711 readings_changed = true;
2712 sorter.do_sort = true;
2713 }
2714 }
2715 }
2716 else if (rule->type == K_WITH) {
2717 TRACE;
2718 bool any_readings_changed = false;
2719 readings_changed = false;
2720 in_nested = true;
2721 for (auto& sr : rule->sub_rules) {
2722 Rule* cur_was = current_rule;
2723 Rule* rule_was = rule;
2724 current_rule = sr;
2725 rule = sr;
2726 bool result = false;
2727 do {
2728 readings_changed = false;
2729 result = runSingleRule(current, *rule, reading_cb, cohort_cb);
2730 any_readings_changed = any_readings_changed || result || readings_changed;
2731 } while ((result || readings_changed) && (rule->flags & RF_REPEAT) != 0) ;
2732 current_rule = cur_was;
2733 rule = rule_was;
2734 }
2735 in_nested = false;
2736 readings_changed = any_readings_changed;
2737 finish_reading_loop = false;
2738 }
2739 else if (rule->type != K_REMCOHORT) {
2740 TRACE;
2741 }
2742 };
2743
2744 removed.resize(0);
2745 selected.resize(0);
2746 bool rv = runSingleRule(current, *rule, reading_cb, cohort_cb);
2747 if (rv || readings_changed) {
2748 if (!(rule->flags & RF_NOITERATE) && section_max_count != 1) {
2749 section_did_something = true;
2750 }
2751 rule_did_something = true;
2752 }
2753 if (should_bail) {
2754 goto bailout;
2755 }
2756 if (should_repeat) {
2757 goto repeat_rule;
2758 }
2759
2760 if (rule_did_something) {
2761 if (trace_rules.contains(rule->line)) {
2762 retval |= RV_TRACERULE;
2763 }
2764 }
2765 if (delimited) {
2766 break;
2767 }
2768 if (rule_did_something && (rule->flags & RF_REPEAT)) {
2769 index_ruleCohort_no.clear();
2770 goto repeat_rule;
2771 }
2772
2773 if (false) {
2774 bailout:
2775 rule_hits[rule->number] = 0;
2776 index_ruleCohort_no.clear();
2777 }
2778
2779 if (retval & RV_TRACERULE) {
2780 break;
2781 }
2782 }
2783
2784 if (section_did_something) {
2785 retval |= RV_SOMETHING;
2786 }
2787 if (delimited) {
2788 retval |= RV_DELIMITED;
2789 }
2790 return retval;
2791}
2792
2793uint32_t GrammarApplicator::runGrammarOnSingleWindow(SingleWindow& current) {
2794 if (!grammar->before_sections.empty() && !no_before_sections) {
2795 uint32_t rv = runRulesOnSingleWindow(current, runsections[-1]);
2796 if (rv & (RV_DELIMITED | RV_TRACERULE)) {
2797 return rv;
2798 }
2799 }
2800
2801 if (!grammar->rules.empty() && !no_sections) {
2802 std::map<uint32_t, uint32_t> counter;
2803 // Caveat: This may look as if it is not recursing previous sections, but those rules are preprocessed into the successive sections so they are actually run.
2804 auto iter = runsections.begin();
2805 auto iter_end = runsections.end();
2806 for (size_t pass = 0; iter != iter_end; ++pass) {
2807 if (iter->first < 0 || (section_max_count && counter[iter->first] >= section_max_count)) {
2808 ++iter;
2809 continue;
2810 }
2811 uint32_t rv = 0;
2812 if (debug_level > 0) {
2813 std::cerr << "Running section " << iter->first << " (rules " << *(iter->second.begin()) << " through " << *(--(iter->second.end())) << ") on window " << current.number << std::endl;
2814 }
2815 rv = runRulesOnSingleWindow(current, iter->second);
2816 ++counter[iter->first];
2817 if (rv & (RV_DELIMITED | RV_TRACERULE)) {
2818 return rv;
2819 }
2820 if (!(rv & RV_SOMETHING)) {
2821 ++iter;
2822 pass = 0;
2823 }
2824 if (pass >= 1000) {
2825 u_fprintf(ux_stderr, "Warning: Endless loop detected before input line %u. Window contents was:", numLines);
2826 UString tag;
2827 for (size_t i = 1; i < current.cohorts.size(); ++i) {
2828 Tag* t = current.cohorts[i]->wordform;
2829 tag.assign(t->tag.begin() + 2, t->tag.begin() + t->tag.size() - 2);
2830 u_fprintf(ux_stderr, " %S", tag.data());
2831 }
2832 u_fprintf(ux_stderr, "\n");
2833 u_fflush(ux_stderr);
2834 break;
2835 }
2836 }
2837 }
2838
2839 if (!grammar->after_sections.empty() && !no_after_sections) {
2840 uint32_t rv = runRulesOnSingleWindow(current, runsections[-2]);
2841 if (rv & (RV_DELIMITED | RV_TRACERULE)) {
2842 return rv;
2843 }
2844 }
2845
2846 return 0;
2847}
2848
2849void GrammarApplicator::runGrammarOnWindow() {
2850 SingleWindow* current = gWindow->current;
2851 did_final_enclosure = false;
2852
2853 for (const auto& vit : current->variables_set) {
2854 variables[vit.first] = vit.second;
2855 }
2856 for (auto vit : current->variables_rem) {
2857 variables.erase(vit);
2858 }
2859 variables[mprefix_key] = mprefix_value;
2860
2861 if (has_dep) {
2862 reflowDependencyWindow();
2863 if (!input_eof && !gWindow->next.empty() && gWindow->next.back()->cohorts.size() > 1) {
2864 for (auto cohort : gWindow->next.back()->cohorts) {
2865 gWindow->dep_window[cohort->global_number] = cohort;
2866 }
2867 }
2868 }
2869 if (has_relations) {
2870 reflowRelationWindow();
2871 }
2872
2873 if (!grammar->parentheses.empty()) {
2874 label_scanParentheses:
2875 reverse_foreach (iter, current->cohorts) {
2876 Cohort* c = *iter;
2877 if (c->is_pleft == 0) {
2878 continue;
2879 }
2880 auto p = grammar->parentheses.find(c->is_pleft);
2881 if (p != grammar->parentheses.end()) {
2882 auto right = iter.base();
2883 --right;
2884 --right;
2885 c = *right;
Value stored to 'c' is never read
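Note on the flagged dead store: line 2885 copies the cohort preceding the current one into 'c', and line 2886 immediately steps 'right' back, so the value stored in 'c' is never read again in this branch. A minimal sketch of a possible cleanup, assuming the analyzer's finding holds and 'c' really is unused afterwards (an editorial sketch, not the upstream fix), keeps only the final position of 'right':

  // sketch: lines 2882-2886 reduce to positioning 'right' at the current cohort
  auto right = iter.base();
  --right; // same final position as before; the dead re-read of '*right' into 'c' is dropped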
2886 ++right;
2887 bool found = false;
2888 CohortVector encs;
2889 for (; right != current->cohorts.end(); ++right) {
2890 Cohort* s = *right;
2891 encs.push_back(s);
2892 if (s->is_pright == p->second) {
2893 found = true;
2894 break;
2895 }
2896 }
2897 if (found) {
2898 auto left = iter.base();
2899 --left;
2900 uint32_t lc = (*left)->local_number;
2901 ++right;
2902 for (; right != current->cohorts.end(); ++right) {
2903 *left = *right;
2904 (*left)->local_number = lc;
2905 ++lc;
2906 ++left;
2907 }
2908 current->cohorts.resize(current->cohorts.size() - encs.size());
2909 auto ec = std::find(current->all_cohorts.begin() + encs.front()->local_number, current->all_cohorts.end(), encs.front());
2910 --ec;
2911 do {
2912 ++ec;
2913 (*ec)->type |= CT_ENCLOSED;
2914 ++((*ec)->enclosed);
2915 } while (*ec != encs.back());
2916 current->has_enclosures = true;
2917 goto label_scanParentheses;
2918 }
2919 }
2920 }
2921 }
2922
2923 par_left_tag = 0;
2924 par_right_tag = 0;
2925 par_left_pos = 0;
2926 par_right_pos = 0;
2927 uint32_t pass = 0;
2928
2929label_runGrammarOnWindow_begin:
2930 while (!gWindow->previous.empty() && gWindow->previous.size() > num_windows) {
2931 SingleWindow* tmp = gWindow->previous.front();
2932 printSingleWindow(tmp, *ux_stdout);
2933 free_swindow(tmp);
2934 gWindow->previous.erase(gWindow->previous.begin());
2935 }
2936
2937 rule_hits.clear();
2938 index_ruleCohort_no.clear();
2939 current = gWindow->current;
2940 indexSingleWindow(*current);
2941 current->hit_external.clear();
2942 gWindow->rebuildCohortLinks(); // ToDo: Hack. This can be done better...
2943
2944 ++pass;
2945 if (pass > 1000) {
2946 u_fprintf(ux_stderr, "Warning: Endless loop detected before input line %u. Window contents was:", numLines);
2947 UString tag;
2948 for (size_t i = 1; i < current->cohorts.size(); ++i) {
2949 Tag* t = current->cohorts[i]->wordform;
2950 tag.assign(t->tag.begin() + 2, t->tag.begin() + t->tag.size() - 2);
2951 u_fprintf(ux_stderr, " %S", tag.data());
2952 }
2953 u_fprintf(ux_stderr, "\n");
2954 u_fflush(ux_stderr);
2955 return;
2956 }
2957
2958 if (trace_encl) {
2959 uint32_t hitpass = std::numeric_limits<uint32_t>::max() - pass;
2960 for (auto& c : current->cohorts) {
2961 for (auto rit : c->readings) {
2962 rit->hit_by.push_back(hitpass);
2963 }
2964 }
2965 }
2966
2967 uint32_t rv = runGrammarOnSingleWindow(*current);
2968 if (rv & RV_DELIMITED) {
2969 goto label_runGrammarOnWindow_begin;
2970 }
2971
2972label_unpackEnclosures:
2973 if (current->has_enclosures) {
2974 size_t nc = current->all_cohorts.size();
2975 for (size_t i = 0; i < nc; ++i) {
2976 Cohort* c = current->all_cohorts[i];
2977 if (c->enclosed == 1) {
2978 size_t la = i;
2979 for (; la > 0; --la) {
2980 if (!(current->all_cohorts[la - 1]->type & (CT_ENCLOSED | CT_REMOVED | CT_IGNORED))) {
2981 --la;
2982 break;
2983 }
2984 }
2985 size_t ni = current->all_cohorts[la]->local_number;
2986
2987 size_t ra = i;
2988 size_t ne = 0;
2989 for (; ra < nc; ++ra) {
2990 if (!(current->all_cohorts[ra]->type & (CT_ENCLOSED | CT_REMOVED | CT_IGNORED))) {
2991 break;
2992 }
2993 --(current->all_cohorts[ra]->enclosed);
2994 if (current->all_cohorts[ra]->enclosed == 0) {
2995 current->all_cohorts[ra]->type &= ~CT_ENCLOSED;
2996 ++ne;
2997 }
2998 }
2999
3000 current->cohorts.resize(current->cohorts.size() + ne, nullptr);
3001 for (size_t j = current->cohorts.size() - 1; j > ni + ne; --j) {
3002 current->cohorts[j] = current->cohorts[j - ne];
3003 current->cohorts[j]->local_number = UI32(j);
3004 current->cohorts[j - ne] = nullptr;
3005 }
3006 for (size_t j = 0; i < ra; ++i) {
3007 if (current->all_cohorts[i]->enclosed == 0) {
3008 current->cohorts[ni + j + 1] = current->all_cohorts[i];
3009 current->cohorts[ni + j + 1]->local_number = UI32(ni + j + 1);
3010 current->cohorts[ni + j + 1]->parent = current;
3011 ++j;
3012 }
3013 }
3014 par_left_tag = current->all_cohorts[la + 1]->is_pleft;
3015 par_right_tag = current->all_cohorts[ra - 1]->is_pright;
3016 par_left_pos = UI32(ni + 1);
3017 par_right_pos = UI32(ni + ne);
3018 if (rv & RV_TRACERULE) {
3019 goto label_unpackEnclosures;
3020 }
3021 goto label_runGrammarOnWindow_begin;
3022 }
3023 }
3024 if (!did_final_enclosure) {
3025 par_left_tag = 0;
3026 par_right_tag = 0;
3027 par_left_pos = 0;
3028 par_right_pos = 0;
3029 did_final_enclosure = true;
3030 if (rv & RV_TRACERULE) {
3031 goto label_unpackEnclosures;
3032 }
3033 goto label_runGrammarOnWindow_begin;
3034 }
3035 }
3036
3037 bool should_reflow = false;
3038 for (size_t i = current->all_cohorts.size(); i > 0; --i) {
3039 auto cohort = current->all_cohorts[i - 1];
3040 if (cohort->type & CT_IGNORED) {
3041 for (auto ins = i; ins > 0; --ins) {
3042 if (!(current->all_cohorts[ins - 1]->type & (CT_REMOVED | CT_ENCLOSED | CT_IGNORED))) {
3043 current->cohorts.insert(current->cohorts.begin() + current->all_cohorts[ins - 1]->local_number + 1, cohort);
3044 cohort->type &= ~CT_IGNORED;
3045 current->parent->cohort_map.insert(std::make_pair(cohort->global_number, cohort));
3046 should_reflow = true;
3047 break;
3048 }
3049 }
3050 }
3051 }
3052 if (should_reflow) {
3053 for (size_t i = 0; i < current->cohorts.size(); ++i) {
3054 current->cohorts[i]->local_number = UI32(i);
3055 }
3056 reflowDependencyWindow();
3057 }
3058}
3059}
3060
3061// This helps the all_vislcg3.cpp profiling builds
3062#undef TRACE