Bug Summary

File: GrammarApplicator_reflow.cpp
Warning: line 801, column 2
Forming reference to null pointer

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name GrammarApplicator_reflow.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -resource-dir /usr/lib/llvm-16/lib/clang/16 -D BOOST_NO_CXX98_FUNCTION_BASE=1 -D HAS_FS -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D _POSIX_C_SOURCE=200112 -D cg3_EXPORTS -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include/posix -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -I /usr/local/include -D NDEBUG -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -std=c++2b -fdebug-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src 
-ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/cg3/scan-build/2024-09-11-161008-13503-1 -x c++ /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src/GrammarApplicator_reflow.cpp
1/*
2* Copyright (C) 2007-2024, GrammarSoft ApS
3* Developed by Tino Didriksen <mail@tinodidriksen.com>
4* Design by Eckhard Bick <eckhard.bick@mail.dk>, Tino Didriksen <mail@tinodidriksen.com>
5*
6* This program is free software: you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation, either version 3 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program. If not, see <https://www.gnu.org/licenses/>.
18*/
19
20#include "GrammarApplicator.hpp"
21#include "Strings.hpp"
22#include "Tag.hpp"
23#include "Grammar.hpp"
24#include "Window.hpp"
25#include "SingleWindow.hpp"
26#include "Reading.hpp"
27
28namespace CG3 {
29
30Tag* GrammarApplicator::makeBaseFromWord(uint32_t tag) {
31 return makeBaseFromWord(grammar->single_tags.find(tag)->second);
32}
33
34Tag* GrammarApplicator::makeBaseFromWord(Tag* tag) {
35 const size_t len = tag->tag.size();
36 if (len < 5) {
37 return tag;
38 }
39 static thread_local UString n;
40 n.clear();
41 n.resize(len - 2);
42 n[0] = n[len - 3] = '"';
43 u_strncpyu_strncpy_72(&n[1], tag->tag.data() + 2, SI32(len - 4));
44 Tag* nt = addTag(n);
45 return nt;
46}
47
48bool GrammarApplicator::isChildOf(const Cohort* child, const Cohort* parent) {
49 bool retval = false;
50
51 if (parent->global_number == child->global_number) {
52 retval = true;
53 }
54 else if (parent->global_number == child->dep_parent) {
55 retval = true;
56 }
57 else {
58 size_t i = 0;
59 for (const Cohort* inner = child; i < 1000; ++i) {
60 if (inner->dep_parent == 0 || inner->dep_parent == DEP_NO_PARENT) {
61 retval = false;
62 break;
63 }
64 auto it = gWindow->cohort_map.find(inner->dep_parent);
65 if (it != gWindow->cohort_map.end()) {
66 inner = it->second;
67 }
68 else {
69 break;
70 }
71 if (inner->dep_parent == parent->global_number) {
72 retval = true;
73 break;
74 }
75 }
76 if (i == 1000) {
77 if (verbosity_level > 0) {
78 u_fprintfu_fprintf_72(
79 ux_stderr,
80 "Warning: While testing whether %u is a child of %u the counter exceeded 1000 indicating a loop higher up in the tree.\n",
81 child->global_number, parent->global_number);
82 }
83 }
84 }
85 return retval;
86}
87
88bool GrammarApplicator::wouldParentChildLoop(const Cohort* parent, const Cohort* child) {
89 bool retval = false;
90
91 if (parent->global_number == child->global_number) {
92 retval = true;
93 }
94 else if (parent->global_number == child->dep_parent) {
95 retval = false;
96 }
97 else if (parent->global_number == parent->dep_parent) {
98 retval = false;
99 }
100 else if (parent->dep_parent == child->global_number) {
101 retval = true;
102 }
103 else {
104 size_t i = 0;
105 for (const Cohort* inner = parent; i < 1000; ++i) {
106 if (inner->dep_parent == 0 || inner->dep_parent == DEP_NO_PARENT) {
107 retval = false;
108 break;
109 }
110 auto it = gWindow->cohort_map.find(inner->dep_parent);
111 if (it != gWindow->cohort_map.end()) {
112 inner = it->second;
113 }
114 else {
115 break;
116 }
117 if (inner->dep_parent == child->global_number) {
118 retval = true;
119 break;
120 }
121 }
122 if (i == 1000) {
123 if (verbosity_level > 0) {
124 u_fprintfu_fprintf_72(
125 ux_stderr,
126 "Warning: While testing whether %u and %u would loop the counter exceeded 1000 indicating a loop higher up in the tree.\n",
127 child->global_number, parent->global_number);
128 }
129 }
130 }
131 return retval;
132}
133
134bool GrammarApplicator::wouldParentChildCross(const Cohort* parent, const Cohort* child) {
135 uint32_t mn = std::min(parent->global_number, child->global_number);
136 uint32_t mx = std::max(parent->global_number, child->global_number);
137
138 for (uint32_t i = mn + 1; i < mx; ++i) {
139 auto it = gWindow->cohort_map.find(parent->dep_parent);
140 if (it != gWindow->cohort_map.end() && it->second->dep_parent != DEP_NO_PARENT) {
141 if (it->second->dep_parent < mn || it->second->dep_parent > mx) {
142 return true;
143 }
144 }
145 }
146
147 return false;
148}
149
150bool GrammarApplicator::attachParentChild(Cohort& parent, Cohort& child, bool allowloop, bool allowcrossing) {
151 parent.dep_self = parent.global_number;
152 child.dep_self = child.global_number;
153
154 if (!allowloop && dep_block_loops && wouldParentChildLoop(&parent, &child)) {
155 if (verbosity_level > 0) {
156 u_fprintfu_fprintf_72(
157 ux_stderr,
158 "Warning: Dependency between %u and %u would cause a loop. Will not attach them.\n",
159 child.global_number, parent.global_number);
160 }
161 return false;
162 }
163
164 if (!allowcrossing && dep_block_crossing && wouldParentChildCross(&parent, &child)) {
165 if (verbosity_level > 0) {
166 u_fprintfu_fprintf_72(
167 ux_stderr,
168 "Warning: Dependency between %u and %u would cause crossing branches. Will not attach them.\n",
169 child.global_number, parent.global_number);
170 }
171 return false;
172 }
173
174 if (child.dep_parent == DEP_NO_PARENT) {
175 child.dep_parent = child.dep_self;
176 }
177 auto it = gWindow->cohort_map.find(child.dep_parent);
178 if (it != gWindow->cohort_map.end()) {
179 it->second->remChild(child.dep_self);
180 }
181
182 child.dep_parent = parent.global_number;
183 parent.addChild(child.global_number);
184
185 parent.type |= CT_DEP_DONE;
186 child.type |= CT_DEP_DONE;
187
188 if (!dep_has_spanned && child.parent != parent.parent) {
189 u_fprintfu_fprintf_72(
190 ux_stderr,
191 "Info: Dependency between %u and %u spans the window boundaries. Enumeration will be global from here on.\n",
192 child.global_number, parent.global_number);
193 dep_has_spanned = true;
194 }
195 return true;
196}
197
// Resolves the input-side dependency numbering (dep_self / dep_parent) of the
// cohorts queued in gWindow->dep_window into global cohort numbers and
// registers each cohort as a child of its resolved parent.
//
// max: if non-zero, cohorts with global_number >= max are not processed.
//      When zero, and dep_delimit is set, input is not at EOF and the last
//      queued window has more than one cohort, it defaults to the global
//      number of that window's cohorts[1].
//
// dep_map and dep_window are both cleared before returning.
198void GrammarApplicator::reflowDependencyWindow(uint32_t max) {
199 if (dep_delimit && !max && !input_eof && !gWindow->next.empty() && gWindow->next.back()->cohorts.size() > 1) {
200 max = gWindow->next.back()->cohorts[1]->global_number;
201 }
202
 // Make sure key 0 exists in dep_window ...
203 if (gWindow->dep_window.empty() || gWindow->dep_window.begin()->second->parent == 0) {
204 gWindow->dep_window[0] = gWindow->current->cohorts[0];
205 }
206 else if (gWindow->dep_window.find(0) == gWindow->dep_window.end()) {
207 // This has to be done in 2 steps or it will segfault on Linux for some reason...
208 // Turns out g++ evaluates left side of = first, and MSVC++ does right side first, so g++ accessed its own newly created [0] at .begin()
209 Cohort* tmp = gWindow->dep_window.begin()->second->parent->cohorts[0];
210 gWindow->dep_window[0] = tmp;
211 }
 // ... and likewise in cohort_map.
212 if (gWindow->cohort_map.empty()) {
213 gWindow->cohort_map[0] = gWindow->current->cohorts[0];
214 }
215 else if (gWindow->cohort_map.find(0) == gWindow->cohort_map.end()) {
216 Cohort* tmp = gWindow->current->cohorts[0];
217 Cohort* c = gWindow->cohort_map.begin()->second;
218 if (c->parent) {
219 tmp = c->parent->cohorts[0];
220 }
221 gWindow->cohort_map[0] = tmp;
222 }
223
 // Each outer iteration handles one run of cohorts whose dep_self values are unique.
224 for (auto begin = gWindow->dep_window.begin(); begin != gWindow->dep_window.end();) {
 // Skip cohorts that are already resolved or carry no dep_self.
225 while (begin != gWindow->dep_window.end() && (begin->second->type & CT_DEP_DONE || !begin->second->dep_self)) {
226 ++begin;
227 }
228 gWindow->dep_map.clear();
229
 // Pass 1: map each input dep_self to the cohort's global number, stopping
 // at max or at the first dep_self that is already in the map.
230 auto end = begin;
231 for (; end != gWindow->dep_window.end(); ++end) {
232 Cohort* cohort = end->second;
233 if (cohort->type & CT_DEP_DONE) {
234 continue;
235 }
236 if (!cohort->dep_self) {
237 continue;
238 }
239 if (max && cohort->global_number >= max) {
240 break;
241 }
242 if (gWindow->dep_map.find(cohort->dep_self) != gWindow->dep_map.end()) {
243 break;
244 }
245 gWindow->dep_map[cohort->dep_self] = cohort->global_number;
246 cohort->dep_self = cohort->global_number;
247 }
248
249 if (gWindow->dep_map.empty()) {
250 break;
251 }
252
 // Parent number 0 always resolves to 0.
253 gWindow->dep_map[0] = 0;
 // Pass 2: translate dep_parent through dep_map and attach to the parent.
254 for (; begin != end; ++begin) {
255 Cohort* cohort = begin->second;
256 if (max && cohort->global_number >= max) {
257 break;
258 }
259 if (cohort->dep_parent == DEP_NO_PARENT) {
260 continue;
261 }
 // Only cohorts renumbered in pass 1 (dep_self == global_number) are handled.
262 if (cohort->dep_self == cohort->global_number) {
263 if (!(cohort->type & CT_DEP_DONE) && gWindow->dep_map.find(cohort->dep_parent) == gWindow->dep_map.end()) {
264 if (verbosity_level > 0) {
265 u_fprintfu_fprintf_72(
266 ux_stderr,
267 "Warning: Parent %u of dep %u in cohort %u of window %u does not exist - ignoring.\n",
268 cohort->dep_parent, cohort->dep_self, cohort->local_number, cohort->parent->number);
269 u_fflushu_fflush_72(ux_stderr);
270 }
271 cohort->dep_parent = DEP_NO_PARENT;
272 }
273 else {
274 if (!(cohort->type & CT_DEP_DONE)) {
275 auto dep_real = gWindow->dep_map.find(cohort->dep_parent)->second;
276 cohort->dep_parent = dep_real;
277 }
 // Point cohort_map[0] at the first cohort of this cohort's own window before the parent lookup.
278 gWindow->cohort_map[0] = cohort->parent->cohorts[0];
279 auto tmp = gWindow->cohort_map.find(cohort->dep_parent);
280 if (tmp != gWindow->cohort_map.end()) {
281 tmp->second->addChild(cohort->dep_self);
282 }
283 cohort->type |= CT_DEP_DONE;
284 }
285 }
286 }
287 }
288
289 gWindow->dep_map.clear();
290 gWindow->dep_window.clear();
291}
292
293void GrammarApplicator::reflowRelationWindow(uint32_t max) {
294 if (!max && !input_eof && !gWindow->next.empty() && gWindow->next.back()->cohorts.size() > 1) {
295 max = gWindow->next.back()->cohorts[0]->global_number;
296 }
297
298 Cohort* cohort = gWindow->current->cohorts[1];
299 while (cohort->prev) {
300 cohort = cohort->prev;
301 }
302
303 for (; cohort; cohort = cohort->next) {
304 if (max && cohort->global_number >= max) {
305 break;
306 }
307
308 for (auto rel = cohort->relations_input.begin(); rel != cohort->relations_input.end();) {
309 auto newrel = ss_u32sv.get();
310
311 for (auto target : rel->second) {
312 auto it = gWindow->relation_map.find(target);
313 if (it != gWindow->relation_map.end()) {
314 cohort->relations[rel->first].insert(it->second);
315 }
316 else {
317 newrel->insert(target);
318 }
319 }
320
321 // Defer missing relations for later
322 if (newrel->empty()) {
323 rel = cohort->relations_input.erase(rel);
324 }
325 else {
326 rel->second = newrel;
327 ++rel;
328 }
329 }
330 }
331}
332
333void GrammarApplicator::reflowReading(Reading& reading) {
334 reading.tags.clear();
335 reading.tags_plain.clear();
336 reading.tags_textual.clear();
337 reading.tags_numerical.clear();
338 reading.tags_bloom.clear();
339 reading.tags_textual_bloom.clear();
340 reading.tags_plain_bloom.clear();
341 reading.mapping = nullptr;
342 reading.tags_string.clear();
343
344 insert_if_exists(reading.parent->possible_sets, grammar->sets_any);
345
346 Reading::tags_list_t tlist;
347 tlist.swap(reading.tags_list);
348
349 for (auto tter : tlist) {
350 addTagToReading(reading, tter, false);
351 }
352
353 reading.rehash();
354}
355
// Expands a varstring tag into a concrete tag and returns it via addTag().
//
// Working on a per-thread copy of tag->tag, this:
//  1. converts the raw %u/%U/%l/%L markers to their control-code forms,
//  2. replaces each name in tag->vs_names with the '_'-joined tags of the
//     matching set in tag->vs_sets,
//  3. replaces the $1-$9 placeholders with the regex capture groups of the
//     innermost context (context_stack.back()),
//  4. applies the case-conversion markers,
//  5. appends 'i' and/or 'r' when the tag is case-insensitive / a regex.
// A warning is printed when nothing was substituted and the text is unchanged.
356Tag* GrammarApplicator::generateVarstringTag(const Tag* tag) {
 // Per-thread scratch string, reset on every call.
357 static thread_local UnicodeString tmp;
358 tmp.remove();
359 tmp.append(tag->tag.data(), SI32(tag->tag.size()));
360 bool did_something = false;
361
362 // Convert %[UuLl] markers to control codes to avoid having combined %$1 accidentally match %L
363 constexpr UStringView raw[] = { STR_VSu_raw, STR_VSU_raw, STR_VSl_raw, STR_VSL_raw };
364 constexpr UStringView x01[] = { STR_VSu, STR_VSU, STR_VSl, STR_VSL };
365 for (size_t i = 0; i < 4; ++i) {
366 findAndReplace(tmp, raw[i].data(), x01[i].data());
367 }
368
369 // Replace unified sets with their matching tags
370 if (tag->vs_sets) {
371 for (size_t i = 0; i < tag->vs_sets->size(); ++i) {
372 auto tags = ss_taglist.get();
373 getTagList(*(*tag->vs_sets)[i], tags);
374 static thread_local UString rpl;
375 rpl.clear();
376 // If there are multiple tags, such as from CompositeTags, put _ between them
377 foreach (iter, *tags)if (!(*tags).empty()) for (auto iter = (*tags).begin(), iter_end
= (*tags).end(); iter != iter_end; ++iter)
 {
378 rpl += (*iter)->tag;
 // A separator goes after every tag except the last one.
379 if (std::distance(iter, iter_end) > 1) {
380 rpl += '_';
381 }
382 }
383 findAndReplace(tmp, (*tag->vs_names)[i].data(), rpl.data());
384 did_something = true;
385 }
386 }
387
388 // Replace $1-$9 with their respective match groups
389 constexpr UStringView grp[] = { STR_VS1, STR_VS2, STR_VS3, STR_VS4, STR_VS5, STR_VS6, STR_VS7, STR_VS8, STR_VS9 };
390 for (size_t i = 0; i < context_stack.back().regexgrp_ct && i < 9; ++i) {
391 findAndReplace(tmp, grp[i].data(), USV((*context_stack.back().regexgrps)[i]));
392 did_something = true;
393 }
394
395 // Handle %U %u %L %l markers.
 // Each pass picks one marker position (mpos), removes the two-unit marker and
 // applies its conversion; the loop repeats until no marker is found.
396 bool found;
397 do {
398 found = false;
399 int32_t pos = -1, mpos = -1;
400 if ((pos = tmp.lastIndexOf(STR_VSu.data(), SI32(STR_VSu.size()), 0)) != -1) {
401 found = true;
402 mpos = std::max(mpos, pos);
403 }
404 if ((pos = tmp.lastIndexOf(STR_VSU.data(), SI32(STR_VSU.size()), mpos)) != -1) {
405 found = true;
406 mpos = std::max(mpos, pos);
407 }
408 if ((pos = tmp.lastIndexOf(STR_VSl.data(), SI32(STR_VSl.size()), mpos)) != -1) {
409 found = true;
410 mpos = std::max(mpos, pos);
411 }
412 if ((pos = tmp.lastIndexOf(STR_VSL.data(), SI32(STR_VSL.size()), mpos)) != -1) {
413 found = true;
414 mpos = std::max(mpos, pos);
415 }
416 if (found && mpos != -1) {
 // The second code unit of the marker selects the conversion.
417 UChar mode = tmp[mpos + 1];
418 tmp.remove(mpos, 2);
 // 'u' / 'l' convert the single code unit at mpos; 'U' / 'L' convert everything from mpos onward.
419 if (mode == 'u') {
420 UnicodeString range(tmp, mpos, 1);
421 range.toUpper();
422 tmp.setCharAt(mpos, range[0]);
423 }
424 else if (mode == 'U') {
425 UnicodeString range(tmp, mpos);
426 range.toUpper();
427 tmp.truncate(mpos);
428 tmp.append(range);
429 }
430 else if (mode == 'l') {
431 UnicodeString range(tmp, mpos, 1);
432 range.toLower();
433 tmp.setCharAt(mpos, range[0]);
434 }
435 else if (mode == 'L') {
436 UnicodeString range(tmp, mpos);
437 range.toLower();
438 tmp.truncate(mpos);
439 tmp.append(range);
440 }
441 did_something = true;
442 }
443 } while (found);
444
 // Append a suffix letter for each of these flags that is set on the source tag.
445 if (tag->type & T_CASE_INSENSITIVE) {
446 tmp += 'i';
447 }
448 if (tag->type & T_REGEXP) {
449 tmp += 'r';
450 }
451
452 const UChar* nt = tmp.getTerminatedBuffer();
 // Nothing was substituted and the text is identical to the source tag.
453 if (!did_something && nt == tag->tag) {
454 u_fprintfu_fprintf_72(ux_stderr, "Warning: Unable to generate from tag '%S'! Possibly missing KEEPORDER and/or capturing regex from grammar on line %u before input line %u.\n", tag->tag.data(), grammar->lines, numLines);
455 u_fflushu_fflush_72(ux_stderr);
456 }
457 return addTag(nt, true);
458}
459
460uint32_t GrammarApplicator::addTagToReading(Reading& reading, uint32_t utag, bool rehash) {
461 Tag* tag = grammar->single_tags.find(utag)->second;
462 return addTagToReading(reading, tag, rehash);
463}
464
465uint32_t GrammarApplicator::addTagToReading(Reading& reading, Tag* tag, bool rehash) {
466 if (tag->type & T_VARSTRING) {
467 tag = generateVarstringTag(tag);
468 }
469
470 auto it = grammar->sets_by_tag.find(tag->hash);
471 if (it != grammar->sets_by_tag.end()) {
472 reading.parent->possible_sets.resize(std::max(reading.parent->possible_sets.size(), it->second.size()));
473 reading.parent->possible_sets |= it->second;
474 }
475 reading.tags.insert(tag->hash);
476 reading.tags_list.push_back(tag->hash);
477 reading.tags_bloom.insert(tag->hash);
478 // ToDo: Remove for real ordered mode
479 if (ordered) {
480 if (!reading.tags_string.empty()) {
481 reading.tags_string += ' ';
482 }
483 reading.tags_string += tag->tag;
484 reading.tags_string_hash = hash_value(reading.tags_string);
485 }
486 if (grammar->parentheses.find(tag->hash) != grammar->parentheses.end()) {
487 reading.parent->is_pleft = tag->hash;
488 }
489 if (grammar->parentheses_reverse.find(tag->hash) != grammar->parentheses_reverse.end()) {
490 reading.parent->is_pright = tag->hash;
491 }
492
493 if (tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix) {
494 if (reading.mapping && reading.mapping != tag) {
495 u_fprintfu_fprintf_72(ux_stderr, "Error: addTagToReading() cannot add a mapping tag to a reading which already is mapped!\n");
496 CG3Quit(1);
497 }
498 reading.mapping = tag;
499 }
500 if (tag->type & (T_TEXTUAL | T_WORDFORM | T_BASEFORM)) {
501 reading.tags_textual.insert(tag->hash);
502 reading.tags_textual_bloom.insert(tag->hash);
503 }
504 if (tag->type & T_NUMERICAL) {
505 reading.tags_numerical[tag->hash] = tag;
506 reading.parent->type &= ~CT_NUM_CURRENT;
507 }
508 if (!reading.baseform && (tag->type & T_BASEFORM)) {
509 reading.baseform = tag->hash;
510 }
511 if (parse_dep && (tag->type & T_DEPENDENCY) && !(reading.parent->type & CT_DEP_DONE)) {
512 reading.parent->dep_self = tag->dep_self;
513 reading.parent->dep_parent = tag->dep_parent;
514 if (tag->dep_parent == tag->dep_self) {
515 reading.parent->dep_parent = DEP_NO_PARENT;
516 }
517 has_dep = true;
518 }
519 if (grammar->has_relations && (tag->type & T_RELATION)) {
520 if (tag->dep_parent && tag->comparison_hash) {
521 reading.parent->relations_input[tag->comparison_hash].insert(tag->dep_parent);
522 }
523 if (tag->dep_self) {
524 gWindow->relation_map[tag->dep_self] = reading.parent->global_number;
525 }
526 has_relations = true;
527 reading.parent->setRelated();
528 }
529 if (!(tag->type & T_SPECIAL)) {
530 reading.tags_plain.insert(tag->hash);
531 reading.tags_plain_bloom.insert(tag->hash);
532 }
533 if (rehash) {
534 reading.rehash();
535 }
536
537 if (grammar->has_bag_of_tags) {
538 Reading& bot = reading.parent->parent->bag_of_tags;
539 bot.tags.insert(tag->hash);
540 bot.tags_list.push_back(tag->hash);
541 bot.tags_bloom.insert(tag->hash);
542
543 if (tag->type & (T_TEXTUAL | T_WORDFORM | T_BASEFORM)) {
544 bot.tags_textual.insert(tag->hash);
545 bot.tags_textual_bloom.insert(tag->hash);
546 }
547 if (tag->type & T_NUMERICAL) {
548 bot.tags_numerical[tag->hash] = tag;
549 }
550 if (!reading.baseform && (tag->type & T_BASEFORM)) {
551 bot.baseform = tag->hash;
552 }
553 if (!(tag->type & T_SPECIAL)) {
554 bot.tags_plain.insert(tag->hash);
555 bot.tags_plain_bloom.insert(tag->hash);
556 }
557 if (rehash) {
558 bot.rehash();
559 }
560 }
561
562 return tag->hash;
563}
564
565void GrammarApplicator::delTagFromReading(Reading& reading, uint32_t utag) {
566 erase(reading.tags_list, utag);
567 reading.tags.erase(utag);
568 reading.tags_textual.erase(utag);
569 reading.tags_numerical.erase(utag);
570 reading.tags_plain.erase(utag);
571 if (reading.mapping && utag == reading.mapping->hash) {
572 reading.mapping = nullptr;
573 }
574 if (utag == reading.baseform) {
575 reading.baseform = 0;
576 }
577 reading.rehash();
578 reading.parent->type &= ~CT_NUM_CURRENT;
579}
580
581void GrammarApplicator::delTagFromReading(Reading& reading, Tag* tag) {
582 return delTagFromReading(reading, tag->hash);
583}
584
585bool GrammarApplicator::unmapReading(Reading& reading, const uint32_t rule) {
586 bool readings_changed = false;
587 if (reading.mapping) {
588 reading.noprint = false;
589 delTagFromReading(reading, reading.mapping->hash);
590 readings_changed = true;
591 }
592 if (reading.mapped) {
593 reading.mapped = false;
594 readings_changed = true;
595 }
596 if (readings_changed) {
597 reading.hit_by.push_back(rule);
598 }
599 return readings_changed;
600}
601
602void GrammarApplicator::splitMappings(TagList& mappings, Cohort& cohort, Reading& reading, bool mapped) {
603 for (auto it = mappings.begin(); it != mappings.end();) {
604 Tag*& tag = *it;
605 while (tag->type & T_VARSTRING) {
606 tag = generateVarstringTag(tag);
607 }
608 if (!(tag->type & T_MAPPING || tag->tag[0] == grammar->mapping_prefix)) {
609 addTagToReading(reading, tag);
610 it = mappings.erase(it);
611 }
612 else {
613 ++it;
614 }
615 }
616
617 if (reading.mapping) {
618 mappings.push_back(reading.mapping);
619 delTagFromReading(reading, reading.mapping->hash);
620 }
621
622 Tag* tag = mappings.back();
623 mappings.pop_back();
624 size_t i = mappings.size();
625 for (auto ttag : mappings) {
626 // To avoid duplicating needlessly many times, check for a similar reading in the cohort that's already got this mapping
627 bool found = false;
628 for (auto itr : cohort.readings) {
629 if (itr->hash_plain == reading.hash_plain && itr->mapping && itr->mapping->hash == ttag->hash) {
630 found = true;
631 break;
632 }
633 }
634 if (found) {
635 continue;
636 }
637 Reading* nr = alloc_reading(reading);
638 nr->mapped = mapped;
639 nr->number = UI32(reading.number - i--);
640 uint32_t mp = addTagToReading(*nr, ttag);
641 if (mp != ttag->hash) {
642 nr->mapping = grammar->single_tags.find(mp)->second;
643 }
644 else {
645 nr->mapping = ttag;
646 }
647 cohort.appendReading(nr);
648 numReadings++;
649 }
650
651 reading.mapped = mapped;
652 uint32_t mp = addTagToReading(reading, tag);
653 if (mp != tag->hash) {
654 reading.mapping = grammar->single_tags.find(mp)->second;
655 }
656 else {
657 reading.mapping = tag;
658 }
659}
660
661void GrammarApplicator::splitAllMappings(all_mappings_t& all_mappings, Cohort& cohort, bool mapped) {
662 if (all_mappings.empty()) {
663 return;
664 }
665 static thread_local ReadingList readings;
666 readings = cohort.readings;
667 for (auto reading : readings) {
668 auto iter = all_mappings.find(reading);
669 if (iter == all_mappings.end()) {
670 continue;
671 }
672 splitMappings(iter->second, cohort, *reading, mapped);
673 }
674 std::sort(cohort.readings.begin(), cohort.readings.end(), Reading::cmp_number);
675 if (!grammar->reopen_mappings.empty()) {
676 for (auto reading : cohort.readings) {
677 if (reading->mapping && grammar->reopen_mappings.count(reading->mapping->hash)) {
678 reading->mapped = false;
679 }
680 }
681 }
682 all_mappings.clear();
683}
684
// Merges readings that hash the same into a single reading that carries all
// of their mapping tags.
//
// For every reading two hashes are built over the reading and its chain of
// sub-readings (->next): `hplain` from hash_plain (or tags_string_hash in
// ordered mode), and `hp`, which additionally folds in hit_by when tracing.
// `nm` counts how many readings in the chain have a mapping.
// If every reading ends up in its own group, `readings` is left untouched.
// Otherwise the list is rebuilt with one freshly allocated reading per group,
// sorted by Reading::cmp_number, and the original readings are freed.
685void GrammarApplicator::mergeReadings(ReadingList& readings) {
 // hplain -> (mapping count, reading) of the most recent reading with that hash
686 static thread_local bc::flat_map<uint32_t, std::pair<uint32_t, Reading*>> mapped;
687 mapped.clear();
688 mapped.reserve(readings.size());
 // group key (hp + nm) -> readings to merge together
689 static thread_local bc::flat_map<uint32_t, ReadingList> mlist;
690 mlist.clear();
691 mlist.reserve(readings.size());
692
693 for (auto r : readings) {
694 uint32_t hp = r->hash_plain, hplain = r->hash_plain;
695 if (ordered) {
696 hp = hplain = r->tags_string_hash;
697 }
698 uint32_t nm = 0;
699 if (trace) {
700 for (auto iter_hb : r->hit_by) {
701 hp = hash_value(iter_hb, hp);
702 }
703 }
704 if (r->mapping) {
705 ++nm;
706 }
 // Fold every sub-reading into both hashes
707 Reading* sub = r->next;
708 while (sub) {
709 if (ordered) {
710 hp = hash_value(sub->tags_string_hash, hp);
711 hplain = hash_value(sub->tags_string_hash, hplain);
712 }
713 else {
714 hp = hash_value(sub->hash_plain, hp);
715 hplain = hash_value(sub->hash_plain, hplain);
716 }
717 if (trace) {
718 for (auto iter_hb : sub->hit_by) {
719 hp = hash_value(iter_hb, hp);
720 }
721 }
722 if (sub->mapping) {
723 ++nm;
724 }
725 sub = sub->next;
726 }
 // Of two readings with the same plain hash, the one without a mapping is
 // flagged deleted when the other one has a mapping.
727 if (mapped.count(hplain)) {
728 if (mapped[hplain].first != 0 && nm == 0) {
729 r->deleted = true;
730 }
731 else if (mapped[hplain].first != nm && mapped[hplain].first == 0) {
732 mapped[hplain].second->deleted = true;
733 }
734 }
735 mapped[hplain] = std::make_pair(nm, r);
736 mlist[hp + nm].push_back(r);
737 }
738
 // Every reading is in its own group, so there is nothing to merge.
739 if (mlist.size() == readings.size()) {
740 return;
741 }
742
743 readings.clear();
744 static thread_local std::vector<Reading*> order;
745 order.clear();
746
747 for (auto& miter : mlist) {
748 const ReadingList& clist = miter.second;
 // Start from a copy of the group's first reading, minus its mapping tag ...
749 Reading* nr = alloc_reading(*(clist.front()));
750 if (nr->mapping) {
751 erase(nr->tags_list, nr->mapping->hash);
752 }
 // ... then append each member's mapping tag once and release the member.
753 for (auto iter1 : clist) {
754 if (iter1->mapping && std::find(nr->tags_list.begin(), nr->tags_list.end(), iter1->mapping->hash) == nr->tags_list.end()) {
755 nr->tags_list.push_back(iter1->mapping->hash);
756 }
757 free_reading(iter1);
758 }
759 order.push_back(nr);
760 }
761
762 std::sort(order.begin(), order.end(), Reading::cmp_number);
763 readings.insert(readings.begin(), order.begin(), order.end());
764}
765
766void GrammarApplicator::mergeMappings(Cohort& cohort) {
767 mergeReadings(cohort.readings);
768 if (trace) {
769 mergeReadings(cohort.deleted);
770 mergeReadings(cohort.delayed);
771 }
772}
773
774Cohort* GrammarApplicator::delimitAt(SingleWindow& current, Cohort* cohort) {
775 SingleWindow* nwin = nullptr;
1
'nwin' initialized to a null pointer value
776 if (current.parent->current == &current) {
2
Assuming the condition is false
3
Taking false branch
777 nwin = current.parent->allocPushSingleWindow();
778 }
779 else {
780 foreach (iter, current.parent->next)if (!(current.parent->next).empty()) for (auto iter = (current
.parent->next).begin(), iter_end = (current.parent->next
).end(); iter != iter_end; ++iter)
{
4
Assuming the condition is false
5
Taking false branch
781 if (*iter == &current) {
782 nwin = current.parent->allocSingleWindow();
783 current.parent->next.insert(++iter, nwin);
784 break;
785 }
786 }
787 if (!nwin
5.1
'nwin' is null
) {
6
Taking true branch
788 foreach (iter, current.parent->previous)if (!(current.parent->previous).empty()) for (auto iter = (
current.parent->previous).begin(), iter_end = (current.parent
->previous).end(); iter != iter_end; ++iter)
{
7
Assuming the condition is false
8
Taking false branch
789 if (*iter == &current) {
790 nwin = current.parent->allocSingleWindow();
791 current.parent->previous.insert(iter, nwin);
792 break;
793 }
794 }
795 }
796 gWindow->rebuildSingleWindowLinks();
797 }
798
799 assert(nwin != 0)(static_cast<void> (0));
800
801 std::swap(current.flush_after, nwin->flush_after);
9
Forming reference to null pointer
802 std::swap(current.text_post, nwin->text_post);
803 nwin->has_enclosures = current.has_enclosures;
804
805 Cohort* cCohort = alloc_cohort(nwin);
806 cCohort->global_number = current.parent->cohort_counter++;
807 cCohort->wordform = tag_begin;
808
809 Reading* cReading = alloc_reading(cCohort);
810 cReading->baseform = begintag;
811 insert_if_exists(cReading->parent->possible_sets, grammar->sets_any);
812 addTagToReading(*cReading, begintag);
813
814 cCohort->appendReading(cReading);
815 nwin->appendCohort(cCohort);
816
817 auto lc = cohort->local_number;
818 auto nc = std::find(current.all_cohorts.begin() + lc, current.all_cohorts.end(), cohort);
819 ++nc;
820 auto from = nc;
821 for (; nc != current.all_cohorts.end(); ++nc) {
822 (*nc)->parent = nwin;
823 if ((*nc)->type & (CT_ENCLOSED | CT_REMOVED | CT_IGNORED)) {
824 nwin->all_cohorts.push_back(*nc);
825 }
826 else {
827 nwin->appendCohort(*nc);
828 }
829 }
830 current.cohorts.erase(current.cohorts.begin() + lc + 1, current.cohorts.end());
831 current.all_cohorts.erase(from, current.all_cohorts.end());
832
833 cohort = current.cohorts.back();
834 for (auto reading : cohort->readings) {
835 addTagToReading(*reading, endtag);
836 }
837 gWindow->rebuildCohortLinks();
838
839 return cohort;
840}
841
842void GrammarApplicator::reflowTextuals_Reading(Reading& r) {
843 if (r.next) {
844 reflowTextuals_Reading(*r.next);
845 }
846 for (auto it : r.tags) {
847 Tag* tag = grammar->single_tags.find(it)->second;
848 if (tag->type & T_TEXTUAL) {
849 r.tags_textual.insert(it);
850 r.tags_textual_bloom.insert(it);
851 }
852 }
853}
854
855void GrammarApplicator::reflowTextuals_Cohort(Cohort& c) {
856 for (auto it : c.readings) {
857 reflowTextuals_Reading(*it);
858 }
859 for (auto it : c.deleted) {
860 reflowTextuals_Reading(*it);
861 }
862 for (auto it : c.ignored) {
863 reflowTextuals_Reading(*it);
864 }
865 for (auto it : c.delayed) {
866 reflowTextuals_Reading(*it);
867 }
868}
869
870void GrammarApplicator::reflowTextuals_SingleWindow(SingleWindow& sw) {
871 for (auto it : sw.all_cohorts) {
872 reflowTextuals_Cohort(*it);
873 }
874}
875
876void GrammarApplicator::reflowTextuals() {
877 for (auto swit : gWindow->previous) {
878 reflowTextuals_SingleWindow(*swit);
879 }
880 reflowTextuals_SingleWindow(*gWindow->current);
881 for (auto swit : gWindow->next) {
882 reflowTextuals_SingleWindow(*swit);
883 }
884}
885}