clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name state.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/lttoolbox/lttoolbox-3.7.13+g626~47e4bf9e/lttoolbox -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_DECL_FGETC_UNLOCKED -D HAVE_DECL_FMEMOPEN -D HAVE_DECL_FPUTC_UNLOCKED -D HAVE_DECL_FPUTS_UNLOCKED -D HAVE_DECL_FREAD_UNLOCKED -D HAVE_DECL_FWRITE_UNLOCKED -D HAVE_GETOPT_LONG -D LTTOOLBOX_EXPORTS -D PACKAGE_VERSION="3.7.13" -D _GNU_SOURCE -D _POSIX_C_SOURCE=200112 -D lttoolbox_EXPORTS -I /usr/include/libxml2 -I /usr/include/utf8cpp -I /tmp/build/lttoolbox/lttoolbox-3.7.13+g626~47e4bf9e -I /tmp/build/lttoolbox/lttoolbox-3.7.13+g626~47e4bf9e/lttoolbox -I /usr/local/include -D NDEBUG -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -std=c++2b -fdebug-compilation-dir=/tmp/build/lttoolbox/lttoolbox-3.7.13+g626~47e4bf9e/lttoolbox -ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/lttoolbox/scan-build/2024-09-11-154858-201037-1 -x c++ /tmp/build/lttoolbox/lttoolbox-3.7.13+g626~47e4bf9e/lttoolbox/state.cc
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | #include <lttoolbox/state.h> |
18 | #include <lttoolbox/string_utils.h> |
19 | |
20 | #include <cstring> |
21 | #include <climits> |
22 | #include <algorithm> |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | State::State() |
29 | { |
30 | } |
31 | |
32 | State::~State() |
33 | { |
34 | destroy(); |
35 | } |
36 | |
37 | State::State(State const &s) |
38 | { |
39 | copy(s); |
40 | } |
41 | |
42 | State & |
43 | State::operator =(State const &s) |
44 | { |
45 | if(this != &s) |
46 | { |
47 | destroy(); |
48 | copy(s); |
49 | } |
50 | |
51 | return *this; |
52 | } |
53 | |
54 | void |
55 | State::destroy() |
56 | { |
57 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
58 | { |
59 | delete state[i].sequence; |
60 | } |
61 | |
62 | state.clear(); |
63 | } |
64 | |
65 | void |
66 | State::copy(State const &s) |
67 | { |
68 | |
69 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
70 | { |
71 | delete state[i].sequence; |
72 | } |
73 | |
74 | state = s.state; |
75 | |
76 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
77 | { |
78 | std::vector<std::pair<int, double>> *tmp = new std::vector<std::pair<int, double>>(); |
79 | *tmp = *(state[i].sequence); |
80 | state[i].sequence = tmp; |
81 | } |
82 | } |
83 | |
84 | size_t |
85 | State::size() const |
86 | { |
87 | return state.size(); |
88 | } |
89 | |
90 | void |
91 | State::init(Node *initial) |
92 | { |
93 | state.clear(); |
94 | state.push_back(TNodeState(initial, new std::vector<std::pair<int, double>>(), false)); |
95 | state[0].sequence->clear(); |
96 | epsilonClosure(); |
97 | } |
98 | |
99 | bool |
100 | State::apply_into(std::vector<TNodeState>* new_state, int const input, int index, bool dirty) |
101 | { |
102 | std::map<int, Dest>::const_iterator it; |
103 | it = state[index].where->transitions.find(input); |
104 | if(it != state[index].where->transitions.end()) |
105 | { |
106 | for(int j = 0; j != it->second.size; j++) |
107 | { |
108 | std::vector<std::pair<int, double>> *new_v = new std::vector<std::pair<int, double>>(); |
109 | *new_v = *(state[index].sequence); |
110 | if(it->first != 0) |
111 | { |
112 | new_v->push_back({it->second.out_tag[j], it->second.out_weight[j]}); |
113 | } |
114 | new_state->push_back(TNodeState(it->second.dest[j], new_v, state[index].dirty||dirty)); |
115 | } |
116 | return true; |
117 | } |
118 | return false; |
119 | } |
120 | |
121 | bool |
122 | State::apply_into_override(std::vector<TNodeState>* new_state, int const input, int const old_sym, int const new_sym, int index, bool dirty) |
123 | { |
124 | std::map<int, Dest>::const_iterator it; |
125 | it = state[index].where->transitions.find(input); |
126 | if(it != state[index].where->transitions.end()) |
127 | { |
128 | for(int j = 0; j != it->second.size; j++) |
129 | { |
130 | std::vector<std::pair<int, double>> *new_v = new std::vector<std::pair<int, double>>(); |
131 | *new_v = *(state[index].sequence); |
132 | if(it->first != 0) |
133 | { |
134 | if(it->second.out_tag[j] == old_sym) |
135 | { |
136 | new_v->push_back({new_sym, it->second.out_weight[j]}); |
137 | } |
138 | else |
139 | { |
140 | new_v->push_back({it->second.out_tag[j], it->second.out_weight[j]}); |
141 | } |
142 | } |
143 | new_state->push_back(TNodeState(it->second.dest[j], new_v, state[index].dirty||dirty)); |
144 | } |
145 | return true; |
146 | } |
147 | return false; |
148 | } |
149 | |
150 | void |
151 | State::apply(int const input) |
152 | { |
153 | if(input == 0) |
154 | { |
155 | destroy(); |
156 | return; |
157 | } |
158 | |
159 | std::vector<TNodeState> new_state; |
160 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
161 | { |
162 | apply_into(&new_state, input, i, false); |
163 | delete state[i].sequence; |
164 | } |
165 | |
166 | state = new_state; |
167 | } |
168 | |
169 | void |
170 | State::apply_override(int const input, int const old_sym, int const new_sym) |
171 | { |
172 | if(input == 0 || old_sym == 0) |
173 | { |
174 | destroy(); |
175 | return; |
176 | } |
177 | |
178 | std::vector<TNodeState> new_state; |
179 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
180 | { |
181 | apply_into_override(&new_state, input, old_sym, new_sym, i, false); |
182 | apply_into_override(&new_state, old_sym, old_sym, new_sym, i, true); |
183 | delete state[i].sequence; |
184 | } |
185 | |
186 | state = new_state; |
187 | } |
188 | |
189 | void |
190 | State::apply_override(int const input, int const alt, int const old_sym, int const new_sym) |
191 | { |
192 | if(input == alt) |
193 | { |
194 | apply_override(input, old_sym, new_sym); |
195 | return; |
196 | } |
197 | |
198 | if(input == 0 || old_sym == 0) |
199 | { |
200 | destroy(); |
201 | return; |
202 | } |
203 | |
204 | std::vector<TNodeState> new_state; |
205 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
206 | { |
207 | apply_into_override(&new_state, input, old_sym, new_sym, i, false); |
208 | apply_into_override(&new_state, alt, old_sym, new_sym, i, true); |
209 | apply_into_override(&new_state, old_sym, old_sym, new_sym, i, true); |
210 | delete state[i].sequence; |
211 | } |
212 | |
213 | state = new_state; |
214 | } |
215 | |
216 | void |
217 | State::apply(int const input, int const alt) |
218 | { |
219 | if(input == 0 || alt == 0) |
220 | { |
221 | destroy(); |
222 | return; |
223 | } |
224 | |
225 | std::vector<TNodeState> new_state; |
226 | if(input == alt) |
227 | { |
228 | apply(input); |
229 | return; |
230 | } |
231 | |
232 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
233 | { |
234 | apply_into(&new_state, input, i, false); |
235 | apply_into(&new_state, alt, i, true); |
236 | delete state[i].sequence; |
237 | } |
238 | |
239 | state = new_state; |
240 | } |
241 | |
242 | void |
243 | State::apply_careful(int const input, int const alt) |
244 | { |
245 | if(input == 0 || alt == 0) |
246 | { |
247 | destroy(); |
248 | return; |
249 | } |
250 | |
251 | std::vector<TNodeState> new_state; |
252 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
253 | { |
254 | if(!apply_into(&new_state, input, i, false)) |
255 | { |
256 | apply_into(&new_state, alt, i, true); |
257 | } |
258 | delete state[i].sequence; |
259 | } |
260 | |
261 | state = new_state; |
262 | } |
263 | |
264 | void |
265 | State::epsilonClosure() |
266 | { |
267 | for(size_t i = 0; i != state.size(); i++) |
268 | { |
269 | auto it2 = state[i].where->transitions.find(0); |
270 | if(it2 != state[i].where->transitions.end()) |
271 | { |
272 | for(int j = 0 ; j != it2->second.size; j++) |
273 | { |
274 | std::vector<std::pair<int, double>> *tmp = new std::vector<std::pair<int, double>>(); |
275 | *tmp = *(state[i].sequence); |
276 | if(it2->second.out_tag[j] != 0) |
277 | { |
278 | tmp->push_back({it2->second.out_tag[j], it2->second.out_weight[j]}); |
279 | } |
280 | state.push_back(TNodeState(it2->second.dest[j], tmp, state[i].dirty)); |
281 | } |
282 | } |
283 | } |
284 | } |
285 | |
286 | void |
287 | State::apply(int const input, int const alt1, int const alt2) |
288 | { |
289 | std::vector<TNodeState> new_state; |
290 | if(input == 0 || alt1 == 0 || alt2 == 0) |
291 | { |
292 | state = new_state; |
293 | return; |
294 | } |
295 | |
296 | if(input == alt1) |
297 | { |
298 | apply(input, alt2); |
299 | return; |
300 | } |
301 | else if(input == alt2) |
302 | { |
303 | apply(input, alt1); |
304 | return; |
305 | } |
306 | |
307 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
308 | { |
309 | apply_into(&new_state, input, i, false); |
310 | apply_into(&new_state, alt1, i, true); |
311 | apply_into(&new_state, alt2, i, true); |
312 | delete state[i].sequence; |
313 | } |
314 | |
315 | state = new_state; |
316 | } |
317 | |
318 | void |
319 | State::apply(int const input, std::set<int> const alts) |
320 | { |
321 | std::vector<TNodeState> new_state; |
322 | bool has_null = false; |
323 | for(auto sit = alts.begin(); sit != alts.end(); sit++) |
324 | { |
325 | if(*sit == 0) |
326 | { |
327 | has_null = true; |
328 | } |
329 | } |
330 | if(input == 0 || has_null) |
331 | { |
332 | state = new_state; |
333 | return; |
334 | } |
335 | |
336 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
337 | { |
338 | apply_into(&new_state, input, i, false); |
339 | for(auto sit = alts.begin(); sit != alts.end(); sit++) |
340 | { |
341 | if(*sit == input) continue; |
342 | apply_into(&new_state, *sit, i, true); |
343 | } |
344 | |
345 | delete state[i].sequence; |
346 | } |
347 | |
348 | state = new_state; |
349 | } |
350 | |
351 | void |
352 | State::step(int const input) |
353 | { |
354 | apply(input); |
355 | epsilonClosure(); |
356 | } |
357 | |
358 | void |
359 | State::step(int const input, int const alt) |
360 | { |
361 | apply(input, alt); |
362 | epsilonClosure(); |
363 | } |
364 | |
365 | void |
366 | State::step_override(int const input, int const old_sym, int const new_sym) |
367 | { |
368 | apply_override(input, old_sym, new_sym); |
369 | epsilonClosure(); |
370 | } |
371 | |
372 | void |
373 | State::step_override(int const input, int const alt, int const old_sym, int const new_sym) |
374 | { |
375 | apply_override(input, alt, old_sym, new_sym); |
376 | epsilonClosure(); |
377 | } |
378 | |
379 | void |
380 | State::step_careful(int const input, int const alt) |
381 | { |
382 | apply_careful(input, alt); |
383 | epsilonClosure(); |
384 | } |
385 | |
386 | void |
387 | State::step(int const input, int const alt1, int const alt2) |
388 | { |
389 | apply(input, alt1, alt2); |
390 | epsilonClosure(); |
391 | } |
392 | |
393 | void |
394 | State::step(int const input, std::set<int> const alts) |
395 | { |
396 | apply(input, alts); |
397 | epsilonClosure(); |
398 | } |
399 | |
400 | void |
401 | State::step_case(UChar32 val, UChar32 val2, bool caseSensitive) |
402 | { |
403 | if (!u_isupper(val) || caseSensitive) { |
404 | step(val, val2); |
405 | } else if(val != u_tolower(val)) { |
406 | step(val, u_tolower(val), val2); |
407 | } else { |
408 | step(val, val2); |
409 | } |
410 | } |
411 | |
412 | |
413 | void |
414 | State::step_case(UChar32 val, bool caseSensitive) |
415 | { |
416 | if (!u_isupper(val) || caseSensitive) { |
417 | step(val); |
418 | } else { |
419 | step(val, u_tolower(val)); |
420 | } |
421 | } |
422 | |
423 | |
424 | void |
425 | State::step_case_override(UChar32 val, bool caseSensitive) |
426 | { |
427 | if (!u_isupper(val) || caseSensitive) { |
428 | step(val); |
429 | } else { |
430 | step_override(val, u_tolower(val), u_tolower(val), val); |
431 | } |
432 | } |
433 | |
434 | void |
435 | State::step_optional(UChar32 val) |
436 | { |
437 | if (val == 0) return; |
438 | std::vector<TNodeState> new_state; |
439 | for (size_t i = 0; i < state.size(); i++) { |
440 | apply_into(&new_state, val, i, false); |
441 | } |
442 | new_state.swap(state); |
443 | epsilonClosure(); |
444 | new_state.swap(state); |
445 | state.insert(state.end(), new_state.begin(), new_state.end()); |
446 | } |
447 | |
448 | bool |
449 | State::isFinal(std::map<Node *, double> const &finals) const |
450 | { |
451 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
452 | { |
453 | if(finals.find(state[i].where) != finals.end()) |
454 | { |
455 | return true; |
456 | } |
457 | } |
458 | |
459 | return false; |
460 | } |
461 | |
462 | |
463 | std::vector<std::pair< UString, double >> |
464 | State::NFinals(std::vector<std::pair<UString, double>> lf, int maxAnalyses, int maxWeightClasses) const |
465 | { |
466 | std::vector<std::pair<UString, double>> result; |
467 | |
468 | sort(lf.begin(), lf.end(), sort_weights<UString, double>()); |
469 | |
470 | for(auto it = lf.begin(); it != lf.end(); it++) |
471 | { |
472 | double last_weight = 0.0000; |
473 | if(maxAnalyses > 0 && maxWeightClasses > 0) |
474 | { |
475 | result.push_back({it->first, it->second}); |
476 | maxAnalyses--; |
477 | if(last_weight!=it->second) |
478 | { |
479 | maxWeightClasses--; |
480 | } |
481 | } |
482 | else break; |
483 | } |
484 | return result; |
485 | } |
486 | |
487 | |
488 | UString |
489 | State::filterFinals(std::map<Node *, double> const &finals, |
490 | Alphabet const &alphabet, |
491 | std::set<UChar32> const &escaped_chars, |
492 | bool display_weights, int max_analyses, int max_weight_classes, |
493 | bool uppercase, bool firstupper, int firstchar) const |
494 | { |
495 | std::vector<std::pair< UString, double >> response; |
496 | |
497 | UString result; |
498 | double cost = 0.0000; |
499 | |
500 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
501 | { |
502 | if(finals.find(state[i].where) != finals.end()) |
503 | { |
504 | if(state[i].dirty) |
505 | { |
506 | result.clear(); |
507 | cost = 0.0000; |
508 | unsigned int const first_char = result.size() + firstchar; |
509 | for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) |
510 | { |
511 | if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end()) |
512 | { |
513 | result += '\\'; |
514 | } |
515 | alphabet.getSymbol(result, ((*(state[i].sequence))[j]).first, uppercase); |
516 | cost += ((*(state[i].sequence))[j]).second; |
517 | } |
518 | if(firstupper) |
519 | { |
520 | if(result[first_char] == '~') |
521 | { |
522 | |
523 | result[first_char+1] = u_toupper(result[first_char+1]); |
524 | } |
525 | else |
526 | { |
527 | result[first_char] = u_toupper(result[first_char]); |
528 | } |
529 | } |
530 | } |
531 | else |
532 | { |
533 | result.clear(); |
534 | cost = 0.0000; |
535 | for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) |
536 | { |
537 | if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end()) |
538 | { |
539 | result += '\\'; |
540 | } |
541 | alphabet.getSymbol(result, ((*(state[i].sequence))[j]).first); |
542 | cost += ((*(state[i].sequence))[j]).second; |
543 | } |
544 | } |
545 | |
546 | |
547 | cost += (*(finals.find(state[i].where))).second; |
548 | response.push_back({result, cost}); |
549 | } |
550 | } |
551 | |
552 | response = NFinals(response, max_analyses, max_weight_classes); |
553 | |
554 | result.clear(); |
555 | std::set<UString> seen; |
556 | for(auto it = response.begin(); it != response.end(); it++) |
557 | { |
558 | if(seen.find(it->first) != seen.end()) { |
559 | continue; |
560 | } |
561 | seen.insert(it->first); |
562 | result += '/'; |
563 | result += it->first; |
564 | if(display_weights) |
565 | { |
566 | UChar temp[16]{}; |
567 | |
568 | u_sprintf(temp, "<W:%f>", it->second); |
569 | result += temp; |
570 | } |
571 | } |
572 | |
573 | return result; |
574 | } |
575 | |
576 | |
577 | std::set<std::pair<UString, std::vector<UString> > > |
578 | State::filterFinalsLRX(std::map<Node *, double> const &finals, |
579 | Alphabet const &alphabet, |
580 | std::set<UChar32> const &escaped_chars, |
581 | bool uppercase, bool firstupper, int firstchar) const |
582 | { |
583 | std::set<std::pair<UString, std::vector<UString> > > results; |
584 | |
585 | std::vector<UString> current_result; |
586 | UString rule_id; |
587 | |
588 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
589 | { |
590 | if(finals.find(state[i].where) != finals.end()) |
591 | { |
592 | current_result.clear(); |
593 | rule_id.clear(); |
594 | UString current_word; |
595 | for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) |
596 | { |
597 | if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end()) |
598 | { |
599 | current_word += '\\'; |
600 | } |
601 | UString sym; |
602 | alphabet.getSymbol(sym, ((*(state[i].sequence))[j]).first, uppercase); |
603 | if(sym == u"<$>"_uv) |
604 | { |
605 | if(!current_word.empty()) |
606 | { |
607 | current_result.push_back(current_word); |
608 | } |
609 | current_word.clear(); |
610 | } |
611 | else |
612 | { |
613 | current_word += sym; |
614 | } |
615 | } |
616 | rule_id = current_word; |
617 | results.insert({rule_id, current_result}); |
618 | } |
619 | } |
620 | |
621 | return results; |
622 | } |
623 | |
624 | |
625 | UString |
626 | State::filterFinalsSAO(std::map<Node *, double> const &finals, |
627 | Alphabet const &alphabet, |
628 | std::set<UChar32> const &escaped_chars, |
629 | bool uppercase, bool firstupper, int firstchar) const |
630 | { |
631 | UString result; |
632 | UString annot; |
633 | |
634 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
635 | { |
636 | if(finals.find(state[i].where) != finals.end()) |
637 | { |
638 | result += '/'; |
639 | unsigned int const first_char = result.size() + firstchar; |
640 | for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) |
641 | { |
642 | if(escaped_chars.find(((*(state[i].sequence))[j]).first) != escaped_chars.end()) |
643 | { |
644 | result += '\\'; |
645 | } |
646 | if(alphabet.isTag(((*(state[i].sequence))[j]).first)) |
647 | { |
648 | annot.clear(); |
649 | alphabet.getSymbol(annot, ((*(state[i].sequence))[j]).first); |
650 | result += '&'; |
651 | result += annot.substr(1,annot.length()-2); |
652 | result += ';'; |
653 | } |
654 | else |
655 | { |
656 | alphabet.getSymbol(result, ((*(state[i].sequence))[j]).first, uppercase); |
657 | } |
658 | } |
659 | if(firstupper) |
660 | { |
661 | if(result[first_char] == '~') |
662 | { |
663 | |
664 | result[first_char+1] = u_toupper(result[first_char+1]); |
665 | } |
666 | else |
667 | { |
668 | result[first_char] = u_toupper(result[first_char]); |
669 | } |
670 | } |
671 | } |
672 | } |
673 | |
674 | return result; |
675 | } |
676 | |
677 | UString |
678 | State::filterFinalsTM(std::map<Node *, double> const &finals, |
679 | Alphabet const &alphabet, |
680 | std::set<UChar32> const &escaped_chars, |
681 | std::queue<UString> &blankqueue, std::vector<UString> &numbers) const |
682 | { |
683 | UString result; |
684 | |
685 | for(size_t i = 0, limit = state.size(); i != limit; i++) |
686 | { |
687 | if(finals.find(state[i].where) != finals.end()) |
688 | { |
689 | result += '/'; |
690 | for(size_t j = 0, limit2 = state[i].sequence->size(); j != limit2; j++) |
691 | { |
692 | if(escaped_chars.find((*(state[i].sequence))[j].first) != escaped_chars.end()) |
693 | { |
694 | result += '\\'; |
695 | } |
696 | alphabet.getSymbol(result, (*(state[i].sequence))[j].first); |
697 | } |
698 | } |
699 | } |
700 | |
701 | |
702 | UString result2; |
703 | std::vector<UString> fragment; |
704 | fragment.push_back(""_u); |
705 | |
706 | for(unsigned int i = 0, limit = result.size(); i != limit ; i++) |
707 | { |
708 | if(result[i] == ')') |
709 | { |
710 | fragment.push_back(""_u); |
711 | } |
712 | else |
713 | { |
714 | fragment[fragment.size()-1] += result[i]; |
715 | } |
716 | } |
717 | |
718 | for(unsigned int i = 0, limit = fragment.size(); i != limit; i++) |
719 | { |
720 | if(i != limit -1) |
721 | { |
722 | if(fragment[i].size() >=2 && StringUtils::endswith(fragment[i], u"(#")) |
723 | { |
724 | UString whitespace = " "_u; |
725 | if(blankqueue.size() != 0) |
726 | { |
727 | whitespace = blankqueue.front().substr(1); |
728 | blankqueue.pop(); |
729 | whitespace = whitespace.substr(0, whitespace.size() - 1); |
730 | } |
731 | fragment[i] = fragment[i].substr(0, fragment[i].size()-2) + |
732 | whitespace; |
733 | } |
734 | else |
735 | { |
736 | bool substitute = false; |
737 | for(int j = fragment[i].size() - 1; j >= 0; j--) |
738 | { |
739 | if(fragment[i].size()-j > 3 && fragment[i][j] == '\\' && |
740 | fragment[i][j+1] == '@' && fragment[i][j+2] == '(') |
741 | { |
742 | int num = 0; |
743 | bool correct = true; |
744 | for(unsigned int k = (unsigned int) j+3, limit2 = fragment[i].size(); |
745 | k != limit2; k++) |
746 | { |
747 | if(u_isdigit(fragment[i][k])) |
748 | { |
749 | num = num * 10; |
750 | num += (int) fragment[i][k] - 48; |
751 | } |
752 | else |
753 | { |
754 | correct = false; |
755 | break; |
756 | } |
757 | } |
758 | if(correct) |
759 | { |
760 | fragment[i] = fragment[i].substr(0, j) + numbers[num - 1]; |
761 | substitute = true; |
762 | break; |
763 | } |
764 | } |
765 | } |
766 | if(substitute == false) |
767 | { |
768 | fragment[i] += ')'; |
769 | } |
770 | } |
771 | } |
772 | } |
773 | |
774 | result.clear(); |
775 | |
776 | for(unsigned int i = 0, limit = fragment.size(); i != limit; i++) |
777 | { |
778 | result += fragment[i]; |
779 | } |
780 | |
781 | return result; |
782 | } |
783 | |
784 | |
785 | |
786 | void |
787 | State::pruneCompounds(int requiredSymbol, int separationSymbol, int compound_max_elements) |
788 | { |
789 | int minNoOfCompoundElements = compound_max_elements; |
790 | int *noOfCompoundElements = new int[state.size()]; |
| 1 | Storing uninitialized value | |
|
791 | |
792 | for(unsigned int i = 0; i<state.size(); i++) |
| 2 | | Assuming the condition is false | |
|
| 3 | | Loop condition is false. Execution continues on line 811 | |
|
793 | { |
794 | std::vector<std::pair<int, double>> seq = *state.at(i).sequence; |
795 | |
796 | if(lastPartHasRequiredSymbol(seq, requiredSymbol, separationSymbol)) |
797 | { |
798 | int this_noOfCompoundElements = 0; |
799 | for (int j = seq.size()-2; j>0; j--) if ((seq.at(j)).first==separationSymbol) this_noOfCompoundElements++; |
800 | noOfCompoundElements[i] = this_noOfCompoundElements; |
801 | minNoOfCompoundElements = (minNoOfCompoundElements < this_noOfCompoundElements) ? |
802 | minNoOfCompoundElements : this_noOfCompoundElements; |
803 | } |
804 | else |
805 | { |
806 | noOfCompoundElements[i] = INT_MAX; |
807 | } |
808 | } |
809 | |
810 | |
811 | auto it = state.begin(); |
812 | int i=0; |
| |
813 | while(it != state.end()) |
| 5 | | Loop condition is true. Entering loop body | |
|
814 | { |
815 | if(noOfCompoundElements[i] > minNoOfCompoundElements) |
| 6 | | The left operand of '>' is a garbage value |
|
816 | { |
817 | delete (*it).sequence; |
818 | it = state.erase(it); |
819 | } |
820 | else |
821 | { |
822 | it++; |
823 | } |
824 | i++; |
825 | } |
826 | |
827 | delete[] noOfCompoundElements; |
828 | } |
829 | |
830 | |
831 | |
832 | void |
833 | State::pruneStatesWithForbiddenSymbol(int forbiddenSymbol) |
834 | { |
835 | auto it = state.begin(); |
836 | while(it != state.end()) |
837 | { |
838 | std::vector<std::pair<int, double>> *seq = (*it).sequence; |
839 | bool found = false; |
840 | for(int i = seq->size()-1; i>=0; i--) |
841 | { |
842 | if((seq->at(i)).first == forbiddenSymbol) |
843 | { |
844 | i=-1; |
845 | delete (*it).sequence; |
846 | it = state.erase(it); |
847 | found = true; |
848 | } |
849 | } |
850 | if(!found) |
851 | { |
852 | it++; |
853 | } |
854 | } |
855 | } |
856 | |
857 | |
858 | bool |
859 | State::hasSymbol(int requiredSymbol) |
860 | { |
861 | for(size_t i = 0; i<state.size(); i++) |
862 | { |
863 | |
864 | std::vector<std::pair<int, double>>* seq = state.at(i).sequence; |
865 | if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++) |
866 | { |
867 | int symbol=(seq->at(j)).first; |
868 | if(symbol == requiredSymbol) |
869 | { |
870 | return true; |
871 | } |
872 | } |
873 | } |
874 | return false; |
875 | } |
876 | |
877 | |
878 | bool |
879 | State::lastPartHasRequiredSymbol(const std::vector<std::pair<int, double>> &seq, int requiredSymbol, int separationSymbol) |
880 | { |
881 | |
882 | bool restart=false; |
883 | for(int n=seq.size()-1; n>=0; n--) |
884 | { |
885 | int symbol=(seq.at(n)).first; |
886 | if(symbol==requiredSymbol) |
887 | { |
888 | restart=true; |
889 | break; |
890 | } |
891 | if(symbol==separationSymbol) |
892 | { |
893 | break; |
894 | } |
895 | } |
896 | return restart; |
897 | } |
898 | |
899 | |
900 | void |
901 | State::restartFinals(const std::map<Node *, double> &finals, int requiredSymbol, State *restart_state, int separationSymbol) |
902 | { |
903 | |
904 | for(unsigned int i=0; i<state.size(); i++) |
905 | { |
906 | TNodeState state_i = state.at(i); |
907 | |
908 | |
909 | if(finals.count(state_i.where) > 0) |
910 | { |
911 | bool restart = lastPartHasRequiredSymbol(*(state_i.sequence), requiredSymbol, separationSymbol); |
912 | if(restart) |
913 | { |
914 | if(restart_state != NULL) |
915 | { |
916 | for(unsigned int j=0; j<restart_state->state.size(); j++) |
917 | { |
918 | TNodeState initst = restart_state->state.at(j); |
919 | std::vector<std::pair<int, double>> *tnvec = new std::vector<std::pair<int, double>>; |
920 | |
921 | for(unsigned int k=0; k < state_i.sequence->size(); k++) |
922 | { |
923 | tnvec->push_back(state_i.sequence->at(k)); |
924 | } |
925 | TNodeState tn(initst.where, tnvec, state_i.dirty); |
926 | tn.sequence->push_back({separationSymbol, 0.0}); |
927 | state.push_back(tn); |
928 | } |
929 | } |
930 | } |
931 | } |
932 | } |
933 | } |
934 | |
935 | |
936 | |
937 | UString |
938 | State::getReadableString(const Alphabet &a) |
939 | { |
940 | UString retval; |
941 | retval += '['; |
942 | |
943 | for(unsigned int i=0; i<state.size(); i++) |
944 | { |
945 | std::vector<std::pair<int, double>>* seq = state.at(i).sequence; |
946 | if(seq != NULL) for (unsigned int j=0; j<seq->size(); j++) |
947 | { |
948 | UString ws; |
949 | a.getSymbol(ws, (seq->at(j)).first); |
950 | retval.append(ws); |
951 | } |
952 | |
953 | if(i+1 < state.size()) |
954 | { |
955 | retval += ','; |
956 | retval += ' '; |
957 | } |
958 | } |
959 | retval += ']'; |
960 | return retval; |
961 | } |
962 | |
963 | void |
964 | State::merge(const State& other) |
965 | { |
966 | for (auto& it : other.state) { |
967 | std::vector<std::pair<int, double>>* tmp = new std::vector<std::pair<int, double>>(); |
968 | *tmp = *(it.sequence); |
969 | TNodeState ns(it.where, tmp, it.dirty); |
970 | this->state.push_back(std::move(ns)); |
971 | } |
972 | } |