Bug Summary

File: interchunk.cc
Warning: line 286, column 6
Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name interchunk.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ interchunk.cc
1/*
2 * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <https://www.gnu.org/licenses/>.
16 */
17#include <apertium/interchunk.h>
18
19#include <lttoolbox/xml_walk_util.h>
20#include <lttoolbox/string_utils.h>
21
22#include <iostream>
23
24using namespace std;
25
26Interchunk::Interchunk()
27{}
28
29bool
30Interchunk::checkIndex(xmlNode *element, int index, int limit)
31{
32 if(index >= limit)
33 {
34 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl;
35 return false;
36 }
37 if(index < 0) {
38 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl;
39 return false;
40 }
41 if(word[index] == 0)
42 {
43 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl;
44 return false;
45 }
46 return true;
47}
48
49UString
50Interchunk::evalCachedString(xmlNode* element)
51{
52 TransferInstr& ti = evalStringCache[element];
53 switch (ti.getType()) {
54 case ti_clip_tl:
55 if (checkIndex(element, ti.getPos(), lword)) {
56 if (ti.getContent() == "content"_u) {
57 UString wf = word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
58 return wf.substr(1, wf.length()-2); // trim { and }
59 } else {
60 return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]);
61 }
62 }
63 break;
64
65 case ti_var:
66 return variables[ti.getContent()];
67
68 case ti_lit_tag:
69 case ti_lit:
70 return ti.getContent();
71
72 case ti_b:
73 if (!blank_queue.empty()) {
74 UString retblank = blank_queue.front();
75 if (in_out) {
76 blank_queue.pop();
77 }
78 return retblank;
79 } else {
80 return " "_u;
81 }
82 break;
83
84 case ti_get_case_from:
85 if (checkIndex(element, ti.getPos(), lword)) {
86 return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]),
87 evalString(ti.getPointer()));
88 }
89 break;
90
91 case ti_case_of_tl:
92 if (checkIndex(element, ti.getPos(), lword)) {
93 return StringUtils::getcase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]));
94 }
95 break;
96
97 default:
98 return ""_u;
99 }
100 return ""_u;
101}
102
103void
104Interchunk::processClip(xmlNode* element)
105{
106 int pos = 0;
107 UString part;
108 for (xmlAttr* i = element->properties; i != NULL__null; i = i->next) {
109 if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
110 part = to_ustring((const char*) i->children->content);
111 } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
112 pos = atoi((const char*) i->children->content) - 1;
113 }
114 }
115 evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL__null);
116}
117
118void
119Interchunk::processBlank(xmlNode* element)
120{
121 if (element->properties == NULL__null) {
122 evalStringCache[element] = TransferInstr(ti_b, " "_u, -1);
123 } else {
124 int pos = atoi((const char*) element->properties->children->content) - 1;
125 evalStringCache[element] = TransferInstr(ti_b, ""_u, pos);
126 }
127}
128
129void
130Interchunk::processLuCount(xmlNode* element)
131{
132 cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
133 exit(EXIT_FAILURE1);
134}
135
136UString
137Interchunk::processLu(xmlNode* element)
138{
139 cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
140 exit(EXIT_FAILURE1);
141 return ""_u; // make the type checker happy
142}
143
144UString
145Interchunk::processMlu(xmlNode* element)
146{
147 cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
148 exit(EXIT_FAILURE1);
149 return ""_u; // make the type checker happy
150}
151
152void
153Interchunk::processCaseOf(xmlNode* element)
154{
155 int pos = 0;
156 UString part;
157 for (xmlAttr* i = element->properties; i != NULL__null; i = i->next) {
158 if (!xmlStrcmp(i->name, (const xmlChar*) "part")) {
159 part = to_ustring((char*) i->children->content);
160 } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) {
161 pos = atoi((const char*) i->children->content) - 1;
162 }
163 }
164 evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos);
165}
166
167void
168Interchunk::processOut(xmlNode *localroot)
169{
170 in_out = true;
171
172 for (auto i : children(localroot)) {
173 if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) {
174 write(processChunk(i), output);
175 } else { // 'b'
176 write(evalString(i), output);
177 }
178 }
179
180 in_out = false;
181}
182
183UString
184Interchunk::processChunk(xmlNode *localroot)
185{
186 UString result;
187 result.append("^"_u);
188
189 for (auto i : children(localroot)) {
190 result.append(evalString(i));
191 }
192
193 result.append("$"_u);
194 return result;
195}
196
197void
198Interchunk::processLet(xmlNode *localroot)
199{
200 xmlNode *leftSide = NULL__null, *rightSide = NULL__null;
201
202 for (auto i : children(localroot)) {
203 if(leftSide == NULL__null) {
204 leftSide = i;
205 } else {
206 rightSide = i;
207 break;
208 }
209 }
210
211 map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
212 if(it != evalStringCache.end())
213 {
214 TransferInstr &ti = it->second;
215 switch(ti.getType())
216 {
217 case ti_var:
218 variables[ti.getContent()] = evalString(rightSide);
219 return;
220
221 case ti_clip_tl:
222 {
223 bool match = word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide));
224 if(!match && trace)
225 {
226 cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
227 }
228 }
229 return;
230
231 default:
232 return;
233 }
234 }
235 if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
236 {
237 UString const val = to_ustring((const char *) leftSide->properties->children->content);
238 variables[val] = evalString(rightSide);
239 evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
240 }
241 else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
242 {
243 int pos = 0;
244 UString part;
245
246 for(xmlAttr *i = leftSide->properties; i != NULL__null; i = i->next)
247 {
248 if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
249 {
250 part = to_ustring((char*)i->children->content);
251 }
252 else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
253 {
254 pos = atoi((const char *) i->children->content) - 1;
255 }
256 }
257
258
259 bool match = word[pos]->setChunkPart(attr_items[part],
260 evalString(rightSide));
261 if(!match && trace)
262 {
263 cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
264 }
265 evalStringCache[leftSide] = TransferInstr(ti_clip_tl,
266 part,
267 pos, NULL__null);
268 }
269}
270
271void
272Interchunk::processModifyCase(xmlNode *localroot)
273{
274 if (dictionary_case) return;
1
Assuming field 'dictionary_case' is false
2
Taking false branch
275 xmlNode *leftSide = NULL__null, *rightSide = NULL__null;
3
'leftSide' initialized to a null pointer value
276
277 for (auto i : children(localroot)) {
278 if(leftSide == NULL__null) {
279 leftSide = i;
280 } else {
281 rightSide = i;
282 break;
283 }
284 }
285
286 if(leftSide->name != NULL__null && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
4
Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide')
287 {
288 int pos = 0;
289 UString part;
290
291 for(xmlAttr *i = leftSide->properties; i != NULL__null; i = i->next)
292 {
293 if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
294 {
295 part = to_ustring((char*)i->children->content);
296 }
297 else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
298 {
299 pos = atoi((const char *) i->children->content) - 1;
300 }
301 }
302
303 UString const result = StringUtils::copycase(evalString(rightSide),
304 word[pos]->chunkPart(attr_items[part]));
305 bool match = word[pos]->setChunkPart(attr_items[part], result);
306 if(!match && trace)
307 {
308 cerr << "apertium-interchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
309 }
310 }
311 else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
312 {
313 UString const val = to_ustring((const char *) leftSide->properties->children->content);
314 variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]);
315 }
316}
317
318void
319Interchunk::processCallMacro(xmlNode *localroot)
320{
321 UString n = to_ustring((const char *) localroot->properties->children->content);
322 int npar = 0;
323
324 xmlNode *macro = macro_map[macros[n]];
325
326 for(xmlAttr *i = macro->properties; i != NULL__null; i = i->next)
327 {
328 if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
329 {
330 npar = atoi((const char *) i->children->content);
331 break;
332 }
333 }
334
335 // ToDo: Is it at all valid if npar <= 0 ?
336
337 InterchunkWord **myword = NULL__null;
338 int idx = 0;
339 if(npar > 0)
340 {
341 myword = new InterchunkWord *[npar];
342 for (auto i : children(localroot)) {
343 int pos = atoi((const char *) i->properties->children->content)-1;
344 myword[idx] = word[pos];
345 idx++;
346 }
347 }
348
349 swap(myword, word);
350 swap(npar, lword);
351
352 for (auto i : children(macro)) {
353 processInstruction(i);
354 }
355
356 swap(myword, word);
357 swap(npar, lword);
358
359 delete[] myword;
360}
361
362TransferToken &
363Interchunk::readToken(InputFile& in)
364{
365 if(!input_buffer.isEmpty())
366 {
367 return input_buffer.next();
368 }
369
370 UString content;
371 while(true)
372 {
373 int val = in.get();
374 if(in.eof() || (internal_null_flush && val == 0))
375 {
376 return input_buffer.add(TransferToken(content, tt_eof));
377 }
378 if(val == '\\')
379 {
380 content += '\\';
381 content += in.get();
382 }
383 else if(val == '[')
384 {
385 content += '[';
386 while(true)
387 {
388 UChar32 val2 = in.get();
389 if(val2 == '\\') {
390 content += '\\';
391 content += in.get();
392 } else if(val2 == ']') {
393 content += ']';
394 break;
395 } else {
396 content += val2;
397 }
398 }
399 }
400 else if(inword && val == '{') {
401 content += '{';
402 while(true) {
403 UChar32 val2 = in.get();
404 if(val2 == '\\') {
405 content += '\\';
406 content += in.get();
407 } else if(val2 == '}') {
408 UChar32 val3 = in.peek();
409
410 content += '}';
411 if(val3 == '$') {
412 break;
413 }
414 } else {
415 content += val2;
416 }
417 }
418 }
419 else if(inword && val == '$')
420 {
421 inword = false;
422 return input_buffer.add(TransferToken(content, tt_word));
423 }
424 else if(val == '^')
425 {
426 inword = true;
427 return input_buffer.add(TransferToken(content, tt_blank));
428 }
429 else
430 {
431 content += val;
432 }
433 }
434}
435
436void
437Interchunk::interchunk_wrapper_null_flush(InputFile& in, UFILE* out)
438{
439 null_flush = false;
440 internal_null_flush = true;
441
442 while(!in.eof()) {
443 interchunk(in, out);
444 u_fputcu_fputc_72('\0', out);
445 u_fflushu_fflush_72(out);
446 variables = variable_defaults;
447 }
448 internal_null_flush = false;
449 null_flush = true;
450}
451
452
453void
454Interchunk::interchunk(InputFile& in, UFILE* out)
455{
456 if(getNullFlush())
457 {
458 interchunk_wrapper_null_flush(in, out);
459 }
460
461 unsigned int last = input_buffer.getPos();
462 unsigned int prev_last = last;
463 int lastrule_id = -1;
464 set<int> banned_rules;
465
466 output = out;
467 ms.init(me->getInitial());
468
469 while(true)
470 {
471 if(ms.size() == 0)
472 {
473 if(lastrule != NULL__null)
474 {
475 int num_words_to_consume = applyRule();
476
477 //Consume all the words from the input which matched the rule.
478 //This piece of code is executed unless the rule contains a "reject-current-rule" instruction
479 if(num_words_to_consume < 0)
480 {
481 banned_rules.clear();
482 input_buffer.setPos(last);
483 }
484 else if(num_words_to_consume > 0)
485 {
486 banned_rules.clear();
487 if(prev_last >= input_buffer.getSize())
488 {
489 input_buffer.setPos(0);
490 }
491 else
492 {
493 input_buffer.setPos(prev_last+1);
494 }
495 int num_consumed_words = 0;
496 while(num_consumed_words < num_words_to_consume && !input_buffer.isEmpty())
497 {
498 TransferToken& local_tt = input_buffer.next();
499 if (local_tt.getType() == tt_word)
500 {
501 num_consumed_words++;
502 }
503 }
504 }
505 else
506 {
507 //Add rule to banned rules
508 banned_rules.insert(lastrule_id);
509 input_buffer.setPos(prev_last);
510 input_buffer.next();
511 last = input_buffer.getPos();
512 }
513 lastrule_id = -1;
514 }
515 else
516 {
517 if(tmpword.size() != 0)
518 {
519 u_fprintfu_fprintf_72(output, "^%S$", tmpword[0]->c_str());
520 tmpword.clear();
521 input_buffer.setPos(last);
522 input_buffer.next();
523 prev_last = last;
524 banned_rules.clear();
525 last = input_buffer.getPos();
526 ms.init(me->getInitial());
527 }
528 else if(tmpblank.size() != 0) {
529 write(*tmpblank[0], output);
530 tmpblank.clear();
531 prev_last = last;
532 last = input_buffer.getPos();
533 ms.init(me->getInitial());
534 }
535 }
536 }
537 int val = ms.classifyFinals(me->getFinals(), banned_rules);
538 if(val != -1)
539 {
540 size_t lastrule_line = rule_lines[val-1];
541 lastrule = rule_map[val-1];
542 last = input_buffer.getPos();
543 lastrule_id = val;
544
545 last_lword = tmpword.size();
546
547 if(trace)
548 {
549 cerr << endl << "apertium-interchunk: Rule " << val << " line " << lastrule_line;
550 for (auto& it : tmpword) {
551 cerr << " " << *it;
552 }
553 cerr << endl;
554 }
555 }
556
557 TransferToken &current = readToken(in);
558
559 switch(current.getType())
560 {
561 case tt_word:
562 applyWord(current.getContent());
563 tmpword.push_back(&current.getContent());
564 break;
565
566 case tt_blank:
567 ms.step(' ');
568 tmpblank.push_back(&current.getContent());
569 break;
570
571 case tt_eof:
572 if(tmpword.size() != 0) {
573 tmpblank.push_back(&current.getContent());
574 ms.clear();
575 }
576 else {
577 write(current.getContent(), output);
578 tmpblank.clear();
579 return;
580 }
581 break;
582
583 default:
584 cerr << "Error: Unknown input token." << endl;
585 return;
586 }
587 }
588}
589
590int
591Interchunk::applyRule()
592{
593 unsigned int limit = tmpword.size();
594
595 for(unsigned int i = 0; i != limit; i++)
596 {
597 if(i == 0)
598 {
599 word = new InterchunkWord *[limit];
600 lword = limit;
601 }
602 else
603 {
604 if(int(blank_queue.size()) < last_lword - 1)
605 {
606 UString blank_to_add = UString(*tmpblank[i-1]);
607 blank_queue.push(blank_to_add);
608 }
609 }
610
611 word[i] = new InterchunkWord(*tmpword[i]);
612 }
613
614 int words_to_consume = processRule(lastrule);
615 lastrule = NULL__null;
616
617 if(word)
618 {
619 for(unsigned int i = 0; i != limit; i++)
620 {
621 delete word[i];
622 }
623 delete[] word;
624 }
625
626 word = NULL__null;
627 tmpword.clear();
628 tmpblank.clear();
629 ms.init(me->getInitial());
630 return words_to_consume;
631}
632
633void
634Interchunk::applyWord(UString const &word_str)
635{
636 ms.step('^');
637 for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
638 {
639 switch(word_str[i])
640 {
641 case '\\':
642 i++;
643 ms.step(u_toloweru_tolower_72(word_str[i]), any_char);
644 break;
645
646 case '<':
647 for(unsigned int j = i+1; j != limit; j++)
648 {
649 if(word_str[j] == '>')
650 {
651 int symbol = alphabet(word_str.substr(i, j-i+1));
652 if(symbol)
653 {
654 ms.step(symbol, any_tag);
655 }
656 else
657 {
658 ms.step(any_tag);
659 }
660 i = j;
661 break;
662 }
663 }
664 break;
665
666 case '{': // ignore the unmodifiable part of the chunk
667 ms.step('$');
668 return;
669
670 default:
671 ms.step(u_toloweru_tolower_72(word_str[i]), any_char);
672 break;
673 }
674 }
675 ms.step('$');
676}