Bug Summary

File: MweSplitApplicator.cpp
Warning: line 87, column 12
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'prev')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name MweSplitApplicator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -resource-dir /usr/lib/llvm-16/lib/clang/16 -D BOOST_NO_CXX98_FUNCTION_BASE=1 -D HAS_FS -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D _POSIX_C_SOURCE=200112 -D cg3_EXPORTS -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include/posix -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -I /usr/local/include -D NDEBUG -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -std=c++2b -fdebug-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src 
-ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/cg3/scan-build/2024-09-11-161008-13503-1 -x c++ /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src/MweSplitApplicator.cpp
1/*
2* Copyright (C) 2007-2024, GrammarSoft ApS
3* Developed by Tino Didriksen <mail@tinodidriksen.com>
4* Design by Eckhard Bick <eckhard.bick@mail.dk>, Tino Didriksen <mail@tinodidriksen.com>
5*
6* This program is free software: you can redistribute it and/or modify
7* it under the terms of the GNU General Public License as published by
8* the Free Software Foundation, either version 3 of the License, or
9* (at your option) any later version.
10*
11* This program is distributed in the hope that it will be useful,
12* but WITHOUT ANY WARRANTY; without even the implied warranty of
13* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14* GNU General Public License for more details.
15*
16* You should have received a copy of the GNU General Public License
17* along with this program. If not, see <https://www.gnu.org/licenses/>.
18*/
19
20#include "MweSplitApplicator.hpp"
21
22namespace CG3 {
23
// Constructor. Forwards ux_err to the GrammarApplicator base and then builds
// a stand-in Grammar in place of a loaded one: a dummy set is allocated, a
// delimiters set is allocated and given the single STR_DUMMY tag, and the
// grammar is reindexed before being installed through setGrammar().
24MweSplitApplicator::MweSplitApplicator(std::ostream& ux_err)
25 : GrammarApplicator(ux_err)
26{
// Raw new: ownership is recorded via owns_grammar below. The matching release
// is not visible in this listing - presumably done by the base class; confirm.
27 Grammar* grammar = new Grammar;
// ux_stderr here is the member of this applicator (the parameter is ux_err).
28 grammar->ux_stderr = ux_stderr;
29 grammar->allocateDummySet();
30 grammar->delimiters = grammar->allocateSet();
31 grammar->addTagToSet(grammar->allocateTag(STR_DUMMY), grammar->delimiters);
32 grammar->reindex();
33 setGrammar(grammar);
34 owns_grammar = true;
35 is_conv = true;
36}
37
// Pass-through: calls GrammarApplicator::runGrammarOnText with the same input
// and output streams and adds no behaviour of its own.
38void MweSplitApplicator::runGrammarOnText(std::istream& input, std::ostream& output) {
39 GrammarApplicator::runGrammarOnText(input, output);
40}
41
// Returns the first tag of reading r whose type has the T_WORDFORM bit set,
// or nullptr when no such tag exists. Entries of r->tags_list that are skipped:
//  - begintag, and endtag unless show_end_tags is set;
//  - the baseform of r and the hash of the wordform of the parent cohort.
// r, r->parent and r->parent->wordform are dereferenced without null checks.
42const Tag* MweSplitApplicator::maybeWfTag(const Reading* r) {
43 for (auto tter : r->tags_list) {
44 if ((!show_end_tags && tter == endtag) || tter == begintag) {
45 continue;
46 }
47 if (tter == r->baseform || tter == r->parent->wordform->hash) {
48 continue;
49 }
// tags_list entries are compared against ->hash values above, so this indexes
// single_tags by tag hash; the looked-up pointer is used unchecked.
50 const Tag* tag = grammar->single_tags[tter];
51 // If we are to split, there has to be at least one wordform on a head (not-sub) reading
52 if (tag->type & T_WORDFORM) {
53 return tag;
54 }
55 }
56 return nullptr;
57}
58
// Splits a multi-word cohort into one cohort per wordform-tagged level of its
// sub-reading chains and returns the resulting cohorts.
//
// Returns a vector holding only the original cohort when
//  - fewer readings carry a wordform tag than there are readings (a warning is
//    printed if some, but not all, do), or
//  - two sub-readings at the same depth yield different wordforms, or
//  - no new cohort was produced at all.
// Otherwise each reading is walked along its ->next chain; the n-th sub-reading
// with a wordform tag is copied into cos[n], and the vector is reversed before
// it is returned.
//
// NOTE(review): identifiers such as u_fprintfu_fprintf_72 look like the report
// rendering a macro name fused with its expansion; presumably the real source
// says u_fprintf - confirm against the actual file.
59std::vector<Cohort*> MweSplitApplicator::splitMwe(Cohort* cohort) {
// Zero-terminated character sets: blanks trimmed from wordforms, and the prefix
// put in front of text stored on a cohort.
60 constexpr UChar rtrimblank[] = { ' ', '\n', '\r', '\t', 0 };
61 constexpr UChar textprefix[] = { ':', 0 };
62 std::vector<Cohort*> cos;
63 size_t n_wftags = 0;
// Despite the name, this counts every reading of the cohort unconditionally.
64 size_t n_goodreadings = 0;
65 for (auto rter1 : cohort->readings) {
66 if (maybeWfTag(rter1) != nullptr) {
67 ++n_wftags;
68 }
69 ++n_goodreadings;
70 }
71
// Guard: continue only if every reading had a wordform tag when counted above.
72 if (n_wftags
6.1
'n_wftags' is >= 'n_goodreadings'
< n_goodreadings) {
7
Taking false branch
73 if (n_wftags > 0) {
74 u_fprintfu_fprintf_72(ux_stderr, "WARNING: Line %u: Some but not all main-readings of %S had wordform-tags (not completely mwe-disambiguated?), not splitting.\n", cohort->line_number, cohort->wordform->tag.data());
75 // We also don't split if wordform-tags were only on sub-readings, but should we warn on such faulty input?
76 }
77 cos.push_back(cohort);
78 return cos;
79 }
// Leading blanks stripped from one wordform; stored as text on the next cohort
// that gets allocated.
80 UString pretext;
81 for (auto r : cohort->readings) {
// Starts at the maximum value so that the first ++pos wraps around to 0
// (unsigned wraparound is well defined).
82 size_t pos = std::numeric_limits<size_t>::max();
83 Reading* prev = nullptr; // prev == NULL || prev->next == rNew (or a ->next of rNew)
8
'prev' initialized to a null pointer value
84 for (auto sub = r; sub; sub = sub->next) {
9
Loop condition is true. Entering loop body
85 const Tag* wfTag = maybeWfTag(sub);
86 if (wfTag == nullptr) {
10
Assuming the condition is true
11
Taking true branch
// NOTE(review): this is the dereference the report flags. The guard at listing
// line 72 returns early unless every reading in cohort->readings produced a
// wordform tag when counted, so for sub == r the else branch is presumably
// taken first and prev is already non-null whenever this line runs. The
// analyzer does not assume maybeWfTag gives the same answer twice, hence the
// reported path. An explicit prev != nullptr check here would make the
// invariant local - TODO confirm and guard.
87 prev = prev->next;
12
Access to field 'next' results in a dereference of a null pointer (loaded from variable 'prev')
88 }
89 else {
90 ++pos;
91 Cohort* c;
// Grow cos until index pos exists. Each new cohort takes the next global number
// from gWindow and is appended to the parent of the original cohort.
92 while (cos.size() < pos + 1) {
93 c = alloc_cohort(cohort->parent);
94 c->global_number = gWindow->cohort_counter++;
95 cohort->parent->appendCohort(c);
96 if(pretext.size() > 0) {
97 c->text = pretext;
98 pretext.clear();
99 }
100 cos.push_back(c);
101 }
102 c = cos[pos];
103
104 const size_t wfBeg = 2; // index after the initial '"<'
105 const size_t spBeg0 = wfTag->tag.find_first_not_of(rtrimblank, wfBeg); // index skipping initial space
106 const size_t spBeg = sub->next ? spBeg0 : wfBeg; // can't put pretext on first word / deepest reading
// NOTE(review): size() - 3 assumes the tag is at least as long as its two
// delimiter pairs; a shorter tag would wrap this unsigned value - verify that
// wordform tags always satisfy this.
107 const size_t wfEnd = wfTag->tag.size() - 3; // index before the final '>"'
108 const size_t spEnd = 1 + wfTag->tag.find_last_not_of(rtrimblank, wfEnd); // index before post-space
// Rebuild the wordform as opening delimiter + trimmed middle + closing delimiter.
109 const UString& wf =
110 wfTag->tag.substr(0, wfBeg)
111 + wfTag->tag.substr(spBeg, spEnd - spBeg)
112 + wfTag->tag.substr(wfEnd + 1);
// A cohort at this depth already has a different wordform: abandon the split.
// The cohorts allocated so far were already appended to the parent above and
// are not removed here.
113 if (c->wordform != 0 && wf != c->wordform->tag) {
114 u_fprintfu_fprintf_72(ux_stderr, "WARNING: Line %u: Ambiguous wordform-tags for same cohort, '%S' vs '%S', not splitting.\n", numLines, wf.data(), c->wordform->tag.data());
115 cos.clear();
116 cos.push_back(cohort);
117 return cos;
118 }
119 c->wordform = addTag(wf);
// Blanks trimmed from the front are kept in pretext; blanks trimmed from the
// back become the text of this cohort. Both get the textprefix in front.
120 if (spBeg > wfBeg) {
121 pretext = textprefix + wfTag->tag.substr(wfBeg, spBeg - wfBeg);
122 }
123 if (spEnd < wfEnd + 1) {
124 c->text = textprefix + wfTag->tag.substr(spEnd, wfEnd + 1 - spEnd);
125 }
126
// Copy the sub-reading, then drop the matched wordform tag and the wordform of
// the parent cohort from the copy.
127 Reading* rNew = alloc_reading(*sub);
// NOTE(review): tter is a reference to element i. After tags_list.erase() it
// no longer names the erased value (for a vector-like container it refers to
// whatever moved into slot i, or is invalid if i was last), so tags.erase(tter)
// may remove a different tag. The ++i after an erase also skips the element
// that slid into slot i. Copy the value before erasing - TODO confirm the
// container type and fix.
128 for (size_t i = 0; i < rNew->tags_list.size(); ++i) {
129 auto& tter = rNew->tags_list[i];
130 if (tter == wfTag->hash || tter == rNew->parent->wordform->hash) {
131 rNew->tags_list.erase(rNew->tags_list.begin() + i);
132 rNew->tags.erase(tter);
133 }
134 }
135 cos[pos]->appendReading(rNew);
136 rNew->parent = cos[pos];
137
// Releases whatever prev->next points to at this moment, then makes the new
// copy the current prev.
138 if (prev != nullptr) {
139 free_reading(prev->next);
140 }
141 prev = rNew;
142 }
143 }
144 }
145 if (cos.size() == 0) {
146 u_fprintfu_fprintf_72(ux_stderr, "WARNING: Line %u: Tried splitting %S, but got no new cohorts; shouldn't happen.", numLines, cohort->wordform->tag.data());
147 cos.push_back(cohort);
148 }
149 // The last word forms are the top readings:
150 cos[0]->text = cohort->text;
151 std::reverse(cos.begin(), cos.end());
152 return cos;
153}
154
// Writes one window to output, in this order:
//  1. a SETVAR or REMVAR command line for every entry of variables_output;
//  2. window->text, followed by a newline if it does not already end in one;
//  3. every cohort of the window, each first passed through splitMwe() and the
//     resulting cohorts printed with printCohort();
//  4. window->text_post, newline-terminated in the same way;
//  5. a newline, the FLUSH command if window->flush_after is set, and a flush
//     of the stream.
//
// NOTE(review): identifiers such as u_fprintfu_fprintf_72 and u_fputcu_fputc_72
// look like the report rendering a macro name fused with its expansion;
// presumably the real source says u_fprintf / u_fputc / u_fflush - confirm.
155void MweSplitApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) {
156 for (auto var : window->variables_output) {
157 Tag* key = grammar->single_tags[var];
158 auto iter = window->variables_set.find(var);
// Present in variables_set: print a SETVAR, with =value unless the stored value
// is grammar->tag_any. Absent: print a REMVAR.
159 if (iter != window->variables_set.end()) {
160 if (iter->second != grammar->tag_any) {
161 Tag* value = grammar->single_tags[iter->second];
162 u_fprintfu_fprintf_72(output, "%S%S=%S>\n", STR_CMD_SETVAR.data(), key->tag.data(), value->tag.data());
163 }
164 else {
165 u_fprintfu_fprintf_72(output, "%S%S>\n", STR_CMD_SETVAR.data(), key->tag.data());
166 }
167 }
168 else {
169 u_fprintfu_fprintf_72(output, "%S%S>\n", STR_CMD_REMVAR.data(), key->tag.data());
170 }
171 }
172
173 if (!window->text.empty()) {
1
Assuming the condition is true
2
Taking true branch
174 u_fprintfu_fprintf_72(output, "%S", window->text.data());
175 if (!ISNL(window->text.back())) {
3
Taking true branch
176 u_fputcu_fputc_72('\n', output);
177 }
178 }
179
// The cohort count is captured once, before the loop. splitMwe() appends new
// cohorts to cohort->parent; if that parent is this window (not shown here),
// the appended cohorts fall outside this bound and are not visited again.
180 auto cs = UI32(window->cohorts.size());
181 for (uint32_t c = 0; c < cs; c++) {
4
Assuming 'c' is < 'cs'
5
Loop condition is true. Entering loop body
182 Cohort* cohort = window->cohorts[c];
// This vector named cs shadows the outer count cs inside the loop body; the
// loop condition above still refers to the outer one.
183 std::vector<Cohort*> cs = splitMwe(cohort);
6
Calling 'MweSplitApplicator::splitMwe'
184 for (auto& iter : cs) {
185 printCohort(iter, output, profiling);
186 }
187 }
188
189 if (!window->text_post.empty()) {
190 u_fprintfu_fprintf_72(output, "%S", window->text_post.data());
191 if (!ISNL(window->text_post.back())) {
192 u_fputcu_fputc_72('\n', output);
193 }
194 }
195
196 u_fputcu_fputc_72('\n', output);
197 if (window->flush_after) {
198 u_fprintfu_fprintf_72(output, "%S\n", STR_CMD_FLUSH.data());
199 }
200 u_fflushu_fflush_72(output);
201}
202}