Bug Summary

File: file_morpho_stream.cc
Warning: line 362, column 7
Value stored to 'symbol' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name file_morpho_stream.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ file_morpho_stream.cc
1/*
2 * Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <https://www.gnu.org/licenses/>.
16 */
17/**
18 * Word class and MorphoStream class definitions
19 *
20 * @author Felipe Sánchez-Martínez
21 */
22
23#include <apertium/file_morpho_stream.h>
24#include <lttoolbox/string_utils.h>
25#include "apertium_config.h"
26#include <apertium/unlocked_cstdio.h>
27
28FileMorphoStream::FileMorphoStream(const char* ftxt, bool d, TaggerData *t) :
29 ms() {
30 foundEOF = false;
31 debug=d;
32 td = t;
33 me = td->getPatternList().newMatchExe();
34 alphabet = td->getPatternList().getAlphabet();
35 input.open(ftxt);
36 ca_any_char = alphabet(PatternList::ANY_CHAR);
37 ca_any_tag = alphabet(PatternList::ANY_TAG);
38
39 ConstantManager &constants = td->getConstants();
40 ca_kignorar = constants.getConstant("kIGNORAR"_u);
41 ca_kbarra = constants.getConstant("kBARRA"_u);
42 ca_kdollar = constants.getConstant("kDOLLAR"_u);
43 ca_kbegin = constants.getConstant("kBEGIN"_u);
44 ca_kmot = constants.getConstant("kMOT"_u);
45 ca_kmas = constants.getConstant("kMAS"_u);
46 ca_kunknown = constants.getConstant("kUNKNOWN"_u);
47
48 map<UString, int> &tag_index = td->getTagIndex();
49 ca_tag_keof = tag_index["TAG_kEOF"_u];
50 ca_tag_kundef = tag_index["TAG_kUNDEF"_u];
51
52 end_of_file = false;
53 null_flush = false;
54}
55
56FileMorphoStream::~FileMorphoStream()
57{
58 delete me;
59}
60
61TaggerWord *
62FileMorphoStream::get_next_word()
63{
64 if(vwords.size() != 0)
65 {
66 TaggerWord* word=vwords.front();
67 vwords.erase(vwords.begin());
68
69 if(word->isAmbiguous())
70 {
71 vector<UString> &ref = td->getDiscardRules();
72 for(unsigned int i = 0; i < ref.size(); i++)
73 {
74 word->discardOnAmbiguity(ref[i]);
75 }
76 }
77// cout << *word << endl;
78 return word;
79 }
80
81 if(input.eof())
82 {
83 return NULL__null;
84 }
85
86 int ivwords = 0;
87 vwords.push_back(new TaggerWord());
88
89 while(true)
90 {
91 UChar32 symbol = input.get();
92 if(input.eof() || (null_flush && symbol == '\0'))
93 {
94 end_of_file = true;
95 vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
96 return get_next_word();
97 }
98 if(symbol == '^')
99 {
100 readRestOfWord(ivwords);
101 return get_next_word();
102 }
103 else
104 {
105 UString str = ""_u;
106 if(symbol == '\\')
107 {
108 symbol = input.get();
109 str += '\\';
110 str += symbol;
111 symbol = '\\';
112 }
113 else
114 {
115 str += symbol;
116 }
117
118 while(symbol != '^')
119 {
120 symbol = input.get();
121 if(input.eof() || (null_flush && symbol == '\0')) {
122 end_of_file = true;
123 vwords[ivwords]->add_ignored_string(str);
124 vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
125 return get_next_word();
126 } else if(symbol == '\\') {
127 str += '\\';
128 symbol = input.get();
129 if(input.eof() || (null_flush && symbol == '\0')) {
130 end_of_file = true;
131 vwords[ivwords]->add_ignored_string(str);
132 vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
133 return get_next_word();
134 }
135 str += symbol;
136 symbol = '\\';
137 } else if(symbol == '^') {
138 if(str.size() > 0) {
139 vwords[ivwords]->add_ignored_string(str);
140 }
141 readRestOfWord(ivwords);
142 return get_next_word();
143 } else {
144 str += symbol;
145 }
146 }
147 }
148 }
149}
150
151void
152FileMorphoStream::lrlmClassify(UString const &str, int &ivwords)
153{
154 int floor = 0;
155 int last_type = -1;
156 int last_pos = 0;
157 int initial_iv = ivwords;
158
159 ms.init(me->getInitial());
160 for(int i = 0, limit = str.size(); i != limit; i++)
161 {
162 if(str[i] != '<')
163 {
164 if(str[i] == '+')
165 {
166 int val = ms.classifyFinals(me->getFinals());
167 if(val != -1)
168 {
169 last_pos = i-1;
170 last_type = val;
171 }
172 }
173 ms.step(u_toloweru_tolower_72(str[i]), ca_any_char);
174 }
175 else
176 {
177 UString tag;
178 for(int j = i+1; j != limit; j++)
179 {
180 if(str[j] == '\\')
181 {
182 j++;
183 }
184 else if(str[j] == '>')
185 {
186 tag = str.substr(i, j-i+1);
187 i = j;
188 break;
189 }
190 }
191
192 int symbol = alphabet(tag);
193 if(symbol)
194 {
195 ms.step(symbol, ca_any_tag);
196 }
197 else
198 {
199 ms.step(ca_any_tag);
200 }
201 }
202
203 if(ms.size() == 0)
204 {
205 if(last_pos != floor)
206 {
207 vwords[ivwords]->add_tag(last_type,
208 str.substr(floor, last_pos - floor + 1),
209 td->getPreferRules());
210 if(str[last_pos+1] == '+' && last_pos+1 < limit )
211 {
212 floor = last_pos + 1;
213 last_pos = floor + 1;
214 vwords[ivwords]->set_plus_cut(true);
215 if (((int)vwords.size())<=((int)(ivwords+1)))
216 vwords.push_back(new TaggerWord(true));
217 ivwords++;
218 ms.init(me->getInitial());
219 }
220 i = floor++;
221 }
222 else
223 {
224 if (debug)
225 {
226 cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
227 cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
228 }
229 vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
230 return;
231 }
232 }
233 else if(i == limit - 1)
234 {
235 if(ms.classifyFinals(me->getFinals()) == -1)
236 {
237 if(last_pos != floor)
238 {
239 vwords[ivwords]->add_tag(last_type,
240 str.substr(floor, last_pos - floor + 1),
241 td->getPreferRules());
242 if(str[last_pos+1] == '+' && last_pos+1 < limit )
243 {
244 floor = last_pos + 1;
245 last_pos = floor;
246 vwords[ivwords]->set_plus_cut(true);
247 if (((int)vwords.size())<=((int)(ivwords+1)))
248 vwords.push_back(new TaggerWord(true));
249 ivwords++;
250 ms.init(me->getInitial());
251 }
252 i = floor++;
253 }
254 else
255 {
256 if (debug)
257 {
258 cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
259 cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
260 }
261 vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
262 return;
263 }
264 }
265 }
266 }
267
268 int val = ms.classifyFinals(me->getFinals());
269 if(val == -1)
270 {
271 val = ca_tag_kundef;
272 if (debug)
273 {
274 cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
275 cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
276 }
277 if(ivwords > initial_iv) {
278 // We've partially added a multiword -- undo the previous add to avoid outputting a partial (chopped off) lexical form:
279 while(ivwords > initial_iv) {
280 delete vwords[ivwords];
281 vwords.pop_back();
282 ivwords--;
283 }
284 vwords[ivwords]->set_plus_cut(false);
285 vwords[ivwords]->erase_tag(last_type);
286 vwords[ivwords]->add_tag(last_type, str, td->getPreferRules());
287 return;
288 }
289 }
290 vwords[ivwords]->add_tag(val, str.substr(floor), td->getPreferRules());
291}
292
293void
294FileMorphoStream::readRestOfWord(int &ivwords)
295{
296 // first we have the superficial form
297 UString str;
298
299 while(true)
300 {
301 UChar32 symbol = input.get();
302 if(input.eof() || (null_flush && symbol == '\0'))
303 {
304 end_of_file = true;
305 if(str.size() > 0)
306 {
307 vwords[ivwords]->add_ignored_string(str);
308 cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
309 cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
310 cerr<<"Debug: "<< str <<"\n";
311 }
312 vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
313 return;
314 }
315 else if(symbol == '\\')
316 {
317 symbol = input.get();
318 str += '\\';
319 str += symbol;
320 }
321 else if(symbol == '/')
322 {
323 vwords[ivwords]->set_superficial_form(str);
324 str.clear();
325 break;
326 }
327 else if(symbol == '$')
328 {
329 vwords[ivwords]->set_superficial_form(str);
330 vwords[ivwords]->add_ignored_string("$"_u);
331 break;
332 }
333 else
334 {
335 str += symbol;
336 }
337 }
338
339 // then we read the acceptions
340
341 while(true)
342 {
343 UChar32 symbol = input.get();
344 if(input.eof() || (null_flush && symbol == '\0'))
345 {
346 end_of_file = true;
347 if(str.size() > 0)
348 {
349 vwords[ivwords]->add_ignored_string(str);
350 cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
351 cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
352 cerr<<"Debug: "<< str <<"\n";
353 }
354 vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
355 return;
356 }
357 else if(symbol == '\\')
358 {
359 symbol = input.get();
360 str += '\\';
361 str += symbol;
362 symbol = '\\'; // to prevent exiting with '\$'
Value stored to 'symbol' is never read
363 }
364 else if(symbol == '/')
365 {
366 lrlmClassify(str, ivwords);
367 str.clear();
368 ivwords = 0;
369 continue;
370 }
371 else if(symbol == '$')
372 {
373 if(str[0] != '*')// do nothing with unknown words
374 {
375 lrlmClassify(str, ivwords);
376 }
377 return;
378 }
379 else
380 {
381 str += symbol;
382 }
383 }
384}
385
386void
387FileMorphoStream::setNullFlush(bool nf)
388{
389 null_flush = nf;
390}
391
392bool
393FileMorphoStream::getEndOfFile(void)
394{
395 return end_of_file;
396}
397
398void
399FileMorphoStream::setEndOfFile(bool eof)
400{
401 end_of_file = eof;
402}
403
404void
405FileMorphoStream::rewind()
406{
407 input.rewind();
408 end_of_file = false;
409}