file_morpho_stream.cc

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name file_morpho_stream.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ file_morpho_stream.cc

File:	file_morpho_stream.cc
Warning:	line 362, column 7 Value stored to 'symbol' is never read

Bug Summary

Annotated Source Code

1	/*
2	* Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
3	*
4	* This program is free software; you can redistribute it and/or
5	* modify it under the terms of the GNU General Public License as
6	* published by the Free Software Foundation; either version 2 of the
7	* License, or (at your option) any later version.
8	*
9	* This program is distributed in the hope that it will be useful, but
10	* WITHOUT ANY WARRANTY; without even the implied warranty of
11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12	* General Public License for more details.
13	*
14	* You should have received a copy of the GNU General Public License
15	* along with this program; if not, see <https://www.gnu.org/licenses/>.
16	*/
17	/**
18	* Word class and MorphoStream class definitions
19	*
20	* @author Felipe Sánchez-Martínez
21	*/
22
23	#include <apertium/file_morpho_stream.h>
24	#include <lttoolbox/string_utils.h>
25	#include "apertium_config.h"
26	#include <apertium/unlocked_cstdio.h>
27
28	FileMorphoStream::FileMorphoStream(const char* ftxt, bool d, TaggerData *t) :
29	ms() {
30	foundEOF = false;
31	debug=d;
32	td = t;
33	me = td->getPatternList().newMatchExe();
34	alphabet = td->getPatternList().getAlphabet();
35	input.open(ftxt);
36	ca_any_char = alphabet(PatternList::ANY_CHAR);
37	ca_any_tag = alphabet(PatternList::ANY_TAG);
38
39	ConstantManager &constants = td->getConstants();
40	ca_kignorar = constants.getConstant("kIGNORAR"_u);
41	ca_kbarra = constants.getConstant("kBARRA"_u);
42	ca_kdollar = constants.getConstant("kDOLLAR"_u);
43	ca_kbegin = constants.getConstant("kBEGIN"_u);
44	ca_kmot = constants.getConstant("kMOT"_u);
45	ca_kmas = constants.getConstant("kMAS"_u);
46	ca_kunknown = constants.getConstant("kUNKNOWN"_u);
47
48	map<UString, int> &tag_index = td->getTagIndex();
49	ca_tag_keof = tag_index["TAG_kEOF"_u];
50	ca_tag_kundef = tag_index["TAG_kUNDEF"_u];
51
52	end_of_file = false;
53	null_flush = false;
54	}
55
56	FileMorphoStream::~FileMorphoStream()
57	{
58	delete me;
59	}
60
61	TaggerWord *
62	FileMorphoStream::get_next_word()
63	{
64	if(vwords.size() != 0)
65	{
66	TaggerWord* word=vwords.front();
67	vwords.erase(vwords.begin());
68
69	if(word->isAmbiguous())
70	{
71	vector<UString> &ref = td->getDiscardRules();
72	for(unsigned int i = 0; i < ref.size(); i++)
73	{
74	word->discardOnAmbiguity(ref[i]);
75	}
76	}
77	// cout << *word << endl;
78	return word;
79	}
80
81	if(input.eof())
82	{
83	return NULL__null;
84	}
85
86	int ivwords = 0;
87	vwords.push_back(new TaggerWord());
88
89	while(true)
90	{
91	UChar32 symbol = input.get();
92	if(input.eof() \|\| (null_flush && symbol == '\0'))
93	{
94	end_of_file = true;
95	vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
96	return get_next_word();
97	}
98	if(symbol == '^')
99	{
100	readRestOfWord(ivwords);
101	return get_next_word();
102	}
103	else
104	{
105	UString str = ""_u;
106	if(symbol == '\\')
107	{
108	symbol = input.get();
109	str += '\\';
110	str += symbol;
111	symbol = '\\';
112	}
113	else
114	{
115	str += symbol;
116	}
117
118	while(symbol != '^')
119	{
120	symbol = input.get();
121	if(input.eof() \|\| (null_flush && symbol == '\0')) {
122	end_of_file = true;
123	vwords[ivwords]->add_ignored_string(str);
124	vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
125	return get_next_word();
126	} else if(symbol == '\\') {
127	str += '\\';
128	symbol = input.get();
129	if(input.eof() \|\| (null_flush && symbol == '\0')) {
130	end_of_file = true;
131	vwords[ivwords]->add_ignored_string(str);
132	vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
133	return get_next_word();
134	}
135	str += symbol;
136	symbol = '\\';
137	} else if(symbol == '^') {
138	if(str.size() > 0) {
139	vwords[ivwords]->add_ignored_string(str);
140	}
141	readRestOfWord(ivwords);
142	return get_next_word();
143	} else {
144	str += symbol;
145	}
146	}
147	}
148	}
149	}
150
151	void
152	FileMorphoStream::lrlmClassify(UString const &str, int &ivwords)
153	{
154	int floor = 0;
155	int last_type = -1;
156	int last_pos = 0;
157	int initial_iv = ivwords;
158
159	ms.init(me->getInitial());
160	for(int i = 0, limit = str.size(); i != limit; i++)
161	{
162	if(str[i] != '<')
163	{
164	if(str[i] == '+')
165	{
166	int val = ms.classifyFinals(me->getFinals());
167	if(val != -1)
168	{
169	last_pos = i-1;
170	last_type = val;
171	}
172	}
173	ms.step(u_toloweru_tolower_72(str[i]), ca_any_char);
174	}
175	else
176	{
177	UString tag;
178	for(int j = i+1; j != limit; j++)
179	{
180	if(str[j] == '\\')
181	{
182	j++;
183	}
184	else if(str[j] == '>')
185	{
186	tag = str.substr(i, j-i+1);
187	i = j;
188	break;
189	}
190	}
191
192	int symbol = alphabet(tag);
193	if(symbol)
194	{
195	ms.step(symbol, ca_any_tag);
196	}
197	else
198	{
199	ms.step(ca_any_tag);
200	}
201	}
202
203	if(ms.size() == 0)
204	{
205	if(last_pos != floor)
206	{
207	vwords[ivwords]->add_tag(last_type,
208	str.substr(floor, last_pos - floor + 1),
209	td->getPreferRules());
210	if(str[last_pos+1] == '+' && last_pos+1 < limit )
211	{
212	floor = last_pos + 1;
213	last_pos = floor + 1;
214	vwords[ivwords]->set_plus_cut(true);
215	if (((int)vwords.size())<=((int)(ivwords+1)))
216	vwords.push_back(new TaggerWord(true));
217	ivwords++;
218	ms.init(me->getInitial());
219	}
220	i = floor++;
221	}
222	else
223	{
224	if (debug)
225	{
226	cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
227	cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
228	}
229	vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
230	return;
231	}
232	}
233	else if(i == limit - 1)
234	{
235	if(ms.classifyFinals(me->getFinals()) == -1)
236	{
237	if(last_pos != floor)
238	{
239	vwords[ivwords]->add_tag(last_type,
240	str.substr(floor, last_pos - floor + 1),
241	td->getPreferRules());
242	if(str[last_pos+1] == '+' && last_pos+1 < limit )
243	{
244	floor = last_pos + 1;
245	last_pos = floor;
246	vwords[ivwords]->set_plus_cut(true);
247	if (((int)vwords.size())<=((int)(ivwords+1)))
248	vwords.push_back(new TaggerWord(true));
249	ivwords++;
250	ms.init(me->getInitial());
251	}
252	i = floor++;
253	}
254	else
255	{
256	if (debug)
257	{
258	cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
259	cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
260	}
261	vwords[ivwords]->add_tag(ca_tag_kundef, str.substr(floor) , td->getPreferRules());
262	return;
263	}
264	}
265	}
266	}
267
268	int val = ms.classifyFinals(me->getFinals());
269	if(val == -1)
270	{
271	val = ca_tag_kundef;
272	if (debug)
273	{
274	cerr<<"Warning: There is not coarse tag for the fine tag '"<< str.substr(floor) <<"' of '" << str << "'\n";
275	cerr<<" This is because of an incomplete tagset definition or a dictionary error\n";
276	}
277	if(ivwords > initial_iv) {
278	// We've partially added a multiword -- undo the previous add to avoid outputting a partial (chopped off) lexical form:
279	while(ivwords > initial_iv) {
280	delete vwords[ivwords];
281	vwords.pop_back();
282	ivwords--;
283	}
284	vwords[ivwords]->set_plus_cut(false);
285	vwords[ivwords]->erase_tag(last_type);
286	vwords[ivwords]->add_tag(last_type, str, td->getPreferRules());
287	return;
288	}
289	}
290	vwords[ivwords]->add_tag(val, str.substr(floor), td->getPreferRules());
291	}
292
293	void
294	FileMorphoStream::readRestOfWord(int &ivwords)
295	{
296	// first we have the superficial form
297	UString str;
298
299	while(true)
300	{
301	UChar32 symbol = input.get();
302	if(input.eof() \|\| (null_flush && symbol == '\0'))
303	{
304	end_of_file = true;
305	if(str.size() > 0)
306	{
307	vwords[ivwords]->add_ignored_string(str);
308	cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
309	cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
310	cerr<<"Debug: "<< str <<"\n";
311	}
312	vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
313	return;
314	}
315	else if(symbol == '\\')
316	{
317	symbol = input.get();
318	str += '\\';
319	str += symbol;
320	}
321	else if(symbol == '/')
322	{
323	vwords[ivwords]->set_superficial_form(str);
324	str.clear();
325	break;
326	}
327	else if(symbol == '$')
328	{
329	vwords[ivwords]->set_superficial_form(str);
330	vwords[ivwords]->add_ignored_string("$"_u);
331	break;
332	}
333	else
334	{
335	str += symbol;
336	}
337	}
338
339	// then we read the acceptions
340
341	while(true)
342	{
343	UChar32 symbol = input.get();
344	if(input.eof() \|\| (null_flush && symbol == '\0'))
345	{
346	end_of_file = true;
347	if(str.size() > 0)
348	{
349	vwords[ivwords]->add_ignored_string(str);
350	cerr<<"Warning (internal): kIGNORE was returned while reading a word\n";
351	cerr<<"Word being read: "<<vwords[ivwords]->get_superficial_form()<<"\n";
352	cerr<<"Debug: "<< str <<"\n";
353	}
354	vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules());
355	return;
356	}
357	else if(symbol == '\\')
358	{
359	symbol = input.get();
360	str += '\\';
361	str += symbol;
362	symbol = '\\'; // to prevent exiting with '\$'
	Value stored to 'symbol' is never read
363	}
364	else if(symbol == '/')
365	{
366	lrlmClassify(str, ivwords);
367	str.clear();
368	ivwords = 0;
369	continue;
370	}
371	else if(symbol == '$')
372	{
373	if(str[0] != '*')// do nothing with unknown words
374	{
375	lrlmClassify(str, ivwords);
376	}
377	return;
378	}
379	else
380	{
381	str += symbol;
382	}
383	}
384	}
385
386	void
387	FileMorphoStream::setNullFlush(bool nf)
388	{
389	null_flush = nf;
390	}
391
392	bool
393	FileMorphoStream::getEndOfFile(void)
394	{
395	return end_of_file;
396	}
397
398	void
399	FileMorphoStream::setEndOfFile(bool eof)
400	{
401	end_of_file = eof;
402	}
403
404	void
405	FileMorphoStream::rewind()
406	{
407	input.rewind();
408	end_of_file = false;
409	}