Bug Summary

File:flags.c
Warning:line 500, column 17
Result of 'realloc' is converted to a pointer of type 'struct fsm_state', which is incompatible with sizeof operand type 'struct fsm'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name flags.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/flags.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <locale.h>
22#include "foma.h"
23
/* Defined in another translation unit; when non-zero, this file reports */
/* skipped work on stderr. */
extern int g_verbose;

/* Verdicts returned by flag_build() for a pair of flag symbols. */
#define FAIL 1
#define SUCCEED 2
#define NONE 3

/* File-local helpers, defined further down. */
static struct flags *flag_extract (struct fsm *net);
static char *flag_type_to_char (int type);
static void flag_purge (struct fsm *net, char *name);
static struct fsm *flag_create_symbol(int type, char *name, char *value);
34
/* One flag diacritic found in a network's sigma; nodes form a singly linked list */
/* built by flag_extract(). */
struct flags {
    int type;            /* value from flag_get_type() (one of the FLAG_* constants) */
    char *name;          /* attribute name, from flag_get_name() */
    char *value;         /* attribute value from flag_get_value(); NULL when the symbol has none */
    struct flags *next;  /* next node, NULL at the end of the list */
};
41
/* We eliminate all flags by creating a list of them and building a regex filter */
/* that successively removes unwanted paths. NB: flag_eliminate() called with the */
/* second argument NULL eliminates all flags. */
/* The regexes we build for each flag symbol are of the format: */
/*   ~[?* FAIL ~$SUCCEED THISFLAG ?*]     for U,P,D */
/* or */
/*   ~[(?* FAIL) ~$SUCCEED THISFLAG ?*]   for the R flag */
/* The function flag_build() determines, for the flag at hand, whether each */
/* of the other flags occurring in the network belongs in FAIL, in SUCCEED, */
/* or in neither. */
/* The languages FAIL and SUCCEED are then the unions of all symbols that cause */
/* incompatibility and compatibility, respectively. */
/* We intersect all these filters, creating a large filter that we compose both on */
/* the upper side of the network and the lower side: */
/*   RESULT = FILTER .o. ORIGINAL .o. FILTER */
/* We can't simply intersect the language with FILTER because the lower side flags */
/* are independent of the upper side ones, and the network may be a transducer. */
/* Finally, we replace the affected arcs with EPSILON arcs, and call */
/* sigma_cleanup() to purge the symbols not occurring on arcs. */
61
62///
63///Eliminate a flag from a network. If called with name = NULL, eliminate all flags.
64///
65
66struct fsm *flag_eliminate(struct fsm *net, char *name) {
67
68 struct flags *flags, *f, *ff;
69 struct fsm *filter, *succeed_flags, *fail_flags, *self, *newfilter, *newnet;
70 int flag, fstatus, found;
71
72 filter = NULL((void*)0);
73
74 if (net->pathcount == 0) {
75 if (g_verbose)
76 {
77 fprintf(stderrstderr,"Skipping flag elimination since there are no paths in network.\n");
78 fflush(stderrstderr);
79 }
80 return(net);
81 }
82
83 flags = flag_extract(net);
84 /* Check that flag actually exists in net */
85 if (name != NULL((void*)0)) {
86 for (found = 0, f = flags; f != NULL((void*)0); f = f->next) {
87 if (strcmp(name,f->name) == 0)
88 found = 1;
89 }
90 if (found == 0) {
91 if (g_verbose)
92 {
93 fprintf(stderrstderr,"Flag attribute '%s' does not occur in the network.\n",name);
94 fflush(stderrstderr);
95 }
96 return(net);
97 }
98 }
99
100 flag = 0;
101
102 for (f = flags; f != NULL((void*)0); f = f->next) {
103
104 if ((name == NULL((void*)0) || strcmp(f->name,name) == 0) &&
105 (f->type | FLAG_UNIFY1 | FLAG_REQUIRE32 | FLAG_DISALLOW4 | FLAG_EQUAL64)) {
106
107 succeed_flags = fsm_empty_set();
108 fail_flags = fsm_empty_set();
109 self = flag_create_symbol(f->type, f->name, f->value);
110
111 for (ff = flags, flag = 0; ff != NULL((void*)0); ff = ff->next) {
112 fstatus = flag_build(f->type, f->name, f->value, ff->type, ff->name, ff->value);
113 if (fstatus == FAIL1) {
114 fail_flags = fsm_minimize(fsm_union(fail_flags, flag_create_symbol(ff->type, ff->name, ff->value)));
115 flag = 1;
116 }
117 if (fstatus == SUCCEED2) {
118 succeed_flags = fsm_minimize(fsm_union(succeed_flags, flag_create_symbol(ff->type, ff->name, ff->value)));
119 flag = 1;
120 }
121 }
122 }
123
124 if (flag) {
125 if (f->type == FLAG_REQUIRE32) {
126 newfilter = fsm_complement(fsm_concat(fsm_optionality(fsm_concat(fsm_universal(), fail_flags)), fsm_concat(fsm_complement(fsm_contains(succeed_flags)), fsm_concat(self, fsm_universal()))));
127
128 } else {
129 newfilter = fsm_complement(fsm_contains(fsm_concat(fail_flags,fsm_concat(fsm_complement(fsm_contains(succeed_flags)),self))));
130 }
131
132 filter = (filter == NULL((void*)0)) ? newfilter : fsm_intersect(filter, newfilter);
133 }
134 flag = 0;
135 }
136 if (filter != NULL((void*)0)) {
137 extern int g_flag_is_epsilon;
138 int old_g_flag_is_epsilon;
139 old_g_flag_is_epsilon = g_flag_is_epsilon;
140 g_flag_is_epsilon = 0;
141 newnet = fsm_compose(fsm_copy(filter),fsm_compose(net,fsm_copy(filter)));
142 g_flag_is_epsilon = old_g_flag_is_epsilon;
143 } else {
144 newnet = net;
145 }
146 flag_purge(newnet, name);
147 newnet = fsm_minimize(newnet);
148 sigma_cleanup(newnet,0);
149 sigma_sort(newnet);
150 free(flags);
151 return(fsm_topsort(newnet));
152}
153
/* Build the symbol text "@T.name@" or, when value is non-empty, "@T.name.value@" */
/* (T being the letter from flag_type_to_char()) and hand it to fsm_symbol(). */
/* A NULL value is treated like the empty string. */
struct fsm *flag_create_symbol(int type, char *name, char *value) {
    const char *val = (value == NULL) ? "" : value;
    /* '@' + type letter + '.' + name + '.' + value + '@' + NUL */
    size_t capacity = strlen(name) + strlen(val) + 6;
    char *symbol = malloc(capacity);

    if (*val != '\0') {
        snprintf(symbol, capacity, "@%s.%s.%s@", flag_type_to_char(type), name, val);
    } else {
        snprintf(symbol, capacity, "@%s.%s@", flag_type_to_char(type), name);
    }

    return fsm_symbol(symbol);
}
174
175char *flag_type_to_char (int type) {
176 switch(type) {
177 case FLAG_UNIFY1:
178 return("U");
179 case FLAG_CLEAR2:
180 return("C");
181 case FLAG_DISALLOW4:
182 return("D");
183 case FLAG_NEGATIVE8:
184 return("N");
185 case FLAG_POSITIVE16:
186 return("P");
187 case FLAG_REQUIRE32:
188 return("R");
189 case FLAG_EQUAL64:
190 return("E");
191 }
192 return NULL((void*)0);
193}
194
195int flag_build(int ftype, char *fname, char *fvalue, int fftype, char *ffname, char *ffvalue) {
196 int valeq, selfnull;
197
198 selfnull = 0; /* If current flag has no value, e.g. @R.A@ */
199 if (strcmp(fname,ffname) != 0)
200 return NONE3;
201
202 if (fvalue == NULL((void*)0)) {
203 fvalue = "";
204 selfnull = 1;
205 }
206
207 if (ffvalue == NULL((void*)0))
208 ffvalue = "";
209
210 valeq = strcmp(fvalue, ffvalue);
211 /* U flags */
212 if (ftype == FLAG_UNIFY1 && fftype == FLAG_POSITIVE16 && valeq == 0)
213 return SUCCEED2;
214 if (ftype == FLAG_UNIFY1 && fftype == FLAG_CLEAR2)
215 return SUCCEED2;
216 if (ftype == FLAG_UNIFY1 && fftype == FLAG_UNIFY1 && valeq != 0)
217 return FAIL1;
218 if (ftype == FLAG_UNIFY1 && fftype == FLAG_POSITIVE16 && valeq != 0)
219 return FAIL1;
220 if (ftype == FLAG_UNIFY1 && fftype == FLAG_NEGATIVE8 && valeq == 0)
221 return FAIL1;
222
223 /* R flag with value = 0 */
224 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_UNIFY1 && selfnull)
225 return SUCCEED2;
226 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_POSITIVE16 && selfnull)
227 return SUCCEED2;
228 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_NEGATIVE8 && selfnull)
229 return SUCCEED2;
230 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_CLEAR2 && selfnull)
231 return FAIL1;
232
233 /* R flag with value */
234 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_POSITIVE16 && valeq == 0 && !selfnull)
235 return SUCCEED2;
236 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_UNIFY1 && valeq == 0 && !selfnull)
237 return SUCCEED2;
238 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_POSITIVE16 && valeq != 0 && !selfnull)
239 return FAIL1;
240 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_UNIFY1 && valeq != 0 && !selfnull)
241 return FAIL1;
242 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_NEGATIVE8 && !selfnull)
243 return FAIL1;
244 if (ftype == FLAG_REQUIRE32 && fftype == FLAG_CLEAR2 && !selfnull)
245 return FAIL1;
246
247 /* D flag with value = 0 */
248 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_CLEAR2 && selfnull)
249 return SUCCEED2;
250 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_POSITIVE16 && selfnull)
251 return FAIL1;
252 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_UNIFY1 && selfnull)
253 return FAIL1;
254 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_NEGATIVE8 && selfnull)
255 return FAIL1;
256
257 /* D flag with value */
258 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_POSITIVE16 && valeq != 0 && !selfnull)
259 return SUCCEED2;
260 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_CLEAR2 && !selfnull)
261 return SUCCEED2;
262 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_NEGATIVE8 && valeq == 0 && !selfnull)
263 return SUCCEED2;
264 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_POSITIVE16 && valeq == 0 && !selfnull)
265 return FAIL1;
266 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_UNIFY1 && valeq == 0 && !selfnull)
267 return FAIL1;
268 if (ftype == FLAG_DISALLOW4 && fftype == FLAG_NEGATIVE8 && valeq != 0 && !selfnull)
269 return FAIL1;
270
271 return NONE3;
272}
273
274
275/* Remove flags that are being eliminated from arcs and sigma */
276
277void flag_purge (struct fsm *net, char *name) {
278 struct fsm_state *fsm;
279 struct sigma *sigma;
280 int i, *ftable, sigmasize;
281 char *csym;
282 sigmasize = sigma_max(net->sigma)+1;
283 ftable = malloc(sizeof(int) * sigmasize);
284 fsm = net->states;
285 for (i=0; i<sigmasize; i++)
286 *(ftable+i)=0;
287
288 for (sigma = net->sigma; sigma != NULL((void*)0) && sigma->number != -1; sigma = sigma->next) {
289
290 if (flag_check(sigma->symbol)) {
291 if (name == NULL((void*)0)) {
292 *(ftable+(sigma->number)) = 1;
293 } else {
294 csym = (sigma->symbol) + 3;
295 if (strncmp(csym,name,strlen(name)) == 0 && (strlen(csym)>strlen(name)) && (strncmp(csym+strlen(name),".",1) == 0 || strncmp(csym+strlen(name),"@",1) == 0)) {
296 *(ftable+(sigma->number)) = 1;
297 }
298 }
299 }
300 }
301 for (i = 0; i < sigmasize; i++) {
302 if (*(ftable+i)) {
303 net->sigma = sigma_remove_num(i, net->sigma);
304 }
305 }
306
307 for (i=0; (fsm+i)->state_no != -1; i++) {
308 if ((fsm+i)->in >= 0 && (fsm+i)->out >= 0) {
309 if (*(ftable+(fsm+i)->in))
310 (fsm+i)->in = EPSILON0;
311 if (*(ftable+(fsm+i)->out))
312 (fsm+i)->out = EPSILON0;
313 }
314 }
315
316 free(ftable);
317 net->is_deterministic = net->is_minimized = net->is_epsilon_free = NO0;
318 return;
319}
320
321/* Extract all flags from network and place them in struct flag linked list */
322
323struct flags *flag_extract (struct fsm *net) {
324 struct sigma *sigma;
325 struct flags *flags, *flagst;
326
327 flags = NULL((void*)0);
328 for (sigma = net->sigma ; sigma != NULL((void*)0); sigma = sigma->next) {
329 if (flag_check(sigma->symbol)) {
330 flagst = malloc(sizeof(struct flags));
331 flagst->next = flags;
332 flags = flagst;
333
334 flags->type = flag_get_type(sigma->symbol);
335 flags->name = flag_get_name(sigma->symbol);
336 flags->value = flag_get_value(sigma->symbol);
337 }
338 }
339 return(flags);
340}
341
/* Return 1 if s is spelled like a flag diacritic, 0 otherwise (including */
/* when s is NULL). */
int flag_check(char *s) {

    /* We simply simulate this regex (where ND is not dot) */
    /* "@" [U|P|N|R|E|D] "." ND+ "." ND+ "@" | "@" [D|R|C] "." ND+ "@" */
    /* and return 1 if it matches */

    const char *p = s;

    /* A missing symbol is not a flag; previously this dereferenced NULL. */
    if (p == NULL)
        return 0;

    if (*p == '@') { p++; goto type_letter; }
    return 0;

 type_letter:
    if (*p == 'C') { p++; goto dot_last; }
    if (*p == 'N' || *p == 'E' || *p == 'U' || *p == 'P') { p++; goto dot_two; }
    if (*p == 'R' || *p == 'D') { p++; goto dot_opt; }
    return 0;

 dot_two:            /* N, E, U, P: name and value both required */
    if (*p == '.') { p++; goto name_two_first; }
    return 0;
 dot_opt:            /* R, D: value optional */
    if (*p == '.') { p++; goto name_opt_first; }
    return 0;
 dot_last:           /* C: exactly one segment follows */
    if (*p == '.') { p++; goto last_first; }
    return 0;

 name_two_first:     /* every segment needs at least one character */
    if (*p != '.' && *p != '\0') { p++; goto name_two_rest; }
    return 0;
 name_opt_first:
    if (*p != '.' && *p != '\0') { p++; goto name_opt_rest; }
    return 0;
 last_first:
    if (*p != '.' && *p != '\0') { p++; goto last_rest; }
    return 0;

 name_two_rest:      /* only a '.' ends this segment */
    if (*p == '.') { p++; goto last_first; }
    if (*p != '\0') { p++; goto name_two_rest; }
    return 0;
 name_opt_rest:      /* '@' ends the symbol, '.' starts the value */
    if (*p == '@') { p++; goto end_of_symbol; }
    if (*p == '.') { p++; goto last_first; }
    if (*p != '\0') { p++; goto name_opt_rest; }
    return 0;
 last_rest:          /* final segment: runs up to the closing '@' */
    if (*p == '@') { p++; goto end_of_symbol; }
    if (*p != '.' && *p != '\0') { p++; goto last_rest; }
    return 0;

 end_of_symbol:      /* nothing may follow the closing '@' */
    return (*p == '\0') ? 1 : 0;
}
382
383int flag_get_type(char *string) {
384 if (strncmp(string+1,"U.",2) == 0) {
385 return FLAG_UNIFY1;
386 }
387 if (strncmp(string+1,"C.",2) == 0) {
388 return FLAG_CLEAR2;
389 }
390 if (strncmp(string+1,"D.",2) == 0) {
391 return FLAG_DISALLOW4;
392 }
393 if (strncmp(string+1,"N.",2) == 0) {
394 return FLAG_NEGATIVE8;
395 }
396 if (strncmp(string+1,"P.",2) == 0) {
397 return FLAG_POSITIVE16;
398 }
399 if (strncmp(string+1,"R.",2) == 0) {
400 return FLAG_REQUIRE32;
401 }
402 if (strncmp(string+1,"E.",2) == 0) {
403 return FLAG_EQUAL64;
404 }
405 return 0;
406}
407
/* Return a copy (via xxstrndup) of the text between the first '.' of string and */
/* the next '.' or '@'; NULL when no such span is found. */
char *flag_get_name(char *string) {
    int pos, begin = 0, stop = 0;
    int len = strlen(string);

    /* step by utf8skip()+1 bytes per iteration */
    for (pos = 0; pos < len; pos += utf8skip(string + pos) + 1) {
        char c = string[pos];

        if (begin == 0) {
            /* still looking for the first '.' */
            if (c == '.') {
                begin = pos + 1;
            }
            continue;
        }
        if (c == '.' || c == '@') {
            stop = pos;
            break;
        }
    }

    if (begin > 0 && stop > 0) {
        return xxstrndup(string + begin, stop - begin);
    }
    return NULL;
}
428
/* Return a copy (via xxstrndup) of the text between the last '.' seen and the */
/* '@' that follows it, provided at least two '.' precede that '@'; NULL otherwise */
/* (e.g. for a symbol with a name but no value). */
char *flag_get_value(char *string) {
    int pos, first_dot = 0, begin = 0, stop = 0;
    int len = strlen(string);

    /* step by utf8skip()+1 bytes per iteration */
    for (pos = 0; pos < len; pos += utf8skip(string + pos) + 1) {
        char c = string[pos];

        if (c == '.') {
            if (first_dot == 0) {
                first_dot = pos + 1;    /* the '.' in front of the name */
            } else {
                begin = pos + 1;        /* a later '.': the value starts after it */
            }
            continue;
        }
        if (c == '@' && begin != 0) {
            stop = pos;
            break;
        }
    }

    if (begin > 0 && stop > 0) {
        return xxstrndup(string + begin, stop - begin);
    }
    return NULL;
}
453
454struct fsm *flag_twosided(struct fsm *net) {
455 struct fsm_state *fsm;
456 struct sigma *sigma;
457 int i, j, tail, *isflag, maxsigma, maxstate, newarcs, change;
458
459 /* Enforces twosided flag diacritics */
460
461 /* Mark flag symbols */
462 maxsigma = sigma_max(net->sigma);
463 isflag = calloc(maxsigma+1, sizeof(int));
464 fsm = net->states;
465 for (sigma = net->sigma ; sigma != NULL((void*)0); sigma = sigma->next) {
466 if (flag_check(sigma->symbol)) {
467 *(isflag+sigma->number) = 1;
468 } else {
469 *(isflag+sigma->number) = 0;
470 }
471 }
472 maxstate = 0;
473 change = 0;
474 for (i = 0, newarcs = 0; (fsm+i)->state_no != -1 ; i++) {
475 maxstate = (fsm+i)->state_no > maxstate ? (fsm+i)->state_no : maxstate;
476 if ((fsm+i)->target == -1)
477 continue;
478 if (*(isflag+(fsm+i)->in) && (fsm+i)->out == EPSILON0) {
479 change = 1;
480 (fsm+i)->out = (fsm+i)->in;
481 }
482 else if (*(isflag+(fsm+i)->out) && (fsm+i)->in == EPSILON0) {
483 change = 1;
484 (fsm+i)->in = (fsm+i)->out;
485 }
486 if ((*(isflag+(fsm+i)->in) || *(isflag+(fsm+i)->out)) && (fsm+i)->in != (fsm+i)->out) {
487 newarcs++;
488 }
489 }
490
491 if (newarcs == 0) {
492 if (change == 1) {
493 net->is_deterministic = UNK2;
494 net->is_minimized = UNK2;
495 net->is_pruned = UNK2;
496 return fsm_topsort(fsm_minimize(net));
497 }
498 return net;
499 }
500 net->states = realloc(net->states, sizeof(struct fsm)*(i+newarcs));
Result of 'realloc' is converted to a pointer of type 'struct fsm_state', which is incompatible with sizeof operand type 'struct fsm'
501 fsm = net->states;
502 tail = j = i;
503 maxstate++;
504 for (i = 0; i < tail; i++) {
505
506 if ((fsm+i)->target == -1)
507 continue;
508 if ((*(isflag+(fsm+i)->in) || *(isflag+(fsm+i)->out)) && (fsm+i)->in != (fsm+i)->out) {
509 if (*(isflag+(fsm+i)->in) && !*(isflag+(fsm+i)->out)) {
510 j = add_fsm_arc(fsm, j, maxstate, EPSILON0, (fsm+i)->out, (fsm+i)->target, 0, 0);
511 (fsm+i)->out = (fsm+i)->in;
512 (fsm+i)->target = maxstate;
513 maxstate++;
514 }
515 else if (*(isflag+(fsm+i)->out) && !*(isflag+(fsm+i)->in)) {
516 j = add_fsm_arc(fsm, j, maxstate, (fsm+i)->out, (fsm+i)->out, (fsm+i)->target, 0, 0);
517 (fsm+i)->out = EPSILON0;
518 (fsm+i)->target = maxstate;
519 maxstate++;
520 }
521 else if (*(isflag+(fsm+i)->in) && *(isflag+(fsm+i)->out)) {
522 j = add_fsm_arc(fsm, j, maxstate, (fsm+i)->out, (fsm+i)->out, (fsm+i)->target, 0, 0);
523 (fsm+i)->out = (fsm+i)->in;
524 (fsm+i)->target = maxstate;
525 maxstate++;
526 }
527 }
528 }
529 /* Add sentinel */
530 add_fsm_arc(fsm, j, -1, -1, -1, -1, -1, -1);
531 net->is_deterministic = UNK2;
532 net->is_minimized = UNK2;
533 return fsm_topsort(fsm_minimize(net));
534}