Bug Summary

File:spelling.c
Warning:line 416, column 28
The left operand of '==' is a garbage value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name spelling.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/spelling.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include <limits.h>
22#include "foma.h"
23
24#define INITIAL_AGENDA_SIZE256 256
25#define INITIAL_HEAP_SIZE256 256
26#define INITIAL_STRING_SIZE256 256
27#define MED_DEFAULT_LIMIT4 4 /* Default max words to find */
28#define MED_DEFAULT_CUTOFF15 15 /* Default MED cost cutoff */
29#define MED_DEFAULT_MAX_HEAP_SIZE262145 262145 /* By default won't grow heap more than this */
30
31#define BITMASK(b)(1 << ((b) & 7)) (1 << ((b) & 7))
32#define BITSLOT(b)((b) >> 3) ((b) >> 3)
33#define BITSET(a,b)((a)[((b) >> 3)] |= (1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] |= BITMASK(b)(1 << ((b) & 7)))
34#define BITCLEAR(a,b)((a)[((b) >> 3)] &= ~(1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] &= ~BITMASK(b)(1 << ((b) & 7)))
35#define BITTEST(a,b)((a)[((b) >> 3)] & (1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] & BITMASK(b)(1 << ((b) & 7)))
36#define BITNSLOTS(nb)((nb + 8 - 1) / 8) ((nb + CHAR_BIT8 - 1) / CHAR_BIT8)
37#define min_(X, Y)((X) < (Y) ? (X) : (Y)) ((X) < (Y) ? (X) : (Y))
38
39static int calculate_h(struct apply_med_handle *medh, int *intword, int currpos, int state);
40static struct astarnode *node_delete_min(struct apply_med_handle *medh);
41int node_insert(struct apply_med_handle *medh, int wordpos, int fsmstate, int g, int h, int in, int out, int parent);
42
43char *print_sym(int sym, struct sigma *sigma) {
44 while (sigma != NULL((void*)0)) {
45 if (sigma->number == sym) {
46 return(sigma->symbol);
47 }
48 sigma = sigma->next;
49 }
50 return NULL((void*)0);
51}
52
53void print_match(struct apply_med_handle *medh, struct astarnode *node, struct sigma *sigma, char *word) {
54 int sym, i, wordlen , printptr;
55 struct astarnode *n;
56 int_stack_clear();
57 wordlen = medh->wordlen;
58 for (n = node; n != NULL((void*)0) ; n = medh->agenda+(n->parent)) {
59 if (n->in == 0 && n->out == 0)
60 break;
61 if (n->parent == -1)
62 break;
63 int_stack_push(n->in);
64 }
65 printptr = 0;
66 if (medh->outstring_length < 2*wordlen) {
67 medh->outstring_length *= 2;
68 medh->outstring = realloc(medh->outstring, medh->outstring_length*sizeof(char));
69 }
70 while (!(int_stack_isempty())) {
71 sym = int_stack_pop();
72 if (sym > 2) {
73 printptr += sprintf(medh->outstring+printptr,"%s", print_sym(sym, sigma));
74 }
75 if (sym == 0) {
76 if (medh->align_symbol) {
77 printptr += sprintf(medh->outstring+printptr,"%s",medh->align_symbol);
78 }
79 }
80 if (sym == 2) {
81 printptr += sprintf(medh->outstring+printptr,"@");
82 }
83 }
84 for (n = node; n != NULL((void*)0) ; n = medh->agenda+(n->parent)) {
85 if (n->in == 0 && n->out == 0)
86 break;
87 if (n->parent == -1)
88 break;
89 else
90 int_stack_push(n->out);
91 }
92 printptr = 0;
93 if (medh->instring_length < 2*wordlen) {
94 medh->instring_length *= 2;
95 medh->instring = realloc(medh->instring, medh->instring_length*sizeof(char));
96 }
97 for (i = 0; !(int_stack_isempty()); ) {
98 sym = int_stack_pop();
99 if (sym > 2) {
100 printptr += sprintf(medh->instring+printptr,"%s", print_sym(sym, sigma));
101 i += utf8skip(word+i)+1;
102 }
103 if (sym == 0) {
104 if (medh->align_symbol) {
105 printptr += sprintf(medh->instring+printptr,"%s",medh->align_symbol);
106 }
107 }
108 if (sym == 2) {
109 if (i > wordlen) {
110 printptr += sprintf(medh->instring+printptr,"*");
111 } else {
112 //printf("%.*s", utf8skip(word+i)+1, word+i);
113 printptr += sprintf(medh->instring+printptr,"%.*s", utf8skip(word+i)+1, word+i);
114 i+= utf8skip(word+i)+1;
115 }
116 }
117 }
118 medh->cost = node->g;
119 // printf("Cost[f]: %i\n\n", node->g);
120}
121
122void apply_med_clear(struct apply_med_handle *medh) {
123 if (medh == NULL((void*)0))
124 return;
125 if (medh->agenda != NULL((void*)0))
126 free(medh->agenda);
127 if (medh->instring != NULL((void*)0))
128 free(medh->instring);
129 if (medh->outstring != NULL((void*)0))
130 free(medh->outstring);
131 if (medh->heap != NULL((void*)0))
132 free(medh->heap);
133 if (medh->state_array != NULL((void*)0))
134 free(medh->state_array);
135 if (medh->align_symbol != NULL((void*)0))
136 free(medh->align_symbol);
137 if (medh->letterbits != NULL((void*)0))
138 free(medh->letterbits);
139 if (medh->nletterbits != NULL((void*)0))
140 free(medh->nletterbits);
141 if (medh->intword != NULL((void*)0))
142 free(medh->intword);
143 if (medh->sigmahash != NULL((void*)0))
144 sh_done(medh->sigmahash);
145 free(medh);
146}
147
148struct apply_med_handle *apply_med_init(struct fsm *net) {
149
150 struct apply_med_handle *medh;
151 struct sigma *sigma;
152 medh = calloc(1,sizeof(struct apply_med_handle));
153 medh->net = net;
154 medh->agenda = malloc(sizeof(struct astarnode)*INITIAL_AGENDA_SIZE256);
155 medh->agenda->f = -1;
156 medh->agenda_size = INITIAL_AGENDA_SIZE256;
157
158 medh->heap = malloc(sizeof(int)*INITIAL_HEAP_SIZE256);
159 medh->heap_size = INITIAL_HEAP_SIZE256;
160 *(medh->heap) = 0; /* Points to sentinel */
161 medh->astarcount = 1;
162 medh->heapcount = 0;
163 medh->state_array = map_firstlines(net);
164 if (net->medlookup != NULL((void*)0) && net->medlookup->confusion_matrix != NULL((void*)0)) {
165 medh->hascm = 1;
166 medh->cm = net->medlookup->confusion_matrix;
167 }
168 medh->maxsigma = sigma_max(net->sigma)+1;
169 medh->sigmahash = sh_init();
170 for (sigma = net->sigma; sigma != NULL((void*)0) && sigma->number != -1 ; sigma=sigma->next ) {
171 if (sigma->number > IDENTITY2) {
172 sh_add_string(medh->sigmahash, sigma->symbol, sigma->number);
173 }
174 }
175
176
177 fsm_create_letter_lookup(medh, net);
178
179 medh->instring = malloc(sizeof(char)*INITIAL_STRING_SIZE256);
180 medh->instring_length = INITIAL_STRING_SIZE256;
181 medh->outstring = malloc(sizeof(char)*INITIAL_STRING_SIZE256);
182 medh->outstring_length = INITIAL_STRING_SIZE256;
183
184 medh->med_limit = MED_DEFAULT_LIMIT4;
185 medh->med_cutoff = MED_DEFAULT_CUTOFF15;
186 medh->med_max_heap_size = MED_DEFAULT_MAX_HEAP_SIZE262145;
187 return(medh);
188}
189
190void apply_med_set_heap_max(struct apply_med_handle *medh, int max) {
191 if (medh != NULL((void*)0)) {
192 medh->med_max_heap_size = max;
193 }
194}
195
196void apply_med_set_align_symbol(struct apply_med_handle *medh, char *align) {
197 if (medh != NULL((void*)0)) {
198 medh->align_symbol = strdup(align);
199 }
200}
201
202void apply_med_set_med_limit(struct apply_med_handle *medh, int max) {
203 if (medh != NULL((void*)0)) {
204 medh->med_limit = max;
205 }
206}
207
208void apply_med_set_med_cutoff(struct apply_med_handle *medh, int max) {
209 if (medh != NULL((void*)0)) {
210 medh->med_cutoff = max;
211 }
212}
213
214int apply_med_get_cost(struct apply_med_handle *medh) {
215 return(medh->cost);
216}
217
218char *apply_med_get_instring(struct apply_med_handle *medh) {
219 return(medh->instring);
220}
221
222char *apply_med_get_outstring(struct apply_med_handle *medh) {
223 return(medh->outstring);
224}
225
226char *apply_med(struct apply_med_handle *medh, char *word) {
227
228 /* local ok: i, j, target, in, out, g, h, curr_node */
229 /* not ok: curr_ptr, curr_pos, lines, nummatches, nodes_expanded, curr_state */
230
231 int i, j, target, in, out, g, h, thisskip;
232
233 int delcost, subscost, inscost;
234 char temputf[5] ;
235
236 struct astarnode *curr_node;
237
238 delcost = subscost = inscost = 1;
239
240
241 if (word == NULL((void*)0)) {
1
Assuming 'word' is not equal to NULL
2
Taking false branch
242 goto resume;
243 }
244
245 medh->word = word;
246
247 medh->nodes_expanded = 0;
248 medh->astarcount = 1;
249 medh->heapcount = 0;
250
251 medh->wordlen = strlen(word);
252 medh->utf8len = utf8strlen(word);
253 if (medh->intword != NULL((void*)0)) {
3
Assuming field 'intword' is equal to NULL
4
Taking false branch
254 free(medh->intword);
255 }
256 medh->intword = malloc(sizeof(int)*(medh->utf8len+1));
5
Storing uninitialized value
257
258 /* intword -> sigma numbers of word */
259 for (i=0, j=0; i < medh->wordlen; i += thisskip, j++) {
6
Assuming 'i' is >= field 'wordlen'
7
Loop condition is false. Execution continues on line 272
260 thisskip = utf8skip(word+i)+1;
261 strncpy(temputf, word+i, thisskip);
262 temputf[thisskip] = '\0';
263 if (sh_find_string(medh->sigmahash, temputf) != NULL((void*)0)) {
264 *(medh->intword+j) = sh_get_value(medh->sigmahash);
265 } else {
266 *(medh->intword+j) = IDENTITY2;
267 }
268 }
269
270
271
272 *(medh->intword+j) = -1; /* sentinel */
273
274 /* Insert (0,0) g = 0 */
275
276 h = calculate_h(medh, medh->intword, 0, 0);
8
Calling 'calculate_h'
10
Returning from 'calculate_h'
277
278 /* Root node */
279
280 if (!node_insert(medh,0,0,0,h,0,0,-1)) { goto out; }
11
Taking false branch
281 medh->nummatches = 0;
282
283 for(;;) {
12
Loop condition is true. Entering loop body
284
285 curr_node = node_delete_min(medh);
286 /* Need to save this in case we realloc and print_match() */
287 medh->curr_agenda_offset = curr_node-medh->agenda;
288 if (curr_node
12.1
'curr_node' is not equal to NULL
== NULL((void*)0)) {
13
Taking false branch
289 //printf("Reached cutoff of %i.\n", medh->med_cutoff);
290 goto out;
291 }
292 medh->curr_state = curr_node->fsmstate;
293 medh->curr_ptr = (medh->state_array+medh->curr_state)->transitions;
294 if (!medh->curr_ptr->final_state || !(curr_node->wordpos == medh->utf8len)) {
14
Assuming field 'final_state' is 0
295 //continue;
296 }
297
298 medh->nodes_expanded++;
299
300 if (curr_node->f > medh->med_cutoff) {
15
Assuming field 'f' is <= field 'med_cutoff'
16
Taking false branch
301 //printf("Reached cutoff of %i\n", medh->med_cutoff);
302 goto out;
303 }
304
305 medh->curr_pos = curr_node->wordpos;
306 medh->curr_state = curr_node->fsmstate;
307 medh->curr_g = curr_node->g;
308
309 medh->lines = 0;
310 medh->curr_node_has_match = 0;
311
312 for (medh->curr_ptr = (medh->state_array+medh->curr_state)->transitions ; ;) {
17
Loop condition is true. Entering loop body
313 if (medh->curr_ptr->state_no == -1) {
18
Assuming the condition is false
19
Taking false branch
314 break;
315 }
316 medh->lines++;
317 if (medh->curr_ptr->final_state
19.1
Field 'final_state' is 0
&& medh->curr_pos == medh->utf8len) {
318 if (medh->curr_node_has_match == 0) {
319 /* Found a match */
320 medh->curr_node_has_match = 1;
321 print_match(medh, medh->agenda+medh->curr_agenda_offset, medh->net->sigma, medh->word);
322 medh->nummatches++;
323 return(medh->outstring);
324 }
325 }
326
327 resume:
328
329 if (medh->nummatches == medh->med_limit) {
20
Assuming field 'nummatches' is not equal to field 'med_limit'
330 goto out;
331 }
332
333 if (medh->curr_ptr->target == -1 && medh->curr_pos == medh->utf8len)
21
Assuming the condition is true
22
Assuming field 'curr_pos' is not equal to field 'utf8len'
334 break;
335 if (medh->curr_ptr->target == -1 && medh->lines
22.1
Field 'lines' is equal to 1
== 1)
23
Taking true branch
336 goto insert;
24
Control jumps to line 380
337 if (medh->curr_ptr->target == -1)
338 break;
339
340 target = medh->curr_ptr->target;
341 /* Add nodes to edge:0, edge:input, 0:edge */
342
343 /* Delete a symbol from input */
344 in = medh->curr_ptr->in;
345 out = 0;
346 g = medh->hascm ? medh->curr_g + *(medh->cm+in*medh->maxsigma) : medh->curr_g + delcost;
347 h = calculate_h(medh, medh->intword, medh->curr_pos, medh->curr_ptr->target);
348
349 if ((medh->curr_pos == medh->utf8len) && (medh->curr_ptr->final_state == 0) && (h == 0)) {
350 // h = 1;
351 }
352
353 if (g+h <= medh->med_cutoff) {
354 if (!node_insert(medh, medh->curr_pos, target, g, h, in, out, medh->curr_agenda_offset)) {
355 goto out;
356 }
357 }
358 if (medh->curr_pos == medh->utf8len)
359 goto skip;
360
361 /* Match/substitute */
362 in = medh->curr_ptr->in;
363 out = *(medh->intword+medh->curr_pos);
364 if (in != out) {
365 g = medh->hascm ? medh->curr_g + *(medh->cm+in*medh->maxsigma+out) : medh->curr_g + subscost;
366 } else {
367 g = medh->curr_g;
368 }
369
370 h = calculate_h(medh, medh->intword, medh->curr_pos+1, medh->curr_ptr->target);
371 if ((g+h) <= medh->med_cutoff) {
372 if (!node_insert(medh,medh->curr_pos+1, target, g, h, in, out, medh->curr_agenda_offset)) {
373 goto out;
374 }
375 }
376 insert:
377 /* Insert a symbol into input */
378 /* Can only be done once per state */
379
380 if (medh->lines
24.1
Field 'lines' is equal to 1
== 1) {
25
Taking true branch
381
382 in = 0;
383 out = *(medh->intword+medh->curr_pos);
384
385 g = medh->hascm ? medh->curr_g + *(medh->cm+out) : medh->curr_g + inscost;
26
Assuming field 'hascm' is false
27
'?' condition is false
386 h = calculate_h(medh, medh->intword, medh->curr_pos+1, medh->curr_state);
28
Calling 'calculate_h'
387
388 if (g+h <= medh->med_cutoff)
389 if (!node_insert(medh,medh->curr_pos+1, medh->curr_state, g, h, in, out, medh->curr_agenda_offset)) {
390 goto out;
391 }
392 }
393 if (medh->curr_ptr->target == -1)
394 break;
395 skip:
396 if ((medh->curr_ptr+1)->state_no == (medh->curr_ptr)->state_no)
397 medh->curr_ptr++;
398 else
399 break;
400 }
401 }
402 out:
403 return(NULL((void*)0));
404}
405
406int calculate_h(struct apply_med_handle *medh, int *intword, int currpos, int state) {
407 int i, j, hinf, hn, curr_sym;
408 uint8_t *bitptr, *nbitptr;
409 hinf = 0;
410 hn = 0;
411
412 bitptr = state*medh->bytes_per_letter_array + medh->letterbits;
413 nbitptr = state*medh->bytes_per_letter_array + medh->nletterbits;
414
415 /* For n = inf */
416 if (*(intword+currpos) == -1)
9
Taking true branch
29
The left operand of '==' is a garbage value
417 return 0;
418 for (i = currpos; *(intword+i) != -1; i++) {
419 curr_sym = *(intword+i);
420 if (!BITTEST(bitptr, curr_sym)((bitptr)[((curr_sym) >> 3)] & (1 << ((curr_sym
) & 7)))
) {
421 hinf++;
422 }
423 }
424 /* For n = maxdepth */
425 if (*(intword+currpos) == -1)
426 return 0;
427 for (i = currpos, j = 0; j < medh->maxdepth && *(intword+i) != -1; i++, j++) {
428 curr_sym = *(intword+i);
429 if (!BITTEST(nbitptr, curr_sym)((nbitptr)[((curr_sym) >> 3)] & (1 << ((curr_sym
) & 7)))
) {
430 hn++;
431 }
432 }
433 return(hinf > hn ? hinf : hn);
434}
435
436struct astarnode *node_delete_min(struct apply_med_handle *medh) {
437 int i, child;
438 struct astarnode *firstptr, *lastptr;
439 if (medh->heapcount == 0) {
440 return NULL((void*)0);
441 }
442
443 /* We find the min from the heap */
444
445 firstptr = medh->agenda+medh->heap[1];
446 lastptr = medh->agenda+medh->heap[medh->heapcount];
447 medh->heapcount--;
448
449 /* Adjust heap */
450 for (i = 1; (i<<1) <= medh->heapcount; i = child) {
451 child = i<<1;
452
453 /* If right child is smaller (higher priority) than left child */
454 if (child != medh->heapcount &&
455 ((medh->agenda+medh->heap[child+1])->f < (medh->agenda+medh->heap[child])->f ||
456 ((medh->agenda+medh->heap[child+1])->f <= (medh->agenda+medh->heap[child])->f &&
457 (medh->agenda+medh->heap[child+1])->wordpos > (medh->agenda+medh->heap[child])->wordpos))) {
458 child++;
459 }
460
461 /* If child has lower priority than last element */
462 if ((medh->agenda+medh->heap[child])->f < lastptr->f ||
463 ((medh->agenda+medh->heap[child])->f <= lastptr->f &&
464 (medh->agenda+medh->heap[child])->wordpos > lastptr->wordpos)) {
465
466 medh->heap[i] = medh->heap[child];
467 } else {
468 break;
469 }
470 }
471 medh->heap[i] = (lastptr-medh->agenda);
472 return(firstptr);
473}
474
475int node_insert(struct apply_med_handle *medh, int wordpos, int fsmstate, int g, int h, int in, int out, int parent) {
476 int i,j,f;
477 /* We add the node in the array */
478 i = medh->astarcount;
479 if (i >= (medh->agenda_size-1)) {
480 if (medh->agenda_size*2 >= medh->med_max_heap_size) {
481 //printf("heap limit reached by %i\n", medh->med_max_heap_size);
482 return 0;
483 }
484 medh->agenda_size *= 2;
485 medh->agenda = realloc(medh->agenda, sizeof(struct astarnode)*medh->agenda_size);
486 }
487 f = g + h;
488 (medh->agenda+i)->wordpos = wordpos;
489 (medh->agenda+i)->fsmstate = fsmstate;
490 (medh->agenda+i)->f = f;
491 (medh->agenda+i)->g = g;
492 (medh->agenda+i)->h = h;
493 (medh->agenda+i)->in = in;
494 (medh->agenda+i)->out = out;
495 (medh->agenda+i)->parent = parent;
496 medh->astarcount++;
497
498 /* We also put the ptr on the heap */
499 medh->heapcount++;
500
501 if (medh->heapcount == medh->heap_size-1) {
502 medh->heap = realloc(medh->heap, sizeof(int)*medh->heap_size*2);
503 medh->heap_size *= 2;
504 }
505 /* >= makes fifo */
506 // for (j = heapcount; (agenda+heap[j/2])->f > f; j /= 2) {
507 for (j = medh->heapcount; ( (medh->agenda+medh->heap[j>>1])->f > f) || ((medh->agenda+medh->heap[j>>1])->f >= f && (medh->agenda+medh->heap[j>>1])->wordpos <= wordpos) ; j = j >> 1) {
508 medh->heap[j] = medh->heap[j>>1];
509 }
510 medh->heap[j] = i;
511 return 1;
512}
513
514
515void letterbits_union(int v, int vp, uint8_t *ptr, int bytes_per_letter_array) {
516 int i;
517 uint8_t *vptr, *vpptr;
518 vptr = ptr+(v*bytes_per_letter_array);
519 vpptr = ptr+(vp*bytes_per_letter_array);
520 for (i=0; i < bytes_per_letter_array; i++) {
521 *(vptr+i) = *(vptr+i)|*(vpptr+i);
522 }
523}
524
525void letterbits_copy(int source, int target, uint8_t *ptr, int bytes_per_letter_array) {
526 int i;
527 uint8_t *sourceptr, *targetptr;
528 sourceptr = ptr+(source*bytes_per_letter_array);
529 targetptr = ptr+(target*bytes_per_letter_array);
530 for (i=0; i < bytes_per_letter_array; i++) {
531 *(targetptr+i) = *(sourceptr+i);
532 }
533}
534
535void letterbits_add(int v, int symbol, uint8_t *ptr, int bytes_per_letter_array) {
536 uint8_t *vptr;
537 vptr = ptr+(v*bytes_per_letter_array);
538 BITSET(vptr, symbol)((vptr)[((symbol) >> 3)] |= (1 << ((symbol) &
7)))
;
539}
540
541/* Creates, for each state, a list of symbols that can be matched */
542/* somewhere in subsequent paths (the case n = inf) */
543/* This is needed for h() of A*-search in approximate matching */
544/* This is done by a DFS where each state gets the properties */
545/* of the arcs it has as well as a copy of the properties of the target state */
546/* At the same time we keep track of the strongly connected components */
547/* And copy the properties from each root of the SCC to the children */
548/* The SCC part is required for the algorithm to work with cyclic graphs */
549/* This is basically a modification of Tarjan's (1972) SCC algorithm */
550/* However, it's converted from recursive form to iterative form using */
551/* goto statements. Here's the original recursive specification: */
552
553/* Input: FSM = (V,E) */
554/* Output: bitvector v.letters for each state */
555/* index = 1 DFS node number counter */
556
557/* procedure Mark(v) */
558/* v.index = index Set the depth index for v */
559/* v.lowlink = index */
560/* index++ */
561/* push(v) Push v on the stack */
562/* forall edge = (v, v') in E do Consider successors of v */
563/* v.letters |= v'.letters | edge Copy target v'.letters */
564/* if (v'.index == 0) Was successor v' visited? */
565/* Mark(v') Recurse */
566/* v.lowlink = min(v.lowlink, v'.lowlink) */
567/* elseif (v' is on stack) Was successor v' in stack S? */
568/* v.lowlink = min(v.lowlink, v'.index) */
569/* if (v.lowlink == v.index) Is v the root of a SCC? */
570/* do */
571/* pop(v') */
572/* v'.letters = v.letters */
573/* while (v' != v) */
574
575/* For keeping track of the strongly connected components */
576/* when doing the DFS */
577
578struct sccinfo {
579 int index;
580 int lowlink;
581 int on_t_stack;
582};
583
584void fsm_create_letter_lookup(struct apply_med_handle *medh, struct fsm *net) {
585
586 int num_states, num_symbols, index, v, vp, copystate, i, j;
587 struct fsm_state *curr_ptr;
588 struct sccinfo *sccinfo;
589 int depth;
590 medh->maxdepth = 2;
591
592 num_states = net->statecount;
593 num_symbols = sigma_max(net->sigma);
594
595 medh->bytes_per_letter_array = BITNSLOTS(num_symbols+1)((num_symbols+1 + 8 - 1) / 8);
596 medh->letterbits = calloc(medh->bytes_per_letter_array*num_states,sizeof(uint8_t));
597 medh->nletterbits = calloc(medh->bytes_per_letter_array*num_states,sizeof(uint8_t));
598
599 sccinfo = calloc(num_states,sizeof(struct sccinfo));
600
601 index = 1;
602 curr_ptr = net->states;
603 goto l1;
604
605 /* Here we go again, converting a recursive algorithm to an iterative one */
606 /* by gotos */
607
608 while(!ptr_stack_isempty()) {
609
610 curr_ptr = ptr_stack_pop();
611
612 v = curr_ptr->state_no; /* source state number */
613 vp = curr_ptr->target; /* target state number */
614
615 /* T: v.letterlist = list_union(v'->list, current edge label) */
616
617 letterbits_union(v, vp, medh->letterbits,medh->bytes_per_letter_array); /* add v' target bits to v */
618 letterbits_add(v, curr_ptr->in, medh->letterbits,medh->bytes_per_letter_array); /* add current arc label to v */
619
620 (sccinfo+v)->lowlink = min_((sccinfo+v)->lowlink,(sccinfo+vp)->lowlink)(((sccinfo+v)->lowlink) < ((sccinfo+vp)->lowlink) ? (
(sccinfo+v)->lowlink) : ((sccinfo+vp)->lowlink))
;
621
622 if ((curr_ptr+1)->state_no != curr_ptr->state_no) {
623 goto l4;
624 } else {
625 goto l3;
626 }
627
628 l1:
629 v = curr_ptr->state_no;
630 vp = curr_ptr->target; /* target */
631 /* T: v.lowlink = index, index++, Tpush(v) */
632 (sccinfo+v)->index = index;
633 (sccinfo+v)->lowlink = index;
634 index++;
635 int_stack_push(v);
636 (sccinfo+v)->on_t_stack = 1;
637 /* if v' not visited (is v'.index set) */
638
639 /* If done here, check lowlink, pop */
640
641 if (vp == -1)
642 goto l4;
643 l2:
644 letterbits_add(v, curr_ptr->in, medh->letterbits,medh->bytes_per_letter_array);
645 if ((sccinfo+vp)->index == 0) {
646 /* push (v,e) ptr on stack */
647 ptr_stack_push(curr_ptr);
648 curr_ptr = (medh->state_array+(curr_ptr->target))->transitions;
649 /* (v,e) = (v',firstedge), goto init */
650 goto l1;
651 /* if v' visited */
652 /* T: v.lowlink = min(v.lowlink, v'.lowlink), union v.list with e, move to next edge in v, goto loop */
653 } else if ((sccinfo+vp)->on_t_stack) {
654 (sccinfo+v)->lowlink = min_((sccinfo+v)->lowlink,(sccinfo+vp)->lowlink)(((sccinfo+v)->lowlink) < ((sccinfo+vp)->lowlink) ? (
(sccinfo+v)->lowlink) : ((sccinfo+vp)->lowlink))
;
655 }
656 /* If node is visited, copy its bits */
657 letterbits_union(v,vp,medh->letterbits,medh->bytes_per_letter_array);
658
659 l3:
660 if ((curr_ptr+1)->state_no == curr_ptr->state_no) {
661 curr_ptr++;
662 v = curr_ptr->state_no;
663 vp = curr_ptr->target; /* target */
664 goto l2;
665 }
666
667
668 /* T: if lastedge, v.lowlink == v.index, pop Tstack until v is popped and copy v.list to others */
669 /* Copy all bits from root of SCC to descendants */
670 l4:
671 if ((sccinfo+v)->lowlink == (sccinfo+v)->index) {
672 //printf("\nSCC: [%i] ",v);
673 while((copystate = int_stack_pop()) != v) {
674 (sccinfo+copystate)->on_t_stack = 0;
675 letterbits_copy(v, copystate, medh->letterbits, medh->bytes_per_letter_array);
676 //printf("%i ", copystate);
677 }
678 (sccinfo+v)->on_t_stack = 0;
679 //printf("\n");
680 }
681 }
682
683 for (i=0; i < num_states; i++) {
684 //printf("State %i: ",i);
685 for (j=0; j <= num_symbols; j++) {
686 if (BITTEST(medh->letterbits+(i*medh->bytes_per_letter_array),j)((medh->letterbits+(i*medh->bytes_per_letter_array))[((
j) >> 3)] & (1 << ((j) & 7)))
) {
687 //printf("[%i]",j);
688 }
689 }
690 //printf("\n");
691 }
692 int_stack_clear();
693
694 /* We do the same thing for some finite n (up to maxdepth) */
695 /* and store the result in nletterbits */
696
697 for (v=0; v < num_states; v++) {
698 ptr_stack_push((medh->state_array+v)->transitions);
699 int_stack_push(0);
700 while (!ptr_stack_isempty()) {
701 curr_ptr = ptr_stack_pop();
702 depth = int_stack_pop();
703 looper:
704 if (depth == medh->maxdepth)
705 continue;
706 if (curr_ptr->in != -1) {
707 letterbits_add(v, curr_ptr->in, medh->nletterbits, medh->bytes_per_letter_array);
708 }
709 if (curr_ptr->target != -1) {
710 if (curr_ptr->state_no == (curr_ptr+1)->state_no) {
711 ptr_stack_push(curr_ptr+1);
712 int_stack_push(depth);
713 }
714 depth++;
715 curr_ptr = (medh->state_array+(curr_ptr->target))->transitions;
716 goto looper;
717 }
718 }
719 }
720 for (i=0; i < num_states; i++) {
721 //printf("State %i: ",i);
722 for (j=0; j <= num_symbols; j++) {
723 if (BITTEST(medh->nletterbits+(i*medh->bytes_per_letter_array),j)((medh->nletterbits+(i*medh->bytes_per_letter_array))[(
(j) >> 3)] & (1 << ((j) & 7)))
) {
724 //printf("[%i]",j);
725 }
726 }
727 //printf("\n");
728 }
729 free(sccinfo);
730}
731
732void cmatrix_print_att(struct fsm *net, FILE *outfile) {
733 int i, j, *cm, maxsigma;
734 maxsigma = sigma_max(net->sigma) + 1;
735 cm = net->medlookup->confusion_matrix;
736
737
738 for (i = 0; i < maxsigma ; i++) {
739 for (j = 0; j < maxsigma ; j++) {
740 if ((i != 0 && i < 3) || (j != 0 && j < 3)) { continue; }
741 if (i == 0 && j != 0) {
742 fprintf(outfile,"0\t0\t%s\t%s\t%i\n", "@0@", sigma_string(j, net->sigma), *(cm+i*maxsigma+j));
743 } else if (j == 0 && i != 0) {
744 fprintf(outfile,"0\t0\t%s\t%s\t%i\n", sigma_string(i,net->sigma), "@0@", *(cm+i*maxsigma+j));
745 } else if (j != 0 && i != 0) {
746 fprintf(outfile,"0\t0\t%s\t%s\t%i\n", sigma_string(i,net->sigma),sigma_string(j, net->sigma), *(cm+i*maxsigma+j));
747 }
748 }
749 }
750 fprintf(outfile,"0\n");
751}
752
753void cmatrix_print(struct fsm *net) {
754 int lsymbol, i, j, *cm, maxsigma;
755 char *thisstring;
756 struct sigma *sigma;
757 maxsigma = sigma_max(net->sigma) + 1;
758 cm = net->medlookup->confusion_matrix;
759
760 lsymbol = 0 ;
761 for (sigma = net->sigma; sigma != NULL((void*)0); sigma = sigma->next) {
762 if (sigma->number < 3)
763 continue;
764 lsymbol = strlen(sigma->symbol) > lsymbol ? strlen(sigma->symbol) : lsymbol;
765 }
766 printf("%*s",lsymbol+2,"");
767 printf("%s","0 ");
768
769 for (i = 3; ; i++) {
770 if ((thisstring = sigma_string(i, net->sigma)) != NULL((void*)0)) {
771 printf("%s ",thisstring);
772 } else {
773 break;
774 }
775 }
776
777 printf("\n");
778
779 for (i = 0; i < maxsigma ; i++) {
780 for (j = 0; j < maxsigma ; j++) {
781 if (j == 0) {
782 if (i == 0) {
783 printf("%*s",lsymbol+1,"0");
784 printf("%*s",2,"*");
785 } else {
786 printf("%*s",lsymbol+1, sigma_string(i, net->sigma));
787 printf("%*d",2,*(cm+i*maxsigma+j));
788 }
789 j++;
790 j++;
791 continue;
792 }
793 if (i == j) {
794 printf("%.*s",(int)strlen(sigma_string(j, net->sigma))+1,"*");
795 } else {
796 printf("%.*d",(int)strlen(sigma_string(j, net->sigma))+1,*(cm+i*maxsigma+j));
797 }
798 }
799 printf("\n");
800 if (i == 0) {
801 i++; i++;
802 }
803 }
804}
805
806void cmatrix_init(struct fsm *net) {
807 int maxsigma, *cm, i, j;
808 if (net->medlookup == NULL((void*)0)) {
809 net->medlookup = calloc(1,sizeof(struct medlookup));
810 }
811 maxsigma = sigma_max(net->sigma)+1;
812 cm = calloc(maxsigma*maxsigma, sizeof(int));
813 net->medlookup->confusion_matrix = cm;
814 for (i = 0; i < maxsigma; i++) {
815 for (j = 0; j < maxsigma; j++) {
816 if (i == j)
817 *(cm+i*maxsigma+j) = 0;
818 else
819 *(cm+i*maxsigma+j) = 1;
820 }
821 }
822}
823
824void cmatrix_default_substitute(struct fsm *net, int cost) {
825 int i, j, maxsigma, *cm;
826 cm = net->medlookup->confusion_matrix;
827 maxsigma = sigma_max(net->sigma)+1;
828 for (i = 1; i < maxsigma; i++) {
829 for (j = 1; j < maxsigma; j++) {
830 if (i == j) {
831 *(cm+i*maxsigma+j) = 0;
832 } else {
833 *(cm+i*maxsigma+j) = cost;
834 }
835 }
836 }
837}
838
839void cmatrix_default_insert(struct fsm *net, int cost) {
840 int i, maxsigma, *cm;
841 cm = net->medlookup->confusion_matrix;
842 maxsigma = sigma_max(net->sigma)+1;
843 for (i = 0; i < maxsigma; i++) {
844 *(cm+i) = cost;
845 }
846}
847
848void cmatrix_default_delete(struct fsm *net, int cost) {
849 int i, maxsigma, *cm;
850 cm = net->medlookup->confusion_matrix;
851 maxsigma = sigma_max(net->sigma)+1;
852 for (i = 0; i < maxsigma; i++) {
853 *(cm+i*maxsigma) = cost;
854 }
855}
856
857void cmatrix_set_cost(struct fsm *net, char *in, char *out, int cost) {
858 int i, o, maxsigma, *cm;
859 cm = net->medlookup->confusion_matrix;
860 maxsigma = sigma_max(net->sigma) + 1;
861 if (in == NULL((void*)0)) {
862 i = 0;
863 } else {
864 i = sigma_find(in, net->sigma);
865 }
866 if (out == NULL((void*)0)) {
867 o = 0;
868 } else {
869 o = sigma_find(out, net->sigma);
870 }
871 if (i == -1) {
872 printf("Warning, symbol '%s' not in alphabet\n",in);
873 return;
874 }
875 if (o == -1) {
876 printf("Warning, symbol '%s' not in alphabet\n",out);
877 return;
878 }
879 *(cm+i*maxsigma+o) = cost;
880}