Bug Summary

File:apply.c
Warning:line 338, column 15
Access to field 'value' results in a dereference of a null pointer (loaded from variable 'flist')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name apply.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -D foma_shared_EXPORTS -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/apply.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <time.h>
21#include <string.h>
22#include <limits.h>
23#include "foma.h"
24
25#define RANDOM1 1
26#define ENUMERATE2 2
27#define MATCH4 4
28#define UP8 8
29#define DOWN16 16
30#define LOWER32 32
31#define UPPER64 64
32#define SPACE128 128
33
34#define FAIL0 0
35#define SUCCEED1 1
36
37#define DEFAULT_OUTSTRING_SIZE4096 4096
38#define DEFAULT_STACK_SIZE128 128
39
40#define APPLY_BINSEARCH_THRESHOLD10 10
41
42#define BITMASK(b)(1 << ((b) & 7)) (1 << ((b) & 7))
43#define BITSLOT(b)((b) >> 3) ((b) >> 3)
44#define BITSET(a,b)((a)[((b) >> 3)] |= (1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] |= BITMASK(b)(1 << ((b) & 7)))
45#define BITCLEAR(a,b)((a)[((b) >> 3)] &= ~(1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] &= ~BITMASK(b)(1 << ((b) & 7)))
46#define BITTEST(a,b)((a)[((b) >> 3)] & (1 << ((b) & 7))) ((a)[BITSLOT(b)((b) >> 3)] & BITMASK(b)(1 << ((b) & 7)))
47#define BITNSLOTS(nb)((nb + 8 - 1) / 8) ((nb + CHAR_BIT8 - 1) / CHAR_BIT8)
48
49
50
51static int apply_append(struct apply_handle *h, int cptr, int sym);
52static char *apply_net(struct apply_handle *h);
53static void apply_create_statemap(struct apply_handle *h,struct fsm *net);
54static void apply_create_sigarray(struct apply_handle *h,struct fsm *net);
55static void apply_create_sigmatch(struct apply_handle *h);
56int apply_match_length(struct apply_handle *h, int symbol);
57static int apply_match_str(struct apply_handle *h,int symbol, int position);
58static void apply_add_flag(struct apply_handle *h,char *name);
59static int apply_check_flag(struct apply_handle *h,int type, char *name, char *value);
60static void apply_clear_flags(struct apply_handle *h);
61void apply_set_iptr(struct apply_handle *h);
62void apply_mark_flagstates(struct apply_handle *h);
63void apply_clear_index(struct apply_handle *h);
64
65static void apply_stack_clear(struct apply_handle *h);
66static int apply_stack_isempty(struct apply_handle *h);
67static void apply_stack_pop (struct apply_handle *h);
68static void apply_stack_push (struct apply_handle *h, int vmark, char *sflagname, char *sflagvalue, int sflagneg);
69static void apply_force_clear_stack(struct apply_handle *h);
70
71
72void apply_set_obey_flags(struct apply_handle *h, int value) {
73 h->obey_flags = value;
74}
75
76void apply_set_show_flags(struct apply_handle *h, int value) {
77 h->show_flags = value;
78}
79
80void apply_set_print_space(struct apply_handle *h, int value) {
81 h->print_space = value;
82 h->space_symbol = strdup(" ");
83}
84
85void apply_set_separator(struct apply_handle *h, char *symbol) {
86 h->separator = strdup(symbol);
87}
88
89void apply_set_epsilon(struct apply_handle *h, char *symbol) {
90 free(h->epsilon_symbol);
91 h->epsilon_symbol = strdup(symbol);
92 (h->sigs+EPSILON0)->symbol = h->epsilon_symbol;
93 (h->sigs+EPSILON0)->length = strlen(h->epsilon_symbol);
94}
95
96void apply_set_space_symbol(struct apply_handle *h, char *space) {
97 h->space_symbol = strdup(space);
98 h->print_space = 1;
99}
100
101void apply_set_print_pairs(struct apply_handle *h, int value) {
102 h->print_pairs = value;
103}
104
105static void apply_force_clear_stack(struct apply_handle *h) {
106 /* Make sure stack is empty and marks reset */
107 if (!apply_stack_isempty(h)) {
12
Taking true branch
108 *(h->marks+(h->gstates+h->ptr)->state_no) = 0;
109 while (!apply_stack_isempty(h)) {
13
Loop condition is true. Entering loop body
110 apply_stack_pop(h);
14
Calling 'apply_stack_pop'
111 *(h->marks+(h->gstates+h->ptr)->state_no) = 0;
112 }
113 h->iterator = 0;
114 h->iterate_old = 0;
115 apply_stack_clear(h);
116 }
117}
118
119char *apply_enumerate(struct apply_handle *h) {
120
121 char *result = NULL((void*)0);
122
123 if (h->last_net == NULL((void*)0) || h->last_net->finalcount == 0) {
124 return (NULL((void*)0));
125 }
126 h->binsearch = 0;
127 if (h->iterator == 0) {
128 h->iterate_old = 0;
129 apply_force_clear_stack(h);
130 result = apply_net(h);
131 if ((h->mode & RANDOM1) != RANDOM1)
132 (h->iterator)++;
133 } else {
134 h->iterate_old = 1;
135 result = apply_net(h);
136 }
137 return(result);
138}
139
140char *apply_words(struct apply_handle *h) {
141 h->mode = DOWN16 + ENUMERATE2 + LOWER32 + UPPER64;
142 return(apply_enumerate(h));
143}
144
145char *apply_upper_words(struct apply_handle *h) {
146 h->mode = DOWN16 + ENUMERATE2 + UPPER64;
147 return(apply_enumerate(h));
148}
149
150char *apply_lower_words(struct apply_handle *h) {
151 h->mode = DOWN16 + ENUMERATE2 + LOWER32;
152 return(apply_enumerate(h));
153}
154
155char *apply_random_words(struct apply_handle *h) {
156 apply_clear_flags(h);
157 h->mode = DOWN16 + ENUMERATE2 + LOWER32 + UPPER64 + RANDOM1;
158 return(apply_enumerate(h));
159}
160
161char *apply_random_lower(struct apply_handle *h) {
162 apply_clear_flags(h);
163 h->mode = DOWN16 + ENUMERATE2 + LOWER32 + RANDOM1;
164 return(apply_enumerate(h));
165}
166
167char *apply_random_upper(struct apply_handle *h) {
168 apply_clear_flags(h);
169 h->mode = DOWN16 + ENUMERATE2 + UPPER64 + RANDOM1;
170 return(apply_enumerate(h));
171}
172
173/* Frees memory associated with applies */
174void apply_clear(struct apply_handle *h) {
175 struct sigma_trie_arrays *sta, *stap;
176 for (sta = h->sigma_trie_arrays; sta != NULL((void*)0); ) {
177 stap = sta;
178 free(sta->arr);
179 sta = sta->next;
180 free(stap);
181 }
182 h->sigma_trie_arrays = NULL((void*)0);
183 if (h->statemap != NULL((void*)0)) {
184 free(h->statemap);
185 h->statemap = NULL((void*)0);
186 }
187 if (h->numlines != NULL((void*)0)) {
188 free(h->numlines);
189 h->numlines = NULL((void*)0);
190 }
191 if (h->marks != NULL((void*)0)) {
192 free(h->marks);
193 h->marks = NULL((void*)0);
194 }
195 if (h->searchstack != NULL((void*)0)) {
196 free(h->searchstack);
197 h->searchstack = NULL((void*)0);
198 }
199 if (h->sigs != NULL((void*)0)) {
200 free(h->sigs);
201 h->sigs = NULL((void*)0);
202 }
203 if (h->flag_lookup != NULL((void*)0)) {
204 free(h->flag_lookup);
205 h->flag_lookup = NULL((void*)0);
206 }
207 if (h->sigmatch_array != NULL((void*)0)) {
208 free(h->sigmatch_array);
209 h->sigmatch_array = NULL((void*)0);
210 }
211 if (h->flagstates != NULL((void*)0)) {
212 free(h->flagstates);
213 h->flagstates = NULL((void*)0);
214 }
215 apply_clear_index(h);
216 h->last_net = NULL((void*)0);
217 h->iterator = 0;
218 free(h->outstring);
219 free(h->separator);
220 free(h->epsilon_symbol);
221 free(h);
222}
223
224char *apply_updown(struct apply_handle *h, char *word) {
225
226 char *result = NULL((void*)0);
227
228 if (h->last_net
5.1
Field 'last_net' is not equal to NULL
== NULL((void*)0) || h->last_net->finalcount == 0)
6
Assuming field 'finalcount' is not equal to 0
7
Taking false branch
229 return (NULL((void*)0));
230
231 if (word == NULL((void*)0)) {
8
Assuming 'word' is not equal to NULL
9
Taking false branch
232 h->iterate_old = 1;
233 result = apply_net(h);
234 }
235 else if (word
9.1
'word' is not equal to NULL
!= NULL((void*)0)) {
10
Taking true branch
236 h->iterate_old = 0;
237 h->instring = word;
238 apply_create_sigmatch(h);
239
240 /* Remove old marks if necessary TODO: only pop marks */
241 apply_force_clear_stack(h);
11
Calling 'apply_force_clear_stack'
242 result = apply_net(h);
243 }
244 return(result);
245}
246
247char *apply_down(struct apply_handle *h, char *word) {
248
249 h->mode = DOWN16;
250 if (h->index_in) {
251 h->indexed = 1;
252 } else {
253 h->indexed = 0;
254 }
255 h->binsearch = (h->last_net->arcs_sorted_in == 1) ? 1 : 0;
256 return(apply_updown(h, word));
257}
258
259char *apply_up(struct apply_handle *h, char *word) {
260
261 h->mode = UP8;
262 if (h->index_out) {
1
Assuming field 'index_out' is null
2
Taking false branch
263 h->indexed = 1;
264 } else {
265 h->indexed = 0;
266 }
267 h->binsearch = (h->last_net->arcs_sorted_out == 1) ? 1 : 0;
3
Assuming field 'arcs_sorted_out' is not equal to 1
4
'?' condition is false
268 return(apply_updown(h, word));
5
Calling 'apply_updown'
269}
270
271struct apply_handle *apply_init(struct fsm *net) {
272 struct apply_handle *h;
273
274 srand((unsigned int) time(NULL((void*)0)));
275 h = calloc(1,sizeof(struct apply_handle));
276 /* Init */
277
278 h->iterate_old = 0;
279 h->iterator = 0;
280 h->instring = NULL((void*)0);
281 h->flag_list = NULL((void*)0);
282 h->flag_lookup = NULL((void*)0);
283 h->obey_flags = 1;
284 h->show_flags = 0;
285 h->print_space = 0;
286 h->print_pairs = 0;
287 h->separator = strdup(":");
288 h->epsilon_symbol = strdup("0");
289 h->last_net = net;
290 h->outstring = malloc(sizeof(char)*DEFAULT_OUTSTRING_SIZE4096);
291 h->outstringtop = DEFAULT_OUTSTRING_SIZE4096;
292 *(h->outstring) = '\0';
293 h->gstates = net->states;
294 h->gsigma = net->sigma;
295 h->printcount = 1;
296 apply_create_statemap(h, net);
297 h->searchstack = malloc(sizeof(struct searchstack) * DEFAULT_STACK_SIZE128);
298 h->apply_stack_top = DEFAULT_STACK_SIZE128;
299 apply_stack_clear(h);
300 apply_create_sigarray(h, net);
301 return(h);
302}
303
304int apply_stack_isempty (struct apply_handle *h) {
305 if (h->apply_stack_ptr == 0) {
306 return 1;
307 }
308 return 0;
309}
310
311void apply_stack_clear (struct apply_handle *h) {
312 h->apply_stack_ptr = 0;
313}
314
315void apply_stack_pop (struct apply_handle *h) {
316 struct flag_list *flist;
317 struct searchstack *ss;
318 (h->apply_stack_ptr)--;
319 ss = h->searchstack+h->apply_stack_ptr;
320
321 h->iptr = ss->iptr;
322 h->ptr = ss->offset;
323 h->ipos = ss->ipos;
324 h->opos = ss->opos;
325 h->state_has_index = ss->state_has_index;
326 /* Restore mark */
327 *(h->marks+(h->gstates+h->ptr)->state_no) = ss->visitmark;
328
329 if (h->has_flags && ss->flagname != NULL((void*)0)) {
15
Assuming field 'has_flags' is not equal to 0
16
Assuming field 'flagname' is not equal to NULL
17
Taking true branch
330 /* Restore flag */
331 for (flist = h->flag_list; flist != NULL((void*)0); flist = flist->next) {
18
Value assigned to 'flist'
19
Assuming 'flist' is equal to NULL
20
Loop condition is false. Execution continues on line 336
332 if (strcmp(flist->name, ss->flagname) == 0) {
333 break;
334 }
335 }
336 if (flist
20.1
'flist' is equal to NULL
== NULL((void*)0))
21
Taking true branch
337 perror("***Nothing to pop\n");
338 flist->value = ss->flagvalue;
22
Access to field 'value' results in a dereference of a null pointer (loaded from variable 'flist')
339 flist->neg = ss->flagneg;
340 }
341}
342
343static void apply_stack_push (struct apply_handle *h, int vmark, char *sflagname, char *sflagvalue, int sflagneg) {
344 struct searchstack *ss;
345 if (h->apply_stack_ptr == h->apply_stack_top) {
346 h->searchstack = realloc(h->searchstack, sizeof(struct searchstack)* ((h->apply_stack_top)*2));
347 if (h->searchstack == NULL((void*)0)) {
348 perror("Apply stack full!!!\n");
349 exit(0);
350 }
351 h->apply_stack_top *= 2;
352 }
353 ss = h->searchstack+h->apply_stack_ptr;
354 ss->offset = h->curr_ptr;
355 ss->ipos = h->ipos;
356 ss->opos = h->opos;
357 ss->visitmark = vmark;
358 ss->iptr = h->iptr;
359 ss->state_has_index = h->state_has_index;
360 if (h->has_flags) {
361 ss->flagname = sflagname;
362 ss->flagvalue = sflagvalue;
363 ss->flagneg = sflagneg;
364 }
365 (h->apply_stack_ptr)++;
366}
367
368void apply_reset_enumerator(struct apply_handle *h) {
369 int statecount, i;
370 statecount = h->last_net->statecount;
371 for (i=0; i < statecount; i++) {
372 *(h->marks+i) = 0;
373 }
374 h->iterator = 0;
375 h->iterate_old = 0;
376}
377
378void apply_clear_index_list(struct apply_handle *h, struct apply_state_index **index) {
379 int i, j, statecount;
380 struct apply_state_index *iptr, *iptr_tmp, *iptr_zero;
381 if (index == NULL((void*)0))
382 return;
383 statecount = h->last_net->statecount;
384 for (i = 0; i < statecount; i++) {
385 iptr = *(index+i);
386 if (iptr == NULL((void*)0)) {
387 continue;
388 }
389 iptr_zero = *(index+i);
390 for (j = h->sigma_size - 1 ; j >= 0; j--) { /* Make sure to not free the list in EPSILON */
391 iptr = *(index+i) + j; /* as the other states lists' tails point to it */
392 for (iptr = iptr->next ; iptr != NULL((void*)0) && iptr != iptr_zero; iptr = iptr_tmp) {
393 iptr_tmp = iptr->next;
394 free(iptr);
395 }
396 }
397 free(*(index+i));
398 }
399}
400
401void apply_clear_index(struct apply_handle *h) {
402 if (h->index_in) {
403 apply_clear_index_list(h, h->index_in);
404 free(h->index_in);
405 h->index_in = NULL((void*)0);
406 }
407 if (h->index_out) {
408 apply_clear_index_list(h, h->index_out);
409 free(h->index_out);
410 h->index_out = NULL((void*)0);
411 }
412}
413
414void apply_index(struct apply_handle *h, int inout, int densitycutoff, int mem_limit, int flags_only) {
415 struct fsm_state *fsm;
416 unsigned int cnt = 0;
417 int i, j, maxtrans, numtrans, laststate, sym;
418 fsm = h->gstates;
419
420 struct apply_state_index **indexptr, *iptr, *tempiptr;
421
422 struct pre_index {
423 int state_no;
424 struct pre_index *next;
425 } *pre_index, *tp, *tpp;
426 if (flags_only && !h->has_flags) {
427 return;
428 }
429 /* get numtrans */
430 for (i=0, laststate = 0, maxtrans = 0, numtrans = 0; (fsm+i)->state_no != -1; i++) {
431 if ((fsm+i)->state_no != laststate) {
432 maxtrans = numtrans > maxtrans ? numtrans : maxtrans;
433 numtrans = 0;
434 }
435 if ((fsm+i)->target != -1) {
436 numtrans++;
437 }
438 laststate = (fsm+i)->state_no;
439 }
440
441 pre_index = calloc(maxtrans+1, sizeof(struct pre_index));
442 for (i = 0; i <= maxtrans; i++) {
443 (pre_index+i)->state_no = -1;
444 }
445
446 /* We create an array of states, indexed by how many transitions they have */
447 /* so that later, we can traverse them in order densest first, in case we */
448 /* only want to index to some predefined maximum memory usage. */
449
450 for (i = 0, laststate = 0, maxtrans = 0, numtrans = 0; (fsm+i)->state_no != -1; i++) {
451 if ((fsm+i)->state_no != laststate) {
452 if ((pre_index+numtrans)->state_no == -1) {
453 (pre_index+numtrans)->state_no = laststate;
454 } else {
455 tp = calloc(1, sizeof(struct pre_index));
456 tp->state_no = laststate;
457 tp->next = (pre_index+numtrans)->next;
458 (pre_index+numtrans)->next = tp;
459 }
460 maxtrans = numtrans > maxtrans ? numtrans : maxtrans;
461 numtrans = 0;
462 }
463 if ((fsm+i)->target != -1) {
464 numtrans++;
465 }
466 laststate = (fsm+i)->state_no;
467 }
468 indexptr = NULL((void*)0);
469 cnt += round_up_to_power_of_two(h->last_net->statecount*sizeof(struct apply_state_index *));
470
471 if (cnt > mem_limit) {
472 cnt -= round_up_to_power_of_two(h->last_net->statecount*sizeof(struct apply_state_index *));
473 goto memlimitnoindex;
474 }
475
476 indexptr = calloc(h->last_net->statecount, sizeof(struct apply_state_index *));
477
478 if (h->has_flags && flags_only) {
479 /* Mark states that have flags */
480 if (!(h->flagstates)) {
481 apply_mark_flagstates(h);
482 }
483 }
484
485 for (i = maxtrans; i >= 0; i--) {
486 for (tp = pre_index+i; tp != NULL((void*)0); tp = tp->next) {
487 if (tp->state_no >= 0) {
488 if (i < densitycutoff) {
489 if (!(h->has_flags && flags_only && BITTEST(h->flagstates, tp->state_no)((h->flagstates)[((tp->state_no) >> 3)] & (1 <<
((tp->state_no) & 7)))
)) {
490 continue;
491 }
492 }
493 cnt += round_up_to_power_of_two(h->sigma_size*sizeof(struct apply_state_index));
494 if (cnt > mem_limit) {
495 cnt -= round_up_to_power_of_two(h->sigma_size*sizeof(struct apply_state_index));
496 goto memlimit;
497 }
498 *(indexptr + tp->state_no) = malloc(h->sigma_size*sizeof(struct apply_state_index));
499
500 /* We make the tail of all index linked lists point to the index */
501 /* for EPSILON, so that we automatically when EPSILON transitions */
502 /* also when traversing an index. */
503
504 for (j = 0; j < h->sigma_size; j++) {
505 (*(indexptr + tp->state_no) + j)->fsmptr = -1;
506 if (j == EPSILON0)
507 (*(indexptr + tp->state_no) + j)->next = NULL((void*)0);
508 else
509 (*(indexptr + tp->state_no) + j)->next = (*(indexptr + tp->state_no)); /* all tails point to epsilon */
510 }
511 }
512 }
513 }
514
515 memlimit:
516
517 for (i=0; (fsm+i)->state_no != -1; i++) {
518 iptr = *(indexptr + (fsm+i)->state_no);
519 if (iptr == NULL((void*)0) || (fsm+i)->target == -1) {
520 continue;
521 }
522 sym = inout == APPLY_INDEX_INPUT1 ? (fsm+i)->in : (fsm+i)->out;
523
524 if (h->has_flags && (h->flag_lookup+sym)->type) {
525 sym = EPSILON0;
526 }
527 if (sym == UNKNOWN1) { /* We make the index of UNKNOWN point to IDENTITY */
528 sym = IDENTITY2; /* since these are really the same symbol */
529 }
530 if ((iptr+sym)->fsmptr == -1) {
531 (iptr+sym)->fsmptr = i;
532 } else {
533 cnt += round_up_to_power_of_two(sizeof(struct apply_state_index));
534 tempiptr = calloc(1, sizeof(struct apply_state_index));
535
536 tempiptr->next = (iptr+sym)->next;
537 tempiptr->fsmptr = i;
538 (iptr+sym)->next = tempiptr;
539 }
540 }
541
542 /* Free preindex */
543
544 memlimitnoindex:
545
546 for (i = maxtrans; i >= 0; i--) {
547 for (tp = (pre_index+i)->next; tp != NULL((void*)0); tp = tpp) {
548 tpp = tp->next;
549 free(tp);
550 }
551 }
552 free(pre_index);
553
554 if (inout == APPLY_INDEX_INPUT1) {
555 h->index_in = indexptr;
556 } else {
557 h->index_out = indexptr;
558 }
559}
560
561int apply_binarysearch(struct apply_handle *h) {
562 int thisstate, nextsym, seeksym, thisptr, lastptr, midptr;
563
564 thisptr = h->curr_ptr = h->ptr;
565 nextsym = (((h->mode) & DOWN16) == DOWN16) ? (h->gstates+h->curr_ptr)->in : (h->gstates+h->curr_ptr)->out;
566 if (nextsym == EPSILON0)
567 return 1;
568 if (nextsym == -1)
569 return 0;
570 if (h->ipos >= h->current_instring_length) {
571 return 0;
572 }
573 seeksym = (h->sigmatch_array+h->ipos)->signumber;
574 if (seeksym == nextsym || (nextsym == UNKNOWN1 && seeksym == IDENTITY2))
575 return 1;
576
577 thisstate = (h->gstates+thisptr)->state_no;
578 lastptr = *(h->statemap+thisstate)+*(h->numlines+thisstate)-1;
579 thisptr++;
580
581 if (seeksym == IDENTITY2 || lastptr - thisptr < APPLY_BINSEARCH_THRESHOLD10) {
582 for ( ; thisptr <= lastptr; thisptr++) {
583 nextsym = (((h->mode) & DOWN16) == DOWN16) ? (h->gstates+thisptr)->in : (h->gstates+thisptr)->out;
584 if ((nextsym == seeksym) || (nextsym == UNKNOWN1 && seeksym == IDENTITY2)) {
585 h->curr_ptr = thisptr;
586 return 1;
587 }
588 if (nextsym > seeksym || nextsym == -1) {
589 return 0;
590 }
591 }
592 return 0;
593 }
594
595 for (;;) {
596 if (thisptr > lastptr) { return 0; }
597 midptr = (thisptr+lastptr)/2;
598 nextsym = (((h->mode) & DOWN16) == DOWN16) ? (h->gstates+midptr)->in : (h->gstates+midptr)->out;
599 if (seeksym < nextsym) {
600 lastptr = midptr - 1;
601 continue;
602 } else if (seeksym > nextsym) {
603 thisptr = midptr + 1;
604 continue;
605 } else {
606
607 while (((((h->mode) & DOWN16) == DOWN16) ? (h->gstates+(midptr-1))->in : (h->gstates+(midptr-1))->out) == seeksym) {
608 midptr--; /* Find first match in case of ties */
609 }
610 h->curr_ptr = midptr;
611 return 1;
612 }
613 }
614}
615
616int apply_follow_next_arc(struct apply_handle *h) {
617 char *fname, *fvalue;
618 int eatupi, eatupo, symin, symout, fneg;
619 int vcount, marksource, marktarget;
620
621 /* Here we follow three possible search strategies: */
622 /* (1) if the state in question has an index, we use that */
623 /* (2) if the state is binary searchable, we use that */
624 /* (3) otherwise we traverse arc-by-arc */
625 /* Condition (2) needs arcs to be sorted in the proper */
626 /* direction, and requires that the state be flag-free */
627 /* For those states that aren't flag-free, (3) is used */
628
629 if (h->state_has_index) {
630 for ( ; h->iptr != NULL((void*)0) && h->iptr->fsmptr != -1; ) {
631
632 h->ptr = h->curr_ptr = h->iptr->fsmptr;
633 if (((h->mode) & DOWN16) == DOWN16) {
634 symin = (h->gstates+h->curr_ptr)->in;
635 symout = (h->gstates+h->curr_ptr)->out;
636 } else {
637 symin = (h->gstates+h->curr_ptr)->out;
638 symout = (h->gstates+h->curr_ptr)->in;
639 }
640
641 marksource = *(h->marks+(h->gstates+h->ptr)->state_no);
642 marktarget = *(h->marks+(h->gstates+(*(h->statemap+(h->gstates+h->curr_ptr)->target)))->state_no);
643 eatupi = apply_match_length(h, symin);
644 if (!(eatupi == -1 || -1-(h->ipos)-eatupi == marktarget)) { /* input 2x EPSILON loop check */
645 if ((eatupi = apply_match_str(h, symin, h->ipos)) != -1) {
646 eatupo = apply_append(h, h->curr_ptr, symout);
647 if (h->obey_flags && h->has_flags && ((h->flag_lookup+symin)->type & (FLAG_UNIFY1|FLAG_CLEAR2|FLAG_POSITIVE16|FLAG_NEGATIVE8))) {
648 fname = (h->flag_lookup+symin)->name;
649 fvalue = h->oldflagvalue;
650 fneg = h->oldflagneg;
651 } else {
652 fname = fvalue = NULL((void*)0);
653 fneg = 0;
654 }
655 /* Push old position */
656 apply_stack_push(h, marksource, fname, fvalue, fneg);
657 h->ptr = *(h->statemap+(h->gstates+h->curr_ptr)->target);
658 h->ipos += eatupi;
659 h->opos += eatupo;
660 apply_set_iptr(h);
661 return 1;
662 }
663 }
664 h->iptr = h->iptr->next;
665 }
666 return 0;
667 } else if ((h->binsearch && !(h->has_flags)) || (h->binsearch && !(BITTEST(h->flagstates, (h->gstates+h->ptr)->state_no)((h->flagstates)[(((h->gstates+h->ptr)->state_no)
>> 3)] & (1 << (((h->gstates+h->ptr)->
state_no) & 7)))
))) {
668 for (;;) {
669 if (apply_binarysearch(h)) {
670 if (((h->mode) & DOWN16) == DOWN16) {
671 symin = (h->gstates+h->curr_ptr)->in;
672 symout = (h->gstates+h->curr_ptr)->out;
673 } else {
674 symin = (h->gstates+h->curr_ptr)->out;
675 symout = (h->gstates+h->curr_ptr)->in;
676 }
677
678 marksource = *(h->marks+(h->gstates+h->ptr)->state_no);
679 marktarget = *(h->marks+(h->gstates+(*(h->statemap+(h->gstates+h->curr_ptr)->target)))->state_no);
680
681 eatupi = apply_match_length(h, symin);
682 if (eatupi != -1 && -1-(h->ipos)-eatupi != marktarget) {
683 if ((eatupi = apply_match_str(h, symin, h->ipos)) != -1) {
684 eatupo = apply_append(h, h->curr_ptr, symout);
685
686 /* Push old position */
687 apply_stack_push(h, marksource, NULL((void*)0), NULL((void*)0), 0);
688
689 /* Follow arc */
690 h->ptr = *(h->statemap+(h->gstates+h->curr_ptr)->target);
691 h->ipos += eatupi;
692 h->opos += eatupo;
693 apply_set_iptr(h);
694 return 1;
695 }
696 }
697 if ((h->gstates+h->curr_ptr)->state_no == (h->gstates+h->curr_ptr+1)->state_no) {
698 h->curr_ptr++;
699 h->ptr = h->curr_ptr;
700 if ((h->gstates+h->curr_ptr)-> target == -1) {
701 return 0;
702 }
703 continue;
704 }
705 }
706 return 0;
707 }
708 } else {
709 for (h->curr_ptr = h->ptr; (h->gstates+h->curr_ptr)->state_no == (h->gstates+h->ptr)->state_no && (h->gstates+h->curr_ptr)-> in != -1; (h->curr_ptr)++) {
710
711 /* Select one random arc to follow out of all outgoing arcs */
712 if ((h->mode & RANDOM1) == RANDOM1) {
713 vcount = 0;
714 for (h->curr_ptr = h->ptr; (h->gstates+h->curr_ptr)->state_no == (h->gstates+h->ptr)->state_no && (h->gstates+h->curr_ptr)-> in != -1; (h->curr_ptr)++) {
715 vcount++;
716 }
717 if (vcount > 0) {
718 h->curr_ptr = h->ptr + (rand() % vcount);
719 } else {
720 h->curr_ptr = h->ptr;
721 }
722 }
723
724 if (((h->mode) & DOWN16) == DOWN16) {
725 symin = (h->gstates+h->curr_ptr)->in;
726 symout = (h->gstates+h->curr_ptr)->out;
727 } else {
728 symin = (h->gstates+h->curr_ptr)->out;
729 symout = (h->gstates+h->curr_ptr)->in;
730 }
731
732 marksource = *(h->marks+(h->gstates+h->ptr)->state_no);
733 marktarget = *(h->marks+(h->gstates+(*(h->statemap+(h->gstates+h->curr_ptr)->target)))->state_no);
734
735 eatupi = apply_match_length(h, symin);
736
737 if (eatupi == -1 || -1-(h->ipos)-eatupi == marktarget) { continue; } /* loop check */
738 if ((eatupi = apply_match_str(h, symin, h->ipos)) != -1) {
739 eatupo = apply_append(h, h->curr_ptr, symout);
740 if (h->obey_flags && h->has_flags && ((h->flag_lookup+symin)->type & (FLAG_UNIFY1|FLAG_CLEAR2|FLAG_POSITIVE16|FLAG_NEGATIVE8))) {
741
742 fname = (h->flag_lookup+symin)->name;
743 fvalue = h->oldflagvalue;
744 fneg = h->oldflagneg;
745 } else {
746 fname = fvalue = NULL((void*)0);
747 fneg = 0;
748 }
749
750 /* Push old position */
751 apply_stack_push(h, marksource, fname, fvalue, fneg);
752
753 /* Follow arc */
754 h->ptr = *(h->statemap+(h->gstates+h->curr_ptr)->target);
755 h->ipos += eatupi;
756 h->opos += eatupo;
757 apply_set_iptr(h);
758 return(1);
759 }
760 }
761 return(0);
762 }
763}
764
765char *apply_return_string(struct apply_handle *h) {
766 /* Stick a 0 to endpos to avoid getting old accumulated gunk strings printed */
767 *(h->outstring+h->opos) = '\0';
768 if (((h->mode) & RANDOM1) == RANDOM1) {
769 /* To end or not to end */
770 if (!(rand() % 2)) {
771 apply_stack_clear(h);
772 h->iterator = 0;
773 h->iterate_old = 0;
774 return(h->outstring);
775 }
776 } else {
777 return(h->outstring);
778 }
779 return(NULL((void*)0));
780}
781
782void apply_mark_state(struct apply_handle *h) {
783
784 /* This controls the how epsilon-loops are traversed. Such loops can */
785 /* only be followed once to reach a state already visited in the DFS. */
786 /* This requires that we store the number of input symbols consumed */
787 /* whenever we enter a new state. If we enter the same state twice */
788 /* with the same number of input symbols consumed, we abandon the search */
789 /* for that branch. Flags are epsilons from this point of view. */
790 /* The encoding of h->marks is: */
791 /* 0 = unseen, +ipos = seen at ipos, -ipos = seen second time at ipos */
792
793 if ((h->mode & RANDOM1) != RANDOM1) {
794 if (*(h->marks+(h->gstates+h->ptr)->state_no) == h->ipos+1) {
795 *(h->marks+(h->gstates+h->ptr)->state_no) = -(h->ipos+1);
796 } else {
797 *(h->marks+(h->gstates+h->ptr)->state_no) = h->ipos+1;
798 }
799 }
800}
801
802void apply_skip_this_arc(struct apply_handle *h) {
803 /* If we have index ptr */
804 if (h->iptr) {
805 h->ptr = h->iptr->fsmptr;
806 h->iptr = h->iptr->next;
807 /* Otherwise */
808 } else {
809 (h->ptr)++;
810 }
811}
812
813int apply_at_last_arc(struct apply_handle *h) {
814 int seeksym, nextsym;
815 if (h->state_has_index) {
816 if (h->iptr->next == NULL((void*)0) || h->iptr->next->fsmptr == -1) {
817 return 1;
818 }
819 } else {
820 if ((h->binsearch && !(h->has_flags)) || (h->binsearch && !(BITTEST(h->flagstates, (h->gstates+h->ptr)->state_no)((h->flagstates)[(((h->gstates+h->ptr)->state_no)
>> 3)] & (1 << (((h->gstates+h->ptr)->
state_no) & 7)))
))) {
821 if ((h->gstates+h->ptr)->state_no != (h->gstates+h->ptr+1)->state_no) {
822 return 1;
823 }
824 seeksym = (h->sigmatch_array+h->ipos)->signumber;
825 nextsym = (((h->mode) & DOWN16) == DOWN16) ? (h->gstates+h->ptr)->in : (h->gstates+h->ptr)->out;
826 if (nextsym == -1 || seeksym < nextsym) {
827 return 1;
828 }
829 } else {
830 if ((h->gstates+h->ptr)->state_no != (h->gstates+h->ptr+1)->state_no) {
831 return 1;
832 }
833 }
834 }
835 return 0;
836}
837
838/* map h->ptr (line pointer) to h->iptr (index pointer) */
839void apply_set_iptr(struct apply_handle *h) {
840 struct apply_state_index **idx, *sidx;
841 int stateno, seeksym;
842 /* Check if state has index */
843 if ((idx = ((h->mode) & DOWN16) == DOWN16 ? (h->index_in) : (h->index_out)) == NULL((void*)0)) {
844 return;
845 }
846
847 h->iptr = NULL((void*)0);
848 h->state_has_index = 0;
849 stateno = (h->gstates+h->ptr)->state_no;
850 if (stateno < 0) {
851 return;
852 }
853
854 sidx = *(idx + stateno);
855 if (sidx == NULL((void*)0)) { return; }
856 seeksym = (h->sigmatch_array+h->ipos)->signumber;
857 h->state_has_index = 1;
858 sidx = sidx + seeksym;
859 if (sidx->fsmptr == -1) {
860 if (sidx->next == NULL((void*)0)) {
861 return;
862 } else {
863 sidx = sidx->next;
864 }
865 }
866 h->iptr = sidx;
867 if (sidx->fsmptr == -1) {
868 h->iptr = NULL((void*)0);
869 }
870 h->state_has_index = 1;
871}
872
873char *apply_net(struct apply_handle *h) {
874
875/* We perform a basic DFS on the graph, with two minor complications: */
876
877/* 1. We keep a mark for each state which indicates how many input symbols */
878/* we had consumed the last time we entered that state on the current */
879/* "run." If we reach a state seen twice without consuming input, we */
880/* terminate that branch of the search. */
881/* As we pop a position, we also unmark the state we came from. */
882
883/* 2. If the graph has flags, we push the previous flag value when */
884/* traversing a flag-modifying arc (P,U,N, or C). This is because a */
885/* flag may have been set during the previous "run" and may not apply. */
886/* Since we're doing a DFS, we can be sure to return to the previous */
887/* global flag state by just remembering that last flag change. */
888
889/* 3. The whole system needs to work as an iterator, meaning we need to */
890/* store the global state of the search so we can resume it later to */
891/* to yield more possible output words with the same input string. */
892
893 char *returnstring;
894
895 if (h->iterate_old == 1) { /* If called with NULL as the input word, this will be set */
896 goto resume;
897 }
898
899 h->iptr = NULL((void*)0); h->ptr = 0; h->ipos = 0; h->opos = 0;
900 apply_set_iptr(h);
901
902 apply_stack_clear(h);
903
904 if (h->has_flags) {
905 apply_clear_flags(h);
906 }
907
908 /* "The use of four-letter words like goto can occasionally be justified */
909 /* even in the best of company." Knuth (1974). */
910
911 goto L2;
912
913 while(!apply_stack_isempty(h)) {
914 apply_stack_pop(h);
915 /* If last line was popped */
916 if (apply_at_last_arc(h)) {
917 *(h->marks+(h->gstates+h->ptr)->state_no) = 0; /* Unmark */
918 continue; /* pop next */
919 }
920 apply_skip_this_arc(h); /* skip old pushed arc */
921 L1:
922 if (!apply_follow_next_arc(h)) {
923 *(h->marks+(h->gstates+h->ptr)->state_no) = 0; /* Unmark */
924 continue; /* pop next */
925 }
926 L2:
927 /* Print accumulated string upon entry to state */
928 if ((h->gstates+h->ptr)->final_state == 1 && (h->ipos == h->current_instring_length || ((h->mode) & ENUMERATE2) == ENUMERATE2)) {
929 if ((returnstring = (apply_return_string(h))) != NULL((void*)0)) {
930 return(returnstring);
931 }
932 }
933
934 resume:
935 apply_mark_state(h); /* Mark upon arrival to new state */
936 goto L1;
937 }
938 if ((h->mode & RANDOM1) == RANDOM1) {
939 apply_stack_clear(h);
940 h->iterator = 0;
941 h->iterate_old = 0;
942 return(h->outstring);
943 }
944 apply_stack_clear(h);
945 return NULL((void*)0);
946}
947
948int apply_append(struct apply_handle *h, int cptr, int sym) {
949
950 char *astring, *bstring, *pstring;
951 int symin, symout, len, alen, blen, idlen;
952
953 symin = (h->gstates+cptr)->in;
954 symout = (h->gstates+cptr)->out;
955 astring = ((h->sigs)+symin)->symbol;
956 alen = ((h->sigs)+symin)->length;
957 bstring = ((h->sigs)+symout)->symbol;
958 blen = ((h->sigs)+symout)->length;
959
960 while (alen + blen + h->opos + 2 + strlen(h->separator) >= h->outstringtop) {
961 // while (alen + blen + h->opos + 3 >= h->outstringtop) {
962 h->outstring = realloc(h->outstring, sizeof(char) * ((h->outstringtop) * 2));
963 (h->outstringtop) *= 2;
964 }
965
966 if ((h->has_flags) && !h->show_flags && (h->flag_lookup+symin)->type) {
967 astring = ""; alen = 0;
968 }
969 if (h->has_flags && !h->show_flags && (h->flag_lookup+symout)->type) {
970 bstring = ""; blen = 0;
971 }
972 if (((h->mode) & ENUMERATE2) == ENUMERATE2) {
973 /* Print both sides separated by colon */
974 /* if we're printing "words" */
975 if (((h->mode) & (UPPER64 | LOWER32)) == (UPPER64|LOWER32)) {
976
977 if (astring == bstring) {
978 strcpy(h->outstring+h->opos, astring);
979 len = alen;
980 } else {
981 strcpy(h->outstring+h->opos, astring);
982 // strcpy(h->outstring+h->opos+alen,":");
983 strcpy(h->outstring+h->opos+alen,h->separator);
984 //strcpy(h->outstring+h->opos+alen+1,bstring);
985 strcpy(h->outstring+h->opos+alen+strlen(h->separator),bstring);
986 // len = alen+blen+1;
987 len = alen+blen+strlen(h->separator);
988 }
989 }
990
991 /* Print one side only */
992 if (((h->mode) & (UPPER64|LOWER32)) != (UPPER64|LOWER32)) {
993
994 if (symin == EPSILON0) {
995 astring = ""; alen = 0;
996 }
997 if (symout == EPSILON0) {
998 bstring = ""; blen = 0;
999 }
1000 if (((h->mode) & (UPPER64|LOWER32)) == UPPER64) {
1001 pstring = astring;
1002 len = alen;
1003 } else {
1004 pstring = bstring;
1005 len = blen;
1006 }
1007 //strcpy(h->outstring+h->opos, pstring);
1008 memcpy(h->outstring+h->opos, pstring, len);
1009 }
1010 }
1011 if (((h->mode) & ENUMERATE2) != ENUMERATE2) {
1012 /* Print pairs is ON and symbols are different */
1013 if (h->print_pairs && (symin != symout)) {
1014
1015 if (symin == UNKNOWN1 && ((h->mode) & DOWN16) == DOWN16)
1016 strncpy(astring, h->instring+h->ipos, 1);
1017 if (symout == UNKNOWN1 && ((h->mode) & UP8) == UP8)
1018 strncpy(bstring, h->instring+h->ipos, 1);
1019 strcpy(h->outstring+h->opos, "<");
1020 strcpy(h->outstring+h->opos+1, astring);
1021 //strcpy(h->outstring+h->opos+alen+1,":");
1022 strcpy(h->outstring+h->opos+alen+1,h->separator);
1023 //strcpy(h->outstring+h->opos+alen+2,bstring);
1024 strcpy(h->outstring+h->opos+alen+1+strlen(h->separator), bstring);
1025 //strcpy(h->outstring+h->opos+alen+blen+2,">");
1026 strcpy(h->outstring+h->opos+alen+blen+1+strlen(h->separator),">");
1027 //len = alen+blen+3;
1028 len = alen+blen+2+strlen(h->separator);
1029 }
1030
1031 else if (sym == IDENTITY2) {
1032 /* Apply up/down */
1033 //idlen = utf8skip(h->instring+h->ipos)+1;
1034 idlen = (h->sigmatch_array+h->ipos)->consumes; // here
1035 strncpy(h->outstring+h->opos, h->instring+h->ipos, idlen);
1036 strncpy(h->outstring+h->opos+idlen,"", 1);
1037 len = idlen;
1038 } else if (sym == EPSILON0) {
1039 return(0);
1040 } else {
1041 if (((h->mode) & DOWN16) == DOWN16) {
1042 pstring = bstring;
1043 len = blen;
1044 } else {
1045 pstring = astring;
1046 len = alen;
1047 }
1048 memcpy(h->outstring+h->opos, pstring, len);
1049 }
1050 }
1051 if (h->print_space && len > 0) {
1052 strcpy(h->outstring+h->opos+len, h->space_symbol);
1053 len++;
1054 }
1055 return(len);
1056}
1057
1058int apply_match_length(struct apply_handle *h, int symbol) {
1059 if (symbol == EPSILON0) {
1060 return 0;
1061 }
1062 if (h->has_flags && (h->flag_lookup+symbol)->type) {
1063 return 0;
1064 }
1065 if (((h->mode) & ENUMERATE2) == ENUMERATE2) {
1066 return 0;
1067 }
1068 if (h->ipos >= h->current_instring_length) {
1069 return -1;
1070 }
1071 if ((h->sigmatch_array+(h->ipos))->signumber == symbol) {
1072 return((h->sigmatch_array+(h->ipos))->consumes);
1073 }
1074 if ((symbol == IDENTITY2) || (symbol == UNKNOWN1)) {
1075 if ((h->sigmatch_array+h->ipos)->signumber == IDENTITY2) {
1076 return((h->sigmatch_array+(h->ipos))->consumes);
1077 }
1078 }
1079 return -1;
1080}
1081
1082/* Match a symbol from sigma against the current position in string */
1083/* Return the number of symbols consumed in input string */
1084/* For flags, we consume 0 symbols of the input string, naturally */
1085
1086int apply_match_str(struct apply_handle *h, int symbol, int position) {
1087 if (((h->mode) & ENUMERATE2) == ENUMERATE2) {
1088 if (h->has_flags && (h->flag_lookup+symbol)->type) {
1089 if (!h->obey_flags) {
1090 return 0;
1091 }
1092 if (apply_check_flag(h,(h->flag_lookup+symbol)->type, (h->flag_lookup+symbol)->name, (h->flag_lookup+symbol)->value) == SUCCEED1) {
1093 return 0;
1094 } else {
1095 return -1;
1096 }
1097 }
1098 return(0);
1099 }
1100
1101
1102 if (symbol == EPSILON0) {
1103 return 0;
1104 }
1105
1106 /* If symbol is a flag, we need to check consistency */
1107 if (h->has_flags && (h->flag_lookup+symbol)->type) {
1108 if (!h->obey_flags) {
1109 return 0;
1110 }
1111 if (apply_check_flag(h,(h->flag_lookup+symbol)->type, (h->flag_lookup+symbol)->name, (h->flag_lookup+symbol)->value) == SUCCEED1) {
1112 return 0;
1113 } else {
1114 return -1;
1115 }
1116 }
1117
1118 if (position >= h->current_instring_length) {
1119 return -1;
1120 }
1121 if ((h->sigmatch_array+position)->signumber == symbol) {
1122 return((h->sigmatch_array+position)->consumes);
1123 }
1124 if ((symbol == IDENTITY2) || (symbol == UNKNOWN1)) {
1125 if ((h->sigmatch_array+position)->signumber == IDENTITY2) {
1126 return((h->sigmatch_array+position)->consumes);
1127 }
1128 }
1129 return -1;
1130}
1131
1132void apply_create_statemap(struct apply_handle *h, struct fsm *net) {
1133 int i;
1134 struct fsm_state *fsm;
1135 fsm = net->states;
1136 h->statemap = malloc(sizeof(int)*net->statecount);
1137 h->marks = malloc(sizeof(int)*net->statecount);
1138 h->numlines = malloc(sizeof(int)*net->statecount);
1139
1140 for (i=0; i < net->statecount; i++) {
1141 *(h->numlines+i) = 0; /* Only needed in binary search */
1142 *(h->statemap+i) = -1;
1143 *(h->marks+i) = 0;
1144 }
1145 for (i=0; (fsm+i)->state_no != -1; i++) {
1146 *(h->numlines+(fsm+i)->state_no) = *(h->numlines+(fsm+i)->state_no)+1;
1147 if (*(h->statemap+(fsm+i)->state_no) == -1) {
1148 *(h->statemap+(fsm+i)->state_no) = i;
1149 }
1150 }
1151}
1152
1153void apply_add_sigma_trie(struct apply_handle *h, int number, char *symbol, int len) {
1154
1155 /* Create a trie of sigma symbols (prefixes) so we can */
1156 /* quickly (in O(n)) tokenize an arbitrary string into */
1157 /* integer sequences representing symbols, using longest- */
1158 /* leftmost factorization. */
1159
1160 int i;
1161 struct sigma_trie *st;
1162 struct sigma_trie_arrays *sta;
1163
1164 st = h->sigma_trie;
1165 for (i = 0; i < len; i++) {
1166 st = st+(unsigned char)*(symbol+i);
1167 if (i == (len-1)) {
1168 st->signum = number;
1169 } else {
1170 if (st->next == NULL((void*)0)) {
1171 st->next = calloc(256,sizeof(struct sigma_trie));
1172 st = st->next;
1173 /* store these arrays to free them later */
1174 sta = malloc(sizeof(struct sigma_trie_arrays));
1175 sta->arr = st;
1176 sta->next = h->sigma_trie_arrays;
1177 h->sigma_trie_arrays = sta;
1178 } else {
1179 st = st->next;
1180 }
1181 }
1182 }
1183}
1184
1185void apply_mark_flagstates(struct apply_handle *h) {
1186 int i;
1187 struct fsm_state *fsm;
1188
1189 /* Create bitarray with those states that have a flag symbol on an arc */
1190 /* This is needed to decide whether we can perform a binary search. */
1191
1192 if (!h->has_flags || h->flag_lookup == NULL((void*)0)) {
1193 return;
1194 }
1195 if (h->flagstates) {
1196 free(h->flagstates);
1197 }
1198 h->flagstates = calloc(BITNSLOTS(h->last_net->statecount)((h->last_net->statecount + 8 - 1) / 8), sizeof(uint8_t));
1199 fsm = h->last_net->states;
1200 for (i=0; (fsm+i)->state_no != -1; i++) {
1201 if ((fsm+i)->target == -1) {
1202 continue;
1203 }
1204 if ((h->flag_lookup+(fsm+i)->in)->type) {
1205 BITSET(h->flagstates,(fsm+i)->state_no)((h->flagstates)[(((fsm+i)->state_no) >> 3)] |= (
1 << (((fsm+i)->state_no) & 7)))
;
1206 }
1207 if ((h->flag_lookup+(fsm+i)->out)->type) {
1208 BITSET(h->flagstates,(fsm+i)->state_no)((h->flagstates)[(((fsm+i)->state_no) >> 3)] |= (
1 << (((fsm+i)->state_no) & 7)))
;
1209 }
1210 }
1211}
1212
1213void apply_create_sigarray(struct apply_handle *h, struct fsm *net) {
1214 struct sigma *sig;
1215 int i, maxsigma;
1216
1217 maxsigma = sigma_max(net->sigma);
1218 h->sigma_size = maxsigma+1;
1219 // Default size created at init, resized later if necessary
1220 h->sigmatch_array = calloc(1024,sizeof(struct sigmatch_array));
1221 h->sigmatch_array_size = 1024;
1222
1223 h->sigs = malloc(sizeof(struct sigs)*(maxsigma+1));
1224 h->has_flags = 0;
1225 h->flag_list = NULL((void*)0);
1226
1227 /* Malloc first array of trie and store trie ptrs to be able to free later */
1228 /* when apply_clear() is called. */
1229
1230 h->sigma_trie = calloc(256,sizeof(struct sigma_trie));
1231 h->sigma_trie_arrays = malloc(sizeof(struct sigma_trie_arrays));
1232 h->sigma_trie_arrays->arr = h->sigma_trie;
1233 h->sigma_trie_arrays->next = NULL((void*)0);
1234
1235 for (i=0;i<256;i++)
1236 (h->sigma_trie+i)->next = NULL((void*)0);
1237 for (sig = h->gsigma; sig != NULL((void*)0) && sig->number != -1; sig = sig->next) {
1238 if (flag_check(sig->symbol)) {
1239 h->has_flags = 1;
1240 apply_add_flag(h, flag_get_name(sig->symbol));
1241 }
1242 (h->sigs+(sig->number))->symbol = sig->symbol;
1243 (h->sigs+(sig->number))->length = strlen(sig->symbol);
1244 /* Add sigma entry to trie */
1245 if (sig->number > IDENTITY2) {
1246 apply_add_sigma_trie(h, sig->number, sig->symbol, (h->sigs+(sig->number))->length);
1247 }
1248 }
1249 if (maxsigma >= IDENTITY2) {
1250 (h->sigs+EPSILON0)->symbol = h->epsilon_symbol;
1251 (h->sigs+EPSILON0)->length = strlen(h->epsilon_symbol);
1252 (h->sigs+UNKNOWN1)->symbol = "?";
1253 (h->sigs+UNKNOWN1)->length = 1;
1254 (h->sigs+IDENTITY2)->symbol = "@";
1255 (h->sigs+IDENTITY2)->length = 1;
1256 }
1257 if (h->has_flags) {
1258
1259 h->flag_lookup = malloc(sizeof(struct flag_lookup)*(maxsigma+1));
1260 for (i=0; i <= maxsigma; i++) {
1261 (h->flag_lookup+i)->type = 0;
1262 (h->flag_lookup+i)->name = NULL((void*)0);
1263 (h->flag_lookup+i)->value = NULL((void*)0);
1264 }
1265 for (sig = h->gsigma; sig != NULL((void*)0) ; sig = sig->next) {
1266 if (flag_check(sig->symbol)) {
1267 (h->flag_lookup+sig->number)->type = flag_get_type(sig->symbol);
1268 (h->flag_lookup+sig->number)->name = flag_get_name(sig->symbol);
1269 (h->flag_lookup+sig->number)->value = flag_get_value(sig->symbol);
1270 }
1271 }
1272 apply_mark_flagstates(h);
1273 }
1274}
1275
1276/* We need to know which symbols in sigma we can match for all positions */
1277/* in the input string. Alternatively, if there is no input string as is the case */
1278/* when we just list the words or randomly search the graph, we can match */
1279/* any symbol in sigma. */
1280
1281/* We create an array that for each position in the input string */
1282/* has information on which symbol we can match at that position */
1283/* as well as how many symbols matching consumes */
1284
1285void apply_create_sigmatch(struct apply_handle *h) {
1286 char *symbol;
1287 struct sigma_trie *st;
1288 int i, j, inlen, lastmatch, consumes, cons;
1289 /* We create a sigmatch array only in case we match against a real string */
1290 if (((h->mode) & ENUMERATE2) == ENUMERATE2) {
1291 return;
1292 }
1293 symbol = h->instring;
1294 inlen = strlen(symbol);
1295 h->current_instring_length = inlen;
1296 if (inlen >= h->sigmatch_array_size) {
1297 free(h->sigmatch_array);
1298 h->sigmatch_array = malloc(sizeof(struct sigmatch_array)*(inlen));
1299 h->sigmatch_array_size = inlen;
1300 }
1301 /* Find longest match in alphabet at current position */
1302 /* by traversing the trie of alphabet symbols */
1303 for (i=0; i < inlen; i += consumes ) {
1304 st = h->sigma_trie;
1305 for (j=0, lastmatch = 0; ; j++) {
1306 if (*(symbol+i+j) == '\0') {
1307 break;
1308 }
1309 st = st+(unsigned char)*(symbol+i+j);
1310 if (st->signum != 0) {
1311 lastmatch = st->signum;
1312 if (st->next == NULL((void*)0))
1313 break;
1314 st = st->next;
1315 } else if (st->next != NULL((void*)0)) {
1316 st = st->next;
1317 } else {
1318 break;
1319 }
1320 }
1321 if (lastmatch != 0) {
1322 (h->sigmatch_array+i)->signumber = lastmatch;
1323 consumes = (h->sigs+lastmatch)->length;
1324 } else {
1325 /* Not found in trie */
1326 (h->sigmatch_array+i)->signumber = IDENTITY2;
1327 consumes = utf8skip(symbol+i)+1;
1328 }
1329
1330 /* If we now find trailing unicode combining characters (0300-036F): */
1331 /* (1) Merge them all with current symbol */
1332 /* (2) Declare the whole sequence one ? (IDENTITY) symbol */
1333 /* Step 2 is motivated by the fact that */
1334 /* if the input is S(symbol) + D(diacritic) */
1335 /* and SD were a symbol in the alphabet, then this would have been */
1336 /* found when searching the alphabet symbols earlier, so SD+ => ? */
1337 /* Note that this means that a multi-char symbol followed by a */
1338 /* diacritic gets converted to a single ?, e.g. */
1339 /* [TAG] + D => ? if [TAG] is in the alphabet, but [TAG]+D isn't. */
1340
1341 for ( ; (cons = utf8iscombining((unsigned char *)(symbol+i+consumes))); consumes += cons) {
1342 (h->sigmatch_array+i)->signumber = IDENTITY2;
1343 }
1344 (h->sigmatch_array+i)->consumes = consumes;
1345 }
1346}
1347
1348void apply_add_flag(struct apply_handle *h, char *name) {
1349 struct flag_list *flist, *flist_prev;
1350 if (h->flag_list == NULL((void*)0)) {
1351 flist = h->flag_list = malloc(sizeof(struct flag_list));
1352 } else {
1353 for (flist = h->flag_list; flist != NULL((void*)0); flist_prev = flist, flist = flist->next) {
1354 if (strcmp(flist->name, name) == 0) {
1355 return;
1356 }
1357 }
1358 flist = malloc(sizeof(struct flag_list));
1359 flist_prev->next = flist;
1360 }
1361 flist->name = name;
1362 flist->value = NULL((void*)0);
1363 flist->neg = 0;
1364 flist->next = NULL((void*)0);
1365 return;
1366}
1367
1368void apply_clear_flags(struct apply_handle *h) {
1369 struct flag_list *flist;
1370 for (flist = h->flag_list; flist != NULL((void*)0); flist = flist->next) {
1371 flist->value = NULL((void*)0);
1372 flist->neg = 0;
1373 }
1374 return;
1375}
1376
1377/* Check for flag consistency by looking at the current states of */
1378/* flags in flist */
1379
1380int apply_check_flag(struct apply_handle *h, int type, char *name, char *value) {
1381 struct flag_list *flist, *flist2;
1382 for (flist = h->flag_list; flist != NULL((void*)0); flist = flist->next) {
1383 if (strcmp(flist->name, name) == 0) {
1384 break;
1385 }
1386 }
1387 h->oldflagvalue = flist->value;
1388 h->oldflagneg = flist->neg;
1389
1390 if (type == FLAG_UNIFY1) {
1391 if (flist->value == NULL((void*)0)) {
1392 flist->value = strdup(value);
1393 return SUCCEED1;
1394 }
1395 else if (strcmp(value, flist->value) == 0 && flist->neg == 0) {
1396 return SUCCEED1;
1397 } else if (strcmp(value, flist->value) != 0 && flist->neg == 1) {
1398 flist->value = strdup(value);
1399 flist->neg = 0;
1400 return SUCCEED1;
1401 }
1402 return FAIL0;
1403 }
1404
1405 if (type == FLAG_CLEAR2) {
1406 flist->value = NULL((void*)0);
1407 flist->neg = 0;
1408 return SUCCEED1;
1409 }
1410
1411 if (type == FLAG_DISALLOW4) {
1412 if (flist->value == NULL((void*)0)) {
1413 return SUCCEED1;
1414 }
1415 if (value == NULL((void*)0) && flist->value != NULL((void*)0)) {
1416 return FAIL0;
1417 }
1418 if (strcmp(value, flist->value) != 0) {
1419 if (flist->neg == 1)
1420 return FAIL0;
1421 return SUCCEED1;
1422 }
1423 if (strcmp(value, flist->value) == 0 && flist->neg == 1) {
1424 return SUCCEED1;
1425 }
1426 return FAIL0;
1427 }
1428
1429 if (type == FLAG_NEGATIVE8) {
1430 flist->value = value;
1431 flist->neg = 1;
1432 return SUCCEED1;
1433 }
1434
1435 if (type == FLAG_POSITIVE16) {
1436 flist->value = value;
1437 flist->neg = 0;
1438 return SUCCEED1;
1439 }
1440
1441 if (type == FLAG_REQUIRE32) {
1442
1443 if (value == NULL((void*)0)) {
1444 if (flist->value == NULL((void*)0)) {
1445 return FAIL0;
1446 } else {
1447 return SUCCEED1;
1448 }
1449 } else {
1450 if (flist->value == NULL((void*)0)) {
1451 return FAIL0;
1452 }
1453 if (strcmp(value, flist->value) != 0) {
1454 return FAIL0;
1455 } else {
1456 if (flist->neg == 1) {
1457 return FAIL0;
1458 }
1459 return SUCCEED1;
1460 }
1461 }
1462 }
1463
1464 if (type == FLAG_EQUAL64) {
1465 for (flist2 = h->flag_list; flist2 != NULL((void*)0); flist2 = flist2->next) {
1466 if (strcmp(flist2->name, value) == 0) {
1467 break;
1468 }
1469 }
1470
1471 if (flist2 == NULL((void*)0) && flist->value != NULL((void*)0))
1472 return FAIL0;
1473 if (flist2 == NULL((void*)0) && flist->value == NULL((void*)0)) {
1474 return SUCCEED1;
1475 }
1476 if (flist2->value == NULL((void*)0) || flist->value == NULL((void*)0)) {
1477 if (flist2->value == NULL((void*)0) && flist->value == NULL((void*)0) && flist->neg == flist2->neg) {
1478 return SUCCEED1;
1479 } else {
1480 return FAIL0;
1481 }
1482 } else if (strcmp(flist2->value, flist->value) == 0 && flist->neg == flist2->neg) {
1483 return SUCCEED1;
1484 }
1485 return FAIL0;
1486 }
1487 fprintf(stderrstderr,"***Don't know what do with flag [%i][%s][%s]\n", type, name, value);
1488 return FAIL0;
1489}