Bug Summary

File:constructions.c
Warning:line 677, column 17
Access to field 'mainloop' results in a dereference of a null pointer (loaded from variable 'iptr')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name constructions.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -D foma_shared_EXPORTS -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/constructions.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include "foma.h"
22
23#define KLEENE_STAR0 0
24#define KLEENE_PLUS1 1
25#define OPTIONALITY2 2
26
27#define COMPLEMENT0 0
28#define COMPLETE1 1
29
30#define STACK_3_PUSH(a,b,c)int_stack_push(a); int_stack_push(b); int_stack_push(c); int_stack_push(a); int_stack_push(b); int_stack_push(c);
31#define STACK_2_PUSH(a,b)int_stack_push(a); int_stack_push(b); int_stack_push(a); int_stack_push(b);
32
33struct mergesigma {
34 char *symbol;
35 unsigned char presence; /* 1 = in net 1, 2 = in net 2, 3 = in both */
36 int number;
37 struct mergesigma *next;
38};
39
40int sort_cmp(const void *a, const void *b) {
41 return (((const struct fsm_state *)a)->state_no - ((const struct fsm_state *)b)->state_no);
42}
43
44int add_fsm_arc(struct fsm_state *fsm, int offset, int state_no, int in, int out, int target, int final_state, int start_state);
45
46struct fsm *fsm_kleene_closure(struct fsm *net, int optionality);
47
48struct fsm *fsm_kleene_star(struct fsm *net) {
49 return (fsm_kleene_closure(net, KLEENE_STAR0));
50}
51
52struct fsm *fsm_kleene_plus(struct fsm *net) {
53 return (fsm_kleene_closure(net, KLEENE_PLUS1));
54}
55
56struct fsm *fsm_optionality(struct fsm *net) {
57 return (fsm_kleene_closure(net, OPTIONALITY2));
58}
59
60struct fsm *fsm_escape(char *symbol) {
61 return(fsm_symbol(symbol+1));
62}
63
64/* Convert a multicharacter-string-containing machine */
65/* to the equivalent "letter" machine where all arcs */
66/* are single utf8 letters. */
67
68struct fsm *fsm_letter_machine(struct fsm *net) {
69
70 struct fsm *outnet;
71 struct fsm_read_handle *inh;
72 struct fsm_construct_handle *outh;
73 int i, steps, source, target, addstate, innum, outnum, inlen, outlen;
74 char *in, *out, *currin, *currout, tmpin[128], tmpout[128];
75
76 inh = fsm_read_init(fsm_minimize(net));
77 outh = fsm_construct_init("name");
78 addstate = net->statecount;
79
80 while (fsm_get_next_arc(inh)) {
81 in = fsm_get_arc_in(inh);
82 out = fsm_get_arc_out(inh);
83 innum = fsm_get_arc_num_in(inh);
84 outnum = fsm_get_arc_num_out(inh);
85 source = fsm_get_arc_source(inh);
86 target = fsm_get_arc_target(inh);
87
88 if (((innum > IDENTITY2) && utf8strlen(in) > 1) || ((outnum > IDENTITY2) && utf8strlen(out) > 1)) {
89 inlen = innum <= IDENTITY2 ? 1 : utf8strlen(in);
90 outlen = outnum <= IDENTITY2 ? 1 : utf8strlen(out);
91 steps = inlen > outlen ? inlen : outlen;
92
93 target = addstate;
94 for (i = 0; i < steps; i++) {
95 if (innum <= IDENTITY2 || inlen < 1) {
96 if (inlen < 1)
97 currin = "@_EPSILON_SYMBOL_@";
98 else
99 currin = in;
100 } else {
101 strncpy(tmpin, in, utf8skip(in)+1);
102 *(tmpin+utf8skip(in)+1) = '\0';
103 currin = tmpin;
104 inlen--;
105 in = in+utf8skip(in)+1;
106 }
107 if (outnum <= IDENTITY2 || outlen < 1) {
108 if (outlen < 1)
109 currout = "@_EPSILON_SYMBOL_@";
110 else
111 currout = out;
112 } else {
113 strncpy(tmpout, out, utf8skip(in)+1);
114 *(tmpout+utf8skip(out)+1) = '\0';
115 currout = tmpout;
116 out = out+utf8skip(out)+1;
117 outlen--;
118 }
119 if (i == 0 && steps > 1) {
120 target = addstate;
121 addstate++;
122 }
123 if (i > 0 && (steps-i == 1)) {
124 source = addstate - 1;
125 target = fsm_get_arc_target(inh);
126 }
127 if (i > 0 && (steps-i != 1)) {
128 source = addstate-1;
129 target = addstate;
130 addstate++;
131 }
132 fsm_construct_add_arc(outh,source,target,currin,currout);
133 }
134 } else {
135 fsm_construct_add_arc(outh,source,target,in,out);
136 }
137 }
138 while ((i = fsm_get_next_final(inh)) != -1) {
139 fsm_construct_set_final(outh, i);
140 }
141 while ((i = fsm_get_next_initial(inh)) != -1) {
142 fsm_construct_set_initial(outh, i);
143 }
144 fsm_read_done(inh);
145 outnet = fsm_construct_done(outh);
146 return(outnet);
147}
148
149struct fsm *fsm_explode(char *symbol) {
150 struct fsm *net;
151 struct fsm_construct_handle *h;
152 char *tempstring;
153 int length, i, j, skip;
154
155 h = fsm_construct_init("");
156 fsm_construct_set_initial(h,0);
157
158 length = strlen(symbol)-2;
159 for (i=1, j=1; i <= length; i += skip, j++) {
160 skip = utf8skip(symbol+i)+1;
161 tempstring = xxstrndup(((symbol+i)),skip);
162 fsm_construct_add_arc(h,j-1,j,tempstring,tempstring);
163 free(tempstring);
164 }
165 fsm_construct_set_final(h, j-1);
166 net = fsm_construct_done(h);
167 return(net);
168}
169
170struct fsm *fsm_symbol(char *symbol) {
171 struct fsm *net;
172 int symbol_no;
173
174 net = fsm_create("");
175 fsm_update_flags(net, YES1, YES1, YES1, YES1, YES1, NO0);
176 if (strcmp(symbol,"@_EPSILON_SYMBOL_@")==0) {
177 /* Epsilon */
178 (void)sigma_add_special(EPSILON0, net->sigma);
179 net->states = malloc(sizeof(struct fsm_state)*2);
180 add_fsm_arc(net->states, 0, 0, -1,-1,-1,1,1);
181 add_fsm_arc(net->states, 1, -1,-1,-1,-1,-1,-1);
182 net->arccount = 0;
183 net->statecount = 1;
184 net->linecount = 1;
185 net->finalcount = 1;
186 net->is_deterministic = NO0;
187 net->is_minimized = NO0;
188 net->is_epsilon_free = NO0;
189 } else {
190 if ((strcmp(symbol,"@_IDENTITY_SYMBOL_@") == 0)) {
191 symbol_no = sigma_add_special(IDENTITY2,net->sigma);
192 } else {
193 symbol_no = sigma_add(symbol,net->sigma);
194 }
195 net->states = malloc(sizeof(struct fsm_state)*3);
196 add_fsm_arc(net->states, 0, 0, symbol_no, symbol_no, 1, 0, 1);
197 add_fsm_arc(net->states, 1, 1, -1, -1, -1, 1, 0);
198 add_fsm_arc(net->states, 2, -1, -1, -1, -1, -1, -1);
199 net->arity = 1;
200 net->pathcount = 1;
201 net->arccount = 1;
202 net->statecount = 2;
203 net->linecount = 2;
204 net->finalcount = 1;
205 net->arcs_sorted_in = YES1;
206 net->arcs_sorted_out = YES1;
207 net->is_deterministic = YES1;
208 net->is_minimized = YES1;
209 net->is_epsilon_free = YES1;
210 }
211 return(net);
212}
213
214void fsm_sort_lines(struct fsm *net) {
215 struct fsm_state *fsm;
216 fsm = net->states;
217 qsort(fsm, find_arccount(fsm), sizeof(struct fsm_state), sort_cmp);
218}
219
220void fsm_update_flags(struct fsm *net, int det, int pru, int min, int eps, int loop, int completed) {
221 net->is_deterministic = det;
222 net->is_pruned = pru;
223 net->is_minimized = min;
224 net->is_epsilon_free = eps;
225 net->is_loop_free = loop;
226 net->is_completed = completed;
227 net->arcs_sorted_in = NO0;
228 net->arcs_sorted_out = NO0;
229}
230
231int fsm_count_states(struct fsm_state *fsm) {
232 int i, temp = -1, states = 0;
233 for(i=0; (fsm+i)->state_no != -1; i++) {
234 if (temp != (fsm+i)->state_no) {
235 states++;
236 temp = (fsm+i)->state_no;
237 }
238 }
239 return(states);
240}
241
242struct state_arr {
243 int final;
244 int start;
245 struct fsm_state *transitions;
246};
247
248struct state_arr *init_state_pointers(struct fsm_state *fsm_state) {
249
250 /* Create an array for quick lookup of whether states are final, and a pointer to the first line regarding each state */
251
252 struct state_arr *state_arr;
253 int states, i, sold;
254 sold = -1;
255 states = fsm_count_states(fsm_state);
256 state_arr = malloc(sizeof(struct state_arr)*(states+1));
257 for (i=0; i<states; i++) {
258 (state_arr+i)->final = 0;
259 (state_arr+i)->start = 0;
260 }
261
262 for (i=0; (fsm_state+i)->state_no != -1; i++) {
263 if ((fsm_state+i)->final_state == 1)
264 (state_arr+((fsm_state+i)->state_no))->final = 1;
265 if ((fsm_state+i)->start_state == 1)
266 (state_arr+((fsm_state+i)->state_no))->start = 1;
267 if ((fsm_state+i)->state_no != sold) {
268 (state_arr+((fsm_state+i)->state_no))->transitions = (fsm_state+i);
269 sold = (fsm_state+i)->state_no;
270 }
271 }
272 return(state_arr);
273}
274
275/* An open addressing scheme (with linear probing) to store triplets of ints */
276/* and hashing them with an automatically increasing key at every insert */
277/* The table is rehashed whenever occupancy reaches 0.5 */
278
279struct triplethash_triplets {
280 int a;
281 int b;
282 int c;
283 int key;
284};
285
286struct triplethash {
287 struct triplethash_triplets *triplets;
288 unsigned int tablesize;
289 int occupancy;
290};
291
292struct triplethash *triplet_hash_init() {
293 struct triplethash *th;
294 int i;
295 th = malloc(sizeof(struct triplethash));
296 th->tablesize = 128;
297 th->occupancy = 0;
298 th->triplets = malloc(sizeof(struct triplethash_triplets) * th->tablesize);
299 for (i = 0; i < th->tablesize; i++) {
300 (th->triplets+i)->key = -1;
301 }
302 return(th);
303}
304
305unsigned int triplethash_hashf(int a, int b, int c) {
306 return((unsigned int)(a * 7907 + b * 86028157 + c * 7919));
307}
308
309void triplet_hash_free(struct triplethash *th) {
310 if (th != NULL((void*)0)) {
311 if (th->triplets != NULL((void*)0)) {
312 free(th->triplets);
313 }
314 free(th);
315 }
316}
317
318void triplet_hash_rehash(struct triplethash *th);
319
320void triplet_hash_insert_with_key(struct triplethash *th, int a, int b, int c, int key) {
321 struct triplethash_triplets *th_table;
322 unsigned int hash;
323 th_table = th->triplets;
324 hash = triplethash_hashf(a, b, c) % th->tablesize;
325 for (;;) {
326 if ((th_table + hash)->key == -1) {
327 (th_table + hash)->key = key;
328 (th_table + hash)->a = a;
329 (th_table + hash)->b = b;
330 (th_table + hash)->c = c;
331 break;
332 }
333 hash++;
334 if (hash >= th->tablesize)
335 hash -= th->tablesize;
336 }
337}
338
339int triplet_hash_insert(struct triplethash *th, int a, int b, int c) {
340 struct triplethash_triplets *th_table;
341 unsigned int hash;
342 th_table = th->triplets;
343 hash = triplethash_hashf(a,b,c) % th->tablesize;
344 for (;;) {
345 if ((th_table + hash)->key == - 1) {
346 (th_table + hash)->key = th->occupancy;
347 (th_table + hash)->a = a;
348 (th_table + hash)->b = b;
349 (th_table + hash)->c = c;
350 th->occupancy = th->occupancy + 1;
351 if (th->occupancy > th->tablesize / 2) {
352 triplet_hash_rehash(th);
353 }
354 return(th->occupancy - 1);
355 }
356 hash++;
357 if (hash >= th->tablesize)
358 hash -= th->tablesize;
359 }
360}
361
362void triplet_hash_rehash(struct triplethash *th) {
363 int i;
364 unsigned int newtablesize, oldtablesize;
365 struct triplethash_triplets *oldtriplets;
366 newtablesize = th->tablesize * 2;
367 oldtablesize = th->tablesize;
368 oldtriplets = th->triplets;
369 th->triplets = malloc(sizeof(struct triplethash_triplets) * newtablesize);
370 th->tablesize = newtablesize;
371 for (i = 0; i < newtablesize; i++) {
372 (th->triplets+i)->key = -1;
373 }
374 for (i = 0; i < oldtablesize; i++) {
375 if ((oldtriplets+i)-> key != -1) {
376 triplet_hash_insert_with_key(th, (oldtriplets+i)->a, (oldtriplets+i)->b, (oldtriplets+i)->c, (oldtriplets+i)->key);
377 }
378 }
379 free(oldtriplets);
380}
381
382int triplet_hash_find(struct triplethash *th, int a, int b, int c) {
383 struct triplethash_triplets *th_table;
384 unsigned int hash, j;
385 th_table = th->triplets;
386 hash = triplethash_hashf(a, b, c) % th->tablesize;
387 for (j = 0; j < th->tablesize; j++) {
388 if ((th_table + hash)->key == - 1)
389 return -1;
390 if ((th_table + hash)->a == a && (th_table + hash)->b == b && (th_table + hash)->c == c) {
391 return((th_table + hash)->key);
392 }
393 hash++;
394 if (hash >= th->tablesize)
395 hash -= th->tablesize;
396 }
397 return -1;
398}
399
400struct fsm *fsm_intersect(struct fsm *net1, struct fsm *net2) {
401
402 struct blookup {int mainloop; int target; } *array, *bptr;
403 int a,b,current_state, current_start, current_final, target_number, sigma2size, mainloop;
404 struct fsm_state *machine_a, *machine_b;
405 struct state_arr *point_a, *point_b;
406 struct fsm *new_net;
407 struct triplethash *th;
408
409 net1 = fsm_minimize(net1);
410 net2 = fsm_minimize(net2);
411
412 if (fsm_isempty(net1) || fsm_isempty(net2)) {
413 fsm_destroy(net1);
414 fsm_destroy(net2);
415 return(fsm_empty_set());
416 }
417
418 fsm_merge_sigma(net1, net2);
419
420 fsm_update_flags(net1, YES1, NO0, UNK2, YES1, UNK2, UNK2);
421
422 machine_a = net1->states;
423 machine_b = net2->states;
424
425 sigma2size = sigma_max(net2->sigma)+1;
426 array = calloc(sigma2size*sigma2size, sizeof(struct blookup));
427 mainloop = 0;
428
429 /* Intersect two networks by the running-in-parallel method */
430 /* new state 0 = {0,0} */
431
432 STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);;
433
434 th = triplet_hash_init();
435 triplet_hash_insert(th, 0, 0, 0);
436
437 fsm_state_init(sigma_max(net1->sigma));
438
439 point_a = init_state_pointers(machine_a);
440 point_b = init_state_pointers(machine_b);
441
442 while (!int_stack_isempty()) {
443
444 /* Get a pair of states to examine */
445
446 a = int_stack_pop();
447 b = int_stack_pop();
448
449 current_state = triplet_hash_find(th, a, b, 0);
450 current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0;
451 current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0;
452
453 fsm_state_set_current_state(current_state, current_final, current_start);
454
455 /* Create a lookup index for machine b */
456 /* array[in][out] holds the target for this state and the symbol pair in:out */
457 /* Also, we keep track of whether an entry is fresh by the mainloop counter */
458 /* so we don't mistakenly use an old entry and don't have to clear the table */
459 /* between each state pair we encounter */
460
461 for (mainloop++, machine_b = (point_b+b)->transitions; machine_b->state_no == b; machine_b++) {
462 if (machine_b->in < 0) continue;
463 bptr = array+(machine_b->in*sigma2size)+machine_b->out;
464 bptr->mainloop = mainloop;
465 bptr->target = machine_b->target;
466 }
467
468 /* The main loop where we run the machines in parallel */
469 /* We look at each transition of a in this state, and consult the index of b */
470 /* we just created */
471
472 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
473 if (machine_a->in < 0 || machine_a->out < 0) continue;
474 bptr = array+(machine_a->in*sigma2size)+machine_a->out;
475
476 if (bptr->mainloop != mainloop)
477 continue;
478
479 if ((target_number = triplet_hash_find(th, machine_a->target, bptr->target, 0)) == -1) {
480 STACK_2_PUSH(bptr->target, machine_a->target)int_stack_push(bptr->target); int_stack_push(machine_a->
target);
;
481 target_number = triplet_hash_insert(th, machine_a->target, bptr->target, 0);
482 }
483
484 fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start);
485
486 }
487 fsm_state_end_state();
488 }
489 new_net = fsm_create("");
490 fsm_sigma_destroy(new_net->sigma);
491 new_net->sigma = net1->sigma;
492 net1->sigma = NULL((void*)0);
493 fsm_destroy(net2);
494 fsm_destroy(net1);
495 fsm_state_close(new_net);
496 free(point_a);
497 free(point_b);
498 free(array);
499 triplet_hash_free(th);
500 return(fsm_coaccessible(new_net));
501}
502
503struct fsm *fsm_compose(struct fsm *net1, struct fsm *net2) {
504
505
506 /* The composition algorithm is the basic naive composition where we lazily */
507 /* take the cross-product of states P and Q and move to a new state with symbols */
508 /* ain, bout if the symbols aout = bin. Also, if aout = 0 state p goes to */
509 /* its target, while q stays. Similarly, if bin = 0, q goes to its target */
510 /* while p stays. */
511
512 /* We have two variants of the algorithm to avoid creating multiple paths: */
513 /* 1) Bistate composition. In this variant, when we create a new state, we call it */
514 /* (p,q,mode) where mode = 0 or 1, depending on what kind of an arc we followed */
515 /* to get there. If we followed an x:y arc where x and y are both real symbols */
516 /* we always go to mode 0, however, if we followed an 0:y arc, we go to mode 1. */
517 /* from mode 1, we do not follow x:0 arcs. Each (p,q,mode) is unique, and */
518 /* from (p,q,X) we always consider the transitions from p and q. */
519 /* We never create arcs (x:0 0:y) yielding x:y. */
520
521 /* 2) Tristate composition. Here we always go to mode 0 with a x:y arc. */
522 /* (x:0,0:y) yielding x:y is allowed, but only in mode 0 */
523 /* (x:y y:z) is always allowed and results in target = mode 0 */
524 /* 0:y arcs lead to mode 2, and from there we stay in mode 2 with 0:y */
525 /* in mode 2 we only consider 0:y and x:y arcs */
526 /* x:0 arcs lead to mode 1, and from there we stay in mode 1 with x:0 */
527 /* in mode 1 we only consider x:0 and x:y arcs */
528
529 /* It seems unsettled which type of composition is better. Tristate is similar to */
530 /* the filter transducer given in Mohri, Pereira and Riley (1996) and works well */
531 /* for cases such as [a:0 b:0 c:0 .o. 0:d 0:e 0:f], yielding the shortest path. */
532 /* However, for generic cases, bistate seems to yield smaller transducers. */
533 /* The global variable g_compose_tristate is set to OFF by default */
534
535 struct outarray {
536 short int symin;
537 short int symout;
538 int target;
539 int mainloop;
540 } *outarray, *iptr, *currtail;
541
542 struct index {
543 struct outarray *tail;
544 } *index;
545
546 extern int g_compose_tristate, g_flag_is_epsilon;
547 int a,b,i,mainloop,current_state, current_start, current_final, target_number, ain, bin, aout, bout, asearch, max2sigma;
548 struct fsm_state *machine_a, *machine_b;
549 struct state_arr *point_a, *point_b;
550 struct triplethash *th;
551 int mode;
552 _Bool *is_flag = NULL((void*)0);
553
554
555 net1 = fsm_minimize(net1);
556 net2 = fsm_minimize(net2);
557
558 if (fsm_isempty(net1) || fsm_isempty(net2)) {
1
Assuming the condition is false
2
Assuming the condition is false
3
Taking false branch
559 fsm_destroy(net1);
560 fsm_destroy(net2);
561 return(fsm_empty_set());
562 }
563
564 /* If flag-is-epsilon is on, we need to add the flag symbols */
565 /* in both networks to each other's sigma so that UNKNOWN */
566 /* or IDENTITY symbols do not match these flags, since they are */
567 /* supposed to have the behavior of EPSILON */
568 /* And we need to do this before merging the sigmas, of course */
569
570 if (g_flag_is_epsilon) {
4
Assuming 'g_flag_is_epsilon' is 0
5
Taking false branch
571 struct sigma *sig1, *sig2;
572 int flags1, flags2;
573 flags1 = flags2 = 0;
574 sig2 = net2->sigma;
575 max2sigma = sigma_max(net2->sigma);
576 for (sig1 = net1->sigma; sig1 != NULL((void*)0); sig1 = sig1->next) {
577 if (flag_check(sig1->symbol)) {
578 flags1 = 1;
579 if (sigma_find(sig1->symbol, sig2) == -1) {
580 sigma_add(sig1->symbol, sig2);
581 }
582 }
583 }
584
585 sig1 = net1->sigma;
586 for (sig2 = net2->sigma; sig2 != NULL((void*)0) ; sig2 = sig2->next) {
587 if (flag_check(sig2->symbol)) {
588 if (sig2->number <= max2sigma) {
589 flags2 = 1;
590 }
591 if (sigma_find(sig2->symbol, sig1) == -1) {
592 sigma_add(sig2->symbol, sig1);
593 }
594 }
595 }
596 sigma_sort(net2);
597 sigma_sort(net1);
598 if (flags1 && flags2) {
599 printf("***Warning: flag-is-epsilon is ON and both networks contain flags in composition. This may yield incorrect results. Set flag-is-epsilon to OFF.\n");
600 }
601 }
602
603 fsm_merge_sigma(net1, net2);
604
605 if (g_flag_is_epsilon) {
6
Assuming 'g_flag_is_epsilon' is 0
7
Taking false branch
606 /* Create lookup table for quickly checking if a symbol is a flag */
607 struct sigma *sig1;
608 is_flag = malloc(sizeof(_Bool)*(sigma_max(net1->sigma)+1));
609 for (sig1 = net1->sigma; sig1 != NULL((void*)0); sig1=sig1->next) {
610 if (flag_check(sig1->symbol)) {
611 *(is_flag+(sig1->number)) = 1;
612 } else {
613 *(is_flag+(sig1->number)) = 0;
614 }
615 }
616 }
617
618 fsm_update_flags(net1, YES1, NO0, UNK2, YES1, UNK2, UNK2);
619
620 machine_a = net1->states;
621 machine_b = net2->states;
622
623 max2sigma = sigma_max(net2->sigma);
624
625 /* Create an index for looking up input symbols in machine b quickly */
626 /* We store each machine_b->in symbol in outarray[symin][...] */
627 /* the array index[symin] points to the tail of the current list in outarray */
628 /* (we may have many entries for one input symbol as there may be many outputs */
629 /* The field mainloop tells us whether the entry is current as we want to loop */
630 /* UNKNOWN and IDENTITY are indexed as UNKNOWN because we need to find both */
631 /* as they share some semantics */
632
633 index = calloc(max2sigma+1, sizeof(struct index));
8
Null pointer value stored to field 'tail'
634 outarray = calloc((max2sigma+2)*(max2sigma+1), sizeof(struct outarray));
635
636 for (i=0; i <= max2sigma; i++) {
9
Assuming 'i' is > 'max2sigma'
10
Loop condition is false. Execution continues on line 642
637 (index+i)->tail = outarray + ((max2sigma+2)*i);
638 }
639
640
641 /* Mode, a, b */
642 STACK_3_PUSH(0,0,0)int_stack_push(0); int_stack_push(0); int_stack_push(0);;
643
644 th = triplet_hash_init();
645 triplet_hash_insert(th, 0, 0, 0);
646
647 fsm_state_init(sigma_max(net1->sigma));
648
649 point_a = init_state_pointers(machine_a);
650 point_b = init_state_pointers(machine_b);
651
652 mainloop = 0;
653
654 while (!int_stack_isempty()) {
11
Assuming the condition is true
12
Loop condition is true. Entering loop body
655
656 /* Get a pair of states to examine */
657
658 a = int_stack_pop();
659 b = int_stack_pop();
660 mode = int_stack_pop();
661
662 current_state = triplet_hash_find(th, a,b,mode);
663 current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1) && (mode == 0)) ? 1 : 0;
13
Assuming field 'start' is not equal to 1
664 current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0;
14
Assuming field 'final' is not equal to 1
665
666 fsm_state_set_current_state(current_state, current_final, current_start);
667
668 /* Create the index for machine b in this state */
669 for (mainloop++, machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
15
Assuming 'b' is equal to field 'state_no'
16
Loop condition is true. Entering loop body
670 int bindex;
671 /* Index b */
672 bindex = (machine_b->in == IDENTITY2) ? UNKNOWN1 : machine_b->in;
17
Assuming field 'in' is equal to IDENTITY
18
'?' condition is true
673 if (bindex
18.1
'bindex' is >= 0
< 0 || machine_b->target < 0)
19
Assuming field 'target' is >= 0
20
Taking false branch
674 continue;
675
676 iptr = (index+bindex)->tail;
21
Null pointer value stored to 'iptr'
677 if (iptr->mainloop != mainloop) {
22
Access to field 'mainloop' results in a dereference of a null pointer (loaded from variable 'iptr')
678 iptr = (index+bindex)->tail = outarray+(bindex*(max2sigma+2));
679 } else {
680 iptr++;
681 }
682 iptr->symin = machine_b->in;
683 iptr->symout = machine_b->out;
684 iptr->mainloop = mainloop;
685 iptr->target = machine_b->target;
686 (index+bindex)->tail = iptr;
687 }
688
689 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
690
691 /* If we have the same transition from (a,b)-> some state */
692 /* If we have x:y y:z trans to some state */
693 aout = machine_a->out;
694 ain = machine_a->in;
695 /* IDENTITY is indexed under UNKNOWN (see above) */
696 asearch = (aout == IDENTITY2) ? UNKNOWN1 : aout;
697 if (aout < 0) continue;
698 iptr = outarray+(asearch*(max2sigma+2));
699 currtail = (index+asearch)->tail + 1;
700 for ( ; iptr != currtail && iptr->mainloop == mainloop ; iptr++) {
701
702 ain = machine_a->in;
703 aout = machine_a->out;
704 bin = iptr->symin;
705 bout = iptr->symout;
706
707 if (aout == IDENTITY2 && bin == UNKNOWN1) {
708 ain = aout = UNKNOWN1;
709 }
710 else if (aout == UNKNOWN1 && bin == IDENTITY2) {
711 bin = bout = UNKNOWN1;
712 }
713
714 if (!g_compose_tristate) {
715 if (bin == aout && bin != -1 && bin != EPSILON0) {
716 /* mode -> 0 */
717 if ((target_number = triplet_hash_find(th, machine_a->target, iptr->target, 0)) == -1) {
718 STACK_3_PUSH(0, iptr->target, machine_a->target)int_stack_push(0); int_stack_push(iptr->target); int_stack_push
(machine_a->target);
;
719 target_number = triplet_hash_insert(th, machine_a->target, iptr->target, 0);
720 }
721
722 fsm_state_add_arc(current_state, ain, bout, target_number, current_final, current_start);
723 }
724 }
725
726 else if (g_compose_tristate) {
727 if (bin == aout && bin != -1 && ((bin != EPSILON0 || mode == 0))) {
728 /* mode -> 0 */
729 if ((target_number = triplet_hash_find(th, machine_a->target, iptr->target, 0)) == -1) {
730 STACK_3_PUSH(0, iptr->target, machine_a->target)int_stack_push(0); int_stack_push(iptr->target); int_stack_push
(machine_a->target);
;
731 target_number = triplet_hash_insert(th, machine_a->target, iptr->target, 0);
732 }
733
734 fsm_state_add_arc(current_state, ain, bout, target_number, current_final, current_start);
735 }
736 }
737
738 }
739 }
740
741 /* Treat epsilon outputs on machine a (may include flags) */
742 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
743 aout = machine_a->out;
744 if (aout != EPSILON0 && g_flag_is_epsilon == 0)
745 continue;
746 ain = machine_a->in;
747
748 if (g_flag_is_epsilon && aout != -1 && mode == 0 && *(is_flag+aout)) {
749 if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) {
750 STACK_3_PUSH(0, b, machine_a->target)int_stack_push(0); int_stack_push(b); int_stack_push(machine_a
->target);
;
751 target_number = triplet_hash_insert(th, machine_a->target, b, 0);
752 }
753 fsm_state_add_arc(current_state, ain, aout, target_number, current_final, current_start);
754 }
755
756 if (!g_compose_tristate) {
757 /* Check A:0 arcs on upper side */
758 if (aout == EPSILON0 && mode == 0) {
759 /* mode -> 0 */
760 if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) {
761 STACK_3_PUSH(0, b, machine_a->target)int_stack_push(0); int_stack_push(b); int_stack_push(machine_a
->target);
;
762 target_number = triplet_hash_insert(th, machine_a->target, b, 0);
763 }
764
765 fsm_state_add_arc(current_state, ain, EPSILON0, target_number, current_final, current_start);
766 }
767 }
768
769 else if (g_compose_tristate) {
770 if (aout == EPSILON0 && (mode != 2)) {
771 /* mode -> 1 */
772 if ((target_number = triplet_hash_find(th, machine_a->target, b, 1)) == -1) {
773 STACK_3_PUSH(1, b, machine_a->target)int_stack_push(1); int_stack_push(b); int_stack_push(machine_a
->target);
;
774 target_number = triplet_hash_insert(th, machine_a->target, b, 1);
775 }
776
777 fsm_state_add_arc(current_state, ain, EPSILON0, target_number, current_final, current_start);
778
779 }
780 }
781
782 }
783 /* Treat epsilon inputs on machine b (may include flags) */
784 for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
785 bin = machine_b->in;
786 if (bin != EPSILON0 && g_flag_is_epsilon == 0)
787 continue;
788
789 bout = machine_b->out;
790
791 if (g_flag_is_epsilon && bin != -1 && *(is_flag+bin)) {
792 if ((target_number = triplet_hash_find(th, a, machine_b->target, 1)) == -1) {
793 STACK_3_PUSH(1, machine_b->target,a)int_stack_push(1); int_stack_push(machine_b->target); int_stack_push
(a);
;
794 target_number = triplet_hash_insert(th, a, machine_b->target, 1);
795 }
796 fsm_state_add_arc(current_state, bin, bout, target_number, current_final, current_start);
797 }
798
799 if (!g_compose_tristate) {
800 /* Check 0:A arcs on lower side */
801 if (bin == EPSILON0) {
802 /* mode -> 1 */
803 if ((target_number = triplet_hash_find(th, a, machine_b->target, 1)) == -1) {
804 STACK_3_PUSH(1, machine_b->target,a)int_stack_push(1); int_stack_push(machine_b->target); int_stack_push
(a);
;
805 target_number = triplet_hash_insert(th, a, machine_b->target, 1);
806 }
807
808 fsm_state_add_arc(current_state, EPSILON0, bout, target_number, current_final, current_start);
809 }
810 }
811
812 else if (g_compose_tristate) {
813 /* Check 0:A arcs on lower side */
814 if (bin == EPSILON0 && mode != 1) {
815 /* mode -> 1 */
816 if ((target_number = triplet_hash_find(th, a, machine_b->target, 2)) == -1) {
817 STACK_3_PUSH(2, machine_b->target, a)int_stack_push(2); int_stack_push(machine_b->target); int_stack_push
(a);
;
818 target_number = triplet_hash_insert(th, a, machine_b->target, 2);
819 }
820
821 fsm_state_add_arc(current_state, EPSILON0, bout, target_number, current_final, current_start);
822 }
823 }
824 }
825 fsm_state_end_state();
826 }
827
828 free(net1->states);
829 fsm_destroy(net2);
830 fsm_state_close(net1);
831 free(point_a);
832 free(point_b);
833 free(index);
834 free(outarray);
835
836 if (g_flag_is_epsilon)
837 free(is_flag);
838 triplet_hash_free(th);
839 net1 = fsm_topsort(fsm_coaccessible(net1));
840 return(fsm_coaccessible(net1));
841}
842
843struct mergesigma *add_to_mergesigma(struct mergesigma *msigma, struct sigma *sigma, short presence) {
844 int number = 0;
845
846 if (msigma->number == -1) {
847 number = 2;
848 } else {
849 msigma->next = malloc(sizeof(struct mergesigma));
850 number = msigma->number;
851 msigma = msigma->next;
852 msigma->next = NULL((void*)0);
853 }
854
855 if (sigma->number < 3) {
856 msigma->number = sigma->number;
857 } else {
858 if (number < 3)
859 number = 2;
860 msigma->number = number+1;
861 }
862 msigma->symbol = sigma->symbol;
863 msigma->presence = presence;
864 return(msigma);
865}
866
867struct sigma *copy_mergesigma(struct mergesigma *mergesigma) {
868 struct sigma *sigma, *new_sigma;
869
870 sigma = new_sigma = NULL((void*)0);
871 while(mergesigma != NULL((void*)0)) {
872 if (sigma == NULL((void*)0)) {
873 sigma = malloc(sizeof(struct sigma));
874 new_sigma = sigma;
875 } else {
876 sigma->next = malloc(sizeof(struct sigma));
877 sigma = sigma->next;
878 }
879 sigma->next = NULL((void*)0);
880 sigma->number = mergesigma->number;
881
882 sigma->symbol = NULL((void*)0);
883 if (mergesigma->symbol != NULL((void*)0))
884 sigma->symbol = strdup(mergesigma->symbol);
885 mergesigma = mergesigma->next;
886 }
887 return(new_sigma);
888}
889
890void fsm_merge_sigma(struct fsm *net1, struct fsm *net2) {
891
892 struct sigma *sigma_1, *sigma_2, *new_sigma_1 = NULL((void*)0), *new_sigma_2 = NULL((void*)0);
893 struct mergesigma *mergesigma, *mergesigma2, *start_mergesigma;
894 struct fsm_state *fsm_state, *new_1_state, *new_2_state;
895 int i, j, end_1 = 0, end_2 = 0, sigmasizes, *mapping_1, *mapping_2, equal = 1, unknown_1 = 0, unknown_2 = 0, net_unk = 0, net_adds = 0, net_lines;
896
897 if (!fsm_options.skip_word_boundary_marker) {
898 i = sigma_find(".#.", net1->sigma);
899 j = sigma_find(".#.", net2->sigma);
900 if (i != -1 && j == -1) {
901 sigma_add(".#.", net2->sigma);
902 sigma_sort(net2);
903 }
904 if (j != -1 && i == -1) {
905 sigma_add(".#.", net1->sigma);
906 sigma_sort(net1);
907 }
908 }
909
910 sigma_1 = net1->sigma;
911 sigma_2 = net2->sigma;
912
913 sigmasizes = sigma_max(sigma_1) + sigma_max(sigma_2) + 3;
914
915 mapping_1 = malloc(sizeof(int)*sigmasizes);
916 mapping_2 = malloc(sizeof(int)*sigmasizes);
917
918 /* Fill mergesigma */
919
920 mergesigma = malloc(sizeof(struct mergesigma));
921 mergesigma->number = -1;
922 mergesigma->symbol = NULL((void*)0);
923 mergesigma->next = NULL((void*)0);
924 start_mergesigma = mergesigma;
925
926 /* Loop over sigma 1, sigma 2 */
927 for (;;) {
928 if (sigma_1 == NULL((void*)0))
929 end_1 = 1;
930 if (sigma_2 == NULL((void*)0))
931 end_2 = 1;
932 if (end_1 && end_2)
933 break;
934 if (end_2) {
935 /* Treating only 1 now */
936 mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1);
937 *(mapping_1+(sigma_1->number)) = mergesigma->number;
938 sigma_1 = sigma_1->next;
939 equal = 0;
940 continue;
941 }
942 else if (end_1) {
943 /* Treating only 2 now */
944 mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2);
945 *(mapping_2+(sigma_2->number)) = mergesigma->number;
946 sigma_2 = sigma_2->next;
947 equal = 0;
948 continue;
949 }
950
951 else {
952
953 /* Both alive */
954
955 /* 1 or 2 contains special characters */
956 if ((sigma_1->number <= IDENTITY2) || (sigma_2->number <= IDENTITY2)) {
957
958 /* Treating zeros or unknowns */
959
960 if ((sigma_1->number == UNKNOWN1) || (sigma_1->number == IDENTITY2))
961 unknown_1 = 1;
962 if ((sigma_2->number == UNKNOWN1) || (sigma_2->number == IDENTITY2))
963 unknown_2 = 1;
964
965 if (sigma_1->number == sigma_2->number) {
966 mergesigma = add_to_mergesigma(mergesigma, sigma_1, 3);
967 sigma_1 = sigma_1->next;
968 sigma_2 = sigma_2->next;
969 }
970 else if (sigma_1->number < sigma_2->number) {
971 mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1);
972 sigma_1 = sigma_1->next;
973 equal = 0;
974 }
975 else {
976 mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2);
977 sigma_2 = sigma_2->next;
978 equal = 0;
979 }
980 continue;
981 }
982 /* Both contain non-special chars */
983 if (strcmp(sigma_1->symbol, sigma_2->symbol) == 0) {
984 mergesigma = add_to_mergesigma(mergesigma, sigma_1, 3);
985 /* Add symbol numbers to mapping */
986 *(mapping_1+(sigma_1->number)) = mergesigma->number;
987 *(mapping_2+(sigma_2->number)) = mergesigma->number;
988
989 sigma_1 = sigma_1->next;
990 sigma_2 = sigma_2->next;
991 }
992 else if (strcmp(sigma_1->symbol, sigma_2->symbol) < 0) {
993 mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1);
994 *(mapping_1+(sigma_1->number)) = mergesigma->number;
995 sigma_1 = sigma_1->next;
996 equal = 0;
997 }
998 else {
999 mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2);
1000 *(mapping_2+(sigma_2->number)) = mergesigma->number;
1001 sigma_2 = sigma_2->next;
1002 equal = 0;
1003 }
1004 continue;
1005 }
1006 }
1007
1008 /* Go over both net1 and net2 and replace arc numbers with new mappings */
1009
1010 fsm_state = net1->states;
1011 for (i=0; (fsm_state+i)->state_no != -1; i++) {
1012 if ((fsm_state+i)->in > 2)
1013 (fsm_state+i)->in = *(mapping_1+(fsm_state+i)->in);
1014 if ((fsm_state+i)->out > 2)
1015 (fsm_state+i)->out = *(mapping_1+(fsm_state+i)->out);
1016 }
1017 fsm_state = net2->states;
1018 for (i=0; (fsm_state+i)->state_no != -1; i++) {
1019 if ((fsm_state+i)->in > 2)
1020 (fsm_state+i)->in = *(mapping_2+(fsm_state+i)->in);
1021 if ((fsm_state+i)->out > 2)
1022 (fsm_state+i)->out = *(mapping_2+(fsm_state+i)->out);
1023 }
1024
1025 /* Copy mergesigma to net1, net2 */
1026
1027 new_sigma_1 = copy_mergesigma(start_mergesigma);
1028 new_sigma_2 = copy_mergesigma(start_mergesigma);
1029
1030 fsm_sigma_destroy(net1->sigma);
1031 fsm_sigma_destroy(net2->sigma);
1032
1033 net1->sigma = new_sigma_1;
1034 net2->sigma = new_sigma_2;
1035
1036 /* Expand on ?, ?:x, y:? */
1037
1038 if (unknown_1 && !equal) {
1039 /* Expand net 1 */
1040 fsm_state = net1->states;
1041 net_lines = find_arccount(net1->states);
1042 for(mergesigma = start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) {
1043 if(mergesigma->presence == 2) {
1044 net_unk++;
1045 }
1046 }
1047 for(net_adds = 0, i=0; (fsm_state+i)->state_no != -1; i++) {
1048 if ((fsm_state+i)->in == IDENTITY2)
1049 net_adds += net_unk;
1050 if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out != UNKNOWN1))
1051 net_adds += net_unk;
1052 if (((fsm_state+i)->out == UNKNOWN1) && ((fsm_state+i)->in != UNKNOWN1))
1053 net_adds += net_unk;
1054 if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1))
1055 net_adds += net_unk*net_unk - net_unk + 2*net_unk;
1056 }
1057
1058 new_1_state = malloc(sizeof(struct fsm_state)*(net_adds+net_lines+1));
1059 for(i=0,j=0; (fsm_state+i)->state_no != -1; i++) {
1060
1061 if ((fsm_state+i)->in == IDENTITY2) {
1062 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1063 j++;
1064 for (mergesigma=start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) {
1065 if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) {
1066 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1067 j++;
1068 }
1069 }
1070 }
1071
1072 if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out != UNKNOWN1) {
1073 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1074 j++;
1075 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1076 if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) {
1077 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1078 j++;
1079 }
1080 }
1081 }
1082
1083 if (((fsm_state+i)->in != UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1)) {
1084 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1085 j++;
1086 for (mergesigma=start_mergesigma; mergesigma != NULL((void*)0); mergesigma = mergesigma->next) {
1087 if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) {
1088 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1089 j++;
1090 }
1091 }
1092 }
1093
1094 /* Replace ?:? with ?:[all unknowns] [all unknowns]:? and [all unknowns]:[all unknowns] where a != b */
1095 if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) {
1096 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1097 j++;
1098 for (mergesigma2=start_mergesigma; mergesigma2 != NULL((void*)0) ; mergesigma2 = mergesigma2->next) {
1099 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1100 if (((mergesigma->presence == 2 && mergesigma2->presence == 2 && mergesigma->number > IDENTITY2 && mergesigma2->number > IDENTITY2) || (mergesigma->number == UNKNOWN1 && mergesigma2->number > IDENTITY2 && mergesigma2->presence == 2) || (mergesigma2->number == UNKNOWN1 && mergesigma->number > IDENTITY2 && mergesigma->presence == 2)) && mergesigma->number != mergesigma2->number) {
1101 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma2->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1102 j++;
1103 }
1104 }
1105 }
1106 }
1107
1108 /* Simply copy arcs that are not IDENTITY or UNKNOWN */
1109 if (((fsm_state+i)->in > IDENTITY2 || (fsm_state+i)->in == EPSILON0) && ((fsm_state+i)->out > IDENTITY2 || (fsm_state+i)->out == EPSILON0)) {
1110 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1111 j++;
1112 }
1113
1114 if ((fsm_state+i)->in == -1) {
1115 add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1116 j++;
1117 }
1118 }
1119
1120 add_fsm_arc(new_1_state, j, -1, -1, -1, -1, -1, -1);
1121 free(net1->states);
1122 net1->states = new_1_state;
1123 }
1124
1125 if (unknown_2 && !equal) {
1126 /* Expand net 2 */
1127 fsm_state = net2->states;
1128 net_lines = find_arccount(net2->states);
1129 for(net_unk = 0, mergesigma = start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) {
1130 if(mergesigma->presence == 1) {
1131 net_unk++;
1132 }
1133 }
1134
1135 for(net_adds = 0, i=0; (fsm_state+i)->state_no != -1; i++) {
1136 if ((fsm_state+i)->in == IDENTITY2)
1137 net_adds += net_unk;
1138 if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out != UNKNOWN1))
1139 net_adds += net_unk;
1140 if (((fsm_state+i)->out == UNKNOWN1) && ((fsm_state+i)->in != UNKNOWN1))
1141 net_adds += net_unk;
1142 if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1))
1143 net_adds += net_unk*net_unk - net_unk + 2*net_unk;
1144 }
1145
1146 /* We need net_add new lines in fsm_state */
1147 new_2_state = malloc(sizeof(struct fsm_state)*(net_adds+net_lines+1));
1148 for(i=0,j=0; (fsm_state+i)->state_no != -1; i++) {
1149
1150 if ((fsm_state+i)->in == IDENTITY2) {
1151 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1152 j++;
1153 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1154 if ((mergesigma->presence == 1) && (mergesigma->number > IDENTITY2)) {
1155 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1156 j++;
1157 }
1158 }
1159 }
1160
1161 if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out != UNKNOWN1) {
1162 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1163 j++;
1164 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1165 if (mergesigma->presence == 1 && mergesigma->number > IDENTITY2) {
1166 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1167 j++;
1168 }
1169 }
1170 }
1171
1172 if ((fsm_state+i)->in != UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) {
1173 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1174 j++;
1175 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1176 if ((mergesigma->presence == 1) && (mergesigma->number > IDENTITY2)) {
1177 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1178 j++;
1179 }
1180 }
1181 }
1182
1183 if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) {
1184 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1185 j++;
1186 for (mergesigma2=start_mergesigma; mergesigma2 != NULL((void*)0) ; mergesigma2 = mergesigma2->next) {
1187 for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) {
1188 if (((mergesigma->presence == 1 && mergesigma2->presence == 1 && mergesigma->number > IDENTITY2 && mergesigma2->number > IDENTITY2) || (mergesigma->number == UNKNOWN1 && mergesigma2->number > IDENTITY2 && mergesigma2->presence == 1) || (mergesigma2->number == UNKNOWN1 && mergesigma->number > IDENTITY2 && mergesigma->presence == 1)) && mergesigma->number != mergesigma2->number) {
1189 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma2->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1190 j++;
1191 }
1192 }
1193 }
1194 }
1195
1196 /* Simply copy arcs that are not IDENTITY or UNKNOWN */
1197 if (((fsm_state+i)->in > IDENTITY2 || (fsm_state+i)->in == EPSILON0) && ((fsm_state+i)->out > IDENTITY2 || (fsm_state+i)->out == EPSILON0)) {
1198
1199 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1200 j++;
1201 }
1202
1203 if ((fsm_state+i)->in == -1) {
1204 add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state);
1205 j++;
1206 }
1207 }
1208
1209 add_fsm_arc(new_2_state, j, -1, -1, -1, -1, -1, -1);
1210 free(net2->states);
1211 net2->states = new_2_state;
1212 }
1213 free(mapping_1);
1214 free(mapping_2);
1215
1216 /* Free structure */
1217 for (mergesigma2 = NULL((void*)0); start_mergesigma != NULL((void*)0); ) {
1218 mergesigma2 = start_mergesigma;
1219 start_mergesigma = start_mergesigma->next;
1220 free(mergesigma2);
1221 }
1222}
1223
1224
1225int add_fsm_arc(struct fsm_state *fsm, int offset, int state_no, int in, int out, int target, int final_state, int start_state) {
1226
1227 (fsm+offset)->state_no = state_no;
1228 (fsm+offset)->in = in;
1229 (fsm+offset)->out= out;
1230 (fsm+offset)->target = target;
1231 (fsm+offset)->final_state = final_state;
1232 (fsm+offset)->start_state = start_state;
1233 offset++;
1234 return(offset);
1235}
1236
1237
1238void fsm_count(struct fsm *net) {
1239 struct fsm_state *fsm;
1240 int i, linecount, arccount, oldstate, finalcount, maxstate;
1241 linecount = arccount = finalcount = maxstate = 0;
1242
1243 oldstate = -1;
1244
1245 fsm = net->states;
1246 for (i=0; (fsm+i)->state_no != -1; i++) {
1247 if ((fsm+i)->state_no > maxstate)
1248 maxstate = (fsm+i)->state_no;
1249
1250 linecount++;
1251 if ((fsm+i)->target != -1) {
1252 arccount++;
1253 // if (((fsm+i)->in != (fsm+i)->out) || ((fsm+i)->in == UNKNOWN) || ((fsm+i)->out == UNKNOWN))
1254 // arity = 2;
1255 }
1256 if ((fsm+i)->state_no != oldstate) {
1257 if ((fsm+i)->final_state) {
1258 finalcount++;
1259 }
1260 oldstate = (fsm+i)->state_no;
1261 }
1262 }
1263
1264 linecount++;
1265 net->statecount = maxstate+1;
1266 net->linecount = linecount;
1267 net->arccount = arccount;
1268 net->finalcount = finalcount;
1269}
1270
1271static void fsm_add_to_states(struct fsm *net, int add) {
1272 struct fsm_state *fsm;
1273 int i;
1274
1275 fsm=net->states;
1276 for(i=0; (fsm+i)->state_no != -1; i++) {
1277 (fsm+i)->state_no = (fsm+i)->state_no + add;
1278 if ((fsm+i)->target != -1)
1279 (fsm+i)->target = (fsm+i)->target + add;
1280 }
1281}
1282
1283struct fsm *fsm_concat_m_n(struct fsm *net1, int m, int n) {
1284 struct fsm *acc;
1285 int i;
1286 acc = fsm_empty_string();
1287 for (i = 1; i <= n ;i++) {
1288 if (i > m)
1289 acc = fsm_concat(acc, fsm_optionality(fsm_copy(net1)));
1290 else
1291 acc = fsm_concat(acc, fsm_copy(net1));
1292 }
1293 fsm_destroy(net1);
1294 return(acc);
1295}
1296
1297struct fsm *fsm_concat_n(struct fsm *net1, int n) {
1298 return(fsm_concat_m_n(net1,n,n));
1299}
1300
1301struct fsm *fsm_concat(struct fsm *net1, struct fsm *net2) {
1302 struct fsm_state *fsm1, *fsm2, *new_fsm;
1303 int i,j,current_final;
1304
1305 fsm_merge_sigma(net1, net2);
1306
1307 fsm1 = net1->states;
1308 fsm2 = net2->states;
1309 fsm_count(net1);
1310 fsm_count(net2);
1311 /* The concatenation of a language with no final state should yield the empty language */
1312 if ((net1->finalcount == 0) || (net2->finalcount == 0)) {
1313 fsm_destroy(net1);
1314 fsm_destroy(net2);
1315 net1 = fsm_empty_set();
1316 return(net1);
1317 }
1318
1319 /* Add |fsm1| states to the state numbers of fsm2 */
1320 fsm_add_to_states(net2, net1->statecount);
1321
1322 new_fsm = malloc(((sizeof(struct fsm_state))*(net1->linecount + net2->linecount + net1->finalcount + 2 )));
1323 current_final = -1;
1324 /* Copy fsm1, fsm2 after each other, adding appropriate epsilon arcs */
1325 for(i=0,j=0; (fsm1+i)->state_no != -1; i++) {
1326 if (((fsm1+i)->final_state == 1) && ((fsm1+i)->state_no != current_final)) {
1327 add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, EPSILON0, EPSILON0, net1->statecount, 0, (fsm1+i)->start_state);
1328 current_final = (fsm1+i)->state_no;
1329 j++;
1330 }
1331 if (!(((fsm1+i)->target == -1) && ((fsm1+i)->final_state == 1))) {
1332 add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, 0, (fsm1+i)->start_state);
1333 j++;
1334 }
1335 }
1336
1337 for(i=0; (fsm2+i)->state_no != -1; i++, j++) {
1338 add_fsm_arc(new_fsm, j, (fsm2+i)->state_no, (fsm2+i)->in, (fsm2+i)->out, (fsm2+i)->target, (fsm2+i)->final_state, 0);
1339 }
1340 add_fsm_arc(new_fsm, j, -1, -1, -1, -1, -1, -1);
1341 free(net1->states);
1342 fsm_destroy(net2);
1343 net1->states = new_fsm;
1344 if (sigma_find_number(EPSILON0, net1->sigma) == -1) {
1345 sigma_add_special(EPSILON0, net1->sigma);
1346 }
1347 fsm_count(net1);
1348 net1->is_epsilon_free = NO0;
1349 net1->is_deterministic = NO0;
1350 net1->is_minimized = NO0;
1351 net1->is_pruned = NO0;
1352 return(fsm_minimize(net1));
1353}
1354
1355struct fsm *fsm_union(struct fsm *net1, struct fsm *net2) {
1356 struct fsm_state *new_fsm, *fsm1, *fsm2;
1357 int i, j, net1_offset, net2_offset, new_target, arccount;
1358
1359 fsm_merge_sigma(net1, net2);
1360
1361 fsm_count(net1);
1362 fsm_count(net2);
1363
1364 fsm1 = net1->states;
1365 fsm2 = net2->states;
1366
1367 net1_offset = 1;
1368 net2_offset = net1->statecount + 1;
1369 new_fsm = malloc((net1->linecount + net2->linecount + 2) * sizeof(struct fsm_state));
1370
1371 j = 0;
1372
1373 add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, net1_offset, 0 , 1);
1374 add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, net2_offset, 0 , 1);
1375 arccount = 2;
1376 for (i=0 ; (fsm1+i)->state_no != -1; i++) {
1377 new_target = (fsm1+i)->target == -1 ? -1 : (fsm1+i)->target + net1_offset;
1378 add_fsm_arc(new_fsm, j++, (fsm1+i)->state_no + net1_offset, (fsm1+i)->in, (fsm1+i)->out, new_target, (fsm1+i)->final_state, 0);
1379 if (new_target != -1) arccount++;
1380 }
1381 for (i=0 ; (fsm2+i)->state_no != -1; i++) {
1382 new_target = (fsm2+i)->target == -1 ? -1 : (fsm2+i)->target + net2_offset;
1383 add_fsm_arc(new_fsm, j++, (fsm2+i)->state_no + net2_offset, (fsm2+i)->in, (fsm2+i)->out, new_target, (fsm2+i)->final_state, 0);
1384 if (new_target != -1) arccount++;
1385 }
1386 add_fsm_arc(new_fsm, j++, -1, -1, -1, -1, -1, -1);
1387 free(net1->states);
1388 net1->states = new_fsm;
1389 net1->statecount = net1->statecount + net2->statecount + 1;
1390 net1->linecount = j;
1391 net1->arccount = arccount;
1392 net1->finalcount = net1->finalcount + net2->finalcount;
1393 fsm_destroy(net2);
1394 fsm_update_flags(net1,NO0,NO0,NO0,NO0,UNK2,NO0);
1395 if (sigma_find_number(EPSILON0, net1->sigma) == -1) {
1396 sigma_add_special(EPSILON0, net1->sigma);
1397 }
1398 return(net1);
1399}
1400
1401struct fsm *fsm_completes(struct fsm *net, int operation) {
1402 struct fsm_state *fsm, *new_fsm;
1403 int i, j, offset, statecount, sigsize, *state_table, sink_state, target, last_sigma = 0, arccount = 0, incomplete;
1404 short *starts, *finals, *sinks;
1405
1406 /* TODO: this currently relies on that the sigma is gap-free in its numbering */
1407 /* which can't always be counted on, especially when reading external machines */
1408
1409 /* TODO: check arity */
1410
1411 if (net->is_minimized != YES1)
1412 net = fsm_minimize(net);
1413
1414 incomplete = 0;
1415 fsm = net->states;
1416 if (sigma_find_number(UNKNOWN1, net->sigma) != -1) {
1417 sigma_remove("@_UNKNOWN_SYMBOL_@",net->sigma);
1418 }
1419 if (sigma_find_number(IDENTITY2, net->sigma) == -1) {
1420 sigma_add_special(IDENTITY2, net->sigma);
1421 incomplete = 1;
1422 }
1423
1424 sigsize = sigma_size(net->sigma);
1425 last_sigma = sigma_max(net->sigma);
1426
1427 if (sigma_find_number(EPSILON0, net->sigma) != -1)
1428 sigsize--;
1429
1430 fsm_count(net);
1431 statecount = net->statecount;
1432 starts = malloc(sizeof(short)*(statecount+1)); /* +1 for sink state */
1433 finals = malloc(sizeof(short)*(statecount+1));
1434 sinks = malloc(sizeof(short)*(statecount+1));
1435
1436 /* Init starts, finals, sinks arrays */
1437
1438 for (i=0; i < statecount; i++) {
1439 *(sinks+i) = 1;
1440 *(finals+i) = 0;
1441 *(starts+i) = 0;
1442 }
1443 for (i=0; (fsm+i)->state_no != -1; i++) {
1444 if (operation == COMPLEMENT0) {
1445 if ((fsm+i)->final_state == 1) {
1446 (fsm+i)->final_state = 0;
1447 } else if ((fsm+i)->final_state == 0) {
1448 (fsm+i)->final_state = 1;
1449 }
1450 }
1451 if ((fsm+i)->target != -1)
1452 arccount++;
1453 starts[(fsm+i)->state_no] = (fsm+i)->start_state;
1454 finals[(fsm+i)->state_no] = (fsm+i)->final_state;
1455 if ((fsm+i)->final_state && operation != COMPLEMENT0)
1456 *(sinks+((fsm+i)->state_no)) = 0;
1457 if ((fsm+i)->final_state == 0 && (operation == COMPLEMENT0))
1458 *(sinks+((fsm+i)->state_no)) = 0;
1459 if (((fsm+i)->target != -1) && ((fsm+i)->state_no != (fsm+i)->target))
1460 *(sinks+((fsm+i)->state_no)) = 0;
1461 }
1462
1463 net->is_loop_free = NO0;
1464 net->pathcount = PATHCOUNT_CYCLIC-1;
1465
1466 if (incomplete == 0 && (arccount == (sigsize)*statecount)) {
1467 /* printf("Already complete!\n"); */
1468
1469/* if (operation == COMPLEMENT) { */
1470/* for (i=0; (fsm+i)->state_no != -1; i++) { */
1471/* if ((fsm+i)->final_state) { */
1472/* (fsm+i)->final_state = 0; */
1473/* } else { */
1474/* (fsm+i)->final_state = 1; */
1475/* } */
1476/* } */
1477/* } */
1478 free(starts);
1479 free(finals);
1480 free(sinks);
1481 net->is_completed = YES1;
1482 net->is_minimized = YES1;
1483 net->is_pruned = NO0;
1484 net->is_deterministic = YES1;
1485 return(net);
1486 }
1487
1488 /* Find an existing sink state, or invent a new one */
1489
1490 for (i=0, sink_state = -1; i<statecount; i++) {
1491 if (sinks[i] == 1) {
1492 sink_state = i;
1493 break;
1494 }
1495 }
1496
1497 if (sink_state == -1) {
1498 sink_state = statecount;
1499 *(starts+sink_state) = 0;
1500 if (operation == COMPLEMENT0) {
1501 *(finals+sink_state) = 1;
1502 } else {
1503 *(finals+sink_state) = 0;
1504 }
1505 statecount++;
1506 }
1507
1508
1509 /* We can build a state table without memory problems since the size */
1510 /* of the completed machine will be |Sigma| * |States| in all cases */
1511
1512 sigsize += 2;
1513
1514 state_table = malloc(sizeof(int)*sigsize*statecount);
1515
1516 /* Init state table */
1517 /* i = state #, j = sigma # */
1518 for (i=0; i<statecount; i++) {
1519 for (j=0; j<sigsize; j++) {
1520 *(state_table+(i*sigsize+j)) = -1;
1521 }
1522 }
1523
1524 for (i=0; (fsm+i)->state_no != -1; i++) {
1525 if ((fsm+i)->target != -1) {
1526 *(state_table+(((fsm+i)->state_no)*sigsize+((fsm+i)->in))) = (fsm+i)->target;
1527 }
1528 }
1529 /* Add looping arcs from and to sink state */
1530 for (j=2; j<=last_sigma; j++)
1531 *(state_table+(sink_state*sigsize+j)) = sink_state;
1532 /* Add missing arcs to sink state from all states */
1533 for (i=0; i<statecount; i++) {
1534 for (j=2; j<=last_sigma; j++) {
1535 if (*(state_table+(i*sigsize+j)) == -1)
1536 *(state_table+(i*sigsize+j)) = sink_state;
1537 }
1538 }
1539
1540 new_fsm = malloc(sizeof(struct fsm_state)*(sigsize*statecount+1));
1541
1542/* Complement requires toggling final, nonfinal states */
1543/* if (operation == COMPLEMENT) */
1544/* for (i=0; i < statecount; i++) */
1545/* *(finals+i) = *(finals+i) == 0 ? 1 : 0; */
1546
1547 for (i=0, offset = 0; i<statecount; i++) {
1548 for (j=2; j<=last_sigma; j++) {
1549 target = *(state_table+(i*sigsize+j)) == -1 ? sink_state : *(state_table+(i*sigsize+j));
1550 add_fsm_arc(new_fsm, offset, i, j, j, target, finals[i], starts[i]);
1551 offset++;
1552 }
1553 }
1554 add_fsm_arc(new_fsm, offset, -1, -1, -1, -1, -1, -1);
1555 offset++;
1556 free(net->states);
1557 net->states = new_fsm;
1558 free(starts);
1559 free(finals);
1560 free(sinks);
1561 free(state_table);
1562 net->is_minimized = NO0;
1563 net->is_pruned = NO0;
1564 net->is_completed = YES1;
1565 net->statecount = statecount;
1566 return(net);
1567}
1568
1569struct fsm *fsm_complete(struct fsm *net) {
1570 return(fsm_completes(net, COMPLETE1));
1571}
1572
1573struct fsm *fsm_complement(struct fsm *net) {
1574 return(fsm_completes(net, COMPLEMENT0));
1575}
1576
1577struct fsm *fsm_kleene_closure(struct fsm *net, int operation) {
1578 struct fsm_state *fsm, *new_fsm;
1579 int i, j, laststate, curr_state, curr_target, arccount;
1580
1581 if (operation == OPTIONALITY2) {
1582 return(fsm_union(net,fsm_empty_string()));
1583 }
1584
1585 net = fsm_minimize(net);
1586 fsm_count(net);
1587
1588 fsm = net->states;
1589
1590 new_fsm = malloc( (net->linecount + net->finalcount + 1) * sizeof(struct fsm_state));
1591
1592 j = 0;
1593 if (operation == KLEENE_STAR0)
1594 add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, 1, 1, 1);
1595 if (operation == KLEENE_PLUS1)
1596 add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, 1, 0, 1);
1597 laststate = 0;
1598 arccount = 1;
1599 for (i = 0 ; (fsm+i)->state_no != -1; i++, laststate = curr_state) {
1600 curr_state = (fsm+i)->state_no + 1;
1601 curr_target = (fsm+i)->target == -1 ? -1 : (fsm+i)->target + 1;
1602 if (curr_target == -1 && (fsm+i)->final_state == 1) {
1603 add_fsm_arc(new_fsm, j++, curr_state, EPSILON0, EPSILON0, 0, 1, 0);
1604 arccount++;
1605 continue;
1606 }
1607 if (curr_state != laststate && (fsm+i)->final_state == 1) {
1608 arccount++;
1609 add_fsm_arc(new_fsm, j++, curr_state, EPSILON0, EPSILON0, 0, 1, 0);
1610 }
1611 add_fsm_arc(new_fsm, j++, curr_state, (fsm+i)->in, (fsm+i)->out, curr_target, (fsm+i)->final_state, 0);
1612 if (curr_target != -1) arccount++;
1613 }
1614 add_fsm_arc(new_fsm, j++, -1,-1,-1,-1,-1,-1);
1615 net->statecount = net->statecount+1;
1616 net->linecount = j;
1617 net->finalcount = operation == KLEENE_STAR0 ? net->finalcount+1 : net->finalcount;
1618 net->arccount = arccount;
1619 net->pathcount = PATHCOUNT_UNKNOWN-3;
1620 free(net->states);
1621 net->states = new_fsm;
1622 if (sigma_find_number(EPSILON0, net->sigma) == -1)
1623 sigma_add_special(EPSILON0, net->sigma);
1624 fsm_update_flags(net,NO0,NO0,NO0,NO0,UNK2,NO0);
1625 return(net);
1626}
1627
1628char *fsm_network_to_char(struct fsm *net) {
1629 struct sigma *sigma, *sigprev;
1630 sigma = net->sigma;
1631 if (sigma->number == -1) {
1632 return NULL((void*)0);
1633 }
1634 for (; sigma != NULL((void*)0) && sigma->number != -1 ; sigma = sigma->next) {
1635 sigprev = sigma;
1636 }
1637 return(strdup(sigprev->symbol));
1638}
1639
1640struct fsm *fsm_substitute_label(struct fsm *net, char *original, struct fsm *substitute) {
1641
1642 struct fsm *outnet, *subnet2;
1643 struct fsm_read_handle *inh, *subh, *subh2;
1644 struct fsm_construct_handle *outh;
1645 char *subin, *subout;
1646 int i, repsym, source, target, in, out, addstate1, addstate2;
1647
1648 fsm_merge_sigma(net, substitute);
1649 addstate1 = net->statecount;
1650 addstate2 = substitute->statecount;
1651
1652 inh = fsm_read_init(net);
1653 subh = fsm_read_init(substitute);
1654 repsym = fsm_get_symbol_number(inh, original);
1655 if (repsym == -1) {
1656 fsm_read_done(inh);
1657 return(net);
1658 }
1659 outh = fsm_construct_init(net->name);
1660 fsm_construct_copy_sigma(outh, net->sigma);
1661 while (fsm_get_next_arc(inh)) {
1662 source = fsm_get_arc_source(inh);
1663 target = fsm_get_arc_target(inh);
1664 in = fsm_get_arc_num_in(inh);
1665 out = fsm_get_arc_num_out(inh);
1666
1667 /* Double-sided arc, splice in substitute network */
1668 if (in == repsym && out == repsym) {
1669 fsm_read_reset(subh);
1670 fsm_construct_add_arc_nums(outh, source, addstate1, EPSILON0, EPSILON0);
1671 while (fsm_get_next_arc(subh)) {
1672 source = fsm_get_arc_source(subh);
1673 target = fsm_get_arc_target(subh);
1674 subin = fsm_get_arc_in(subh);
1675 subout = fsm_get_arc_out(subh);
1676 fsm_construct_add_arc(outh, source+addstate1, target+addstate1, subin, subout);
1677 }
1678 while ((i = fsm_get_next_final(subh)) != -1) {
1679 target = fsm_get_arc_target(inh);
1680 fsm_construct_add_arc_nums(outh, addstate1+i, target, EPSILON0, EPSILON0);
1681 }
1682 addstate1 = addstate1 + addstate2;
1683 /* One-sided replace, splice in repsym .x. sub or sub .x. repsym */
1684 } else if (in == repsym || out == repsym) {
1685 if (in == repsym) {
1686 subnet2 = fsm_minimize(fsm_cross_product(fsm_copy(substitute), fsm_symbol(fsm_get_arc_out(inh))));
1687 } else {
1688 subnet2 = fsm_minimize(fsm_cross_product(fsm_symbol(fsm_get_arc_in(inh)),fsm_copy(substitute)));
1689 }
1690 fsm_construct_add_arc_nums(outh, source, addstate1, EPSILON0, EPSILON0);
1691 subh2 = fsm_read_init(subnet2);
1692 while (fsm_get_next_arc(subh2)) {
1693 source = fsm_get_arc_source(subh2);
1694 target = fsm_get_arc_target(subh2);
1695 subin = fsm_get_arc_in(subh2);
1696 subout = fsm_get_arc_out(subh2);
1697 fsm_construct_add_arc(outh, source+addstate1, target+addstate1, subin, subout);
1698 }
1699 while ((i = fsm_get_next_final(subh2)) != -1) {
1700 target = fsm_get_arc_target(inh);
1701 fsm_construct_add_arc_nums(outh, addstate1+i, target, EPSILON0, EPSILON0);
1702 }
1703 fsm_read_done(subh2);
1704 addstate1 = addstate1 + subnet2->statecount;
1705 fsm_destroy(subnet2);
1706 } else {
1707 /* Default, just copy arc */
1708 fsm_construct_add_arc_nums(outh, source, target, in, out);
1709 }
1710 }
1711
1712 while ((i = fsm_get_next_final(inh)) != -1) {
1713 fsm_construct_set_final(outh, i);
1714 }
1715 while ((i = fsm_get_next_initial(inh)) != -1) {
1716 fsm_construct_set_initial(outh, i);
1717 }
1718 fsm_read_done(inh);
1719 fsm_read_done(subh);
1720 outnet = fsm_construct_done(outh);
1721 return(outnet);
1722}
1723
1724struct fsm *fsm_substitute_symbol(struct fsm *net, char *original, char *substitute) {
1725 struct fsm_state *fsm;
1726 int i,o,s = EPSILON0;
1727 if (strcmp(original,substitute) == 0)
1728 return(net);
1729 if ((o = sigma_find(original, net->sigma)) == -1) {
1730 //fprintf(stderr, "\nSymbol '%s' not found in network!\n", original);
1731 return(net);
1732 }
1733 if (strcmp(substitute,"0") == 0)
1734 s = EPSILON0;
1735 else if (substitute != NULL((void*)0) && (s = sigma_find(substitute, net->sigma)) == -1) {
1736 s = sigma_add(substitute, net->sigma);
1737 }
1738 for (i=0, fsm = net->states; (fsm+i)->state_no != -1; i++) {
1739 if ((fsm+i)->in == o) {
1740 (fsm+i)->in = s;
1741 }
1742 if ((fsm+i)->out == o) {
1743 (fsm+i)->out = s;
1744 }
1745 }
1746 net->sigma = sigma_remove(original, net->sigma);
1747 sigma_sort(net);
1748 fsm_update_flags(net, NO0, NO0, NO0, NO0, NO0, NO0);
1749 sigma_cleanup(net,0);
1750 /* if s = epsilon */
1751 net->is_minimized = NO0;
1752 return(fsm_determinize(net));
1753}
1754
1755struct fsm *fsm_cross_product(struct fsm *net1, struct fsm *net2) {
1756 int i, a, b, current_state, current_start, current_final, target_number, symbol1, symbol2, epsilon = 0, unknown = 0;
1757 struct fsm_state *machine_a, *machine_b, *fsm;
1758 struct state_arr *point_a, *point_b;
1759 struct triplethash *th;
1760
1761 /* Perform a cross product by running two machines in parallel */
1762 /* The approach here allows a state to stay, creating a a:0 or 0:b transition */
1763 /* with the a/b-state waiting, and the arc going to {a,stay} or {stay,b} */
1764 /* the wait maneuver is only possible if the waiting state is final */
1765
1766 /* For the rewrite rules compilation, a different cross-product is used: */
1767 /* rewrite_cp() synchronizes A and B as long as possible to get a unique */
1768 /* output match for each cross product. */
1769
1770 /* This behavior where we postpone zeroes on either side and perform */
1771 /* and equal length cross-product as long as possible and never intermix */
1772 /* ?:0 and 0:? arcs (i.e. we keep both machines synchronized as long as possible */
1773 /* can be done by [A .x. B] & ?:?* [?:0*|0:?*] at the cost of possibly */
1774 /* up to three times larger transducers. */
1775 /* This is very similar to the idea in "tristate composition" in fsm_compose() */
1776
1777 /* This function is only used for explicit cross products */
1778 /* such as a:b or A.x.B, etc. In rewrite rules, we use rewrite_cp() */
1779
1780 net1 = fsm_minimize(net1);
1781 net2 = fsm_minimize(net2);
1782
1783 fsm_merge_sigma(net1, net2);
1784
1785 fsm_count(net1);
1786 fsm_count(net2);
1787
1788 machine_a = net1->states;
1789 machine_b = net2->states;
1790
1791 /* new state 0 = {0,0} */
1792
1793 STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);;
1794
1795 th = triplet_hash_init();
1796 triplet_hash_insert(th, 0, 0, 0);
1797
1798 fsm_state_init(sigma_max(net1->sigma));
1799
1800 point_a = init_state_pointers(machine_a);
1801 point_b = init_state_pointers(machine_b);
1802
1803 while (!int_stack_isempty()) {
1804
1805 /* Get a pair of states to examine */
1806
1807 a = int_stack_pop();
1808 b = int_stack_pop();
1809
1810 /* printf("Treating pair: {%i,%i}\n",a,b); */
1811
1812 current_state = triplet_hash_find(th, a, b, 0);
1813 current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0;
1814 current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0;
1815
1816 fsm_state_set_current_state(current_state, current_final, current_start);
1817
1818 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
1819 for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
1820
1821 if ((machine_a->target == -1) && (machine_b->target == -1)) {
1822 continue;
1823 }
1824 if ((machine_a->target == -1) && (machine_a->final_state == 0)) {
1825 continue;
1826 }
1827 if ((machine_b->target == -1) && (machine_b->final_state == 0)) {
1828 continue;
1829 }
1830 /* Main check */
1831 if (!((machine_a->target == -1) || (machine_b->target == -1))) {
1832 if ((target_number = triplet_hash_find(th, machine_a->target, machine_b->target, 0)) == -1) {
1833 STACK_2_PUSH(machine_b->target, machine_a->target)int_stack_push(machine_b->target); int_stack_push(machine_a
->target);
;
1834 target_number = triplet_hash_insert(th, machine_a->target, machine_b->target, 0);
1835 }
1836 symbol1 = machine_a->in;
1837 symbol2 = machine_b->in;
1838 if (symbol1 == IDENTITY2 && symbol2 != IDENTITY2)
1839 symbol1 = UNKNOWN1;
1840 if (symbol2 == IDENTITY2 && symbol1 != IDENTITY2)
1841 symbol2 = UNKNOWN1;
1842
1843 fsm_state_add_arc(current_state, symbol1, symbol2, target_number, current_final, current_start);
1844 /* @:@ -> @:@ and also ?:? */
1845 if ((machine_a->in == IDENTITY2) && (machine_b->in == IDENTITY2)) {
1846 fsm_state_add_arc(current_state, UNKNOWN1, UNKNOWN1, target_number, current_final, current_start);
1847 }
1848 }
1849 if (machine_a->final_state == 1 && machine_b->target != -1) {
1850
1851 /* Add 0:b i.e. stay in state A */
1852 if ((target_number = triplet_hash_find(th, machine_a->state_no, machine_b->target, 0)) == -1) {
1853 STACK_2_PUSH(machine_b->target, machine_a->state_no)int_stack_push(machine_b->target); int_stack_push(machine_a
->state_no);
;
1854 target_number = triplet_hash_insert(th, machine_a->state_no, machine_b->target, 0);
1855 }
1856 /* @:0 becomes ?:0 */
1857 symbol2 = machine_b->in == IDENTITY2 ? UNKNOWN1 : machine_b->in;
1858 fsm_state_add_arc(current_state, EPSILON0, symbol2, target_number, current_final, current_start);
1859 }
1860
1861 if (machine_b->final_state == 1 && machine_a->target != -1) {
1862
1863 /* Add a:0 i.e. stay in state B */
1864 if ((target_number = triplet_hash_find(th, machine_a->target, machine_b->state_no, 0)) == -1) {
1865 STACK_2_PUSH(machine_b->state_no, machine_a->target)int_stack_push(machine_b->state_no); int_stack_push(machine_a
->target);
;
1866 target_number = triplet_hash_insert(th, machine_a->target, machine_b->state_no, 0);
1867 }
1868 /* @:0 becomes ?:0 */
1869 symbol1 = machine_a->in == IDENTITY2 ? UNKNOWN1 : machine_a->in;
1870 fsm_state_add_arc(current_state, symbol1, EPSILON0, target_number, current_final, current_start);
1871 }
1872 }
1873 }
1874 /* Check arctrack */
1875 fsm_state_end_state();
1876 }
1877
1878 free(net1->states);
1879 fsm_state_close(net1);
1880
1881 for (i=0, fsm = net1->states; (fsm+i)->state_no != -1; i++) {
1882 if (((fsm+i)->in == EPSILON0) || ((fsm+i)->out == EPSILON0))
1883 epsilon = 1;
1884 if (((fsm+i)->in == UNKNOWN1) || ((fsm+i)->out == UNKNOWN1))
1885 unknown = 1;
1886 }
1887 if (epsilon == 1) {
1888 if (sigma_find_number(EPSILON0, net1->sigma) == -1) {
1889 sigma_add_special(EPSILON0, net1->sigma);
1890 }
1891 }
1892 if (unknown == 1) {
1893 if (sigma_find_number(UNKNOWN1, net1->sigma) == -1) {
1894 sigma_add_special(UNKNOWN1, net1->sigma);
1895 }
1896 }
1897 free(point_a);
1898 free(point_b);
1899 fsm_destroy(net2);
1900 triplet_hash_free(th);
1901 return(fsm_coaccessible(net1));
1902}
1903
1904struct fsm *fsm_precedes(struct fsm *net1, struct fsm *net2) {
1905 return(fsm_complement(fsm_minimize(fsm_contains(fsm_minimize(fsm_concat(fsm_minimize(fsm_copy(net2)),fsm_concat(fsm_universal(),fsm_minimize(fsm_copy(net1)))))))));
1906}
1907
1908struct fsm *fsm_follows(struct fsm *net1, struct fsm *net2) {
1909 return(fsm_complement(fsm_minimize(fsm_contains(fsm_minimize(fsm_concat(fsm_minimize(fsm_copy(net1)),fsm_concat(fsm_universal(),fsm_minimize(fsm_copy(net2)))))))));
1910}
1911
1912struct fsm *fsm_unflatten(struct fsm *net, char *epsilon_sym, char *repeat_sym) {
1913 int a, b, current_state, current_start, current_final, target_number, epsilon, repeat, in, out;
1914 struct fsm_state *even_state, *odd_state;
1915 struct state_arr *point_a;
1916 struct triplethash *th;
1917
1918 fsm_minimize(net);
1919 fsm_count(net);
1920
1921 epsilon = sigma_find(epsilon_sym, net->sigma);
1922 repeat = sigma_find(repeat_sym, net->sigma);
1923
1924 even_state = net->states;
1925
1926 /* new state 0 = {0,0} */
1927
1928 STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);;
1929
1930 th = triplet_hash_init();
1931 triplet_hash_insert(th, 0, 0, 0);
1932
1933 fsm_state_init(sigma_max(net->sigma));
1934
1935 point_a = init_state_pointers(even_state);
1936
1937 while (!int_stack_isempty()) {
1938
1939 /* Get a pair of states to examine */
1940
1941 a = int_stack_pop();
1942 a = int_stack_pop();
1943
1944 /* printf("Treating pair: {%i,%i}\n",a,b); */
1945
1946 current_state = triplet_hash_find(th, a, a, 0);
1947 current_start = ((point_a+a)->start == 1) ? 1 : 0;
1948 current_final = ((point_a+a)->final == 1) ? 1 : 0;
1949
1950 fsm_state_set_current_state(current_state, current_final, current_start);
1951
1952 for (even_state = (point_a+a)->transitions; even_state->state_no == a; even_state++) {
1953 if (even_state->target == -1) {
1954 continue;
1955 }
1956 in = even_state->in;
1957 b = even_state->target;
1958 for (odd_state = (point_a+b)->transitions; odd_state->state_no == b; odd_state++) {
1959 if (odd_state->target == -1) {
1960 continue;
1961 }
1962 if ((target_number = triplet_hash_find(th, odd_state->target, odd_state->target, 0)) == -1) {
1963 STACK_2_PUSH(odd_state->target, odd_state->target)int_stack_push(odd_state->target); int_stack_push(odd_state
->target);
;
1964 target_number = triplet_hash_insert(th, odd_state->target, odd_state->target, 0);
1965 }
1966 in = even_state->in;
1967 out = odd_state->in;
1968 if (out == repeat) {
1969 out = in;
1970 } else if (in == IDENTITY2 || out == IDENTITY2) {
1971 in = in == IDENTITY2 ? UNKNOWN1 : in;
1972 out = out == IDENTITY2 ? UNKNOWN1 : out;
1973 }
1974 if (in == epsilon) {
1975 in = EPSILON0;
1976 }
1977 if (out == epsilon) {
1978 out = EPSILON0;
1979 }
1980 fsm_state_add_arc(current_state, in, out, target_number, current_final, current_start);
1981 }
1982 }
1983 fsm_state_end_state();
1984 }
1985 free(net->states);
1986 fsm_state_close(net);
1987 free(point_a);
1988 triplet_hash_free(th);
1989 return(net);
1990}
1991
1992
1993struct fsm *fsm_shuffle(struct fsm *net1, struct fsm *net2) {
1994 int a, b, current_state, current_start, current_final, target_number;
1995 struct fsm_state *machine_a, *machine_b;
1996 struct state_arr *point_a, *point_b;
1997 struct triplethash *th;
1998
1999 /* Shuffle A and B by making alternatively A move and B stay at each or */
2000 /* vice versa at each step */
2001
2002 fsm_minimize(net1);
2003 fsm_minimize(net2);
2004
2005 fsm_merge_sigma(net1, net2);
2006
2007 fsm_count(net1);
2008 fsm_count(net2);
2009
2010 machine_a = net1->states;
2011 machine_b = net2->states;
2012
2013 /* new state 0 = {0,0} */
2014
2015 STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);;
2016
2017 th = triplet_hash_init();
2018 triplet_hash_insert(th, 0, 0, 0);
2019
2020 fsm_state_init(sigma_max(net1->sigma));
2021
2022 point_a = init_state_pointers(machine_a);
2023 point_b = init_state_pointers(machine_b);
2024
2025 while (!int_stack_isempty()) {
2026
2027 /* Get a pair of states to examine */
2028
2029 a = int_stack_pop();
2030 b = int_stack_pop();
2031
2032 /* printf("Treating pair: {%i,%i}\n",a,b); */
2033
2034 current_state = triplet_hash_find(th, a, b, 0);
2035 current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0;
2036 current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0;
2037
2038 fsm_state_set_current_state(current_state, current_final, current_start);
2039
2040 /* Follow A, B stays */
2041 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
2042 if (machine_a->target == -1) {
2043 continue;
2044 }
2045 if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) {
2046 STACK_2_PUSH(b, machine_a->target)int_stack_push(b); int_stack_push(machine_a->target);;
2047 target_number = triplet_hash_insert(th, machine_a->target, b, 0);
2048 }
2049
2050 fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start);
2051 }
2052
2053 /* Follow B, A stays */
2054 for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
2055
2056 if (machine_b->target == -1) {
2057 continue;
2058 }
2059
2060 if ((target_number = triplet_hash_find(th, a, machine_b->target, 0)) == -1) {
2061 STACK_2_PUSH(machine_b->target, a)int_stack_push(machine_b->target); int_stack_push(a);;
2062 target_number = triplet_hash_insert(th, a, machine_b->target, 0);
2063 }
2064 fsm_state_add_arc(current_state, machine_b->in, machine_b->out, target_number, current_final, current_start);
2065 }
2066
2067 /* Check arctrack */
2068 fsm_state_end_state();
2069 }
2070
2071 free(net1->states);
2072 fsm_state_close(net1);
2073 free(point_a);
2074 free(point_b);
2075 fsm_destroy(net2);
2076 triplet_hash_free(th);
2077 return(net1);
2078}
2079
2080int fsm_equivalent(struct fsm *net1, struct fsm *net2) {
2081 /* Test path equivalence of two FSMs by traversing both in parallel */
2082 int a, b, matching_arc, equivalent;
2083 struct fsm_state *machine_a, *machine_b;
2084 struct state_arr *point_a, *point_b;
2085 struct triplethash *th;
2086
2087 fsm_merge_sigma(net1, net2);
2088
2089 fsm_count(net1);
2090 fsm_count(net2);
2091
2092 machine_a = net1->states;
2093 machine_b = net2->states;
2094
2095 equivalent = 0;
2096 /* new state 0 = {0,0} */
2097 STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);;
2098
2099 th = triplet_hash_init();
2100 triplet_hash_insert(th, 0, 0, 0);
2101
2102 point_a = init_state_pointers(machine_a);
2103 point_b = init_state_pointers(machine_b);
2104
2105 while (!int_stack_isempty()) {
2106
2107 /* Get a pair of states to examine */
2108
2109 a = int_stack_pop();
2110 b = int_stack_pop();
2111
2112 if ((point_a+a)->final != (point_b+b)->final) {
2113 goto not_equivalent;
2114 }
2115 /* Check that all arcs in A have matching arc in B, push new state pair on stack */
2116 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
2117 if (machine_a->target == -1) {
2118 break;
2119 }
2120 matching_arc = 0;
2121 for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
2122 if (machine_b->target == -1) {
2123 break;
2124 }
2125 if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) {
2126 matching_arc = 1;
2127 if ((triplet_hash_find(th, machine_a->target, machine_b->target, 0)) == -1) {
2128 STACK_2_PUSH(machine_b->target, machine_a->target)int_stack_push(machine_b->target); int_stack_push(machine_a
->target);
;
2129 triplet_hash_insert(th, machine_a->target, machine_b->target, 0);
2130 }
2131 break;
2132 }
2133 }
2134 if (matching_arc == 0) {
2135 goto not_equivalent;
2136 }
2137 }
2138 for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) {
2139 if (machine_b->target == -1) {
2140 break;
2141 }
2142 matching_arc = 0;
2143 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
2144 if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) {
2145 matching_arc = 1;
2146 break;
2147 }
2148 }
2149 if (matching_arc == 0) {
2150 goto not_equivalent;
2151 }
2152 }
2153 }
2154 equivalent = 1;
2155 not_equivalent:
2156 fsm_destroy(net1);
2157 fsm_destroy(net2);
2158 free(point_a);
2159 free(point_b);
2160 triplet_hash_free(th);
2161 return(equivalent);
2162}
2163
2164
2165struct fsm *fsm_minus(struct fsm *net1, struct fsm *net2) {
2166 int a, b, current_state, current_start, current_final, target_number, b_has_trans, btarget, statecount;
2167 struct fsm_state *machine_a, *machine_b;
2168 struct state_arr *point_a, *point_b;
2169 struct triplethash *th;
2170 statecount = 0;
2171
2172 net1 = fsm_minimize(net1);
2173 net2 = fsm_minimize(net2);
2174
2175 fsm_merge_sigma(net1, net2);
2176
2177 fsm_count(net1);
2178 fsm_count(net2);
2179
2180 machine_a = net1->states;
2181 machine_b = net2->states;
2182
2183 /* new state 0 = {1,1} */
2184
2185 int_stack_clear();
2186 STACK_2_PUSH(1,1)int_stack_push(1); int_stack_push(1);;
2187
2188 th = triplet_hash_init();
2189 triplet_hash_insert(th, 1, 1, 0);
2190
2191 point_a = init_state_pointers(machine_a);
2192 point_b = init_state_pointers(machine_b);
2193
2194 fsm_state_init(sigma_max(net1->sigma));
2195
2196 while (!int_stack_isempty()) {
2197 statecount++;
2198 /* Get a pair of states to examine */
2199
2200 a = int_stack_pop();
2201 b = int_stack_pop();
2202
2203 current_state = triplet_hash_find(th, a, b, 0);
2204 a--;
2205 b--;
2206
2207 if (b == -1) {
2208 current_start = 0;
2209 current_final = (point_a+a)->final;
2210 } else {
2211 current_start = (a == 0 && b == 0) ? 1 : 0;
2212 current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 0)) ? 1 : 0;
2213 }
2214
2215 fsm_state_set_current_state(current_state, current_final, current_start);
2216
2217 for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) {
2218 if (machine_a->target == -1) {
2219 break;
2220 continue;
2221 }
2222 if (b == -1) {
2223 /* b is dead */
2224 if ((target_number = triplet_hash_find(th, (machine_a->target)+1, 0, 0)) == -1) {
2225 STACK_2_PUSH(0, (machine_a->target)+1)int_stack_push(0); int_stack_push((machine_a->target)+1);;
2226 target_number = triplet_hash_insert(th, (machine_a->target)+1, 0, 0);
2227 }
2228 } else {
2229 /* b is alive */
2230 b_has_trans = 0;
2231 for (machine_b = (point_b+b)->transitions ; machine_b->state_no == b ; machine_b++) {
2232 if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) {
2233 b_has_trans = 1;
2234 btarget = machine_b->target;
2235 break;
2236 }
2237 }
2238 if (b_has_trans) {
2239 if ((target_number = triplet_hash_find(th, (machine_a->target)+1, btarget+1, 0)) == -1) {
2240 STACK_2_PUSH(btarget+1, (machine_a->target)+1)int_stack_push(btarget+1); int_stack_push((machine_a->target
)+1);
;
2241 target_number = triplet_hash_insert(th, (machine_a->target)+1, (machine_b->target)+1, 0);
2242 }
2243 } else {
2244 /* b is dead */
2245 if ((target_number = triplet_hash_find(th, (machine_a->target)+1, 0, 0)) == -1) {
2246 STACK_2_PUSH(0, (machine_a->target)+1)int_stack_push(0); int_stack_push((machine_a->target)+1);;
2247 target_number = triplet_hash_insert(th, (machine_a->target)+1, 0, 0);
2248 }
2249 }
2250 }
2251 fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start);
2252 }
2253 fsm_state_end_state();
2254 }
2255
2256 free(net1->states);
2257 fsm_state_close(net1);
2258 free(point_a);
2259 free(point_b);
2260 fsm_destroy(net2);
2261 triplet_hash_free(th);
2262 return(fsm_minimize(net1));
2263}
2264
2265struct fsm *fsm_contains(struct fsm *net) {
2266 /* [?* A ?*] */
2267 struct fsm *net2;
2268
2269 net2 = fsm_concat(fsm_concat(fsm_universal(),net),fsm_universal());
2270 return(net2);
2271}
2272
2273struct fsm *fsm_universal() {
2274 struct fsm *net;
2275 int s;
2276 net = fsm_create("");
2277 fsm_update_flags(net, YES1, YES1, YES1, YES1, NO0, NO0);
2278 net->states = malloc(sizeof(struct fsm_state)*2);
2279 s = sigma_add_special(IDENTITY2,net->sigma);
2280 add_fsm_arc(net->states, 0, 0, s, s, 0, 1, 1);
2281 add_fsm_arc(net->states, 1, -1, -1, -1, -1, -1, -1);
2282 net->arccount = 1;
2283 net->statecount = 1;
2284 net->linecount = 2;
2285 net->finalcount = 1;
2286 net->pathcount = PATHCOUNT_CYCLIC-1;
2287 return(net);
2288}
2289
2290struct fsm *fsm_contains_one(struct fsm *net) {
2291 /* $A - $[[?+ A ?* & A ?*] | [A ?+ & A]] */
2292 struct fsm *ret;
2293 ret = fsm_minus(fsm_contains(fsm_copy(net)),fsm_contains(fsm_union(fsm_intersect(fsm_concat(fsm_kleene_plus(fsm_identity()),fsm_concat(fsm_copy(net),fsm_universal())) , fsm_concat(fsm_copy(net),fsm_universal())),fsm_intersect(fsm_concat(fsm_copy(net),fsm_kleene_plus(fsm_identity())), fsm_copy(net)))));
2294 fsm_destroy(net);
2295 return(ret);
2296}
2297
2298struct fsm *fsm_contains_opt_one(struct fsm *net) {
2299 /* $.A | ~$A */
2300 struct fsm *ret;
2301 ret = fsm_union(fsm_contains_one(fsm_copy(net)),fsm_complement(fsm_contains(fsm_copy(net))));
2302 fsm_destroy(net);
2303 return(ret);
2304}
2305
2306struct fsm *fsm_simple_replace(struct fsm *net1, struct fsm *net2) {
2307 /* [~[?* [A-0] ?*] [A.x.B]]* ~[?* [A-0] ?*] */
2308
2309 struct fsm *UPlus, *ret;
2310 UPlus = fsm_minimize(fsm_kleene_plus(fsm_identity()));
2311 ret = fsm_concat(fsm_minimize(fsm_kleene_star(fsm_minimize(fsm_concat(fsm_complement(fsm_minimize(fsm_concat(fsm_concat(fsm_universal(),fsm_minimize(fsm_intersect(fsm_copy(net1),fsm_copy(UPlus)))),fsm_universal()))),fsm_minimize(fsm_cross_product(fsm_copy(net1),fsm_copy(net2))))))),fsm_minimize(fsm_complement(fsm_minimize(fsm_concat(fsm_concat(fsm_universal(), fsm_intersect(fsm_copy(net1),fsm_copy(UPlus))),fsm_universal())))));
2312 fsm_destroy(net1);
2313 fsm_destroy(net2);
2314 fsm_destroy(UPlus);
2315 return(ret);
2316}
2317
2318struct fsm *fsm_priority_union_upper(struct fsm *net1, struct fsm *net2) {
2319 /* A .P. B = A | [~[A.u] .o. B] */
2320 struct fsm *ret;
2321 ret = fsm_union(fsm_copy(net1),fsm_compose(fsm_complement(fsm_upper(fsm_copy(net1))),net2));
2322 fsm_destroy(net1);
2323 return(ret);
2324}
2325
2326struct fsm *fsm_priority_union_lower(struct fsm *net1, struct fsm *net2) {
2327 /* A .p. B = A | B .o. ~[A.l] */
2328 struct fsm *ret;
2329 ret = fsm_union(fsm_copy(net1),fsm_compose(net2,fsm_complement(fsm_lower(fsm_copy(net1)))));
2330 fsm_destroy(net1);
2331 return(ret);
2332}
2333
2334struct fsm *fsm_lenient_compose(struct fsm *net1, struct fsm *net2) {
2335 /* A .O. B = [A .o. B] .P. B */
2336 struct fsm *ret;
2337 ret = fsm_priority_union_upper(fsm_compose(fsm_copy(net1),net2),fsm_copy(net1));
2338 fsm_destroy(net1);
2339 return(ret);
2340}
2341
2342struct fsm *fsm_term_negation(struct fsm *net1) {
2343 return(fsm_intersect(fsm_identity(),fsm_complement(net1)));
2344}
2345
2346struct fsm *fsm_quotient_interleave(struct fsm *net1, struct fsm *net2) {
2347 /* A/\/B = The set of strings you can interleave in B and get a string from A */
2348 /* [B/[x \x* x] & A/x .o. [[[\x]:0]* (x:0 \x* x:0)]*].l */
2349 struct fsm *Result;
2350 Result = fsm_lower(fsm_compose(fsm_intersect(fsm_ignore(net2,fsm_concat(fsm_symbol("@>@"),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@>@"))),fsm_symbol("@>@"))),OP_IGNORE_ALL1),fsm_ignore(net1,fsm_symbol("@>@"),OP_IGNORE_ALL1)),fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_cross_product(fsm_term_negation(fsm_symbol("@>@")),fsm_empty_string())),fsm_optionality(fsm_concat(fsm_cross_product(fsm_symbol("@>@"),fsm_empty_string()),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@>@"))),fsm_cross_product(fsm_symbol("@>@"),fsm_empty_string()))))))));
2351
2352 Result->sigma = sigma_remove("@>@",Result->sigma);
2353 /* Could clean up sigma */
2354 return(Result);
2355}
2356
2357struct fsm *fsm_quotient_left(struct fsm *net1, struct fsm *net2) {
2358 /* A\\\B = [B .o. A:0 ?*].l; */
2359 /* A\\\B = the set of suffixes you can add to A to get a string in B */
2360 struct fsm *Result;
2361 Result = fsm_lower(fsm_compose(net2,fsm_concat(fsm_cross_product(net1,fsm_empty_string()),fsm_universal())));
2362 return(Result);
2363}
2364
2365struct fsm *fsm_quotient_right(struct fsm *net1, struct fsm *net2) {
2366 struct fsm *Result;
2367
2368 /* A///B = [A .o. ?* B:0].l; */
2369 /* A///B = the set of prefixes you can add to B to get strings in A */
2370 Result = fsm_lower(fsm_compose(net1, fsm_concat(fsm_universal(),fsm_cross_product(net2,fsm_empty_string()))));
2371 return(Result);
2372}
2373
2374struct fsm *fsm_ignore(struct fsm *net1, struct fsm *net2, int operation) {
2375 struct fsm_state *fsm1, *fsm2, *new_fsm;
2376 struct fsm *Result;
2377 short *handled_states1, *handled_states2;
2378 int i, j, k, state_add_counter = 0, malloc_size, splices = 0, returns, target, splice_size, start_splice, states1, states2, lines1, lines2, *return_state;
2379
2380 net1 = fsm_minimize(net1);
2381 net2 = fsm_minimize(net2);
2382
2383 if (fsm_isempty(net2)) {
2384 fsm_destroy(net2);
2385 return(net1);
2386 }
2387 fsm_merge_sigma(net1, net2);
2388
2389 fsm_count(net1);
2390 fsm_count(net2);
2391
2392 states1 = net1->statecount;
2393 states2 = net2->statecount;
2394 lines1 = net1->linecount;
2395 lines2 = net2->linecount;
2396 fsm1 = net1->states;
2397 fsm2 = net2->states;
2398
2399 if (operation == OP_IGNORE_INTERNAL2) {
2400 Result = fsm_lower(fsm_compose(fsm_ignore(fsm_copy(net1),fsm_symbol("@i<@"),OP_IGNORE_ALL1),fsm_compose(fsm_complement(fsm_union(fsm_concat(fsm_symbol("@i<@"),fsm_universal()),fsm_concat(fsm_universal(),fsm_symbol("@i<@")))),fsm_simple_replace(fsm_symbol("@i<@"),fsm_copy(net2)))));
2401 Result->sigma = sigma_remove("@i<@",Result->sigma);
2402 fsm_destroy(net1);
2403 fsm_destroy(net2);
2404 return(Result);
2405 }
2406
2407 malloc_size = lines1 + (states1 * (lines2 + net2->finalcount + 1));
2408 new_fsm = malloc(sizeof(struct fsm_state)*(malloc_size+1));
2409
2410 /* Mark if a state has been handled with ignore */
2411 handled_states1 = malloc(sizeof(short)*states1);
2412 handled_states2 = malloc(sizeof(short)*states2);
2413
2414 /* Mark which ignores return to which state */
2415 return_state = malloc(sizeof(int)*states1);
2416 splice_size = states2;
2417 start_splice = states1;
2418 for (k=0; k<states1; k++)
2419 *(handled_states1+k) = 0;
2420
2421 for (i=0, j=0; (fsm1+i)->state_no != -1; i++) {
2422 if (*(handled_states1+(fsm1+i)->state_no) == 0) {
2423 target = start_splice + splices * splice_size;
2424 add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, EPSILON0, EPSILON0, target, (fsm1+i)->final_state, (fsm1+i)->start_state);
2425 *(return_state+splices) = (fsm1+i)->state_no;
2426 *(handled_states1+(fsm1+i)->state_no) = 1;
2427 j++;
2428 splices++;
2429 if ((fsm1+i)->in != -1) {
2430 add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, (fsm1+i)->final_state, (fsm1+i)->start_state);
2431 j++;
2432 }
2433 } else {
2434 add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, (fsm1+i)->final_state, (fsm1+i)->start_state);
2435 j++;
2436 }
2437 }
2438
2439 /* Add a sequence of fsm2s at the end, with arcs back to the appropriate states */
2440
2441 state_add_counter = start_splice;
2442
2443 for (returns = 0; splices>0; splices--, returns++) {
2444 /* Zero handled return arc states */
2445
2446 for (k=0; k<states2; k++)
2447 *(handled_states2+k) = 0;
2448
2449 for (i=0; (fsm2+i)->state_no != -1; i++) {
2450 if ((fsm2+i)->final_state == 1 && *(handled_states2+(fsm2+i)->state_no) == 0) {
2451 add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, EPSILON0, EPSILON0, *(return_state+returns), 0, 0);
2452 j++;
2453 *(handled_states2+(fsm2+i)->state_no) = 1;
2454 if ((fsm2+i)->target != -1) {
2455 add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, (fsm2+i)->in, (fsm2+i)->out , (fsm2+i)->target + state_add_counter, 0, 0);
2456 j++;
2457 }
2458 } else {
2459 add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, (fsm2+i)->in, (fsm2+i)->out, (fsm2+i)->target + state_add_counter, 0, 0);
2460 j++;
2461 }
2462 }
2463 state_add_counter = state_add_counter + states2;
2464 }
2465
2466 add_fsm_arc(new_fsm, j, -1, -1, -1, -1, -1, -1);
2467 free(handled_states1);
2468 free(handled_states2);
2469 free(return_state);
2470 free(net1->states);
2471 fsm_destroy(net2);
2472 net1->states = new_fsm;
2473 fsm_update_flags(net1, NO0, NO0, NO0, NO0, NO0, NO0);
2474 fsm_count(net1);
2475 return(net1);
2476}
2477
2478/* Remove those symbols from sigma that have the same distribution as IDENTITY */
2479
2480void fsm_compact(struct fsm *net) {
2481 struct checktable {
2482 int state_no;
2483 int target;
2484 } *checktable;
2485
2486 struct fsm_state *fsm;
2487 struct sigma *sig, *sigprev, *sign;
2488 _Bool *potential;
2489 int i, j, prevstate, numsymbols, in, out, state, target, removable;
2490
2491 fsm = net->states;
2492 numsymbols = sigma_max(net->sigma);
2493
2494 potential = malloc(sizeof(_Bool)*(numsymbols+1));
2495 checktable = malloc(sizeof(struct checktable)*(numsymbols+1));
2496
2497 for (i=0; i <= numsymbols; i++) {
2498 *(potential+i) = 1;
2499 (checktable+i)->state_no = -1;
2500 (checktable+i)->target = -1;
2501 }
2502 /* For consistency reasons, can't remove symbols longer than 1 */
2503 /* since @ and ? only match utf8 symbols of length 1 */
2504
2505 for (sig = net->sigma; sig != NULL((void*)0) && sig->number != -1; sig = sig->next) {
2506 if (utf8strlen(sig->symbol) > 1) {
2507 *(potential+sig->number) = 0;
2508 }
2509 }
2510
2511 prevstate = 0;
2512
2513 for (i=0; ; i++) {
2514
2515 if ((fsm+i)->state_no != prevstate) {
2516 for (j=3; j<=numsymbols;j++) {
2517 if ((checktable+j)->state_no != prevstate && (checktable+IDENTITY2)->state_no != prevstate) {
2518 continue;
2519 }
2520 if ((checktable+j)->target == (checktable+IDENTITY2)->target && (checktable+j)->state_no == (checktable+IDENTITY2)->state_no) {
2521 continue;
2522 }
2523 *(potential+j) = 0;
2524 }
2525 }
2526
2527 if ((fsm+i)->state_no == -1)
2528 break;
2529
2530 in = (fsm+i)->in;
2531 out = (fsm+i)->out;
2532 state = (fsm+i)->state_no;
2533 target = (fsm+i)->target;
2534
2535 if (in != -1 && out != -1) {
2536 if (((in == out && in > 2) || in == IDENTITY2)) {
2537 (checktable+in)->state_no = state;
2538 (checktable+in)->target = target;
2539 }
2540 if (in != out && in > 2) {
2541 *(potential+in) = 0;
2542 }
2543 if (in != out && out > 2) {
2544 *(potential+out) = 0;
2545 }
2546 }
2547 prevstate = state;
2548 }
2549 for (removable = 0, i=3; i <= numsymbols; i++) {
2550 if (*(potential+i) == 1) {
2551 removable = 1;
2552 }
2553
2554 }
2555 if (removable == 0) {
2556 free(potential);
2557 free(checktable);
2558 return;
2559 }
2560 i = j = 0;
2561 do {
2562 in = (fsm+i)->in;
2563
2564 add_fsm_arc(fsm, j ,(fsm+i)->state_no,(fsm+i)->in,(fsm+i)->out,(fsm+i)->target,(fsm+i)->final_state,(fsm+i)->start_state);
2565 if (in == -1) {
2566 i++;
2567 j++;
2568 }
2569 else if (*(potential+in) == 1 && in > 2) {
2570 i++;
2571 } else {
2572 i++;
2573 j++;
2574 }
2575 } while ((fsm+i)->state_no != -1);
2576 add_fsm_arc(fsm, j ,(fsm+i)->state_no,(fsm+i)->in,(fsm+i)->out,(fsm+i)->target,(fsm+i)->final_state,(fsm+i)->start_state);
2577
2578 sigprev = NULL((void*)0);
2579 for (sig = net->sigma; sig != NULL((void*)0) && sig->number != -1; sig = sign) {
2580
2581 if ((sig->number > 2) && (*(potential+sig->number) == 1)) {
2582 sigprev->next = sig->next;
2583 sign = sig->next;
2584 free(sig->symbol);
2585 free(sig);
2586 } else {
2587 sigprev = sig;
2588 sign = sig->next;
2589 }
2590 }
2591 free(potential);
2592 free(checktable);
2593 sigma_cleanup(net,0);
2594}
2595
2596int fsm_symbol_occurs(struct fsm *net, char *symbol, int side) {
2597 struct fsm_state *fsm;
2598 int i, sym;
2599 sym = sigma_find(symbol, net->sigma);
2600 if (sym == -1) {
2601 return 0;
2602 }
2603 for (i=0, fsm = net->states; (fsm+i)->state_no != -1; i++) {
2604 if (side == M_UPPER1 && (fsm+i)->in == sym)
2605 return 1;
2606 if (side == M_LOWER2 && (fsm+i)->out == sym)
2607 return 1;
2608 if (side == (M_UPPER1 + M_LOWER2) && ( (fsm+i)->in == sym || (fsm+i)->out == sym))
2609 return 1;
2610 }
2611 return 0;
2612}
2613
2614struct fsm *fsm_equal_substrings(struct fsm *net, struct fsm *left, struct fsm *right) {
2615
2616 /* The algorithm extracts from the lower side all and only those strings where */
2617 /* every X occurring in different substrings ... left X right ... is identical. */
2618
2619 /* Caveat: there is no reliable termination condition for the loop that extracts */
2620 /* identities. This means that if run on languages where there are potentially */
2621 /* infinite-length identical delimited substrings, it will not terminate. */
2622
2623 /* For example: _eq(l a* r l a* r, l , r) will not terminate. */
2624
2625 /* However, even if the languages occuring between left and right are infinite */
2626 /* the algorithm terminates eventually if the the possible combinations of */
2627 /* identical substrings is finite in length. */
2628
2629 /* For example _eq([l a* b r l a b* r]*, l, r) does terminate even though */
2630 /* it contains a potentially infinite number of delimited substrings since */
2631 /* the maximum length of the possible identical delimited substrings is finite. */
2632
2633 /* In this case the above example evaluates to the language [l a b r l a b r]* */
2634
2635 /* The algorithm: */
2636 /* Input: L, left, right */
2637 /* Output: the language that preserves only those strings in L */
2638 /* where X is the same for all left X right sequences */
2639
2640 /* 1. mark all instances of left with ... LB and right with RB ... in L */
2641
2642 /* 2. split L into Leq and Lbypass */
2643 /* where Lbypass are all those strings where LB RB sequences aren't */
2644 /* properly nested (we must have ... LB ... RB ... LB ... RB ... etc.) */
2645 /* Lbypass also includes all those with less than two bracketed */
2646 /* instances, since we don't need to check equality if there's only */
2647 /* one bracketed substring. */
2648 /* We also remove the auxiliary LB and RB symbols from Lbypass */
2649
2650 /* 3. We extract all the possible symbols occurring between LB and RB */
2651 /* from Leq */
2652
2653 /* 4. We create the transducer Move from all symbols in (3) */
2654 /* Move = M(sym_1) | ... | M(sym_n) */
2655 /* where M(a) is defined as [\LB* LB:a a:LB]* \LB* */
2656 /* i.e. it rewrites bracketed strings such as "LB a b RB LB a b RB" */
2657 /* to "a LB b RB a LB b RB" in effect moving brackets to the right */
2658 /* one step for a symbol. */
2659
2660 /* 5. Leq = Cleanup(Leq) */
2661 /* Cleanup removes LB RB sequences and, at the same time filters out */
2662 /* any strings where we find both LB RB and LB X RB where X is not 0. */
2663 /* since we know such sequences could not possibly be identical */
2664 /* Cleanup is implemented by composing Leq with */
2665 /* \LB* [LB:0 RB:0 \LB*]* | ~$[LB RB] */
2666 /* - if the symbol LB does not occur on the lower side of Leq, goto(6) */
2667 /* - else Leq = Move(Leq), goto(5) */
2668
2669 /* 6. Result = L .o. [Leq | Lbypass] */
2670
2671 int syms;
2672 struct sigma *sig;
2673 struct fsm *LB, *RB, *NOLB, *NORB, *InsertBrackets, *RemoveBrackets, *Lbracketed, *NOBR, *BracketFilter, *Lbypass, *Leq, *Labels, *Cleanup, *ThisMove, *ThisSymbol, *Move, *Result, *oldnet;
2674
2675 oldnet = fsm_copy(net);
2676
2677 /* LB = "@<eq<@" */
2678 /* RB = "@>eq>@" */
2679
2680 LB = fsm_symbol("@<eq<@");
2681 NOLB = fsm_minimize(fsm_term_negation(fsm_copy(LB)));
2682 RB = fsm_symbol("@>eq>@");
2683 NORB = fsm_minimize(fsm_term_negation(fsm_copy(RB)));
2684 /* NOBR = ~$[LB|RB] */
2685 NOBR = fsm_minimize(fsm_complement(fsm_contains(fsm_union(fsm_copy(LB),fsm_copy(RB)))));
2686
2687 sigma_add("@<eq<@", net->sigma);
2688 sigma_add("@>eq>@", net->sigma);
2689 sigma_sort(net);
2690
2691 /* Insert our aux markers into the language */
2692
2693 /* InsertBrackets = [~$[L|R] [L 0:LB|0:RB R]]* ~$[L|R]; */
2694
2695 InsertBrackets = fsm_minimize(fsm_concat(fsm_kleene_star(fsm_concat(fsm_complement(fsm_contains(fsm_union(fsm_copy(left),fsm_copy(right)))),fsm_union(fsm_concat(fsm_copy(left),fsm_cross_product(fsm_empty_string(),fsm_copy(LB))),fsm_concat(fsm_cross_product(fsm_empty_string(),fsm_copy(RB)),fsm_copy(right))))),fsm_complement(fsm_contains(fsm_union(fsm_copy(left),fsm_copy(right))))));
2696
2697
2698 /* Lbracketed = L .o. InsertBrackets */
2699
2700 Lbracketed = fsm_compose(fsm_copy(net), InsertBrackets);
2701
2702 /* Filter out improper nestings, or languages with less than two marker pairs */
2703
2704 /* BracketFilter = NOBR LB NOBR RB NOBR [LB NOBR RB NOBR]+ */
2705
2706 BracketFilter = fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(LB),fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(RB),fsm_concat(fsm_copy(NOBR),fsm_kleene_plus(fsm_concat(fsm_copy(LB),fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(RB),fsm_copy(NOBR))))))))));
2707
2708 /* RemoveBrackets = [LB:0|RB:0|NOBR]* */
2709 /* Lbypass = [Lbracketed .o. ~BracketFilter .o. LB|RB -> 0] */
2710 /* Leq = [Lbracketed .o. BracketFilter] */
2711
2712 RemoveBrackets = fsm_kleene_star(fsm_union(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_union(fsm_cross_product(fsm_copy(RB),fsm_empty_string()),fsm_copy(NOBR))));
2713
2714
2715 Lbypass = fsm_lower(fsm_compose(fsm_copy(Lbracketed),fsm_compose(fsm_complement(fsm_copy(BracketFilter)),RemoveBrackets)));
2716 Leq = fsm_compose(Lbracketed, BracketFilter);
2717
2718 /* Extract labels from lower side of L */
2719 /* [Leq .o. [\LB:0* LB:0 \RB* RB:0]* \LB:0*].l */
2720
2721 Labels = fsm_sigma_pairs_net(fsm_lower(fsm_compose(fsm_copy(Leq),fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_cross_product(fsm_copy(NOLB),fsm_empty_string())),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_concat(fsm_kleene_star(fsm_copy(NORB)),fsm_cross_product(fsm_copy(RB),fsm_empty_string()))))),fsm_kleene_star(fsm_cross_product(fsm_copy(NOLB),fsm_empty_string()))))));
2722
2723 /* Cleanup = \LB* [LB:0 RB:0 \LB*]* | ~$[LB RB] */
2724
2725 Cleanup = fsm_minimize(fsm_union(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_kleene_star(fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_concat(fsm_cross_product(fsm_copy(RB),fsm_empty_string()),fsm_kleene_star(fsm_copy(NOLB)))))),fsm_complement(fsm_contains(fsm_concat(fsm_copy(LB),fsm_copy(RB))))));
2726
2727 /* Construct the move function */
2728
2729 Move = fsm_empty_string();
2730
2731 syms = 0;
2732 for (sig = Labels->sigma; sig != NULL((void*)0); sig = sig->next) {
2733 /* Unclear which is faster: the first or the second version */
2734 /* ThisMove = [\LB* LB:X X:LB]* \LB* */
2735 /* ThisMove = [\LB* LB:0 X 0:LB]* \LB* */
2736 if (sig->number >= 3) {
2737 ThisSymbol = fsm_symbol(sig->symbol);
2738 //ThisMove = fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_copy(ThisSymbol)),fsm_cross_product(fsm_copy(ThisSymbol),fsm_copy(LB))))), fsm_kleene_star(fsm_copy(NOLB)));
2739 ThisMove = fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()), fsm_concat(fsm_copy(ThisSymbol), fsm_cross_product(fsm_empty_string(),fsm_copy(LB)))))), fsm_kleene_star(fsm_copy(NOLB)));
2740
2741 Move = fsm_union(Move, ThisMove);
2742 syms++;
2743 }
2744 }
2745 Move = fsm_minimize(Move);
2746 if (syms == 0) {
2747 //printf("no syms");
2748 fsm_destroy(net);
2749 return(oldnet);
2750 }
2751
2752 /* Move until no bracket symbols remain */
2753 for (;;) {
2754 //printf("Zapping\n");
2755 Leq = fsm_compose(Leq, fsm_copy(Cleanup));
2756 if (!fsm_symbol_occurs(Leq, "@<eq<@", M_LOWER2))
2757 break;
2758 Leq = fsm_compose(Leq, fsm_copy(Move));
2759 //Leq = fsm_minimize(fsm_compose(Leq, fsm_copy(Move)));
2760 // printf("size: %i\n",Leq->statecount);
2761 }
2762
2763 /* Result = L .o. [Leq | Lbypass] */
2764 Result = fsm_minimize(fsm_compose(net, fsm_union(fsm_lower(Leq), Lbypass)));
2765 sigma_remove("@<eq<@", Result->sigma);
2766 sigma_remove("@>eq>@", Result->sigma);
2767 fsm_compact(Result);
2768 sigma_sort(Result);
2769 fsm_destroy(oldnet);
2770 return(Result);
2771}
2772
2773struct fsm *fsm_invert(struct fsm *net) {
2774 struct fsm_state *fsm;
2775 int i, temp;
2776
2777 fsm = net->states;
2778 for (i = 0; (fsm+i)->state_no != -1; i++) {
2779 temp = (fsm+i)->in;
2780 (fsm+i)->in = (fsm+i)->out;
2781 (fsm+i)->out = temp;
2782 }
2783 i = net->arcs_sorted_in;
2784 net->arcs_sorted_in = net->arcs_sorted_out;
2785 net->arcs_sorted_out = i;
2786 return (net);
2787}
2788
2789struct fsm *fsm_sequentialize(struct fsm *net) {
2790 printf("Implementation pending\n");
2791 return(net);
2792}
2793
2794
2795struct fsm *fsm_bimachine(struct fsm *net) {
2796 printf("implementation pending\n");
2797 return(net);
2798}
2799
2800/* _leftrewr(L, a:b) does a -> b || .#. L _ */
2801/* _leftrewr(?* L, a:b) does a -> b || L _ */
2802/* works only with single symbols, but is fast */
2803
2804struct fsm *fsm_left_rewr(struct fsm *net, struct fsm *rewr) {
2805 struct fsm_construct_handle *outh;
2806 struct fsm_read_handle *inh;
2807 struct fsm *newnet;
2808 int i, maxsigma, *sigmatable, currstate, sinkstate, seensource, innum, outnum, relabelin, relabelout, addedsink;
2809
2810 fsm_merge_sigma(net, rewr);
2811 relabelin = rewr->states->in;
2812 relabelout = rewr->states->out;
2813
2814 inh = fsm_read_init(net);
2815 sinkstate = fsm_get_num_states(inh);
2816 outh = fsm_construct_init(net->name);
2817 fsm_construct_copy_sigma(outh, net->sigma);
2818 maxsigma = sigma_max(net->sigma);
2819 maxsigma++;
2820 sigmatable = malloc(maxsigma * sizeof(int));
2821 for (i = 0; i < maxsigma; i++) {
2822 *(sigmatable+i) = -1;
2823 }
2824 addedsink = 0;
2825 while ((currstate = fsm_get_next_state(inh)) != -1) {
2826 seensource = 0;
2827 fsm_construct_set_final(outh, currstate);
2828
2829 while (fsm_get_next_state_arc(inh)) {
2830 innum = fsm_get_arc_num_in(inh);
2831 outnum = fsm_get_arc_num_out(inh);
2832 *(sigmatable+innum) = currstate;
2833 if (innum == relabelin) {
2834 seensource = 1;
2835 if (fsm_read_is_final(inh, currstate)) {
2836 outnum = relabelout;
2837 }
2838 }
2839 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), innum, outnum);
2840 }
2841 for (i = 2; i < maxsigma; i++) {
2842 if (*(sigmatable+i) != currstate && i != relabelin) {
2843 fsm_construct_add_arc_nums(outh, currstate, sinkstate, i, i);
2844 addedsink = 1;
2845 }
2846 }
2847 if (seensource == 0) {
2848 addedsink = 1;
2849 if (fsm_read_is_final(inh, currstate)) {
2850 fsm_construct_add_arc_nums(outh, currstate, sinkstate, relabelin, relabelout);
2851 } else {
2852 fsm_construct_add_arc_nums(outh, currstate, sinkstate, relabelin, relabelin);
2853 }
2854 }
2855 }
2856 if (addedsink) {
2857 for (i = 2; i < maxsigma; i++) {
2858 fsm_construct_add_arc_nums(outh, sinkstate, sinkstate, i, i);
2859 }
2860 fsm_construct_set_final(outh, sinkstate);
2861 }
2862 fsm_construct_set_initial(outh, 0);
2863 fsm_read_done(inh);
2864 newnet = fsm_construct_done(outh);
2865 free(sigmatable);
2866 fsm_destroy(net);
2867 fsm_destroy(rewr);
2868 return(newnet);
2869}
2870
2871struct fsm *fsm_add_sink(struct fsm *net, int final) {
2872 struct fsm_construct_handle *outh;
2873 struct fsm_read_handle *inh;
2874 struct fsm *newnet;
2875 int i, maxsigma, *sigmatable, currstate, sinkstate;
2876
2877 inh = fsm_read_init(net);
2878 sinkstate = fsm_get_num_states(inh);
2879 outh = fsm_construct_init(net->name);
2880 fsm_construct_copy_sigma(outh, net->sigma);
2881 maxsigma = sigma_max(net->sigma);
2882 maxsigma++;
2883 sigmatable = malloc(maxsigma * sizeof(int));
2884 for (i = 0; i < maxsigma; i++) {
2885 *(sigmatable+i) = -1;
2886 }
2887 while ((currstate = fsm_get_next_state(inh)) != -1) {
2888 while (fsm_get_next_state_arc(inh)) {
2889 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh));
2890 *(sigmatable+fsm_get_arc_num_in(inh)) = currstate;
2891 }
2892 for (i = 2; i < maxsigma; i++) {
2893 if (*(sigmatable+i) != currstate) {
2894 fsm_construct_add_arc_nums(outh, currstate, sinkstate, i, i);
2895 }
2896 }
2897 }
2898 for (i = 2; i < maxsigma; i++) {
2899 fsm_construct_add_arc_nums(outh, sinkstate, sinkstate, i, i);
2900 }
2901
2902 while ((i = fsm_get_next_final(inh)) != -1) {
2903 fsm_construct_set_final(outh, i);
2904 }
2905 if (final == 1) {
2906 fsm_construct_set_final(outh, sinkstate);
2907 }
2908 fsm_construct_set_initial(outh, 0);
2909 fsm_read_done(inh);
2910 newnet = fsm_construct_done(outh);
2911 fsm_destroy(net);
2912 return(newnet);
2913}
2914
2915/* _addfinalloop(L, "#":0) adds "#":0 at all final states */
2916/* _addnonfinalloop(L, "#":0) adds "#":0 at all nonfinal states */
2917/* _addloop(L, "#":0) adds "#":0 at all states */
2918
2919/* Adds loops at finals = 0 nonfinals, finals = 1 finals, finals = 2, all */
2920
2921struct fsm *fsm_add_loop(struct fsm *net, struct fsm *marker, int finals) {
2922 struct fsm *newnet;
2923 struct fsm_construct_handle *outh;
2924 struct fsm_read_handle *inh, *minh;
2925 int i;
2926
2927 inh = fsm_read_init(net);
2928 minh = fsm_read_init(marker);
2929
2930 outh = fsm_construct_init(net->name);
2931 fsm_construct_copy_sigma(outh, net->sigma);
2932
2933 while (fsm_get_next_arc(inh)) {
2934 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh));
2935 }
2936 /* Where to put the loops */
2937 if (finals == 1) {
2938 while ((i = fsm_get_next_final(inh)) != -1) {
2939 fsm_construct_set_final(outh, i);
2940 fsm_read_reset(minh);
2941 while (fsm_get_next_arc(minh)) {
2942 fsm_construct_add_arc(outh, i, i, fsm_get_arc_in(minh), fsm_get_arc_out(minh));
2943 }
2944 }
2945 } else if (finals == 0 || finals == 2) {
2946 for (i=0; i < net->statecount; i++) {
2947 if (finals == 2 || !fsm_read_is_final(inh, i)) {
2948 fsm_read_reset(minh);
2949 while (fsm_get_next_arc(minh)) {
2950 fsm_construct_add_arc(outh, i, i, fsm_get_arc_in(minh), fsm_get_arc_out(minh));
2951 }
2952 }
2953 }
2954 }
2955 while ((i = fsm_get_next_final(inh)) != -1) {
2956 fsm_construct_set_final(outh, i);
2957 }
2958 fsm_construct_set_initial(outh, 0);
2959 fsm_read_done(inh);
2960 fsm_read_done(minh);
2961 newnet = fsm_construct_done(outh);
2962 fsm_destroy(net);
2963 return(newnet);
2964}
2965
2966/* _marktail(?* L, 0:x) does ~$x .o. [..] -> x || L _ ; */
2967/* _marktail(?* R.r, 0:x).r does ~$x .o. [..] -> x || _ R */
2968
2969struct fsm *fsm_mark_fsm_tail(struct fsm *net, struct fsm *marker) {
2970 struct fsm *newnet;
2971 struct fsm_construct_handle *outh;
2972 struct fsm_read_handle *inh, *minh;
2973 int i, *mappings, maxstate, target, newtarget;
2974
2975 inh = fsm_read_init(net);
2976 minh = fsm_read_init(marker);
2977
2978 outh = fsm_construct_init(net->name);
2979 fsm_construct_copy_sigma(outh, net->sigma);
2980
2981 mappings = calloc(net->statecount, sizeof(int));
2982 maxstate = net->statecount;
2983
2984 while (fsm_get_next_arc(inh)) {
2985 target = fsm_get_arc_target(inh);
2986 if (fsm_read_is_final(inh, target)) {
2987 if (!*(mappings+target)) {
2988 newtarget = maxstate;
2989 *(mappings+target) = newtarget;
2990 fsm_read_reset(minh);
2991 while (fsm_get_next_arc(minh)) {
2992 fsm_construct_add_arc(outh, newtarget, target, fsm_get_arc_in(minh), fsm_get_arc_out(minh));
2993 }
2994 maxstate++;
2995 } else {
2996 newtarget = *(mappings+target);
2997 }
2998 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), newtarget, fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh));
2999 } else {
3000 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), target, fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh));
3001 }
3002 }
3003 for (i=0; i < net->statecount; i++) {
3004 fsm_construct_set_final(outh,i);
3005 }
3006
3007 fsm_construct_set_initial(outh, 0);
3008 fsm_read_done(inh);
3009 fsm_read_done(minh);
3010 newnet = fsm_construct_done(outh);
3011 fsm_destroy(net);
3012 free(mappings);
3013 return(newnet);
3014}
3015
3016struct fsm *fsm_context_restrict(struct fsm *X, struct fsmcontexts *LR) {
3017
3018 struct fsm *Var, *Notvar, *UnionL, *UnionP, *Result, *Word;
3019 struct fsmcontexts *pairs;
3020
3021 /* [.#. \.#.* .#.]-`[[ [\X* X C X \X*]&~[\X* [L1 X \X* X R1|...|Ln X \X* X Rn] \X*]],X,0] */
3022 /* Where X = variable symbol */
3023 /* The above only works if we do the subtraction iff the right hand side contains .#. in */
3024 /* its alphabet */
3025 /* A more generic formula is the following: */
3026
3027 /* `[[[(?) \.#.* (?)] - `[[[\X* X C X \X*] - [\X* [L1 X \X* X R1|...|Ln X \X* X Rn] \X*] ],X,0],.#.,0]; */
3028 /* Here, the LHS is another way of saying ~[?+ .#. ?+] */
3029
3030 Var = fsm_symbol("@VARX@");
3031 Notvar = fsm_minimize(fsm_kleene_star(fsm_term_negation(fsm_symbol("@VARX@"))));
3032
3033 /* We add the variable symbol to all alphabets to avoid ? mathing it */
3034 /* which would cause extra nondeterminism */
3035 sigma_add("@VARX@", X->sigma);
3036 sigma_sort(X);
3037
3038 /* Also, if any L or R is undeclared we add 0 */
3039 for (pairs = LR; pairs != NULL((void*)0); pairs = pairs->next) {
3040 if (pairs->left == NULL((void*)0)) {
3041 pairs->left = fsm_empty_string();
3042 } else {
3043 sigma_add("@VARX@",pairs->left->sigma);
3044 sigma_substitute(".#.", "@#@", pairs->left->sigma);
3045 sigma_sort(pairs->left);
3046 }
3047 if (pairs->right == NULL((void*)0)) {
3048 pairs->right = fsm_empty_string();
3049 } else {
3050 sigma_add("@VARX@",pairs->right->sigma);
3051 sigma_substitute(".#.", "@#@", pairs->right->sigma);
3052 sigma_sort(pairs->right);
3053 }
3054 }
3055
3056 UnionP = fsm_empty_set();
3057
3058 for (pairs = LR; pairs != NULL((void*)0) ; pairs = pairs->next) {
3059 UnionP = fsm_minimize(fsm_union(fsm_minimize(fsm_concat(fsm_copy(pairs->left),fsm_concat(fsm_copy(Var),fsm_concat(fsm_copy(Notvar),fsm_concat(fsm_copy(Var),fsm_copy(pairs->right)))))), UnionP));
3060 }
3061
3062 UnionL = fsm_minimize(fsm_concat(fsm_copy(Notvar),fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(X), fsm_concat(fsm_copy(Var),fsm_copy(Notvar))))));
3063
3064 Result = fsm_intersect(UnionL, fsm_complement(fsm_concat(fsm_copy(Notvar),fsm_minimize(fsm_concat(fsm_copy(UnionP),fsm_copy(Notvar))))));
3065 if (sigma_find("@VARX@", Result->sigma) != -1) {
3066 Result = fsm_complement(fsm_substitute_symbol(Result, "@VARX@","@_EPSILON_SYMBOL_@"));
3067 } else {
3068 Result = fsm_complement(Result);
3069 }
3070
3071 if (sigma_find("@#@", Result->sigma) != -1) {
3072 Word = fsm_minimize(fsm_concat(fsm_symbol("@#@"),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@#@"))),fsm_symbol("@#@"))));
3073 Result = fsm_intersect(Word, Result);
3074 Result = fsm_substitute_symbol(Result, "@#@", "@_EPSILON_SYMBOL_@");
3075 }
3076 fsm_destroy(UnionP);
3077 fsm_destroy(Var);
3078 fsm_destroy(Notvar);
3079 fsm_destroy(X);
3080 fsm_clear_contexts(pairs);
3081 return(Result);
3082}
3083
3084struct fsm *fsm_flatten(struct fsm *net, struct fsm *epsilon) {
3085 struct fsm *newnet;
3086 struct fsm_construct_handle *outh;
3087 struct fsm_read_handle *inh, *eps;
3088 int i, maxstate, in, out, target;
3089 char *epssym, *instring, *outstring;
3090
3091 net = fsm_minimize(net);
3092
3093 inh = fsm_read_init(net);
3094 eps = fsm_read_init(epsilon);
3095 if (fsm_get_next_arc(eps) == -1) {
3096 fsm_destroy(net);
3097 fsm_destroy(epsilon);
3098 return NULL((void*)0);
3099 }
3100 epssym = strdup(fsm_get_arc_in(eps));
3101 fsm_read_done(eps);
3102
3103 outh = fsm_construct_init(net->name);
3104 maxstate = net->statecount;
3105
3106 fsm_construct_copy_sigma(outh, net->sigma);
3107
3108 while (fsm_get_next_arc(inh)) {
3109 target = fsm_get_arc_target(inh);
3110 in = fsm_get_arc_num_in(inh);
3111 out = fsm_get_arc_num_out(inh);
3112 if (in == EPSILON0 || out == EPSILON0) {
3113 instring = fsm_get_arc_in(inh);
3114 outstring = fsm_get_arc_out(inh);
3115 if (in == EPSILON0) { instring = epssym; }
3116 if (out == EPSILON0) { outstring = epssym; }
3117
3118 fsm_construct_add_arc(outh, fsm_get_arc_source(inh), maxstate, instring, instring);
3119 fsm_construct_add_arc(outh, maxstate, target, outstring, outstring);
3120 } else {
3121 fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), maxstate, in, in);
3122 fsm_construct_add_arc_nums(outh, maxstate, target, out, out);
3123 }
3124 maxstate++;
3125 }
3126 while ((i = fsm_get_next_final(inh)) != -1) {
3127 fsm_construct_set_final(outh, i);
3128 }
3129 while ((i = fsm_get_next_initial(inh)) != -1) {
3130 fsm_construct_set_initial(outh, i);
3131 }
3132
3133 fsm_read_done(inh);
3134 newnet = fsm_construct_done(outh);
3135 fsm_destroy(net);
3136 fsm_destroy(epsilon);
3137 free(epssym);
3138 return(newnet);
3139}
3140
3141/* Removes IDENTITY and UNKNOWN transitions. If mode = 1, only removes UNKNOWNs */
3142struct fsm *fsm_close_sigma(struct fsm *net, int mode) {
3143 struct fsm *newnet;
3144 struct fsm_construct_handle *newh;
3145 struct fsm_read_handle *inh;
3146 int i;
3147
3148 inh = fsm_read_init(net);
3149 newh = fsm_construct_init(net->name);
3150 fsm_construct_copy_sigma(newh, net->sigma);
3151
3152 while (fsm_get_next_arc(inh)) {
3153 if ((fsm_get_arc_num_in(inh) != UNKNOWN1 && fsm_get_arc_num_in(inh) != IDENTITY2 && fsm_get_arc_num_out(inh) != UNKNOWN1 && fsm_get_arc_num_out(inh) != IDENTITY2) || (mode == 1 && fsm_get_arc_num_in(inh) != UNKNOWN1 && fsm_get_arc_num_out(inh) != UNKNOWN1))
3154 fsm_construct_add_arc_nums(newh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh));
3155 }
3156 while ((i = fsm_get_next_final(inh)) != -1) {
3157 fsm_construct_set_final(newh, i);
3158 }
3159 while ((i = fsm_get_next_initial(inh)) != -1) {
3160 fsm_construct_set_initial(newh, i);
3161 }
3162 fsm_read_done(inh);
3163 newnet = fsm_construct_done(newh);
3164 fsm_destroy(net);
3165 return(fsm_minimize(newnet));
3166}