File: | constructions.c |
Warning: | line 791, column 51 Dereference of null pointer |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Foma: a finite-state toolkit and library. */ | |||
2 | /* Copyright © 2008-2021 Mans Hulden */ | |||
3 | ||||
4 | /* This file is part of foma. */ | |||
5 | ||||
6 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ | |||
7 | /* you may not use this file except in compliance with the License. */ | |||
8 | /* You may obtain a copy of the License at */ | |||
9 | ||||
10 | /* http://www.apache.org/licenses/LICENSE-2.0 */ | |||
11 | ||||
12 | /* Unless required by applicable law or agreed to in writing, software */ | |||
13 | /* distributed under the License is distributed on an "AS IS" BASIS, */ | |||
14 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ | |||
15 | /* See the License for the specific language governing permissions and */ | |||
16 | /* limitations under the License. */ | |||
17 | ||||
18 | #include <stdio.h> | |||
19 | #include <stdlib.h> | |||
20 | #include <string.h> | |||
21 | #include "foma.h" | |||
22 | ||||
23 | #define KLEENE_STAR0 0 | |||
24 | #define KLEENE_PLUS1 1 | |||
25 | #define OPTIONALITY2 2 | |||
26 | ||||
27 | #define COMPLEMENT0 0 | |||
28 | #define COMPLETE1 1 | |||
29 | ||||
30 | #define STACK_3_PUSH(a,b,c)int_stack_push(a); int_stack_push(b); int_stack_push(c); int_stack_push(a); int_stack_push(b); int_stack_push(c); | |||
31 | #define STACK_2_PUSH(a,b)int_stack_push(a); int_stack_push(b); int_stack_push(a); int_stack_push(b); | |||
32 | ||||
33 | struct mergesigma { | |||
34 | char *symbol; | |||
35 | unsigned char presence; /* 1 = in net 1, 2 = in net 2, 3 = in both */ | |||
36 | int number; | |||
37 | struct mergesigma *next; | |||
38 | }; | |||
39 | ||||
40 | int sort_cmp(const void *a, const void *b) { | |||
41 | return (((const struct fsm_state *)a)->state_no - ((const struct fsm_state *)b)->state_no); | |||
42 | } | |||
43 | ||||
44 | int add_fsm_arc(struct fsm_state *fsm, int offset, int state_no, int in, int out, int target, int final_state, int start_state); | |||
45 | ||||
46 | struct fsm *fsm_kleene_closure(struct fsm *net, int optionality); | |||
47 | ||||
48 | struct fsm *fsm_kleene_star(struct fsm *net) { | |||
49 | return (fsm_kleene_closure(net, KLEENE_STAR0)); | |||
50 | } | |||
51 | ||||
52 | struct fsm *fsm_kleene_plus(struct fsm *net) { | |||
53 | return (fsm_kleene_closure(net, KLEENE_PLUS1)); | |||
54 | } | |||
55 | ||||
56 | struct fsm *fsm_optionality(struct fsm *net) { | |||
57 | return (fsm_kleene_closure(net, OPTIONALITY2)); | |||
58 | } | |||
59 | ||||
60 | struct fsm *fsm_escape(char *symbol) { | |||
61 | return(fsm_symbol(symbol+1)); | |||
62 | } | |||
63 | ||||
64 | /* Convert a multicharacter-string-containing machine */ | |||
65 | /* to the equivalent "letter" machine where all arcs */ | |||
66 | /* are single utf8 letters. */ | |||
67 | ||||
68 | struct fsm *fsm_letter_machine(struct fsm *net) { | |||
69 | ||||
70 | struct fsm *outnet; | |||
71 | struct fsm_read_handle *inh; | |||
72 | struct fsm_construct_handle *outh; | |||
73 | int i, steps, source, target, addstate, innum, outnum, inlen, outlen; | |||
74 | char *in, *out, *currin, *currout, tmpin[128], tmpout[128]; | |||
75 | ||||
76 | inh = fsm_read_init(fsm_minimize(net)); | |||
77 | outh = fsm_construct_init("name"); | |||
78 | addstate = net->statecount; | |||
79 | ||||
80 | while (fsm_get_next_arc(inh)) { | |||
81 | in = fsm_get_arc_in(inh); | |||
82 | out = fsm_get_arc_out(inh); | |||
83 | innum = fsm_get_arc_num_in(inh); | |||
84 | outnum = fsm_get_arc_num_out(inh); | |||
85 | source = fsm_get_arc_source(inh); | |||
86 | target = fsm_get_arc_target(inh); | |||
87 | ||||
88 | if (((innum > IDENTITY2) && utf8strlen(in) > 1) || ((outnum > IDENTITY2) && utf8strlen(out) > 1)) { | |||
89 | inlen = innum <= IDENTITY2 ? 1 : utf8strlen(in); | |||
90 | outlen = outnum <= IDENTITY2 ? 1 : utf8strlen(out); | |||
91 | steps = inlen > outlen ? inlen : outlen; | |||
92 | ||||
93 | target = addstate; | |||
94 | for (i = 0; i < steps; i++) { | |||
95 | if (innum <= IDENTITY2 || inlen < 1) { | |||
96 | if (inlen < 1) | |||
97 | currin = "@_EPSILON_SYMBOL_@"; | |||
98 | else | |||
99 | currin = in; | |||
100 | } else { | |||
101 | strncpy(tmpin, in, utf8skip(in)+1); | |||
102 | *(tmpin+utf8skip(in)+1) = '\0'; | |||
103 | currin = tmpin; | |||
104 | inlen--; | |||
105 | in = in+utf8skip(in)+1; | |||
106 | } | |||
107 | if (outnum <= IDENTITY2 || outlen < 1) { | |||
108 | if (outlen < 1) | |||
109 | currout = "@_EPSILON_SYMBOL_@"; | |||
110 | else | |||
111 | currout = out; | |||
112 | } else { | |||
113 | strncpy(tmpout, out, utf8skip(in)+1); | |||
114 | *(tmpout+utf8skip(out)+1) = '\0'; | |||
115 | currout = tmpout; | |||
116 | out = out+utf8skip(out)+1; | |||
117 | outlen--; | |||
118 | } | |||
119 | if (i == 0 && steps > 1) { | |||
120 | target = addstate; | |||
121 | addstate++; | |||
122 | } | |||
123 | if (i > 0 && (steps-i == 1)) { | |||
124 | source = addstate - 1; | |||
125 | target = fsm_get_arc_target(inh); | |||
126 | } | |||
127 | if (i > 0 && (steps-i != 1)) { | |||
128 | source = addstate-1; | |||
129 | target = addstate; | |||
130 | addstate++; | |||
131 | } | |||
132 | fsm_construct_add_arc(outh,source,target,currin,currout); | |||
133 | } | |||
134 | } else { | |||
135 | fsm_construct_add_arc(outh,source,target,in,out); | |||
136 | } | |||
137 | } | |||
138 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
139 | fsm_construct_set_final(outh, i); | |||
140 | } | |||
141 | while ((i = fsm_get_next_initial(inh)) != -1) { | |||
142 | fsm_construct_set_initial(outh, i); | |||
143 | } | |||
144 | fsm_read_done(inh); | |||
145 | outnet = fsm_construct_done(outh); | |||
146 | return(outnet); | |||
147 | } | |||
148 | ||||
149 | struct fsm *fsm_explode(char *symbol) { | |||
150 | struct fsm *net; | |||
151 | struct fsm_construct_handle *h; | |||
152 | char *tempstring; | |||
153 | int length, i, j, skip; | |||
154 | ||||
155 | h = fsm_construct_init(""); | |||
156 | fsm_construct_set_initial(h,0); | |||
157 | ||||
158 | length = strlen(symbol)-2; | |||
159 | for (i=1, j=1; i <= length; i += skip, j++) { | |||
160 | skip = utf8skip(symbol+i)+1; | |||
161 | tempstring = xxstrndup(((symbol+i)),skip); | |||
162 | fsm_construct_add_arc(h,j-1,j,tempstring,tempstring); | |||
163 | free(tempstring); | |||
164 | } | |||
165 | fsm_construct_set_final(h, j-1); | |||
166 | net = fsm_construct_done(h); | |||
167 | return(net); | |||
168 | } | |||
169 | ||||
170 | struct fsm *fsm_symbol(char *symbol) { | |||
171 | struct fsm *net; | |||
172 | int symbol_no; | |||
173 | ||||
174 | net = fsm_create(""); | |||
175 | fsm_update_flags(net, YES1, YES1, YES1, YES1, YES1, NO0); | |||
176 | if (strcmp(symbol,"@_EPSILON_SYMBOL_@")==0) { | |||
177 | /* Epsilon */ | |||
178 | (void)sigma_add_special(EPSILON0, net->sigma); | |||
179 | net->states = malloc(sizeof(struct fsm_state)*2); | |||
180 | add_fsm_arc(net->states, 0, 0, -1,-1,-1,1,1); | |||
181 | add_fsm_arc(net->states, 1, -1,-1,-1,-1,-1,-1); | |||
182 | net->arccount = 0; | |||
183 | net->statecount = 1; | |||
184 | net->linecount = 1; | |||
185 | net->finalcount = 1; | |||
186 | net->is_deterministic = NO0; | |||
187 | net->is_minimized = NO0; | |||
188 | net->is_epsilon_free = NO0; | |||
189 | } else { | |||
190 | if ((strcmp(symbol,"@_IDENTITY_SYMBOL_@") == 0)) { | |||
191 | symbol_no = sigma_add_special(IDENTITY2,net->sigma); | |||
192 | } else { | |||
193 | symbol_no = sigma_add(symbol,net->sigma); | |||
194 | } | |||
195 | net->states = malloc(sizeof(struct fsm_state)*3); | |||
196 | add_fsm_arc(net->states, 0, 0, symbol_no, symbol_no, 1, 0, 1); | |||
197 | add_fsm_arc(net->states, 1, 1, -1, -1, -1, 1, 0); | |||
198 | add_fsm_arc(net->states, 2, -1, -1, -1, -1, -1, -1); | |||
199 | net->arity = 1; | |||
200 | net->pathcount = 1; | |||
201 | net->arccount = 1; | |||
202 | net->statecount = 2; | |||
203 | net->linecount = 2; | |||
204 | net->finalcount = 1; | |||
205 | net->arcs_sorted_in = YES1; | |||
206 | net->arcs_sorted_out = YES1; | |||
207 | net->is_deterministic = YES1; | |||
208 | net->is_minimized = YES1; | |||
209 | net->is_epsilon_free = YES1; | |||
210 | } | |||
211 | return(net); | |||
212 | } | |||
213 | ||||
214 | void fsm_sort_lines(struct fsm *net) { | |||
215 | struct fsm_state *fsm; | |||
216 | fsm = net->states; | |||
217 | qsort(fsm, find_arccount(fsm), sizeof(struct fsm_state), sort_cmp); | |||
218 | } | |||
219 | ||||
220 | void fsm_update_flags(struct fsm *net, int det, int pru, int min, int eps, int loop, int completed) { | |||
221 | net->is_deterministic = det; | |||
222 | net->is_pruned = pru; | |||
223 | net->is_minimized = min; | |||
224 | net->is_epsilon_free = eps; | |||
225 | net->is_loop_free = loop; | |||
226 | net->is_completed = completed; | |||
227 | net->arcs_sorted_in = NO0; | |||
228 | net->arcs_sorted_out = NO0; | |||
229 | } | |||
230 | ||||
231 | int fsm_count_states(struct fsm_state *fsm) { | |||
232 | int i, temp = -1, states = 0; | |||
233 | for(i=0; (fsm+i)->state_no != -1; i++) { | |||
234 | if (temp != (fsm+i)->state_no) { | |||
235 | states++; | |||
236 | temp = (fsm+i)->state_no; | |||
237 | } | |||
238 | } | |||
239 | return(states); | |||
240 | } | |||
241 | ||||
242 | struct state_arr { | |||
243 | int final; | |||
244 | int start; | |||
245 | struct fsm_state *transitions; | |||
246 | }; | |||
247 | ||||
248 | struct state_arr *init_state_pointers(struct fsm_state *fsm_state) { | |||
249 | ||||
250 | /* Create an array for quick lookup of whether states are final, and a pointer to the first line regarding each state */ | |||
251 | ||||
252 | struct state_arr *state_arr; | |||
253 | int states, i, sold; | |||
254 | sold = -1; | |||
255 | states = fsm_count_states(fsm_state); | |||
256 | state_arr = malloc(sizeof(struct state_arr)*(states+1)); | |||
257 | for (i=0; i<states; i++) { | |||
258 | (state_arr+i)->final = 0; | |||
259 | (state_arr+i)->start = 0; | |||
260 | } | |||
261 | ||||
262 | for (i=0; (fsm_state+i)->state_no != -1; i++) { | |||
263 | if ((fsm_state+i)->final_state == 1) | |||
264 | (state_arr+((fsm_state+i)->state_no))->final = 1; | |||
265 | if ((fsm_state+i)->start_state == 1) | |||
266 | (state_arr+((fsm_state+i)->state_no))->start = 1; | |||
267 | if ((fsm_state+i)->state_no != sold) { | |||
268 | (state_arr+((fsm_state+i)->state_no))->transitions = (fsm_state+i); | |||
269 | sold = (fsm_state+i)->state_no; | |||
270 | } | |||
271 | } | |||
272 | return(state_arr); | |||
273 | } | |||
274 | ||||
275 | /* An open addressing scheme (with linear probing) to store triplets of ints */ | |||
276 | /* and hashing them with an automatically increasing key at every insert */ | |||
277 | /* The table is rehashed whenever occupancy reaches 0.5 */ | |||
278 | ||||
279 | struct triplethash_triplets { | |||
280 | int a; | |||
281 | int b; | |||
282 | int c; | |||
283 | int key; | |||
284 | }; | |||
285 | ||||
286 | struct triplethash { | |||
287 | struct triplethash_triplets *triplets; | |||
288 | unsigned int tablesize; | |||
289 | int occupancy; | |||
290 | }; | |||
291 | ||||
292 | struct triplethash *triplet_hash_init() { | |||
293 | struct triplethash *th; | |||
294 | int i; | |||
295 | th = malloc(sizeof(struct triplethash)); | |||
296 | th->tablesize = 128; | |||
297 | th->occupancy = 0; | |||
298 | th->triplets = malloc(sizeof(struct triplethash_triplets) * th->tablesize); | |||
299 | for (i = 0; i < th->tablesize; i++) { | |||
300 | (th->triplets+i)->key = -1; | |||
301 | } | |||
302 | return(th); | |||
303 | } | |||
304 | ||||
305 | unsigned int triplethash_hashf(int a, int b, int c) { | |||
306 | return((unsigned int)(a * 7907 + b * 86028157 + c * 7919)); | |||
307 | } | |||
308 | ||||
309 | void triplet_hash_free(struct triplethash *th) { | |||
310 | if (th != NULL((void*)0)) { | |||
311 | if (th->triplets != NULL((void*)0)) { | |||
312 | free(th->triplets); | |||
313 | } | |||
314 | free(th); | |||
315 | } | |||
316 | } | |||
317 | ||||
318 | void triplet_hash_rehash(struct triplethash *th); | |||
319 | ||||
320 | void triplet_hash_insert_with_key(struct triplethash *th, int a, int b, int c, int key) { | |||
321 | struct triplethash_triplets *th_table; | |||
322 | unsigned int hash; | |||
323 | th_table = th->triplets; | |||
324 | hash = triplethash_hashf(a, b, c) % th->tablesize; | |||
325 | for (;;) { | |||
326 | if ((th_table + hash)->key == -1) { | |||
327 | (th_table + hash)->key = key; | |||
328 | (th_table + hash)->a = a; | |||
329 | (th_table + hash)->b = b; | |||
330 | (th_table + hash)->c = c; | |||
331 | break; | |||
332 | } | |||
333 | hash++; | |||
334 | if (hash >= th->tablesize) | |||
335 | hash -= th->tablesize; | |||
336 | } | |||
337 | } | |||
338 | ||||
339 | int triplet_hash_insert(struct triplethash *th, int a, int b, int c) { | |||
340 | struct triplethash_triplets *th_table; | |||
341 | unsigned int hash; | |||
342 | th_table = th->triplets; | |||
343 | hash = triplethash_hashf(a,b,c) % th->tablesize; | |||
344 | for (;;) { | |||
345 | if ((th_table + hash)->key == - 1) { | |||
346 | (th_table + hash)->key = th->occupancy; | |||
347 | (th_table + hash)->a = a; | |||
348 | (th_table + hash)->b = b; | |||
349 | (th_table + hash)->c = c; | |||
350 | th->occupancy = th->occupancy + 1; | |||
351 | if (th->occupancy > th->tablesize / 2) { | |||
352 | triplet_hash_rehash(th); | |||
353 | } | |||
354 | return(th->occupancy - 1); | |||
355 | } | |||
356 | hash++; | |||
357 | if (hash >= th->tablesize) | |||
358 | hash -= th->tablesize; | |||
359 | } | |||
360 | } | |||
361 | ||||
362 | void triplet_hash_rehash(struct triplethash *th) { | |||
363 | int i; | |||
364 | unsigned int newtablesize, oldtablesize; | |||
365 | struct triplethash_triplets *oldtriplets; | |||
366 | newtablesize = th->tablesize * 2; | |||
367 | oldtablesize = th->tablesize; | |||
368 | oldtriplets = th->triplets; | |||
369 | th->triplets = malloc(sizeof(struct triplethash_triplets) * newtablesize); | |||
370 | th->tablesize = newtablesize; | |||
371 | for (i = 0; i < newtablesize; i++) { | |||
372 | (th->triplets+i)->key = -1; | |||
373 | } | |||
374 | for (i = 0; i < oldtablesize; i++) { | |||
375 | if ((oldtriplets+i)-> key != -1) { | |||
376 | triplet_hash_insert_with_key(th, (oldtriplets+i)->a, (oldtriplets+i)->b, (oldtriplets+i)->c, (oldtriplets+i)->key); | |||
377 | } | |||
378 | } | |||
379 | free(oldtriplets); | |||
380 | } | |||
381 | ||||
382 | int triplet_hash_find(struct triplethash *th, int a, int b, int c) { | |||
383 | struct triplethash_triplets *th_table; | |||
384 | unsigned int hash, j; | |||
385 | th_table = th->triplets; | |||
386 | hash = triplethash_hashf(a, b, c) % th->tablesize; | |||
387 | for (j = 0; j < th->tablesize; j++) { | |||
388 | if ((th_table + hash)->key == - 1) | |||
389 | return -1; | |||
390 | if ((th_table + hash)->a == a && (th_table + hash)->b == b && (th_table + hash)->c == c) { | |||
391 | return((th_table + hash)->key); | |||
392 | } | |||
393 | hash++; | |||
394 | if (hash >= th->tablesize) | |||
395 | hash -= th->tablesize; | |||
396 | } | |||
397 | return -1; | |||
398 | } | |||
399 | ||||
400 | struct fsm *fsm_intersect(struct fsm *net1, struct fsm *net2) { | |||
401 | ||||
402 | struct blookup {int mainloop; int target; } *array, *bptr; | |||
403 | int a,b,current_state, current_start, current_final, target_number, sigma2size, mainloop; | |||
404 | struct fsm_state *machine_a, *machine_b; | |||
405 | struct state_arr *point_a, *point_b; | |||
406 | struct fsm *new_net; | |||
407 | struct triplethash *th; | |||
408 | ||||
409 | net1 = fsm_minimize(net1); | |||
410 | net2 = fsm_minimize(net2); | |||
411 | ||||
412 | if (fsm_isempty(net1) || fsm_isempty(net2)) { | |||
413 | fsm_destroy(net1); | |||
414 | fsm_destroy(net2); | |||
415 | return(fsm_empty_set()); | |||
416 | } | |||
417 | ||||
418 | fsm_merge_sigma(net1, net2); | |||
419 | ||||
420 | fsm_update_flags(net1, YES1, NO0, UNK2, YES1, UNK2, UNK2); | |||
421 | ||||
422 | machine_a = net1->states; | |||
423 | machine_b = net2->states; | |||
424 | ||||
425 | sigma2size = sigma_max(net2->sigma)+1; | |||
426 | array = calloc(sigma2size*sigma2size, sizeof(struct blookup)); | |||
427 | mainloop = 0; | |||
428 | ||||
429 | /* Intersect two networks by the running-in-parallel method */ | |||
430 | /* new state 0 = {0,0} */ | |||
431 | ||||
432 | STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);; | |||
433 | ||||
434 | th = triplet_hash_init(); | |||
435 | triplet_hash_insert(th, 0, 0, 0); | |||
436 | ||||
437 | fsm_state_init(sigma_max(net1->sigma)); | |||
438 | ||||
439 | point_a = init_state_pointers(machine_a); | |||
440 | point_b = init_state_pointers(machine_b); | |||
441 | ||||
442 | while (!int_stack_isempty()) { | |||
443 | ||||
444 | /* Get a pair of states to examine */ | |||
445 | ||||
446 | a = int_stack_pop(); | |||
447 | b = int_stack_pop(); | |||
448 | ||||
449 | current_state = triplet_hash_find(th, a, b, 0); | |||
450 | current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0; | |||
451 | current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0; | |||
452 | ||||
453 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
454 | ||||
455 | /* Create a lookup index for machine b */ | |||
456 | /* array[in][out] holds the target for this state and the symbol pair in:out */ | |||
457 | /* Also, we keep track of whether an entry is fresh by the mainloop counter */ | |||
458 | /* so we don't mistakenly use an old entry and don't have to clear the table */ | |||
459 | /* between each state pair we encounter */ | |||
460 | ||||
461 | for (mainloop++, machine_b = (point_b+b)->transitions; machine_b->state_no == b; machine_b++) { | |||
462 | if (machine_b->in < 0) continue; | |||
463 | bptr = array+(machine_b->in*sigma2size)+machine_b->out; | |||
464 | bptr->mainloop = mainloop; | |||
465 | bptr->target = machine_b->target; | |||
466 | } | |||
467 | ||||
468 | /* The main loop where we run the machines in parallel */ | |||
469 | /* We look at each transition of a in this state, and consult the index of b */ | |||
470 | /* we just created */ | |||
471 | ||||
472 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
473 | if (machine_a->in < 0 || machine_a->out < 0) continue; | |||
474 | bptr = array+(machine_a->in*sigma2size)+machine_a->out; | |||
475 | ||||
476 | if (bptr->mainloop != mainloop) | |||
477 | continue; | |||
478 | ||||
479 | if ((target_number = triplet_hash_find(th, machine_a->target, bptr->target, 0)) == -1) { | |||
480 | STACK_2_PUSH(bptr->target, machine_a->target)int_stack_push(bptr->target); int_stack_push(machine_a-> target);; | |||
481 | target_number = triplet_hash_insert(th, machine_a->target, bptr->target, 0); | |||
482 | } | |||
483 | ||||
484 | fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start); | |||
485 | ||||
486 | } | |||
487 | fsm_state_end_state(); | |||
488 | } | |||
489 | new_net = fsm_create(""); | |||
490 | fsm_sigma_destroy(new_net->sigma); | |||
491 | new_net->sigma = net1->sigma; | |||
492 | net1->sigma = NULL((void*)0); | |||
493 | fsm_destroy(net2); | |||
494 | fsm_destroy(net1); | |||
495 | fsm_state_close(new_net); | |||
496 | free(point_a); | |||
497 | free(point_b); | |||
498 | free(array); | |||
499 | triplet_hash_free(th); | |||
500 | return(fsm_coaccessible(new_net)); | |||
501 | } | |||
502 | ||||
503 | struct fsm *fsm_compose(struct fsm *net1, struct fsm *net2) { | |||
504 | ||||
505 | ||||
506 | /* The composition algorithm is the basic naive composition where we lazily */ | |||
507 | /* take the cross-product of states P and Q and move to a new state with symbols */ | |||
508 | /* ain, bout if the symbols aout = bin. Also, if aout = 0 state p goes to */ | |||
509 | /* its target, while q stays. Similarly, if bin = 0, q goes to its target */ | |||
510 | /* while p stays. */ | |||
511 | ||||
512 | /* We have two variants of the algorithm to avoid creating multiple paths: */ | |||
513 | /* 1) Bistate composition. In this variant, when we create a new state, we call it */ | |||
514 | /* (p,q,mode) where mode = 0 or 1, depending on what kind of an arc we followed */ | |||
515 | /* to get there. If we followed an x:y arc where x and y are both real symbols */ | |||
516 | /* we always go to mode 0, however, if we followed an 0:y arc, we go to mode 1. */ | |||
517 | /* from mode 1, we do not follow x:0 arcs. Each (p,q,mode) is unique, and */ | |||
518 | /* from (p,q,X) we always consider the transitions from p and q. */ | |||
519 | /* We never create arcs (x:0 0:y) yielding x:y. */ | |||
520 | ||||
521 | /* 2) Tristate composition. Here we always go to mode 0 with a x:y arc. */ | |||
522 | /* (x:0,0:y) yielding x:y is allowed, but only in mode 0 */ | |||
523 | /* (x:y y:z) is always allowed and results in target = mode 0 */ | |||
524 | /* 0:y arcs lead to mode 2, and from there we stay in mode 2 with 0:y */ | |||
525 | /* in mode 2 we only consider 0:y and x:y arcs */ | |||
526 | /* x:0 arcs lead to mode 1, and from there we stay in mode 1 with x:0 */ | |||
527 | /* in mode 1 we only consider x:0 and x:y arcs */ | |||
528 | ||||
529 | /* It seems unsettled which type of composition is better. Tristate is similar to */ | |||
530 | /* the filter transducer given in Mohri, Pereira and Riley (1996) and works well */ | |||
531 | /* for cases such as [a:0 b:0 c:0 .o. 0:d 0:e 0:f], yielding the shortest path. */ | |||
532 | /* However, for generic cases, bistate seems to yield smaller transducers. */ | |||
533 | /* The global variable g_compose_tristate is set to OFF by default */ | |||
534 | ||||
535 | struct outarray { | |||
536 | short int symin; | |||
537 | short int symout; | |||
538 | int target; | |||
539 | int mainloop; | |||
540 | } *outarray, *iptr, *currtail; | |||
541 | ||||
542 | struct index { | |||
543 | struct outarray *tail; | |||
544 | } *index; | |||
545 | ||||
546 | extern int g_compose_tristate, g_flag_is_epsilon; | |||
547 | int a,b,i,mainloop,current_state, current_start, current_final, target_number, ain, bin, aout, bout, asearch, max2sigma; | |||
548 | struct fsm_state *machine_a, *machine_b; | |||
549 | struct state_arr *point_a, *point_b; | |||
550 | struct triplethash *th; | |||
551 | int mode; | |||
552 | _Bool *is_flag = NULL((void*)0); | |||
| ||||
553 | ||||
554 | ||||
555 | net1 = fsm_minimize(net1); | |||
556 | net2 = fsm_minimize(net2); | |||
557 | ||||
558 | if (fsm_isempty(net1) || fsm_isempty(net2)) { | |||
559 | fsm_destroy(net1); | |||
560 | fsm_destroy(net2); | |||
561 | return(fsm_empty_set()); | |||
562 | } | |||
563 | ||||
564 | /* If flag-is-epsilon is on, we need to add the flag symbols */ | |||
565 | /* in both networks to each other's sigma so that UNKNOWN */ | |||
566 | /* or IDENTITY symbols do not match these flags, since they are */ | |||
567 | /* supposed to have the behavior of EPSILON */ | |||
568 | /* And we need to do this before merging the sigmas, of course */ | |||
569 | ||||
570 | if (g_flag_is_epsilon) { | |||
571 | struct sigma *sig1, *sig2; | |||
572 | int flags1, flags2; | |||
573 | flags1 = flags2 = 0; | |||
574 | sig2 = net2->sigma; | |||
575 | max2sigma = sigma_max(net2->sigma); | |||
576 | for (sig1 = net1->sigma; sig1 != NULL((void*)0); sig1 = sig1->next) { | |||
577 | if (flag_check(sig1->symbol)) { | |||
578 | flags1 = 1; | |||
579 | if (sigma_find(sig1->symbol, sig2) == -1) { | |||
580 | sigma_add(sig1->symbol, sig2); | |||
581 | } | |||
582 | } | |||
583 | } | |||
584 | ||||
585 | sig1 = net1->sigma; | |||
586 | for (sig2 = net2->sigma; sig2 != NULL((void*)0) ; sig2 = sig2->next) { | |||
587 | if (flag_check(sig2->symbol)) { | |||
588 | if (sig2->number <= max2sigma) { | |||
589 | flags2 = 1; | |||
590 | } | |||
591 | if (sigma_find(sig2->symbol, sig1) == -1) { | |||
592 | sigma_add(sig2->symbol, sig1); | |||
593 | } | |||
594 | } | |||
595 | } | |||
596 | sigma_sort(net2); | |||
597 | sigma_sort(net1); | |||
598 | if (flags1 && flags2) { | |||
599 | printf("***Warning: flag-is-epsilon is ON and both networks contain flags in composition. This may yield incorrect results. Set flag-is-epsilon to OFF.\n"); | |||
600 | } | |||
601 | } | |||
602 | ||||
603 | fsm_merge_sigma(net1, net2); | |||
604 | ||||
605 | if (g_flag_is_epsilon) { | |||
606 | /* Create lookup table for quickly checking if a symbol is a flag */ | |||
607 | struct sigma *sig1; | |||
608 | is_flag = malloc(sizeof(_Bool)*(sigma_max(net1->sigma)+1)); | |||
609 | for (sig1 = net1->sigma; sig1 != NULL((void*)0); sig1=sig1->next) { | |||
610 | if (flag_check(sig1->symbol)) { | |||
611 | *(is_flag+(sig1->number)) = 1; | |||
612 | } else { | |||
613 | *(is_flag+(sig1->number)) = 0; | |||
614 | } | |||
615 | } | |||
616 | } | |||
617 | ||||
618 | fsm_update_flags(net1, YES1, NO0, UNK2, YES1, UNK2, UNK2); | |||
619 | ||||
620 | machine_a = net1->states; | |||
621 | machine_b = net2->states; | |||
622 | ||||
623 | max2sigma = sigma_max(net2->sigma); | |||
624 | ||||
625 | /* Create an index for looking up input symbols in machine b quickly */ | |||
626 | /* We store each machine_b->in symbol in outarray[symin][...] */ | |||
627 | /* the array index[symin] points to the tail of the current list in outarray */ | |||
628 | /* (we may have many entries for one input symbol as there may be many outputs */ | |||
629 | /* The field mainloop tells us whether the entry is current as we want to loop */ | |||
630 | /* UNKNOWN and IDENTITY are indexed as UNKNOWN because we need to find both */ | |||
631 | /* as they share some semantics */ | |||
632 | ||||
633 | index = calloc(max2sigma+1, sizeof(struct index)); | |||
634 | outarray = calloc((max2sigma+2)*(max2sigma+1), sizeof(struct outarray)); | |||
635 | ||||
636 | for (i=0; i <= max2sigma; i++) { | |||
637 | (index+i)->tail = outarray + ((max2sigma+2)*i); | |||
638 | } | |||
639 | ||||
640 | ||||
641 | /* Mode, a, b */ | |||
642 | STACK_3_PUSH(0,0,0)int_stack_push(0); int_stack_push(0); int_stack_push(0);; | |||
643 | ||||
644 | th = triplet_hash_init(); | |||
645 | triplet_hash_insert(th, 0, 0, 0); | |||
646 | ||||
647 | fsm_state_init(sigma_max(net1->sigma)); | |||
648 | ||||
649 | point_a = init_state_pointers(machine_a); | |||
650 | point_b = init_state_pointers(machine_b); | |||
651 | ||||
652 | mainloop = 0; | |||
653 | ||||
654 | while (!int_stack_isempty()) { | |||
655 | ||||
656 | /* Get a pair of states to examine */ | |||
657 | ||||
658 | a = int_stack_pop(); | |||
659 | b = int_stack_pop(); | |||
660 | mode = int_stack_pop(); | |||
661 | ||||
662 | current_state = triplet_hash_find(th, a,b,mode); | |||
663 | current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1) && (mode == 0)) ? 1 : 0; | |||
664 | current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0; | |||
665 | ||||
666 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
667 | ||||
668 | /* Create the index for machine b in this state */ | |||
669 | for (mainloop++, machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) { | |||
670 | int bindex; | |||
671 | /* Index b */ | |||
672 | bindex = (machine_b->in == IDENTITY2) ? UNKNOWN1 : machine_b->in; | |||
673 | if (bindex < 0 || machine_b->target < 0) | |||
674 | continue; | |||
675 | ||||
676 | iptr = (index+bindex)->tail; | |||
677 | if (iptr->mainloop != mainloop) { | |||
678 | iptr = (index+bindex)->tail = outarray+(bindex*(max2sigma+2)); | |||
679 | } else { | |||
680 | iptr++; | |||
681 | } | |||
682 | iptr->symin = machine_b->in; | |||
683 | iptr->symout = machine_b->out; | |||
684 | iptr->mainloop = mainloop; | |||
685 | iptr->target = machine_b->target; | |||
686 | (index+bindex)->tail = iptr; | |||
687 | } | |||
688 | ||||
689 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
690 | ||||
691 | /* If we have the same transition from (a,b)-> some state */ | |||
692 | /* If we have x:y y:z trans to some state */ | |||
693 | aout = machine_a->out; | |||
694 | ain = machine_a->in; | |||
695 | /* IDENTITY is indexed under UNKNOWN (see above) */ | |||
696 | asearch = (aout == IDENTITY2) ? UNKNOWN1 : aout; | |||
697 | if (aout < 0) continue; | |||
698 | iptr = outarray+(asearch*(max2sigma+2)); | |||
699 | currtail = (index+asearch)->tail + 1; | |||
700 | for ( ; iptr != currtail && iptr->mainloop == mainloop ; iptr++) { | |||
701 | ||||
702 | ain = machine_a->in; | |||
703 | aout = machine_a->out; | |||
704 | bin = iptr->symin; | |||
705 | bout = iptr->symout; | |||
706 | ||||
707 | if (aout == IDENTITY2 && bin == UNKNOWN1) { | |||
708 | ain = aout = UNKNOWN1; | |||
709 | } | |||
710 | else if (aout == UNKNOWN1 && bin == IDENTITY2) { | |||
711 | bin = bout = UNKNOWN1; | |||
712 | } | |||
713 | ||||
714 | if (!g_compose_tristate) { | |||
715 | if (bin == aout && bin != -1 && bin != EPSILON0) { | |||
716 | /* mode -> 0 */ | |||
717 | if ((target_number = triplet_hash_find(th, machine_a->target, iptr->target, 0)) == -1) { | |||
718 | STACK_3_PUSH(0, iptr->target, machine_a->target)int_stack_push(0); int_stack_push(iptr->target); int_stack_push (machine_a->target);; | |||
719 | target_number = triplet_hash_insert(th, machine_a->target, iptr->target, 0); | |||
720 | } | |||
721 | ||||
722 | fsm_state_add_arc(current_state, ain, bout, target_number, current_final, current_start); | |||
723 | } | |||
724 | } | |||
725 | ||||
726 | else if (g_compose_tristate) { | |||
727 | if (bin == aout && bin != -1 && ((bin != EPSILON0 || mode == 0))) { | |||
728 | /* mode -> 0 */ | |||
729 | if ((target_number = triplet_hash_find(th, machine_a->target, iptr->target, 0)) == -1) { | |||
730 | STACK_3_PUSH(0, iptr->target, machine_a->target)int_stack_push(0); int_stack_push(iptr->target); int_stack_push (machine_a->target);; | |||
731 | target_number = triplet_hash_insert(th, machine_a->target, iptr->target, 0); | |||
732 | } | |||
733 | ||||
734 | fsm_state_add_arc(current_state, ain, bout, target_number, current_final, current_start); | |||
735 | } | |||
736 | } | |||
737 | ||||
738 | } | |||
739 | } | |||
740 | ||||
741 | /* Treat epsilon outputs on machine a (may include flags) */ | |||
742 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a
| |||
743 | aout = machine_a->out; | |||
744 | if (aout != EPSILON0 && g_flag_is_epsilon == 0) | |||
745 | continue; | |||
746 | ain = machine_a->in; | |||
747 | ||||
748 | if (g_flag_is_epsilon && aout != -1 && mode == 0 && *(is_flag+aout)) { | |||
749 | if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) { | |||
750 | STACK_3_PUSH(0, b, machine_a->target)int_stack_push(0); int_stack_push(b); int_stack_push(machine_a ->target);; | |||
751 | target_number = triplet_hash_insert(th, machine_a->target, b, 0); | |||
752 | } | |||
753 | fsm_state_add_arc(current_state, ain, aout, target_number, current_final, current_start); | |||
754 | } | |||
755 | ||||
756 | if (!g_compose_tristate) { | |||
757 | /* Check A:0 arcs on upper side */ | |||
758 | if (aout == EPSILON0 && mode == 0) { | |||
759 | /* mode -> 0 */ | |||
760 | if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) { | |||
761 | STACK_3_PUSH(0, b, machine_a->target)int_stack_push(0); int_stack_push(b); int_stack_push(machine_a ->target);; | |||
762 | target_number = triplet_hash_insert(th, machine_a->target, b, 0); | |||
763 | } | |||
764 | ||||
765 | fsm_state_add_arc(current_state, ain, EPSILON0, target_number, current_final, current_start); | |||
766 | } | |||
767 | } | |||
768 | ||||
769 | else if (g_compose_tristate) { | |||
770 | if (aout == EPSILON0 && (mode != 2)) { | |||
771 | /* mode -> 1 */ | |||
772 | if ((target_number = triplet_hash_find(th, machine_a->target, b, 1)) == -1) { | |||
773 | STACK_3_PUSH(1, b, machine_a->target)int_stack_push(1); int_stack_push(b); int_stack_push(machine_a ->target);; | |||
774 | target_number = triplet_hash_insert(th, machine_a->target, b, 1); | |||
775 | } | |||
776 | ||||
777 | fsm_state_add_arc(current_state, ain, EPSILON0, target_number, current_final, current_start); | |||
778 | ||||
779 | } | |||
780 | } | |||
781 | ||||
782 | } | |||
783 | /* Treat epsilon inputs on machine b (may include flags) */ | |||
784 | for (machine_b = (point_b+b)->transitions; machine_b->state_no == b
| |||
785 | bin = machine_b->in; | |||
786 | if (bin
| |||
787 | continue; | |||
788 | ||||
789 | bout = machine_b->out; | |||
790 | ||||
791 | if (g_flag_is_epsilon
| |||
| ||||
792 | if ((target_number = triplet_hash_find(th, a, machine_b->target, 1)) == -1) { | |||
793 | STACK_3_PUSH(1, machine_b->target,a)int_stack_push(1); int_stack_push(machine_b->target); int_stack_push (a);; | |||
794 | target_number = triplet_hash_insert(th, a, machine_b->target, 1); | |||
795 | } | |||
796 | fsm_state_add_arc(current_state, bin, bout, target_number, current_final, current_start); | |||
797 | } | |||
798 | ||||
799 | if (!g_compose_tristate) { | |||
800 | /* Check 0:A arcs on lower side */ | |||
801 | if (bin == EPSILON0) { | |||
802 | /* mode -> 1 */ | |||
803 | if ((target_number = triplet_hash_find(th, a, machine_b->target, 1)) == -1) { | |||
804 | STACK_3_PUSH(1, machine_b->target,a)int_stack_push(1); int_stack_push(machine_b->target); int_stack_push (a);; | |||
805 | target_number = triplet_hash_insert(th, a, machine_b->target, 1); | |||
806 | } | |||
807 | ||||
808 | fsm_state_add_arc(current_state, EPSILON0, bout, target_number, current_final, current_start); | |||
809 | } | |||
810 | } | |||
811 | ||||
812 | else if (g_compose_tristate) { | |||
813 | /* Check 0:A arcs on lower side */ | |||
814 | if (bin == EPSILON0 && mode != 1) { | |||
815 | /* mode -> 1 */ | |||
816 | if ((target_number = triplet_hash_find(th, a, machine_b->target, 2)) == -1) { | |||
817 | STACK_3_PUSH(2, machine_b->target, a)int_stack_push(2); int_stack_push(machine_b->target); int_stack_push (a);; | |||
818 | target_number = triplet_hash_insert(th, a, machine_b->target, 2); | |||
819 | } | |||
820 | ||||
821 | fsm_state_add_arc(current_state, EPSILON0, bout, target_number, current_final, current_start); | |||
822 | } | |||
823 | } | |||
824 | } | |||
825 | fsm_state_end_state(); | |||
826 | } | |||
827 | ||||
828 | free(net1->states); | |||
829 | fsm_destroy(net2); | |||
830 | fsm_state_close(net1); | |||
831 | free(point_a); | |||
832 | free(point_b); | |||
833 | free(index); | |||
834 | free(outarray); | |||
835 | ||||
836 | if (g_flag_is_epsilon) | |||
837 | free(is_flag); | |||
838 | triplet_hash_free(th); | |||
839 | net1 = fsm_topsort(fsm_coaccessible(net1)); | |||
840 | return(fsm_coaccessible(net1)); | |||
841 | } | |||
842 | ||||
843 | struct mergesigma *add_to_mergesigma(struct mergesigma *msigma, struct sigma *sigma, short presence) { | |||
844 | int number = 0; | |||
845 | ||||
846 | if (msigma->number == -1) { | |||
847 | number = 2; | |||
848 | } else { | |||
849 | msigma->next = malloc(sizeof(struct mergesigma)); | |||
850 | number = msigma->number; | |||
851 | msigma = msigma->next; | |||
852 | msigma->next = NULL((void*)0); | |||
853 | } | |||
854 | ||||
855 | if (sigma->number < 3) { | |||
856 | msigma->number = sigma->number; | |||
857 | } else { | |||
858 | if (number < 3) | |||
859 | number = 2; | |||
860 | msigma->number = number+1; | |||
861 | } | |||
862 | msigma->symbol = sigma->symbol; | |||
863 | msigma->presence = presence; | |||
864 | return(msigma); | |||
865 | } | |||
866 | ||||
867 | struct sigma *copy_mergesigma(struct mergesigma *mergesigma) { | |||
868 | struct sigma *sigma, *new_sigma; | |||
869 | ||||
870 | sigma = new_sigma = NULL((void*)0); | |||
871 | while(mergesigma != NULL((void*)0)) { | |||
872 | if (sigma == NULL((void*)0)) { | |||
873 | sigma = malloc(sizeof(struct sigma)); | |||
874 | new_sigma = sigma; | |||
875 | } else { | |||
876 | sigma->next = malloc(sizeof(struct sigma)); | |||
877 | sigma = sigma->next; | |||
878 | } | |||
879 | sigma->next = NULL((void*)0); | |||
880 | sigma->number = mergesigma->number; | |||
881 | ||||
882 | sigma->symbol = NULL((void*)0); | |||
883 | if (mergesigma->symbol != NULL((void*)0)) | |||
884 | sigma->symbol = strdup(mergesigma->symbol); | |||
885 | mergesigma = mergesigma->next; | |||
886 | } | |||
887 | return(new_sigma); | |||
888 | } | |||
889 | ||||
890 | void fsm_merge_sigma(struct fsm *net1, struct fsm *net2) { | |||
891 | ||||
892 | struct sigma *sigma_1, *sigma_2, *new_sigma_1 = NULL((void*)0), *new_sigma_2 = NULL((void*)0); | |||
893 | struct mergesigma *mergesigma, *mergesigma2, *start_mergesigma; | |||
894 | struct fsm_state *fsm_state, *new_1_state, *new_2_state; | |||
895 | int i, j, end_1 = 0, end_2 = 0, sigmasizes, *mapping_1, *mapping_2, equal = 1, unknown_1 = 0, unknown_2 = 0, net_unk = 0, net_adds = 0, net_lines; | |||
896 | ||||
897 | if (!fsm_options.skip_word_boundary_marker) { | |||
898 | i = sigma_find(".#.", net1->sigma); | |||
899 | j = sigma_find(".#.", net2->sigma); | |||
900 | if (i != -1 && j == -1) { | |||
901 | sigma_add(".#.", net2->sigma); | |||
902 | sigma_sort(net2); | |||
903 | } | |||
904 | if (j != -1 && i == -1) { | |||
905 | sigma_add(".#.", net1->sigma); | |||
906 | sigma_sort(net1); | |||
907 | } | |||
908 | } | |||
909 | ||||
910 | sigma_1 = net1->sigma; | |||
911 | sigma_2 = net2->sigma; | |||
912 | ||||
913 | sigmasizes = sigma_max(sigma_1) + sigma_max(sigma_2) + 3; | |||
914 | ||||
915 | mapping_1 = malloc(sizeof(int)*sigmasizes); | |||
916 | mapping_2 = malloc(sizeof(int)*sigmasizes); | |||
917 | ||||
918 | /* Fill mergesigma */ | |||
919 | ||||
920 | mergesigma = malloc(sizeof(struct mergesigma)); | |||
921 | mergesigma->number = -1; | |||
922 | mergesigma->symbol = NULL((void*)0); | |||
923 | mergesigma->next = NULL((void*)0); | |||
924 | start_mergesigma = mergesigma; | |||
925 | ||||
926 | /* Loop over sigma 1, sigma 2 */ | |||
927 | for (;;) { | |||
928 | if (sigma_1 == NULL((void*)0)) | |||
929 | end_1 = 1; | |||
930 | if (sigma_2 == NULL((void*)0)) | |||
931 | end_2 = 1; | |||
932 | if (end_1 && end_2) | |||
933 | break; | |||
934 | if (end_2) { | |||
935 | /* Treating only 1 now */ | |||
936 | mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1); | |||
937 | *(mapping_1+(sigma_1->number)) = mergesigma->number; | |||
938 | sigma_1 = sigma_1->next; | |||
939 | equal = 0; | |||
940 | continue; | |||
941 | } | |||
942 | else if (end_1) { | |||
943 | /* Treating only 2 now */ | |||
944 | mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2); | |||
945 | *(mapping_2+(sigma_2->number)) = mergesigma->number; | |||
946 | sigma_2 = sigma_2->next; | |||
947 | equal = 0; | |||
948 | continue; | |||
949 | } | |||
950 | ||||
951 | else { | |||
952 | ||||
953 | /* Both alive */ | |||
954 | ||||
955 | /* 1 or 2 contains special characters */ | |||
956 | if ((sigma_1->number <= IDENTITY2) || (sigma_2->number <= IDENTITY2)) { | |||
957 | ||||
958 | /* Treating zeros or unknowns */ | |||
959 | ||||
960 | if ((sigma_1->number == UNKNOWN1) || (sigma_1->number == IDENTITY2)) | |||
961 | unknown_1 = 1; | |||
962 | if ((sigma_2->number == UNKNOWN1) || (sigma_2->number == IDENTITY2)) | |||
963 | unknown_2 = 1; | |||
964 | ||||
965 | if (sigma_1->number == sigma_2->number) { | |||
966 | mergesigma = add_to_mergesigma(mergesigma, sigma_1, 3); | |||
967 | sigma_1 = sigma_1->next; | |||
968 | sigma_2 = sigma_2->next; | |||
969 | } | |||
970 | else if (sigma_1->number < sigma_2->number) { | |||
971 | mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1); | |||
972 | sigma_1 = sigma_1->next; | |||
973 | equal = 0; | |||
974 | } | |||
975 | else { | |||
976 | mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2); | |||
977 | sigma_2 = sigma_2->next; | |||
978 | equal = 0; | |||
979 | } | |||
980 | continue; | |||
981 | } | |||
982 | /* Both contain non-special chars */ | |||
983 | if (strcmp(sigma_1->symbol, sigma_2->symbol) == 0) { | |||
984 | mergesigma = add_to_mergesigma(mergesigma, sigma_1, 3); | |||
985 | /* Add symbol numbers to mapping */ | |||
986 | *(mapping_1+(sigma_1->number)) = mergesigma->number; | |||
987 | *(mapping_2+(sigma_2->number)) = mergesigma->number; | |||
988 | ||||
989 | sigma_1 = sigma_1->next; | |||
990 | sigma_2 = sigma_2->next; | |||
991 | } | |||
992 | else if (strcmp(sigma_1->symbol, sigma_2->symbol) < 0) { | |||
993 | mergesigma = add_to_mergesigma(mergesigma, sigma_1, 1); | |||
994 | *(mapping_1+(sigma_1->number)) = mergesigma->number; | |||
995 | sigma_1 = sigma_1->next; | |||
996 | equal = 0; | |||
997 | } | |||
998 | else { | |||
999 | mergesigma = add_to_mergesigma(mergesigma, sigma_2, 2); | |||
1000 | *(mapping_2+(sigma_2->number)) = mergesigma->number; | |||
1001 | sigma_2 = sigma_2->next; | |||
1002 | equal = 0; | |||
1003 | } | |||
1004 | continue; | |||
1005 | } | |||
1006 | } | |||
1007 | ||||
1008 | /* Go over both net1 and net2 and replace arc numbers with new mappings */ | |||
1009 | ||||
1010 | fsm_state = net1->states; | |||
1011 | for (i=0; (fsm_state+i)->state_no != -1; i++) { | |||
1012 | if ((fsm_state+i)->in > 2) | |||
1013 | (fsm_state+i)->in = *(mapping_1+(fsm_state+i)->in); | |||
1014 | if ((fsm_state+i)->out > 2) | |||
1015 | (fsm_state+i)->out = *(mapping_1+(fsm_state+i)->out); | |||
1016 | } | |||
1017 | fsm_state = net2->states; | |||
1018 | for (i=0; (fsm_state+i)->state_no != -1; i++) { | |||
1019 | if ((fsm_state+i)->in > 2) | |||
1020 | (fsm_state+i)->in = *(mapping_2+(fsm_state+i)->in); | |||
1021 | if ((fsm_state+i)->out > 2) | |||
1022 | (fsm_state+i)->out = *(mapping_2+(fsm_state+i)->out); | |||
1023 | } | |||
1024 | ||||
1025 | /* Copy mergesigma to net1, net2 */ | |||
1026 | ||||
1027 | new_sigma_1 = copy_mergesigma(start_mergesigma); | |||
1028 | new_sigma_2 = copy_mergesigma(start_mergesigma); | |||
1029 | ||||
1030 | fsm_sigma_destroy(net1->sigma); | |||
1031 | fsm_sigma_destroy(net2->sigma); | |||
1032 | ||||
1033 | net1->sigma = new_sigma_1; | |||
1034 | net2->sigma = new_sigma_2; | |||
1035 | ||||
1036 | /* Expand on ?, ?:x, y:? */ | |||
1037 | ||||
1038 | if (unknown_1 && !equal) { | |||
1039 | /* Expand net 1 */ | |||
1040 | fsm_state = net1->states; | |||
1041 | net_lines = find_arccount(net1->states); | |||
1042 | for(mergesigma = start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) { | |||
1043 | if(mergesigma->presence == 2) { | |||
1044 | net_unk++; | |||
1045 | } | |||
1046 | } | |||
1047 | for(net_adds = 0, i=0; (fsm_state+i)->state_no != -1; i++) { | |||
1048 | if ((fsm_state+i)->in == IDENTITY2) | |||
1049 | net_adds += net_unk; | |||
1050 | if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out != UNKNOWN1)) | |||
1051 | net_adds += net_unk; | |||
1052 | if (((fsm_state+i)->out == UNKNOWN1) && ((fsm_state+i)->in != UNKNOWN1)) | |||
1053 | net_adds += net_unk; | |||
1054 | if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1)) | |||
1055 | net_adds += net_unk*net_unk - net_unk + 2*net_unk; | |||
1056 | } | |||
1057 | ||||
1058 | new_1_state = malloc(sizeof(struct fsm_state)*(net_adds+net_lines+1)); | |||
1059 | for(i=0,j=0; (fsm_state+i)->state_no != -1; i++) { | |||
1060 | ||||
1061 | if ((fsm_state+i)->in == IDENTITY2) { | |||
1062 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1063 | j++; | |||
1064 | for (mergesigma=start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) { | |||
1065 | if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) { | |||
1066 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1067 | j++; | |||
1068 | } | |||
1069 | } | |||
1070 | } | |||
1071 | ||||
1072 | if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out != UNKNOWN1) { | |||
1073 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1074 | j++; | |||
1075 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1076 | if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) { | |||
1077 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1078 | j++; | |||
1079 | } | |||
1080 | } | |||
1081 | } | |||
1082 | ||||
1083 | if (((fsm_state+i)->in != UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1)) { | |||
1084 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1085 | j++; | |||
1086 | for (mergesigma=start_mergesigma; mergesigma != NULL((void*)0); mergesigma = mergesigma->next) { | |||
1087 | if ((mergesigma->presence == 2) && (mergesigma->number > IDENTITY2)) { | |||
1088 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1089 | j++; | |||
1090 | } | |||
1091 | } | |||
1092 | } | |||
1093 | ||||
1094 | /* Replace ?:? with ?:[all unknowns] [all unknowns]:? and [all unknowns]:[all unknowns] where a != b */ | |||
1095 | if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) { | |||
1096 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1097 | j++; | |||
1098 | for (mergesigma2=start_mergesigma; mergesigma2 != NULL((void*)0) ; mergesigma2 = mergesigma2->next) { | |||
1099 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1100 | if (((mergesigma->presence == 2 && mergesigma2->presence == 2 && mergesigma->number > IDENTITY2 && mergesigma2->number > IDENTITY2) || (mergesigma->number == UNKNOWN1 && mergesigma2->number > IDENTITY2 && mergesigma2->presence == 2) || (mergesigma2->number == UNKNOWN1 && mergesigma->number > IDENTITY2 && mergesigma->presence == 2)) && mergesigma->number != mergesigma2->number) { | |||
1101 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma2->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1102 | j++; | |||
1103 | } | |||
1104 | } | |||
1105 | } | |||
1106 | } | |||
1107 | ||||
1108 | /* Simply copy arcs that are not IDENTITY or UNKNOWN */ | |||
1109 | if (((fsm_state+i)->in > IDENTITY2 || (fsm_state+i)->in == EPSILON0) && ((fsm_state+i)->out > IDENTITY2 || (fsm_state+i)->out == EPSILON0)) { | |||
1110 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1111 | j++; | |||
1112 | } | |||
1113 | ||||
1114 | if ((fsm_state+i)->in == -1) { | |||
1115 | add_fsm_arc(new_1_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1116 | j++; | |||
1117 | } | |||
1118 | } | |||
1119 | ||||
1120 | add_fsm_arc(new_1_state, j, -1, -1, -1, -1, -1, -1); | |||
1121 | free(net1->states); | |||
1122 | net1->states = new_1_state; | |||
1123 | } | |||
1124 | ||||
1125 | if (unknown_2 && !equal) { | |||
1126 | /* Expand net 2 */ | |||
1127 | fsm_state = net2->states; | |||
1128 | net_lines = find_arccount(net2->states); | |||
1129 | for(net_unk = 0, mergesigma = start_mergesigma; mergesigma != NULL((void*)0); mergesigma=mergesigma->next) { | |||
1130 | if(mergesigma->presence == 1) { | |||
1131 | net_unk++; | |||
1132 | } | |||
1133 | } | |||
1134 | ||||
1135 | for(net_adds = 0, i=0; (fsm_state+i)->state_no != -1; i++) { | |||
1136 | if ((fsm_state+i)->in == IDENTITY2) | |||
1137 | net_adds += net_unk; | |||
1138 | if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out != UNKNOWN1)) | |||
1139 | net_adds += net_unk; | |||
1140 | if (((fsm_state+i)->out == UNKNOWN1) && ((fsm_state+i)->in != UNKNOWN1)) | |||
1141 | net_adds += net_unk; | |||
1142 | if (((fsm_state+i)->in == UNKNOWN1) && ((fsm_state+i)->out == UNKNOWN1)) | |||
1143 | net_adds += net_unk*net_unk - net_unk + 2*net_unk; | |||
1144 | } | |||
1145 | ||||
1146 | /* We need net_add new lines in fsm_state */ | |||
1147 | new_2_state = malloc(sizeof(struct fsm_state)*(net_adds+net_lines+1)); | |||
1148 | for(i=0,j=0; (fsm_state+i)->state_no != -1; i++) { | |||
1149 | ||||
1150 | if ((fsm_state+i)->in == IDENTITY2) { | |||
1151 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1152 | j++; | |||
1153 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1154 | if ((mergesigma->presence == 1) && (mergesigma->number > IDENTITY2)) { | |||
1155 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1156 | j++; | |||
1157 | } | |||
1158 | } | |||
1159 | } | |||
1160 | ||||
1161 | if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out != UNKNOWN1) { | |||
1162 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1163 | j++; | |||
1164 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1165 | if (mergesigma->presence == 1 && mergesigma->number > IDENTITY2) { | |||
1166 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1167 | j++; | |||
1168 | } | |||
1169 | } | |||
1170 | } | |||
1171 | ||||
1172 | if ((fsm_state+i)->in != UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) { | |||
1173 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1174 | j++; | |||
1175 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1176 | if ((mergesigma->presence == 1) && (mergesigma->number > IDENTITY2)) { | |||
1177 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, mergesigma->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1178 | j++; | |||
1179 | } | |||
1180 | } | |||
1181 | } | |||
1182 | ||||
1183 | if ((fsm_state+i)->in == UNKNOWN1 && (fsm_state+i)->out == UNKNOWN1) { | |||
1184 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1185 | j++; | |||
1186 | for (mergesigma2=start_mergesigma; mergesigma2 != NULL((void*)0) ; mergesigma2 = mergesigma2->next) { | |||
1187 | for (mergesigma=start_mergesigma; mergesigma!=NULL((void*)0); mergesigma=mergesigma->next) { | |||
1188 | if (((mergesigma->presence == 1 && mergesigma2->presence == 1 && mergesigma->number > IDENTITY2 && mergesigma2->number > IDENTITY2) || (mergesigma->number == UNKNOWN1 && mergesigma2->number > IDENTITY2 && mergesigma2->presence == 1) || (mergesigma2->number == UNKNOWN1 && mergesigma->number > IDENTITY2 && mergesigma->presence == 1)) && mergesigma->number != mergesigma2->number) { | |||
1189 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, mergesigma->number, mergesigma2->number, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1190 | j++; | |||
1191 | } | |||
1192 | } | |||
1193 | } | |||
1194 | } | |||
1195 | ||||
1196 | /* Simply copy arcs that are not IDENTITY or UNKNOWN */ | |||
1197 | if (((fsm_state+i)->in > IDENTITY2 || (fsm_state+i)->in == EPSILON0) && ((fsm_state+i)->out > IDENTITY2 || (fsm_state+i)->out == EPSILON0)) { | |||
1198 | ||||
1199 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1200 | j++; | |||
1201 | } | |||
1202 | ||||
1203 | if ((fsm_state+i)->in == -1) { | |||
1204 | add_fsm_arc(new_2_state, j, (fsm_state+i)->state_no, (fsm_state+i)->in, (fsm_state+i)->out, (fsm_state+i)->target, (fsm_state+i)->final_state, (fsm_state+i)->start_state); | |||
1205 | j++; | |||
1206 | } | |||
1207 | } | |||
1208 | ||||
1209 | add_fsm_arc(new_2_state, j, -1, -1, -1, -1, -1, -1); | |||
1210 | free(net2->states); | |||
1211 | net2->states = new_2_state; | |||
1212 | } | |||
1213 | free(mapping_1); | |||
1214 | free(mapping_2); | |||
1215 | ||||
1216 | /* Free structure */ | |||
1217 | for (mergesigma2 = NULL((void*)0); start_mergesigma != NULL((void*)0); ) { | |||
1218 | mergesigma2 = start_mergesigma; | |||
1219 | start_mergesigma = start_mergesigma->next; | |||
1220 | free(mergesigma2); | |||
1221 | } | |||
1222 | } | |||
1223 | ||||
1224 | ||||
1225 | int add_fsm_arc(struct fsm_state *fsm, int offset, int state_no, int in, int out, int target, int final_state, int start_state) { | |||
1226 | ||||
1227 | (fsm+offset)->state_no = state_no; | |||
1228 | (fsm+offset)->in = in; | |||
1229 | (fsm+offset)->out= out; | |||
1230 | (fsm+offset)->target = target; | |||
1231 | (fsm+offset)->final_state = final_state; | |||
1232 | (fsm+offset)->start_state = start_state; | |||
1233 | offset++; | |||
1234 | return(offset); | |||
1235 | } | |||
1236 | ||||
1237 | ||||
1238 | void fsm_count(struct fsm *net) { | |||
1239 | struct fsm_state *fsm; | |||
1240 | int i, linecount, arccount, oldstate, finalcount, maxstate; | |||
1241 | linecount = arccount = finalcount = maxstate = 0; | |||
1242 | ||||
1243 | oldstate = -1; | |||
1244 | ||||
1245 | fsm = net->states; | |||
1246 | for (i=0; (fsm+i)->state_no != -1; i++) { | |||
1247 | if ((fsm+i)->state_no > maxstate) | |||
1248 | maxstate = (fsm+i)->state_no; | |||
1249 | ||||
1250 | linecount++; | |||
1251 | if ((fsm+i)->target != -1) { | |||
1252 | arccount++; | |||
1253 | // if (((fsm+i)->in != (fsm+i)->out) || ((fsm+i)->in == UNKNOWN) || ((fsm+i)->out == UNKNOWN)) | |||
1254 | // arity = 2; | |||
1255 | } | |||
1256 | if ((fsm+i)->state_no != oldstate) { | |||
1257 | if ((fsm+i)->final_state) { | |||
1258 | finalcount++; | |||
1259 | } | |||
1260 | oldstate = (fsm+i)->state_no; | |||
1261 | } | |||
1262 | } | |||
1263 | ||||
1264 | linecount++; | |||
1265 | net->statecount = maxstate+1; | |||
1266 | net->linecount = linecount; | |||
1267 | net->arccount = arccount; | |||
1268 | net->finalcount = finalcount; | |||
1269 | } | |||
1270 | ||||
1271 | static void fsm_add_to_states(struct fsm *net, int add) { | |||
1272 | struct fsm_state *fsm; | |||
1273 | int i; | |||
1274 | ||||
1275 | fsm=net->states; | |||
1276 | for(i=0; (fsm+i)->state_no != -1; i++) { | |||
1277 | (fsm+i)->state_no = (fsm+i)->state_no + add; | |||
1278 | if ((fsm+i)->target != -1) | |||
1279 | (fsm+i)->target = (fsm+i)->target + add; | |||
1280 | } | |||
1281 | } | |||
1282 | ||||
1283 | struct fsm *fsm_concat_m_n(struct fsm *net1, int m, int n) { | |||
1284 | struct fsm *acc; | |||
1285 | int i; | |||
1286 | acc = fsm_empty_string(); | |||
1287 | for (i = 1; i <= n ;i++) { | |||
1288 | if (i > m) | |||
1289 | acc = fsm_concat(acc, fsm_optionality(fsm_copy(net1))); | |||
1290 | else | |||
1291 | acc = fsm_concat(acc, fsm_copy(net1)); | |||
1292 | } | |||
1293 | fsm_destroy(net1); | |||
1294 | return(acc); | |||
1295 | } | |||
1296 | ||||
1297 | struct fsm *fsm_concat_n(struct fsm *net1, int n) { | |||
1298 | return(fsm_concat_m_n(net1,n,n)); | |||
1299 | } | |||
1300 | ||||
1301 | struct fsm *fsm_concat(struct fsm *net1, struct fsm *net2) { | |||
1302 | struct fsm_state *fsm1, *fsm2, *new_fsm; | |||
1303 | int i,j,current_final; | |||
1304 | ||||
1305 | fsm_merge_sigma(net1, net2); | |||
1306 | ||||
1307 | fsm1 = net1->states; | |||
1308 | fsm2 = net2->states; | |||
1309 | fsm_count(net1); | |||
1310 | fsm_count(net2); | |||
1311 | /* The concatenation of a language with no final state should yield the empty language */ | |||
1312 | if ((net1->finalcount == 0) || (net2->finalcount == 0)) { | |||
1313 | fsm_destroy(net1); | |||
1314 | fsm_destroy(net2); | |||
1315 | net1 = fsm_empty_set(); | |||
1316 | return(net1); | |||
1317 | } | |||
1318 | ||||
1319 | /* Add |fsm1| states to the state numbers of fsm2 */ | |||
1320 | fsm_add_to_states(net2, net1->statecount); | |||
1321 | ||||
1322 | new_fsm = malloc(((sizeof(struct fsm_state))*(net1->linecount + net2->linecount + net1->finalcount + 2 ))); | |||
1323 | current_final = -1; | |||
1324 | /* Copy fsm1, fsm2 after each other, adding appropriate epsilon arcs */ | |||
1325 | for(i=0,j=0; (fsm1+i)->state_no != -1; i++) { | |||
1326 | if (((fsm1+i)->final_state == 1) && ((fsm1+i)->state_no != current_final)) { | |||
1327 | add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, EPSILON0, EPSILON0, net1->statecount, 0, (fsm1+i)->start_state); | |||
1328 | current_final = (fsm1+i)->state_no; | |||
1329 | j++; | |||
1330 | } | |||
1331 | if (!(((fsm1+i)->target == -1) && ((fsm1+i)->final_state == 1))) { | |||
1332 | add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, 0, (fsm1+i)->start_state); | |||
1333 | j++; | |||
1334 | } | |||
1335 | } | |||
1336 | ||||
1337 | for(i=0; (fsm2+i)->state_no != -1; i++, j++) { | |||
1338 | add_fsm_arc(new_fsm, j, (fsm2+i)->state_no, (fsm2+i)->in, (fsm2+i)->out, (fsm2+i)->target, (fsm2+i)->final_state, 0); | |||
1339 | } | |||
1340 | add_fsm_arc(new_fsm, j, -1, -1, -1, -1, -1, -1); | |||
1341 | free(net1->states); | |||
1342 | fsm_destroy(net2); | |||
1343 | net1->states = new_fsm; | |||
1344 | if (sigma_find_number(EPSILON0, net1->sigma) == -1) { | |||
1345 | sigma_add_special(EPSILON0, net1->sigma); | |||
1346 | } | |||
1347 | fsm_count(net1); | |||
1348 | net1->is_epsilon_free = NO0; | |||
1349 | net1->is_deterministic = NO0; | |||
1350 | net1->is_minimized = NO0; | |||
1351 | net1->is_pruned = NO0; | |||
1352 | return(fsm_minimize(net1)); | |||
1353 | } | |||
1354 | ||||
1355 | struct fsm *fsm_union(struct fsm *net1, struct fsm *net2) { | |||
1356 | struct fsm_state *new_fsm, *fsm1, *fsm2; | |||
1357 | int i, j, net1_offset, net2_offset, new_target, arccount; | |||
1358 | ||||
1359 | fsm_merge_sigma(net1, net2); | |||
1360 | ||||
1361 | fsm_count(net1); | |||
1362 | fsm_count(net2); | |||
1363 | ||||
1364 | fsm1 = net1->states; | |||
1365 | fsm2 = net2->states; | |||
1366 | ||||
1367 | net1_offset = 1; | |||
1368 | net2_offset = net1->statecount + 1; | |||
1369 | new_fsm = malloc((net1->linecount + net2->linecount + 2) * sizeof(struct fsm_state)); | |||
1370 | ||||
1371 | j = 0; | |||
1372 | ||||
1373 | add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, net1_offset, 0 , 1); | |||
1374 | add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, net2_offset, 0 , 1); | |||
1375 | arccount = 2; | |||
1376 | for (i=0 ; (fsm1+i)->state_no != -1; i++) { | |||
1377 | new_target = (fsm1+i)->target == -1 ? -1 : (fsm1+i)->target + net1_offset; | |||
1378 | add_fsm_arc(new_fsm, j++, (fsm1+i)->state_no + net1_offset, (fsm1+i)->in, (fsm1+i)->out, new_target, (fsm1+i)->final_state, 0); | |||
1379 | if (new_target != -1) arccount++; | |||
1380 | } | |||
1381 | for (i=0 ; (fsm2+i)->state_no != -1; i++) { | |||
1382 | new_target = (fsm2+i)->target == -1 ? -1 : (fsm2+i)->target + net2_offset; | |||
1383 | add_fsm_arc(new_fsm, j++, (fsm2+i)->state_no + net2_offset, (fsm2+i)->in, (fsm2+i)->out, new_target, (fsm2+i)->final_state, 0); | |||
1384 | if (new_target != -1) arccount++; | |||
1385 | } | |||
1386 | add_fsm_arc(new_fsm, j++, -1, -1, -1, -1, -1, -1); | |||
1387 | free(net1->states); | |||
1388 | net1->states = new_fsm; | |||
1389 | net1->statecount = net1->statecount + net2->statecount + 1; | |||
1390 | net1->linecount = j; | |||
1391 | net1->arccount = arccount; | |||
1392 | net1->finalcount = net1->finalcount + net2->finalcount; | |||
1393 | fsm_destroy(net2); | |||
1394 | fsm_update_flags(net1,NO0,NO0,NO0,NO0,UNK2,NO0); | |||
1395 | if (sigma_find_number(EPSILON0, net1->sigma) == -1) { | |||
1396 | sigma_add_special(EPSILON0, net1->sigma); | |||
1397 | } | |||
1398 | return(net1); | |||
1399 | } | |||
1400 | ||||
1401 | struct fsm *fsm_completes(struct fsm *net, int operation) { | |||
1402 | struct fsm_state *fsm, *new_fsm; | |||
1403 | int i, j, offset, statecount, sigsize, *state_table, sink_state, target, last_sigma = 0, arccount = 0, incomplete; | |||
1404 | short *starts, *finals, *sinks; | |||
1405 | ||||
1406 | /* TODO: this currently relies on that the sigma is gap-free in its numbering */ | |||
1407 | /* which can't always be counted on, especially when reading external machines */ | |||
1408 | ||||
1409 | /* TODO: check arity */ | |||
1410 | ||||
1411 | if (net->is_minimized != YES1) | |||
1412 | net = fsm_minimize(net); | |||
1413 | ||||
1414 | incomplete = 0; | |||
1415 | fsm = net->states; | |||
1416 | if (sigma_find_number(UNKNOWN1, net->sigma) != -1) { | |||
1417 | sigma_remove("@_UNKNOWN_SYMBOL_@",net->sigma); | |||
1418 | } | |||
1419 | if (sigma_find_number(IDENTITY2, net->sigma) == -1) { | |||
1420 | sigma_add_special(IDENTITY2, net->sigma); | |||
1421 | incomplete = 1; | |||
1422 | } | |||
1423 | ||||
1424 | sigsize = sigma_size(net->sigma); | |||
1425 | last_sigma = sigma_max(net->sigma); | |||
1426 | ||||
1427 | if (sigma_find_number(EPSILON0, net->sigma) != -1) | |||
1428 | sigsize--; | |||
1429 | ||||
1430 | fsm_count(net); | |||
1431 | statecount = net->statecount; | |||
1432 | starts = malloc(sizeof(short)*(statecount+1)); /* +1 for sink state */ | |||
1433 | finals = malloc(sizeof(short)*(statecount+1)); | |||
1434 | sinks = malloc(sizeof(short)*(statecount+1)); | |||
1435 | ||||
1436 | /* Init starts, finals, sinks arrays */ | |||
1437 | ||||
1438 | for (i=0; i < statecount; i++) { | |||
1439 | *(sinks+i) = 1; | |||
1440 | *(finals+i) = 0; | |||
1441 | *(starts+i) = 0; | |||
1442 | } | |||
1443 | for (i=0; (fsm+i)->state_no != -1; i++) { | |||
1444 | if (operation == COMPLEMENT0) { | |||
1445 | if ((fsm+i)->final_state == 1) { | |||
1446 | (fsm+i)->final_state = 0; | |||
1447 | } else if ((fsm+i)->final_state == 0) { | |||
1448 | (fsm+i)->final_state = 1; | |||
1449 | } | |||
1450 | } | |||
1451 | if ((fsm+i)->target != -1) | |||
1452 | arccount++; | |||
1453 | starts[(fsm+i)->state_no] = (fsm+i)->start_state; | |||
1454 | finals[(fsm+i)->state_no] = (fsm+i)->final_state; | |||
1455 | if ((fsm+i)->final_state && operation != COMPLEMENT0) | |||
1456 | *(sinks+((fsm+i)->state_no)) = 0; | |||
1457 | if ((fsm+i)->final_state == 0 && (operation == COMPLEMENT0)) | |||
1458 | *(sinks+((fsm+i)->state_no)) = 0; | |||
1459 | if (((fsm+i)->target != -1) && ((fsm+i)->state_no != (fsm+i)->target)) | |||
1460 | *(sinks+((fsm+i)->state_no)) = 0; | |||
1461 | } | |||
1462 | ||||
1463 | net->is_loop_free = NO0; | |||
1464 | net->pathcount = PATHCOUNT_CYCLIC-1; | |||
1465 | ||||
1466 | if (incomplete == 0 && (arccount == (sigsize)*statecount)) { | |||
1467 | /* printf("Already complete!\n"); */ | |||
1468 | ||||
1469 | /* if (operation == COMPLEMENT) { */ | |||
1470 | /* for (i=0; (fsm+i)->state_no != -1; i++) { */ | |||
1471 | /* if ((fsm+i)->final_state) { */ | |||
1472 | /* (fsm+i)->final_state = 0; */ | |||
1473 | /* } else { */ | |||
1474 | /* (fsm+i)->final_state = 1; */ | |||
1475 | /* } */ | |||
1476 | /* } */ | |||
1477 | /* } */ | |||
1478 | free(starts); | |||
1479 | free(finals); | |||
1480 | free(sinks); | |||
1481 | net->is_completed = YES1; | |||
1482 | net->is_minimized = YES1; | |||
1483 | net->is_pruned = NO0; | |||
1484 | net->is_deterministic = YES1; | |||
1485 | return(net); | |||
1486 | } | |||
1487 | ||||
1488 | /* Find an existing sink state, or invent a new one */ | |||
1489 | ||||
1490 | for (i=0, sink_state = -1; i<statecount; i++) { | |||
1491 | if (sinks[i] == 1) { | |||
1492 | sink_state = i; | |||
1493 | break; | |||
1494 | } | |||
1495 | } | |||
1496 | ||||
1497 | if (sink_state == -1) { | |||
1498 | sink_state = statecount; | |||
1499 | *(starts+sink_state) = 0; | |||
1500 | if (operation == COMPLEMENT0) { | |||
1501 | *(finals+sink_state) = 1; | |||
1502 | } else { | |||
1503 | *(finals+sink_state) = 0; | |||
1504 | } | |||
1505 | statecount++; | |||
1506 | } | |||
1507 | ||||
1508 | ||||
1509 | /* We can build a state table without memory problems since the size */ | |||
1510 | /* of the completed machine will be |Sigma| * |States| in all cases */ | |||
1511 | ||||
1512 | sigsize += 2; | |||
1513 | ||||
1514 | state_table = malloc(sizeof(int)*sigsize*statecount); | |||
1515 | ||||
1516 | /* Init state table */ | |||
1517 | /* i = state #, j = sigma # */ | |||
1518 | for (i=0; i<statecount; i++) { | |||
1519 | for (j=0; j<sigsize; j++) { | |||
1520 | *(state_table+(i*sigsize+j)) = -1; | |||
1521 | } | |||
1522 | } | |||
1523 | ||||
1524 | for (i=0; (fsm+i)->state_no != -1; i++) { | |||
1525 | if ((fsm+i)->target != -1) { | |||
1526 | *(state_table+(((fsm+i)->state_no)*sigsize+((fsm+i)->in))) = (fsm+i)->target; | |||
1527 | } | |||
1528 | } | |||
1529 | /* Add looping arcs from and to sink state */ | |||
1530 | for (j=2; j<=last_sigma; j++) | |||
1531 | *(state_table+(sink_state*sigsize+j)) = sink_state; | |||
1532 | /* Add missing arcs to sink state from all states */ | |||
1533 | for (i=0; i<statecount; i++) { | |||
1534 | for (j=2; j<=last_sigma; j++) { | |||
1535 | if (*(state_table+(i*sigsize+j)) == -1) | |||
1536 | *(state_table+(i*sigsize+j)) = sink_state; | |||
1537 | } | |||
1538 | } | |||
1539 | ||||
1540 | new_fsm = malloc(sizeof(struct fsm_state)*(sigsize*statecount+1)); | |||
1541 | ||||
1542 | /* Complement requires toggling final, nonfinal states */ | |||
1543 | /* if (operation == COMPLEMENT) */ | |||
1544 | /* for (i=0; i < statecount; i++) */ | |||
1545 | /* *(finals+i) = *(finals+i) == 0 ? 1 : 0; */ | |||
1546 | ||||
1547 | for (i=0, offset = 0; i<statecount; i++) { | |||
1548 | for (j=2; j<=last_sigma; j++) { | |||
1549 | target = *(state_table+(i*sigsize+j)) == -1 ? sink_state : *(state_table+(i*sigsize+j)); | |||
1550 | add_fsm_arc(new_fsm, offset, i, j, j, target, finals[i], starts[i]); | |||
1551 | offset++; | |||
1552 | } | |||
1553 | } | |||
1554 | add_fsm_arc(new_fsm, offset, -1, -1, -1, -1, -1, -1); | |||
1555 | offset++; | |||
1556 | free(net->states); | |||
1557 | net->states = new_fsm; | |||
1558 | free(starts); | |||
1559 | free(finals); | |||
1560 | free(sinks); | |||
1561 | free(state_table); | |||
1562 | net->is_minimized = NO0; | |||
1563 | net->is_pruned = NO0; | |||
1564 | net->is_completed = YES1; | |||
1565 | net->statecount = statecount; | |||
1566 | return(net); | |||
1567 | } | |||
1568 | ||||
1569 | struct fsm *fsm_complete(struct fsm *net) { | |||
1570 | return(fsm_completes(net, COMPLETE1)); | |||
1571 | } | |||
1572 | ||||
1573 | struct fsm *fsm_complement(struct fsm *net) { | |||
1574 | return(fsm_completes(net, COMPLEMENT0)); | |||
1575 | } | |||
1576 | ||||
1577 | struct fsm *fsm_kleene_closure(struct fsm *net, int operation) { | |||
1578 | struct fsm_state *fsm, *new_fsm; | |||
1579 | int i, j, laststate, curr_state, curr_target, arccount; | |||
1580 | ||||
1581 | if (operation == OPTIONALITY2) { | |||
1582 | return(fsm_union(net,fsm_empty_string())); | |||
1583 | } | |||
1584 | ||||
1585 | net = fsm_minimize(net); | |||
1586 | fsm_count(net); | |||
1587 | ||||
1588 | fsm = net->states; | |||
1589 | ||||
1590 | new_fsm = malloc( (net->linecount + net->finalcount + 1) * sizeof(struct fsm_state)); | |||
1591 | ||||
1592 | j = 0; | |||
1593 | if (operation == KLEENE_STAR0) | |||
1594 | add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, 1, 1, 1); | |||
1595 | if (operation == KLEENE_PLUS1) | |||
1596 | add_fsm_arc(new_fsm, j++, 0, EPSILON0, EPSILON0, 1, 0, 1); | |||
1597 | laststate = 0; | |||
1598 | arccount = 1; | |||
1599 | for (i = 0 ; (fsm+i)->state_no != -1; i++, laststate = curr_state) { | |||
1600 | curr_state = (fsm+i)->state_no + 1; | |||
1601 | curr_target = (fsm+i)->target == -1 ? -1 : (fsm+i)->target + 1; | |||
1602 | if (curr_target == -1 && (fsm+i)->final_state == 1) { | |||
1603 | add_fsm_arc(new_fsm, j++, curr_state, EPSILON0, EPSILON0, 0, 1, 0); | |||
1604 | arccount++; | |||
1605 | continue; | |||
1606 | } | |||
1607 | if (curr_state != laststate && (fsm+i)->final_state == 1) { | |||
1608 | arccount++; | |||
1609 | add_fsm_arc(new_fsm, j++, curr_state, EPSILON0, EPSILON0, 0, 1, 0); | |||
1610 | } | |||
1611 | add_fsm_arc(new_fsm, j++, curr_state, (fsm+i)->in, (fsm+i)->out, curr_target, (fsm+i)->final_state, 0); | |||
1612 | if (curr_target != -1) arccount++; | |||
1613 | } | |||
1614 | add_fsm_arc(new_fsm, j++, -1,-1,-1,-1,-1,-1); | |||
1615 | net->statecount = net->statecount+1; | |||
1616 | net->linecount = j; | |||
1617 | net->finalcount = operation == KLEENE_STAR0 ? net->finalcount+1 : net->finalcount; | |||
1618 | net->arccount = arccount; | |||
1619 | net->pathcount = PATHCOUNT_UNKNOWN-3; | |||
1620 | free(net->states); | |||
1621 | net->states = new_fsm; | |||
1622 | if (sigma_find_number(EPSILON0, net->sigma) == -1) | |||
1623 | sigma_add_special(EPSILON0, net->sigma); | |||
1624 | fsm_update_flags(net,NO0,NO0,NO0,NO0,UNK2,NO0); | |||
1625 | return(net); | |||
1626 | } | |||
1627 | ||||
1628 | char *fsm_network_to_char(struct fsm *net) { | |||
1629 | struct sigma *sigma, *sigprev; | |||
1630 | sigma = net->sigma; | |||
1631 | if (sigma->number == -1) { | |||
1632 | return NULL((void*)0); | |||
1633 | } | |||
1634 | for (; sigma != NULL((void*)0) && sigma->number != -1 ; sigma = sigma->next) { | |||
1635 | sigprev = sigma; | |||
1636 | } | |||
1637 | return(strdup(sigprev->symbol)); | |||
1638 | } | |||
1639 | ||||
1640 | struct fsm *fsm_substitute_label(struct fsm *net, char *original, struct fsm *substitute) { | |||
1641 | ||||
1642 | struct fsm *outnet, *subnet2; | |||
1643 | struct fsm_read_handle *inh, *subh, *subh2; | |||
1644 | struct fsm_construct_handle *outh; | |||
1645 | char *subin, *subout; | |||
1646 | int i, repsym, source, target, in, out, addstate1, addstate2; | |||
1647 | ||||
1648 | fsm_merge_sigma(net, substitute); | |||
1649 | addstate1 = net->statecount; | |||
1650 | addstate2 = substitute->statecount; | |||
1651 | ||||
1652 | inh = fsm_read_init(net); | |||
1653 | subh = fsm_read_init(substitute); | |||
1654 | repsym = fsm_get_symbol_number(inh, original); | |||
1655 | if (repsym == -1) { | |||
1656 | fsm_read_done(inh); | |||
1657 | return(net); | |||
1658 | } | |||
1659 | outh = fsm_construct_init(net->name); | |||
1660 | fsm_construct_copy_sigma(outh, net->sigma); | |||
1661 | while (fsm_get_next_arc(inh)) { | |||
1662 | source = fsm_get_arc_source(inh); | |||
1663 | target = fsm_get_arc_target(inh); | |||
1664 | in = fsm_get_arc_num_in(inh); | |||
1665 | out = fsm_get_arc_num_out(inh); | |||
1666 | ||||
1667 | /* Double-sided arc, splice in substitute network */ | |||
1668 | if (in == repsym && out == repsym) { | |||
1669 | fsm_read_reset(subh); | |||
1670 | fsm_construct_add_arc_nums(outh, source, addstate1, EPSILON0, EPSILON0); | |||
1671 | while (fsm_get_next_arc(subh)) { | |||
1672 | source = fsm_get_arc_source(subh); | |||
1673 | target = fsm_get_arc_target(subh); | |||
1674 | subin = fsm_get_arc_in(subh); | |||
1675 | subout = fsm_get_arc_out(subh); | |||
1676 | fsm_construct_add_arc(outh, source+addstate1, target+addstate1, subin, subout); | |||
1677 | } | |||
1678 | while ((i = fsm_get_next_final(subh)) != -1) { | |||
1679 | target = fsm_get_arc_target(inh); | |||
1680 | fsm_construct_add_arc_nums(outh, addstate1+i, target, EPSILON0, EPSILON0); | |||
1681 | } | |||
1682 | addstate1 = addstate1 + addstate2; | |||
1683 | /* One-sided replace, splice in repsym .x. sub or sub .x. repsym */ | |||
1684 | } else if (in == repsym || out == repsym) { | |||
1685 | if (in == repsym) { | |||
1686 | subnet2 = fsm_minimize(fsm_cross_product(fsm_copy(substitute), fsm_symbol(fsm_get_arc_out(inh)))); | |||
1687 | } else { | |||
1688 | subnet2 = fsm_minimize(fsm_cross_product(fsm_symbol(fsm_get_arc_in(inh)),fsm_copy(substitute))); | |||
1689 | } | |||
1690 | fsm_construct_add_arc_nums(outh, source, addstate1, EPSILON0, EPSILON0); | |||
1691 | subh2 = fsm_read_init(subnet2); | |||
1692 | while (fsm_get_next_arc(subh2)) { | |||
1693 | source = fsm_get_arc_source(subh2); | |||
1694 | target = fsm_get_arc_target(subh2); | |||
1695 | subin = fsm_get_arc_in(subh2); | |||
1696 | subout = fsm_get_arc_out(subh2); | |||
1697 | fsm_construct_add_arc(outh, source+addstate1, target+addstate1, subin, subout); | |||
1698 | } | |||
1699 | while ((i = fsm_get_next_final(subh2)) != -1) { | |||
1700 | target = fsm_get_arc_target(inh); | |||
1701 | fsm_construct_add_arc_nums(outh, addstate1+i, target, EPSILON0, EPSILON0); | |||
1702 | } | |||
1703 | fsm_read_done(subh2); | |||
1704 | addstate1 = addstate1 + subnet2->statecount; | |||
1705 | fsm_destroy(subnet2); | |||
1706 | } else { | |||
1707 | /* Default, just copy arc */ | |||
1708 | fsm_construct_add_arc_nums(outh, source, target, in, out); | |||
1709 | } | |||
1710 | } | |||
1711 | ||||
1712 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
1713 | fsm_construct_set_final(outh, i); | |||
1714 | } | |||
1715 | while ((i = fsm_get_next_initial(inh)) != -1) { | |||
1716 | fsm_construct_set_initial(outh, i); | |||
1717 | } | |||
1718 | fsm_read_done(inh); | |||
1719 | fsm_read_done(subh); | |||
1720 | outnet = fsm_construct_done(outh); | |||
1721 | return(outnet); | |||
1722 | } | |||
1723 | ||||
1724 | struct fsm *fsm_substitute_symbol(struct fsm *net, char *original, char *substitute) { | |||
1725 | struct fsm_state *fsm; | |||
1726 | int i,o,s = EPSILON0; | |||
1727 | if (strcmp(original,substitute) == 0) | |||
1728 | return(net); | |||
1729 | if ((o = sigma_find(original, net->sigma)) == -1) { | |||
1730 | //fprintf(stderr, "\nSymbol '%s' not found in network!\n", original); | |||
1731 | return(net); | |||
1732 | } | |||
1733 | if (strcmp(substitute,"0") == 0) | |||
1734 | s = EPSILON0; | |||
1735 | else if (substitute != NULL((void*)0) && (s = sigma_find(substitute, net->sigma)) == -1) { | |||
1736 | s = sigma_add(substitute, net->sigma); | |||
1737 | } | |||
1738 | for (i=0, fsm = net->states; (fsm+i)->state_no != -1; i++) { | |||
1739 | if ((fsm+i)->in == o) { | |||
1740 | (fsm+i)->in = s; | |||
1741 | } | |||
1742 | if ((fsm+i)->out == o) { | |||
1743 | (fsm+i)->out = s; | |||
1744 | } | |||
1745 | } | |||
1746 | net->sigma = sigma_remove(original, net->sigma); | |||
1747 | sigma_sort(net); | |||
1748 | fsm_update_flags(net, NO0, NO0, NO0, NO0, NO0, NO0); | |||
1749 | sigma_cleanup(net,0); | |||
1750 | /* if s = epsilon */ | |||
1751 | net->is_minimized = NO0; | |||
1752 | return(fsm_determinize(net)); | |||
1753 | } | |||
1754 | ||||
1755 | struct fsm *fsm_cross_product(struct fsm *net1, struct fsm *net2) { | |||
1756 | int i, a, b, current_state, current_start, current_final, target_number, symbol1, symbol2, epsilon = 0, unknown = 0; | |||
1757 | struct fsm_state *machine_a, *machine_b, *fsm; | |||
1758 | struct state_arr *point_a, *point_b; | |||
1759 | struct triplethash *th; | |||
1760 | ||||
1761 | /* Perform a cross product by running two machines in parallel */ | |||
1762 | /* The approach here allows a state to stay, creating a a:0 or 0:b transition */ | |||
1763 | /* with the a/b-state waiting, and the arc going to {a,stay} or {stay,b} */ | |||
1764 | /* the wait maneuver is only possible if the waiting state is final */ | |||
1765 | ||||
1766 | /* For the rewrite rules compilation, a different cross-product is used: */ | |||
1767 | /* rewrite_cp() synchronizes A and B as long as possible to get a unique */ | |||
1768 | /* output match for each cross product. */ | |||
1769 | ||||
1770 | /* This behavior where we postpone zeroes on either side and perform */ | |||
1771 | /* and equal length cross-product as long as possible and never intermix */ | |||
1772 | /* ?:0 and 0:? arcs (i.e. we keep both machines synchronized as long as possible */ | |||
1773 | /* can be done by [A .x. B] & ?:?* [?:0*|0:?*] at the cost of possibly */ | |||
1774 | /* up to three times larger transducers. */ | |||
1775 | /* This is very similar to the idea in "tristate composition" in fsm_compose() */ | |||
1776 | ||||
1777 | /* This function is only used for explicit cross products */ | |||
1778 | /* such as a:b or A.x.B, etc. In rewrite rules, we use rewrite_cp() */ | |||
1779 | ||||
1780 | net1 = fsm_minimize(net1); | |||
1781 | net2 = fsm_minimize(net2); | |||
1782 | ||||
1783 | fsm_merge_sigma(net1, net2); | |||
1784 | ||||
1785 | fsm_count(net1); | |||
1786 | fsm_count(net2); | |||
1787 | ||||
1788 | machine_a = net1->states; | |||
1789 | machine_b = net2->states; | |||
1790 | ||||
1791 | /* new state 0 = {0,0} */ | |||
1792 | ||||
1793 | STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);; | |||
1794 | ||||
1795 | th = triplet_hash_init(); | |||
1796 | triplet_hash_insert(th, 0, 0, 0); | |||
1797 | ||||
1798 | fsm_state_init(sigma_max(net1->sigma)); | |||
1799 | ||||
1800 | point_a = init_state_pointers(machine_a); | |||
1801 | point_b = init_state_pointers(machine_b); | |||
1802 | ||||
1803 | while (!int_stack_isempty()) { | |||
1804 | ||||
1805 | /* Get a pair of states to examine */ | |||
1806 | ||||
1807 | a = int_stack_pop(); | |||
1808 | b = int_stack_pop(); | |||
1809 | ||||
1810 | /* printf("Treating pair: {%i,%i}\n",a,b); */ | |||
1811 | ||||
1812 | current_state = triplet_hash_find(th, a, b, 0); | |||
1813 | current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0; | |||
1814 | current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0; | |||
1815 | ||||
1816 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
1817 | ||||
1818 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
1819 | for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) { | |||
1820 | ||||
1821 | if ((machine_a->target == -1) && (machine_b->target == -1)) { | |||
1822 | continue; | |||
1823 | } | |||
1824 | if ((machine_a->target == -1) && (machine_a->final_state == 0)) { | |||
1825 | continue; | |||
1826 | } | |||
1827 | if ((machine_b->target == -1) && (machine_b->final_state == 0)) { | |||
1828 | continue; | |||
1829 | } | |||
1830 | /* Main check */ | |||
1831 | if (!((machine_a->target == -1) || (machine_b->target == -1))) { | |||
1832 | if ((target_number = triplet_hash_find(th, machine_a->target, machine_b->target, 0)) == -1) { | |||
1833 | STACK_2_PUSH(machine_b->target, machine_a->target)int_stack_push(machine_b->target); int_stack_push(machine_a ->target);; | |||
1834 | target_number = triplet_hash_insert(th, machine_a->target, machine_b->target, 0); | |||
1835 | } | |||
1836 | symbol1 = machine_a->in; | |||
1837 | symbol2 = machine_b->in; | |||
1838 | if (symbol1 == IDENTITY2 && symbol2 != IDENTITY2) | |||
1839 | symbol1 = UNKNOWN1; | |||
1840 | if (symbol2 == IDENTITY2 && symbol1 != IDENTITY2) | |||
1841 | symbol2 = UNKNOWN1; | |||
1842 | ||||
1843 | fsm_state_add_arc(current_state, symbol1, symbol2, target_number, current_final, current_start); | |||
1844 | /* @:@ -> @:@ and also ?:? */ | |||
1845 | if ((machine_a->in == IDENTITY2) && (machine_b->in == IDENTITY2)) { | |||
1846 | fsm_state_add_arc(current_state, UNKNOWN1, UNKNOWN1, target_number, current_final, current_start); | |||
1847 | } | |||
1848 | } | |||
1849 | if (machine_a->final_state == 1 && machine_b->target != -1) { | |||
1850 | ||||
1851 | /* Add 0:b i.e. stay in state A */ | |||
1852 | if ((target_number = triplet_hash_find(th, machine_a->state_no, machine_b->target, 0)) == -1) { | |||
1853 | STACK_2_PUSH(machine_b->target, machine_a->state_no)int_stack_push(machine_b->target); int_stack_push(machine_a ->state_no);; | |||
1854 | target_number = triplet_hash_insert(th, machine_a->state_no, machine_b->target, 0); | |||
1855 | } | |||
1856 | /* @:0 becomes ?:0 */ | |||
1857 | symbol2 = machine_b->in == IDENTITY2 ? UNKNOWN1 : machine_b->in; | |||
1858 | fsm_state_add_arc(current_state, EPSILON0, symbol2, target_number, current_final, current_start); | |||
1859 | } | |||
1860 | ||||
1861 | if (machine_b->final_state == 1 && machine_a->target != -1) { | |||
1862 | ||||
1863 | /* Add a:0 i.e. stay in state B */ | |||
1864 | if ((target_number = triplet_hash_find(th, machine_a->target, machine_b->state_no, 0)) == -1) { | |||
1865 | STACK_2_PUSH(machine_b->state_no, machine_a->target)int_stack_push(machine_b->state_no); int_stack_push(machine_a ->target);; | |||
1866 | target_number = triplet_hash_insert(th, machine_a->target, machine_b->state_no, 0); | |||
1867 | } | |||
1868 | /* @:0 becomes ?:0 */ | |||
1869 | symbol1 = machine_a->in == IDENTITY2 ? UNKNOWN1 : machine_a->in; | |||
1870 | fsm_state_add_arc(current_state, symbol1, EPSILON0, target_number, current_final, current_start); | |||
1871 | } | |||
1872 | } | |||
1873 | } | |||
1874 | /* Check arctrack */ | |||
1875 | fsm_state_end_state(); | |||
1876 | } | |||
1877 | ||||
1878 | free(net1->states); | |||
1879 | fsm_state_close(net1); | |||
1880 | ||||
1881 | for (i=0, fsm = net1->states; (fsm+i)->state_no != -1; i++) { | |||
1882 | if (((fsm+i)->in == EPSILON0) || ((fsm+i)->out == EPSILON0)) | |||
1883 | epsilon = 1; | |||
1884 | if (((fsm+i)->in == UNKNOWN1) || ((fsm+i)->out == UNKNOWN1)) | |||
1885 | unknown = 1; | |||
1886 | } | |||
1887 | if (epsilon == 1) { | |||
1888 | if (sigma_find_number(EPSILON0, net1->sigma) == -1) { | |||
1889 | sigma_add_special(EPSILON0, net1->sigma); | |||
1890 | } | |||
1891 | } | |||
1892 | if (unknown == 1) { | |||
1893 | if (sigma_find_number(UNKNOWN1, net1->sigma) == -1) { | |||
1894 | sigma_add_special(UNKNOWN1, net1->sigma); | |||
1895 | } | |||
1896 | } | |||
1897 | free(point_a); | |||
1898 | free(point_b); | |||
1899 | fsm_destroy(net2); | |||
1900 | triplet_hash_free(th); | |||
1901 | return(fsm_coaccessible(net1)); | |||
1902 | } | |||
1903 | ||||
1904 | struct fsm *fsm_precedes(struct fsm *net1, struct fsm *net2) { | |||
1905 | return(fsm_complement(fsm_minimize(fsm_contains(fsm_minimize(fsm_concat(fsm_minimize(fsm_copy(net2)),fsm_concat(fsm_universal(),fsm_minimize(fsm_copy(net1))))))))); | |||
1906 | } | |||
1907 | ||||
1908 | struct fsm *fsm_follows(struct fsm *net1, struct fsm *net2) { | |||
1909 | return(fsm_complement(fsm_minimize(fsm_contains(fsm_minimize(fsm_concat(fsm_minimize(fsm_copy(net1)),fsm_concat(fsm_universal(),fsm_minimize(fsm_copy(net2))))))))); | |||
1910 | } | |||
1911 | ||||
1912 | struct fsm *fsm_unflatten(struct fsm *net, char *epsilon_sym, char *repeat_sym) { | |||
1913 | int a, b, current_state, current_start, current_final, target_number, epsilon, repeat, in, out; | |||
1914 | struct fsm_state *even_state, *odd_state; | |||
1915 | struct state_arr *point_a; | |||
1916 | struct triplethash *th; | |||
1917 | ||||
1918 | fsm_minimize(net); | |||
1919 | fsm_count(net); | |||
1920 | ||||
1921 | epsilon = sigma_find(epsilon_sym, net->sigma); | |||
1922 | repeat = sigma_find(repeat_sym, net->sigma); | |||
1923 | ||||
1924 | even_state = net->states; | |||
1925 | ||||
1926 | /* new state 0 = {0,0} */ | |||
1927 | ||||
1928 | STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);; | |||
1929 | ||||
1930 | th = triplet_hash_init(); | |||
1931 | triplet_hash_insert(th, 0, 0, 0); | |||
1932 | ||||
1933 | fsm_state_init(sigma_max(net->sigma)); | |||
1934 | ||||
1935 | point_a = init_state_pointers(even_state); | |||
1936 | ||||
1937 | while (!int_stack_isempty()) { | |||
1938 | ||||
1939 | /* Get a pair of states to examine */ | |||
1940 | ||||
1941 | a = int_stack_pop(); | |||
1942 | a = int_stack_pop(); | |||
1943 | ||||
1944 | /* printf("Treating pair: {%i,%i}\n",a,b); */ | |||
1945 | ||||
1946 | current_state = triplet_hash_find(th, a, a, 0); | |||
1947 | current_start = ((point_a+a)->start == 1) ? 1 : 0; | |||
1948 | current_final = ((point_a+a)->final == 1) ? 1 : 0; | |||
1949 | ||||
1950 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
1951 | ||||
1952 | for (even_state = (point_a+a)->transitions; even_state->state_no == a; even_state++) { | |||
1953 | if (even_state->target == -1) { | |||
1954 | continue; | |||
1955 | } | |||
1956 | in = even_state->in; | |||
1957 | b = even_state->target; | |||
1958 | for (odd_state = (point_a+b)->transitions; odd_state->state_no == b; odd_state++) { | |||
1959 | if (odd_state->target == -1) { | |||
1960 | continue; | |||
1961 | } | |||
1962 | if ((target_number = triplet_hash_find(th, odd_state->target, odd_state->target, 0)) == -1) { | |||
1963 | STACK_2_PUSH(odd_state->target, odd_state->target)int_stack_push(odd_state->target); int_stack_push(odd_state ->target);; | |||
1964 | target_number = triplet_hash_insert(th, odd_state->target, odd_state->target, 0); | |||
1965 | } | |||
1966 | in = even_state->in; | |||
1967 | out = odd_state->in; | |||
1968 | if (out == repeat) { | |||
1969 | out = in; | |||
1970 | } else if (in == IDENTITY2 || out == IDENTITY2) { | |||
1971 | in = in == IDENTITY2 ? UNKNOWN1 : in; | |||
1972 | out = out == IDENTITY2 ? UNKNOWN1 : out; | |||
1973 | } | |||
1974 | if (in == epsilon) { | |||
1975 | in = EPSILON0; | |||
1976 | } | |||
1977 | if (out == epsilon) { | |||
1978 | out = EPSILON0; | |||
1979 | } | |||
1980 | fsm_state_add_arc(current_state, in, out, target_number, current_final, current_start); | |||
1981 | } | |||
1982 | } | |||
1983 | fsm_state_end_state(); | |||
1984 | } | |||
1985 | free(net->states); | |||
1986 | fsm_state_close(net); | |||
1987 | free(point_a); | |||
1988 | triplet_hash_free(th); | |||
1989 | return(net); | |||
1990 | } | |||
1991 | ||||
1992 | ||||
1993 | struct fsm *fsm_shuffle(struct fsm *net1, struct fsm *net2) { | |||
1994 | int a, b, current_state, current_start, current_final, target_number; | |||
1995 | struct fsm_state *machine_a, *machine_b; | |||
1996 | struct state_arr *point_a, *point_b; | |||
1997 | struct triplethash *th; | |||
1998 | ||||
1999 | /* Shuffle A and B by making alternatively A move and B stay at each or */ | |||
2000 | /* vice versa at each step */ | |||
2001 | ||||
2002 | fsm_minimize(net1); | |||
2003 | fsm_minimize(net2); | |||
2004 | ||||
2005 | fsm_merge_sigma(net1, net2); | |||
2006 | ||||
2007 | fsm_count(net1); | |||
2008 | fsm_count(net2); | |||
2009 | ||||
2010 | machine_a = net1->states; | |||
2011 | machine_b = net2->states; | |||
2012 | ||||
2013 | /* new state 0 = {0,0} */ | |||
2014 | ||||
2015 | STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);; | |||
2016 | ||||
2017 | th = triplet_hash_init(); | |||
2018 | triplet_hash_insert(th, 0, 0, 0); | |||
2019 | ||||
2020 | fsm_state_init(sigma_max(net1->sigma)); | |||
2021 | ||||
2022 | point_a = init_state_pointers(machine_a); | |||
2023 | point_b = init_state_pointers(machine_b); | |||
2024 | ||||
2025 | while (!int_stack_isempty()) { | |||
2026 | ||||
2027 | /* Get a pair of states to examine */ | |||
2028 | ||||
2029 | a = int_stack_pop(); | |||
2030 | b = int_stack_pop(); | |||
2031 | ||||
2032 | /* printf("Treating pair: {%i,%i}\n",a,b); */ | |||
2033 | ||||
2034 | current_state = triplet_hash_find(th, a, b, 0); | |||
2035 | current_start = (((point_a+a)->start == 1) && ((point_b+b)->start == 1)) ? 1 : 0; | |||
2036 | current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 1)) ? 1 : 0; | |||
2037 | ||||
2038 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
2039 | ||||
2040 | /* Follow A, B stays */ | |||
2041 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
2042 | if (machine_a->target == -1) { | |||
2043 | continue; | |||
2044 | } | |||
2045 | if ((target_number = triplet_hash_find(th, machine_a->target, b, 0)) == -1) { | |||
2046 | STACK_2_PUSH(b, machine_a->target)int_stack_push(b); int_stack_push(machine_a->target);; | |||
2047 | target_number = triplet_hash_insert(th, machine_a->target, b, 0); | |||
2048 | } | |||
2049 | ||||
2050 | fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start); | |||
2051 | } | |||
2052 | ||||
2053 | /* Follow B, A stays */ | |||
2054 | for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) { | |||
2055 | ||||
2056 | if (machine_b->target == -1) { | |||
2057 | continue; | |||
2058 | } | |||
2059 | ||||
2060 | if ((target_number = triplet_hash_find(th, a, machine_b->target, 0)) == -1) { | |||
2061 | STACK_2_PUSH(machine_b->target, a)int_stack_push(machine_b->target); int_stack_push(a);; | |||
2062 | target_number = triplet_hash_insert(th, a, machine_b->target, 0); | |||
2063 | } | |||
2064 | fsm_state_add_arc(current_state, machine_b->in, machine_b->out, target_number, current_final, current_start); | |||
2065 | } | |||
2066 | ||||
2067 | /* Check arctrack */ | |||
2068 | fsm_state_end_state(); | |||
2069 | } | |||
2070 | ||||
2071 | free(net1->states); | |||
2072 | fsm_state_close(net1); | |||
2073 | free(point_a); | |||
2074 | free(point_b); | |||
2075 | fsm_destroy(net2); | |||
2076 | triplet_hash_free(th); | |||
2077 | return(net1); | |||
2078 | } | |||
2079 | ||||
2080 | int fsm_equivalent(struct fsm *net1, struct fsm *net2) { | |||
2081 | /* Test path equivalence of two FSMs by traversing both in parallel */ | |||
2082 | int a, b, matching_arc, equivalent; | |||
2083 | struct fsm_state *machine_a, *machine_b; | |||
2084 | struct state_arr *point_a, *point_b; | |||
2085 | struct triplethash *th; | |||
2086 | ||||
2087 | fsm_merge_sigma(net1, net2); | |||
2088 | ||||
2089 | fsm_count(net1); | |||
2090 | fsm_count(net2); | |||
2091 | ||||
2092 | machine_a = net1->states; | |||
2093 | machine_b = net2->states; | |||
2094 | ||||
2095 | equivalent = 0; | |||
2096 | /* new state 0 = {0,0} */ | |||
2097 | STACK_2_PUSH(0,0)int_stack_push(0); int_stack_push(0);; | |||
2098 | ||||
2099 | th = triplet_hash_init(); | |||
2100 | triplet_hash_insert(th, 0, 0, 0); | |||
2101 | ||||
2102 | point_a = init_state_pointers(machine_a); | |||
2103 | point_b = init_state_pointers(machine_b); | |||
2104 | ||||
2105 | while (!int_stack_isempty()) { | |||
2106 | ||||
2107 | /* Get a pair of states to examine */ | |||
2108 | ||||
2109 | a = int_stack_pop(); | |||
2110 | b = int_stack_pop(); | |||
2111 | ||||
2112 | if ((point_a+a)->final != (point_b+b)->final) { | |||
2113 | goto not_equivalent; | |||
2114 | } | |||
2115 | /* Check that all arcs in A have matching arc in B, push new state pair on stack */ | |||
2116 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
2117 | if (machine_a->target == -1) { | |||
2118 | break; | |||
2119 | } | |||
2120 | matching_arc = 0; | |||
2121 | for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) { | |||
2122 | if (machine_b->target == -1) { | |||
2123 | break; | |||
2124 | } | |||
2125 | if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) { | |||
2126 | matching_arc = 1; | |||
2127 | if ((triplet_hash_find(th, machine_a->target, machine_b->target, 0)) == -1) { | |||
2128 | STACK_2_PUSH(machine_b->target, machine_a->target)int_stack_push(machine_b->target); int_stack_push(machine_a ->target);; | |||
2129 | triplet_hash_insert(th, machine_a->target, machine_b->target, 0); | |||
2130 | } | |||
2131 | break; | |||
2132 | } | |||
2133 | } | |||
2134 | if (matching_arc == 0) { | |||
2135 | goto not_equivalent; | |||
2136 | } | |||
2137 | } | |||
2138 | for (machine_b = (point_b+b)->transitions; machine_b->state_no == b ; machine_b++) { | |||
2139 | if (machine_b->target == -1) { | |||
2140 | break; | |||
2141 | } | |||
2142 | matching_arc = 0; | |||
2143 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
2144 | if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) { | |||
2145 | matching_arc = 1; | |||
2146 | break; | |||
2147 | } | |||
2148 | } | |||
2149 | if (matching_arc == 0) { | |||
2150 | goto not_equivalent; | |||
2151 | } | |||
2152 | } | |||
2153 | } | |||
2154 | equivalent = 1; | |||
2155 | not_equivalent: | |||
2156 | fsm_destroy(net1); | |||
2157 | fsm_destroy(net2); | |||
2158 | free(point_a); | |||
2159 | free(point_b); | |||
2160 | triplet_hash_free(th); | |||
2161 | return(equivalent); | |||
2162 | } | |||
2163 | ||||
2164 | ||||
2165 | struct fsm *fsm_minus(struct fsm *net1, struct fsm *net2) { | |||
2166 | int a, b, current_state, current_start, current_final, target_number, b_has_trans, btarget, statecount; | |||
2167 | struct fsm_state *machine_a, *machine_b; | |||
2168 | struct state_arr *point_a, *point_b; | |||
2169 | struct triplethash *th; | |||
2170 | statecount = 0; | |||
2171 | ||||
2172 | net1 = fsm_minimize(net1); | |||
2173 | net2 = fsm_minimize(net2); | |||
2174 | ||||
2175 | fsm_merge_sigma(net1, net2); | |||
2176 | ||||
2177 | fsm_count(net1); | |||
2178 | fsm_count(net2); | |||
2179 | ||||
2180 | machine_a = net1->states; | |||
2181 | machine_b = net2->states; | |||
2182 | ||||
2183 | /* new state 0 = {1,1} */ | |||
2184 | ||||
2185 | int_stack_clear(); | |||
2186 | STACK_2_PUSH(1,1)int_stack_push(1); int_stack_push(1);; | |||
2187 | ||||
2188 | th = triplet_hash_init(); | |||
2189 | triplet_hash_insert(th, 1, 1, 0); | |||
2190 | ||||
2191 | point_a = init_state_pointers(machine_a); | |||
2192 | point_b = init_state_pointers(machine_b); | |||
2193 | ||||
2194 | fsm_state_init(sigma_max(net1->sigma)); | |||
2195 | ||||
2196 | while (!int_stack_isempty()) { | |||
2197 | statecount++; | |||
2198 | /* Get a pair of states to examine */ | |||
2199 | ||||
2200 | a = int_stack_pop(); | |||
2201 | b = int_stack_pop(); | |||
2202 | ||||
2203 | current_state = triplet_hash_find(th, a, b, 0); | |||
2204 | a--; | |||
2205 | b--; | |||
2206 | ||||
2207 | if (b == -1) { | |||
2208 | current_start = 0; | |||
2209 | current_final = (point_a+a)->final; | |||
2210 | } else { | |||
2211 | current_start = (a == 0 && b == 0) ? 1 : 0; | |||
2212 | current_final = (((point_a+a)->final == 1) && ((point_b+b)->final == 0)) ? 1 : 0; | |||
2213 | } | |||
2214 | ||||
2215 | fsm_state_set_current_state(current_state, current_final, current_start); | |||
2216 | ||||
2217 | for (machine_a = (point_a+a)->transitions ; machine_a->state_no == a ; machine_a++) { | |||
2218 | if (machine_a->target == -1) { | |||
2219 | break; | |||
2220 | continue; | |||
2221 | } | |||
2222 | if (b == -1) { | |||
2223 | /* b is dead */ | |||
2224 | if ((target_number = triplet_hash_find(th, (machine_a->target)+1, 0, 0)) == -1) { | |||
2225 | STACK_2_PUSH(0, (machine_a->target)+1)int_stack_push(0); int_stack_push((machine_a->target)+1);; | |||
2226 | target_number = triplet_hash_insert(th, (machine_a->target)+1, 0, 0); | |||
2227 | } | |||
2228 | } else { | |||
2229 | /* b is alive */ | |||
2230 | b_has_trans = 0; | |||
2231 | for (machine_b = (point_b+b)->transitions ; machine_b->state_no == b ; machine_b++) { | |||
2232 | if (machine_a->in == machine_b->in && machine_a->out == machine_b->out) { | |||
2233 | b_has_trans = 1; | |||
2234 | btarget = machine_b->target; | |||
2235 | break; | |||
2236 | } | |||
2237 | } | |||
2238 | if (b_has_trans) { | |||
2239 | if ((target_number = triplet_hash_find(th, (machine_a->target)+1, btarget+1, 0)) == -1) { | |||
2240 | STACK_2_PUSH(btarget+1, (machine_a->target)+1)int_stack_push(btarget+1); int_stack_push((machine_a->target )+1);; | |||
2241 | target_number = triplet_hash_insert(th, (machine_a->target)+1, (machine_b->target)+1, 0); | |||
2242 | } | |||
2243 | } else { | |||
2244 | /* b is dead */ | |||
2245 | if ((target_number = triplet_hash_find(th, (machine_a->target)+1, 0, 0)) == -1) { | |||
2246 | STACK_2_PUSH(0, (machine_a->target)+1)int_stack_push(0); int_stack_push((machine_a->target)+1);; | |||
2247 | target_number = triplet_hash_insert(th, (machine_a->target)+1, 0, 0); | |||
2248 | } | |||
2249 | } | |||
2250 | } | |||
2251 | fsm_state_add_arc(current_state, machine_a->in, machine_a->out, target_number, current_final, current_start); | |||
2252 | } | |||
2253 | fsm_state_end_state(); | |||
2254 | } | |||
2255 | ||||
2256 | free(net1->states); | |||
2257 | fsm_state_close(net1); | |||
2258 | free(point_a); | |||
2259 | free(point_b); | |||
2260 | fsm_destroy(net2); | |||
2261 | triplet_hash_free(th); | |||
2262 | return(fsm_minimize(net1)); | |||
2263 | } | |||
2264 | ||||
2265 | struct fsm *fsm_contains(struct fsm *net) { | |||
2266 | /* [?* A ?*] */ | |||
2267 | struct fsm *net2; | |||
2268 | ||||
2269 | net2 = fsm_concat(fsm_concat(fsm_universal(),net),fsm_universal()); | |||
2270 | return(net2); | |||
2271 | } | |||
2272 | ||||
2273 | struct fsm *fsm_universal() { | |||
2274 | struct fsm *net; | |||
2275 | int s; | |||
2276 | net = fsm_create(""); | |||
2277 | fsm_update_flags(net, YES1, YES1, YES1, YES1, NO0, NO0); | |||
2278 | net->states = malloc(sizeof(struct fsm_state)*2); | |||
2279 | s = sigma_add_special(IDENTITY2,net->sigma); | |||
2280 | add_fsm_arc(net->states, 0, 0, s, s, 0, 1, 1); | |||
2281 | add_fsm_arc(net->states, 1, -1, -1, -1, -1, -1, -1); | |||
2282 | net->arccount = 1; | |||
2283 | net->statecount = 1; | |||
2284 | net->linecount = 2; | |||
2285 | net->finalcount = 1; | |||
2286 | net->pathcount = PATHCOUNT_CYCLIC-1; | |||
2287 | return(net); | |||
2288 | } | |||
2289 | ||||
2290 | struct fsm *fsm_contains_one(struct fsm *net) { | |||
2291 | /* $A - $[[?+ A ?* & A ?*] | [A ?+ & A]] */ | |||
2292 | struct fsm *ret; | |||
2293 | ret = fsm_minus(fsm_contains(fsm_copy(net)),fsm_contains(fsm_union(fsm_intersect(fsm_concat(fsm_kleene_plus(fsm_identity()),fsm_concat(fsm_copy(net),fsm_universal())) , fsm_concat(fsm_copy(net),fsm_universal())),fsm_intersect(fsm_concat(fsm_copy(net),fsm_kleene_plus(fsm_identity())), fsm_copy(net))))); | |||
2294 | fsm_destroy(net); | |||
2295 | return(ret); | |||
2296 | } | |||
2297 | ||||
2298 | struct fsm *fsm_contains_opt_one(struct fsm *net) { | |||
2299 | /* $.A | ~$A */ | |||
2300 | struct fsm *ret; | |||
2301 | ret = fsm_union(fsm_contains_one(fsm_copy(net)),fsm_complement(fsm_contains(fsm_copy(net)))); | |||
2302 | fsm_destroy(net); | |||
2303 | return(ret); | |||
2304 | } | |||
2305 | ||||
2306 | struct fsm *fsm_simple_replace(struct fsm *net1, struct fsm *net2) { | |||
2307 | /* [~[?* [A-0] ?*] [A.x.B]]* ~[?* [A-0] ?*] */ | |||
2308 | ||||
2309 | struct fsm *UPlus, *ret; | |||
2310 | UPlus = fsm_minimize(fsm_kleene_plus(fsm_identity())); | |||
2311 | ret = fsm_concat(fsm_minimize(fsm_kleene_star(fsm_minimize(fsm_concat(fsm_complement(fsm_minimize(fsm_concat(fsm_concat(fsm_universal(),fsm_minimize(fsm_intersect(fsm_copy(net1),fsm_copy(UPlus)))),fsm_universal()))),fsm_minimize(fsm_cross_product(fsm_copy(net1),fsm_copy(net2))))))),fsm_minimize(fsm_complement(fsm_minimize(fsm_concat(fsm_concat(fsm_universal(), fsm_intersect(fsm_copy(net1),fsm_copy(UPlus))),fsm_universal()))))); | |||
2312 | fsm_destroy(net1); | |||
2313 | fsm_destroy(net2); | |||
2314 | fsm_destroy(UPlus); | |||
2315 | return(ret); | |||
2316 | } | |||
2317 | ||||
2318 | struct fsm *fsm_priority_union_upper(struct fsm *net1, struct fsm *net2) { | |||
2319 | /* A .P. B = A | [~[A.u] .o. B] */ | |||
2320 | struct fsm *ret; | |||
2321 | ret = fsm_union(fsm_copy(net1),fsm_compose(fsm_complement(fsm_upper(fsm_copy(net1))),net2)); | |||
2322 | fsm_destroy(net1); | |||
2323 | return(ret); | |||
2324 | } | |||
2325 | ||||
2326 | struct fsm *fsm_priority_union_lower(struct fsm *net1, struct fsm *net2) { | |||
2327 | /* A .p. B = A | B .o. ~[A.l] */ | |||
2328 | struct fsm *ret; | |||
2329 | ret = fsm_union(fsm_copy(net1),fsm_compose(net2,fsm_complement(fsm_lower(fsm_copy(net1))))); | |||
2330 | fsm_destroy(net1); | |||
2331 | return(ret); | |||
2332 | } | |||
2333 | ||||
2334 | struct fsm *fsm_lenient_compose(struct fsm *net1, struct fsm *net2) { | |||
2335 | /* A .O. B = [A .o. B] .P. B */ | |||
2336 | struct fsm *ret; | |||
2337 | ret = fsm_priority_union_upper(fsm_compose(fsm_copy(net1),net2),fsm_copy(net1)); | |||
2338 | fsm_destroy(net1); | |||
2339 | return(ret); | |||
2340 | } | |||
2341 | ||||
2342 | struct fsm *fsm_term_negation(struct fsm *net1) { | |||
2343 | return(fsm_intersect(fsm_identity(),fsm_complement(net1))); | |||
2344 | } | |||
2345 | ||||
2346 | struct fsm *fsm_quotient_interleave(struct fsm *net1, struct fsm *net2) { | |||
2347 | /* A/\/B = The set of strings you can interleave in B and get a string from A */ | |||
2348 | /* [B/[x \x* x] & A/x .o. [[[\x]:0]* (x:0 \x* x:0)]*].l */ | |||
2349 | struct fsm *Result; | |||
2350 | Result = fsm_lower(fsm_compose(fsm_intersect(fsm_ignore(net2,fsm_concat(fsm_symbol("@>@"),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@>@"))),fsm_symbol("@>@"))),OP_IGNORE_ALL1),fsm_ignore(net1,fsm_symbol("@>@"),OP_IGNORE_ALL1)),fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_cross_product(fsm_term_negation(fsm_symbol("@>@")),fsm_empty_string())),fsm_optionality(fsm_concat(fsm_cross_product(fsm_symbol("@>@"),fsm_empty_string()),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@>@"))),fsm_cross_product(fsm_symbol("@>@"),fsm_empty_string())))))))); | |||
2351 | ||||
2352 | Result->sigma = sigma_remove("@>@",Result->sigma); | |||
2353 | /* Could clean up sigma */ | |||
2354 | return(Result); | |||
2355 | } | |||
2356 | ||||
2357 | struct fsm *fsm_quotient_left(struct fsm *net1, struct fsm *net2) { | |||
2358 | /* A\\\B = [B .o. A:0 ?*].l; */ | |||
2359 | /* A\\\B = the set of suffixes you can add to A to get a string in B */ | |||
2360 | struct fsm *Result; | |||
2361 | Result = fsm_lower(fsm_compose(net2,fsm_concat(fsm_cross_product(net1,fsm_empty_string()),fsm_universal()))); | |||
2362 | return(Result); | |||
2363 | } | |||
2364 | ||||
2365 | struct fsm *fsm_quotient_right(struct fsm *net1, struct fsm *net2) { | |||
2366 | struct fsm *Result; | |||
2367 | ||||
2368 | /* A///B = [A .o. ?* B:0].l; */ | |||
2369 | /* A///B = the set of prefixes you can add to B to get strings in A */ | |||
2370 | Result = fsm_lower(fsm_compose(net1, fsm_concat(fsm_universal(),fsm_cross_product(net2,fsm_empty_string())))); | |||
2371 | return(Result); | |||
2372 | } | |||
2373 | ||||
2374 | struct fsm *fsm_ignore(struct fsm *net1, struct fsm *net2, int operation) { | |||
2375 | struct fsm_state *fsm1, *fsm2, *new_fsm; | |||
2376 | struct fsm *Result; | |||
2377 | short *handled_states1, *handled_states2; | |||
2378 | int i, j, k, state_add_counter = 0, malloc_size, splices = 0, returns, target, splice_size, start_splice, states1, states2, lines1, lines2, *return_state; | |||
2379 | ||||
2380 | net1 = fsm_minimize(net1); | |||
2381 | net2 = fsm_minimize(net2); | |||
2382 | ||||
2383 | if (fsm_isempty(net2)) { | |||
2384 | fsm_destroy(net2); | |||
2385 | return(net1); | |||
2386 | } | |||
2387 | fsm_merge_sigma(net1, net2); | |||
2388 | ||||
2389 | fsm_count(net1); | |||
2390 | fsm_count(net2); | |||
2391 | ||||
2392 | states1 = net1->statecount; | |||
2393 | states2 = net2->statecount; | |||
2394 | lines1 = net1->linecount; | |||
2395 | lines2 = net2->linecount; | |||
2396 | fsm1 = net1->states; | |||
2397 | fsm2 = net2->states; | |||
2398 | ||||
2399 | if (operation == OP_IGNORE_INTERNAL2) { | |||
2400 | Result = fsm_lower(fsm_compose(fsm_ignore(fsm_copy(net1),fsm_symbol("@i<@"),OP_IGNORE_ALL1),fsm_compose(fsm_complement(fsm_union(fsm_concat(fsm_symbol("@i<@"),fsm_universal()),fsm_concat(fsm_universal(),fsm_symbol("@i<@")))),fsm_simple_replace(fsm_symbol("@i<@"),fsm_copy(net2))))); | |||
2401 | Result->sigma = sigma_remove("@i<@",Result->sigma); | |||
2402 | fsm_destroy(net1); | |||
2403 | fsm_destroy(net2); | |||
2404 | return(Result); | |||
2405 | } | |||
2406 | ||||
2407 | malloc_size = lines1 + (states1 * (lines2 + net2->finalcount + 1)); | |||
2408 | new_fsm = malloc(sizeof(struct fsm_state)*(malloc_size+1)); | |||
2409 | ||||
2410 | /* Mark if a state has been handled with ignore */ | |||
2411 | handled_states1 = malloc(sizeof(short)*states1); | |||
2412 | handled_states2 = malloc(sizeof(short)*states2); | |||
2413 | ||||
2414 | /* Mark which ignores return to which state */ | |||
2415 | return_state = malloc(sizeof(int)*states1); | |||
2416 | splice_size = states2; | |||
2417 | start_splice = states1; | |||
2418 | for (k=0; k<states1; k++) | |||
2419 | *(handled_states1+k) = 0; | |||
2420 | ||||
2421 | for (i=0, j=0; (fsm1+i)->state_no != -1; i++) { | |||
2422 | if (*(handled_states1+(fsm1+i)->state_no) == 0) { | |||
2423 | target = start_splice + splices * splice_size; | |||
2424 | add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, EPSILON0, EPSILON0, target, (fsm1+i)->final_state, (fsm1+i)->start_state); | |||
2425 | *(return_state+splices) = (fsm1+i)->state_no; | |||
2426 | *(handled_states1+(fsm1+i)->state_no) = 1; | |||
2427 | j++; | |||
2428 | splices++; | |||
2429 | if ((fsm1+i)->in != -1) { | |||
2430 | add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, (fsm1+i)->final_state, (fsm1+i)->start_state); | |||
2431 | j++; | |||
2432 | } | |||
2433 | } else { | |||
2434 | add_fsm_arc(new_fsm, j, (fsm1+i)->state_no, (fsm1+i)->in, (fsm1+i)->out, (fsm1+i)->target, (fsm1+i)->final_state, (fsm1+i)->start_state); | |||
2435 | j++; | |||
2436 | } | |||
2437 | } | |||
2438 | ||||
2439 | /* Add a sequence of fsm2s at the end, with arcs back to the appropriate states */ | |||
2440 | ||||
2441 | state_add_counter = start_splice; | |||
2442 | ||||
2443 | for (returns = 0; splices>0; splices--, returns++) { | |||
2444 | /* Zero handled return arc states */ | |||
2445 | ||||
2446 | for (k=0; k<states2; k++) | |||
2447 | *(handled_states2+k) = 0; | |||
2448 | ||||
2449 | for (i=0; (fsm2+i)->state_no != -1; i++) { | |||
2450 | if ((fsm2+i)->final_state == 1 && *(handled_states2+(fsm2+i)->state_no) == 0) { | |||
2451 | add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, EPSILON0, EPSILON0, *(return_state+returns), 0, 0); | |||
2452 | j++; | |||
2453 | *(handled_states2+(fsm2+i)->state_no) = 1; | |||
2454 | if ((fsm2+i)->target != -1) { | |||
2455 | add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, (fsm2+i)->in, (fsm2+i)->out , (fsm2+i)->target + state_add_counter, 0, 0); | |||
2456 | j++; | |||
2457 | } | |||
2458 | } else { | |||
2459 | add_fsm_arc(new_fsm, j, (fsm2+i)->state_no + state_add_counter, (fsm2+i)->in, (fsm2+i)->out, (fsm2+i)->target + state_add_counter, 0, 0); | |||
2460 | j++; | |||
2461 | } | |||
2462 | } | |||
2463 | state_add_counter = state_add_counter + states2; | |||
2464 | } | |||
2465 | ||||
2466 | add_fsm_arc(new_fsm, j, -1, -1, -1, -1, -1, -1); | |||
2467 | free(handled_states1); | |||
2468 | free(handled_states2); | |||
2469 | free(return_state); | |||
2470 | free(net1->states); | |||
2471 | fsm_destroy(net2); | |||
2472 | net1->states = new_fsm; | |||
2473 | fsm_update_flags(net1, NO0, NO0, NO0, NO0, NO0, NO0); | |||
2474 | fsm_count(net1); | |||
2475 | return(net1); | |||
2476 | } | |||
2477 | ||||
2478 | /* Remove those symbols from sigma that have the same distribution as IDENTITY */ | |||
2479 | ||||
2480 | void fsm_compact(struct fsm *net) { | |||
2481 | struct checktable { | |||
2482 | int state_no; | |||
2483 | int target; | |||
2484 | } *checktable; | |||
2485 | ||||
2486 | struct fsm_state *fsm; | |||
2487 | struct sigma *sig, *sigprev, *sign; | |||
2488 | _Bool *potential; | |||
2489 | int i, j, prevstate, numsymbols, in, out, state, target, removable; | |||
2490 | ||||
2491 | fsm = net->states; | |||
2492 | numsymbols = sigma_max(net->sigma); | |||
2493 | ||||
2494 | potential = malloc(sizeof(_Bool)*(numsymbols+1)); | |||
2495 | checktable = malloc(sizeof(struct checktable)*(numsymbols+1)); | |||
2496 | ||||
2497 | for (i=0; i <= numsymbols; i++) { | |||
2498 | *(potential+i) = 1; | |||
2499 | (checktable+i)->state_no = -1; | |||
2500 | (checktable+i)->target = -1; | |||
2501 | } | |||
2502 | /* For consistency reasons, can't remove symbols longer than 1 */ | |||
2503 | /* since @ and ? only match utf8 symbols of length 1 */ | |||
2504 | ||||
2505 | for (sig = net->sigma; sig != NULL((void*)0) && sig->number != -1; sig = sig->next) { | |||
2506 | if (utf8strlen(sig->symbol) > 1) { | |||
2507 | *(potential+sig->number) = 0; | |||
2508 | } | |||
2509 | } | |||
2510 | ||||
2511 | prevstate = 0; | |||
2512 | ||||
2513 | for (i=0; ; i++) { | |||
2514 | ||||
2515 | if ((fsm+i)->state_no != prevstate) { | |||
2516 | for (j=3; j<=numsymbols;j++) { | |||
2517 | if ((checktable+j)->state_no != prevstate && (checktable+IDENTITY2)->state_no != prevstate) { | |||
2518 | continue; | |||
2519 | } | |||
2520 | if ((checktable+j)->target == (checktable+IDENTITY2)->target && (checktable+j)->state_no == (checktable+IDENTITY2)->state_no) { | |||
2521 | continue; | |||
2522 | } | |||
2523 | *(potential+j) = 0; | |||
2524 | } | |||
2525 | } | |||
2526 | ||||
2527 | if ((fsm+i)->state_no == -1) | |||
2528 | break; | |||
2529 | ||||
2530 | in = (fsm+i)->in; | |||
2531 | out = (fsm+i)->out; | |||
2532 | state = (fsm+i)->state_no; | |||
2533 | target = (fsm+i)->target; | |||
2534 | ||||
2535 | if (in != -1 && out != -1) { | |||
2536 | if (((in == out && in > 2) || in == IDENTITY2)) { | |||
2537 | (checktable+in)->state_no = state; | |||
2538 | (checktable+in)->target = target; | |||
2539 | } | |||
2540 | if (in != out && in > 2) { | |||
2541 | *(potential+in) = 0; | |||
2542 | } | |||
2543 | if (in != out && out > 2) { | |||
2544 | *(potential+out) = 0; | |||
2545 | } | |||
2546 | } | |||
2547 | prevstate = state; | |||
2548 | } | |||
2549 | for (removable = 0, i=3; i <= numsymbols; i++) { | |||
2550 | if (*(potential+i) == 1) { | |||
2551 | removable = 1; | |||
2552 | } | |||
2553 | ||||
2554 | } | |||
2555 | if (removable == 0) { | |||
2556 | free(potential); | |||
2557 | free(checktable); | |||
2558 | return; | |||
2559 | } | |||
2560 | i = j = 0; | |||
2561 | do { | |||
2562 | in = (fsm+i)->in; | |||
2563 | ||||
2564 | add_fsm_arc(fsm, j ,(fsm+i)->state_no,(fsm+i)->in,(fsm+i)->out,(fsm+i)->target,(fsm+i)->final_state,(fsm+i)->start_state); | |||
2565 | if (in == -1) { | |||
2566 | i++; | |||
2567 | j++; | |||
2568 | } | |||
2569 | else if (*(potential+in) == 1 && in > 2) { | |||
2570 | i++; | |||
2571 | } else { | |||
2572 | i++; | |||
2573 | j++; | |||
2574 | } | |||
2575 | } while ((fsm+i)->state_no != -1); | |||
2576 | add_fsm_arc(fsm, j ,(fsm+i)->state_no,(fsm+i)->in,(fsm+i)->out,(fsm+i)->target,(fsm+i)->final_state,(fsm+i)->start_state); | |||
2577 | ||||
2578 | sigprev = NULL((void*)0); | |||
2579 | for (sig = net->sigma; sig != NULL((void*)0) && sig->number != -1; sig = sign) { | |||
2580 | ||||
2581 | if ((sig->number > 2) && (*(potential+sig->number) == 1)) { | |||
2582 | sigprev->next = sig->next; | |||
2583 | sign = sig->next; | |||
2584 | free(sig->symbol); | |||
2585 | free(sig); | |||
2586 | } else { | |||
2587 | sigprev = sig; | |||
2588 | sign = sig->next; | |||
2589 | } | |||
2590 | } | |||
2591 | free(potential); | |||
2592 | free(checktable); | |||
2593 | sigma_cleanup(net,0); | |||
2594 | } | |||
2595 | ||||
2596 | int fsm_symbol_occurs(struct fsm *net, char *symbol, int side) { | |||
2597 | struct fsm_state *fsm; | |||
2598 | int i, sym; | |||
2599 | sym = sigma_find(symbol, net->sigma); | |||
2600 | if (sym == -1) { | |||
2601 | return 0; | |||
2602 | } | |||
2603 | for (i=0, fsm = net->states; (fsm+i)->state_no != -1; i++) { | |||
2604 | if (side == M_UPPER1 && (fsm+i)->in == sym) | |||
2605 | return 1; | |||
2606 | if (side == M_LOWER2 && (fsm+i)->out == sym) | |||
2607 | return 1; | |||
2608 | if (side == (M_UPPER1 + M_LOWER2) && ( (fsm+i)->in == sym || (fsm+i)->out == sym)) | |||
2609 | return 1; | |||
2610 | } | |||
2611 | return 0; | |||
2612 | } | |||
2613 | ||||
2614 | struct fsm *fsm_equal_substrings(struct fsm *net, struct fsm *left, struct fsm *right) { | |||
2615 | ||||
2616 | /* The algorithm extracts from the lower side all and only those strings where */ | |||
2617 | /* every X occurring in different substrings ... left X right ... is identical. */ | |||
2618 | ||||
2619 | /* Caveat: there is no reliable termination condition for the loop that extracts */ | |||
2620 | /* identities. This means that if run on languages where there are potentially */ | |||
2621 | /* infinite-length identical delimited substrings, it will not terminate. */ | |||
2622 | ||||
2623 | /* For example: _eq(l a* r l a* r, l , r) will not terminate. */ | |||
2624 | ||||
2625 | /* However, even if the languages occuring between left and right are infinite */ | |||
2626 | /* the algorithm terminates eventually if the the possible combinations of */ | |||
2627 | /* identical substrings is finite in length. */ | |||
2628 | ||||
2629 | /* For example _eq([l a* b r l a b* r]*, l, r) does terminate even though */ | |||
2630 | /* it contains a potentially infinite number of delimited substrings since */ | |||
2631 | /* the maximum length of the possible identical delimited substrings is finite. */ | |||
2632 | ||||
2633 | /* In this case the above example evaluates to the language [l a b r l a b r]* */ | |||
2634 | ||||
2635 | /* The algorithm: */ | |||
2636 | /* Input: L, left, right */ | |||
2637 | /* Output: the language that preserves only those strings in L */ | |||
2638 | /* where X is the same for all left X right sequences */ | |||
2639 | ||||
2640 | /* 1. mark all instances of left with ... LB and right with RB ... in L */ | |||
2641 | ||||
2642 | /* 2. split L into Leq and Lbypass */ | |||
2643 | /* where Lbypass are all those strings where LB RB sequences aren't */ | |||
2644 | /* properly nested (we must have ... LB ... RB ... LB ... RB ... etc.) */ | |||
2645 | /* Lbypass also includes all those with less than two bracketed */ | |||
2646 | /* instances, since we don't need to check equality if there's only */ | |||
2647 | /* one bracketed substring. */ | |||
2648 | /* We also remove the auxiliary LB and RB symbols from Lbypass */ | |||
2649 | ||||
2650 | /* 3. We extract all the possible symbols occurring between LB and RB */ | |||
2651 | /* from Leq */ | |||
2652 | ||||
2653 | /* 4. We create the transducer Move from all symbols in (3) */ | |||
2654 | /* Move = M(sym_1) | ... | M(sym_n) */ | |||
2655 | /* where M(a) is defined as [\LB* LB:a a:LB]* \LB* */ | |||
2656 | /* i.e. it rewrites bracketed strings such as "LB a b RB LB a b RB" */ | |||
2657 | /* to "a LB b RB a LB b RB" in effect moving brackets to the right */ | |||
2658 | /* one step for a symbol. */ | |||
2659 | ||||
2660 | /* 5. Leq = Cleanup(Leq) */ | |||
2661 | /* Cleanup removes LB RB sequences and, at the same time filters out */ | |||
2662 | /* any strings where we find both LB RB and LB X RB where X is not 0. */ | |||
2663 | /* since we know such sequences could not possibly be identical */ | |||
2664 | /* Cleanup is implemented by composing Leq with */ | |||
2665 | /* \LB* [LB:0 RB:0 \LB*]* | ~$[LB RB] */ | |||
2666 | /* - if the symbol LB does not occur on the lower side of Leq, goto(6) */ | |||
2667 | /* - else Leq = Move(Leq), goto(5) */ | |||
2668 | ||||
2669 | /* 6. Result = L .o. [Leq | Lbypass] */ | |||
2670 | ||||
2671 | int syms; | |||
2672 | struct sigma *sig; | |||
2673 | struct fsm *LB, *RB, *NOLB, *NORB, *InsertBrackets, *RemoveBrackets, *Lbracketed, *NOBR, *BracketFilter, *Lbypass, *Leq, *Labels, *Cleanup, *ThisMove, *ThisSymbol, *Move, *Result, *oldnet; | |||
2674 | ||||
2675 | oldnet = fsm_copy(net); | |||
2676 | ||||
2677 | /* LB = "@<eq<@" */ | |||
2678 | /* RB = "@>eq>@" */ | |||
2679 | ||||
2680 | LB = fsm_symbol("@<eq<@"); | |||
2681 | NOLB = fsm_minimize(fsm_term_negation(fsm_copy(LB))); | |||
2682 | RB = fsm_symbol("@>eq>@"); | |||
2683 | NORB = fsm_minimize(fsm_term_negation(fsm_copy(RB))); | |||
2684 | /* NOBR = ~$[LB|RB] */ | |||
2685 | NOBR = fsm_minimize(fsm_complement(fsm_contains(fsm_union(fsm_copy(LB),fsm_copy(RB))))); | |||
2686 | ||||
2687 | sigma_add("@<eq<@", net->sigma); | |||
2688 | sigma_add("@>eq>@", net->sigma); | |||
2689 | sigma_sort(net); | |||
2690 | ||||
2691 | /* Insert our aux markers into the language */ | |||
2692 | ||||
2693 | /* InsertBrackets = [~$[L|R] [L 0:LB|0:RB R]]* ~$[L|R]; */ | |||
2694 | ||||
2695 | InsertBrackets = fsm_minimize(fsm_concat(fsm_kleene_star(fsm_concat(fsm_complement(fsm_contains(fsm_union(fsm_copy(left),fsm_copy(right)))),fsm_union(fsm_concat(fsm_copy(left),fsm_cross_product(fsm_empty_string(),fsm_copy(LB))),fsm_concat(fsm_cross_product(fsm_empty_string(),fsm_copy(RB)),fsm_copy(right))))),fsm_complement(fsm_contains(fsm_union(fsm_copy(left),fsm_copy(right)))))); | |||
2696 | ||||
2697 | ||||
2698 | /* Lbracketed = L .o. InsertBrackets */ | |||
2699 | ||||
2700 | Lbracketed = fsm_compose(fsm_copy(net), InsertBrackets); | |||
2701 | ||||
2702 | /* Filter out improper nestings, or languages with less than two marker pairs */ | |||
2703 | ||||
2704 | /* BracketFilter = NOBR LB NOBR RB NOBR [LB NOBR RB NOBR]+ */ | |||
2705 | ||||
2706 | BracketFilter = fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(LB),fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(RB),fsm_concat(fsm_copy(NOBR),fsm_kleene_plus(fsm_concat(fsm_copy(LB),fsm_concat(fsm_copy(NOBR),fsm_concat(fsm_copy(RB),fsm_copy(NOBR)))))))))); | |||
2707 | ||||
2708 | /* RemoveBrackets = [LB:0|RB:0|NOBR]* */ | |||
2709 | /* Lbypass = [Lbracketed .o. ~BracketFilter .o. LB|RB -> 0] */ | |||
2710 | /* Leq = [Lbracketed .o. BracketFilter] */ | |||
2711 | ||||
2712 | RemoveBrackets = fsm_kleene_star(fsm_union(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_union(fsm_cross_product(fsm_copy(RB),fsm_empty_string()),fsm_copy(NOBR)))); | |||
2713 | ||||
2714 | ||||
2715 | Lbypass = fsm_lower(fsm_compose(fsm_copy(Lbracketed),fsm_compose(fsm_complement(fsm_copy(BracketFilter)),RemoveBrackets))); | |||
2716 | Leq = fsm_compose(Lbracketed, BracketFilter); | |||
2717 | ||||
2718 | /* Extract labels from lower side of L */ | |||
2719 | /* [Leq .o. [\LB:0* LB:0 \RB* RB:0]* \LB:0*].l */ | |||
2720 | ||||
2721 | Labels = fsm_sigma_pairs_net(fsm_lower(fsm_compose(fsm_copy(Leq),fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_cross_product(fsm_copy(NOLB),fsm_empty_string())),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_concat(fsm_kleene_star(fsm_copy(NORB)),fsm_cross_product(fsm_copy(RB),fsm_empty_string()))))),fsm_kleene_star(fsm_cross_product(fsm_copy(NOLB),fsm_empty_string())))))); | |||
2722 | ||||
2723 | /* Cleanup = \LB* [LB:0 RB:0 \LB*]* | ~$[LB RB] */ | |||
2724 | ||||
2725 | Cleanup = fsm_minimize(fsm_union(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_kleene_star(fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()),fsm_concat(fsm_cross_product(fsm_copy(RB),fsm_empty_string()),fsm_kleene_star(fsm_copy(NOLB)))))),fsm_complement(fsm_contains(fsm_concat(fsm_copy(LB),fsm_copy(RB)))))); | |||
2726 | ||||
2727 | /* Construct the move function */ | |||
2728 | ||||
2729 | Move = fsm_empty_string(); | |||
2730 | ||||
2731 | syms = 0; | |||
2732 | for (sig = Labels->sigma; sig != NULL((void*)0); sig = sig->next) { | |||
2733 | /* Unclear which is faster: the first or the second version */ | |||
2734 | /* ThisMove = [\LB* LB:X X:LB]* \LB* */ | |||
2735 | /* ThisMove = [\LB* LB:0 X 0:LB]* \LB* */ | |||
2736 | if (sig->number >= 3) { | |||
2737 | ThisSymbol = fsm_symbol(sig->symbol); | |||
2738 | //ThisMove = fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_copy(ThisSymbol)),fsm_cross_product(fsm_copy(ThisSymbol),fsm_copy(LB))))), fsm_kleene_star(fsm_copy(NOLB))); | |||
2739 | ThisMove = fsm_concat(fsm_kleene_star(fsm_concat(fsm_kleene_star(fsm_copy(NOLB)),fsm_concat(fsm_cross_product(fsm_copy(LB),fsm_empty_string()), fsm_concat(fsm_copy(ThisSymbol), fsm_cross_product(fsm_empty_string(),fsm_copy(LB)))))), fsm_kleene_star(fsm_copy(NOLB))); | |||
2740 | ||||
2741 | Move = fsm_union(Move, ThisMove); | |||
2742 | syms++; | |||
2743 | } | |||
2744 | } | |||
2745 | Move = fsm_minimize(Move); | |||
2746 | if (syms == 0) { | |||
2747 | //printf("no syms"); | |||
2748 | fsm_destroy(net); | |||
2749 | return(oldnet); | |||
2750 | } | |||
2751 | ||||
2752 | /* Move until no bracket symbols remain */ | |||
2753 | for (;;) { | |||
2754 | //printf("Zapping\n"); | |||
2755 | Leq = fsm_compose(Leq, fsm_copy(Cleanup)); | |||
2756 | if (!fsm_symbol_occurs(Leq, "@<eq<@", M_LOWER2)) | |||
2757 | break; | |||
2758 | Leq = fsm_compose(Leq, fsm_copy(Move)); | |||
2759 | //Leq = fsm_minimize(fsm_compose(Leq, fsm_copy(Move))); | |||
2760 | // printf("size: %i\n",Leq->statecount); | |||
2761 | } | |||
2762 | ||||
2763 | /* Result = L .o. [Leq | Lbypass] */ | |||
2764 | Result = fsm_minimize(fsm_compose(net, fsm_union(fsm_lower(Leq), Lbypass))); | |||
2765 | sigma_remove("@<eq<@", Result->sigma); | |||
2766 | sigma_remove("@>eq>@", Result->sigma); | |||
2767 | fsm_compact(Result); | |||
2768 | sigma_sort(Result); | |||
2769 | fsm_destroy(oldnet); | |||
2770 | return(Result); | |||
2771 | } | |||
2772 | ||||
2773 | struct fsm *fsm_invert(struct fsm *net) { | |||
2774 | struct fsm_state *fsm; | |||
2775 | int i, temp; | |||
2776 | ||||
2777 | fsm = net->states; | |||
2778 | for (i = 0; (fsm+i)->state_no != -1; i++) { | |||
2779 | temp = (fsm+i)->in; | |||
2780 | (fsm+i)->in = (fsm+i)->out; | |||
2781 | (fsm+i)->out = temp; | |||
2782 | } | |||
2783 | i = net->arcs_sorted_in; | |||
2784 | net->arcs_sorted_in = net->arcs_sorted_out; | |||
2785 | net->arcs_sorted_out = i; | |||
2786 | return (net); | |||
2787 | } | |||
2788 | ||||
2789 | struct fsm *fsm_sequentialize(struct fsm *net) { | |||
2790 | printf("Implementation pending\n"); | |||
2791 | return(net); | |||
2792 | } | |||
2793 | ||||
2794 | ||||
2795 | struct fsm *fsm_bimachine(struct fsm *net) { | |||
2796 | printf("implementation pending\n"); | |||
2797 | return(net); | |||
2798 | } | |||
2799 | ||||
2800 | /* _leftrewr(L, a:b) does a -> b || .#. L _ */ | |||
2801 | /* _leftrewr(?* L, a:b) does a -> b || L _ */ | |||
2802 | /* works only with single symbols, but is fast */ | |||
2803 | ||||
2804 | struct fsm *fsm_left_rewr(struct fsm *net, struct fsm *rewr) { | |||
2805 | struct fsm_construct_handle *outh; | |||
2806 | struct fsm_read_handle *inh; | |||
2807 | struct fsm *newnet; | |||
2808 | int i, maxsigma, *sigmatable, currstate, sinkstate, seensource, innum, outnum, relabelin, relabelout, addedsink; | |||
2809 | ||||
2810 | fsm_merge_sigma(net, rewr); | |||
2811 | relabelin = rewr->states->in; | |||
2812 | relabelout = rewr->states->out; | |||
2813 | ||||
2814 | inh = fsm_read_init(net); | |||
2815 | sinkstate = fsm_get_num_states(inh); | |||
2816 | outh = fsm_construct_init(net->name); | |||
2817 | fsm_construct_copy_sigma(outh, net->sigma); | |||
2818 | maxsigma = sigma_max(net->sigma); | |||
2819 | maxsigma++; | |||
2820 | sigmatable = malloc(maxsigma * sizeof(int)); | |||
2821 | for (i = 0; i < maxsigma; i++) { | |||
2822 | *(sigmatable+i) = -1; | |||
2823 | } | |||
2824 | addedsink = 0; | |||
2825 | while ((currstate = fsm_get_next_state(inh)) != -1) { | |||
2826 | seensource = 0; | |||
2827 | fsm_construct_set_final(outh, currstate); | |||
2828 | ||||
2829 | while (fsm_get_next_state_arc(inh)) { | |||
2830 | innum = fsm_get_arc_num_in(inh); | |||
2831 | outnum = fsm_get_arc_num_out(inh); | |||
2832 | *(sigmatable+innum) = currstate; | |||
2833 | if (innum == relabelin) { | |||
2834 | seensource = 1; | |||
2835 | if (fsm_read_is_final(inh, currstate)) { | |||
2836 | outnum = relabelout; | |||
2837 | } | |||
2838 | } | |||
2839 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), innum, outnum); | |||
2840 | } | |||
2841 | for (i = 2; i < maxsigma; i++) { | |||
2842 | if (*(sigmatable+i) != currstate && i != relabelin) { | |||
2843 | fsm_construct_add_arc_nums(outh, currstate, sinkstate, i, i); | |||
2844 | addedsink = 1; | |||
2845 | } | |||
2846 | } | |||
2847 | if (seensource == 0) { | |||
2848 | addedsink = 1; | |||
2849 | if (fsm_read_is_final(inh, currstate)) { | |||
2850 | fsm_construct_add_arc_nums(outh, currstate, sinkstate, relabelin, relabelout); | |||
2851 | } else { | |||
2852 | fsm_construct_add_arc_nums(outh, currstate, sinkstate, relabelin, relabelin); | |||
2853 | } | |||
2854 | } | |||
2855 | } | |||
2856 | if (addedsink) { | |||
2857 | for (i = 2; i < maxsigma; i++) { | |||
2858 | fsm_construct_add_arc_nums(outh, sinkstate, sinkstate, i, i); | |||
2859 | } | |||
2860 | fsm_construct_set_final(outh, sinkstate); | |||
2861 | } | |||
2862 | fsm_construct_set_initial(outh, 0); | |||
2863 | fsm_read_done(inh); | |||
2864 | newnet = fsm_construct_done(outh); | |||
2865 | free(sigmatable); | |||
2866 | fsm_destroy(net); | |||
2867 | fsm_destroy(rewr); | |||
2868 | return(newnet); | |||
2869 | } | |||
2870 | ||||
2871 | struct fsm *fsm_add_sink(struct fsm *net, int final) { | |||
2872 | struct fsm_construct_handle *outh; | |||
2873 | struct fsm_read_handle *inh; | |||
2874 | struct fsm *newnet; | |||
2875 | int i, maxsigma, *sigmatable, currstate, sinkstate; | |||
2876 | ||||
2877 | inh = fsm_read_init(net); | |||
2878 | sinkstate = fsm_get_num_states(inh); | |||
2879 | outh = fsm_construct_init(net->name); | |||
2880 | fsm_construct_copy_sigma(outh, net->sigma); | |||
2881 | maxsigma = sigma_max(net->sigma); | |||
2882 | maxsigma++; | |||
2883 | sigmatable = malloc(maxsigma * sizeof(int)); | |||
2884 | for (i = 0; i < maxsigma; i++) { | |||
2885 | *(sigmatable+i) = -1; | |||
2886 | } | |||
2887 | while ((currstate = fsm_get_next_state(inh)) != -1) { | |||
2888 | while (fsm_get_next_state_arc(inh)) { | |||
2889 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh)); | |||
2890 | *(sigmatable+fsm_get_arc_num_in(inh)) = currstate; | |||
2891 | } | |||
2892 | for (i = 2; i < maxsigma; i++) { | |||
2893 | if (*(sigmatable+i) != currstate) { | |||
2894 | fsm_construct_add_arc_nums(outh, currstate, sinkstate, i, i); | |||
2895 | } | |||
2896 | } | |||
2897 | } | |||
2898 | for (i = 2; i < maxsigma; i++) { | |||
2899 | fsm_construct_add_arc_nums(outh, sinkstate, sinkstate, i, i); | |||
2900 | } | |||
2901 | ||||
2902 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
2903 | fsm_construct_set_final(outh, i); | |||
2904 | } | |||
2905 | if (final == 1) { | |||
2906 | fsm_construct_set_final(outh, sinkstate); | |||
2907 | } | |||
2908 | fsm_construct_set_initial(outh, 0); | |||
2909 | fsm_read_done(inh); | |||
2910 | newnet = fsm_construct_done(outh); | |||
2911 | fsm_destroy(net); | |||
2912 | return(newnet); | |||
2913 | } | |||
2914 | ||||
2915 | /* _addfinalloop(L, "#":0) adds "#":0 at all final states */ | |||
2916 | /* _addnonfinalloop(L, "#":0) adds "#":0 at all nonfinal states */ | |||
2917 | /* _addloop(L, "#":0) adds "#":0 at all states */ | |||
2918 | ||||
2919 | /* Adds loops at finals = 0 nonfinals, finals = 1 finals, finals = 2, all */ | |||
2920 | ||||
2921 | struct fsm *fsm_add_loop(struct fsm *net, struct fsm *marker, int finals) { | |||
2922 | struct fsm *newnet; | |||
2923 | struct fsm_construct_handle *outh; | |||
2924 | struct fsm_read_handle *inh, *minh; | |||
2925 | int i; | |||
2926 | ||||
2927 | inh = fsm_read_init(net); | |||
2928 | minh = fsm_read_init(marker); | |||
2929 | ||||
2930 | outh = fsm_construct_init(net->name); | |||
2931 | fsm_construct_copy_sigma(outh, net->sigma); | |||
2932 | ||||
2933 | while (fsm_get_next_arc(inh)) { | |||
2934 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh)); | |||
2935 | } | |||
2936 | /* Where to put the loops */ | |||
2937 | if (finals == 1) { | |||
2938 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
2939 | fsm_construct_set_final(outh, i); | |||
2940 | fsm_read_reset(minh); | |||
2941 | while (fsm_get_next_arc(minh)) { | |||
2942 | fsm_construct_add_arc(outh, i, i, fsm_get_arc_in(minh), fsm_get_arc_out(minh)); | |||
2943 | } | |||
2944 | } | |||
2945 | } else if (finals == 0 || finals == 2) { | |||
2946 | for (i=0; i < net->statecount; i++) { | |||
2947 | if (finals == 2 || !fsm_read_is_final(inh, i)) { | |||
2948 | fsm_read_reset(minh); | |||
2949 | while (fsm_get_next_arc(minh)) { | |||
2950 | fsm_construct_add_arc(outh, i, i, fsm_get_arc_in(minh), fsm_get_arc_out(minh)); | |||
2951 | } | |||
2952 | } | |||
2953 | } | |||
2954 | } | |||
2955 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
2956 | fsm_construct_set_final(outh, i); | |||
2957 | } | |||
2958 | fsm_construct_set_initial(outh, 0); | |||
2959 | fsm_read_done(inh); | |||
2960 | fsm_read_done(minh); | |||
2961 | newnet = fsm_construct_done(outh); | |||
2962 | fsm_destroy(net); | |||
2963 | return(newnet); | |||
2964 | } | |||
2965 | ||||
2966 | /* _marktail(?* L, 0:x) does ~$x .o. [..] -> x || L _ ; */ | |||
2967 | /* _marktail(?* R.r, 0:x).r does ~$x .o. [..] -> x || _ R */ | |||
2968 | ||||
2969 | struct fsm *fsm_mark_fsm_tail(struct fsm *net, struct fsm *marker) { | |||
2970 | struct fsm *newnet; | |||
2971 | struct fsm_construct_handle *outh; | |||
2972 | struct fsm_read_handle *inh, *minh; | |||
2973 | int i, *mappings, maxstate, target, newtarget; | |||
2974 | ||||
2975 | inh = fsm_read_init(net); | |||
2976 | minh = fsm_read_init(marker); | |||
2977 | ||||
2978 | outh = fsm_construct_init(net->name); | |||
2979 | fsm_construct_copy_sigma(outh, net->sigma); | |||
2980 | ||||
2981 | mappings = calloc(net->statecount, sizeof(int)); | |||
2982 | maxstate = net->statecount; | |||
2983 | ||||
2984 | while (fsm_get_next_arc(inh)) { | |||
2985 | target = fsm_get_arc_target(inh); | |||
2986 | if (fsm_read_is_final(inh, target)) { | |||
2987 | if (!*(mappings+target)) { | |||
2988 | newtarget = maxstate; | |||
2989 | *(mappings+target) = newtarget; | |||
2990 | fsm_read_reset(minh); | |||
2991 | while (fsm_get_next_arc(minh)) { | |||
2992 | fsm_construct_add_arc(outh, newtarget, target, fsm_get_arc_in(minh), fsm_get_arc_out(minh)); | |||
2993 | } | |||
2994 | maxstate++; | |||
2995 | } else { | |||
2996 | newtarget = *(mappings+target); | |||
2997 | } | |||
2998 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), newtarget, fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh)); | |||
2999 | } else { | |||
3000 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), target, fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh)); | |||
3001 | } | |||
3002 | } | |||
3003 | for (i=0; i < net->statecount; i++) { | |||
3004 | fsm_construct_set_final(outh,i); | |||
3005 | } | |||
3006 | ||||
3007 | fsm_construct_set_initial(outh, 0); | |||
3008 | fsm_read_done(inh); | |||
3009 | fsm_read_done(minh); | |||
3010 | newnet = fsm_construct_done(outh); | |||
3011 | fsm_destroy(net); | |||
3012 | free(mappings); | |||
3013 | return(newnet); | |||
3014 | } | |||
3015 | ||||
3016 | struct fsm *fsm_context_restrict(struct fsm *X, struct fsmcontexts *LR) { | |||
3017 | ||||
3018 | struct fsm *Var, *Notvar, *UnionL, *UnionP, *Result, *Word; | |||
3019 | struct fsmcontexts *pairs; | |||
3020 | ||||
3021 | /* [.#. \.#.* .#.]-`[[ [\X* X C X \X*]&~[\X* [L1 X \X* X R1|...|Ln X \X* X Rn] \X*]],X,0] */ | |||
3022 | /* Where X = variable symbol */ | |||
3023 | /* The above only works if we do the subtraction iff the right hand side contains .#. in */ | |||
3024 | /* its alphabet */ | |||
3025 | /* A more generic formula is the following: */ | |||
3026 | ||||
3027 | /* `[[[(?) \.#.* (?)] - `[[[\X* X C X \X*] - [\X* [L1 X \X* X R1|...|Ln X \X* X Rn] \X*] ],X,0],.#.,0]; */ | |||
3028 | /* Here, the LHS is another way of saying ~[?+ .#. ?+] */ | |||
3029 | ||||
3030 | Var = fsm_symbol("@VARX@"); | |||
3031 | Notvar = fsm_minimize(fsm_kleene_star(fsm_term_negation(fsm_symbol("@VARX@")))); | |||
3032 | ||||
3033 | /* We add the variable symbol to all alphabets to avoid ? mathing it */ | |||
3034 | /* which would cause extra nondeterminism */ | |||
3035 | sigma_add("@VARX@", X->sigma); | |||
3036 | sigma_sort(X); | |||
3037 | ||||
3038 | /* Also, if any L or R is undeclared we add 0 */ | |||
3039 | for (pairs = LR; pairs != NULL((void*)0); pairs = pairs->next) { | |||
3040 | if (pairs->left == NULL((void*)0)) { | |||
3041 | pairs->left = fsm_empty_string(); | |||
3042 | } else { | |||
3043 | sigma_add("@VARX@",pairs->left->sigma); | |||
3044 | sigma_substitute(".#.", "@#@", pairs->left->sigma); | |||
3045 | sigma_sort(pairs->left); | |||
3046 | } | |||
3047 | if (pairs->right == NULL((void*)0)) { | |||
3048 | pairs->right = fsm_empty_string(); | |||
3049 | } else { | |||
3050 | sigma_add("@VARX@",pairs->right->sigma); | |||
3051 | sigma_substitute(".#.", "@#@", pairs->right->sigma); | |||
3052 | sigma_sort(pairs->right); | |||
3053 | } | |||
3054 | } | |||
3055 | ||||
3056 | UnionP = fsm_empty_set(); | |||
3057 | ||||
3058 | for (pairs = LR; pairs != NULL((void*)0) ; pairs = pairs->next) { | |||
3059 | UnionP = fsm_minimize(fsm_union(fsm_minimize(fsm_concat(fsm_copy(pairs->left),fsm_concat(fsm_copy(Var),fsm_concat(fsm_copy(Notvar),fsm_concat(fsm_copy(Var),fsm_copy(pairs->right)))))), UnionP)); | |||
3060 | } | |||
3061 | ||||
3062 | UnionL = fsm_minimize(fsm_concat(fsm_copy(Notvar),fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(X), fsm_concat(fsm_copy(Var),fsm_copy(Notvar)))))); | |||
3063 | ||||
3064 | Result = fsm_intersect(UnionL, fsm_complement(fsm_concat(fsm_copy(Notvar),fsm_minimize(fsm_concat(fsm_copy(UnionP),fsm_copy(Notvar)))))); | |||
3065 | if (sigma_find("@VARX@", Result->sigma) != -1) { | |||
3066 | Result = fsm_complement(fsm_substitute_symbol(Result, "@VARX@","@_EPSILON_SYMBOL_@")); | |||
3067 | } else { | |||
3068 | Result = fsm_complement(Result); | |||
3069 | } | |||
3070 | ||||
3071 | if (sigma_find("@#@", Result->sigma) != -1) { | |||
3072 | Word = fsm_minimize(fsm_concat(fsm_symbol("@#@"),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol("@#@"))),fsm_symbol("@#@")))); | |||
3073 | Result = fsm_intersect(Word, Result); | |||
3074 | Result = fsm_substitute_symbol(Result, "@#@", "@_EPSILON_SYMBOL_@"); | |||
3075 | } | |||
3076 | fsm_destroy(UnionP); | |||
3077 | fsm_destroy(Var); | |||
3078 | fsm_destroy(Notvar); | |||
3079 | fsm_destroy(X); | |||
3080 | fsm_clear_contexts(pairs); | |||
3081 | return(Result); | |||
3082 | } | |||
3083 | ||||
3084 | struct fsm *fsm_flatten(struct fsm *net, struct fsm *epsilon) { | |||
3085 | struct fsm *newnet; | |||
3086 | struct fsm_construct_handle *outh; | |||
3087 | struct fsm_read_handle *inh, *eps; | |||
3088 | int i, maxstate, in, out, target; | |||
3089 | char *epssym, *instring, *outstring; | |||
3090 | ||||
3091 | net = fsm_minimize(net); | |||
3092 | ||||
3093 | inh = fsm_read_init(net); | |||
3094 | eps = fsm_read_init(epsilon); | |||
3095 | if (fsm_get_next_arc(eps) == -1) { | |||
3096 | fsm_destroy(net); | |||
3097 | fsm_destroy(epsilon); | |||
3098 | return NULL((void*)0); | |||
3099 | } | |||
3100 | epssym = strdup(fsm_get_arc_in(eps)); | |||
3101 | fsm_read_done(eps); | |||
3102 | ||||
3103 | outh = fsm_construct_init(net->name); | |||
3104 | maxstate = net->statecount; | |||
3105 | ||||
3106 | fsm_construct_copy_sigma(outh, net->sigma); | |||
3107 | ||||
3108 | while (fsm_get_next_arc(inh)) { | |||
3109 | target = fsm_get_arc_target(inh); | |||
3110 | in = fsm_get_arc_num_in(inh); | |||
3111 | out = fsm_get_arc_num_out(inh); | |||
3112 | if (in == EPSILON0 || out == EPSILON0) { | |||
3113 | instring = fsm_get_arc_in(inh); | |||
3114 | outstring = fsm_get_arc_out(inh); | |||
3115 | if (in == EPSILON0) { instring = epssym; } | |||
3116 | if (out == EPSILON0) { outstring = epssym; } | |||
3117 | ||||
3118 | fsm_construct_add_arc(outh, fsm_get_arc_source(inh), maxstate, instring, instring); | |||
3119 | fsm_construct_add_arc(outh, maxstate, target, outstring, outstring); | |||
3120 | } else { | |||
3121 | fsm_construct_add_arc_nums(outh, fsm_get_arc_source(inh), maxstate, in, in); | |||
3122 | fsm_construct_add_arc_nums(outh, maxstate, target, out, out); | |||
3123 | } | |||
3124 | maxstate++; | |||
3125 | } | |||
3126 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
3127 | fsm_construct_set_final(outh, i); | |||
3128 | } | |||
3129 | while ((i = fsm_get_next_initial(inh)) != -1) { | |||
3130 | fsm_construct_set_initial(outh, i); | |||
3131 | } | |||
3132 | ||||
3133 | fsm_read_done(inh); | |||
3134 | newnet = fsm_construct_done(outh); | |||
3135 | fsm_destroy(net); | |||
3136 | fsm_destroy(epsilon); | |||
3137 | free(epssym); | |||
3138 | return(newnet); | |||
3139 | } | |||
3140 | ||||
3141 | /* Removes IDENTITY and UNKNOWN transitions. If mode = 1, only removes UNKNOWNs */ | |||
3142 | struct fsm *fsm_close_sigma(struct fsm *net, int mode) { | |||
3143 | struct fsm *newnet; | |||
3144 | struct fsm_construct_handle *newh; | |||
3145 | struct fsm_read_handle *inh; | |||
3146 | int i; | |||
3147 | ||||
3148 | inh = fsm_read_init(net); | |||
3149 | newh = fsm_construct_init(net->name); | |||
3150 | fsm_construct_copy_sigma(newh, net->sigma); | |||
3151 | ||||
3152 | while (fsm_get_next_arc(inh)) { | |||
3153 | if ((fsm_get_arc_num_in(inh) != UNKNOWN1 && fsm_get_arc_num_in(inh) != IDENTITY2 && fsm_get_arc_num_out(inh) != UNKNOWN1 && fsm_get_arc_num_out(inh) != IDENTITY2) || (mode == 1 && fsm_get_arc_num_in(inh) != UNKNOWN1 && fsm_get_arc_num_out(inh) != UNKNOWN1)) | |||
3154 | fsm_construct_add_arc_nums(newh, fsm_get_arc_source(inh), fsm_get_arc_target(inh), fsm_get_arc_num_in(inh), fsm_get_arc_num_out(inh)); | |||
3155 | } | |||
3156 | while ((i = fsm_get_next_final(inh)) != -1) { | |||
3157 | fsm_construct_set_final(newh, i); | |||
3158 | } | |||
3159 | while ((i = fsm_get_next_initial(inh)) != -1) { | |||
3160 | fsm_construct_set_initial(newh, i); | |||
3161 | } | |||
3162 | fsm_read_done(inh); | |||
3163 | newnet = fsm_construct_done(newh); | |||
3164 | fsm_destroy(net); | |||
3165 | return(fsm_minimize(newnet)); | |||
3166 | } |