File: | structures.c |
Warning: | line 745, column 21 Result of 'calloc' is converted to a pointer of type 'short', which is incompatible with sizeof operand type 'int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Foma: a finite-state toolkit and library. */ |
2 | /* Copyright © 2008-2021 Mans Hulden */ |
3 | |
4 | /* This file is part of foma. */ |
5 | |
6 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ |
7 | /* you may not use this file except in compliance with the License. */ |
8 | /* You may obtain a copy of the License at */ |
9 | |
10 | /* http://www.apache.org/licenses/LICENSE-2.0 */ |
11 | |
12 | /* Unless required by applicable law or agreed to in writing, software */ |
13 | /* distributed under the License is distributed on an "AS IS" BASIS, */ |
14 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ |
15 | /* See the License for the specific language governing permissions and */ |
16 | /* limitations under the License. */ |
17 | |
18 | #include <stdio.h> |
19 | #include <string.h> |
20 | #include <stdlib.h> |
21 | #include "foma.h" |
22 | |
23 | static struct defined_quantifiers *quantifiers; |
24 | struct _fsm_options fsm_options; |
25 | |
26 | char *fsm_get_library_version_string() { |
27 | static char s[20]; |
28 | sprintf(s,"%i.%i.%i%s",MAJOR_VERSION0,MINOR_VERSION10,BUILD_VERSION0,STATUS_VERSION"alpha"); |
29 | return(s); |
30 | } |
31 | |
32 | _Bool fsm_set_option(unsigned long long option, void *value) { |
33 | switch (option) { |
34 | case FSMO_SKIP_WORD_BOUNDARY_MARKER: |
35 | fsm_options.skip_word_boundary_marker = *((_Bool*)value); |
36 | return 1; |
37 | } |
38 | return 0; |
39 | } |
40 | |
41 | void *fsm_get_option(unsigned long long option) { |
42 | switch (option) { |
43 | case FSMO_SKIP_WORD_BOUNDARY_MARKER: |
44 | return &fsm_options.skip_word_boundary_marker; |
45 | } |
46 | return NULL((void*)0); |
47 | } |
48 | |
49 | int linesortcompin(struct fsm_state *a, struct fsm_state *b) { |
50 | return (a->in - b->in); |
51 | } |
52 | |
53 | int linesortcompout(struct fsm_state *a, struct fsm_state *b) { |
54 | return (a->out - b->out); |
55 | } |
56 | |
57 | void fsm_sort_arcs(struct fsm *net, int direction) { |
58 | /* direction 1 = in, direction = 2, out */ |
59 | struct fsm_state *fsm; |
60 | int i, lasthead, numlines; |
61 | int(*scin)() = linesortcompin; |
62 | int(*scout)() = linesortcompout; |
63 | fsm = net->states; |
64 | for (i=0, numlines = 0, lasthead = 0 ; (fsm+i)->state_no != -1; i++) { |
65 | if ((fsm+i)->state_no != (fsm+i+1)->state_no || (fsm+i)->target == -1) { |
66 | numlines++; |
67 | if ((fsm+i)->target == -1) { |
68 | numlines--; |
69 | } |
70 | if (numlines > 1) { |
71 | /* Sort, set numlines = 0 */ |
72 | if (direction == 1) |
73 | qsort(fsm+lasthead, numlines, sizeof(struct fsm_state), scin); |
74 | else |
75 | qsort(fsm+lasthead, numlines, sizeof(struct fsm_state), scout); |
76 | } |
77 | numlines = 0; |
78 | lasthead = i + 1; |
79 | continue; |
80 | } |
81 | numlines++; |
82 | } |
83 | if (net->arity == 1) { |
84 | net->arcs_sorted_in = 1; |
85 | net->arcs_sorted_out = 1; |
86 | return; |
87 | } |
88 | if (direction == 1) { |
89 | net->arcs_sorted_in = 1; |
90 | net->arcs_sorted_out = 0; |
91 | } |
92 | if (direction == 2) { |
93 | net->arcs_sorted_out = 1; |
94 | net->arcs_sorted_in = 0; |
95 | } |
96 | } |
97 | |
98 | struct state_array *map_firstlines(struct fsm *net) { |
99 | struct fsm_state *fsm; |
100 | struct state_array *sa; |
101 | int i, sold; |
102 | sold = -1; |
103 | sa = malloc(sizeof(struct state_array)*((net->statecount)+1)); |
104 | fsm = net->states; |
105 | for (i=0; (fsm+i)->state_no != -1; i++) { |
106 | if ((fsm+i)->state_no != sold) { |
107 | (sa+((fsm+i)->state_no))->transitions = fsm+i; |
108 | sold = (fsm+i)->state_no; |
109 | } |
110 | } |
111 | return(sa); |
112 | } |
113 | |
114 | struct fsm *fsm_boolean(int value) { |
115 | if (value == 0) |
116 | return (fsm_empty_set()); |
117 | else |
118 | return(fsm_empty_string()); |
119 | } |
120 | |
121 | struct fsm *fsm_sigma_net(struct fsm *net) { |
122 | /* Extract sigma and create net with one arc */ |
123 | /* from state 0 to state 1 with each (state 1 is final) */ |
124 | struct sigma *sig; |
125 | int pathcount; |
126 | |
127 | if (sigma_size(net->sigma) == 0) { |
128 | fsm_destroy(net); |
129 | return(fsm_empty_set()); |
130 | } |
131 | |
132 | fsm_state_init(sigma_max(net->sigma)); |
133 | fsm_state_set_current_state(0, 0, 1); |
134 | pathcount = 0; |
135 | for (sig = net->sigma; sig != NULL((void*)0); sig = sig->next) { |
136 | if (sig->number >=3 || sig->number == IDENTITY2) { |
137 | pathcount++; |
138 | fsm_state_add_arc(0,sig->number, sig->number, 1, 0, 1); |
139 | } |
140 | } |
141 | fsm_state_end_state(); |
142 | fsm_state_set_current_state(1, 1, 0); |
143 | fsm_state_end_state(); |
144 | free(net->states); |
145 | fsm_state_close(net); |
146 | net->is_minimized = YES1; |
147 | net->is_loop_free = YES1; |
148 | net->pathcount = pathcount; |
149 | sigma_cleanup(net, 1); |
150 | return(net); |
151 | } |
152 | |
153 | struct fsm *fsm_sigma_pairs_net(struct fsm *net) { |
154 | /* Create FSM of attested pairs */ |
155 | struct fsm_state *fsm; |
156 | char *pairs; |
157 | short int in, out; |
158 | int i, pathcount, smax; |
159 | |
160 | smax = sigma_max(net->sigma)+1; |
161 | pairs = calloc(smax*smax, sizeof(char)); |
162 | |
163 | fsm_state_init(sigma_max(net->sigma)); |
164 | fsm_state_set_current_state(0, 0, 1); |
165 | pathcount = 0; |
166 | for (fsm = net->states, i=0; (fsm+i)->state_no != -1; i++) { |
167 | if ((fsm+i)->target == -1) |
168 | continue; |
169 | in = (fsm+i)->in; |
170 | out = (fsm+i)->out; |
171 | if (*(pairs+smax*in+out) == 0) { |
172 | fsm_state_add_arc(0,in,out, 1, 0, 1); |
173 | *(pairs+smax*in+out) = 1; |
174 | pathcount++; |
175 | } |
176 | } |
177 | fsm_state_end_state(); |
178 | fsm_state_set_current_state(1, 1, 0); |
179 | fsm_state_end_state(); |
180 | |
181 | free(pairs); |
182 | free(net->states); |
183 | |
184 | fsm_state_close(net); |
185 | if (pathcount == 0) { |
186 | fsm_destroy(net); |
187 | return(fsm_empty_set()); |
188 | } |
189 | net->is_minimized = YES1; |
190 | net->is_loop_free = YES1; |
191 | net->pathcount = pathcount; |
192 | sigma_cleanup(net, 1); |
193 | return(net); |
194 | } |
195 | |
196 | int fsm_sigma_destroy(struct sigma *sigma) { |
197 | struct sigma *sig, *sigp; |
198 | for (sig = sigma, sigp = NULL((void*)0); sig != NULL((void*)0); sig = sigp) { |
199 | sigp = sig->next; |
200 | if (sig->symbol != NULL((void*)0)) { |
201 | free(sig->symbol); |
202 | sig->symbol = NULL((void*)0); |
203 | } |
204 | free(sig); |
205 | } |
206 | return 1; |
207 | } |
208 | |
209 | int fsm_destroy(struct fsm *net) { |
210 | if (net == NULL((void*)0)) { |
211 | return 0; |
212 | } |
213 | if (net->medlookup != NULL((void*)0) && net->medlookup->confusion_matrix != NULL((void*)0)) { |
214 | free(net->medlookup->confusion_matrix); |
215 | net->medlookup->confusion_matrix = NULL((void*)0); |
216 | } |
217 | if (net->medlookup != NULL((void*)0)) { |
218 | free(net->medlookup); |
219 | net->medlookup = NULL((void*)0); |
220 | } |
221 | fsm_sigma_destroy(net->sigma); |
222 | net->sigma = NULL((void*)0); |
223 | if (net->states != NULL((void*)0)) { |
224 | free(net->states); |
225 | net->states = NULL((void*)0); |
226 | } |
227 | free(net); |
228 | return(1); |
229 | } |
230 | |
231 | struct fsm *fsm_create (char *name) { |
232 | if (strlen(name) > FSM_NAME_LEN40) { |
233 | printf("Network name '%s' should consist of at most %d characters.\n", name, FSM_NAME_LEN40); |
234 | } |
235 | struct fsm *fsm; |
236 | fsm = malloc(sizeof(struct fsm)); |
237 | strncpy(fsm->name, name, FSM_NAME_LEN40); |
238 | fsm->arity = 1; |
239 | fsm->arccount = 0; |
240 | fsm->is_deterministic = NO0; |
241 | fsm->is_pruned = NO0; |
242 | fsm->is_minimized = NO0; |
243 | fsm->is_epsilon_free = NO0; |
244 | fsm->is_loop_free = NO0; |
245 | fsm->arcs_sorted_in = NO0; |
246 | fsm->arcs_sorted_out = NO0; |
247 | fsm->sigma = sigma_create(); |
248 | fsm->states = NULL((void*)0); |
249 | fsm->medlookup = NULL((void*)0); |
250 | return(fsm); |
251 | } |
252 | |
253 | struct fsm *fsm_empty_string() { |
254 | struct fsm *net; |
255 | net = fsm_create(""); |
256 | net->states = malloc(sizeof(struct fsm_state)*2); |
257 | add_fsm_arc(net->states, 0, 0, -1, -1, -1, 1, 1); |
258 | add_fsm_arc(net->states, 1, -1, -1, -1, -1, -1, -1); |
259 | fsm_update_flags(net,YES1,YES1,YES1,YES1,YES1,NO0); |
260 | net->statecount = 1; |
261 | net->finalcount = 1; |
262 | net->arccount = 0; |
263 | net->linecount = 2; |
264 | net->pathcount = 1; |
265 | return(net); |
266 | } |
267 | |
268 | struct fsm *fsm_identity() { |
269 | struct fsm *net; |
270 | struct sigma *sigma; |
271 | net = fsm_create(""); |
272 | free(net->sigma); |
273 | net->states = malloc(sizeof(struct fsm_state)*3); |
274 | add_fsm_arc(net->states, 0, 0, 2, 2, 1, 0, 1); |
275 | add_fsm_arc(net->states, 1, 1, -1, -1, -1, 1, 0); |
276 | add_fsm_arc(net->states, 2, -1, -1, -1, -1, -1, -1); |
277 | sigma = malloc(sizeof(struct sigma)); |
278 | sigma->number = IDENTITY2; |
279 | sigma->symbol = strdup("@_IDENTITY_SYMBOL_@"); |
280 | sigma->next = NULL((void*)0); |
281 | net->sigma = sigma; |
282 | fsm_update_flags(net,YES1,YES1,YES1,YES1,YES1,NO0); |
283 | net->statecount = 2; |
284 | net->finalcount = 1; |
285 | net->arccount = 1; |
286 | net->linecount = 3; |
287 | net->pathcount = 1; |
288 | return(net); |
289 | } |
290 | |
291 | struct fsm *fsm_empty_set() { |
292 | struct fsm *net; |
293 | net = fsm_create(""); |
294 | net->states = fsm_empty(); |
295 | fsm_update_flags(net,YES1,YES1,YES1,YES1,YES1,NO0); |
296 | net->statecount = 1; |
297 | net->finalcount = 0; |
298 | net->arccount = 0; |
299 | net->linecount = 2; |
300 | net->pathcount = 0; |
301 | return(net); |
302 | } |
303 | |
304 | struct fsm_state *fsm_empty() { |
305 | struct fsm_state *new_fsm; |
306 | new_fsm = malloc(sizeof(struct fsm_state)*2); |
307 | add_fsm_arc(new_fsm, 0, 0, -1, -1, -1, 0, 1); |
308 | add_fsm_arc(new_fsm, 1, -1, -1, -1, -1, -1, -1); |
309 | return(new_fsm); |
310 | } |
311 | |
312 | int fsm_isuniversal(struct fsm *net) { |
313 | struct fsm_state *fsm; |
314 | net = fsm_minimize(net); |
315 | fsm_compact(net); |
316 | fsm = net->states; |
317 | if ((fsm->target == 0 && fsm->final_state == 1 && (fsm+1)->state_no == 0) && |
318 | (fsm->in == IDENTITY2 && fsm->out == IDENTITY2) && |
319 | ((fsm+1)->state_no == -1) && |
320 | (sigma_max(net->sigma)<3) ) { |
321 | return 1; |
322 | } else { |
323 | return 0; |
324 | } |
325 | } |
326 | |
327 | int fsm_isempty(struct fsm *net) { |
328 | int result; |
329 | struct fsm *minimal = fsm_minimize(fsm_copy(net)); |
330 | struct fsm_state *fsm = minimal->states; |
331 | if (fsm->target == -1 && fsm->final_state == 0 && (fsm+1)->state_no == -1) |
332 | result = 1; |
333 | else |
334 | result = 0; |
335 | fsm_destroy(minimal); |
336 | return result; |
337 | } |
338 | |
339 | int fsm_issequential(struct fsm *net) { |
340 | int i, *sigtable, sequential, seentrans, epstrans, laststate, insym; |
341 | struct fsm_state *fsm; |
342 | sigtable = calloc(sigma_max(net->sigma)+1,sizeof(int)); |
343 | for (i = 0 ; i < sigma_max(net->sigma)+1; i++) { |
344 | sigtable[i] = -2; |
345 | } |
346 | fsm = net->states; |
347 | seentrans = epstrans = 0; |
348 | laststate = -1; |
349 | for (sequential = 1, i = 0; (fsm+i)->state_no != -1 ; i++) { |
350 | insym = (fsm+i)->in; |
351 | if (insym < 0) { |
352 | continue; |
353 | } |
354 | if ((fsm+i)->state_no != laststate) { |
355 | laststate = (fsm+i)->state_no; |
356 | epstrans = 0; |
357 | seentrans = 0; |
358 | } |
359 | if (*(sigtable+insym) == laststate || epstrans == 1) { |
360 | sequential = 0; |
361 | break; |
362 | } |
363 | if (insym == EPSILON0) { |
364 | if (epstrans == 1 || seentrans == 1) { |
365 | sequential = 0; |
366 | break; |
367 | } |
368 | epstrans = 1; |
369 | } |
370 | *(sigtable+insym) = laststate; |
371 | seentrans = 1; |
372 | } |
373 | free(sigtable); |
374 | if (!sequential) |
375 | printf("fails at state %i\n",(fsm+i)->state_no); |
376 | return(sequential); |
377 | } |
378 | |
379 | int fsm_isfunctional(struct fsm *net) { |
380 | struct fsm *tmp = fsm_minimize(fsm_compose(fsm_invert(fsm_copy(net)),fsm_copy(net))); |
381 | int result = fsm_isidentity(tmp); |
382 | fsm_destroy(tmp); |
383 | return result; |
384 | } |
385 | |
386 | int fsm_isunambiguous(struct fsm *net) { |
387 | struct fsm *loweruniqnet, *testnet; |
388 | int ret; |
389 | loweruniqnet = fsm_lowerdet(fsm_copy(net)); |
390 | testnet = fsm_minimize(fsm_compose(fsm_invert(fsm_copy(loweruniqnet)),fsm_copy(loweruniqnet))); |
391 | ret = fsm_isidentity(testnet); |
392 | fsm_destroy(loweruniqnet); |
393 | fsm_destroy(testnet); |
394 | return(ret); |
395 | } |
396 | |
397 | struct fsm *fsm_extract_ambiguous_domain(struct fsm *net) { |
398 | // define AmbiguousDom(T) [_loweruniq(T) .o. _notid(_loweruniq(T).i .o. _loweruniq(T))].u; |
399 | struct fsm *loweruniqnet, *result; |
400 | loweruniqnet = fsm_lowerdet(net); |
401 | result = fsm_topsort(fsm_minimize(fsm_upper(fsm_compose(fsm_copy(loweruniqnet),fsm_extract_nonidentity(fsm_compose(fsm_invert(fsm_copy(loweruniqnet)), fsm_copy(loweruniqnet))))))); |
402 | fsm_destroy(loweruniqnet); |
403 | sigma_cleanup(result,1); |
404 | fsm_compact(result); |
405 | sigma_sort(result); |
406 | return(result); |
407 | } |
408 | |
409 | struct fsm *fsm_extract_ambiguous(struct fsm *net) { |
410 | struct fsm *result; |
411 | result = fsm_topsort(fsm_minimize(fsm_compose(fsm_extract_ambiguous_domain(fsm_copy(net)), net))); |
412 | return(result); |
413 | } |
414 | |
415 | struct fsm *fsm_extract_unambiguous(struct fsm *net) { |
416 | struct fsm *result; |
417 | result = fsm_topsort(fsm_minimize(fsm_compose(fsm_complement(fsm_extract_ambiguous_domain(fsm_copy(net))), net))); |
418 | return(result); |
419 | } |
420 | |
421 | int fsm_isidentity(struct fsm *net) { |
422 | |
423 | /* We check whether a given transducer only produces identity relations */ |
424 | /* By doing a DFS on the graph, and storing, for each state a "discrepancy" */ |
425 | /* string, showing the current "debt" on the upper or lower side. */ |
426 | /* We immediately fail if: */ |
427 | /* a) we encounter an already seen state with a different current */ |
428 | /* discrepancy than what is stored in the state. */ |
429 | /* b) when traversing an arc, we encounter a mismatch between the arc and */ |
430 | /* the current discrepancy. */ |
431 | /* c) we encounter a final state and have a non-null current discrepancy. */ |
432 | /* d) we encounter @ with a non-null discrepancy anywhere. */ |
433 | /* e) we encounter ? anywhere. */ |
434 | |
435 | struct discrepancy { |
436 | short int *string; |
437 | short int length; |
438 | _Bool visited; |
439 | }; |
440 | |
441 | struct state_array *state_array; |
442 | struct fsm_state *curr_ptr; |
443 | int i, j, v, vp, num_states, factor = 0, newlength = 1, startfrom; |
444 | short int in, out, *newstring = NULL((void*)0); |
445 | struct discrepancy *discrepancy, *currd, *targetd; |
446 | struct fsm *tmp; |
447 | |
448 | tmp = fsm_minimize(fsm_copy(net)); |
449 | fsm_count(tmp); |
450 | |
451 | num_states = tmp->statecount; |
452 | discrepancy = calloc(num_states,sizeof(struct discrepancy)); |
453 | state_array = map_firstlines(tmp); |
454 | ptr_stack_clear(); |
455 | ptr_stack_push(state_array->transitions); |
456 | |
457 | while(!ptr_stack_isempty()) { |
458 | |
459 | curr_ptr = ptr_stack_pop(); |
460 | |
461 | nopop: |
462 | v = curr_ptr->state_no; /* source state number */ |
463 | vp = curr_ptr->target; /* target state number */ |
464 | currd = discrepancy+v; |
465 | if (v != -1) |
466 | currd->visited = 1; |
467 | if (v == -1 || vp == -1) |
468 | continue; |
469 | in = curr_ptr->in; |
470 | out = curr_ptr->out; |
471 | |
472 | targetd = discrepancy+vp; |
473 | /* Check arc and conditions e) d) b) */ |
474 | /* e) */ |
475 | if (in == UNKNOWN1 || out == UNKNOWN1) |
476 | goto fail; |
477 | /* d) */ |
478 | if (in == IDENTITY2 && currd->length != 0) |
479 | goto fail; |
480 | /* b) */ |
481 | if (currd->length != 0) { |
482 | if (currd->length > 0 && out != EPSILON0 && out != *(currd->string)) |
483 | goto fail; |
484 | if (currd->length < 0 && in != EPSILON0 && in != *(currd->string)) |
485 | goto fail; |
486 | } |
487 | if (currd->length == 0 && in != out && in != EPSILON0 && out != EPSILON0) { |
488 | goto fail; |
489 | } |
490 | |
491 | /* Calculate new discrepancy */ |
492 | if (currd->length != 0) { |
493 | if (in != EPSILON0 && out != EPSILON0) |
494 | factor = 0; |
495 | else if (in == EPSILON0) |
496 | factor = -1; |
497 | else if (out == EPSILON0) |
498 | factor = 1; |
499 | |
500 | newlength = currd->length + factor; |
501 | startfrom = (abs(newlength) <= abs(currd->length)) ? 1 : 0; |
502 | |
503 | } else if (currd->length == 0) { |
504 | if (in != EPSILON0 && out != EPSILON0) { |
505 | newlength = 0; |
506 | } else { |
507 | newlength = (out == EPSILON0) ? 1 : -1; |
508 | } |
509 | startfrom = 0; |
510 | } |
511 | |
512 | if (newstring != NULL((void*)0)) { |
513 | free(newstring); |
514 | newstring = NULL((void*)0); |
515 | } |
516 | newstring = calloc(abs(newlength),sizeof(int)); |
517 | |
518 | for (i = startfrom, j = 0; i < abs(currd->length); i++, j++) { |
519 | *(newstring+j) = *((currd->string)+i); |
520 | } |
521 | if (newlength != 0) { |
522 | if (currd->length > 0 && newlength >= currd->length) { |
523 | *(newstring+j) = in; |
524 | } |
525 | if (currd->length < 0 && newlength <= currd->length) { |
526 | *(newstring+j) = out; |
527 | } |
528 | if (currd->length == 0 && newlength < currd->length) { |
529 | *(newstring+j) = out; |
530 | } |
531 | if (currd->length == 0 && newlength > currd->length) { |
532 | *(newstring+j) = in; |
533 | } |
534 | } |
535 | |
536 | /* Check target conditions a) c) */ |
537 | /* a) */ |
538 | if (((state_array+vp)->transitions)->final_state && newlength != 0) |
539 | goto fail; |
540 | if (curr_ptr->state_no == (curr_ptr+1)->state_no) { |
541 | ptr_stack_push(curr_ptr+1); |
542 | } |
543 | if ((discrepancy+vp)->visited) { |
544 | //free(newstring); |
545 | if (targetd->length != newlength) |
546 | goto fail; |
547 | for (i=0 ; i < abs(newlength); i++) { |
548 | if (*((targetd->string)+i) != *(newstring+i)) |
549 | goto fail; |
550 | } |
551 | } else { |
552 | /* Add discrepancy to target state */ |
553 | targetd->length = newlength; |
554 | targetd->string = newstring; |
555 | curr_ptr = (state_array+vp)->transitions; |
556 | goto nopop; |
557 | } |
558 | } |
559 | free(state_array); |
560 | free(discrepancy); |
561 | fsm_destroy(tmp); |
562 | if (newstring != NULL((void*)0)) |
563 | free(newstring); |
564 | return 1; |
565 | fail: |
566 | free(state_array); |
567 | free(discrepancy); |
568 | ptr_stack_clear(); |
569 | fsm_destroy(tmp); |
570 | if (newstring != NULL((void*)0)) |
571 | free(newstring); |
572 | return 0; |
573 | } |
574 | |
575 | struct fsm *fsm_markallfinal(struct fsm *net) { |
576 | struct fsm_state *fsm; |
577 | int i; |
578 | fsm = net->states; |
579 | for (i=0; (fsm+i)->state_no != -1; i++) { |
580 | (fsm+i)->final_state = YES1; |
581 | } |
582 | return net; |
583 | } |
584 | |
585 | struct fsm *fsm_lowerdet(struct fsm *net) { |
586 | unsigned int newsym; /* Running number for new syms */ |
587 | struct fsm_state *fsm; |
588 | char repstr[13]; |
589 | int i,j,maxsigma,maxarc; |
590 | net = fsm_minimize(net); |
591 | fsm_count(net); |
592 | newsym = 8723643; |
593 | fsm = net->states; |
594 | maxarc = 0; |
595 | maxsigma = sigma_max(net->sigma); |
596 | |
597 | for (i=0, j=0; (fsm+i)->state_no != -1; i++) { |
598 | if ((fsm+i)->target != -1) |
599 | j++; |
600 | if ((fsm+i+1)->state_no != (fsm+i)->state_no) { |
601 | maxarc = maxarc > j ? maxarc : j; |
602 | j = 0; |
603 | } |
604 | } |
605 | if (maxarc > (maxsigma-2)) { |
606 | for (i=maxarc; i > (maxsigma-2); i--) { |
607 | sprintf(repstr,"%012X",newsym++); |
608 | sigma_add(repstr, net->sigma); |
609 | } |
610 | sigma_sort(net); |
611 | } |
612 | for (i=0, j=3; (fsm+i)->state_no != -1; i++) { |
613 | if ((fsm+i)->target != -1) { |
614 | (fsm+i)->out = j++; |
615 | (fsm+i)->in = ((fsm+i)->in == IDENTITY2) ? UNKNOWN1 : (fsm+i)->in; |
616 | } |
617 | if ((fsm+i+1)->state_no != (fsm+i)->state_no) { |
618 | j = 3; |
619 | } |
620 | } |
621 | return(net); |
622 | } |
623 | |
624 | struct fsm *fsm_lowerdeteps(struct fsm *net) { |
625 | unsigned int newsym; /* Running number for new syms */ |
626 | struct fsm_state *fsm; |
627 | char repstr[13]; |
628 | int i,j,maxsigma,maxarc; |
629 | net = fsm_minimize(net); |
630 | fsm_count(net); |
631 | newsym = 8723643; |
632 | fsm = net->states; |
633 | maxarc = 0; |
634 | maxsigma = sigma_max(net->sigma); |
635 | |
636 | for (i=0, j=0; (fsm+i)->state_no != -1; i++) { |
637 | if ((fsm+i)->target != -1) |
638 | j++; |
639 | if ((fsm+i+1)->state_no != (fsm+i)->state_no) { |
640 | maxarc = maxarc > j ? maxarc : j; |
641 | j = 0; |
642 | } |
643 | } |
644 | if (maxarc > (maxsigma-2)) { |
645 | for (i=maxarc; i > (maxsigma-2); i--) { |
646 | sprintf(repstr,"%012X",newsym++); |
647 | sigma_add(repstr, net->sigma); |
648 | } |
649 | sigma_sort(net); |
650 | } |
651 | for (i=0, j=3; (fsm+i)->state_no != -1; i++) { |
652 | if ((fsm+i)->target != -1 && (fsm+i)->out != EPSILON0) { |
653 | (fsm+i)->out = j++; |
654 | (fsm+i)->in = ((fsm+i)->in == IDENTITY2) ? UNKNOWN1 : (fsm+i)->in; |
655 | } |
656 | if ((fsm+i+1)->state_no != (fsm+i)->state_no) { |
657 | j = 3; |
658 | } |
659 | } |
660 | return(net); |
661 | } |
662 | |
663 | struct fsm *fsm_extract_nonidentity(struct fsm *net) { |
664 | |
665 | /* Same algorithm as for test identity, except we mark the arcs that cause nonidentity */ |
666 | /* Experimental. */ |
667 | |
668 | struct discrepancy { |
669 | short int *string; |
670 | short int length; |
671 | _Bool visited; |
672 | }; |
673 | |
674 | struct state_array *state_array; |
675 | struct fsm_state *curr_ptr; |
676 | struct fsm *net2; |
677 | int i, j, v, vp, num_states, factor = 0, newlength = 1, startfrom, killnum; |
678 | short int in, out, *newstring; |
679 | struct discrepancy *discrepancy, *currd, *targetd; |
680 | |
681 | fsm_minimize(net); |
682 | fsm_count(net); |
683 | killnum = sigma_add("@KILL@", net->sigma); |
684 | |
685 | num_states = net->statecount; |
686 | discrepancy = calloc(num_states,sizeof(struct discrepancy)); |
687 | state_array = map_firstlines(net); |
688 | ptr_stack_push(state_array->transitions); |
689 | |
690 | while(!ptr_stack_isempty()) { |
691 | |
692 | curr_ptr = ptr_stack_pop(); |
693 | |
694 | nopop: |
695 | v = curr_ptr->state_no; /* source state number */ |
696 | vp = curr_ptr->target; /* target state number */ |
697 | currd = discrepancy+v; |
698 | if (v != -1) |
699 | currd->visited = 1; |
700 | if (v == -1 || vp == -1) |
701 | continue; |
702 | in = curr_ptr->in; |
703 | out = curr_ptr->out; |
704 | |
705 | targetd = discrepancy+vp; |
706 | /* Check arc and conditions e) d) b) */ |
707 | /* e) */ |
708 | if (in == UNKNOWN1 || out == UNKNOWN1) |
709 | goto fail; |
710 | /* d) */ |
711 | if (in == IDENTITY2 && currd->length != 0) |
712 | goto fail; |
713 | /* b) */ |
714 | if (currd->length != 0) { |
715 | if (currd->length > 0 && out != EPSILON0 && out != *(currd->string)) |
716 | goto fail; |
717 | if (currd->length < 0 && in != EPSILON0 && in != *(currd->string)) |
718 | goto fail; |
719 | } |
720 | if (currd->length == 0 && in != out && in != EPSILON0 && out != EPSILON0) { |
721 | goto fail; |
722 | } |
723 | |
724 | /* Calculate new discrepancy */ |
725 | if (currd->length != 0) { |
726 | if (in != EPSILON0 && out != EPSILON0) |
727 | factor = 0; |
728 | else if (in == EPSILON0) |
729 | factor = -1; |
730 | else if (out == EPSILON0) |
731 | factor = 1; |
732 | |
733 | newlength = currd->length + factor; |
734 | startfrom = (abs(newlength) <= abs(currd->length)) ? 1 : 0; |
735 | |
736 | } else if (currd->length == 0) { |
737 | if (in != EPSILON0 && out != EPSILON0) { |
738 | newlength = 0; |
739 | } else { |
740 | newlength = (out == EPSILON0) ? 1 : -1; |
741 | } |
742 | startfrom = 0; |
743 | } |
744 | |
745 | newstring = calloc(abs(newlength),sizeof(int)); |
Result of 'calloc' is converted to a pointer of type 'short', which is incompatible with sizeof operand type 'int' | |
746 | |
747 | for (i = startfrom, j = 0; i < abs(currd->length); i++, j++) { |
748 | *(newstring+j) = *((currd->string)+i); |
749 | } |
750 | if (newlength != 0) { |
751 | if (currd->length > 0 && newlength >= currd->length) { |
752 | *(newstring+j) = in; |
753 | } |
754 | if (currd->length < 0 && newlength <= currd->length) { |
755 | *(newstring+j) = out; |
756 | } |
757 | if (currd->length == 0 && newlength < currd->length) { |
758 | *(newstring+j) = out; |
759 | } |
760 | if (currd->length == 0 && newlength > currd->length) { |
761 | *(newstring+j) = in; |
762 | } |
763 | |
764 | } |
765 | |
766 | /* Check target conditions a) c) */ |
767 | /* a) */ |
768 | if (((state_array+vp)->transitions)->final_state && newlength != 0) |
769 | goto fail; |
770 | if (curr_ptr->state_no == (curr_ptr+1)->state_no) { |
771 | ptr_stack_push(curr_ptr+1); |
772 | } |
773 | |
774 | if ((discrepancy+vp)->visited) { |
775 | //free(newstring); |
776 | if (targetd->length != newlength) |
777 | goto fail; |
778 | for (i=0 ; i < abs(newlength); i++) { |
779 | if (*((targetd->string)+i) != *(newstring+i)) |
780 | goto fail; |
781 | } |
782 | } else { |
783 | /* Add discrepancy to target state */ |
784 | targetd->length = newlength; |
785 | targetd->string = newstring; |
786 | curr_ptr = (state_array+vp)->transitions; |
787 | goto nopop; |
788 | } |
789 | continue; |
790 | fail: |
791 | curr_ptr->out = killnum; |
792 | if (curr_ptr->state_no == (curr_ptr+1)->state_no) { |
793 | ptr_stack_push(curr_ptr+1); |
794 | } |
795 | } |
796 | ptr_stack_clear(); |
797 | sigma_sort(net); |
798 | net2 = fsm_upper(fsm_compose(net,fsm_contains(fsm_symbol("@KILL@")))); |
799 | sigma_remove("@KILL@",net2->sigma); |
800 | sigma_sort(net2); |
801 | free(state_array); |
802 | free(discrepancy); |
803 | return(net2); |
804 | } |
805 | |
806 | struct fsm *fsm_copy (struct fsm *net) { |
807 | struct fsm *net_copy; |
808 | if (net == NULL((void*)0)) |
809 | return net; |
810 | |
811 | net_copy = malloc(sizeof(struct fsm)); |
812 | memcpy(net_copy, net, sizeof(struct fsm)); |
813 | |
814 | fsm_count(net); |
815 | net_copy->sigma = sigma_copy(net->sigma); |
816 | net_copy->states = fsm_state_copy(net->states, net->linecount); |
817 | return(net_copy); |
818 | } |
819 | |
820 | struct fsm_state *fsm_state_copy(struct fsm_state *fsm_state, int linecount) { |
821 | struct fsm_state *new_fsm_state; |
822 | |
823 | new_fsm_state = malloc(sizeof(struct fsm_state)*(linecount)); |
824 | memcpy(new_fsm_state, fsm_state, linecount*sizeof(struct fsm_state)); |
825 | return(new_fsm_state); |
826 | } |
827 | |
828 | /* TODO: separate linecount and arccount */ |
829 | int find_arccount(struct fsm_state *fsm) { |
830 | int i; |
831 | for (i=0;(fsm+i)->state_no != -1; i++) { |
832 | } |
833 | return i; |
834 | } |
835 | |
836 | void clear_quantifiers() { |
837 | quantifiers = NULL((void*)0); |
838 | } |
839 | |
840 | int count_quantifiers() { |
841 | struct defined_quantifiers *q; |
842 | int i; |
843 | for (i = 0, q = quantifiers; q != NULL((void*)0); q = q->next) { |
844 | i++; |
845 | } |
846 | return(i); |
847 | } |
848 | |
849 | void add_quantifier (char *string) { |
850 | struct defined_quantifiers *q; |
851 | if (quantifiers == NULL((void*)0)) { |
852 | q = malloc(sizeof(struct defined_quantifiers)); |
853 | quantifiers = q; |
854 | } else { |
855 | for (q = quantifiers; q->next != NULL((void*)0); q = q->next) { |
856 | |
857 | } |
858 | q->next = malloc(sizeof(struct defined_quantifiers)); |
859 | q = q->next; |
860 | } |
861 | q->name = strdup(string); |
862 | q->next = NULL((void*)0); |
863 | } |
864 | |
865 | struct fsm *union_quantifiers() { |
866 | /* We create a FSM that simply accepts the union of all */ |
867 | /* quantifier symbols */ |
868 | |
869 | struct fsm *net; |
870 | struct defined_quantifiers *q; |
871 | int i, syms, s, symlo; |
872 | |
873 | net = fsm_create(""); |
874 | fsm_update_flags(net, YES1, YES1, YES1, YES1, NO0, NO0); |
875 | |
876 | for (syms = 0, symlo = 0, q = quantifiers; q != NULL((void*)0); q = q->next) { |
877 | s = sigma_add(q->name, net->sigma); |
878 | if (symlo == 0) { |
879 | symlo = s; |
880 | } |
881 | syms++; |
882 | } |
883 | net->states = malloc(sizeof(struct fsm_state)*(syms+1)); |
884 | for (i = 0; i < syms; i++) { |
885 | add_fsm_arc(net->states, i, 0, symlo+i, symlo+i, 0, 1, 1); |
886 | } |
887 | add_fsm_arc(net->states, i, -1, -1, -1, -1 ,-1 ,-1); |
888 | net->arccount = syms; |
889 | net->statecount = net->finalcount = 1; |
890 | net->linecount = syms; |
891 | return(net); |
892 | } |
893 | |
894 | char *find_quantifier (char *string) { |
895 | struct defined_quantifiers *q; |
896 | for (q = quantifiers; q != NULL((void*)0); q = q->next) { |
897 | if (strcmp(string,q->name) == 0) |
898 | return q->name; |
899 | } |
900 | return(NULL((void*)0)); |
901 | } |
902 | |
903 | void purge_quantifier (char *string) { |
904 | struct defined_quantifiers *q, *q_prev; |
905 | for (q = quantifiers, q_prev = NULL((void*)0); q != NULL((void*)0); q_prev = q, q = q->next) { |
906 | if (strcmp(string, q->name) == 0) { |
907 | if (q_prev != NULL((void*)0)) { |
908 | q_prev->next = q->next; |
909 | } else { |
910 | quantifiers = q->next; |
911 | } |
912 | } |
913 | } |
914 | } |
915 | |
916 | struct fsm *fsm_quantifier(char *string) { |
917 | |
918 | /* \x* x \x* x \x* */ |
919 | return(fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol(string))),fsm_concat(fsm_symbol(string),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol(string))),fsm_concat(fsm_symbol(string),fsm_kleene_star(fsm_term_negation(fsm_symbol(string)))))))); |
920 | |
921 | } |
922 | |
923 | struct fsm *fsm_logical_precedence(char *string1, char *string2) { |
924 | /* x < y = \y* x \y* [x | y Q* x] ?* */ |
925 | /* 1 2 3 4 5 */ |
926 | |
927 | return(fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol(string2))),fsm_concat(fsm_symbol(string1),fsm_concat(fsm_kleene_star(fsm_term_negation(fsm_symbol(string2))),fsm_concat(fsm_union(fsm_symbol(string1),fsm_concat(fsm_symbol(string2),fsm_concat(union_quantifiers(),fsm_symbol(string1)))),fsm_universal()))))); |
928 | |
929 | /* 1,3 fsm_kleene_star(fsm_term_negation(fsm_symbol(string2))) */ |
930 | /* 2 = fsm_symbol(string1) */ |
931 | /* 5 = fsm_universal() */ |
932 | /* 4 = fsm_union(fsm_symbol(string1),fsm_concat(fsm_symbol(string2),fsm_concat(union_quantifiers(),fsm_symbol(string1)))) */ |
933 | |
934 | } |
935 | |
936 | /** Logical equivalence, i.e. where two variables span the same substring */ |
937 | /** x = y = ?* [x y|y x]/Q ?* [x y|y x]/Q ?* */ |
938 | struct fsm *fsm_logical_eq(char *string1, char *string2) { |
939 | return(fsm_concat(fsm_universal(),fsm_concat(fsm_ignore(fsm_union(fsm_concat(fsm_symbol(string1),fsm_symbol(string2)),fsm_concat(fsm_symbol(string2),fsm_symbol(string1))),union_quantifiers(),OP_IGNORE_ALL1),fsm_concat(fsm_universal(),fsm_concat(fsm_ignore(fsm_union(fsm_concat(fsm_symbol(string1),fsm_symbol(string2)),fsm_concat(fsm_symbol(string2),fsm_symbol(string1))),union_quantifiers(),OP_IGNORE_ALL1),fsm_universal()))))); |
940 | } |
941 |