Bug Summary

File:io.c
Warning:line 601, column 5
Potential leak of memory pointed to by 'net_name'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name io.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/io.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <string.h>
20#include <stdlib.h>
21#include <stdarg.h>
22#include "foma.h"
23#include "zlib.h"
24
25#define TYPE_TRANSITION1 1
26#define TYPE_SYMBOL2 2
27#define TYPE_FINAL3 3
28#define TYPE_PROPERTY4 4
29#define TYPE_END5 5
30#define TYPE_ERROR6 6
31
32#define READ_BUF_SIZE4096 4096
33
34struct binaryline {
35 int type;
36 int state;
37 int in;
38 int target;
39 int out;
40 int symbol;
41 char *name;
42 char *value;
43};
44
45extern char *g_att_epsilon;
46
47struct io_buf_handle {
48 char *io_buf;
49 char *io_buf_ptr;
50};
51
52struct io_buf_handle *io_init();
53void io_free(struct io_buf_handle *iobh);
54static int io_gets(struct io_buf_handle *iobh, char *target);
55static size_t io_get_gz_file_size(char *filename);
56static size_t io_get_file_size(char *filename);
57static size_t io_get_regular_file_size(char *filename);
58size_t io_gz_file_to_mem (struct io_buf_handle *iobh, char *filename);
59int foma_net_print(struct fsm *net, gzFile outfile);
60struct fsm *io_net_read(struct io_buf_handle *iobh, char **net_name);
61static INLINEinline int explode_line (char *buf, int *values);
62
63
64void escape_print(FILE *stream, char* string) {
65 int i;
66 if (strchr(string, '"') != NULL((void*)0)) {
67 for (i = 0; *(string+i) != '\0'; i++) {
68 if (*(string+i) == '"') {
69 fprintf(stream, "\\\"");
70 } else {
71 fputc(*(string+i), stream);
72 }
73 }
74 } else {
75 fprintf(stream, "%s", string);
76 }
77}
78
79int foma_write_prolog (struct fsm *net, char *filename) {
80 struct fsm_state *stateptr;
81 int i, *finals, *used_symbols, maxsigma;
82 FILE *out;
83 char *outstring, *instring, identifier[100];
84
85 if (filename == NULL((void*)0)) {
86 out = stdoutstdout;
87 } else {
88 if ((out = fopen(filename, "w")) == NULL((void*)0)) {
89 printf("Error writing to file '%s'. Using stdout.\n", filename);
90 out = stdoutstdout;
91 }
92 printf("Writing prolog to file '%s'.\n", filename);
93 }
94 fsm_count(net);
95 maxsigma = sigma_max(net->sigma);
96 used_symbols = calloc(maxsigma+1,sizeof(int));
97 finals = malloc(sizeof(int)*(net->statecount));
98 stateptr = net->states;
99 identifier[0] = '\0';
100
101 strcpy(identifier, net->name);
102
103 /* Print identifier */
104 fprintf(out, "%s%s%s", "network(",identifier,").\n");
105
106 for (i=0; (stateptr+i)->state_no != -1; i++) {
107 if ((stateptr+i)->final_state == 1) {
108 *(finals+((stateptr+i)->state_no)) = 1;
109 } else {
110 *(finals+((stateptr+i)->state_no)) = 0;
111 }
112 if ((stateptr+i)->in != -1) {
113 *(used_symbols+((stateptr+i)->in)) = 1;
114 }
115 if ((stateptr+i)->out != -1) {
116 *(used_symbols+((stateptr+i)->out)) = 1;
117 }
118
119 }
120
121 for (i = 3; i <= maxsigma; i++) {
122 if (*(used_symbols+i) == 0) {
123 instring = sigma_string(i, net->sigma);
124 if (strcmp(instring,"0") == 0) {
125 instring = "%0";
126 }
127 fprintf(out, "symbol(%s, \"", identifier);
128 escape_print(out, instring);
129 fprintf(out, "\").\n");
130
131 }
132 }
133
134 for (; stateptr->state_no != -1; stateptr++) {
135 if (stateptr->target == -1)
136 continue;
137 fprintf(out, "arc(%s, %i, %i, ", identifier, stateptr->state_no, stateptr->target);
138 if (stateptr->in == 0) instring = "0";
139 else if (stateptr->in == 1) instring = "?";
140 else if (stateptr->in == 2) instring = "?";
141 else instring = sigma_string(stateptr->in, net->sigma);
142 if (stateptr->out == 0) outstring = "0";
143 else if (stateptr->out == 1) outstring = "?";
144 else if (stateptr->out == 2) outstring = "?";
145 else outstring = sigma_string(stateptr->out, net->sigma);
146
147 if (strcmp(instring,"0") == 0 && stateptr->in != 0) instring = "%0";
148 if (strcmp(outstring,"0") == 0 && stateptr->out != 0) outstring = "%0";
149 if (strcmp(instring,"?") == 0 && stateptr->in > 2) instring = "%?";
150 if (strcmp(outstring,"?") == 0 && stateptr->in > 2) outstring = "%?";
151 /* Escape quotes */
152
153 if (net->arity == 2 && stateptr->in == IDENTITY2 && stateptr->out == IDENTITY2) {
154 fprintf(out, "\"?\").\n");
155 }
156 else if (net->arity == 2 && stateptr->in == stateptr->out && stateptr->in != UNKNOWN1) {
157 fprintf(out, "\"");
158 escape_print(out, instring);
159 fprintf(out, "\").\n");
160 }
161 else if (net->arity == 2) {
162 fprintf(out, "\"");
163 escape_print(out, instring);
164 fprintf(out, "\":\"");
165 escape_print(out, outstring);
166 fprintf(out, "\").\n");
167 }
168 else if (net->arity == 1) {
169 fprintf(out, "\"");
170 escape_print(out, instring);
171 fprintf(out, "\").\n");
172 }
173 }
174
175 for (i = 0; i < net->statecount; i++) {
176 if (*(finals+i)) {
177 fprintf(out, "final(%s, %i).\n", identifier, i);
178 }
179 }
180 if (filename != NULL((void*)0)) {
181 fclose(out);
182 }
183 free(finals);
184 free(used_symbols);
185 return 1;
186}
187
188struct fsm *read_att(char *filename) {
189
190 struct fsm_construct_handle *h;
191 struct fsm *net;
192 int i;
193 char inword[1024], delimiters[] = "\t", *tokens[6];
194 FILE *INFILE;
195
196 INFILE = fopen(filename, "r");
197 if (INFILE == NULL((void*)0)) {
198 return(NULL((void*)0));
199 }
200
201 h = fsm_construct_init(filename);
202 while (fgets(inword, 1024, INFILE) != NULL((void*)0)) {
203 if (inword[strlen(inword)-1] == '\n') {
204 inword[strlen(inword)-1] = '\0';
205 }
206 tokens[0] = strtok(inword, delimiters);
207 i = 0;
208 if (tokens[0] != NULL((void*)0)) {
209 i = 1;
210 for ( ; ; ) {
211 tokens[i] = strtok(NULL((void*)0), delimiters);
212 if (tokens[i] == NULL((void*)0)) {
213 break;
214 }
215 i++;
216 if (i == 6)
217 break;
218 }
219 }
220 if (i == 0) { continue; }
221 if (i >= 4) {
222 if (strcmp(tokens[2],g_att_epsilon) == 0)
223 tokens[2] = "@_EPSILON_SYMBOL_@";
224 if (strcmp(tokens[3],g_att_epsilon) == 0)
225 tokens[3] = "@_EPSILON_SYMBOL_@";
226
227 fsm_construct_add_arc(h, atoi(tokens[0]), atoi(tokens[1]), tokens[2], tokens[3]);
228 }
229 else if (i <= 3 && i > 0) {
230 fsm_construct_set_final(h,atoi(tokens[0]));
231 }
232 }
233 fsm_construct_set_initial(h,0);
234 fclose(INFILE);
235 net = fsm_construct_done(h);
236 fsm_count(net);
237 net = fsm_topsort(net);
238 return(net);
239}
240
241struct fsm *fsm_read_prolog (char *filename) {
242 char buf [1024], temp [1024], in [128], out[128], *temp_ptr, *temp_ptr2;
243 int arity, source, target, has_net;
244 struct fsm *outnet;
245 struct fsm_construct_handle *outh = NULL((void*)0);
246 FILE *prolog_file;
247
248 has_net = 0;
249 prolog_file = fopen(filename, "r");
250 if (prolog_file == NULL((void*)0)) {
251 return NULL((void*)0);
252 }
253
254 while (fgets(buf, 1023, prolog_file) != NULL((void*)0)) {
255 if (strstr(buf, "network(") == buf) {
256 /* Extract network name */
257 if (has_net == 1) {
258 perror("WARNING: prolog file contains multiple nets. Only returning the first one.\n");
259 break;
260 } else {
261 has_net = 1;
262 }
263 temp_ptr = strstr(buf, "network(")+8;
264 temp_ptr2 = strstr(buf, ").");
265 strncpy(temp, temp_ptr, (temp_ptr2 - temp_ptr));
266 temp[(temp_ptr2-temp_ptr)] = '\0';
267
268 /* Start network */
269 outh = fsm_construct_init(temp);
270 }
271 if (strstr(buf, "final(") == buf) {
272 temp_ptr = strstr(buf, " ");
273 temp_ptr++;
274 temp_ptr2 = strstr(temp_ptr, ").");
275 strncpy(temp, temp_ptr, (temp_ptr2 - temp_ptr));
276 temp[(temp_ptr2-temp_ptr)] = '\0';
277
278 fsm_construct_set_final(outh, atoi(temp));
279 }
280 if (strstr(buf, "symbol(") == buf) {
281 temp_ptr = strstr(buf, ", \"")+3;
282 temp_ptr2 = strstr(temp_ptr, "\").");
283 strncpy(temp, temp_ptr, (temp_ptr2 - temp_ptr));
284 temp[(temp_ptr2-temp_ptr)] = '\0';
285 if (strcmp(temp, "%0") == 0)
286 strcpy(temp, "0");
287 //printf("special: %s\n",temp);
288
289 if (fsm_construct_check_symbol(outh, temp) == -1) {
290 fsm_construct_add_symbol(outh, temp);
291 }
292 continue;
293 }
294 if (strstr(buf, "arc(") == buf) {
295 in[0] = '\0';
296 out[0] = '\0';
297
298 if (strstr(buf, "\":\"") == NULL((void*)0) || strstr(buf, ", \":\").") != NULL((void*)0)) {
299 arity = 1;
300 } else {
301 arity = 2;
302 }
303
304 /* Get source */
305 temp_ptr = strstr(buf, " ");
306 temp_ptr++;
307 temp_ptr2 = strstr(temp_ptr, ",");
308 strncpy(temp, temp_ptr, (temp_ptr2 - temp_ptr));
309 temp[(temp_ptr2-temp_ptr)] = '\0';
310 source = atoi(temp);
311
312 /* Get target */
313 temp_ptr = strstr(temp_ptr2, " ");
314 temp_ptr++;
315 temp_ptr2 = strstr(temp_ptr, ",");
316 strncpy(temp, temp_ptr, (temp_ptr2 - temp_ptr));
317 temp[(temp_ptr2-temp_ptr)] = '\0';
318 target = atoi(temp);
319
320 temp_ptr = strstr(temp_ptr2, "\"");
321 temp_ptr++;
322 if (arity == 2) {
323 temp_ptr2 = strstr(temp_ptr, "\":");
324 } else {
325 temp_ptr2 = strstr(temp_ptr, "\").");
326 }
327 strncpy(in, temp_ptr, (temp_ptr2 - temp_ptr));
328 in[(temp_ptr2 - temp_ptr)] = '\0';
329
330 if (arity == 2) {
331 temp_ptr = strstr(temp_ptr2, ":\"");
332 temp_ptr += 2;
333 temp_ptr2 = strstr(temp_ptr, "\").");
334 strncpy(out, temp_ptr, (temp_ptr2 - temp_ptr));
335 out[(temp_ptr2 - temp_ptr)] = '\0';
336 }
337 if (arity == 1 && (strcmp(in, "?") == 0)) {
338 strcpy(in,"@_IDENTITY_SYMBOL_@");
339 }
340 if (arity == 2 && (strcmp(in, "?") == 0)) {
341 strcpy(in,"@_UNKNOWN_SYMBOL_@");
342 }
343 if (arity == 2 && (strcmp(out, "?") == 0)) {
344 strcpy(out,"@_UNKNOWN_SYMBOL_@");
345 }
346 if (strcmp(in, "0") == 0) {
347 strcpy(in,"@_EPSILON_SYMBOL_@");
348 }
349 if (strcmp(out, "0") == 0) {
350 strcpy(out,"@_EPSILON_SYMBOL_@");
351 }
352 if (strcmp(in, "%0") == 0) {
353 strcpy(in,"0");
354 }
355 if (strcmp(out, "%0") == 0) {
356 strcpy(out,"0");
357 }
358 if (strcmp(in, "%?") == 0) {
359 strcpy(in,"?");
360 }
361 if (strcmp(out, "%?") == 0) {
362 strcpy(out,"?");
363 }
364
365 if (arity == 1) {
366 fsm_construct_add_arc(outh, source, target, in, in);
367 } else {
368 fsm_construct_add_arc(outh, source, target, in, out);
369 }
370 }
371 }
372 fclose(prolog_file);
373 if (has_net == 1) {
374 fsm_construct_set_initial(outh, 0);
375 outnet = fsm_construct_done(outh);
376 fsm_topsort(outnet);
377 return(outnet);
378 } else {
379 return(NULL((void*)0));
380 }
381}
382
383struct io_buf_handle *io_init() {
384 struct io_buf_handle *iobh;
385 iobh = malloc(sizeof(struct io_buf_handle));
386 (iobh->io_buf) = NULL((void*)0);
387 (iobh->io_buf_ptr) = NULL((void*)0);
388 return(iobh);
389}
390
391void io_free(struct io_buf_handle *iobh) {
392 if (iobh->io_buf != NULL((void*)0)) {
393 free(iobh->io_buf);
394 (iobh->io_buf) = NULL((void*)0);
395 }
396 free(iobh);
397}
398
399char *spacedtext_get_next_line(char **text) {
400 char *t, *ret;
401 ret = *text;
402 if (**text == '\0')
403 return NULL((void*)0);
404 for (t = *text; *t != '\0' && *t != '\n'; t++) {
405 }
406 if (*t == '\0')
407 *text = t;
408 else
409 *text = t+1;
410 *t = '\0';
411 return(ret);
412}
413
414char *spacedtext_get_next_token(char **text) {
415 char *t, *ret;
416 if (**text == '\0' || **text == '\n')
417 return NULL((void*)0);
418 for ( ; **text == ' ' ; (*text)++) {
419 }
420 ret = *text;
421 for (t = *text; *t != '\0' && *t != '\n' && *t != ' '; t++) {
422 }
423 if (*t == '\0' || *t == '\n')
424 *text = t;
425 else
426 *text = t+1;
427 *t = '\0';
428 return(ret);
429}
430
431struct fsm *fsm_read_spaced_text_file(char *filename) {
432 struct fsm_trie_handle *th;
433 char *text, *textorig, *insym, *outsym, *t1, *t2, *l1, *l2;
434
435 text = textorig = file_to_mem(filename);
436
437 if (text == NULL((void*)0))
438 return NULL((void*)0);
439 th = fsm_trie_init();
440 for (;;) {
441 for ( ; *text != '\0' && *text == '\n'; text++) { }
442 t1 = spacedtext_get_next_line(&text);
443 if (t1 == NULL((void*)0))
444 break;
445 if (strlen(t1) == 0)
446 continue;
447 t2 = spacedtext_get_next_line(&text);
448 if (t2 == NULL((void*)0) || strlen(t2) == 0) {
449 for (l1 = t1; (insym = spacedtext_get_next_token(&l1)) != NULL((void*)0); ) {
450 if (strcmp(insym, "0") == 0)
451 fsm_trie_symbol(th, "@_EPSILON_SYMBOL_@", "@_EPSILON_SYMBOL_@");
452 else if (strcmp(insym, "%0") == 0)
453 fsm_trie_symbol(th, "0", "0");
454 else
455 fsm_trie_symbol(th, insym, insym);
456 }
457 fsm_trie_end_word(th);
458 } else {
459 for (l1 = t1, l2 = t2; ; ) {
460 insym = spacedtext_get_next_token(&l1);
461 outsym = spacedtext_get_next_token(&l2);
462 if (insym == NULL((void*)0) && outsym == NULL((void*)0))
463 break;
464 if (insym == NULL((void*)0) || strcmp(insym, "0") == 0)
465 insym = "@_EPSILON_SYMBOL_@";
466 if (strcmp(insym, "%0") == 0)
467 insym = "0";
468 if (outsym == NULL((void*)0) || strcmp(outsym, "0") == 0)
469 outsym = "@_EPSILON_SYMBOL_@";
470 if (strcmp(outsym, "%0") == 0)
471 outsym = "0";
472 fsm_trie_symbol(th, insym, outsym);
473 }
474 fsm_trie_end_word(th);
475 }
476 }
477 free(textorig);
478 return(fsm_trie_done(th));
479}
480
481struct fsm *fsm_read_text_file(char *filename) {
482 struct fsm_trie_handle *th;
483 char *text, *textp1, *textp2;
484 int lastword;
485
486 text = file_to_mem(filename);
487 if (text == NULL((void*)0)) {
488 return NULL((void*)0);
489 }
490 textp1 = text;
491 th = fsm_trie_init();
492
493 for (lastword = 0 ; lastword == 0 ; textp1 = textp2+1) {
494 for (textp2 = textp1 ; *textp2 != '\n' && *textp2 != '\0'; textp2++) {
495 }
496 if (*textp2 == '\0') {
497 lastword = 1;
498 if (textp2 == textp1)
499 break;
500 }
501 *textp2 = '\0';
502 if (strlen(textp1) > 0)
503 fsm_trie_add_word(th, textp1);
504 }
505 free(text);
506 return(fsm_trie_done(th));
507}
508
509int fsm_write_binary_file(struct fsm *net, char *filename) {
510 gzFile outfile;
511 if ((outfile = gzopen(filename,"wb")) == NULL((void*)0)) {
512 return(1);
513 }
514 foma_net_print(net, outfile);
515 gzclose(outfile);
516 return(0);
517}
518
519struct fsm *fsm_read_binary_file_multiple(fsm_read_binary_handle fsrh) {
520 char *net_name;
521 struct fsm *net;
522 struct io_buf_handle *iobh;
523 iobh = (struct io_buf_handle *) fsrh;
524 net = io_net_read(iobh, &net_name);
525 if (net == NULL((void*)0)) {
526 io_free(iobh);
527 return(NULL((void*)0));
528 } else {
529 free(net_name);
530 return(net);
531 }
532}
533
534fsm_read_binary_handle fsm_read_binary_file_multiple_init(char *filename) {
535
536 struct io_buf_handle *iobh;
537 fsm_read_binary_handle fsm_read_handle;
538
539 iobh = io_init();
540 if (io_gz_file_to_mem(iobh, filename) == 0) {
541 io_free(iobh);
542 return NULL((void*)0);
543 }
544 fsm_read_handle = (void *) iobh;
545 return(fsm_read_handle);
546}
547
548struct fsm *fsm_read_binary_file(char *filename) {
549 char *net_name;
550 struct fsm *net;
551 struct io_buf_handle *iobh;
552 iobh = io_init();
553 if (io_gz_file_to_mem(iobh, filename) == 0) {
554 io_free(iobh);
555 return NULL((void*)0);
556 }
557 net = io_net_read(iobh, &net_name);
558 io_free(iobh);
559 return(net);
560}
561
562int save_defined(struct defined_networks *def, char *filename) {
563 struct defined_networks *d;
564 gzFile outfile;
565 if (def == NULL((void*)0)) {
566 fprintf(stderrstderr, "No defined networks.\n");
567 return(0);
568 }
569 if ((outfile = gzopen(filename, "wb")) == NULL((void*)0)) {
570 printf("Error opening file %s for writing.\n", filename);
571 return(-1);
572 }
573 printf("Writing definitions to file %s.\n", filename);
574 for (d = def; d != NULL((void*)0); d = d->next) {
575 if (!d->net) {
576 printf("Skipping definition without network.\n");
577 continue;
578 }
579 strncpy(d->net->name, d->name, FSM_NAME_LEN40);
580 foma_net_print(d->net, outfile);
581 }
582 gzclose(outfile);
583 return(1);
584}
585
586int load_defined(struct defined_networks *def, char *filename) {
587 struct fsm *net;
588 char *net_name;
589 struct io_buf_handle *iobh;
590
591 iobh = io_init();
592 printf("Loading definitions from %s.\n",filename);
593 if (io_gz_file_to_mem(iobh, filename) == 0) {
1
Taking false branch
594 fprintf(stderrstderr, "File error.\n");
595 io_free(iobh);
596 return 0;
597 }
598 while ((net = io_net_read(iobh, &net_name)) != NULL((void*)0)) {
2
Calling 'io_net_read'
14
Returned allocated memory via 2nd parameter
15
Loop condition is false. Execution continues on line 601
599 add_defined(def, net, net_name);
600 }
601 io_free(iobh);
16
Potential leak of memory pointed to by 'net_name'
602 return(1);
603}
604
605static INLINEinline int explode_line(char *buf, int *values) {
606 int i, j, items;
607 j = i = items = 0;
608 for (;;) {
609 for (i = j; *(buf+j) != ' ' && *(buf+j) != '\0'; j++) { }
610 if (*(buf+j) == '\0') {
611 *(values+items) = atoi(buf+i);
612 items++;
613 break;
614 } else{
615 *(buf+j) = '\0';
616 *(values+items) = atoi(buf+i);
617 items++;
618 j++;
619 }
620 }
621 return(items);
622}
623
624/* The file format we use is an extremely simple text format */
625/* which is gzip compressed through libz and consists of the following sections: */
626
627/* ##foma-net VERSION##*/
628/* ##props## */
629/* PROPERTIES LINE */
630/* ##sigma## */
631/* ...SIGMA LINES... */
632/* ##states## */
633/* ...TRANSITION LINES... */
634/* ##end## */
635
636/* Several networks may be concatenated in one file */
637
638/* The initial identifier is "##foma-net 1.0##" */
639/* where 1.0 is the version number for the file format */
640/* followed by the line "##props##" */
641/* which is followed by a line of space separated integers */
642/* which correpond to: */
643
644/* arity arccount statecount linecount finalcount pathcount is_deterministic */
645/* is_pruned is_minimized is_epsilon_free is_loop_free is_completed name */
646
647/* where name is used if defined networks are saved/loaded */
648
649/* Following the props line, we accept anything (for future expansion) */
650/* until we find ##sigma## */
651
652/* the section beginning with "##sigma##" consists of lines with two fields: */
653/* number string */
654/* correponding to the symbol number and the symbol string */
655
656/* the section beginning with "##states##" consists of lines of ASCII integers */
657/* with 2-5 fields to avoid some redundancy in every line corresponding to a */
658/* transition where otherwise state numbers would be unnecessarily repeated and */
659/* out symbols also (if in = out as is the case for recognizers/simple automata) */
660
661/* The information depending on the number of fields in the lines is as follows: */
662
663/* 2: in target (here state_no is the same as the last mentioned one and out = in) */
664/* 3: in out target (again, state_no is the same as the last mentioned one) */
665/* 4: state_no in target final_state (where out = in) */
666/* 5: state_no in out target final_state */
667
668/* There is no harm in always using 5 fields; however this will take up more space */
669
670/* As in struct fsm_state, states without transitions are represented as a 4-field: */
671/* state_no -1 -1 final_state (since in=out for 4-field lines, out = -1 as well) */
672
673/* AS gzopen will read uncompressed files as well, one can gunzip a file */
674/* that contains a network and still read it */
675
676struct fsm *io_net_read(struct io_buf_handle *iobh, char **net_name) {
677
678 char buf[READ_BUF_SIZE4096];
679 struct fsm *net;
680 struct fsm_state *fsm;
681
682 char *new_symbol;
683 int i, items, new_symbol_number, laststate, lineint[5], *cm;
684 int extras;
685 char last_final = '1';
686
687 if (io_gets(iobh, buf) == 0) {
3
Assuming the condition is false
4
Taking false branch
688 return NULL((void*)0);
689 }
690
691 net = fsm_create("");
692
693 if (strcmp(buf, "##foma-net 1.0##") != 0) {
5
Assuming the condition is false
6
Taking false branch
694 fsm_destroy(net);
695 perror("File format error foma!\n");
696 return NULL((void*)0);
697 }
698 io_gets(iobh, buf);
699 if (strcmp(buf, "##props##") != 0) {
7
Assuming the condition is false
8
Taking false branch
700 perror("File format error props!\n");
701 fsm_destroy(net);
702 return NULL((void*)0);
703 }
704 /* Properties */
705 io_gets(iobh, buf);
706 extras = 0;
707 sscanf(buf, "%i %i %i %i %i %lld %i %i %i %i %i %i %s", &net->arity, &net->arccount, &net->statecount, &net->linecount, &net->finalcount, &net->pathcount, &net->is_deterministic, &net->is_pruned, &net->is_minimized, &net->is_epsilon_free, &net->is_loop_free, &extras, buf);
708 strncpy(net->name, buf, FSM_NAME_LEN40);
709 *net_name = strdup(buf);
9
Memory is allocated
710 io_gets(iobh, buf);
711
712 net->is_completed = (extras & 3);
713 net->arcs_sorted_in = (extras & 12) >> 2;
714 net->arcs_sorted_out = (extras & 48) >> 4;
715
716 /* Sigma */
717 while (strcmp(buf, "##sigma##") != 0) { /* Loop until we encounter ##sigma## */
10
Assuming the condition is true
11
Loop condition is true. Entering loop body
718 if (buf[0] == '\0') {
12
Assuming the condition is true
13
Taking true branch
719 printf("File format error at sigma definition!\n");
720 fsm_destroy(net);
721 return NULL((void*)0);
722 }
723 io_gets(iobh, buf);
724 }
725
726 for (;;) {
727 io_gets(iobh, buf);
728 if (buf[0] == '#') break;
729 if (buf[0] == '\0') continue;
730 new_symbol = strstr(buf, " ");
731 new_symbol[0] = '\0';
732 new_symbol++;
733 if (new_symbol[0] == '\0') {
734 sscanf(buf,"%i", &new_symbol_number);
735 sigma_add_number(net->sigma, "\n", new_symbol_number);
736 } else {
737 sscanf(buf,"%i", &new_symbol_number);
738 sigma_add_number(net->sigma, new_symbol, new_symbol_number);
739 }
740 }
741
742 /* States */
743 if (strcmp(buf, "##states##") != 0) {
744 printf("File format error!\n");
745 return NULL((void*)0);
746 }
747 net->states = malloc(net->linecount*sizeof(struct fsm_state));
748 fsm = net->states;
749 laststate = -1;
750 for (i=0; ;i++) {
751 io_gets(iobh, buf);
752 if (buf[0] == '#') break;
753
754 /* scanf is just too slow here */
755
756 //items = sscanf(buf, "%i %i %i %i %i",&lineint[0], &lineint[1], &lineint[2], &lineint[3], &lineint[4]);
757
758 items = explode_line(buf, &lineint[0]);
759
760 switch (items) {
761 case 2:
762 (fsm+i)->state_no = laststate;
763 (fsm+i)->in = lineint[0];
764 (fsm+i)->out = lineint[0];
765 (fsm+i)->target = lineint[1];
766 (fsm+i)->final_state = last_final;
767 break;
768 case 3:
769 (fsm+i)->state_no = laststate;
770 (fsm+i)->in = lineint[0];
771 (fsm+i)->out = lineint[1];
772 (fsm+i)->target = lineint[2];
773 (fsm+i)->final_state = last_final;
774 break;
775 case 4:
776 (fsm+i)->state_no = lineint[0];
777 (fsm+i)->in = lineint[1];
778 (fsm+i)->out = lineint[1];
779 (fsm+i)->target = lineint[2];
780 (fsm+i)->final_state = lineint[3];
781 laststate = lineint[0];
782 last_final = lineint[3];
783 break;
784 case 5:
785 (fsm+i)->state_no = lineint[0];
786 (fsm+i)->in = lineint[1];
787 (fsm+i)->out = lineint[2];
788 (fsm+i)->target = lineint[3];
789 (fsm+i)->final_state = lineint[4];
790 laststate = lineint[0];
791 last_final = lineint[4];
792 break;
793 default:
794 printf("File format error\n");
795 return NULL((void*)0);
796 }
797 if (laststate > 0) {
798 (fsm+i)->start_state = 0;
799 } else if (laststate == -1) {
800 (fsm+i)->start_state = -1;
801 } else {
802 (fsm+i)->start_state = 1;
803 }
804
805 }
806 if (strcmp(buf, "##cmatrix##") == 0) {
807 cmatrix_init(net);
808 cm = net->medlookup->confusion_matrix;
809 for (;;) {
810 io_gets(iobh, buf);
811 if (buf[0] == '#') break;
812 sscanf(buf,"%i", &i);
813 *cm = i;
814 cm++;
815 }
816 }
817 if (strcmp(buf, "##end##") != 0) {
818 printf("File format error!\n");
819 return NULL((void*)0);
820 }
821 return(net);
822}
823
824static int io_gets(struct io_buf_handle *iobh, char *target) {
825 int i;
826 for (i = 0; *((iobh->io_buf_ptr)+i) != '\n' && *((iobh->io_buf_ptr)+i) != '\0'; i++) {
827 *(target+i) = *((iobh->io_buf_ptr)+i);
828 }
829 *(target+i) = '\0';
830 if (*((iobh->io_buf_ptr)+i) == '\0')
831 (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i;
832 else
833 (iobh->io_buf_ptr) = (iobh->io_buf_ptr) + i + 1;
834
835 return(i);
836}
837
838int foma_net_print(struct fsm *net, gzFile outfile) {
839 struct sigma *sigma;
840 struct fsm_state *fsm;
841 int i, maxsigma, laststate, *cm, extras;
842
843 /* Header */
844 gzprintf(outfile, "%s","##foma-net 1.0##\n");
845
846 /* Properties */
847 gzprintf(outfile, "%s","##props##\n");
848
849 extras = (net->is_completed) | (net->arcs_sorted_in << 2) | (net->arcs_sorted_out << 4);
850
851 gzprintf(outfile,
852 "%i %i %i %i %i %lld %i %i %i %i %i %i %s\n", net->arity, net->arccount, net->statecount, net->linecount, net->finalcount, net->pathcount, net->is_deterministic, net->is_pruned, net->is_minimized, net->is_epsilon_free, net->is_loop_free, extras, net->name);
853
854 /* Sigma */
855 gzprintf(outfile, "%s","##sigma##\n");
856 for (sigma = net->sigma; sigma != NULL((void*)0) && sigma->number != -1; sigma = sigma->next) {
857 gzprintf(outfile, "%i %s\n",sigma->number, sigma->symbol);
858 }
859
860 /* State array */
861 laststate = -1;
862 gzprintf(outfile, "%s","##states##\n");
863 for (fsm = net->states; fsm->state_no !=-1; fsm++) {
864 if (fsm->state_no != laststate) {
865 if (fsm->in != fsm->out) {
866 gzprintf(outfile, "%i %i %i %i %i\n",fsm->state_no, fsm->in, fsm->out, fsm->target, fsm->final_state);
867 } else {
868 gzprintf(outfile, "%i %i %i %i\n",fsm->state_no, fsm->in, fsm->target, fsm->final_state);
869 }
870 } else {
871 if (fsm->in != fsm->out) {
872 gzprintf(outfile, "%i %i %i\n", fsm->in, fsm->out, fsm->target);
873 } else {
874 gzprintf(outfile, "%i %i\n", fsm->in, fsm->target);
875 }
876 }
877 laststate = fsm->state_no;
878 }
879 /* Sentinel for states */
880 gzprintf(outfile, "-1 -1 -1 -1 -1\n");
881
882 /* Store confusion matrix */
883 if (net->medlookup != NULL((void*)0) && net->medlookup->confusion_matrix != NULL((void*)0)) {
884
885 gzprintf(outfile, "%s","##cmatrix##\n");
886 cm = net->medlookup->confusion_matrix;
887 maxsigma = sigma_max(net->sigma)+1;
888 for (i=0; i < maxsigma*maxsigma; i++) {
889 gzprintf(outfile, "%i\n", *(cm+i));
890 }
891 }
892
893 /* End */
894 gzprintf(outfile, "%s","##end##\n");
895 return(1);
896}
897
898int net_print_att(struct fsm *net, FILE *outfile) {
899 struct fsm_state *fsm;
900 struct fsm_sigma_list *sl;
901 int i, prev;
902
903 fsm = net->states;
904 sl = sigma_to_list(net->sigma);
905 if (sigma_max(net->sigma) >= 0) {
906 (sl+0)->symbol = g_att_epsilon;
907 }
908 for (i=0; (fsm+i)->state_no != -1; i++) {
909 if ((fsm+i)->target != -1) {
910 fprintf(outfile, "%i\t%i\t%s\t%s\n",(fsm+i)->state_no,(fsm+i)->target, (sl+(fsm+i)->in)->symbol, (sl+(fsm+i)->out)->symbol);
911 }
912 }
913 prev = -1;
914 for (i=0; (fsm+i)->state_no != -1; prev = (fsm+i)->state_no, i++) {
915 if ((fsm+i)->state_no != prev && (fsm+i)->final_state == 1) {
916 fprintf(outfile, "%i\n",(fsm+i)->state_no);
917 }
918 }
919 free(sl);
920 return(1);
921}
922
923static size_t io_get_gz_file_size(char *filename) {
924
925 FILE *infile;
926 size_t numbytes;
927 unsigned char bytes[4];
928 unsigned int ints[4], i;
929
930 /* The last four bytes in a .gz file shows the size of the uncompressed data */
931 infile = fopen(filename, "r");
932 fseek(infile, -4, SEEK_END2);
933 fread(&bytes, 1, 4, infile);
934 fclose(infile);
935 for (i = 0 ; i < 4 ; i++) {
936 ints[i] = bytes[i];
937 }
938 numbytes = ints[0] | (ints[1] << 8) | (ints[2] << 16 ) | (ints[3] << 24);
939 return(numbytes);
940}
941
942static size_t io_get_regular_file_size(char *filename) {
943
944 FILE *infile;
945 size_t numbytes;
946
947 infile = fopen(filename, "r");
948 fseek(infile, 0L, SEEK_END2);
949 numbytes = ftell(infile);
950 fclose(infile);
951 return(numbytes);
952}
953
954
955static size_t io_get_file_size(char *filename) {
956 gzFile FILE;
957 size_t size;
958 FILE = gzopen(filename, "r");
959 if (FILE == NULL((void*)0)) {
960 return(0);
961 }
962 if (gzdirect(FILE) == 1) {
963 gzclose(FILE);
964 size = io_get_regular_file_size(filename);
965 } else {
966 gzclose(FILE);
967 size = io_get_gz_file_size(filename);
968 }
969 return(size);
970}
971
972size_t io_gz_file_to_mem(struct io_buf_handle *iobh, char *filename) {
973
974 size_t size;
975 gzFile FILE;
976
977 size = io_get_file_size(filename);
978 if (size == 0) {
979 return 0;
980 }
981 (iobh->io_buf) = malloc((size+1)*sizeof(char));
982 FILE = gzopen(filename, "rb");
983 gzread(FILE, iobh->io_buf, size);
984 gzclose(FILE);
985 *((iobh->io_buf)+size) = '\0';
986 iobh->io_buf_ptr = iobh->io_buf;
987 return(size);
988}
989
990typedef struct BOM {
991 char code[4];
992 int len;
993 char* name;
994} BOM;
995
996static BOM BOM_codes[] = {
997 { { 0xEF, 0xBB, 0xBF }, 3, "UTF-8"},
998 { { 0xFF, 0xFE, 0x00, 0x00 }, 4, "UTF-32LE" },
999 { { 0x00, 0x00, 0xFE, 0xFF }, 4, "UTF-32BE" },
1000 { { 0xFF, 0xFE }, 2, "UTF16-LE" },
1001 { { 0xFE, 0xFF }, 2, "UTF16-BE" },
1002 { { 0, } , 0, NULL((void*)0) },
1003};
1004
1005BOM *check_BOM(char *buffer) {
1006 BOM *bom;
1007 for(bom = BOM_codes; bom->len; bom++) {
1008 if(strncmp(bom->code, buffer, bom->len) == 0) {
1009 return bom;
1010 }
1011 }
1012 return NULL((void*)0);
1013}
1014
1015char *file_to_mem(char *name) {
1016 FILE *infile;
1017 size_t numbytes;
1018 char *buffer;
1019 BOM *bom;
1020 infile = fopen(name, "r");
1021 if(infile == NULL((void*)0)) {
1022 printf("Error opening file '%s'\n",name);
1023 return NULL((void*)0);
1024 }
1025 fseek(infile, 0L, SEEK_END2);
1026 numbytes = ftell(infile);
1027 fseek(infile, 0L, SEEK_SET0);
1028 buffer = (char*)malloc((numbytes+1) * sizeof(char));
1029 if(buffer == NULL((void*)0)) {
1030 printf("Error reading file '%s'\n",name);
1031 return NULL((void*)0);
1032 }
1033 if (fread(buffer, sizeof(char), numbytes, infile) != numbytes) {
1034 printf("Error reading file '%s'\n",name);
1035 return NULL((void*)0);
1036 }
1037
1038 bom = check_BOM(buffer);
1039 if (bom != NULL((void*)0)) {
1040 printf("%s BOM mark is detected in file '%s'.\n",bom->name,name);
1041 return NULL((void*)0);
1042 }
1043 fclose(infile);
1044 *(buffer+numbytes)='\0';
1045 return(buffer);
1046}