Bug Summary

File:rewrite.c
Warning:line 134, column 2
Value stored to 'dir' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name rewrite.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -resource-dir /usr/lib/llvm-16/lib/clang/16 -D _GNU_SOURCE -I /tmp/build/foma/foma-0.10.0+g279~a2d32b38 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -std=c18 -fdebug-compilation-dir=/tmp/build/foma/foma-0.10.0+g279~a2d32b38 -ferror-limit 19 -fvisibility=hidden -fgnuc-version=4.2.1 -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/foma/scan-build/2024-09-11-155945-2678-1 -x c /tmp/build/foma/foma-0.10.0+g279~a2d32b38/rewrite.c
1/* Foma: a finite-state toolkit and library. */
2/* Copyright © 2008-2021 Mans Hulden */
3
4/* This file is part of foma. */
5
6/* Licensed under the Apache License, Version 2.0 (the "License"); */
7/* you may not use this file except in compliance with the License. */
8/* You may obtain a copy of the License at */
9
10/* http://www.apache.org/licenses/LICENSE-2.0 */
11
12/* Unless required by applicable law or agreed to in writing, software */
13/* distributed under the License is distributed on an "AS IS" BASIS, */
14/* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */
15/* See the License for the specific language governing permissions and */
16/* limitations under the License. */
17
18#include <stdio.h>
19#include <stdlib.h>
20#include <string.h>
21#include "foma.h"
22
23// Lower(X) puts X on output tape (may also be represented by @ID@ on input tape)
24// Upper(X) puts X on input tape
25// Unrewritten(X) X on input tape, not rewritten (aligned with @O@ symbols)
26// NotContain(X) MT does not contain MT configuration X
27
28// Boundary: every MT word begins and ends with boundary, i.e. the @#@ symbol on the input tape, output tape, and relevant semantic symbols
29
30/*
31
32 [ @O@ ] [ @I[@ ] [ @I@ ] [ @I]@ ] [ @I[]@ ]
33 [ @0@ ] [ @#0001@ ] [ @#0001@ ] [ @#0001@ ] [ @#0001@ ]
34 [ @#@ ] [ ANY|@0@ ] [ ANY|@0@ ] [ ANY|@0@ ] [ ANY|@0@ ]
35 [ @ID@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ]
36
37
38*/
39/* Special symbols used:
40 @0@ Epsilon
41 @O@ Outside rewrite
42 @I@ Inside rewrite
43 @I[@ Beginning of rewrite
44 @I[]@ Beginning and end of rewrite
45 @I]@ End of rewrite
46 @ID@ Identity symbol (= repeat symbol on previous tape at this position)
47 @#@ Boundary (the symbol .#. is mapped to this in contexts before the compilation procedure)
48 @#X@ X = rule number (one for each rule, starting with @#0001@)
49*/
50
51struct rewrite_batch {
52
53 struct rewrite_set *rewrite_set;
54 struct fsm *Rulenames;
55 struct fsm *ISyms;
56 struct fsm *ANY;
57 struct fsm *IOpen;
58 struct fsm *IClose;
59 struct fsm *ITape;
60 struct fsm *Any4Tape;
61 struct fsm *Epextend;
62 int num_rules;
63 char (*namestrings)[8];
64
65};
66
67char *specialsymbols[] = {"@0@","@O@","@I@","@I[@","@I[]@","@I]@","@ID@","@#@", NULL((void*)0)};
68
69void rewrite_add_special_syms(struct rewrite_batch *rb, struct fsm *net);
70struct fsm *rewrite_upper(struct rewrite_batch *rb, struct fsm *upper);
71struct fsm *rewrite_lower(struct rewrite_batch *rb, struct fsm *lower);
72struct fsm *rewrite_two_level(struct rewrite_batch *rb, struct fsm *lang, int rightside);
73struct fsm *rewr_context_restrict(struct rewrite_batch *rb, struct fsm *X, struct fsmcontexts *LR);
74struct fsm *rewr_contains(struct rewrite_batch *rb, struct fsm *lang);
75struct fsm *rewr_unrewritten(struct rewrite_batch *rb, struct fsm *lang);
76struct fsm *rewr_notleftmost(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type);
77struct fsm *rewr_notshortest(struct rewrite_batch *rb, struct fsm *lang, int rule_number);
78struct fsm *rewr_notlongest(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type);
79struct fsm *rewrite_tape_m_to_n_of_k(struct fsm *lang, int m, int n, int k);
80struct fsm *rewrite_cp(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower, int rule_number);
81struct fsm *rewrite_cp_transducer(struct rewrite_batch *rb, struct fsm *t, int rule_number);
82struct fsm *rewrite_cp_markup(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower1, struct fsm *lower2, int rule_number);
83struct fsm *rewrite_epextend(struct rewrite_batch *rb);
84struct fsm *rewrite_any_4tape(struct rewrite_batch *rb);
85struct fsm *rewrite_itape(struct rewrite_batch *rb);
86void rewrite_cleanup(struct rewrite_batch *rb);
87
88
89struct fsm *fsm_rewrite(struct rewrite_set *all_rules) {
90 struct rewrite_batch *rb;
91 struct rewrite_set *ruleset;
92 struct fsmrules *rules;
93 struct fsmcontexts *contexts;
94 struct fsm *RuleCP, *Base, *Boundary, *Outside, *CP, *C, *LeftExtend, *RightExtend, *Center;
95 int i, num_rules, rule_number, dir;
96 /* Count parallel rules */
97 for (ruleset = all_rules, num_rules = 0; ruleset != NULL((void*)0); ruleset = ruleset->next) {
98 for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) {
99 num_rules++;
100 }
101 }
102
103 rb = calloc(1, sizeof(struct rewrite_batch));
104 rb->rewrite_set = all_rules;
105 rb->num_rules = num_rules;
106 rb->namestrings = malloc(sizeof *rb->namestrings * num_rules);
107 for (i = 0; i < rb->num_rules; i++) {
108 sprintf(rb->namestrings[i], "@#%04i@", i+1);
109 }
110
111 rb->ISyms = fsm_minimize(fsm_union(fsm_symbol("@I@"), fsm_union(fsm_symbol("@I[]@"), fsm_union(fsm_symbol("@I[@"), fsm_symbol("@I]@")))));
112 rb->Rulenames = fsm_empty_set();
113 for (i = 1; i <= num_rules; i++) {
114 rb->Rulenames = fsm_minimize(fsm_union(rb->Rulenames, fsm_symbol(rb->namestrings[i-1])));
115 }
116 rb->ANY = fsm_identity();
117 rewrite_add_special_syms(rb, rb->ANY);
118
119 /* Add auxiliary symbols to all alphabets */
120 for (ruleset = all_rules; ruleset != NULL((void*)0); ruleset = ruleset->next) {
121 for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) {
122 rewrite_add_special_syms(rb, rules->left);
123 rewrite_add_special_syms(rb, rules->right);
124 rewrite_add_special_syms(rb, rules->right2);
125 }
126 for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) {
127 rewrite_add_special_syms(rb, contexts->left);
128 rewrite_add_special_syms(rb, contexts->right);
129 }
130 }
131 /* Get cross-product of every rule, according to its type */
132 RuleCP = fsm_empty_set();
133 for (ruleset = all_rules, rule_number = 1; ruleset != NULL((void*)0); ruleset = ruleset->next) {
134 dir = ruleset->rule_direction;
Value stored to 'dir' is never read
135 for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) {
136 if (rules->right == NULL((void*)0)) {
137 /* T(x)-type rule */
138 CP = rewrite_cp_transducer(rb, fsm_copy(rules->left), rule_number);
139 rules->cross_product = fsm_copy(CP);
140 rules->right = fsm_minimize(fsm_lower(fsm_copy(rules->left)));
141 rules->left = fsm_minimize(fsm_upper(fsm_copy(rules->left)));
142 rewrite_add_special_syms(rb, rules->right);
143 rewrite_add_special_syms(rb, rules->left);
144 } else if (rules->right2 == NULL((void*)0)) {
145 /* Regular rewrite rule */
146 CP = rewrite_cp(rb, fsm_copy(rules->left), fsm_copy(rules->right), rule_number);
147 rules->cross_product = fsm_copy(CP);
148 } else {
149 /* A -> B ... C -type rule */
150 CP = rewrite_cp_markup(rb, fsm_copy(rules->left), fsm_copy(rules->right), fsm_copy(rules->right2), rule_number);
151 rules->cross_product = fsm_copy(CP);
152 }
153 RuleCP = fsm_minimize(fsm_union(RuleCP, CP));
154 rule_number++;
155 }
156 }
157
158 /* Create Base language */
159 Boundary = fsm_parse_regex("\"@O@\" \"@0@\" \"@#@\" \"@ID@\"", NULL((void*)0), NULL((void*)0));
160 Outside = fsm_minimize(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_copy(rb->ANY), fsm_symbol("@ID@")))));
161 Base = fsm_minimize(fsm_concat(fsm_copy(Boundary), fsm_concat(fsm_kleene_star(fsm_union(RuleCP, Outside)), fsm_copy(Boundary))));
162 fsm_destroy(Boundary);
163 for (ruleset = all_rules, rule_number = 1; ruleset != NULL((void*)0); ruleset = ruleset->next) {
164 dir = ruleset->rule_direction;
165 /* Replace all context spec with Upper/Lower, depending on rule_direction */
166 for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) {
167 switch(dir) {
168 case OP_UPWARD_REPLACE1:
169 contexts->cpleft = rewrite_upper(rb, fsm_copy(contexts->left));
170 contexts->cpright = rewrite_upper(rb, fsm_copy(contexts->right));
171 break;
172 case OP_RIGHTWARD_REPLACE2:
173 contexts->cpleft = rewrite_lower(rb, fsm_copy(contexts->left));
174 contexts->cpright = rewrite_upper(rb, fsm_copy(contexts->right));
175 break;
176 case OP_LEFTWARD_REPLACE3:
177 contexts->cpleft = rewrite_upper(rb, fsm_copy(contexts->left));
178 contexts->cpright = rewrite_lower(rb, fsm_copy(contexts->right));
179 break;
180 case OP_DOWNWARD_REPLACE4:
181 contexts->cpleft = rewrite_lower(rb, fsm_copy(contexts->left));
182 contexts->cpright = rewrite_lower(rb, fsm_copy(contexts->right));
183 break;
184 case OP_TWO_LEVEL_REPLACE5:
185 contexts->cpleft = rewrite_two_level(rb, fsm_copy(contexts->left), 0);
186 contexts->cpright = rewrite_two_level(rb, fsm_copy(contexts->right), 1);
187 break;
188 }
189 }
190 for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) {
191 /* Just the rule center w/ number without CP() contests */
192 /* Actually, maybe better to include CP(U,L) in this, very slow with e.g. a -> a || _ b^15 */
193 if (rules->arrow_type & ARROW_DOTTED8) {
194 /* define EP Tape1of4("@O@") | [ Tape1of4("@I[@" "@I@"* "@I]@" | "@I[]@") & Tape3of4(~["@0@"*]) ] ; */
195 /* Additional constraint: 0->x is only allowed between EP _ EP */
196 /* The left and right sides can be checked separately */
197 /* ~[?* Center ~[EP ?*]] & ~[~[?* EP] Center ?*] */
198 Center = fsm_copy(rules->cross_product);
199 Base = fsm_intersect(fsm_intersect(Base, fsm_complement(fsm_concat(rewrite_any_4tape(rb), fsm_concat(fsm_copy(Center), fsm_complement(fsm_concat(rewrite_epextend(rb), rewrite_any_4tape(rb))))))), fsm_complement(fsm_concat(fsm_complement(fsm_concat(rewrite_any_4tape(rb), rewrite_epextend(rb))), fsm_concat(fsm_copy(Center), rewrite_any_4tape(rb)))));
200 fsm_destroy(Center);
201 }
202 if (ruleset->rewrite_contexts) {
203 Base = fsm_intersect(Base, rewr_context_restrict(rb, rules->cross_product, ruleset->rewrite_contexts));
204 }
205 /* Determine C (based on rule type) */
206 C = fsm_empty_set();
207 if ((rules->arrow_type & ARROW_RIGHT1) && !(rules->arrow_type & ARROW_OPTIONAL4)) {
208 C = fsm_union(C, rewr_unrewritten(rb, fsm_minimize(fsm_minus(fsm_copy(rules->left), fsm_empty_string()))));
209 }
210 if ((rules->arrow_type & ARROW_LEFT2) && !(rules->arrow_type & ARROW_OPTIONAL4)) {
211 C = fsm_union(C, rewr_unrewritten(rb, fsm_minimize(fsm_minus(fsm_copy(rules->right), fsm_empty_string()))));
212 }
213 if (rules->arrow_type & ARROW_LONGEST_MATCH16) {
214 if (rules->arrow_type & ARROW_RIGHT1) {
215 C = fsm_union(C, rewr_notleftmost(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type));
216 C = fsm_union(C, rewr_notlongest(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type));
217 }
218 if (rules->arrow_type & ARROW_LEFT2) {
219 C = fsm_union(C, rewr_notleftmost(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type));
220 C = fsm_union(C, rewr_notlongest(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type));
221 }
222 }
223 if (rules->arrow_type & ARROW_SHORTEST_MATCH32) {
224 if (rules->arrow_type & ARROW_RIGHT1) {
225 C = fsm_union(C, rewr_notleftmost(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type));
226 C = fsm_union(C, rewr_notshortest(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number));
227 }
228 if (rules->arrow_type & ARROW_LEFT2) {
229 C = fsm_union(C, rewr_notleftmost(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type));
230 C = fsm_union(C, rewr_notshortest(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number));
231 }
232 }
233 if (!ruleset->rewrite_contexts) {
234 if (rules->arrow_type & ARROW_DOTTED8 && !(rules->arrow_type & ARROW_OPTIONAL4)) {
235 Base = fsm_minus(Base, rewr_contains(rb, fsm_concat(rewrite_epextend(rb), rewrite_epextend(rb))));
236 } else {
237 Base = fsm_minus(Base, rewr_contains(rb, fsm_copy(C)));
238 }
239 }
240 for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) {
241 /* Constraints: running intersect w/ Base */
242 /* NotContain(LC [Unrewritten|LM|...] RC) */
243 if (rules->arrow_type & ARROW_DOTTED8 && !(rules->arrow_type & ARROW_OPTIONAL4)) {
244 /* Extend left and right */
245 LeftExtend = fsm_minimize(fsm_intersect(fsm_concat(rewrite_any_4tape(rb), fsm_copy(contexts->cpleft)), fsm_concat(rewrite_any_4tape(rb), rewrite_epextend(rb))));
246 RightExtend = fsm_minimize(fsm_intersect(fsm_concat(rewrite_epextend(rb), rewrite_any_4tape(rb)), fsm_concat(fsm_copy(contexts->cpright), rewrite_any_4tape(rb))));
247 Base = fsm_minus(Base, rewr_contains(rb, fsm_minimize(fsm_concat(LeftExtend, RightExtend))));
248 } else {
249 Base = fsm_minus(Base, rewr_contains(rb, fsm_concat(fsm_copy(contexts->cpleft), fsm_concat(fsm_copy(C), fsm_copy(contexts->cpright)))));
250 }
251 }
252 rule_number++;
253 fsm_destroy(C);
254 }
255 }
256 Base = fsm_minimize(fsm_lower(fsm_compose(Base, fsm_parse_regex("[?:0]^4 [?:0 ?:0 ? ?]* [?:0]^4", NULL((void*)0), NULL((void*)0)))));
257 Base = fsm_unflatten(Base, "@0@", "@ID@");
258
259 for (i = 0; specialsymbols[i] != NULL((void*)0); i++) {
260 Base->sigma = sigma_remove(specialsymbols[i], Base->sigma);
261 }
262 for (rule_number = 1; rule_number <= num_rules; rule_number++)
263 Base->sigma = sigma_remove(rb->namestrings[rule_number-1], Base->sigma);
264
265 fsm_compact(Base);
266 sigma_sort(Base);
267 rewrite_cleanup(rb);
268 return Base;
269}
270
271void rewrite_cleanup(struct rewrite_batch *rb) {
272
273 if (rb->Rulenames != NULL((void*)0))
274 fsm_destroy(rb->Rulenames);
275 if (rb->ISyms != NULL((void*)0))
276 fsm_destroy(rb->ISyms);
277 if (rb->ANY != NULL((void*)0))
278 fsm_destroy(rb->ANY);
279 if (rb->IOpen != NULL((void*)0))
280 fsm_destroy(rb->IOpen);
281 if (rb->IClose != NULL((void*)0))
282 fsm_destroy(rb->IClose);
283 if (rb->ITape != NULL((void*)0))
284 fsm_destroy(rb->ITape);
285 if (rb->Any4Tape != NULL((void*)0))
286 fsm_destroy(rb->Any4Tape);
287 if (rb->Epextend != NULL((void*)0))
288 fsm_destroy(rb->Epextend);
289 if (rb->namestrings != NULL((void*)0))
290 free(rb->namestrings);
291 free(rb);
292 return;
293}
294
295
296struct fsm *rewr_notlongest(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type) {
297 /* define NotLongest(X) [Upper(X)/Lower(X) & Tape1of4(IOpen Tape1Sig* ["@O@" | IOpen] Tape1Sig*)] */
298 struct fsm *nl, *flt, *rulenum;
299 nl = fsm_parse_regex("[\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]* [\"@O@\"|\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]*", NULL((void*)0), NULL((void*)0));
300 nl = rewrite_tape_m_to_n_of_k(nl, 1, 1, 4);
301 rulenum = fsm_minimize(fsm_concat(fsm_identity(), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_universal())))));
302 nl = fsm_intersect(nl, rulenum);
303 /* lang can't end in @0@ */
304 if (arrow_type & ARROW_RIGHT1) {
305 flt = fsm_parse_regex("[? ? ? ?]* [? ? [?-\"@0@\"] ?]", NULL((void*)0), NULL((void*)0));
306 } else {
307 flt = fsm_parse_regex("[? ? ? ?]* [? ? ? [?-\"@0@\"]]", NULL((void*)0), NULL((void*)0));
308 }
309 return fsm_minimize(fsm_intersect(fsm_intersect(nl, fsm_copy(lang)), flt));
310}
311
312struct fsm *rewr_notshortest(struct rewrite_batch *rb, struct fsm *lang, int rule_number) {
313 /* define NotShortest(X) [Upper/Lower(X) & Tape1of4("@I[@" \IClose*)] */
314 struct fsm *ns, *rulenum;
315 ns = fsm_parse_regex("[\"@I[@\"] \\[\"@I]@\"]*", NULL((void*)0), NULL((void*)0));
316 rulenum = fsm_minimize(fsm_concat(fsm_identity(), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_universal())))));
317 ns = rewrite_tape_m_to_n_of_k(ns, 1, 1, 4);
318 ns = fsm_intersect(ns, rulenum);
319 return fsm_minimize(fsm_intersect(ns, fsm_copy(lang)));
320}
321
322struct fsm *rewr_notleftmost(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type) {
323 struct fsm *nl, *flt, *rulenum;
324 /* define Leftmost(X) [Upper/Lower(X) & Tape1of4("@O@" Tape1Sig* IOpen Tape1Sig*) ] */
325 nl = fsm_parse_regex("\"@O@\" [\"@O@\"]* [\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]*", NULL((void*)0), NULL((void*)0));
326 nl = rewrite_tape_m_to_n_of_k(nl, 1, 1, 4);
327 rulenum = fsm_minimize(fsm_concat(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_identity()))), fsm_concat(fsm_kleene_star(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_identity())))), fsm_concat(fsm_union(fsm_symbol("@I[@"), fsm_symbol("@I[]@")), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_universal())))));
328 nl = fsm_intersect(nl, rulenum);
329 if (arrow_type & ARROW_RIGHT1) {
330 flt = fsm_parse_regex("[? ? ? ?]* [? ? [?-\"@0@\"] ?]", NULL((void*)0), NULL((void*)0));
331 } else {
332 flt = fsm_parse_regex("[? ? ? ?]* [? ? ? [?-\"@0@\"]]", NULL((void*)0), NULL((void*)0));
333 }
334 return fsm_minimize(fsm_intersect(fsm_intersect(nl, fsm_copy(lang)), flt));
335}
336
337struct fsm *rewr_unrewritten(struct rewrite_batch *rb, struct fsm *lang) {
338 /* define Unrewritten(X) [X .o. [0:"@O@" 0:"@0@" ? 0:"@ID@"]*].l; */
339 struct fsm *C;
340 C = fsm_minimize(fsm_kleene_star(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_copy(rb->ANY), fsm_cross_product(fsm_empty_string(), fsm_symbol("@ID@")))))));
341 return fsm_minimize(fsm_lower(fsm_compose(lang, C)));
342}
343
344struct fsm *rewr_contains(struct rewrite_batch *rb, struct fsm *lang) {
345 /* define NotContain(X) ~[[Tape1Sig Tape2Sig Tape3Sig Tape4Sig]* X ?*]; */
346 return fsm_minimize(fsm_concat(rewrite_any_4tape(rb), fsm_concat(lang, rewrite_any_4tape(rb))));
347}
348
349struct fsm *rewrite_tape_m_to_n_of_k(struct fsm *lang, int m, int n, int k) {
350 /* [X .o. [0:?^(m-1) ?^(n-m+1) 0:?^(k-n)]*].l */
351 return fsm_minimize(fsm_lower(fsm_compose(lang, fsm_kleene_star(fsm_concat(fsm_concat_n(fsm_cross_product(fsm_empty_string(), fsm_identity()), m-1), fsm_concat(fsm_concat_n(fsm_identity(), n-m+1), fsm_concat_n(fsm_cross_product(fsm_empty_string(), fsm_identity()), k-n)))))));
352}
353
354struct fsm *rewrite_two_level(struct rewrite_batch *rb, struct fsm *lang, int rightside) {
355 struct fsm *Lower, *Upper, *Result;
356 Lower = rewrite_lower(rb, fsm_minimize(fsm_lower(fsm_copy(lang))));
357 Upper = rewrite_upper(rb, fsm_minimize(fsm_upper(lang)));
358 if (rightside == 1) {
359 Result = fsm_minimize(fsm_intersect(fsm_concat(Lower, rewrite_any_4tape(rb)), fsm_concat(Upper, rewrite_any_4tape(rb))));
360 } else {
361 Result = fsm_minimize(fsm_intersect(fsm_concat(rewrite_any_4tape(rb), Lower), fsm_concat(rewrite_any_4tape(rb), Upper)));
362 }
363 return Result;
364}
365
366struct fsm *rewrite_lower(struct rewrite_batch *rb, struct fsm *lower) {
367
368 /*
369 Lower:
370
371 [ @O@ | ISyms | ISyms ]*
372 [ @0@ | Rulenums | Rulenums ]
373 [ <R>,@#@ | @0@,R | R ]
374 [ @ID@ | <R> | @0@ ]
375
376 R = any real symbol
377 <R> = any real symbol, not inserted
378
379 */
380
381 struct fsm *One, *Two, *Three, *Filter;
382
383 One = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_union(fsm_symbol("@#@"), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(),fsm_symbol("@ID@"))))));
384
385 Two = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@"))), fsm_copy(rb->ANY)))));
386
387 Three = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@"))))));
388
389 Filter = fsm_minimize(fsm_kleene_star(fsm_union(One, fsm_union(Two, Three))));
390 return fsm_minimize(fsm_lower(fsm_compose(lower, Filter)));
391}
392
393struct fsm *rewrite_any_4tape(struct rewrite_batch *rb) {
394
395 /*
396 Upper:
397
398 [ @O@ | ISyms ]*
399 [ @0@ | Rulenums ]
400 [ <R>,@#@ | @0@,R ]
401 [ @ID@ | R,@ID@,@0@ ]
402
403 R = any real symbol
404 <R> = any real symbol, not inserted
405 */
406 if (rb->Any4Tape == NULL((void*)0)) {
407 rb->Any4Tape = fsm_minimize(fsm_kleene_star(fsm_union(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@#@")), fsm_symbol("@ID@")))), fsm_concat(fsm_copy(rb->ISyms), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_copy(rb->ANY), fsm_union(fsm_symbol("@ID@"), fsm_symbol("@0@")))))))));
408 }
409 return fsm_copy(rb->Any4Tape);
410}
411
412struct fsm *rewrite_upper(struct rewrite_batch *rb, struct fsm *upper) {
413 /*
414 Upper:
415
416 [ @O@ | ISyms | ISyms ]*
417 [ @0@ | Rulenums | Rulenums ]
418 [ <R>,@#@ | @0@ | <R> ]
419 [ @ID@ | R | R,@ID@,@0@ ]
420
421 R = any real symbol
422 <R> = any real symbol, not inserted
423 */
424
425 struct fsm *One, *Two, *Three, *Filter;
426
427 One = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_union(fsm_symbol("@#@"), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(),fsm_symbol("@ID@"))))));
428
429 Two = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ANY))))));
430
431 Three = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_copy(rb->ANY), fsm_cross_product(fsm_empty_string(), fsm_union(fsm_union(fsm_symbol("@0@"), fsm_copy(rb->ANY)), fsm_symbol("@ID@")))))));
432
433 Filter = fsm_minimize(fsm_kleene_star(fsm_union(One, fsm_union(Two, Three))));
434 return fsm_minimize(fsm_lower(fsm_compose(upper, Filter)));
435}
436
437struct fsm *rewrite_align(struct fsm *upper, struct fsm *lower) {
438 struct fsm *first, *second, *third, *align, *align2;
439 /* `[[`[[Tape1of2(upper "@0@"*) & Tape2of2(lower "@0@"*) & ~[[? ?]* "@0@" "@0@" [? ?]*]], %@%_IDENTITY%_SYMBOL%_%@,%@UNK%@] .o. [? ?|"@UNK@" "@UNK@":"@ID@"]*].l, %@UNK%@,%@%_IDENTITY%_SYMBOL%_%@] */
440 first = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_concat(upper, fsm_kleene_star(fsm_symbol("@0@"))), 1, 1, 2));
441 second = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_concat(lower, fsm_kleene_star(fsm_symbol("@0@"))), 2, 2, 2));
442 third = fsm_minimize(fsm_parse_regex("~[[? ?]* \"@0@\" \"@0@\" [? ?]*]", NULL((void*)0), NULL((void*)0)));
443
444 align = fsm_minimize(fsm_intersect(third, fsm_intersect(first, second)));
445 align = fsm_minimize(fsm_substitute_symbol(align, "@_IDENTITY_SYMBOL_@", "@UNK@"));
446 align2 = fsm_minimize(fsm_lower(fsm_compose(align, fsm_parse_regex("[? ? | \"@UNK@\" \"@UNK@\":\"@ID@\" ]*", NULL((void*)0), NULL((void*)0)))));
447 align2 = fsm_minimize(fsm_substitute_symbol(align2, "@UNK@", "@_IDENTITY_SYMBOL_@"));
448 return align2;
449}
450
451struct fsm *rewrite_align_markup(struct fsm *upper, struct fsm *lower1, struct fsm *lower2) {
452 struct fsm *first, *second, *third, *fourth, *fifth, *sixth, *align, *align2;
453 /* [Tape1of2("@0@"*) & Tape2of2(lower1)] [Tape1of2(upper) & Tape2of2("@ID@"*)] [ Tape1of2(lower1) & Tape2of2("@0@"*)] */
454 /* + make sure IDENTITY and UNKNOWN are taken care of */
455 first = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@0@")), 1, 1, 2));
456 second = fsm_minimize(rewrite_tape_m_to_n_of_k(lower1, 2, 2, 2));
457 third = fsm_minimize(rewrite_tape_m_to_n_of_k(upper, 1, 1, 2));
458 fourth = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@ID@")), 2, 2, 2));
459 fifth = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@0@")), 1, 1, 2));
460 sixth = fsm_minimize(rewrite_tape_m_to_n_of_k(lower2, 2, 2, 2));
461 align = fsm_minimize(fsm_concat(fsm_intersect(first, second), fsm_concat(fsm_intersect(third, fourth), fsm_intersect(fifth, sixth))));
462 align = fsm_minimize(fsm_substitute_symbol(align, "@_IDENTITY_SYMBOL_@", "@UNK@"));
463 align2 = fsm_minimize(fsm_lower(fsm_compose(align, fsm_parse_regex("[? ? | \"@UNK@\" \"@UNK@\":\"@ID@\" ]*", NULL((void*)0), NULL((void*)0)))));
464 align2 = fsm_minimize(fsm_substitute_symbol(align2, "@UNK@", "@_IDENTITY_SYMBOL_@"));
465 return align2;
466}
467
468struct fsm *rewrite_itape(struct rewrite_batch *rb) {
469 if (rb->ITape == NULL((void*)0)) {
470 rb->ITape = fsm_parse_regex("[\"@I[]@\" ? ? ? | \"@I[@\" ? ? ? [\"@I@\" ? ? ?]* \"@I]@\" ? [?-\"@0@\"] ? ] [\"@I]@\" ? \"@0@\" ?]* | 0" , NULL((void*)0), NULL((void*)0));
471 }
472 return fsm_copy(rb->ITape);
473}
474
475struct fsm *rewrite_cp_markup(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower1, struct fsm *lower2, int rule_number) {
476 /* Same as rewrite_cp, could be consolidated */
477 struct fsm *Aligned, *threetape, *rulenumtape;
478 /* define CP(X,Y) Tape23of3(Align2(X,Y)) & [ "@I[@" ? ? ["@I@" ? ?]* "@I]@" ? ? | "@I[]@" ? ? | 0 ] */
479 Aligned = rewrite_align_markup(upper, lower1, lower2);
480 Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4);
481 threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb)));
482 rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4);
483 return fsm_minimize(fsm_intersect(threetape, rulenumtape));
484}
485
486struct fsm *rewrite_cp_transducer(struct rewrite_batch *rb, struct fsm *t, int rule_number) {
487 struct fsm *Aligned, *threetape, *rulenumtape;
488 Aligned = fsm_flatten(t, fsm_symbol("@0@"));
489 Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4);
490 threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb)));
491 rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4);
492 return fsm_minimize(fsm_intersect(threetape, rulenumtape));
493}
494
495struct fsm *rewrite_cp(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower, int rule_number) {
496 struct fsm *Aligned, *threetape, *rulenumtape;
497 /* define CP(X,Y) Tape23of3(Align2(X,Y)) & [ "@I[@" ? ? ["@I@" ? ?]* "@I]@" ? ? | "@I[]@" ? ? | 0 ] */
498 Aligned = rewrite_align(upper, lower);
499 Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4);
500 threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb)));
501 rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4);
502 return fsm_minimize(fsm_intersect(threetape, rulenumtape));
503}
504
505void rewrite_add_special_syms(struct rewrite_batch *rb, struct fsm *net) {
506 int i;
507 if (net == NULL((void*)0))
508 return;
509 sigma_substitute(".#.", "@#@", net->sigma); /* We convert boundaries to our interal rep. */
510 /* This is because sigma merging (fsm_merge_sigma()) is handled */
511 /* in a special way for .#., which we don't want here. */
512
513 for (i = 0; specialsymbols[i] != NULL((void*)0); i++) {
514 if (sigma_find(specialsymbols[i], net->sigma) == -1)
515 sigma_add(specialsymbols[i], net->sigma);
516 }
517 for (i = 1; i <= rb->num_rules; i++) {
518 sigma_add(rb->namestrings[i-1], net->sigma);
519 }
520 sigma_sort(net);
521}
522
523
524void fsm_clear_contexts(struct fsmcontexts *contexts) {
525 struct fsmcontexts *c, *cp;
526 for (c = contexts; c != NULL((void*)0); c = cp) {
527 fsm_destroy(c->left);
528 fsm_destroy(c->right);
529 fsm_destroy(c->cpleft);
530 fsm_destroy(c->cpright);
531 cp = c->next;
532 free(c);
533 }
534}
535
536
537struct fsm *rewr_context_restrict(struct rewrite_batch *rb, struct fsm *X, struct fsmcontexts *LR) {
538
539 struct fsm *Var, *Notvar, *UnionL, *UnionP, *Result, *NewX, *Left, *Right;
540 struct fsmcontexts *pairs;
541
542 Var = fsm_symbol("@VARX@");
543 //Notvar = fsm_minimize(fsm_kleene_star(fsm_term_negation(fsm_symbol("@VARX@"))));
544 Notvar = fsm_minus(rewrite_any_4tape(rb), fsm_contains(fsm_symbol("@VARX@")));
545 /* We add the variable symbol to all alphabets to avoid ? matching it */
546 /* which would cause extra nondeterminism */
547
548 NewX = fsm_copy(X);
549 if (sigma_find("@VARX@", NewX->sigma) == -1) {
550 sigma_add("@VARX@", NewX->sigma);
551 sigma_sort(NewX);
552 }
553 UnionP = fsm_empty_set();
554
555 for (pairs = LR; pairs != NULL((void*)0) ; pairs = pairs->next) {
556 if (pairs->left == NULL((void*)0)) {
557 Left = fsm_empty_string();
558 } else {
559 Left = fsm_copy(pairs->cpleft);
560 sigma_add("@VARX@", Left->sigma);
561 sigma_sort(Left);
562 }
563 if (pairs->right == NULL((void*)0)) {
564 Right = fsm_empty_string();
565 } else {
566 Right = fsm_copy(pairs->cpright);
567 sigma_add("@VARX@", Right->sigma);
568 sigma_sort(Right);
569 }
570 UnionP = fsm_union(fsm_concat(Left, fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(Var), Right)))), UnionP);
571 }
572 UnionL = fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(NewX), fsm_concat(fsm_copy(Var), fsm_copy(Notvar)))));
573 Result = fsm_minus(UnionL, fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(UnionP), fsm_copy(Notvar))));
574
575 if (sigma_find("@VARX@", Result->sigma) != -1) {
576 Result = fsm_complement(fsm_substitute_symbol(Result, "@VARX@","@_EPSILON_SYMBOL_@"));
577 } else {
578 Result = fsm_complement(Result);
579 }
580 fsm_destroy(UnionP);
581 fsm_destroy(Var);
582 fsm_destroy(Notvar);
583 fsm_destroy(NewX);
584 return(Result);
585}
586
587struct fsm *rewrite_epextend(struct rewrite_batch *rb) {
588
589 struct fsm *one, *two, *allzeroupper, *threea, *threeb, *threec, *three;
590
591 /* 1. @O@ @0@ [ANY|@#@] @ID@ */
592 /* 2. @I[]@ @#Rule@ [ANY] [@ID@|@0@|ANY] */
593 /* 3a. @I[@ @#Rule@ [ANY] [@ID@|@0@|ANY] */
594 /* 3b. @I@ @#Rule@ [ANY] [@ID@|@0@|ANY] */
595 /* 3c. @I]@ @#Rule@ [ANY] [@ID@|@0@|ANY] */
596 /* 3. [3a|3b|3c] & ~[[? ? "@0@" ?]*] */
597
598 /* TODO lower version as well */
599
600 if (rb->Epextend == NULL((void*)0)) {
601 one = fsm_minimize(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@#@")), fsm_symbol("@ID@")))));
602 two = fsm_minimize(fsm_concat(fsm_symbol("@I[]@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_copy(rb->ANY), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY)))))));
603 allzeroupper = fsm_parse_regex("~[[? ? \"@0@\" ?]*]", NULL((void*)0), NULL((void*)0));
604 threea = fsm_minimize(fsm_concat(fsm_symbol("@I[@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY)))))));
605 threeb = fsm_minimize(fsm_kleene_star(fsm_concat(fsm_symbol("@I@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY))))))));
606 threec = fsm_minimize(fsm_concat(fsm_symbol("@I]@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY)))))));
607 three = fsm_intersect(allzeroupper, fsm_concat(threea, fsm_concat(threeb, threec)));
608 rb->Epextend = fsm_minimize(fsm_union(fsm_union(one, two), three));
609 }
610 return fsm_copy(rb->Epextend);
611}