File: | rewrite.c |
Warning: | line 134, column 2 Value stored to 'dir' is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | /* Foma: a finite-state toolkit and library. */ |
2 | /* Copyright © 2008-2021 Mans Hulden */ |
3 | |
4 | /* This file is part of foma. */ |
5 | |
6 | /* Licensed under the Apache License, Version 2.0 (the "License"); */ |
7 | /* you may not use this file except in compliance with the License. */ |
8 | /* You may obtain a copy of the License at */ |
9 | |
10 | /* http://www.apache.org/licenses/LICENSE-2.0 */ |
11 | |
12 | /* Unless required by applicable law or agreed to in writing, software */ |
13 | /* distributed under the License is distributed on an "AS IS" BASIS, */ |
14 | /* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ |
15 | /* See the License for the specific language governing permissions and */ |
16 | /* limitations under the License. */ |
17 | |
18 | #include <stdio.h> |
19 | #include <stdlib.h> |
20 | #include <string.h> |
21 | #include "foma.h" |
22 | |
23 | // Lower(X) puts X on output tape (may also be represented by @ID@ on input tape) |
24 | // Upper(X) puts X on input tape |
25 | // Unrewritten(X) X on input tape, not rewritten (aligned with @O@ symbols) |
26 | // NotContain(X) MT does not contain MT configuration X |
27 | |
28 | // Boundary: every MT word begins and ends with boundary, i.e. the @#@ symbol on the input tape, output tape, and relevant semantic symbols |
29 | |
30 | /* |
31 | |
32 | [ @O@ ] [ @I[@ ] [ @I@ ] [ @I]@ ] [ @I[]@ ] |
33 | [ @0@ ] [ @#0001@ ] [ @#0001@ ] [ @#0001@ ] [ @#0001@ ] |
34 | [ @#@ ] [ ANY|@0@ ] [ ANY|@0@ ] [ ANY|@0@ ] [ ANY|@0@ ] |
35 | [ @ID@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ] [ ANY|@ID|@0@ ] |
36 | |
37 | |
38 | */ |
39 | /* Special symbols used: |
40 | @0@ Epsilon |
41 | @O@ Outside rewrite |
42 | @I@ Inside rewrite |
43 | @I[@ Beginning of rewrite |
44 | @I[]@ Beginning and end of rewrite |
45 | @I]@ End of rewrite |
46 | @ID@ Identity symbol (= repeat symbol on previous tape at this position) |
47 | @#@ Boundary (the symbol .#. is mapped to this in contexts before the compilation procedure) |
48 | @#X@ X = rule number (one for each rule, starting with @#0001@) |
49 | */ |
50 | |
51 | struct rewrite_batch { |
52 | |
53 | struct rewrite_set *rewrite_set; |
54 | struct fsm *Rulenames; |
55 | struct fsm *ISyms; |
56 | struct fsm *ANY; |
57 | struct fsm *IOpen; |
58 | struct fsm *IClose; |
59 | struct fsm *ITape; |
60 | struct fsm *Any4Tape; |
61 | struct fsm *Epextend; |
62 | int num_rules; |
63 | char (*namestrings)[8]; |
64 | |
65 | }; |
66 | |
67 | char *specialsymbols[] = {"@0@","@O@","@I@","@I[@","@I[]@","@I]@","@ID@","@#@", NULL((void*)0)}; |
68 | |
69 | void rewrite_add_special_syms(struct rewrite_batch *rb, struct fsm *net); |
70 | struct fsm *rewrite_upper(struct rewrite_batch *rb, struct fsm *upper); |
71 | struct fsm *rewrite_lower(struct rewrite_batch *rb, struct fsm *lower); |
72 | struct fsm *rewrite_two_level(struct rewrite_batch *rb, struct fsm *lang, int rightside); |
73 | struct fsm *rewr_context_restrict(struct rewrite_batch *rb, struct fsm *X, struct fsmcontexts *LR); |
74 | struct fsm *rewr_contains(struct rewrite_batch *rb, struct fsm *lang); |
75 | struct fsm *rewr_unrewritten(struct rewrite_batch *rb, struct fsm *lang); |
76 | struct fsm *rewr_notleftmost(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type); |
77 | struct fsm *rewr_notshortest(struct rewrite_batch *rb, struct fsm *lang, int rule_number); |
78 | struct fsm *rewr_notlongest(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type); |
79 | struct fsm *rewrite_tape_m_to_n_of_k(struct fsm *lang, int m, int n, int k); |
80 | struct fsm *rewrite_cp(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower, int rule_number); |
81 | struct fsm *rewrite_cp_transducer(struct rewrite_batch *rb, struct fsm *t, int rule_number); |
82 | struct fsm *rewrite_cp_markup(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower1, struct fsm *lower2, int rule_number); |
83 | struct fsm *rewrite_epextend(struct rewrite_batch *rb); |
84 | struct fsm *rewrite_any_4tape(struct rewrite_batch *rb); |
85 | struct fsm *rewrite_itape(struct rewrite_batch *rb); |
86 | void rewrite_cleanup(struct rewrite_batch *rb); |
87 | |
88 | |
89 | struct fsm *fsm_rewrite(struct rewrite_set *all_rules) { |
90 | struct rewrite_batch *rb; |
91 | struct rewrite_set *ruleset; |
92 | struct fsmrules *rules; |
93 | struct fsmcontexts *contexts; |
94 | struct fsm *RuleCP, *Base, *Boundary, *Outside, *CP, *C, *LeftExtend, *RightExtend, *Center; |
95 | int i, num_rules, rule_number, dir; |
96 | /* Count parallel rules */ |
97 | for (ruleset = all_rules, num_rules = 0; ruleset != NULL((void*)0); ruleset = ruleset->next) { |
98 | for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) { |
99 | num_rules++; |
100 | } |
101 | } |
102 | |
103 | rb = calloc(1, sizeof(struct rewrite_batch)); |
104 | rb->rewrite_set = all_rules; |
105 | rb->num_rules = num_rules; |
106 | rb->namestrings = malloc(sizeof *rb->namestrings * num_rules); |
107 | for (i = 0; i < rb->num_rules; i++) { |
108 | sprintf(rb->namestrings[i], "@#%04i@", i+1); |
109 | } |
110 | |
111 | rb->ISyms = fsm_minimize(fsm_union(fsm_symbol("@I@"), fsm_union(fsm_symbol("@I[]@"), fsm_union(fsm_symbol("@I[@"), fsm_symbol("@I]@"))))); |
112 | rb->Rulenames = fsm_empty_set(); |
113 | for (i = 1; i <= num_rules; i++) { |
114 | rb->Rulenames = fsm_minimize(fsm_union(rb->Rulenames, fsm_symbol(rb->namestrings[i-1]))); |
115 | } |
116 | rb->ANY = fsm_identity(); |
117 | rewrite_add_special_syms(rb, rb->ANY); |
118 | |
119 | /* Add auxiliary symbols to all alphabets */ |
120 | for (ruleset = all_rules; ruleset != NULL((void*)0); ruleset = ruleset->next) { |
121 | for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) { |
122 | rewrite_add_special_syms(rb, rules->left); |
123 | rewrite_add_special_syms(rb, rules->right); |
124 | rewrite_add_special_syms(rb, rules->right2); |
125 | } |
126 | for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) { |
127 | rewrite_add_special_syms(rb, contexts->left); |
128 | rewrite_add_special_syms(rb, contexts->right); |
129 | } |
130 | } |
131 | /* Get cross-product of every rule, according to its type */ |
132 | RuleCP = fsm_empty_set(); |
133 | for (ruleset = all_rules, rule_number = 1; ruleset != NULL((void*)0); ruleset = ruleset->next) { |
134 | dir = ruleset->rule_direction; |
Value stored to 'dir' is never read | |
135 | for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) { |
136 | if (rules->right == NULL((void*)0)) { |
137 | /* T(x)-type rule */ |
138 | CP = rewrite_cp_transducer(rb, fsm_copy(rules->left), rule_number); |
139 | rules->cross_product = fsm_copy(CP); |
140 | rules->right = fsm_minimize(fsm_lower(fsm_copy(rules->left))); |
141 | rules->left = fsm_minimize(fsm_upper(fsm_copy(rules->left))); |
142 | rewrite_add_special_syms(rb, rules->right); |
143 | rewrite_add_special_syms(rb, rules->left); |
144 | } else if (rules->right2 == NULL((void*)0)) { |
145 | /* Regular rewrite rule */ |
146 | CP = rewrite_cp(rb, fsm_copy(rules->left), fsm_copy(rules->right), rule_number); |
147 | rules->cross_product = fsm_copy(CP); |
148 | } else { |
149 | /* A -> B ... C -type rule */ |
150 | CP = rewrite_cp_markup(rb, fsm_copy(rules->left), fsm_copy(rules->right), fsm_copy(rules->right2), rule_number); |
151 | rules->cross_product = fsm_copy(CP); |
152 | } |
153 | RuleCP = fsm_minimize(fsm_union(RuleCP, CP)); |
154 | rule_number++; |
155 | } |
156 | } |
157 | |
158 | /* Create Base language */ |
159 | Boundary = fsm_parse_regex("\"@O@\" \"@0@\" \"@#@\" \"@ID@\"", NULL((void*)0), NULL((void*)0)); |
160 | Outside = fsm_minimize(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_copy(rb->ANY), fsm_symbol("@ID@"))))); |
161 | Base = fsm_minimize(fsm_concat(fsm_copy(Boundary), fsm_concat(fsm_kleene_star(fsm_union(RuleCP, Outside)), fsm_copy(Boundary)))); |
162 | fsm_destroy(Boundary); |
163 | for (ruleset = all_rules, rule_number = 1; ruleset != NULL((void*)0); ruleset = ruleset->next) { |
164 | dir = ruleset->rule_direction; |
165 | /* Replace all context spec with Upper/Lower, depending on rule_direction */ |
166 | for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) { |
167 | switch(dir) { |
168 | case OP_UPWARD_REPLACE1: |
169 | contexts->cpleft = rewrite_upper(rb, fsm_copy(contexts->left)); |
170 | contexts->cpright = rewrite_upper(rb, fsm_copy(contexts->right)); |
171 | break; |
172 | case OP_RIGHTWARD_REPLACE2: |
173 | contexts->cpleft = rewrite_lower(rb, fsm_copy(contexts->left)); |
174 | contexts->cpright = rewrite_upper(rb, fsm_copy(contexts->right)); |
175 | break; |
176 | case OP_LEFTWARD_REPLACE3: |
177 | contexts->cpleft = rewrite_upper(rb, fsm_copy(contexts->left)); |
178 | contexts->cpright = rewrite_lower(rb, fsm_copy(contexts->right)); |
179 | break; |
180 | case OP_DOWNWARD_REPLACE4: |
181 | contexts->cpleft = rewrite_lower(rb, fsm_copy(contexts->left)); |
182 | contexts->cpright = rewrite_lower(rb, fsm_copy(contexts->right)); |
183 | break; |
184 | case OP_TWO_LEVEL_REPLACE5: |
185 | contexts->cpleft = rewrite_two_level(rb, fsm_copy(contexts->left), 0); |
186 | contexts->cpright = rewrite_two_level(rb, fsm_copy(contexts->right), 1); |
187 | break; |
188 | } |
189 | } |
190 | for (rules = ruleset->rewrite_rules; rules != NULL((void*)0); rules = rules->next) { |
191 | /* Just the rule center w/ number without CP() contests */ |
192 | /* Actually, maybe better to include CP(U,L) in this, very slow with e.g. a -> a || _ b^15 */ |
193 | if (rules->arrow_type & ARROW_DOTTED8) { |
194 | /* define EP Tape1of4("@O@") | [ Tape1of4("@I[@" "@I@"* "@I]@" | "@I[]@") & Tape3of4(~["@0@"*]) ] ; */ |
195 | /* Additional constraint: 0->x is only allowed between EP _ EP */ |
196 | /* The left and right sides can be checked separately */ |
197 | /* ~[?* Center ~[EP ?*]] & ~[~[?* EP] Center ?*] */ |
198 | Center = fsm_copy(rules->cross_product); |
199 | Base = fsm_intersect(fsm_intersect(Base, fsm_complement(fsm_concat(rewrite_any_4tape(rb), fsm_concat(fsm_copy(Center), fsm_complement(fsm_concat(rewrite_epextend(rb), rewrite_any_4tape(rb))))))), fsm_complement(fsm_concat(fsm_complement(fsm_concat(rewrite_any_4tape(rb), rewrite_epextend(rb))), fsm_concat(fsm_copy(Center), rewrite_any_4tape(rb))))); |
200 | fsm_destroy(Center); |
201 | } |
202 | if (ruleset->rewrite_contexts) { |
203 | Base = fsm_intersect(Base, rewr_context_restrict(rb, rules->cross_product, ruleset->rewrite_contexts)); |
204 | } |
205 | /* Determine C (based on rule type) */ |
206 | C = fsm_empty_set(); |
207 | if ((rules->arrow_type & ARROW_RIGHT1) && !(rules->arrow_type & ARROW_OPTIONAL4)) { |
208 | C = fsm_union(C, rewr_unrewritten(rb, fsm_minimize(fsm_minus(fsm_copy(rules->left), fsm_empty_string())))); |
209 | } |
210 | if ((rules->arrow_type & ARROW_LEFT2) && !(rules->arrow_type & ARROW_OPTIONAL4)) { |
211 | C = fsm_union(C, rewr_unrewritten(rb, fsm_minimize(fsm_minus(fsm_copy(rules->right), fsm_empty_string())))); |
212 | } |
213 | if (rules->arrow_type & ARROW_LONGEST_MATCH16) { |
214 | if (rules->arrow_type & ARROW_RIGHT1) { |
215 | C = fsm_union(C, rewr_notleftmost(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type)); |
216 | C = fsm_union(C, rewr_notlongest(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type)); |
217 | } |
218 | if (rules->arrow_type & ARROW_LEFT2) { |
219 | C = fsm_union(C, rewr_notleftmost(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type)); |
220 | C = fsm_union(C, rewr_notlongest(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type)); |
221 | } |
222 | } |
223 | if (rules->arrow_type & ARROW_SHORTEST_MATCH32) { |
224 | if (rules->arrow_type & ARROW_RIGHT1) { |
225 | C = fsm_union(C, rewr_notleftmost(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number, rules->arrow_type)); |
226 | C = fsm_union(C, rewr_notshortest(rb, rewrite_upper(rb, fsm_copy(rules->left)), rule_number)); |
227 | } |
228 | if (rules->arrow_type & ARROW_LEFT2) { |
229 | C = fsm_union(C, rewr_notleftmost(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number, rules->arrow_type)); |
230 | C = fsm_union(C, rewr_notshortest(rb, rewrite_lower(rb, fsm_copy(rules->right)), rule_number)); |
231 | } |
232 | } |
233 | if (!ruleset->rewrite_contexts) { |
234 | if (rules->arrow_type & ARROW_DOTTED8 && !(rules->arrow_type & ARROW_OPTIONAL4)) { |
235 | Base = fsm_minus(Base, rewr_contains(rb, fsm_concat(rewrite_epextend(rb), rewrite_epextend(rb)))); |
236 | } else { |
237 | Base = fsm_minus(Base, rewr_contains(rb, fsm_copy(C))); |
238 | } |
239 | } |
240 | for (contexts = ruleset->rewrite_contexts; contexts != NULL((void*)0); contexts = contexts->next) { |
241 | /* Constraints: running intersect w/ Base */ |
242 | /* NotContain(LC [Unrewritten|LM|...] RC) */ |
243 | if (rules->arrow_type & ARROW_DOTTED8 && !(rules->arrow_type & ARROW_OPTIONAL4)) { |
244 | /* Extend left and right */ |
245 | LeftExtend = fsm_minimize(fsm_intersect(fsm_concat(rewrite_any_4tape(rb), fsm_copy(contexts->cpleft)), fsm_concat(rewrite_any_4tape(rb), rewrite_epextend(rb)))); |
246 | RightExtend = fsm_minimize(fsm_intersect(fsm_concat(rewrite_epextend(rb), rewrite_any_4tape(rb)), fsm_concat(fsm_copy(contexts->cpright), rewrite_any_4tape(rb)))); |
247 | Base = fsm_minus(Base, rewr_contains(rb, fsm_minimize(fsm_concat(LeftExtend, RightExtend)))); |
248 | } else { |
249 | Base = fsm_minus(Base, rewr_contains(rb, fsm_concat(fsm_copy(contexts->cpleft), fsm_concat(fsm_copy(C), fsm_copy(contexts->cpright))))); |
250 | } |
251 | } |
252 | rule_number++; |
253 | fsm_destroy(C); |
254 | } |
255 | } |
256 | Base = fsm_minimize(fsm_lower(fsm_compose(Base, fsm_parse_regex("[?:0]^4 [?:0 ?:0 ? ?]* [?:0]^4", NULL((void*)0), NULL((void*)0))))); |
257 | Base = fsm_unflatten(Base, "@0@", "@ID@"); |
258 | |
259 | for (i = 0; specialsymbols[i] != NULL((void*)0); i++) { |
260 | Base->sigma = sigma_remove(specialsymbols[i], Base->sigma); |
261 | } |
262 | for (rule_number = 1; rule_number <= num_rules; rule_number++) |
263 | Base->sigma = sigma_remove(rb->namestrings[rule_number-1], Base->sigma); |
264 | |
265 | fsm_compact(Base); |
266 | sigma_sort(Base); |
267 | rewrite_cleanup(rb); |
268 | return Base; |
269 | } |
270 | |
271 | void rewrite_cleanup(struct rewrite_batch *rb) { |
272 | |
273 | if (rb->Rulenames != NULL((void*)0)) |
274 | fsm_destroy(rb->Rulenames); |
275 | if (rb->ISyms != NULL((void*)0)) |
276 | fsm_destroy(rb->ISyms); |
277 | if (rb->ANY != NULL((void*)0)) |
278 | fsm_destroy(rb->ANY); |
279 | if (rb->IOpen != NULL((void*)0)) |
280 | fsm_destroy(rb->IOpen); |
281 | if (rb->IClose != NULL((void*)0)) |
282 | fsm_destroy(rb->IClose); |
283 | if (rb->ITape != NULL((void*)0)) |
284 | fsm_destroy(rb->ITape); |
285 | if (rb->Any4Tape != NULL((void*)0)) |
286 | fsm_destroy(rb->Any4Tape); |
287 | if (rb->Epextend != NULL((void*)0)) |
288 | fsm_destroy(rb->Epextend); |
289 | if (rb->namestrings != NULL((void*)0)) |
290 | free(rb->namestrings); |
291 | free(rb); |
292 | return; |
293 | } |
294 | |
295 | |
296 | struct fsm *rewr_notlongest(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type) { |
297 | /* define NotLongest(X) [Upper(X)/Lower(X) & Tape1of4(IOpen Tape1Sig* ["@O@" | IOpen] Tape1Sig*)] */ |
298 | struct fsm *nl, *flt, *rulenum; |
299 | nl = fsm_parse_regex("[\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]* [\"@O@\"|\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]*", NULL((void*)0), NULL((void*)0)); |
300 | nl = rewrite_tape_m_to_n_of_k(nl, 1, 1, 4); |
301 | rulenum = fsm_minimize(fsm_concat(fsm_identity(), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_universal()))))); |
302 | nl = fsm_intersect(nl, rulenum); |
303 | /* lang can't end in @0@ */ |
304 | if (arrow_type & ARROW_RIGHT1) { |
305 | flt = fsm_parse_regex("[? ? ? ?]* [? ? [?-\"@0@\"] ?]", NULL((void*)0), NULL((void*)0)); |
306 | } else { |
307 | flt = fsm_parse_regex("[? ? ? ?]* [? ? ? [?-\"@0@\"]]", NULL((void*)0), NULL((void*)0)); |
308 | } |
309 | return fsm_minimize(fsm_intersect(fsm_intersect(nl, fsm_copy(lang)), flt)); |
310 | } |
311 | |
312 | struct fsm *rewr_notshortest(struct rewrite_batch *rb, struct fsm *lang, int rule_number) { |
313 | /* define NotShortest(X) [Upper/Lower(X) & Tape1of4("@I[@" \IClose*)] */ |
314 | struct fsm *ns, *rulenum; |
315 | ns = fsm_parse_regex("[\"@I[@\"] \\[\"@I]@\"]*", NULL((void*)0), NULL((void*)0)); |
316 | rulenum = fsm_minimize(fsm_concat(fsm_identity(), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_universal()))))); |
317 | ns = rewrite_tape_m_to_n_of_k(ns, 1, 1, 4); |
318 | ns = fsm_intersect(ns, rulenum); |
319 | return fsm_minimize(fsm_intersect(ns, fsm_copy(lang))); |
320 | } |
321 | |
322 | struct fsm *rewr_notleftmost(struct rewrite_batch *rb, struct fsm *lang, int rule_number, int arrow_type) { |
323 | struct fsm *nl, *flt, *rulenum; |
324 | /* define Leftmost(X) [Upper/Lower(X) & Tape1of4("@O@" Tape1Sig* IOpen Tape1Sig*) ] */ |
325 | nl = fsm_parse_regex("\"@O@\" [\"@O@\"]* [\"@I[@\"|\"@I[]@\"] [\"@I[@\"|\"@I[]@\"|\"@I]@\"|\"@I@\"|\"@O@\"]*", NULL((void*)0), NULL((void*)0)); |
326 | nl = rewrite_tape_m_to_n_of_k(nl, 1, 1, 4); |
327 | rulenum = fsm_minimize(fsm_concat(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_identity()))), fsm_concat(fsm_kleene_star(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_identity(), fsm_concat(fsm_identity(), fsm_identity())))), fsm_concat(fsm_union(fsm_symbol("@I[@"), fsm_symbol("@I[]@")), fsm_concat(fsm_symbol(rb->namestrings[rule_number-1]), fsm_universal()))))); |
328 | nl = fsm_intersect(nl, rulenum); |
329 | if (arrow_type & ARROW_RIGHT1) { |
330 | flt = fsm_parse_regex("[? ? ? ?]* [? ? [?-\"@0@\"] ?]", NULL((void*)0), NULL((void*)0)); |
331 | } else { |
332 | flt = fsm_parse_regex("[? ? ? ?]* [? ? ? [?-\"@0@\"]]", NULL((void*)0), NULL((void*)0)); |
333 | } |
334 | return fsm_minimize(fsm_intersect(fsm_intersect(nl, fsm_copy(lang)), flt)); |
335 | } |
336 | |
337 | struct fsm *rewr_unrewritten(struct rewrite_batch *rb, struct fsm *lang) { |
338 | /* define Unrewritten(X) [X .o. [0:"@O@" 0:"@0@" ? 0:"@ID@"]*].l; */ |
339 | struct fsm *C; |
340 | C = fsm_minimize(fsm_kleene_star(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_copy(rb->ANY), fsm_cross_product(fsm_empty_string(), fsm_symbol("@ID@"))))))); |
341 | return fsm_minimize(fsm_lower(fsm_compose(lang, C))); |
342 | } |
343 | |
344 | struct fsm *rewr_contains(struct rewrite_batch *rb, struct fsm *lang) { |
345 | /* define NotContain(X) ~[[Tape1Sig Tape2Sig Tape3Sig Tape4Sig]* X ?*]; */ |
346 | return fsm_minimize(fsm_concat(rewrite_any_4tape(rb), fsm_concat(lang, rewrite_any_4tape(rb)))); |
347 | } |
348 | |
349 | struct fsm *rewrite_tape_m_to_n_of_k(struct fsm *lang, int m, int n, int k) { |
350 | /* [X .o. [0:?^(m-1) ?^(n-m+1) 0:?^(k-n)]*].l */ |
351 | return fsm_minimize(fsm_lower(fsm_compose(lang, fsm_kleene_star(fsm_concat(fsm_concat_n(fsm_cross_product(fsm_empty_string(), fsm_identity()), m-1), fsm_concat(fsm_concat_n(fsm_identity(), n-m+1), fsm_concat_n(fsm_cross_product(fsm_empty_string(), fsm_identity()), k-n))))))); |
352 | } |
353 | |
354 | struct fsm *rewrite_two_level(struct rewrite_batch *rb, struct fsm *lang, int rightside) { |
355 | struct fsm *Lower, *Upper, *Result; |
356 | Lower = rewrite_lower(rb, fsm_minimize(fsm_lower(fsm_copy(lang)))); |
357 | Upper = rewrite_upper(rb, fsm_minimize(fsm_upper(lang))); |
358 | if (rightside == 1) { |
359 | Result = fsm_minimize(fsm_intersect(fsm_concat(Lower, rewrite_any_4tape(rb)), fsm_concat(Upper, rewrite_any_4tape(rb)))); |
360 | } else { |
361 | Result = fsm_minimize(fsm_intersect(fsm_concat(rewrite_any_4tape(rb), Lower), fsm_concat(rewrite_any_4tape(rb), Upper))); |
362 | } |
363 | return Result; |
364 | } |
365 | |
366 | struct fsm *rewrite_lower(struct rewrite_batch *rb, struct fsm *lower) { |
367 | |
368 | /* |
369 | Lower: |
370 | |
371 | [ @O@ | ISyms | ISyms ]* |
372 | [ @0@ | Rulenums | Rulenums ] |
373 | [ <R>,@#@ | @0@,R | R ] |
374 | [ @ID@ | <R> | @0@ ] |
375 | |
376 | R = any real symbol |
377 | <R> = any real symbol, not inserted |
378 | |
379 | */ |
380 | |
381 | struct fsm *One, *Two, *Three, *Filter; |
382 | |
383 | One = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_union(fsm_symbol("@#@"), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(),fsm_symbol("@ID@")))))); |
384 | |
385 | Two = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@"))), fsm_copy(rb->ANY))))); |
386 | |
387 | Three = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")))))); |
388 | |
389 | Filter = fsm_minimize(fsm_kleene_star(fsm_union(One, fsm_union(Two, Three)))); |
390 | return fsm_minimize(fsm_lower(fsm_compose(lower, Filter))); |
391 | } |
392 | |
393 | struct fsm *rewrite_any_4tape(struct rewrite_batch *rb) { |
394 | |
395 | /* |
396 | Upper: |
397 | |
398 | [ @O@ | ISyms ]* |
399 | [ @0@ | Rulenums ] |
400 | [ <R>,@#@ | @0@,R ] |
401 | [ @ID@ | R,@ID@,@0@ ] |
402 | |
403 | R = any real symbol |
404 | <R> = any real symbol, not inserted |
405 | */ |
406 | if (rb->Any4Tape == NULL((void*)0)) { |
407 | rb->Any4Tape = fsm_minimize(fsm_kleene_star(fsm_union(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@#@")), fsm_symbol("@ID@")))), fsm_concat(fsm_copy(rb->ISyms), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_copy(rb->ANY), fsm_union(fsm_symbol("@ID@"), fsm_symbol("@0@"))))))))); |
408 | } |
409 | return fsm_copy(rb->Any4Tape); |
410 | } |
411 | |
412 | struct fsm *rewrite_upper(struct rewrite_batch *rb, struct fsm *upper) { |
413 | /* |
414 | Upper: |
415 | |
416 | [ @O@ | ISyms | ISyms ]* |
417 | [ @0@ | Rulenums | Rulenums ] |
418 | [ <R>,@#@ | @0@ | <R> ] |
419 | [ @ID@ | R | R,@ID@,@0@ ] |
420 | |
421 | R = any real symbol |
422 | <R> = any real symbol, not inserted |
423 | */ |
424 | |
425 | struct fsm *One, *Two, *Three, *Filter; |
426 | |
427 | One = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@O@")), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_concat(fsm_union(fsm_symbol("@#@"), fsm_copy(rb->ANY)), fsm_cross_product(fsm_empty_string(),fsm_symbol("@ID@")))))); |
428 | |
429 | Two = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_symbol("@0@")), fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ANY)))))); |
430 | |
431 | Three = fsm_minimize(fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->ISyms)), fsm_concat(fsm_cross_product(fsm_empty_string(), fsm_copy(rb->Rulenames)), fsm_concat(fsm_copy(rb->ANY), fsm_cross_product(fsm_empty_string(), fsm_union(fsm_union(fsm_symbol("@0@"), fsm_copy(rb->ANY)), fsm_symbol("@ID@"))))))); |
432 | |
433 | Filter = fsm_minimize(fsm_kleene_star(fsm_union(One, fsm_union(Two, Three)))); |
434 | return fsm_minimize(fsm_lower(fsm_compose(upper, Filter))); |
435 | } |
436 | |
437 | struct fsm *rewrite_align(struct fsm *upper, struct fsm *lower) { |
438 | struct fsm *first, *second, *third, *align, *align2; |
439 | /* `[[`[[Tape1of2(upper "@0@"*) & Tape2of2(lower "@0@"*) & ~[[? ?]* "@0@" "@0@" [? ?]*]], %@%_IDENTITY%_SYMBOL%_%@,%@UNK%@] .o. [? ?|"@UNK@" "@UNK@":"@ID@"]*].l, %@UNK%@,%@%_IDENTITY%_SYMBOL%_%@] */ |
440 | first = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_concat(upper, fsm_kleene_star(fsm_symbol("@0@"))), 1, 1, 2)); |
441 | second = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_concat(lower, fsm_kleene_star(fsm_symbol("@0@"))), 2, 2, 2)); |
442 | third = fsm_minimize(fsm_parse_regex("~[[? ?]* \"@0@\" \"@0@\" [? ?]*]", NULL((void*)0), NULL((void*)0))); |
443 | |
444 | align = fsm_minimize(fsm_intersect(third, fsm_intersect(first, second))); |
445 | align = fsm_minimize(fsm_substitute_symbol(align, "@_IDENTITY_SYMBOL_@", "@UNK@")); |
446 | align2 = fsm_minimize(fsm_lower(fsm_compose(align, fsm_parse_regex("[? ? | \"@UNK@\" \"@UNK@\":\"@ID@\" ]*", NULL((void*)0), NULL((void*)0))))); |
447 | align2 = fsm_minimize(fsm_substitute_symbol(align2, "@UNK@", "@_IDENTITY_SYMBOL_@")); |
448 | return align2; |
449 | } |
450 | |
451 | struct fsm *rewrite_align_markup(struct fsm *upper, struct fsm *lower1, struct fsm *lower2) { |
452 | struct fsm *first, *second, *third, *fourth, *fifth, *sixth, *align, *align2; |
453 | /* [Tape1of2("@0@"*) & Tape2of2(lower1)] [Tape1of2(upper) & Tape2of2("@ID@"*)] [ Tape1of2(lower1) & Tape2of2("@0@"*)] */ |
454 | /* + make sure IDENTITY and UNKNOWN are taken care of */ |
455 | first = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@0@")), 1, 1, 2)); |
456 | second = fsm_minimize(rewrite_tape_m_to_n_of_k(lower1, 2, 2, 2)); |
457 | third = fsm_minimize(rewrite_tape_m_to_n_of_k(upper, 1, 1, 2)); |
458 | fourth = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@ID@")), 2, 2, 2)); |
459 | fifth = fsm_minimize(rewrite_tape_m_to_n_of_k(fsm_kleene_star(fsm_symbol("@0@")), 1, 1, 2)); |
460 | sixth = fsm_minimize(rewrite_tape_m_to_n_of_k(lower2, 2, 2, 2)); |
461 | align = fsm_minimize(fsm_concat(fsm_intersect(first, second), fsm_concat(fsm_intersect(third, fourth), fsm_intersect(fifth, sixth)))); |
462 | align = fsm_minimize(fsm_substitute_symbol(align, "@_IDENTITY_SYMBOL_@", "@UNK@")); |
463 | align2 = fsm_minimize(fsm_lower(fsm_compose(align, fsm_parse_regex("[? ? | \"@UNK@\" \"@UNK@\":\"@ID@\" ]*", NULL((void*)0), NULL((void*)0))))); |
464 | align2 = fsm_minimize(fsm_substitute_symbol(align2, "@UNK@", "@_IDENTITY_SYMBOL_@")); |
465 | return align2; |
466 | } |
467 | |
468 | struct fsm *rewrite_itape(struct rewrite_batch *rb) { |
469 | if (rb->ITape == NULL((void*)0)) { |
470 | rb->ITape = fsm_parse_regex("[\"@I[]@\" ? ? ? | \"@I[@\" ? ? ? [\"@I@\" ? ? ?]* \"@I]@\" ? [?-\"@0@\"] ? ] [\"@I]@\" ? \"@0@\" ?]* | 0" , NULL((void*)0), NULL((void*)0)); |
471 | } |
472 | return fsm_copy(rb->ITape); |
473 | } |
474 | |
475 | struct fsm *rewrite_cp_markup(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower1, struct fsm *lower2, int rule_number) { |
476 | /* Same as rewrite_cp, could be consolidated */ |
477 | struct fsm *Aligned, *threetape, *rulenumtape; |
478 | /* define CP(X,Y) Tape23of3(Align2(X,Y)) & [ "@I[@" ? ? ["@I@" ? ?]* "@I]@" ? ? | "@I[]@" ? ? | 0 ] */ |
479 | Aligned = rewrite_align_markup(upper, lower1, lower2); |
480 | Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4); |
481 | threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb))); |
482 | rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4); |
483 | return fsm_minimize(fsm_intersect(threetape, rulenumtape)); |
484 | } |
485 | |
486 | struct fsm *rewrite_cp_transducer(struct rewrite_batch *rb, struct fsm *t, int rule_number) { |
487 | struct fsm *Aligned, *threetape, *rulenumtape; |
488 | Aligned = fsm_flatten(t, fsm_symbol("@0@")); |
489 | Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4); |
490 | threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb))); |
491 | rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4); |
492 | return fsm_minimize(fsm_intersect(threetape, rulenumtape)); |
493 | } |
494 | |
495 | struct fsm *rewrite_cp(struct rewrite_batch *rb, struct fsm *upper, struct fsm *lower, int rule_number) { |
496 | struct fsm *Aligned, *threetape, *rulenumtape; |
497 | /* define CP(X,Y) Tape23of3(Align2(X,Y)) & [ "@I[@" ? ? ["@I@" ? ?]* "@I]@" ? ? | "@I[]@" ? ? | 0 ] */ |
498 | Aligned = rewrite_align(upper, lower); |
499 | Aligned = rewrite_tape_m_to_n_of_k(Aligned, 3, 4, 4); |
500 | threetape = fsm_minimize(fsm_intersect(Aligned, rewrite_itape(rb))); |
501 | rulenumtape = rewrite_tape_m_to_n_of_k(fsm_minimize(fsm_kleene_star(fsm_symbol(rb->namestrings[rule_number-1]))), 2, 2, 4); |
502 | return fsm_minimize(fsm_intersect(threetape, rulenumtape)); |
503 | } |
504 | |
505 | void rewrite_add_special_syms(struct rewrite_batch *rb, struct fsm *net) { |
506 | int i; |
507 | if (net == NULL((void*)0)) |
508 | return; |
509 | sigma_substitute(".#.", "@#@", net->sigma); /* We convert boundaries to our interal rep. */ |
510 | /* This is because sigma merging (fsm_merge_sigma()) is handled */ |
511 | /* in a special way for .#., which we don't want here. */ |
512 | |
513 | for (i = 0; specialsymbols[i] != NULL((void*)0); i++) { |
514 | if (sigma_find(specialsymbols[i], net->sigma) == -1) |
515 | sigma_add(specialsymbols[i], net->sigma); |
516 | } |
517 | for (i = 1; i <= rb->num_rules; i++) { |
518 | sigma_add(rb->namestrings[i-1], net->sigma); |
519 | } |
520 | sigma_sort(net); |
521 | } |
522 | |
523 | |
524 | void fsm_clear_contexts(struct fsmcontexts *contexts) { |
525 | struct fsmcontexts *c, *cp; |
526 | for (c = contexts; c != NULL((void*)0); c = cp) { |
527 | fsm_destroy(c->left); |
528 | fsm_destroy(c->right); |
529 | fsm_destroy(c->cpleft); |
530 | fsm_destroy(c->cpright); |
531 | cp = c->next; |
532 | free(c); |
533 | } |
534 | } |
535 | |
536 | |
537 | struct fsm *rewr_context_restrict(struct rewrite_batch *rb, struct fsm *X, struct fsmcontexts *LR) { |
538 | |
539 | struct fsm *Var, *Notvar, *UnionL, *UnionP, *Result, *NewX, *Left, *Right; |
540 | struct fsmcontexts *pairs; |
541 | |
542 | Var = fsm_symbol("@VARX@"); |
543 | //Notvar = fsm_minimize(fsm_kleene_star(fsm_term_negation(fsm_symbol("@VARX@")))); |
544 | Notvar = fsm_minus(rewrite_any_4tape(rb), fsm_contains(fsm_symbol("@VARX@"))); |
545 | /* We add the variable symbol to all alphabets to avoid ? matching it */ |
546 | /* which would cause extra nondeterminism */ |
547 | |
548 | NewX = fsm_copy(X); |
549 | if (sigma_find("@VARX@", NewX->sigma) == -1) { |
550 | sigma_add("@VARX@", NewX->sigma); |
551 | sigma_sort(NewX); |
552 | } |
553 | UnionP = fsm_empty_set(); |
554 | |
555 | for (pairs = LR; pairs != NULL((void*)0) ; pairs = pairs->next) { |
556 | if (pairs->left == NULL((void*)0)) { |
557 | Left = fsm_empty_string(); |
558 | } else { |
559 | Left = fsm_copy(pairs->cpleft); |
560 | sigma_add("@VARX@", Left->sigma); |
561 | sigma_sort(Left); |
562 | } |
563 | if (pairs->right == NULL((void*)0)) { |
564 | Right = fsm_empty_string(); |
565 | } else { |
566 | Right = fsm_copy(pairs->cpright); |
567 | sigma_add("@VARX@", Right->sigma); |
568 | sigma_sort(Right); |
569 | } |
570 | UnionP = fsm_union(fsm_concat(Left, fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(Var), Right)))), UnionP); |
571 | } |
572 | UnionL = fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(Var), fsm_concat(fsm_copy(NewX), fsm_concat(fsm_copy(Var), fsm_copy(Notvar))))); |
573 | Result = fsm_minus(UnionL, fsm_concat(fsm_copy(Notvar), fsm_concat(fsm_copy(UnionP), fsm_copy(Notvar)))); |
574 | |
575 | if (sigma_find("@VARX@", Result->sigma) != -1) { |
576 | Result = fsm_complement(fsm_substitute_symbol(Result, "@VARX@","@_EPSILON_SYMBOL_@")); |
577 | } else { |
578 | Result = fsm_complement(Result); |
579 | } |
580 | fsm_destroy(UnionP); |
581 | fsm_destroy(Var); |
582 | fsm_destroy(Notvar); |
583 | fsm_destroy(NewX); |
584 | return(Result); |
585 | } |
586 | |
587 | struct fsm *rewrite_epextend(struct rewrite_batch *rb) { |
588 | |
589 | struct fsm *one, *two, *allzeroupper, *threea, *threeb, *threec, *three; |
590 | |
591 | /* 1. @O@ @0@ [ANY|@#@] @ID@ */ |
592 | /* 2. @I[]@ @#Rule@ [ANY] [@ID@|@0@|ANY] */ |
593 | /* 3a. @I[@ @#Rule@ [ANY] [@ID@|@0@|ANY] */ |
594 | /* 3b. @I@ @#Rule@ [ANY] [@ID@|@0@|ANY] */ |
595 | /* 3c. @I]@ @#Rule@ [ANY] [@ID@|@0@|ANY] */ |
596 | /* 3. [3a|3b|3c] & ~[[? ? "@0@" ?]*] */ |
597 | |
598 | /* TODO lower version as well */ |
599 | |
600 | if (rb->Epextend == NULL((void*)0)) { |
601 | one = fsm_minimize(fsm_concat(fsm_symbol("@O@"), fsm_concat(fsm_symbol("@0@"), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@#@")), fsm_symbol("@ID@"))))); |
602 | two = fsm_minimize(fsm_concat(fsm_symbol("@I[]@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_copy(rb->ANY), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY))))))); |
603 | allzeroupper = fsm_parse_regex("~[[? ? \"@0@\" ?]*]", NULL((void*)0), NULL((void*)0)); |
604 | threea = fsm_minimize(fsm_concat(fsm_symbol("@I[@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY))))))); |
605 | threeb = fsm_minimize(fsm_kleene_star(fsm_concat(fsm_symbol("@I@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY)))))))); |
606 | threec = fsm_minimize(fsm_concat(fsm_symbol("@I]@"), fsm_concat(fsm_copy(rb->Rulenames), fsm_concat(fsm_union(fsm_copy(rb->ANY), fsm_symbol("@0@")), fsm_union(fsm_symbol("@0@"), fsm_union(fsm_symbol("@ID@"), fsm_copy(rb->ANY))))))); |
607 | three = fsm_intersect(allzeroupper, fsm_concat(threea, fsm_concat(threeb, threec))); |
608 | rb->Epextend = fsm_minimize(fsm_union(fsm_union(one, two), three)); |
609 | } |
610 | return fsm_copy(rb->Epextend); |
611 | } |