clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name transfer.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ transfer.cc
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | #include <apertium/transfer.h> |
18 | |
19 | #include <lttoolbox/string_utils.h> |
20 | #include <lttoolbox/xml_walk_util.h> |
21 | |
22 | #include <iostream> |
23 | |
24 | using namespace std; |
25 | |
26 | Transfer::Transfer() |
27 | {} |
28 | |
29 | void |
30 | Transfer::readBil(string const &fstfile) |
31 | { |
32 | FILE *in = fopen(fstfile.c_str(), "rb"); |
33 | if(!in) |
34 | { |
35 | cerr << "Error: Could not open file '" << fstfile << "'." << endl; |
36 | exit(EXIT_FAILURE); |
37 | } |
38 | fstp.load(in); |
39 | fstp.initBiltrans(); |
40 | fclose(in); |
41 | } |
42 | |
43 | void |
44 | Transfer::setExtendedDictionary(string const &fstfile) |
45 | { |
46 | FILE *in = fopen(fstfile.c_str(), "rb"); |
47 | if(!in) |
48 | { |
49 | cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl; |
50 | exit(EXIT_FAILURE); |
51 | } |
52 | extended.load(in); |
53 | extended.initBiltrans(); |
54 | fclose(in); |
55 | isExtended = true; |
56 | } |
57 | |
58 | void |
59 | Transfer::read(string const &transferfile, string const &datafile, |
60 | string const &fstfile) |
61 | { |
62 | TransferBase::read(transferfile.c_str(), datafile.c_str()); |
63 | if (getattr(root_element, "default") == "chunk"_u) { |
64 | defaultAttrs = chunk; |
65 | } else { |
66 | defaultAttrs = lu; |
67 | } |
68 | if (!fstfile.empty()) { |
69 | readBil(fstfile); |
70 | } |
71 | } |
72 | |
73 | bool |
74 | Transfer::checkIndex(xmlNode *element, int index, int limit) |
75 | { |
76 | if(index >= limit) |
77 | { |
78 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl; |
79 | return false; |
80 | } |
81 | if(index < 0) { |
82 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl; |
83 | return false; |
84 | } |
85 | if(word[index] == 0) |
86 | { |
87 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl; |
88 | return false; |
89 | } |
90 | return true; |
91 | } |
92 | |
93 | UString |
94 | Transfer::evalCachedString(xmlNode *element) |
95 | { |
96 | TransferInstr& ti = evalStringCache[element]; |
97 | switch (ti.getType()) { |
98 | case ti_clip_sl: |
99 | if (checkIndex(element, ti.getPos(), lword)) { |
100 | if (gettingLemmaFromWord(ti.getContent()) && last_lword > 1) { |
101 | if(in_lu) { |
102 | out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank()); |
103 | } else if (in_let_var) { |
104 | var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank()); |
105 | } |
106 | } |
107 | |
108 | return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()); |
109 | } |
110 | break; |
111 | |
112 | case ti_clip_tl: |
113 | if(checkIndex(element, ti.getPos(), lword)) { |
114 | if(gettingLemmaFromWord(ti.getContent()) && last_lword > 1) { |
115 | if(in_lu) { |
116 | out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank()); |
117 | } else if(in_let_var) { |
118 | var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank()); |
119 | } |
120 | } |
121 | |
122 | return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()); |
123 | } |
124 | break; |
125 | |
126 | case ti_clip_ref: |
127 | if(checkIndex(element, ti.getPos(), lword)) { |
128 | return word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()); |
129 | } |
130 | break; |
131 | |
132 | case ti_linkto_sl: |
133 | if(checkIndex(element, ti.getPos(), lword)) { |
134 | if(!word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()).empty()) { |
135 | UString ret; |
136 | ret += '<'; |
137 | ret += ti.getStrval(); |
138 | ret += '>'; |
139 | return ret; |
140 | } else { |
141 | return ""_u; |
142 | } |
143 | } |
144 | break; |
145 | |
146 | case ti_linkto_tl: |
147 | if(checkIndex(element, ti.getPos(), lword)) { |
148 | if(!word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()).empty()) { |
149 | UString ret; |
150 | ret += '<'; |
151 | ret += ti.getStrval(); |
152 | ret += '>'; |
153 | return ret; |
154 | } else { |
155 | return ""_u; |
156 | } |
157 | } |
158 | break; |
159 | |
160 | case ti_linkto_ref: |
161 | if(checkIndex(element, ti.getPos(), lword)) { |
162 | if(!word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()).empty()) { |
163 | UString ret; |
164 | ret += '<'; |
165 | ret += ti.getStrval(); |
166 | ret += '>'; |
167 | return ret; |
168 | } else { |
169 | return ""_u; |
170 | } |
171 | } |
172 | break; |
173 | |
174 | case ti_var: |
175 | if(last_lword > 1) { |
176 | out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]); |
177 | } |
178 | return variables[ti.getContent()]; |
179 | |
180 | case ti_lit_tag: |
181 | case ti_lit: |
182 | return ti.getContent(); |
183 | |
184 | case ti_b: |
185 | if(!blank_queue.empty()) { |
186 | UString retblank = blank_queue.front(); |
187 | if(in_out) { |
188 | blank_queue.pop(); |
189 | } |
190 | |
191 | return retblank; |
192 | } else { |
193 | return " "_u; |
194 | } |
195 | break; |
196 | |
197 | case ti_get_case_from: |
198 | if(checkIndex(element, ti.getPos(), lword)) { |
199 | return copycase(word[ti.getPos()]->source(attr_items[ti.getContent()]), |
200 | evalString(ti.getPointer())); |
201 | } |
202 | break; |
203 | |
204 | case ti_case_of_sl: |
205 | if(checkIndex(element, ti.getPos(), lword)) { |
206 | return StringUtils::getcase(word[ti.getPos()]->source(attr_items[ti.getContent()])); |
207 | } |
208 | break; |
209 | |
210 | case ti_case_of_tl: |
211 | if(checkIndex(element, ti.getPos(), lword)) { |
212 | return StringUtils::getcase(word[ti.getPos()]->target(attr_items[ti.getContent()])); |
213 | } |
214 | break; |
215 | |
216 | case ti_case_of_ref: |
217 | if(checkIndex(element, ti.getPos(), lword)) { |
218 | return StringUtils::getcase(word[ti.getPos()]->reference(attr_items[ti.getContent()])); |
219 | } |
220 | break; |
221 | |
222 | default: |
223 | return ""_u; |
224 | } |
225 | return ""_u; |
226 | } |
227 | |
228 | void |
229 | Transfer::processClip(xmlNode* element) |
230 | { |
231 | int pos = 0; |
232 | xmlChar *side = NULL; |
233 | UString as; |
234 | UString part; |
235 | bool queue = true; |
236 | |
237 | for(xmlAttr *i = element->properties; i != NULL; i = i->next) { |
238 | if(!xmlStrcmp(i->name, (const xmlChar *) "side")) { |
239 | side = i->children->content; |
240 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) { |
241 | part = to_ustring((const char*) i->children->content); |
242 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) { |
243 | pos = atoi((const char *)i->children->content) - 1; |
244 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "queue")) { |
245 | if(!xmlStrcmp(i->children->content, (const xmlChar *) "no")) { |
246 | queue = false; |
247 | } |
248 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to")) { |
249 | as = to_ustring((const char*)i->children->content); |
250 | } |
251 | } |
252 | |
253 | if(!as.empty()) { |
254 | if(!xmlStrcmp(side, (const xmlChar *) "sl")) { |
255 | evalStringCache[element] = TransferInstr(ti_linkto_sl, part, pos, NULL, queue, as); |
256 | } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) { |
257 | evalStringCache[element] = TransferInstr(ti_linkto_ref, part, pos, NULL, queue, as); |
258 | } else { |
259 | evalStringCache[element] = TransferInstr(ti_linkto_tl, part, pos, NULL, queue, as); |
260 | } |
261 | } else if(!xmlStrcmp(side, (const xmlChar *) "sl")) { |
262 | evalStringCache[element] = TransferInstr(ti_clip_sl, part, pos, NULL, queue); |
263 | } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) { |
264 | evalStringCache[element] = TransferInstr(ti_clip_ref, part, pos, NULL, queue); |
265 | } else { |
266 | evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL, queue); |
267 | } |
268 | } |
269 | |
270 | void |
271 | Transfer::processBlank(xmlNode* element) |
272 | { |
273 | evalStringCache[element] = TransferInstr(ti_b, " "_u, -1); |
274 | } |
275 | |
276 | void |
277 | Transfer::processCaseOf(xmlNode* element) |
278 | { |
279 | int pos = 0; |
280 | xmlChar *side = NULL; |
281 | UString part; |
282 | |
283 | for(xmlAttr *i = element->properties; i != NULL; i = i->next) { |
284 | if(!xmlStrcmp(i->name, (const xmlChar *) "side")) { |
285 | side = i->children->content; |
286 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) { |
287 | part = to_ustring((const char*) i->children->content); |
288 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) { |
289 | pos = atoi((const char *) i->children->content) - 1; |
290 | } |
291 | } |
292 | |
293 | if(!xmlStrcmp(side, (const xmlChar *) "sl")) { |
294 | evalStringCache[element] = TransferInstr(ti_case_of_sl, part, pos); |
295 | } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) { |
296 | evalStringCache[element] = TransferInstr(ti_case_of_ref, part, pos); |
297 | } else { |
298 | evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos); |
299 | } |
300 | } |
301 | |
302 | UString |
303 | Transfer::processLu(xmlNode* element) |
304 | { |
305 | in_lu = true; |
306 | out_wblank.clear(); |
307 | |
308 | UString myword; |
309 | for (auto i : children(element)) { |
310 | myword.append(evalString(i)); |
311 | } |
312 | |
313 | in_lu = false; |
314 | |
315 | |
316 | |
317 | if(last_lword == 1 && word != nullptr) { |
318 | out_wblank = word[0]->getWblank(); |
319 | } |
320 | |
321 | if(!myword.empty()) { |
322 | if(myword[0] != '[' || myword[1] != '[') { |
323 | UString ret = out_wblank; |
324 | ret += '^'; |
325 | ret += myword; |
326 | ret += '$'; |
327 | return ret; |
328 | } else { |
329 | myword += '$'; |
330 | return myword; |
331 | } |
332 | } else { |
333 | return ""_u; |
334 | } |
335 | } |
336 | |
337 | UString |
338 | Transfer::processMlu(xmlNode* element) |
339 | { |
340 | UString value; |
341 | |
342 | bool first_time = true; |
343 | out_wblank.clear(); |
344 | |
345 | in_lu = true; |
346 | for (auto i : children(element)) { |
347 | UString myword; |
348 | for (auto j : children(i)) { |
349 | myword.append(evalString(j)); |
350 | } |
351 | |
352 | if (!first_time) { |
353 | if(!myword.empty() && myword[0] != '#') { |
354 | value += '+'; |
355 | } |
356 | } else { |
357 | if (!myword.empty()) { |
358 | first_time = false; |
359 | } |
360 | } |
361 | |
362 | value.append(myword); |
363 | } |
364 | |
365 | if(last_lword == 1) { |
366 | out_wblank = word[0]->getWblank(); |
367 | } |
368 | |
369 | if(!value.empty()) { |
370 | UString ret = out_wblank; |
371 | ret += '^'; |
372 | ret += value; |
373 | ret += '$'; |
374 | return ret; |
375 | } else { |
376 | return ""_u; |
377 | } |
378 | } |
379 | |
380 | void |
381 | Transfer::processLuCount(xmlNode* element) |
382 | { |
383 | cerr << "Error: unexpected expression: '" << element->name << "'" << endl; |
384 | exit(EXIT_FAILURE); |
385 | } |
386 | |
387 | void |
388 | Transfer::processOut(xmlNode *localroot) |
389 | { |
390 | in_out = true; |
391 | |
392 | for (auto i : children(localroot)) { |
393 | if(defaultAttrs == lu) { |
394 | if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { |
395 | write(processLu(i), output); |
396 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) { |
397 | write(processMlu(i), output); |
398 | } else { |
399 | write(evalString(i), output); |
400 | } |
401 | } else { |
402 | if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) { |
403 | write(processChunk(i), output); |
404 | } else { |
405 | write(evalString(i), output); |
406 | } |
407 | } |
408 | } |
409 | in_out = false; |
410 | } |
411 | |
412 | UString |
413 | Transfer::processChunk(xmlNode *localroot) |
414 | { |
415 | UString name, namefrom; |
416 | UString caseofchunk = "aa"_u; |
417 | UString result; |
418 | |
419 | for(xmlAttr *i = localroot->properties; i != NULL; i = i->next) |
420 | { |
421 | if(!xmlStrcmp(i->name, (const xmlChar *) "name")) |
422 | { |
423 | name = to_ustring((const char *) i->children->content); |
424 | } |
425 | else if(!xmlStrcmp(i->name, (const xmlChar *) "namefrom")) |
426 | { |
427 | namefrom = to_ustring((const char *) i->children->content); |
428 | } |
429 | else if(!xmlStrcmp(i->name, (const xmlChar *) "case")) |
430 | { |
431 | caseofchunk = to_ustring((const char *) i->children->content); |
432 | } |
433 | } |
434 | |
435 | result += '^'; |
436 | if(!caseofchunk.empty() && !dictionary_case) |
437 | { |
438 | if(!name.empty()) |
439 | { |
440 | result.append(StringUtils::copycase(variables[caseofchunk], name)); |
441 | } |
442 | else if(!namefrom.empty()) |
443 | { |
444 | result.append(StringUtils::copycase(variables[caseofchunk], variables[namefrom])); |
445 | } |
446 | else |
447 | { |
448 | cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl; |
449 | exit(EXIT_FAILURE); |
450 | } |
451 | } |
452 | else |
453 | { |
454 | if(!name.empty()) |
455 | { |
456 | result.append(name); |
457 | } |
458 | else if(!namefrom.empty()) |
459 | { |
460 | result.append(variables[namefrom]); |
461 | } |
462 | else |
463 | { |
464 | cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl; |
465 | exit(EXIT_FAILURE); |
466 | } |
467 | } |
468 | |
469 | for (auto i : children(localroot)) { |
470 | if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) { |
471 | result.append(processTags(i)); |
472 | result += '{'; |
473 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { |
474 | result.append(processLu(i)); |
475 | } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) { |
476 | result.append(processMlu(i)); |
477 | } else { |
478 | result.append(evalString(i)); |
479 | } |
480 | } |
481 | result += '}'; |
482 | result += '$'; |
483 | return result; |
484 | } |
485 | |
486 | UString |
487 | Transfer::processTags(xmlNode *localroot) |
488 | { |
489 | UString result; |
490 | for (auto i : children(localroot)) { |
491 | if (!xmlStrcmp(i->name, (const xmlChar*) "tag")) { |
492 | for (auto j : children(i)) { |
493 | result.append(evalString(j)); |
494 | } |
495 | } |
496 | } |
497 | return result; |
498 | } |
499 | |
500 | void |
501 | Transfer::processLet(xmlNode *localroot) |
502 | { |
503 | xmlNode *leftSide = NULL, *rightSide = NULL; |
504 | |
505 | for (auto i : children(localroot)) { |
506 | if(leftSide == NULL) { |
507 | leftSide = i; |
508 | } else { |
509 | rightSide = i; |
510 | break; |
511 | } |
512 | } |
513 | |
514 | map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide); |
515 | if(it != evalStringCache.end()) |
516 | { |
517 | TransferInstr &ti = it->second; |
518 | switch(ti.getType()) |
519 | { |
520 | case ti_var: |
521 | in_let_var = true; |
522 | var_val = ti.getContent(); |
523 | |
524 | var_out_wblank[var_val].clear(); |
525 | |
526 | variables[ti.getContent()] = evalString(rightSide); |
527 | |
528 | in_let_var = false; |
529 | |
530 | return; |
531 | |
532 | case ti_clip_sl: |
533 | if (checkIndex(leftSide, ti.getPos(), lword)) { |
534 | bool match = word[ti.getPos()]->setSource(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition()); |
535 | if (!match && trace) |
536 | { |
537 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
538 | } |
539 | } |
540 | return; |
541 | |
542 | case ti_clip_tl: |
543 | if (checkIndex(leftSide, ti.getPos(), lword)) { |
544 | bool match = word[ti.getPos()]->setTarget(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition()); |
545 | if (!match && trace) |
546 | { |
547 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
548 | } |
549 | } |
550 | return; |
551 | |
552 | case ti_clip_ref: |
553 | if (checkIndex(leftSide, ti.getPos(), lword)) { |
554 | bool match = word[ti.getPos()]->setReference(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition()); |
555 | if (!match && trace) |
556 | { |
557 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
558 | } |
559 | } |
560 | return; |
561 | |
562 | default: |
563 | return; |
564 | } |
565 | } |
566 | if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "var")) |
567 | { |
568 | in_let_var = true; |
569 | |
570 | UString const val = to_ustring((const char *) leftSide->properties->children->content); |
571 | |
572 | var_val = val; |
573 | var_out_wblank[var_val].clear(); |
574 | |
575 | variables[val] = evalString(rightSide); |
576 | |
577 | in_let_var = false; |
578 | evalStringCache[leftSide] = TransferInstr(ti_var, val, 0); |
579 | } |
580 | else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip")) |
581 | { |
582 | int pos = 0; |
583 | xmlChar *side = NULL, *as = NULL; |
584 | UString part; |
585 | bool queue = true; |
586 | |
587 | for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next) |
588 | { |
589 | if(!xmlStrcmp(i->name, (const xmlChar *) "side")) |
590 | { |
591 | side = i->children->content; |
592 | } |
593 | else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) |
594 | { |
595 | part = to_ustring((const char*) i->children->content); |
596 | } |
597 | else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) |
598 | { |
599 | pos = atoi((const char *) i->children->content) - 1; |
600 | } |
601 | else if(!xmlStrcmp(i->name, (const xmlChar *) "queue")) |
602 | { |
603 | if(!xmlStrcmp(i->children->content, (const xmlChar *) "no")) |
604 | { |
605 | queue = false; |
606 | } |
607 | } |
608 | else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to")) |
609 | { |
610 | as = i->children->content; |
611 | } |
612 | } |
613 | |
614 | if (pos >= lword) { |
615 | cerr << "Error: Transfer::processLet() bad access on pos >= lword" << endl; |
616 | return; |
617 | } |
618 | if (word[pos] == 0) { |
619 | cerr << "Error: Transfer::processLet() null access on word[pos]" << endl; |
620 | return; |
621 | } |
622 | |
623 | if(!xmlStrcmp(side, (const xmlChar *) "tl")) |
624 | { |
625 | bool match = word[pos]->setTarget(attr_items[part], evalString(rightSide), queue); |
626 | if(!match && trace) |
627 | { |
628 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
629 | } |
630 | evalStringCache[leftSide] = TransferInstr(ti_clip_tl, part, pos, NULL, queue); |
631 | } |
632 | else if(!xmlStrcmp(side, (const xmlChar *) "ref")) |
633 | { |
634 | bool match = word[pos]->setReference(attr_items[part], evalString(rightSide), queue); |
635 | if(!match && trace) |
636 | { |
637 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
638 | } |
639 | evalStringCache[leftSide] = TransferInstr(ti_clip_ref, part, pos, NULL, queue); |
640 | } |
641 | else |
642 | { |
643 | bool match = word[pos]->setSource(attr_items[part], evalString(rightSide), queue); |
644 | if(!match && trace) |
645 | { |
646 | cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
647 | } |
648 | evalStringCache[leftSide] = TransferInstr(ti_clip_sl, part, pos, NULL, queue); |
649 | } |
650 | } |
651 | } |
652 | |
653 | void |
654 | Transfer::processModifyCase(xmlNode *localroot) |
655 | { |
656 | if (dictionary_case) return; |
| 1 | Assuming field 'dictionary_case' is false | |
|
| |
657 | xmlNode *leftSide = NULL, *rightSide = NULL; |
| 3 | | 'leftSide' initialized to a null pointer value | |
|
658 | |
659 | for (auto i : children(localroot)) { |
660 | if(leftSide == NULL) { |
661 | leftSide = i; |
662 | } else { |
663 | rightSide = i; |
664 | break; |
665 | } |
666 | } |
667 | |
668 | if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip")) |
| 4 | | Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide') |
|
669 | { |
670 | int pos = 0; |
671 | xmlChar *side = NULL, *as = NULL; |
672 | UString part; |
673 | bool queue = true; |
674 | |
675 | for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next) |
676 | { |
677 | if(!xmlStrcmp(i->name, (const xmlChar *) "side")) |
678 | { |
679 | side = i->children->content; |
680 | } |
681 | else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) |
682 | { |
683 | part = to_ustring((const char*)i->children->content); |
684 | } |
685 | else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) |
686 | { |
687 | pos = atoi((const char *) i->children->content) - 1; |
688 | } |
689 | else if(!xmlStrcmp(i->name, (const xmlChar *) "queue")) |
690 | { |
691 | if(!xmlStrcmp(i->children->content, (xmlChar const *) "no")) |
692 | { |
693 | queue = false; |
694 | } |
695 | } |
696 | else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to")) |
697 | { |
698 | as = i->children->content; |
699 | (void)as; |
700 | } |
701 | } |
702 | if(!xmlStrcmp(side, (const xmlChar *) "sl")) |
703 | { |
704 | UString const result = StringUtils::copycase(evalString(rightSide), |
705 | word[pos]->source(attr_items[part], queue)); |
706 | bool match = word[pos]->setSource(attr_items[part], result); |
707 | if(!match && trace) |
708 | { |
709 | cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl; |
710 | } |
711 | } |
712 | else if(!xmlStrcmp(side, (const xmlChar *) "ref")) |
713 | { |
714 | UString const result = StringUtils::copycase(evalString(rightSide), |
715 | word[pos]->reference(attr_items[part], queue)); |
716 | bool match = word[pos]->setReference(attr_items[part], result); |
717 | if(!match && trace) |
718 | { |
719 | cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl; |
720 | } |
721 | } |
722 | else |
723 | { |
724 | UString const result = StringUtils::copycase(evalString(rightSide), |
725 | word[pos]->target(attr_items[part], queue)); |
726 | bool match = word[pos]->setTarget(attr_items[part], result); |
727 | if(!match && trace) |
728 | { |
729 | cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl; |
730 | } |
731 | } |
732 | } |
733 | else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var")) |
734 | { |
735 | UString const val = to_ustring((const char *) leftSide->properties->children->content); |
736 | variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]); |
737 | } |
738 | } |
739 | |
740 | void |
741 | Transfer::processCallMacro(xmlNode *localroot) |
742 | { |
743 | UString const n = to_ustring((const char *) localroot->properties->children->content); |
744 | int npar = 0; |
745 | |
746 | xmlNode *macro = macro_map[macros[n]]; |
747 | |
748 | for(xmlAttr *i = macro->properties; i != NULL; i = i->next) |
749 | { |
750 | if(!xmlStrcmp(i->name, (const xmlChar *) "npar")) |
751 | { |
752 | npar = atoi((const char *) i->children->content); |
753 | break; |
754 | } |
755 | } |
756 | |
757 | |
758 | |
759 | TransferWord **myword = NULL; |
760 | int idx = 0; |
761 | if(npar > 0) |
762 | { |
763 | myword = new TransferWord *[npar]; |
764 | std::fill(myword, myword+npar, (TransferWord *)(0)); |
765 | for (auto i : children(localroot)) { |
766 | if (idx >= npar) { |
767 | cerr << "Error: processCallMacro() number of arguments >= npar at line " << i->line << endl; |
768 | return; |
769 | } |
770 | int pos = atoi((const char *) i->properties->children->content)-1; |
771 | myword[idx] = word[pos]; |
772 | |
773 | idx++; |
774 | } |
775 | } |
776 | |
777 | swap(myword, word); |
778 | swap(npar, lword); |
779 | |
780 | for (auto i : children(macro)) { |
781 | processInstruction(i); |
782 | } |
783 | |
784 | swap(myword, word); |
785 | swap(npar, lword); |
786 | |
787 | delete[] myword; |
788 | } |
789 | |
/**
 * Return the next token of the input stream.
 *
 * If `input_buffer` still holds tokens, the next buffered one is returned.
 * Otherwise characters are read from `in` and accumulated into `content`
 * until a delimiter completes a token, which is added to `input_buffer` and
 * returned:
 *  - end of input, or a NUL while `internal_null_flush` is set -> tt_eof;
 *  - '$' -> tt_word;
 *  - '^' -> tt_blank;
 *  - "[[" seen inside a '[' blank -> tt_blank for the text before it, with
 *    `in_wblank` set so the next call reads "[[...$" as one tt_word.
 * A backslash copies itself and the following character verbatim. A NUL
 * while `null_flush` is set flushes `output` and clears `in_wblank`.
 *
 * NOTE(review): the two inner loops never test in.eof(); it looks as if
 * input that ends inside a blank or word-bound blank would not terminate
 * them - confirm against InputFile::get()'s behaviour at end of input.
 *
 * @param in the input stream to read from
 * @return reference to the token stored in `input_buffer`
 */
TransferToken &
Transfer::readToken(InputFile& in)
{
  // Serve previously buffered tokens first.
  if(!input_buffer.isEmpty())
  {
    return input_buffer.next();
  }

  UString content;
  while(true)
  {
    UChar32 val = in.get();
    if(in.eof() || (val == 0 && internal_null_flush))
    {
      in_wblank = false;
      return input_buffer.add(TransferToken(content, tt_eof));
    }
    if(in_wblank)
    {
      // The previous call stopped at "[["; restore it and keep the
      // character just read.
      content = "[["_u;
      content += val;

      while(true)
      {
        UChar32 val3 = in.get();
        if(val3 == '\\')
        {
          // Escaped character: copy both characters unchanged.
          content += '\\';
          content += in.get();
        }
        else if(val3 == '$')
        {
          in_wblank = false;
          return input_buffer.add(TransferToken(content, tt_word));
        }
        else if(val3 == '\0' && null_flush)
        {
          in_wblank = false;
          u_fflush(output);
        }
        else
        {
          content += val3;
        }
      }
    }
    if(val == '\\')
    {
      content += '\\';
      content += in.get();
    }
    else if(val == '[')
    {
      content += '[';
      while(true)
      {
        UChar32 val2 = in.get();
        if(val2 == '\\')
        {
          content += '\\';
          content += in.get();
        }
        else if(val2 == '[')
        {
          // "[[" found: drop the '[' appended above and return the text
          // before it as a blank.
          in_wblank = true;
          content.pop_back();

          return input_buffer.add(TransferToken(content, tt_blank));
        }
        else if(val2 == ']')
        {
          content += ']';
          break;
        }
        else
        {
          content += val2;
        }
      }
    }
    else if(val == '$')
    {
      return input_buffer.add(TransferToken(content, tt_word));
    }
    else if(val == '^')
    {
      return input_buffer.add(TransferToken(content, tt_blank));
    }
    else if(val == '\0' && null_flush)
    {
      in_wblank = false;
      u_fflush(output);
    }
    else
    {
      content += val;
    }
  }
}
889 | |
890 | void |
891 | Transfer::setTraceATT(bool trace) |
892 | { |
893 | this->trace_att = trace; |
894 | } |
895 | |
896 | void |
897 | Transfer::tmp_clear() |
898 | { |
899 | tmpblank.clear(); |
900 | tmpword.clear(); |
901 | variables = variable_defaults; |
902 | } |
903 | |
904 | void |
905 | Transfer::transfer_wrapper_null_flush(InputFile& in, UFILE* out) |
906 | { |
907 | null_flush = false; |
908 | internal_null_flush = true; |
909 | |
910 | while(!in.eof()) |
911 | { |
912 | tmp_clear(); |
913 | transfer(in, out); |
914 | u_fputc('\0', out); |
915 | u_fflush(out); |
916 | } |
917 | |
918 | internal_null_flush = false; |
919 | null_flush = true; |
920 | } |
921 | |
922 | void |
923 | Transfer::transfer(InputFile& in, UFILE* out) |
924 | { |
925 | if(getNullFlush()) |
926 | { |
927 | transfer_wrapper_null_flush(in, out); |
928 | } |
929 | |
930 | unsigned int last = input_buffer.getPos(); |
931 | unsigned int prev_last = last; |
932 | int lastrule_id = -1; |
933 | set<int> banned_rules; |
934 | in_wblank = false; |
935 | |
936 | output = out; |
937 | ms.init(me->getInitial()); |
938 | |
939 | while(true) |
940 | { |
941 | if(trace_att) |
942 | { |
943 | cerr << "Loop start " << endl; |
944 | cerr << "ms.size: " << ms.size() << endl; |
945 | |
946 | cerr << "tmpword.size(): " << tmpword.size() << endl; |
947 | for (unsigned int ind = 0; ind < tmpword.size(); ind++) |
948 | { |
949 | if(ind != 0) |
950 | { |
951 | cerr << " "; |
952 | } |
953 | cerr << *tmpword[ind]; |
954 | } |
955 | cerr << endl; |
956 | |
957 | cerr << "tmpblank.size(): " << tmpblank.size() << endl; |
958 | for (unsigned int ind = 0; ind < tmpblank.size(); ind++) |
959 | { |
960 | cerr << "'"; |
961 | cerr << *tmpblank[ind]; |
962 | cerr << "' "; |
963 | } |
964 | cerr << endl; |
965 | |
966 | cerr << "last: " << last << endl; |
967 | cerr << "prev_last: " << prev_last << endl << endl; |
968 | } |
969 | |
970 | if(ms.size() == 0) |
971 | { |
972 | if(lastrule != NULL) |
973 | { |
974 | int num_words_to_consume = applyRule(); |
975 | |
976 | if(trace_att) |
977 | { |
978 | cerr << "num_words_to_consume: " << num_words_to_consume << endl; |
979 | } |
980 | |
981 | |
982 | |
983 | if(num_words_to_consume < 0) |
984 | { |
985 | banned_rules.clear(); |
986 | input_buffer.setPos(last); |
987 | } |
988 | else if(num_words_to_consume > 0) |
989 | { |
990 | banned_rules.clear(); |
991 | if(prev_last >= input_buffer.getSize()) |
992 | { |
993 | input_buffer.setPos(0); |
994 | } |
995 | else |
996 | { |
997 | input_buffer.setPos(prev_last+1); |
998 | } |
999 | int num_consumed_words = 0; |
1000 | while(num_consumed_words < num_words_to_consume && !input_buffer.isEmpty()) |
1001 | { |
1002 | TransferToken& local_tt = input_buffer.next(); |
1003 | if (local_tt.getType() == tt_word) |
1004 | { |
1005 | num_consumed_words++; |
1006 | } |
1007 | } |
1008 | } |
1009 | else |
1010 | { |
1011 | |
1012 | banned_rules.insert(lastrule_id); |
1013 | input_buffer.setPos(prev_last); |
1014 | input_buffer.next(); |
1015 | last = input_buffer.getPos(); |
1016 | } |
1017 | lastrule_id = -1; |
1018 | } |
1019 | else |
1020 | { |
1021 | if(tmpword.size() != 0) |
1022 | { |
1023 | if(trace_att) |
1024 | { |
1025 | cerr << "printing tmpword[0]" <<endl; |
1026 | } |
1027 | |
1028 | pair<UString, int> tr; |
1029 | UString tr_wblank; |
1030 | if(useBilingual && preBilingual == false) |
1031 | { |
1032 | if(isExtended && (*tmpword[0])[0] == '*') { |
1033 | tr = extended.biltransWithQueue((*tmpword[0]).substr(1), false); |
1034 | if(tr.first[0] == '@') { |
1035 | tr.first[0] = '*'; |
1036 | } else { |
1037 | UString temp; |
1038 | temp += '%'; |
1039 | temp.append(tr.first); |
1040 | temp.swap(tr.first); |
1041 | } |
1042 | } else { |
1043 | tr = fstp.biltransWithQueue(*tmpword[0], false); |
1044 | } |
1045 | } |
1046 | else if(preBilingual) |
1047 | { |
1048 | UString sl; |
1049 | UString tl; |
1050 | UString ref; |
1051 | UString wblank; |
1052 | UString cur; |
1053 | |
1054 | int seenSlash = 0; |
1055 | for(UString::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++) |
1056 | { |
1057 | if(*it == '\\') { |
1058 | cur.push_back(*it); |
1059 | it++; |
1060 | cur.push_back(*it); |
1061 | continue; |
1062 | } |
1063 | else if(*it == '[') |
1064 | { |
1065 | if(*(it+1) == '[') |
1066 | { |
1067 | while(true) |
1068 | { |
1069 | if(*it == '\\') |
1070 | { |
1071 | wblank.push_back(*it); |
1072 | it++; |
1073 | wblank.push_back(*it); |
1074 | } |
1075 | else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']') |
1076 | { |
1077 | break; |
1078 | } |
1079 | else |
1080 | { |
1081 | wblank.push_back(*it); |
1082 | } |
1083 | |
1084 | it++; |
1085 | } |
1086 | } |
1087 | else |
1088 | { |
1089 | cur.push_back(*it); |
1090 | } |
1091 | continue; |
1092 | } |
1093 | else if(*it == '/') |
1094 | { |
1095 | ref.clear(); |
1096 | switch (seenSlash) { |
1097 | case 0: cur.swap(sl); break; |
1098 | case 1: cur.swap(tl); break; |
1099 | default: cur.swap(ref); |
1100 | } |
1101 | seenSlash++; |
1102 | continue; |
1103 | } |
1104 | else if (*it == '<') { |
1105 | while (*it != '>') { |
1106 | cur.push_back(*it); |
1107 | if (*it == '\\') { |
1108 | it++; |
1109 | cur.push_back(*it); |
1110 | } |
1111 | it++; |
1112 | } |
1113 | } |
1114 | cur.push_back(*it); |
1115 | } |
1116 | if (!cur.empty()) { |
1117 | ref.clear(); |
1118 | switch (seenSlash) { |
1119 | case 0: cur.swap(sl); break; |
1120 | case 1: cur.swap(tl); break; |
1121 | default: cur.swap(ref); |
1122 | } |
1123 | } |
1124 | |
1125 | tr = pair<UString, int>(tl, false); |
1126 | tr_wblank = wblank; |
1127 | |
1128 | } |
1129 | else |
1130 | { |
1131 | tr = pair<UString, int>(*tmpword[0], 0); |
1132 | } |
1133 | |
1134 | if(tr.first.size() != 0) { |
1135 | if(defaultAttrs == lu) { |
1136 | if(tr.first[0] != '[' || tr.first[1] != '[') { |
1137 | u_fprintf(output, "%S^", tr_wblank.c_str()); |
1138 | } |
1139 | u_fprintf(output, "%S$", tr.first.c_str()); |
1140 | } else { |
1141 | if(tr.first[0] == '*') { |
1142 | u_fprintf(output, "^unknown<unknown>{%S^", tr_wblank.c_str()); |
1143 | } else { |
1144 | u_fprintf(output, "^default<default>{%S^", tr_wblank.c_str()); |
1145 | } |
1146 | u_fprintf(output, "%S$}$", tr.first.c_str()); |
1147 | } |
1148 | } |
1149 | banned_rules.clear(); |
1150 | tmpword.clear(); |
1151 | input_buffer.setPos(last); |
1152 | input_buffer.next(); |
1153 | prev_last = last; |
1154 | last = input_buffer.getPos(); |
1155 | ms.init(me->getInitial()); |
1156 | } |
1157 | else if(tmpblank.size() != 0) |
1158 | { |
1159 | if(trace_att) { |
1160 | cerr << "printing tmpblank[0]" <<endl; |
1161 | } |
1162 | write(*tmpblank[0], output); |
1163 | tmpblank.clear(); |
1164 | prev_last = last; |
1165 | last = input_buffer.getPos(); |
1166 | ms.init(me->getInitial()); |
1167 | } |
1168 | } |
1169 | } |
1170 | int val = ms.classifyFinals(me->getFinals(), banned_rules); |
1171 | if(val != -1) |
1172 | { |
1173 | size_t lastrule_line = rule_lines[val-1]; |
1174 | lastrule = rule_map[val-1]; |
1175 | lastrule_id = val; |
1176 | last = input_buffer.getPos(); |
1177 | last_lword = tmpword.size(); |
1178 | |
1179 | if(trace) { |
1180 | cerr << endl << "apertium-transfer: Rule " << val << " line " << lastrule_line; |
1181 | for (auto& it : tmpword) { |
1182 | cerr << " " << *it; |
1183 | } |
1184 | cerr << endl; |
1185 | } |
1186 | } |
1187 | |
1188 | TransferToken ¤t = readToken(in); |
1189 | |
1190 | switch(current.getType()) |
1191 | { |
1192 | case tt_word: |
1193 | applyWord(current.getContent()); |
1194 | tmpword.push_back(¤t.getContent()); |
1195 | break; |
1196 | |
1197 | case tt_blank: |
1198 | ms.step(' '); |
1199 | tmpblank.push_back(¤t.getContent()); |
1200 | break; |
1201 | |
1202 | case tt_eof: |
1203 | if(tmpword.size() != 0) |
1204 | { |
1205 | tmpblank.push_back(¤t.getContent()); |
1206 | ms.clear(); |
1207 | } |
1208 | else |
1209 | { |
1210 | write(current.getContent(), output); |
1211 | return; |
1212 | } |
1213 | break; |
1214 | |
1215 | default: |
1216 | cerr << "Error: Unknown input token." << endl; |
1217 | return; |
1218 | } |
1219 | } |
1220 | } |
1221 | |
1222 | int |
1223 | Transfer::applyRule() |
1224 | { |
1225 | int words_to_consume; |
1226 | unsigned int limit = tmpword.size(); |
1227 | |
1228 | |
1229 | for(unsigned int i = 0; i != limit; i++) |
1230 | { |
1231 | if(i == 0) |
1232 | { |
1233 | word = new TransferWord *[limit]; |
1234 | std::fill(word, word+limit, (TransferWord *)(0)); |
1235 | lword = limit; |
1236 | } |
1237 | else |
1238 | { |
1239 | if(int(blank_queue.size()) < last_lword - 1) |
1240 | { |
1241 | blank_queue.push(*tmpblank[i-1]); |
1242 | } |
1243 | } |
1244 | |
1245 | if(useBilingual && preBilingual == false) { |
1246 | auto tr = fstp.biltransWithQueue(*tmpword[i], false); |
1247 | word[i] = new TransferWord(*tmpword[i], tr.first, ""_u, ""_u, tr.second); |
1248 | } |
1249 | else if(preBilingual) { |
1250 | UString sl; |
1251 | UString tl; |
1252 | UString ref; |
1253 | UString wblank; |
1254 | |
1255 | int seenSlash = 0; |
1256 | bool inTag = false; |
1257 | for(UString::const_iterator it = tmpword[i]->begin(); it != tmpword[i]->end(); it++) |
1258 | { |
1259 | if(*it == '\\') |
1260 | { |
1261 | if(seenSlash == 0) |
1262 | { |
1263 | sl.push_back(*it); |
1264 | it++; |
1265 | sl.push_back(*it); |
1266 | } |
1267 | else if(seenSlash == 1) |
1268 | { |
1269 | tl.push_back(*it); |
1270 | it++; |
1271 | tl.push_back(*it); |
1272 | } |
1273 | else |
1274 | { |
1275 | ref.push_back(*it); |
1276 | it++; |
1277 | ref.push_back(*it); |
1278 | } |
1279 | continue; |
1280 | } |
1281 | else if(*it == '[') |
1282 | { |
1283 | if(*(it+1) == '[') |
1284 | { |
1285 | while(true) { |
1286 | if(*it == '\\') { |
1287 | wblank.push_back(*it); |
1288 | it++; |
1289 | wblank.push_back(*it); |
1290 | } |
1291 | else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']') { |
1292 | break; |
1293 | } |
1294 | else { |
1295 | wblank.push_back(*it); |
1296 | } |
1297 | |
1298 | it++; |
1299 | } |
1300 | } |
1301 | else { |
1302 | if(seenSlash == 0) sl.push_back(*it); |
1303 | else if(seenSlash == 1) tl.push_back(*it); |
1304 | else ref.push_back(*it); |
1305 | } |
1306 | continue; |
1307 | } |
1308 | |
1309 | if(*it == '/' && !inTag) { |
1310 | seenSlash++; |
1311 | |
1312 | ref.clear(); |
1313 | continue; |
1314 | } |
1315 | |
1316 | |
1317 | else if (*it == '<') { |
1318 | inTag = true; |
1319 | } |
1320 | else if (*it == '>') { |
1321 | inTag = false; |
1322 | } |
1323 | if(seenSlash == 0) sl.push_back(*it); |
1324 | else if(seenSlash == 1) tl.push_back(*it); |
1325 | else ref.push_back(*it); |
1326 | } |
1327 | word[i] = new TransferWord(sl, tl, ref, wblank, false); |
1328 | } |
1329 | else |
1330 | { |
1331 | word[i] = new TransferWord(*tmpword[i], *tmpword[i], ""_u, ""_u, false); |
1332 | } |
1333 | } |
1334 | |
1335 | words_to_consume = processRule(lastrule); |
1336 | lastrule = NULL; |
1337 | |
1338 | if(word) |
1339 | { |
1340 | for(unsigned int i = 0; i != limit; i++) |
1341 | { |
1342 | delete word[i]; |
1343 | word[i] = 0; |
1344 | } |
1345 | delete[] word; |
1346 | } |
1347 | word = NULL; |
1348 | tmpword.clear(); |
1349 | tmpblank.clear(); |
1350 | ms.init(me->getInitial()); |
1351 | return words_to_consume; |
1352 | } |
1353 | |
1354 | |
1355 | void |
1356 | Transfer::applyWord(UString const &word_str) |
1357 | { |
1358 | ms.step('^'); |
1359 | |
1360 | for(unsigned int i = 0, limit = word_str.size(); i < limit; i++) |
1361 | { |
1362 | switch(word_str[i]) |
1363 | { |
1364 | case '\\': |
1365 | i++; |
1366 | ms.step(u_tolower(word_str[i]), any_char); |
1367 | break; |
1368 | |
1369 | case '[': |
1370 | if(word_str[i+1] == '[') |
1371 | { |
1372 | while(true) |
1373 | { |
1374 | if(word_str[i] == '\\') |
1375 | { |
1376 | i++; |
1377 | } |
1378 | else if(i >= 4) |
1379 | { |
1380 | if(word_str[i] == '^' && word_str[i-1] == ']' && word_str[i-2] == ']') |
1381 | { |
1382 | break; |
1383 | } |
1384 | } |
1385 | |
1386 | i++; |
1387 | } |
1388 | } |
1389 | else |
1390 | { |
1391 | ms.step(u_tolower(word_str[i]), any_char); |
1392 | } |
1393 | break; |
1394 | |
1395 | case '/': |
1396 | i = limit; |
1397 | break; |
1398 | |
1399 | case '<': |
1400 | for(unsigned int j = i+1; j != limit; j++) |
1401 | { |
1402 | if(word_str[j] == '>') |
1403 | { |
1404 | int symbol = alphabet(word_str.substr(i, j-i+1)); |
1405 | if(symbol) |
1406 | { |
1407 | ms.step(symbol, any_tag); |
1408 | } |
1409 | else |
1410 | { |
1411 | ms.step(any_tag); |
1412 | } |
1413 | i = j; |
1414 | break; |
1415 | } |
1416 | } |
1417 | break; |
1418 | |
1419 | default: |
1420 | ms.step(u_tolower(word_str[i]), any_char); |
1421 | break; |
1422 | } |
1423 | } |
1424 | ms.step('$'); |
1425 | } |
1426 | |
1427 | void |
1428 | Transfer::setPreBilingual(bool value) |
1429 | { |
1430 | preBilingual = value; |
1431 | } |
1432 | |
1433 | bool |
1434 | Transfer::getPreBilingual(void) const |
1435 | { |
1436 | return preBilingual; |
1437 | } |
1438 | |
1439 | void |
1440 | Transfer::setUseBilingual(bool value) |
1441 | { |
1442 | useBilingual = value; |
1443 | } |
1444 | |
1445 | bool |
1446 | Transfer::getUseBilingual(void) const |
1447 | { |
1448 | return useBilingual; |
1449 | } |
1450 | |
1451 | void |
1452 | Transfer::setCaseSensitiveness(bool value) |
1453 | { |
1454 | fstp.setCaseSensitiveMode(value); |
1455 | } |