Bug Summary

File: transfer.cc
Warning: line 668, column 6
Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide')

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name transfer.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ transfer.cc
1/*
2 * Copyright (C) 2005--2015 Universitat d'Alacant / Universidad de Alicante
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <https://www.gnu.org/licenses/>.
16 */
17#include <apertium/transfer.h>
18
19#include <lttoolbox/string_utils.h>
20#include <lttoolbox/xml_walk_util.h>
21
22#include <iostream>
23
24using namespace std;
25
26Transfer::Transfer()
27{}
28
29void
30Transfer::readBil(string const &fstfile)
31{
32 FILE *in = fopen(fstfile.c_str(), "rb");
33 if(!in)
34 {
35 cerr << "Error: Could not open file '" << fstfile << "'." << endl;
36 exit(EXIT_FAILURE1);
37 }
38 fstp.load(in);
39 fstp.initBiltrans();
40 fclose(in);
41}
42
43void
44Transfer::setExtendedDictionary(string const &fstfile)
45{
46 FILE *in = fopen(fstfile.c_str(), "rb");
47 if(!in)
48 {
49 cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl;
50 exit(EXIT_FAILURE1);
51 }
52 extended.load(in);
53 extended.initBiltrans();
54 fclose(in);
55 isExtended = true;
56}
57
58void
59Transfer::read(string const &transferfile, string const &datafile,
60 string const &fstfile)
61{
62 TransferBase::read(transferfile.c_str(), datafile.c_str());
63 if (getattr(root_element, "default") == "chunk"_u) {
64 defaultAttrs = chunk;
65 } else {
66 defaultAttrs = lu;
67 }
68 if (!fstfile.empty()) {
69 readBil(fstfile);
70 }
71}
72
73bool
74Transfer::checkIndex(xmlNode *element, int index, int limit)
75{
76 if(index >= limit)
77 {
78 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl;
79 return false;
80 }
81 if(index < 0) {
82 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl;
83 return false;
84 }
85 if(word[index] == 0)
86 {
87 cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl;
88 return false;
89 }
90 return true;
91}
92
93UString
94Transfer::evalCachedString(xmlNode *element)
95{
96 TransferInstr& ti = evalStringCache[element];
97 switch (ti.getType()) {
98 case ti_clip_sl:
99 if (checkIndex(element, ti.getPos(), lword)) {
100 if (gettingLemmaFromWord(ti.getContent()) && last_lword > 1) {
101 if(in_lu) {
102 out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
103 } else if (in_let_var) {
104 var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
105 }
106 }
107
108 return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition());
109 }
110 break;
111
112 case ti_clip_tl:
113 if(checkIndex(element, ti.getPos(), lword)) {
114 if(gettingLemmaFromWord(ti.getContent()) && last_lword > 1) {
115 if(in_lu) {
116 out_wblank = combineWblanks(out_wblank, word[ti.getPos()]->getWblank());
117 } else if(in_let_var) {
118 var_out_wblank[var_val] = combineWblanks(var_out_wblank[var_val], word[ti.getPos()]->getWblank());
119 }
120 }
121
122 return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition());
123 }
124 break;
125
126 case ti_clip_ref:
127 if(checkIndex(element, ti.getPos(), lword)) {
128 return word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition());
129 }
130 break;
131
132 case ti_linkto_sl:
133 if(checkIndex(element, ti.getPos(), lword)) {
134 if(!word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()).empty()) {
135 UString ret;
136 ret += '<';
137 ret += ti.getStrval();
138 ret += '>';
139 return ret;
140 } else {
141 return ""_u;
142 }
143 }
144 break;
145
146 case ti_linkto_tl:
147 if(checkIndex(element, ti.getPos(), lword)) {
148 if(!word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()).empty()) {
149 UString ret;
150 ret += '<';
151 ret += ti.getStrval();
152 ret += '>';
153 return ret;
154 } else {
155 return ""_u;
156 }
157 }
158 break;
159
160 case ti_linkto_ref:
161 if(checkIndex(element, ti.getPos(), lword)) {
162 if(!word[ti.getPos()]->reference(attr_items[ti.getContent()], ti.getCondition()).empty()) {
163 UString ret;
164 ret += '<';
165 ret += ti.getStrval();
166 ret += '>';
167 return ret;
168 } else {
169 return ""_u;
170 }
171 }
172 break;
173
174 case ti_var:
175 if(last_lword > 1) {
176 out_wblank = combineWblanks(out_wblank, var_out_wblank[ti.getContent()]);
177 }
178 return variables[ti.getContent()];
179
180 case ti_lit_tag:
181 case ti_lit:
182 return ti.getContent();
183
184 case ti_b:
185 if(!blank_queue.empty()) {
186 UString retblank = blank_queue.front();
187 if(in_out) {
188 blank_queue.pop();
189 }
190
191 return retblank;
192 } else {
193 return " "_u;
194 }
195 break;
196
197 case ti_get_case_from:
198 if(checkIndex(element, ti.getPos(), lword)) {
199 return copycase(word[ti.getPos()]->source(attr_items[ti.getContent()]),
200 evalString(ti.getPointer()));
201 }
202 break;
203
204 case ti_case_of_sl:
205 if(checkIndex(element, ti.getPos(), lword)) {
206 return StringUtils::getcase(word[ti.getPos()]->source(attr_items[ti.getContent()]));
207 }
208 break;
209
210 case ti_case_of_tl:
211 if(checkIndex(element, ti.getPos(), lword)) {
212 return StringUtils::getcase(word[ti.getPos()]->target(attr_items[ti.getContent()]));
213 }
214 break;
215
216 case ti_case_of_ref:
217 if(checkIndex(element, ti.getPos(), lword)) {
218 return StringUtils::getcase(word[ti.getPos()]->reference(attr_items[ti.getContent()]));
219 }
220 break;
221
222 default:
223 return ""_u;
224 }
225 return ""_u;
226}
227
228void
229Transfer::processClip(xmlNode* element)
230{
231 int pos = 0;
232 xmlChar *side = NULL__null;
233 UString as;
234 UString part;
235 bool queue = true;
236
237 for(xmlAttr *i = element->properties; i != NULL__null; i = i->next) {
238 if(!xmlStrcmp(i->name, (const xmlChar *) "side")) {
239 side = i->children->content;
240 } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) {
241 part = to_ustring((const char*) i->children->content);
242 } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) {
243 pos = atoi((const char *)i->children->content) - 1;
244 } else if(!xmlStrcmp(i->name, (const xmlChar *) "queue")) {
245 if(!xmlStrcmp(i->children->content, (const xmlChar *) "no")) {
246 queue = false;
247 }
248 } else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to")) {
249 as = to_ustring((const char*)i->children->content);
250 }
251 }
252
253 if(!as.empty()) {
254 if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
255 evalStringCache[element] = TransferInstr(ti_linkto_sl, part, pos, NULL__null, queue, as);
256 } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
257 evalStringCache[element] = TransferInstr(ti_linkto_ref, part, pos, NULL__null, queue, as);
258 } else {
259 evalStringCache[element] = TransferInstr(ti_linkto_tl, part, pos, NULL__null, queue, as);
260 }
261 } else if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
262 evalStringCache[element] = TransferInstr(ti_clip_sl, part, pos, NULL__null, queue);
263 } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
264 evalStringCache[element] = TransferInstr(ti_clip_ref, part, pos, NULL__null, queue);
265 } else {
266 evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL__null, queue);
267 }
268}
269
270void
271Transfer::processBlank(xmlNode* element)
272{
273 evalStringCache[element] = TransferInstr(ti_b, " "_u, -1);
274}
275
276void
277Transfer::processCaseOf(xmlNode* element)
278{
279 int pos = 0;
280 xmlChar *side = NULL__null;
281 UString part;
282
283 for(xmlAttr *i = element->properties; i != NULL__null; i = i->next) {
284 if(!xmlStrcmp(i->name, (const xmlChar *) "side")) {
285 side = i->children->content;
286 } else if(!xmlStrcmp(i->name, (const xmlChar *) "part")) {
287 part = to_ustring((const char*) i->children->content);
288 } else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) {
289 pos = atoi((const char *) i->children->content) - 1;
290 }
291 }
292
293 if(!xmlStrcmp(side, (const xmlChar *) "sl")) {
294 evalStringCache[element] = TransferInstr(ti_case_of_sl, part, pos);
295 } else if(!xmlStrcmp(side, (const xmlChar *) "ref")) {
296 evalStringCache[element] = TransferInstr(ti_case_of_ref, part, pos);
297 } else {
298 evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos);
299 }
300}
301
302UString
303Transfer::processLu(xmlNode* element)
304{
305 in_lu = true;
306 out_wblank.clear();
307
308 UString myword;
309 for (auto i : children(element)) {
310 myword.append(evalString(i));
311 }
312
313 in_lu = false;
314
315 // if word == nullptr then we're outputting a word
316 // inside an npar=0 macro and there's not much we can do
317 if(last_lword == 1 && word != nullptr) {
318 out_wblank = word[0]->getWblank();
319 }
320
321 if(!myword.empty()) {
322 if(myword[0] != '[' || myword[1] != '[') {
323 UString ret = out_wblank;
324 ret += '^';
325 ret += myword;
326 ret += '$';
327 return ret;
328 } else {
329 myword += '$';
330 return myword;
331 }
332 } else {
333 return ""_u;
334 }
335}
336
337UString
338Transfer::processMlu(xmlNode* element)
339{
340 UString value;
341
342 bool first_time = true;
343 out_wblank.clear();
344
345 in_lu = true;
346 for (auto i : children(element)) {
347 UString myword;
348 for (auto j : children(i)) {
349 myword.append(evalString(j));
350 }
351
352 if (!first_time) {
353 if(!myword.empty() && myword[0] != '#') { //'+#' problem
354 value += '+';
355 }
356 } else {
357 if (!myword.empty()) {
358 first_time = false;
359 }
360 }
361
362 value.append(myword);
363 }
364
365 if(last_lword == 1) {
366 out_wblank = word[0]->getWblank();
367 }
368
369 if(!value.empty()) {
370 UString ret = out_wblank;
371 ret += '^';
372 ret += value;
373 ret += '$';
374 return ret;
375 } else {
376 return ""_u;
377 }
378}
379
380void
381Transfer::processLuCount(xmlNode* element)
382{
383 cerr << "Error: unexpected expression: '" << element->name << "'" << endl;
384 exit(EXIT_FAILURE1);
385}
386
387void
388Transfer::processOut(xmlNode *localroot)
389{
390 in_out = true;
391
392 for (auto i : children(localroot)) {
393 if(defaultAttrs == lu) {
394 if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) {
395 write(processLu(i), output);
396 } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) {
397 write(processMlu(i), output);
398 } else { // 'b'
399 write(evalString(i), output);
400 }
401 } else {
402 if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) {
403 write(processChunk(i), output);
404 } else { // 'b'
405 write(evalString(i), output);
406 }
407 }
408 }
409 in_out = false;
410}
411
412UString
413Transfer::processChunk(xmlNode *localroot)
414{
415 UString name, namefrom;
416 UString caseofchunk = "aa"_u;
417 UString result;
418
419 for(xmlAttr *i = localroot->properties; i != NULL__null; i = i->next)
420 {
421 if(!xmlStrcmp(i->name, (const xmlChar *) "name"))
422 {
423 name = to_ustring((const char *) i->children->content);
424 }
425 else if(!xmlStrcmp(i->name, (const xmlChar *) "namefrom"))
426 {
427 namefrom = to_ustring((const char *) i->children->content);
428 }
429 else if(!xmlStrcmp(i->name, (const xmlChar *) "case"))
430 {
431 caseofchunk = to_ustring((const char *) i->children->content);
432 }
433 }
434
435 result += '^';
436 if(!caseofchunk.empty() && !dictionary_case)
437 {
438 if(!name.empty())
439 {
440 result.append(StringUtils::copycase(variables[caseofchunk], name));
441 }
442 else if(!namefrom.empty())
443 {
444 result.append(StringUtils::copycase(variables[caseofchunk], variables[namefrom]));
445 }
446 else
447 {
448 cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
449 exit(EXIT_FAILURE1);
450 }
451 }
452 else
453 {
454 if(!name.empty())
455 {
456 result.append(name);
457 }
458 else if(!namefrom.empty())
459 {
460 result.append(variables[namefrom]);
461 }
462 else
463 {
464 cerr << "Error: you must specify either 'name' or 'namefrom' for the 'chunk' element" << endl;
465 exit(EXIT_FAILURE1);
466 }
467 }
468
469 for (auto i : children(localroot)) {
470 if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) {
471 result.append(processTags(i));
472 result += '{';
473 } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) {
474 result.append(processLu(i));
475 } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) {
476 result.append(processMlu(i));
477 } else { // 'b'
478 result.append(evalString(i));
479 }
480 }
481 result += '}';
482 result += '$';
483 return result;
484}
485
486UString
487Transfer::processTags(xmlNode *localroot)
488{
489 UString result;
490 for (auto i : children(localroot)) {
491 if (!xmlStrcmp(i->name, (const xmlChar*) "tag")) {
492 for (auto j : children(i)) {
493 result.append(evalString(j));
494 }
495 }
496 }
497 return result;
498}
499
500void
501Transfer::processLet(xmlNode *localroot)
502{
503 xmlNode *leftSide = NULL__null, *rightSide = NULL__null;
504
505 for (auto i : children(localroot)) {
506 if(leftSide == NULL__null) {
507 leftSide = i;
508 } else {
509 rightSide = i;
510 break;
511 }
512 }
513
514 map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide);
515 if(it != evalStringCache.end())
516 {
517 TransferInstr &ti = it->second;
518 switch(ti.getType())
519 {
520 case ti_var:
521 in_let_var = true;
522 var_val = ti.getContent();
523
524 var_out_wblank[var_val].clear();
525
526 variables[ti.getContent()] = evalString(rightSide);
527
528 in_let_var = false;
529
530 return;
531
532 case ti_clip_sl:
533 if (checkIndex(leftSide, ti.getPos(), lword)) {
534 bool match = word[ti.getPos()]->setSource(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
535 if (!match && trace)
536 {
537 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
538 }
539 }
540 return;
541
542 case ti_clip_tl:
543 if (checkIndex(leftSide, ti.getPos(), lword)) {
544 bool match = word[ti.getPos()]->setTarget(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
545 if (!match && trace)
546 {
547 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
548 }
549 }
550 return;
551
552 case ti_clip_ref:
553 if (checkIndex(leftSide, ti.getPos(), lword)) {
554 bool match = word[ti.getPos()]->setReference(attr_items[ti.getContent()], evalString(rightSide), ti.getCondition());
555 if (!match && trace)
556 {
557 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
558 }
559 }
560 return;
561
562 default:
563 return;
564 }
565 }
566 if(leftSide->name != NULL__null && !xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
567 {
568 in_let_var = true;
569
570 UString const val = to_ustring((const char *) leftSide->properties->children->content);
571
572 var_val = val;
573 var_out_wblank[var_val].clear();
574
575 variables[val] = evalString(rightSide);
576
577 in_let_var = false;
578 evalStringCache[leftSide] = TransferInstr(ti_var, val, 0);
579 }
580 else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
581 {
582 int pos = 0;
583 xmlChar *side = NULL__null, *as = NULL__null;
584 UString part;
585 bool queue = true;
586
587 for(xmlAttr *i = leftSide->properties; i != NULL__null; i = i->next)
588 {
589 if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
590 {
591 side = i->children->content;
592 }
593 else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
594 {
595 part = to_ustring((const char*) i->children->content);
596 }
597 else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
598 {
599 pos = atoi((const char *) i->children->content) - 1;
600 }
601 else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
602 {
603 if(!xmlStrcmp(i->children->content, (const xmlChar *) "no"))
604 {
605 queue = false;
606 }
607 }
608 else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
609 {
610 as = i->children->content; // TODO: set but never read
611 }
612 }
613
614 if (pos >= lword) {
615 cerr << "Error: Transfer::processLet() bad access on pos >= lword" << endl;
616 return;
617 }
618 if (word[pos] == 0) {
619 cerr << "Error: Transfer::processLet() null access on word[pos]" << endl;
620 return;
621 }
622
623 if(!xmlStrcmp(side, (const xmlChar *) "tl"))
624 {
625 bool match = word[pos]->setTarget(attr_items[part], evalString(rightSide), queue);
626 if(!match && trace)
627 {
628 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
629 }
630 evalStringCache[leftSide] = TransferInstr(ti_clip_tl, part, pos, NULL__null, queue);
631 }
632 else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
633 {
634 bool match = word[pos]->setReference(attr_items[part], evalString(rightSide), queue);
635 if(!match && trace)
636 {
637 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
638 }
639 evalStringCache[leftSide] = TransferInstr(ti_clip_ref, part, pos, NULL__null, queue);
640 }
641 else
642 {
643 bool match = word[pos]->setSource(attr_items[part], evalString(rightSide), queue);
644 if(!match && trace)
645 {
646 cerr << "apertium-transfer warning: <let> on line " << localroot->line << " sometimes discards its value." << endl;
647 }
648 evalStringCache[leftSide] = TransferInstr(ti_clip_sl, part, pos, NULL__null, queue);
649 }
650 }
651}
652
653void
654Transfer::processModifyCase(xmlNode *localroot)
655{
656 if (dictionary_case) return;
1
Assuming field 'dictionary_case' is false
2
Taking false branch
657 xmlNode *leftSide = NULL__null, *rightSide = NULL__null;
3
'leftSide' initialized to a null pointer value
658
659 for (auto i : children(localroot)) {
660 if(leftSide == NULL__null) {
661 leftSide = i;
662 } else {
663 rightSide = i;
664 break;
665 }
666 }
667
668 if(leftSide->name != NULL__null && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip"))
4
Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide')
669 {
670 int pos = 0;
671 xmlChar *side = NULL__null, *as = NULL__null;
672 UString part;
673 bool queue = true;
674
675 for(xmlAttr *i = leftSide->properties; i != NULL__null; i = i->next)
676 {
677 if(!xmlStrcmp(i->name, (const xmlChar *) "side"))
678 {
679 side = i->children->content;
680 }
681 else if(!xmlStrcmp(i->name, (const xmlChar *) "part"))
682 {
683 part = to_ustring((const char*)i->children->content);
684 }
685 else if(!xmlStrcmp(i->name, (const xmlChar *) "pos"))
686 {
687 pos = atoi((const char *) i->children->content) - 1;
688 }
689 else if(!xmlStrcmp(i->name, (const xmlChar *) "queue"))
690 {
691 if(!xmlStrcmp(i->children->content, (xmlChar const *) "no"))
692 {
693 queue = false;
694 }
695 }
696 else if(!xmlStrcmp(i->name, (const xmlChar *) "link-to"))
697 {
698 as = i->children->content;
699 (void)as; // ToDo, remove "as" and the whole else?
700 }
701 }
702 if(!xmlStrcmp(side, (const xmlChar *) "sl"))
703 {
704 UString const result = StringUtils::copycase(evalString(rightSide),
705 word[pos]->source(attr_items[part], queue));
706 bool match = word[pos]->setSource(attr_items[part], result);
707 if(!match && trace)
708 {
709 cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
710 }
711 }
712 else if(!xmlStrcmp(side, (const xmlChar *) "ref"))
713 {
714 UString const result = StringUtils::copycase(evalString(rightSide),
715 word[pos]->reference(attr_items[part], queue));
716 bool match = word[pos]->setReference(attr_items[part], result);
717 if(!match && trace)
718 {
719 cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
720 }
721 }
722 else
723 {
724 UString const result = StringUtils::copycase(evalString(rightSide),
725 word[pos]->target(attr_items[part], queue));
726 bool match = word[pos]->setTarget(attr_items[part], result);
727 if(!match && trace)
728 {
729 cerr << "apertium-transfer warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl;
730 }
731 }
732 }
733 else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var"))
734 {
735 UString const val = to_ustring((const char *) leftSide->properties->children->content);
736 variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]);
737 }
738}
739
740void
741Transfer::processCallMacro(xmlNode *localroot)
742{
743 UString const n = to_ustring((const char *) localroot->properties->children->content);
744 int npar = 0;
745
746 xmlNode *macro = macro_map[macros[n]];
747
748 for(xmlAttr *i = macro->properties; i != NULL__null; i = i->next)
749 {
750 if(!xmlStrcmp(i->name, (const xmlChar *) "npar"))
751 {
752 npar = atoi((const char *) i->children->content);
753 break;
754 }
755 }
756
757 // ToDo: Is it at all valid if npar <= 0 ?
758
759 TransferWord **myword = NULL__null;
760 int idx = 0;
761 if(npar > 0)
762 {
763 myword = new TransferWord *[npar];
764 std::fill(myword, myword+npar, (TransferWord *)(0));
765 for (auto i : children(localroot)) {
766 if (idx >= npar) {
767 cerr << "Error: processCallMacro() number of arguments >= npar at line " << i->line << endl;
768 return;
769 }
770 int pos = atoi((const char *) i->properties->children->content)-1;
771 myword[idx] = word[pos];
772
773 idx++;
774 }
775 }
776
777 swap(myword, word);
778 swap(npar, lword);
779
780 for (auto i : children(macro)) {
781 processInstruction(i);
782 }
783
784 swap(myword, word);
785 swap(npar, lword);
786
787 delete[] myword;
788}
789
790TransferToken &
791Transfer::readToken(InputFile& in)
792{
793 if(!input_buffer.isEmpty())
794 {
795 return input_buffer.next();
796 }
797
798 UString content;
799 while(true)
800 {
801 UChar32 val = in.get();
802 if(in.eof() || (val == 0 && internal_null_flush))
803 {
804 in_wblank = false;
805 return input_buffer.add(TransferToken(content, tt_eof));
806 }
807 if(in_wblank)
808 {
809 content = "[["_u;
810 content += val;
811
812 while(true)
813 {
814 UChar32 val3 = in.get();
815 if(val3 == '\\')
816 {
817 content += '\\';
818 content += in.get();
819 }
820 else if(val3 == '$') //[[..]]^..$ is the LU
821 {
822 in_wblank = false;
823 return input_buffer.add(TransferToken(content, tt_word));
824 }
825 else if(val3 == '\0' && null_flush)
826 {
827 in_wblank = false;
828 u_fflushu_fflush_72(output);
829 }
830 else
831 {
832 content += val3;
833 }
834 }
835 }
836 if(val == '\\')
837 {
838 content += '\\';
839 content += in.get();
840 }
841 else if(val == '[')
842 {
843 content += '[';
844 while(true)
845 {
846 UChar32 val2 = in.get();
847 if(val2 == '\\')
848 {
849 content += '\\';
850 content += in.get();
851 }
852 else if(val2 == '[')
853 { //wordbound blank
854 in_wblank = true;
855 content.pop_back();
856
857 return input_buffer.add(TransferToken(content, tt_blank));
858 }
859 else if(val2 == ']')
860 {
861 content += ']';
862 break;
863 }
864 else
865 {
866 content += val2;
867 }
868 }
869 }
870 else if(val == '$')
871 {
872 return input_buffer.add(TransferToken(content, tt_word));
873 }
874 else if(val == '^')
875 {
876 return input_buffer.add(TransferToken(content, tt_blank));
877 }
878 else if(val == '\0' && null_flush)
879 {
880 in_wblank = false;
881 u_fflushu_fflush_72(output);
882 }
883 else
884 {
885 content += val;
886 }
887 }
888}
889
890void
891Transfer::setTraceATT(bool trace)
892{
893 this->trace_att = trace;
894}
895
896void
897Transfer::tmp_clear()
898{
899 tmpblank.clear();
900 tmpword.clear();
901 variables = variable_defaults;
902}
903
904void
905Transfer::transfer_wrapper_null_flush(InputFile& in, UFILE* out)
906{
907 null_flush = false;
908 internal_null_flush = true;
909
910 while(!in.eof())
911 {
912 tmp_clear();
913 transfer(in, out);
914 u_fputcu_fputc_72('\0', out);
915 u_fflushu_fflush_72(out);
916 }
917
918 internal_null_flush = false;
919 null_flush = true;
920}
921
922void
923Transfer::transfer(InputFile& in, UFILE* out)
924{
925 if(getNullFlush())
926 {
927 transfer_wrapper_null_flush(in, out);
928 }
929
930 unsigned int last = input_buffer.getPos();
931 unsigned int prev_last = last;
932 int lastrule_id = -1;
933 set<int> banned_rules;
934 in_wblank = false;
935
936 output = out;
937 ms.init(me->getInitial());
938
939 while(true)
940 {
941 if(trace_att)
942 {
943 cerr << "Loop start " << endl;
944 cerr << "ms.size: " << ms.size() << endl;
945
946 cerr << "tmpword.size(): " << tmpword.size() << endl;
947 for (unsigned int ind = 0; ind < tmpword.size(); ind++)
948 {
949 if(ind != 0)
950 {
951 cerr << " ";
952 }
953 cerr << *tmpword[ind];
954 }
955 cerr << endl;
956
957 cerr << "tmpblank.size(): " << tmpblank.size() << endl;
958 for (unsigned int ind = 0; ind < tmpblank.size(); ind++)
959 {
960 cerr << "'";
961 cerr << *tmpblank[ind];
962 cerr << "' ";
963 }
964 cerr << endl;
965
966 cerr << "last: " << last << endl;
967 cerr << "prev_last: " << prev_last << endl << endl;
968 }
969
970 if(ms.size() == 0)
971 {
972 if(lastrule != NULL__null)
973 {
974 int num_words_to_consume = applyRule();
975
976 if(trace_att)
977 {
978 cerr << "num_words_to_consume: " << num_words_to_consume << endl;
979 }
980
981 //Consume all the words from the input which matched the rule.
982 //This piece of code is executed unless the rule contains a "reject-current-rule" instruction
983 if(num_words_to_consume < 0)
984 {
985 banned_rules.clear();
986 input_buffer.setPos(last);
987 }
988 else if(num_words_to_consume > 0)
989 {
990 banned_rules.clear();
991 if(prev_last >= input_buffer.getSize())
992 {
993 input_buffer.setPos(0);
994 }
995 else
996 {
997 input_buffer.setPos(prev_last+1);
998 }
999 int num_consumed_words = 0;
1000 while(num_consumed_words < num_words_to_consume && !input_buffer.isEmpty())
1001 {
1002 TransferToken& local_tt = input_buffer.next();
1003 if (local_tt.getType() == tt_word)
1004 {
1005 num_consumed_words++;
1006 }
1007 }
1008 }
1009 else
1010 {
1011 //Add rule to banned rules
1012 banned_rules.insert(lastrule_id);
1013 input_buffer.setPos(prev_last);
1014 input_buffer.next();
1015 last = input_buffer.getPos();
1016 }
1017 lastrule_id = -1;
1018 }
1019 else
1020 {
1021 if(tmpword.size() != 0)
1022 {
1023 if(trace_att)
1024 {
1025 cerr << "printing tmpword[0]" <<endl;
1026 }
1027
1028 pair<UString, int> tr;
1029 UString tr_wblank;
1030 if(useBilingual && preBilingual == false)
1031 {
1032 if(isExtended && (*tmpword[0])[0] == '*') {
1033 tr = extended.biltransWithQueue((*tmpword[0]).substr(1), false);
1034 if(tr.first[0] == '@') {
1035 tr.first[0] = '*';
1036 } else {
1037 UString temp;
1038 temp += '%';
1039 temp.append(tr.first);
1040 temp.swap(tr.first);
1041 }
1042 } else {
1043 tr = fstp.biltransWithQueue(*tmpword[0], false);
1044 }
1045 }
1046 else if(preBilingual)
1047 {
1048 UString sl;
1049 UString tl;
1050 UString ref;
1051 UString wblank;
1052 UString cur;
1053
1054 int seenSlash = 0;
1055 for(UString::const_iterator it = tmpword[0]->begin(); it != tmpword[0]->end(); it++)
1056 {
1057 if(*it == '\\') {
1058 cur.push_back(*it);
1059 it++;
1060 cur.push_back(*it);
1061 continue;
1062 }
1063 else if(*it == '[')
1064 {
1065 if(*(it+1) == '[') //wordbound blank
1066 {
1067 while(true)
1068 {
1069 if(*it == '\\')
1070 {
1071 wblank.push_back(*it);
1072 it++;
1073 wblank.push_back(*it);
1074 }
1075 else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']')
1076 {
1077 break;
1078 }
1079 else
1080 {
1081 wblank.push_back(*it);
1082 }
1083
1084 it++;
1085 }
1086 }
1087 else
1088 {
1089 cur.push_back(*it);
1090 }
1091 continue;
1092 }
1093 else if(*it == '/')
1094 {
1095 ref.clear();
1096 switch (seenSlash) {
1097 case 0: cur.swap(sl); break;
1098 case 1: cur.swap(tl); break;
1099 default: cur.swap(ref);
1100 }
1101 seenSlash++;
1102 continue;
1103 }
1104 else if (*it == '<') {
1105 while (*it != '>') {
1106 cur.push_back(*it);
1107 if (*it == '\\') {
1108 it++;
1109 cur.push_back(*it);
1110 }
1111 it++;
1112 }
1113 }
1114 cur.push_back(*it);
1115 }
1116 if (!cur.empty()) {
1117 ref.clear();
1118 switch (seenSlash) {
1119 case 0: cur.swap(sl); break;
1120 case 1: cur.swap(tl); break;
1121 default: cur.swap(ref);
1122 }
1123 }
1124 //tmpword[0]->assign(sl);
1125 tr = pair<UString, int>(tl, false);
1126 tr_wblank = wblank;
1127 //cerr << "pb: " << *tmpword[0] << " :: " << sl << " >> " << tl << endl ;
1128 }
1129 else
1130 {
1131 tr = pair<UString, int>(*tmpword[0], 0);
1132 }
1133
1134 if(tr.first.size() != 0) {
1135 if(defaultAttrs == lu) {
1136 if(tr.first[0] != '[' || tr.first[1] != '[') {
1137 u_fprintfu_fprintf_72(output, "%S^", tr_wblank.c_str());
1138 }
1139 u_fprintfu_fprintf_72(output, "%S$", tr.first.c_str());
1140 } else {
1141 if(tr.first[0] == '*') {
1142 u_fprintfu_fprintf_72(output, "^unknown<unknown>{%S^", tr_wblank.c_str());
1143 } else {
1144 u_fprintfu_fprintf_72(output, "^default<default>{%S^", tr_wblank.c_str());
1145 }
1146 u_fprintfu_fprintf_72(output, "%S$}$", tr.first.c_str());
1147 }
1148 }
1149 banned_rules.clear();
1150 tmpword.clear();
1151 input_buffer.setPos(last);
1152 input_buffer.next();
1153 prev_last = last;
1154 last = input_buffer.getPos();
1155 ms.init(me->getInitial());
1156 }
1157 else if(tmpblank.size() != 0)
1158 {
1159 if(trace_att) {
1160 cerr << "printing tmpblank[0]" <<endl;
1161 }
1162 write(*tmpblank[0], output);
1163 tmpblank.clear();
1164 prev_last = last;
1165 last = input_buffer.getPos();
1166 ms.init(me->getInitial());
1167 }
1168 }
1169 }
1170 int val = ms.classifyFinals(me->getFinals(), banned_rules);
1171 if(val != -1)
1172 {
1173 size_t lastrule_line = rule_lines[val-1];
1174 lastrule = rule_map[val-1];
1175 lastrule_id = val;
1176 last = input_buffer.getPos();
1177 last_lword = tmpword.size();
1178
1179 if(trace) {
1180 cerr << endl << "apertium-transfer: Rule " << val << " line " << lastrule_line;
1181 for (auto& it : tmpword) {
1182 cerr << " " << *it;
1183 }
1184 cerr << endl;
1185 }
1186 }
1187
1188 TransferToken &current = readToken(in);
1189
1190 switch(current.getType())
1191 {
1192 case tt_word:
1193 applyWord(current.getContent());
1194 tmpword.push_back(&current.getContent());
1195 break;
1196
1197 case tt_blank:
1198 ms.step(' ');
1199 tmpblank.push_back(&current.getContent());
1200 break;
1201
1202 case tt_eof:
1203 if(tmpword.size() != 0)
1204 {
1205 tmpblank.push_back(&current.getContent());
1206 ms.clear();
1207 }
1208 else
1209 {
1210 write(current.getContent(), output);
1211 return;
1212 }
1213 break;
1214
1215 default:
1216 cerr << "Error: Unknown input token." << endl;
1217 return;
1218 }
1219 }
1220}
1221
1222int
1223Transfer::applyRule()
1224{
1225 int words_to_consume;
1226 unsigned int limit = tmpword.size();
1227 //cerr << "applyRule: " << tmpword.size() << endl;
1228
1229 for(unsigned int i = 0; i != limit; i++)
1230 {
1231 if(i == 0)
1232 {
1233 word = new TransferWord *[limit];
1234 std::fill(word, word+limit, (TransferWord *)(0));
1235 lword = limit;
1236 }
1237 else
1238 {
1239 if(int(blank_queue.size()) < last_lword - 1)
1240 {
1241 blank_queue.push(*tmpblank[i-1]);
1242 }
1243 }
1244
1245 if(useBilingual && preBilingual == false) {
1246 auto tr = fstp.biltransWithQueue(*tmpword[i], false);
1247 word[i] = new TransferWord(*tmpword[i], tr.first, ""_u, ""_u, tr.second);
1248 }
1249 else if(preBilingual) {
1250 UString sl;
1251 UString tl;
1252 UString ref;
1253 UString wblank;
1254
1255 int seenSlash = 0;
1256 bool inTag = false;
1257 for(UString::const_iterator it = tmpword[i]->begin(); it != tmpword[i]->end(); it++)
1258 {
1259 if(*it == '\\')
1260 {
1261 if(seenSlash == 0)
1262 {
1263 sl.push_back(*it);
1264 it++;
1265 sl.push_back(*it);
1266 }
1267 else if(seenSlash == 1)
1268 {
1269 tl.push_back(*it);
1270 it++;
1271 tl.push_back(*it);
1272 }
1273 else
1274 {
1275 ref.push_back(*it);
1276 it++;
1277 ref.push_back(*it);
1278 }
1279 continue;
1280 }
1281 else if(*it == '[')
1282 {
1283 if(*(it+1) == '[') //wordbound blank
1284 {
1285 while(true) {
1286 if(*it == '\\') {
1287 wblank.push_back(*it);
1288 it++;
1289 wblank.push_back(*it);
1290 }
1291 else if(*it == '^' && *(it-1) == ']' && *(it-2) == ']') {
1292 break;
1293 }
1294 else {
1295 wblank.push_back(*it);
1296 }
1297
1298 it++;
1299 }
1300 }
1301 else {
1302 if(seenSlash == 0) sl.push_back(*it);
1303 else if(seenSlash == 1) tl.push_back(*it);
1304 else ref.push_back(*it);
1305 }
1306 continue;
1307 }
1308
1309 if(*it == '/' && !inTag) {
1310 seenSlash++;
1311
1312 ref.clear(); //word after last slash is ref
1313 continue;
1314 }
1315 // slashes in tags are accepted elsewhere in the pipe,
1316 // so don't split readings on them here
1317 else if (*it == '<') {
1318 inTag = true;
1319 }
1320 else if (*it == '>') {
1321 inTag = false;
1322 }
1323 if(seenSlash == 0) sl.push_back(*it);
1324 else if(seenSlash == 1) tl.push_back(*it);
1325 else ref.push_back(*it);
1326 }
1327 word[i] = new TransferWord(sl, tl, ref, wblank, false);
1328 }
1329 else // neither useBilingual nor preBilingual (sl==tl)
1330 {
1331 word[i] = new TransferWord(*tmpword[i], *tmpword[i], ""_u, ""_u, false);
1332 }
1333 }
1334
1335 words_to_consume = processRule(lastrule);
1336 lastrule = NULL__null;
1337
1338 if(word)
1339 {
1340 for(unsigned int i = 0; i != limit; i++)
1341 {
1342 delete word[i];
1343 word[i] = 0; // ToDo: That this changes things means there are much bigger problems elsewhere
1344 }
1345 delete[] word;
1346 }
1347 word = NULL__null;
1348 tmpword.clear();
1349 tmpblank.clear();
1350 ms.init(me->getInitial());
1351 return words_to_consume;
1352}
1353
1354/* HERE */
1355void
1356Transfer::applyWord(UString const &word_str)
1357{
1358 ms.step('^');
1359
1360 for(unsigned int i = 0, limit = word_str.size(); i < limit; i++)
1361 {
1362 switch(word_str[i])
1363 {
1364 case '\\':
1365 i++;
1366 ms.step(u_toloweru_tolower_72(word_str[i]), any_char);
1367 break;
1368
1369 case '[':
1370 if(word_str[i+1] == '[')
1371 {
1372 while(true)
1373 {
1374 if(word_str[i] == '\\')
1375 {
1376 i++;
1377 }
1378 else if(i >= 4)
1379 {
1380 if(word_str[i] == '^' && word_str[i-1] == ']' && word_str[i-2] == ']')
1381 {
1382 break;
1383 }
1384 }
1385
1386 i++;
1387 }
1388 }
1389 else
1390 {
1391 ms.step(u_toloweru_tolower_72(word_str[i]), any_char);
1392 }
1393 break;
1394
1395 case '/':
1396 i = limit;
1397 break;
1398
1399 case '<':
1400 for(unsigned int j = i+1; j != limit; j++)
1401 {
1402 if(word_str[j] == '>')
1403 {
1404 int symbol = alphabet(word_str.substr(i, j-i+1));
1405 if(symbol)
1406 {
1407 ms.step(symbol, any_tag);
1408 }
1409 else
1410 {
1411 ms.step(any_tag);
1412 }
1413 i = j;
1414 break;
1415 }
1416 }
1417 break;
1418
1419 default:
1420 ms.step(u_toloweru_tolower_72(word_str[i]), any_char);
1421 break;
1422 }
1423 }
1424 ms.step('$');
1425}
1426
1427void
1428Transfer::setPreBilingual(bool value)
1429{
1430 preBilingual = value;
1431}
1432
1433bool
1434Transfer::getPreBilingual(void) const
1435{
1436 return preBilingual;
1437}
1438
1439void
1440Transfer::setUseBilingual(bool value)
1441{
1442 useBilingual = value;
1443}
1444
1445bool
1446Transfer::getUseBilingual(void) const
1447{
1448 return useBilingual;
1449}
1450
1451void
1452Transfer::setCaseSensitiveness(bool value)
1453{
1454 fstp.setCaseSensitiveMode(value);
1455}