clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name interchunk.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ interchunk.cc
| 1 | |
| 2 | |
| 3 | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 | |
| 9 | |
| 10 | |
| 11 | |
| 12 | |
| 13 | |
| 14 | |
| 15 | |
| 16 | |
| 17 | #include <apertium/interchunk.h> |
| 18 | |
| 19 | #include <lttoolbox/xml_walk_util.h> |
| 20 | #include <lttoolbox/string_utils.h> |
| 21 | |
| 22 | #include <iostream> |
| 23 | |
| 24 | using namespace std; |
| 25 | |
| 26 | Interchunk::Interchunk() |
| 27 | {} |
| 28 | |
| 29 | bool |
| 30 | Interchunk::checkIndex(xmlNode *element, int index, int limit) |
| 31 | { |
| 32 | if(index >= limit) |
| 33 | { |
| 34 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index >= limit" << endl; |
| 35 | return false; |
| 36 | } |
| 37 | if(index < 0) { |
| 38 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": index < 0" << endl; |
| 39 | return false; |
| 40 | } |
| 41 | if(word[index] == 0) |
| 42 | { |
| 43 | cerr << "Error in " << (char *) doc->URL << ": line " << element->line << ": Null access at word[index]" << endl; |
| 44 | return false; |
| 45 | } |
| 46 | return true; |
| 47 | } |
| 48 | |
| 49 | UString |
| 50 | Interchunk::evalCachedString(xmlNode* element) |
| 51 | { |
| 52 | TransferInstr& ti = evalStringCache[element]; |
| 53 | switch (ti.getType()) { |
| 54 | case ti_clip_tl: |
| 55 | if (checkIndex(element, ti.getPos(), lword)) { |
| 56 | if (ti.getContent() == "content"_u) { |
| 57 | UString wf = word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]); |
| 58 | return wf.substr(1, wf.length()-2); |
| 59 | } else { |
| 60 | return word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]); |
| 61 | } |
| 62 | } |
| 63 | break; |
| 64 | |
| 65 | case ti_var: |
| 66 | return variables[ti.getContent()]; |
| 67 | |
| 68 | case ti_lit_tag: |
| 69 | case ti_lit: |
| 70 | return ti.getContent(); |
| 71 | |
| 72 | case ti_b: |
| 73 | if (!blank_queue.empty()) { |
| 74 | UString retblank = blank_queue.front(); |
| 75 | if (in_out) { |
| 76 | blank_queue.pop(); |
| 77 | } |
| 78 | return retblank; |
| 79 | } else { |
| 80 | return " "_u; |
| 81 | } |
| 82 | break; |
| 83 | |
| 84 | case ti_get_case_from: |
| 85 | if (checkIndex(element, ti.getPos(), lword)) { |
| 86 | return copycase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()]), |
| 87 | evalString(ti.getPointer())); |
| 88 | } |
| 89 | break; |
| 90 | |
| 91 | case ti_case_of_tl: |
| 92 | if (checkIndex(element, ti.getPos(), lword)) { |
| 93 | return StringUtils::getcase(word[ti.getPos()]->chunkPart(attr_items[ti.getContent()])); |
| 94 | } |
| 95 | break; |
| 96 | |
| 97 | default: |
| 98 | return ""_u; |
| 99 | } |
| 100 | return ""_u; |
| 101 | } |
| 102 | |
| 103 | void |
| 104 | Interchunk::processClip(xmlNode* element) |
| 105 | { |
| 106 | int pos = 0; |
| 107 | UString part; |
| 108 | for (xmlAttr* i = element->properties; i != NULL; i = i->next) { |
| 109 | if (!xmlStrcmp(i->name, (const xmlChar*) "part")) { |
| 110 | part = to_ustring((const char*) i->children->content); |
| 111 | } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) { |
| 112 | pos = atoi((const char*) i->children->content) - 1; |
| 113 | } |
| 114 | } |
| 115 | evalStringCache[element] = TransferInstr(ti_clip_tl, part, pos, NULL); |
| 116 | } |
| 117 | |
| 118 | void |
| 119 | Interchunk::processBlank(xmlNode* element) |
| 120 | { |
| 121 | if (element->properties == NULL) { |
| 122 | evalStringCache[element] = TransferInstr(ti_b, " "_u, -1); |
| 123 | } else { |
| 124 | int pos = atoi((const char*) element->properties->children->content) - 1; |
| 125 | evalStringCache[element] = TransferInstr(ti_b, ""_u, pos); |
| 126 | } |
| 127 | } |
| 128 | |
| 129 | void |
| 130 | Interchunk::processLuCount(xmlNode* element) |
| 131 | { |
| 132 | cerr << "Error: unexpected expression: '" << element->name << "'" << endl; |
| 133 | exit(EXIT_FAILURE); |
| 134 | } |
| 135 | |
| 136 | UString |
| 137 | Interchunk::processLu(xmlNode* element) |
| 138 | { |
| 139 | cerr << "Error: unexpected expression: '" << element->name << "'" << endl; |
| 140 | exit(EXIT_FAILURE); |
| 141 | return ""_u; |
| 142 | } |
| 143 | |
| 144 | UString |
| 145 | Interchunk::processMlu(xmlNode* element) |
| 146 | { |
| 147 | cerr << "Error: unexpected expression: '" << element->name << "'" << endl; |
| 148 | exit(EXIT_FAILURE); |
| 149 | return ""_u; |
| 150 | } |
| 151 | |
| 152 | void |
| 153 | Interchunk::processCaseOf(xmlNode* element) |
| 154 | { |
| 155 | int pos = 0; |
| 156 | UString part; |
| 157 | for (xmlAttr* i = element->properties; i != NULL; i = i->next) { |
| 158 | if (!xmlStrcmp(i->name, (const xmlChar*) "part")) { |
| 159 | part = to_ustring((char*) i->children->content); |
| 160 | } else if (!xmlStrcmp(i->name, (const xmlChar*) "pos")) { |
| 161 | pos = atoi((const char*) i->children->content) - 1; |
| 162 | } |
| 163 | } |
| 164 | evalStringCache[element] = TransferInstr(ti_case_of_tl, part, pos); |
| 165 | } |
| 166 | |
| 167 | void |
| 168 | Interchunk::processOut(xmlNode *localroot) |
| 169 | { |
| 170 | in_out = true; |
| 171 | |
| 172 | for (auto i : children(localroot)) { |
| 173 | if(!xmlStrcmp(i->name, (const xmlChar *) "chunk")) { |
| 174 | write(processChunk(i), output); |
| 175 | } else { |
| 176 | write(evalString(i), output); |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | in_out = false; |
| 181 | } |
| 182 | |
| 183 | UString |
| 184 | Interchunk::processChunk(xmlNode *localroot) |
| 185 | { |
| 186 | UString result; |
| 187 | result.append("^"_u); |
| 188 | |
| 189 | for (auto i : children(localroot)) { |
| 190 | result.append(evalString(i)); |
| 191 | } |
| 192 | |
| 193 | result.append("$"_u); |
| 194 | return result; |
| 195 | } |
| 196 | |
| 197 | void |
| 198 | Interchunk::processLet(xmlNode *localroot) |
| 199 | { |
| 200 | xmlNode *leftSide = NULL, *rightSide = NULL; |
| 201 | |
| 202 | for (auto i : children(localroot)) { |
| 203 | if(leftSide == NULL) { |
| 204 | leftSide = i; |
| 205 | } else { |
| 206 | rightSide = i; |
| 207 | break; |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | map<xmlNode *, TransferInstr>::iterator it = evalStringCache.find(leftSide); |
| 212 | if(it != evalStringCache.end()) |
| 213 | { |
| 214 | TransferInstr &ti = it->second; |
| 215 | switch(ti.getType()) |
| 216 | { |
| 217 | case ti_var: |
| 218 | variables[ti.getContent()] = evalString(rightSide); |
| 219 | return; |
| 220 | |
| 221 | case ti_clip_tl: |
| 222 | { |
| 223 | bool match = word[ti.getPos()]->setChunkPart(attr_items[ti.getContent()], evalString(rightSide)); |
| 224 | if(!match && trace) |
| 225 | { |
| 226 | cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
| 227 | } |
| 228 | } |
| 229 | return; |
| 230 | |
| 231 | default: |
| 232 | return; |
| 233 | } |
| 234 | } |
| 235 | if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var")) |
| 236 | { |
| 237 | UString const val = to_ustring((const char *) leftSide->properties->children->content); |
| 238 | variables[val] = evalString(rightSide); |
| 239 | evalStringCache[leftSide] = TransferInstr(ti_var, val, 0); |
| 240 | } |
| 241 | else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "clip")) |
| 242 | { |
| 243 | int pos = 0; |
| 244 | UString part; |
| 245 | |
| 246 | for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next) |
| 247 | { |
| 248 | if(!xmlStrcmp(i->name, (const xmlChar *) "part")) |
| 249 | { |
| 250 | part = to_ustring((char*)i->children->content); |
| 251 | } |
| 252 | else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) |
| 253 | { |
| 254 | pos = atoi((const char *) i->children->content) - 1; |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | |
| 259 | bool match = word[pos]->setChunkPart(attr_items[part], |
| 260 | evalString(rightSide)); |
| 261 | if(!match && trace) |
| 262 | { |
| 263 | cerr << "apertium-interchunk warning: <let> on line " << localroot->line << " sometimes discards its value." << endl; |
| 264 | } |
| 265 | evalStringCache[leftSide] = TransferInstr(ti_clip_tl, |
| 266 | part, |
| 267 | pos, NULL); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | void |
| 272 | Interchunk::processModifyCase(xmlNode *localroot) |
| 273 | { |
| 274 | if (dictionary_case) return; |
| 1 | Assuming field 'dictionary_case' is false | |
|
| |
| 275 | xmlNode *leftSide = NULL, *rightSide = NULL; |
| 3 | | 'leftSide' initialized to a null pointer value | |
|
| 276 | |
| 277 | for (auto i : children(localroot)) { |
| 278 | if(leftSide == NULL) { |
| 279 | leftSide = i; |
| 280 | } else { |
| 281 | rightSide = i; |
| 282 | break; |
| 283 | } |
| 284 | } |
| 285 | |
| 286 | if(leftSide->name != NULL && !xmlStrcmp(leftSide->name, (const xmlChar *) "clip")) |
| 4 | | Access to field 'name' results in a dereference of a null pointer (loaded from variable 'leftSide') |
|
| 287 | { |
| 288 | int pos = 0; |
| 289 | UString part; |
| 290 | |
| 291 | for(xmlAttr *i = leftSide->properties; i != NULL; i = i->next) |
| 292 | { |
| 293 | if(!xmlStrcmp(i->name, (const xmlChar *) "part")) |
| 294 | { |
| 295 | part = to_ustring((char*)i->children->content); |
| 296 | } |
| 297 | else if(!xmlStrcmp(i->name, (const xmlChar *) "pos")) |
| 298 | { |
| 299 | pos = atoi((const char *) i->children->content) - 1; |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | UString const result = StringUtils::copycase(evalString(rightSide), |
| 304 | word[pos]->chunkPart(attr_items[part])); |
| 305 | bool match = word[pos]->setChunkPart(attr_items[part], result); |
| 306 | if(!match && trace) |
| 307 | { |
| 308 | cerr << "apertium-interchunk warning: <modify-case> on line " << localroot->line << " sometimes discards its value." << endl; |
| 309 | } |
| 310 | } |
| 311 | else if(!xmlStrcmp(leftSide->name, (const xmlChar *) "var")) |
| 312 | { |
| 313 | UString const val = to_ustring((const char *) leftSide->properties->children->content); |
| 314 | variables[val] = StringUtils::copycase(evalString(rightSide), variables[val]); |
| 315 | } |
| 316 | } |
| 317 | |
| 318 | void |
| 319 | Interchunk::processCallMacro(xmlNode *localroot) |
| 320 | { |
| 321 | UString n = to_ustring((const char *) localroot->properties->children->content); |
| 322 | int npar = 0; |
| 323 | |
| 324 | xmlNode *macro = macro_map[macros[n]]; |
| 325 | |
| 326 | for(xmlAttr *i = macro->properties; i != NULL; i = i->next) |
| 327 | { |
| 328 | if(!xmlStrcmp(i->name, (const xmlChar *) "npar")) |
| 329 | { |
| 330 | npar = atoi((const char *) i->children->content); |
| 331 | break; |
| 332 | } |
| 333 | } |
| 334 | |
| 335 | |
| 336 | |
| 337 | InterchunkWord **myword = NULL; |
| 338 | int idx = 0; |
| 339 | if(npar > 0) |
| 340 | { |
| 341 | myword = new InterchunkWord *[npar]; |
| 342 | for (auto i : children(localroot)) { |
| 343 | int pos = atoi((const char *) i->properties->children->content)-1; |
| 344 | myword[idx] = word[pos]; |
| 345 | idx++; |
| 346 | } |
| 347 | } |
| 348 | |
| 349 | swap(myword, word); |
| 350 | swap(npar, lword); |
| 351 | |
| 352 | for (auto i : children(macro)) { |
| 353 | processInstruction(i); |
| 354 | } |
| 355 | |
| 356 | swap(myword, word); |
| 357 | swap(npar, lword); |
| 358 | |
| 359 | delete[] myword; |
| 360 | } |
| 361 | |
| 362 | TransferToken & |
| 363 | Interchunk::readToken(InputFile& in) |
| 364 | { |
| 365 | if(!input_buffer.isEmpty()) |
| 366 | { |
| 367 | return input_buffer.next(); |
| 368 | } |
| 369 | |
| 370 | UString content; |
| 371 | while(true) |
| 372 | { |
| 373 | int val = in.get(); |
| 374 | if(in.eof() || (internal_null_flush && val == 0)) |
| 375 | { |
| 376 | return input_buffer.add(TransferToken(content, tt_eof)); |
| 377 | } |
| 378 | if(val == '\\') |
| 379 | { |
| 380 | content += '\\'; |
| 381 | content += in.get(); |
| 382 | } |
| 383 | else if(val == '[') |
| 384 | { |
| 385 | content += '['; |
| 386 | while(true) |
| 387 | { |
| 388 | UChar32 val2 = in.get(); |
| 389 | if(val2 == '\\') { |
| 390 | content += '\\'; |
| 391 | content += in.get(); |
| 392 | } else if(val2 == ']') { |
| 393 | content += ']'; |
| 394 | break; |
| 395 | } else { |
| 396 | content += val2; |
| 397 | } |
| 398 | } |
| 399 | } |
| 400 | else if(inword && val == '{') { |
| 401 | content += '{'; |
| 402 | while(true) { |
| 403 | UChar32 val2 = in.get(); |
| 404 | if(val2 == '\\') { |
| 405 | content += '\\'; |
| 406 | content += in.get(); |
| 407 | } else if(val2 == '}') { |
| 408 | UChar32 val3 = in.peek(); |
| 409 | |
| 410 | content += '}'; |
| 411 | if(val3 == '$') { |
| 412 | break; |
| 413 | } |
| 414 | } else { |
| 415 | content += val2; |
| 416 | } |
| 417 | } |
| 418 | } |
| 419 | else if(inword && val == '$') |
| 420 | { |
| 421 | inword = false; |
| 422 | return input_buffer.add(TransferToken(content, tt_word)); |
| 423 | } |
| 424 | else if(val == '^') |
| 425 | { |
| 426 | inword = true; |
| 427 | return input_buffer.add(TransferToken(content, tt_blank)); |
| 428 | } |
| 429 | else |
| 430 | { |
| 431 | content += val; |
| 432 | } |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | void |
| 437 | Interchunk::interchunk_wrapper_null_flush(InputFile& in, UFILE* out) |
| 438 | { |
| 439 | null_flush = false; |
| 440 | internal_null_flush = true; |
| 441 | |
| 442 | while(!in.eof()) { |
| 443 | interchunk(in, out); |
| 444 | u_fputc('\0', out); |
| 445 | u_fflush(out); |
| 446 | variables = variable_defaults; |
| 447 | } |
| 448 | internal_null_flush = false; |
| 449 | null_flush = true; |
| 450 | } |
| 451 | |
| 452 | |
| 453 | void |
| 454 | Interchunk::interchunk(InputFile& in, UFILE* out) |
| 455 | { |
| 456 | if(getNullFlush()) |
| 457 | { |
| 458 | interchunk_wrapper_null_flush(in, out); |
| 459 | } |
| 460 | |
| 461 | unsigned int last = input_buffer.getPos(); |
| 462 | unsigned int prev_last = last; |
| 463 | int lastrule_id = -1; |
| 464 | set<int> banned_rules; |
| 465 | |
| 466 | output = out; |
| 467 | ms.init(me->getInitial()); |
| 468 | |
| 469 | while(true) |
| 470 | { |
| 471 | if(ms.size() == 0) |
| 472 | { |
| 473 | if(lastrule != NULL) |
| 474 | { |
| 475 | int num_words_to_consume = applyRule(); |
| 476 | |
| 477 | |
| 478 | |
| 479 | if(num_words_to_consume < 0) |
| 480 | { |
| 481 | banned_rules.clear(); |
| 482 | input_buffer.setPos(last); |
| 483 | } |
| 484 | else if(num_words_to_consume > 0) |
| 485 | { |
| 486 | banned_rules.clear(); |
| 487 | if(prev_last >= input_buffer.getSize()) |
| 488 | { |
| 489 | input_buffer.setPos(0); |
| 490 | } |
| 491 | else |
| 492 | { |
| 493 | input_buffer.setPos(prev_last+1); |
| 494 | } |
| 495 | int num_consumed_words = 0; |
| 496 | while(num_consumed_words < num_words_to_consume && !input_buffer.isEmpty()) |
| 497 | { |
| 498 | TransferToken& local_tt = input_buffer.next(); |
| 499 | if (local_tt.getType() == tt_word) |
| 500 | { |
| 501 | num_consumed_words++; |
| 502 | } |
| 503 | } |
| 504 | } |
| 505 | else |
| 506 | { |
| 507 | |
| 508 | banned_rules.insert(lastrule_id); |
| 509 | input_buffer.setPos(prev_last); |
| 510 | input_buffer.next(); |
| 511 | last = input_buffer.getPos(); |
| 512 | } |
| 513 | lastrule_id = -1; |
| 514 | } |
| 515 | else |
| 516 | { |
| 517 | if(tmpword.size() != 0) |
| 518 | { |
| 519 | u_fprintf(output, "^%S$", tmpword[0]->c_str()); |
| 520 | tmpword.clear(); |
| 521 | input_buffer.setPos(last); |
| 522 | input_buffer.next(); |
| 523 | prev_last = last; |
| 524 | banned_rules.clear(); |
| 525 | last = input_buffer.getPos(); |
| 526 | ms.init(me->getInitial()); |
| 527 | } |
| 528 | else if(tmpblank.size() != 0) { |
| 529 | write(*tmpblank[0], output); |
| 530 | tmpblank.clear(); |
| 531 | prev_last = last; |
| 532 | last = input_buffer.getPos(); |
| 533 | ms.init(me->getInitial()); |
| 534 | } |
| 535 | } |
| 536 | } |
| 537 | int val = ms.classifyFinals(me->getFinals(), banned_rules); |
| 538 | if(val != -1) |
| 539 | { |
| 540 | size_t lastrule_line = rule_lines[val-1]; |
| 541 | lastrule = rule_map[val-1]; |
| 542 | last = input_buffer.getPos(); |
| 543 | lastrule_id = val; |
| 544 | |
| 545 | last_lword = tmpword.size(); |
| 546 | |
| 547 | if(trace) |
| 548 | { |
| 549 | cerr << endl << "apertium-interchunk: Rule " << val << " line " << lastrule_line; |
| 550 | for (auto& it : tmpword) { |
| 551 | cerr << " " << *it; |
| 552 | } |
| 553 | cerr << endl; |
| 554 | } |
| 555 | } |
| 556 | |
| 557 | TransferToken ¤t = readToken(in); |
| 558 | |
| 559 | switch(current.getType()) |
| 560 | { |
| 561 | case tt_word: |
| 562 | applyWord(current.getContent()); |
| 563 | tmpword.push_back(¤t.getContent()); |
| 564 | break; |
| 565 | |
| 566 | case tt_blank: |
| 567 | ms.step(' '); |
| 568 | tmpblank.push_back(¤t.getContent()); |
| 569 | break; |
| 570 | |
| 571 | case tt_eof: |
| 572 | if(tmpword.size() != 0) { |
| 573 | tmpblank.push_back(¤t.getContent()); |
| 574 | ms.clear(); |
| 575 | } |
| 576 | else { |
| 577 | write(current.getContent(), output); |
| 578 | tmpblank.clear(); |
| 579 | return; |
| 580 | } |
| 581 | break; |
| 582 | |
| 583 | default: |
| 584 | cerr << "Error: Unknown input token." << endl; |
| 585 | return; |
| 586 | } |
| 587 | } |
| 588 | } |
| 589 | |
| 590 | int |
| 591 | Interchunk::applyRule() |
| 592 | { |
| 593 | unsigned int limit = tmpword.size(); |
| 594 | |
| 595 | for(unsigned int i = 0; i != limit; i++) |
| 596 | { |
| 597 | if(i == 0) |
| 598 | { |
| 599 | word = new InterchunkWord *[limit]; |
| 600 | lword = limit; |
| 601 | } |
| 602 | else |
| 603 | { |
| 604 | if(int(blank_queue.size()) < last_lword - 1) |
| 605 | { |
| 606 | UString blank_to_add = UString(*tmpblank[i-1]); |
| 607 | blank_queue.push(blank_to_add); |
| 608 | } |
| 609 | } |
| 610 | |
| 611 | word[i] = new InterchunkWord(*tmpword[i]); |
| 612 | } |
| 613 | |
| 614 | int words_to_consume = processRule(lastrule); |
| 615 | lastrule = NULL; |
| 616 | |
| 617 | if(word) |
| 618 | { |
| 619 | for(unsigned int i = 0; i != limit; i++) |
| 620 | { |
| 621 | delete word[i]; |
| 622 | } |
| 623 | delete[] word; |
| 624 | } |
| 625 | |
| 626 | word = NULL; |
| 627 | tmpword.clear(); |
| 628 | tmpblank.clear(); |
| 629 | ms.init(me->getInitial()); |
| 630 | return words_to_consume; |
| 631 | } |
| 632 | |
| 633 | void |
| 634 | Interchunk::applyWord(UString const &word_str) |
| 635 | { |
| 636 | ms.step('^'); |
| 637 | for(unsigned int i = 0, limit = word_str.size(); i < limit; i++) |
| 638 | { |
| 639 | switch(word_str[i]) |
| 640 | { |
| 641 | case '\\': |
| 642 | i++; |
| 643 | ms.step(u_tolower(word_str[i]), any_char); |
| 644 | break; |
| 645 | |
| 646 | case '<': |
| 647 | for(unsigned int j = i+1; j != limit; j++) |
| 648 | { |
| 649 | if(word_str[j] == '>') |
| 650 | { |
| 651 | int symbol = alphabet(word_str.substr(i, j-i+1)); |
| 652 | if(symbol) |
| 653 | { |
| 654 | ms.step(symbol, any_tag); |
| 655 | } |
| 656 | else |
| 657 | { |
| 658 | ms.step(any_tag); |
| 659 | } |
| 660 | i = j; |
| 661 | break; |
| 662 | } |
| 663 | } |
| 664 | break; |
| 665 | |
| 666 | case '{': |
| 667 | ms.step('$'); |
| 668 | return; |
| 669 | |
| 670 | default: |
| 671 | ms.step(u_tolower(word_str[i]), any_char); |
| 672 | break; |
| 673 | } |
| 674 | } |
| 675 | ms.step('$'); |
| 676 | } |