Bug Summary

File: tagger.cc
Warning: line 749, column 34
1st function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name tagger.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ tagger.cc
1// Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
2//
3// This program is free software; you can redistribute it and/or
4// modify it under the terms of the GNU General Public License as
5// published by the Free Software Foundation; either version 2 of the
6// License, or (at your option) any later version.
7//
8// This program is distributed in the hope that it will be useful, but
9// WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11// General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program; if not, see <https://www.gnu.org/licenses/>.
15
16#include <apertium/tagger.h>
17
18#include "apertium_config.h"
19
20#include "align.h"
21#include <lttoolbox/exception.h>
22#include "exception.h"
23#include "linebreak.h"
24#include "unigram_tagger.h"
25#include <apertium/perceptron_tagger.h>
26#include <apertium/hmm.h>
27#include <apertium/lswpost.h>
28#include <apertium/tagger_word.h>
29#include <apertium/shell_utils.h>
30
31#include <lttoolbox/lt_locale.h>
32
33#include "getopt_long.h"
34#include <cerrno>
35#include <cstdio>
36#include <cstdlib>
37#include <cstring>
38#include <fstream>
39#include <iomanip>
40#include <ios>
41#include <iostream>
42#include <locale>
43#include <sstream>
44#include <string>
45#include <unistd.h>
46
47namespace Apertium {
48using namespace ShellUtils;
49using namespace tagger_utils;
50
51/** Top level argument parsing */
52
53apertium_tagger::apertium_tagger(int &argc, char **&argv)
54 : argc(argc), argv(argv), The_val(), nonoptarg(),
55
56 The_indexptr(), FunctionTypeTypeOption_indexptr(),
57 FunctionTypeOption_indexptr(),
58
59 TheFunctionTypeType(), TheUnigramType(), TheFunctionType(),
60 TheFunctionTypeOptionArgument(0), TheFlags() {
61 try {
62 /*Set optind so that multiple instances can be created */
63 optind = 1;
64 while (true) {
1
Loop condition is true. Entering loop body
65 The_val = getopt_long(argc, argv, "bdfegmpr:s:t:u:wxz", longopts, &The_indexptr);
66
67 if (The_val == -1)
2
Assuming the condition is true
3
Taking true branch
68 break;
69
70 set_indexptr();
71
72 switch (The_val) {
73 case 'b':
74 flagOptionCase(&TaggerFlags::getSentSeg,
75 &TaggerFlags::setSentSeg);
76 break;
77 case 'd':
78 flagOptionCase(&TaggerFlags::getDebug,
79 &TaggerFlags::setDebug);
80 break;
81 case 'e':
82 flagOptionCase(&TaggerFlags::getSkipErrors,
83 &TaggerFlags::setSkipErrors);
84 break;
85 case 'f':
86 flagOptionCase(&TaggerFlags::getFirst,
87 &TaggerFlags::setFirst);
88 break;
89 case 'm':
90 flagOptionCase(&TaggerFlags::getMark,
91 &TaggerFlags::setMark);
92 break;
93 case 'p':
94 flagOptionCase(&TaggerFlags::getShowSuperficial,
95 &TaggerFlags::setShowSuperficial);
96 break;
97 case 'z':
98 flagOptionCase(&TaggerFlags::getNullFlush,
99 &TaggerFlags::setNullFlush);
100 break;
101 case 'u':
102 functionTypeTypeOptionCase(Unigram);
103
104 if (std::strncmp(optarg, "1", sizeof "1" - 1) == 0) {
105 TheUnigramType = Stream_5_3_1;
106 break;
107 }
108
109 if (std::strncmp(optarg, "2", sizeof "2" - 1) == 0) {
110 TheUnigramType = Stream_5_3_2;
111 break;
112 }
113
114 if (std::strncmp(optarg, "3", sizeof "3" - 1) == 0) {
115 TheUnigramType = Stream_5_3_3;
116 break;
117 }
118
119 {
120 std::stringstream what_;
121 what_ << "invalid argument '" << optarg << "' for '--unigram'\n"
122 "Valid arguments are:\n"
123 " - '1'\n"
124 " - '2'\n"
125 " - '3'";
126 throw Exception::apertium_tagger::InvalidArgument(what_);
127 }
128 break;
129 case 'w':
130 functionTypeTypeOptionCase(SlidingWindow);
131 break;
132 case 'x':
133 functionTypeTypeOptionCase(Perceptron);
134 break;
135 case 'g':
136 functionTypeOptionCase(Tagger);
137 break;
138 case 'r':
139 functionTypeOptionCase(Retrain);
140 getIterationsArgument();
141 break;
142 case 's':
143 functionTypeOptionCase(Supervised);
144 getIterationsArgument();
145 break;
146 case 't':
147 functionTypeOptionCase(Train);
148 getIterationsArgument();
149 break;
150 case 'h':
151 help();
152 return;
153 default:
154 throw Exception::apertium_tagger::err_Exception("");
155 }
156 }
157
158 if (!TheFunctionType) {
4
Execution continues on line 158
5
Taking false branch
159 help();
160 return;
161 }
162
163 nonoptarg = argc - optind;
164 switch (*TheFunctionType) {
6
Control jumps to 'case Train:' at line 268
165 case Tagger:
166 if (!TheFunctionTypeType) {
167 try {
168 PerceptronTagger percep(TheFlags);
169 g_StreamTagger(percep);
170 } catch (DeserialisationException) {
171 HMM HiddenMarkovModelTagger_(TheFlags);
172 g_FILE_Tagger(HiddenMarkovModelTagger_);
173 }
174 break;
175 }
176 switch (*TheFunctionTypeType) {
177 case Unigram: {
178 UnigramTagger UnigramTagger_(TheFlags);
179 switch (*TheUnigramType) {
180 case Stream_5_3_1:
181 UnigramTagger_.setModel(UnigramTaggerModel1);
182 break;
183 case Stream_5_3_2:
184 UnigramTagger_.setModel(UnigramTaggerModel2);
185 break;
186 case Stream_5_3_3:
187 UnigramTagger_.setModel(UnigramTaggerModel3);
188 break;
189 default:
190 std::abort();
191 }
192 g_StreamTagger(UnigramTagger_);
193 } break;
194 case SlidingWindow: {
195 LSWPoST SlidingWindowTagger_(TheFlags);
196 g_FILE_Tagger(SlidingWindowTagger_);
197 } break;
198 case Perceptron: {
199 PerceptronTagger perceptron(TheFlags);
200 g_StreamTagger(perceptron);
201 } break;
202 default:
203 std::abort();
204 }
205
206 break;
207 case Retrain:
208 if (!TheFunctionTypeType) {
209 HMM HiddenMarkovModelTagger_(TheFlags);
210 r_FILE_Tagger(HiddenMarkovModelTagger_);
211 break;
212 }
213
214 switch (*TheFunctionTypeType) {
215 case Unigram: {
216 std::stringstream what_;
217 what_ << "invalid option -- 'u'";
218 throw Exception::apertium_tagger::InvalidOption(what_);
219 }
220 case SlidingWindow: {
221 LSWPoST SlidingWindowTagger_(TheFlags);
222 r_FILE_Tagger(SlidingWindowTagger_);
223 } break;
224 default:
225 std::abort();
226 }
227
228 break;
229 case Supervised:
230 if (!TheFunctionTypeType) {
231 HMM HiddenMarkovModelTagger_(TheFlags);
232 s_FILE_Tagger(HiddenMarkovModelTagger_);
233 break;
234 }
235
236 switch (*TheFunctionTypeType) {
237 case Unigram: {
238 UnigramTagger UnigramTagger_(TheFlags);
239 switch (*TheUnigramType) {
240 case Stream_5_3_1:
241 UnigramTagger_.setModel(UnigramTaggerModel1);
242 break;
243 case Stream_5_3_2:
244 UnigramTagger_.setModel(UnigramTaggerModel2);
245 break;
246 case Stream_5_3_3:
247 UnigramTagger_.setModel(UnigramTaggerModel3);
248 break;
249 default:
250 std::abort();
251 }
252 s_StreamTaggerTrainer(UnigramTagger_);
253 } break;
254 case SlidingWindow: {
255 std::stringstream what_;
256 what_ << "invalid option -- 'w'";
257 throw Exception::apertium_tagger::InvalidOption(what_);
258 } break;
259 case Perceptron: {
260 PerceptronTagger perceptron(TheFlags);
261 s_StreamTaggerTrainer(perceptron);
262 } break;
263 default:
264 std::abort();
265 }
266
267 break;
268 case Train:
269 if (!TheFunctionTypeType) {
7
Taking true branch
270 HMM HiddenMarkovModelTagger_(TheFlags);
271 t_FILE_Tagger(HiddenMarkovModelTagger_);
8
Calling 'apertium_tagger::t_FILE_Tagger'
272 break;
273 }
274
275 switch (*TheFunctionTypeType) {
276 case Unigram: {
277 std::stringstream what_;
278 what_ << "invalid option -- 'u'";
279 throw Exception::apertium_tagger::InvalidOption(what_);
280 }
281 case SlidingWindow: {
282 LSWPoST SlidingWindowTagger_(TheFlags);
283 t_FILE_Tagger(SlidingWindowTagger_);
284 } break;
285 default:
286 std::abort();
287 }
288
289 break;
290 default:
291 std::abort();
292 }
293 } catch (const ExceptionType &ExceptionType_) {
294 std::cerr << "apertium-tagger: " << ExceptionType_.what() << std::endl;
295 throw Exception::apertium_tagger::err_Exception("");
296 }
297}
298
299apertium_tagger::~apertium_tagger() {}
300
301void apertium_tagger::help() {
302
303 std::cerr <<
304"Usage: apertium-tagger [OPTION]... -g SERIALISED_TAGGER \\\n"
305" [INPUT \\\n"
306" [OUTPUT]]\n"
307"\n"
308" or: apertium-tagger [OPTION]... -r ITERATIONS \\\n"
309" CORPUS \\\n"
310" SERIALISED_TAGGER\n"
311"\n"
312" or: apertium-tagger [OPTION]... -s ITERATIONS \\\n"
313" DICTIONARY \\\n"
314" CORPUS \\\n"
315" TAGGER_SPECIFICATION \\\n"
316" SERIALISED_TAGGER \\\n"
317" TAGGED_CORPUS \\\n"
318" UNTAGGED_CORPUS\n"
319"\n"
320" or: apertium-tagger [OPTION]... -s 0 \\\n"
321" DICTIONARY \\\n"
322" TAGGER_SPECIFICATION \\\n"
323" SERIALISED_TAGGER \\\n"
324" TAGGED_CORPUS \\\n"
325" UNTAGGED_CORPUS\n"
326"\n"
327" or: apertium-tagger [OPTION]... -s 0 \\\n"
328" -u MODEL \\\n"
329" SERIALISED_TAGGER \\\n"
330" TAGGED_CORPUS\n"
331"\n"
332" or: apertium-tagger [OPTION]... -t ITERATIONS \\\n"
333" DICTIONARY \\\n"
334" CORPUS \\\n"
335" TAGGER_SPECIFICATION \\\n"
336" SERIALISED_TAGGER\n"
337"\n"
338"Mandatory arguments to long options are mandatory for short options too.\n"
339"\n";
340
341 std::vector<std::pair<std::string, std::string> > options_description_;
342 options_description_.push_back(std::make_pair("-d, --debug", "with -g, print error messages about the input"));
343 options_description_.push_back(std::make_pair("-f, --first", "with -g, reorder each lexical unit's analyses so that the chosen one is first"));
344 options_description_.push_back(std::make_pair("-m, --mark", "with -g, mark disambiguated lexical units"));
345 options_description_.push_back(std::make_pair("-p, --show-superficial", "with -g, output each lexical unit's surface form"));
346 options_description_.push_back(std::make_pair("-z, --null-flush", "with -g, flush the output after getting each null character"));
347 align::align_(options_description_);
348 std::cerr << '\n';
349 options_description_.clear();
350 options_description_.push_back(std::make_pair("-u, --unigram=MODEL", "use unigram algorithm MODEL from <https://coltekin.net/cagri/papers/trmorph-tools.pdf>"));
351 align::align_(options_description_);
352 std::cerr << '\n';
353 options_description_.clear();
354 options_description_.push_back(std::make_pair("-w, --sliding-window", "use the Light Sliding Window algorithm"));
355 options_description_.push_back(std::make_pair("-x, --perceptron", "use the averaged perceptron algorithm"));
356 options_description_.push_back(std::make_pair("-e, --skip-on-error", "with -xs, ignore certain types of errors with the training corpus"));
357 align::align_(options_description_);
358 std::cerr << '\n';
359 options_description_.clear();
360 options_description_.push_back(std::make_pair("-g, --tagger", "disambiguate the input"));
361 align::align_(options_description_);
362 std::cerr << '\n';
363 options_description_.clear();
364 options_description_.push_back(std::make_pair("-r, --retrain=ITERATIONS", "with -u: exit;\notherwise: retrain the tagger with ITERATIONS unsupervised iterations"));
365 options_description_.push_back(std::make_pair("-s, --supervised=ITERATIONS", "with -u: train the tagger with a hand-tagged corpus;\nwith -w: exit;\notherwise: initialise the tagger with a hand-tagged corpus and retrain it with ITERATIONS unsupervised iterations"));
366 options_description_.push_back(std::make_pair("-t, --train=ITERATIONS", "with -u: exit;\notherwise: train the tagger with ITERATIONS unsupervised iterations"));
367 align::align_(options_description_);
368 std::cerr << '\n';
369 options_description_.clear();
370 options_description_.push_back(std::make_pair("-h, --help", "display this help and exit"));
371 align::align_(options_description_);
372}
373
374const struct option apertium_tagger::longopts[] = {
375 {"help", no_argument0, 0, 'h'},
376 {"sent-seg", no_argument0, 0, 'b'},
377 {"debug", no_argument0, 0, 'd'},
378 {"skip-on-error", no_argument0, 0, 'e'},
379 {"first", no_argument0, 0, 'f'},
380 {"mark", no_argument0, 0, 'm'},
381 {"show-superficial", no_argument0, 0, 'p'},
382 {"null-flush", no_argument0, 0, 'z'},
383 {"unigram", required_argument1, 0, 'u'},
384 {"sliding-window", no_argument0, 0, 'w'},
385 {"perceptron", no_argument0, 0, 'x'},
386 {"tagger", no_argument0, 0, 'g'},
387 {"retrain", required_argument1, 0, 'r'},
388 {"supervised", required_argument1, 0, 's'},
389 {"train", required_argument1, 0, 't'},
390 {0, 0, 0, 0}};
391
392/** Utilities */
393
394std::string apertium_tagger::option_string(const int &indexptr_) {
395 return option_string(longopts[indexptr_]);
396}
397
398std::string apertium_tagger::option_string(const struct option &option_) {
399 std::stringstream option_string_;
400 option_string_ << "--" << option_.name;
401 return option_string_.str();
402}
403
404void apertium_tagger::locale_global_() {
405
406#if defined __clang__1
407
408 std::locale::global(std::locale(""));
409
410#else
411#if defined __APPLE__
412
413 LtLocale::tryToSetLocale();
414
415#else
416
417 std::locale::global(std::locale(""));
418
419#endif // defined __APPLE__
420#endif // defined __clang__
421}
422
423void apertium_tagger::set_indexptr() {
424 if (The_val == longopts[The_indexptr].val)
425 return;
426
427 for (std::size_t longopts_Index = 0; longopts[longopts_Index].val != 0;
428 ++longopts_Index) {
429 if (The_val == longopts[longopts_Index].val) {
430 The_indexptr = longopts_Index;
431 return;
432 }
433 }
434}
435
436void apertium_tagger::flagOptionCase(
437 bool (TaggerFlags::*GetFlag)(),
438 void (TaggerFlags::*SetFlag)(const bool &)) {
439 if ((TheFlags.*GetFlag)()) {
440 std::stringstream what_;
441 what_ << "unexpected '" << option_string() << "' following '"
442 << option_string() << '\'';
443 throw Exception::apertium_tagger::UnexpectedFlagOption(what_);
444 }
445
446 (TheFlags.*SetFlag)(true);
447}
448
449std::string apertium_tagger::option_string() {
450 return option_string(The_indexptr);
451}
452
453void apertium_tagger::functionTypeTypeOptionCase(
454 const FunctionTypeType &FunctionTypeType_) {
455 if (FunctionTypeTypeOption_indexptr) {
456 std::stringstream what_;
457 what_ << "unexpected '" << option_string() << "' following '"
458 << option_string(*FunctionTypeTypeOption_indexptr)
459 << '\'';
460 throw Exception::apertium_tagger::UnexpectedFunctionTypeTypeOption(what_);
461 }
462
463 TheFunctionTypeType = FunctionTypeType_;
464 FunctionTypeTypeOption_indexptr = The_indexptr;
465}
466
467void apertium_tagger::functionTypeOptionCase(
468 const FunctionType &FunctionType_) {
469 if (FunctionTypeOption_indexptr) {
470 std::stringstream what_;
471 what_ << "unexpected '" << option_string() << "' following '"
472 << option_string(*FunctionTypeOption_indexptr)
473 << '\'';
474 throw Exception::apertium_tagger::UnexpectedFunctionTypeOption(what_);
475 }
476
477 TheFunctionType = FunctionType_;
478 FunctionTypeOption_indexptr = The_indexptr;
479}
480
481void apertium_tagger::getIterationsArgument() {
482 try {
483 TheFunctionTypeOptionArgument = optarg_unsigned_long("ITERATIONS");
484 } catch (const ExceptionType &ExceptionType_) {
485 std::stringstream what_;
486 what_ << "invalid argument '" << optarg << "' for '" << option_string()
487 << '\'';
488 throw Exception::apertium_tagger::InvalidArgument(what_);
489 }
490}
491
492static unsigned long parse_unsigned_long(const char *metavar, const char *val) {
493 char *str_end;
494 errno(*__errno_location ()) = 0;
495 unsigned long N_0 = std::strtoul(val, &str_end, 10);
496
497 if (*str_end != '\0') {
498 std::stringstream what_;
499 what_ << "can't convert " << metavar << " \"" << val << "\" to unsigned long";
500 throw Exception::apertium_tagger::str_end_not_eq_NULL(what_);
501 }
502
503 if (*val == '\0') {
504 std::stringstream what_;
505 what_ << "can't convert " << metavar << " of size 1 \"\" to unsigned long";
506 throw Exception::apertium_tagger::optarg_eq_NULL(what_);
507 }
508
509 if (errno(*__errno_location ()) == ERANGE34) {
510 std::stringstream what_;
511 what_ << "can't convert " << metavar << " \"" << val
512 << "\" to unsigned long, not in unsigned long range";
513 throw Exception::apertium_tagger::ERANGE_(what_);
514 }
515
516 return N_0;
517}
518
519unsigned long apertium_tagger::optarg_unsigned_long(const char *metavar) {
520 return parse_unsigned_long(metavar, optarg);
521}
522
523void apertium_tagger::get_file_arguments(
524 bool get_crp_fn,
525 char **DicFn, char **CrpFn,
526 char **TaggedFn, char **UntaggedFn,
527 char **TsxFn, char **ProbFn) {
528 if (*TheFunctionType != Retrain) {
11
Assuming the condition is false
12
Taking false branch
529 *DicFn = argv[optind++];
530 }
531 if (get_crp_fn
12.1
'get_crp_fn' is false
) {
13
Taking false branch
532 *CrpFn = argv[optind++];
533 }
534 if (*TheFunctionType == Supervised) {
14
Taking false branch
535 *TsxFn = argv[optind++];
536 *ProbFn = argv[optind++];
537 *TaggedFn = argv[optind++];
538 }
539 *UntaggedFn = argv[optind++];
540 if (*TheFunctionType == Supervised && !get_crp_fn) {
541 *CrpFn = *UntaggedFn;
542 }
543 if (*TheFunctionType != Supervised) {
15
Taking true branch
544 if (*TheFunctionType != Retrain) {
16
Taking false branch
545 *TsxFn = argv[optind++];
546 }
547 *ProbFn = argv[optind++];
548 }
549}
17
Returning without writing to '*TsxFn'
550
551void apertium_tagger::init_FILE_Tagger(FILE_Tagger &FILE_Tagger_, string const &TsxFn) {
552 FILE_Tagger_.deserialise(TsxFn);
553 TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
554}
555
556MorphoStream* apertium_tagger::setup_untagged_morpho_stream(
557 FILE_Tagger &FILE_Tagger_,
558 char *DicFn, char *UntaggedFn,
559 UFILE* *UntaggedCorpus) {
560 *UntaggedCorpus = try_open_file_utf8("UNTAGGED_CORPUS", UntaggedFn, "r");
561
562 FILE_Tagger_.read_dictionary(DicFn);
563
564 return new FileMorphoStream(UntaggedFn, true, &FILE_Tagger_.get_tagger_data());
565}
566
567/** Implementation of flags/subcommands */
568
569void apertium_tagger::g_StreamTagger(StreamTagger &StreamTagger_) {
570 locale_global_();
571
572 expect_file_arguments(nonoptarg, 1, 4);
573
574 std::ifstream SerialisedAnalysisFrequencies;
575 try_open_fstream("SERIALISED_TAGGER", argv[optind],
576 SerialisedAnalysisFrequencies);
577
578 try {
579 StreamTagger_.deserialise(SerialisedAnalysisFrequencies);
580 } catch (const ExceptionType &ExceptionType_) {
581 std::stringstream what_;
582 what_ << "can't deserialise SERIALISED_TAGGER file \"" << argv[optind]
583 << "\" Reason: " << ExceptionType_.what();
584 throw Exception::apertium_tagger::deserialise(what_);
585 }
586
587 if (nonoptarg < 2) {
588 Stream Input(TheFlags);
589 StreamTagger_.tag(Input, std::cout);
590 return;
591 }
592
593 Stream Input(TheFlags, argv[optind + 1]);
594
595 if (nonoptarg < 3) {
596 StreamTagger_.tag(Input, std::cout);
597 return;
598 }
599
600 std::ofstream Output_stream;
601 try_open_fstream("OUTPUT", argv[optind + 2], Output_stream);
602
603 StreamTagger_.tag(Input, Output_stream);
604}
605
606void apertium_tagger::s_StreamTaggerTrainer(
607 StreamTagger &StreamTaggerTrainer_) {
608 locale_global_();
609
610 if (TheFunctionTypeOptionArgument != 0 && *TheFunctionTypeType != Perceptron) {
611 std::stringstream what_;
612 what_ << "invalid argument '" << TheFunctionTypeOptionArgument
613 << "' for '--supervised'";
614 throw Exception::apertium_tagger::InvalidArgument(what_);
615 }
616
617 if (*TheFunctionTypeType == Perceptron) {
618 expect_file_arguments(nonoptarg, 4);
619 } else {
620 expect_file_arguments(nonoptarg, 2);
621 }
622
623 Stream TaggedCorpus(TheFlags, argv[optind + 1]);
624
625 if (*TheFunctionTypeType == Perceptron) {
626 Stream UntaggedCorpus(TheFlags, argv[optind + 2]);
627
628 PerceptronTagger &pt = dynamic_cast<PerceptronTagger&>(StreamTaggerTrainer_);
629 pt.read_spec(argv[optind + 3]);
630 pt.train(TaggedCorpus, UntaggedCorpus, TheFunctionTypeOptionArgument);
631 } else {
632 StreamTaggerTrainer_.train(TaggedCorpus);
633 }
634
635 std::ofstream Serialised_basic_Tagger;
636 try_open_fstream("SERIALISED_TAGGER", argv[optind],
637 Serialised_basic_Tagger);
638
639 StreamTaggerTrainer_.serialise(Serialised_basic_Tagger);
640}
641
642void apertium_tagger::g_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
643 LtLocale::tryToSetLocale();
644 expect_file_arguments(nonoptarg, 1, 4);
645
646 FILE *Serialised_FILE_Tagger =
647 try_open_file("SERIALISED_TAGGER", argv[optind], "rb");
648 FILE_Tagger_.deserialise(Serialised_FILE_Tagger);
649 try_close_file("SERIALISED_TAGGER", argv[optind], Serialised_FILE_Tagger);
650 TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
651 TaggerWord::generate_marks = TheFlags.getMark();
652 const char* infile = NULL__null;
653 UFILE* Output = u_finitu_finit_72(stdoutstdout, NULL__null, NULL__null);
654 if (nonoptarg >= 2) {
655 infile = argv[optind + 1];
656 if (nonoptarg >= 3) {
657 Output = try_open_file_utf8("OUTPUT", argv[optind + 2], "w");
658 }
659 }
660 FILE_Tagger_.tagger(infile, Output);
661 u_fcloseu_fclose_72(Output);
662}
663
664void apertium_tagger::r_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
665 LtLocale::tryToSetLocale();
666
667 expect_file_arguments(nonoptarg, 2);
668
669 char *ProbFn, *UntaggedFn;
670
671 get_file_arguments(
672 false,
673 NULL__null, NULL__null, NULL__null, &UntaggedFn,
674 NULL__null, &ProbFn);
675
676 FILE *Serialised_FILE_Tagger =
677 try_open_file("SERIALISED_TAGGER", ProbFn, "rb");
678 FILE_Tagger_.deserialise(Serialised_FILE_Tagger);
679 try_close_file("SERIALISED_TAGGER", ProbFn, Serialised_FILE_Tagger);
680
681 TaggerWord::setArrayTags(FILE_Tagger_.getArrayTags());
682
683 UFILE* UntaggedCorpus;
684 MorphoStream* ms = setup_untagged_morpho_stream(
685 FILE_Tagger_,
686 NULL__null, UntaggedFn,
687 &UntaggedCorpus);
688
689 FILE_Tagger_.train(*ms, TheFunctionTypeOptionArgument);
690 delete ms;
691 u_fcloseu_fclose_72(UntaggedCorpus);
692
693 Serialised_FILE_Tagger =
694 try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
695 FILE_Tagger_.serialise(Serialised_FILE_Tagger);
696 try_close_file("SERIALISED_TAGGER", ProbFn, Serialised_FILE_Tagger);
697}
698
699void apertium_tagger::s_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
700 LtLocale::tryToSetLocale();
701
702 if (TheFunctionTypeOptionArgument == 0) {
703 expect_file_arguments(nonoptarg, 5, 7);
704 } else {
705 expect_file_arguments(nonoptarg, 6);
706 }
707 char *DicFn, *CrpFn, *TsxFn, *ProbFn, *TaggedFn, *UntaggedFn;
708 bool do_unsup = nonoptarg == 6;
709
710 get_file_arguments(
711 do_unsup,
712 &DicFn, &CrpFn, &TaggedFn, &UntaggedFn,
713 &TsxFn, &ProbFn);
714 init_FILE_Tagger(FILE_Tagger_, TsxFn);
715
716 UFILE* UntaggedCorpus;
717 MorphoStream* ms = setup_untagged_morpho_stream(
718 FILE_Tagger_,
719 DicFn, UntaggedFn,
720 &UntaggedCorpus);
721 FileMorphoStream tms(TaggedFn, true, &FILE_Tagger_.get_tagger_data());
722
723 FILE_Tagger_.init_probabilities_from_tagged_text_(tms, *ms);
724 delete ms;
725 u_fcloseu_fclose_72(UntaggedCorpus);
726
727 if (do_unsup) {
728 FILE_Tagger_.train(CrpFn, TheFunctionTypeOptionArgument);
729 }
730
731 FILE *Serialised_FILE_Tagger =
732 try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
733 FILE_Tagger_.serialise(Serialised_FILE_Tagger);
734 try_close_file("SERIALISED_TAGGER", ProbFn, Serialised_FILE_Tagger);
735}
736
737void apertium_tagger::t_FILE_Tagger(FILE_Tagger &FILE_Tagger_) {
738 LtLocale::tryToSetLocale();
739
740 expect_file_arguments(nonoptarg, 4);
741
742 char *DicFn, *TsxFn, *ProbFn, *UntaggedFn;
9
'TsxFn' declared without an initial value
743 UntaggedFn = NULL__null;
744
745 get_file_arguments(
10
Calling 'apertium_tagger::get_file_arguments'
18
Returning from 'apertium_tagger::get_file_arguments'
746 false,
747 &DicFn, NULL__null, NULL__null, &UntaggedFn,
748 &TsxFn, &ProbFn);
749 init_FILE_Tagger(FILE_Tagger_, TsxFn);
19
1st function call argument is an uninitialized value
750
751 UFILE* UntaggedCorpus;
752 MorphoStream* ms = setup_untagged_morpho_stream(
753 FILE_Tagger_,
754 DicFn, UntaggedFn,
755 &UntaggedCorpus);
756
757 FILE_Tagger_.init_and_train(*ms, TheFunctionTypeOptionArgument);
758 delete ms;
759 u_fcloseu_fclose_72(UntaggedCorpus);
760
761 FILE *Serialised_FILE_Tagger =
762 try_open_file("SERIALISED_TAGGER", ProbFn, "wb");
763 FILE_Tagger_.serialise(Serialised_FILE_Tagger);
764 try_close_file("SERIALISED_TAGGER", ProbFn, Serialised_FILE_Tagger);
765
766}
767}