Bug Summary

File: tmx_aligner_tool.cc
Warning: line 276, column 7
Value stored to 'globalQuality' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name tmx_aligner_tool.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ tmx_aligner_tool.cc
1/*************************************************************************
2* *
3* (C) Copyright 2004. Media Research Centre at the *
4* Sociology and Communications Department of the *
5* Budapest University of Technology and Economics. *
6* *
7* Developed by Daniel Varga. *
8* *
9* From hunalign; for license see ../AUTHORS and ../COPYING.hunalign *
10* *
11*************************************************************************/
12#include <apertium/tmx_aligner_tool.h>
13#include <lttoolbox/string_utils.h>
14
15namespace TMXAligner
16{
17
18extern std::string hunglishDictionaryHome;
19extern std::string hunglishExperimentsHome;
20
21void readTrailOrBisentenceList( std::istream& is, Trail& trail )
22{
23 trail.clear();
24 while ( is.peek() != -1 )
25 {
26 int huPos, enPos;
27
28 is >> huPos;
29 if (is.peek()!=' ')
30 {
31 std::cerr << "no space in line" << std::endl;
32 throw "data error";
33 }
34 is.ignore();
35
36 is >> enPos;
37 if (is.peek()!='\n')
38 {
39 std::cerr << "too much data in line" << std::endl;
40 throw "data error";
41 }
42 is.ignore();
43
44 trail.push_back(std::make_pair(huPos,enPos));
45 }
46}
47
48void scoreBisentenceListByFile( const BisentenceList& bisentenceList, const std::string& handAlignFile )
49{
50 Trail trailHand;
51 std::ifstream is( handAlignFile.c_str() );
52 readTrailOrBisentenceList( is, trailHand );
53
54 scoreBisentenceList( bisentenceList, trailHand );
55}
56
57void scoreTrailByFile( const Trail& bestTrail, const std::string& handAlignFile )
58{
59 Trail trailHand;
60 std::ifstream is( handAlignFile.c_str() );
61 readTrailOrBisentenceList( is, trailHand );
62
63 scoreTrail( bestTrail, trailHand );
64}
65
66// TEMP TEMP
// Forward declaration only: neither a definition nor a call of this function
// appears in this listing.
// NOTE(review): presumably a leftover debugging hook -- confirm it is still needed.
67void logLexiconCoverageOfBicorpus( SentenceList& huSentenceList, SentenceList& enSentenceList,
68 const DictionaryItems& dictionaryItems );
69
70
71// The <p> scores should not be counted. This causes some complications.
72// Otherwise, this is just the average score of segments.
73// Currently this does not like segment lengths of more than two.
74double globalScoreOfTrail( const Trail& trail, const AlignMatrix& dynMatrix,
75 const SentenceList& huSentenceListGarbled, const SentenceList& enSentenceListGarbled )
76{
77 TrailScoresInterval trailScoresInterval( trail, dynMatrix, huSentenceListGarbled, enSentenceListGarbled );
78
79 return trailScoresInterval(0,trail.size()-1);
80}
81
82
83void collectBisentences( const Trail& bestTrail, const AlignMatrix& dynMatrix,
84 const SentenceList& huSentenceListPretty, const SentenceList& enSentenceListPretty,
85 SentenceList& huBisentences, SentenceList& enBisentences,
86 double qualityThreshold )
87{
88 huBisentences.clear();
89 enBisentences.clear();
90
91 BisentenceList bisentenceList;
92
93 TrailScores trailScores( bestTrail, dynMatrix );
94 trailToBisentenceList( bestTrail, trailScores, qualityThreshold, bisentenceList );
95
96 for (size_t i=0; i<bisentenceList.size(); ++i )
97 {
98 huBisentences.push_back( huSentenceListPretty[ bisentenceList[i].first ] );
99 enBisentences.push_back( enSentenceListPretty[ bisentenceList[i].second ] );
100 }
101
102// std::cerr << huBisentences.size() << " bisentences collected." << std::endl;
103
104}
105
106
107void temporaryDumpOfAlignMatrix( std::ostream& os, const AlignMatrix& alignMatrix )
108{
109 for ( int huPos=0; huPos<alignMatrix.size(); ++huPos )
110 {
111 int rowStart = alignMatrix.rowStart(huPos);
112 int rowEnd = alignMatrix.rowEnd(huPos);
113 for ( int enPos=rowStart; enPos<rowEnd; ++enPos )
114 {
115 bool numeric = true;
116 if (numeric)
117 {
118 os << alignMatrix[huPos][enPos] << "\t" ;
119 }
120 else
121 {
122 if (alignMatrix[huPos][enPos]<0)
123 {
124 os << ". " ;
125 }
126 else if (alignMatrix[huPos][enPos]<10)
127 {
128 os << alignMatrix[huPos][enPos] << " " ;
129 }
130 else
131 {
132 os << "X " ;
133 }
134 }
135 }
136 os << std::endl;
137 }
138}
139
140
141double alignerToolWithObjects( const DictionaryItems& dictionary,
142 SentenceList& huSentenceListPretty,
143 SentenceList& enSentenceList,
144 const AlignParameters& alignParameters,
145 std::ostream& os )
146{
147 int huBookSize = huSentenceListPretty.size();
148 int enBookSize = enSentenceList.size();
149
150 SentenceValues huLength,enLength;
151 setSentenceValues( huSentenceListPretty, huLength, alignParameters.utfCharCountingMode ); // Here we use the most originalest Hungarian text.
152 setSentenceValues( enSentenceList, enLength, alignParameters.utfCharCountingMode );
153
154 bool quasiglobal_stopwordRemoval = false;
155// std::cerr << "quasiglobal_stopwordRemoval is set to " << quasiglobal_stopwordRemoval << std::endl;
156 if (quasiglobal_stopwordRemoval)
157 {
158 removeStopwords( huSentenceListPretty, enSentenceList );
159// std::cerr << "Stopwords removed." << std::endl;
160 }
161
162 SentenceList huSentenceListGarbled, enSentenceListGarbled;
163
164 normalizeTextsForIdentity( dictionary,
165 huSentenceListPretty, enSentenceList,
166 huSentenceListGarbled, enSentenceListGarbled );
167
168 const int minimalThickness = 500;
169
170 const double quasiglobal_maximalSizeInMegabytes = 4000;
171
172 const int maximalThickness = (int) (
173 quasiglobal_maximalSizeInMegabytes
174 * 1024*1024 /*bytes*/
175 / ( 2*sizeof(double)+sizeof(char) ) /* for the similarity, dynprog and trelli matrices */
176 / (double)( huBookSize ) /* the memory consumption of alignMatrix( huBookSize, enBookSize, thickness ) is huBookSize*thickness. */
177 / 2.4 /* unexplained empirically observed factor. linux top is weird. :) */
178 ) ;
179
180 // Note that thickness is not a radius, it's a diameter.
181 const double thicknessRatio = 10.0;
182
183 int thickness = (int) ( (double)( huBookSize>enBookSize ? huBookSize : enBookSize ) / thicknessRatio ) ;
184
185 thickness = ( thickness>minimalThickness ? thickness : minimalThickness ) ;
186
187 if (thickness>maximalThickness)
188 {
189// std::cerr << "WARNING: Downgrading planned thickness " << thickness << " to " << maximalThickness ;
190// std::cerr << " to obey memory constraint of " << quasiglobal_maximalSizeInMegabytes << " megabytes " << std::endl;
191// std::cerr << "You should recompile if you have much more physical RAM than that. People of the near-future, forgive me for the inconvenience." << std::endl;
192
193 thickness = maximalThickness;
194 }
195
196 AlignMatrix similarityMatrix( huBookSize, enBookSize, thickness, outsideOfRadiusValue );
197
198 sentenceListsToAlignMatrixIdentity( huSentenceListGarbled, enSentenceListGarbled, similarityMatrix );
199// std::cerr << std::endl;
200// std::cerr << "Rough translation-based similarity matrix ready." << std::endl;
201
202 Trail bestTrail;
203 AlignMatrix dynMatrix( huBookSize+1, enBookSize+1, thickness, 1e30 );
204
205 align( similarityMatrix, huLength, enLength, bestTrail, dynMatrix );
206// std::cerr << "Align ready." << std::endl;
207
208 double globalQuality;
209 globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
210 huSentenceListGarbled, enSentenceListGarbled );
211
212 // std::cerr << "Global quality of unfiltered align " << globalQuality << std::endl;
213
214 if (alignParameters.realignType==AlignParameters::NoRealign)
215 {
216 }
217 else
218 {
219 AlignMatrix similarityMatrixDetailed( huBookSize, enBookSize, thickness, outsideOfRadiusValue );
220
221 bool success = borderDetailedAlignMatrix( similarityMatrixDetailed, bestTrail, 5/*radius*/ );
222
223 if (!success)
224 {
225// std::cerr << "Realign zone too close to quasidiagonal border. Abandoning realign. The align itself is suspicious." << std::endl;
226 }
227 else
228 {
229// std::cerr << "Border of realign zone determined." << std::endl;
230
231 switch (alignParameters.realignType)
232 {
233 case AlignParameters::ModelOneRealign:
234 {
235 IBMModelOne modelOne;
236
237 SentenceList huBisentences,enBisentences;
238
239 throw "unimplemented";
240// std::cerr << "Plausible bisentences filtered." << std::endl;
241
242 modelOne.build(huBisentences,enBisentences);
243// std::cerr << "IBM Model I ready." << std::endl;
244
245 sentenceListsToAlignMatrixIBMModelOne( huSentenceListPretty, enSentenceList, modelOne, similarityMatrixDetailed );
246// std::cerr << "IBM Model I based similarity matrix ready." << std::endl;
247 break;
248 }
249 case AlignParameters::FineTranslationRealign:
250 {
251 TransLex transLex;
252 transLex.build(dictionary);
253// std::cerr << "Hashtable for dictionary ready." << std::endl;
254
255 sentenceListsToAlignMatrixTranslation( huSentenceListPretty, enSentenceList, transLex, similarityMatrixDetailed );
256
257// std::cerr << "Fine translation-based similarity matrix ready." << std::endl;
258 break;
259 }
260
261 case AlignParameters::NoRealign:
262 default:
263 {
264 break;
265 }
266 }
267
268 Trail bestTrailDetailed;
269 AlignMatrix dynMatrixDetailed( huBookSize+1, enBookSize+1, thickness, 1e30 );
270 align( similarityMatrixDetailed, huLength, enLength, bestTrailDetailed, dynMatrixDetailed );
271// std::cerr << "Detail realign ready." << std::endl;
272
273 bestTrail = bestTrailDetailed;
274 dynMatrix = dynMatrixDetailed;
275
276 globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
Value stored to 'globalQuality' is never read
277 huSentenceListGarbled, enSentenceListGarbled );
278
279 // std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
280 }
281 }
282
283 TrailScoresInterval trailScoresInterval( bestTrail, dynMatrix, huSentenceListGarbled, enSentenceListGarbled );
284
285 if ( alignParameters.postprocessTrailQualityThreshold != -1 )
286 {
287 postprocessTrail( bestTrail, trailScoresInterval, alignParameters.postprocessTrailQualityThreshold );
288// std::cerr << "Trail start and end postprocessed by score." << std::endl;
289 }
290
291 if ( alignParameters.postprocessTrailStartAndEndQualityThreshold != -1 )
292 {
293 postprocessTrailStartAndEnd( bestTrail, trailScoresInterval, alignParameters.postprocessTrailStartAndEndQualityThreshold );
294// std::cerr << "Trail start and end postprocessed by score." << std::endl;
295 }
296
297 if ( alignParameters.postprocessTrailByTopologyQualityThreshold != -1 )
298 {
299 postprocessTrailByTopology( bestTrail, alignParameters.postprocessTrailByTopologyQualityThreshold );
300// std::cerr << "Trail postprocessed by topology." << std::endl;
301 }
302
303 bool quasiglobal_spaceOutBySentenceLength = true;
304// std::cerr << "quasiglobal_spaceOutBySentenceLength is set to " << quasiglobal_spaceOutBySentenceLength << std::endl;
305 if (quasiglobal_spaceOutBySentenceLength)
306 {
307 spaceOutBySentenceLength( bestTrail, huSentenceListPretty, enSentenceList, alignParameters.utfCharCountingMode );
308// std::cerr << "Trail spaced out by sentence length." << std::endl;
309 }
310
311 // In cautious mode, auto-aligned rundles are thrown away if
312 // their left or right neighbour holes are not one-to-one.
313 if (alignParameters.cautiousMode)
314 {
315 cautiouslyFilterTrail( bestTrail );
316// std::cerr << "Trail filtered by topology." << std::endl;
317 }
318
319 globalQuality = globalScoreOfTrail( bestTrail, dynMatrix,
320 huSentenceListGarbled, enSentenceListGarbled );
321
322 // std::cerr << "Global quality of unfiltered align after realign " << globalQuality << std::endl;
323
324 bool textual = ! alignParameters.justSentenceIds ;
325
326 if (alignParameters.justBisentences)
327 {
328 BisentenceList bisentenceList;
329 trailToBisentenceList( bestTrail, bisentenceList );
330
331 filterBisentenceListByQuality( bisentenceList, dynMatrix, alignParameters.qualityThreshold );
332
333 BisentenceListScores bisentenceListScores(bisentenceList, dynMatrix);
334
335 for ( size_t i=0; i<bisentenceList.size(); ++i )
336 {
337 int huPos = bisentenceList[i].first;
338 int enPos = bisentenceList[i].second;
339
340 if (textual)
341 {
342 os << huSentenceListPretty[huPos].words;
343 }
344 else
345 {
346 os << huPos ;
347 }
348
349 os << "\t" ;
350
351 if (textual)
352 {
353 os << enSentenceList[enPos].words;
354 }
355 else
356 {
357 os << enPos ;
358 }
359
360 os << "\t" << bisentenceListScores(i);
361
362 os << std::endl;
363 }
364
365 if (! alignParameters.handAlignFilename.empty())
366 {
367 scoreBisentenceListByFile( bisentenceList, alignParameters.handAlignFilename );
368 }
369 }
370 else
371 {
372 filterTrailByQuality( bestTrail, trailScoresInterval, alignParameters.qualityThreshold );
373
374 for ( size_t i=0; i<bestTrail.size()-1; ++i )
375 {
376 // The [huPos, nexthuPos) interval corresponds to the [enPos, nextenPos) interval.
377 int huPos = bestTrail[i].first;
378 int enPos = bestTrail[i].second;
379 int nexthuPos = bestTrail[i+1].first;
380 int nextenPos = bestTrail[i+1].second;
381
382 if (textual)
383 {
384 int j;
385 for ( j=huPos; j<nexthuPos; ++j )
386 {
387 os << huSentenceListPretty[j].words;
388
389 if (j+1<nexthuPos)
390 os << " "; // os << " ~~~ ";
391 }
392
393 os << "\t" ;
394
395 for ( j=enPos; j<nextenPos; ++j )
396 {
397 os << enSentenceList[j].words;
398 if (j+1<nextenPos)
399 {
400 os << " "; // os << " ~~~ ";
401 }
402 }
403 }
404 else // (!textual)
405 {
406 os << huPos << "\t" << enPos ;
407 }
408
409 os << "\t" << trailScoresInterval(i);
410
411 os << std::endl;
412 }
413
414 if (! alignParameters.handAlignFilename.empty())
415 {
416 scoreTrailByFile( bestTrail, alignParameters.handAlignFilename );
417 }
418 }
419
420 return globalQuality;
421}
422
423
424void alignerToolWithFilenames( const DictionaryItems& dictionary,
425 const std::string& huFilename, const std::string& enFilename,
426 const AlignParameters& alignParameters,
427 const std::string& outputFilename)
428{
429 std::ifstream hus(huFilename.c_str());
430 SentenceList huSentenceListPretty;
431 huSentenceListPretty.readNoIds( hus );
432// std::cerr << huSentenceListPretty.size() << " hungarian sentences read." << std::endl;
433
434 std::ifstream ens(enFilename.c_str());
435 SentenceList enSentenceList;
436 enSentenceList.readNoIds( ens );
437// std::cerr << enSentenceList.size() << " english sentences read." << std::endl;
438
439 if ( (enSentenceList. size() < huSentenceListPretty.size()/5) ||
440 (huSentenceListPretty.size() < enSentenceList. size()/5) )
441 {
442// std::cerr << "Sizes differing too much. Ignoring files to avoid a rare loop bug." << std::endl;
443 return;
444 }
445
446 if (outputFilename.empty())
447 {
448 /* double globalQuality = */alignerToolWithObjects
449 ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, std::cout );
450
451// std::cerr << "Quality " << globalQuality << std::endl ;
452
453 }
454 else
455 {
456 std::ofstream os(outputFilename.c_str());
457 /*double globalQuality = */ alignerToolWithObjects
458 ( dictionary, huSentenceListPretty, enSentenceList, alignParameters, os );
459
460 // If you want to collect global quality information in batch mode, grep "^Quality" of stderr must do.
461// std::cerr << "Quality\t" << outputFilename << "\t" << globalQuality << std::endl ;
462 }
463
464}
465
466void fillPercentParameter( Arguments& args, const std::string& argName, double& value )
467{
468 int valueInt;
469 if ( args.getNumericParam(argName, valueInt))
470 {
471 value = 1.0 * valueInt / 100 ;
472 }
473}
474
// Writes the command-line help text of alignerTool to std::cerr.
// The whole text is a single string literal continued across lines with
// backslash-newline, so the leading whitespace of every continued line is part
// of the printed output.
// NOTE(review): the text advertises a -realign switch, but main_alignerTool in
// this listing never reads a "realign" argument -- confirm whether the help
// text or the argument parsing is out of date.
475void main_alignerToolUsage()
476{
477 std::cerr << "Usage (either):\n\
478 alignerTool [ common_arguments ] [ -hand=hand_align_file ] dictionary_file source_text target_text\n\
479\n\
480or:\n\
481 alignerTool [ common_arguments ] -batch dictionary_file batch_file\n\
482\n\
483where\n\
484common_arguments ::= [ -text ] [ -bisent ] [ -utf ] [ -cautious ] [ -realign [ -autodict=filename ] ]\n\
485 [ -thresh=n ] [ -ppthresh=n ] [ -headerthresh=n ] [ -topothresh=n ]\n\
486\n\
487Arguments:\n\
488\n\
489-text\n\
490 The output should be in text format, rather than the default (numeric) ladder format.\n\
491\n\
492-bisent\n\
493 Only bisentences (one-to-one alignment segments) are printed. In non-text mode, their\n\
494 starting rung is printed.\n\
495\n\
496-cautious\n\
497 In -bisent mode, only bisentences for which both the preceding and the following\n\
498 segments are one-to-one are printed. In the default non-bisent mode, only rungs\n\
499 for which both the preceding and the following segments are one-to-one are printed.\n\
500\n\
501-hand=file\n\
502 When this argument is given, the precision and recall of the alignment is calculated\n\
503 based on the manually built ladder file. Information like the following is written\n\
504 on the standard error: \n\
505 53 misaligned out of 6446 correct items, 6035 bets.\n\
506 Precision: 0.991218, Recall: 0.928017\n\
507 \n\
508 Note that by default, 'item' means rung. The switch -bisent also changes the semantics\n\
509 of the scoring from rung-based to bisentence-based and in this case 'item' means bisentences.\n\
510 See File formats about the format of this input align file.\n\
511\n\
512-autodict=filename\n\
513 The dictionary built during realign is saved to this file. By default, it is not saved.\n\
514\n\
515-utf\n\
516 The system uses the character counts of the sentences as information for the\n\
517 pairing of sentences. By default, it assumes one-byte character encoding such\n\
518 as ISO Latin-1 when calculating these counts. If our text is in UTF-8 format,\n\
519 byte counts and character counts are different, and we must use the -utf switch\n\
520 to force the system to properly calculate character counts.\n\
521 Note: UTF-16 input is not supported.\n\
522\n\
523Postfiltering options:\n\
524There are various postprocessors which remove implausible rungs based on various heuristics.\n\
525\n\
526-thresh=n\n\
527 Don't print out segments with score lower than n/100.\n\
528\n\
529-ppthresh=n\n\
530 Filter rungs with less than n/100 average score in their vicinity.\n\
531\n\
532-headerthresh=n\n\
533 Filter all rungs at the start and end of texts until finding a reliably\n\
534 plausible region.\n\
535\n\
536-topothresh=n\n\
537 Filter rungs with less than n percent of one-to-one segments in their vicinity.\n\
538\n\
539";
540}
541
542int main_alignerTool(int argC, char* argV[])
543{
544#ifndef _DEBUG
545 try
546#endif
547 {
548 if (argC<4)
549 {
550 main_alignerToolUsage();
551 throw "";
552 }
553
554 Arguments args;
555 std::vector<const char*> remains;
556 args.read( argC, argV, remains );
557
558 AlignParameters alignParameters;
559
560 if (args.getSwitchCompact("text"))
561 {
562 alignParameters.justSentenceIds = false;
563 }
564
565 if (args.getSwitchCompact("bisent"))
566 {
567 alignParameters.justBisentences = true;
568 }
569
570 if (args.getSwitchCompact("cautious"))
571 {
572 alignParameters.cautiousMode = true;
573 }
574
575 alignParameters.utfCharCountingMode = args.getSwitchCompact("utf");
576
577 fillPercentParameter( args, "thresh", alignParameters.qualityThreshold );
578
579 fillPercentParameter( args, "ppthresh", alignParameters.postprocessTrailQualityThreshold );
580
581 fillPercentParameter( args, "headerthresh", alignParameters.postprocessTrailStartAndEndQualityThreshold );
582
583 fillPercentParameter( args, "topothresh", alignParameters.postprocessTrailByTopologyQualityThreshold );
584
585 bool batchMode = args.getSwitchCompact("batch") ;
586
587 if (batchMode && (remains.size()!=2) )
588 {
589 std::cerr << "Batch mode requires exactly two file arguments." << std::endl;
590 std::cerr << std::endl;
591
592 main_alignerToolUsage();
593 throw "argument error";
594 }
595
596 std::string handArgumentname = "hand";
597 if (args.find(handArgumentname)!=args.end())
598 {
599 if (batchMode)
600 {
601 std::cerr << "-batch and -" << handArgumentname << " are incompatible switches." << std::endl;
602 throw "argument error";
603 }
604 else
605 {
606 alignParameters.handAlignFilename = args[handArgumentname].dString ;
607 args.erase(handArgumentname);
608
609 if (alignParameters.handAlignFilename.empty())
610 {
611 std::cerr << "-" << handArgumentname << " switch requires a filename value." << std::endl;
612 throw "argument error";
613 }
614 }
615 }
616
617 std::string autoDictDumpArgumentname = "autodict";
618 if (args.find(autoDictDumpArgumentname)!=args.end())
619 {
620 if (batchMode)
621 {
622 std::cerr << "-batch and -" << autoDictDumpArgumentname << " are incompatible switches." << std::endl;
623 throw "argument error";
624 }
625 else
626 {
627 alignParameters.autoDictionaryDumpFilename = args[autoDictDumpArgumentname].dString ;
628 args.erase(autoDictDumpArgumentname);
629
630 if (alignParameters.autoDictionaryDumpFilename.empty())
631 {
632 std::cerr << "-" << autoDictDumpArgumentname << " switch requires a filename value." << std::endl;
633 throw "argument error";
634 }
635 }
636 }
637
638 if (!batchMode && (remains.size()!=3) )
639 {
640 std::cerr << "Nonbatch mode requires exactly three file arguments." << std::endl;
641 std::cerr << std::endl;
642
643 main_alignerToolUsage();
644 throw "argument error";
645 }
646
647 try
648 {
649 args.checkEmptyArgs();
650 }
651 catch (...)
652 {
653 std::cerr << std::endl;
654
655 main_alignerToolUsage();
656 throw "argument error";
657 }
658
659// std::cerr << "Reading dictionary..." << std::endl;
660 const char* dicFilename = remains[0] ;
661 DictionaryItems dictionary;
662 std::ifstream dis(dicFilename);
663 dictionary.read(dis);
664
665 if (batchMode)
666 {
667 const char* batchFilename = remains[1] ;
668 std::ifstream bis(batchFilename);
669
670 while (bis.good()&&!bis.eof())
671 {
672 std::string line;
673 std::getline(bis,line);
674
675 std::vector<std::string> words;
676 split( line, words, '\t' );
677
678 if (words.size()!=3)
679 {
680 std::cerr << "Batch file has incorrect format." << std::endl;
681 throw "data error";
682 }
683
684 std::string huFilename, enFilename, outFilename;
685 huFilename = words[0];
686 enFilename = words[1];
687 outFilename = words[2];
688
689// std::cerr << "Processing " << outFilename << std::endl;
690 bool failed = false;
691 try
692 {
693 alignerToolWithFilenames( dictionary, huFilename, enFilename, alignParameters, outFilename );
694 }
695 catch ( const char* errorType )
696 {
697 std::cerr << errorType << std::endl;
698 failed = true;
699 }
700 catch ( std::exception& e )
701 {
702 std::cerr << "some failed assertion:" << e.what() << std::endl;
703 failed = true;
704 }
705 catch ( ... )
706 {
707 std::cerr << "some unknown failed assertion..." << std::endl;
708 failed = true;
709 }
710
711 if (failed)
712 {
713 std::cerr << "Align failed for " << outFilename << std::endl;
714 }
715 }
716 }
717 else
718 {
719 const char* huFilename = remains[1] ;
720 const char* enFilename = remains[2] ;
721
722 alignerToolWithFilenames( dictionary, huFilename, enFilename, alignParameters );
723 }
724 }
725#ifndef _DEBUG
726 catch ( const char* errorType )
727 {
728 std::cerr << errorType << std::endl;
729 return -1;
730 }
731 catch ( std::exception& e )
732 {
733 std::cerr << "some failed assertion:" << e.what() << std::endl;
734 return -1;
735 }
736 catch ( ... )
737 {
738 std::cerr << "some unknown failed assertion..." << std::endl;
739 return -1;
740 }
741#endif
742 return 0;
743}
744
745}