Bug Summary

File:tmx_alignment.cc
Warning:line 136, column 13
Value stored to 'lengthFitness' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name tmx_alignment.cc -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=all -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -resource-dir /usr/lib/llvm-16/lib/clang/16 -D HAVE_CONFIG_H -I . -I .. -I /usr/include/utf8cpp/ -I /usr/local/include -I /usr/include/libxml2 -I /usr/local/include -D PIC -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -std=c++2b -fdeprecated-macro -fdebug-compilation-dir=/tmp/build/apertium/apertium-3.9.12+g928~04ac90c6/apertium -ferror-limit 19 -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/apertium/scan-build/2024-09-11-155328-205384-1 -x c++ tmx_alignment.cc
1/*************************************************************************
2* *
3* (C) Copyright 2004. Media Research Centre at the *
4* Sociology and Communications Department of the *
5* Budapest University of Technology and Economics. *
6* *
7* Developed by Daniel Varga. *
8* *
9* From hunalign; for license see ../AUTHORS and ../COPYING.hunalign *
10* *
11*************************************************************************/
12#include <apertium/tmx_alignment.h>
13
14#include <apertium/tmx_words.h> // For SentenceList
15#include <apertium/tmx_dictionary.h> // For FrequencyMap
16#include <lttoolbox/string_utils.h>
17
18#include <iostream>
19#include <map>
20#include <set>
21#include <algorithm>
22
23// Copypaste-elve. TODO Elhelyezni.
24#define massert(e)if (!(e)) { std::cerr << "e" << " failed" <<
std::endl; throw "assert"; }
if (!(e)) { std::cerr << #e << " failed" << std::endl; throw "assert"; }
25
26std::ostream& operator<<( std::ostream& os, std::pair<int,int> p )
27{
28 os << p.first << "," << p.second;
29 return os;
30}
31
32namespace TMXAligner
33{
34
35
36// Attention, the two-sentence length is the first argument. Usually the Hungarian is, but not here.
37// The bigger the better. closeness is always smaller than bestScore.
38double closeness( double twoSentenceLength, double oneSentenceLength )
39{
40 const double bestScore = 0.3;
41 const double quasiglobal_closenessMultiplier = 0.3;
42
43 double ratio;
44
45 if (twoSentenceLength>oneSentenceLength)
46 {
47 ratio = (twoSentenceLength+1)/(oneSentenceLength+1);
48 }
49 else
50 {
51 ratio = (oneSentenceLength+1)/(twoSentenceLength+1);
52 }
53
54 ratio -= 1.0;
55
56 // assert(ratio>=0);
57 return bestScore - quasiglobal_closenessMultiplier * ratio;
58}
59
60const unsigned char Diag = 1;
61const unsigned char HuSkip = 2;
62const unsigned char EnSkip = 3;
63const unsigned char HuHuEnSkip = 4;
64const unsigned char HuEnEnSkip = 5;
65const unsigned char Dead = 6;
66
67void buildDynProgMatrix( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
68 QuasiDiagonal<double>& v, TrelliMatrix& trellis )
69{
70 const int huBookSize = w.size();
71
72
73 int huPos,enPos;
74
75 // v[huPos][enPos] gives the similarity of the [0,huPos) and [0,enPos) intervals.
76 // The smaller value, the better similarity. (Unlike in the original similarity matrix w, where bigger is better.)
77
78 double infinity = 1e6;
79
80 for ( huPos=0; huPos<=huBookSize; ++huPos )
81 {
82 int rowStart = v.rowStart(huPos);
83 int rowEnd = v.rowEnd(huPos);
84 for ( enPos=rowStart; enPos<rowEnd; ++enPos )
85 {
86 double& val = v.cell(huPos,enPos);
87 unsigned char& trail = trellis.cell(huPos,enPos);
88
89 bool quasiglobal_knightsMoveAllowed = true;
90 if (quasiglobal_knightsMoveAllowed)
91 {
92 double lengthFitness(0);
93
94 bool quasiglobal_lengthFitnessApplied = true;
95
96 // The array is indexed by the step directions. The smaller value, the better.
97 double values[Dead];
98 int i;
99 for ( i=1; i<Dead; ++i )
100 values[i] = infinity;
101
102 if (huPos>0)
103 {
104 values[HuSkip] = v[huPos-1][enPos] - skipScore;
105 }
106
107 if (enPos>0)
108 {
109 values[EnSkip] = v[huPos][enPos-1] - skipScore;
110 }
111
112 if ((huPos>0) && (enPos>0))
113 {
114 if (quasiglobal_lengthFitnessApplied)
115 {
116 lengthFitness = closeness(huLength[huPos-1], enLength[enPos-1]);
117 }
118 else
119 {
120 lengthFitness = 0;
121 }
122
123 values[Diag] = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] - lengthFitness ;
124 }
125
126 const double dotLength = 2.0 ;
127
128 if ((huPos>1) && (enPos>0))
129 {
130 if (quasiglobal_lengthFitnessApplied)
131 {
132 lengthFitness = closeness(huLength[huPos-2]+huLength[huPos-1]+dotLength, enLength[enPos-1]);
133 }
134 else
135 {
136 lengthFitness = 0;
Value stored to 'lengthFitness' is never read
137 }
138
139 }
140
141 if ((huPos>0) && (enPos>1))
142 {
143 if (quasiglobal_lengthFitnessApplied)
144 {
145 // Attention, the two-sentence length is the first argument. Usually the Hungarian is the first argument, but not here.
146 lengthFitness = closeness(enLength[enPos-2]+enLength[enPos-1]+dotLength, huLength[huPos-1]);
147 }
148 else
149 {
150 lengthFitness = 0;
151 }
152
153 const double& a = w[huPos-1][enPos-1] ;
154 const double& b = w[huPos-1][enPos-2] ;
155 values[HuEnEnSkip] = v[huPos-1][enPos-2] - ( a<b ? a : b ) - skipScore - lengthFitness ; // The worse of the two crossed square.
156 }
157
158 unsigned char direction = Dead;
159 double bestValue = infinity;
160 for ( i=1; i<Dead; ++i )
161 {
162 if (values[i]<bestValue)
163 {
164 bestValue = values[i];
165 direction = i;
166 }
167 }
168
169 trail = direction;
170 if (direction==Dead)
171 {
172 val = 0;
173 }
174 else
175 {
176 val = bestValue;
177 }
178 }
179 else // (!quasiglobal_knightsMoveAllowed)
180 {
181 int borderCase = ( (huPos==0) ? 0 : 2 ) + ( (enPos==0) ? 0 : 1 ) ;
182
183 switch (borderCase)
184 {
185 case 0:
186 {
187 val = 0;
188 trail = Dead;
189 break;
190 }
191 case 1: // huPos==0
192 {
193 val = v[0][enPos-1] - skipScore ;
194 trail = EnSkip;
195 break;
196 }
197 case 2: // enPos==0
198 {
199 val = v[huPos-1][0] - skipScore ;
200 trail = HuSkip;
201 break;
202 }
203 case 3:
204 {
205 double x = v[huPos-1][enPos] - skipScore ;
206 double y = v[huPos] [enPos-1] - skipScore ;
207 double xy = v[huPos-1][enPos-1] - w[huPos-1][enPos-1] ;
208
209 double best = xy;
210 trail = Diag;
211 if (x<best)
212 {
213 best = x;
214 trail = HuSkip;
215 }
216 if (y<best)
217 {
218 best = y;
219 trail = EnSkip;
220 }
221 val = best;
222 break;
223 }
224 }
225 }
226 }
227 }
228}
229
230void trelliToLadder( const TrelliMatrix& trellis, Trail& bestTrail )
231{
232 bestTrail.clear();
233
234 // The -1 is needed because the trellis matrix is one larger than the similarity matrix.
235 // This points to its downmost rightmost element.
236 const int huBookSize = trellis.size()-1;
237 const int enBookSize = trellis.otherSize()-1;
238
239 int huPos=huBookSize;
240 int enPos=enBookSize;
241
242 bool logging = false;
243
244 if (logging) std::cerr << std::endl;
245
246 bool over = false;
247 bool hopelesslyBadTrail = false;
248 bestTrail.push_back(std::make_pair(huPos,enPos));
249
250 while (true)
251 {
252 unsigned char trelli = trellis[huPos][enPos];
253
254 if ((huPos==0) || (enPos==0))
255 break;
256
257 switch (trelli)
258 {
259 case Diag :
260 {
261 --huPos;
262 --enPos;
263 break;
264 }
265 case HuSkip :
266 {
267 --huPos;
268 break;
269 }
270 case EnSkip :
271 {
272 --enPos;
273 break;
274 }
275 case HuHuEnSkip :
276 {
277 huPos -= 2;
278 --enPos;
279 break;
280 }
281 case HuEnEnSkip :
282 {
283 --huPos;
284 enPos -= 2;
285 break;
286 }
287 case Dead :
288 {
289 over = true;
290 break;
291 }
292 default:
293 {
294 hopelesslyBadTrail = true;
295 over = true;
296 break;
297 }
298 }
299
300 if (over)
301 break;
302
303 bestTrail.push_back(std::make_pair(huPos,enPos));
304
305 if (logging)
306 {
307 std::cerr << huPos << " \t" << enPos << std::endl;
308 }
309
310 }
311
312 if (hopelesslyBadTrail)
313 {
314 bestTrail.clear();
315 bestTrail.push_back(std::make_pair(huBookSize,enBookSize));
316 bestTrail.push_back(std::make_pair(0,0));
317 std::cerr << "Error: hopelessly bad trail." << std::endl;
318 }
319
320 std::reverse(bestTrail.begin(), bestTrail.end() );
321}
322
323
324void align( const AlignMatrix& w, const SentenceValues& huLength, const SentenceValues& enLength,
325 Trail& bestTrail, AlignMatrix& v )
326{
327 const int huBookSize = w.size();
328 const int enBookSize = w.otherSize();
329 const int thickness = w.thickness();
330
331 massert(w.size()+1 == v.size())if (!(w.size()+1 == v.size())) { std::cerr << "w.size()+1 == v.size()"
<< " failed" << std::endl; throw "assert"; }
;
332 massert(w.otherSize()+1 == v.otherSize())if (!(w.otherSize()+1 == v.otherSize())) { std::cerr <<
"w.otherSize()+1 == v.otherSize()" << " failed" <<
std::endl; throw "assert"; }
;
333
334 TrelliMatrix trellis( huBookSize+1,enBookSize+1,thickness, Dead );
335
336 buildDynProgMatrix( w, huLength, enLength, v, trellis );
337
338// std::cerr << "Matrix built." << std::endl;
339
340 trelliToLadder( trellis, bestTrail );
341
342// std::cerr << "Trail found." << std::endl;
343}
344
345
346bool oneToOne( const Trail& bestTrail, int pos )
347{
348 return (
349 ( bestTrail[pos+1].first -bestTrail[pos].first == 1 )
350 &&
351 ( bestTrail[pos+1].second-bestTrail[pos].second == 1 )
352 );
353}
354
355
356int countIntersectionOfTrails( const Trail& sx, const Trail& sy )
357{
358 int inter(0);
359
360 Trail::const_iterator sxt = sx.begin();
361 Trail::const_iterator syt = sy.begin();
362 Trail::const_iterator sxe = sx.end();
363 Trail::const_iterator sye = sy.end();
364 for ( ; sxt!=sxe && syt!=sye ; )
365 {
366 if ( *sxt < *syt )
367 ++sxt;
368 else if ( *sxt > *syt )
369 ++syt;
370 else
371 {
372 ++inter;
373 ++sxt;
374 ++syt;
375 }
376 }
377 return inter;
378}
379
380
381// A bit of an abuse of the fact that Trail and BisentenceList are typedef'd to the same structure.
382double scoreTrailOrBisentenceList( const Trail& trailAuto, const Trail& trailHand )
383{
384 int score = countIntersectionOfTrails( trailAuto, trailHand );
385
386 std::cerr << trailAuto.size()-score << " misaligned out of " << trailHand.size() << " correct items, "
387 << trailAuto.size() << " bets." << std::endl;
388
389 std::cerr << "Precision: " << 1.0*score/trailAuto.size()
390 << ", Recall: " << 1.0*score/trailHand.size() << std::endl;
391
392 double ratio = 1.0*(trailAuto.size()-score)/trailAuto.size();
393 return ratio;
394}
395
396
397void trailToBisentenceList( const Trail& bestTrail,
398 BisentenceList& bisentenceList )
399{
400 bisentenceList.clear();
401
402 int trailSize = bestTrail.size();
403
404 for ( int pos=0; pos<trailSize-1; ++pos )
405 {
406 if ( oneToOne(bestTrail,pos) )
407 {
408 bisentenceList.push_back(bestTrail [pos]);
409 }
410 }
411}
412
413
414double scoreBisentenceList( const BisentenceList& bisentenceListAuto, const Trail& trailHand )
415{
416 BisentenceList bisentenceListHand;
417 trailToBisentenceList( trailHand, bisentenceListHand );
418
419 double score = scoreTrailOrBisentenceList( bisentenceListAuto, bisentenceListHand ) ;
420
421 return score;
422}
423
424double scoreTrail( const Trail& trailAuto, const Trail& trailHand )
425{
426 return ( scoreTrailOrBisentenceList( trailAuto, trailHand ) );
427}
428
429
430void setBox( AlignMatrix& m, int huPos, int enPos, int radius, int insideOfRadiusValue )
431{
432 for ( int x=huPos-radius; x<=huPos+radius; ++x )
433 {
434 for ( int y=enPos-radius; y<=enPos+radius; ++y )
435 {
436 if ( (x>=0) && (x<m.size()) && (y>=0) && (y<m.otherSize()) )
437 {
438 m.cell(x,y) = insideOfRadiusValue ; // ToDo: Should this be (y,x) instead? Function has args y,x not x,y. Fix here or function
439 }
440 }
441 }
442}
443
444// Fills the complement of the radius of the trail with minus infties.
445// The return value true means success. Failure means that during the fill,
446// we intersected the outside of the quasidiagonal area.
447// In this case, the operation is not finished.
448bool borderDetailedAlignMatrix( AlignMatrix& alignMatrix, const Trail& trail, int radius )
449{
450 int huBookSize = alignMatrix.size();
451
452 int huPos, enPos;
453 for ( huPos=0; huPos<huBookSize; ++huPos )
454 {
455 int rowStart = alignMatrix.rowStart(huPos);
456 int rowEnd = alignMatrix.rowEnd(huPos);
457 for ( enPos=rowStart; enPos<rowEnd; ++enPos )
458 {
459 alignMatrix.cell(huPos,enPos) = outsideOfRadiusValue;
460 }
461 }
462
463 // We seriously use the fact that many-to-zero segments are subdivided into one-to-zero segments.
464 // Inside setBox, an exception is thrown if we try to write outside the quasidiagonal.
465 // If we catch such an exception, it means that the quasidiagonal is not thick enough.
466 // In this case, we abandon the whole align, just to be sure.
467 try
468 {
469 for ( size_t i=0; i<trail.size(); ++i )
470 {
471 setBox( alignMatrix, trail[i].first, trail[i].second, radius, insideOfRadiusValue );
472 }
473 }
474 catch ( const char* errorType )
475 {
476 massert( std::string(errorType) == "out of quasidiagonal" )if (!(std::string(errorType) == "out of quasidiagonal")) { std
::cerr << "std::string(errorType) == \"out of quasidiagonal\""
<< " failed" << std::endl; throw "assert"; }
477 return false;
478 }
479
480 bool verify = true;
481 if (verify)
482 {
483 int numberOfEvaluatedItems(0);
484 for ( huPos=0; huPos<huBookSize; ++huPos )
485 {
486 int rowStart = alignMatrix.rowStart(huPos);
487 int rowEnd = alignMatrix.rowEnd(huPos);
488 for ( enPos=rowStart; enPos<rowEnd; ++enPos )
489 {
490 if (alignMatrix[huPos][enPos]==insideOfRadiusValue)
491 {
492 ++numberOfEvaluatedItems;
493 }
494 }
495 }
496
497 std::cerr << numberOfEvaluatedItems << " items inside the border." << std::endl;
498 }
499
500 return true;
501}
502
503template <class T>
504void dumpAlignMatrix( const QuasiDiagonal<T>& alignMatrix )
505{
506 int huPos,enPos;
507
508 int huBookSize = alignMatrix.size();
509 int enBookSize = alignMatrix.otherSize();
510
511 for ( huPos=0; huPos<huBookSize; ++huPos )
512 {
513 for ( enPos=0; enPos<enBookSize; ++enPos )
514 {
515 int start = alignMatrix.rowStart(huPos);
516 int end = alignMatrix.rowEnd (huPos);
517
518 if ( (enPos<start) || (enPos>=end) )
519 {
520 std::cout << "-1\t";
521 continue;
522 }
523
524 std::cout << alignMatrix[huPos][enPos] << "\t";
525 }
526 std::cout << std::endl;
527 }
528}
529
530void dumpAlignMatrix( const QuasiDiagonal<int>& alignMatrix, bool graphical )
531{
532 int huPos,enPos;
533
534 int huBookSize = alignMatrix.size();
535 int enBookSize = alignMatrix.otherSize();
536
537 for ( huPos=0; huPos<huBookSize; ++huPos )
538 {
539 for ( enPos=0; enPos<enBookSize; ++enPos )
540 {
541 int start = alignMatrix.rowStart(huPos);
542 int end = alignMatrix.rowEnd (huPos);
543
544 if ( (enPos<start) || (enPos>=end) )
545 {
546 if (graphical)
547 {
548 std::cout << " ";
549 }
550 else
551 {
552 std::cout << "-1\t";
553 }
554 continue;
555 }
556
557 if (graphical)
558 {
559 char c(' ');
560 switch (alignMatrix[huPos][enPos])
561 {
562 case 0: c=' '; break;
563 case 1: c='.'; break;
564 case 2: c=':'; break;
565 case 3: c='|'; break;
566 case 4: c='+'; break;
567 default: c='X'; break;
568 }
569 std::cout << c << " ";
570 }
571 else
572 {
573 std::cout << alignMatrix[huPos][enPos] << "\t";
574 }
575 }
576 std::cout << std::endl;
577 }
578}
579
580void dumpTrelliMatrix( const TrelliMatrix& trellis )
581{
582 std::map<int, std::string> directions;
583
584 directions[Diag] = "HuEn";
585 directions[HuSkip] = "Hu";
586 directions[EnSkip] = "En";
587 directions[HuHuEnSkip] = "HuHuEn";
588 directions[HuEnEnSkip] = "HuEnEn";
589 directions[Dead] = "Dead";
590
591 int huPos,enPos;
592
593 int huBookSize = trellis.size();
594 int enBookSize = trellis.otherSize();
595
596 for ( huPos=0; huPos<huBookSize; ++huPos )
597 {
598 for ( enPos=0; enPos<enBookSize; ++enPos )
599 {
600 int start = trellis.rowStart(huPos);
601 int end = trellis.rowEnd (huPos);
602
603 if ( (enPos<start) || (enPos>=end) )
604 {
605 std::cout << "-1\t";
606 continue;
607 }
608
609 std::cout << directions[trellis[huPos][enPos]] << "\t";
610 }
611 std::cout << std::endl;
612 }
613}
614
615} // namespace TMXAligner