clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name MweSplitApplicator.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -resource-dir /usr/lib/llvm-16/lib/clang/16 -D BOOST_NO_CXX98_FUNCTION_BASE=1 -D HAS_FS -D UNISTR_FROM_CHAR_EXPLICIT=explicit -D UNISTR_FROM_STRING_EXPLICIT=explicit -D _POSIX_C_SOURCE=200112 -D cg3_EXPORTS -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include/posix -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/include -I /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -I /usr/local/include -D NDEBUG -internal-isystem /usr/lib/llvm-16/bin/../include/c++/v1 -internal-isystem /usr/lib/llvm-16/lib/clang/16/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/14/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O3 -Wno-missing-field-initializers -Wno-deprecated -Wno-unused-parameter -Wno-unused-result -std=c++2b -fdebug-compilation-dir=/tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src -ferror-limit 19 -fvisibility-inlines-hidden -fgnuc-version=4.2.1 -fno-implicit-modules -fcxx-exceptions -fexceptions -vectorize-loops -vectorize-slp -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/build/cg3/scan-build/2024-09-11-161008-13503-1 -x c++ /tmp/build/cg3/cg3-1.4.17+g2285~f7d45cea/src/MweSplitApplicator.cpp
| 1 | |
| 2 | |
| 3 | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 | |
| 9 | |
| 10 | |
| 11 | |
| 12 | |
| 13 | |
| 14 | |
| 15 | |
| 16 | |
| 17 | |
| 18 | |
| 19 | |
| 20 | #include "MweSplitApplicator.hpp" |
| 21 | |
| 22 | namespace CG3 { |
| 23 | |
| 24 | MweSplitApplicator::MweSplitApplicator(std::ostream& ux_err) |
| 25 | : GrammarApplicator(ux_err) |
| 26 | { |
| 27 | Grammar* grammar = new Grammar; |
| 28 | grammar->ux_stderr = ux_stderr; |
| 29 | grammar->allocateDummySet(); |
| 30 | grammar->delimiters = grammar->allocateSet(); |
| 31 | grammar->addTagToSet(grammar->allocateTag(STR_DUMMY), grammar->delimiters); |
| 32 | grammar->reindex(); |
| 33 | setGrammar(grammar); |
| 34 | owns_grammar = true; |
| 35 | is_conv = true; |
| 36 | } |
| 37 | |
| 38 | void MweSplitApplicator::runGrammarOnText(std::istream& input, std::ostream& output) { |
| 39 | GrammarApplicator::runGrammarOnText(input, output); |
| 40 | } |
| 41 | |
| 42 | const Tag* MweSplitApplicator::maybeWfTag(const Reading* r) { |
| 43 | for (auto tter : r->tags_list) { |
| 44 | if ((!show_end_tags && tter == endtag) || tter == begintag) { |
| 45 | continue; |
| 46 | } |
| 47 | if (tter == r->baseform || tter == r->parent->wordform->hash) { |
| 48 | continue; |
| 49 | } |
| 50 | const Tag* tag = grammar->single_tags[tter]; |
| 51 | |
| 52 | if (tag->type & T_WORDFORM) { |
| 53 | return tag; |
| 54 | } |
| 55 | } |
| 56 | return nullptr; |
| 57 | } |
| 58 | |
| 59 | std::vector<Cohort*> MweSplitApplicator::splitMwe(Cohort* cohort) { |
| 60 | constexpr UChar rtrimblank[] = { ' ', '\n', '\r', '\t', 0 }; |
| 61 | constexpr UChar textprefix[] = { ':', 0 }; |
| 62 | std::vector<Cohort*> cos; |
| 63 | size_t n_wftags = 0; |
| 64 | size_t n_goodreadings = 0; |
| 65 | for (auto rter1 : cohort->readings) { |
| 66 | if (maybeWfTag(rter1) != nullptr) { |
| 67 | ++n_wftags; |
| 68 | } |
| 69 | ++n_goodreadings; |
| 70 | } |
| 71 | |
| 72 | if (n_wftags < n_goodreadings) { |
| |
| 73 | if (n_wftags > 0) { |
| 74 | u_fprintf(ux_stderr, "WARNING: Line %u: Some but not all main-readings of %S had wordform-tags (not completely mwe-disambiguated?), not splitting.\n", cohort->line_number, cohort->wordform->tag.data()); |
| 75 | |
| 76 | } |
| 77 | cos.push_back(cohort); |
| 78 | return cos; |
| 79 | } |
| 80 | UString pretext; |
| 81 | for (auto r : cohort->readings) { |
| 82 | size_t pos = std::numeric_limits<size_t>::max(); |
| 83 | Reading* prev = nullptr; |
| 8 | | 'prev' initialized to a null pointer value | |
|
| 84 | for (auto sub = r; sub; sub = sub->next) { |
| 9 | | Loop condition is true. Entering loop body | |
|
| 85 | const Tag* wfTag = maybeWfTag(sub); |
| 86 | if (wfTag == nullptr) { |
| 10 | | Assuming the condition is true | |
|
| |
| 87 | prev = prev->next; |
| 12 | | Access to field 'next' results in a dereference of a null pointer (loaded from variable 'prev') |
|
| 88 | } |
| 89 | else { |
| 90 | ++pos; |
| 91 | Cohort* c; |
| 92 | while (cos.size() < pos + 1) { |
| 93 | c = alloc_cohort(cohort->parent); |
| 94 | c->global_number = gWindow->cohort_counter++; |
| 95 | cohort->parent->appendCohort(c); |
| 96 | if(pretext.size() > 0) { |
| 97 | c->text = pretext; |
| 98 | pretext.clear(); |
| 99 | } |
| 100 | cos.push_back(c); |
| 101 | } |
| 102 | c = cos[pos]; |
| 103 | |
| 104 | const size_t wfBeg = 2; |
| 105 | const size_t spBeg0 = wfTag->tag.find_first_not_of(rtrimblank, wfBeg); |
| 106 | const size_t spBeg = sub->next ? spBeg0 : wfBeg; |
| 107 | const size_t wfEnd = wfTag->tag.size() - 3; |
| 108 | const size_t spEnd = 1 + wfTag->tag.find_last_not_of(rtrimblank, wfEnd); |
| 109 | const UString& wf = |
| 110 | wfTag->tag.substr(0, wfBeg) |
| 111 | + wfTag->tag.substr(spBeg, spEnd - spBeg) |
| 112 | + wfTag->tag.substr(wfEnd + 1); |
| 113 | if (c->wordform != 0 && wf != c->wordform->tag) { |
| 114 | u_fprintf(ux_stderr, "WARNING: Line %u: Ambiguous wordform-tags for same cohort, '%S' vs '%S', not splitting.\n", numLines, wf.data(), c->wordform->tag.data()); |
| 115 | cos.clear(); |
| 116 | cos.push_back(cohort); |
| 117 | return cos; |
| 118 | } |
| 119 | c->wordform = addTag(wf); |
| 120 | if (spBeg > wfBeg) { |
| 121 | pretext = textprefix + wfTag->tag.substr(wfBeg, spBeg - wfBeg); |
| 122 | } |
| 123 | if (spEnd < wfEnd + 1) { |
| 124 | c->text = textprefix + wfTag->tag.substr(spEnd, wfEnd + 1 - spEnd); |
| 125 | } |
| 126 | |
| 127 | Reading* rNew = alloc_reading(*sub); |
| 128 | for (size_t i = 0; i < rNew->tags_list.size(); ++i) { |
| 129 | auto& tter = rNew->tags_list[i]; |
| 130 | if (tter == wfTag->hash || tter == rNew->parent->wordform->hash) { |
| 131 | rNew->tags_list.erase(rNew->tags_list.begin() + i); |
| 132 | rNew->tags.erase(tter); |
| 133 | } |
| 134 | } |
| 135 | cos[pos]->appendReading(rNew); |
| 136 | rNew->parent = cos[pos]; |
| 137 | |
| 138 | if (prev != nullptr) { |
| 139 | free_reading(prev->next); |
| 140 | } |
| 141 | prev = rNew; |
| 142 | } |
| 143 | } |
| 144 | } |
| 145 | if (cos.size() == 0) { |
| 146 | u_fprintf(ux_stderr, "WARNING: Line %u: Tried splitting %S, but got no new cohorts; shouldn't happen.", numLines, cohort->wordform->tag.data()); |
| 147 | cos.push_back(cohort); |
| 148 | } |
| 149 | |
| 150 | cos[0]->text = cohort->text; |
| 151 | std::reverse(cos.begin(), cos.end()); |
| 152 | return cos; |
| 153 | } |
| 154 | |
| 155 | void MweSplitApplicator::printSingleWindow(SingleWindow* window, std::ostream& output, bool profiling) { |
| 156 | for (auto var : window->variables_output) { |
| 157 | Tag* key = grammar->single_tags[var]; |
| 158 | auto iter = window->variables_set.find(var); |
| 159 | if (iter != window->variables_set.end()) { |
| 160 | if (iter->second != grammar->tag_any) { |
| 161 | Tag* value = grammar->single_tags[iter->second]; |
| 162 | u_fprintf(output, "%S%S=%S>\n", STR_CMD_SETVAR.data(), key->tag.data(), value->tag.data()); |
| 163 | } |
| 164 | else { |
| 165 | u_fprintf(output, "%S%S>\n", STR_CMD_SETVAR.data(), key->tag.data()); |
| 166 | } |
| 167 | } |
| 168 | else { |
| 169 | u_fprintf(output, "%S%S>\n", STR_CMD_REMVAR.data(), key->tag.data()); |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | if (!window->text.empty()) { |
| 1 | Assuming the condition is true | |
|
| |
| 174 | u_fprintf(output, "%S", window->text.data()); |
| 175 | if (!ISNL(window->text.back())) { |
| |
| 176 | u_fputc('\n', output); |
| 177 | } |
| 178 | } |
| 179 | |
| 180 | auto cs = UI32(window->cohorts.size()); |
| 181 | for (uint32_t c = 0; c < cs; c++) { |
| |
| 5 | | Loop condition is true. Entering loop body | |
|
| 182 | Cohort* cohort = window->cohorts[c]; |
| 183 | std::vector<Cohort*> cs = splitMwe(cohort); |
| 6 | | Calling 'MweSplitApplicator::splitMwe' | |
|
| 184 | for (auto& iter : cs) { |
| 185 | printCohort(iter, output, profiling); |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | if (!window->text_post.empty()) { |
| 190 | u_fprintf(output, "%S", window->text_post.data()); |
| 191 | if (!ISNL(window->text_post.back())) { |
| 192 | u_fputc('\n', output); |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | u_fputc('\n', output); |
| 197 | if (window->flush_after) { |
| 198 | u_fprintf(output, "%S\n", STR_CMD_FLUSH.data()); |
| 199 | } |
| 200 | u_fflush(output); |
| 201 | } |
| 202 | } |