commit 225fa52e622b61b3b409f10d45b8aa91712fe69e Author: Daniel Swanson Date: Mon Jun 14 11:16:56 2021 -0500 pass some vectors by value and use StringUtils case functions diff --git a/src/parse_arx.cc b/src/parse_arx.cc index a2ab266..0bf4eda 100644 --- a/src/parse_arx.cc +++ b/src/parse_arx.cc @@ -27,6 +27,7 @@ #include #include #include +#include void print_tags(vector input) { @@ -279,7 +280,7 @@ void ParseArx::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, UString markable_na Attr = xmlGetProp(cur, (const xmlChar *)"n"); UString score_ws = to_ustring((const char*)Attr); - int score_int = stoi(score_ws); + int score_int = StringUtils::stoi(score_ws); xmlChar *parameter_name = xmlGetProp(cur, (const xmlChar *)"parameter"); diff --git a/src/pattern_arx.cc b/src/pattern_arx.cc index e18fe55..324ce8e 100644 --- a/src/pattern_arx.cc +++ b/src/pattern_arx.cc @@ -24,148 +24,82 @@ #include #include #include -#include -#include -#include +#include using namespace std; void print_markable(acceptable_patterns inp) { - for(acceptable_patterns::iterator i = inp.begin(); i != inp.end(); i++) - { + for (auto& i : inp) { cerr <<"Pattern:\n"; - for(vector::iterator j = (*i).begin(); j != (*i).end(); j++) - { - cerr << (*j).name; - cerr << "\n"; + for (auto& j : i) { + cerr << j.name << endl; } } } -int contains(vector tags, UString tag) +bool +contains(const vector& tags, const UString& tag) { - if(std::find(tags.begin(), tags.end(), tag) != tags.end()) - return 1; - else - return 0; + return (std::find(tags.begin(), tags.end(), tag) != tags.end()); } -int contains_any(vector tags, vector candidates) +bool +contains_any(const vector& tags, const vector& candidates) { - for(vector::iterator it=candidates.begin();it!=candidates.end();++it) - { - if(std::find(tags.begin(), tags.end(), *it) != tags.end()) - return 1; + for (auto& it : candidates) { + if(std::find(tags.begin(), tags.end(), it) != tags.end()) + return true; } - return 0; + return false; } -void toLower(UString& s) +bool +check_acceptable_tags(const vector& input_tags, const UString& input_sl_lemma, const acceptable_tags& check_tags) //check has-tags, exclude-tags, lemma { - UString temp; - size_t i = 0; - size_t len = s.size(); - UChar32 c; - while (i < len) { - U16_NEXT(s, i, len, c); - temp += u_tolower(c); - } - s.swap(temp); -} + for (auto& i : check_tags) { + bool flag_contains_all = true; -int check_acceptable_tags(vector input_tags, UString input_sl_lemma, acceptable_tags check_tags) //check has-tags, exclude-tags, lemma -{ - for (acceptable_tags::iterator i = check_tags.begin(); i != check_tags.end(); ++i) - { + for (auto& j : i.has_tags) { + if(j == "*"_u) //ignore * in the tags list + continue; - int flag_contains_all = 1; + if(!contains(input_tags, j)) { + //if the has-tag is NOT in the input LU tags + flag_contains_all = false; + break; + } + } - vector temp_tags = i->has_tags; - vector temp_exclude_tags = i->exclude_tags; + if (!flag_contains_all) continue; - for (auto& j : temp_tags) { - if(j == "*"_u) //ignore * in the tags list - continue; + if (contains_any(input_tags, i.exclude_tags)) continue; - if(!contains(input_tags, j)) { - //if the has-tag is NOT in the input LU tags - flag_contains_all = 0; - break; + if (!i.lemma.empty()) { + if (!StringUtils::caseequal(input_sl_lemma, i.lemma)) { + flag_contains_all = false; + continue; + } } - } - if(flag_contains_all == 0) - { - continue; - } - - for(std::vector::iterator j = temp_exclude_tags.begin(); j != temp_exclude_tags.end(); ++j) - { - if(contains(input_tags, *j)) - { - flag_contains_all = 0; - break; - } - } - - if(flag_contains_all == 0) - { - continue; - } - - if(!(i->lemma).empty()) - { - UString temp_lemma = i->lemma; - - if(input_sl_lemma.length() == temp_lemma.length()) - { - if(input_sl_lemma.compare(temp_lemma) != 0) - { - toLower(input_sl_lemma); - toLower(temp_lemma); - - if(input_sl_lemma.compare(temp_lemma) != 0) - { - flag_contains_all = 0; - } - } - } - else - { - flag_contains_all = 0; - } - } - - if(flag_contains_all == 0) - { - continue; - } - else - { - return 1; - } + if (flag_contains_all) return true; } - return 0; + return false; } -parameter_return check_pattern_name(vector input_tags, UString input_sl_lemma, unordered_map parameter_names) +parameter_return check_pattern_name(const vector& input_tags, const UString& input_sl_lemma, const unordered_map& parameter_names) { parameter_return retval; retval.found = 0; - for (unordered_map::iterator it = parameter_names.begin(); it != parameter_names.end(); it++) - { - UString parameter_name = it->first; - acceptable_tags parameter_tags= it->second; - - if(check_acceptable_tags(input_tags, input_sl_lemma, parameter_tags)) + for (auto& it : parameter_names) { + if(check_acceptable_tags(input_tags, input_sl_lemma, it.second)) { retval.found = 1; - retval.parameter_name = parameter_name; + retval.parameter_name = it.first; return retval; } diff --git a/src/pattern_arx.h b/src/pattern_arx.h index 5c67fd6..103d727 100644 --- a/src/pattern_arx.h +++ b/src/pattern_arx.h @@ -32,8 +32,8 @@ struct unique_LU int id; UString wordform; UString tl_wordform; - UString sl_lemma; - UString tl_lemma; + UString sl_lemma; + UString tl_lemma; vector pos_tags; vector properties; }; @@ -50,11 +50,11 @@ struct parameter_return UString parameter_name; }; -int contains(vector tags, UString tag); -int contains_any(vector tags, vector candidates); +bool contains(const vector& tags, const UString& tag); +bool contains_any(const vector& tags, const vector& candidates); -int check_acceptable_tags(vector input_tags, UString input_sl_lemma, acceptable_tags check_tags); -parameter_return check_pattern_name(vector input_tags, UString input_sl_lemma, unordered_map parameter_names); +bool check_acceptable_tags(const vector& input_tags, const UString& input_sl_lemma, const acceptable_tags& check_tags); +parameter_return check_pattern_name(const vector& input_tags, const UString& input_sl_lemma, const unordered_map& parameter_names); deque< vector > add_properties(deque< vector > context, ParseArx arx_file); diff --git a/src/score.cc b/src/score.cc index 522a7cb..8055395 100644 --- a/src/score.cc +++ b/src/score.cc @@ -186,20 +186,18 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, UString par //Impeding Indicators //Indicators from XML file (iterate through all markables that provided a score without mentioning parameter_name) - for(unordered_map::iterator x = all_markables_score.begin(); x != all_markables_score.end(); ++x) - { - if(contains(antecedent_LU.properties, x->first)) + for (auto& x : all_markables_score) { + if(contains(antecedent_LU.properties, x.first)) { - temp_score += x->second; + temp_score += x.second; } } //Now get the scores from the markables that mentioned this specific parameter name - for(unordered_map::iterator x = parameter_markables_score.begin(); x != parameter_markables_score.end(); ++x) - { - if(contains(antecedent_LU.properties, x->first)) + for (auto& x : parameter_markables_score) { + if(contains(antecedent_LU.properties, x.first)) { - temp_score += x->second; + temp_score += x.second; } }