commit e8b46bb163003946242a999a93032f431be4dbb0 Author: Tanmai Khanna Date: Fri Jul 19 16:16:00 2019 +0530 Code to check and use parameters from external XML file | Removed all hardcoded parameters diff --git a/src/anaphora.cc b/src/anaphora.cc index dd5c3b4..a721d33 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -134,7 +134,7 @@ int main(int argc, char **argv) { int retval; - retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form); //Give word to Scoring Module + retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, ref_file.get_parameters()); //Give word to Scoring Module //If retval is 0, nothing will be added in side ref //If retval is 1, we call get_antecedent() and add it to ref diff --git a/src/parse_ref.cc b/src/parse_ref.cc index 2ac43c0..c3b459f 100644 --- a/src/parse_ref.cc +++ b/src/parse_ref.cc @@ -16,8 +16,8 @@ void print_tags(vector< wstring > input) { for (int i = 0; i < input.size(); i++) { - wcout << input[i]; - cout << " "; + wcerr << input[i]; + cerr << " "; } } @@ -50,9 +50,9 @@ vector ParseRef::parseTags (wstring tags) if(!temptag.empty()) //if any tag remaining temp_tags_list.push_back(temptag); - print_tags(temp_tags_list); + //print_tags(temp_tags_list); - cout << "\n"; + //cerr << "\n"; return temp_tags_list; } @@ -70,7 +70,7 @@ void ParseRef::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parame { Attr = xmlGetProp(cur, (const xmlChar *)"tags"); - printf("ParameterItem: "); + //fprintf(stderr, "ParameterItem: "); temp_tags_list = parseTags(XMLParseUtil::towstring(Attr)); parameters[parameter_name].push_back(temp_tags_list); @@ -99,9 +99,9 @@ void ParseRef::parseParameters (xmlDocPtr doc, xmlNodePtr cur) { parameter_name = XMLParseUtil::towstring(cur->name); - cout << "\n"; - wcout << parameter_name; - cout << "\n"; + //cerr << "\n"; + //wcerr << parameter_name; + //cerr << "\n"; parseParameterItem(doc,cur,parameter_name); } @@ -123,7 +123,7 @@ void ParseRef::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name) if ((!xmlStrcmp(cur->name, (const xmlChar *)"cat-item"))) { Attr = xmlGetProp(cur, (const xmlChar *)"tags"); - printf("catItem: "); + //fprintf(stderr, "catItem: "); temp_tags_list = parseTags(XMLParseUtil::towstring(Attr)); cats[cat_name].push_back(temp_tags_list); @@ -150,7 +150,7 @@ void ParseRef::parseCats (xmlDocPtr doc, xmlNodePtr cur) if ((!xmlStrcmp(cur->name, (const xmlChar *)"def-cat"))) { Attr = xmlGetProp(cur, (const xmlChar *)"n"); - printf("catName: %s\n", Attr); + //fprintf(stderr, "catName: %s\n", Attr); parseCatItem(doc,cur, XMLParseUtil::towstring(Attr)); xmlFree(Attr); @@ -177,8 +177,8 @@ vector ParseRef::parsePatternItem (xmlDocPtr doc, xmlNodePtr c Attr = xmlGetProp(cur, (const xmlChar *)"n"); temp.name = XMLParseUtil::towstring(Attr); - wcout << temp.name; - cout << " "; + //wcerr << temp.name; + //cerr << " "; xmlFree(Attr); @@ -187,7 +187,7 @@ vector ParseRef::parsePatternItem (xmlDocPtr doc, xmlNodePtr c if(Attr != NULL) { temp.head = 1; - printf("[HEAD!]"); + //fprintf(stderr, "[HEAD!]"); } else temp.head = 0; @@ -209,8 +209,8 @@ void ParseRef::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_na vector temp_pattern_list; - wcout << markable_name; - cout << "\n"; + //wcerr << markable_name; + //cerr << "\n"; while (cur != NULL) { @@ -222,7 +222,7 @@ void ParseRef::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_na cur = cur->next; - cout << "\n"; + //cerr << "\n"; } return; } @@ -237,7 +237,7 @@ void ParseRef::parseMarkables (xmlDocPtr doc, xmlNodePtr cur) if ((!xmlStrcmp(cur->name, (const xmlChar *)"markable"))) { Attr = xmlGetProp(cur, (const xmlChar *)"n"); - printf("MarkableName: "); + //fprintf(stderr, "MarkableName: "); parsePatterns(doc,cur, XMLParseUtil::towstring(Attr)); @@ -303,6 +303,21 @@ void ParseRef::parseDoc(char *docname) return; } +unordered_map ParseRef::get_parameters() +{ + return parameters; +} + +unordered_map ParseRef::get_cats() +{ + return cats; +} + +unordered_map ParseRef::get_markables() +{ + return markables; +} + /* //Code for Testing int main(int argc, char **argv) { @@ -310,7 +325,7 @@ int main(int argc, char **argv) if (argc <= 1) { - printf("Usage: %s docname\n", argv[0]); + fprintf(stderr, "Usage: %s docname\n", argv[0]); return(0); } diff --git a/src/parse_ref.h b/src/parse_ref.h index 7a71b67..a1f8385 100644 --- a/src/parse_ref.h +++ b/src/parse_ref.h @@ -22,6 +22,8 @@ struct markable_pattern typedef vector< vector > acceptable_patterns; +void print_tags(vector< wstring > input); + class ParseRef { private: @@ -43,6 +45,11 @@ public: vector parsePatternItem (xmlDocPtr doc, xmlNodePtr cur); vector parseTags (wstring tags); + + unordered_map get_parameters(); + unordered_map get_cats(); + + unordered_map get_markables(); }; #endif \ No newline at end of file diff --git a/src/score.cc b/src/score.cc index 7b3962f..e4b5de8 100644 --- a/src/score.cc +++ b/src/score.cc @@ -1,4 +1,5 @@ #include "score.h" +#include "parse_ref.h" #include #include @@ -14,15 +15,15 @@ void showq(deque < vector > gq) { vector temp_sentence = *j; - cout << "\n"; + cerr << "\n"; for (std::vector::iterator i = temp_sentence.begin(); i != temp_sentence.end(); ++i) { - wcout << (*i).wordform; + wcerr << (*i).wordform; } - cout << "\n"; + cerr << "\n"; } - cout << '\n'; + cerr << '\n'; } void clearq(queue < vector > q) @@ -52,7 +53,44 @@ int contains_any(vector tags, vector candidates) return 0; //if no matches } -int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform) +int check_acceptable_tags(vector input_tags, acceptable_tags check_tags) +{ + for (acceptable_tags::iterator i = check_tags.begin(); i != check_tags.end(); ++i) + { + + int flag_contains_all = 1; + + vector temp_tags = *i; + + for(std::vector::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) + { + + if(*j == L"*") //ignore * in the tags list + continue; + + if(!contains(input_tags, *j)) //if the required tag is NOT in the input LU tags + { + flag_contains_all = 0; + break; + } + /* + else + { + cerr << "FoundTag:"; + wcerr << *j; + cerr <<"\n"; + } + */ + } + + if(flag_contains_all == 1) //if any tag list fully matched + return 1; //else continue to next tag list + } + + return 0; //if it didn't return 1 then no tag list was fully matched +} + +int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, unordered_map ref_parameters) { unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags}; //initialise in context with score 0 @@ -63,7 +101,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst context.push_back(sentence); - if(contains(input_LU.pos_tags, L"sent")) //if sentence end (somehow the first LU is a sentence end) + if(check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"delimiter"]) ) //if sentence end (somehow the first LU is a sentence end) { vector new_sentence; @@ -74,7 +112,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst { context.back().push_back(input_LU); //add word to the latest added sentence in the queue - if(contains(input_LU.pos_tags, L"sent")) + if(check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"delimiter"]) ) { vector new_sentence; @@ -83,9 +121,9 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst if(context.size() > 4) context.pop_front(); //remove the earliest added sentence (We only want current and three previous sentences in context) } - else if( contains(input_LU.pos_tags, L"det") && contains(input_LU.pos_tags, L"pos") ) + else if( check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"anaphor"]) ) //check if tags of current word match with anaphor tags in ref file { - apply_indicators(input_LU); + apply_indicators(input_LU, ref_parameters); return 1; //To show that something will be added in side ref } } @@ -93,7 +131,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst return 0; //To show that nothing will be added in side ref } -void Scoring::apply_indicators(unique_LU anaphor) +void Scoring::apply_indicators(unique_LU anaphor, unordered_map ref_parameters) { int distance_marker = 2; //starts from 2 for current sentence and reduces till -1 as we go to previous sentences int temp_score; @@ -108,7 +146,7 @@ void Scoring::apply_indicators(unique_LU anaphor) for (vector::iterator j = (*i).begin(); j!=(*i).end(); ++j) //read through sentence { - if(contains((*j).pos_tags, L"n")) + if(check_acceptable_tags((*j).pos_tags, ref_parameters[L"antecedent"]) ) // if it is antecedent (based on external xml file) { temp_score = 0; @@ -135,9 +173,9 @@ void Scoring::apply_indicators(unique_LU anaphor) } else { - cout << "\nAgreement Failed for:"; - wcout << antecedent_LU.wordform; - cout << "\n"; + cerr << "\nAgreement Failed for:"; + wcerr << antecedent_LU.wordform; + cerr << "\n"; } } } @@ -166,9 +204,9 @@ wstring Scoring::get_antecedent() for(vector::reverse_iterator it=antecedent_list.rbegin();it!=antecedent_list.rend();++it) //read it in reverse so that we read from furthest to nearest { - //cout << "\n" << (*it).LU.id << ": "; - //wcout << (*it).LU.wordform; - //cout << " : " << (*it).score << "\n"; + //cerr << "\n" << (*it).LU.id << ": "; + //wcerr << (*it).LU.wordform; + //cerr << " : " << (*it).score << "\n"; if((*it).score >= final_antecedent.score) //picking the highest scored and latest added (most recent) antecedent final_antecedent = (*it); diff --git a/src/score.h b/src/score.h index 4186363..5336af5 100644 --- a/src/score.h +++ b/src/score.h @@ -1,6 +1,8 @@ #ifndef _MITKOVSCORE_ #define _MITKOVSCORE_ +#include "parse_ref.h" + #include #include #include @@ -27,6 +29,8 @@ void showq(deque < vector > gq); int contains(vector tags, wstring tag); int contains_any(vector tags, vector candidates); +int check_acceptable_tags(vector input_tags, acceptable_tags check_tags); + class Scoring { private: @@ -34,8 +38,8 @@ private: vector antecedent_list; //A list of antecedents public: - int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform); - void apply_indicators(unique_LU anaphor); + int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, unordered_map ref_parameters); + void apply_indicators(unique_LU anaphor, unordered_map ref_parameters); int check_agreement(vector antecedent_tags, vector anaphor_tags); wstring get_antecedent(); void clear();