commit 215995037fb19d6101c0d1a5efd581116cc238b2 Author: Tanmai Khanna Date: Wed Jun 17 18:20:09 2020 +0530 fix whitespace | remove unnecessary comments, debugging remnants diff --git a/src/anaphora.cc b/src/anaphora.cc index d690191..66366d0 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -66,10 +66,6 @@ void help_message(char *progname) wcerr << "USAGE: " << basename(progname) << " arx_file [input [output]]" << endl; wcerr << " " << basename(progname) << " -z arx_file [input [output]]" << endl; wcerr << " arx_file Anaphora Resolution rules file (apertium-xxx-yyy.xxx-yyy.arx)" << endl; - - //wcerr << " input input file, standard input by default" << endl; - //wcerr << " output output file, standard output by default" << endl; - wcerr << " -z null-flushing output on \\0" << endl; wcerr << " -h shows this message" << endl; @@ -77,7 +73,6 @@ void help_message(char *progname) } - int main(int argc, char **argv) { int debug_flag = 0; //flag set by --debug @@ -188,7 +183,7 @@ int main(int argc, char **argv) while(input_char!=EOF) { - if(nullFlush && input_char == L'\0') //nullFlush + if(nullFlush && input_char == L'\0') { fputwc(input_char, output); fflush(output); @@ -208,9 +203,9 @@ int main(int argc, char **argv) flag_LU = 0; } - else if(input_char == L'\\') //dealing with escaped characters + else if(input_char == L'\\') { - if(flag_LU == 0) // not inside LU + if(flag_LU == 0) { fputwc(input_char, output); @@ -218,7 +213,7 @@ int main(int argc, char **argv) fputwc(input_char, output); } - else //inside LU + else { input_stream.push_back(input_char); fputwc(input_char, output); @@ -231,7 +226,7 @@ int main(int argc, char **argv) } else { - if(flag_LU == 0) //Not Part of an LU + if(flag_LU == 0) { fputwc(input_char, output); @@ -239,17 +234,17 @@ int main(int argc, char **argv) flag_LU = 1; } - else if(flag_LU == 1) //Part of an LU + else if(flag_LU == 1) { if(input_char == L'$') { - gen_id++; //generate ids for LUs + gen_id++; fputwc(L'/', output); //for adding ref flag_LU = 0; - ParseLexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans + ParseLexicalUnit LU(input_stream); tl_form = LU.get_tl_form(); tl_tags = LU.get_tl_tags(); @@ -258,11 +253,11 @@ int main(int argc, char **argv) sl_lemma = LU.get_sl_lemma(); tl_lemma = LU.get_tl_lemma(); - if(!tl_form.empty()) //if TL exists + if(!tl_form.empty()) { int retval; - retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, sl_lemma, tl_lemma, arx_file, debug_flag); //Give word to Scoring Module + retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, sl_lemma, tl_lemma, arx_file, debug_flag); //If retval is 0, nothing will be added in side ref //If retval is 1, we call get_antecedent() and add it to ref @@ -270,7 +265,7 @@ int main(int argc, char **argv) { final_ref = score_module.get_antecedent(debug_flag); - fputws(final_ref.c_str(), output); //add antecedent to side ref of LU + fputws(final_ref.c_str(), output); } } @@ -293,4 +288,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/src/parse_arx.cc b/src/parse_arx.cc index 174e228..3d96172 100644 --- a/src/parse_arx.cc +++ b/src/parse_arx.cc @@ -28,7 +28,7 @@ #include #include -void print_tags(vector input) //testing function +void print_tags(vector input) { for (size_t i = 0; i < input.size(); ++i) { @@ -45,7 +45,7 @@ vector ParseArx::parseTags (wstring tags) for (std::wstring::iterator i = tags.begin(); i != tags.end(); ++i) { - if(*i == '\\') //dealing with escaped characters + if(*i == '\\') { temptag.push_back(*i); ++i; @@ -62,7 +62,7 @@ vector ParseArx::parseTags (wstring tags) } } - if(!temptag.empty()) //if any tag remaining + if(!temptag.empty()) temp_tags_list.push_back(temptag); return temp_tags_list; @@ -78,34 +78,34 @@ void ParseArx::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parame while (cur != NULL) { - temp_item.has_tags.clear(); - temp_item.exclude_tags.clear(); - temp_item.lemma.clear(); - - if ((!xmlStrcmp(cur->name, (const xmlChar *)"parameter-item"))) + temp_item.has_tags.clear(); + temp_item.exclude_tags.clear(); + temp_item.lemma.clear(); + + if ((!xmlStrcmp(cur->name, (const xmlChar *)"parameter-item"))) + { + Attr = xmlGetProp(cur, (const xmlChar *)"has-tags"); + if (Attr) + { + temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr)); + } + + Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags"); + if (Attr) { - Attr = xmlGetProp(cur, (const xmlChar *)"has-tags"); - if (Attr) - { - temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr)); - } - - Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags"); - if (Attr) - { - temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr)); - } - - Attr = xmlGetProp(cur, (const xmlChar *)"lemma"); - if (Attr) - { - temp_item.lemma = XMLParseUtil::towstring(Attr); - } - - parameters[parameter_type][parameter_name].push_back(temp_item); - - xmlFree(Attr); - } + temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr)); + } + + Attr = xmlGetProp(cur, (const xmlChar *)"lemma"); + if (Attr) + { + temp_item.lemma = XMLParseUtil::towstring(Attr); + } + + parameters[parameter_type][parameter_name].push_back(temp_item); + + xmlFree(Attr); + } cur = cur->next; } @@ -140,20 +140,19 @@ void ParseArx::parseParameters (xmlDocPtr doc, xmlNodePtr cur) while (cur != NULL) { - if ((!xmlStrcmp(cur->name, (const xmlChar *)"def-parameter"))) - { - parameter_name = xmlGetProp(cur, (const xmlChar *)"n"); - //fprintf(stderr, "catName: %s\n", Attr); + if ((!xmlStrcmp(cur->name, (const xmlChar *)"def-parameter"))) + { + parameter_name = xmlGetProp(cur, (const xmlChar *)"n"); - parseParameterTypes(doc,cur, XMLParseUtil::towstring(parameter_name)); - xmlFree(parameter_name); - } - else if ((!xmlStrcmp(cur->name, (const xmlChar *)"delimiter"))) - { - parameter_type = XMLParseUtil::towstring(cur->name); + parseParameterTypes(doc,cur, XMLParseUtil::towstring(parameter_name)); + xmlFree(parameter_name); + } + else if ((!xmlStrcmp(cur->name, (const xmlChar *)"delimiter"))) + { + parameter_type = XMLParseUtil::towstring(cur->name); - parseParameterItem(doc, cur, parameter_type, L"default"); - } + parseParameterItem(doc, cur, parameter_type, L"default"); + } cur = cur->next; } @@ -169,34 +168,34 @@ void ParseArx::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name) while (cur != NULL) { - temp_item.has_tags.clear(); - temp_item.exclude_tags.clear(); - temp_item.lemma.clear(); - - if ((!xmlStrcmp(cur->name, (const xmlChar *)"cat-item"))) + temp_item.has_tags.clear(); + temp_item.exclude_tags.clear(); + temp_item.lemma.clear(); + + if ((!xmlStrcmp(cur->name, (const xmlChar *)"cat-item"))) + { + Attr = xmlGetProp(cur, (const xmlChar *)"has-tags"); + if (Attr) + { + temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr)); + } + + Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags"); + if (Attr) { - Attr = xmlGetProp(cur, (const xmlChar *)"has-tags"); - if (Attr) - { - temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr)); - } - - Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags"); - if (Attr) - { - temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr)); - } - - Attr = xmlGetProp(cur, (const xmlChar *)"lemma"); - if (Attr) - { - temp_item.lemma = XMLParseUtil::towstring(Attr); - } - - cats[cat_name].push_back(temp_item); - - xmlFree(Attr); - } + temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr)); + } + + Attr = xmlGetProp(cur, (const xmlChar *)"lemma"); + if (Attr) + { + temp_item.lemma = XMLParseUtil::towstring(Attr); + } + + cats[cat_name].push_back(temp_item); + + xmlFree(Attr); + } cur = cur->next; } @@ -210,14 +209,13 @@ void ParseArx::parseCats (xmlDocPtr doc, xmlNodePtr cur) while (cur != NULL) { - if ((!xmlStrcmp(cur->name, (const xmlChar *)"def-cat"))) - { - Attr = xmlGetProp(cur, (const xmlChar *)"n"); - //fprintf(stderr, "catName: %s\n", Attr); + if ((!xmlStrcmp(cur->name, (const xmlChar *)"def-cat"))) + { + Attr = xmlGetProp(cur, (const xmlChar *)"n"); - parseCatItem(doc,cur, XMLParseUtil::towstring(Attr)); - xmlFree(Attr); - } + parseCatItem(doc,cur, XMLParseUtil::towstring(Attr)); + xmlFree(Attr); + } cur = cur->next; } @@ -233,32 +231,28 @@ vector ParseArx::parsePatternItem (xmlDocPtr doc, xmlNodePtr c while (cur != NULL) { - if ((!xmlStrcmp(cur->name, (const xmlChar *)"pattern-item"))) - { - markable_pattern temp; - - Attr = xmlGetProp(cur, (const xmlChar *)"n"); - temp.name = XMLParseUtil::towstring(Attr); + if ((!xmlStrcmp(cur->name, (const xmlChar *)"pattern-item"))) + { + markable_pattern temp; - //wcerr << temp.name; - //cerr << " "; + Attr = xmlGetProp(cur, (const xmlChar *)"n"); + temp.name = XMLParseUtil::towstring(Attr); - xmlFree(Attr); + xmlFree(Attr); - Attr = xmlGetProp(cur, (const xmlChar *)"head"); + Attr = xmlGetProp(cur, (const xmlChar *)"head"); - if(Attr != NULL) - { - temp.head = 1; - //fprintf(stderr, "[HEAD!]"); - } - else - temp.head = 0; + if(Attr != NULL) + { + temp.head = 1; + } + else + temp.head = 0; - xmlFree(Attr); + xmlFree(Attr); - temp_pattern.push_back(temp); - } + temp_pattern.push_back(temp); + } cur = cur->next; } @@ -272,37 +266,34 @@ void ParseArx::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_na cur = cur->xmlChildrenNode; - //wcerr << markable_name; - //cerr << "\n"; - while (cur != NULL) { - if ((!xmlStrcmp(cur->name, (const xmlChar *)"pattern"))) - { - vector temp_pattern = parsePatternItem(doc,cur); + if ((!xmlStrcmp(cur->name, (const xmlChar *)"pattern"))) + { + vector temp_pattern = parsePatternItem(doc,cur); - markables[markable_name].push_back(temp_pattern); - } + markables[markable_name].push_back(temp_pattern); + } - else if ((!xmlStrcmp(cur->name, (const xmlChar *)"score"))) - { - Attr = xmlGetProp(cur, (const xmlChar *)"n"); + else if ((!xmlStrcmp(cur->name, (const xmlChar *)"score"))) + { + Attr = xmlGetProp(cur, (const xmlChar *)"n"); - wstring score_ws = XMLParseUtil::towstring(Attr); - int score_int = std::stoi(score_ws); + wstring score_ws = XMLParseUtil::towstring(Attr); + int score_int = std::stoi(score_ws); - xmlChar *parameter_name = xmlGetProp(cur, (const xmlChar *)"parameter"); + xmlChar *parameter_name = xmlGetProp(cur, (const xmlChar *)"parameter"); - if (parameter_name) - { - wstring parameter_name_ws = XMLParseUtil::towstring(parameter_name); - parameter_markables_score[parameter_name_ws][markable_name] = score_int; - } - else - { - all_markables_score[markable_name] = score_int; - } - } + if (parameter_name) + { + wstring parameter_name_ws = XMLParseUtil::towstring(parameter_name); + parameter_markables_score[parameter_name_ws][markable_name] = score_int; + } + else + { + all_markables_score[markable_name] = score_int; + } + } cur = cur->next; } @@ -316,15 +307,14 @@ void ParseArx::parseMarkables (xmlDocPtr doc, xmlNodePtr cur) while (cur != NULL) { - if ((!xmlStrcmp(cur->name, (const xmlChar *)"markable"))) - { - Attr = xmlGetProp(cur, (const xmlChar *)"n"); - //fprintf(stderr, "MarkableName: "); + if ((!xmlStrcmp(cur->name, (const xmlChar *)"markable"))) + { + Attr = xmlGetProp(cur, (const xmlChar *)"n"); - parsePatterns(doc,cur, XMLParseUtil::towstring(Attr)); + parsePatterns(doc,cur, XMLParseUtil::towstring(Attr)); - xmlFree(Attr); - } + xmlFree(Attr); + } cur = cur->next; } @@ -341,7 +331,7 @@ int ParseArx::parseDoc(char *docname) if (doc == NULL ) { fprintf(stderr,"Document not parsed successfully. \n"); - return -1; //return error + return -1; } cur = xmlDocGetRootElement(doc); @@ -350,14 +340,14 @@ int ParseArx::parseDoc(char *docname) { fprintf(stderr,"Empty Document!\n"); xmlFreeDoc(doc); - return 1; //return error + return 1; } if (xmlStrcmp(cur->name, (const xmlChar *) "ref")) { fprintf(stderr,"Document of the wrong type! Root node should be ref.\n"); xmlFreeDoc(doc); - return 2; //return error + return 2; } cur = cur->xmlChildrenNode; @@ -409,24 +399,3 @@ unordered_map ParseArx::get_parameter_markables_score(wstring para { return parameter_markables_score[parameter_name]; } - -/* //Code for Testing -int main(int argc, char **argv) -{ - char *docname; - - if (argc <= 1) - { - fprintf(stderr, "Usage: %s docname\n", argv[0]); - return(0); - } - - docname = argv[1]; - - ParseArx ref; - - ref.parseDoc(docname); - - return (1); -} -*/ diff --git a/src/parse_biltrans.cc b/src/parse_biltrans.cc index 4865ec8..bea0462 100644 --- a/src/parse_biltrans.cc +++ b/src/parse_biltrans.cc @@ -33,11 +33,11 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) for (std::wstring::iterator i = input_LU.begin(); i != input_LU.end(); ++i) { - if(*i == L'\\') //dealing with escaped characters + if(*i == L'\\') { - if(seenSlash == 0) //sl + if(seenSlash == 0) { - if(seenTag == 1) //in a tag + if(seenTag == 1) { temptag.push_back(*i); sl_form.push_back(*i); @@ -45,7 +45,7 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) temptag.push_back(*i); sl_form.push_back(*i); } - else //not in a tag + else { sl_form.push_back(*i); sl_lemma.push_back(*i); @@ -56,7 +56,7 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) } else if(seenSlash == 1) //tl (only first entry) { - if(seenTag == 1) //in a tag + if(seenTag == 1) { temptag.push_back(*i); tl_form.push_back(*i); @@ -64,7 +64,7 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) temptag.push_back(*i); tl_form.push_back(*i); } - else //not in a tag + else { tl_form.push_back(*i); tl_lemma.push_back(*i); @@ -82,25 +82,25 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) else if(*i == L'/') seenSlash++; - else if(seenSlash == 0) //sl + else if(seenSlash == 0) { - sl_form.push_back(*i); //add to the sl form + sl_form.push_back(*i); - if(*i == L'<') //start reading tag + if(*i == L'<') seenTag++; - else if(seenTag == 1) //inside a tag + else if(seenTag == 1) { - if(*i == L'>') //if tag ends + if(*i == L'>') { seenTag--; - sl_tags.push_back(temptag); //add tag to list of sl tags + sl_tags.push_back(temptag); temptag.clear(); } else { - temptag.push_back(*i); //add char to current tag + temptag.push_back(*i); } } @@ -110,25 +110,25 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) } } - else if(seenSlash == 1) //tl (only first entry in tl) + else if(seenSlash == 1) { - tl_form.push_back(*i); //add to the tl form + tl_form.push_back(*i); - if(*i == L'<') //start reading tag + if(*i == L'<') seenTag++; - else if(seenTag == 1) //inside a tag + else if(seenTag == 1) { - if(*i == L'>') //if tag ends + if(*i == L'>') { seenTag--; - tl_tags.push_back(temptag); //add tag to list of tl tags + tl_tags.push_back(temptag); temptag.clear(); } else { - temptag.push_back(*i); //add char to current tag + temptag.push_back(*i); } } @@ -138,7 +138,7 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) } } - else //if tl has more than one entry + else { break; } @@ -174,44 +174,3 @@ wstring ParseLexicalUnit::get_tl_lemma() { return tl_lemma; } - -/* //Uncomment to test this code - -void print_tags(vector< wstring > input) -{ - for (int i = 0; i < input.size(); i++) - { - wcout << input[i]; - cout << " "; - } -} - -int main() -{ - wstring inputlu; - char input_char; - - input_char = fgetc(stdin); - - while(input_char != '\n') - { - inputlu.push_back(input_char); - - input_char = fgetc(stdin); - } - - ParseLexicalUnit lu(inputlu); - - cout << "SL: "; - wcout << lu.get_sl_form(); - cout << endl << "SL tags: "; - print_tags(lu.get_sl_tags()); - cout << endl << "TL: "; - wcout << lu.get_tl_form(); - cout << endl << "TL tags: "; - print_tags(lu.get_tl_tags()); - cout << endl; - - return 0; -} -*/ diff --git a/src/pattern_arx.cc b/src/pattern_arx.cc index 8b89b9d..64edb64 100644 --- a/src/pattern_arx.cc +++ b/src/pattern_arx.cc @@ -28,7 +28,7 @@ using namespace std; -void print_markable(acceptable_patterns inp) //testing function +void print_markable(acceptable_patterns inp) { for(acceptable_patterns::iterator i = inp.begin(); i != inp.end(); i++) { @@ -55,10 +55,10 @@ int contains_any(vector tags, vector candidates) for(vector::iterator it=candidates.begin();it!=candidates.end();++it) { if(std::find(tags.begin(), tags.end(), *it) != tags.end()) - return 1; //if any of the tags in candidates matches the tags list + return 1; } - return 0; //if no matches + return 0; } void toLower(basic_string& s) @@ -79,7 +79,7 @@ int check_acceptable_tags(vector input_tags, wstring input_sl_lemma, ac vector temp_tags = i->has_tags; vector temp_exclude_tags = i->exclude_tags; - for(std::vector::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) //check for the tags in has-tags + for(std::vector::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) { if(*j == L"*") //ignore * in the tags list continue; @@ -96,9 +96,9 @@ int check_acceptable_tags(vector input_tags, wstring input_sl_lemma, ac continue; } - for(std::vector::iterator j = temp_exclude_tags.begin(); j != temp_exclude_tags.end(); ++j) //check for the tags in exclude-tags + for(std::vector::iterator j = temp_exclude_tags.begin(); j != temp_exclude_tags.end(); ++j) { - if(contains(input_tags, *j)) //if the exclude-tag IS in the input LU tags + if(contains(input_tags, *j)) { flag_contains_all = 0; break; @@ -137,17 +137,16 @@ int check_acceptable_tags(vector input_tags, wstring input_sl_lemma, ac { continue; } - else //if any tag list fully matched (i.e. has-tags present, exclude-tags absent) + else { return 1; } } - return 0; //if it didn't return 1 then no tag list was fully matched + return 0; } parameter_return check_pattern_name(vector input_tags, wstring input_sl_lemma, unordered_map parameter_names) -//find out if any of the anaphors match wrt tags, and if yes, return the unique name { parameter_return retval; retval.found = 0; @@ -175,14 +174,12 @@ deque< vector > add_properties(deque< vector > context, Pa unordered_map arx_markables = arx_file.get_markables(); unordered_map arx_cats = arx_file.get_cats(); - for (unordered_map::iterator it = arx_markables.begin(); it != arx_markables.end(); it++ ) //go through markables defined in xml file + for (unordered_map::iterator it = arx_markables.begin(); it != arx_markables.end(); it++ ) { //for each markable wstring markable_name = it->first; acceptable_patterns patterns_list = it->second; - //print_markable(patterns_list); - for(acceptable_patterns::iterator i = patterns_list.begin(); i!=patterns_list.end(); ++i) //go through patterns in the markable { //for each pattern @@ -200,9 +197,7 @@ deque< vector > add_properties(deque< vector > context, Pa for(size_t x = 0; x < len_pattern; ++x) { - //this is the window -- check if pattern matches - - acceptable_tags pattern_item_tags = arx_cats[current_pattern[x].name]; //get pattern item tags from def-cats + acceptable_tags pattern_item_tags = arx_cats[current_pattern[x].name]; if(check_acceptable_tags((*(n+x)).pos_tags, (*(n+x)).sl_lemma, pattern_item_tags)) //comparing current LU tags to pattern tags and lemma { @@ -214,28 +209,19 @@ deque< vector > add_properties(deque< vector > context, Pa match_flag = 0; break; } - - //wcerr << (*(n+x)).wordform; } - if(match_flag == 1) //if the entire pattern matched + if(match_flag == 1) { //Add Property to the LUs - /* - cerr << "\n"; - wcerr << markable_name; - cerr << " Pattern Matched at: "; - wcerr << (*n).wordform; - cerr << "\n"; - */ for(size_t x = 0; x < len_pattern; ++x) { - ((*(n+x)).properties).push_back(markable_name); //add markable name to properties + ((*(n+x)).properties).push_back(markable_name); if(current_pattern[x].head == 1) { - ((*(n+x)).properties).push_back(L"head"); // add "head" to properties + ((*(n+x)).properties).push_back(L"head"); // } } @@ -247,4 +233,3 @@ deque< vector > add_properties(deque< vector > context, Pa return context; } - diff --git a/src/score.cc b/src/score.cc index 2724383..bd307c1 100644 --- a/src/score.cc +++ b/src/score.cc @@ -28,38 +28,38 @@ using namespace std; -void showq(deque < vector > gq) //to display context if needed (testing function) -can be added to debug +void showq(deque < vector > gq) { - for(std::deque < vector >::iterator j = gq.begin(); j != gq.end(); ++j) - { - vector temp_sentence = *j; - - cerr << "\n"; - for (std::vector::iterator i = temp_sentence.begin(); i != temp_sentence.end(); ++i) - { - wcerr << (*i).tl_wordform; + for(std::deque < vector >::iterator j = gq.begin(); j != gq.end(); ++j) + { + vector temp_sentence = *j; - for (std::vector::iterator k = (*i).pos_tags.begin(); k != (*i).pos_tags.end(); ++k) - { - cerr << "<"; - wcerr << (*k); - cerr << ">"; - } + cerr << "\n"; + for (std::vector::iterator i = temp_sentence.begin(); i != temp_sentence.end(); ++i) + { + wcerr << (*i).tl_wordform; - cerr << ":"; + for (std::vector::iterator k = (*i).pos_tags.begin(); k != (*i).pos_tags.end(); ++k) + { + cerr << "<"; + wcerr << (*k); + cerr << ">"; + } - for (std::vector::iterator l = (*i).properties.begin(); l != (*i).properties.end(); ++l) - { - cerr << " "; - wcerr << (*l); - } + cerr << ":"; - cerr << "\t"; - } + for (std::vector::iterator l = (*i).properties.begin(); l != (*i).properties.end(); ++l) + { + cerr << " "; + wcerr << (*l); + } - cerr << "\n"; + cerr << "\t"; } - cerr << '\n'; + + cerr << "\n"; + } + cerr << '\n'; } int Scoring::add_word(int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, wstring input_sl_lemma, wstring input_tl_lemma, ParseArx arx_file, int debug_flag) @@ -67,56 +67,55 @@ int Scoring::add_word(int input_id, wstring input_wordform, vector< wstring > in vector temp_prop; parameters_datatype arx_parameters = arx_file.get_parameters(); - unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_sl_lemma, input_tl_lemma, input_pos_tags, temp_prop}; //initialise LU + unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_sl_lemma, input_tl_lemma, input_pos_tags, temp_prop}; - if(context.empty()) //if queue is empty + if(context.empty()) { - vector sentence; //initialise a sentence - sentence.push_back(input_LU); //add the first word to the sentence + vector sentence; + sentence.push_back(input_LU); context.push_back(sentence); - if(check_acceptable_tags(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"delimiter"][L"default"]) ) //if sentence end (somehow the first LU is a sentence end) + if(check_acceptable_tags(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"delimiter"][L"default"]) ) { vector new_sentence; - context.push_back(new_sentence); //add an empty sentence + context.push_back(new_sentence); } } - else //if queue is not empty + else { if(check_acceptable_tags(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"delimiter"][L"default"])) { - context.back().push_back(input_LU); //add to context so that it can also be matched in a pattern + context.back().push_back(input_LU); vector new_sentence; - context.push_back(new_sentence); //add an empty sentence + context.push_back(new_sentence); if(context.size() > 4) - context.pop_front(); //remove the earliest added sentence (We only want current and three previous sentences in context) + context.pop_front(); } else { parameter_return retval = check_pattern_name(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"anaphor"]); - if(retval.found == 1) //check if tags,lemma of current word match with anaphor tags in arx file + if(retval.found == 1) //check if tags,lemma of current word match with anaphor in arx file { unique_LU anaphor_LU = input_LU; vector temp_pos_tags = anaphor_LU.pos_tags; - temp_pos_tags.push_back(L"anaphor"); //add the tag to the anaphor pos tags - anaphor_LU.pos_tags = temp_pos_tags; //add the modified pos tags to the LU + temp_pos_tags.push_back(L"anaphor"); + anaphor_LU.pos_tags = temp_pos_tags; - context.back().push_back(anaphor_LU); //add modified anaphor LU to the context + context.back().push_back(anaphor_LU); apply_indicators(anaphor_LU, arx_file, retval.parameter_name, debug_flag); - context.back().pop_back(); //remove modified anaphor LU now that scoring is done - context.back().push_back(input_LU); //add normal LU to the context (so that anaphor tag doesn't remain in context) - //NOTE: tag is only for CURRENT anaphor + context.back().pop_back(); + context.back().push_back(input_LU); return 1; //To show that something will be added in side ref } @@ -128,23 +127,21 @@ int Scoring::add_word(int input_id, wstring input_wordform, vector< wstring > in } - return 0; //To show that nothing will be added in side ref + return 0; } void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring parameter_name, int debug_flag) { int distance_marker = 2; //starts from 2 for current sentence and reduces till -1 as we go to previous sentences int temp_score; - int firstNP; //first NP flag + int firstNP; - antecedent_list.clear(); //clear it from the last anaphor + antecedent_list.clear(); - //Go through the context and add properties based on external file - deque< vector > context_with_prop = add_properties(context, arx_file); //dont add properties in the actual context (might wanna change) + deque< vector > context_with_prop = add_properties(context, arx_file); distance_marker = distance_marker - context_with_prop.size() + 1; //set distance to earliest sentence based on number of sentences in context - //Get scores for markables in a variable unordered_map all_markables_score = arx_file.get_all_markables_score(); unordered_map parameter_markables_score = arx_file.get_parameter_markables_score(parameter_name); @@ -158,11 +155,11 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring par } //Start going through sentences(earliest to current) and apply all indicators to modify scores of the NPs - for(deque< vector >::iterator i = context_with_prop.begin(); i!=context_with_prop.end(); ++i) //read through the queue in reverse + for(deque< vector >::iterator i = context_with_prop.begin(); i!=context_with_prop.end(); ++i) { - firstNP = 1; //firstNP flag true + firstNP = 1; - for (vector::iterator j = (*i).begin(); j!=(*i).end(); ++j) //read through sentence + for (vector::iterator j = (*i).begin(); j!=(*i).end(); ++j) { if(debug_flag) { @@ -173,13 +170,12 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring par cerr << "\n"; } - if(check_acceptable_tags((*j).pos_tags, (*j).sl_lemma, arx_file.get_parameters()[L"antecedent"][parameter_name]) ) // if it is antecedent (based on external xml file) + if(check_acceptable_tags((*j).pos_tags, (*j).sl_lemma, arx_file.get_parameters()[L"antecedent"][parameter_name])) { temp_score = 0; - unique_LU antecedent_LU = *j; //create a temp copy of the potential antecedent + unique_LU antecedent_LU = *j; - //Check Agreement if(check_agreement(antecedent_LU.pos_tags, anaphor.pos_tags)) { //Add or Remove Indicators Here @@ -188,7 +184,7 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring par //Boosting Indicators if(firstNP) { - temp_score += 1; //First NP + temp_score += 1; firstNP = 0; } @@ -197,30 +193,21 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring par //Indicators from XML file (iterate through all markables that provided a score without mentioning parameter_name) for(unordered_map::iterator x = all_markables_score.begin(); x != all_markables_score.end(); ++x) { - //cout << "Checking for: "; - //wcout << x->first; - //cout << "\n"; - - if(contains(antecedent_LU.properties, x->first)) //if markable name present in current antecedent + if(contains(antecedent_LU.properties, x->first)) { - temp_score += x->second; //Add score to the temp score (could be negative also) + temp_score += x->second; } } //Now get the scores from the markables that mentioned this specific parameter name for(unordered_map::iterator x = parameter_markables_score.begin(); x != parameter_markables_score.end(); ++x) { - //cout << "Checking for: "; - //wcout << x->first; - //cout << "\n"; - - if(contains(antecedent_LU.properties, x->first)) //if markable name present in current antecedent + if(contains(antecedent_LU.properties, x->first)) { - temp_score += x->second; //Add score to the temp score (could be negative also) + temp_score += x->second; } } - //Add to Antecedent List with Score antecedent antecedent_with_score = {antecedent_LU, temp_score}; antecedent_list.push_back(antecedent_with_score); } @@ -271,7 +258,7 @@ wstring Scoring::get_antecedent(int debug_flag) cerr << " : " << (*it).score << "\n"; } - if((*it).score >= final_antecedent.score) //picking the highest scored and latest added (most recent) antecedent + if((*it).score >= final_antecedent.score) final_antecedent = (*it); } @@ -289,8 +276,8 @@ wstring Scoring::get_antecedent(int debug_flag) return final_antecedent.LU.tl_wordform; } -void Scoring::clear() //use a destructor? +void Scoring::clear() { - context.clear(); //empty queue - antecedent_list.clear(); //empty antecedent list + context.clear(); + antecedent_list.clear(); } diff --git a/src/score.h b/src/score.h index f08d189..3f70621 100644 --- a/src/score.h +++ b/src/score.h @@ -35,7 +35,7 @@ class Scoring { private: deque< vector > context; //A queue of sentences. Each sentence is a vector of Lexical Units - vector antecedent_list; //A list of antecedents + vector antecedent_list; public: int add_word(int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, wstring input_sl_lemma, wstring input_tl_lemma, ParseArx arx_file, int debug_flag);