commit 8697249752caffa95eb1cfed77cfbd7fbbe64790 Author: Tanmai Khanna Date: Mon Jul 1 17:32:58 2019 +0530 Added Scoring Capability | Implemented Referential Distance Boosting Indicator diff --git a/src/anaphora.cc b/src/anaphora.cc index 6f8fdf7..34e0cac 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -19,15 +19,15 @@ int main(int argc, char **argv) nullFlush = 1; } - vector< sal_score > scores; - sal_score unit; - char input_char; input_char = fgetc(stdin); //change to fgetwc ? wstring input_stream; - wstring last_noun; + + wstring final_ref; + Scoring score_module; + unsigned int gen_id = 0; wstring temp_form; vector< wstring > temp_tags; @@ -38,11 +38,11 @@ int main(int argc, char **argv) { if(nullFlush && input_char == '\0') //nullFlush { - scores.clear(); input_stream.clear(); - last_noun.clear(); temp_form.clear(); temp_tags.clear(); + gen_id = 0; + score_module.clear(); flag_LU = 0; } @@ -82,7 +82,9 @@ int main(int argc, char **argv) { if(input_char == '$') { - fprintf(stdout, "/"); //for extra LU + gen_id++; //generate ids for LUs + + fprintf(stdout, "/"); //for adding ref flag_LU = 0; ParseLexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans @@ -92,21 +94,18 @@ int main(int argc, char **argv) if(!temp_form.empty()) //if TL exists { - if(contains(temp_tags, L"n")) - /* if TL contains antecedent tag */ - { - last_noun = temp_form; - } + score_module.add_word(gen_id, temp_form, temp_tags); - if( (contains(temp_tags, L"det") && contains(temp_tags, L"pos") ) || contains(temp_tags, L"prn") || contains(temp_tags, L"vblex") || contains(temp_tags, L"vbser") || contains(temp_tags, L"vbhaver") || contains(temp_tags, L"vbmod") ) + if( (contains(temp_tags, L"det") && contains(temp_tags, L"pos") ) )//|| contains(temp_tags, L"prn") || contains(temp_tags, L"vblex") || contains(temp_tags, L"vbser") || contains(temp_tags, L"vbhaver") || contains(temp_tags, L"vbmod") ) /* if TL tags has det and pos OR just prn OR any verb*/ { - wcout << last_noun; //add last seen noun to LU //CHANGE + //cout << "\n\nHERE!!\n\n"; + final_ref = score_module.get_antecedent(); + wcout << final_ref; //add antecedent to side ref of LU } } input_stream.clear(); - } else { diff --git a/src/score.cc b/src/score.cc index 5970803..f59fcee 100644 --- a/src/score.cc +++ b/src/score.cc @@ -1,2 +1,77 @@ #include "score.h" +#include +#include +#include + +using namespace std; + +int contains(vector tags, wstring tag) +{ + if(std::find(tags.begin(), tags.end(), tag) != tags.end()) + return 1; + else + return 0; +} + +int contains_any(vector tags, vector candidates) +{ + for(vector::iterator it=candidates.begin();it!=candidates.end();++it) + { + if(std::find(tags.begin(), tags.end(), *it) != tags.end()) + return 1; //if any of the tags in candidates matches the tags list + } + + return 0; //if no matches +} + +void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags) +{ + unique_LU input_LU = {input_id, input_wordform}; + context.push_back(input_LU); //add to context + + if(contains(pos_tags, L"n")) //if word is a noun, add to antecedents list with score=2 as it is in current context(referential distance) + { + antecedent input_antecedent = {input_id, input_wordform, 2}; + antecedent_list.push_back(input_antecedent); + } + + if(contains(pos_tags, L"sent")) //if reached sentence boundary, reduce scores (referential distance) + referential_distance(); +} + +void Scoring::referential_distance() +{ + for(vector::iterator it=antecedent_list.begin();it!=antecedent_list.end();++it) + { + if((*it).score > -2) //-2 is minimum score + (*it).score--; + } +} + +wstring Scoring::get_antecedent() +{ + antecedent final_antecedent = {0, L"", -5}; + + for(vector::iterator it=antecedent_list.begin();it!=antecedent_list.end();++it) + { + cout << "\n" << (*it).id << ": "; + wcout << (*it).wordform; + cout << " : " << (*it).score << "\n"; + + if((*it).score >= final_antecedent.score) //picking the highest scored and latest added (most recent) antecedent + { + final_antecedent.id = (*it).id; + final_antecedent.wordform = (*it).wordform; + final_antecedent.score = (*it).score; + } + } + + return final_antecedent.wordform; +} + +void Scoring::clear() +{ + context.clear(); + antecedent_list.clear(); +} \ No newline at end of file diff --git a/src/score.h b/src/score.h index 9dca34e..fce7404 100644 --- a/src/score.h +++ b/src/score.h @@ -4,6 +4,7 @@ #include #include +using namespace std; struct unique_LU { @@ -18,24 +19,8 @@ struct antecedent int score; }; -int contains(vector tags, wstring tag) -{ - if(std::find(tags.begin(), tags.end(), tag) != tags.end()) - return 1; - else - return 0; -} - -int contains_any(vector tags, vector candidates) -{ - for(vector::iterator it=candidates.begin();it!=candidates.end();++it) - { - if(std::find(tags.begin(), tags.end(), *it) != tags.end()) - return 1; //if any of the tags in candidates matches the tags list - } - - return 0; //if no matches -} +int contains(vector tags, wstring tag); +int contains_any(vector tags, vector candidates); class Scoring { @@ -44,19 +29,10 @@ private: vector antecedent_list; public: - void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags) - { - unique_LU input_LU = {input_id, input_wordform}; - context.push_back(input_LU); //add to context - - if(contains(pos_tags, L"n")) //if word is a noun, add to antecedents list with score=0 - { - antecedent input_antecedent = {input_id, input_wordform, 0}; - } - } - - void apply_indicators(); //Need to first understand what each indicator needs and how to apply - + void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags); + void referential_distance(); + wstring get_antecedent(); + void clear(); };