commit 470ac71436255e9791782e4ec7dee1d07e3ae517 Author: Tanmai Khanna Date: Sat Jul 6 02:37:06 2019 +0530 Implemented Referential Distance | Code to return highest indicator | Boilerplate for other Antecedent Indicators diff --git a/src/anaphora.cc b/src/anaphora.cc index d0ed191..8ac52c0 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -102,16 +102,17 @@ int main(int argc, char **argv) if(!tl_form.empty()) //if TL exists { - score_module.add_word(gen_id, sl_form, sl_tags, tl_form); + int retval; - /* - if( (contains(sl_tags, L"det") && contains(sl_tags, L"pos") ) )//|| contains(temp_tags, L"prn") || contains(temp_tags, L"vblex") || contains(temp_tags, L"vbser") || contains(temp_tags, L"vbhaver") || contains(temp_tags, L"vbmod") ) - //if TL tags has det and pos OR just prn OR any verb + retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form); //Give word to Scoring Module + //If retval is 0, nothing will be added in side ref + + //If retval is 1, we call get_antecedent() and add it to ref + if(retval == 1) { final_ref = score_module.get_antecedent(); wcout << final_ref; //add antecedent to side ref of LU //CHANGE } - */ } input_stream.clear(); diff --git a/src/score.cc b/src/score.cc index 4203db2..aa5d68f 100644 --- a/src/score.cc +++ b/src/score.cc @@ -18,7 +18,6 @@ void showq(deque < vector > gq) for (std::vector::iterator i = temp_sentence.begin(); i != temp_sentence.end(); ++i) { wcout << (*i).wordform; - cout << ": " << (*i).score << " "; } cout << "\n"; @@ -53,9 +52,9 @@ int contains_any(vector tags, vector candidates) return 0; //if no matches } -void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform) +int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform) { - unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags, 0}; //initialise in context with score 0 + unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags}; //initialise in context with score 0 if(context.empty()) //if queue is empty { @@ -87,42 +86,50 @@ void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< ws else if( contains(input_LU.pos_tags, L"det") && contains(input_LU.pos_tags, L"pos") ) { apply_indicators(input_LU); + return 1; //To show that something will be added in side ref } } + + return 0; //To show that nothing will be added in side ref } void Scoring::apply_indicators(unique_LU anaphor) { int distance_marker = 2; //starts from 2 for current sentence and reduces till -1 as we go to previous sentences + int temp_score; + + antecedent_list.clear(); //clear it from the last anaphor (might not want it as a class variable) //Start going through sentences(current to earliest) and apply all indicators to modify scores of the NPs for(deque< vector >::reverse_iterator i = context.rbegin(); i!=context.rend(); ++i) //read through the queue in reverse { - cout<<"\nSentence:" << distance_marker << "\n"; for (vector::iterator j = (*i).begin(); j!=(*i).end(); ++j) //read through sentence { if(contains((*j).pos_tags, L"n")) { - unique_LU antecedent = *j; //create a temp copy of the potential antecedent + temp_score = 0; + + unique_LU antecedent_LU = *j; //create a temp copy of the potential antecedent //Check Agreement - if(check_agreement(antecedent.pos_tags, anaphor.pos_tags)) + if(check_agreement(antecedent_LU.pos_tags, anaphor.pos_tags)) { //Add or Remove Indicators Here - + temp_score += distance_marker; //Referential Distance (based on how close the antecedent is to the pronoun) //Boosting Indicators //Impeding Indicators - wcout << antecedent.wordform; - cout << ": " << antecedent.score << "\n"; + //Add to Antecedent List with Score + antecedent antecedent_with_score = {antecedent_LU, temp_score}; + antecedent_list.push_back(antecedent_with_score); } else { cout << "\nAgreement Failed for:"; - wcout << antecedent.wordform; + wcout << antecedent_LU.wordform; cout << "\n"; } } @@ -141,44 +148,28 @@ int Scoring::check_agreement(vector antecedent_tags, vector an if(contains(anaphor_tags, L"m") && contains(antecedent_tags, L"f")) return 0; - if(contains(anaphor_tags, L"sg") && contains(antecedent_tags, L"pl")) - return 0; - - if(contains(anaphor_tags, L"pl") && contains(antecedent_tags, L"sg")) - return 0; - return 1; } -/* -void Scoring::referential_distance(int distance) -{ - -} - wstring Scoring::get_antecedent() { - antecedent final_antecedent = {0, L"", -5}; + unique_LU final_antecedent_LU; + antecedent final_antecedent = {final_antecedent_LU, -5}; - for(vector::iterator it=antecedent_list.begin();it!=antecedent_list.end();++it) + for(vector::reverse_iterator it=antecedent_list.rbegin();it!=antecedent_list.rend();++it) //read it in reverse so that we read from furthest to nearest { - cout << "\n" << (*it).id << ": "; - wcout << (*it).wordform; + cout << "\n" << (*it).LU.id << ": "; + wcout << (*it).LU.wordform; cout << " : " << (*it).score << "\n"; if((*it).score >= final_antecedent.score) //picking the highest scored and latest added (most recent) antecedent - { - final_antecedent.id = (*it).id; - final_antecedent.wordform = (*it).wordform; - final_antecedent.score = (*it).score; - final_antecedent.tl_wordform = (*it).tl_wordform; - } + final_antecedent = (*it); } - return final_antecedent.tl_wordform; + return final_antecedent.LU.tl_wordform; } -*/ + void Scoring::clear() //use a destructor? { context.clear(); //empty queue diff --git a/src/score.h b/src/score.h index af846f1..4186363 100644 --- a/src/score.h +++ b/src/score.h @@ -14,6 +14,11 @@ struct unique_LU wstring wordform; wstring tl_wordform; vector pos_tags; +}; + +struct antecedent +{ + unique_LU LU; int score; }; @@ -26,14 +31,13 @@ class Scoring { private: deque< vector > context; //A queue of sentences. Each sentence is a vector of Lexical Units - vector antecedent_list; //A list of antecedents + vector antecedent_list; //A list of antecedents public: - void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform); + int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform); void apply_indicators(unique_LU anaphor); int check_agreement(vector antecedent_tags, vector anaphor_tags); - //void referential_distance(); - //wstring get_antecedent(); + wstring get_antecedent(); void clear(); };