commit 7babad1600dce8b0182c12866ce7c54028684156 Author: Tanmai Khanna Date: Thu Jul 4 02:22:27 2019 +0530 Completely change score.cc now uses queues and vectors to add words diff --git a/src/score.cc b/src/score.cc index 7d3f0cf..c6ec31d 100644 --- a/src/score.cc +++ b/src/score.cc @@ -3,9 +3,29 @@ #include #include #include +#include using namespace std; +void showq(queue gq) +{ + queue g = gq; + while (!g.empty()) + { + cout << '\t' << g.front(); + g.pop(); + } + cout << '\n'; +} + +void clearq(queue q) +{ + while(!q.empty()) + { + q.pop(); + } +} + int contains(vector tags, wstring tag) { if(std::find(tags.begin(), tags.end(), tag) != tags.end()) @@ -25,33 +45,37 @@ int contains_any(vector tags, vector candidates) return 0; //if no matches } -Scoring::Scoring() -{ - firstNP_flag = 1; -} - -void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform) +void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform) { - unique_LU input_LU = {input_id, input_wordform}; - context.push_back(input_LU); //add to context + unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags, 0}; //initialise in context with score 0 - if(contains(pos_tags, L"n")) //if word is a noun, add to antecedents list with score=2 as it is in current context(referential distance) + if(context.empty()) //if queue is empty { - antecedent input_antecedent = {input_id, input_wordform, 2, input_tl_wordform}; + vector sentence; //initialise a sentence + sentence.push_back(input_LU); //add the first word to the sentence - if(firstNP_flag == 1) + context.push(sentence); + + if(contains(input_LU.pos_tags, L"sent")) //if sentence end (somehow the first LU is a sentence end) { - input_antecedent.score++; //+1 for First NP in a sentence - firstNP_flag = 0; + vector new_sentence; + + context.push(new_sentence); //add an empty sentence } - - antecedent_list.push_back(input_antecedent); } - - if(contains(pos_tags, L"sent")) //if reached sentence boundary, reduce scores (referential distance) + else //if queue is not empty { - firstNP_flag = 1; - referential_distance(); + context.back().push_back(input_LU); //add word to the latest added sentence in the queue + + if((contains(input_LU.pos_tags, L"sent"))) + { + vector new_sentence; + + context.push(new_sentence); //add an empty sentence + + if(context.size() > 4) + context.pop(); //remove the earliest added sentence (We only want current and three previous sentences in context) + } } } diff --git a/src/score.h b/src/score.h index 62a2923..006260e 100644 --- a/src/score.h +++ b/src/score.h @@ -3,6 +3,7 @@ #include #include +#include using namespace std; @@ -10,14 +11,9 @@ struct unique_LU { unsigned int id; wstring wordform; -}; - -struct antecedent -{ - unsigned int id; - wstring wordform; - int score; wstring tl_wordform; + vector pos_tags; + int score; }; int contains(vector tags, wstring tag); @@ -26,13 +22,11 @@ int contains_any(vector tags, vector candidates); class Scoring { private: - vector context; - vector antecedent_list; - - int firstNP_flag; + queue< vector > context; //A queue of sentences. Each sentence is a vector of Lexical Units + unique_LU anaphor; + //vector antecedent_list; //A list of antecedents public: - Scoring(); void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform); void referential_distance(); wstring get_antecedent();