commit 249ea5823781ff15c65b007897426feea6f7ae63 Author: Tanmai Khanna Date: Sun Jun 30 05:39:54 2019 +0530 Initial Code for Mitkov Scoring Module | Decisions made | Few modifications to Anaphora Code diff --git a/src/anaphora.cc b/src/anaphora.cc index 997deba..6f8fdf7 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -1,4 +1,5 @@ #include "parse_biltrans.h" +#include "score.h" #include #include @@ -8,31 +9,6 @@ using namespace std; -int contains(vector tags, wstring tag) -{ - if(std::find(tags.begin(), tags.end(), tag) != tags.end()) - return 1; - else - return 0; -} - -int contains_any(vector tags, vector candidates) -{ - for(vector::iterator it=candidates.begin();it!=candidates.end();++it) - { - if(std::find(tags.begin(), tags.end(), *it) != tags.end()) - return 1; //if any of the tags in candidates matches the tags list - } - - return 0; //if no matches -} - -struct sal_score -{ - wstring wordform; - int score; -}; - int main(int argc, char **argv) { int nullFlush = 0; @@ -109,7 +85,7 @@ int main(int argc, char **argv) fprintf(stdout, "/"); //for extra LU flag_LU = 0; - LexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans + ParseLexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans temp_form = LU.get_tl_form(); temp_tags = LU.get_tl_tags(); diff --git a/src/parse_biltrans.cc b/src/parse_biltrans.cc index 2e9d82a..e9ace79 100644 --- a/src/parse_biltrans.cc +++ b/src/parse_biltrans.cc @@ -6,7 +6,7 @@ using namespace std; -LexicalUnit::LexicalUnit(wstring input_LU) +ParseLexicalUnit::ParseLexicalUnit(wstring input_LU) { int seenSlash = 0; int seenTag = 0; @@ -113,22 +113,22 @@ LexicalUnit::LexicalUnit(wstring input_LU) } } -wstring LexicalUnit::get_sl_form() +wstring ParseLexicalUnit::get_sl_form() { return sl_form; } -wstring LexicalUnit::get_tl_form() +wstring ParseLexicalUnit::get_tl_form() { return tl_form; } -vector< wstring > LexicalUnit::get_sl_tags() +vector< wstring > ParseLexicalUnit::get_sl_tags() { return sl_tags; } -vector< wstring > LexicalUnit::get_tl_tags() +vector< wstring > ParseLexicalUnit::get_tl_tags() { return tl_tags; } @@ -158,7 +158,7 @@ int main() input_char = fgetc(stdin); } - LexicalUnit lu(inputlu); + ParseLexicalUnit lu(inputlu); cout << "SL: "; wcout << lu.get_sl_form(); diff --git a/src/parse_biltrans.h b/src/parse_biltrans.h index 321f811..63a45ed 100644 --- a/src/parse_biltrans.h +++ b/src/parse_biltrans.h @@ -9,7 +9,7 @@ using namespace std; /** * Parsing Lexical Unit from biltrans for the Anaphora Module */ -class LexicalUnit +class ParseLexicalUnit { private: @@ -38,7 +38,7 @@ public: * Constructor to fill all variables * @param input_LU one lexical unit between ^ and $ (excluded) */ - LexicalUnit(wstring input_LU); + ParseLexicalUnit(wstring input_LU); /** * Return the Source Language Form diff --git a/src/score.cc b/src/score.cc new file mode 100644 index 0000000..5970803 --- /dev/null +++ b/src/score.cc @@ -0,0 +1,2 @@ +#include "score.h" + diff --git a/src/score.h b/src/score.h new file mode 100644 index 0000000..9dca34e --- /dev/null +++ b/src/score.h @@ -0,0 +1,63 @@ +#ifndef _MITKOVSCORE_ +#define _MITKOVSCORE_ + +#include +#include + + +struct unique_LU +{ + unsigned int id; + wstring wordform; +}; + +struct antecedent +{ + unsigned int id; + wstring wordform; + int score; +}; + +int contains(vector tags, wstring tag) +{ + if(std::find(tags.begin(), tags.end(), tag) != tags.end()) + return 1; + else + return 0; +} + +int contains_any(vector tags, vector candidates) +{ + for(vector::iterator it=candidates.begin();it!=candidates.end();++it) + { + if(std::find(tags.begin(), tags.end(), *it) != tags.end()) + return 1; //if any of the tags in candidates matches the tags list + } + + return 0; //if no matches +} + +class Scoring +{ +private: + vector context; + vector antecedent_list; + +public: + void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags) + { + unique_LU input_LU = {input_id, input_wordform}; + context.push_back(input_LU); //add to context + + if(contains(pos_tags, L"n")) //if word is a noun, add to antecedents list with score=0 + { + antecedent input_antecedent = {input_id, input_wordform, 0}; + } + } + + void apply_indicators(); //Need to first understand what each indicator needs and how to apply + +}; + + +#endif \ No newline at end of file