commit 528ae004fad525598470f5b145058cd8f913d270 Author: Tanmai Khanna Date: Sat Jul 20 17:33:44 2019 +0530 Naive Pattern Matching(Need to fix bugs) diff --git a/src/anaphora.cc b/src/anaphora.cc index a721d33..6f8e577 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -134,7 +134,7 @@ int main(int argc, char **argv) { int retval; - retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, ref_file.get_parameters()); //Give word to Scoring Module + retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, ref_file); //Give word to Scoring Module //If retval is 0, nothing will be added in side ref //If retval is 1, we call get_antecedent() and add it to ref diff --git a/src/pattern_ref.cc b/src/pattern_ref.cc new file mode 100644 index 0000000..e5c3903 --- /dev/null +++ b/src/pattern_ref.cc @@ -0,0 +1,105 @@ +#include "pattern_ref.h" +#include "parse_ref.h" + +#include +#include + +using namespace std; + +int contains(vector tags, wstring tag) +{ + if(std::find(tags.begin(), tags.end(), tag) != tags.end()) + return 1; + else + return 0; +} + +int contains_any(vector tags, vector candidates) +{ + for(vector::iterator it=candidates.begin();it!=candidates.end();++it) + { + if(std::find(tags.begin(), tags.end(), *it) != tags.end()) + return 1; //if any of the tags in candidates matches the tags list + } + + return 0; //if no matches +} + +int check_acceptable_tags(vector input_tags, acceptable_tags check_tags) //all tags in any tag list in check_tags must exist in input_tags +{ + for (acceptable_tags::iterator i = check_tags.begin(); i != check_tags.end(); ++i) + { + + int flag_contains_all = 1; + + vector temp_tags = *i; + + for(std::vector::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) + { + + if(*j == L"*") //ignore * in the tags list + continue; + + if(!contains(input_tags, *j)) //if the required tag is NOT in the input LU tags + { + flag_contains_all = 0; + break; + } + /* + else + { + cerr << "FoundTag:"; + wcerr << *j; + cerr <<"\n"; + } + */ + } + + if(flag_contains_all == 1) //if any tag list fully matched + return 1; //else continue to next tag list + } + + return 0; //if it didn't return 1 then no tag list was fully matched +} + +check_acceptable_patterns(vector) + + +deque< vector > add_properties(deque< vector > context, ParseRef ref_file) +{ + unordered_map ref_markables = ref_file.markables; + unordered_map ref_cats = ref_file.cats; + + for (unordered_map::iterator::it = ref_markables.begin(); it != ref_markables.end(); it++ ) //go through markables defined in xml file + { + //for each markable + wstring markable_name = it->first; + acceptable_patterns patterns_list = it->second; + + for(acceptable_patterns::iterator i = patterns_list.begin(); i!=patterns_list.end(); ++i) //go through patterns in the markable + { + //for each pattern + vector current_pattern = *i; + + for(deque< vector >::iterator j = context.begin(); j!=context.end(); ++j) //go through queue of context to get sentences + { + for (vector::iterator j = (*i).begin(); j!=(*i).end(); ++j) //go through sentence to look for the pattern with a sliding window of size = pattern length + { + + } + } + + } + + } + + /* + for(deque< vector >::iterator i = context.begin(); i!=context.end(); ++i) + { + + } + */ + + return context; +} + diff --git a/src/pattern_ref.h b/src/pattern_ref.h new file mode 100644 index 0000000..a16c01b --- /dev/null +++ b/src/pattern_ref.h @@ -0,0 +1,16 @@ +#ifndef _PATTERNREF_ +#define _PATTERNREF_ + +#include "parse_ref.h" + +#include +#include + +using namespace std; + +int contains(vector tags, wstring tag); +int contains_any(vector tags, vector candidates); + +int check_acceptable_tags(vector input_tags, acceptable_tags check_tags); + +#endif \ No newline at end of file diff --git a/src/score.cc b/src/score.cc index e4b5de8..e059a2e 100644 --- a/src/score.cc +++ b/src/score.cc @@ -1,5 +1,6 @@ #include "score.h" #include "parse_ref.h" +#include "pattern_ref.h" #include #include @@ -34,65 +35,12 @@ void clearq(queue < vector > q) } } -int contains(vector tags, wstring tag) +int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, ParseRef ref_file) { - if(std::find(tags.begin(), tags.end(), tag) != tags.end()) - return 1; - else - return 0; -} - -int contains_any(vector tags, vector candidates) -{ - for(vector::iterator it=candidates.begin();it!=candidates.end();++it) - { - if(std::find(tags.begin(), tags.end(), *it) != tags.end()) - return 1; //if any of the tags in candidates matches the tags list - } - - return 0; //if no matches -} - -int check_acceptable_tags(vector input_tags, acceptable_tags check_tags) -{ - for (acceptable_tags::iterator i = check_tags.begin(); i != check_tags.end(); ++i) - { - - int flag_contains_all = 1; - - vector temp_tags = *i; - - for(std::vector::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) - { + vector temp_prop; + unordered_map ref_parameters = ref_file.get_parameters(); - if(*j == L"*") //ignore * in the tags list - continue; - - if(!contains(input_tags, *j)) //if the required tag is NOT in the input LU tags - { - flag_contains_all = 0; - break; - } - /* - else - { - cerr << "FoundTag:"; - wcerr << *j; - cerr <<"\n"; - } - */ - } - - if(flag_contains_all == 1) //if any tag list fully matched - return 1; //else continue to next tag list - } - - return 0; //if it didn't return 1 then no tag list was fully matched -} - -int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, unordered_map ref_parameters) -{ - unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags}; //initialise in context with score 0 + unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags, temp_prop}; //initialise LU if(context.empty()) //if queue is empty { @@ -123,7 +71,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst } else if( check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"anaphor"]) ) //check if tags of current word match with anaphor tags in ref file { - apply_indicators(input_LU, ref_parameters); + apply_indicators(input_LU, ref_file); return 1; //To show that something will be added in side ref } } @@ -131,7 +79,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst return 0; //To show that nothing will be added in side ref } -void Scoring::apply_indicators(unique_LU anaphor, unordered_map ref_parameters) +void Scoring::apply_indicators(unique_LU anaphor, ParseRef ref_file) { int distance_marker = 2; //starts from 2 for current sentence and reduces till -1 as we go to previous sentences int temp_score; @@ -139,6 +87,10 @@ void Scoring::apply_indicators(unique_LU anaphor, unordered_map >::reverse_iterator i = context.rbegin(); i!=context.rend(); ++i) //read through the queue in reverse { @@ -146,7 +98,7 @@ void Scoring::apply_indicators(unique_LU anaphor, unordered_map::iterator j = (*i).begin(); j!=(*i).end(); ++j) //read through sentence { - if(check_acceptable_tags((*j).pos_tags, ref_parameters[L"antecedent"]) ) // if it is antecedent (based on external xml file) + if(check_acceptable_tags((*j).pos_tags, ref_file.get_parameters()[L"antecedent"]) ) // if it is antecedent (based on external xml file) { temp_score = 0; diff --git a/src/score.h b/src/score.h index 5336af5..10cc34f 100644 --- a/src/score.h +++ b/src/score.h @@ -16,6 +16,7 @@ struct unique_LU wstring wordform; wstring tl_wordform; vector pos_tags; + vector properties; }; struct antecedent @@ -26,11 +27,6 @@ struct antecedent void showq(deque < vector > gq); -int contains(vector tags, wstring tag); -int contains_any(vector tags, vector candidates); - -int check_acceptable_tags(vector input_tags, acceptable_tags check_tags); - class Scoring { private: @@ -38,8 +34,8 @@ private: vector antecedent_list; //A list of antecedents public: - int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, unordered_map ref_parameters); - void apply_indicators(unique_LU anaphor, unordered_map ref_parameters); + int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, ParseRef ref_file); + void apply_indicators(unique_LU anaphor, ParseRef ref_file); int check_agreement(vector antecedent_tags, vector anaphor_tags); wstring get_antecedent(); void clear();