commit b880e1d159bfc9cfa96a75e4cd84fd5bb9ea2bf8 Author: Tanmai Khanna Date: Mon Jul 22 05:09:41 2019 +0530 Pattern Matching DONE diff --git a/src/parse_ref.cc b/src/parse_ref.cc index c3b459f..0f2d862 100644 --- a/src/parse_ref.cc +++ b/src/parse_ref.cc @@ -1,3 +1,5 @@ +#include "parse_ref.h" + #include #include #include @@ -10,8 +12,6 @@ #include -#include "parse_ref.h" - void print_tags(vector< wstring > input) { for (int i = 0; i < input.size(); i++) @@ -207,19 +207,18 @@ void ParseRef::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_na { cur = cur->xmlChildrenNode; - vector temp_pattern_list; - //wcerr << markable_name; //cerr << "\n"; while (cur != NULL) { if ((!xmlStrcmp(cur->name, (const xmlChar *)"pattern"))) - temp_pattern_list = parsePatternItem(doc,cur); - - markables[markable_name].push_back(temp_pattern_list); - temp_pattern_list.clear(); + { + vector temp_pattern = parsePatternItem(doc,cur); + markables[markable_name].push_back(temp_pattern); + } + cur = cur->next; //cerr << "\n"; diff --git a/src/pattern_ref.cc b/src/pattern_ref.cc index 8bef1c9..508e724 100644 --- a/src/pattern_ref.cc +++ b/src/pattern_ref.cc @@ -8,6 +8,20 @@ using namespace std; +void print_markable(acceptable_patterns inp) +{ + for(acceptable_patterns::iterator i = inp.begin(); i != inp.end(); i++) + { + cerr <<"Pattern:\n"; + + for(vector::iterator j = (*i).begin(); j != (*i).end(); j++) + { + wcerr << (*j).name; + cerr << "\n"; + } + } +} + int contains(vector tags, wstring tag) { if(std::find(tags.begin(), tags.end(), tag) != tags.end()) @@ -69,18 +83,16 @@ int check_acceptable_tags(vector input_tags, acceptable_tags check_tags deque< vector > add_properties(deque< vector > context, ParseRef ref_file) { - unordered_map ref_markables = ref_file.markables; - unordered_map ref_cats = ref_file.cats; + unordered_map ref_markables = ref_file.get_markables(); + unordered_map ref_cats = ref_file.get_cats(); - for (unordered_map::iterator::it = ref_markables.begin(); it != ref_markables.end(); it++ ) //go through markables defined in xml file + for (unordered_map::iterator it = ref_markables.begin(); it != ref_markables.end(); it++ ) //go through markables defined in xml file { //for each markable wstring markable_name = it->first; acceptable_patterns patterns_list = it->second; - cout << "Markable: "; - wcout << markable_name; - cout << "\n"; + //print_markable(patterns_list); for(acceptable_patterns::iterator i = patterns_list.begin(); i!=patterns_list.end(); ++i) //go through patterns in the markable { @@ -88,18 +100,43 @@ deque< vector > add_properties(deque< vector > context, Pa vector current_pattern = *i; int len_pattern = current_pattern.size(); - cout << "New Pattern!!!\n"; - - for(deque< vector >::iterator m = context.begin(); m!=context.end(); ++m) //go through queue of context to get sentences + for(deque< vector >::iterator m = context.begin(); m!=context.end(); ++m) //go through sentences in the queue of context { - for (vector::iterator n = (*m).begin(); n!=(*m).end()-len+1; ++n) //go through sentence to look for the pattern with a sliding window of size = pattern length + if(len_pattern > (*m).size()) //if pattern is longer then sentence length then skip + continue; + + for (vector::iterator n = (*m).begin(); n+len_pattern-1 !=(*m).end(); ++n) //go through LUs in sentence to look for the pattern with a sliding window of size = pattern length { - //i have a current_pattern to compare with a window of pattern size in this sentence + int match_flag = 0; + + for(int x = 0; x < len_pattern; ++x) + { + //this is the window -- check if pattern matches + + acceptable_tags pattern_item_tags = ref_cats[current_pattern[x].name]; //get pattern item tags from def-cats + + if(check_acceptable_tags((*(n+x)).pos_tags, pattern_item_tags)) //comparing current LU tags to pattern tags + { + match_flag = 1; + + } + else + { + match_flag = 0; + break; + } + + //wcerr << (*(n+x)).wordform; + } - for(int x = 0; x <= len_pattern; ++x) + if(match_flag == 1) { - wcout << (*n+x).wordform; - cout << "\n"; + //Add Property to the LUs + cerr << "\n"; + wcerr << markable_name; + cerr << " Pattern Matched at: "; + wcerr << (*n).wordform; + cerr << "\n"; } } } diff --git a/src/pattern_ref.h b/src/pattern_ref.h index 47d648e..c5bb86d 100644 --- a/src/pattern_ref.h +++ b/src/pattern_ref.h @@ -29,6 +29,6 @@ int contains_any(vector tags, vector candidates); int check_acceptable_tags(vector input_tags, acceptable_tags check_tags); -deque< vector > add_properties(deque< vector > context, ParseRef ref_file) +deque< vector > add_properties(deque< vector > context, ParseRef ref_file); #endif \ No newline at end of file diff --git a/src/score.cc b/src/score.cc index 0096c22..b835b77 100644 --- a/src/score.cc +++ b/src/score.cc @@ -80,7 +80,7 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseRef ref_file) antecedent_list.clear(); //clear it from the last anaphor //Go through the context and add properties based on external file - //add_properties(context, ref_parameters) + context = add_properties(context, ref_file); //Start going through sentences(current to earliest) and apply all indicators to modify scores of the NPs