commit 2aa708116435b733cb9b4a61a4061db74780ca8c Author: Tanmai Khanna Date: Fri Aug 16 01:41:01 2019 +0530 Change all ref to arx diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b40e4a8..128f726 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ add_executable(apertium-anaphora - pattern_ref.cc - parse_ref.cc + pattern_arx.cc + parse_arx.cc parse_biltrans.cc score.cc anaphora.cc diff --git a/src/anaphora.cc b/src/anaphora.cc index 6da52e9..9416ce7 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -17,10 +17,10 @@ */ -#include "parse_ref.h" +#include "parse_arx.h" #include "parse_biltrans.h" #include "score.h" -#include "pattern_ref.h" +#include "pattern_arx.h" #include @@ -55,7 +55,7 @@ static int debug_flag; //flag set by --debug int main(int argc, char **argv) { - char *refFileName = nullptr; + char *arxFileName = nullptr; int nullFlush = 0; @@ -115,7 +115,7 @@ int main(int argc, char **argv) if(argc - optind != 1) help_message(argv[0]); - refFileName = argv[optind]; //Name of Ref File is the remaining argument + arxFileName = argv[optind]; //Name of Arx File is the remaining argument wchar_t input_char; @@ -130,8 +130,8 @@ int main(int argc, char **argv) vector sl_tags; vector tl_tags; - ParseRef ref_file; - ref_file.parseDoc(refFileName); + ParseArx arx_file; + arx_file.parseDoc(arxFileName); int flag_LU = 0; @@ -205,7 +205,7 @@ int main(int argc, char **argv) { int retval; - retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, ref_file); //Give word to Scoring Module + retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, arx_file); //Give word to Scoring Module //If retval is 0, nothing will be added in side ref //If retval is 1, we call get_antecedent() and add it to ref diff --git a/src/parse_ref.cc b/src/parse_arx.cc similarity index 88% rename from src/parse_ref.cc rename to src/parse_arx.cc index a164355..99477e2 100644 --- a/src/parse_ref.cc +++ b/src/parse_arx.cc @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -#include "parse_ref.h" +#include "parse_arx.h" #include #include @@ -37,7 +37,7 @@ void print_tags(const vector& input) } } -vector ParseRef::parseTags (wstring tags) +vector ParseArx::parseTags (wstring tags) { vector temp_tags_list; @@ -72,7 +72,7 @@ vector ParseRef::parseTags (wstring tags) return temp_tags_list; } -void ParseRef::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parameter_name) +void ParseArx::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parameter_name) { xmlChar *Attr; cur = cur->xmlChildrenNode; @@ -102,7 +102,7 @@ void ParseRef::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parame return; } -void ParseRef::parseParameters (xmlDocPtr doc, xmlNodePtr cur) +void ParseArx::parseParameters (xmlDocPtr doc, xmlNodePtr cur) { wstring parameter_name; @@ -126,7 +126,7 @@ void ParseRef::parseParameters (xmlDocPtr doc, xmlNodePtr cur) return; } -void ParseRef::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name) +void ParseArx::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name) { xmlChar *Attr; cur = cur->xmlChildrenNode; @@ -155,7 +155,7 @@ void ParseRef::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name) return; } -void ParseRef::parseCats (xmlDocPtr doc, xmlNodePtr cur) +void ParseArx::parseCats (xmlDocPtr doc, xmlNodePtr cur) { xmlChar *Attr; cur = cur->xmlChildrenNode; @@ -176,7 +176,7 @@ void ParseRef::parseCats (xmlDocPtr doc, xmlNodePtr cur) return; } -vector ParseRef::parsePatternItem (xmlDocPtr doc, xmlNodePtr cur) +vector ParseArx::parsePatternItem (xmlDocPtr doc, xmlNodePtr cur) { xmlChar *Attr; cur = cur->xmlChildrenNode; @@ -218,7 +218,7 @@ vector ParseRef::parsePatternItem (xmlDocPtr doc, xmlNodePtr c return temp_pattern; } -void ParseRef::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_name) +void ParseArx::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_name) { xmlChar *Attr; @@ -253,7 +253,7 @@ void ParseRef::parsePatterns (xmlDocPtr doc, xmlNodePtr cur, wstring markable_na return; } -void ParseRef::parseMarkables (xmlDocPtr doc, xmlNodePtr cur) +void ParseArx::parseMarkables (xmlDocPtr doc, xmlNodePtr cur) { xmlChar *Attr; cur = cur->xmlChildrenNode; @@ -275,7 +275,7 @@ void ParseRef::parseMarkables (xmlDocPtr doc, xmlNodePtr cur) return; } -void ParseRef::parseDoc(char *docname) +void ParseArx::parseDoc(char *docname) { xmlDocPtr doc; xmlNodePtr cur; @@ -329,22 +329,22 @@ void ParseRef::parseDoc(char *docname) return; } -unordered_map ParseRef::get_parameters() +unordered_map ParseArx::get_parameters() { return parameters; } -unordered_map ParseRef::get_cats() +unordered_map ParseArx::get_cats() { return cats; } -unordered_map ParseRef::get_markables() +unordered_map ParseArx::get_markables() { return markables; } -unordered_map ParseRef::get_markables_score() +unordered_map ParseArx::get_markables_score() { return markables_score; } @@ -362,7 +362,7 @@ int main(int argc, char **argv) docname = argv[1]; - ParseRef ref; + ParseArx ref; ref.parseDoc(docname); diff --git a/src/parse_ref.h b/src/parse_arx.h similarity index 97% rename from src/parse_ref.h rename to src/parse_arx.h index dde2f0a..92eae7e 100644 --- a/src/parse_ref.h +++ b/src/parse_arx.h @@ -16,8 +16,8 @@ * along with this program. If not, see . */ -#ifndef _PARSEREF_ -#define _PARSEREF_ +#ifndef _PARSEARX_ +#define _PARSEARX_ #include #include @@ -42,7 +42,7 @@ typedef vector< vector > acceptable_patterns; void print_tags(vector< wstring > input); -class ParseRef +class ParseArx { private: unordered_map parameters; //parameter name mapped to the acceptable tag lists diff --git a/src/pattern_ref.cc b/src/pattern_arx.cc similarity index 93% rename from src/pattern_ref.cc rename to src/pattern_arx.cc index df5e828..2232e9c 100644 --- a/src/pattern_ref.cc +++ b/src/pattern_arx.cc @@ -16,8 +16,8 @@ * along with this program. If not, see . */ -#include "pattern_ref.h" -#include "parse_ref.h" +#include "pattern_arx.h" +#include "parse_arx.h" #include #include @@ -100,12 +100,12 @@ int check_acceptable_tags(vector input_tags, acceptable_tags check_tags //check_acceptable_patterns(vector) -deque< vector > add_properties(deque< vector > context, ParseRef ref_file) +deque< vector > add_properties(deque< vector > context, ParseArx arx_file) { - unordered_map ref_markables = ref_file.get_markables(); - unordered_map ref_cats = ref_file.get_cats(); + unordered_map arx_markables = arx_file.get_markables(); + unordered_map arx_cats = arx_file.get_cats(); - for (unordered_map::iterator it = ref_markables.begin(); it != ref_markables.end(); it++ ) //go through markables defined in xml file + for (unordered_map::iterator it = arx_markables.begin(); it != arx_markables.end(); it++ ) //go through markables defined in xml file { //for each markable wstring markable_name = it->first; @@ -132,7 +132,7 @@ deque< vector > add_properties(deque< vector > context, Pa { //this is the window -- check if pattern matches - acceptable_tags pattern_item_tags = ref_cats[current_pattern[x].name]; //get pattern item tags from def-cats + acceptable_tags pattern_item_tags = arx_cats[current_pattern[x].name]; //get pattern item tags from def-cats if(check_acceptable_tags((*(n+x)).pos_tags, pattern_item_tags)) //comparing current LU tags to pattern tags { diff --git a/src/pattern_ref.h b/src/pattern_arx.h similarity index 92% rename from src/pattern_ref.h rename to src/pattern_arx.h index 9f8539d..a4ecca3 100644 --- a/src/pattern_ref.h +++ b/src/pattern_arx.h @@ -16,10 +16,10 @@ * along with this program. If not, see . */ -#ifndef _PATTERNREF_ -#define _PATTERNREF_ +#ifndef _PATTERNARX_ +#define _PATTERNARX_ -#include "parse_ref.h" +#include "parse_arx.h" #include #include @@ -47,6 +47,6 @@ int contains_any(vector tags, vector candidates); int check_acceptable_tags(vector input_tags, acceptable_tags check_tags); -deque< vector > add_properties(deque< vector > context, ParseRef ref_file); +deque< vector > add_properties(deque< vector > context, ParseArx arx_file); #endif \ No newline at end of file diff --git a/src/score.cc b/src/score.cc index 20a0cea..6a37806 100644 --- a/src/score.cc +++ b/src/score.cc @@ -17,8 +17,8 @@ */ #include "score.h" -#include "parse_ref.h" -#include "pattern_ref.h" +#include "parse_arx.h" +#include "pattern_arx.h" #include #include @@ -45,10 +45,10 @@ void showq(deque < vector > gq) cerr << '\n'; } -int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, ParseRef ref_file) +int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, ParseArx arx_file) { vector temp_prop; - unordered_map ref_parameters = ref_file.get_parameters(); + unordered_map arx_parameters = arx_file.get_parameters(); unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags, temp_prop}; //initialise LU @@ -59,7 +59,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst context.push_back(sentence); - if(check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"delimiter"]) ) //if sentence end (somehow the first LU is a sentence end) + if(check_acceptable_tags(input_LU.pos_tags, arx_parameters[L"delimiter"]) ) //if sentence end (somehow the first LU is a sentence end) { vector new_sentence; @@ -70,7 +70,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst { context.back().push_back(input_LU); //add word to the latest added sentence in the queue - if(check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"delimiter"]) ) + if(check_acceptable_tags(input_LU.pos_tags, arx_parameters[L"delimiter"]) ) { vector new_sentence; @@ -79,9 +79,9 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst if(context.size() > 4) context.pop_front(); //remove the earliest added sentence (We only want current and three previous sentences in context) } - else if( check_acceptable_tags(input_LU.pos_tags, ref_parameters[L"anaphor"]) ) //check if tags of current word match with anaphor tags in ref file + else if( check_acceptable_tags(input_LU.pos_tags, arx_parameters[L"anaphor"]) ) //check if tags of current word match with anaphor tags in arx file { - apply_indicators(input_LU, ref_file); + apply_indicators(input_LU, arx_file); return 1; //To show that something will be added in side ref } } @@ -89,7 +89,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst return 0; //To show that nothing will be added in side ref } -void Scoring::apply_indicators(unique_LU anaphor, ParseRef ref_file) +void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file) { int distance_marker = 2; //starts from 2 for current sentence and reduces till -1 as we go to previous sentences int temp_score; @@ -98,12 +98,12 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseRef ref_file) antecedent_list.clear(); //clear it from the last anaphor //Go through the context and add properties based on external file - deque< vector > context_with_prop = add_properties(context, ref_file); //dont add properties in the actual context (might wanna change) + deque< vector > context_with_prop = add_properties(context, arx_file); //dont add properties in the actual context (might wanna change) distance_marker = distance_marker - context_with_prop.size() + 1; //set distance to earliest sentence based on number of sentences in context //Get scores for markables in a variable - unordered_map markables_score = ref_file.get_markables_score(); + unordered_map markables_score = arx_file.get_markables_score(); //Start going through sentences(earliest to current) and apply all indicators to modify scores of the NPs for(deque< vector >::iterator i = context_with_prop.begin(); i!=context_with_prop.end(); ++i) //read through the queue in reverse @@ -118,7 +118,7 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseRef ref_file) //print_tags((*j).properties); //cerr << "\n"; - if(check_acceptable_tags((*j).pos_tags, ref_file.get_parameters()[L"antecedent"]) ) // if it is antecedent (based on external xml file) + if(check_acceptable_tags((*j).pos_tags, arx_file.get_parameters()[L"antecedent"]) ) // if it is antecedent (based on external xml file) { temp_score = 0; diff --git a/src/score.h b/src/score.h index a6eaf5d..ddba92a 100644 --- a/src/score.h +++ b/src/score.h @@ -19,8 +19,8 @@ #ifndef _MITKOVSCORE_ #define _MITKOVSCORE_ -#include "parse_ref.h" -#include "pattern_ref.h" +#include "parse_arx.h" +#include "pattern_arx.h" #include #include @@ -38,8 +38,8 @@ private: vector antecedent_list; //A list of antecedents public: - int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, ParseRef ref_file); - void apply_indicators(unique_LU anaphor, ParseRef ref_file); + int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, ParseArx arx_file); + void apply_indicators(unique_LU anaphor, ParseArx arx_file); int check_agreement(vector antecedent_tags, vector anaphor_tags); wstring get_antecedent(); void clear();