commit eea4f8bd8b642370b571ac97d84d30f6140bf1d5 Author: Tanmai Khanna Date: Tue Jul 2 00:04:34 2019 +0530 Anaphora Resolution now happens with SL | Added wchar everywhere diff --git a/src/anaphora.cc b/src/anaphora.cc index 34e0cac..3f4e14b 100644 --- a/src/anaphora.cc +++ b/src/anaphora.cc @@ -19,9 +19,9 @@ int main(int argc, char **argv) nullFlush = 1; } - char input_char; + wchar_t input_char; - input_char = fgetc(stdin); //change to fgetwc ? + input_char = fgetwc(stdin); //change to fgetwc ? wstring input_stream; @@ -29,42 +29,48 @@ int main(int argc, char **argv) Scoring score_module; unsigned int gen_id = 0; - wstring temp_form; - vector< wstring > temp_tags; + wstring sl_form; + wstring tl_form; + vector sl_tags; + vector tl_tags; int flag_LU = 0; while(input_char!=EOF) // should I made feof(input_char) ? { - if(nullFlush && input_char == '\0') //nullFlush + if(nullFlush && input_char == L'\0') //nullFlush { input_stream.clear(); - temp_form.clear(); - temp_tags.clear(); + sl_form.clear(); + tl_form.clear(); + sl_tags.clear(); + tl_tags.clear(); gen_id = 0; score_module.clear(); + final_ref.clear(); + flag_LU = 0; } - else if(input_char == '\\') //dealing with escaped characters + else if(input_char == L'\\') //dealing with escaped characters { if(flag_LU == 0) // not inside LU { - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); input_char = fgetc(stdin); - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); } else //inside LU { input_stream.push_back(input_char); - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); input_char = fgetc(stdin); - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); input_stream.push_back(input_char); } } @@ -72,36 +78,37 @@ int main(int argc, char **argv) { if(flag_LU == 0) //Not Part of an LU { - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); - if(input_char == '^') + if(input_char == L'^') flag_LU = 1; } else if(flag_LU == 1) //Part of an LU { - if(input_char == '$') + if(input_char == L'$') { gen_id++; //generate ids for LUs - fprintf(stdout, "/"); //for adding ref + fwprintf(stdout, L"/"); //for adding ref flag_LU = 0; ParseLexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans - temp_form = LU.get_tl_form(); - temp_tags = LU.get_tl_tags(); + tl_form = LU.get_tl_form(); + tl_tags = LU.get_tl_tags(); + sl_form = LU.get_sl_form(); + sl_tags = LU.get_sl_tags(); - if(!temp_form.empty()) //if TL exists + if(!tl_form.empty()) //if TL exists { - score_module.add_word(gen_id, temp_form, temp_tags); + score_module.add_word(gen_id, sl_form, sl_tags, tl_form); - if( (contains(temp_tags, L"det") && contains(temp_tags, L"pos") ) )//|| contains(temp_tags, L"prn") || contains(temp_tags, L"vblex") || contains(temp_tags, L"vbser") || contains(temp_tags, L"vbhaver") || contains(temp_tags, L"vbmod") ) + if( (contains(sl_tags, L"det") && contains(sl_tags, L"pos") ) )//|| contains(temp_tags, L"prn") || contains(temp_tags, L"vblex") || contains(temp_tags, L"vbser") || contains(temp_tags, L"vbhaver") || contains(temp_tags, L"vbmod") ) /* if TL tags has det and pos OR just prn OR any verb*/ { - //cout << "\n\nHERE!!\n\n"; final_ref = score_module.get_antecedent(); - wcout << final_ref; //add antecedent to side ref of LU + wcout << final_ref; //add antecedent to side ref of LU //CHANGE } } @@ -112,12 +119,12 @@ int main(int argc, char **argv) input_stream.push_back(input_char); } - fprintf(stdout, "%c", input_char); + fwprintf(stdout, L"%C", input_char); } } - input_char = fgetc(stdin); + input_char = fgetwc(stdin); } //fclose(fin); diff --git a/src/score.cc b/src/score.cc index f59fcee..82f4b3e 100644 --- a/src/score.cc +++ b/src/score.cc @@ -25,14 +25,14 @@ int contains_any(vector tags, vector candidates) return 0; //if no matches } -void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags) +void Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform) { unique_LU input_LU = {input_id, input_wordform}; context.push_back(input_LU); //add to context if(contains(pos_tags, L"n")) //if word is a noun, add to antecedents list with score=2 as it is in current context(referential distance) { - antecedent input_antecedent = {input_id, input_wordform, 2}; + antecedent input_antecedent = {input_id, input_wordform, 2, input_tl_wordform}; antecedent_list.push_back(input_antecedent); } @@ -44,7 +44,7 @@ void Scoring::referential_distance() { for(vector::iterator it=antecedent_list.begin();it!=antecedent_list.end();++it) { - if((*it).score > -2) //-2 is minimum score + if((*it).score > -1) //-1 is minimum score (*it).score--; } } @@ -64,10 +64,11 @@ wstring Scoring::get_antecedent() final_antecedent.id = (*it).id; final_antecedent.wordform = (*it).wordform; final_antecedent.score = (*it).score; + final_antecedent.tl_wordform = (*it).tl_wordform; } } - return final_antecedent.wordform; + return final_antecedent.tl_wordform; } void Scoring::clear() diff --git a/src/score.h b/src/score.h index fce7404..b82f43d 100644 --- a/src/score.h +++ b/src/score.h @@ -17,6 +17,7 @@ struct antecedent unsigned int id; wstring wordform; int score; + wstring tl_wordform; }; int contains(vector tags, wstring tag); @@ -29,7 +30,7 @@ private: vector antecedent_list; public: - void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags); + void add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform); void referential_distance(); wstring get_antecedent(); void clear();