commit 985b008c248999733436f762f388e196a58adef5
Author: Tanmai Khanna <khanna.tanmai@gmail.com>
Date:   Tue Jun 18 01:42:12 2019 +0530

    Anaphora module coded in C++ (slight bug to fix)

diff --git a/src/anaphora.cc b/src/anaphora.cc
index 96439c1..d42edf3 100644
--- a/src/anaphora.cc
+++ b/src/anaphora.cc
@@ -1,43 +1,134 @@
+#include "parse_biltrans.h"
+
 #include<cstdio>
 #include<fstream>
 #include<string>
+#include<iostream>
 
-int main()
+using namespace std;
+
+void print(vector<char> const &input)
 {
-	char input_char;
+	/* Write the characters of input to stdout verbatim, in order,
+	 * with one buffered call; an empty vector writes nothing. */
+	if(!input.empty())
+		fwrite(input.data(), sizeof(char), input.size(), stdout);
+}
 
-	input_char = fgetc(stdin);
+/* Returns 1 when tag is one of the elements of tags, 0 otherwise. */
+int contains(vector< vector<char> > const &tags, vector<char> const &tag)
+{
+	// const references: both arguments are only read, so nothing is copied
+	bool found = std::find(tags.begin(), tags.end(), tag) != tags.end();
+	return found ? 1 : 0;
+}
 
-	char input_stream[100] = "";
-	char output_stream[100] = "";
+vector<char> string_to_vector(const char *string_in) // const: callers pass string literals
+{
+	vector<char> temp;
 
-	int flag_LU = 0;
+	for(const char *p = string_in; *p != '\0'; ++p) // copy up to, not including, the NUL
+		temp.push_back(*p);
 
-	while(input_char!=EOF)
-	{
-		fprintf(stdout, "%c",input_char);
+	return temp;
+}
 
-		if(input_char == '^')
-			flag_LU = 1;
+int main()
+{
+	int input_char; //int, not char: keeps fgetc's EOF distinct from the byte 0xFF
 
-		if(flag_LU == 1) //Part of an LU
-			strcat(input_stream, string(input_char))
+	input_char = fgetc(stdin);
 
-		if(flag_LU == 0) //Not Part of an LU
-			fprintf(stdout, "%c", input_char);
+	vector<char> input_stream;
+	vector<char> last_noun;
 
-		if(input_char == '$')
-		{
-			flag_LU = 0;
+	vector<char> temp_form;
+	vector< vector<char> > temp_tags;
 
-			LU = parse(input_stream);
+	vector<char> antecedent_tag = string_to_vector("n");
 
-		}
+	vector<char> r1_tag1 = string_to_vector("det");
+	vector<char> r1_tag2 = string_to_vector("pos");
 
+	vector<char> r2_tag1 = string_to_vector("prn");
 
+	int flag_LU = 0;
 
+	while(input_char!=EOF)
+	{
 
-		input_char = fgetc(stdin);
+		/* Two states: flag_LU is 0 outside ^...$ and 1 inside. Every
+		 * character read is echoed to stdout; at the closing '$' a '/'
+		 * is written first, followed by the last seen noun when the
+		 * LU's target side is tagged det+pos or prn. */
+		if(input_char == '\\') //dealing with escaped characters
+		{
+			/* Echo the backslash and the character it escapes verbatim;
+			 * inside an LU both are also buffered for the parser. */
+			fprintf(stdout, "%c", input_char);
+			if(flag_LU == 1)
+				input_stream.push_back(static_cast<char>(input_char));
+
+			input_char = fgetc(stdin);
+
+			/* A backslash as the very last byte has nothing to escape;
+			 * stop instead of echoing the EOF marker as a character. */
+			if(input_char == EOF)
+				break;
+
+			fprintf(stdout, "%c", input_char);
+			if(flag_LU == 1)
+				input_stream.push_back(static_cast<char>(input_char));
+		}
+		else if(flag_LU == 0) //Not Part of an LU
+		{
+			//Text between LUs is passed through untouched
+			fprintf(stdout, "%c", input_char);
+
+			if(input_char == '^')
+				flag_LU = 1;
+		}
+		else if(input_char == '$') //End of the LU
+		{
+			fprintf(stdout, "/"); //for extra LU
+
+			flag_LU = 0;
+
+			/* The buffer holds the text between '^' and '$' (both
+			 * excluded), which is what the parser expects. */
+			LexicalUnit LU(input_stream); //Parse Lexical Unit using parse_biltrans
+
+			temp_form = LU.get_tl_form();
+			temp_tags = LU.get_tl_tags();
+
+			if(!temp_form.empty()) //if TL exists
+			{
+				if(contains(temp_tags, antecedent_tag))
+					/* if TL contains antecedent tag */
+				{
+					last_noun = temp_form;
+				}
+
+				if((contains(temp_tags, r1_tag1) && contains(temp_tags, r1_tag2)) || contains(temp_tags, r2_tag1))
+					/* if TL tags has det and pos OR just prn*/
+				{
+					print(last_noun); //add last seen noun to LU
+				}
+			}
+
+			input_stream.clear();
+
+			fprintf(stdout, "%c", input_char); //the closing '$'
+		}
+		else //Part of an LU
+		{
+			//Still inside the LU: buffer the character for the parser
+			input_stream.push_back(static_cast<char>(input_char));
+			fprintf(stdout, "%c", input_char);
+		}
+
+		//Read the next character on every path, the escape branch included
+		input_char = fgetc(stdin);
 	}
 
 	//fclose(fin);
diff --git a/src/parse_biltrans.cc b/src/parse_biltrans.cc
index 07ab1fe..5b77d3d 100644
--- a/src/parse_biltrans.cc
+++ b/src/parse_biltrans.cc
@@ -1,140 +1,129 @@
+#include "parse_biltrans.h"
+
 #include<vector>
 #include<iostream>
 
 using namespace std;
 
-class LexicalUnit
+LexicalUnit::LexicalUnit(vector<char> input_LU) //splits "sl/tl" text into forms and tag lists
 {
+	int seenSlash = 0; //count of unescaped '/' read so far; 0 means still on the SL side
+	int seenTag = 0; //incremented at '<'; 1 means a tag is currently being read
 
-private:
-
-	vector<char> sl_form;
-	vector<char> tl_form;
-
-	vector< vector<char> > sl_tags;
-	vector< vector<char> > tl_tags;
+	vector<char> temptag; //characters of the tag being read, without '<' and '>'
 
-public:
-
-	LexicalUnit(vector<char> input_LU)
+	for (auto i = input_LU.begin(); i != input_LU.end(); ++i)
 	{
-		int seenSlash = 0;
-		int seenTag = 0;
-
-		vector<char> temptag;
-
-		for (auto i = input_LU.begin(); i != input_LU.end(); ++i)
-		{
-			if(*i == '\\') //dealing with escaped characters
+		if(*i == '\\') //escape: copy the backslash and the next character as plain text
+		{	
+			if(seenSlash == 0) //sl
 			{	
-				if(seenSlash == 0) //sl
-				{	
-					if(seenTag == 1) //in a tag
-					{
-						temptag.push_back(*i);
-						sl_form.push_back(*i);
-						++i;
-						temptag.push_back(*i);
-						sl_form.push_back(*i);
-					}
-					else //not in a tag
-					{
-						sl_form.push_back(*i);
-						++i;
-						sl_form.push_back(*i);
-					}
+				if(seenTag == 1) //in a tag
+				{
+					temptag.push_back(*i);
+					sl_form.push_back(*i);
+					if(++i == input_LU.end()) break; //lone trailing backslash: nothing left to read
+					temptag.push_back(*i);
+					sl_form.push_back(*i);
+				}
+				else //not in a tag
+				{
+					sl_form.push_back(*i);
+					if(++i == input_LU.end()) break; //lone trailing backslash: nothing left to read
+					sl_form.push_back(*i);
+				}
+			}
+			else //tl
+			{
+				if(seenTag == 1) //in a tag
+				{
+					temptag.push_back(*i);
+					tl_form.push_back(*i);
+					if(++i == input_LU.end()) break; //lone trailing backslash: nothing left to read
+					temptag.push_back(*i);
+					tl_form.push_back(*i);
 				}
-				else //tl
+				else //not in a tag
 				{
-					if(seenTag == 1) //in a tag
-					{
-						temptag.push_back(*i);
-						tl_form.push_back(*i);
-						++i;
-						temptag.push_back(*i);
-						tl_form.push_back(*i);
-					}
-					else //not in a tag
-					{
-						tl_form.push_back(*i);
-						++i;
-						tl_form.push_back(*i);
-					}
+					tl_form.push_back(*i);
+					if(++i == input_LU.end()) break; //lone trailing backslash: nothing left to read
+					tl_form.push_back(*i);
 				}
 			}
+		}
 
-			else if(*i == '/')
-				seenSlash++;
+		else if(*i == '/') //separator: not stored in either form
+			seenSlash++;
 
-			else if(seenSlash == 0) //sl
-			{
-				sl_form.push_back(*i); //add to the sl form
+		else if(seenSlash == 0) //sl
+		{
+			sl_form.push_back(*i); //add to the sl form
 
-				if(*i == '<') //start reading tag
-					seenTag++;
+			if(*i == '<') //start reading tag
+				seenTag++;
 
-				else if(seenTag == 1) //inside a tag
+			else if(seenTag == 1) //inside a tag
+			{
+				if(*i == '>') //if tag ends
+				{
+					seenTag--;
+					sl_tags.push_back(temptag); //add tag to list of sl tags
+
+					temptag.clear();
+				}
+				else
 				{
-					if(*i == '>') //if tag ends
-					{
-						seenTag--;
-						sl_tags.push_back(temptag); //add tag to list of sl tags
-
-						temptag.clear();
-					}
-					else
-					{
-						temptag.push_back(*i); //add char to current tag
-					}
+					temptag.push_back(*i); //add char to current tag
 				}
 			}
+		}
 
-			else //tl
-			{
-				tl_form.push_back(*i); //add to the tl form
+		else //tl
+		{
+			tl_form.push_back(*i); //add to the tl form
+
+			if(*i == '<') //start reading tag
+				seenTag++;
 
-				if(*i == '<') //start reading tag
-					seenTag++;
+			else if(seenTag == 1) //inside a tag
+			{
+				if(*i == '>') //if tag ends
+				{
+					seenTag--;
+					tl_tags.push_back(temptag); //add tag to list of tl tags
 
-				else if(seenTag == 1) //inside a tag
+					temptag.clear();
+				}
+				else
 				{
-					if(*i == '>') //if tag ends
-					{
-						seenTag--;
-						tl_tags.push_back(temptag); //add tag to list of tl tags
-
-						temptag.clear();
-					}
-					else
-					{
-						temptag.push_back(*i); //add char to current tag
-					}
+					temptag.push_back(*i); //add char to current tag
 				}
 			}
 		}
 	}
+}
 
-	vector<char> get_sl_form()
-	{
-		return sl_form;
-	}
+vector<char> LexicalUnit::get_sl_form()
+{
+	return sl_form; //returned by value: the caller receives a copy of the SL form
+}
 
-	vector<char> get_tl_form()
-	{
-		return tl_form;
-	}
+vector<char> LexicalUnit::get_tl_form()
+{
+	return tl_form; //returned by value: the caller receives a copy of the TL form
+}
 
-	vector< vector<char> > get_sl_tags()
-	{
-		return sl_tags;
-	}
+vector< vector<char> > LexicalUnit::get_sl_tags()
+{
+	return sl_tags; //returned by value: the caller receives a copy of the SL tag list
+}
 
-	vector< vector<char> > get_tl_tags()
-	{
-		return tl_tags;
-	}
+vector< vector<char> > LexicalUnit::get_tl_tags()
+{
+	return tl_tags; //returned by value: the caller receives a copy of the TL tag list
+}
 	
-};
+/* Uncomment to test this code
 
 void print(vector<char> const &input)
 {
@@ -182,5 +171,7 @@ int main()
 	return 0;
 }
 
+*/
+
 
 
diff --git a/src/parse_biltrans.h b/src/parse_biltrans.h
new file mode 100644
index 0000000..87351fd
--- /dev/null
+++ b/src/parse_biltrans.h
@@ -0,0 +1,64 @@
+#ifndef PARSE_BILTRANS_H
+#define PARSE_BILTRANS_H
+
+#include<vector>
+
+/* Names are std::-qualified here so that including this header does
+ * not pull `using namespace std` into every file that includes it. */
+/**
+ * Parsing Lexical Unit from biltrans for the Anaphora Module
+ */
+class LexicalUnit
+{
+
+private:
+	/**
+	 * Source language word and tags
+	 */
+	std::vector<char> sl_form;
+
+	/**
+	 * Target language word and tags
+	 */
+	std::vector<char> tl_form;
+
+	/**
+	 * Source language tags
+	 */
+	std::vector< std::vector<char> > sl_tags;
+
+	/**
+	 * Target language tags
+	 */
+	std::vector< std::vector<char> > tl_tags;
+
+public:
+	/**
+	 * Constructor to fill all variables
+	 * @param input_LU one lexical unit between ^ and $ (excluded)
+	 */
+	LexicalUnit(std::vector<char> input_LU);
+
+	/**
+	 * Return the Source Language Form
+	 */
+	std::vector<char> get_sl_form();
+
+	/**
+	 * Return the Target Language Form
+	 */
+	std::vector<char> get_tl_form();
+
+	/**
+	 * Return the Source Language Tags
+	 */
+	std::vector< std::vector<char> > get_sl_tags();
+
+	/**
+	 * Return the Target Language Tags
+	 */
+	std::vector< std::vector<char> > get_tl_tags();
+
+};
+
+#endif
\ No newline at end of file