commit 391ebde6b54dae40646bfcf5ab52c50e4be99242
Author: Tanmai Khanna <khanna.tanmai@gmail.com>
Date:   Wed Jun 17 16:24:21 2020 +0530

    Can add lemmas in cat-items/parameter-items now! | New test added

diff --git a/src/anaphora.cc b/src/anaphora.cc
index 2a54f62..d690191 100644
--- a/src/anaphora.cc
+++ b/src/anaphora.cc
@@ -173,6 +173,8 @@ int main(int argc, char **argv)
 	wstring tl_form;
 	vector<wstring> sl_tags;
 	vector<wstring> tl_tags;
+  wstring sl_lemma;
+  wstring tl_lemma;
 
 	ParseArx arx_file;
 	int parse_arx_retval = arx_file.parseDoc(arxFileName);
@@ -196,6 +198,8 @@ int main(int argc, char **argv)
 			tl_form.clear();
 			sl_tags.clear();
 			tl_tags.clear();
+      sl_lemma.clear();
+      tl_lemma.clear();
 			gen_id = 0;
 			score_module.clear();
 
@@ -251,12 +255,14 @@ int main(int argc, char **argv)
 					tl_tags = LU.get_tl_tags();
 					sl_form = LU.get_sl_form();
 					sl_tags = LU.get_sl_tags();
+          sl_lemma = LU.get_sl_lemma();
+          tl_lemma = LU.get_tl_lemma();
 
 					if(!tl_form.empty()) //if TL exists
 					{
 						int retval;
 
-						retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, arx_file, debug_flag); //Give word to Scoring Module
+						retval = score_module.add_word(gen_id, sl_form, sl_tags, tl_form, sl_lemma, tl_lemma, arx_file, debug_flag); //Give word to Scoring Module
 						//If retval is 0, nothing will be added in side ref
 
 						//If retval is 1, we call get_antecedent() and add it to ref
diff --git a/src/anaphora.dtd b/src/anaphora.dtd
index 5998af8..0a2d811 100644
--- a/src/anaphora.dtd
+++ b/src/anaphora.dtd
@@ -16,8 +16,9 @@
  -->
  
 <!ELEMENT parameter-item EMPTY>
-<!ATTLIST parameter-item has-tags CDATA #REQUIRED
-                         exclude-tags CDATA #IMPLIED>
+<!ATTLIST parameter-item has-tags CDATA #IMPLIED
+                         exclude-tags CDATA #IMPLIED
+                         lemma CDATA #IMPLIED>
 <!--
     Each 'parameter-item' represents a set of tags, using the attribute "has-tags".
     This will match only Lexical Units which have the tags inside this attribute (space-separated list).
@@ -58,13 +59,15 @@
 -->
 
 <!ELEMENT cat-item EMPTY>
-<!ATTLIST cat-item has-tags CDATA #REQUIRED
-                   exclude-tags CDATA #IMPLIED>
+<!ATTLIST cat-item has-tags CDATA #IMPLIED
+                   exclude-tags CDATA #IMPLIED
+                   lemma CDATA #IMPLIED>
 <!--
-    Each 'cat-item' represents a set of tags, using the attribute "has-tags".
-    This will match only Lexical Units which have the tags inside this attribute (space-separated list).
+    Each 'cat-item' represents a Lexical Unit we want to match, using several attributes.
+    "has-tags" will match only Lexical Units which have the tags inside this attribute (space-separated list).
     Another optional attribute is "exclude-tags", which defines a set of tags that should not be in the
     Lexical Unit for it to match.
+    "lemma" will match only Lexical Units whose source-language lemma equals this value (the comparison ignores case).
 -->
 
 <!ELEMENT section-markables (markable+)>
diff --git a/src/parse_arx.cc b/src/parse_arx.cc
index 943512c..174e228 100644
--- a/src/parse_arx.cc
+++ b/src/parse_arx.cc
@@ -65,10 +65,6 @@ vector<wstring> ParseArx::parseTags (wstring tags)
 	if(!temptag.empty()) //if any tag remaining
 		temp_tags_list.push_back(temptag);
 
-	//print_tags(temp_tags_list);
-
-	//cerr << "\n";
-
 	return temp_tags_list;
 }
 
@@ -77,29 +73,39 @@ void ParseArx::parseParameterItem (xmlDocPtr doc, xmlNodePtr cur, wstring parame
 {
 	xmlChar *Attr;
 	cur = cur->xmlChildrenNode;
-
-	pair< vector <wstring>, vector <wstring> > temp_tags_list;
-
-	while (cur != NULL)
-	{
-	    if ((!xmlStrcmp(cur->name, (const xmlChar *)"parameter-item")))
-	    {
-	    	Attr = xmlGetProp(cur, (const xmlChar *)"has-tags");
-        temp_tags_list.first = parseTags(XMLParseUtil::towstring(Attr));
+  
+	item temp_item;
+
+  while (cur != NULL)
+  {
+      temp_item.has_tags.clear();
+      temp_item.exclude_tags.clear();
+      temp_item.lemma.clear();
+    
+      if ((!xmlStrcmp(cur->name, (const xmlChar *)"parameter-item")))
+      {
+        Attr = xmlGetProp(cur, (const xmlChar *)"has-tags");
+        if (Attr)
+        {
+          temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr));
+        }
         
         Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags");
         if (Attr)
         {
-          temp_tags_list.second = parseTags(XMLParseUtil::towstring(Attr));
+          temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr));
         }
         
-    		parameters[parameter_type][parameter_name].push_back(temp_tags_list);
-
-        temp_tags_list.first.clear();
-        temp_tags_list.second.clear();
+        Attr = xmlGetProp(cur, (const xmlChar *)"lemma");
+        if (Attr)
+        {
+          temp_item.lemma = XMLParseUtil::towstring(Attr);
+        }
+        
+        parameters[parameter_type][parameter_name].push_back(temp_item);
 
-		    xmlFree(Attr);
- 	    }
+        xmlFree(Attr);
+       }
 
 		cur = cur->next;
 	}
@@ -117,16 +123,9 @@ void ParseArx::parseParameterTypes (xmlDocPtr doc, xmlNodePtr cur, wstring param
 		if(cur->type == XML_ELEMENT_NODE)
 		{
 			parameter_type = XMLParseUtil::towstring(cur->name);
-      /*
-			cerr << "\nname: ";
-	    	wcerr << parameter_name;
-	    	cerr << "\ntype: ";
-	    	wcerr << parameter_type;
-	    	cerr << "\n";
-       */
-
-	    	parseParameterItem(doc, cur, parameter_type, parameter_name);
-	    }
+
+      parseParameterItem(doc, cur, parameter_type, parameter_name);
+    }
 
 		cur = cur->next;
 	}
@@ -166,28 +165,38 @@ void ParseArx::parseCatItem (xmlDocPtr doc, xmlNodePtr cur, wstring cat_name)
 	xmlChar *Attr;
 	cur = cur->xmlChildrenNode;
 
-	pair< vector <wstring>, vector <wstring> > temp_tags_list;
+	item temp_item;
 
-	while (cur != NULL)
-	{
-	    if ((!xmlStrcmp(cur->name, (const xmlChar *)"cat-item")))
-	    {
+  while (cur != NULL)
+  {
+      temp_item.has_tags.clear();
+      temp_item.exclude_tags.clear();
+      temp_item.lemma.clear();
+    
+      if ((!xmlStrcmp(cur->name, (const xmlChar *)"cat-item")))
+      {
         Attr = xmlGetProp(cur, (const xmlChar *)"has-tags");
-        temp_tags_list.first = parseTags(XMLParseUtil::towstring(Attr));
+        if (Attr)
+        {
+          temp_item.has_tags = parseTags(XMLParseUtil::towstring(Attr));
+        }
         
         Attr = xmlGetProp(cur, (const xmlChar *)"exclude-tags");
         if (Attr)
         {
-          temp_tags_list.second = parseTags(XMLParseUtil::towstring(Attr));
+          temp_item.exclude_tags = parseTags(XMLParseUtil::towstring(Attr));
         }
-		    cats[cat_name].push_back(temp_tags_list);
-
-        temp_tags_list.first.clear();
-        temp_tags_list.second.clear();
-
-		    xmlFree(Attr);
+        
+        Attr = xmlGetProp(cur, (const xmlChar *)"lemma");
+        if (Attr)
+        {
+          temp_item.lemma = XMLParseUtil::towstring(Attr);
+        }
+        
+        cats[cat_name].push_back(temp_item);
 
- 	    }
+        xmlFree(Attr);
+       }
 
 		cur = cur->next;
 	}
diff --git a/src/parse_arx.h b/src/parse_arx.h
index 90a39ab..eefe6da 100644
--- a/src/parse_arx.h
+++ b/src/parse_arx.h
@@ -30,7 +30,13 @@
 
 using namespace std;
 
-typedef vector< pair < vector<wstring>, vector<wstring> > > acceptable_tags; //a vector of pairs of tags to match and exclude
+struct item { //one cat-item or parameter-item entry as read from the .arx file
+  vector<wstring> has_tags; //tags listed in the "has-tags" attribute; empty when the attribute is absent
+  vector<wstring> exclude_tags; //tags listed in the "exclude-tags" attribute; empty when the attribute is absent
+  wstring lemma; //value of the "lemma" attribute; empty means no lemma constraint is applied
+};
+
+typedef vector<item> acceptable_tags;
 
 struct markable_pattern
 {
diff --git a/src/parse_biltrans.cc b/src/parse_biltrans.cc
index 6c6d822..4865ec8 100644
--- a/src/parse_biltrans.cc
+++ b/src/parse_biltrans.cc
@@ -48,8 +48,10 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU)
 				else //not in a tag
 				{
 					sl_form.push_back(*i);
+          sl_lemma.push_back(*i);
 					++i;
 					sl_form.push_back(*i);
+          sl_lemma.push_back(*i);
 				}
 			}
 			else if(seenSlash == 1) //tl (only first entry)
@@ -65,8 +67,10 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU)
 				else //not in a tag
 				{
 					tl_form.push_back(*i);
+          tl_lemma.push_back(*i);
 					++i;
 					tl_form.push_back(*i);
+          tl_lemma.push_back(*i);
 				}
 			}
 			else
@@ -99,6 +103,11 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU)
 					temptag.push_back(*i); //add char to current tag
 				}
 			}
+      
+      else
+      {
+        sl_lemma.push_back(*i);
+      }
 		}
 
 		else if(seenSlash == 1) //tl (only first entry in tl)
@@ -122,6 +131,11 @@ ParseLexicalUnit::ParseLexicalUnit(wstring input_LU)
 					temptag.push_back(*i); //add char to current tag
 				}
 			}
+      
+      else
+      {
+        tl_lemma.push_back(*i);
+      }
 		}
 
 		else //if tl has more than one entry
@@ -151,6 +165,16 @@ vector< wstring > ParseLexicalUnit::get_tl_tags()
 	return tl_tags;
 }
 
+wstring ParseLexicalUnit::get_sl_lemma() //accessor for the source-language lemma
+{
+  return sl_lemma; //returned by value: the string built up while the constructor parsed the lexical unit
+}
+
+wstring ParseLexicalUnit::get_tl_lemma() //accessor for the target-language lemma
+{
+  return tl_lemma; //returned by value: the string built up while the constructor parsed the lexical unit
+}
+
 /* //Uncomment to test this code
 
 void print_tags(vector< wstring > input)
@@ -191,5 +215,3 @@ int main()
 	return 0;
 }
 */
-
-
diff --git a/src/parse_biltrans.h b/src/parse_biltrans.h
index 6b4120a..4b0eff1 100644
--- a/src/parse_biltrans.h
+++ b/src/parse_biltrans.h
@@ -50,6 +50,16 @@ private:
    	 * Target language tags
      */
 	vector< wstring > tl_tags;
+  
+  /**
+      * Source language lemma
+     */
+  wstring sl_lemma;
+  
+  /**
+      * Target language lemma
+     */
+  wstring tl_lemma;
 
 public:
 	/**
@@ -77,7 +87,17 @@ public:
    	 * Return the Target Language Form
      */
 	vector< wstring > get_tl_tags();
+  
+  /**
+      * Return the Source Language Lemma
+     */
+  wstring get_sl_lemma();
+  
+  /**
+      * Return the Target Language Lemma
+     */
+  wstring get_tl_lemma();
 
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/pattern_arx.cc b/src/pattern_arx.cc
index a1d8a72..8b89b9d 100644
--- a/src/pattern_arx.cc
+++ b/src/pattern_arx.cc
@@ -24,6 +24,7 @@
 #include <deque>
 #include <iostream>
 #include <algorithm>
+#include <cwctype>
 
 using namespace std;
 
@@ -60,15 +61,23 @@ int contains_any(vector<wstring> tags, vector<wstring> candidates)
 	return 0; //if no matches
 }
 
-int check_acceptable_tags(vector<wstring> input_tags, acceptable_tags check_tags) //all tags in any tag list in check_tags must exist in input_tags
+void toLower(basic_string<wchar_t>& s) //lower-cases s in place; the caller's string is modified
+{
+   for (basic_string<wchar_t>::iterator p = s.begin(); p != s.end(); ++p)
+   {
+      *p = towlower(*p); //per-character conversion via towlower from <cwctype>
+   }
+}
+
+int check_acceptable_tags(vector<wstring> input_tags, wstring input_sl_lemma, acceptable_tags check_tags) //check has-tags, exclude-tags, lemma
 {
 	for (acceptable_tags::iterator i = check_tags.begin(); i != check_tags.end(); ++i)
 	{
 
 		int flag_contains_all = 1;
 
-    vector<wstring> temp_tags = i->first;
-    vector<wstring> temp_exclude_tags = i->second;
+    vector<wstring> temp_tags = i->has_tags;
+    vector<wstring> temp_exclude_tags = i->exclude_tags;
     
 		for(std::vector<wstring>::iterator j = temp_tags.begin(); j != temp_tags.end(); ++j) //check for the tags in has-tags
 		{
@@ -82,6 +91,11 @@ int check_acceptable_tags(vector<wstring> input_tags, acceptable_tags check_tags
 			}
 		}
     
+    if(flag_contains_all == 0)
+    {
+      continue;
+    }
+    
     for(std::vector<wstring>::iterator j = temp_exclude_tags.begin(); j != temp_exclude_tags.end(); ++j) //check for the tags in exclude-tags
     {
       if(contains(input_tags, *j)) //if the exclude-tag IS in the input LU tags
@@ -90,16 +104,49 @@ int check_acceptable_tags(vector<wstring> input_tags, acceptable_tags check_tags
         break;
       }
     }
-
-		if(flag_contains_all == 1) //if any tag list fully matched (i.e. has-tags present, exclude-tags absent)
-			return 1;
-    //else continue to next tag list
+    
+    if(flag_contains_all == 0)
+    {
+      continue;
+    }
+    
+    if(!(i->lemma).empty())
+    {
+      wstring temp_lemma = i->lemma;
+      
+      if(input_sl_lemma.length() == temp_lemma.length())
+      {
+        if(input_sl_lemma.compare(temp_lemma) != 0)
+        {
+          toLower(input_sl_lemma);
+          toLower(temp_lemma);
+          
+          if(input_sl_lemma.compare(temp_lemma) != 0)
+          {
+            flag_contains_all = 0;
+          }
+        }
+      }
+      else
+      {
+        flag_contains_all = 0;
+      }
+    }
+    
+    if(flag_contains_all == 0)
+    {
+      continue;
+    }
+    else //if any item fully matched (i.e. has-tags present, exclude-tags absent, and lemma equal when one is given)
+    {
+      return 1;
+    }
 	}
 
 	return 0; //if it didn't return 1 then no tag list was fully matched
 }
 
-parameter_return check_pattern_name(vector<wstring> input_tags, unordered_map<wstring, acceptable_tags> parameter_names)
+parameter_return check_pattern_name(vector<wstring> input_tags, wstring input_sl_lemma, unordered_map<wstring, acceptable_tags> parameter_names)
 //find out if any of the anaphors match wrt tags, and if yes, return the unique name
 {
 	parameter_return retval;
@@ -110,7 +157,7 @@ parameter_return check_pattern_name(vector<wstring> input_tags, unordered_map<ws
 		wstring parameter_name = it->first;
 		acceptable_tags parameter_tags= it->second;
 
-		if(check_acceptable_tags(input_tags, parameter_tags))
+		if(check_acceptable_tags(input_tags, input_sl_lemma, parameter_tags))
 		{
 			retval.found = 1;
 			retval.parameter_name = parameter_name;
@@ -157,7 +204,7 @@ deque< vector<unique_LU> > add_properties(deque< vector<unique_LU> > context, Pa
 
 						acceptable_tags pattern_item_tags = arx_cats[current_pattern[x].name]; //get pattern item tags from def-cats
 
-						if(check_acceptable_tags((*(n+x)).pos_tags, pattern_item_tags)) //comparing current LU tags to pattern tags
+						if(check_acceptable_tags((*(n+x)).pos_tags, (*(n+x)).sl_lemma, pattern_item_tags)) //comparing current LU tags to pattern tags and lemma
 						{
 							match_flag = 1;
 
diff --git a/src/pattern_arx.h b/src/pattern_arx.h
index 6422a5a..0a2ba2e 100644
--- a/src/pattern_arx.h
+++ b/src/pattern_arx.h
@@ -29,9 +29,11 @@ using namespace std;
 
 struct unique_LU
 {
-	unsigned int id;
+	int id;
 	wstring wordform;
 	wstring tl_wordform;
+  wstring sl_lemma;
+  wstring tl_lemma;
 	vector<wstring> pos_tags;
 	vector<wstring> properties;
 };
@@ -50,10 +52,11 @@ struct parameter_return
 
 int contains(vector<wstring> tags, wstring tag);
 int contains_any(vector<wstring> tags, vector<wstring> candidates);
+void toLower(basic_string<wchar_t>& s);
 
-int check_acceptable_tags(vector<wstring> input_tags, acceptable_tags check_tags);
-parameter_return check_pattern_name(vector<wstring> input_tags, unordered_map<wstring, acceptable_tags> parameter_names);
+int check_acceptable_tags(vector<wstring> input_tags, wstring input_sl_lemma, acceptable_tags check_tags);
+parameter_return check_pattern_name(vector<wstring> input_tags, wstring input_sl_lemma, unordered_map<wstring, acceptable_tags> parameter_names);
 
 deque< vector<unique_LU> > add_properties(deque< vector<unique_LU> > context, ParseArx arx_file);
 
-#endif
\ No newline at end of file
+#endif
diff --git a/src/score.cc b/src/score.cc
index 50f17cb..2724383 100644
--- a/src/score.cc
+++ b/src/score.cc
@@ -62,12 +62,12 @@ void showq(deque < vector<unique_LU> > gq) //to display context if needed (testi
     cerr << '\n';
 }
 
-int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, ParseArx arx_file, int debug_flag)
+int Scoring::add_word(int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, wstring input_sl_lemma, wstring input_tl_lemma, ParseArx arx_file, int debug_flag)
 {
 	vector<wstring> temp_prop;
 	parameters_datatype arx_parameters = arx_file.get_parameters();
 
-	unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_pos_tags, temp_prop}; //initialise LU
+	unique_LU input_LU = {input_id, input_wordform, input_tl_wordform, input_sl_lemma, input_tl_lemma, input_pos_tags, temp_prop}; //initialise LU
 
 	if(context.empty()) //if queue is empty
 	{
@@ -76,7 +76,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst
 
 		context.push_back(sentence);
 
-		if(check_acceptable_tags(input_LU.pos_tags, arx_parameters[L"delimiter"][L"default"]) ) //if sentence end (somehow the first LU is a sentence end)
+    if(check_acceptable_tags(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"delimiter"][L"default"]) ) //if sentence end (somehow the first LU is a sentence end)
 		{
 			vector<unique_LU> new_sentence;
 
@@ -85,7 +85,7 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst
 	}
 	else //if queue is not empty
 	{
-		if(check_acceptable_tags(input_LU.pos_tags, arx_parameters[L"delimiter"][L"default"]))
+    if(check_acceptable_tags(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"delimiter"][L"default"]))
 		{
 			context.back().push_back(input_LU); //add <sent> to context so that it can also be matched in a pattern
 
@@ -99,9 +99,9 @@ int Scoring::add_word(unsigned int input_id, wstring input_wordform, vector< wst
 
 		else 
 		{
-			parameter_return retval = check_pattern_name(input_LU.pos_tags, arx_parameters[L"anaphor"]);
+      parameter_return retval = check_pattern_name(input_LU.pos_tags, input_LU.sl_lemma, arx_parameters[L"anaphor"]);
 
-			if(retval.found == 1) //check if tags of current word match with anaphor tags in arx file
+			if(retval.found == 1) //check if tags,lemma of current word match with anaphor tags in arx file
 			{
 				unique_LU anaphor_LU = input_LU;
 
@@ -173,7 +173,7 @@ void Scoring::apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring par
 				cerr << "\n";
 			}
 
-			if(check_acceptable_tags((*j).pos_tags, arx_file.get_parameters()[L"antecedent"][parameter_name]) ) // if it is antecedent (based on external xml file)
+			if(check_acceptable_tags((*j).pos_tags, (*j).sl_lemma, arx_file.get_parameters()[L"antecedent"][parameter_name]) ) // if it is antecedent (based on external xml file)
 			{
 				temp_score = 0;
 
@@ -293,4 +293,4 @@ void Scoring::clear() //use a destructor?
 {
 	context.clear(); //empty queue
 	antecedent_list.clear(); //empty antecedent list
-}
\ No newline at end of file
+}
diff --git a/src/score.h b/src/score.h
index 95e3326..f08d189 100644
--- a/src/score.h
+++ b/src/score.h
@@ -38,7 +38,7 @@ private:
 	vector<antecedent> antecedent_list; //A list of antecedents
 
 public:
-	int add_word(unsigned int input_id, wstring input_wordform, vector< wstring > pos_tags, wstring input_tl_wordform, ParseArx arx_file, int debug_flag);
+  int add_word(int input_id, wstring input_wordform, vector< wstring > input_pos_tags, wstring input_tl_wordform, wstring input_sl_lemma, wstring input_tl_lemma, ParseArx arx_file, int debug_flag);
 	void apply_indicators(unique_LU anaphor, ParseArx arx_file, wstring parameter_name, int debug_flag);
 	int check_agreement(vector<wstring> antecedent_tags, vector<wstring> anaphor_tags);
 	wstring get_antecedent(int debug_flag);
@@ -46,4 +46,4 @@ public:
 };
 
 
-#endif
\ No newline at end of file
+#endif
diff --git a/tests/apertium-eng-spa.spa-eng.arx b/tests/apertium-eng-spa.spa-eng.arx
index 82c3edb..e02cb8e 100644
--- a/tests/apertium-eng-spa.spa-eng.arx
+++ b/tests/apertium-eng-spa.spa-eng.arx
@@ -9,7 +9,7 @@
 
 		<def-parameter n="detpos">
 			<anaphor>
-				<parameter-item has-tags="det pos"/>
+				<parameter-item has-tags="det pos" lemma="el seu"/>
 			</anaphor>
 
 			<antecedent>
@@ -28,6 +28,16 @@
 				<parameter-item has-tags="np"/>
 			</antecedent>
 		</def-parameter>
+    
+    <def-parameter n="lemtest">
+      <anaphor>
+        <parameter-item has-tags="lemtesta" lemma="test"/>
+      </anaphor>
+
+      <antecedent>
+        <parameter-item has-tags="lemtestant" lemma="paRlaMenT"/>
+      </antecedent>
+    </def-parameter>
 
 	</section-parameters>
 
@@ -71,6 +81,10 @@
     		<cat-item has-tags="cop"/>
     	</def-cat>
       
+      <def-cat n="cop2">
+        <cat-item has-tags="cop2" lemma="es"/>
+      </def-cat>
+      
       <def-cat n="adv">
         <cat-item has-tags="adv"/>
       </def-cat>
@@ -212,6 +226,16 @@
 
 			<score n="-5" parameter="detpos"/>
 		</markable>
+    
+    <markable n="Cop2">
+      <pattern>
+        <pattern-item n="nom"/>
+        <pattern-item n="cop2"/>
+        <pattern-item n="anaphor"/>
+      </pattern>
+
+      <score n="-5" parameter="lemtest"/>
+    </markable>
 		
 		<!-- Noun, CopulaVerb, Anaphor - in this order, would mean that the noun can't be its antecedent -->
 
diff --git a/tests/test_lemma.in b/tests/test_lemma.in
new file mode 100644
index 0000000..a9c57a7
--- /dev/null
+++ b/tests/test_lemma.in
@@ -0,0 +1,10 @@
+^El<det><def><m><pl>/The<det><def><m><pl>$ ^grup<lemtestant>/group<n><pl>$ ^test<n><m><pl>/grouptest<n><pl>$  ^de<pr>/of<pr>/from<pr>$
+^el<det><def><m><sg>/the<det><def><m><sg>$ ^Parlament<lemtestant>/Parliament<n><sg>$
+^haver<vbhaver><pri><p3><pl>/have<vbhaver><pri><p3><pl>$ 
+^mostrar<vblex><pp><m><sg>/show<vblex><pp><m><sg>/display<vblex><pp><m><sg>$ 
+^aquest<det><dem><m><sg>/this<det><dem><m><sg>$ ^dimarts<n><m><sp>/Tuesday<n><ND>$ 
+^el seu<lemtesta>/his<det><pos><m><sg>$ ^suport<n><m><sg>/support<n><sg>$
+^a<pr>/at<pr>/in<pr>/to<pr>$ ^test<lemtesta>/his<det><pos><m><sg>$
+^*batle/*batle$ ^de<pr>/of<pr>/from<pr>$ ^*Alaró/*Alaró$^.<sent>/.<sent>$
+^Parlament<n><lemtestant><cop2test>/Parliament<n><sg><cop2test>$ ^es<cop2>/is<cop2>$ ^test<lemtesta>/his<det><pos><m><sg>$
+^Parlament<n><lemtestant><cop2test2>/Parliament<n><sg><cop2test2>$ ^esta<cop2>/is<cop2>$ ^test<lemtesta>/his<det><pos><m><sg>$
diff --git a/tests/test_lemma.out b/tests/test_lemma.out
new file mode 100644
index 0000000..7cb1bcb
--- /dev/null
+++ b/tests/test_lemma.out
@@ -0,0 +1,10 @@
+^El<det><def><m><pl>/The<det><def><m><pl>/$ ^grup<lemtestant>/group<n><pl>/$ ^test<n><m><pl>/grouptest<n><pl>/$  ^de<pr>/of<pr>/from<pr>/$
+^el<det><def><m><sg>/the<det><def><m><sg>/$ ^Parlament<lemtestant>/Parliament<n><sg>/$
+^haver<vbhaver><pri><p3><pl>/have<vbhaver><pri><p3><pl>/$ 
+^mostrar<vblex><pp><m><sg>/show<vblex><pp><m><sg>/display<vblex><pp><m><sg>/$ 
+^aquest<det><dem><m><sg>/this<det><dem><m><sg>/$ ^dimarts<n><m><sp>/Tuesday<n><ND>/$ 
+^el seu<lemtesta>/his<det><pos><m><sg>/$ ^suport<n><m><sg>/support<n><sg>/$
+^a<pr>/at<pr>/in<pr>/to<pr>/$ ^test<lemtesta>/his<det><pos><m><sg>/Parliament<n><sg>$
+^*batle/*batle/$ ^de<pr>/of<pr>/from<pr>/$ ^*Alaró/*Alaró/$^.<sent>/.<sent>/$
+^Parlament<n><lemtestant><cop2test>/Parliament<n><sg><cop2test>/$ ^es<cop2>/is<cop2>/$ ^test<lemtesta>/his<det><pos><m><sg>/Parliament<n><sg>$
+^Parlament<n><lemtestant><cop2test2>/Parliament<n><sg><cop2test2>/$ ^esta<cop2>/is<cop2>/$ ^test<lemtesta>/his<det><pos><m><sg>/Parliament<n><sg><cop2test>$