Index: branches/weighted-transfer/apertium/apertium/transfer.cc =================================================================== --- branches/weighted-transfer/apertium/apertium/transfer.cc (revision 72228) +++ branches/weighted-transfer/apertium/apertium/transfer.cc (revision 72229) @@ -77,107 +77,13 @@ } void -Transfer::readData(FILE *in) -{ - // Read transfer rules data from .t*x.bin file - - alphabet.read(in); - - any_char = alphabet(TRXReader::ANY_CHAR); - any_tag = alphabet(TRXReader::ANY_TAG); - - Transducer t; - t.read(in, alphabet.size()); - - map finals; - - // finals - for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) - { - int key = Compression::multibyte_read(in); - finals[key] = Compression::multibyte_read(in); - } - - me = new MatchExe(t, finals); - - // attr_items - bool recompile_attrs = Compression::string_read(in) != string(pcre_version()); - for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) - { - string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); - attr_items[cad_k].read(in); - wstring fallback = Compression::wstring_read(in); - if(recompile_attrs) { - attr_items[cad_k].compile(UtfConverter::toUtf8(fallback)); - } - } - - // variables - for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) - { - string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); - variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in)); - } - - // macros - for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) - { - string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); - macros[cad_k] = Compression::multibyte_read(in); - } - - // lists - for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) - { - string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); - - for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++) - { - wstring const cad_v = Compression::wstring_read(in); - lists[cad_k].insert(UtfConverter::toUtf8(cad_v)); - listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v))); - } - } -} - -void -Transfer::readBil(string const &fstfile) -{ - FILE *in = fopen(fstfile.c_str(), "rb"); - if(!in) - { - cerr << "Error: Could not open file '" << fstfile << "'." << endl; - exit(EXIT_FAILURE); - } - fstp.load(in); - fstp.initBiltrans(); - fclose(in); -} - -void -Transfer::setExtendedDictionary(string const &fstfile) -{ - FILE *in = fopen(fstfile.c_str(), "rb"); - if(!in) - { - cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." 
<< endl; - exit(EXIT_FAILURE); - } - extended.load(in); - extended.initBiltrans(); - fclose(in); - isExtended = true; -} - -void Transfer::read(string const &transferfile, string const &datafile, string const &weightsfile, string const &fstfile) { - // read and parse .t*x transfer file + // read and parse .t*x xml file with transfer rules //tc readTransfer(transferfile); - // open precompiled .t*x.bin file and read data from it - cerr << "Reading data from " << datafile.c_str() << endl; + // open precompiled .t*x.bin binary file and read data from it //tc FILE *in = fopen(datafile.c_str(), "rb"); if(!in) { @@ -187,17 +93,15 @@ readData(in); fclose(in); - // read data from transfer weights file if specified + // read data from w*x xml file with transfer weights if specified //tc if(weightsfile != "") { - //cerr << "Reading weights from " << weightsfile << endl; // di readTransferWeights(weightsfile); } - // read data from bilingual letter transducer file if specified + // read data from bilingual letter transducer file if specified //tc if(fstfile != "") { - cerr << "Reading fst data from " << fstfile << endl; // di readBil(fstfile); } } @@ -204,11 +108,7 @@ void Transfer::readTransfer(string const &in) -{ - // Read transfer rules from .t*x file. - // In fact, here we collect only default attribute value, - // macros, and actions specified in rules. - cerr << "Reading transfer rules from " << in.c_str() << endl; // di +{ doc = xmlReadFile(in.c_str(), NULL, 0); if(doc == NULL) { @@ -218,27 +118,27 @@ root_element = xmlDocGetRootElement(doc); - // search through attributes of root element + // search through attributes of root element //tc for(xmlAttr *i = root_element->properties; i != NULL; i = i->next) { - // only check for 'default' attribute + // check for 'default' attribute //tc if(!xmlStrcmp(i->name, (const xmlChar *) "default")) { - // assuming either default="chunk" or something else + // assuming either default="chunk" or something else //tc if(!xmlStrcmp(i->children->content, (const xmlChar *) "chunk")) { - // if default="chunk", set it to chunk + // if default is "chunk", set it to chunk //tc defaultAttrs = chunk; // default value for 'chunk' } else { - // if not default="chunk", set it to default + // if default is not "chunk", set it to default //tc defaultAttrs = lu; // default value for 'default' } } } - // search through root's children nodes for macroses & rules + // search through children nodes of root node for macros and rules //tc for(xmlNode *i = root_element->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -253,39 +153,31 @@ } } } - - /*if (useWeights) // di - { // di - // double-check rule ids in rule_id_map and rule_ids // di - cerr << endl << "Those are the ids you wanted: " << endl; //di - for (int k = 1; k < rule_ids.size(); k++) // di - { // di - cerr << "rule_ids[" << k << "]: " << rule_ids[k] << endl; // di - cerr << "rule_id_map[" << rule_ids[k] << "]: " << rule_id_map[rule_ids[k]] << endl << endl; // di - } // di - } // di*/ } void Transfer::collectRules(xmlNode *localroot) { - // go through subelements of 'section-rules' + // counting rule numbers; in fact, rules are indexed from 1 //tc int rule_index = 0; string rule_id = ""; - rule_ids.push_back(""); // fictive zero position element to not bother with i-1 thing - rule_id_map[""] = 0; // a uniformed answer to all empty string ids since we're not interested + // fictive zero position element to not bother with i-1 thing //tc + rule_ids.push_back(""); + // a uniformed answer to all 
empty string ids //tc + rule_id_map[""] = 0; + // go through subelements of 'section-rules' //tc for(xmlNode *i = localroot->children; i != NULL; i = i->next) { + // go through 'rule' elements //tc if(i->type == XML_ELEMENT_NODE && !xmlStrcmp(i->name, (const xmlChar *) "rule")) { - // 'rule' element rule_index++; - //cerr << "Collecting rule # " << rule_index << endl; //di - if (useWeights) // only need ids if weights are used + // we only need to collect ids if weights are used //tc + if (useWeights) { - // get rule id and add it to rule_ids + // get rule id and add it to rule_ids //tc rule_id = getRuleId(i); if (rule_id != "") { @@ -293,17 +185,15 @@ } rule_ids.push_back(rule_id); rule_id = ""; - //cerr << endl; // di } - // go through subelements of current 'rule' element looking for some action + // go through subelements of current 'rule' element looking for some action //tc for(xmlNode *j = i->children; ; j = j->next) { - // check if subelement is 'action' element + // check if subelement is 'action' element //tc if(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "action")) { - // if so, add it at the end of the rule map - //cerr << "Collected '" << i->name << "' part '" << j->name << "'" << endl; // di + // if so, add it at the end of the rule map //tc rule_map.push_back(j); break; } @@ -315,12 +205,12 @@ void Transfer::collectMacros(xmlNode *localroot) { - // go through subelements of 'section-macros' - // and add all subelements, normally 'def-macros' nodes + // go through subelements of 'section-macros' //tc for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) { + // and add all subelements, normally 'def-macros' nodes //tc macro_map.push_back(i); } } @@ -329,7 +219,6 @@ void Transfer::readTransferWeights(string const &in) { - // Read transfer weights from .w*x file. 
int rule_group_index = 0; double weight = 0.; string lemma = ""; @@ -336,17 +225,12 @@ string tags = ""; string rule_id = ""; string current_pattern = ""; - vector<string> current_rule_group; // to track all rules in one group - rule_group_map[""] = -1; // a uniformed answer to all empty rule_group ids - map<string, double> current_pattern_group; - weighted_patterns[""] = current_pattern_group; + map<string, vector<pair<string, double> > > weighted_pattern_rule_group; + // to track all rules in one group //tc + vector<string> current_rule_group; + // a uniformed answer to all empty rule_group ids //tc + rule_group_map[""] = -1; - pcre *reCompiled; - pcre_extra *pcreExtra; - const char *pcreErrorStr; - int pcreErrorOffset; - - cerr << "Reading transfer weights from " << in.c_str() << endl << endl; // di doc = xmlReadFile(in.c_str(), NULL, 0); if(doc == NULL) { @@ -356,24 +240,24 @@ root_element = xmlDocGetRootElement(doc); - // search through root's children nodes for 'rule-group' elements + // search through children nodes of the root for 'rule-group' elements //tc for(xmlNode *i = root_element->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE && !xmlStrcmp(i->name, (const xmlChar *) "rule-group")) { cerr << "Collecting rule group # " << rule_group_index << endl; // di - // get ids of all rules in rule group + // get ids of all rules in rule group //tc for(xmlNode *j = i->children; j != NULL; j = j->next) { if(j->type == XML_ELEMENT_NODE && !xmlStrcmp(j->name, (const xmlChar *) "rule")) { - // get id + // get id and store it //tc rule_id = getRuleId(j); current_rule_group.push_back(rule_id); rule_group_map[rule_id] = rule_group_index; cerr << "Rule id: " << rule_id << endl; // di - // get patterns + // get patterns, parse them, and store //tc for(xmlNode *k = j->children; k != NULL; k = k->next) { if(k->type == XML_ELEMENT_NODE && !xmlStrcmp(k->name, (const xmlChar *) "pattern")) { @@ -386,15 +270,6 @@ lemma = getNodeAttr(patit, "lemma"); current_pattern = current_pattern + lemma; - /*if (lemma == "") - { - regex = regex + "[^<>]*?"; - } - else - { - regex = regex + lemma; - }*/ - tags = getNodeAttr(patit, "tags"); if (tags != "") { @@ -415,51 +290,19 @@ } current_pattern = current_pattern + "> "; } - //regex = regex + "\\S*? "; - - /*unsigned int tags_len = tags.size(); - if (tags_len > 0 && tags != "*") - { - regex = regex + "<"; - } - char curr_char; - for(unsigned int i = 0; i < tags_len; i++) - { - curr_char = tags[i]; - switch (curr_char) - { - case '.': - regex = regex + "><"; - break; - - case '*': - regex = regex + ".*?"; - break; - - default: - regex = regex + curr_char; } } - if (tags_len > 0 && tags != "*") - { - regex = regex + ">"; - } - regex = regex + "\\S*? 
";*/ + cerr << " " << weight << " " << current_pattern << endl; // di + weighted_pattern_rule_group[current_pattern].push_back(make_pair(rule_id, weight)); + current_pattern = ""; } } - cerr << " " << weight << " " << current_pattern << endl; - //reCompiled = pcre_compile(regex.c_str(), 0, &pcreErrorStr, &pcreErrorOffset, NULL); - //pcreExtra = pcre_study(reCompiled, 0, &pcreErrorStr); - current_pattern_group[current_pattern] = weight; - current_pattern = ""; } } - weighted_patterns[rule_id] = current_pattern_group; - current_pattern_group.clear(); + // push newly acquired current_rule_group into rule_groups //tc + weighted_patterns.push_back(weighted_pattern_rule_group); + weighted_pattern_rule_group.clear(); cerr << endl; // di - } - } - // push newly acquired current_rule_group into rule_groups rule_groups.push_back(current_rule_group); current_rule_group.clear(); rule_group_index++; @@ -467,12 +310,11 @@ } // print out what was collected // di - /*cerr << "These are the rule groups you collected:" << endl; // di - unsigned int k1, k2; // di - for (k1 = 0; k1 < rule_groups.size(); k1++) // di + cerr << "These are the rule groups you collected:" << endl; // di + for (unsigned int k1 = 0; k1 < rule_groups.size(); k1++) // di { // di cerr << "rule_groups[" << k1 << "]:" << endl; // di - for (k2 = 0; k2 < rule_groups[k1].size(); k2++) // di + for (unsigned int k2 = 0; k2 < rule_groups[k1].size(); k2++) // di { // di cerr << " " << rule_groups[k1][k2] << endl; // di //cerr << " rule_group_map[" << rule_groups[k1][k2] << "]: "; // di @@ -479,22 +321,23 @@ //cerr << rule_group_map[rule_groups[k1][k2]] << endl; // di } // di cerr << endl; // di - } // di*/ - - /*cerr << "And these are the patterns:" << endl; // di - for (k1 = 1; k1 < rule_ids.size(); k1++) // di - { // di - if (rule_ids[k1] != "") // di - { // di - cerr << "Patterns for rule " << rule_ids[k1] << endl; // di - for (k2 = 0; k2 < weighted_patterns[rule_ids[k1]].size(); k2++) // di - { // di - cerr << " " << weighted_patterns[rule_ids[k1]][k2].first << " "; // di - cerr << weighted_patterns[rule_ids[k1]][k2].second << endl; // di } // di - } // di - } // di*/ + cerr << "And these are the patterns:" << endl; // di + for (unsigned int it1 = 0; it1 < weighted_patterns.size(); it1++) //di + { //di + cerr << it1 << endl; //di + for (map > >::iterator it2 = weighted_patterns[it1].begin(); it2 != weighted_patterns[it1].end(); it2++) //di + { //di + cerr << " " << it2->first; //di + for (unsigned int it3 = 0; it3 < (it2->second).size(); it3++) //di + { //di + cerr << " " << (it2->second)[it3].first << ": " << (it2->second)[it3].second; //di + } //di + cerr << endl; //di + } //di + cerr << endl; //di + } //di } string @@ -501,23 +344,15 @@ Transfer::getRuleId(xmlNode *localroot) { string rule_id = ""; - // normally looking at a 'rule' node now + // localroot is normally a 'rule' node //tc + // go through its properties looking for 'id' //tc for(xmlAttr *j = localroot->properties; j != NULL; j = j->next) { - if(!xmlStrcmp(j->name, (const xmlChar *) "comment")) // di - { // di - //cerr << "Rule comment: " << xmlNodeListGetString(localroot->doc, j->children, 1) << endl; // di - } // di - else //di - { //di if(!xmlStrcmp(j->name, (const xmlChar *) "id")) { - // add rule id to rule_id_map rule_id = (const char*)xmlNodeListGetString(localroot->doc, j->children, 1); - //cerr << "Rule id: " << rule_id << endl; // di } - } // di - } // di + } return rule_id; } @@ -525,6 +360,7 @@ Transfer::getNodeAttr(xmlNode *localroot, const char* attr_name) { 
string attr_val = ""; + // go through localroot node properties looking for attr_name //tc for(xmlAttr *j = localroot->properties; j != NULL; j = j->next) { if(!xmlStrcmp(j->name, (const xmlChar *) attr_name)) @@ -535,6 +371,97 @@ return attr_val; } +void +Transfer::readData(FILE *in) +{ + alphabet.read(in); + + any_char = alphabet(TRXReader::ANY_CHAR); + any_tag = alphabet(TRXReader::ANY_TAG); + + Transducer t; + t.read(in, alphabet.size()); + + map finals; + + // finals + for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) + { + int key = Compression::multibyte_read(in); + finals[key] = Compression::multibyte_read(in); + } + + me = new MatchExe(t, finals); + + // attr_items + bool recompile_attrs = Compression::string_read(in) != string(pcre_version()); + for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) + { + string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); + attr_items[cad_k].read(in); + wstring fallback = Compression::wstring_read(in); + if(recompile_attrs) { + attr_items[cad_k].compile(UtfConverter::toUtf8(fallback)); + } + } + + // variables + for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) + { + string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); + variables[cad_k] = UtfConverter::toUtf8(Compression::wstring_read(in)); + } + + // macros + for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) + { + string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); + macros[cad_k] = Compression::multibyte_read(in); + } + + // lists + for(int i = 0, limit = Compression::multibyte_read(in); i != limit; i++) + { + string const cad_k = UtfConverter::toUtf8(Compression::wstring_read(in)); + + for(int j = 0, limit2 = Compression::multibyte_read(in); j != limit2; j++) + { + wstring const cad_v = Compression::wstring_read(in); + lists[cad_k].insert(UtfConverter::toUtf8(cad_v)); + listslow[cad_k].insert(UtfConverter::toUtf8(StringUtils::tolower(cad_v))); + } + } +} + +void +Transfer::readBil(string const &fstfile) +{ + FILE *in = fopen(fstfile.c_str(), "rb"); + if(!in) + { + cerr << "Error: Could not open file '" << fstfile << "'." << endl; + exit(EXIT_FAILURE); + } + fstp.load(in); + fstp.initBiltrans(); + fclose(in); +} + +void +Transfer::setExtendedDictionary(string const &fstfile) +{ + FILE *in = fopen(fstfile.c_str(), "rb"); + if(!in) + { + cerr << "Error: Could not open extended dictionary file '" << fstfile << "'." << endl; + exit(EXIT_FAILURE); + } + extended.load(in); + extended.initBiltrans(); + fclose(in); + isExtended = true; +} + bool Transfer::checkIndex(xmlNode *element, int index, int limit) { @@ -575,42 +502,30 @@ string Transfer::evalString(xmlNode *element) { - // This function evaluates an xml element - // and executes appropriate instruction. + /* //tc + Evaluate an xml element and execute appropriate instruction. + Used for lowest-level action elements, such as 'clip' or 'lit-tag'. - // I believe it is used for lowest-level action elements, - // such as 'clip' or 'lit-tag'. + If TransferInstr object corresponding to the element is already + in evalStringCache, execute that instruction, + if not, first add the instruction to evalStringCache, + then call evalString again, which will execute that instruction, + because it is already in evalStringCache. 
+ */ - // If TransferInstr object corresponding to the element is already - // in evalStringCache, execute that instruction, - // if not, first add the instruction to evalStringCache, - // then call evalString again and execute that instruction. - - // First, let's see what we've got. // di - //if (element->type == XML_ELEMENT_NODE) // di - //{ // di - //cerr << "Evaluating " << element->name << " "; // di - //for(xmlAttr *prop = element->properties; prop != NULL; prop = prop->next) // di - //{ // di - //cerr << prop->name << "='" << xmlNodeListGetString(element->doc, prop->children, 1) << "' "; // di - //} // di - //cerr << endl; // di - //} // di - + // Check if the TransferInstr object corresponding to the element //tc + // is already in evalStringCache... //tc map::iterator it; it = evalStringCache.find(element); - - // Check if the TransferInstr object corresponding to the element - // is already in evalStringCache... if(it != evalStringCache.end()) { - // ...if it is, execute the corresponding instruction... + // ...if it is, execute the corresponding instruction... //tc TransferInstr &ti = it->second; - // ...depending on its type + // ...depending on its type //tc switch(ti.getType()) { - case ti_clip_sl: // + case ti_clip_sl: if(checkIndex(element, ti.getPos(), lword)) { return word[ti.getPos()]->source(attr_items[ti.getContent()], ti.getCondition()); @@ -617,7 +532,7 @@ } break; - case ti_clip_tl: // + case ti_clip_tl: if(checkIndex(element, ti.getPos(), lword)) { return word[ti.getPos()]->target(attr_items[ti.getContent()], ti.getCondition()); @@ -655,11 +570,11 @@ case ti_var: return variables[ti.getContent()]; - case ti_lit_tag: // - case ti_lit: // - return ti.getContent(); // just output what's specified in 'v' + case ti_lit_tag: + case ti_lit: + return ti.getContent(); // just output what's specified in 'v' //tc - case ti_b: // + case ti_b: if(checkIndex(element, ti.getPos(), lblank)) { if(ti.getPos() >= 0) @@ -696,12 +611,14 @@ return ""; } return ""; - } // end of if(it != evalStringCache.end()) clause + } - // The following code is executed if TransferInstr object - // corresponding to the element is not in evalStringCache yet. - // It parses lowest-level element, creates TransferInstr object - // out of it, and pushes it into evalStringCache. + /* //tc + The following code is executed if TransferInstr object + corresponding to the element is not in evalStringCache yet. + It parses lowest-level element, creates TransferInstr object + out of it, and pushes it into evalStringCache. + */ if(!xmlStrcmp(element->name, (const xmlChar *) "clip")) { int pos = 0; @@ -916,14 +833,15 @@ } return evalString(element); -} // end of evalString +} void Transfer::processOut(xmlNode *localroot) { - // apply 'out' subelement of a rule, one subelement at a time, - // depending on subelement type - //cerr << "Applying 'out' element" << endl; // di + /* //tc + Apply 'out' subelement of a rule, one subelement + at a time, depending on subelement type. + */ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1003,20 +921,21 @@ } } } -} // end of processOut +} string Transfer::processChunk(xmlNode *localroot) { - // apply 'chunk' subelement of 'out' element of a rule, - // one subelement at a time, depending on subelement type + /* //tc + Apply 'chunk' subelement of 'out' element of a rule, + one subelement at a time, depending on subelement type. 
+ */ - //cerr << "Applying 'chunk' element" << endl; // di string name, namefrom; string caseofchunk = "aa"; string result; - // this will be the cache of source language lemmas found in chunk + // cache of source language lemmas found in chunk //tc unsigned int limit = tmpword.size(); string* wordcache; wordcache = new string [limit]; @@ -1037,10 +956,10 @@ } } - // starting to build the chunk + // start building the chunk //tc result.append("^"); - // adding chunk name + // add chunk name //tc if(caseofchunk != "") { if(name != "") @@ -1074,7 +993,7 @@ } } - // processing and adding chunk subelements one element at a time + // process and add chunk subelements one element at a time //tc int count = 0; // di for(xmlNode *i = localroot->children; i != NULL; i = i->next) { @@ -1083,13 +1002,13 @@ count++; // di if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) { - // add chunk tags + // add chunk tags //tc result.append(processTags(i)); result.append("{"); } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { - // process and add one 'lu' element + // process and add one 'lu' element //tc string myword; string untouched; int untouched_pos; @@ -1159,10 +1078,10 @@ } } - // finishing the chunk + // finish the chunk //tc result.append("}$"); - // now it's time to check if there was a magic word in the chunk + // check if there was a magic word in the chunk bool stopword = false; for (int k = 0; k < limit && !stopword; k++) { @@ -1185,7 +1104,7 @@ result.append("$}$"); } return result; -} // end of processChunk +} string Transfer::processTags(xmlNode *localroot) @@ -1213,9 +1132,6 @@ int Transfer::processInstruction(xmlNode *localroot) { - // process instruction specified in rule action based on its name - //cerr << "Processing instruction '" << localroot->name << "'" << endl; // di - int words_to_consume = -1; if(!xmlStrcmp(localroot->name, (const xmlChar *) "choose")) { @@ -2136,8 +2052,8 @@ int Transfer::processRule(xmlNode *localroot) { + // localroot is supposed to point to an 'action' element of xml tree. 
//tc int instruction_return, words_to_consume = -1; - // localroot is supposed to be an 'action' tag for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -2201,12 +2117,10 @@ } else if(val == L'$') { - //cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_word)); } else if(val == L'^') { - // cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_blank)); } else if(val == L'\0' && null_flush) @@ -2218,7 +2132,7 @@ content += wchar_t(val); } } -} // end of readToken +} bool Transfer::getNullFlush(void) @@ -2267,9 +2181,7 @@ void Transfer::transfer(FILE *in, FILE *out) -{ - //cerr << "Transfer starts here" << endl << endl; // di - +{ if(getNullFlush()) { transfer_wrapper_null_flush(in, out); @@ -2283,50 +2195,13 @@ output = out; ms.init(me->getInitial()); - int counter = 0; // di while(true) - { - //cerr << "Transfer iteration # " << counter << endl; // di - //cerr << "last: " << last << endl; // di - //cerr << "prev_last: " << prev_last << endl; // di - //cerr << "lastrule_num: " << lastrule_num << endl; // di - //cerr << "ms.size(): " << ms.size() << endl; // di - - // Let's look at input_buffer contents // di - /*int initbuffpos = input_buffer.getPos(); // di - //cerr << "input_buffer position: " << initbuffpos << endl << endl; // di - input_buffer.setPos(0); // di - int currbuffpos, prevbuffpos = input_buffer.getPos(); // di - TransferToken currbufftok, prevbufftok = input_buffer.next(); // di - bool run = true; // di - while (run) { // di - currbuffpos = input_buffer.getPos(); // di - currbufftok = input_buffer.next(); // di - //cerr << "input_buffer.buf[" << prevbuffpos << "]: " << UtfConverter::toUtf8(prevbufftok.getContent()) << endl; // di - if (currbuffpos == prevbuffpos) { // di - run = false; // di - } else { // di - prevbuffpos = currbuffpos; // di - prevbufftok = currbufftok; // di - } // di - } // di - cerr << endl; // di - // Return input_buffer to its initial position // di - input_buffer.setPos(initbuffpos); // di - // List banned_rules //di - //cerr << "banned_rules:" << endl; - //for(set::iterator iter=banned_rules.begin(); iter != banned_rules.end(); iter++) { // di - // cerr << *iter << ", "; // di - //} // di - //cerr << endl; // di*/ - + { if(trace_att) { cerr << "Loop start " << endl; cerr << "ms.size: " << ms.size() << endl; - cerr << "tmpword.size(): " << tmpword.size() << endl; - for (unsigned int ind = 0; ind < tmpword.size(); ind++) { if(ind != 0) @@ -2336,7 +2211,6 @@ wcerr << *tmpword[ind]; } wcerr << endl; - cerr << "tmpblank.size(): " << tmpblank.size() << endl; for (unsigned int ind = 0; ind < tmpblank.size(); ind++) { @@ -2345,16 +2219,15 @@ wcerr << L"' "; } wcerr << endl; - cerr << "last: " << last << endl; cerr << "prev_last: " << prev_last << endl << endl; - } // if(trace_att) ends here + } if (ms.size() == 0) { if(lastrule != NULL) { - // this is the branch where a rule specified by lastrule_num is applied + // here, the rule specified by lastrule_num is applied //tc int num_words_to_consume = applyRule(); if(trace_att) @@ -2400,7 +2273,7 @@ } // thy words consumed lastrule_num = -1; } - else // lastrule == NULL + else // lastrule == NULL //tc { if(tmpword.size() != 0) { @@ -2521,8 +2394,8 @@ last = input_buffer.getPos(); ms.init(me->getInitial()); } - } // lastrule == NULL ends here - } // if(ms.size() == 0) ends here + } + } int val = ms.classifyFinals(me->getFinals(), banned_rules); if(val != -1) @@ 
-2576,9 +2449,8 @@ cerr << "Error: Unknown input token." << endl; return; } - counter++; // di } -} // end of transfer +} int Transfer::applyRule() @@ -2587,11 +2459,11 @@ unsigned int limit = tmpword.size(); wstring wtmpchunk = L""; - string tmpchunk; + string tmpchunk = ""; + string fallback_tmpchunk = ""; for(unsigned int i = 0; i != limit; i++) - { - //cerr << "applyRule iteration # " << i << endl; // di + { if(i == 0) { word = new TransferWord *[limit]; @@ -2620,11 +2492,13 @@ } else if(preBilingual) { - // this part is dedicated to splitting token by slash, e.g. - // if tmpword[i] was word_in_lang1/word_in_lang2 - // then - // sl = word_in_lang1 - // tl = word_in_lang2 + /* //tc + split bilingual token by slash, e.g. + if tmpword[i] was word_in_lang1/word_in_lang2 + then + sl = word_in_lang1 + tl = word_in_lang2 + */ wstring sl; wstring tl; @@ -2676,8 +2550,7 @@ } } else - { - // here we don't need to split anything + { tr = pair(*tmpword[i], false); } @@ -2685,91 +2558,78 @@ UtfConverter::toUtf8(tr.first), tr.second); } - // check if we use weights if (useWeights) { - //int pcreExecRet; - //int subStrVec[30]; double chosen_weight = 0., current_weight = 0.; string chosen_rule_id = rule_ids[lastrule_num]; string current_rule_id = ""; unsigned int rule_group_num = -1; - // check if rule id is not empty if (chosen_rule_id != "") { - // check if there are other rules in its group + /* //tc + If there are other rules in the rule group, + that means chosen rule is ambiguous. + */ + rule_group_num = rule_group_map[chosen_rule_id]; if (rule_groups[rule_group_num].size() > 1) { - map<string, double>::iterator lookup_pattern; + map<string, vector<pair<string, double> > >::iterator lookup_pattern; tmpchunk = UtfConverter::toUtf8(wtmpchunk); - string lookup_chunk = ""; - unsigned int tmpchunk_size = tmpchunk.size(); - unsigned int chunk_end; - for (chunk_end = tmpchunk_size - 2; tmpchunk[chunk_end] != ' ' && chunk_end > 0; chunk_end--); - for (unsigned int i = 0; i <= chunk_end; i++) + cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id; // di + cerr << "' is ambiguous on input:" << endl; // di + cerr << tmpchunk << endl << endl; // di + + // go through patterns //tc + bool found = false; + lookup_pattern = weighted_patterns[rule_group_num].find(tmpchunk); + if (lookup_pattern != weighted_patterns[rule_group_num].end()) { - lookup_chunk = lookup_chunk + tmpchunk[i]; + found = true; } - - cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id << "' is ambiguous on input:" << endl; // di - cerr << lookup_chunk << endl << endl; // di - - /*cerr << "Rules in the group: " << endl; // di - for (unsigned int ind = 0; ind < rule_groups[rule_group_num].size(); ind++) // di - { // di - cerr << " " << rule_groups[rule_group_num][ind] << endl; // di - } // di - cerr << endl; // di*/ - - // let's check the weights for each rule in the group - chosen_weight = 0.; - for (unsigned int ind = 0; ind < rule_groups[rule_group_num].size(); ind++) + else { - current_weight = 0.; - current_rule_id = rule_groups[rule_group_num][ind]; - - cerr << "Checking rule # " << rule_id_map[current_rule_id] << " with id '" << current_rule_id << "'" << endl; // di - // go through patterns - lookup_pattern = weighted_patterns[current_rule_id].find(lookup_chunk); - if (lookup_pattern == weighted_patterns[current_rule_id].end()) + cerr << "Pattern not found" << endl; // di + unsigned int chunk_end; + for (chunk_end = tmpchunk.size() - 2; tmpchunk[chunk_end] != ' ' && chunk_end > 0; chunk_end--); + fallback_tmpchunk = ""; + for (unsigned int i = 0; i <= 
chunk_end; i++) { - cerr << "Pattern not found" << endl; // di + fallback_tmpchunk = fallback_tmpchunk + tmpchunk[i]; } + lookup_pattern = weighted_patterns[rule_group_num].find(fallback_tmpchunk); + if (lookup_pattern == weighted_patterns[rule_group_num].end()) + { + cerr << "Pattern still not found" << endl; // di + } else - { - cerr << " Pattern " << lookup_pattern->first; // di - current_weight = lookup_pattern->second; - cerr << " matched with weight " << current_weight << endl; // di - if (current_weight > chosen_weight) // heavier rule { - chosen_weight = current_weight; - chosen_rule_id = current_rule_id; + found = true; } } - /*for (unsigned int k = 0; k < weighted_patterns[current_rule_id].size(); k++) - { - pcreExecRet = pcre_exec(weighted_patterns[current_rule_id][k].first, NULL, - tmpchunk.c_str(), tmpchunk.length(), - 0, 0, subStrVec, 30); - if(pcreExecRet >= 0) // bingo! + if (found) { - cerr << " Pattern # " << k; // di - current_weight = weighted_patterns[current_rule_id][k].second; - cerr << " matched with weight " << current_weight << endl; // di - if (current_weight > chosen_weight) // heavier rule + // let's check the weights for each rule in the group //tc + chosen_weight = 0.; + for (unsigned int ind = 0; ind < (lookup_pattern->second).size(); ind++) { + current_rule_id = (lookup_pattern->second)[ind].first; + current_weight = (lookup_pattern->second)[ind].second; + cerr << " Pattern " << lookup_pattern->first; // di + cerr << " matched for rule " << current_rule_id; // di + cerr << " with weight " << current_weight << endl; // di + if (current_weight > chosen_weight) // found heavier rule //tc + { chosen_weight = current_weight; chosen_rule_id = current_rule_id; } } - }*/ } cerr << endl; // di - // substitute lastrule with the chosen one + // substitute lastrule with the chosen one //tc lastrule_num = rule_id_map[chosen_rule_id]; lastrule = rule_map[lastrule_num-1]; cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id; @@ -2780,7 +2640,7 @@ words_to_consume = processRule(lastrule); - // some cleanup ? 
+ // some cleanup //tc lastrule = NULL; if(word) @@ -2805,15 +2665,11 @@ tmpblank.clear(); ms.init(me->getInitial()); return words_to_consume; -} // end of applyRule +} -/* HERE */ void Transfer::applyWord(wstring const &word_str) { - // Here, the token contained in word_str is fed - // to the fst by stepping with ms - ms.step(L'^'); for(unsigned int i = 0, limit = word_str.size(); i < limit; i++) { @@ -2824,23 +2680,25 @@ ms.step(towlower(word_str[i]), any_char); break; - case L'/': // got to the end of left side part (source token) + case L'/': // got to the end of left side part (source token) //tc i = limit; break; - case L'<': // got to the start of a tag + case L'<': // got to the start of a tag //tc for(unsigned int j = i+1; j != limit; j++) { - if(word_str[j] == L'>') // got to the end of the tag + if(word_str[j] == L'>') // got to the end of the tag //tc { - // try to get the symbol corresponding to the tag + // try to get the symbol corresponding to the tag //tc int symbol = alphabet(word_str.substr(i, j-i+1)); - if(symbol) // there is such symbol in alphabet + if(symbol) { + // there is such symbol in alphabet //tc ms.step(symbol, any_tag); } - else // there is no such symbol in alphabet + else { + // there is no such symbol in alphabet //tc ms.step(any_tag); } i = j; @@ -2849,13 +2707,15 @@ } break; - default: // default is applying lemma's symbols one by one + default: + // default is applying lemma's symbols one by one //tc ms.step(towlower(word_str[i]), any_char); break; } } - ms.step(L'$'); // push the end of token -} // end of applyWord + // push the end of token //tc + ms.step(L'$'); +} void Transfer::setPreBilingual(bool value) Index: branches/weighted-transfer/apertium/apertium/transfer.h =================================================================== --- branches/weighted-transfer/apertium/apertium/transfer.h (revision 72228) +++ branches/weighted-transfer/apertium/apertium/transfer.h (revision 72229) @@ -50,14 +50,30 @@ map<string, int, Ltstr> macros; map<string, set<string, Ltstr>, Ltstr> lists; map<string, set<string, Ltstr>, Ltstr> listslow; + + // all macros in xml in order of appearance in transfer file vector<xmlNode *> macro_map; + + // all rule actions in xml in order of appearance in transfer file vector<xmlNode *> rule_map; - vector<string> rule_ids; // rule number -> rule id, first meaningful rule at position 1 - map<string, int> rule_id_map; // rule id -> rule number - vector<vector<string> > rule_groups; // rule group number -> rule ids - map<string, int> rule_group_map; // id -> rule group number - //map > > weighted_patterns; - map<string, map<string, double> > weighted_patterns; // all weighted patterns, grouped by rule id + + // rule number -> rule id, first meaningful rule at position 1 + vector<string> rule_ids; + + // rule id : rule number + map<string, int> rule_id_map; + + // rule group number : rule ids + vector<vector<string> > rule_groups; + + // id : rule group number + map<string, int> rule_group_map; + + // all weighted patterns, grouped by rule group number + // index of outer vector corresponds to rule group numbers + // map is pattern string : vector of (rule id, weight) pairs + vector<map<string, vector<pair<string, double> > > > weighted_patterns; + xmlDoc *doc; xmlNode *root_element; TransferWord **word; @@ -93,21 +109,65 @@ string emptyblank; void destroy(); - void readData(FILE *input); - void readBil(string const &filename); + + /** + Read transfer rules from t*x xml file. + In fact, only the default attribute value, macros, rule actions, + and rule ids (if using weights) are read here. + */ void readTransfer(string const &input); + + /** + Read transfer weights from w*x xml file. 
+ */ + void readTransferWeights(string const &in); + + /** + Read macros from t*x xml file. + localroot must point to 'section-macros' element in transfer xml tree. + */ void collectMacros(xmlNode *localroot); + + /** + Read rule actions and rule ids (if using weights) from t*x xml file. + localroot must point to 'section-rules' element in transfer xml tree. + */ void collectRules(xmlNode *localroot); - string getRuleId(xmlNode *localroot); // get value of 'id' property of 'rule' element + + /** + Get the value of 'id' attribute of 'rule' element. + localroot must point to 'rule' element in transfer xml tree. + */ + string getRuleId(xmlNode *localroot); + + /** + Get the value of attr_name attribute of the xml tree element + pointed to by localroot. + */ string getNodeAttr(xmlNode *localroot, const char* attr_name); + + /** + Read precompiled transfer rules from t*x.bin binary file. + */ + void readData(FILE *input); + + /** + Read data from bilingual letter transducer file if specified. + */ + void readBil(string const &filename); + string caseOf(string const &str); string copycase(string const &source_word, string const &target_word); - void processLet(xmlNode *localroot); - void processAppend(xmlNode *localroot); - int processRejectCurrentRule(xmlNode *localroot); + /** + Apply subelements of 'out' subelement of rule action, one subelement + at a time, depending on subelement type. + */ void processOut(xmlNode *localroot); + + /** + Apply various types of rule action subelements. + */ void processCallMacro(xmlNode *localroot); void processModifyCase(xmlNode *localroot); bool processLogical(xmlNode *localroot); @@ -122,11 +182,33 @@ bool processContainsSubstring(xmlNode *localroot); bool processNot(xmlNode *localroot); bool processIn(xmlNode *localroot); + void processLet(xmlNode *localroot); + void processAppend(xmlNode *localroot); + int processRejectCurrentRule(xmlNode *localroot); + + /** + Process instructions specified in previously stored 'action' part + of the rule. localroot must point to an 'action' element of xml tree. + */ int processRule(xmlNode *localroot); + + /** + Evaluate an xml element and execute appropriate instruction. + Used for lowest-level action elements, such as 'clip' or 'lit-tag'. + */ string evalString(xmlNode *localroot); void evalStringClip(xmlNode *element, string &lemma, int &pos); // the dark horse + + /** + Process instruction specified in rule action based on instruction name. + */ int processInstruction(xmlNode *localroot); int processChoose(xmlNode *localroot); + + /** + Apply 'chunk' subelement of 'out' element of a rule, + one subelement at a time, depending on subelement type. + */ string processChunk(xmlNode *localroot); string processTags(xmlNode *localroot); @@ -137,18 +219,35 @@ wstring readWord(FILE *in); wstring readBlank(FILE *in); wstring readUntil(FILE *in, int const symbol) const; + + /** + Feed the token contained in word_str + to the internal FST by stepping through its states with ms. + */ void applyWord(wstring const &word_str); int applyRule(); TransferToken & readToken(FILE *in); bool checkIndex(xmlNode *element, int index, int limit); void transfer_wrapper_null_flush(FILE *in, FILE *out); + public: Transfer(); ~Transfer(); + /** + Read all data needed for transfer. + */ void read(string const &transferfile, string const &datafile, string const &weightsfile = "", string const &fstfile = ""); + + /** + Perform transfer. 
+ */ void transfer(FILE *in, FILE *out); + + /** + Boilerplate for setting and getting values of private attributes. + */ void setUseBilingual(bool value); bool getUseBilingual(void) const; void setPreBilingual(bool value);
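
For reference, a minimal standalone sketch of the rule-selection step that the new weighted_patterns structure supports. This is not the committed code: choose_weighted_rule and the toy pattern strings below are invented for illustration only, while the container type and the "heavier rule wins" comparison mirror what applyRule does in the diff above.

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

using namespace std;

// pattern string -> (rule id, weight) pairs; one such map exists per rule group
typedef map<string, vector<pair<string, double> > > pattern_group;

// Return the id of the heaviest rule whose pattern entry matches the input
// chunk, keeping default_rule_id when the pattern is unknown to the group.
string choose_weighted_rule(const pattern_group &group,
                            const string &chunk,
                            const string &default_rule_id)
{
  pattern_group::const_iterator it = group.find(chunk);
  if (it == group.end())
  {
    return default_rule_id; // no weights stored for this input
  }
  string chosen_rule_id = default_rule_id;
  double chosen_weight = 0.;
  for (unsigned int k = 0; k < it->second.size(); k++)
  {
    if (it->second[k].second > chosen_weight) // heavier rule wins
    {
      chosen_weight = it->second[k].second;
      chosen_rule_id = it->second[k].first;
    }
  }
  return chosen_rule_id;
}

int main()
{
  // toy rule group: two rules compete for the same (invented) pattern string
  pattern_group group;
  group["prpers<prn> own<adj> "].push_back(make_pair("rule-a", 0.3));
  group["prpers<prn> own<adj> "].push_back(make_pair("rule-b", 0.7));

  // prints "rule-b", the heavier of the two candidates
  cout << choose_weighted_rule(group, "prpers<prn> own<adj> ", "rule-a") << endl;
  return 0;
}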