Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix (revision 70382) @@ -99,7 +99,7 @@ - +

@@ -111,7 +111,7 @@

s

- +

an'san

en'san

anan

@@ -130,7 +130,8 @@
- deadm + m + deadm island sea boot @@ -142,8 +143,8 @@ of a the - big - new + big + new thefirst thesecond thethird Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-eng/eng-rus.automorf.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-eng/rus-eng.autogen.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix (revision 70382) @@ -105,14 +105,14 @@ +

ыа

+

а

аа

-

ыа

уа

уа

ойа

еа

ыа

-

а

ыа

ыа

амиа

@@ -119,14 +119,14 @@

аха

+

а

+

ов

-

а

у

а

ом

е

ы

-

ов

ам

ов

ами

@@ -133,14 +133,14 @@

ах

+

а

+

ов

-

а

у

а

ом

е

а

-

ов

ам

ов

ами

@@ -147,14 +147,14 @@

ах

+

а

+

ов

-

а

у

ом

е

ы

-

ов

ам

а

ами

@@ -161,15 +161,15 @@

ах

+

а

+

+

ов

-

а

у

ом

е

и

-

-

ов

ам

и

ами

@@ -176,14 +176,14 @@

ах

+

а

+

ов

-

а

у

ом

е

и

-

ов

ам

и

ами

@@ -190,14 +190,14 @@

ах

+

каок

+

ковок

окок

-

каок

куок

окок

комок

кеок

киок

-

ковок

камок

киок

камиок

@@ -204,14 +204,14 @@

кахок

+

а

+

ов

-

а

у

ом

е

а

-

ов

ам

а

ами

@@ -218,14 +218,14 @@

ах

+

яе

+

ейе

ее

-

яе

юе

ее

еме

ее

яе

-

ейе

яме

яе

ямие

@@ -232,14 +232,14 @@

яхе

+

ао

+

о

оо

-

ао

уо

оо

омо

ео

ао

-

о

амо

ао

амио

@@ -246,14 +246,14 @@

ахо

+

няень

+

нейень

еньень

-

няень

нюень

еньень

немень

неень

ниень

-

нейень

нямень

ниень

нямиень

@@ -260,14 +260,14 @@

няхень

+

+

-

-

@@ -274,8 +274,9 @@

+

огоой

+

ихой

ойой

-

огоой

омуой

огоой

ойой

@@ -282,7 +283,6 @@

имой

омой

иеой

-

ихой

имой

ихой

иеой

@@ -290,8 +290,9 @@

ихой

+

огоый

+

ыхый

ыйый

-

огоый

омуый

огоый

ыйый

@@ -298,7 +299,6 @@

ымый

омый

ыеый

-

ыхый

ымый

ыхый

ыеый

@@ -306,8 +306,9 @@

ыхый

+

ьегоий

+

ьихий

ийий

-

ьегоий

ьемуий

ьегоий

ийий

@@ -314,7 +315,6 @@

ьимий

ьимий

иеий

-

ьихий

ьимий

ьихий

иеий

@@ -322,8 +322,9 @@

ьихий

+

огоой

+

ихой

ойой

-

огоой

омуой

огоой

ойой

@@ -330,7 +331,6 @@

имой

омой

иеой

-

ихой

имой

ихой

иеой

@@ -338,8 +338,9 @@

ихой

+

огоый

+

ыхый

ыйый

-

огоый

омуый

огоый

ыйый

@@ -346,7 +347,6 @@

ымый

омый

ыеый

-

ыхый

ымый

ыхый

ыеый

@@ -354,8 +354,9 @@

ыхый

+

ьегоий

+

ьихий

ийий

-

ьегоий

ьемуий

ьегоий

ийий

@@ -362,7 +363,6 @@

ьимий

ьимий

иеий

-

ьихий

ьимий

ьихий

иеий

@@ -376,6 +376,7 @@ остров мор мертвец + человек сапог сундук пар Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus/eng-rus.autogen.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus/rus-eng.automorf.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix (revision 70382) @@ -105,6 +105,7 @@

замокlock

носокsock

островisland

+

человекman

мертвецdeadman

мореsea

параpair

Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x (revision 70382) @@ -50,7 +50,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -69,12 +69,12 @@ - + - + Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin (revision 70382) @@ -1,5 +1,5 @@ @v@A@B@C@D@E@F@G@H@I@J@K@L@M@N@O@P@Q@R@S@T@U@V@W@X@Y@Z@a@b@c@d@e@f@g@h@i@j@k@l@m@n@o@p@q@r@s@t@u@v@w@x@y@zDDDDDDDDDDDDDDDDDDD D!D"D#D$D%D&D'D(D)D*D+D,D-D.D/D0D1D2D3D4D5DQD6D7D8D:D;DD?D@DADBDCDDDEDFDGDHDIDLDKDJDMDNDO@M@n@n@p@v@b@l@e@x@v@b@m@o@d@v@a@u@x@v@b@s@e@r@v@b@h@a@v@e@r@c@n@j@s@u@b@c@n@j@c@o@o@c@n@j@a@d@v@p@r@a@d@v@i@j@a@d@j@p@r@n@a@t@t@r@t@o@p@a@n@t@c@o@g@o@r@g@a@l@p@e@r@s@r@e@f@r@e@s@d@e@t@p@o@s@p1@p2@p3@n@t@u@t@f@m@u@n@s@g@d@u@p@l@s@p@i@n@d@d@e@f@n@o@m@g@e@n@a@c@c@d@a@t@i@n@s@l@o@c@d@e@m@i@t@g@q@n@t@n@e@g@e@m@p@h@i@n@f@p@r@e@s@i@m@p@p@r@e@t@p@p@p@p@r@s@p@a@s@v@a@c@t@v@s@i@n@t@p@s@t@c@o@m@p@s@u@p@o@r@d@a@c@r@u@n@c@n@u@m@s@e@n@t@c@m@a@p@o@s@g@u@i@o@l@p@a@r@r@p@a@r@l@q@u@o@t@r@q@u@o@t@c@o@m@p@o@u@n@d-@o@n@l@y-@L -@c@o@m@p@o@u@n@d-@R@z@M@M@D@D}@D@D@LD@M@L@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@D@L@L@D@D@D@mD@D@D@@L@L@M@D@D@LD@D@D}@D@D}@D@D@D@LD@D~@D?D@MD@MD@MD@M?@D@D?D@D@D@D@D@D@ +@c@o@m@p@o@u@n@d-@R@@M@M@D@D}@D@D@LD@M@L@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@D@L@L@D@D@D@LD@MD@MD@MD@D@D@D@mD@D@D@@L@L@M@D@D@LD@D@D}@D@D}@D@D@LD@D~@D?D@MD@MD@MD@M?@D@D?D@D@D@D@D@D@ @@M@@M @M@D@D@mD@D@D@ @@M@@M@@M@D@D@mD@D@D@ @@ -8,17 +8,15 @@ @D@D@mD@D@ @D@D@D@D@D@mD@D@D@D@ @D@n@n@{@{@@@@@@AAA A `s`s @y@y -@m@a@i@n@@@s@t@a@n@d@a@r@d)@{  -  -  -"%)"7 @@$@I7777@w@y &*-48@P"@]@P@]@`@]  -#'+.5@X@A@a@J@J@a@i@i  @a@`/19@Y@B@e@K@Q@B!@H@j@n -0@n2@P: @M @C  @L @R : " @n @k - -; -@Z -@D -@f -@M +@m@a@i@n@@@s@t@a@n@d@a@r@d+@  +   +   +!),)= @F$@O====@}@ -03:>@V)@c!@V@c@f@c  +"*.14;#@^@G@g@P#@P@g@o@o  #@e@d57?@_@H@k@Q@W@H(@N@p@t$6@s8@S@@ @S @I  @R @X @@ ) @t @q @d% +@A +@` +@J +@l @S -@^@b @l  < = @E @V@y@N@T@c@m 2=@[@F@G@U@F@n>+@V@z \ No newline at end of file +@Y +@d@h @r & @B @C @K @\@@T@Z@i@s'4@C@a@L@M@[@L@t@D-@\@ \ No newline at end of file Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin (revision 70382) @@ -1,5 +1,5 @@ @v@A@B@C@D@E@F@G@H@I@J@K@L@M@N@O@P@Q@R@S@T@U@V@W@X@Y@Z@a@b@c@d@e@f@g@h@i@j@k@l@m@n@o@p@q@r@s@t@u@v@w@x@y@zDDDDDDDDDDDDDDDDDDD D!D"D#D$D%D&D'D(D)D*D+D,D-D.D/D0D1D2D3D4D5DQD6D7D8D:D;DD?D@DADBDCDDDEDFDGDHDIDLDKDJDMDNDO@M@n@n@p@v@b@l@e@x@v@b@m@o@d@v@a@u@x@v@b@s@e@r@v@b@h@a@v@e@r@c@n@j@s@u@b@c@n@j@c@o@o@c@n@j@a@d@v@p@r@a@d@v@i@j@a@d@j@p@r@n@a@t@t@r@t@o@p@a@n@t@c@o@g@o@r@g@a@l@p@e@r@s@r@e@f@r@e@s@d@e@t@p@o@s@p1@p2@p3@n@t@u@t@f@m@u@n@s@g@d@u@p@l@s@p@i@n@d@d@e@f@n@o@m@g@e@n@a@c@c@d@a@t@i@n@s@l@o@c@d@e@m@i@t@g@q@n@t@n@e@g@e@m@p@h@i@n@f@p@r@e@s@i@m@p@p@r@e@t@p@p@p@p@r@s@p@a@s@v@a@c@t@v@s@i@n@t@p@s@t@c@o@m@p@s@u@p@o@r@d@a@c@r@u@n@c@n@u@m@s@e@n@t@c@m@a@p@o@s@g@u@i@o@l@p@a@r@r@p@a@r@l@q@u@o@t@r@q@u@o@t@c@o@m@p@o@u@n@d-@o@n@l@y-@L -@c@o@m@p@o@u@n@d-@R@z@M@MD@D}@D@D@D@L@L@MD@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@@L@LD@D@D@D@mD@D@@L@@M@LD@D@D@LD@D}@D@D}@D@D@D@D@LD~@D@D?D@MD@MD@M?@MD@D@D?D@D@D@D@D@ +@c@o@m@p@o@u@n@d-@R@@M@MD@D}@D@D@D@L@L@MD@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@@L@LD@D@D@D@LD@MD@MD@MD@D@D@D@mD@D@@L@@M@LD@D@D@LD@D}@D@D}@D@D@D@LD~@D@D?D@MD@MD@M?@MD@D@D?D@D@D@D@D@ @@M@@M@@M D@D@D@mD@D@ @@M@@M@@M@D@D@D@mD@D@ @@ -8,17 +8,15 @@ @D@D@D@mD@ @D@D@D@D@D@D@mD@D@D@ @D@@n@n@{@{@@@@@@AAA A `s`s @y@y -@m@a@i@n@@@s@t@a@n@d@a@r@d)@{  -  -  -"%)"7 @@$@I7777@w@y &*-48@P"@]@P@]@`@]  -#'+.5@X@A@a@J@J@a@i@i  @a@`/19@Y@B@e@K@Q@B!@H@j@n -0@n2@P: @M @C  @L @R : " @n @k - -; -@Z -@D -@f -@M +@m@a@i@n@@@s@t@a@n@d@a@r@d+@  +   +   +!),)= @F$@O====@}@ -03:>@V)@c!@V@c@f@c  +"*.14;#@^@G@g@P#@P@g@o@o  #@e@d57?@_@H@k@Q@W@H(@N@p@t$6@s8@S@@ @S @I  @R @X @@ ) @t @q @d% +@A +@` +@J +@l @S -@^@b @l  < = @E @V@y@N@T@c@m 2=@[@F@G@U@F@n>+@V@z \ No newline at end of file +@Y +@d@h @r & @B @C @K @\@@T@Z@i@s'4@C@a@L@M@[@L@t@D-@\@ \ No newline at end of file Index: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/midterm-demo.sh =================================================================== --- branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/midterm-demo.sh (nonexistent) +++ branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/midterm-demo.sh (revision 70382) @@ -0,0 +1,32 @@ +#! /bin/sh + +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'сундук мертвеца'\n\n" +printf "Since 'мертвец'/'dead man' is an animate noun,\nin most cases it is preferable that it\nforms possessive case, which fact is\nreflected by giving the first pattern\nfor 'two_pt_poss' rule a high weight." +printf "\n------------------------------------------------------\n" +./process.sh "сундук мертвеца" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок сундука'\n\n" +printf "On the other hand, 'сундук'/'chest' is an inanimate noun,\nso I naively suggest it should form attributive\nconstruction, therefore rule 'two_pt_attr' has patterns\nof high weight for the cases when that word is involved." +printf "\n------------------------------------------------------\n" +./process.sh "замок сундука" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'пара носков'\n\n" +printf "The word 'пара' means 'pair' (or 'couple'\nbut since we have no lexical selection here,\nI omit that fact), and if used with a plural noun\nin genitive should be translated as 'pair of'\nno matter the animacy of the second noun.\nThis is reflected by giving the second pattern\nfor rule 'two_pt_of' weight of 1 which\noverrides the weight of 0.9 for pattern with 'носок'/'sock'\nas the second word in rule 'two_pt_attr'\n(which is identical to that for 'сундук')." +printf "\n------------------------------------------------------\n" +./process.sh "пара носков" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'пара носка'\n\n" +printf "However, when the second word with 'пара' is\nsingular, that is better be translated\nas attributive or possessive construction\nsince it doesn't mean 'two matching items'\nbut rather 'a match for the item'.\nHere for simplicity I designed it\nto be always possessive, irregardless\nof animacy." +printf "\n------------------------------------------------------\n" +./process.sh "пара носка" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок большого сундука'\n\n" +printf "The weights for three-piece expressions\nare underdeveloped. In fact,they only\ndistinguish betweenthe cases when\nthe middle word is an adjective or an ordinal number.\nWith adjectives the weights are distributed\nuniformly therefore for 'большой'/'big'\nthe first rule\is chosen" +printf "\n------------------------------------------------------\n" +./process.sh "замок большого сундука" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок первого сундука'\n\n" +printf "For ordinal numbers, the of-construction\nis designed to be preferrable." +printf "\n------------------------------------------------------\n" +./process.sh "замок первого сундука" Property changes on: branches/weighted-transfer/tags/midterm/apertium-toy-ru-en/midterm-demo.sh ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property Index: branches/weighted-transfer/tags/midterm/apertium/apertium/transfer.cc =================================================================== --- branches/weighted-transfer/tags/midterm/apertium/apertium/transfer.cc (revision 70375) +++ branches/weighted-transfer/tags/midterm/apertium/apertium/transfer.cc (revision 70382) @@ -80,10 +80,8 @@ Transfer::readData(FILE *in) { // Read transfer rules data from .t*x.bin file - //cerr << "readData" << endl; // di alphabet.read(in); - //cerr << "Alphabet size: " << alphabet.size() << endl; // di any_char = alphabet(TRXReader::ANY_CHAR); any_tag = alphabet(TRXReader::ANY_TAG); @@ -144,8 +142,7 @@ void Transfer::readBil(string const &fstfile) -{ - cerr << "readBil" << endl; // di +{ FILE *in = fopen(fstfile.c_str(), "rb"); if(!in) { @@ -159,8 +156,7 @@ void Transfer::setExtendedDictionary(string const &fstfile) -{ - cerr << "setExtendedDictionary" << endl; // di +{ FILE *in = fopen(fstfile.c_str(), "rb"); if(!in) { @@ -181,7 +177,7 @@ readTransfer(transferfile); // open precompiled .t*x.bin file and read data from it - cerr << "Reading data from " << datafile.c_str() << endl << endl; + cerr << "Reading data from " << datafile.c_str() << endl; FILE *in = fopen(datafile.c_str(), "rb"); if(!in) { @@ -221,7 +217,6 @@ } root_element = xmlDocGetRootElement(doc); - //cerr << root_element->properties << endl; // di // search through attributes of root element for(xmlAttr *i = root_element->properties; i != NULL; i = i->next) @@ -259,7 +254,7 @@ } } - if (useWeights) // di + /*if (useWeights) // di { // di // double-check rule ids in rule_id_map and rule_ids // di cerr << endl << "Those are the ids you wanted: " << endl; //di @@ -268,7 +263,7 @@ cerr << "rule_ids[" << k << "]: " << rule_ids[k] << endl; // di cerr << "rule_id_map[" << rule_ids[k] << "]: " << rule_id_map[rule_ids[k]] << endl << endl; // di } // di - } // di + } // di*/ } void @@ -286,7 +281,7 @@ { // 'rule' element rule_index++; - cerr << "Collecting rule # " << rule_index << endl; //di + //cerr << "Collecting rule # " << rule_index << endl; //di if (useWeights) // only need ids if weights are used { @@ -298,7 +293,7 @@ } rule_ids.push_back(rule_id); rule_id = ""; - cerr << endl; // di + //cerr << endl; // di } // go through subelements of current 'rule' element looking for some action @@ -351,7 +346,6 @@ int pcreErrorOffset; cerr << "Reading transfer weights from " << in.c_str() << endl << endl; // di - // di doc = xmlReadFile(in.c_str(), NULL, 0); if(doc == NULL) { @@ -360,7 +354,6 @@ } root_element = xmlDocGetRootElement(doc); - //cerr << root_element->name << endl; // di // search through root's children nodes for 'rule-group' elements for(xmlNode *i = root_element->children; i != NULL; i = i->next) @@ -367,7 +360,7 @@ { if(i->type == XML_ELEMENT_NODE && !xmlStrcmp(i->name, (const xmlChar *) "rule-group")) { - cerr << "Collecting rule-group # " << rule_group_index << endl; // di + cerr << "Collecting rule group # " << rule_group_index << endl; // di // get ids of all rules in rule group for(xmlNode *j = i->children; j != NULL; j = j->next) { @@ -377,7 +370,7 @@ rule_id = getRuleId(j); current_rule_group.push_back(rule_id); rule_group_map[rule_id] = rule_group_index; - cerr << endl; // di + cerr << "Rule id: " << rule_id << endl; // di // get patterns for(xmlNode *k = j->children; k != NULL; k = k->next) @@ -385,7 +378,6 @@ if(k->type == XML_ELEMENT_NODE && !xmlStrcmp(k->name, (const xmlChar *) "pattern")) { weight = atof(getNodeAttr(k, "weight").c_str()); - cerr << weight << endl; for(xmlNode *patit = k->children; patit != NULL; patit = patit->next) { if(patit->type == XML_ELEMENT_NODE && !xmlStrcmp(patit->name, (const xmlChar *) "pattern-item")) @@ -428,11 +420,10 @@ { regex = regex + ">"; } - cerr << lemma << " " << tags << endl; regex = regex + "\\S*? "; - cerr << regex << endl; } } + cerr << " " << weight << " " << regex << endl; reCompiled = pcre_compile(regex.c_str(), 0, &pcreErrorStr, &pcreErrorOffset, NULL); //pcreExtra = pcre_study(reCompiled, 0, &pcreErrorStr); current_pattern_group.push_back(make_pair(reCompiled, weight)); @@ -441,18 +432,18 @@ } weighted_patterns[rule_id] = current_pattern_group; current_pattern_group.clear(); + cerr << endl; // di } } // push newly acquired current_rule_group into rule_groups rule_groups.push_back(current_rule_group); current_rule_group.clear(); - cerr << endl; // di rule_group_index++; } } // print out what was collected // di - cerr << "These are the rule groups you collected:" << endl; // di + /*cerr << "These are the rule groups you collected:" << endl; // di unsigned int k1, k2; // di for (k1 = 0; k1 < rule_groups.size(); k1++) // di { // di @@ -460,13 +451,13 @@ for (k2 = 0; k2 < rule_groups[k1].size(); k2++) // di { // di cerr << " " << rule_groups[k1][k2] << endl; // di - cerr << " rule_group_map[" << rule_groups[k1][k2] << "]: "; // di - cerr << rule_group_map[rule_groups[k1][k2]] << endl; // di + //cerr << " rule_group_map[" << rule_groups[k1][k2] << "]: "; // di + //cerr << rule_group_map[rule_groups[k1][k2]] << endl; // di } // di cerr << endl; // di - } // di + } // di*/ - cerr << "And these are the patterns:" << endl; // di + /*cerr << "And these are the patterns:" << endl; // di for (k1 = 1; k1 < rule_ids.size(); k1++) // di { // di if (rule_ids[k1] != "") // di @@ -478,7 +469,7 @@ cerr << weighted_patterns[rule_ids[k1]][k2].second << endl; // di } // di } // di - } // di + } // di*/ } @@ -491,7 +482,7 @@ { if(!xmlStrcmp(j->name, (const xmlChar *) "comment")) // di { // di - cerr << "Rule comment: " << xmlNodeListGetString(localroot->doc, j->children, 1) << endl; // di + //cerr << "Rule comment: " << xmlNodeListGetString(localroot->doc, j->children, 1) << endl; // di } // di else //di { //di @@ -499,7 +490,7 @@ { // add rule id to rule_id_map rule_id = (const char*)xmlNodeListGetString(localroot->doc, j->children, 1); - cerr << "Rule id: " << rule_id << endl; // di + //cerr << "Rule id: " << rule_id << endl; // di } } // di } // di @@ -923,7 +914,6 @@ if(j->type == XML_ELEMENT_NODE) { myword.append(evalString(j)); - //cerr << myword << endl; // di } } if(myword != "") @@ -931,7 +921,6 @@ fputwc_unlocked(L'^', output); fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output); fputwc_unlocked(L'$', output); - //cerr << UtfConverter::fromUtf8(myword).c_str() << endl; // di } } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) @@ -982,7 +971,6 @@ { string processed = processChunk(i); fputws_unlocked(UtfConverter::fromUtf8(processed).c_str(), output); - cerr << "Hey, I just made a chunk: " << processed << endl; // di } else // 'b' { @@ -1027,7 +1015,6 @@ // starting to build the chunk result.append("^"); - //cerr << result << endl; // di // adding chunk name if(caseofchunk != "") @@ -1035,12 +1022,10 @@ if(name != "") { result.append(copycase(variables[caseofchunk], name)); - //cerr << result << endl; // di } else if(namefrom != "") { result.append(copycase(variables[caseofchunk], variables[namefrom])); - //cerr << result << endl; // di } else { @@ -1053,12 +1038,10 @@ if(name != "") { result.append(name); - //cerr << result << endl; // di } else if(namefrom != "") { result.append(variables[namefrom]); - //cerr << result << endl; // di } else { @@ -1072,8 +1055,7 @@ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) - { - cerr << "chunk element # " << count << ": " << i->name << endl; // di + { count++; // di if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) { @@ -1080,7 +1062,6 @@ // add chunk tags result.append(processTags(i)); result.append("{"); - //cerr << result << endl; // di } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { @@ -1093,13 +1074,11 @@ { if(j->type == XML_ELEMENT_NODE) { - //cerr << "Executing " << j->name << endl; // di myword.append(evalString(j)); evalStringClip(j, untouched, untouched_pos); // black magic if(untouched_pos != -1) { - //cerr << "Got untouched: " << untouched_pos << ", " << untouched << endl; // di wordcache[untouched_pos].append(untouched); } } @@ -1106,11 +1085,9 @@ } if(myword != "") { - //cerr << myword << endl; result.append("^"); result.append(myword); result.append("$"); - //cerr << result << endl; } } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) @@ -1146,17 +1123,14 @@ } if(myword != "") { - //cerr << myword << endl; // di result.append("^"); result.append(myword); result.append("$"); - //cerr << result << endl; // di } } else // 'b' { result.append(evalString(i)); - //cerr << result << endl; // di } } } @@ -1191,8 +1165,7 @@ string Transfer::processTags(xmlNode *localroot) -{ - //cerr << "processTags" << endl; // di +{ string result; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { @@ -1253,8 +1226,7 @@ int Transfer::processRejectCurrentRule(xmlNode *localroot) -{ - cerr << "processRejectCurrentRule" << endl; // di +{ bool shifting = true; string value; for(xmlAttr *i = localroot->properties; i != NULL; i = i->next) @@ -1276,8 +1248,7 @@ void Transfer::processLet(xmlNode *localroot) -{ - cerr << "processLet" << endl; // di +{ xmlNode *leftSide = NULL, *rightSide = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1372,8 +1343,7 @@ void Transfer::processAppend(xmlNode *localroot) -{ - cerr << "processAppend" << endl; // di +{ string name; for(xmlAttr *i = localroot->properties; i != NULL; i = i->next) { @@ -1395,8 +1365,7 @@ void Transfer::processModifyCase(xmlNode *localroot) -{ - cerr << "processModifyCase" << endl; // di +{ xmlNode *leftSide = NULL, *rightSide = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1470,8 +1439,7 @@ void Transfer::processCallMacro(xmlNode *localroot) -{ - cerr << "processCallMacro" << endl; // di +{ string const n = (const char *) localroot->properties->children->content; int npar = 0; @@ -1539,8 +1507,7 @@ int Transfer::processChoose(xmlNode *localroot) -{ - cerr << "processChoose" << endl; // di +{ int words_to_consume = -1; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { @@ -1601,8 +1568,7 @@ bool Transfer::processLogical(xmlNode *localroot) -{ - cerr << "processLogical" << endl; // di +{ if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal")) { return processEqual(localroot); @@ -1649,8 +1615,7 @@ bool Transfer::processIn(xmlNode *localroot) -{ - cerr << "processIn" << endl; // di +{ xmlNode *value = NULL; xmlChar *idlist = NULL; @@ -1701,8 +1666,7 @@ bool Transfer::processTest(xmlNode *localroot) -{ - cerr << "processTest" << endl; // di +{ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1715,8 +1679,7 @@ bool Transfer::processAnd(xmlNode *localroot) -{ - cerr << "processAnd" << endl; // di +{ bool val = true; for(xmlNode *i = localroot->children; val && i != NULL; i = i->next) { @@ -1731,8 +1694,7 @@ bool Transfer::processOr(xmlNode *localroot) -{ - cerr << "processOr" << endl; // di +{ bool val = false; for(xmlNode *i = localroot->children; !val && i != NULL ; i = i->next) { @@ -1747,8 +1709,7 @@ bool Transfer::processNot(xmlNode *localroot) -{ - cerr << "processNot" << endl; // di +{ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1761,8 +1722,7 @@ bool Transfer::processEqual(xmlNode *localroot) -{ - cerr << "processEqual" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1801,8 +1761,7 @@ bool Transfer::beginsWith(string const &s1, string const &s2) const -{ - cerr << "beginsWith" << endl; // di +{ int const limit = s2.size(), constraint = s1.size(); if(constraint < limit) @@ -1822,8 +1781,7 @@ bool Transfer::endsWith(string const &s1, string const &s2) const -{ - cerr << "endsWith" << endl; // di +{ int const limit = s2.size(), constraint = s1.size(); if(constraint < limit) @@ -1844,8 +1802,7 @@ bool Transfer::processBeginsWith(xmlNode *localroot) -{ - cerr << "processBeginsWith" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1884,8 +1841,7 @@ bool Transfer::processEndsWith(xmlNode *localroot) -{ - cerr << "processEndsWith" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1924,8 +1880,7 @@ bool Transfer::processBeginsWithList(xmlNode *localroot) -{ - cerr << "processBeginsWithList" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1974,8 +1929,7 @@ bool Transfer::processEndsWithList(xmlNode *localroot) -{ - cerr << "processEndsWithList" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2023,8 +1977,7 @@ bool Transfer::processContainsSubstring(xmlNode *localroot) -{ - cerr << "processContainsSubstring" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2063,8 +2016,7 @@ string Transfer::copycase(string const &source_word, string const &target_word) -{ - //cerr << "copycase" << endl; // di +{ wstring result; wstring const s_word = UtfConverter::fromUtf8(source_word); wstring const t_word = UtfConverter::fromUtf8(target_word); @@ -2094,8 +2046,7 @@ string Transfer::caseOf(string const &str) -{ - cerr << "caseOf" << endl; // di +{ wstring const s = UtfConverter::fromUtf8(str); if(s.size() > 1) @@ -2132,15 +2083,13 @@ string Transfer::tolower(string const &str) const -{ - cerr << "tolower" << endl; // di +{ return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str))); } string Transfer::tags(string const &str) const -{ - //cerr << "tags" << endl; // di +{ string result = "<"; for(unsigned int i = 0, limit = str.size(); i != limit; i++) @@ -2163,7 +2112,6 @@ int Transfer::processRule(xmlNode *localroot) { - cerr << "processRule" << endl; // di int instruction_return, words_to_consume = -1; // localroot is supposed to be an 'action' tag for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2185,8 +2133,7 @@ TransferToken & Transfer::readToken(FILE *in) -{ - //cerr << "readToken" << endl; // di +{ if(!input_buffer.isEmpty()) { return input_buffer.next(); @@ -2196,7 +2143,6 @@ while(true) { int val = fgetwc_unlocked(in); - //wcerr << UtfConverter::toUtf8(wchar_t(val)) << endl; // di if(feof(in) || (val == 0 && internal_null_flush)) { @@ -2206,12 +2152,10 @@ { content += L'\\'; content += (wchar_t) fgetwc_unlocked(in); - //wcerr << content << endl; // di } else if(val == L'[') { content += L'['; - //wcerr << content << endl; // di while(true) { int val2 = fgetwc_unlocked(in); @@ -2219,29 +2163,26 @@ { content += L'\\'; content += wchar_t(fgetwc_unlocked(in)); - //wcerr << content << endl; // di } else if(val2 == L']') { content += L']'; - //wcerr << content << endl; // di break; } else { content += wchar_t(val2); - //cerr << UtfConverter::toUtf8(content) << endl; // di } } } else if(val == L'$') { - cerr << UtfConverter::toUtf8(content) << endl; + //cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_word)); } else if(val == L'^') { - cerr << UtfConverter::toUtf8(content) << endl; + // cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_blank)); } else if(val == L'\0' && null_flush) @@ -2257,36 +2198,31 @@ bool Transfer::getNullFlush(void) -{ - //cerr << "getNullFlush" << endl; // di +{ return null_flush; } void Transfer::setNullFlush(bool null_flush) -{ - cerr << "setNullFlush" << endl; // di +{ this->null_flush = null_flush; } void Transfer::setTrace(bool trace) -{ - cerr << "setTrace" << endl; // di +{ this->trace = trace; } void Transfer::setTraceATT(bool trace) -{ - cerr << "setTraceATT" << endl; // di +{ this->trace_att = trace; } void Transfer::transfer_wrapper_null_flush(FILE *in, FILE *out) -{ - cerr << "transfer_wrapper_null_flush" << endl; // di +{ null_flush = false; internal_null_flush = true; @@ -2308,7 +2244,7 @@ void Transfer::transfer(FILE *in, FILE *out) { - cerr << "Transfer starts here" << endl << endl; // di + //cerr << "Transfer starts here" << endl << endl; // di if(getNullFlush()) { @@ -2326,11 +2262,11 @@ int counter = 0; // di while(true) { - cerr << "Transfer iteration # " << counter << endl; // di - cerr << "last: " << last << endl; // di - cerr << "prev_last: " << prev_last << endl; // di - cerr << "lastrule_num: " << lastrule_num << endl; // di - cerr << "ms.size(): " << ms.size() << endl; // di + //cerr << "Transfer iteration # " << counter << endl; // di + //cerr << "last: " << last << endl; // di + //cerr << "prev_last: " << prev_last << endl; // di + //cerr << "lastrule_num: " << lastrule_num << endl; // di + //cerr << "ms.size(): " << ms.size() << endl; // di // Let's look at input_buffer contents // di /*int initbuffpos = input_buffer.getPos(); // di @@ -2391,13 +2327,10 @@ } // if(trace_att) ends here if (ms.size() == 0) - { - //cerr << "(ms.size() == 0)" << endl; // di + { if(lastrule != NULL) { // this is the branch where a rule specified by lastrule_num is applied - - //cerr << "lastrule != NULL" << endl; // di int num_words_to_consume = applyRule(); if(trace_att) @@ -2409,13 +2342,11 @@ //This piece of code is executed unless the rule contains a "reject-current-rule" instruction if(num_words_to_consume < 0) { - cerr << "num_words_to_consume < 0" << endl; // di banned_rules.clear(); input_buffer.setPos(last); } else if(num_words_to_consume > 0) { - cerr << "num_words_to_consume > 0" << endl; // di banned_rules.clear(); if(prev_last >= input_buffer.getSize()) { @@ -2437,7 +2368,6 @@ } else { - cerr << "num_words_to_consume == 0" << endl; // di //Add rule to banned rules banned_rules.insert(lastrule_num); input_buffer.setPos(prev_last); @@ -2557,7 +2487,6 @@ } else if(tmpblank.size() != 0) { - cerr << "tmpblank.size() != 0" << endl; if(trace_att) { cerr << "printing tmpblank[0]" < tr; if(useBilingual && preBilingual == false) - { - cerr << "useBilingual && preBilingual == false" << endl; // di + { tr = fstp.biltransWithQueue(*tmpword[i], false); cerr << i << " "; - wcerr << tr.first << " "; // di - cerr << tr.second << endl; // di } else if(preBilingual) { @@ -2691,7 +2607,6 @@ // sl = word_in_lang1 // tl = word_in_lang2 - cerr << "preBilingual" << endl; // di wstring sl; wstring tl; int seenSlash = 0; @@ -2733,8 +2648,7 @@ break; } } - cerr << UtfConverter::toUtf8(sl) << endl; // di - wcerr << tl << endl; // di + //tmpword[i]->assign(sl); tr = pair(tl, false); if (useWeights) @@ -2745,15 +2659,11 @@ else { // here we don't need to split anything - cerr << "else" << endl; // di tr = pair(*tmpword[i], false); } - //wcerr << L"tr.first: " << tr.first << endl; // di word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]), UtfConverter::toUtf8(tr.first), tr.second); - //cerr << i << " "; // di - //wcerr << L"word[" << i << L"]: " << UtfConverter::fromUtf8(tr.first) << endl; // di } // check if we use weights @@ -2760,7 +2670,7 @@ if (useWeights) { tmpchunk = UtfConverter::toUtf8(wtmpchunk); - cerr << "Got an lchunk: " << tmpchunk << endl << endl; // di + //cerr << tmpchunk << endl << endl; // di int pcreExecRet; int subStrVec[30]; @@ -2776,14 +2686,14 @@ rule_group_num = rule_group_map[chosen_rule_id]; if (rule_groups[rule_group_num].size() > 1) { - cerr << "Rule # " << lastrule_num << " is ambiguous" << endl; // di - cerr << "Rule id: " << chosen_rule_id << endl; // di - cerr << "Rules in the group: " << endl; // di + cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id << "' is ambiguous on input:" << endl; // di + cerr << tmpchunk << endl << endl; // di + /*cerr << "Rules in the group: " << endl; // di for (unsigned int ind = 0; ind < rule_groups[rule_group_num].size(); ind++) // di { // di cerr << " " << rule_groups[rule_group_num][ind] << endl; // di } // di - cerr << endl; // di + cerr << endl; // di*/ // let's check the weights for each rule in the group chosen_weight = 0.; @@ -2792,7 +2702,7 @@ current_weight = 0.; current_rule_id = rule_groups[rule_group_num][ind]; - cerr << "Checking " << current_rule_id << endl; // di + cerr << "Checking rule # " << rule_id_map[current_rule_id] << " with id '" << current_rule_id << "'" << endl; // di // go through patterns for (unsigned int k = 0; k < weighted_patterns[current_rule_id].size(); k++) { @@ -2801,9 +2711,9 @@ 0, 0, subStrVec, 30); if(pcreExecRet >= 0) // bingo! { - cerr << "Pattern matched " << weighted_patterns[current_rule_id][k].first; // di + cerr << " Pattern # " << k; // di current_weight = weighted_patterns[current_rule_id][k].second; - cerr << " with weight " << current_weight << endl; // di + cerr << " matched with weight " << current_weight << endl; // di if (current_weight > chosen_weight) // heavier rule { chosen_weight = current_weight; @@ -2816,8 +2726,8 @@ // substitute lastrule with the chosen one lastrule_num = rule_id_map[chosen_rule_id]; lastrule = rule_map[lastrule_num-1]; - cerr << "Chose rule # " << lastrule_num << " id: " << chosen_rule_id; - cerr << " with weight " << chosen_weight << endl; // di + cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id; + cerr << "' wins with weight " << chosen_weight << endl << endl; // di } } } @@ -2848,7 +2758,6 @@ tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); - cerr << "wtc: " << words_to_consume << endl; // di return words_to_consume; } // end of applyRule @@ -2859,7 +2768,6 @@ // Here, the token contained in word_str is fed // to the fst by stepping with ms - //cerr << "applyWord: applying to " << UtfConverter::toUtf8(word_str) << endl; // di ms.step(L'^'); for(unsigned int i = 0, limit = word_str.size(); i < limit; i++) { @@ -2901,7 +2809,6 @@ } } ms.step(L'$'); // push the end of token - //cerr << UtfConverter::toUtf8(word_str) << endl; // di } // end of applyWord void Index: branches/weighted-transfer/apertium/apertium/transfer.cc =================================================================== --- branches/weighted-transfer/apertium/apertium/transfer.cc (revision 70375) +++ branches/weighted-transfer/apertium/apertium/transfer.cc (revision 70382) @@ -80,10 +80,8 @@ Transfer::readData(FILE *in) { // Read transfer rules data from .t*x.bin file - //cerr << "readData" << endl; // di alphabet.read(in); - //cerr << "Alphabet size: " << alphabet.size() << endl; // di any_char = alphabet(TRXReader::ANY_CHAR); any_tag = alphabet(TRXReader::ANY_TAG); @@ -144,8 +142,7 @@ void Transfer::readBil(string const &fstfile) -{ - cerr << "readBil" << endl; // di +{ FILE *in = fopen(fstfile.c_str(), "rb"); if(!in) { @@ -159,8 +156,7 @@ void Transfer::setExtendedDictionary(string const &fstfile) -{ - cerr << "setExtendedDictionary" << endl; // di +{ FILE *in = fopen(fstfile.c_str(), "rb"); if(!in) { @@ -181,7 +177,7 @@ readTransfer(transferfile); // open precompiled .t*x.bin file and read data from it - cerr << "Reading data from " << datafile.c_str() << endl << endl; + cerr << "Reading data from " << datafile.c_str() << endl; FILE *in = fopen(datafile.c_str(), "rb"); if(!in) { @@ -221,7 +217,6 @@ } root_element = xmlDocGetRootElement(doc); - //cerr << root_element->properties << endl; // di // search through attributes of root element for(xmlAttr *i = root_element->properties; i != NULL; i = i->next) @@ -259,7 +254,7 @@ } } - if (useWeights) // di + /*if (useWeights) // di { // di // double-check rule ids in rule_id_map and rule_ids // di cerr << endl << "Those are the ids you wanted: " << endl; //di @@ -268,7 +263,7 @@ cerr << "rule_ids[" << k << "]: " << rule_ids[k] << endl; // di cerr << "rule_id_map[" << rule_ids[k] << "]: " << rule_id_map[rule_ids[k]] << endl << endl; // di } // di - } // di + } // di*/ } void @@ -286,7 +281,7 @@ { // 'rule' element rule_index++; - cerr << "Collecting rule # " << rule_index << endl; //di + //cerr << "Collecting rule # " << rule_index << endl; //di if (useWeights) // only need ids if weights are used { @@ -298,7 +293,7 @@ } rule_ids.push_back(rule_id); rule_id = ""; - cerr << endl; // di + //cerr << endl; // di } // go through subelements of current 'rule' element looking for some action @@ -351,7 +346,6 @@ int pcreErrorOffset; cerr << "Reading transfer weights from " << in.c_str() << endl << endl; // di - // di doc = xmlReadFile(in.c_str(), NULL, 0); if(doc == NULL) { @@ -360,7 +354,6 @@ } root_element = xmlDocGetRootElement(doc); - //cerr << root_element->name << endl; // di // search through root's children nodes for 'rule-group' elements for(xmlNode *i = root_element->children; i != NULL; i = i->next) @@ -367,7 +360,7 @@ { if(i->type == XML_ELEMENT_NODE && !xmlStrcmp(i->name, (const xmlChar *) "rule-group")) { - cerr << "Collecting rule-group # " << rule_group_index << endl; // di + cerr << "Collecting rule group # " << rule_group_index << endl; // di // get ids of all rules in rule group for(xmlNode *j = i->children; j != NULL; j = j->next) { @@ -377,7 +370,7 @@ rule_id = getRuleId(j); current_rule_group.push_back(rule_id); rule_group_map[rule_id] = rule_group_index; - cerr << endl; // di + cerr << "Rule id: " << rule_id << endl; // di // get patterns for(xmlNode *k = j->children; k != NULL; k = k->next) @@ -385,7 +378,6 @@ if(k->type == XML_ELEMENT_NODE && !xmlStrcmp(k->name, (const xmlChar *) "pattern")) { weight = atof(getNodeAttr(k, "weight").c_str()); - cerr << weight << endl; for(xmlNode *patit = k->children; patit != NULL; patit = patit->next) { if(patit->type == XML_ELEMENT_NODE && !xmlStrcmp(patit->name, (const xmlChar *) "pattern-item")) @@ -428,11 +420,10 @@ { regex = regex + ">"; } - cerr << lemma << " " << tags << endl; regex = regex + "\\S*? "; - cerr << regex << endl; } } + cerr << " " << weight << " " << regex << endl; reCompiled = pcre_compile(regex.c_str(), 0, &pcreErrorStr, &pcreErrorOffset, NULL); //pcreExtra = pcre_study(reCompiled, 0, &pcreErrorStr); current_pattern_group.push_back(make_pair(reCompiled, weight)); @@ -441,18 +432,18 @@ } weighted_patterns[rule_id] = current_pattern_group; current_pattern_group.clear(); + cerr << endl; // di } } // push newly acquired current_rule_group into rule_groups rule_groups.push_back(current_rule_group); current_rule_group.clear(); - cerr << endl; // di rule_group_index++; } } // print out what was collected // di - cerr << "These are the rule groups you collected:" << endl; // di + /*cerr << "These are the rule groups you collected:" << endl; // di unsigned int k1, k2; // di for (k1 = 0; k1 < rule_groups.size(); k1++) // di { // di @@ -460,13 +451,13 @@ for (k2 = 0; k2 < rule_groups[k1].size(); k2++) // di { // di cerr << " " << rule_groups[k1][k2] << endl; // di - cerr << " rule_group_map[" << rule_groups[k1][k2] << "]: "; // di - cerr << rule_group_map[rule_groups[k1][k2]] << endl; // di + //cerr << " rule_group_map[" << rule_groups[k1][k2] << "]: "; // di + //cerr << rule_group_map[rule_groups[k1][k2]] << endl; // di } // di cerr << endl; // di - } // di + } // di*/ - cerr << "And these are the patterns:" << endl; // di + /*cerr << "And these are the patterns:" << endl; // di for (k1 = 1; k1 < rule_ids.size(); k1++) // di { // di if (rule_ids[k1] != "") // di @@ -478,7 +469,7 @@ cerr << weighted_patterns[rule_ids[k1]][k2].second << endl; // di } // di } // di - } // di + } // di*/ } @@ -491,7 +482,7 @@ { if(!xmlStrcmp(j->name, (const xmlChar *) "comment")) // di { // di - cerr << "Rule comment: " << xmlNodeListGetString(localroot->doc, j->children, 1) << endl; // di + //cerr << "Rule comment: " << xmlNodeListGetString(localroot->doc, j->children, 1) << endl; // di } // di else //di { //di @@ -499,7 +490,7 @@ { // add rule id to rule_id_map rule_id = (const char*)xmlNodeListGetString(localroot->doc, j->children, 1); - cerr << "Rule id: " << rule_id << endl; // di + //cerr << "Rule id: " << rule_id << endl; // di } } // di } // di @@ -923,7 +914,6 @@ if(j->type == XML_ELEMENT_NODE) { myword.append(evalString(j)); - //cerr << myword << endl; // di } } if(myword != "") @@ -931,7 +921,6 @@ fputwc_unlocked(L'^', output); fputws_unlocked(UtfConverter::fromUtf8(myword).c_str(), output); fputwc_unlocked(L'$', output); - //cerr << UtfConverter::fromUtf8(myword).c_str() << endl; // di } } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) @@ -982,7 +971,6 @@ { string processed = processChunk(i); fputws_unlocked(UtfConverter::fromUtf8(processed).c_str(), output); - cerr << "Hey, I just made a chunk: " << processed << endl; // di } else // 'b' { @@ -1027,7 +1015,6 @@ // starting to build the chunk result.append("^"); - //cerr << result << endl; // di // adding chunk name if(caseofchunk != "") @@ -1035,12 +1022,10 @@ if(name != "") { result.append(copycase(variables[caseofchunk], name)); - //cerr << result << endl; // di } else if(namefrom != "") { result.append(copycase(variables[caseofchunk], variables[namefrom])); - //cerr << result << endl; // di } else { @@ -1053,12 +1038,10 @@ if(name != "") { result.append(name); - //cerr << result << endl; // di } else if(namefrom != "") { result.append(variables[namefrom]); - //cerr << result << endl; // di } else { @@ -1072,8 +1055,7 @@ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) - { - cerr << "chunk element # " << count << ": " << i->name << endl; // di + { count++; // di if(!xmlStrcmp(i->name, (const xmlChar *) "tags")) { @@ -1080,7 +1062,6 @@ // add chunk tags result.append(processTags(i)); result.append("{"); - //cerr << result << endl; // di } else if(!xmlStrcmp(i->name, (const xmlChar *) "lu")) { @@ -1093,13 +1074,11 @@ { if(j->type == XML_ELEMENT_NODE) { - //cerr << "Executing " << j->name << endl; // di myword.append(evalString(j)); evalStringClip(j, untouched, untouched_pos); // black magic if(untouched_pos != -1) { - //cerr << "Got untouched: " << untouched_pos << ", " << untouched << endl; // di wordcache[untouched_pos].append(untouched); } } @@ -1106,11 +1085,9 @@ } if(myword != "") { - //cerr << myword << endl; result.append("^"); result.append(myword); result.append("$"); - //cerr << result << endl; } } else if(!xmlStrcmp(i->name, (const xmlChar *) "mlu")) @@ -1146,17 +1123,14 @@ } if(myword != "") { - //cerr << myword << endl; // di result.append("^"); result.append(myword); result.append("$"); - //cerr << result << endl; // di } } else // 'b' { result.append(evalString(i)); - //cerr << result << endl; // di } } } @@ -1191,8 +1165,7 @@ string Transfer::processTags(xmlNode *localroot) -{ - //cerr << "processTags" << endl; // di +{ string result; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { @@ -1253,8 +1226,7 @@ int Transfer::processRejectCurrentRule(xmlNode *localroot) -{ - cerr << "processRejectCurrentRule" << endl; // di +{ bool shifting = true; string value; for(xmlAttr *i = localroot->properties; i != NULL; i = i->next) @@ -1276,8 +1248,7 @@ void Transfer::processLet(xmlNode *localroot) -{ - cerr << "processLet" << endl; // di +{ xmlNode *leftSide = NULL, *rightSide = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1372,8 +1343,7 @@ void Transfer::processAppend(xmlNode *localroot) -{ - cerr << "processAppend" << endl; // di +{ string name; for(xmlAttr *i = localroot->properties; i != NULL; i = i->next) { @@ -1395,8 +1365,7 @@ void Transfer::processModifyCase(xmlNode *localroot) -{ - cerr << "processModifyCase" << endl; // di +{ xmlNode *leftSide = NULL, *rightSide = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1470,8 +1439,7 @@ void Transfer::processCallMacro(xmlNode *localroot) -{ - cerr << "processCallMacro" << endl; // di +{ string const n = (const char *) localroot->properties->children->content; int npar = 0; @@ -1539,8 +1507,7 @@ int Transfer::processChoose(xmlNode *localroot) -{ - cerr << "processChoose" << endl; // di +{ int words_to_consume = -1; for(xmlNode *i = localroot->children; i != NULL; i = i->next) { @@ -1601,8 +1568,7 @@ bool Transfer::processLogical(xmlNode *localroot) -{ - cerr << "processLogical" << endl; // di +{ if(!xmlStrcmp(localroot->name, (const xmlChar *) "equal")) { return processEqual(localroot); @@ -1649,8 +1615,7 @@ bool Transfer::processIn(xmlNode *localroot) -{ - cerr << "processIn" << endl; // di +{ xmlNode *value = NULL; xmlChar *idlist = NULL; @@ -1701,8 +1666,7 @@ bool Transfer::processTest(xmlNode *localroot) -{ - cerr << "processTest" << endl; // di +{ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1715,8 +1679,7 @@ bool Transfer::processAnd(xmlNode *localroot) -{ - cerr << "processAnd" << endl; // di +{ bool val = true; for(xmlNode *i = localroot->children; val && i != NULL; i = i->next) { @@ -1731,8 +1694,7 @@ bool Transfer::processOr(xmlNode *localroot) -{ - cerr << "processOr" << endl; // di +{ bool val = false; for(xmlNode *i = localroot->children; !val && i != NULL ; i = i->next) { @@ -1747,8 +1709,7 @@ bool Transfer::processNot(xmlNode *localroot) -{ - cerr << "processNot" << endl; // di +{ for(xmlNode *i = localroot->children; i != NULL; i = i->next) { if(i->type == XML_ELEMENT_NODE) @@ -1761,8 +1722,7 @@ bool Transfer::processEqual(xmlNode *localroot) -{ - cerr << "processEqual" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1801,8 +1761,7 @@ bool Transfer::beginsWith(string const &s1, string const &s2) const -{ - cerr << "beginsWith" << endl; // di +{ int const limit = s2.size(), constraint = s1.size(); if(constraint < limit) @@ -1822,8 +1781,7 @@ bool Transfer::endsWith(string const &s1, string const &s2) const -{ - cerr << "endsWith" << endl; // di +{ int const limit = s2.size(), constraint = s1.size(); if(constraint < limit) @@ -1844,8 +1802,7 @@ bool Transfer::processBeginsWith(xmlNode *localroot) -{ - cerr << "processBeginsWith" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1884,8 +1841,7 @@ bool Transfer::processEndsWith(xmlNode *localroot) -{ - cerr << "processEndsWith" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1924,8 +1880,7 @@ bool Transfer::processBeginsWithList(xmlNode *localroot) -{ - cerr << "processBeginsWithList" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -1974,8 +1929,7 @@ bool Transfer::processEndsWithList(xmlNode *localroot) -{ - cerr << "processEndsWithList" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2023,8 +1977,7 @@ bool Transfer::processContainsSubstring(xmlNode *localroot) -{ - cerr << "processContainsSubstring" << endl; // di +{ xmlNode *first = NULL, *second = NULL; for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2063,8 +2016,7 @@ string Transfer::copycase(string const &source_word, string const &target_word) -{ - //cerr << "copycase" << endl; // di +{ wstring result; wstring const s_word = UtfConverter::fromUtf8(source_word); wstring const t_word = UtfConverter::fromUtf8(target_word); @@ -2094,8 +2046,7 @@ string Transfer::caseOf(string const &str) -{ - cerr << "caseOf" << endl; // di +{ wstring const s = UtfConverter::fromUtf8(str); if(s.size() > 1) @@ -2132,15 +2083,13 @@ string Transfer::tolower(string const &str) const -{ - cerr << "tolower" << endl; // di +{ return UtfConverter::toUtf8(StringUtils::tolower(UtfConverter::fromUtf8(str))); } string Transfer::tags(string const &str) const -{ - //cerr << "tags" << endl; // di +{ string result = "<"; for(unsigned int i = 0, limit = str.size(); i != limit; i++) @@ -2163,7 +2112,6 @@ int Transfer::processRule(xmlNode *localroot) { - cerr << "processRule" << endl; // di int instruction_return, words_to_consume = -1; // localroot is supposed to be an 'action' tag for(xmlNode *i = localroot->children; i != NULL; i = i->next) @@ -2185,8 +2133,7 @@ TransferToken & Transfer::readToken(FILE *in) -{ - //cerr << "readToken" << endl; // di +{ if(!input_buffer.isEmpty()) { return input_buffer.next(); @@ -2196,7 +2143,6 @@ while(true) { int val = fgetwc_unlocked(in); - //wcerr << UtfConverter::toUtf8(wchar_t(val)) << endl; // di if(feof(in) || (val == 0 && internal_null_flush)) { @@ -2206,12 +2152,10 @@ { content += L'\\'; content += (wchar_t) fgetwc_unlocked(in); - //wcerr << content << endl; // di } else if(val == L'[') { content += L'['; - //wcerr << content << endl; // di while(true) { int val2 = fgetwc_unlocked(in); @@ -2219,29 +2163,26 @@ { content += L'\\'; content += wchar_t(fgetwc_unlocked(in)); - //wcerr << content << endl; // di } else if(val2 == L']') { content += L']'; - //wcerr << content << endl; // di break; } else { content += wchar_t(val2); - //cerr << UtfConverter::toUtf8(content) << endl; // di } } } else if(val == L'$') { - cerr << UtfConverter::toUtf8(content) << endl; + //cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_word)); } else if(val == L'^') { - cerr << UtfConverter::toUtf8(content) << endl; + // cerr << UtfConverter::toUtf8(content) << endl; // di return input_buffer.add(TransferToken(content, tt_blank)); } else if(val == L'\0' && null_flush) @@ -2257,36 +2198,31 @@ bool Transfer::getNullFlush(void) -{ - //cerr << "getNullFlush" << endl; // di +{ return null_flush; } void Transfer::setNullFlush(bool null_flush) -{ - cerr << "setNullFlush" << endl; // di +{ this->null_flush = null_flush; } void Transfer::setTrace(bool trace) -{ - cerr << "setTrace" << endl; // di +{ this->trace = trace; } void Transfer::setTraceATT(bool trace) -{ - cerr << "setTraceATT" << endl; // di +{ this->trace_att = trace; } void Transfer::transfer_wrapper_null_flush(FILE *in, FILE *out) -{ - cerr << "transfer_wrapper_null_flush" << endl; // di +{ null_flush = false; internal_null_flush = true; @@ -2308,7 +2244,7 @@ void Transfer::transfer(FILE *in, FILE *out) { - cerr << "Transfer starts here" << endl << endl; // di + //cerr << "Transfer starts here" << endl << endl; // di if(getNullFlush()) { @@ -2326,11 +2262,11 @@ int counter = 0; // di while(true) { - cerr << "Transfer iteration # " << counter << endl; // di - cerr << "last: " << last << endl; // di - cerr << "prev_last: " << prev_last << endl; // di - cerr << "lastrule_num: " << lastrule_num << endl; // di - cerr << "ms.size(): " << ms.size() << endl; // di + //cerr << "Transfer iteration # " << counter << endl; // di + //cerr << "last: " << last << endl; // di + //cerr << "prev_last: " << prev_last << endl; // di + //cerr << "lastrule_num: " << lastrule_num << endl; // di + //cerr << "ms.size(): " << ms.size() << endl; // di // Let's look at input_buffer contents // di /*int initbuffpos = input_buffer.getPos(); // di @@ -2391,13 +2327,10 @@ } // if(trace_att) ends here if (ms.size() == 0) - { - //cerr << "(ms.size() == 0)" << endl; // di + { if(lastrule != NULL) { // this is the branch where a rule specified by lastrule_num is applied - - //cerr << "lastrule != NULL" << endl; // di int num_words_to_consume = applyRule(); if(trace_att) @@ -2409,13 +2342,11 @@ //This piece of code is executed unless the rule contains a "reject-current-rule" instruction if(num_words_to_consume < 0) { - cerr << "num_words_to_consume < 0" << endl; // di banned_rules.clear(); input_buffer.setPos(last); } else if(num_words_to_consume > 0) { - cerr << "num_words_to_consume > 0" << endl; // di banned_rules.clear(); if(prev_last >= input_buffer.getSize()) { @@ -2437,7 +2368,6 @@ } else { - cerr << "num_words_to_consume == 0" << endl; // di //Add rule to banned rules banned_rules.insert(lastrule_num); input_buffer.setPos(prev_last); @@ -2557,7 +2487,6 @@ } else if(tmpblank.size() != 0) { - cerr << "tmpblank.size() != 0" << endl; if(trace_att) { cerr << "printing tmpblank[0]" < tr; if(useBilingual && preBilingual == false) - { - cerr << "useBilingual && preBilingual == false" << endl; // di + { tr = fstp.biltransWithQueue(*tmpword[i], false); cerr << i << " "; - wcerr << tr.first << " "; // di - cerr << tr.second << endl; // di } else if(preBilingual) { @@ -2691,7 +2607,6 @@ // sl = word_in_lang1 // tl = word_in_lang2 - cerr << "preBilingual" << endl; // di wstring sl; wstring tl; int seenSlash = 0; @@ -2733,8 +2648,7 @@ break; } } - cerr << UtfConverter::toUtf8(sl) << endl; // di - wcerr << tl << endl; // di + //tmpword[i]->assign(sl); tr = pair(tl, false); if (useWeights) @@ -2745,15 +2659,11 @@ else { // here we don't need to split anything - cerr << "else" << endl; // di tr = pair(*tmpword[i], false); } - //wcerr << L"tr.first: " << tr.first << endl; // di word[i] = new TransferWord(UtfConverter::toUtf8(*tmpword[i]), UtfConverter::toUtf8(tr.first), tr.second); - //cerr << i << " "; // di - //wcerr << L"word[" << i << L"]: " << UtfConverter::fromUtf8(tr.first) << endl; // di } // check if we use weights @@ -2760,7 +2670,7 @@ if (useWeights) { tmpchunk = UtfConverter::toUtf8(wtmpchunk); - cerr << "Got an lchunk: " << tmpchunk << endl << endl; // di + //cerr << tmpchunk << endl << endl; // di int pcreExecRet; int subStrVec[30]; @@ -2776,14 +2686,14 @@ rule_group_num = rule_group_map[chosen_rule_id]; if (rule_groups[rule_group_num].size() > 1) { - cerr << "Rule # " << lastrule_num << " is ambiguous" << endl; // di - cerr << "Rule id: " << chosen_rule_id << endl; // di - cerr << "Rules in the group: " << endl; // di + cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id << "' is ambiguous on input:" << endl; // di + cerr << tmpchunk << endl << endl; // di + /*cerr << "Rules in the group: " << endl; // di for (unsigned int ind = 0; ind < rule_groups[rule_group_num].size(); ind++) // di { // di cerr << " " << rule_groups[rule_group_num][ind] << endl; // di } // di - cerr << endl; // di + cerr << endl; // di*/ // let's check the weights for each rule in the group chosen_weight = 0.; @@ -2792,7 +2702,7 @@ current_weight = 0.; current_rule_id = rule_groups[rule_group_num][ind]; - cerr << "Checking " << current_rule_id << endl; // di + cerr << "Checking rule # " << rule_id_map[current_rule_id] << " with id '" << current_rule_id << "'" << endl; // di // go through patterns for (unsigned int k = 0; k < weighted_patterns[current_rule_id].size(); k++) { @@ -2801,9 +2711,9 @@ 0, 0, subStrVec, 30); if(pcreExecRet >= 0) // bingo! { - cerr << "Pattern matched " << weighted_patterns[current_rule_id][k].first; // di + cerr << " Pattern # " << k; // di current_weight = weighted_patterns[current_rule_id][k].second; - cerr << " with weight " << current_weight << endl; // di + cerr << " matched with weight " << current_weight << endl; // di if (current_weight > chosen_weight) // heavier rule { chosen_weight = current_weight; @@ -2816,8 +2726,8 @@ // substitute lastrule with the chosen one lastrule_num = rule_id_map[chosen_rule_id]; lastrule = rule_map[lastrule_num-1]; - cerr << "Chose rule # " << lastrule_num << " id: " << chosen_rule_id; - cerr << " with weight " << chosen_weight << endl; // di + cerr << "Rule # " << lastrule_num << " with id '" << chosen_rule_id; + cerr << "' wins with weight " << chosen_weight << endl << endl; // di } } } @@ -2848,7 +2758,6 @@ tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); - cerr << "wtc: " << words_to_consume << endl; // di return words_to_consume; } // end of applyRule @@ -2859,7 +2768,6 @@ // Here, the token contained in word_str is fed // to the fst by stepping with ms - //cerr << "applyWord: applying to " << UtfConverter::toUtf8(word_str) << endl; // di ms.step(L'^'); for(unsigned int i = 0, limit = word_str.size(); i < limit; i++) { @@ -2901,7 +2809,6 @@ } } ms.step(L'$'); // push the end of token - //cerr << UtfConverter::toUtf8(word_str) << endl; // di } // end of applyWord void Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-eng/apertium-eng.eng.dix (revision 70382) @@ -99,7 +99,7 @@ - +

@@ -111,7 +111,7 @@

s

- +

an'san

en'san

anan

@@ -130,7 +130,8 @@
- deadm + m + deadm island sea boot @@ -142,8 +143,8 @@ of a the - big - new + big + new thefirst thesecond thethird Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-eng/eng-rus.automorf.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-eng/rus-eng.autogen.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-rus/apertium-rus.rus.dix (revision 70382) @@ -105,14 +105,14 @@ +

ыа

+

а

аа

-

ыа

уа

уа

ойа

еа

ыа

-

а

ыа

ыа

амиа

@@ -119,14 +119,14 @@

аха

+

а

+

ов

-

а

у

а

ом

е

ы

-

ов

ам

ов

ами

@@ -133,14 +133,14 @@

ах

+

а

+

ов

-

а

у

а

ом

е

а

-

ов

ам

ов

ами

@@ -147,14 +147,14 @@

ах

+

а

+

ов

-

а

у

ом

е

ы

-

ов

ам

а

ами

@@ -161,15 +161,15 @@

ах

+

а

+

+

ов

-

а

у

ом

е

и

-

-

ов

ам

и

ами

@@ -176,14 +176,14 @@

ах

+

а

+

ов

-

а

у

ом

е

и

-

ов

ам

и

ами

@@ -190,14 +190,14 @@

ах

+

каок

+

ковок

окок

-

каок

куок

окок

комок

кеок

киок

-

ковок

камок

киок

камиок

@@ -204,14 +204,14 @@

кахок

+

а

+

ов

-

а

у

ом

е

а

-

ов

ам

а

ами

@@ -218,14 +218,14 @@

ах

+

яе

+

ейе

ее

-

яе

юе

ее

еме

ее

яе

-

ейе

яме

яе

ямие

@@ -232,14 +232,14 @@

яхе

+

ао

+

о

оо

-

ао

уо

оо

омо

ео

ао

-

о

амо

ао

амио

@@ -246,14 +246,14 @@

ахо

+

няень

+

нейень

еньень

-

няень

нюень

еньень

немень

неень

ниень

-

нейень

нямень

ниень

нямиень

@@ -260,14 +260,14 @@

няхень

+

+

-

-

@@ -274,8 +274,9 @@

+

огоой

+

ихой

ойой

-

огоой

омуой

огоой

ойой

@@ -282,7 +283,6 @@

имой

омой

иеой

-

ихой

имой

ихой

иеой

@@ -290,8 +290,9 @@

ихой

+

огоый

+

ыхый

ыйый

-

огоый

омуый

огоый

ыйый

@@ -298,7 +299,6 @@

ымый

омый

ыеый

-

ыхый

ымый

ыхый

ыеый

@@ -306,8 +306,9 @@

ыхый

+

ьегоий

+

ьихий

ийий

-

ьегоий

ьемуий

ьегоий

ийий

@@ -314,7 +315,6 @@

ьимий

ьимий

иеий

-

ьихий

ьимий

ьихий

иеий

@@ -322,8 +322,9 @@

ьихий

+

огоой

+

ихой

ойой

-

огоой

омуой

огоой

ойой

@@ -330,7 +331,6 @@

имой

омой

иеой

-

ихой

имой

ихой

иеой

@@ -338,8 +338,9 @@

ихой

+

огоый

+

ыхый

ыйый

-

огоый

омуый

огоый

ыйый

@@ -346,7 +347,6 @@

ымый

омый

ыеый

-

ыхый

ымый

ыхый

ыеый

@@ -354,8 +354,9 @@

ыхый

+

ьегоий

+

ьихий

ийий

-

ьегоий

ьемуий

ьегоий

ийий

@@ -362,7 +363,6 @@

ьимий

ьимий

иеий

-

ьихий

ьимий

ьихий

иеий

@@ -376,6 +376,7 @@ остров мор мертвец + человек сапог сундук пар Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus/eng-rus.autogen.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus/rus-eng.automorf.bin =================================================================== Cannot display: file marked as a binary type. svn:mime-type = application/octet-stream Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.dix (revision 70382) @@ -105,6 +105,7 @@

замокlock

носокsock

островisland

+

человекman

мертвецdeadman

мореsea

параpair

Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/apertium-rus-eng.rus-eng.w1x (revision 70382) @@ -50,7 +50,7 @@ - + @@ -62,7 +62,7 @@ - + @@ -69,12 +69,12 @@ - + - + Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/eng-rus.autobil.bin (revision 70382) @@ -1,5 +1,5 @@ @v@A@B@C@D@E@F@G@H@I@J@K@L@M@N@O@P@Q@R@S@T@U@V@W@X@Y@Z@a@b@c@d@e@f@g@h@i@j@k@l@m@n@o@p@q@r@s@t@u@v@w@x@y@zDDDDDDDDDDDDDDDDDDD D!D"D#D$D%D&D'D(D)D*D+D,D-D.D/D0D1D2D3D4D5DQD6D7D8D:D;DD?D@DADBDCDDDEDFDGDHDIDLDKDJDMDNDO@M@n@n@p@v@b@l@e@x@v@b@m@o@d@v@a@u@x@v@b@s@e@r@v@b@h@a@v@e@r@c@n@j@s@u@b@c@n@j@c@o@o@c@n@j@a@d@v@p@r@a@d@v@i@j@a@d@j@p@r@n@a@t@t@r@t@o@p@a@n@t@c@o@g@o@r@g@a@l@p@e@r@s@r@e@f@r@e@s@d@e@t@p@o@s@p1@p2@p3@n@t@u@t@f@m@u@n@s@g@d@u@p@l@s@p@i@n@d@d@e@f@n@o@m@g@e@n@a@c@c@d@a@t@i@n@s@l@o@c@d@e@m@i@t@g@q@n@t@n@e@g@e@m@p@h@i@n@f@p@r@e@s@i@m@p@p@r@e@t@p@p@p@p@r@s@p@a@s@v@a@c@t@v@s@i@n@t@p@s@t@c@o@m@p@s@u@p@o@r@d@a@c@r@u@n@c@n@u@m@s@e@n@t@c@m@a@p@o@s@g@u@i@o@l@p@a@r@r@p@a@r@l@q@u@o@t@r@q@u@o@t@c@o@m@p@o@u@n@d-@o@n@l@y-@L -@c@o@m@p@o@u@n@d-@R@z@M@M@D@D}@D@D@LD@M@L@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@D@L@L@D@D@D@mD@D@D@@L@L@M@D@D@LD@D@D}@D@D}@D@D@D@LD@D~@D?D@MD@MD@MD@M?@D@D?D@D@D@D@D@D@ +@c@o@m@p@o@u@n@d-@R@@M@M@D@D}@D@D@LD@M@L@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@D@L@L@D@D@D@LD@MD@MD@MD@D@D@D@mD@D@D@@L@L@M@D@D@LD@D@D}@D@D}@D@D@LD@D~@D?D@MD@MD@MD@M?@D@D?D@D@D@D@D@D@ @@M@@M @M@D@D@mD@D@D@ @@M@@M@@M@D@D@mD@D@D@ @@ -8,17 +8,15 @@ @D@D@mD@D@ @D@D@D@D@D@mD@D@D@D@ @D@n@n@{@{@@@@@@AAA A `s`s @y@y -@m@a@i@n@@@s@t@a@n@d@a@r@d)@{  -  -  -"%)"7 @@$@I7777@w@y &*-48@P"@]@P@]@`@]  -#'+.5@X@A@a@J@J@a@i@i  @a@`/19@Y@B@e@K@Q@B!@H@j@n -0@n2@P: @M @C  @L @R : " @n @k - -; -@Z -@D -@f -@M +@m@a@i@n@@@s@t@a@n@d@a@r@d+@  +   +   +!),)= @F$@O====@}@ -03:>@V)@c!@V@c@f@c  +"*.14;#@^@G@g@P#@P@g@o@o  #@e@d57?@_@H@k@Q@W@H(@N@p@t$6@s8@S@@ @S @I  @R @X @@ ) @t @q @d% +@A +@` +@J +@l @S -@^@b @l  < = @E @V@y@N@T@c@m 2=@[@F@G@U@F@n>+@V@z \ No newline at end of file +@Y +@d@h @r & @B @C @K @\@@T@Z@i@s'4@C@a@L@M@[@L@t@D-@\@ \ No newline at end of file Index: branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin (revision 70375) +++ branches/weighted-transfer/apertium-toy-ru-en/apertium-rus-eng/rus-eng.autobil.bin (revision 70382) @@ -1,5 +1,5 @@ @v@A@B@C@D@E@F@G@H@I@J@K@L@M@N@O@P@Q@R@S@T@U@V@W@X@Y@Z@a@b@c@d@e@f@g@h@i@j@k@l@m@n@o@p@q@r@s@t@u@v@w@x@y@zDDDDDDDDDDDDDDDDDDD D!D"D#D$D%D&D'D(D)D*D+D,D-D.D/D0D1D2D3D4D5DQD6D7D8D:D;DD?D@DADBDCDDDEDFDGDHDIDLDKDJDMDNDO@M@n@n@p@v@b@l@e@x@v@b@m@o@d@v@a@u@x@v@b@s@e@r@v@b@h@a@v@e@r@c@n@j@s@u@b@c@n@j@c@o@o@c@n@j@a@d@v@p@r@a@d@v@i@j@a@d@j@p@r@n@a@t@t@r@t@o@p@a@n@t@c@o@g@o@r@g@a@l@p@e@r@s@r@e@f@r@e@s@d@e@t@p@o@s@p1@p2@p3@n@t@u@t@f@m@u@n@s@g@d@u@p@l@s@p@i@n@d@d@e@f@n@o@m@g@e@n@a@c@c@d@a@t@i@n@s@l@o@c@d@e@m@i@t@g@q@n@t@n@e@g@e@m@p@h@i@n@f@p@r@e@s@i@m@p@p@r@e@t@p@p@p@p@r@s@p@a@s@v@a@c@t@v@s@i@n@t@p@s@t@c@o@m@p@s@u@p@o@r@d@a@c@r@u@n@c@n@u@m@s@e@n@t@c@m@a@p@o@s@g@u@i@o@l@p@a@r@r@p@a@r@l@q@u@o@t@r@q@u@o@t@c@o@m@p@o@u@n@d-@o@n@l@y-@L -@c@o@m@p@o@u@n@d-@R@z@M@MD@D}@D@D@D@L@L@MD@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@@L@LD@D@D@D@mD@D@@L@@M@LD@D@D@LD@D}@D@D}@D@D@D@D@LD~@D@D?D@MD@MD@M?@MD@D@D?D@D@D@D@D@ +@c@o@m@p@o@u@n@d-@R@@M@MD@D}@D@D@D@L@L@MD@D@D@D@D@D@LD@D@D@D@D@D@D@D@D@D@D@@L@LD@D@D@D@LD@MD@MD@MD@D@D@D@mD@D@@L@@M@LD@D@D@LD@D}@D@D}@D@D@D@LD~@D@D?D@MD@MD@M?@MD@D@D?D@D@D@D@D@ @@M@@M@@M D@D@D@mD@D@ @@M@@M@@M@D@D@D@mD@D@ @@ -8,17 +8,15 @@ @D@D@D@mD@ @D@D@D@D@D@D@mD@D@D@ @D@@n@n@{@{@@@@@@AAA A `s`s @y@y -@m@a@i@n@@@s@t@a@n@d@a@r@d)@{  -  -  -"%)"7 @@$@I7777@w@y &*-48@P"@]@P@]@`@]  -#'+.5@X@A@a@J@J@a@i@i  @a@`/19@Y@B@e@K@Q@B!@H@j@n -0@n2@P: @M @C  @L @R : " @n @k - -; -@Z -@D -@f -@M +@m@a@i@n@@@s@t@a@n@d@a@r@d+@  +   +   +!),)= @F$@O====@}@ -03:>@V)@c!@V@c@f@c  +"*.14;#@^@G@g@P#@P@g@o@o  #@e@d57?@_@H@k@Q@W@H(@N@p@t$6@s8@S@@ @S @I  @R @X @@ ) @t @q @d% +@A +@` +@J +@l @S -@^@b @l  < = @E @V@y@N@T@c@m 2=@[@F@G@U@F@n>+@V@z \ No newline at end of file +@Y +@d@h @r & @B @C @K @\@@T@Z@i@s'4@C@a@L@M@[@L@t@D-@\@ \ No newline at end of file Index: branches/weighted-transfer/apertium-toy-ru-en/midterm-demo.sh =================================================================== --- branches/weighted-transfer/apertium-toy-ru-en/midterm-demo.sh (nonexistent) +++ branches/weighted-transfer/apertium-toy-ru-en/midterm-demo.sh (revision 70382) @@ -0,0 +1,32 @@ +#! /bin/sh + +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'сундук мертвеца'\n\n" +printf "Since 'мертвец'/'dead man' is an animate noun,\nin most cases it is preferable that it\nforms possessive case, which fact is\nreflected by giving the first pattern\nfor 'two_pt_poss' rule a high weight." +printf "\n------------------------------------------------------\n" +./process.sh "сундук мертвеца" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок сундука'\n\n" +printf "On the other hand, 'сундук'/'chest' is an inanimate noun,\nso I naively suggest it should form attributive\nconstruction, therefore rule 'two_pt_attr' has patterns\nof high weight for the cases when that word is involved." +printf "\n------------------------------------------------------\n" +./process.sh "замок сундука" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'пара носков'\n\n" +printf "The word 'пара' means 'pair' (or 'couple'\nbut since we have no lexical selection here,\nI omit that fact), and if used with a plural noun\nin genitive should be translated as 'pair of'\nno matter the animacy of the second noun.\nThis is reflected by giving the second pattern\nfor rule 'two_pt_of' weight of 1 which\noverrides the weight of 0.9 for pattern with 'носок'/'sock'\nas the second word in rule 'two_pt_attr'\n(which is identical to that for 'сундук')." +printf "\n------------------------------------------------------\n" +./process.sh "пара носков" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'пара носка'\n\n" +printf "However, when the second word with 'пара' is\nsingular, that is better be translated\nas attributive or possessive construction\nsince it doesn't mean 'two matching items'\nbut rather 'a match for the item'.\nHere for simplicity I designed it\nto be always possessive, irregardless\nof animacy." +printf "\n------------------------------------------------------\n" +./process.sh "пара носка" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок большого сундука'\n\n" +printf "The weights for three-piece expressions\nare underdeveloped. In fact,they only\ndistinguish betweenthe cases when\nthe middle word is an adjective or an ordinal number.\nWith adjectives the weights are distributed\nuniformly therefore for 'большой'/'big'\nthe first rule\is chosen" +printf "\n------------------------------------------------------\n" +./process.sh "замок большого сундука" +printf "\n------------------------------------------------------\n" +printf "midterm-demo: processing 'замок первого сундука'\n\n" +printf "For ordinal numbers, the of-construction\nis designed to be preferrable." +printf "\n------------------------------------------------------\n" +./process.sh "замок первого сундука" Property changes on: branches/weighted-transfer/apertium-toy-ru-en/midterm-demo.sh ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* \ No newline at end of property