commit ed91b927e53d3aa7df56c255805824a95d27c647 Author: elmurod1202 Date: Fri Aug 21 18:48:23 2020 +0200 TESTVOC: fixed scripts to work with tur-uzb. Should work. diff --git a/testvoc/lite/testvoc-summary.tur-uzb.txt b/testvoc/lite/testvoc-summary.tur-uzb.txt new file mode 100644 index 0000000..e367ecc --- /dev/null +++ b/testvoc/lite/testvoc-summary.tur-uzb.txt @@ -0,0 +1,20 @@ + +Fri Aug 21 16:01:51 CEST 2020 +=============================================== +POS Total Clean With @ With # Clean % +adj 1050 1050 0 0 100 +vaux 0 0 0 0 100 +v 0 0 0 0 100 +prn 0 0 0 0 100 +post 0 0 0 0 100 +num 0 0 0 0 100 +np 0 0 0 0 100 +n 0 0 0 0 100 +ij 0 0 0 0 100 +guio 0 0 0 0 100 +det 0 0 0 0 100 +cnjsub 0 0 0 0 100 +cnjcoo 0 0 0 0 100 +cm 0 0 0 0 100 +adv 0 0 0 0 100 +=============================================== diff --git a/testvoc/lite/testvoc.sh b/testvoc/lite/testvoc.sh index 41e6946..75c7c69 100644 --- a/testvoc/lite/testvoc.sh +++ b/testvoc/lite/testvoc.sh @@ -3,8 +3,8 @@ # A script to run the "lite" ("one-word-per-each-paradigm-") testvoc. # # Assumes the pair is compiled. -# Extracts lexical units from compressed text files in languages/apertium-uzb/ -# tests/morphotactics/ and languages/apertium-kaa/tests/morphotactics +# Extracts lexical units from compressed text files in languages/apertium-tur/ +# tests/morphotactics/ and languages/apertium-uzb/tests/morphotactics # and passes them through the translator (=INCONSISTENCY script). # Produces 'testvoc-summary' files using the INCONSISTENCY_SUMMARY script. # @@ -27,24 +27,24 @@ function extract_lexical_units { } #------------------------------------------------------------------------------- -# Uzbek->Karakalpak testvoc +# Turkish->Uzbek testvoc #------------------------------------------------------------------------------- -PARDEF_FILES=../../../../languages/apertium-uzb/tests/morphotactics/*.txt.gz +PARDEF_FILES=../../../apertium-tur/tests/morphotactics/*.txt.gz -echo "==Uzbek->Karakalpak===========================" +echo "==Turkish->Uzbek===========================" -echo "" > $TMPDIR/uzb-kaa.testvoc +echo "" > $TMPDIR/tur-uzb.testvoc for file in $PARDEF_FILES; do zcat $file | extract_lexical_units | - $INCONSISTENCY uzb-kaa >> $TMPDIR/uzb-kaa.testvoc + $INCONSISTENCY tur-uzb >> $TMPDIR/tur-uzb.testvoc done -$INCONSISTENCY_SUMMARY $TMPDIR/uzb-kaa.testvoc uzb-kaa +$INCONSISTENCY_SUMMARY $TMPDIR/tur-uzb.testvoc tur-uzb #------------------------------------------------------------------------------- -# Karakalpak->Uzbek testvoc +# Uzbek->Turkish testvoc #------------------------------------------------------------------------------- # TODO diff --git a/testvoc/standard/inconsistency.sh b/testvoc/standard/inconsistency.sh index de1f49c..f87fdb7 100644 --- a/testvoc/standard/inconsistency.sh +++ b/testvoc/standard/inconsistency.sh @@ -19,15 +19,15 @@ GENOUT=$TMPDIR/testvoc_genout.txt DIR=$1 -if [[ $DIR = "uzb-kaa" ]]; then +if [[ $DIR = "tur-uzb" ]]; then PRETRANSFER="apertium-pretransfer" - LEXTRANSFER="lt-proc -b ../../uzb-kaa.autobil.bin" - LEXSELECTION="lrx-proc -m ../../uzb-kaa.autolex.bin" - TRANSFER="rtx-proc ../../uzb-kaa.rtx.bin" - #TRANSFER_1="apertium-transfer -b ../../apertium-uzb-kaa.uzb-kaa.t1x ../../uzb-kaa.rtx.bin" - #TRANSFER_2="apertium-transfer -n ../../apertium-uzb-kaa.uzb-kaa.t2x ../../uzb-kaa.rlx.bin" - GENERATOR="lt-proc -d ../../uzb-kaa.autogen.bin" + LEXTRANSFER="lt-proc -b ../../tur-uzb.autobil.bin" + LEXSELECTION="lrx-proc -m ../../tur-uzb.autolex.bin" + TRANSFER="rtx-proc ../../tur-uzb.rtx.bin" + #TRANSFER_1="apertium-transfer -b ../../apertium-tur-uzb.tur-uzb.t1x ../../tur-uzb.rtx.bin" + #TRANSFER_2="apertium-transfer -n ../../apertium-tur-uzb.tur-uzb.t2x ../../tur-uzb.rlx.bin" + GENERATOR="lt-proc -d ../../tur-uzb.autogen.bin" tee $INPUT | $PRETRANSFER | $LEXTRANSFER | $LEXSELECTION | diff --git a/testvoc/standard/testvoc.sh b/testvoc/standard/testvoc.sh index 010f51e..e382800 100644 --- a/testvoc/standard/testvoc.sh +++ b/testvoc/standard/testvoc.sh @@ -18,7 +18,7 @@ export TMPDIR # Testvoc will finish in a reasonable time if we comment out the line # with numerals regex in bidix: cd ../../ -sed -i 's_ *\[№.*$__' apertium-uzb-kaa.uzb-kaa.dix +sed -i 's_ *\[№.*$__' apertium-tur-uzb.tur-uzb.dix make cd testvoc/standard/ @@ -28,13 +28,13 @@ function expand_monodix { } #------------------------------------------------------------------------------- -# Uzbek->Karakalpak testvoc +# Turkish->Uzbek testvoc #------------------------------------------------------------------------------- -MONODIX=../../.deps/uzb-kaa.automorf.trimmed +MONODIX=../../.deps/tur-uzb.automorf.trimmed -echo "==Uzbek->Karakalpak===========================" +echo "== Turkish -> Uzbek ===========================" expand_monodix | -bash inconsistency.sh uzb-kaa > $TMPDIR/uzb-kaa.testvoc -bash inconsistency-summary.sh $TMPDIR/uzb-kaa.testvoc uzb-kaa +bash inconsistency.sh tur-uzb > $TMPDIR/tur-uzb.testvoc +bash inconsistency-summary.sh $TMPDIR/tur-uzb.testvoc tur-uzb