Index: staging/apertium-eng-kaz/apertium-eng-kaz.kaz-eng.t1x =================================================================== --- staging/apertium-eng-kaz/apertium-eng-kaz.kaz-eng.t1x (revision 69108) +++ staging/apertium-eng-kaz/apertium-eng-kaz.kaz-eng.t1x (revision 69109) @@ -3200,7 +3200,7 @@ - + Index: staging/apertium-eng-kaz/dev/testvoc2/lite/testvoc-summary.kaz-eng.txt =================================================================== --- staging/apertium-eng-kaz/dev/testvoc2/lite/testvoc-summary.kaz-eng.txt (revision 69108) +++ staging/apertium-eng-kaz/dev/testvoc2/lite/testvoc-summary.kaz-eng.txt (revision 69109) @@ -1,14 +1,14 @@ -Wed Jun 1 16:48:46 CEST 2016 +Wed Jun 1 19:24:37 CEST 2016 =============================================== POS Total Clean With @ With # Clean % v 49395 49395 0 0 100 -cop 48464 40784 0 7680 84.16 +cop 48464 48464 0 0 100 adj 20197 20167 0 30 99.85 n 12512 12512 0 0 100 -prn 10873 3192 0 7681 29.36 +prn 10873 10872 0 1 99.99 det 2248 2248 0 0 100 -cnjcoo 1389 429 0 960 30.89 +cnjcoo 1389 1389 0 0 100 vaux 808 808 0 0 100 post 464 416 0 48 89.66 np 155 155 0 0 100 Index: staging/apertium-eng-kaz/useful notes for Aida =================================================================== --- staging/apertium-eng-kaz/useful notes for Aida (revision 69108) +++ staging/apertium-eng-kaz/useful notes for Aida (revision 69109) @@ -488,4 +488,6 @@ head -1000 ../apertium-eng-kaz/Coding_challenge/corpus\ Lab\ IIS\ \(5925\).kz | apertium -d ~/apertium-eng-kaz/ kaz-eng-morph | sed 's/\$\W*\^/$\n^/g' | grep '\*' | cut -f2 -d'^' | cut -f1 -d'/' | grep -v '^$' | sort -f | uniq -c | sort -gr | grep -v '[0-9] [0-9]'| apertium-destxt | lt-proc -w /home/apertium/apertium-kaz/kaz.automorf.bin| grep -v '\*' | cg-proc merge-analyses.bin | apertium-retxt| grep -v '\/.*\/.*\/.*\/'| cut -f4 -d'/' | cut -f1 -d'$' | sed 's/<\([a-z0-9A-Z]\+\)>//g' +words with starts: +cat Coding_challenge/corpus\ Lab\ IIS\ \(5925\).kz |apertium -d. kaz-eng| grep -o '*[^"]*'|cut -f1 -d ' '