Index: incubator/apertium-bel-rus/dev/testvoc/bidix-unknowns.sh
===================================================================
--- incubator/apertium-bel-rus/dev/testvoc/bidix-unknowns.sh	(nonexistent)
+++ incubator/apertium-bel-rus/dev/testvoc/bidix-unknowns.sh	(revision 66830)
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+set -e -u
+
+if [[ $# -eq 1 ]]; then
+    lang=$1
+    monodix=guess
+    bidix=guess
+    side=guess
+elif  [[ $# -eq 4 ]]; then
+    lang=$1
+    monodix=$2
+    bidix=$3
+    side=$4
+else
+    cat >&2 <<EOF
+Usage: $0 lang
+or:    $0 lang mono.dix bi.dix [r|l]
+
+Expands the analyser, and looks up all the bidix entries in the
+expanded analyser.
+
+For example, do \`$0 nno' in trunk/apertium-nno-nob/' to find
+nno-words that are in bidix but not the nno-analyser. If the source
+.dix files have a non-standard name, you can specify them in the
+second and third arguments, for example
+\`$0 eng ../apertium-eng_feil/apertium-eng.eng.dix apertium-eng-sco.eng-sco.dix l'
+(where \`l' means eng is the left-side of the bidix).
+EOF
+    exit 1
+fi
+
+
+# Replace every value still set to "guess" using what config.log recorded.
+if [[ ${monodix} == guess ]]; then
+    mono_dir=$(grep -m1 "^AP_SRC.*apertium-${lang}" config.log | sed "s/^[^=]*='//;s/'$//")
+    monodix=${mono_dir}/apertium-${lang}.${lang}.dix
+fi
+if [[ ${bidix} == guess ]]; then
+    package=$(grep -m1 "^PACKAGE='apertium-" config.log | sed "s/^[^=]*='//;s/'$//")
+    pair=${package##apertium-}
+    bidix=${package}.${pair}.dix
+fi
+if [[ ${side} == guess ]]; then
+    case ${lang} in
+        ${pair%%-*}) side=l ;;   # lang is the first half of the pair name
+        *)           side=r ;;
+    esac
+fi
+
+exp=$(mktemp -t bidix-unknowns.XXXXXXXX)
+trap 'rm -f "${exp}"' EXIT
+# ${exp}: byte-sorted, de-duplicated expansions with the text before the first ":" cut off.
+echo "Expanding monodix …" >&2
+lt-expand "${monodix}" |
+    grep -v -e __REGEXP__ |
+    sed -e 's/[^:]*//' -e 's/\(<.*>\)\(#.*\)/\2\1/' |
+    LC_ALL=C sort -u >"${exp}"
+
+in_mono () {
+    # Succeed iff a line of the sorted file ${exp} starts with $1; bidix entries may be mere prefixes of monodix ones, hence look(1) rather than comm(1).
+    LC_ALL=C look "$1" "${exp}" >/dev/null  # output discarded, only the exit status matters
+}
+echo "Expanding bidix and checking for entries missing from monodix …" >&2
+lt-expand "${bidix}" \
+    | awk -vside="${side}" -F':|:[<>]:' '
+        BEGIN {
+          if(side=="l") {
+            nside=1
+            LR=":>:"
+            RL=":<:"
+          }
+          else {
+            nside=2
+            LR=":<:"            # flip it
+            RL=":>:"            # and reverse
+          }
+        }
+        # Make bidix match up with monodix (left=left, right=right):
+        /:>:/ { print LR $nside; next }
+        /:<:/ { print RL $nside; next }
+        /:/   { print ":"$nside }
+' \
+    | while read -r bientry; do
+          # Bidix now normalised to have the requested monodix on the "left"
+          case ${bientry} in
+              ":>:"* ) # If it's LR in bidix, then we check if unmarked / LR is in monodix
+                       in_mono "${bientry##:>}" || in_mono "${bientry}" || echo "${bientry}"
+                       ;;
+              ":<:"* ) # If it's RL in bidix, then we check if unmarked / RL is in monodix
+                       in_mono "${bientry##:<}" || in_mono "${bientry}" || echo "${bientry}"
+                       ;;
+              ":"* ) # If it's unmarked in bidix, then we check if unmarked / LR / RL in monodix
+                     in_mono "${bientry}" || in_mono ":>${bientry}" || in_mono ":<${bientry}" || echo "${bientry}"
+                     ;;
+              *) echo "ERROR: unexpected bientry format: ${bientry}" >&2;;
+          esac
+      done
Index: incubator/apertium-bel-rus/dev/testvoc/generation.sh
===================================================================
--- incubator/apertium-bel-rus/dev/testvoc/generation.sh	(nonexistent)
+++ incubator/apertium-bel-rus/dev/testvoc/generation.sh	(revision 66830)
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+set -e -u
+
+if [[ $# -eq 1 ]]; then
+    mode=$1
+    dix=guess
+elif  [[ $# -eq 2 ]]; then
+    mode=$1
+    dix=$2
+else
+    cat >&2 <<EOF
+Usage: $0 lang1-lang2
+or:    $0 lang1-lang2 foo.dix
+
+Replaces the first step of the pipeline with the expanded analyser and
+shows the resulting generation errors.
+
+For example, do \`$0 nno-nob' in trunk/apertium-nno-nob/'
+to find generation errors in the nno-nob direction (assumes that
+modes/nno-nob.mode exists). If the source .dix file has a non-standard
+name, you can specify it in the second argument, for example
+\`$0 eng-sco ../apertium-eng_feil/apertium-eng.eng.dix'
+EOF
+    exit 1
+fi
+
+analysis_expansion () {
+    lt-print "$1".automorf.bin \
+        | sed 's/ /@_SPACE_@/g' \
+        | hfst-txt2fst -e ε \
+        | hfst-project -p lower \
+        | hfst-fst2strings  -c0  \
+        | awk -v clb="$2" '
+          /[][$^{}\\]/{next} # skip escaping hell
+          {
+            gsub("]","\\]")
+            esc=$0
+            gsub("/","\\/",esc)
+            gsub("^","\\^",esc)
+            gsub("$","\\$",esc)
+            print "["esc"] ^"$0"$ ^.<sent>"clb"$"
+          }'
+          # give the "disambiguated" output, no forms
+}
+
+# Filter stdin -> stdout: each lexical unit is re-emitted once per reading,
+# as "^form/r1$ ^form/r2$", preceded by the text that came before the unit.
+split_ambig () {
+    python=python3
+    if command -V pypy3 &>/dev/null; then
+        python=pypy3        # prefer pypy3 whenever it is installed
+    fi
+    PYTHONPATH="$(dirname "$0"):${PYTHONPATH:-}" "${python}" -c '
+from streamparser import parse_file, readingToString
+import sys
+for blank, lu in parse_file(sys.stdin, withText=True):
+    print(blank+" ".join("^{}/{}$".format(lu.wordform, readingToString(r))
+                         for r in lu.readings),
+          end="")'
+}
+
+mode_after_analysis ()
+{
+    eval $(grep '|' "$1" |\
+                  sed 's/[^|]*|//' |\
+                  sed 's/.*apertium-pretransfer/apertium-pretransfer/' |\
+                  sed 's/lt-proc -p[^|]*/cat/' |\
+                  sed 's/autobil.bin *|/& split_ambig |/' |\
+                  sed 's/\$1/-d/g;s/\$2//g')
+    # lt-proc -p fails
+}
+
+only_errs () {
+    grep '][^<]*[#/]'
+}
+
+
+lang1=${mode%%-*}      # source language: the mode name up to its first "-"
+
+if [[ ${dix} == guess ]]; then
+    lang1dir=$(grep -m1 "^AP_SRC.*apertium-${lang1}" config.log | sed "s/^[^=]*='//;s/'$//")
+    dix=${lang1dir}/apertium-${lang1}.${lang1}.dix
+fi
+
+# For nno and nob the sentence-final unit gets an extra <clb> tag.
+clb=""
+if [[ ${lang1} == nno || ${lang1} == nob ]]; then clb="<clb>"; fi
+
+# Make it possible to edit the .dix while testvoc is running:
+# NOTE(review): nothing visible in this script reads ${dixtmp} afterwards - confirm it is still needed.
+dixtmp=$(mktemp -t gentestvoc.XXXXXXXXXXX)
+trap 'rm -f "${dixtmp}"' EXIT
+cat "${dix}" > "${dixtmp}"
+
+analysis_expansion "${mode}" "${clb}" |
+    mode_after_analysis modes/"${mode}".mode |
+    only_errs
Index: incubator/apertium-bel-rus/dev/testvoc/streamparser.py
===================================================================
--- incubator/apertium-bel-rus/dev/testvoc/streamparser.py	(nonexistent)
+++ incubator/apertium-bel-rus/dev/testvoc/streamparser.py	(revision 66830)
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+"""
+Usage: streamparser.py [FILE]
+
+Consumes input from a file (first argument) or stdin, parsing and pretty printing the readings of lexical units found.
+"""
+
+import re, pprint, sys, itertools, fileinput
+from collections import namedtuple
+
+SReading = namedtuple('SReading', ['baseform', 'tags'])
+
+def subreadingToString(sub):
+    return sub.baseform+"".join("<"+t+">" for t in sub.tags)
+
+def readingToString(reading):
+    return "+".join(subreadingToString(sub) for sub in reading)
+
+def mainpos(reading, ltr=False):
+    """Return the first part-of-speech tag of a reading. If there are
+    several subreadings, by default give the first tag of the last
+    subreading. If ltr=True, give the first tag of the first
+    subreading, see
+    http://beta.visl.sdu.dk/cg3/single/#sub-stream-apertium for more
+    information.
+
+    """
+    if ltr:
+        return reading[0].tags[0]
+    else:
+        return reading[-1].tags[0]
+
+class LexicalUnit:
+
+    """A lexical unit consisting of a lemma and its readings.
+
+    Attributes:
+        lexicalUnit (str): The lexical unit in Apertium stream format.
+        wordform (str): The word form (surface form) of the lexical unit.
+        readings (list of list of SReading): The analyses of the lexical unit with sublists containing all subreadings.
+    """
+
+    def __init__(self, lexicalUnit):
+        self.lexicalUnit = lexicalUnit
+
+        cohort = re.split(r'(?<!\\)/', lexicalUnit)
+        self.wordform = cohort[0]
+        readings = cohort[1:]
+
+        self.readings = []
+        for reading in readings:
+            if len(reading) < 1:
+                print("Couldn't parse {}".format(self.lexicalUnit), file=sys.stderr)
+            elif reading[0] not in '*#@':
+                subreadings = []
+
+                subreadingParts = re.findall(r'([^<]+)((?:<[^>]+>)+)', reading)
+                for subreading in subreadingParts:
+                    baseform = subreading[0].lstrip('+')
+                    tags = re.findall(r'<([^>]+)>', subreading[1])
+
+                    subreadings.append(SReading(baseform=baseform, tags=tags))
+
+                self.readings.append(subreadings)
+
+    def __repr__(self):
+        return self.lexicalUnit
+
+
+def parse(stream, withText=False):
+    """Generates lexical units from a character stream.
+
+    Args:
+        stream (iterable): A character stream containing lexical units, superblanks and other text.
+        withText (bool, optional): A boolean defining whether to output preceding text with each lexical unit.
+
+    Yields:
+        LexicalUnit: The next lexical unit found in the character stream. (if withText is False)
+        (str, LexicalUnit): The next lexical unit found in the character stream and the the text that seperated it from the prior unit in a tuple. (if withText is True)
+    """
+
+    buffer = ''
+    textBuffer = ''
+    inLexicalUnit = False
+    inSuperblank = False
+
+    for char in stream:
+
+        if inSuperblank:
+            if char == ']':
+                inSuperblank = False
+                textBuffer += char
+            elif char == '\\':
+                textBuffer += char
+                textBuffer += next(stream)
+            else:
+                textBuffer += char
+        elif inLexicalUnit:
+            if char == '$':
+                if withText:
+                    yield (textBuffer, LexicalUnit(buffer))
+                else:
+                    yield LexicalUnit(buffer)
+                buffer = ''
+                textBuffer = ''
+                inLexicalUnit = False
+            elif char == '\\':
+                buffer += char
+                buffer += next(stream)
+            else:
+                buffer += char
+        else:
+            if char == '[':
+                inSuperblank = True
+                textBuffer += char
+            elif char == '^':
+                inLexicalUnit = True
+            elif char == '\\':
+                textBuffer += char
+                textBuffer += next(stream)
+            else:
+                textBuffer += char
+
+
+def parse_file(f, withText=False):
+    """Generates lexical units from a file.
+
+    Args:
+        f (file): A file containing lexical units, superblanks and other text.
+
+    Yields:
+        LexicalUnit: The next lexical unit found in the file.
+    """
+
+    return parse(itertools.chain.from_iterable(f), withText)
+
+
+if __name__ == '__main__':
+    lexicalUnits = parse_file(fileinput.input())
+
+    for lexicalUnit in lexicalUnits:
+        pprint.pprint(lexicalUnit.readings, width=120)
Index: incubator/apertium-bel-rus/apertium-bel-rus.bel-rus.t1x
===================================================================
--- incubator/apertium-bel-rus/apertium-bel-rus.bel-rus.t1x	(revision 66827)
+++ incubator/apertium-bel-rus/apertium-bel-rus.bel-rus.t1x	(revision 66830)
@@ -152,6 +152,7 @@
   <def-attr n="a_num">
      <attr-item tags="num"/>
      <attr-item tags="num.coll"/>
+     <attr-item tags="num.percent"/> <!-- numbers with percent -->
      <attr-item tags="n"/> <!-- for кількадесят-->
   </def-attr>
 
Index: incubator/apertium-bel/apertium-bel.bel.dix
===================================================================
--- incubator/apertium-bel/apertium-bel.bel.dix	(revision 66827)
+++ incubator/apertium-bel/apertium-bel.bel.dix	(revision 66830)
@@ -86,6 +86,7 @@
     <sdef n="lquot"      c="Left quote"/>
     <sdef n="rquot"      c="Right quote"/>
     <sdef n="guio"       c="Hyphen"/>
+    <sdef n="percent"    c="Percent"/>
 
   </sdefs>
   <pardefs>
@@ -93,7 +94,7 @@
 
     <pardef n="numeros">
       <e><re>[0-9]+([.,][0-9]+)?</re><p><l></l><r><s n="num"/></r></p></e>
-      <e><re>[0-9]+([.,][0-9]+)? ?%</re><p><l></l><r><s n="num"/></r></p></e>
+      <e><re>[0-9]+([.,][0-9]+)? ?%</re><p><l></l><r><s n="num"/><s n="percent"/></r></p></e>
       <e><re>[0-9]+([.,][0-9]+)?</re><i>-</i><re>[0-9]+([.,][0-9]+)?%?</re><p><l></l><r><s n="num"/></r></p></e>
       <e><re>[0-9]+([.,][0-9]+)?[:.][0-9]+([.,][0-9]+)?</re><p><l></l><r><s n="num"/></r></p></e>
     </pardef>