Index: incubator/apertium-fao-nor/apertium-fao-nor.fao-nor.dix
===================================================================
--- incubator/apertium-fao-nor/apertium-fao-nor.fao-nor.dix (revision 79333)
+++ incubator/apertium-fao-nor/apertium-fao-nor.fao-nor.dix (revision 79334)
@@ -980,8 +980,8 @@
eldurild
eldureld
einginingen
-einen
-einein
+einen
+einein
egjeg
egeg
EffersøeEffersøe
Index: incubator/apertium-fao-nor/generation.sh
===================================================================
--- incubator/apertium-fao-nor/generation.sh (nonexistent)
+++ incubator/apertium-fao-nor/generation.sh (revision 79334)
@@ -0,0 +1,206 @@
+#!/bin/bash
+
+set -e -u
+
+# Tunables. Each of them may be preset in the environment, e.g.
+# $ export CYCLES=1 BLOCK=1M
+# before this script is started.
+
+# CYCLES: how many times a cycle is followed while expanding with
+# --hfst; the expansion gets slow if this is too high.
+declare -ir CYCLES=${CYCLES-0}
+# J: how many pipelines are run in parallel (needs GNU parallel; only
+# worth raising while the CPUs are not saturated and there is free RAM).
+declare -ir J=${J-1}
+# BLOCK: how much data is translated before the pipeline is restarted
+# (some pipelines leak memory and need restarting every so often).
+declare -r BLOCK=${BLOCK:-100M}
+
+
+# An optional leading --hfst is consumed here and remembered in HFST.
+case ${1-} in
+    --hfst)
+        HFST=true
+        shift
+        ;;
+    *)
+        HFST=false
+        ;;
+esac
+
+if [[ $# -eq 1 ]]; then
+ mode=$1
+ dix=guess
+elif [[ $# -eq 2 ]]; then
+ mode=$1
+ dix=$2
+else
+ cat >&2 <]:' '
+ /:<:/ {next}
+ $2 ~ /|DUE_TO_LT_PROC_HANG|__REGEXP__/ {next}
+ {
+ esc=$2
+ gsub("/","\\/",esc)
+ gsub("^","\\^",esc)
+ gsub("$","\\$",esc)
+ print "["esc"] ^"$1"/"$2"$ ^./."clb"$"
+ }'
+}
+
+analyser_to_hfst () {
+ case "$(head -c4 "$1")" in
+ HFST)
+ hfst-fst2fst -t "$1"
+ ;;
+ *) # lttoolbox bin's start with their 's :(
+ lt-print "$1" \
+ | sed 's/ /@_SPACE_@/g' \
+ | hfst-txt2fst -e ε
+ ;;
+ esac
+}
+
+analysis_expansion_hfst () {
+ analyser_to_hfst "$1" \
+ | hfst-project -p lower \
+ | hfst-fst2strings -c"${CYCLES}" \
+ | awk -v clb="$2" '
+ /[][$^{}\\]/{next} # skip escaping hell
+ /|DUE_TO_LT_PROC_HANG|__REGEXP__/ {next}
+ {
+ gsub("]","\\]")
+ esc=$0
+ gsub("/","\\/",esc)
+ gsub("^","\\^",esc)
+ gsub("$","\\$",esc)
+ print "["esc"] ^"$0"$ ^."clb"$"
+ }'
+ # give the "disambiguated" output, no forms
+}
+
+only_errs () {
+ if [[ $# -ge 1 && $1 = --no-@ ]]; then
+ atfilter () { grep -v '].*/@'; }
+ else
+ atfilter () { cat; }
+ fi
+ # turn escaped SOLIDUS into DIVISION SLASH, so we don't grep correct stuff ("A/S" is a possible lemma)
+ sed 's%\\/%∕%g' |\
+ atfilter |\
+ grep '][^<]*[#/]'
+}
+
+run_mode () {
+ if command -V parallel &>/dev/null; then
+ parallel -j"$J" --pipe --block "${BLOCK}" -- bash "$@"
+ else
+ bash "$@"
+ fi
+}
+
+declare -a TMPFILES
+cleanup () {
+ for f in "${TMPFILES[@]}"; do
+ rm -f "$f"
+ done
+}
+trap 'cleanup' EXIT
+
+
+PYTHONPATH="$(dirname "$0"):${PYTHONPATH:-}"
+export PYTHONPATH
+if command -V pypy3 &>/dev/null; then
+ python=pypy3
+else
+ python=python3
+fi
+split_ambig=$(mktemp -t gentestvoc.XXXXXXXXXXX)
+TMPFILES+=("${split_ambig}")
+cat >"${split_ambig}" < "${mode_after_analysis}"
+
+# Derive the script that run_mode executes when --hfst is given, from
+# the lines of modes/${mode}.mode that contain a "|":
+#  1. drop everything up to and including the first "|";
+#  2. drop everything before "apertium-pretransfer";
+#  3. replace an "lt-proc -p ..." stage with cat;
+#  4. insert the path held in ${split_ambig} as an extra stage after
+#     the stage ending in "autobil.bin";
+#  5. turn every "$1" into "-d" and delete every "$2".
+# The result goes to a temp file that is registered in TMPFILES.
+mode_after_tagger=$(mktemp -t gentestvoc.XXXXXXXXXXX)
+TMPFILES+=("${mode_after_tagger}")
+grep '|' modes/"${mode}".mode \
+ | sed 's/[^|]*|//' \
+ | sed 's/.*apertium-pretransfer/apertium-pretransfer/' \
+ | sed 's/lt-proc -p[^|]*/cat/' \
+ | sed "s%autobil.bin'* *|%& ${split_ambig} |%" \
+ | sed 's/\$1/-d/g;s/\$2//g' \
+ > "${mode_after_tagger}"
+# lt-proc -p fails, that's why we remove that
+
+
+# The part of the mode name before its first "-".
+lang1=${mode%%-*}
+
+# Second argument for the expansion functions.
+# NOTE(review): both assignments below set clb to the empty string in
+# this copy, so the case has no effect; the values look like they were
+# lost -- confirm against the upstream script.
+clb=""
+case ${lang1} in
+ nno|nob) clb="" ;;
+esac
+
+if $HFST; then
+ # "guess": use the first file of the first program in this mode's
+ # pipeline, as listed in modes.xml.
+ if [[ ${dix} = guess ]]; then
+ dix=$(xmllint --xpath "string(/modes/mode[@name = '${mode}']/pipeline/program[1]/file[1]/@name)" modes.xml)
+ fi
+ # Expand the analyser, run the derived script over the expansion and
+ # keep only the lines only_errs lets through.
+ analysis_expansion_hfst "${dix}" "${clb}" \
+ | run_mode "${mode_after_tagger}" \
+ | only_errs
+else
+ # "guess": take the apertium-${lang1} directory from the first
+ # matching AP_SRC line of config.log and use the
+ # apertium-${lang1}.${lang1}.dix found there.
+ if [[ ${dix} = guess ]]; then
+ lang1dir=$(grep -m1 "^AP_SRC.*apertium-${lang1}" config.log | sed "s/^[^=]*='//;s/'$//")
+ dix=${lang1dir}/apertium-${lang1}.${lang1}.dix
+ fi
+ # Make it possible to edit the .dix while testvoc is running:
+ dixtmp=$(mktemp -t gentestvoc.XXXXXXXXXXX)
+ TMPFILES+=("${dixtmp}")
+ cat "${dix}" > "${dixtmp}"
+ # Same as the other branch, but expanding the copied .dix, running
+ # the script in ${mode_after_analysis} and passing --no-@ to only_errs.
+ analysis_expansion "${dixtmp}" "${clb}" \
+ | run_mode "${mode_after_analysis}" \
+ | only_errs --no-@
+fi
Property changes on: incubator/apertium-fao-nor/generation.sh
___________________________________________________________________
Added: svn:executable
## -0,0 +1 ##
+*
\ No newline at end of property