# -*- sh-basic-offset: 2 -*-
# Copyright (C) 2005 Universitat d'Alacant / Universidad de Alicante
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see .
message ()
{
echo "USAGE: $(basename $0) [-d datadir] [-f format] [-u] [in [out]]"
echo " -d datadir directory of linguistic data"
echo " -f format one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx,"
echo " xpresstag, html-noent, latex, latex-raw"
echo " -a display ambiguity"
echo " -u don't display marks '*' for unknown words"
echo " -n don't insert period before possible sentence-ends"
echo " -m memory.tmx use a translation memory to recycle translations"
echo " -o direction translation direction using the translation memory,"
echo " by default 'direction' is used instead"
echo " -l lists the available translation directions and exits"
echo " direction typically, LANG1-LANG2, but see modes.xml in language data"
echo " in input file (stdin by default)"
echo " out output file (stdout by default)"
exit 1
}
list_directions ()
{
for mode in "$DATADIR"/modes/*.mode; do
echo " $(basename "${mode%%.mode}")"
done
}
locale_utf8 ()
{
export LC_CTYPE=$(locale -a|grep -i "utf[.]*8"|head -1);
if [ LC_CTYPE = "" ]; then
echo "Error: Install an UTF-8 locale in your system";
exit 1;
fi
}
locale_latin1 ()
{
export LC_CTYPE=$(locale -a|grep -i -e "8859-1" -e "@euro"|head -1);
if [ LC_CTYPE = "" ]; then
echo "Error: Install a Latin-1 locale in your system";
exit 1;
fi
}
test_zip ()
{
if [ "$(which zip)" = "" ]; then
echo "Error: Install 'zip' command in your system";
exit 1;
fi
if [ "$(which unzip)" = "" ]; then
echo "Error: Install 'unzip' command in your system";
exit 1;
fi
}
test_gawk ()
{
GAWK=$(which gawk)
if [ "$GAWK" = "" ]; then
echo "Error: Install 'gawk' in your system"
exit 1
fi
}
translate_latex()
{
test_gawk
if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
if [ "$(file -b --mime-encoding "$INFILE")" == "utf-8" ]; then
locale_latin1
else locale_utf8
fi
"$APERTIUM_PATH/apertium-prelatex" "$INFILE" | \
"$APERTIUM_PATH/apertium-utils-fixlatex" | \
"$APERTIUM_PATH/apertium-deslatex" ${FORMAT_OPTIONS} | \
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-relatex"| \
awk '{gsub("", ""); print;}' | \
if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-postlatex-raw"; else "$APERTIUM_PATH/apertium-postlatex-raw" > "$SALIDA"; fi
if [ "$BORRAFICHERO" = "true" ]; then
rm -Rf "$INFILE"
fi
}
translate_latex_raw()
{
test_gawk
if [ "$INFILE" = "" -o "$INFILE" = /dev/stdin ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
if [ "$(file -b --mime-encoding "$INFILE")" = "utf-8" ]; then
locale_latin1
else locale_utf8
fi
"$APERTIUM_PATH/apertium-prelatex" "$INFILE" | \
"$APERTIUM_PATH/apertium-utils-fixlatex" | \
"$APERTIUM_PATH/apertium-deslatex" ${FORMAT_OPTIONS} | \
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-relatex"| \
awk '{gsub("", ""); print;}' | \
if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-postlatex-raw"; else "$APERTIUM_PATH/apertium-postlatex-raw" > "$SALIDA"; fi
}
translate_odt ()
{
INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
locale_utf8
test_zip
if [ "$INFILE" = "" ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
find "$INPUT_TMPDIR" | grep "content\\.xml\\|styles\\.xml" |\
awk '{printf ""; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
"$APERTIUM_PATH/apertium-desodt" ${FORMAT_OPTIONS} |\
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-reodt"|\
awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
VUELVE=$(pwd)
cd "$INPUT_TMPDIR"
rm -Rf ObjectReplacements
zip -q -r - . >"$OTRASALIDA"
cd "$VUELVE"
rm -Rf "$INPUT_TMPDIR"
if [ "$BORRAFICHERO" = "true" ]; then
rm -Rf "$INFILE";
fi
if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
rm -Rf "$OTRASALIDA"
rm -Rf "$TMCOMPFILE"
}
translate_docx ()
{
INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
locale_utf8
test_zip
if [ "$INFILE" = "" ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
if [ "$UWORDS" = "no" ]; then
OPCIONU="-u";
else OPCIONU="";
fi
unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
for i in $(find "$INPUT_TMPDIR"|grep "xlsx$");
do LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX");
"$APERTIUM_PATH/apertium" -f xlsx -d "$DATADIR" "$OPCIONU" "$PAIR" <"$i" >"$LOCALTEMP";
cp "$LOCALTEMP" "$i";
rm "$LOCALTEMP";
done;
find "$INPUT_TMPDIR" | grep "xml" |\
grep -v -i \\\(settings\\\|theme\\\|styles\\\|font\\\|rels\\\|docProps\\\) |\
awk '{printf ""; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
"$APERTIUM_PATH/apertium-deswxml" ${FORMAT_OPTIONS} |\
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-rewxml"|\
awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
VUELVE=$(pwd)
cd "$INPUT_TMPDIR"
zip -q -r - . >"$OTRASALIDA"
cd "$VUELVE"
rm -Rf "$INPUT_TMPDIR"
if [ "$BORRAFICHERO" = "true" ]; then
rm -Rf "$INFILE";
fi
if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
rm -Rf "$OTRASALIDA"
rm -Rf "$TMCOMPFILE"
}
translate_pptx ()
{
INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
locale_utf8
test_zip
if [ "$INFILE" = "" ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
if [ "$UWORDS" = "no" ]; then
OPCIONU="-u";
else OPCIONU="";
fi
unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
for i in $(find "$INPUT_TMPDIR"|grep "xlsx$"); do
LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
"$APERTIUM_PATH/apertium" -f xlsx -d "$DATADIR" "$OPCIONU" "$PAIR" <"$i" >"$LOCALTEMP";
cp "$LOCALTEMP" "$i"
rm "$LOCALTEMP"
done;
find "$INPUT_TMPDIR" | grep "xml$" |\
grep "slides\/slide" |\
awk '{printf ""; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
"$APERTIUM_PATH/apertium-despptx" ${FORMAT_OPTIONS} |\
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-repptx" |\
awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
VUELVE=$(pwd)
cd "$INPUT_TMPDIR"
zip -q -r - . >"$OTRASALIDA"
cd "$VUELVE"
rm -Rf "$INPUT_TMPDIR"
if [ "$BORRAFICHERO" = "true" ]; then
rm -Rf "$INFILE";
fi
if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
rm -Rf "$OTRASALIDA"
rm -Rf "$TMCOMPFILE"
}
translate_xlsx ()
{
INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX")
locale_utf8
test_zip
if [ "$INFILE" = "" ]; then
INFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
cat > "$INFILE"
BORRAFICHERO="true"
fi
OTRASALIDA=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
unzip -q -o -d "$INPUT_TMPDIR" "$INFILE"
find "$INPUT_TMPDIR" | grep "sharedStrings.xml" |\
awk '{printf ""; PART = $0; while(getline < PART) printf(" %s", $0); printf("\n");}' |\
"$APERTIUM_PATH/apertium-desxlsx" ${FORMAT_OPTIONS} |\
if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then cat;
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | \
if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | \
"$APERTIUM_PATH/apertium-rexlsx" |\
awk '{punto = index($0, "/>") + 3; cabeza = substr($0, 1, punto-1); cola = substr($0, punto); n1 = substr(cabeza, index(cabeza, "\"")+1); name = substr(n1, 1, index(n1, "\"")-1); gsub("[?]> ", "?>\n", cola); print cola > name;}'
VUELVE=$(pwd)
cd "$INPUT_TMPDIR"
zip -q -r - . >"$OTRASALIDA"
cd "$VUELVE"
rm -Rf "$INPUT_TMPDIR"
if [ "$BORRAFICHERO" = "true" ]; then
rm -Rf "$INFILE";
fi
if [ "$REDIR" == "" ]; then cat "$OTRASALIDA"; else cat "$OTRASALIDA" > "$SALIDA"; fi
rm -Rf "$OTRASALIDA"
rm -Rf "$TMCOMPFILE"
}
translate_htmlnoent ()
{
"$APERTIUM_PATH/apertium-deshtml" ${FORMAT_OPTIONS} "$INFILE" | \
if [ "$TRANSLATION_MEMORY_FILE" = "" ]; then
cat
else "$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE";
fi | if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | if [ "$FORMAT" = "none" ]; then
if [ "$REDIR" == "" ]; then cat; else cat > "$SALIDA"; fi
else if [ "$REDIR" == "" ]; then "$APERTIUM_PATH/apertium-rehtml-noent"; else "$APERTIUM_PATH/apertium-rehtml-noent" > "$SALIDA"; fi
fi
rm -Rf "$TMCOMPFILE"
}
##########################################################
# Option and argument parsing, setting globals variables #
##########################################################
PATH="${APERTIUM_PATH}:${PATH}"
[[ -z $TMPDIR ]] && TMPDIR=/tmp
TMCOMPFILE=$(mktemp "$TMPDIR/apertium.XXXXXXXX")
trap 'rm -Rf "$TMCOMPFILE"' EXIT
# Default values, may be overridden below:
PAIR=""
INFILE="/dev/stdin"
FORMAT="txt"
DATADIR=$DEFAULT_DIRECTORY
TRANSLATION_MEMORY_DIRECTION=$PAIR
LIST_MODES_AND_EXIT=false
FORMAT_OPTIONS=""
# Skip (but store) non-option arguments that come before options:
declare -a ARGS_PREOPT
declare -i OPTIND=1
while [[ $OPTIND -le $# ]]; do
arg=${@:$OPTIND:1}
case $arg in
-*) break ;;
*) ARGS_PREOPT+=($arg); (( OPTIND++ )) ;;
esac
done
while getopts ":uahlf:d:m:o:n" opt; do
case "$opt" in
f) FORMAT=$OPTARG ;;
d) DATADIR=$OPTARG ;;
m) TRANSLATION_MEMORY_FILE=$OPTARG ;;
o) TRANSLATION_MEMORY_DIRECTION=$OPTARG ;;
u) UWORDS="no" ;;
n) FORMAT_OPTIONS="-n" ;;
a) OPTION_TAGGER="-m" ;;
l) LIST_MODES_AND_EXIT=true ;;
h) message ;;
\?) echo "ERROR: Unknown option $OPTARG" >&2; message >&2 ;;
:) echo "ERROR: $OPTARG requires an argument" >&2; message >&2 ;;
esac
done
shift $(($OPTIND-1))
if $LIST_MODES_AND_EXIT; then list_directions; exit 0; fi
# Restore non-option arguments that came before options back into arg list:
set -- "${ARGS_PREOPT[@]}" "$@"
case "$#" in
3)
SALIDA=$3
REDIR=">"
INFILE=$2
PAIR=$1
if [[ ! -e "$INFILE" ]]; then
echo "Error: file '$INFILE' not found." >&2
message >&2
fi
;;
2)
INFILE=$2
PAIR=$1
if [[ ! -e "$INFILE" ]]; then
echo "Error: file '$INFILE' not found." >&2
message >&2
fi
;;
1)
PAIR=$1
;;
*)
message >&2
;;
esac
if [[ -n $TRANSLATION_MEMORY_FILE ]]; then
"$APERTIUM_PATH/lt-tmxcomp" "$TRANSLATION_MEMORY_DIRECTION" "$TRANSLATION_MEMORY_FILE" "$TMCOMPFILE" >/dev/null
if [ "$?" != "0" ]; then
echo "Error: Cannot compile TM '$TRANSLATION_MEMORY_FILE'" >&2
echo" hint: use -o parameter" >&2
message >&2
fi
fi
if [[ ! -d "$DATADIR/modes" ]]; then
echo "Error: Directory '$DATADIR/modes' does not exist." >&2
message >&2
fi
if [[ ! -e "$DATADIR/modes/$PAIR.mode" ]]; then
echo -n "Error: Mode $PAIR does not exist"
c=$(find "$DATADIR/modes"|wc -l)
if [ "$c" -le 1 ]; then
echo "."
else
echo ". Try one of:"
list_directions
fi
exit 1
fi
#Parametro opcional, de no estar, lee de la entrada estandar (stdin)
case "$FORMAT" in
none)
if [ "$UWORDS" = "no" ]; then
OPTION="-n";
else OPTION="-g";
fi
;;
txt|rtf|html|xpresstag|mediawiki)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
;;
rtf)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
MILOCALE=$(locale -a|grep -i -v "utf\|^C$\|^POSIX$"|head -1);
if [ "$MILOCALE" = "" ]; then
echo "Error: Install a ISO-8859-1 compatible locale in your system";
exit 1;
fi
export LC_CTYPE=$MILOCALE
;;
odt)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_odt
exit 0
;;
latex)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_latex
exit 0
;;
latex-raw)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_latex_raw
exit 0
;;
docx)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_docx
exit 0
;;
xlsx)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_xlsx
exit 0
;;
pptx)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_pptx
exit 0
;;
html-noent)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
translate_htmlnoent
exit 0
;;
wxml)
if [ "$UWORDS" = "no" ]; then OPTION="-n";
else OPTION="-g";
fi;
locale_utf8
;;
txtu)
FORMAT="txt";
OPTION="-n"
;;
htmlu)
FORMAT="html";
OPTION="-n";
;;
xpresstagu)
FORMAT="xpresstag";
OPTION="-n";
;;
rtfu)
FORMAT="rtf";
OPTION="-n";
MILOCALE=$(locale -a|grep -i -v "utf\|^C$\|^POSIX$"|head -1);
if [ "$MILOCALE" = "" ]; then
echo "Error: Install a ISO-8859-1 compatible locale in your system";
exit 1;
fi
export LC_CTYPE=$MILOCALE
;;
odtu)
OPTION="-n"
translate_odt
exit 0
;;
docxu)
OPTION="-n"
translate_docx
exit 0
;;
xlsxu)
OPTION="-n"
translate_xlsx
exit 0
;;
pptxu)
OPTION="-n"
translate_pptx
exit 0
;;
wxmlu)
OPTION="-n";
locale_utf8
;;
*) # Por defecto asumimos txt
FORMAT="txt"
OPTION="-g"
;;
esac
if [ -z "$REF" ]
then
REF=$FORMAT
fi
set -e -o pipefail
if [ "$FORMAT" = "none" ]; then
cat "$INFILE"
else
"$APERTIUM_PATH/apertium-des$FORMAT" ${FORMAT_OPTIONS} "$INFILE"
fi | if [ "$TRANSLATION_MEMORY_FILE" = "" ];
then
cat
else
"$APERTIUM_PATH/lt-tmxproc" "$TMCOMPFILE"
fi | if [ ! -x "$DATADIR/modes/$PAIR.mode" ]; then
sh "$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
else
"$DATADIR/modes/$PAIR.mode" "$OPTION" "$OPTION_TAGGER"
fi | if [ "$FORMAT" = "none" ]; then
if [ "$REDIR" = "" ]; then
cat
else
cat > "$SALIDA"
fi
else
if [ "$REDIR" = "" ]; then
"$APERTIUM_PATH/apertium-re$FORMAT"
else
"$APERTIUM_PATH/apertium-re$FORMAT" > "$SALIDA"
fi
fi