#!/bin/sh
# --- Installation layout -------------------------------------------------
# Directory holding the apertium-des* / apertium-pre* helper programs that
# the functions below invoke.
APERTIUM_PATH='/tmp/v19/bin'
# The next three settings are not referenced by the code visible in this
# script; they are kept because other tooling may source or rewrite them.
LTTOOLBOX_PATH='/tmp/v19/bin'
DEFAULT_DIRECTORY='/tmp/v19/share/apertium'
PAIR=''

# --- Default I/O endpoints (overridden by positional arguments) ----------
INPUT_FILE='/dev/stdin'
OUTPUT_FILE='/dev/stdout'

# Scratch location for mktemp: fall back to /tmp when TMPDIR is unset or empty.
: "${TMPDIR:=/tmp}"
# Print the command-line usage summary on standard output and terminate
# the script with exit status 1.
message ()
{
  # "$0" is quoted so that a script path containing whitespace reaches
  # basename as one argument instead of being split into several.
  echo "USAGE: $(basename "$0") [-f format] [in [out]]"
  echo " -f format one of: txt (default), html, rtf, odt, docx, wxml, xlsx, pptx"
  echo " in input file (stdin by default)"
  echo " out output file (stdout by default)"
  exit 1
}
# Pick the first locale listed by `locale -a` whose name looks like a UTF-8
# locale and export it as LC_CTYPE.  Prints an error and exits the script
# with status 1 when no such locale is listed.
locale_utf8 ()
{
  # Locale names spell the codeset as "utf8", "UTF-8" or similar depending on
  # the platform, so accept any run of '.' or '-' between "utf" and "8".
  # (The earlier pattern "utf[.]*8" missed the hyphenated "UTF-8" form.)
  LC_CTYPE=$(locale -a | grep -i 'utf[.-]*8' | head -n 1)
  if [ -z "$LC_CTYPE" ]; then
    echo "Error: Install an UTF-8 locale in your system"
    exit 1
  fi
  export LC_CTYPE
}
# Verify that both `zip` and `unzip` can be found by the shell.  Prints an
# error on standard output and exits the script with status 1 for the first
# one that is missing.
test_zip ()
{
  # `command -v` is a shell builtin, so the check does not itself depend on
  # the external `which` program being installed.
  if ! command -v zip >/dev/null 2>&1; then
    echo "Error: Install 'zip' command in your system"
    exit 1
  fi
  if ! command -v unzip >/dev/null 2>&1; then
    echo "Error: Install 'unzip' command in your system"
    exit 1
  fi
}
# Locate `gawk` and store what the shell resolves it to in the global GAWK.
# Prints an error on standard output and exits the script with status 1
# when gawk cannot be found.
test_gawk ()
{
  # `command -v` is a builtin; unlike `which` it is always available.
  GAWK=$(command -v gawk)
  if [ -z "$GAWK" ]; then
    echo "Error: Install 'gawk' in your system"
    exit 1
  fi
}
# Deformat a LaTeX document: run the input through apertium-prelatex,
# apertium-utils-fixlatex and apertium-deslatex and write the result to
# $SALIDA.
# Globals read:    FICHERO (input path; empty means "read standard input"),
#                  SALIDA (output path), APERTIUM_PATH, TMPDIR
# Globals written: FICHERO and BORRAFICHERO (only meaningful when standard
#                  input had to be spooled to a temporary file), GAWK (via
#                  test_gawk)
unformat_latex()
{
  # gawk is checked up front; presumably one of the LaTeX helper programs
  # needs it (not visible from this script).
  test_gawk

  # Start from a known state so that a BORRAFICHERO value inherited from the
  # environment can never cause the caller's own input file to be deleted.
  BORRAFICHERO=""
  if [ -z "$FICHERO" ]; then
    # apertium-prelatex takes a file name, so spool standard input to disk.
    FICHERO=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || exit 1
    cat > "$FICHERO"
    BORRAFICHERO="true"
  fi

  "$APERTIUM_PATH/apertium-prelatex" "$FICHERO" |
    "$APERTIUM_PATH/apertium-utils-fixlatex" |
    "$APERTIUM_PATH/apertium-deslatex" > "$SALIDA"

  if [ "$BORRAFICHERO" = "true" ]; then
    rm -f -- "$FICHERO"
  fi
}
# Deformat an OpenDocument text file: unpack $FICHERO into a temporary
# directory, feed every extracted file whose path matches "content.xml"
# (one output line per file, source lines joined with a leading space) to
# apertium-desodt, and write the result to $SALIDA.
# Globals read:    FICHERO, SALIDA, APERTIUM_PATH, TMPDIR
# Globals written: INPUT_TMPDIR (removed again before returning), LC_CTYPE
#                  (via locale_utf8)
unformat_odt ()
{
  # Both checks exit the script on failure; running them before mktemp means
  # a failed check cannot leave an orphaned temporary directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # -type f keeps directories out of the list.  The awk loop tests
  # "getline > 0" because getline returns -1 on a read error, which a bare
  # truth test would treat as "keep looping"; close() releases each file.
  find "$INPUT_TMPDIR" -type f | grep 'content\.xml' |
    awk '{
      part = $0
      while ((getline line < part) > 0) printf(" %s", line)
      close(part)
      printf("\n")
    }' |
    "$APERTIUM_PATH/apertium-desodt" > "$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat a Word (docx) file: unpack $FICHERO into a temporary directory,
# run embedded *.xlsx files through `apertium -f xlsx`, then feed the
# extracted XML parts (excluding settings/theme/styles/font/rels/docProps
# paths) to apertium-deswxml, one output line per file, and write the
# result to $SALIDA.
# Globals read:    FICHERO, SALIDA, APERTIUM_PATH, TMPDIR, DIRECTORY,
#                  OPCIONU, PREFIJO
# Globals written: INPUT_TMPDIR (removed again before returning), LC_CTYPE
#                  (via locale_utf8)
unformat_docx ()
{
  # Both checks exit the script on failure; running them before mktemp means
  # a failed check cannot leave an orphaned temporary directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # Embedded spreadsheets are processed in place.  Reading the find output
  # line by line keeps file names containing blanks intact.
  # NOTE(review): DIRECTORY, OPCIONU and PREFIJO are not assigned anywhere in
  # this script; presumably they come from the environment - confirm.  They
  # are deliberately left unquoted so empty values vanish and multi-word
  # option strings are still split into separate arguments.
  find "$INPUT_TMPDIR" -type f | grep 'xlsx$' |
    while IFS= read -r i; do
      LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || break
      "$APERTIUM_PATH/apertium" -f xlsx -d $DIRECTORY $OPCIONU $PREFIJO < "$i" > "$LOCALTEMP"
      cp "$LOCALTEMP" "$i"
      rm "$LOCALTEMP"
    done

  # The name filters are applied to paths relative to the extraction
  # directory, so the value of TMPDIR (or the random mktemp suffix) can never
  # match "xml", "font", "rels", ... and distort the selection.
  # The awk loop tests "getline > 0" because getline returns -1 on a read
  # error, which a bare truth test would treat as "keep looping".
  (
    cd "$INPUT_TMPDIR" || exit 1
    find . -type f | grep 'xml' |
      grep -v -i -E 'settings|theme|styles|font|rels|docProps' |
      awk '{
        part = $0
        while ((getline line < part) > 0) printf(" %s", line)
        close(part)
        printf("\n")
      }'
  ) | "$APERTIUM_PATH/apertium-deswxml" > "$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat a PowerPoint (pptx) file: unpack $FICHERO into a temporary
# directory, run embedded *.xlsx files through `apertium -f xlsx`, then feed
# every extracted file whose path ends in "xml" and contains "slides/slide"
# to apertium-despptx, one output line per file, and write the result to
# $SALIDA.
# Globals read:    FICHERO, SALIDA, APERTIUM_PATH, TMPDIR, DIRECTORY,
#                  OPCIONU, PREFIJO
# Globals written: INPUT_TMPDIR (removed again before returning), LC_CTYPE
#                  (via locale_utf8)
unformat_pptx ()
{
  # Both checks exit the script on failure; running them before mktemp means
  # a failed check cannot leave an orphaned temporary directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # Embedded spreadsheets are processed in place.  Reading the find output
  # line by line keeps file names containing blanks intact.
  # NOTE(review): DIRECTORY, OPCIONU and PREFIJO are not assigned anywhere in
  # this script; presumably they come from the environment - confirm.  They
  # are deliberately left unquoted so empty values vanish and multi-word
  # option strings are still split into separate arguments.
  find "$INPUT_TMPDIR" -type f | grep 'xlsx$' |
    while IFS= read -r i; do
      LOCALTEMP=$(mktemp "$TMPDIR/apertium.XXXXXXXX") || break
      "$APERTIUM_PATH/apertium" -f xlsx -d $DIRECTORY $OPCIONU $PREFIJO < "$i" > "$LOCALTEMP"
      cp "$LOCALTEMP" "$i"
      rm "$LOCALTEMP"
    done

  # The awk loop tests "getline > 0" because getline returns -1 on a read
  # error, which a bare truth test would treat as "keep looping"; close()
  # releases each slide file before the next one is opened.
  find "$INPUT_TMPDIR" -type f | grep 'xml$' |
    grep 'slides/slide' |
    awk '{
      part = $0
      while ((getline line < part) > 0) printf(" %s", line)
      close(part)
      printf("\n")
    }' |
    "$APERTIUM_PATH/apertium-despptx" > "$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Deformat an Excel (xlsx) file: unpack $FICHERO into a temporary directory,
# feed every extracted file whose path matches "sharedStrings.xml" (one
# output line per file, source lines joined with a leading space) to
# apertium-desxlsx, and write the result to $SALIDA.
# Globals read:    FICHERO, SALIDA, APERTIUM_PATH, TMPDIR
# Globals written: INPUT_TMPDIR (removed again before returning), LC_CTYPE
#                  (via locale_utf8)
unformat_xlsx ()
{
  # Both checks exit the script on failure; running them before mktemp means
  # a failed check cannot leave an orphaned temporary directory behind.
  locale_utf8
  test_zip

  INPUT_TMPDIR=$(mktemp -d "$TMPDIR/apertium.XXXXXXXX") || exit 1
  unzip -q -o -d "$INPUT_TMPDIR" "$FICHERO"

  # -type f keeps directories out of the list.  The awk loop tests
  # "getline > 0" because getline returns -1 on a read error, which a bare
  # truth test would treat as "keep looping"; close() releases each file.
  find "$INPUT_TMPDIR" -type f | grep "sharedStrings.xml" |
    awk '{
      part = $0
      while ((getline line < part) > 0) printf(" %s", line)
      close(part)
      printf("\n")
    }' |
    "$APERTIUM_PATH/apertium-desxlsx" > "$SALIDA"

  rm -Rf "$INPUT_TMPDIR"
}
# Entry point: parse "[-f format] [in [out]]", pick the deformatter for the
# requested format and run it.
# Globals read:    INPUT_FILE / OUTPUT_FILE (defaults used when the
#                  corresponding operand is absent), APERTIUM_PATH, FORMAT
# Globals written: FORMAT, INPUT_FILE, OUTPUT_FILE, FORMATADOR, FICHERO,
#                  SALIDA, LC_CTYPE (rtf and wxml only)
# Exit status:     1 via message() on a usage error or a missing input
#                  file, 1 when the rtf format finds no suitable locale,
#                  0 after the latex/odt/docx/xlsx/pptx handlers, otherwise
#                  the status of the apertium-des<format> program.
main ()
{
  # The getopts builtin keeps every argument intact (the previous
  # getopt + "set -- $ARGS" combination re-split file names on whitespace)
  # and lets an unknown option end in the usage message.
  OPTIND=1
  while getopts "f:" opt; do
    case "$opt" in
      f) FORMAT=$OPTARG ;;
      *) message ;;
    esac
  done
  shift $((OPTIND - 1))

  case "$#" in
    2)
      OUTPUT_FILE=$2
      INPUT_FILE=$1
      ;;
    1)
      INPUT_FILE=$1
      ;;
    0)
      ;;
    *)
      message
      ;;
  esac

  # Only an input file named on the command line is checked for existence.
  if [ "$#" -ge 1 ] && [ ! -e "$INPUT_FILE" ]; then
    echo "Error: file '$INPUT_FILE' not found."
    message
  fi

  # FORMAT is not cleared before option parsing, so a value already present
  # in the environment is used when -f is absent; "txt" is the fallback.
  if [ -z "$FORMAT" ]; then
    FORMAT="txt"
  fi
  FORMATADOR=$FORMAT
  FICHERO=$INPUT_FILE
  SALIDA=$OUTPUT_FILE

  case "$FORMATADOR" in
    rtf)
      # Take the first listed locale that is neither UTF-8, C nor POSIX.
      MILOCALE=$(locale -a | grep -i -v -E 'utf|^C$|^POSIX$' | head -n 1)
      if [ -z "$MILOCALE" ]; then
        echo "Error: Install a ISO-8859-1 compatible locale in your system"
        exit 1
      fi
      export LC_CTYPE=$MILOCALE
      ;;
    html-noent)
      # html-noent is handled by the plain html deformatter.
      FORMATADOR="html"
      ;;
    latex)
      unformat_latex
      exit 0
      ;;
    odt)
      unformat_odt
      exit 0
      ;;
    docx)
      unformat_docx
      exit 0
      ;;
    xlsx)
      unformat_xlsx
      exit 0
      ;;
    pptx)
      unformat_pptx
      exit 0
      ;;
    wxml)
      locale_utf8
      ;;
    *)
      ;;
  esac

  "$APERTIUM_PATH/apertium-des$FORMATADOR" "$FICHERO" > "$SALIDA"
}

main "$@"