User:Matthias Buchmeier/cross-search

This is a bash script to create dictionaries between two foreign languages (FL), eg. French-Spanish. FL1-FL2 bilingual dictionaries are extracted from en-FL1 and en-FL2 ding-formated dictionaries.

Required programs: bash, sed, grep, sort and uniq.

Code: edit

#!/bin/sh

# script copied and modified/improved from the
# DingMee Translator: http://chrm.info/cms/dingmee-translator
# GNU General Public License either version 3 of the License, or
# (at your option) any later version.

if [ $# -ne 3 ] ; then
    echo "Usage: $0 <DICT1> <DICT2> <OUTPUT-FILE>"
    exit
fi

DICT1=$1
DICT2=$2
OUTPUT=$3

a=0

while read line
    do 
    
# Find in another file
#    lineEN=`echo "$line" | sed "s/\ ::\ .*$//g"`
    lineEN=`echo "$line" | sed -e 's/\ ::\ .*$//g' -e 's/\/.*\/\ //' -e 's/[]]/\\\\]/g' -e 's/[[]/\\\\[/g'`
    termEN=`echo "$lineEN" | sed "s/\ (.*//g"`
    glossEN=`echo "$lineEN" | sed "s/[^(]*//"`
    lineSOURCE=`echo "$line" | sed "s/.*::\ \(.*$\)/\1/g"`

#    echo "[$lineEN]"
#    echo "[$termEN]"
#    echo "[$glossEN]"
#    echo "[$lineSOURCE]"

    TMP=`echo $lineEN | grep SEE`
    if [ "$TMP" != "" ]; then
        echo "--------- SEE --------"
        echo "[$lineEN]"
        echo "[$lineSOURCE]"
        continue
    fi

# gloss-less entries must be excluded
# as they are poor quality 
# and will match any gloss-entry of the same POS
    if [ "$glossEN" = "" ]; then
        echo "--------- NON-GlOSS ENTRY --------"
        echo "[$lineEN]"
        continue
    fi

#    lineDEST=`grep "$lineEN" $DICT2 | sed "s/.*::\ \(.*$\)/\1/g"`
     lineDEST=`grep "$termEN.*$glossEN" $DICT2 | sed "s/.*::\ \(.*$\)/\1/g"`
     

    if [ "$lineDEST" != "" -a "$lineSOURCE" != "" ]; then
        a=$(($a+1));
        echo $a;
        echo "-------- FOUND --------"
        echo "$lineSOURCE :: $lineDEST" >> $OUTPUT
    fi
done < $DICT1

# Cleanup
cat $OUTPUT | grep -v "SEE:" > ${OUTPUT}.new
cat ${OUTPUT}.new | sort | uniq > $OUTPUT
cat $OUTPUT | grep "::" > ${OUTPUT}.new
mv ${OUTPUT}.new $OUTPUT

echo "Ready. Generated file: $OUTPUT"