#!/usr/bin/env bash
#
# Defines standard configurations for parsing with the
# multilingual parsers (Arabic, Chinese, German, French).
#
# For English, it is easier to use lexparser.sh, although you can load
# an English grammar with this script.
#
# For details on the language-specific options, see the javadocs and
# lexparser_lang.def.
#
# Usage: <this script> lang len grammar out_file FILE...
#
# Exit status: 1 when fewer than five arguments are given or when
# lexparser_lang.def (expected next to this script) cannot be read;
# otherwise the exit status of the java command, which is the last
# command executed.
#

# Memory limit (passed to the JVM as -Xmx)
mem=3g

# Print the usage message to stderr.
usage() {
  cat >&2 <<EOF
Usage: ${0##*/} lang len grammar out_file FILE...

 lang : Language to parse (Arabic, English, Chinese, German, French)
 len : Maximum length of the sentences to parse
 grammar : Serialized grammar file (look in the models jar)
 out_file : Prefix for the output filename
 FILE : List of files to parse

To set additional parser options, modify parse_opts in lexparser_lang.def

Parser memory limit is currently: $mem

EOF
}

# Four fixed arguments plus at least one input file are required.
if (( $# < 5 )); then
  usage
  exit 1
fi

# Setup command-line options
# NOTE(review): lang is not referenced again in this script; presumably
# lexparser_lang.def reads it to choose the settings -- confirm.
lang=$1
len=$2
grammar=$3
out_file=$4

# Drop the fixed arguments so that "$@" holds only the files to parse.
shift 4

# Language-specific configuration
scriptdir=$(dirname -- "$0")
lang_def="$scriptdir/lexparser_lang.def"
if [[ ! -r "$lang_def" ]]; then
  printf '%s: cannot read %s\n' "${0##*/}" "$lang_def" >&2
  exit 1
fi
# NOTE(review): tlp, lang_opts and parse_opts used below are not assigned
# in this script; presumably this file defines them -- confirm.
# shellcheck source=/dev/null
source "$lang_def"

# Setting classpath: append every entry in the script directory to
# whatever CLASSPATH is already set.
CLASSPATH="$CLASSPATH":"$scriptdir/*"

# Run the Stanford parser
# lang_opts and parse_opts are intentionally left unquoted: each may hold
# several whitespace-separated options that must become separate arguments.
# shellcheck disable=SC2086
java -Xmx"$mem" -cp "$CLASSPATH" \
  edu.stanford.nlp.parser.lexparser.LexicalizedParser \
  -maxLength "$len" \
  -tLPP "$tlp" $lang_opts $parse_opts -writeOutputFiles \
  -outputFilesExtension "${out_file}.${len}.stp" -outputFormat "penn" \
  -outputFormatOptions "removeTopBracket,includePunctuationDependencies" \
  -loadFromSerializedFile "$grammar" "$@"