CofeehousePy/services/corenlp/scripts/lexparser/run-tb-preproc

66 lines
1.9 KiB
Python

#!/usr/bin/env python
#
# Convenience script for running
# edu.stanford.nlp.trees.treebank.TreebankPreprocessor.
#
# This package automatically generates the Arabic and French
# parser training data from the respective source distributions.
#
# See the README for more details.
#
# author: Spence Green
##############################
import sys
from optparse import OptionParser
import os
import subprocess
from time import sleep
def run_treebank_pipeline(opts, conf_file):
    """Run TreebankPreprocessor in a JVM and echo its output line by line.

    opts      -- parsed command-line options; reads ``jmem`` (used for both
                 -Xmx and -Xms), ``verbose``, ``extra`` and ``output_path``.
    conf_file -- path of the configuration file, passed as the last argument.

    Returns the exit status of the java process.
    """
    # Build an argument list rather than a single string so that the output
    # path and the configuration file may contain spaces.
    cmd = [
        'java',
        '-Xmx%s' % opts.jmem,
        '-Xms%s' % opts.jmem,
        'edu.stanford.nlp.trees.treebank.TreebankPreprocessor',
    ]
    if opts.verbose:
        cmd.append('-v')
    if opts.extra:
        # The extra options arrive as one string holding several flags.
        cmd.extend(opts.extra.split())
    if opts.output_path:
        cmd.extend(['-p', opts.output_path])
    cmd.append(conf_file)

    p = call_command(cmd)

    # readline() only returns an empty string at end-of-file, so reading until
    # then drains everything the child wrote (polling for process exit could
    # drop output that was still buffered in the pipe).
    for line in iter(p.stdout.readline, b''):
        if not isinstance(line, str):
            # The pipe yields bytes on Python 3.
            line = line.decode('utf-8', 'replace')
        # Strip only the line terminator; a final line without one is kept whole.
        sys.stdout.write(line.rstrip('\r\n') + '\n')
    p.stdout.close()
    return p.wait()


def call_command(command):
    """Start ``command`` as a child process and return its Popen object.

    command -- a list/tuple of arguments (preferred, since each argument may
               then contain spaces), or a single string, which is split on
               whitespace and therefore cannot express such arguments.

    The child's stderr is merged into its stdout pipe; stdin is also a pipe.
    """
    if isinstance(command, (list, tuple)):
        argv = list(command)
    else:
        # split() without an argument drops the empty tokens that repeated
        # spaces would otherwise turn into empty command-line arguments.
        argv = command.split()
    process = subprocess.Popen(argv,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT)
    return process
def main():
    """Parse the command line and hand the configuration file to the pipeline.

    Exactly one positional argument (the configuration file) is required;
    with any other count the help text is printed and the script exits
    with status -1.
    """
    # (flags, keyword settings) for each supported option, in help-text order.
    option_table = (
        (('-m', '--java-mem'),
         dict(dest='jmem', default='500m',
              help='Set JVM memory heap size (e.g. 500m)')),
        (('-v', '--verbose'),
         dict(dest='verbose', action='store_true', default=False,
              help='Verbose mode')),
        (('-o', '--options'),
         dict(dest='extra',
              help='Pass options directly to TreebankPreprocessor')),
        (('-p', '--output-path'),
         dict(dest='output_path',
              help="Destination directory for the output")),
    )

    cli = OptionParser(usage='usage: %prog [opts] conf_file')
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    opts, positional = cli.parse_args()
    if len(positional) == 1:
        run_treebank_pipeline(opts, positional[0])
        return

    cli.print_help()
    sys.exit(-1)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()