2021-01-14 08:07:24 +01:00
|
|
|
#!/usr/bin/env python
|
|
|
|
#
|
|
|
|
# Convenience script for running
|
|
|
|
# edu.stanford.nlp.trees.treebank.TreebankPreprocessor.
|
|
|
|
#
|
|
|
|
# This package automatically generates the Arabic and French
|
|
|
|
# parser training data from the respective source distributions.
|
|
|
|
#
|
|
|
|
# See the README for more details.
|
|
|
|
#
|
|
|
|
# author: Spence Green
|
|
|
|
##############################
|
|
|
|
|
|
|
|
import os
import shlex
import subprocess
import sys
from optparse import OptionParser
from time import sleep
|
|
|
|
|
|
|
|
def run_treebank_pipeline(opts, conf_file):
    """Run TreebankPreprocessor on ``conf_file`` and echo its output.

    The JVM is started with both ``-Xmx`` and ``-Xms`` set to ``opts.jmem``.
    ``-v`` is added when ``opts.verbose`` is set, the contents of
    ``opts.extra`` are passed through, and ``-p <opts.output_path>`` is added
    when an output path is given.  The child's combined stdout/stderr is
    copied line by line to this process's stdout.

    Args:
        opts: Parsed options providing ``jmem``, ``verbose``, ``extra`` and
            ``output_path``.
        conf_file: Path of the configuration file handed to the preprocessor.

    Returns:
        The exit status of the Java process.
    """
    # Build an argument list rather than one string: each path stays a single
    # argument even when it contains spaces.
    argv = [
        'java',
        '-Xmx%s' % opts.jmem,
        '-Xms%s' % opts.jmem,
        'edu.stanford.nlp.trees.treebank.TreebankPreprocessor',
    ]

    if opts.verbose:
        argv.append('-v')

    if opts.extra:
        # The extra options arrive as one string; split it shell-style so
        # quoted values keep their embedded spaces.
        argv.extend(shlex.split(opts.extra))

    if opts.output_path:
        argv.extend(['-p', opts.output_path])

    argv.append(conf_file)

    p = call_command(argv)

    # Read until EOF instead of polling the process: polling stops as soon as
    # the child exits and loses whatever is still buffered in the pipe.
    # iter(readline, '') is used rather than iterating the file object so
    # lines are echoed as they arrive.
    for line in iter(p.stdout.readline, ''):
        sys.stdout.write(line.rstrip('\n') + '\n')

    return p.wait()


def call_command(command):
    """Start ``command`` as a child process and return its ``Popen`` handle.

    Args:
        command: Either a list/tuple of arguments, used as given, or a single
            command string.  A string is split shell-style, so quoted
            arguments may contain spaces.

    Returns:
        The ``subprocess.Popen`` object.  stdin and stdout are pipes, stderr
        is merged into stdout, and the pipes are opened in text mode.
    """
    if not isinstance(command, (list, tuple)):
        command = shlex.split(command)

    # Text mode makes readline() return str on both Python 2 and Python 3.
    return subprocess.Popen(
        list(command),
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
|
|
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Parses the options, requires exactly one positional argument (the
    configuration file) and hands both to ``run_treebank_pipeline``.  With any
    other number of positional arguments the help text is printed and the
    process exits with status -1.
    """
    cli = OptionParser(usage='usage: %prog [opts] conf_file')
    cli.add_option('-m', '--java-mem', dest='jmem', default='500m',
                   help='Set JVM memory heap size (e.g. 500m)')
    cli.add_option('-v', '--verbose', dest='verbose', action='store_true',
                   default=False, help='Verbose mode')
    cli.add_option('-o', '--options', dest='extra',
                   help='Pass options directly to TreebankPreprocessor')
    cli.add_option('-p', '--output-path', dest='output_path',
                   help='Destination directory for the output')

    opts, positional = cli.parse_args()

    # Happy path first: exactly one configuration file was named.
    if len(positional) == 1:
        run_treebank_pipeline(opts, positional[0])
        return

    cli.print_help()
    sys.exit(-1)
|
|
|
|
|
|
|
|
# Launch the pipeline only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == '__main__':
    main()
|