#!/usr/bin/env python
#
# Convenience script for running
# edu.stanford.nlp.trees.treebank.TreebankPreprocessor.
#
# This package automatically generates the Arabic and French
# parser training data from the respective source distributions.
#
# See the README for more details.
#
# author: Spence Green
##############################

import sys
from optparse import OptionParser
import os
import subprocess
from time import sleep

# Fully-qualified name of the Java entry point launched by this script.
PREPROCESSOR_CLASS = 'edu.stanford.nlp.trees.treebank.TreebankPreprocessor'


def build_command(opts, conf_file):
    """Return the java invocation as a list of arguments.

    Args:
        opts: parsed options object providing ``jmem``, ``verbose``,
            ``extra`` and ``output_path`` attributes.
        conf_file: path of the configuration file handed to the
            preprocessor as its final argument.

    Returns:
        A list of strings suitable for ``subprocess.Popen``. The output
        path and the configuration file are each kept as a single list
        element, so paths containing spaces survive intact.
    """
    argv = ['java', '-Xmx%s' % opts.jmem, '-Xms%s' % opts.jmem,
            PREPROCESSOR_CLASS]
    if opts.verbose:
        argv.append('-v')
    if opts.extra:
        # The pass-through string may hold several options; split it on
        # whitespace so each one reaches Java as its own argument.
        argv.extend(opts.extra.split())
    if opts.output_path:
        argv.extend(['-p', opts.output_path])
    argv.append(conf_file)
    return argv


def run_treebank_pipeline(opts, conf_file):
    """Run TreebankPreprocessor and echo its output line by line.

    Args:
        opts: parsed command-line options (see ``build_command``).
        conf_file: path of the configuration file to process.

    Returns:
        The exit status of the java process.
    """
    process = call_command(build_command(opts, conf_file))

    # Nothing is ever sent to the child, so close its stdin right away;
    # a child that tried to read it would otherwise block forever.
    process.stdin.close()

    # Read until EOF rather than until the process exits: output that is
    # still buffered in the pipe when the child terminates must not be lost.
    for line in iter(process.stdout.readline, ''):
        # Strip only a real trailing newline so that a final line without
        # one is not truncated by a character.
        sys.stdout.write(line.rstrip('\n') + '\n')

    process.stdout.close()
    return process.wait()


def call_command(command):
    """Start *command* with stdout and stderr merged into one pipe.

    Args:
        command: either a list/tuple of arguments (preferred, because
            arguments may then contain spaces) or a single string, which
            is split on whitespace.

    Returns:
        The started ``subprocess.Popen`` object. Its stdout is opened in
        text mode so callers read strings on both Python 2 and Python 3.
    """
    if isinstance(command, (list, tuple)):
        argv = list(command)
    else:
        # NOTE: a plain string cannot express arguments that contain
        # spaces; pass a list for such paths.
        argv = command.split()

    process = subprocess.Popen(argv,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               universal_newlines=True)
    return process


def build_option_parser():
    """Return the OptionParser describing this script's command line."""
    usage = 'usage: %prog [opts] conf_file'
    parser = OptionParser(usage=usage)
    parser.add_option('-m', '--java-mem', dest='jmem', default='500m',
                      help='Set JVM memory heap size (e.g. 500m)')
    parser.add_option('-v', '--verbose', dest='verbose',
                      action='store_true', default=False,
                      help='Verbose mode')
    parser.add_option('-o', '--options', dest='extra',
                      help='Pass options directly to TreebankPreprocessor')
    parser.add_option('-p', '--output-path', dest='output_path',
                      help="Destination directory for the output")
    return parser


def main():
    """Parse the command line, run the pipeline and exit with its status.

    Exactly one positional argument (the configuration file) is required;
    otherwise the help text is printed and the script exits with -1.
    """
    parser = build_option_parser()
    (opts, args) = parser.parse_args()

    if len(args) != 1:
        parser.print_help()
        sys.exit(-1)

    conf_file = args[0]
    # Propagate the Java exit status so callers can detect failures.
    sys.exit(run_treebank_pipeline(opts, conf_file))


if __name__ == '__main__':
    main()