Added CoreNLP

This commit is contained in:
Netkas 2021-01-08 21:43:33 -05:00
parent 22cc9ad1d5
commit 6bb11b5d3f
2744 changed files with 2278210 additions and 2 deletions

21
.gitignore vendored
View File

@@ -126,4 +126,23 @@ venv.bak/
dmypy.json
# Pyre type checker
.pyre/
.pyre/
*.pyc
*.swp
*.swo
*.o
*.so
.DS_Store
.settings/**
# stuff gets built here
classes/**
# IntelliJ IDEA
*.eml
*.iml
.idea/**
# emacs
*~

View File

@@ -1,9 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/services/corenlp/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="head.load" level="application" />
<orderEntry type="library" name="d3" level="application" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />

View File

@@ -0,0 +1,48 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse JDT classpath for the CoreNLP project.
     Fixes applied in review:
     - removed stray "-" diff-marker residue that corrupted the junit-quickcheck entries
     - removed a trailing space inside the hamcrest jar path (Eclipse cannot resolve "...jar ")
     - added the missing "libsrc/" prefix to the two lucene sourcepath values,
       matching every sibling entry in this file -->
<classpath>
  <!-- Source folders: integration tests, main sources, unit tests -->
  <classpathentry kind="src" path="itest/src"/>
  <classpathentry kind="src" path="src"/>
  <classpathentry kind="src" path="test/src"/>
  <classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
  <!-- Shipped libraries (lib/), exported to downstream projects -->
  <classpathentry exported="true" kind="lib" path="lib/ant-contrib-1.0b3.jar" sourcepath="libsrc/ant-contrib-1.0b3-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/el-api.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/jasper.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/tomcat-juli.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/jasper-el.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/jsp-api.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/tomcat/tomcat-api.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/commons-logging.jar" sourcepath="libsrc/commons-logging-1.1.1-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/junit-4.13.1.jar" sourcepath="libsrc/junit-4.13.1-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/javax.servlet.jar" sourcepath="libsrc/tomcat/apache-tomcat-7.0.12-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/AppleJavaExtensions.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/jflex-1.8.2.jar" sourcepath="libsrc/jflex-1.8.2-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/xom-1.3.2.jar" sourcepath="libsrc/xom-1.3.2-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/joda-time.jar" sourcepath="libsrc/joda-time-2.10.5-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/jollyday-0.4.9.jar" sourcepath="libsrc/jollyday-0.4.9-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/log4j-1.2.16.jar" sourcepath="libsrc/log4j-1.2.16-src.zip"/>
  <classpathentry exported="true" kind="lib" path="lib/commons-lang3-3.1.jar" sourcepath="libsrc/commons-lang3-3.1-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/protobuf.jar" sourcepath="libsrc/protobuf-java-3.9.2-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/javacc.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/javax.json.jar" sourcepath="libsrc/javax.json-api-1.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/lucene-core-7.5.0.jar" sourcepath="libsrc/lucene-core-7.5.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/lucene-demo-7.5.0.jar" sourcepath="libsrc/lucene-demo-7.5.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/lucene-analyzers-common-7.5.0.jar" sourcepath="libsrc/lucene-analyzers-common-7.5.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/lucene-queryparser-7.5.0.jar" sourcepath="libsrc/lucene-queryparser-7.5.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/slf4j-api.jar" sourcepath="libsrc/slf4j-api-1.7.12-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/slf4j-simple.jar" sourcepath="libsrc/slf4j-simple-1.7.12-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/javax.activation-api-1.2.0.jar" sourcepath="libsrc/javax.activation-api-1.2.0-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/jaxb-api-2.4.0-b180830.0359.jar" sourcepath="libsrc/jaxb-api-2.4.0-b180830.0359-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/jaxb-core-2.3.0.1.jar" sourcepath="libsrc/jaxb-core-2.3.0.1-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/jaxb-impl-2.4.0-b180830.0438.jar" sourcepath="libsrc/jaxb-impl-2.4.0-b180830.0438-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/ejml-core-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-core-0.39-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/ejml-ddense-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-ddense-0.39-sources.jar"/>
  <classpathentry exported="true" kind="lib" path="lib/ejml-simple-0.39.jar" sourcepath="libsrc/projects/core/libsrc/ejml-simple-0.39-sources.jar"/>
  <!-- Test-only libraries (liblocal/), not exported -->
  <classpathentry kind="lib" path="liblocal/antlr-runtime.jar"/>
  <classpathentry kind="lib" path="liblocal/hamcrest-2.2.jar" sourcepath="libsrc/hamcrest-2.2-sources.jar"/>
  <classpathentry kind="lib" path="liblocal/javaruntype.jar"/>
  <classpathentry kind="lib" path="liblocal/junit-quickcheck-core-0.4.jar" sourcepath="libsrc/junit-quickcheck-core-0.4-sources.jar"/>
  <classpathentry kind="lib" path="liblocal/junit-quickcheck-generators-0.4.jar" sourcepath="libsrc/junit-quickcheck-generators-0.4-sources.jar"/>
  <classpathentry kind="lib" path="liblocal/junit-theories.jar"/>
  <classpathentry kind="lib" path="liblocal/ognl.jar"/>
  <!-- Compiled classes output directory -->
  <classpathentry kind="output" path="classes"/>
</classpath>

17
services/corenlp/.project Normal file
View File

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Eclipse workspace project descriptor for CoreNLP.
     Registers the project name, its builders, and its natures with Eclipse. -->
<projectDescription>
<name>CoreNLP</name>
<comment></comment>
<!-- No referenced (dependent) projects. -->
<projects>
</projects>
<!-- Build with the JDT Java builder; no custom builder arguments. -->
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<!-- Java nature marks this as a Java project so JDT tooling applies. -->
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
</projectDescription>

View File

View File

@@ -0,0 +1,73 @@
# Stanford CoreNLP
[![Build Status](https://travis-ci.org/stanfordnlp/CoreNLP.svg?branch=master)](https://travis-ci.org/stanfordnlp/CoreNLP)
[![Maven Central](https://img.shields.io/maven-central/v/edu.stanford.nlp/stanford-corenlp.svg)](https://mvnrepository.com/artifact/edu.stanford.nlp/stanford-corenlp)
[![Twitter](https://img.shields.io/twitter/follow/stanfordnlp.svg?style=social&label=Follow)](https://twitter.com/stanfordnlp/)
Stanford CoreNLP provides a set of natural language analysis tools written in Java. It can take raw human language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize and interpret dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases or word dependencies, and indicate which noun phrases refer to the same entities. It was originally developed for English, but now also provides varying levels of support for (Modern Standard) Arabic, (mainland) Chinese, French, German, and Spanish. Stanford CoreNLP is an integrated framework, which makes it very easy to apply a bunch of language analysis tools to a piece of text. Starting from plain text, you can run all the tools with just two lines of code. Its analyses provide the foundational building blocks for higher-level and domain-specific text understanding applications. Stanford CoreNLP is a set of stable and well-tested natural language processing tools, widely used by various groups in academia, industry, and government. The tools variously use rule-based, probabilistic machine learning, and deep learning components.
The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.
### Build Instructions
Several times a year we distribute a new version of the software, which corresponds to a stable commit.
During the time between releases, one can always use the latest, under development version of our code.
Here are some helpful instructions to use the latest code:
#### Provided build
Sometimes we will provide updated jars here which have the latest version of the code.
At present, [the current released version of the code](https://stanfordnlp.github.io/CoreNLP/#download) is our most recent released jar, though you can always build the very latest from GitHub HEAD yourself.
<!---
[stanford-corenlp.jar (last built: 2017-04-14)](http://nlp.stanford.edu/software/stanford-corenlp-2017-04-14-build.jar)
-->
#### Build with Ant
1. Make sure you have Ant installed, details here: [http://ant.apache.org/](http://ant.apache.org/)
2. Compile the code with this command: `cd CoreNLP ; ant`
3. Then run this command to build a jar with the latest version of the code: `cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu`
4. This will create a new jar called stanford-corenlp.jar in the CoreNLP folder which contains the latest code
5. The dependencies that work with the latest code are in CoreNLP/lib and CoreNLP/liblocal, so make sure to include those in your CLASSPATH.
6. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar), [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and [english-models-kbp](http://nlp.stanford.edu/software/stanford-english-kbp-corenlp-models-current.jar) and include them in your CLASSPATH. If you are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.
#### Build with Maven
1. Make sure you have Maven installed, details here: [https://maven.apache.org/](https://maven.apache.org/)
2. If you run this command in the CoreNLP directory: `mvn package` , it should run the tests and build this jar file: `CoreNLP/target/stanford-corenlp-4.0.0.jar`
3. When using the latest version of the code make sure to download the latest versions of the [corenlp-models](http://nlp.stanford.edu/software/stanford-corenlp-models-current.jar), [english-models](http://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar), and [english-models-kbp](http://nlp.stanford.edu/software/stanford-english-kbp-corenlp-models-current.jar) and include them in your CLASSPATH. If you are processing languages other than English, make sure to download the latest version of the models jar for the language you are interested in.
4. If you want to use Stanford CoreNLP as part of a Maven project you need to install the models jars into your Maven repository. Below is a sample command for installing the Spanish models jar. For other languages just change the language name in the command. To install `stanford-corenlp-models-current.jar` you will need to set `-Dclassifier=models`. Here is the sample command for Spanish: `mvn install:install-file -Dfile=/location/of/stanford-spanish-corenlp-models-current.jar -DgroupId=edu.stanford.nlp -DartifactId=stanford-corenlp -Dversion=4.0.0 -Dclassifier=models-spanish -Dpackaging=jar`
#### Models
The models jars that correspond to the latest code can be found in the table below.
Some of the larger (English) models -- like the shift-reduce parser and WikiDict -- are not distributed with our default models jar.
These require downloading the English (extra) and English (kbp) jars. Resources for other languages require usage of the corresponding
models jar.
| Language | Model Jar | Last Updated |
| --- | --- | --- |
| Arabic | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-arabic.jar) | 4.2.0 |
| Chinese | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-chinese.jar) | 4.2.0 |
| English (default) | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models.jar) | 4.2.0 |
| English (extra) | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-english.jar) | 4.2.0 |
| English (kbp) | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-english-kbp.jar) | 4.2.0 |
| French | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-french.jar) | 4.2.0 |
| German | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-german.jar) | 4.2.0 |
| Spanish | [download](http://nlp.stanford.edu/software/stanford-corenlp-4.2.0-models-spanish.jar) | 4.2.0 |
### Useful resources
You can find releases of Stanford CoreNLP on [Maven Central](https://search.maven.org/artifact/edu.stanford.nlp/stanford-corenlp/4.0.0/jar).
You can find more explanation and documentation on [the Stanford CoreNLP homepage](http://stanfordnlp.github.io/CoreNLP/).
For information about making contributions to Stanford CoreNLP, see the file [CONTRIBUTING.md](CONTRIBUTING.md).
Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](http://stackoverflow.com/questions/tagged/stanford-nlp),
or on the [mailing lists](https://nlp.stanford.edu/software/#Mail).

View File

@@ -0,0 +1,10 @@
These resources were used in the creation of Stanford CoreNLP:
-----------------------------------------------------------------
Spanish Billion Word Corpus Embeddings
Notes: These embeddings were used in the training of the Spanish dependency parser.
URL: http://crscardellino.me/SBWCE/
License: Creative Commons Attribution-ShareAlike 4.0 International License

View File

@@ -0,0 +1,69 @@
//
// Stanford CoreNLP build specification for
// Gradle.
//
apply plugin: 'java'
apply plugin: 'eclipse'
apply plugin: 'application'
// Gradle java plugin
// Compile for Java 8 bytecode, sources are UTF-8 encoded.
sourceCompatibility = 1.8
targetCompatibility = 1.8
compileJava.options.encoding = 'UTF-8'
version = '4.2.0'
// Gradle application plugin
// Entry point used by the 'run' and distribution tasks.
mainClassName = "edu.stanford.nlp.pipeline.StanfordCoreNLP"
// Jar creation
// Manifest makes the jar runnable (Main-Class) and self-describing.
jar {
manifest {
attributes 'Implementation-Title': 'Stanford CoreNLP',
'Implementation-Version': version,
'Main-Class': 'edu.stanford.nlp.pipeline.StanfordCoreNLP'
}
}
// Publish archives into a local flat directory named 'repos'.
uploadArchives {
repositories {
flatDir {
dirs 'repos'
}
}
}
// Add src-extra build target
// Sources and resources both live under src/ (no separate resources tree).
sourceSets {
main {
java.srcDirs = ['src/']
resources.srcDirs = ['src/']
}
}
// Debug helper: prints the file name of every jar on the compile configuration.
task listDeps {
doLast {
configurations.compile.each { File file -> println file.name }
}
}
// All jars in lib/ are compile dependencies; jars in liblocal/ are test-only.
// NOTE(review): the 'compile'/'testCompile' configurations were removed in
// Gradle 7+; this script targets the Gradle 3.2 wrapper declared below.
dependencies {
compile fileTree(dir: 'lib', include: '*.jar')
testCompile fileTree(dir: 'liblocal', include: '*.jar')
}
// Eclipse plugin setup
eclipse {
classpath {
defaultOutputDir = file('bin/')
file {
// Drop all generated 'lib' entries before merging, so the
// checked-in .classpath keeps control of library entries.
beforeMerged { classpath ->
classpath.entries.removeAll { entry -> entry.kind == 'lib' }
}
}
}
}
// Pin the Gradle wrapper version for reproducible builds.
task wrapper(type: Wrapper) {
gradleVersion = '3.2'
}

654
services/corenlp/build.xml Normal file
View File

@@ -0,0 +1,654 @@
<!-- JavaNLP core build file -->
<project name="core" default="compile" basedir=".">
<property name="build.path" value="${basedir}/classes" />
<property name="source.path" value="${basedir}/src" />
<property name="doc.path" value="${basedir}/doc" />
<property name="data.path" value="${basedir}/data" />
<property name="tests.path" value="${basedir}/test/src" />
<property name="itests.path" value="${basedir}/itest/src" />
<!-- included tasks may use project.core as a property name -->
<property name="project.core" value="${basedir}" />
<property name="compile.debug" value="true"/>
<property name="compile.deprecation" value="false"/>
<property name="compile.optimize" value="true"/>
<property name="compile.source" value="1.8" />
<property name="compile.target" value="1.8" />
<property name="compile.encoding" value="utf-8" />
<target name="classpath" description="Sets the classpath">
<echo message="${ant.project.name}" />
<path id="classpath">
<fileset dir="${basedir}/lib">
<include name="*.jar"/>
<exclude name="javanlp*"/>
</fileset>
<fileset dir="${basedir}/liblocal">
<include name="*.jar"/>
<exclude name="javanlp*"/>
</fileset>
</path>
</target>
<target name="models.classpath" description="Sets the models classpath, for itests and such">
<path id="models.classpath">
<fileset dir="${env.CORENLP_MODELS_HOME}">
<include name="*.jar"/>
</fileset>
</path>
</target>
<target name="clean" description="Delete old classes">
<echo message="${ant.project.name}" />
<delete dir="${build.path}"/>
</target>
<target name="javacceverything" depends="classpath"
description="javacc everything that can be javacced">
<taskdef resource="net/sf/antcontrib/antlib.xml">
<classpath>
<pathelement location="${project.core}/lib/ant-contrib-1.0b3.jar"/>
</classpath>
</taskdef>
<echo message="${ant.project.name}" />
<for param="javacc.target">
<path>
<fileset dir="${source.path}">
<include name="**/*.jj"/>
</fileset>
</path>
<sequential>
<javacc target="@{javacc.target}" javacchome="${basedir}/lib" />
</sequential>
</for>
</target>
<target name="flexeverything" depends="classpath"
description="JFlex everything that can be JFlexed">
<taskdef resource="net/sf/antcontrib/antlib.xml">
<classpath>
<pathelement location="${project.core}/lib/ant-contrib-1.0b3.jar"/>
</classpath>
</taskdef>
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath>
<pathelement location="${project.core}/lib/jflex-full-1.8.2.jar"/>
</classpath>
</taskdef>
<echo message="${ant.project.name}" />
<for param="flex.target">
<path>
<fileset dir="${source.path}">
<include name="**/*.flex"/>
</fileset>
</path>
<sequential>
<jflex file="@{flex.target}" />
</sequential>
</for>
</target>
<target name="compile" depends="classpath"
description="Compile core sources">
<echo message="${ant.project.name}" />
<mkdir dir="${build.path}" />
<javac srcdir="${source.path}:${tests.path}:${itests.path}"
destdir="${build.path}"
debug="${compile.debug}"
encoding="${compile.encoding}"
deprecation="${compile.deprecation}"
optimize="${compile.optimize}"
source="${compile.source}"
target="${compile.target}"
fork="true"
memorymaximumsize="2g"
includeantruntime="false">
<classpath refid="classpath" />
<!-- <compilerarg value="-Xmaxerrs"/>
<compilerarg value="20"/> -->
<compilerarg value="-Xlint:cast"/>
<compilerarg value="-Xlint:classfile"/>
<compilerarg value="-Xlint:divzero"/>
<compilerarg value="-Xlint:empty"/>
<compilerarg value="-Xlint:finally"/>
<compilerarg value="-Xlint:path"/>
<compilerarg value="-Xlint:try"/>
<!--
<compilerarg value="-Xlint:deprecation"/>
<compilerarg value="-Xlint:dep-ann"/>
<compilerarg value="-Xlint:fallthrough"/>
<compilerarg value="-Xlint:options"/>
<compilerarg value="-Xlint:overrides"/>
<compilerarg value="-Xlint:processing"/>
<compilerarg value="-Xlint:rawtypes"/>
<compilerarg value="-Xlint:serial"/>
<compilerarg value="-Xlint:static"/>
<compilerarg value="-Xlint:unchecked"/>
<compilerarg value="-Xlint:varargs"/>
-->
<!-- <compilerarg value="-Xlint:all"/>
<compilerarg value="-Xmaxwarns"/>
<compilerarg value="10000"/> -->
</javac>
<copy todir="${build.path}/edu/stanford/nlp/pipeline/demo">
<fileset dir="${source.path}/edu/stanford/nlp/pipeline/demo">
<exclude name="**/*.java"/>
</fileset>
</copy>
<copy todir="${build.path}/edu/stanford/nlp/pipeline">
<fileset dir="${source.path}/edu/stanford/nlp/pipeline">
<exclude name="**/*.java"/>
</fileset>
</copy>
</target>
<target name="test" depends="classpath,compile"
description="Run core unit tests">
<echo message="${ant.project.name}" />
<junit fork="true" maxmemory="1g" printsummary="off" outputtoformatters="false" forkmode="perBatch" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${tests.path}">
<include name="**/*Test.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest" depends="classpath,compile,models.classpath"
description="Run core integration tests">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="12g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*ITest*.java"/>
<exclude name="**/*SlowITest*.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-many-docs" depends="classpath,compile,models.classpath"
description="Run StanfordCoreNLP on a large volume of documents.">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="14g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*StanfordCoreNLPSlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-coreference" depends="classpath,compile,models.classpath"
description="Coreference related slow itests.">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="7g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*Coref*SlowITest.java"/>
<include name="**/DcorefBenchmarkSlowITest.java"/>
<include name="**/DcorefSlowITest.java"/>
<include name="**/ChineseCorefBenchmarkSlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-protobuf" depends="classpath,compile,models.classpath"
description="Protocol buffer related slow itests.">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="14g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*Protobuf*SlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-kbp" depends="classpath,compile,models.classpath"
description="KBP related slow itests.">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="14g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*KBP*SlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-ner" depends="classpath,compile,models.classpath"
description="NER related slow itests">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="8g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/NERBenchmark*SlowITest.java"/>
<include name="**/NERPipelineEndToEndSlowITest.java"/>
<include name="**/NERTokenizationSlowITest.java"/>
<include name="**/TrainCRFClassifierSlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-misc" depends="classpath,compile,models.classpath"
description="Other assorted slow itests">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="14g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath refid="models.classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/RequirementsCorrectSlowITest.java"/>
<include name="**/ThreadedParserSlowITest.java"/>
</fileset>
</batchtest>
</junit>
<fail message="Test error or failure detected, check test results." if="test.failed" />
</target>
<target name="itest-debug" depends="classpath,compile,models.classpath"
description="target for debug purposes">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="14g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="no"
haltonerror="no" failureproperty="test.failed" errorproperty="test.failed">
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/HeidelTimeKBPAnnotatorITest.java"/>
</fileset>
</batchtest>
</junit>
</target>
<target name="slowitest" depends="classpath,compile"
description="Run really slow integration tests">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="12g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="true">
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
<classpath path="${source.path}"/>
<formatter type="brief" usefile="false"/>
<batchtest fork="yes">
<fileset dir="${itests.path}">
<include name="**/*SlowITest.java"/>
</fileset>
</batchtest>
</junit>
</target>
<target name="compile-all" depends="compile,jsp"
description="Compile everything" />
<target name="all" depends="clean,compile-all"
description="Clean and re-compile." />
<!-- This file contains the .jsp build target -->
<import file="commonbuildjsp.xml" />
<!-- This runs the specified class, using a separate Java VM -->
<!-- Specify class to run via "run.class," arg, i.e., -->
<!-- ant run -Drun.class=edu.stanford.nlp.tagger.maxent.MaxentTaggerGUI -->
<!-- Use "jvmarg" to pass arguments to the VM, use "arg" to pass cmdline args to the class -->
<target name="run" depends="classpath">
<java classname="${run.class}" fork="true" jvm="java">
<jvmarg value="-Xmx2g"/>
<jvmarg value="-server"/>
<classpath>
<path refid="classpath"/>
<pathelement path="${build.path}"/>
</classpath>
</java>
</target>
<!-- Same as "run," except causes the VM to wait until debugger is attached -->
<!-- See http://nlp.stanford.edu/javanlp/did_you_know/eclipse_debug.html for example -->
<target name="run-debug" depends="classpath">
<java classname="${run.class}" fork="true" jvm="java">
<jvmarg value="-Xmx2g"/>
<!-- Following args should cause this to freeze and wait for debug attach -->
<jvmarg value="-Xdebug"/>
<jvmarg value="-Xnoagent"/>
<jvmarg value="-Djava.compiler=None"/>
<jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=9099"/>
<classpath>
<path refid="classpath"/>
<pathelement path="${build.path}"/>
</classpath>
</java>
</target>
<target name="jar" depends="compile">
<jar destfile="javanlp-core.jar">
<fileset dir="${project.core}/classes" />
</jar>
</target>
<!-- I wrote this for a side project, so if you one day see it in a
build file of mine, you can't sue me for taking it from
Stanford. -jab -->
<!-- This macro builds a .war for a jsp, configured by several
attributes which say which .jar file to use as a base library,
which other libraries are needed, which data files to grab,
etc. In order to verify that the .jsp will work, it
precompiles it before building the .war. -->
<macrodef name="buildjsp">
<attribute name="webapp.path"/> <!-- where to find the .jsp files -->
<attribute name="webapp.war"/> <!-- name of the .war to produce -->
<attribute name="webapp.jar"/> <!-- probably javanlp-core.jar -->
<element name="webapp.lib"/> <!-- libs to include -->
<element name="webapp.data"/> <!-- data files to include -->
<sequential>
<!-- This tool turns .jsp into .java -->
<taskdef classname="org.apache.jasper.JspC" name="jasper2" >
<classpath refid="webapp.classpath"/>
</taskdef>
<echo message="Building .jsp @{webapp.path}" />
<echo message="Building from ${source.path}/@{webapp.path}"/>
<!-- First, compile the .jsp into .java -->
<jasper2
validateXml="false"
uriroot="${source.path}/@{webapp.path}"
webXmlFragment="${source.path}/@{webapp.path}/WEB-INF/generated.xml"
addWebXmlMappings="true"
outputDir="${build.path}/@{webapp.path}/WEB-INF/src" />
<mkdir dir="${build.path}/@{webapp.path}/WEB-INF/classes"/>
<!-- Then compile the .java into .class -->
<javac destdir="${build.path}/@{webapp.path}/WEB-INF/classes"
optimize="${compile.optimize}"
encoding="${compile.encoding}"
debug="${compile.debug}"
srcdir="${build.path}/@{webapp.path}/WEB-INF/src"
includeantruntime="false"
excludes="**/*.smap">
<classpath refid="webapp.classpath"/>
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<include name="**" />
<exclude name="tags/**" />
</javac>
<!-- Now, build a .war out of the compiled java and all of the
specified libraries. -->
<war destfile="@{webapp.war}"
webxml="${source.path}/@{webapp.path}/WEB-INF/web.xml">
<classes dir="${build.path}/@{webapp.path}/WEB-INF/classes"/>
<webapp.lib/>
<lib dir=".">
<include name="@{webapp.jar}"/>
</lib>
<webapp.data/>
</war>
</sequential>
</macrodef>
<!-- Package the SUTime demo webapp: bundles javanlp-core.jar, the
     time-related third-party libraries, a POS tagger model, SUTime rule
     and holiday data, and the servlet's JSP/CSS/JS resources into
     sutime.war. -->
<target name="sutime.war" depends="compile,jar"
description="build the sutime webapp">
<war destfile="sutime.war"
webxml="${source.path}/edu/stanford/nlp/time/suservlet/WEB-INF/web.xml">
<lib dir=".">
<include name="javanlp-core.jar"/>
</lib>
<!-- Third-party libraries SUTime needs at runtime (XML, dates, holidays). -->
<lib dir="${basedir}/lib">
<include name="commons-lang3-3.1.jar"/>
<include name="xom-1.2.10.jar"/>
<include name="joda-time.jar"/>
<include name="jollyday-0.4.9.jar"/>
</lib>
<!-- NOTE(review): absolute /u/nlp/... path means this target only builds
     on machines with the Stanford NLP group filesystem mounted. -->
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/pos-tagger/distrib/english-left3words-distsim.tagger"/>
<!-- NOTE(review): zipfileset's "file" attribute names a single file; the
     "*" wildcard below may not be expanded by Ant. TODO confirm; the
     documented way to add multiple files is dir= with includes=. -->
<zipfileset prefix="WEB-INF/data/rules"
file="${source.path}/edu/stanford/nlp/time/rules/*"/>
<zipfileset prefix="WEB-INF/data/holidays"
file="${source.path}/edu/stanford/nlp/time/holidays/*"/>
<!-- Static web resources for the servlet UI (packed at the war root). -->
<zipfileset file="${data.path}/webapps/favicon.ico"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/header.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/footer.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/sutime.css"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/calendarview.css"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/calendarview.js"/>
<zipfileset file="${source.path}/edu/stanford/nlp/time/suservlet/prototype.js"/>
</war>
</target>
<!-- Package the OpenIE demo webapp: javanlp-core.jar plus the POS tagger,
     NER classifiers, dependency parser, PCFG parser, and naturalli model
     files, all placed under WEB-INF/data inside openie.war. -->
<target name="openie.war" depends="compile,jar"
description="build the openie webapp">
<war destfile="openie.war"
webxml="${source.path}/edu/stanford/nlp/naturalli/demo/web.xml">
<lib dir=".">
<include name="javanlp-core.jar"/>
</lib>
<!-- NOTE(review): the /u/nlp/... and /home/gabor/... paths are absolute,
     so this target builds only on machines where those filesystems
     exist; the /home/gabor entries tie it to one user's workspace. -->
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/pos-tagger/distrib/english-left3words-distsim.tagger"/>
<!-- NOTE(review): zipfileset "file" with a "*" wildcard may not be
     expanded by Ant (see the same pattern in sutime.war). TODO confirm. -->
<zipfileset prefix="WEB-INF/data"
file="${source.path}/edu/stanford/nlp/time/rules/*"/>
<zipfileset prefix="WEB-INF/data"
file="${source.path}/edu/stanford/nlp/time/holidays/*"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/ner/goodClassifiers/english.all.3class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/ner/goodClassifiers/english.muc.7class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/depparser/nn/distrib/english_SD.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>
<!-- naturalli-specific models and lexicon tables. -->
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/clauseSplitterModel.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/pp.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/obj.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/privative.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/subj_obj_pp.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/subj_pp_obj.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/subj_pp_pp.tab.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/home/gabor/workspace/naturalli/etc/subj_pp.tab.gz"/>
</war>
</target>
<!-- Package the parser demo webapp via the buildjsp macro (defined
     earlier in this file): compiles the JSPs under the given
     webapp.path and bundles parser models for several languages plus
     Chinese segmenter data under WEB-INF/data. -->
<target name="parser.war" depends="compile,jar"
description="build the parser webapp">
<buildjsp webapp.path="edu/stanford/nlp/parser/webapp"
webapp.war="parser.war"
webapp.jar="javanlp-core.jar">
<!-- Extra jar needed beyond javanlp-core: Spanish models. -->
<webapp.lib>
<lib dir="/u/nlp/data/StanfordCoreNLPModels">
<include name="stanford-spanish-corenlp-models-current.jar"/>
</lib>
</webapp.lib>
<!-- NOTE(review): all model paths are absolute /u/nlp/... locations, so
     this target builds only inside the Stanford NLP environment. -->
<webapp.data>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/englishPCFG.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/arabicFactored.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/frenchFactored.ser.gz"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/chineseFactored.ser.gz"/>
<zipfileset prefix="WEB-INF/data/chinesesegmenter"
file="/u/nlp/data/gale/segtool/stanford-seg/classifiers-2010/05202008-ctb6.processed-chris6.lex.gz"/>
<zipfileset prefix="WEB-INF/data/chinesesegmenter"
dir="/u/nlp/data/gale/segtool/stanford-seg/releasedata"/>
<zipfileset prefix="WEB-INF/data"
file="/u/nlp/data/lexparser/spanishPCFG.ser.gz"/>
<zipfileset file="${data.path}/webapps/favicon.ico"/>
</webapp.data>
</buildjsp>
</target>
<!-- Package the NER demo webapp: javanlp-core.jar, commons-lang3, the
     result-page JSP fragments, and five CRF classifier models placed
     under WEB-INF/data/models inside ner.war. -->
<target name="ner.war" depends="compile,jar"
description="build the ner webapp">
<!-- This is built from a java servlet, not a jsp. -->
<!-- The class needed for the results page will already be in
javanlp-core.jar -->
<war destfile="ner.war"
webxml="${source.path}/edu/stanford/nlp/ie/ner/webapp/WEB-INF/web.xml">
<lib dir=".">
<include name="javanlp-core.jar"/>
</lib>
<lib dir="${basedir}/lib">
<include name="commons-lang3-3.1.jar"/>
</lib>
<!-- JSP fragments used by the servlet's result page. -->
<zipfileset file="${source.path}/edu/stanford/nlp/ie/ner/webapp/ner.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/ie/ner/webapp/header.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/ie/ner/webapp/footer.jsp"/>
<!-- NOTE(review): absolute /u/nlp/... model paths tie this target to the
     Stanford NLP group filesystem. -->
<zipfileset prefix="WEB-INF/data/models"
file="/u/nlp/data/ner/goodClassifiers/english.all.3class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data/models"
file="/u/nlp/data/ner/goodClassifiers/english.conll.4class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data/models"
file="/u/nlp/data/ner/goodClassifiers/english.muc.7class.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data/models"
file="/u/nlp/data/ner/goodClassifiers/german.distsim.crf.ser.gz"/>
<zipfileset prefix="WEB-INF/data/models"
file="/u/nlp/data/ner/goodClassifiers/chinese.misc.distsim.crf.ser.gz"/>
<zipfileset file="${data.path}/webapps/favicon.ico"/>
</war>
</target>
<!-- Package the CoreNLP demo webapp: javanlp-core.jar, SUTime's
     third-party libraries, the English and Chinese model jars, the
     pipeline properties file, the JSP/JS page fragments, and the
     XML-to-HTML stylesheet, all inside corenlp.war. -->
<target name="corenlp.war" depends="compile,jar"
description="build the corenlp webapp">
<!-- This is built from a java servlet, not a jsp. -->
<!-- The class needed for the results page will already be in
javanlp-core.jar -->
<war destfile="corenlp.war"
webxml="${source.path}/edu/stanford/nlp/pipeline/webapp/WEB-INF/web.xml">
<lib dir=".">
<include name="javanlp-core.jar"/>
</lib>
<lib dir="${basedir}/lib">
<include name="commons-lang3-3.1.jar"/>
<include name="xom-1.2.10.jar"/>
<include name="xml-apis.jar"/>
<include name="joda-time.jar"/>
<include name="jollyday-0.4.9.jar"/>
</lib>
<!-- note for John: c:/Users/John Bauer/nlp/stanford-releases -->
<!-- NOTE(review): absolute /u/nlp/... path; builds only where the
     Stanford NLP models directory is mounted. -->
<lib dir="/u/nlp/data/StanfordCoreNLPModels">
<include name="stanford-corenlp-models-current.jar"/>
<include name="stanford-chinese-corenlp-models-current.jar"/>
</lib>
<!-- Default pipeline configuration, placed on the webapp classpath. -->
<classes dir="${source.path}/edu/stanford/nlp/pipeline">
<include name="StanfordCoreNLP.properties"/>
</classes>
<zipfileset file="${source.path}/edu/stanford/nlp/pipeline/webapp/header.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/pipeline/webapp/footer.jsp"/>
<zipfileset file="${source.path}/edu/stanford/nlp/pipeline/webapp/brat.js"/>
<zipfileset file="${data.path}/webapps/favicon.ico"/>
<!-- Stylesheet used to render the pipeline's XML output as HTML. -->
<zipfileset prefix="WEB-INF/data"
file="${doc.path}/corenlp/CoreNLP-to-HTML.xsl"/>
</war>
</target>
<!-- Expose environment variables as env.* properties (used below to
     locate JAVA_HOME). -->
<property environment="env" />
<!-- NOTE(review): this is an exact string comparison, so version1.7 is
     set only when ant.java.version is literally "1.7"; the tregex-osx
     target below is silently skipped on any other JDK. TODO confirm
     whether newer JDKs should also qualify. -->
<condition property="version1.7">
<equals arg1="${ant.java.version}" arg2="1.7" />
</condition>
<!-- Build a native OS X application bundle for the TregexGUI using
     Oracle's appbundler Ant task; requires JAVA_HOME to embed a JRE. -->
<target name="tregex-osx" if="version1.7" depends="jar"
description="Build an OS X app for TregexGUI">
<fail unless="env.JAVA_HOME"
message="Environment variable JAVA_HOME not set." />
<taskdef name="bundleapp"
classname="com.oracle.appbundler.AppBundlerTask"
classpath="lib/appbundler-1.0.jar" />
<bundleapp outputdirectory="."
name="Stanford Tregex"
displayname="Stanford Tregex"
icon="doc/tregex/nlp-logo-6x6.icns"
identifier="edu.stanford.nlp.trees.tregex.gui.TregexGUI"
mainclassname="edu.stanford.nlp.trees.tregex.gui.TregexGUI">
<runtime dir="${env.JAVA_HOME}" />
<classpath file="javanlp-core.jar" />
</bundleapp>
</target>
</project>

View File

@ -0,0 +1,119 @@
<!-- JavaNLP ... include this file to find & compile .jsp servlets -->
<!-- Shared Ant project: finds every *.jsp under ${source.path}, groups
     them by directory, and compiles each directory as a separate webapp
     (jsp -> java via Jasper, then java -> class via javac). Included by
     other build files; expects ${project.core}, ${source.path},
     ${build.path}, and the compile.* properties to be defined by the
     including build. -->
<project name="commonbuildjsp" basedir="../..">
<!-- This classpath is needed for jasper and javac in this target -->
<path id="webapp.classpath">
<fileset dir="${project.core}/lib/tomcat">
<include name="*.jar"/>
</fileset>
<pathelement location="${build.path}"/>
<pathelement location="${project.core}/lib/commons-logging.jar"/>
<pathelement location="${project.core}/lib/javax.servlet.jar"/>
</path>
<target name="jsp" depends="classpath,compile">
<!-- This target looks for all files that end with .jsp. It then
compiles each directory that those files reside in as a separate
webapp. Since there is currently only one webapp in all of core,
the effort taken to make this as general as possible instead of
hardcoding the path to that webapp was probably not effort well
spent. Still, one day there will be another webapp under core,
and then who will be laughing? I WILL.
November 2011: There are now four different webapps under
core. Yeah. I'm laughing. -->
<echo message="${ant.project.name}" />
<!-- This tool turns .jsp into .java -->
<taskdef classname="org.apache.jasper.JspC" name="jasper2" >
<classpath refid="webapp.classpath"/>
</taskdef>
<!-- This tool includes all sorts of useful stuff, like "if" and "for" -->
<taskdef resource="net/sf/antcontrib/antlib.xml">
<classpath>
<pathelement location="${project.core}/lib/ant-contrib-1.0b3.jar"/>
</classpath>
</taskdef>
<!-- This is a list of all the jsp files under this project -->
<path id="webapp.jspfiles">
<fileset dir="${source.path}">
<include name="**/*.jsp"/>
</fileset>
</path>
<!-- Turn all of the absolute paths into relative paths -->
<pathconvert property="webapp.relativejspfiles"
refid="webapp.jspfiles">
<map from="${source.path}${file.separator}" to=""/>
</pathconvert>
<!-- Make sure the paths are all sorted -->
<sortlist property="webapp.sortedjspfiles"
value="${webapp.relativejspfiles}"
delimiter="${path.separator}" />
<!-- We use this property to make sure we don't compile the same
directory twice in a row. This is where it helps to assume
the paths are all sorted. -->
<property name="webapp.previouspath" value=""/>
<!-- Now, for each file we found earlier... -->
<for param="webapp.currentfile" list="${webapp.sortedjspfiles}"
delimiter="${path.separator}">
<sequential>
<!-- First extract the directory containing the file. This works
on both linux and windows, assuming we don't have any \ or /
in the file name, but that would just be silly. However, it
would still be nice to find a cleaner way of doing this. -->
<!-- NOTE(review): the trailing ".jsp" in this regexp uses an
unescaped "." so it also matches e.g. "Xjsp"; harmless here
since the fileset above only selects *.jsp files. -->
<propertyregex property="webapp.path"
override="true"
input="@{webapp.currentfile}"
regexp="(.*)[\\/][^\\/]*.jsp"
select="\1" />
<!-- Then build the webapp in that path -->
<if>
<not>
<equals arg1="${webapp.path}" arg2="${webapp.previouspath}" />
</not>
<then>
<echo message="Building .jsp ${webapp.path}" />
<jasper2
validateXml="false"
uriroot="${source.path}/${webapp.path}"
webXmlFragment="${build.path}/${webapp.path}/WEB-INF/generated_web.xml"
outputDir="${build.path}/${webapp.path}/WEB-INF/src" />
<mkdir dir="${build.path}/${webapp.path}/WEB-INF/classes"/>
<mkdir dir="${build.path}/${webapp.path}/WEB-INF/lib"/>
<javac destdir="${build.path}/${webapp.path}/WEB-INF/classes"
optimize="${compile.optimize}"
encoding="${compile.encoding}"
debug="${compile.debug}"
srcdir="${build.path}/${webapp.path}/WEB-INF/src"
includeantruntime="false"
excludes="**/*.smap">
<classpath refid="webapp.classpath"/>
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<include name="**" />
<exclude name="tags/**" />
</javac>
</then>
</if>
<!-- Save the path we just looked at to ensure we don't
compile it twice in a row. The compiler will actually be
smart enough to not compile twice, but even using it to check
would be time consuming. -->
<!-- NOTE(review): plain <property> is immutable in Ant, which is
why ant-contrib's propertycopy with override="true" is used
here instead of a second <property> assignment. -->
<propertycopy name="webapp.previouspath" override="true"
from="webapp.path"/>
</sequential>
</for>
</target>
</project>

View File

@ -0,0 +1,14 @@
README for data
---------------
Small data files (test files, properties files, etc.) can go here.
Don't put large models here. That would cause git to bloat too much.
Open source: Don't put here any file that is copyright or that we
don't have the rights to redistribute. The contents of this
directory appear in our public github repository.
Contact: John Bauer (horatio@gmail.com)
Last Modified: 2010-10-27

View File

@ -0,0 +1,109 @@
ColumnDataClassifier invoked at Thu Oct 28 01:47:49 PDT 2010 with arguments:
-prop projects/core/data/edu/stanford/nlp/classify/iris2007.prop
Gold answer column is 0
numDatums: 130
numLabels: 3 [Iris-setosa, Iris-versicolor, Iris-virginica]
numFeatures (Phi(X) types): 5
QNMinimizer called on double function of 15 variables, using M = 15.
An explanation of the output:
Iter The number of iterations
evals The number of function evaluations
SCALING <D> Diagonal scaling was used; <I> Scaled Identity
LINESEARCH [## M steplength] Minpack linesearch
1-Function value was too high
2-Value ok, gradient positive, positive curvature
3-Value ok, gradient negative, positive curvature
4-Value ok, gradient negative, negative curvature
[.. B] Backtracking
VALUE The current function value
TIME Total elapsed time
|GNORM| The current norm of the gradient
{RELNORM} The ratio of the current to initial gradient norms
AVEIMPROVE The average improvement / current value
Iter ## evals ## <SCALING> [LINESEARCH] VALUE TIME |GNORM| {RELNORM} AVEIMPROVE
Iter 1 evals 1 <D> [113M 7.322E-4] 1.343E2 0.01s |1.294E2| {8.774E-1} 0.000E0
Iter 2 evals 5 <D> [M 1.000E0] 1.218E2 0.01s |1.203E2| {8.153E-1} 5.149E-2
Iter 3 evals 6 <D> [1M 2.579E-1] 9.447E1 0.01s |3.005E2| {2.038E0} 1.406E-1
Iter 4 evals 8 <D> [M 1.000E0] 6.653E1 0.02s |8.207E1| {5.564E-1} 2.548E-1
Iter 5 evals 9 <D> [M 1.000E0] 5.801E1 0.02s |6.732E1| {4.564E-1} 2.631E-1
Iter 6 evals 10 <D> [M 1.000E0] 5.165E1 0.02s |5.683E1| {3.853E-1} 2.668E-1
Iter 7 evals 11 <D> [M 1.000E0] 4.106E1 0.02s |1.640E1| {1.112E-1} 3.245E-1
Iter 8 evals 12 <D> [M 1.000E0] 3.117E1 0.02s |1.342E1| {9.098E-2} 4.137E-1
Iter 9 evals 13 <D> [M 1.000E0] 2.379E1 0.02s |7.857E0| {5.327E-2} 5.162E-1
Iter 10 evals 14 <D> [M 1.000E0] 2.081E1 0.03s |4.359E0| {2.955E-2} 5.454E-1
Iter 11 evals 15 <D> [M 1.000E0] 1.948E1 0.03s |2.471E0| {1.675E-2} 4.775E-1
Iter 12 evals 16 <D> [M 1.000E0] 1.822E1 0.03s |2.027E0| {1.374E-2} 3.804E-1
Iter 13 evals 17 <D> [1M 2.803E-1] 1.765E1 0.03s |3.416E0| {2.316E-2} 2.517E-1
Iter 14 evals 19 <D> [1M 3.647E-1] 1.742E1 0.04s |5.151E0| {3.492E-2} 2.118E-1
Iter 15 evals 21 <D> [M 1.000E0] 1.733E1 0.04s |5.687E0| {3.856E-2} 1.800E-1
Iter 16 evals 22 <D> [M 1.000E0] 1.715E1 0.04s |4.654E0| {3.155E-2} 1.268E-1
Iter 17 evals 23 <D> [13M 2.793E-1] 1.654E1 0.04s |2.496E1| {1.692E-1} 8.040E-2
Iter 18 evals 26 <D> [M 1.000E0] 1.625E1 0.05s |1.109E1| {7.520E-2} 4.218E-2
Iter 19 evals 27 <D> [M 1.000E0] 1.616E1 0.05s |6.277E0| {4.256E-2} 2.615E-2
Iter 20 evals 28 <D> [M 1.000E0] 1.611E1 0.05s |1.533E0| {1.039E-2} 1.902E-2
Iter 21 evals 29 <D> [1M 3.770E-1] 1.610E1 0.05s |7.873E-1| {5.337E-3} 1.197E-2
Iter 22 evals 31 <D> [M 1.000E0] 1.610E1 0.05s |4.135E-1| {2.804E-3} 8.769E-3
Iter 23 evals 32 <D> [M 1.000E0] 1.609E1 0.06s |5.164E-1| {3.501E-3} 7.497E-3
Iter 24 evals 33 <D> [M 1.000E0] 1.608E1 0.06s |9.113E-1| {6.179E-3} 7.047E-3
Iter 25 evals 34 <D> [M 1.000E0] 1.606E1 0.06s |1.449E0| {9.821E-3} 6.179E-3
Iter 26 evals 35 <D> [M 1.000E0] 1.603E1 0.06s |1.457E0| {9.876E-3} 2.920E-3
Iter 27 evals 36 <D> [M 1.000E0] 1.601E1 0.06s |6.130E-1| {4.156E-3} 1.393E-3
Iter 28 evals 37 <D> [M 1.000E0] 1.600E1 0.06s |1.423E-1| {9.646E-4} 9.225E-4
Iter 29 evals 38 <D> [M 1.000E0] 1.600E1 0.06s |3.959E-1| {2.684E-3} 6.343E-4
Iter 30 evals 39 <D> [M 1.000E0] 1.599E1 0.07s |3.814E-1| {2.586E-3} 6.082E-4
Iter 31 evals 40 <D> [M 1.000E0] 1.599E1 0.07s |1.383E-1| {9.376E-4} 6.128E-4
Iter 32 evals 41 <D> [1M 2.468E-1] 1.599E1 0.07s |6.242E-1| {4.232E-3} 5.882E-4
Iter 33 evals 43 <D> [M 1.000E0] 1.599E1 0.07s |5.413E-2| {3.670E-4} 5.399E-4
Iter 34 evals 44 <D> [M 1.000E0] 1.599E1 0.07s |9.543E-2| {6.470E-4} 3.936E-4
Iter 35 evals 45 <D> [M 1.000E0] 1.599E1 0.07s |7.560E-2| {5.125E-4} 2.130E-4
Iter 36 evals 46 <D> [M 1.000E0] 1.599E1 0.07s |2.585E-1| {1.752E-3} 1.090E-4
Iter 37 evals 47 <D> [1M 2.684E-1] 1.599E1 0.07s
QNMinimizer terminated due to average improvement: | newest_val - previous_val | / |newestVal| < TOL
Total time spent in optimization: 0.07s
Built this classifier: LinearClassifier [printing top 200 features]
(3-Value,Iris-virginica) 3.9660
(4-Value,Iris-virginica) 3.9054
(2-Value,Iris-setosa) 2.7759
(CLASS,Iris-versicolor) 2.5068
(1-Value,Iris-setosa) 1.3780
(1-Value,Iris-versicolor) 0.6052
(CLASS,Iris-setosa) 0.5928
(2-Value,Iris-versicolor) 0.0735
(3-Value,Iris-versicolor) -0.0444
(1-Value,Iris-virginica) -1.8754
(4-Value,Iris-setosa) -1.9822
(4-Value,Iris-versicolor) -1.9982
(2-Value,Iris-virginica) -2.8192
(CLASS,Iris-virginica) -3.1561
(3-Value,Iris-setosa) -3.8551
Output format: dataColumn1 goldAnswer classifierAnswer P(classifierAnswer)
5 Iris-setosa Iris-setosa 0.995615365125735
4.6 Iris-setosa Iris-setosa 0.9994804135630505
5.1 Iris-setosa Iris-setosa 0.9937095680980086
4.9 Iris-setosa Iris-setosa 0.9905109629700247
5.4 Iris-setosa Iris-setosa 0.9982151488134486
4.4 Iris-setosa Iris-setosa 0.9944214428148407
5.3 Iris-setosa Iris-setosa 0.9984497925740373
6.1 Iris-versicolor Iris-versicolor 0.8873152482428373
6 Iris-versicolor Iris-versicolor 0.9424246013278404
5.5 Iris-versicolor Iris-versicolor 0.9030026595536319
6.5 Iris-versicolor Iris-versicolor 0.928816167001929
6.8 Iris-versicolor Iris-versicolor 0.9569376555329442
6.2 Iris-versicolor Iris-versicolor 0.9857141927233324
6.7 Iris-virginica Iris-virginica 0.9698639532763317
6.4 Iris-virginica Iris-virginica 0.8982390073296296
5.7 Iris-virginica Iris-virginica 0.9920401400173403
6.7 Iris-virginica Iris-virginica 0.968576539063806
6.8 Iris-virginica Iris-virginica 0.9957320369272686
7.7 Iris-virginica Iris-virginica 0.9900526044768513
7.3 Iris-virginica Iris-virginica 0.9766204287594443
20 examples in test set
Cls Iris-setosa: TP=7 FN=0 FP=0 TN=13; Acc 1.000 P 1.000 R 1.000 F1 1.000
Cls Iris-versicolor: TP=6 FN=0 FP=0 TN=14; Acc 1.000 P 1.000 R 1.000 F1 1.000
Cls Iris-virginica: TP=7 FN=0 FP=0 TN=13; Acc 1.000 P 1.000 R 1.000 F1 1.000
Micro-averaged accuracy/F1: 1.00000
Macro-averaged F1: 1.00000

View File

@ -0,0 +1,20 @@
5 Iris-setosa Iris-setosa 0.996 0.996
4.6 Iris-setosa Iris-setosa 0.999 0.999
5.1 Iris-setosa Iris-setosa 0.994 0.994
4.9 Iris-setosa Iris-setosa 0.991 0.991
5.4 Iris-setosa Iris-setosa 0.998 0.998
4.4 Iris-setosa Iris-setosa 0.994 0.994
5.3 Iris-setosa Iris-setosa 0.998 0.998
6.1 Iris-versicolor Iris-versicolor 0.887 0.887
6 Iris-versicolor Iris-versicolor 0.942 0.942
5.5 Iris-versicolor Iris-versicolor 0.903 0.903
6.5 Iris-versicolor Iris-versicolor 0.929 0.929
6.8 Iris-versicolor Iris-versicolor 0.957 0.957
6.2 Iris-versicolor Iris-versicolor 0.986 0.986
6.7 Iris-virginica Iris-virginica 0.970 0.970
6.4 Iris-virginica Iris-virginica 0.898 0.898
5.7 Iris-virginica Iris-virginica 0.992 0.992
6.7 Iris-virginica Iris-virginica 0.969 0.969
6.8 Iris-virginica Iris-virginica 0.996 0.996
7.7 Iris-virginica Iris-virginica 0.990 0.990
7.3 Iris-virginica Iris-virginica 0.977 0.977

View File

@ -0,0 +1,20 @@
5 Iris-setosa Iris-setosa 0.992 0.992
4.6 Iris-setosa Iris-setosa 0.999 0.999
5.1 Iris-setosa Iris-setosa 0.989 0.989
4.9 Iris-setosa Iris-setosa 0.984 0.984
5.4 Iris-setosa Iris-setosa 0.996 0.996
4.4 Iris-setosa Iris-setosa 0.991 0.991
5.3 Iris-setosa Iris-setosa 0.997 0.997
6.1 Iris-versicolor Iris-versicolor 0.847 0.847
6 Iris-versicolor Iris-versicolor 0.931 0.931
5.5 Iris-versicolor Iris-versicolor 0.798 0.798
6.5 Iris-versicolor Iris-versicolor 0.873 0.873
6.8 Iris-versicolor Iris-versicolor 0.914 0.914
6.2 Iris-versicolor Iris-versicolor 0.969 0.969
6.7 Iris-virginica Iris-virginica 0.951 0.951
6.4 Iris-virginica Iris-virginica 0.833 0.833
5.7 Iris-virginica Iris-virginica 0.986 0.986
6.7 Iris-virginica Iris-virginica 0.928 0.928
6.8 Iris-virginica Iris-virginica 0.987 0.987
7.7 Iris-virginica Iris-virginica 0.981 0.981
7.3 Iris-virginica Iris-virginica 0.956 0.956

View File

@ -0,0 +1,35 @@
#
# Print features
#
useClassFeature=true
1.realValued=true
2.realValued=true
3.realValued=true
4.realValued=true
printClassifier=HighWeight
printClassifierParam=200
# printTo=classifier.txt
#
# Mapping
#
goldAnswerColumn=0
displayedColumn=1
#
# Optimization
#
intern=true
sigma=3
useQN=true
QNsize=15
tolerance=1e-4
#
# Training input
#
#trainFile=./examples/iris.train
#testFile=./examples/iris.test
trainFile=data/edu/stanford/nlp/classify/iris.train
testFile=data/edu/stanford/nlp/classify/iris.test

View File

@ -0,0 +1,20 @@
Iris-setosa 5 3.4 1.5 0.2
Iris-setosa 4.6 3.6 1 0.2
Iris-setosa 5.1 3.8 1.9 0.4
Iris-setosa 4.9 3 1.4 0.2
Iris-setosa 5.4 3.9 1.7 0.4
Iris-setosa 4.4 3.2 1.3 0.2
Iris-setosa 5.3 3.7 1.5 0.2
Iris-versicolor 6.1 2.9 4.7 1.4
Iris-versicolor 6 3.4 4.5 1.6
Iris-versicolor 5.5 2.3 4 1.3
Iris-versicolor 6.5 2.8 4.6 1.5
Iris-versicolor 6.8 2.8 4.8 1.4
Iris-versicolor 6.2 2.9 4.3 1.3
Iris-virginica 6.7 3 5.2 2.3
Iris-virginica 6.4 3.1 5.5 1.8
Iris-virginica 5.7 2.5 5 2
Iris-virginica 6.7 3.3 5.7 2.1
Iris-virginica 6.8 3.2 5.9 2.3
Iris-virginica 7.7 3 6.1 2.3
Iris-virginica 7.3 2.9 6.3 1.8

View File

@ -0,0 +1,130 @@
Iris-setosa 5.1 3.5 1.4 0.2
Iris-setosa 4.7 3.2 1.3 0.2
Iris-setosa 4.6 3.1 1.5 0.2
Iris-setosa 5 3.6 1.4 0.2
Iris-setosa 4.6 3.4 1.4 0.3
Iris-setosa 4.4 2.9 1.4 0.2
Iris-setosa 4.9 3.1 1.5 0.1
Iris-setosa 5.4 3.7 1.5 0.2
Iris-setosa 4.8 3.4 1.6 0.2
Iris-setosa 4.8 3 1.4 0.1
Iris-setosa 4.3 3 1.1 0.1
Iris-setosa 5.8 4 1.2 0.2
Iris-setosa 5.7 4.4 1.5 0.4
Iris-setosa 5.4 3.9 1.3 0.4
Iris-setosa 5.1 3.5 1.4 0.3
Iris-setosa 5.7 3.8 1.7 0.3
Iris-setosa 5.1 3.8 1.5 0.3
Iris-setosa 5.4 3.4 1.7 0.2
Iris-setosa 5.1 3.7 1.5 0.4
Iris-setosa 5.1 3.3 1.7 0.5
Iris-setosa 4.8 3.4 1.9 0.2
Iris-setosa 5 3 1.6 0.2
Iris-setosa 5 3.4 1.6 0.4
Iris-setosa 5.2 3.5 1.5 0.2
Iris-setosa 5.2 3.4 1.4 0.2
Iris-setosa 4.7 3.2 1.6 0.2
Iris-setosa 4.8 3.1 1.6 0.2
Iris-setosa 5.4 3.4 1.5 0.4
Iris-setosa 5.2 4.1 1.5 0.1
Iris-setosa 5.5 4.2 1.4 0.2
Iris-setosa 4.9 3.1 1.5 0.1
Iris-setosa 5 3.2 1.2 0.2
Iris-setosa 5.5 3.5 1.3 0.2
Iris-setosa 4.9 3.1 1.5 0.1
Iris-setosa 4.4 3 1.3 0.2
Iris-setosa 5.1 3.4 1.5 0.2
Iris-setosa 5 3.5 1.3 0.3
Iris-setosa 4.5 2.3 1.3 0.3
Iris-setosa 5 3.5 1.6 0.6
Iris-setosa 4.8 3 1.4 0.3
Iris-setosa 5.1 3.8 1.6 0.2
Iris-setosa 4.6 3.2 1.4 0.2
Iris-setosa 5 3.3 1.4 0.2
Iris-versicolor 7 3.2 4.7 1.4
Iris-versicolor 6.4 3.2 4.5 1.5
Iris-versicolor 6.9 3.1 4.9 1.5
Iris-versicolor 5.7 2.8 4.5 1.3
Iris-versicolor 6.3 3.3 4.7 1.6
Iris-versicolor 4.9 2.4 3.3 1
Iris-versicolor 6.6 2.9 4.6 1.3
Iris-versicolor 5.2 2.7 3.9 1.4
Iris-versicolor 5 2 3.5 1
Iris-versicolor 5.9 3 4.2 1.5
Iris-versicolor 6 2.2 4 1
Iris-versicolor 5.6 2.9 3.6 1.3
Iris-versicolor 6.7 3.1 4.4 1.4
Iris-versicolor 5.6 3 4.5 1.5
Iris-versicolor 5.8 2.7 4.1 1
Iris-versicolor 6.2 2.2 4.5 1.5
Iris-versicolor 5.6 2.5 3.9 1.1
Iris-versicolor 5.9 3.2 4.8 1.8
Iris-versicolor 6.1 2.8 4 1.3
Iris-versicolor 6.3 2.5 4.9 1.5
Iris-versicolor 6.1 2.8 4.7 1.2
Iris-versicolor 6.4 2.9 4.3 1.3
Iris-versicolor 6.6 3 4.4 1.4
Iris-versicolor 6.7 3 5 1.7
Iris-versicolor 6 2.9 4.5 1.5
Iris-versicolor 5.7 2.6 3.5 1
Iris-versicolor 5.5 2.4 3.8 1.1
Iris-versicolor 5.5 2.4 3.7 1
Iris-versicolor 5.8 2.7 3.9 1.2
Iris-versicolor 6 2.7 5.1 1.6
Iris-versicolor 5.4 3 4.5 1.5
Iris-versicolor 6.7 3.1 4.7 1.5
Iris-versicolor 6.3 2.3 4.4 1.3
Iris-versicolor 5.6 3 4.1 1.3
Iris-versicolor 5.5 2.5 4 1.3
Iris-versicolor 5.5 2.6 4.4 1.2
Iris-versicolor 6.1 3 4.6 1.4
Iris-versicolor 5.8 2.6 4 1.2
Iris-versicolor 5 2.3 3.3 1
Iris-versicolor 5.6 2.7 4.2 1.3
Iris-versicolor 5.7 3 4.2 1.2
Iris-versicolor 5.7 2.9 4.2 1.3
Iris-versicolor 5.1 2.5 3 1.1
Iris-versicolor 5.7 2.8 4.1 1.3
Iris-virginica 6.3 3.3 6 2.5
Iris-virginica 5.8 2.7 5.1 1.9
Iris-virginica 7.1 3 5.9 2.1
Iris-virginica 6.3 2.9 5.6 1.8
Iris-virginica 6.5 3 5.8 2.2
Iris-virginica 7.6 3 6.6 2.1
Iris-virginica 4.9 2.5 4.5 1.7
Iris-virginica 6.7 2.5 5.8 1.8
Iris-virginica 7.2 3.6 6.1 2.5
Iris-virginica 6.5 3.2 5.1 2
Iris-virginica 6.4 2.7 5.3 1.9
Iris-virginica 6.8 3 5.5 2.1
Iris-virginica 5.8 2.8 5.1 2.4
Iris-virginica 6.4 3.2 5.3 2.3
Iris-virginica 6.5 3 5.5 1.8
Iris-virginica 7.7 3.8 6.7 2.2
Iris-virginica 7.7 2.6 6.9 2.3
Iris-virginica 6 2.2 5 1.5
Iris-virginica 6.9 3.2 5.7 2.3
Iris-virginica 5.6 2.8 4.9 2
Iris-virginica 7.7 2.8 6.7 2
Iris-virginica 6.3 2.7 4.9 1.8
Iris-virginica 7.2 3.2 6 1.8
Iris-virginica 6.2 2.8 4.8 1.8
Iris-virginica 6.1 3 4.9 1.8
Iris-virginica 6.4 2.8 5.6 2.1
Iris-virginica 7.2 3 5.8 1.6
Iris-virginica 7.4 2.8 6.1 1.9
Iris-virginica 7.9 3.8 6.4 2
Iris-virginica 6.4 2.8 5.6 2.2
Iris-virginica 6.3 2.8 5.1 1.5
Iris-virginica 6.1 2.6 5.6 1.4
Iris-virginica 6.3 3.4 5.6 2.4
Iris-virginica 6 3 4.8 1.8
Iris-virginica 6.9 3.1 5.4 2.1
Iris-virginica 6.7 3.1 5.6 2.4
Iris-virginica 6.9 3.1 5.1 2.3
Iris-virginica 5.8 2.7 5.1 1.9
Iris-virginica 6.7 3.3 5.7 2.5
Iris-virginica 6.3 2.5 5 1.9
Iris-virginica 6.5 3 5.2 2
Iris-virginica 6.2 3.4 5.4 2.3
Iris-virginica 5.9 3 5.1 1.8

View File

@ -0,0 +1,359 @@
1
1 STILLALONEWOLF_20050102.1100 .
2
2 Munir
3 Lone Wolf Mohammed Munir
3 a strange person
3 he
4 he
4 him
6 he
8 Munir
3
2 eng.LDC2005E83 WEB TEXT
4
2 2005-01-02 Munir , Al - Warsha and Extraction
5
2 The Clutches Of Winter Depression
17 the clutches of winter depression
6
2 Winter Depression
17 winter depression
9
3 you
11
3 a concert
8 the concert
8 the concert
8 the concert
9 the concert
9 the concert
12
4 I
4 me
5 I
5 I
5 I
6 me
8 I
10 I
10 me
11 I
15 me
17 I
17 I
17 myself
15
4 the worst possible performance from him as well as the rest of the band
17
4 the rest of the band
18
4 the band
19
4 a bad choice of songs and even disrespect to the audience
20
4 a bad choice of songs
21
4 songs
22
4 even disrespect to the audience
23
4 the audience
25
5 friends
28
5 concerts
31
6 the greatest performance and well - chosen songs
32
6 the greatest performance
33
6 well - chosen songs
34
7 Abdullah Hilmi
35
7 the end
36
7 we
10 we
37
7 the ney and dokah flutist Abdullah Hilmi
38
7 the ney
39
7 dokah flutist Abdullah Hilmi
40
7 half the group
41
8 One
43
8 The problem
9 it
9 " banjo "
9 it
9 " hashish "
46
8 prediction and personal intuition
47
8 the best solution
48
8 the location of the concert
50
8 The Opera concerts
51
8 normally good standards
52
8 One witty person
8 he
10 him
10 him
16 the person
55
8 the type of " smoke " or the type of narcotic
56
8 the type of " smoke "
57
8 the type of narcotic
64
10 Alexandria
65
10 this
10 the decisive factor
11 this
69
10 residence in Alexandria , where we can help him to present the most beautiful concerts :) , In general , thank you Mohammed Munir for giving me unexpected pleasure on New Year 's Eve
70
10 residence in Alexandria
73
10 the most beautiful concerts
74
10 you
75
10 Mohammed Munir for giving me unexpected pleasure on New Year 's Eve
77
10 New Year 's Eve
78
10 New Year 's
79
11 Al - Warsha Theatre Company
80
11 the way
81
11 one day
84
11 two hours
85
12 Al - Zamalek
86
12 They
15 They
15 their
87
12 the show " Halawat Al - Dunia " ( The beauty of life ) or Cairo Calling at Saqiat Al - Sawi in Al - Zamalek until the end of the week
88
12 the show " Halawat Al - Dunia " ( The beauty of life )
13 It
89
12 " Halawat Al - Dunia " ( The beauty of life )
90
12 The beauty of life
91
12 Cairo Calling at Saqiat Al - Sawi in Al - Zamalek until the end of the week
92
12 Saqiat Al - Sawi in Al - Zamalek
93
12 the end of the week
94
12 the week
96
13 parts of old shows
97
13 old shows
98
13 the addition of some new parts
99
13 some new parts
100
14 The show
14 a real theatrical showcase
101
14 a real theatrical showcase and a huge acting effort
103
14 a huge acting effort
104
14 Al - Warsha
14 its
105
14 its attention to detail , such as the scene at the church
107
14 the scene at the church
108
14 the church
14 the church
109
14 the choir 's voices
14 the voices
110
14 the choir 's
111
14 hymns
112
14 the narrator
14 he
113
14 the hero of the story
14 the hero
114
14 the story
121
15 their acting abilities as individuals
123
15 individuals
124
15 the play in general
125
15 general
126
16 4
127
16 example in the scene with the person
128
16 the scene with the person
130
16 a monologue of 4 characters : the priest , the church guard , the citizen and the viewer
131
16 4 characters : the priest , the church guard , the citizen and the viewer
132
16 the priest
133
16 the church guard
134
16 the citizen
135
16 the viewer
136
16 The actor used simple makeup : a single prop which turns from a priest 's beard , into a policeman 's hat , a viewer 's hat and a citizen 's hat
137
16 a single prop which turns from a priest 's beard , into a policeman 's hat , a viewer 's hat and a citizen 's hat
138
16 a priest 's beard
139
16 a priest 's
140
16 a policeman 's hat , a viewer 's hat and a citizen 's hat
141
16 a policeman 's hat
142
16 a policeman 's
143
16 a viewer 's hat
144
16 a viewer 's
145
16 a citizen 's hat
146
16 a citizen 's

View File

@ -0,0 +1,61 @@
<DOC>
<DOCID> STILLALONEWOLF_20050102.1100.eng.LDC2005E83 </DOCID>
<DOCTYPE SOURCE="weblog"> WEB TEXT </DOCTYPE>
<DATETIME> 2005-01-02 </DATETIME>
<BODY>
<HEADLINE>
Munir, Al-Warsha and Extraction From The Clutches Of Winter Depression.
</HEADLINE>
<TEXT>
<POST>
<POSTER> Lone Wolf </POSTER>
Mohammed Munir is a strange person, and you can never expect what he
is going to do in a concert.
Sometimes I go expecting to see boundless creativity, then he
surprises me with the worst possible performance from him as well as
the rest of the band, in addition to a bad choice of songs and even
disrespect to the audience.
Sometimes I just go to see friends that I have missed and whom I only
see at concerts.
But then he surprises me with the greatest performance and well-chosen
songs.
In the end we have to admit that the ney and dokah flutist Abdullah
Hilmi is half the group.
The problem always remains: how will I know whether the concert will
be good or bad,
prediction and personal intuition is the best solution, and also the
location of the concert,
The Opera concerts have normally good standards,
One witty person said, and he appears to be correct, that the concert
depends on the type of "smoke" or the type of narcotic which Munir
handles before singing.
So if it was "banjo" the concert will be bad, and if it was "hashish"
then the concert will be good.
And if this is the decisive factor, then I invite him to take up
residence in Alexandria, where we can help him to present the most
beautiful concerts :),
In general, thank you Mohammed Munir for giving me unexpected pleasure
on New Year's Eve.
By the way, one day after this, I was fortunate enough to seize two
hours with Al-Warsha Theatre Company. They are presenting the show
"Halawat Al-Dunia" (The beauty of life) or Cairo Calling at Saqiat Al-
Sawi in Al-Zamalek until the end of the week.
It is compiled from parts of old shows, with the addition of some new
parts.
The show is a real theatrical showcase and a huge acting effort,
Al-Warsha is notable for its attention to detail, such as the scene at
the church where the choir's voices rise with hymns as the narrator
says that the hero of the story is entering the church, then the
voices drop when he says that the hero went out.
They always dazzle me with their acting abilities as individuals, and
in the play in general. For example in the scene with the person
performing a monologue of 4 characters: the priest, the church guard,
the citizen and the viewer; The actor used simple makeup: a single
prop which turns from a priest's beard, into a policeman's hat, a
viewer's hat and a citizen's hat.
I think that I was able to extract myself temporarily from the
clutches of winter depression.
</POST>
</TEXT>
</BODY>
</DOC>

View File

@ -0,0 +1,17 @@
# Pipeline options
annotators = pos, lemma, ner, parse
#dcoref.sievePasses = MarkRole, DiscourseMatch, ExactStringMatch, RelaxedExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, AliasMatch, RelaxedHeadMatch, LexicalChainMatch, CorefDictionaryMatch, PronounMatch
dcoref.sievePasses = MarkRole, DiscourseMatch, ExactStringMatch, RelaxedExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, RelaxedHeadMatch, PronounMatch
dcoref.score = true
dcoref.postprocessing = true
dcoref.maxdist = -1
dcoref.use.big.gender.number = false
dcoref.replicate.conll = true
dcoref.conll.scorer = /u/scr/nlp/data/conll-2011/scorer/v4/scorer.pl
dcoref.logFile = /u/scr/nlp/coref/error_log/temp/result_conlldev.txt
dcoref.conll2011 = /u/scr/nlp/data/conll-2011/v2/data/dev/data/english/annotations

View File

@ -0,0 +1,15 @@
CONLL EVAL SUMMARY (Before COREF)
Identification of Mentions: Recall: (12405 / 14291) 86.8% Precision: (12405 / 34910) 35.53% F1: 50.42%
CONLL EVAL SUMMARY (After COREF)
METRIC muc:Coreference: Recall: (6253 / 10539) 59.33% Precision: (6253 / 10073) 62.07% F1: 60.67%
METRIC bcub:Coreference: Recall: (12457.63 / 18383) 67.76% Precision: (13632.3 / 18383) 74.15% F1: 70.81%
METRIC ceafm:Coreference: Recall: (10927 / 18383) 59.44% Precision: (10927 / 18383) 59.44% F1: 59.44%
METRIC ceafe:Coreference: Recall: (3833.81 / 7844) 48.87% Precision: (3833.81 / 8310) 46.13% F1: 47.46%
METRIC blanc:Coreference links: Recall: (25241 / 54427) 46.37% Precision: (25241 / 40586) 62.19% F1: 53.13%
Non-coreference links: Recall: (931826 / 947171) 98.37% Precision: (931826 / 961012) 96.96% F1: 97.66%
BLANC: Recall: (0.72 / 1) 72.37% Precision: (0.8 / 1) 79.57% F1: 75.39%
Final conll score ((muc+bcub+ceafe)/3) = 59.65
Final score (pairwise) Precision = 0.57
done

View File

@ -0,0 +1,4 @@
FOO John
BAR John Bauer
BAZ Bauer John

View File

@ -0,0 +1 @@
عندما كنت اطالع المواضيع الخاصة ب# جماعة الاخوان المسلمين . . وجدت شئ غريب . . وجدت المدافعين عن الجماعة . . و# ل# الاسف . . يتوجهون الى الهجوم عندما لا يستطيعون الدفاع . .

View File

@ -0,0 +1 @@
عندما كنت اطالع المواضيع الخاصه بجماعة الاخوان المسلمين . . وجدت شئ غريب . . وجدت المدافعين عن الجماعه . . وللاسف . . يتوجهون الى الهجوم عندما لا يستطيعون الدفاع . .

View File

@ -0,0 +1 @@
و# اعلن مدير شرطة عمان ان #ه اثناء اقامة الحفل و# بناء على خلفية ثارية , اقدم خالد الكحلوت ( 22 عاما ) ب# اطلاق النار على سعيد الحرازين ( 30 عاما ) فاردا #ه . و# صادف اثناء اطلاق النار وجود عادل الحرازين شقيق القتيل الذي قام ب# اطلاق النار على الجاني ف# قتل #ه .

View File

@ -0,0 +1 @@
واعلن مدير شرطة عمان انه اثناء اقامة الحفل وبناء على خلفية ثأرية ، اقدم خالد الكحلوت ( 22 عاما ) باطلاق النار على سعيد الحرازين ( 30 عاما ) فارداه . وصادف اثناء اطلاق النار وجود عادل الحرازين شقيق القتيل الذي قام باطلاق النار على الجاني فقتله .

View File

@ -0,0 +1 @@
This is a test sentence.

View File

@ -0,0 +1,4 @@
0.1 0.2 0.3 0.4 0.5
0.6 0.7 0.8 0.9 1.0
1 2 3 4 5
6 7 8 9 10

View File

@ -0,0 +1,4 @@
*UNK*
*START*
*END*
the

View File

@ -0,0 +1,4 @@
*UNK* 0.1 0.2 0.3 0.4 0.5
*START* 0.6 0.7 0.8 0.9 1.0
*END* 1 2 3 4 5
the 6 7 8 9 10

View File

@ -0,0 +1 @@
و نشر العدل من خلال قضاء مستقل .

View File

@ -0,0 +1 @@
俄国 希望 伊朗 没有 制造 核武器 计划 。

View File

@ -0,0 +1 @@
国务院日前发出紧急通知,要求各地切实落实保证市场供应的各项政策,维护副食品价格稳定。

View File

@ -0,0 +1 @@
国务院日前发出紧急通知,要求各地切实落实保证市场供应的各项政策,维护副食品价格稳定。

View File

@ -0,0 +1 @@
俄国 希望 伊朗 没有 制造 核武器 计划 。

View File

@ -0,0 +1 @@
The quick brown fox jumped over the lazy dog.

View File

@ -0,0 +1 @@
Le gouvernement se résout donc à renvoyer la balle dans le camp de partenaires qui ont amplement fait la preuve de leur incapacité à gérer le système de santé .

View File

@ -0,0 +1 @@
Wir haben nichts zu tun .

View File

@ -0,0 +1 @@
(S (CC و) (VP (VBD صادف) (NP-TMP (NN اثناء) (NP (NN اطلاق) (NP (DTNN النار)))) (NP-SBJ (NN وجود) (NP (NP (NP (NNP عادل) (DTNNP الحرازين)) (NP (NN شقيق) (NP (DTNN القتيل)))) (SBAR (SBAR (WHNP-2 (WP الذي)) (S (VP (VBD قام) (PP-CLR (IN ب) (NP (NP (NN اطلاق) (NP (DTNN النار))) (PP (IN على) (NP (DTNN الجاني)))))))) (CC ف) (SBAR (S (VP (VBD قتل) (NP-OBJ (PRP ه))))))))) (PUNC .))

View File

@ -0,0 +1,3 @@
(S (VP (VBD افادت) (NP-SBJ (NN حصيلة) (JJ نهائية) (JJ رسمية)) (NP-TMP (DTNN اليوم)) (SBAR (IN ان) (S (NP-TPC-3 (NP (NN حادث) (NP (NN اطلاق) (NP (DTNN النار)))) (SBAR (WHNP-1 (WP الذي)) (S (VP (VBD وقع) (NP-TMP (NN مساء) (NP (DTNN الجمعة))) (NP-TMP (NN خلال) (NP (NP (NP (NN حفل) (NP (DTNN الزفاف))) (ADJP (DTJJ الجماعي))) (SBAR (WHNP-2 (WP الذي)) (S (VP (VBN نظم) (PP-LOC (IN في) (NP (NN احدى) (NP (NN مدارس) (NP (NNP عمان)))))))))))))) (VP (PRT (RP قد)) (VBD اسفر) (PP-CLR (IN عن) (NP (NP (NN مقتل) (NP (NNS شخصين))) (CC و) (NP (NN اصابة) (NP (NP (CD ثلاثة)) (PP (IN من) (NP (NN رجال) (NP (DTNN الشرطة)))))))))))) (PUNC .))
(S (CC و) (VP (VBD اعلن) (NP-SBJ (NN مدير) (NP (NN شرطة) (NP (NNP عمان)))) (SBAR (IN ان) (S (S (NP-TPC (PRP ه)) (NP-ADV (NP-TMP (NN اثناء) (NP (NN اقامة) (NP (DTNN الحفل)))) (CC و) (NP (NP (NN بناء)) (PP (IN على) (NP (NN خلفية) (JJ ثارية))))) (PUNC ,) (VP (VBD اقدم) (NP-SBJ (NP (NNP خالد) (DTNNP الكحلوت)) (NP (PUNC -LRB-) (CD 22) (NP (NN عاما)) (PUNC -RRB-))) (PP-CLR (IN ب) (NP (NP (NN اطلاق) (NP (DTNN النار))) (PP (IN على) (NP (NP (NNP سعيد) (DTNNP الحرازين)) (NP (PUNC -LRB-) (CD 30) (NP (NN عاما)) (PUNC -RRB-)))))))) (CC ف) (S (VP (VBD اردا) (NP-OBJ (PRP ه))))))) (PUNC .))
(S (CC و) (VP (VBD صادف) (NP-TMP (NN اثناء) (NP (NN اطلاق) (NP (DTNN النار)))) (NP-SBJ (NN وجود) (NP (NP (NP (NNP عادل) (DTNNP الحرازين)) (NP (NN شقيق) (NP (DTNN القتيل)))) (SBAR (SBAR (WHNP-2 (WP الذي)) (S (VP (VBD قام) (PP-CLR (IN ب) (NP (NP (NN اطلاق) (NP (DTNN النار))) (PP (IN على) (NP (DTNN الجاني)))))))) (CC ف) (SBAR (S (VP (VBD قتل) (NP-OBJ (PRP ه))))))))) (PUNC .))

View File

@ -0,0 +1 @@
(ROOT (S (ADJD-MO Genausowenig) (ADV-MO aber) (VMFIN-HD kann) (NE-SB Baumgartl) (VP-OC (VVINF-HD sagen) ($, ,) (S-OC (PWS-SB was) (VP-OC (PP-MO (APPR-AC in) (NN-NK Zukunft)) (ADV-MO nun) (ADJD-MO genau) (PP-MO (APPR-AC mit) (ART-NK der) (NE-NK Volksfürsorge)) (VVINF-HD geschehen)) (VMFIN-HD soll))) ($. .)))

View File

@ -0,0 +1,6 @@
(ROOT (S (ADJD-MO Genausowenig) (ADV-MO aber) (VMFIN-HD kann) (NE-SB Baumgartl) (VP-OC (VVINF-HD sagen) ($, ,) (S-OC (PWS-SB was) (VP-OC (PP-MO (APPR-AC in) (NN-NK Zukunft)) (ADV-MO nun) (ADJD-MO genau) (PP-MO (APPR-AC mit) (ART-NK der) (NE-NK Volksfürsorge)) (VVINF-HD geschehen)) (VMFIN-HD soll))) ($. .)))
(ROOT ( (S (PDS-SB Das) (VMFIN-HD müsse) ($*LRB* ") (VP-OC (VP-OC (ADV-MO erst) (PP-MO (APPR-AC in) (NN-NK Ruhe) ) ($*LRB* ") (VVPP-HD überlegt) ) (VAINF-HD werden) ) ) ($. .) ) )
(ROOT ( (S (CS-SB (S-CJ (PWS-SB Wer) (PP-MO (APPR-AC an) (ART-NK der) (NE-NK Seine) ) (NN-OA Wasser) (VVFIN-HD predigt) ) ($, ,) (S-CJ (PP-MO (APPR-AC an) (ART-NK der) (NE-NK Alster) ) (ADV-MO aber) (NN-OA Wein) (VVFIN-HD trinkt) ) ) ($, ,) (VVFIN-HD macht) (PRF-OA sich) (PTKNEG-NG nicht) (AP-PD (ADJD-MO sonderlich) (ADJD-HD glaubwürdig) ) ) ($. .)))

View File

@ -0,0 +1 @@
(ROOT (S (NP (NP (JJ Influential) (NNS members)) (PP (IN of) (NP (DT the) (NNP House) (NNP Ways) (CC and) (NNP Means) (NNP Committee)))) (VP (VBD introduced) (NP (NP (NN legislation)) (SBAR (WHNP (WDT that)) (S (VP (MD would) (VP (VB restrict) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (JJ new) (NN savings-and-loan) (NN bailout) (NN agency)) (VP (MD can) (VP (VB raise) (NP (NN capital)))))) (, ,) (S (VP (VBG creating) (NP (NP (DT another) (JJ potential) (NN obstacle)) (PP (TO to) (NP (NP (NP (DT the) (NN government) (POS 's)) (NN sale)) (PP (IN of) (NP (JJ sick) (NNS thrifts)))))))))))))) (. .)))

View File

@ -0,0 +1,35 @@
( (S
(NP-SBJ (NNP Bio-Technology) (NNP General) (NNP Corp.) )
(VP (VBD received)
(NP
(NP (NNS tenders) )
(PP (IN for)
(NP
(NP
(NP (CD 97.9) (NN %) )
(PP (IN of)
(NP
(NP (PRP$ its)
(ADJP (CD 7.5) (NN %) )
(JJ convertible) (JJ senior) (VBN subordinated) (NNS notes) )
(ADJP (JJ due)
(NP-TMP
(NP (NNP April) (CD 15) )
(, ,)
(NP (CD 1997) ))))))
(, ,)
(CC and)
(NP
(NP (CD 96) (NN %) )
(PP (IN of)
(NP
(NP (PRP$ its)
(ADJP (CD 11) (NN %) )
(JJ convertible) (JJ senior) (VBN subordinated) (NNS debentures) )
(ADJP (JJ due)
(NP-TMP
(NP (NNP March) (CD 1) )
(, ,)
(NP (CD 2006) ))))))))))
(. .) ))

View File

@ -0,0 +1,74 @@
(ROOT (S (NP (NP (JJ Influential) (NNS members)) (PP (IN of) (NP (DT the) (NNP House) (NNP Ways) (CC and) (NNP Means) (NNP Committee)))) (VP (VBD introduced) (NP (NP (NN legislation)) (SBAR (WHNP (WDT that)) (S (VP (MD would) (VP (VB restrict) (SBAR (WHADVP (WRB how)) (S (NP (DT the) (JJ new) (NN savings-and-loan) (NN bailout) (NN agency)) (VP (MD can) (VP (VB raise) (NP (NN capital)))))) (, ,) (S (VP (VBG creating) (NP (NP (DT another) (JJ potential) (NN obstacle)) (PP (TO to) (NP (NP (NP (DT the) (NN government) (POS 's)) (NN sale)) (PP (IN of) (NP (JJ sick) (NNS thrifts)))))))))))))) (. .)))
( (S
(NP-SBJ (NNP Bio-Technology) (NNP General) (NNP Corp.) )
(VP (VBD received)
(NP
(NP (NNS tenders) )
(PP (IN for)
(NP
(NP
(NP (CD 97.9) (NN %) )
(PP (IN of)
(NP
(NP (PRP$ its)
(ADJP (CD 7.5) (NN %) )
(JJ convertible) (JJ senior) (VBN subordinated) (NNS notes) )
(ADJP (JJ due)
(NP-TMP
(NP (NNP April) (CD 15) )
(, ,)
(NP (CD 1997) ))))))
(, ,)
(CC and)
(NP
(NP (CD 96) (NN %) )
(PP (IN of)
(NP
(NP (PRP$ its)
(ADJP (CD 11) (NN %) )
(JJ convertible) (JJ senior) (VBN subordinated) (NNS debentures) )
(ADJP (JJ due)
(NP-TMP
(NP (NNP March) (CD 1) )
(, ,)
(NP (CD 2006) ))))))))))
(. .) ))
( (S
(PP-LOC (IN In)
(NP
(NP (NN exchange) (NNS offers) )
(SBAR
(WHNP-1 (WDT that) )
(S
(NP-SBJ (-NONE- *T*-1) )
(VP (VBD expired)
(NP-TMP (NNP Friday) ))))))
(, ,)
(NP-SBJ
(NP (NNS holders) )
(PP (IN of)
(NP
(NP (DT each) ($ $) (CD 1,000) (-NONE- *U*) )
(PP (IN of)
(NP (NNS notes) )))))
(VP (MD will)
(VP (VB receive)
(NP
(NP
(NP
(ADJP ($ $) (CD 250) (-NONE- *U*) )
(NN face) (NN amount) )
(PP (IN of)
(NP
(NP (NNP Series) (NNP A)
(ADJP (CD 7.5) (NN %) )
(JJ senior) (VBN secured) (JJ convertible) (NNS notes) )
(ADJP (JJ due)
(NP-TMP
(NP (NNP Jan.) (CD 15) )
(, ,)
(NP (CD 1995) ))))))
(, ,)
(CC and)
(NP (CD 200) (JJ common) (NNS shares) ))))
(. .) ))

View File

@ -0,0 +1 @@
(ROOT (SENT (VN (CL Nous) (V prions)) (NP (D les) (N cinéastes) (COORD (C et) (NP (A tous) (D nos) (N lecteurs)))) (VPinf (P de) (ADV bien) (VN (V vouloir)) (VPinf (VN (CL nous) (CL en) (V excuser)))) (PUNC .)))

View File

@ -0,0 +1,4 @@
(ROOT (SENT (VN (CL Nous) (V prions)) (NP (D les) (N cinéastes) (COORD (C et) (NP (A tous) (D nos) (N lecteurs)))) (VPinf (P de) (ADV bien) (VN (V vouloir)) (VPinf (VN (CL nous) (CL en) (V excuser)))) (PUNC .)))
(ROOT (SENT (NP (D La) (N diffusion) (PP (P des) (NP (MWN (N prévisions) (A météorologiques))))) (VN (V était) (ADV fortement) (V perturbée)) (PUNC ,) (NP (N mardi) (A 7) (N janvier)) (PUNC ,) (PP (P par) (NP (D le) (N mouvement) (PP (P de) (NP (N grève) (AP (A nationale)) (PP (P de) (NP (MWD (D trente) (PUNC -) (D six)) (N heures))))) (VPpart (V déclenché) (AdP (MWADV (D la) (N veille)) (MWADV (P au) (N soir))) (PUNC ,) (PP (MWP (P à) (D l') (N appel) (P des)) (NP (N délégués) (NP (N CGT) (COORD (PUNC ,) (NP (N CFDT))) (COORD (C et) (NP (N FO))))))) (PP (P du) (NP (MWN (N personnel) (A technique)) (PP (P de) (NP (MWN (N Météo) (PUNC -) (N France)))))))) (PUNC .)))
(ROOT (SENT (PP (P Par) (NP (D cette) (N action))) (PUNC ,) (NP (D ces) (A derniers)) (VN (V veulent)) (VPinf (VN (V dénoncer)) (NP (D la) (N baisse) (PP (P des) (NP (N effectifs))) (Srel (NP (PRO qui)) (VN (CL se) (V traduit)) (Sint (PUNC ,) (VN (V affirment) (CL -ils)) (PUNC ,)) (PP (P par) (PUNC ") (NP (D l') (N abandon) (PP (P de) (NP (D certaines) (N tâches))) (COORD (C et) (NP (D le) (N recours) (PP (P à) (NP (D la) (MWN (ADV sous) (PUNC -) (N traitance))))))))))) (PUNC .)))

View File

@ -0,0 +1 @@
(ROOT (IP (NP (NP (NR 上海) (NR 浦东)) (NP (NN 开发) (CC 与) (NN 法制) (NN 建设))) (VP (VV 同步))))

View File

@ -0,0 +1,83 @@
(ROOT (IP (NP (NP (NR 上海) (NR 浦东)) (NP (NN 开发) (CC 与) (NN 法制) (NN 建设))) (VP (VV 同步))))
( (IP (NP-PN-SBJ (NR 上海)
(NR 浦东))
(VP (VP (LCP-TMP (NP (NT 近年))
(LC 来))
(VP (VCD (VV 颁布)
(VV 实行))
(AS 了)
(NP-OBJ (CP (WHNP-1 (-NONE- *OP*))
(CP (IP (NP-SBJ (-NONE- *T*-1))
(VP (VV 涉及)
(NP-OBJ (NP-APP (NN 经济)
(PU 、)
(NN 贸易)
(PU 、)
(NN 建设)
(PU 、)
(NN 规划)
(PU 、)
(NN 科技)
(PU 、)
(NN 文教)
(ETC 等))
(NP (NN 领域)))))
(DEC 的)))
(QP (CD 七十一)
(CLP (M 件)))
(NP (NN 法规性)
(NN 文件)))))
(PU )
(VP (VV 确保)
(AS 了)
(NP-OBJ (DNP (NP (NP-PN (NR 浦东))
(NP (NN 开发)))
(DEG 的))
(ADJP (JJ 有序))
(NP (NN 进行)))))
(PU 。)) )
( (IP (IP (NP-SBJ (NP-PN (NR 浦东))
(NP (NN 开发)
(NN 开放)))
(VP (VC 是)
(NP-PRD (QP (CD 一)
(CLP (M 项)))
(CP (WHNP-1 (-NONE- *OP*))
(CP (IP (NP-SBJ (-NONE- *T*-1))
(VP (VP (VV 振兴)
(NP-PN-OBJ (NR 上海)))
(PU )
(VP (VV 建设)
(NP-OBJ (NP (NN 现代化))
(NP (NN 经济)
(PU 、)
(NN 贸易)
(PU 、)
(NN 金融))
(NP (NN 中心))))))
(DEC 的)))
(ADJP (JJ 跨世纪))
(NP (NN 工程)))))
(PU )
(IP (ADVP (AD 因此))
(NP-SBJ (CP (WHNP-2 (-NONE- *OP*))
(CP (IP (NP-SBJ (-NONE- *T*-2))
(VP (ADVP (AD 大量))
(VP (VV 出现))))
(DEC 的))))
(VP (VC 是)
(NP-PRD (CP (WHNP-3 (-NONE- *OP*))
(CP (IP (NP-SBJ (-NONE- *pro*))
(VP (NP-TMP (NT 以前))
(ADVP (AD 不))
(ADVP (AD 曾))
(VP (VV 遇到)
(AS 过)
(NP-OBJ (-NONE- *T*-3)))))
(DEC 的)))
(NP (NP (ADJP (JJ 新))
(NP (NN 情况)))
(PU 、)
(NP (ADJP (JJ 新))
(NP (NN 问题)))))))
(PU 。)) )

View File

@ -0,0 +1,74 @@
cholera
vibrio cholerae
vibrio cholerae infection
malaria
plasmodium infection
polio
poliomyelitis
infantile paralysis
tuberculosis
phthisis
typhoid fever
typhoid
gastric fever
enteric fever
abdominal typhus
slow fever
nervous fever
pythogenic fever
yellow fever
yellow yack
yellow plague
dengue fever
dengue
breakbone fever
tetanus
lockjaw
lock jaw
measles
morbilli
rubeola
diphtheria
mumps
parotitis
whooping cough
pertussis
bordetella
rubella
german measles
three-day measles
three day measles
human papillomavirus infection
papillomavirus infection
hepatitis a
hepatitis type a
hep a
hepatitis e
hepatitis type e
hep e
japanese encephalitis
japanese b encephalitis
tick-borne encephalitis
central european encephalitis
russian spring-summer encephalitis
siberian tick-borne encephalitis
taiga encephalitis
western european tick-borne encephalitis
west-siberian encephalitis
meningococcal disease
meningococcal septicaemia
neisseria meningitidis infection
hepatitis b
serum hepatitis
hepatitis type b
hep b
rabies
lyssa
chickenpox
varicella
chicken pox
influenza
pneumococcal infection
streptococcus pneumoniae infection
rotaviral gastroenteritis
rotavirus enteritis

View File

@ -0,0 +1,107 @@
# The properties in this file will be applied as defaults.
# A custom properties file should only override or add the properties it is interested in.
# -----------------------
# General and fixed flags
# -----------------------
patternType = SURFACE
# To ignore case differences within the text
lowercaseText = true
# This ignores spelling mistakes (common for social media),
# but is slow
#fuzzyMatch = true
minLen4FuzzyForPattern = 4
# Do not evaluate (broken for our purposes)
evaluate = false
evalPerEntity = false
# These two are assumed to have this value
useTargetParserParentRestriction = false
useTargetNERRestriction = true
# The application doesn't check that the patterns are relevant to the corpus.
# Don't store, always compute them
computeAllPatterns = true
# Do not use Lucene or a RDBMS for temporary storage
storePatsForEachToken = MEMORY
# = save model and results.
# Needed for TextProc
savePatternsWordsDir = true
# This property is needed for file saving to work
identifier = identifier
# Using regexner and gazetteers makes the entity extraction process
# 10 times slower and much more memory intensive
applyFineGrainedRegexner = false
# --------------------------------------------------------------------------------------------------------------------------
# Pattern flags
# (copied from https://github.com/stanfordnlp/CoreNLP/blob/master/data/edu/stanford/nlp/patterns/surface/example.properties)
# --------------------------------------------------------------------------------------------------------------------------
# ***use context on the left
usePreviousContext = true
# ***use context on the right
useNextContext = true
# ***the context should be at least this long
minWindow4Pattern = 2
# ***the context can be at most this long
maxWindow4Pattern = 4
# if the context consists of only stop words, add only if it's more than these many stop words
numMinStopWordsToAdd = 3
# ***use POS tag restriction for the target phrase
usePOS4Pattern = true
# Ignore words {a, an, the} while matching the patterns to text (advisable true)
useFillerWordsInPat = false
# If your code is running too slow, try to reduce this number. Samples % of sentences for learning patterns
sampleSentencesForSufficientStats = 1.0
# maximum number of allowed words in the target phrase
numWordsCompound = 3
# consider patterns without the POS restriction on the target phrase
addPatWithoutPOS = true
# Ignore common stop words occurring just before the target phrase
useStopWordsBeforeTerm = false
# Use lemma instead of words of the context tokens
useLemmaContextTokens = true
# make context matching lowercase (advisable)
matchLowerCaseContext = true
# use named entity tag restrictions for the context (neighboring) tokens
useContextNERRestriction = false
# do not extract phrase in which any word is labeled with another class
# (for example, you don't wanna extract 'HIV patients' as disease)
doNotExtractPhraseAnyWordLabeledOtherClass = true
# kinda ignore this flag and use it as true. for those who care this too much: for each token,
# we use the phrase that originally matched that token instead of the token's word
# (in case you are using fuzzy matching)
useMatchingPhrase = true
# Use only the tokens that get matched by a pattern (advisable as false)
restrictToMatched = false
# Label the learned words in the text (advisable as true)
usePatternResultAsLabel = true
# Words excluded from NER labeling
englishWordsFiles = data/edu/stanford/nlp/patterns/surface/stopwords.txt
# Words to be ignored when learning phrases.
# See the commonWordsPatternFiles field in the ConstantsAndVariables class
commonWordsPatternFiles = data/edu/stanford/nlp/patterns/surface/stopwords.txt
# remove common stop words from phrases to get clean phrases (for example, "disease" instead of "some disease")
removeStopWordsFromSelectedPhrases = true
# Do not learn phrases that have any stop word
removePhrasesWithStopWords = false

View File

@ -0,0 +1,297 @@
#### NOTE: for all flags and their description, see the javadoc. Important parameters (in our experience) that you should tune for your dataset are marked with ***. Pay special attention to flags like targetAllowedNERs. Use batchProcessSents and Lucene based options if you want low memory (but slower) runs.
#***Which type of patterns. Options are SURFACE and DEP
patternType=SURFACE
#name for the saved files for the output of the system (useful for comparing results of different experiments with different variables etc.)
identifier=useNERRestriction
#Directory where data lives
DIR=projects/core/data/edu/stanford/nlp/patterns/surface
outDir=SPIEDPatternsout
#If you want output in which each labeled phrase has <label> </label> around it
#markedOutputTextFile=${outDir}/markedtext.txt
#Number of threads available on the machine
numThreads=1
#***Use these options if you are limited by memory
batchProcessSents = false
#This name is a misnomer. Max number of *lines* per batch file. Works only for text file format; ser files cannot be broken down
numMaxSentencesPerBatchFile=100
saveInvertedIndex=false
invertedIndexDirectory=${outDir}/invertedIndex
#Loading index from invertedIndexDirectory
#loadInvertedIndex=true
#Useful for memory heavy apps.
#invertedIndexClass=edu.stanford.nlp.patterns.LuceneSentenceIndex
### Example for running it on presidents biographies. For more data examples, see the bottom of this file
#can be text. the code will tokenize it.
fileFormat=text
#Input file(s) (default assumed text). Can be one or more of (concatenated by comma or semi-colon): file, directory, files with regex in the filename (for example: "mydir/health-.*-processed.txt")
file=${DIR}/presidents.txt
#to save the serialized sentences into a file - text split into sentences, processed using ner, parse etc (depending on the flags) and labeled with seed set. Existing files will get deleted (if the fileFormat is not ser).
saveSentencesSerDir=${outDir}/sents
#if you use the flag above to save the file, you can use the saved file like this
#fileFormat=ser
#file=${outDir}/sents
#We are learning names of presidential candidates, places, and other names. In each line, all text after tabs are ignored in these seed files
seedWordsFiles=NAME,${DIR}/names.txt;PLACE,${DIR}/places.txt;OTHER,${DIR}/otherpeople.txt
#Useful for matching lemmas or spelling mistakes
fuzzyMatch=false
#Used if fuzzyMatch is true. minimum length of words to do fuzzy matching.
minLen4FuzzyForPattern=6
#You can evaluate two ways; both presented here.
evaluate=true
goldEntitiesEvalFiles=NAME,${DIR}/goldnames.txt;PLACE,${DIR}/goldplaces.txt
#evalFileWithGoldLabels=${DIR}/presidents_eval.txt
#default as true, false if you want scores per token
evalPerEntity=true
#SAVE n LOAD the model (patterns and phrases) options
patternsWordsDir=${outDir}/${identifier}/model/
#save the learned patterns and learned words in patternsWordsDir directory
savePatternsWordsDir=true
#load the patterns and words from patternsWordsDir directory
#loadSavedPatternsWordsDir=true
#false if you just want to process the text into sents but not do anything with it, or you want to use loadSavedPatternsWordsDir option. Useful for batch processing and saving text as serialized objects, then running the learning system on all the serialized objects (see saveSentences* and saveEvalSent* flags) or domain adaptation.
learn=true
#posModelPath=<if you want to use a different Stanford NLP group released POS tagger; e.g. caseless etc>
#In case the seeds have overlapping words like "lung" as bodyPart and "lung cancer" as disease. "lung" in "lung cancer" will be labeled as only disease, if the flag is true.
removeOverLappingLabelsFromSeed=true
######## creating patterns flags ##########
#***use context on the left
usePreviousContext=true
#***use context on the right
useNextContext = true
#***the context should be at least this long
minWindow4Pattern = 2
#***the context can be at most this long
maxWindow4Pattern = 4
#if the context consists of only stop words, add only if it's more than these many stop words
numMinStopWordsToAdd = 3
#***use POS tag restriction for the target phrase
usePOS4Pattern = true
#Ignore words {a, an, the} while matching the patterns to text (advisable true)
useFillerWordsInPat = false
#***Specific allowed tags' initials for the target phrase for each label while creating the patterns (if not specified, every tag is acceptable to create a pattern). Tag initials can be written as N or NN or J or N,J etc. E.g.: NAME,N,J;PLACE,N. If
targetAllowedTagsInitialsStr=NAME,N;OTHER,N
#You can save all possible patterns for all tokens in the flag allPatternsDir so you wouldn't need to calculate them everytime.
computeAllPatterns = true
#Options: MEMORY, DB, LUCENE. If using SQL for storing patterns for each token --- populate SQLConnection class, that is provide those properties!
storePatsForEachToken=MEMORY
#***If your code is running too slow, try to reduce this number. Samples % of sentences for learning patterns
sampleSentencesForSufficientStats=1.0
#Save or read (if computeAllPatterns is false) from here
allPatternsDir= ${DIR}/${identifier}_allpatterns
#***maximum Num of allowed words in the target phrase
numWordsCompound = 3
#***consider patterns without the POS restriction on the target phrase
addPatWithoutPOS = true
#Ignore common stop words occurring just before the target phrase
useStopWordsBeforeTerm=false
#Use lemma instead of words of the context tokens
useLemmaContextTokens=true
#make context matching lowercase (advisable)
matchLowerCaseContext=true
#***use named entity tag (predicted using StanfordCoreNLP NER) restriction of the target phrase
useTargetNERRestriction=true
#***If useTargetNERRestriction is true, you can give NER tags that the target phrase can take. Do not mention anything if you don't want any specific restriction
targetAllowedNERs=NAME,PERSON;PLACE,LOCATION;OTHER,PERSON
#use named entity tag restrictions for the context (neighboring) tokens
useContextNERRestriction=false
#***use the parse tag of the grandparent node as restriction (note that parent node is the POS tag of the word)
useTargetParserParentRestriction=false
#do not extract phrase in which any word is labeled with another class (for example, you don't wanna extract 'HIV patients' as disease)
doNotExtractPhraseAnyWordLabeledOtherClass = true
#### matching patterns to text ######
#kinda ignore this flag and use it as true. for those who care this too much: for each token, we use the phrase that originally matched that token instead of the token's word (in case you are using fuzzy matching)
useMatchingPhrase=true
#Use only the tokens that get matched by a pattern (advisable as false)
restrictToMatched = false
#Label the learned words in the text (advisable as true)
usePatternResultAsLabel=true
#remove common stop words from phrases to get clean phrases (for example, "disease" instead of "some disease")
removeStopWordsFromSelectedPhrases = true
#Do not learn phrases that have any stop word
removePhrasesWithStopWords = false
### evaluating candidate patterns
#***Minimum number of positive phrases a candidate pattern should extract
minPosPhraseSupportForPat = 1
##### thresholds for selecting paterns and words #####
#***threshold for learning a phrase
thresholdWordExtract=0.01
#***threshold for learning a pattern
thresholdSelectPattern = 0.01
#keep lowering threshold as 0.8*threshold whenever the system doesn't learn any new patterns and phrases
tuneThresholdKeepRunning=false
#***discard phrases that do not have these many patterns extracting it
thresholdNumPatternsApplied = 1
#***max number of words to extract in each iteration
numWordsToAdd = 5
#***max number of patterns to learn in each iteration
numPatterns = 5
#***max number of iterations
numIterationsForPatterns = 8
#Consider words belonging to other labels as negative (advisable as true)
useOtherLabelsWordsasNegative=true
#***Pattern scoring measure. For more details, see the paper. The options are PhEvalInPatLogP, PhEvalInPat, PosNegUnlabOdds, RlogF, RlogFPosNeg, YanGarber02, PosNegOdds, LOGREG, LOGREGlogP, RatioAll, SqrtAllRatio
patternScoring=RatioAll
#Class to be used to score phrases. The valid options are edu.stanford.nlp.patterns.surface.ScorePhrasesAverageFeatures and edu.stanford.nlp.patterns.surface.ScorePhrasesLearnFeatWt
phraseScorerClass=edu.stanford.nlp.patterns.ScorePhrasesAverageFeatures
#phraseScorerClass=edu.stanford.nlp.patterns.ScorePhrasesLearnFeatWt
#***Club neighboring labeled words (of the same label) when extracting phrases
clubNeighboringLabeledWords=true
#if you want to sqrt root the pattern score
sqrtPatScore = false
#Phrase scoring measure; ignore.
wordScoring=WEIGHTEDNORM
#For scoring phrases that are OOV, a score is the average of the score of individual words (instead of min, which is default)
useAvgInsteadofMinPhraseScoring=true
#*** what all features to use to evaluate phrases. See the paper for more details on each
#only if wordClassClusterFile is provided
usePhraseEvalWordClass=false
#tf-idf scoring w.r.t to the domain
usePhraseEvalDomainNgram=false
#use pattern weights in scoring phrases extracted by them, if usePhraseEvalPatWtByFreq is true. otherwise it's just a tfidf like score
usePatternWeights=true
#basically patwt/log(freq), patwt = 1 if usePatternWeights is false
usePhraseEvalPatWtByFreq=true
#if using multiple label dictionaries etc, freq of the phrase in the label dictionary vs other dictionaries
usePhraseEvalSemanticOdds=true
#edit distance from positive entities
usePhraseEvalEditDistSame=true
#edit distance from the negative entities
usePhraseEvalEditDistOther=true
#if you have googlengrams, you can use googlengrams tf-idf scoring.
usePhraseEvalGoogleNgram=false
#% of positive labeled words with the same word class (see WordClassClassifier and chris2 for more details)
usePhraseEvalWordShape=true
#These flags are not valid if patternScoring is not PhEvalInPat* . Similar meaning as for the phrase ones above
usePatternEvalWordClass=false
usePatternEvalGoogleNgram=false
usePatternEvalSemanticOdds=true
usePatternEvalEditDistSame=true
usePatternEvalEditDistOther=true
usePatternEvalDomainNgram=false
usePatternEvalWordShape=true
#Options are LOG, NONE or SQRT
wordFreqNorm = NONE
######For logging
#4 if you wanna print out every single thing happening in the system, 3 if you want fair amount of debug messages and justification, 2 means some debug msgs, 1 means only necessary msgs and 0 means (almost) no msgs
debug = 3
#stop words file
stopWordsPatternFiles=${DIR}/stopwords.txt
englishWordsFiles=${stopWordsPatternFiles}
commonWordsPatternFiles= ${stopWordsPatternFiles}
#You can give some common words like this
#commonWordsPatternFiles =${DIR}/lists/commonEngWords1k.txt
#If you are using Google Ngrams TF-IDF feature
#googleNGramsFile=/u/nlp/scr/google-ngrams/1gms/vocab
#weightDomainFreq=10
#below is optional; comma separated files with list of phrases that def do not belong to any of the labels
#otherSemanticClassesFiles=${DIR}/nondiseases.txt
#The flags below are used when either LOGREG is used for patternScoring or ScorePhrasesLearnFeatWt class is used for phrase scoring
#% unlabeled tokens selected as negative
#perSelectRand=0.5
#% negative tokens selected as negative
#perSelectNeg=1
### Example for running the code on BioMed articles and NCBI corpus (instead of the toy example above)
#fileFormat=text
#file=${DIR}/BioMedSample
#saveSentencesSerFile=${DIR}/biomed_sents.ser
#evalFileWithGoldLabels=${DIR}/NCBI_corpus_testing_processed.txt
#saveEvalSentencesSerFile=${DIR}/ncbi_corpus_testing_sents.ser
#addEvalSentsToTrain=true
#seedWordsFiles=disease,${DIR}/diseases.txt;nondisease,${DIR}/nondiseases.txt
#wordClassClusterFile=${DIR}/ncbi_disease_brownclusters_200_min5.txt
#externalFeatureWeightsFile = ${DIR}/out/wordclass_weights

View File

@ -0,0 +1,85 @@
Nelson Rockefeller
Herbert Walker Bush
contributed#
Ronald Reagan
Bush
John Fitzgerald Kennedy
Nixon
Carter
George Herber Walker Bush
William Clinton
William
Nelle
George Bush
James Earl
John
Dwight D. Eisenhower
Prescott Bush
Wilkes Booth#
Taft
Prescott#
Coolidge
Kennedy
W. Bush
George
Reagan
Adams
Eisenhower
Ronald Wilson Reagan
William Jefferson
Roger#
Blythe III
Clinton
Johnson
Richard M.
George W.
George C. Marshall
Gibbs McAdoo#
Lyndon B.
III
John Quincy
agenda#
Governor Nelson Rockefeller
President Jimmy
Warren G. Harding
promise#
General Eisenhower
business#
Gerald R. Ford
America#
family#
Administration#
pledge#
phrases#
House#
measures#
campaign#
Roosevelt
Governor Jimmy
cut taxes#
Governor Nelson
Albert Gore Jr.
Gore Jr.
Leonid I. Brezhnev#
Albert Gore
Bush
James M. Cox
Nixon
George W. Bush
Michael Dukakisi#
Ronald Wilson Reagan
Saddam Hussein#
Dan Quayle
Al Gore
Carter
John Kerry
Spiro T. Agnew
Clinton
Walter F. Mondale
Nelson Rockefeller
Gerald R. Ford
Taft
Hubert H. Humphrey
Mikhail Gorbachev#
John Kennedy
Richard M. Nixon

View File

@ -0,0 +1,2 @@
Brookline
Plymouth

View File

@ -0,0 +1,3 @@
President Obama
President Bush
Jimmy Carter

View File

@ -0,0 +1,4 @@
Sasha
Michelle
Laura Welch
Jenna

View File

@ -0,0 +1,3 @@
Hawaii
New Haven
Milton

View File

@ -0,0 +1,275 @@
Barack H. Obama is the 44th President of the United States.
His story is the American story — values from the heartland, a middle-class upbringing in a strong family, hard work and education as the means of getting ahead, and the conviction that a life so blessed should be lived in service to others.
With a father from Kenya and a mother from Kansas, President Obama was born in Hawaii on August 4, 1961. He was raised with help from his grandfather, who served in Patton's army, and his grandmother, who worked her way up from the secretarial pool to middle management at a bank.
After working his way through college with the help of scholarships and student loans, President Obama moved to Chicago, where he worked with a group of churches to help rebuild communities devastated by the closure of local steel plants.
He went on to attend law school, where he became the first African-American president of the Harvard Law Review. Upon graduation, he returned to Chicago to help lead a voter registration drive, teach constitutional law at the University of Chicago, and remain active in his community.
President Obama's years of public service are based around his unwavering belief in the ability to unite people around a politics of purpose. In the Illinois State Senate, he passed the first major ethics reform in 25 years, cut taxes for working families, and expanded health care for children and their parents. As a United States Senator, he reached across the aisle to pass groundbreaking lobbying reform, lock up the world's most dangerous weapons, and bring transparency to government by putting federal spending online.
He was elected the 44th President of the United States on November 4, 2008, and sworn in on January 20, 2009. He and his wife, Michelle, are the proud parents of two daughters, Malia, 14, and Sasha, 11.
The airborne terrorist attacks on the World Trade Center, the Pentagon, and the thwarted flight against the White House or Capitol on September 11, 2001, in which nearly 3,000 Americans were killed, transformed George W. Bush into a wartime president. The attacks put on hold many of Bush's hopes and plans, and Bush's father, George Bush, the 41st president, declared that his son “faced the greatest challenge of any president since Abraham Lincoln.”
In response, Bush formed a new cabinet-level Department of Homeland Security, sent American forces into Afghanistan to break up the Taliban, a movement under Osama bin Laden that trained, financed, and exported terrorist teams. The Taliban was successfully disrupted but Bin Laden was not captured and was still on the loose as Bush began his second term. Following the attacks, the president also recast the nation's intelligence gathering and analysis services, and ordered reform of the military forces to meet the new enemy. At the same time he delivered major tax cuts which had been a campaign pledge. His most controversial act was the invasion of Iraq on the belief that Iraqi President Saddam Hussein posed a grave threat to the United States. Saddam was captured, but the disruption of Iraq and the killing of American servicemen and friendly Iraqis by insurgents became the challenge of Bush's government as he began his second term. President Bush pledged during his 2005 State of the Union Address that the United States would help the Iraqi people establish a fully democratic government because the victory of freedom in Iraq would strengthen a new ally in the war on terror, bring hope to a troubled region, and lift a threat from the lives of future generations.
Bush was born in New Haven, Connecticut while his father was attending Yale University after service in World War II. The family moved to Midland, Texas, where the senior Bush entered the oil exploration business. The son spent formative years there, attended Midland public schools, and formed friendships that stayed with him into the White House. Bush graduated from Yale, received a business degree from Harvard, and then returned to Midland where he too got into the oil business. In Midland he met and married Laura Welch, a teacher and librarian. They had twin daughters, Jenna and Barbara, now out of college and pursuing careers.
When George W. Bush, at the age of 54, became the 43rd president of the United States, it was only the second time in American history that a president's son went on to the White House. John Quincy Adams, elected the sixth president in 1824, was the son of John Adams, the second president. While John Adams had groomed his son to be president, George Bush, the 41st president, insisted he was surprised when the eldest of his six children became interested in politics, became governor of Texas, and then went on to the White House.
During the early part of the 2000 campaign for the White House, Bush enjoyed a double-digit lead in the polls over his opponent Vice President Al Gore Jr. But the gap closed as the election approached and though Gore finally won the popular vote by 543,895 votes, victory or loss of the presidency hinged on Florida's electoral votes. That struggle through recounts and lawsuits worked its way to the Supreme Court. In the end Bush won the electoral count 271 to 266. His new administration was focused on “compassionate conservatism,” which embraced excellence in education, tax relief and volunteerism among faith-based and community organizations.
Bush was challenged in his re-election bid in 2004 by Massachusetts Democratic Senator John Kerry. The election was a good contest, but Bush's contention that the invasion of Iraq had made the world more secure against terrorism won the national political debate. Bush was re-elected with 51 percent to 48 percent.
On the inaugural stand, George W. Bush set the theme for his second term: “At this second gathering, our duties are defined not by the words I use, but by the history we have seen together. For half a century, America defended our own freedom by standing watch on distant borders. After the shipwreck of communism came years of relative quiet- and then there came a day of fire. There is only one force of history that can break the reign of hatred and resentment, and expose the pretensions of tyrants, and reward the hopes of the decent and tolerant, and that is the force of human freedom tested but not weary… we are ready for the greatest achievements in the history of freedom.”
During the administration of William Jefferson Clinton, the U.S. enjoyed more peace and economic well being than at any time in its history. He was the first Democratic president since Franklin D. Roosevelt to win a second term. He could point to the lowest unemployment rate in modern times, the lowest inflation in 30 years, the highest home ownership in the country's history, dropping crime rates in many places, and reduced welfare rolls. He proposed the first balanced budget in decades and achieved a budget surplus. As part of a plan to celebrate the millennium in 2000, Clinton called for a great national initiative to end racial discrimination.
After the failure in his second year of a huge program of health care reform, Clinton shifted emphasis, declaring "the era of big government is over." He sought legislation to upgrade education, to protect jobs of parents who must care for sick children, to restrict handgun sales, and to strengthen environmental rules.
President Clinton was born William Jefferson Blythe III on August 19, 1946, in Hope, Arkansas, three months after his father died in a traffic accident. When he was four years old, his mother wed Roger Clinton, of Hot Springs, Arkansas. In high school, he took the family name.
He excelled as a student and as a saxophone player and once considered becoming a professional musician. As a delegate to Boys Nation while in high school, he met President John Kennedy in the White House Rose Garden. The encounter led him to enter a life of public service.
Clinton was graduated from Georgetown University and in 1968 won a Rhodes Scholarship to Oxford University. He received a law degree from Yale University in 1973, and entered politics in Arkansas.
He was defeated in his campaign for Congress in Arkansas's Third District in 1974. The next year he married Hillary Rodham, a graduate of Wellesley College and Yale Law School. In 1980, Chelsea, their only child, was born.
Clinton was elected Arkansas Attorney General in 1976, and won the governorship in 1978. After losing a bid for a second term, he regained the office four years later, and served until he defeated incumbent George Bush and third party candidate Ross Perot in the 1992 presidential race.
Clinton and his running mate, Tennessee's Senator Albert Gore Jr., then 44, represented a new generation in American political leadership. For the first time in 12 years both the White House and Congress were held by the same party. But that political edge was brief; the Republicans won both houses of Congress in 1994.
In 1998, as a result of issues surrounding personal indiscretions with a young woman White House intern, Clinton was the second U.S. president to be impeached by the House of Representatives. He was tried in the Senate and found not guilty of the charges brought against him. He apologized to the nation for his actions and continued to have unprecedented popular approval ratings for his job as president.
In the world, he successfully dispatched peace keeping forces to war-torn Bosnia and bombed Iraq when Saddam Hussein stopped United Nations inspections for evidence of nuclear, chemical, and biological weapons. He became a global proponent for an expanded NATO, more open international trade, and a worldwide campaign against drug trafficking. He drew huge crowds when he traveled through South America, Europe, Russia, Africa, and China, advocating U.S. style freedom.
George Bush brought to the White House a dedication to traditional American values and a determination to direct them toward making the United States "a kinder and gentler nation." In his Inaugural Address he pledged in "a moment rich with promise" to use American strength as "a force for good."
Coming from a family with a tradition of public service, George Herbert Walker Bush felt the responsibility to make his contribution both in time of war and in peace. Born in Milton, Massachusetts, on June 12, 1924, he became a student leader at Phillips Academy in Andover. On his 18th birthday he enlisted in the armed forces. The youngest pilot in the Navy when he received his wings, he flew 58 combat missions during World War II. On one mission over the Pacific as a torpedo bomber pilot he was shot down by Japanese antiaircraft fire and was rescued from the water by a U. S. submarine. He was awarded the Distinguished Flying Cross for bravery in action.
Bush next turned his energies toward completing his education and raising a family. In January 1945 he married Barbara Pierce. They had six children-- George, Robin (who died as a child), John (known as Jeb), Neil, Marvin, and Dorothy.
At Yale University he excelled both in sports and in his studies; he was captain of the baseball team and a member of Phi Beta Kappa. After graduation Bush embarked on a career in the oil industry of West Texas.
Like his father, Prescott Bush, who was elected a Senator from Connecticut in 1952, George became interested in public service and politics. He served two terms as a Representative to Congress from Texas. Twice he ran unsuccessfully for the Senate. Then he was appointed to a series of high-level positions: Ambassador to the United Nations, Chairman of the Republican National Committee, Chief of the U. S. Liaison Office in the People's Republic of China, and Director of the Central Intelligence Agency.
In 1980 Bush campaigned for the Republican nomination for President. He lost, but was chosen as a running mate by Ronald Reagan. As Vice President, Bush had responsibility in several domestic areas, including Federal deregulation and anti-drug programs, and visited scores of foreign countries. In 1988 Bush won the Republican nomination for President and, with Senator Dan Quayle of Indiana as his running mate, he defeated Massachusetts Governor Michael Dukakis in the general election.
Bush faced a dramatically changing world, as the Cold War ended after 40 bitter years, the Communist empire broke up, and the Berlin Wall fell. The Soviet Union ceased to exist; and reformist President Mikhail Gorbachev, whom Bush had supported, resigned. While Bush hailed the march of democracy, he insisted on restraint in U. S. policy toward the group of new nations.
In other areas of foreign policy, President Bush sent American troops into Panama to overthrow the corrupt regime of General Manuel Noriega, who was threatening the security of the canal and the Americans living there. Noriega was brought to the United States for trial as a drug trafficker.
Bush's greatest test came when Iraqi President Saddam Hussein invaded Kuwait, then threatened to move into Saudi Arabia. Vowing to free Kuwait, Bush rallied the United Nations, the U. S. people, and Congress and sent 425,000 American troops. They were joined by 118,000 troops from allied nations. After weeks of air and missile bombardment, the 100-hour land battle dubbed Desert Storm routed Iraq's million-man army.
Despite unprecedented popularity from this military and diplomatic triumph, Bush was unable to withstand discontent at home from a faltering economy, rising violence in inner cities, and continued high deficit spending. In 1992 he lost his bid for reelection to Democrat William Clinton.
At the end of his two terms in office, Ronald Reagan viewed with satisfaction the achievements of his innovative program known as the Reagan Revolution, which aimed to reinvigorate the American people and reduce their reliance upon Government. He felt he had fulfilled his campaign pledge of 1980 to restore "the great, confident roar of American progress and growth and optimism."
On February 6, 1911, Ronald Wilson Reagan was born to Nelle and John Reagan in Tampico, Illinois. He attended high school in nearby Dixon and then worked his way through Eureka College. There, he studied economics and sociology, played on the football team, and acted in school plays. Upon graduation, he became a radio sports announcer. A screen test in 1937 won him a contract in Hollywood. During the next two decades he appeared in 53 films.
From his first marriage to actress Jane Wyman, he had two children, Maureen and Michael. Maureen passed away in 2001. In 1952 he married Nancy Davis, who was also an actress, and they had two children, Patricia Ann and Ronald Prescott.
As president of the Screen Actors Guild, Reagan became embroiled in disputes over the issue of Communism in the film industry; his political views shifted from liberal to conservative. He toured the country as a television host, becoming a spokesman for conservatism. In 1966 he was elected Governor of California by a margin of a million votes; he was re-elected in 1970.
Ronald Reagan won the Republican Presidential nomination in 1980 and chose as his running mate former Texas Congressman and United Nations Ambassador George Bush. Voters troubled by inflation and by the year-long confinement of Americans in Iran swept the Republican ticket into office. Reagan won 489 electoral votes to 49 for President Jimmy Carter.
On January 20, 1981, Reagan took office. Only 69 days later he was shot by a would-be assassin, but quickly recovered and returned to duty. His grace and wit during the dangerous incident caused his popularity to soar.
Dealing skillfully with Congress, Reagan obtained legislation to stimulate economic growth, curb inflation, increase employment, and strengthen national defense. He embarked upon a course of cutting taxes and Government expenditures, refusing to deviate from it when the strengthening of defense forces led to a large deficit.
A renewal of national self-confidence by 1984 helped Reagan and Bush win a second term with an unprecedented number of electoral votes. Their victory turned away Democratic challengers Walter F. Mondale and Geraldine Ferraro.
In 1986 Reagan obtained an overhaul of the income tax code, which eliminated many deductions and exempted millions of people with low incomes. At the end of his administration, the Nation was enjoying its longest recorded period of peacetime prosperity without recession or depression.
In foreign policy, Reagan sought to achieve "peace through strength." During his two terms he increased defense spending 35 percent, but sought to improve relations with the Soviet Union. In dramatic meetings with Soviet leader Mikhail Gorbachev, he negotiated a treaty that would eliminate intermediate-range nuclear missiles. Reagan declared war against international terrorism, sending American bombers against Libya after evidence came out that Libya was involved in an attack on American soldiers in a West Berlin nightclub.
By ordering naval escorts in the Persian Gulf, he maintained the free flow of oil during the Iran-Iraq war. In keeping with the Reagan Doctrine, he gave support to anti-Communist insurgencies in Central America, Asia, and Africa.
Overall, the Reagan years saw a restoration of prosperity, and the goal of peace through strength seemed to be within grasp.
Jimmy Carter aspired to make Government "competent and compassionate," responsive to the American people and their expectations. His achievements were notable, but in an era of rising energy costs, mounting inflation, and continuing tensions, it was impossible for his administration to meet these high expectations.
Carter, who has rarely used his full name--James Earl Carter, Jr.--was born October 1, 1924, in Plains, Georgia. Peanut farming, talk of politics, and devotion to the Baptist faith were mainstays of his upbringing. Upon graduation in 1946 from the Naval Academy in Annapolis, Maryland, Carter married Rosalynn Smith. The Carters have three sons, John William (Jack), James Earl III (Chip), Donnel Jeffrey (Jeff), and a daughter, Amy Lynn.
After seven years' service as a naval officer, Carter returned to Plains. In 1962 he entered state politics, and eight years later he was elected Governor of Georgia. Among the new young southern governors, he attracted attention by emphasizing ecology, efficiency in government, and the removal of racial barriers.
Carter announced his candidacy for President in December 1974 and began a two-year campaign that gradually gained momentum. At the Democratic Convention, he was nominated on the first ballot. He chose Senator Walter F. Mondale of Minnesota as his running mate. Carter campaigned hard against President Gerald R. Ford, debating with him three times. Carter won by 297 electoral votes to 241 for Ford.
Carter worked hard to combat the continuing economic woes of inflation and unemployment. By the end of his administration, he could claim an increase of nearly eight million jobs and a decrease in the budget deficit, measured in percentage of the gross national product. Unfortunately, inflation and interest rates were at near record highs, and efforts to reduce them caused a short recession.
Carter could point to a number of achievements in domestic affairs. He dealt with the energy shortage by establishing a national energy policy and by decontrolling domestic petroleum prices to stimulate production. He prompted Government efficiency through civil service reform and proceeded with deregulation of the trucking and airline industries. He sought to improve the environment. His expansion of the national park system included protection of 103 million acres of Alaskan lands. To increase human and social services, he created the Department of Education, bolstered the Social Security system, and appointed record numbers of women, blacks, and Hispanics to Government jobs.
In foreign affairs, Carter set his own style. His championing of human rights was coldly received by the Soviet Union and some other nations. In the Middle East, through the Camp David agreement of 1978, he helped bring amity between Egypt and Israel. He succeeded in obtaining ratification of the Panama Canal treaties. Building upon the work of predecessors, he established full diplomatic relations with the People's Republic of China and completed negotiation of the SALT II nuclear limitation treaty with the Soviet Union.
There were serious setbacks, however. The Soviet invasion of Afghanistan caused the suspension of plans for ratification of the SALT II pact. The seizure as hostages of the U. S. embassy staff in Iran dominated the news during the last 14 months of the administration. The consequences of Iran's holding Americans captive, together with continuing inflation at home, contributed to Carter's defeat in 1980. Even then, he continued the difficult negotiations over the hostages. Iran finally released the 52 Americans the same day Carter left office.
At 2:30 on the morning of August 3, 1923, while visiting in Vermont, Calvin Coolidge received word that he was President. By the light of a kerosene lamp, his father, who was a notary public, administered the oath of office as Coolidge placed his hand on the family Bible.
Coolidge was "distinguished for character more than for heroic achievement," wrote a Democratic admirer, Alfred E. Smith. "His great task was to restore the dignity and prestige of the Presidency when it had reached the lowest ebb in our history ... in a time of extravagance and waste...."
Born in Plymouth, Vermont, on July 4, 1872, Coolidge was the son of a village storekeeper. He was graduated from Amherst College with honors, and entered law and politics in Northampton, Massachusetts. Slowly, methodically, he went up the political ladder from councilman in Northampton to Governor of Massachusetts, as a Republican. En route he became thoroughly conservative.
As President, Coolidge demonstrated his determination to preserve the old moral and economic precepts amid the material prosperity which many Americans were enjoying. He refused to use Federal economic power to check the growing boom or to ameliorate the depressed condition of agriculture and certain industries. His first message to Congress in December 1923 called for isolation in foreign policy, and for tax cuts, economy, and limited aid to farmers.
He rapidly became popular. In 1924, as the beneficiary of what was becoming known as "Coolidge prosperity," he polled more than 54 percent of the popular vote.
In his Inaugural he asserted that the country had achieved "a state of contentment seldom before seen," and pledged himself to maintain the status quo. In subsequent years he twice vetoed farm relief bills, and killed a plan to produce cheap Federal electric power on the Tennessee River.
The political genius of President Coolidge, Walter Lippmann pointed out in 1926, was his talent for effectively doing nothing: "This active inactivity suits the mood and certain of the needs of the country admirably. It suits all the business interests which want to be let alone.... And it suits all those who have become convinced that government in this country has become dangerously complicated and top-heavy...."
Coolidge was both the most negative and remote of Presidents, and the most accessible. He once explained to Bernard Baruch why he often sat silently through interviews: "Well, Baruch, many times I say only 'yes' or 'no' to people. Even that is too much. It winds them up for twenty minutes more."
But no President was kinder in permitting himself to be photographed in Indian war bonnets or cowboy dress, and in greeting a variety of delegations to the White House.
Both his dry Yankee wit and his frugality with words became legendary. His wife, Grace Goodhue Coolidge, recounted that a young woman sitting next to Coolidge at a dinner party confided to him she had bet she could get at least three words of conversation from him. Without looking at her he quietly retorted, "You lose." And in 1928, while vacationing in the Black Hills of South Dakota, he issued the most famous of his laconic statements, "I do not choose to run for President in 1928."
By the time the disaster of the Great Depression hit the country, Coolidge was in retirement. Before his death in January 1933, he confided to an old friend, ". . . I feel I no longer fit in with these times."
When Gerald R. Ford took the oath of office on August 9, 1974, he declared, "I assume the Presidency under extraordinary circumstances.... This is an hour of history that troubles our minds and hurts our hearts."
It was indeed an unprecedented time. He had been the first Vice President chosen under the terms of the Twenty-fifth Amendment and, in the aftermath of the Watergate scandal, was succeeding the first President ever to resign.
Ford was confronted with almost insuperable tasks. There were the challenges of mastering inflation, reviving a depressed economy, solving chronic energy shortages, and trying to ensure world peace.
The President acted to curb the trend toward Government intervention and spending as a means of solving the problems of American society and the economy. In the long run, he believed, this shift would bring a better life for all Americans.
Ford's reputation for integrity and openness had made him popular during his 25 years in Congress. From 1965 to 1973, he was House Minority Leader. Born in Omaha, Nebraska, in 1913, he grew up in Grand Rapids, Michigan. He starred on the University of Michigan football team, then went to Yale, where he served as assistant coach while earning his law degree. During World War II he attained the rank of lieutenant commander in the Navy. After the war he returned to Grand Rapids, where he began the practice of law, and entered Republican politics. A few weeks before his election to Congress in 1948, he married Elizabeth Bloomer. They have four children: Michael, John, Steven, and Susan.
As President, Ford tried to calm earlier controversies by granting former President Nixon a full pardon. His nominee for Vice President, former Governor Nelson Rockefeller of New York, was the second person to fill that office by appointment. Gradually, Ford selected a cabinet of his own.
Ford established his policies during his first year in office, despite opposition from a heavily Democratic Congress. His first goal was to curb inflation. Then, when recession became the Nation's most serious domestic problem, he shifted to measures aimed at stimulating the economy. But, still fearing inflation, Ford vetoed a number of non-military appropriations bills that would have further increased the already heavy budgetary deficit. During his first 14 months as President he vetoed 39 measures. His vetoes were usually sustained.
Ford continued as he had in his Congressional days to view himself as "a moderate in domestic affairs, a conservative in fiscal affairs, and a dyed-in-the-wool internationalist in foreign affairs." A major goal was to help business operate more freely by reducing taxes upon it and easing the controls exercised by regulatory agencies. "We...declared our independence 200 years ago, and we are not about to lose it now to paper shufflers and computers," he said.
In foreign affairs Ford acted vigorously to maintain U. S. power and prestige after the collapse of Cambodia and South Viet Nam. Preventing a new war in the Middle East remained a major objective; by providing aid to both Israel and Egypt, the Ford Administration helped persuade the two countries to accept an interim truce agreement. Detente with the Soviet Union continued. President Ford and Soviet leader Leonid I. Brezhnev set new limitations upon nuclear weapons.
President Ford won the Republican nomination for the Presidency in 1976, but lost the election to his Democratic opponent, former Governor Jimmy Carter of Georgia.
On Inauguration Day, President Carter began his speech: "For myself and for our Nation, I want to thank my predecessor for all he has done to heal our land." A grateful people concurred.
Before his nomination, Warren G. Harding declared, "America's present need is not heroics, but healing; not nostrums, but normalcy; not revolution, but restoration; not agitation, but adjustment; not surgery, but serenity; not the dramatic, but the dispassionate; not experiment, but equipoise; not submergence in internationality, but sustainment in triumphant nationality...."
A Democratic leader, William Gibbs McAdoo, called Harding's speeches "an army of pompous phrases moving across the landscape in search of an idea." Their very murkiness was effective, since Harding's pronouncements remained unclear on the League of Nations, in contrast to the impassioned crusade of the Democratic candidates, Governor James M. Cox of Ohio and Franklin D. Roosevelt.
Thirty-one distinguished Republicans had signed a manifesto assuring voters that a vote for Harding was a vote for the League. But Harding interpreted his election as a mandate to stay out of the League of Nations.
Harding, born near Marion, Ohio, in 1865, became the publisher of a newspaper. He married a divorcee, Mrs. Florence Kling De Wolfe. He was a trustee of the Trinity Baptist Church, a director of almost every important business, and a leader in fraternal organizations and charitable enterprises.
He organized the Citizen's Cornet Band, available for both Republican and Democratic rallies; "I played every instrument but the slide trombone and the E-flat cornet," he once remarked.
Harding's undeviating Republicanism and vibrant speaking voice, plus his willingness to let the machine bosses set policies, led him far in Ohio politics. He served in the state Senate and as Lieutenant Governor, and unsuccessfully ran for Governor. He delivered the nominating address for President Taft at the 1912 Republican Convention. In 1914 he was elected to the Senate, which he found "a very pleasant place."
An Ohio admirer, Harry Daugherty, began to promote Harding for the 1920 Republican nomination because, he later explained, "He looked like a President."
Thus a group of Senators, taking control of the 1920 Republican Convention when the principal candidates deadlocked, turned to Harding. He won the Presidential election by an unprecedented landslide of 60 percent of the popular vote.
Republicans in Congress easily got the President's signature on their bills. They eliminated wartime controls and slashed taxes, established a Federal budget system, restored the high protective tariff, and imposed tight limitations upon immigration.
By 1923 the postwar depression seemed to be giving way to a new surge of prosperity, and newspapers hailed Harding as a wise statesman carrying out his campaign promise--"Less government in business and more business in government."
Behind the facade, not all of Harding's Administration was so impressive. Word began to reach the President that some of his friends were using their official positions for their own enrichment. Alarmed, he complained, "My...friends...they're the ones that keep me walking the floors nights!"
Looking wan and depressed, Harding journeyed westward in the summer of 1923, taking with him his upright Secretary of Commerce, Herbert Hoover. "If you knew of a great scandal in our administration," he asked Hoover, "would you for the good of the country and the party expose it publicly or would you bury it?" Hoover urged publishing it, but Harding feared the political repercussions.
He did not live to find out how the public would react to the scandals of his administration. In August of 1923, he died in San Francisco of a heart attack.
Reconciliation was the first goal set by President Richard M. Nixon. The Nation was painfully divided, with turbulence in the cities and war overseas. During his Presidency, Nixon succeeded in ending American fighting in Viet Nam and improving relations with the U.S.S.R. and China. But the Watergate scandal brought fresh divisions to the country and ultimately led to his resignation.
His election in 1968 had climaxed a career unusual on two counts: his early success and his comeback after being defeated for President in 1960 and for Governor of California in 1962.
Born in California in 1913, Nixon had a brilliant record at Whittier College and Duke University Law School before beginning the practice of law. In 1940, he married Patricia Ryan; they had two daughters, Patricia (Tricia) and Julie. During World War II, Nixon served as a Navy lieutenant commander in the Pacific.
On leaving the service, he was elected to Congress from his California district. In 1950, he won a Senate seat. Two years later, General Eisenhower selected Nixon, age 39, to be his running mate.
As Vice President, Nixon took on major duties in the Eisenhower Administration. Nominated for President by acclamation in 1960, he lost by a narrow margin to John F. Kennedy. In 1968, he again won his party's nomination, and went on to defeat Vice President Hubert H. Humphrey and third-party candidate George C. Wallace.
His accomplishments while in office included revenue sharing, the end of the draft, new anticrime laws, and a broad environmental program. As he had promised, he appointed Justices of conservative philosophy to the Supreme Court. One of the most dramatic events of his first term occurred in 1969, when American astronauts made the first moon landing.
Some of his most acclaimed achievements came in his quest for world stability. During visits in 1972 to Beijing and Moscow, he reduced tensions with China and the U.S.S.R. His summit meetings with Russian leader Leonid I. Brezhnev produced a treaty to limit strategic nuclear weapons. In January 1973, he announced an accord with North Viet Nam to end American involvement in Indochina. In 1974, his Secretary of State, Henry Kissinger, negotiated disengagement agreements between Israel and
its opponents, Egypt and Syria.
In his 1972 bid for office, Nixon defeated Democratic candidate George McGovern by one of the widest margins on record.
Within a few months, his administration was embattled over the so-called "Watergate" scandal, stemming from a break-in at the offices of the Democratic National Committee during the 1972 campaign. The break-in was traced to officials of the Committee to Re-elect the President. A number of administration officials resigned; some were later convicted of offenses connected with efforts to cover up the affair. Nixon denied any personal involvement, but the courts forced him to
yield tape recordings which indicated that he had, in fact, tried to divert the investigation.
As a result of unrelated scandals in Maryland, Vice President Spiro T. Agnew resigned in 1973. Nixon nominated, and Congress approved, House Minority Leader Gerald R. Ford as Vice President.
Faced with what seemed almost certain impeachment, Nixon announced on August 8, 1974, that he would resign the next day to begin "that process of healing which is so desperately needed in America."
In his last years, Nixon gained praise as an elder statesman. By the time of his death on April 22, 1994, he had written numerous books on his experiences in public life and on foreign policy.
"A Great Society" for the American people and their fellow men elsewhere was the vision of Lyndon B. Johnson. In his first years of office he obtained passage of one of the most extensive legislative programs in the Nation's history. Maintaining collective security, he carried on the rapidly growing struggle to restrain Communist encroachment in Viet Nam.
Johnson was born on August 27, 1908, in central Texas, not far from Johnson City, which his family had helped settle. He felt the pinch of rural poverty as he grew up, working his way through Southwest Texas State Teachers College (now known as Texas State University-San Marcos); he learned compassion for the poverty of others when he taught students of Mexican descent.
In 1937 he campaigned successfully for the House of Representatives on a New Deal platform, effectively aided by his wife, the former Claudia "Lady Bird" Taylor, whom he had married in 1934.
During World War II he served briefly in the Navy as a lieutenant commander, winning a Silver Star in the South Pacific. After six terms in the House, Johnson was elected to the Senate in 1948. In 1953, he became the youngest Minority Leader in Senate history, and the following year, when the Democrats won control, Majority Leader. With rare skill he obtained passage of a number of key Eisenhower measures.
In the 1960 campaign, Johnson, as John F. Kennedy's running mate, was elected Vice President. On November 22, 1963, when Kennedy was assassinated, Johnson was sworn in as President.
First he obtained enactment of the measures President Kennedy had been urging at the time of his death--a new civil rights bill and a tax cut. Next he urged the Nation "to build a great society, a place where the meaning of man's life matches the marvels of man's labor." In 1964, Johnson won the Presidency with 61 percent of the vote and had the widest popular margin in American history--more than 15,000,000 votes.
The Great Society program became Johnson's agenda for Congress in January 1965: aid to education, attack on disease, Medicare, urban renewal, beautification, conservation, development of depressed regions, a wide-scale fight against poverty, control and prevention of crime and delinquency, removal of obstacles to the right to vote. Congress, at times augmenting or amending, rapidly enacted Johnson's recommendations. Millions of elderly people found succor through the 1965
Medicare amendment to the Social Security Act.
Under Johnson, the country made spectacular explorations of space in a program he had championed since its start. When three astronauts successfully orbited the moon in December 1968, Johnson congratulated them: "You've taken ... all of us, all over the world, into a new era. . . . "
Nevertheless, two overriding crises had been gaining momentum since 1965. Despite the beginning of new antipoverty and anti-discrimination programs, unrest and rioting in black ghettos troubled the Nation. President Johnson steadily exerted his influence against segregation and on behalf of law and order, but there was no early solution.
The other crisis arose from Viet Nam. Despite Johnson's efforts to end Communist aggression and achieve a settlement, fighting continued. Controversy over the war had become acute by the end of March 1968, when he limited the bombing of North Viet Nam in order to initiate negotiations. At the same time, he startled the world by withdrawing as a candidate for re-election so that he might devote his full efforts, unimpeded by politics, to the quest for peace.
When he left office, peace talks were under way; he did not live to see them successful, but died suddenly of a heart attack at his Texas ranch on January 22, 1973.
On November 22, 1963, when he was hardly past his first thousand days in office, John Fitzgerald Kennedy was killed by an assassin's bullets as his motorcade wound through Dallas, Texas. Kennedy was the youngest man elected President; he was the youngest to die.
Of Irish descent, he was born in Brookline, Massachusetts, on May 29, 1917. Graduating from Harvard in 1940, he entered the Navy. In 1943, when his PT boat was rammed and sunk by a Japanese destroyer, Kennedy, despite grave injuries, led the survivors through perilous waters to safety.
Back from the war, he became a Democratic Congressman from the Boston area, advancing in 1953 to the Senate. He married Jacqueline Bouvier on September 12, 1953. In 1955, while recuperating from a back operation, he wrote Profiles in Courage, which won the Pulitzer Prize in history.
In 1956 Kennedy almost gained the Democratic nomination for Vice President, and four years later was a first-ballot nominee for President. Millions watched his television debates with the Republican candidate, Richard M. Nixon. Winning by a narrow margin in the popular vote, Kennedy became the first Roman Catholic President.
His Inaugural Address offered the memorable injunction: "Ask not what your country can do for you--ask what you can do for your country." As President, he set out to redeem his campaign pledge to get America moving again. His economic programs launched the country on its longest sustained expansion since World War II; before his death, he laid plans for a massive assault on persisting pockets of privation and poverty.
Responding to ever more urgent demands, he took vigorous action in the cause of equal rights, calling for new civil rights legislation. His vision of America extended to the quality of the national culture and the central role of the arts in a vital society.
He wished America to resume its old mission as the first nation dedicated to the revolution of human rights. With the Alliance for Progress and the Peace Corps, he brought American idealism to the aid of developing nations. But the hard reality of the Communist challenge remained.
Shortly after his inauguration, Kennedy permitted a band of Cuban exiles, already armed and trained, to invade their homeland. The attempt to overthrow the regime of Fidel Castro was a failure. Soon thereafter, the Soviet Union renewed its campaign against West Berlin. Kennedy replied by reinforcing the Berlin garrison and increasing the Nation's military strength, including new efforts in outer space. Confronted by this reaction, Moscow, after the erection of the Berlin Wall, relaxed its
pressure in central Europe.
Instead, the Russians now sought to install nuclear missiles in Cuba. When this was discovered by air reconnaissance in October 1962, Kennedy imposed a quarantine on all offensive weapons bound for Cuba. While the world trembled on the brink of nuclear war, the Russians backed down and agreed to take the missiles away. The American response to the Cuban crisis evidently persuaded Moscow of the futility of nuclear blackmail.
Kennedy now contended that both sides had a vital interest in stopping the spread of nuclear weapons and slowing the arms race--a contention which led to the test ban treaty of 1963. The months after the Cuban crisis showed significant progress toward his goal of "a world of law and free choice, banishing the world of war and coercion." His administration thus saw the beginning of new hope for both the equal rights of Americans and the peace of the world.
Bringing to the Presidency his prestige as commanding general of the victorious forces in Europe during World War II, Dwight D. Eisenhower obtained a truce in Korea and worked incessantly during his two terms to ease the tensions of the Cold War. He pursued the moderate policies of "Modern Republicanism," pointing out as he left office, "America is today the strongest, most influential, and most productive nation in the world."
Born in Texas in 1890, brought up in Abilene, Kansas, Eisenhower was the third of seven sons. He excelled in sports in high school, and received an appointment to West Point. Stationed in Texas as a second lieutenant, he met Mamie Geneva Doud, whom he married in 1916.
In his early Army career, he excelled in staff assignments, serving under Generals John J. Pershing, Douglas MacArthur, and Walter Krueger. After Pearl Harbor, General George C. Marshall called him to Washington for a war plans assignment. He commanded the Allied Forces landing in North Africa in November 1942; on D-Day, 1944, he was Supreme Commander of the troops invading France.
After the war, he became President of Columbia University, then took leave to assume supreme command over the new NATO forces being assembled in 1951. Republican emissaries to his headquarters near Paris persuaded him to run for President in 1952.
"I like Ike" was an irresistible slogan; Eisenhower won a sweeping victory.
Negotiating from military strength, he tried to reduce the strains of the Cold War. In 1953, the signing of a truce brought an armed peace along the border of South Korea. The death of Stalin the same year caused shifts in relations with Russia.
New Russian leaders consented to a peace treaty neutralizing Austria. Meanwhile, both Russia and the United States had developed hydrogen bombs. With the threat of such destructive force hanging over the world, Eisenhower, with the leaders of the British, French, and Russian governments, met at Geneva in July 1955.
The President proposed that the United States and Russia exchange blueprints of each other's military establishments and "provide within our countries facilities for aerial photography to the other country." The Russians greeted the proposal with silence, but were so cordial throughout the meetings that tensions relaxed.
Suddenly, in September 1955, Eisenhower suffered a heart attack in Denver, Colorado. After seven weeks he left the hospital, and in February 1956 doctors reported his recovery. In November he was elected for his second term.
In domestic policy the President pursued a middle course, continuing most of the New Deal and Fair Deal programs, emphasizing a balanced budget. As desegregation of schools began, he sent troops into Little Rock, Arkansas, to assure compliance with the orders of a Federal court; he also ordered the complete desegregation of the Armed Forces. "There must be no second class citizens in this country," he wrote.
Eisenhower concentrated on maintaining world peace. He watched with pleasure the development of his "atoms for peace" program--the loan of American uranium to "have not" nations for peaceful purposes.
Before he left office in January 1961, for his farm in Gettysburg, he urged the necessity of maintaining an adequate military strength, but cautioned that vast, long-continued military expenditures could breed potential dangers to our way of life. He concluded with a prayer for peace "in the goodness of time." Both themes remained timely and urgent when he died, after a long illness, on March 28, 1969.

View File

@ -0,0 +1,29 @@
With the assassination of <NAME> President McKinley </NAME> , <NAME> Theodore Roosevelt </NAME>, not quite 43, became the youngest President in the Nation's history . He brought new excitement and power to the Presidency, as he vigorously led Congress and the American public toward progressive reforms and a strong foreign policy.
He took the view that the President as a "steward of the people" should take whatever action necessary for the public good unless expressly forbidden by law or the Constitution ." I did not usurp power," he wrote, "but I did greatly broaden the use of executive power ."
<NAME> Roosevelt </NAME> 's youth differed sharply from that of the log cabin Presidents . He was born in New York City in 1858 into a wealthy family, but he too struggled--against ill health--and in his triumph became an advocate of the strenuous life.
In 1884 his first wife, Alice Lee Roosevelt, and his mother died on the same day . <NAME> Roosevelt </NAME> spent much of the next two years on his ranch in the Badlands of Dakota Territory . There he mastered his sorrow as he lived in the saddle, driving cattle, hunting big game--he even captured an outlaw . On a visit to London, he married Edith Carow in December 1886.
During the Spanish-American War, <NAME> Roosevelt </NAME> was lieutenant colonel of the Rough Rider Regiment, which he led on a charge at the battle of San Juan . He was one of the most conspicuous heroes of the war.
Boss Tom Platt, needing a hero to draw attention away from scandals in New York State, accepted <NAME> Roosevelt </NAME> as the Republican candidate for Governor in 1898 . <NAME> Roosevelt </NAME> won and served with distinction.
As President , <NAME> Roosevelt </NAME> held the ideal that the Government should be the great arbiter of the conflicting economic forces in the Nation, especially between capital and labor, guaranteeing justice to each and dispensing favors to none.
<NAME> Roosevelt </NAME> emerged spectacularly as a "trust buster" by forcing the dissolution of a great railroad combination in the Northwest . Other antitrust suits under the Sherman Act followed.
<NAME> Roosevelt </NAME> steered the United States more actively into world politics . He liked to quote a favorite proverb, "Speak softly and carry a big stick .... "
Aware of the strategic need for a shortcut between the Atlantic and Pacific, <NAME> Roosevelt </NAME> ensured the construction of the Panama Canal . His corollary to the Monroe Doctrine prevented the establishment of foreign bases in the Caribbean and arrogated the sole right of intervention in Latin America to the United States.
He won the Nobel Peace Prize for mediating the Russo-Japanese War, reached a Gentleman's Agreement on immigration with Japan, and sent the Great White Fleet on a goodwill tour of the world.
Some of <NAME> Theodore Roosevelt </NAME> 's most effective achievements were in conservation . He added enormously to the national forests in the West, reserved lands for public use, and fostered great irrigation projects.
He crusaded endlessly on matters big and small, exciting audiences with his high-pitched voice, jutting jaw, and pounding fist . "The life of strenuous endeavor" was a must for those around him, as he romped with his five younger children and led ambassadors on hikes through Rock Creek Park in Washington, D .C.
Leaving the Presidency in 1909 , <NAME> Roosevelt </NAME> went on an African safari, then jumped back into politics . In 1912 he ran for President on a Progressive ticket . To reporters he once remarked that he felt as fit as a bull moose , the name of his new party.
While campaigning in Milwaukee , he was shot in the chest by a fanatic . <NAME> Roosevelt </NAME> soon recovered, but his words at that time would have been applicable at the time of his death in 1919: "No man has had a happier life than I have led; a happier life in every way ."

View File

@ -0,0 +1,257 @@
!!
?!
??
!?
`
``
''
-lrb-
-rrb-
-lsb-
-rsb-
,
.
:
;
"
'
?
<
>
{
}
[
]
+
-
(
)
&
%
$
@
!
^
#
*
..
...
'll
's
'm
a
about
above
after
again
against
all
am
an
and
any
are
aren't
as
at
be
because
been
before
being
below
between
both
but
by
can
can't
cannot
could
couldn't
did
didn't
do
does
doesn't
doing
don't
down
during
each
few
for
from
further
had
hadn't
has
hasn't
have
haven't
having
he
he'd
he'll
he's
her
here
here's
hers
herself
him
himself
his
how
how's
i
i'd
i'll
i'm
i've
if
in
into
is
isn't
it
it's
its
itself
let's
me
more
most
mustn't
my
myself
no
nor
not
of
off
on
once
only
or
other
ought
our
ours
ourselves
out
over
own
same
shan't
she
she'd
she'll
she's
should
shouldn't
so
some
such
than
that
that's
the
their
theirs
them
themselves
then
there
there's
these
they
they'd
they'll
they're
they've
this
those
through
to
too
under
until
up
very
was
wasn't
we
we'd
we'll
we're
we've
were
weren't
what
what's
when
when's
where
where's
which
while
who
who's
whom
why
why's
with
won't
would
wouldn't
you
you'd
you'll
you're
you've
your
yours
yourself
yourselves
###
return
arent
cant
couldnt
didnt
doesnt
dont
hadnt
hasnt
havent
hes
heres
hows
im
isnt
its
lets
mustnt
shant
shes
shouldnt
thats
theres
theyll
theyre
theyve
wasnt
were
werent
whats
whens
wheres
whos
whys
wont
wouldnt
youd
youll
youre
youve

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,173 @@
Noticing ``a lot of tread off the tires,'' Cone, 31, is allowing
Royals pitching coach Bruce Kison to reshape him into a cerebral
type.
<p>
``I study more, do a lot more video work, try to be more
efficient, keep the pitch count down and don't worry about
strikeouts,'' Cone said.
<p>
``I felt invulnerable. I was a kid who'd had some success in New
York,'' said Cone, who joined the Mets the year after their 1986
World Series victory. This was four years before the Anita
Hill-Clarence Thomas hearings awakened the public to the
seriousness of sexual harassment &MD; in all its forms and degrees.
<p>
Mrs. Clark and her broker, Gerry Jo Cranmer, of Turpin Real
Estate in Far Hills, toured other properties after she had seen
River Run Farm. The four-bedroom 1730 farmhouse there has had
several additions, including a 27- by 40-foot glass-enclosed wing
with a 16- by 35-foot indoor swimming pool.
<p>
Construction lasted from 1902 to 1905. The Hammonds put up the
imposing Italian Renaissance-style 9 East 91st Street, designed by
Carrere &AMP; Hastings. The Burdens built the Beaux-Arts style 7 East
91st Street, slightly shorter but just as grand.
<p>
In 1985, such companies took disciplinary actions against 3.2% of their insured physicians, including termination of coverage for 0.66% of them, according to the medical journal's study.
<p>
BURNS'S RECRUITING SPREE: Arnold Burns, the former Justice Department official who heads the Washington office of Proskauer Rose Goetz &amp; Mendelsohn, continues to bring new blood to the New York law firm.
<p>
Both lawyers were recruited by Mr. Burns from the Washington office of Philadelphia's Ballard, Spahr, Andrews &amp; Ingersoll and specialize in lender liability litigation.
<p>
The SEC statement prompted U.S. Representatives Neal Smith (D., Iowa) and Harold Rogers (R., Ky.) to suggest that legislation may be necessary to give the SEC more authority over such transactions.
<p>
Reckitt &amp; Colman advanced 32 to #12.33.
The maker of health and household products said strong European sales growth in household goods and toiletries helped boost first-half pretax profit by 14%.
<p>
C'mon Fred, you 'n' me had better go to Dunkin' Donuts 'cause somethin' is happening there.
<p>
www.aarnet.au
chris@not.a.university.edu
nlp.stanford.edu/index.shtml
<p>
Sadr vows to fight ‘Najaf occupation’: Moqtada Sadr vowed on Monday to fight the “occupation of Najaf” until his last drop of blood, as clashes continued between his militiamen and US forces for a fifth day.
<p>
More >> The kind of funny thing on Yahoo!
<p>
The colour was recognised.
Fred cannot understand it.
Full credit for starting the revels goes to GOP Govs. Christie Whitman
(N.J.) and Pete Wilson (Calif.), who care more about their own press
clippings than about electing Mr. Dole. First Ms. Whitman backed Mr. Clinton
on the partial-birth ban.
<p>
The plane crashed in extremely windy weather as it prepared to
land after a 45-minute flight from the Bosnian town of Tuzla at
2:52 p.m. Croatian time (4:52 a.m. PST).
<p>
I 'm sure they 're honest .
The US labelled North Korea part of an 'axis of evil'.
Playwright Miller, the Pulitzer Prize-winning author of 'Death of a Salesman', has died.
Cote d'Ivoire.
Around 50 million years ago, in contrast, oxygen levels in the atmosphere measured 23 percent--2 percent higher than today.
The National Assn. of Realtors reported that sales of existing homes had shot up to an annual rate of 7.29 million in August, 7.8% higher than a year earlier.
IBM rose 2-1/4 to 85 9/16.
Sort of a horse, a wagon and a knapsack all rolled into one.
The film was produced by Jason P. Brubaker of Brubaker Films LLC, in association with T-Street Productions.
Other finalists are likely to include Cameroon's Samuel Eto'o.
The US Air Force on Tuesday tested a powerful new 21,000-pound (9,450-kg) bomb, the biggest conventional bomb in the US military's arsenal that could be used against critical targets in a
possible war with Iraq.
The Premier League confirmed on Monday that Ashley Cole has launched an appeal against the 100, 000-pound fine imposed at the independent tapping up inquiry.
``Cpl. Banuelos was carrying out lawful orders of his chain of
command,'' he said. ``He was doing what he was supposed to do.''
Jia's "Pickpocket" (1997) and "Platform" (2000) were not
permitted to screen in China.
Wine Steward ($39.95) and the VinoVault ($299.95). Information: (800) 560-8860
or www.peksystems.com.
Thus, an Egyptian newspaper, Al-Masri al-Youm, refers to his "Muslim
origins." Libyan ruler Muammar al-Qaddafi referred to Obama as "a
Muslim" and a person with an "African and Islamic identity." One
Al-Jazeera analysis calls him a "non-Christian man," a second refers to
his "Muslim Kenyan" father, and a third, by Naseem Jamali, notes that
"Obama may not want to be counted as a Muslim but Muslims are eager to
count him as one of their own."
Dick Taylor, a longtime golf journalist, died Thursday at his home in Pinehurst, N.C. He was 72.
"Friendship" was one; "Memories of Other
Days"; "Religion in History"; "Dream Land"; "The Advantages of
Culture"; "Forms of Political Government Compared and Contrasted";
"Melancholy"; "Filial Love"; "Heart Longings," etc., etc.
A prevalent feature in these compositions was a nursed and petted
melancholy; another was a wasteful and opulent gush of "fine language";
another was a tendency to lug in by the ears particularly prized words
and phrases until they were worn entirely out; and a peculiarity that
conspicuously marked and marred them was the inveterate and intolerable
sermon that wagged its crippled tail at the end of each and every one
of them.
I'd've
Mr. Meier-Müller awoke.
Why are you shouting?!?!
Why are you shouting!?!?!
Rep. Mike Quigley, Ill., said that there was no cause for alarm.
Rep. Mike Quigley, D-Ill., said that there was no cause for alarm.
People, y'know, who started out honest. Y'all don't know what happens.
Louisa Moats, Ed.D., an adjunct faculty member.
Retail designer, J'Amy Owens was driving north.
Joyce Roche, president, chief operating officer, Carson Inc.
Vicki G. Roman, vice president, treasurer, Coca-Cola
Enterprises.
Garrett last year replaced singer N'Dea Davenport.
Nor may tyrants e'er spit in thy face...
She is a member of the H'mong ethnic minority.
Test soft hyphens
Rep. Mike Qui­gley, D-Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D­Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D-­Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D­-Ill., said that there was no cause for alarm.
This sentence contains less than 4­2 soft hy­phens.
This sentence contains less than ­2 soft hy­phens.
My dog once scared away 5­5 geese at once.
My dog once scared away 5­5 geese at once.
Cap'n Crunch
we assembled s'mores recently
With his background in S&Ls and real estate
``Lift Ev'ry Voice and Sing''
Check known legacy character entities: &quot;Fred&apos;s Donuts &AMP; Fries&quot; &MD; a well-known store &mdash; is on the Champs Elys&eacute;. Also &lt; and &gt; may appear.
I like ¼ to ½ of what I read. Taco Bell is on the hook
for a free taco to everyone in America, with redemption Tuesday
from 2-5 p.m. The Sox are the 11th team to win at least six
straight Series games.
<<Datastream Systems>> Inc. (DSTM)-Two million common shares.
I bought 3000.5 shares of ABCD.EFG.
The file was named chart2.gif.
You can find the latest news about the Philadelphia Eagles at philadelphiaeagles.com.
If you want to learn about Python, the programming language, go to python.org, definitely not python.com.
He works for ditan360.com.
2.Heat the olive oil in the frying pan.
My favorite vacation picture was 45.gif.
Linus Torvalds hates C++.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,173 @@
Noticing ``a lot of tread off the tires,'' Cone, 31, is allowing
Royals pitching coach Bruce Kison to reshape him into a cerebral
type.
<p>
``I study more, do a lot more video work, try to be more
efficient, keep the pitch count down and don't worry about
strikeouts,'' Cone said.
<p>
``I felt invulnerable. I was a kid who'd had some success in New
York,'' said Cone, who joined the Mets the year after their 1986
World Series victory. This was four years before the Anita
Hill-Clarence Thomas hearings awakened the public to the
seriousness of sexual harassment &MD; in all its forms and degrees.
<p>
Mrs. Clark and her broker, Gerry Jo Cranmer, of Turpin Real
Estate in Far Hills, toured other properties after she had seen
River Run Farm. The four-bedroom 1730 farmhouse there has had
several additions, including a 27- by 40-foot glass-enclosed wing
with a 16- by 35-foot indoor swimming pool.
<p>
Construction lasted from 1902 to 1905. The Hammonds put up the
imposing Italian Renaissance-style 9 East 91st Street, designed by
Carrere &AMP; Hastings. The Burdens built the Beaux-Arts style 7 East
91st Street, slightly shorter but just as grand.
<p>
In 1985, such companies took disciplinary actions against 3.2% of their insured physicians, including termination of coverage for 0.66% of them, according to the medical journal's study.
<p>
BURNS'S RECRUITING SPREE: Arnold Burns, the former Justice Department official who heads the Washington office of Proskauer Rose Goetz &amp; Mendelsohn, continues to bring new blood to the New York law firm.
<p>
Both lawyers were recruited by Mr. Burns from the Washington office of Philadelphia's Ballard, Spahr, Andrews &amp; Ingersoll and specialize in lender liability litigation.
<p>
The SEC statement prompted U.S. Representatives Neal Smith (D., Iowa) and Harold Rogers (R., Ky.) to suggest that legislation may be necessary to give the SEC more authority over such transactions.
<p>
Reckitt &amp; Colman advanced 32 to #12.33.
The maker of health and household products said strong European sales growth in household goods and toiletries helped boost first-half pretax profit by 14%.
<p>
C'mon Fred, you 'n' me had better go to Dunkin' Donuts 'cause somethin' is happening there.
<p>
www.aarnet.au
chris@not.a.university.edu
nlp.stanford.edu/index.shtml
<p>
Sadr vows to fight ‘Najaf occupation’: Moqtada Sadr vowed on Monday to fight the “occupation of Najaf” until his last drop of blood, as clashes continued between his militiamen and US forces for a fifth day.
<p>
More >> The kind of funny thing on Yahoo!
<p>
The colour was recognised.
Fred cannot understand it.
Full credit for starting the revels goes to GOP Govs. Christie Whitman
(N.J.) and Pete Wilson (Calif.), who care more about their own press
clippings than about electing Mr. Dole. First Ms. Whitman backed Mr. Clinton
on the partial-birth ban.
<p>
The plane crashed in extremely windy weather as it prepared to
land after a 45-minute flight from the Bosnian town of Tuzla at
2:52 p.m. Croatian time (4:52 a.m. PST).
<p>
I 'm sure they 're honest .
The US labelled North Korea part of an 'axis of evil'.
Playwright Miller, the Pulitzer Prize-winning author of 'Death of a Salesman', has died.
Cote d'Ivoire.
Around 50 million years ago, in contrast, oxygen levels in the atmosphere measured 23 percent--2 percent higher than today.
The National Assn. of Realtors reported that sales of existing homes had shot up to an annual rate of 7.29 million in August, 7.8% higher than a year earlier.
IBM rose 2-1/4 to 85 9/16.
Sort of a horse, a wagon and a knapsack all rolled into one.
The film was produced by Jason P. Brubaker of Brubaker Films LLC, in association with T-Street Productions.
Other finalists are likely to include Cameroon's Samuel Eto'o.
The US Air Force on Tuesday tested a powerful new 21,000-pound (9,450-kg) bomb, the biggest conventional bomb in the US military's arsenal that could be used against critical targets in a
possible war with Iraq.
The Premier League confirmed on Monday that Ashley Cole has launched an appeal against the 100, 000-pound fine imposed at the independent tapping up inquiry.
``Cpl. Banuelos was carrying out lawful orders of his chain of
command,'' he said. ``He was doing what he was supposed to do.''
Jia's "Pickpocket" (1997) and "Platform" (2000) were not
permitted to screen in China.
Wine Steward ($39.95) and the VinoVault ($299.95). Information: (800) 560-8860
or www.peksystems.com.
Thus, an Egyptian newspaper, Al-Masri al-Youm, refers to his "Muslim
origins." Libyan ruler Muammar al-Qaddafi referred to Obama as "a
Muslim" and a person with an "African and Islamic identity." One
Al-Jazeera analysis calls him a "non-Christian man," a second refers to
his "Muslim Kenyan" father, and a third, by Naseem Jamali, notes that
"Obama may not want to be counted as a Muslim but Muslims are eager to
count him as one of their own."
Dick Taylor, a longtime golf journalist, died Thursday at his home in Pinehurst, N.C. He was 72.
"Friendship" was one; "Memories of Other
Days"; "Religion in History"; "Dream Land"; "The Advantages of
Culture"; "Forms of Political Government Compared and Contrasted";
"Melancholy"; "Filial Love"; "Heart Longings," etc., etc.
A prevalent feature in these compositions was a nursed and petted
melancholy; another was a wasteful and opulent gush of "fine language";
another was a tendency to lug in by the ears particularly prized words
and phrases until they were worn entirely out; and a peculiarity that
conspicuously marked and marred them was the inveterate and intolerable
sermon that wagged its crippled tail at the end of each and every one
of them.
I'd've
Mr. Meier-Müller awoke.
Why are you shouting?!?!
Why are you shouting!?!?!
Rep. Mike Quigley, Ill., said that there was no cause for alarm.
Rep. Mike Quigley, D-Ill., said that there was no cause for alarm.
People, y'know, who started out honest. Y'all don't know what happens.
Louisa Moats, Ed.D., an adjunct faculty member.
Retail designer, J'Amy Owens was driving north.
Joyce Roche, president, chief operating officer, Carson Inc.
Vicki G. Roman, vice president, treasurer, Coca-Cola
Enterprises.
Garrett last year replaced singer N'Dea Davenport.
Nor may tyrants e'er spit in thy face...
She is a member of the H'mong ethnic minority.
Test soft hyphens
Rep. Mike Qui­gley, D-Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D­Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D-­Ill., said that there was no cause for alarm.
Rep. Mike Qui­gley, D­-Ill., said that there was no cause for alarm.
This sentence contains less than 4­2 soft hy­phens.
This sentence contains less than ­2 soft hy­phens.
My dog once scared away 5­5 geese at once.
My dog once scared away 5­5 geese at once.
Cap'n Crunch
we assembled s'mores recently
With his background in S&Ls and real estate
``Lift Ev'ry Voice and Sing''
Check known legacy character entities: &quot;Fred&apos;s Donuts &AMP; Fries&quot; &MD; a well-known store &mdash; is on the Champs Elys&eacute;. Also &lt; and &gt; may appear.
I like ¼ to ½ of what I read. Taco Bell is on the hook
for a free taco to everyone in America, with redemption Tuesday
from 2-5 p.m. The Sox are the 11th team to win at least six
straight Series games.
<<Datastream Systems>> Inc. (DSTM)-Two million common shares.
I bought 3000.5 shares of ABCD.EFG.
The file was named chart2.gif.
You can find the latest news about the Philadelphia Eagles at philadelphiaeagles.com.
If you want to learn about Python, the programming language, go to python.org, definitely not python.com.
He works for ditan360.com.
2.Heat the olive oil in the frying pan.
My favorite vacation picture was 45.gif.
Linus Torvalds hates C++.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
ancestor
aunt
bride
bridegroom
brother
brother-in-law
child
children
dad
daddy
daughter
daughter-in-law
father
father-in-law
fiancee
grampa
gramps
grandchild
grandchildren
granddaughter
grandfather
grandma
grandmother
grandpa
grandparent
grandson
granny
great-granddaughter
great-grandfather
great-grandmother
great-grandparent
great-grandson
great-aunt
great-uncle
groom
half-brother
half-sister
heir
heiress
husband
ma
mama
mom
mommy
mother
mother-in-law
nana
nephew
niece
pa
papa
parent
pop
second cousin
sister
sister-in-law
son
son-in-law
stepbrother
stepchild
stepchildren
stepdad
stepdaughter
stepfather
stepmom
stepmother
stepsister
stepson
uncle
wife

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,215 @@
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="kbe_to_html.xsl"?>
<root xmlns="http://bogusnamespace.com/inference_pilot_evaluation/annotation">
<inference id="RTE2_10" task="QA" length="">
<passage>In Nigeria, by far the most populous country in sub-Saharan Africa, over 2.7 million people are infected with HIV.</passage>
<question/>
<hypothesis>2.7 percent of the people infected with HIV live in Africa.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="RTE1_1637" task="IE" length="">
<passage>Twenty years ago, on June 6, 1968, Sen. Robert F. Kennedy died at Good Samaritan Hospital in Los Angeles, 25 -LCB- hours after he was shot at the Ambassador Hotel by Sirhan Bishara Sirhan.</passage>
<question/>
<hypothesis>Sirhan Bishara Sirhan killed Sen. Robert F. Kennedy</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="RTE2_271" task="IR" length="">
<passage>The May 3 ferry sinking in the Meghna River that claimed the lives of at least 370 people is a reminder of the dangers inherent in this sort of travel. </passage>
<question/>
<hypothesis>100 or more people lost their lives in a ferry sinking.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="RTE2_293" task="IR" length="">
<passage>The Townsend Thoresen cross-Channel ferry Herald of Free Enterprise capsized outside the Belgian port of Zeebrugge on the 6th of March with the loss of 135 lives. </passage>
<question/>
<hypothesis>100 or more people lost their lives in a ferry sinking.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="danr_001" task="" length="">
<passage>I ate cheese and bread.</passage>
<question/>
<hypothesis>I ate bread.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="danr_002" task="" length="">
<passage>I ate cheese and carp.</passage>
<question/>
<hypothesis>I ate carp.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="danr_003" task="" length="">
<passage>I ate cheese and hummus.</passage>
<question/>
<hypothesis>I ate hummus.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="hypernymy1" task="" length="">
<passage>The cat napped.</passage>
<question/>
<hypothesis>The cat slept.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="hypernymy2" task="" length="">
<passage>I like Java.</passage>
<question/>
<hypothesis>I like a programming language.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="hypernymy3" task="" length="">
<passage>Some elephants like mice.</passage>
<question/>
<hypothesis>Some animals like mice.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="hypernymy4" task="" length="">
<passage>Snakes don't like people.</passage>
<question/>
<hypothesis>Some reptiles don't like people.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>yes</response>
</answer>
</inference>
<inference id="chris_001" task="" length="">
<passage>A major US intelligence review has projected that Iran is about a decade
away from manufacturing the key ingredient for a nuclear weapon.</passage>
<question/>
<hypothesis>In January 2000, the Central Intelligence Agency concluded that Iran
might now be able to make a nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_002" task="" length="">
<passage>A major US intelligence review has projected that Iran is about a decade
away from manufacturing the key ingredient for a nuclear weapon.</passage>
<question/>
<hypothesis>On 10 November 2004 the Wall Street Journal reported that European
officials believed Iran was five or six years away from possessing
nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_003" task="" length="">
<passage>On 10 November 2004 the Wall Street Journal reported that European
officials believed Iran was five or six years away from possessing
nuclear weapon.</passage>
<question/>
<hypothesis>Iran does not currently have nuclear weapons, and would appear to be
about two years away from acquiring nuclear weapons.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_004" task="" length="">
<passage>Iran does not currently have nuclear weapons, and would appear to be
about two years away from acquiring nuclear weapons.</passage>
<question/>
<hypothesis>On 10 November 2004 the Wall Street Journal reported that European
officials believed Iran was five or six years away from possessing
nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_005" task="" length="">
<passage>Iranian Foreign Minister Kamal Kharazi, denied his country has "any
program to produce weapons of mass destruction."</passage>
<question/>
<hypothesis>"According to experts, the document is unequivocal proof that Iran's
nuclear project is involved in weapons production".</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_006" task="" length="">
<passage>Iranian Foreign Minister Kamal Kharazi, denied his country has "any
program to produce weapons of mass destruction."</passage>
<question/>
<hypothesis>A major US intelligence review has projected that Iran is about a decade
away from manufacturing the key ingredient for a nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_007" task="" length="">
<passage>"According to experts, the document is unequivocal proof that Iran's
nuclear project is involved in weapons production."</passage>
<question/>
<hypothesis>Iran does not currently have nuclear weapons, and would appear to be
about two years away from acquiring nuclear weapons.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_008" task="" length="">
<passage>Iranian Foreign Minister Kamal Kharazi, denied his country has "any
program to produce weapons of mass destruction."</passage>
<question/>
<hypothesis>In January 2000, the Central Intelligence Agency concluded that Iran
might now be able to make a nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_009" task="" length="">
<passage>In January 2000, the Central Intelligence Agency concluded that Iran
might now be able to make a nuclear weapon.</passage>
<question/>
<hypothesis>A major US intelligence review has projected that Iran is about a decade
away from manufacturing the key ingredient for a nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_010" task="" length="">
<passage>Iran does not currently have nuclear weapons, and would appear to be
about two years away from acquiring nuclear weapons.</passage>
<question/>
<hypothesis>The Jerusalem post reported the Israeli government estimated that Iran
would be able to build an atomic bomb as soon as the year 2008.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_011" task="" length="">
<passage>In January 2000, the Central Intelligence Agency concluded that Iran
might now be able to make a nuclear weapon.</passage>
<question/>
<hypothesis>A major US intelligence review has projected that Iran is about a decade
away from manufacturing the key ingredient for a nuclear weapon.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
<inference id="chris_012" task="" length="">
<passage>In January 2000, the Central Intelligence Agency concluded that Iran
might now be able to make a nuclear weapon.</passage>
<question/>
<hypothesis>The Jerusalem post reported the Israeli government estimated that Iran
would be able to build an atomic bomb as soon as the year 2008.</hypothesis>
<answer id="1" polarity="true" force="plausible" source="world">
<response>don't know</response>
</answer>
</inference>
</root>

View File

@ -0,0 +1 @@
(3 (2 (2 The) (2 Rock)) (4 (3 (2 is) (4 (2 destined) (2 (2 (2 (2 (2 to) (2 (2 be) (2 (2 the) (2 (2 21st) (2 (2 (2 Century) (2 's)) (2 (3 new) (2 (2 ``) (2 Conan)))))))) (2 '')) (2 and)) (3 (2 that) (3 (2 he) (3 (2 's) (3 (2 going) (3 (2 to) (4 (3 (2 make) (3 (3 (2 a) (3 splash)) (2 (2 even) (3 greater)))) (2 (2 than) (2 (2 (2 (2 (1 (2 Arnold) (2 Schwarzenegger)) (2 ,)) (2 (2 Jean-Claud) (2 (2 Van) (2 Damme)))) (2 or)) (2 (2 Steven) (2 Segal))))))))))))) (2 .)))

View File

@ -0,0 +1,21 @@
An egret is any of several herons, most of which are white or buff,
and several of which develop fine plumes (usually milky white) during
the breeding season. Many egrets are members of the genera Egretta or
Ardea which also contain other species named as herons rather than
egrets. The distinction between a heron and an egret is rather vague,
and depends more on appearance than biology. The word "egret" comes
from the French word "aigrette" that means both "silver heron" and
"brush," referring to the long filamentous feathers that seem to
cascade down an egret's back during the breeding season.
Several of the egrets have been reclassified from one genus to another
in recent years: the Great Egret, for example, has been classified as
a member of either Casmerodius, Egretta or Ardea. In the 19th and
early part of the 20th century, some of the world's egret species were
endangered by relentless plume hunting, since hat makers in Europe and
the United States demanded large numbers of egret plumes, leading to
breeding birds being killed in many places around the world. Several
Egretta species, including the Eastern Reef Egret, the Reddish Egret
and the Western Reef Egret have two distinct colours, one of which is
entirely white. Little Blue Heron has all-white juvenile plumage.

View File

@ -0,0 +1,8 @@
An_DT egret_NN is_VBZ any_DT of_IN several_JJ herons_NNS ,_, most_JJS of_IN which_WDT are_VBP white_JJ or_CC buff_NN ,_, and_CC several_JJ of_IN which_WDT develop_VBP fine_JJ plumes_NNS -LRB-_-LRB- usually_RB milky_JJ white_JJ -RRB-_-RRB- during_IN the_DT breeding_VBG season_NN ._.
Many_JJ egrets_NNS are_VBP members_NNS of_IN the_DT genera_NN Egretta_NNP or_CC Ardea_NNP which_WDT also_RB contain_VBP other_JJ species_NNS named_VBN as_IN herons_NNS rather_RB than_IN egrets_NNS ._.
The_DT distinction_NN between_IN a_DT heron_NN and_CC an_DT egret_NN is_VBZ rather_RB vague_JJ ,_, and_CC depends_VBZ more_RBR on_IN appearance_NN than_IN biology_NN ._.
The_DT word_NN ``_`` egret_NN ''_'' comes_VBZ from_IN the_DT French_JJ word_NN ``_`` aigrette_NN ''_'' that_WDT means_VBZ both_CC ``_`` silver_JJ heron_NN ''_'' and_CC ``_`` brush_NN ,_, ''_'' referring_VBG to_TO the_DT long_JJ filamentous_JJ feathers_NNS that_WDT seem_VBP to_TO cascade_NN down_IN an_DT egret_NN 's_POS back_RB during_IN the_DT breeding_VBG season_NN ._.
Several_JJ of_IN the_DT egrets_NNS have_VBP been_VBN reclassified_VBN from_IN one_CD genus_NN to_TO another_DT in_IN recent_JJ years_NNS :_: the_DT Great_NNP Egret_NNP ,_, for_IN example_NN ,_, has_VBZ been_VBN classified_VBN as_IN a_DT member_NN of_IN either_CC Casmerodius_NNP ,_, Egretta_NNP or_CC Ardea_NNP ._.
In_IN the_DT 19th_JJ and_CC early_JJ part_NN of_IN the_DT 20th_JJ century_NN ,_, some_DT of_IN the_DT world_NN 's_POS egret_NN species_NNS were_VBD endangered_VBN by_IN relentless_JJ plume_NN hunting_NN ,_, since_IN hat_NN makers_NNS in_IN Europe_NNP and_CC the_DT United_NNP States_NNPS demanded_VBD large_JJ numbers_NNS of_IN egret_NN plumes_NNS ,_, leading_VBG to_TO breeding_VBG birds_NNS being_VBG killed_VBN in_IN many_JJ places_NNS around_IN the_DT world_NN ._.
Several_JJ Egretta_NN species_NNS ,_, including_VBG the_DT Eastern_NNP Reef_NN Egret_NNP ,_, the_DT Reddish_JJ Egret_NN and_CC the_DT Western_JJ Reef_NN Egret_NN have_VBP two_CD distinct_JJ colors_NNS ,_, one_CD of_IN which_WDT is_VBZ entirely_RB white_JJ ._.
Little_JJ Blue_JJ Heron_NNP has_VBZ all-white_JJ juvenile_JJ plumage_NN ._.

View File

@ -0,0 +1,26 @@
<?xml version="1.0" encoding="utf-8"?>
<xml>
<p>
An egret is any of several herons, most of which are white or buff,
and several of which develop fine plumes (usually milky white) during
the breeding season. Many egrets are members of the genera Egretta or
Ardea which also contain other species named as herons rather than
egrets. The distinction between a heron and an egret is rather vague,
and depends more on appearance than biology. The word "egret" comes
from the French word "aigrette" that means both "silver heron" and
"brush," referring to the long filamentous feathers that seem to
cascade down an egret's back during the breeding season.
</p>
<p>
Several of the egrets have been reclassified from one genus to another
in recent years: the Great Egret, for example, has been classified as
a member of either Casmerodius, Egretta or Ardea. In the 19th and
early part of the 20th century, some of the world's egret species were
endangered by relentless plume hunting, since hat makers in Europe and
the United States demanded large numbers of egret plumes, leading to
breeding birds being killed in many places around the world. Several
Egretta species, including the Eastern Reef Egret, the Reddish Egret
and the Western Reef Egret have two distinct colours, one of which is
entirely white. Little Blue Heron has all-white juvenile plumage.
</p>
</xml>

View File

@ -0,0 +1,106 @@
* NN Number=Sing
* NNP Number=Sing
* NNS Number=Plur
* NNPS Number=Plur
* VBZ Number=Sing|Person=3|Tense=Pres|VerbForm=Fin|Mood=Ind
* VBD VerbForm=Fin|Mood=Ind|Tense=Past
* VBN Tense=Past|VerbForm=Part
* VBP VerbForm=Fin|Mood=Ind|Tense=Pres
* MD VerbForm=Fin
* JJ Degree=Pos
* JJR Degree=Cmp
* JJS Degree=Sup
* CD NumType=Card
am VBP VerbForm=Fin|Mood=Ind|Tense=Pres|Person=1|Number=Sing
was VBD VerbForm=Fin|Mood=Ind|Tense=Past|Number=Sing
i PRP Number=Sing|Person=1|PronType=Prs|Case=Nom
you PRP Person=2|PronType=Prs
he PRP Number=Sing|Person=3|Gender=Masc|PronType=Prs|Case=Nom
she PRP Number=Sing|Person=3|Gender=Fem|PronType=Prs|Case=Nom
it PRP Number=Sing|Person=3|Gender=Neut|PronType=Prs
we PRP Number=Plur|Person=1|PronType=Prs|Case=Nom
they PRP Number=Plur|Person=3|PronType=Prs|Case=Nom
me PRP Number=Sing|Person=1|PronType=Prs|Case=Acc
him PRP Number=Sing|Person=3|Gender=Masc|PronType=Prs|Case=Acc
her PRP Number=Sing|Person=3|Gender=Fem|PronType=Prs|Case=Acc
us PRP Number=Plur|Person=1|PronType=Prs|Case=Acc
them PRP Number=Plur|Person=3|PronType=Prs|Case=Acc
my PRP$ Number=Sing|Person=1|Poss=Yes|PronType=Prs
mine PRP$ Number=Sing|Person=1|Poss=Yes|PronType=Prs
your PRP$ Person=2|Poss=Yes|PronType=Prs
yours PRP$ Person=2|Poss=Yes|PronType=Prs
his PRP$ Number=Sing|Person=3|Gender=Masc|Poss=Yes|PronType=Prs
her PRP$ Number=Sing|Person=3|Gender=Fem|Poss=Yes|PronType=Prs
hers PRP$ Number=Sing|Person=3|Gender=Fem|Poss=Yes|PronType=Prs
its PRP$ Number=Sing|Person=3|Gender=Neut|Poss=Yes|PronType=Prs
our PRP$ Number=Plur|Person=1|Poss=Yes|PronType=Prs
ours PRP$ Number=Plur|Person=1|Poss=Yes|PronType=Prs
their PRP$ Number=Plur|Person=3|Poss=Yes|PronType=Prs
theirs PRP$ Number=Plur|Person=3|Poss=Yes|PronType=Prs
myself PRP Number=Sing|Person=1|PronType=Prs
yourself PRP Number=Sing|Person=2|PronType=Prs
himself PRP Number=Sing|Person=3|Gender=Masc|PronType=Prs
herself PRP Number=Sing|Person=3|Gender=Fem|PronType=Prs
itself PRP Number=Sing|Person=3|Gender=Neut|PronType=Prs
ourselves PRP Number=Plur|Person=1|PronType=Prs
yourselves PRP Number=Plur|Person=2|PronType=Prs
themselves PRP Number=Plur|Person=3|PronType=Prs
the DT Definite=Def|PronType=Art
a DT Definite=Ind|PronType=Art
an DT Definite=Ind|PronType=Art
this DT PronType=Dem|Number=Sing
that DT PronType=Dem|Number=Sing
these DT PronType=Dem|Number=Plur
those DT PronType=Dem|Number=Plur
here RB PronType=Dem
there RB PronType=Dem
then RB PronType=Dem
whose WP$ Poss=Yes
hard RB Degree=Pos
fast RB Degree=Pos
late RB Degree=Pos
long RB Degree=Pos
high RB Degree=Pos
easy RB Degree=Pos
early RB Degree=Pos
far RB Degree=Pos
soon RB Degree=Pos
low RB Degree=Pos
close RB Degree=Pos
well RB Degree=Pos
badly RB Degree=Pos
little RB Degree=Pos
harder RBR Degree=Cmp
faster RBR Degree=Cmp
later RBR Degree=Cmp
longer RBR Degree=Cmp
higher RBR Degree=Cmp
easier RBR Degree=Cmp
quicker RBR Degree=Cmp
earlier RBR Degree=Cmp
further RBR Degree=Cmp
farther RBR Degree=Cmp
sooner RBR Degree=Cmp
slower RBR Degree=Cmp
lower RBR Degree=Cmp
closer RBR Degree=Cmp
better RBR Degree=Cmp
worse RBR Degree=Cmp
less RBR Degree=Cmp
hardest RBS Degree=Sup
fastest RBS Degree=Sup
latest RBS Degree=Sup
longest RBS Degree=Sup
highest RBS Degree=Sup
easiest RBS Degree=Sup
quickest RBS Degree=Sup
earliest RBS Degree=Sup
furthest RBS Degree=Sup
farthest RBS Degree=Sup
soonest RBS Degree=Sup
slowest RBS Degree=Sup
lowest RBS Degree=Sup
closest RBS Degree=Sup
best RBS Degree=Sup
worst RBS Degree=Sup
least RBS Degree=Sup

View File

@ -0,0 +1,344 @@
%
% Context-sensitive mapping of PTB POS tags to
% Universal POS tags.
%
% Author: Sebastian Schuster
% Author: Christopher Manning
%
% The original Penn Treebank WSJ contains 45 POS tags (but almost certainly # for British pound currency is a bad idea!)
% {#=173, $=9,039, ''=8,658, ,=60,489, -LRB-=1,672, -RRB-=1,689, .=48,733, :=6,087, CC=29,462, CD=44,937, DT=101,190,
% EX=1,077, FW=268, IN=121,903, JJ=75,266, JJR=4,042, JJS=2,396, LS=64, MD=11,997, NN=163,935, NNP=114,053,
% NNPS=3,087, NNS=73,964, PDT=441, POS=10,801, PRP=21,357, PRP$=10,241, RB=38,197, RBR=2,175, RBS=555, RP=3,275,
% SYM=70, TO=27,449, UH=117, VB=32,565, VBD=37,493, VBG=18,239, VBN=24,865, VBP=15,377, VBZ=26,436, WDT=5,323,
% WP=2,887, WP$=219, WRB=2,625, ``=8,878}
%
% The Web Treebank corpus adds 6 tags, but doesn't have #, yielding 50 POS tags:
% ADD, AFX, GW, HYPH, NFP, XX
%
% OntoNotes 4.0 has 53 tags. It doesn't have # but adds: -LSB-, -RSB- [both mistakes!], ADD, AFX, CODE, HYPH, NFP,
% X [mistake!], XX.
%
%
% ------------------------------
% Context-sensitive mappings
%
% TO -> PART (in CONJP phrases)
@CONJP < TO=target < VB
relabel target PART
% TO -> PART
@VP < @VP < (/^TO$/=target <... {/.*/})
relabel target PART
% TO -> PART
@VP <: (/^TO$/=target <... {/.*/})
relabel target PART
% TO -> ADP (otherwise)
TO=target <... {/.*/}
relabel target ADP
% Don't do this, we are now treating these as copular constructions
% VB.* -> AUX (for passives where main verb is part of an ADJP)
%@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase|get|got|getting|gets|gotten)$/ ) < (@ADJP [ < VBN|VBD | < (@VP|ADJP < VBN|VBD) < CC ] )
%
%relabel target AUX
%
% VB.* -> AUX (for cases with fronted main VPs)
@SINV < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ ) $-- (@VP < VBD|VBN))
relabel target AUX
% VB.* -> AUX (another, rarer case of fronted VPs)
@SINV < (@VP < (@VP < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ )) $-- (@VP < VBD|VBN))
relabel target AUX
% VB.* -> AUX (passive, case 2)
%SQ|SINV < (/^VB/=target < /^(?i:am|is|are|r|be|being|'s|'re|'m|was|were|been|s|ai|m|art|ar|wase)$/ $++ (VP < VBD|VBN))
%
%relabel target AUX
% VB.* -> AUX (active, case 1)
VP < VP < (/^VB.*$/=target <: /^(?i:will|have|can|would|do|is|was|be|are|has|could|should|did|been|may|were|had|'ll|'ve|does|am|might|ca|'m|being|'s|must|'d|'re|wo|shall|get|ve|s|got|r|m|getting|having|d|re|ll|wilt|v|of|my|nt|gets|du|wud|woud|with|willl|wil|wase|shoul|shal|`s|ould|-ll|most|made|hvae|hav|cold|as|art|ai|ar|a)$/)
relabel target AUX
% VB -> AUX (active, case 2)
@SQ|SINV < (/^VB/=target $++ /^(?:VP)/ <... {/.*/})
relabel target AUX
% VB.* -> VERB
/^VB.*/=target <... {/.*/}
relabel target VERB
% IN -> SCONJ (subordinating conjunctions)
/^SBAR(-[^ ]+)?$/ < (IN=target $++ @S|FRAG|SBAR|SINV <... {/.*/})
relabel target SCONJ
% IN -> SCONJ (subordinating conjunctions II)
@PP < (IN=target $+ @SBAR|S)
relabel target SCONJ
% IN -> ADP (otherwise)
IN=target < __
relabel target ADP
% NN -> SYM (in case of the percent sign)
NN=target <... {/\\%/}
relabel target SYM
% fused det-noun pronouns -> PRON
NN=target < (/^(?i:(somebody|something|someone|anybody|anything|anyone|everybody|everything|everyone|nobody|nothing))$/)
relabel target PRON
% NN -> NOUN (otherwise)
NN=target <... {/.*/}
relabel target NOUN
% NFP -> PUNCT (in case of possibly repeated hyphens, asterisks or tildes)
NFP=target <... {/^(~+|\*+|\-+)$/}
relabel target PUNCT
% NFP -> SYM (otherwise)
NFP=target <... {/.*/}
relabel target SYM
% RB -> PART when it is verbal negation (not or its reductions)
@VP|SINV|SQ|FRAG|ADVP < (RB=target < /^(?i:not|n't|nt|t|n)$/)
relabel target PART
% Otherwise RB -> ADV
RB=target <... {/.*/}
relabel target ADV
% DT -> PRON (pronominal this/that/these/those)
@NP <: (DT=target < /^(?i:th(is|at|ose|ese))$/)
relabel target PRON
%DT -> DET
DT=target < __
relabel target DET
% WDT -> PRON (pronominal that/which)
@WHNP|NP <: (WDT=target < /^(?i:(that|which))$/)
relabel target PRON
% WDT->SCONJ (incorrectly tagged subordinating conjunctions)
@SBAR < (WDT=target < /^(?i:(that|which))$/)
relabel target SCONJ
% WDT -> DET
WDT=target <... {/.*/}
relabel target DET
% ------------------------------
% 1 to 1 mappings
%
%
% CC -> CCONJ
CC=target <... {/.*/}
relabel target CCONJ
% CD -> NUM
CD=target <... {/.*/}
relabel target NUM
% EX -> PRON
EX=target <... {/.*/}
relabel target PRON
% FW -> X
FW=target <... {/.*/}
relabel target X
% JJ.* -> ADJ
/^JJ.*$/=target < __
relabel target ADJ
% LS -> X
LS=target <... {/.*/}
relabel target X
% MD -> AUX
MD=target <... {/.*/}
relabel target AUX
% NNS -> NOUN
NNS=target <... {/.*/}
relabel target NOUN
% NNP -> PROPN
NNP=target <... {/.*/}
relabel target PROPN
% NNPS -> PROPN
NNPS=target <... {/.*/}
relabel target PROPN
% PDT -> DET
PDT=target <... {/.*/}
relabel target DET
% POS -> PART
POS=target <... {/.*/}
relabel target PART
% PRP -> PRON
PRP=target <... {/.*/}
relabel target PRON
% PRP$ -> PRON
/^PRP\$$/=target <... {/.*/}
relabel target PRON
% RBR -> ADV
RBR=target <... {/.*/}
relabel target ADV
% RBS -> ADV
RBS=target <... {/.*/}
relabel target ADV
% RP -> ADP
RP=target <... {/.*/}
relabel target ADP
% UH -> INTJ
UH=target <... {/.*/}
relabel target INTJ
% WP -> PRON
WP=target <... {/.*/}
relabel target PRON
% WP$ -> PRON
/^WP\$$/=target <... {/.*/}
relabel target PRON
% WRB -> ADV
WRB=target <... {/.*/}
relabel target ADV
% `` -> PUNCT
/^``$/=target <... {/.*/}
relabel target PUNCT
% '' -> PUNCT
/^''$/=target < __
relabel target PUNCT
% ( -> PUNCT
/^\($/=target <... {/.*/}
relabel target PUNCT
% ) -> PUNCT
/^\)$/=target <... {/.*/}
relabel target PUNCT
% -LRB- -> PUNCT
/^-LRB-$/=target <... {/.*/}
relabel target PUNCT
% -RRB- -> PUNCT
/^-RRB-$/=target <... {/.*/}
relabel target PUNCT
% , -> PUNCT
/^,$/=target <... {/.*/}
relabel target PUNCT
% . -> PUNCT
/^\.$/=target <... {/.*/}
relabel target PUNCT
% : -> PUNCT
/^:$/=target <... {/.*/}
relabel target PUNCT
% HYPH -> PUNCT
HYPH=target <... {/.*/}
relabel target PUNCT
% # -> SYM
/^#$/=target <... {/.*/}
relabel target SYM
% $ -> SYM. Also note that there is a no-op rule of SYM -> SYM!
/^\$$/=target <... {/.*/}
relabel target SYM
% ADD -> X
ADD=target <... {/.*/}
relabel target X
% AFX -> X
AFX=target <... {/.*/}
relabel target X
% GW -> X
GW=target <... {/.*/}
relabel target X
% XX -> X
XX=target <... {/.*/}
relabel target X

Binary file not shown.

After

Width:  |  Height:  |  Size: 318 B

View File

@ -0,0 +1,12 @@
This directory contains various forms of documentation.
software: the web pages on our apache httpd installation, i.e.,
https://nlp.stanford.edu/software/
releasenotes: the output of running the release scripts to create the
zips we release
classify, lexparser, ner, segmenter: documentation included in various
packages, such as readmes, build files, etc
loglinear: architectural explanation and various tutorials

View File

@ -0,0 +1,12 @@
# This is a rudimentary Makefile for rebuilding the classifier.
# We actually use ant (q.v.) or a Java IDE.
# Compiler and flags: optimize and emit .class files into classes/
JAVAC = javac
JAVAFLAGS = -O -d classes
# Build a dated stanford-classifier jar from all Java sources, then
# copy it to the undated name and clean up the intermediate classes/.
classifier:
mkdir -p classes
$(JAVAC) $(JAVAFLAGS) src/edu/stanford/nlp/*/*.java
cd classes ; jar -cfm ../stanford-classifier-`date +%Y-%m-%d`.jar ../src/edu/stanford/nlp/classify/classifier-manifest.txt edu ; cd ..
cp stanford-classifier-`date +%Y-%m-%d`.jar stanford-classifier.jar
rm -rf classes

View File

@ -0,0 +1,145 @@
Stanford Classifier v4.2.0 - 2020-11-17
-------------------------------------------------
Copyright (c) 2003-2012 The Board of Trustees of
The Leland Stanford Junior University. All Rights Reserved.
Original core classifier code and command line interface by Dan Klein
and Chris Manning. Support code, additional features, etc. by
Kristina Toutanova, Jenny Finkel, Galen Andrew, Joseph Smarr, Chris
Cox, Roger Levy, Rajat Raina, Pi-Chuan Chang, Marie-Catherine de
Marneffe, Eric Yeh, Anna Rafferty, and John Bauer. This release
prepared by John Bauer.
This package contains a maximum entropy classifier.
For more information about the classifier, point a web browser at the included javadoc directory, starting at the Package page for the edu.stanford.nlp.classify package, and looking also at the ColumnDataClassifier class documentation therein.
This software requires Java 8 (JDK 1.8.0+). (You must have installed it
separately. Check the command "java -version".)
QUICKSTART
COMMAND LINE INTERFACE
To classify the included example dataset cheeseDisease (in the examples directory), type the following at the command line while in the main classifier directory:
java -cp "*:." edu.stanford.nlp.classify.ColumnDataClassifier -prop examples/cheese2007.prop
This will classify the included test data, cheeseDisease.test, based on the probability that each example is a cheese or a disease, as calculated by a linear classifier trained on cheeseDisease.train.
The cheese2007.prop file demonstrates how features are specified. The first feature in the file, useClassFeature,
indicates that a feature should be used based on class frequency in the training set. Most other features are
calculated on specific columns of data in your tab-delimited text file. For example, "1.useNGrams=true" indicates
that n-gram features should be created for the values in column 1 (numbering begins at 0!). Note that you must
specify, for example, "true" in "1.useNGrams=true"; "1.useNGrams" alone will not cause n-gram features to be created.
N-gram features are character subsequences of the string in the column, for example, "t", "h", "e", "th", "he",
"the" from the word "the". You can also specify various other kinds of features such as just using the string value
as a categorical feature (1.useString=true) or splitting up a longer string into bag-of-words features
(1.splitWordsRegexp=[ ] 1.useSplitWords=true). The prop file also allows a choice of printing and optimization
options, and allows you to specify training and test files (e.g., in cheese2007.prop under the "Training input"
comment). See the javadoc for ColumnDataClassifier within the edu.stanford.nlp.classify package for more information
on these and other options.
Another included dataset is the iris dataset which uses numerical features to separate types of irises. To specify the use of a real-valued rather than categorical feature, you can use one or more of "realValued", "logTransform", or "logitTransform" for a given column. "realValued" adds the number in the given column as a feature value, while the transform options perform either a log or a logit transform on the value first. The format of these feature options is the same as for categorical features; for instance, iris2007.prop shows the use of real valued features such as "2.realValued=true".
CLASSIFYING YOUR OWN DATA FILES
To classify your own data files, they should be in tab-delimited text from which to make features as shown above, SVMLight format, or as tab-delimited text with the exact feature values you would like. Then specify the train and test files on the command line or in a .prop file with "trainFile=/myPath/myTrainFile.train" and "testFile=/myPath/myTestFile.test". You can also create a serialized classifier using the serializeTo option followed by a file path.
CODE EXAMPLES
You can also directly use the classes in this package to train classifiers within other programs. An example of this is shown in ClassifierExample, in the package edu.stanford.nlp.classify. This class demonstrates how to build a classifier factory, creating a classifier and setting various parameters in the classifier, training the classifier, and finally testing the classifier on a different data set.
NO GUI
This package does not provide a graphical user interface. The
classifier is accessible only via the command line or programmatically.
LICENSE
// Stanford Classifier
// Copyright (c) 2003-2007 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see http://www.gnu.org/licenses/ .
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 2A
// Stanford CA 94305-9020
// USA
// java-nlp-support@lists.stanford.edu
// https://nlp.stanford.edu/software/classifier.html
-------------------------
CHANGES
-------------------------
2020-11-17 4.2.0 Update for compatibility
2020-05-10 4.0.0 Update for compatibility
2018-10-16 3.9.2 Update for compatibility
2018-02-27 3.9.1 Updated for compatibility
2016-10-31 3.7.0 Update for compatibility
2015-12-09 3.6.0 Update for compatibility
2015-04-20 3.5.2 Update for compatibility
2015-01-29 3.5.1 New input/output options, support for GloVe
word vectors
2014-10-26 3.5.0 Upgrade to Java 1.8
2014-08-27 3.4.1 Update for compatibility
2014-06-16 3.4 Update for compatibility
2014-01-04 3.3.1 Bugfix release
2013-11-12 3.3.0 Update for compatibility
2013-06-19 3.2.0 Update for compatibility
2013-04-04 2.1.8 Update to maintain compatibility
2012-11-11 2.1.7 new pair-of-words features
2012-07-09 2.1.6 Minor bug fixes
2012-05-22 2.1.5 Re-release to maintain compatibility
with other releases
2012-03-09 2.1.4 Bugfix for svmlight format
2011-12-16 2.1.3 Re-release to maintain compatibility
with other releases
2011-09-14 2.1.2 Change ColumnDataClassifier to be an object
with API rather than static methods;
ColumnDataClassifier thread safe
2011-06-15 2.1.1 Re-release to maintain compatibility
with other releases
2011-05-15 2.1 Updated with more documentation
2007-08-15 2.0 New command line interface, substantial
increase in options and features
(updated on 2007-09-28 with a bug fix)
2003-05-26 1.0 Initial release

View File

@ -0,0 +1,192 @@
<!-- build.xml file for ant for JavaNLP -->
<!-- Before using this, unjar the sources' jar file into the src/ directory! -->
<!-- A "project" describes a set of targets that may be requested
when Ant is executed. The "default" attribute defines the
target which is executed if no specific target is requested,
and the "basedir" attribute defines the current working directory
from which Ant executes the requested task. This is normally
set to the current working directory.
-->
<project name="JavaNLP" default="compile" basedir=".">
<property name="build.home" value="${basedir}/classes"/>
<property name="build.tests" value="${basedir}/classes"/>
<property name="docs.home" value="${basedir}/docs"/>
<property name="src.home" value="${basedir}/src"/>
<property name="javadoc.home" value="${basedir}/javadoc"/>
<!-- ==================== Compilation Control Options ==================== -->
<!--
These properties control option settings on the Javac compiler when it
is invoked using the <javac> task.
compile.debug Should compilation include the debug option?
compile.deprecation Should compilation include the deprecation option?
compile.optimize Should compilation include the optimize option?
compile.source Source version compatibility
compile.target Target class version compatibility
-->
<property name="compile.debug" value="true"/>
<property name="compile.deprecation" value="false"/>
<property name="compile.optimize" value="true"/>
<property name="compile.source" value="1.8" />
<property name="compile.target" value="1.8" />
<property name="compile.encoding" value="utf-8" />
<!-- ==================== All Target ====================================== -->
<!--
The "all" target is a shortcut for running the "clean" target followed
by the "compile" target, to force a complete recompile.
-->
<target name="all" depends="clean,compile"
description="Clean build and dist directories, then compile"/>
<!-- ==================== Clean Target ==================================== -->
<!--
The "clean" target deletes any previous "build" and "dist" directory,
so that you can be ensured the application can be built from scratch.
-->
<target name="clean" description="Delete old classes">
<delete dir="${build.home}/edu"/>
</target>
<!-- ==================== Compile Target ================================== -->
<!--
The "compile" target transforms source files (from your "src" directory)
into object files in the appropriate location in the build directory.
This example assumes that you will be including your classes in an
unpacked directory hierarchy under "/WEB-INF/classes".
-->
<target name="compile" depends="prepare"
description="Compile Java sources">
<!-- Compile Java classes as necessary -->
<mkdir dir="${build.home}"/>
<javac srcdir="${src.home}"
destdir="${build.home}"
debug="${compile.debug}"
encoding="${compile.encoding}"
deprecation="${compile.deprecation}"
optimize="${compile.optimize}"
source="${compile.source}"
target="${compile.target}"
includeantruntime="false">
<compilerarg value="-Xmaxerrs"/>
<compilerarg value="20"/>
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="javanlp*"/>
</fileset>
</classpath>
<!-- <compilerarg value="-Xlint"/> -->
</javac>
<!-- Copy application resources -->
<!--
<copy todir="${build.home}/WEB-INF/classes">
<fileset dir="${src.home}" excludes="**/*.java"/>
</copy>
-->
</target>
<!-- ==================== Javadoc Target ================================== -->
<!--
The "javadoc" target creates Javadoc API documentation for the Java
classes included in your application. Normally, this is only required
when preparing a distribution release, but is available as a separate
target in case the developer wants to create Javadocs independently.
-->
<target name="javadoc" depends="compile"
description="Create Javadoc API documentation">
<mkdir dir="${javadoc.home}"/>
<javadoc sourcepath="${src.home}"
destdir="${javadoc.home}"
maxmemory="1g"
author="true"
source="${compile.source}"
overview="${src.home}/edu/stanford/nlp/overview.html"
doctitle="Stanford JavaNLP API Documentation"
windowtitle="Stanford JavaNLP API"
encoding="${compile.encoding}"
docencoding="${compile.encoding}"
charset="${compile.encoding}"
packagenames="*">
<!-- Allow @generated, @modifiable and @ordered tags -->
<tag name="generated" scope="all" description="Generated" />
<tag name="modifiable" scope="all" description="Modifiable" />
<tag name="ordered" scope="all" description="Ordered" />
<!-- Depends on lib and classes folders -->
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="javanlp*"/>
</fileset>
<pathelement path="${build.home}" />
</classpath>
<bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
<link href="https://docs.oracle.com/javase/8/docs/api/"/>
</javadoc>
</target>
<!-- ==================== Prepare Target ================================== -->
<!--
The "prepare" target is used to create the "build" destination directory,
and copy the static contents of your web application to it. If you need
to copy static files from external dependencies, you can customize the
contents of this task.
Normally, this task is executed indirectly when needed.
-->
<target name="prepare">
<!-- Create build directories as needed -->
<mkdir dir="${build.home}"/>
</target>
</project>

View File

@ -0,0 +1,188 @@
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:d="http://nlp.stanford.edu/CoreNLP/v1">
<xsl:output method="html"/>
<xsl:template match="/">
<html>
<body>
<center><h2>Stanford CoreNLP XML Output</h2></center>
<hr size="3" color="#333333"/>
<center><h3>Document</h3></center>
<table border="1" style="background-color:#f0f0f0;" align="center">
<tr><th>Document Info</th></tr>
<tr><td>
<xsl:if test="root/document/docId">
<br/><i>DocId</i>: <xsl:value-of select="root/document/docId"/>
</xsl:if>
<xsl:if test="root/document/docDate">
<br/><i>DocDate</i>: <xsl:value-of select="root/document/docDate"/>
</xsl:if>
<xsl:if test="root/document/docSourceType">
<br/><i>DocSourceType</i>: <xsl:value-of select="root/document/docSourceType"/>
</xsl:if>
<xsl:if test="root/document/docType">
<br/><i>DocType</i>: <xsl:value-of select="root/document/docType"/>
</xsl:if>
<xsl:if test="root/document/author">
<br/><i>Author</i>: <xsl:value-of select="root/document/author"/>
</xsl:if>
<xsl:if test="root/document/location">
<br/><i>Location</i>: <xsl:value-of select="root/document/location"/>
</xsl:if>
</td></tr>
<xsl:if test="root/document/text">
<tr><th>Text</th></tr>
<tr><td>
<div class="preformatted">
<xsl:value-of select="root/document/text"/>
</div>
</td></tr>
</xsl:if>
<tr><th>Sentences</th></tr>
<xsl:for-each select="root/document/sentences/sentence">
<tr><td>
<xsl:apply-templates select=".">
<xsl:with-param name="position" select="position()"/>
</xsl:apply-templates>
</td></tr>
</xsl:for-each>
<tr><th>Coreference resolution graph</th></tr>
<tr><td>
<xsl:apply-templates select="root/document/coreference"/>
</td></tr>
</table>
</body>
</html>
</xsl:template>
<xsl:template match="root/document/sentences/sentence">
<xsl:param name="position" select="'0'"/>
<p><i><b>Sentence #<xsl:value-of select="$position"/></b></i>
<xsl:if test="@sentiment">
<xsl:text> Sentiment: </xsl:text><xsl:value-of select="@sentiment"/>
</xsl:if>
</p>
<p>
<i>Tokens</i><br/>
<xsl:apply-templates select="tokens"/>
</p>
<p>
<i>Parse tree</i><br/>
<xsl:value-of select="parse"/>
</p>
<p>
<i>Uncollapsed dependencies</i>
<ul>
<xsl:for-each select="dependencies[@type='basic-dependencies']">
<xsl:apply-templates select="dep"/>
</xsl:for-each>
</ul>
</p>
<p>
<i>Enhanced dependencies</i>
<ul>
<xsl:for-each select="dependencies[@type='collapsed-ccprocessed-dependencies']">
<xsl:apply-templates select="dep"/>
</xsl:for-each>
</ul>
</p>
</xsl:template>
<xsl:template match="tokens">
<table border="1">
<tr>
<th>Id</th>
<th>Word</th>
<th>Lemma</th>
<th>Char begin</th>
<th>Char end</th>
<th>POS</th>
<th>NER</th>
<th>Normalized NER</th>
<th>Speaker</th>
<th>Sentiment</th>
</tr>
<xsl:for-each select="token">
<tr>
<td><xsl:value-of select="@id"/></td>
<td><xsl:value-of select="word"/></td>
<td><xsl:value-of select="lemma"/></td>
<td><xsl:value-of select="CharacterOffsetBegin"/></td>
<td><xsl:value-of select="CharacterOffsetEnd"/></td>
<td><xsl:value-of select="POS"/></td>
<td><xsl:value-of select="NER"/></td>
<td><xsl:value-of select="NormalizedNER"/></td>
<td><xsl:value-of select="Speaker"/></td>
<td><xsl:value-of select="sentiment"/></td>
</tr>
</xsl:for-each>
</table>
</xsl:template>
<xsl:template match="dependencies">
<ul>
<xsl:for-each select="dep">
<xsl:apply-templates select="."/>
</xsl:for-each>
</ul>
</xsl:template>
<xsl:template match="dep">
<li>
<xsl:value-of select="@type"/>
(
<xsl:value-of select="governor"/><xsl:if test="governor/@copy">^<xsl:value-of select="governor/@copy"/></xsl:if>-<xsl:value-of select="governor/@idx"/>
,
<xsl:value-of select="dependent"/><xsl:if test="dependent/@copy">^<xsl:value-of select="dependent/@copy"/></xsl:if>-<xsl:value-of select="dependent/@idx"/>
)
<xsl:if test="@extra">(extra)</xsl:if>
</li>
</xsl:template>
<xsl:template match="coreference">
<ol>
<xsl:for-each select="coreference">
<li>
<table border="1">
<tr>
<th>Sentence</th>
<th>Head</th>
<th>Text</th>
<th>Context</th>
</tr>
<xsl:for-each select="mention">
<tr>
<td><xsl:value-of select="sentence"/></td>
<td><xsl:value-of select="head"/> <xsl:if test="@representative"> (gov) </xsl:if></td>
<td><xsl:value-of select="text"/></td>
<td><xsl:if test="leftContext or rightContext">
...<xsl:value-of select="leftContext"/>
<span style="background-color: #99ff99;">
<xsl:text> </xsl:text>
<xsl:value-of select="text"/></span>
<xsl:text> </xsl:text>
<xsl:value-of select="rightContext"/>...
</xsl:if>
</td>
</tr>
</xsl:for-each>
</table>
</li>
</xsl:for-each>
</ol>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,6 @@
Manifest-Version: 1.0
Implementation-Version: 3.7.0
Built-Date: 2016-04-27
Created-By: Stanford JavaNLP
Main-class: edu.stanford.nlp.pipeline.StanfordCoreNLP

View File

@ -0,0 +1,25 @@
# This is a rudimentary Makefile for rebuilding Stanford CoreNLP.
# We actually use ant (q.v.) or a Java IDE.
# NOTE(review): make recipe lines must begin with a literal TAB character;
# verify the tabs survived whatever copy/paste produced this file.
JAVAC = javac
# -O: optimize; -d classes: compile output dir; source files are UTF-8.
JAVAFLAGS = -O -d classes -encoding utf-8
# Builds the classes' jar file
# Compiles every .java under src/edu/stanford/nlp up to five package levels
# deep, then jars the classes with the source tree's MANIFEST.MF into a
# date-stamped stanford-corenlp-YYYY-MM-DD.jar in the current directory.
corenlp: source
mkdir -p classes
$(JAVAC) $(JAVAFLAGS) src/edu/stanford/nlp/*/*.java \
src/edu/stanford/nlp/*/*/*.java \
src/edu/stanford/nlp/*/*/*/*.java \
src/edu/stanford/nlp/*/*/*/*/*.java \
src/edu/stanford/nlp/*/*/*/*/*/*.java
cd classes ; jar -cfm ../stanford-corenlp-`date +%Y-%m-%d`.jar ../src/META-INF/MANIFEST.MF edu ; cd ..
# Before making, unjar the source jar file in the 'src' directory
# Unpacks the distributed sources jar into src/ only if src/ does not exist.
source:
if [ ! -e src ] ; then \
mkdir src ; cd src ; jar -xf ../stanford-corenlp-*-sources.jar; \
fi;
# Removes everything both targets above created.
clean:
rm -rf classes
rm -rf src

View File

@ -0,0 +1,136 @@
Stanford CoreNLP - Stanford's Suite of NLP Tools
------------------------------------------------
Copyright © 2009-2020 The Board of Trustees of
The Leland Stanford Junior University. All Rights Reserved.
DOCUMENTATION
Please look at the URL below for documentation for Stanford CoreNLP:
https://nlp.stanford.edu/software/corenlp.html
LICENSE
//
// StanfordCoreNLP -- a suite of NLP tools
// Copyright © 2009-2020 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see http://www.gnu.org/licenses/ .
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 2A
// Stanford CA 94305-9020
// USA
//
---------------------------------
CHANGES
---------------------------------
2020-11-16 4.2.0 Bug fixes, Retrained English parser models
with improved trees, Updated dependencies
(ejml, junit, jflex), Speed up loading
Wikidict annotator, New features for server
handling of tokensregex and tregex requests,
Release built directly from GitHub repo
2020-07-31 4.1.0 Improved server interface, improved memory
usage of sutime, spanish tokenization upgrades
2020-04-19 4.0.0 Changed to UDv2 tokenization ("new" LDC Treebank,
for English); handles multi-word-tokens;
improved UDv2-based taggers and parsers for
English, French, German, Spanish; new French NER;
new Chinese segmenter; library updates, bug fixes
2018-10-05 3.9.2 improved NER pipeline and entity mention
confidences; support for Java 11; new POS
models for English; 4 methods for setting
document dates; tokenizer improvements;
CoreNLP runs as filter from stdin to stdout;
bug fixes
2018-02-27 3.9.1 Bug fixes, minor enhancements
2018-01-31 3.9.0 Spanish KBP and new dependency parse model,
wrapper API for data, quote attribution
improvements, easier use of coref info, bug
fixes
2017-06-09 3.8.0 Web service annotator, discussion forum
handling, new French and Spanish models
2016-10-31 3.7.0 KBP Annotator, improved coreference, Arabic
pipeline
2015-12-09 3.6.0 Improved coreference, OpenIE integration,
Stanford CoreNLP server
2015-04-20 3.5.2 Switch to Universal dependencies, add Chinese
coreference system to CoreNLP
2015-01-29 3.5.1 NER, dependency parser, SPIED improvements;
general bugfixes
2014-10-26 3.5.0 Upgrade to Java 1.8; add annotators for
dependency parsing and relation extraction
2014-08-27 3.4.1 Add Spanish models
2014-06-16 3.4 Add shift reduce parser
2014-01-04 3.3.1 Bugfix release
2013-11-12 3.3.0 Add sentiment model, minor sutime improvements
2013-06-19 3.2.0 New RNN parser model, more efficient tagger
2013-04-04 1.3.5 Speed improvements, coref improvements,
Chinese version, -nthreads option
2012-11-12 1.3.4 Improved ner model and dependency code,
now possible to change annotator pool for
later StanfordCoreNLP objects
2012-07-09 1.3.3 Minor bug fixes
2012-05-22 1.3.2 Improvements to sutime
2012-03-09 1.3.1 Now supports caseless models (available as DLC)
2011-12-16 1.3.0 Threadsafe!
Bugs in time annotation fixed
2011-09-14 1.2.0 Time expression recognizer added to ner annotator
Output bugfixes
Parser can now substitute for tagger
2011-06-19 1.1.0 Improved coref release
2011-05-15 1.0.4 More efficient dcoref data structure
Supports already-tokenized input text
2011-04-17 1.0.3 Compatible with other releases
Support loading arbitrary annotators
Tagger bug fixes, such as "EOS" token
2010-11-11 1.0.2 Remove wn.jar
2010-11-11 1.0.1 Add xml removal
2010-10-07 1.0 Initial release

View File

@ -0,0 +1,192 @@
<!-- build.xml file for ant for Stanford CoreNLP -->
<!-- Before using this, unjar the sources' jar file into the src/ directory! -->
<!-- A "project" describes a set of targets that may be requested
when Ant is executed. The "default" attribute defines the
target which is executed if no specific target is requested,
and the "basedir" attribute defines the current working directory
from which Ant executes the requested task. This is normally
set to the current working directory.
-->
<project name="StanfordCoreNLP" default="compile" basedir=".">
<property name="build.home" value="${basedir}/classes"/>
<property name="build.tests" value="${basedir}/classes"/>
<property name="docs.home" value="${basedir}/docs"/>
<property name="src.home" value="${basedir}/src"/>
<property name="javadoc.home" value="${basedir}/javadoc"/>
<!-- ==================== Compilation Control Options ==================== -->
<!--
These properties control option settings on the Javac compiler when it
is invoked using the <javac> task.
compile.debug Should compilation include the debug option?
compile.deprecation Should compilation include the deprecation option?
compile.optimize Should compilation include the optimize option?
compile.source Source version compatibility
compile.target Target class version compatibility
-->
<property name="compile.debug" value="true"/>
<property name="compile.deprecation" value="false"/>
<property name="compile.optimize" value="true"/>
<property name="compile.source" value="1.8" />
<property name="compile.target" value="1.8" />
<property name="compile.encoding" value="utf-8" />
<!-- ==================== All Target ====================================== -->
<!--
The "all" target is a shortcut for running the "clean" target followed
by the "compile" target, to force a complete recompile.
-->
<target name="all" depends="clean,compile"
description="Clean build and dist directories, then compile"/>
<!-- ==================== Clean Target ==================================== -->
<!--
The "clean" target deletes any previous "build" and "dist" directory,
so that you can be ensured the application can be built from scratch.
-->
<target name="clean" description="Delete old classes">
<delete dir="${build.home}/edu"/>
</target>
<!-- ==================== Compile Target ================================== -->
<!--
The "compile" target transforms source files (from your "src" directory)
into object files in the appropriate location in the build directory.
This example assumes that you will be including your classes in an
unpacked directory hierarchy under "/WEB-INF/classes".
-->
<target name="compile" depends="prepare"
description="Compile Java sources">
<!-- Compile Java classes as necessary -->
<mkdir dir="${build.home}"/>
<javac srcdir="${src.home}"
destdir="${build.home}"
debug="${compile.debug}"
encoding="${compile.encoding}"
deprecation="${compile.deprecation}"
optimize="${compile.optimize}"
source="${compile.source}"
target="${compile.target}"
includeantruntime="false">
<compilerarg value="-Xmaxerrs"/>
<compilerarg value="20"/>
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="stanford-corenlp*"/>
</fileset>
</classpath>
<!-- <compilerarg value="-Xlint"/> -->
</javac>
<!-- Copy application resources -->
<!--
<copy todir="${build.home}/WEB-INF/classes">
<fileset dir="${src.home}" excludes="**/*.java"/>
</copy>
-->
</target>
<!-- ==================== Javadoc Target ================================== -->
<!--
The "javadoc" target creates Javadoc API documentation for the Java
classes included in your application. Normally, this is only required
when preparing a distribution release, but is available as a separate
target in case the developer wants to create Javadocs independently.
-->
<target name="javadoc" depends="compile"
description="Create Javadoc API documentation">
<mkdir dir="${javadoc.home}"/>
<javadoc sourcepath="${src.home}"
destdir="${javadoc.home}"
maxmemory="1g"
author="true"
source="${compile.source}"
overview="${src.home}/edu/stanford/nlp/overview.html"
doctitle="Stanford JavaNLP API Documentation"
windowtitle="Stanford JavaNLP API"
encoding="${compile.encoding}"
docencoding="${compile.encoding}"
charset="${compile.encoding}"
packagenames="*">
<!-- Allow @generated, @modifiable and @ordered tags -->
<tag name="generated" scope="all" description="Generated" />
<tag name="modifiable" scope="all" description="Modifiable" />
<tag name="ordered" scope="all" description="Ordered" />
<!-- Depends on lib and classes folders -->
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="stanford-corenlp*"/>
</fileset>
<pathelement path="${build.home}" />
</classpath>
<bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
<link href="https://docs.oracle.com/javase/8/docs/api/"/>
</javadoc>
</target>
<!-- ==================== Prepare Target ================================== -->
<!--
The "prepare" target is used to create the "build" destination directory,
and copy the static contents of your web application to it. If you need
to copy static files from external dependencies, you can customize the
contents of this task.
Normally, this task is executed indirectly when needed.
-->
<target name="prepare">
<!-- Create build directories as needed -->
<mkdir dir="${build.home}"/>
</target>
</project>

View File

@ -0,0 +1 @@
斯坦福大学的工具不仅能处理英文,而且也可以处理中文!

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Runs Stanford CoreNLP.
# Simple uses for xml and plain text output to files are:
# ./corenlp.sh -file filename
# ./corenlp.sh -file filename -outputFormat text
# Split into sentences, run POS tagger and NER, write CoNLL-style TSV file:
# ./corenlp.sh -annotators tokenize,ssplit,pos,lemma,ner -outputFormat conll -file input.txt
# You can also start a simple shell where you can enter sentences to be processed:
# ./corenlp.sh
#
# Resolve the directory containing this script, following symlinks when
# possible. Some machines (older OS X, BSD, Windows environments) don't
# support readlink -e, so test whether the invocation itself succeeds
# rather than merely whether a readlink command exists (BSD readlink is
# present but lacks -e). Fall back to a plain dirname of $0.
# (Original script had this branch inverted: it skipped readlink exactly
# when readlink was available, and called readlink only when it wasn't.)
if scriptpath=$(readlink -e "$0" 2>/dev/null); then
  scriptdir=$(dirname "$scriptpath")
else
  scriptdir=$(dirname "$0")
fi
# Show the command being run, then run it. Use "$@" (not $*) so arguments
# containing spaces are forwarded to Java without being re-split.
echo java -mx5g -cp \""$scriptdir/*"\" edu.stanford.nlp.pipeline.StanfordCoreNLP "$@"
java -mx5g -cp "$scriptdir/*" edu.stanford.nlp.pipeline.StanfordCoreNLP "$@"

View File

@ -0,0 +1 @@
Stanford University is located in California. It is a great university, founded in 1891.

View File

@ -0,0 +1,324 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet href="CoreNLP-to-HTML.xsl" type="text/xsl"?>
<root>
<document>
<sentences>
<sentence id="1">
<tokens>
<token id="1">
<word>Stanford</word>
<lemma>Stanford</lemma>
<CharacterOffsetBegin>0</CharacterOffsetBegin>
<CharacterOffsetEnd>8</CharacterOffsetEnd>
<POS>NNP</POS>
<NER>ORGANIZATION</NER>
</token>
<token id="2">
<word>University</word>
<lemma>University</lemma>
<CharacterOffsetBegin>9</CharacterOffsetBegin>
<CharacterOffsetEnd>19</CharacterOffsetEnd>
<POS>NNP</POS>
<NER>ORGANIZATION</NER>
</token>
<token id="3">
<word>is</word>
<lemma>be</lemma>
<CharacterOffsetBegin>20</CharacterOffsetBegin>
<CharacterOffsetEnd>22</CharacterOffsetEnd>
<POS>VBZ</POS>
<NER>O</NER>
</token>
<token id="4">
<word>located</word>
<lemma>located</lemma>
<CharacterOffsetBegin>23</CharacterOffsetBegin>
<CharacterOffsetEnd>30</CharacterOffsetEnd>
<POS>JJ</POS>
<NER>O</NER>
</token>
<token id="5">
<word>in</word>
<lemma>in</lemma>
<CharacterOffsetBegin>31</CharacterOffsetBegin>
<CharacterOffsetEnd>33</CharacterOffsetEnd>
<POS>IN</POS>
<NER>O</NER>
</token>
<token id="6">
<word>California</word>
<lemma>California</lemma>
<CharacterOffsetBegin>34</CharacterOffsetBegin>
<CharacterOffsetEnd>44</CharacterOffsetEnd>
<POS>NNP</POS>
<NER>LOCATION</NER>
</token>
<token id="7">
<word>.</word>
<lemma>.</lemma>
<CharacterOffsetBegin>44</CharacterOffsetBegin>
<CharacterOffsetEnd>45</CharacterOffsetEnd>
<POS>.</POS>
<NER>O</NER>
</token>
</tokens>
<parse>(ROOT (S (NP (NNP Stanford) (NNP University)) (VP (VBZ is) (ADJP (JJ located) (PP (IN in) (NP (NNP California))))) (. .))) </parse>
<basic-dependencies>
<dep type="nn">
<governor idx="2">University</governor>
<dependent idx="1">Stanford</dependent>
</dep>
<dep type="nsubj">
<governor idx="4">located</governor>
<dependent idx="2">University</dependent>
</dep>
<dep type="cop">
<governor idx="4">located</governor>
<dependent idx="3">is</dependent>
</dep>
<dep type="prep">
<governor idx="4">located</governor>
<dependent idx="5">in</dependent>
</dep>
<dep type="pobj">
<governor idx="5">in</governor>
<dependent idx="6">California</dependent>
</dep>
</basic-dependencies>
<collapsed-dependencies>
<dep type="nn">
<governor idx="2">University</governor>
<dependent idx="1">Stanford</dependent>
</dep>
<dep type="nsubj">
<governor idx="4">located</governor>
<dependent idx="2">University</dependent>
</dep>
<dep type="cop">
<governor idx="4">located</governor>
<dependent idx="3">is</dependent>
</dep>
<dep type="prep_in">
<governor idx="4">located</governor>
<dependent idx="6">California</dependent>
</dep>
</collapsed-dependencies>
<collapsed-ccprocessed-dependencies>
<dep type="nn">
<governor idx="2">University</governor>
<dependent idx="1">Stanford</dependent>
</dep>
<dep type="nsubj">
<governor idx="4">located</governor>
<dependent idx="2">University</dependent>
</dep>
<dep type="cop">
<governor idx="4">located</governor>
<dependent idx="3">is</dependent>
</dep>
<dep type="prep_in">
<governor idx="4">located</governor>
<dependent idx="6">California</dependent>
</dep>
</collapsed-ccprocessed-dependencies>
</sentence>
<sentence id="2">
<tokens>
<token id="1">
<word>It</word>
<lemma>it</lemma>
<CharacterOffsetBegin>46</CharacterOffsetBegin>
<CharacterOffsetEnd>48</CharacterOffsetEnd>
<POS>PRP</POS>
<NER>O</NER>
</token>
<token id="2">
<word>is</word>
<lemma>be</lemma>
<CharacterOffsetBegin>49</CharacterOffsetBegin>
<CharacterOffsetEnd>51</CharacterOffsetEnd>
<POS>VBZ</POS>
<NER>O</NER>
</token>
<token id="3">
<word>a</word>
<lemma>a</lemma>
<CharacterOffsetBegin>52</CharacterOffsetBegin>
<CharacterOffsetEnd>53</CharacterOffsetEnd>
<POS>DT</POS>
<NER>O</NER>
</token>
<token id="4">
<word>great</word>
<lemma>great</lemma>
<CharacterOffsetBegin>54</CharacterOffsetBegin>
<CharacterOffsetEnd>59</CharacterOffsetEnd>
<POS>JJ</POS>
<NER>O</NER>
</token>
<token id="5">
<word>university</word>
<lemma>university</lemma>
<CharacterOffsetBegin>60</CharacterOffsetBegin>
<CharacterOffsetEnd>70</CharacterOffsetEnd>
<POS>NN</POS>
<NER>O</NER>
</token>
<token id="6">
<word>,</word>
<lemma>,</lemma>
<CharacterOffsetBegin>70</CharacterOffsetBegin>
<CharacterOffsetEnd>71</CharacterOffsetEnd>
<POS>,</POS>
<NER>O</NER>
</token>
<token id="7">
<word>founded</word>
<lemma>found</lemma>
<CharacterOffsetBegin>72</CharacterOffsetBegin>
<CharacterOffsetEnd>79</CharacterOffsetEnd>
<POS>VBN</POS>
<NER>O</NER>
</token>
<token id="8">
<word>in</word>
<lemma>in</lemma>
<CharacterOffsetBegin>80</CharacterOffsetBegin>
<CharacterOffsetEnd>82</CharacterOffsetEnd>
<POS>IN</POS>
<NER>O</NER>
</token>
<token id="9">
<word>1891</word>
<lemma>1891</lemma>
<CharacterOffsetBegin>83</CharacterOffsetBegin>
<CharacterOffsetEnd>87</CharacterOffsetEnd>
<POS>CD</POS>
<NER>DATE</NER>
<NormalizedNER>1891</NormalizedNER>
<Timex tid="t1" type="DATE">1891</Timex>
</token>
<token id="10">
<word>.</word>
<lemma>.</lemma>
<CharacterOffsetBegin>87</CharacterOffsetBegin>
<CharacterOffsetEnd>88</CharacterOffsetEnd>
<POS>.</POS>
<NER>O</NER>
</token>
</tokens>
<parse>(ROOT (S (NP (PRP It)) (VP (VBZ is) (NP (NP (DT a) (JJ great) (NN university)) (, ,) (VP (VBN founded) (PP (IN in) (NP (CD 1891)))))) (. .))) </parse>
<basic-dependencies>
<dep type="nsubj">
<governor idx="5">university</governor>
<dependent idx="1">It</dependent>
</dep>
<dep type="cop">
<governor idx="5">university</governor>
<dependent idx="2">is</dependent>
</dep>
<dep type="det">
<governor idx="5">university</governor>
<dependent idx="3">a</dependent>
</dep>
<dep type="amod">
<governor idx="5">university</governor>
<dependent idx="4">great</dependent>
</dep>
<dep type="partmod">
<governor idx="5">university</governor>
<dependent idx="7">founded</dependent>
</dep>
<dep type="prep">
<governor idx="7">founded</governor>
<dependent idx="8">in</dependent>
</dep>
<dep type="pobj">
<governor idx="8">in</governor>
<dependent idx="9">1891</dependent>
</dep>
</basic-dependencies>
<collapsed-dependencies>
<dep type="nsubj">
<governor idx="5">university</governor>
<dependent idx="1">It</dependent>
</dep>
<dep type="cop">
<governor idx="5">university</governor>
<dependent idx="2">is</dependent>
</dep>
<dep type="det">
<governor idx="5">university</governor>
<dependent idx="3">a</dependent>
</dep>
<dep type="amod">
<governor idx="5">university</governor>
<dependent idx="4">great</dependent>
</dep>
<dep type="partmod">
<governor idx="5">university</governor>
<dependent idx="7">founded</dependent>
</dep>
<dep type="prep_in">
<governor idx="7">founded</governor>
<dependent idx="9">1891</dependent>
</dep>
</collapsed-dependencies>
<collapsed-ccprocessed-dependencies>
<dep type="nsubj">
<governor idx="5">university</governor>
<dependent idx="1">It</dependent>
</dep>
<dep type="cop">
<governor idx="5">university</governor>
<dependent idx="2">is</dependent>
</dep>
<dep type="det">
<governor idx="5">university</governor>
<dependent idx="3">a</dependent>
</dep>
<dep type="amod">
<governor idx="5">university</governor>
<dependent idx="4">great</dependent>
</dep>
<dep type="partmod">
<governor idx="5">university</governor>
<dependent idx="7">founded</dependent>
</dep>
<dep type="prep_in">
<governor idx="7">founded</governor>
<dependent idx="9">1891</dependent>
</dep>
</collapsed-ccprocessed-dependencies>
</sentence>
</sentences>
<coreference>
<coreference>
<mention representative="true">
<sentence>1</sentence>
<start>1</start>
<end>3</end>
<head>2</head>
</mention>
<mention>
<sentence>2</sentence>
<start>1</start>
<end>2</end>
<head>1</head>
</mention>
<mention>
<sentence>2</sentence>
<start>3</start>
<end>10</end>
<head>5</head>
</mention>
<mention>
<sentence>2</sentence>
<start>3</start>
<end>6</end>
<head>5</head>
</mention>
</coreference>
</coreference>
</document>
</root>

View File

@ -0,0 +1,209 @@
<!-- Maven POM for the Stanford CoreNLP 4.2.0 code artifact (GPL v3). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Published coordinates: edu.stanford.nlp:stanford-corenlp:4.2.0 -->
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.2.0</version>
<packaging>jar</packaging>
<name>Stanford CoreNLP</name>
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
<url>https://nlp.stanford.edu/software/corenlp.html</url>
<licenses>
<license>
<name>GNU General Public License Version 3</name>
<url>http://www.gnu.org/licenses/gpl-3.0.txt</url>
</license>
</licenses>
<!-- NOTE(review): these SCM entries point at the distribution zip rather than
     a version-control URL; Maven treats them as opaque strings, so this is
     harmless, but confirm it is intentional. -->
<scm>
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</url>
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</connection>
</scm>
<developers>
<developer>
<id>christopher.manning</id>
<name>Christopher Manning</name>
<email>manning@stanford.edu</email>
</developer>
<developer>
<id>jason.bolton</id>
<name>Jason Bolton</name>
<email>jebolton@stanford.edu</email>
</developer>
<developer>
<id>john.bauer</id>
<name>John Bauer</name>
<email>horatio@gmail.com</email>
</developer>
</developers>
<!-- Compiled for Java 8 with UTF-8 sources. -->
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
</properties>
<!-- Compile/runtime dependencies of the CoreNLP code jar. -->
<dependencies>
<dependency>
<groupId>com.apple</groupId>
<artifactId>AppleJavaExtensions</artifactId>
<version>1.4</version>
</dependency>
<dependency>
<groupId>de.jollyday</groupId>
<artifactId>jollyday</artifactId>
<version>0.4.9</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.3.1</version>
</dependency>
<!-- Lucene modules: all three are pinned to the same 7.5.0 release. -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>7.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.5.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.5.0</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>javax.servlet-api</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>xom</groupId>
<artifactId>xom</artifactId>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.10.5</version>
</dependency>
<!-- EJML linear-algebra modules, all pinned to 0.39. -->
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-core</artifactId>
<version>0.39</version>
</dependency>
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-ddense</artifactId>
<version>0.39</version>
</dependency>
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-simple</artifactId>
<version>0.39</version>
</dependency>
<dependency>
<groupId>org.glassfish</groupId>
<artifactId>javax.json</artifactId>
<version>1.0.4</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>3.9.2</version>
</dependency>
<!-- JUnit and junit-quickcheck below are test-scoped only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.pholser</groupId>
<artifactId>junit-quickcheck-core</artifactId>
<version>0.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.pholser</groupId>
<artifactId>junit-quickcheck-generators</artifactId>
<version>0.5</version>
<scope>test</scope>
</dependency>
<!-- JAXB/activation APIs are not bundled with the JDK after Java 8; these
     artifacts supply them.
     NOTE(review): jaxb-api 2.4.0-b180830.0359 and jaxb-impl 2.4.0-b180830.0438
     are pre-release ("b"-tagged) builds - confirm these exact versions are
     intended rather than a final release such as 2.3.x. -->
<dependency>
<groupId>javax.activation</groupId>
<artifactId>javax.activation-api</artifactId>
<version>1.2.0</version>
</dependency>
<dependency>
<groupId>javax.xml.bind</groupId>
<artifactId>jaxb-api</artifactId>
<version>2.4.0-b180830.0359</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-core</artifactId>
<version>2.3.0.1</version>
</dependency>
<dependency>
<groupId>com.sun.xml.bind</groupId>
<artifactId>jaxb-impl</artifactId>
<version>2.4.0-b180830.0438</version>
</dependency>
</dependencies>
<!-- Non-standard source layout (src/, test/src/); the build-helper plugin
     attaches the pre-built models jar under classifier "models" during the
     'package' phase. The models jar must already exist in the project
     basedir - it is not built by this POM. -->
<build>
<sourceDirectory>src</sourceDirectory>
<testSourceDirectory>test/src</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>attach-models</id>
<phase>package</phase>
<goals>
<goal>attach-artifact</goal>
</goals>
<configuration>
<artifacts>
<artifact>
<file>${project.basedir}/stanford-corenlp-4.2.0-models.jar</file>
<type>jar</type>
<classifier>models</classifier>
</artifact>
</artifacts>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,70 @@
<!-- Variant Maven POM for Stanford CoreNLP 4.2.0: identical coordinates and
     metadata to the main POM but with no <dependencies> section.
     NOTE(review): two POMs in one commit share the same GAV
     (edu.stanford.nlp:stanford-corenlp:4.2.0) - presumably this stripped-down
     copy is for a standalone/offline build; confirm which one is deployed. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>4.2.0</version>
<packaging>jar</packaging>
<name>Stanford CoreNLP</name>
<description>Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.</description>
<url>https://nlp.stanford.edu/software/corenlp.html</url>
<licenses>
<license>
<name>GNU General Public License Version 3</name>
<url>http://www.gnu.org/licenses/gpl-3.0.txt</url>
</license>
</licenses>
<!-- NOTE(review): SCM entries point at the distribution zip, not a VCS URL. -->
<scm>
<url>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</url>
<connection>https://nlp.stanford.edu/software/stanford-corenlp-4.2.0.zip</connection>
</scm>
<developers>
<developer>
<id>christopher.manning</id>
<name>Christopher Manning</name>
<email>manning@stanford.edu</email>
</developer>
<developer>
<id>jason.bolton</id>
<name>Jason Bolton</name>
<email>jebolton@stanford.edu</email>
</developer>
<developer>
<id>john.bauer</id>
<name>John Bauer</name>
<email>horatio@gmail.com</email>
</developer>
</developers>
<!-- Compiled for Java 8 with UTF-8 sources. -->
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
</properties>
<!-- Sources live in src/ (no test source dir declared here, unlike the main
     POM); the pre-built models jar is attached under classifier "models"
     during 'package' and must already exist in the project basedir. -->
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>attach-models</id>
<phase>package</phase>
<goals>
<goal>attach-artifact</goal>
</goals>
<configuration>
<artifacts>
<artifact>
<file>${project.basedir}/stanford-corenlp-4.2.0-models.jar</file>
<type>jar</type>
<classifier>models</classifier>
</artifact>
</artifacts>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,13 @@
# This is a rudimentary Makefile for rebuilding the parser.
# We actually use ant (q.v.) or a Java IDE.

# Compiler and flags. Classes are compiled into ./classes with UTF-8 sources.
JAVAC = javac
JAVAFLAGS = -O -d classes -encoding utf-8

# 'parser' never creates a file named "parser"; declare it .PHONY so the
# rule still runs if a file or directory with that name exists.
.PHONY: parser

# Compile all sources up to three package levels below edu/stanford/nlp,
# package them (with the lexparser manifest) into a dated jar, copy that to
# the stable name stanford-parser.jar, then remove the scratch classes dir.
parser:
	mkdir -p classes
	$(JAVAC) $(JAVAFLAGS) src/edu/stanford/nlp/*/*.java \
	src/edu/stanford/nlp/*/*/*.java src/edu/stanford/nlp/*/*/*/*.java
	cd classes ; jar -cfm ../stanford-parser-`date +%Y-%m-%d`.jar ../src/edu/stanford/nlp/parser/lexparser/lexparser-manifest.txt edu ; cd ..
	cp stanford-parser-`date +%Y-%m-%d`.jar stanford-parser.jar
	rm -rf classes

View File

@ -0,0 +1,412 @@
Stanford Lexicalized Parser v4.2.0 - 2020-11-17
-----------------------------------------------
Copyright (c) 2002-2020 The Board of Trustees of The Leland Stanford Junior
University. All Rights Reserved.
Original core parser code by Dan Klein. Support code, additional
modules, languages, features, internationalization, compaction, typed
dependencies, etc. by Christopher Manning, Roger Levy, Teg Grenager,
Galen Andrew, Marie-Catherine de Marneffe, Jenny Finkel, Spence Green,
Bill MacCartney, Anna Rafferty, Huihsin Tseng, Pi-Chuan Chang,
Wolfgang Maier, Richard Eckart, Richard Socher, John Bauer,
Sebastian Schuster, and Jon Gauthier.
This release was prepared by Jason Bolton.
This package contains 6 parsers: a high-accuracy unlexicalized PCFG; a
lexicalized dependency parser; a factored model, where the estimates
of dependencies and an unlexicalized PCFG are jointly optimized to
give a lexicalized PCFG treebank parser; a TreeRNN parser, where
recursive neural networks trained with semantic word vectors are used
to score parse trees; a Shift-Reduce Constituency Parser;
and a transition-based neural dependency parser.
Also included are grammars for various languages for use with these parsers.
For more information about the parser API, point a web browser at the
included javadoc directory (use the browser's Open File command to open
the index.html file inside the javadoc folder). Start by looking at the
Package page for the edu.stanford.nlp.parser.lexparser package, and then
look at the page for the LexicalizedParser class documentation therein,
particularly documentation of the main method.
Secondly, you should also look at the Parser FAQ on the web:
https://nlp.stanford.edu/software/parser-faq.html
This software requires Java 8 (JDK 1.8.0+). (You must have installed it
separately. Check that the command "java -version" works and gives 1.8+.)
QUICKSTART
UNIX COMMAND-LINE USAGE
On a Unix system you should be able to parse the English test file with the
following command:
./lexparser.sh data/testsent.txt
This uses the PCFG parser, which is quick to load and run, and quite accurate.
[Notes: it takes a few seconds to load the parser data before parsing
begins; continued parsing is quicker. To use the lexicalized parser, replace
englishPCFG.ser.gz with englishFactored.ser.gz in the lexparser.sh script
and use the flag -mx600m to give more memory to java.]
WINDOWS GUI USAGE
On a Windows system, assuming that java is on your PATH, you should be able
to run a parsing GUI by double-clicking on the lexparser-gui.bat icon,
or giving the command lexparser-gui in this directory from a command prompt.
Click Load File, Browse, and navigate to and select testsent.txt in
the top directory of the parser distribution. Click Load Parser,
Browse, and select the models jar, also in the top directory of the
parser distribution. From the models jar, select englishPCFG.ser.gz.
Click Parse to parse the first sentence.
NEURAL NETWORK DEPENDENCY PARSER USAGE
To use the neural net dependency parser, issue the following command:
java -Xmx2g -cp "*" edu.stanford.nlp.parser.nndep.DependencyParser \
-model edu/stanford/nlp/models/parser/nndep/english_UD.gz \
-textFile data/english-onesent.txt -outFile data/english-onesent.txt.out
The output will be written to data/english-onesent.txt.out
If you want to run on a language other than English, you will need to use
a language specific POS tagger. Here is an example for Chinese:
java -Xmx2g -cp "*" edu.stanford.nlp.parser.nndep.DependencyParser \
-model edu/stanford/nlp/models/parser/nndep/UD_Chinese.gz \
-tagger.model edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger \
-textFile data/chinese-onesent-utf8.txt -outFile data/chinese-onesent-utf8.txt.out
OTHER USE CASES
The GUI is also available under Unix:
lexparser-gui.sh
Under Mac OS X, you can double-click on lexparser-gui.command to invoke the
GUI. The command-line version works on all platforms. Use lexparser.bat
to run it under Windows. The GUI is only for exploring the parser. It does
not allow you to save output. You need to use the command-line program or
programmatic API to do serious work with the parser.
ADDITIONAL GRAMMARS
The parser is supplied with several trained grammars. There are English
grammars based on the standard LDC Penn Treebank WSJ training sections 2-21
(wsj*), and ones based on an augmented data set, better for questions,
commands, and recent English and biomedical text (english*).
All grammars are located in the included models jar. (If you'd like to have
grammar files like in older versions of the parser, you can get them by
extracting them from the jar file with the 'jar -xf' command.)
MULTILINGUAL PARSING
In addition to the English grammars, the parser comes with trained grammars
for Arabic, Chinese, French, and German. To parse with these grammars, run
lexparser-lang.sh
with no arguments to see usage instructions. You can change language-specific
settings passed to the parser by modifying lexparser_lang.def.
You can also train and evaluate new grammars using:
lexparser-lang-train-test.sh
To see how we trained the grammars supplied in this distribution, see
bin/makeSerialized.csh
You will not be able to run this script (since it uses Stanford-specific file
paths), but you should be able to see what we did.
Arabic
Trained on parts 1-3 of the Penn Arabic Treebank (ATB) using the
pre-processing described in (Green and Manning, 2010). The default input
encoding is UTF-8 Arabic script. You can convert text in Buckwalter encoding to UTF-8
with the package edu.stanford.nlp.international.arabic.Buckwalter which is included
in stanford-parser.jar.
The parser *requires* segmentation and tokenization of raw text per the ATB standard
prior to parsing. You can generate this segmentation and tokenization with the Stanford
Word Segmenter, which is available separately at:
https://nlp.stanford.edu/software/segmenter.html
Chinese
There are Chinese grammars trained just on mainland material from
Xinhua and more mixed material from the LDC Chinese Treebank. The default
input encoding is GB18030.
French
The standalone parser distribution comes with a neural dependency parser
model trained on the French-GSD data set (version 2.2). The standalone
parser distribution does not include functionality for producing UD 2.2
tokenization, so pre-tokenized text (text tokenized by whitespace) must
be provided when running the neural dependency parser, and the "-tokenized"
flag must be used.
Example command:
java -Xmx2g -cp "*" edu.stanford.nlp.parser.nndep.DependencyParser \
-model edu/stanford/nlp/models/parser/nndep/UD_French.gz \
-tagger.model edu/stanford/nlp/models/pos-tagger/french-ud.tagger \
-tokenized -textFile example.txt -outFile example.txt.out
Note that "example.txt" should contain UD 2.2 tokens, separated by whitespace.
The only provided French constituency parser is a shift-reduce parser. At this
time running the shift-reduce parser on French text requires running a pipeline
with the full Stanford CoreNLP package.
To use the shift-reduce constituency parser on text and the UD 2.2 tokenization,
upgrade to the full Stanford CoreNLP package and run a French pipeline.
German
The constituency parser was trained on the Negra corpus. Details are included in
(Rafferty and Manning, 2008). This parser expects UD 2.2 tokenization. Input
text files must be UD 2.2 tokens separated by whitespace. The "-tokenized" flag
must be used.
The neural dependency parser was trained on the German-GSD data set (version 2.2).
The standalone parser distribution does not include functionality for producing
UD 2.2 tokenization, so pre-tokenized text (text tokenized by whitespace) must
be provided when running the neural dependency parser, and the "-tokenized"
flag must be used.
Example command:
java -Xmx2g -cp "*" edu.stanford.nlp.parser.nndep.DependencyParser \
-model edu/stanford/nlp/models/parser/nndep/UD_German.gz \
-tagger.model edu/stanford/nlp/models/pos-tagger/german-ud.tagger \
-tokenized -textFile example.txt -outFile example.txt.out
German shift reduce parsers are available, but running them on input German text
requires the use of a full Stanford CoreNLP pipeline. The lower accuracy lexicalized
parsers have options for running on input text.
To use the shift-reduce constituency parser on text and the UD 2.2 tokenization, upgrade
to the full Stanford CoreNLP package and run a German pipeline.
Spanish
The constituency parser was trained on the Spanish AnCora treebank and
LDC provided discussion forum and newswire treebanks. This parser expects UD 2.0
tokenization.
The neural dependency parser was trained on the Spanish AnCora data set (version 2.0).
The standalone parser distribution does not include functionality for producing
UD 2.0 tokenization, so pre-tokenized text (text tokenized by whitespace) must
be provided when running the neural dependency parser, and the "-tokenized"
flag must be used.
Example command:
java -Xmx2g -cp "*" edu.stanford.nlp.parser.nndep.DependencyParser \
-model edu/stanford/nlp/models/parser/nndep/UD_Spanish.gz \
-tagger.model edu/stanford/nlp/models/pos-tagger/spanish-ud.tagger \
-tokenized -textFile example.txt -outFile example.txt.out
Spanish shift reduce parsers are available, but running them on input Spanish text
requires the use of a full Stanford CoreNLP pipeline. The lower accuracy lexicalized
parsers have options for running on input text.
To use the shift-reduce constituency parser on text and the UD 2.2 tokenization, upgrade
to the full Stanford CoreNLP package and run a Spanish pipeline.
TREEBANK PREPROCESSING
The pre-processed versions of the ATB described
in (Green and Manning, 2010) and the FTB described in (Green et al.,
2011) can be reproduced using the TreebankPreprocessor included in this
release. The configuration files are located in /conf. For example,
to create the ATB data, run:
bin/run-tb-preproc -v conf/atb-latest.conf
Note that you'll need to update the conf file paths to your local treebank
distributions as the data is not distributed with the parser. You'll
also need to set the classpath in the cmd_line variable of run-tb-preproc.
The TreebankPreprocessor conf files support various options, which are
documented in
edu.stanford.nlp.international.process.ConfigParser
EVALUATION METRICS
The Stanford parser comes with Java implementations of the following
evaluation metrics:
Dependency Labeled Attachment
Evalb (Collins, 1997)
-Includes per-category evaluation with the -c option
Leaf Ancestor (Sampson and Babarczy, 2003)
-Both micro- and macro-averaged score
Tagging Accuracy
See the usage instructions and javadocs in the requisite classes located in
edu.stanford.nlp.parser.metrics.
UNIVERSAL DEPENDENCIES vs. STANFORD DEPENDENCIES
Since v3.5.2 the default dependency representation is the new Universal Dependencies
representation. Universal Dependencies were developed with the goal of being a
cross-linguistically valid representation. Note that some constructs such as prepositional
phrases are now analyzed differently and that the set of relations was updated. Please
look at the Universal Dependencies documentation for more information:
http://www.universaldependencies.org
The parser also still supports the original Stanford Dependencies representation
as described in the StanfordDependenciesManual.pdf. Use the flag
-originalDependencies
to obtain original Stanford Dependencies.
LICENSE
// StanfordLexicalizedParser -- a probabilistic lexicalized NL CFG parser
// Copyright (c) 2002-2020 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see http://www.gnu.org/licenses/ .
//
// For more information, bug reports, fixes, contact:
// Christopher Manning
// Dept of Computer Science, Gates 2A
// Stanford CA 94305-9020
// USA
// parser-support@lists.stanford.edu
// https://nlp.stanford.edu/downloads/lex-parser.html
---------------------------------
CHANGES
---------------------------------
2020-11-17 4.2.0 Retrain English models with treebank fixes
2020-05-22 4.0.0 Model tokenization updated to UDv2.0
2018-10-16 3.9.2 Update for compatibility
2018-02-27 3.9.1 new French and Spanish UD models, misc. UD
enhancements, bug fixes
2017-06-09 3.8.0 Updated for compatibility
2016-10-31 3.7.0 new UD models
2015-12-09 3.6.0 Updated for compatibility
2015-04-20 3.5.2 Switch to universal dependencies
2015-01-29 3.5.1 Dependency parser improvements; general
bugfixes
2014-10-26 3.5.0 Upgrade to Java 1.8; add neural-network
dependency parser
2014-08-27 3.4.1 Add Spanish models
2014-06-16 3.4 Shift-reduce parser
2014-01-04 3.3.1 Bugfix release, dependency improvements
2013-11-12 3.3.0 Remove the attr dependency, add imperatives to
English training data
2013-06-19 3.2.0 New RNN model for WSJ and English with
improved test set accuracy, rel dependency
removed
2013-04-05 2.0.5 Dependency improvements, ctb7 model, -nthreads
option
2012-11-12 2.0.4 Dependency speed improvements; other
dependency changes
2012-07-09 2.0.3 Minor bug fixes
2012-05-22 2.0.2 Supports adding extra data in non-tree format
2012-03-09 2.0.1 Caseless English model added, ready for maven
2012-01-11 2.0.0 Threadsafe!
2011-09-14 1.6.9 Added some imperatives to the English
training data; added root dependency.
2011-06-15 1.6.8 Added French parser and leaf ancestor
evaluation metric; reorganized distribution;
new data preparation scripts; rebuilt grammar
models; other bug fixes
2011-05-15 1.6.7 Minor bug fixes
2011-04-17 1.6.6 Compatible with tagger, corenlp and tregex.
2010-10-30 1.6.5 Further improvements to English Stanford
Dependencies and other minor changes
2010-08-16 1.6.4 More minor bug fixes and improvements to English
Stanford Dependencies and question parsing
2010-07-09 1.6.3 Improvements to English Stanford Dependencies and
question parsing, minor bug fixes
2010-02-25 1.6.2 Improvements to Arabic parser models,
and to English and Chinese Stanford Dependencies
2008-10-19 1.6.1 Slightly improved Arabic, German and
Stanford Dependencies
2007-08-18 1.6 Added Arabic, k-best PCCFG parsing;
improved English grammatical relations
2006-05-30 1.5.1 Improved English and Chinese grammatical relations;
fixed UTF-8 handling
2005-07-20 1.5 Added grammatical relations output;
fixed bugs introduced in 1.4
2004-03-24 1.4 Made PCFG faster again (by FSA minimization);
added German support
2003-09-06 1.3 Made parser over twice as fast;
added tokenization options
2003-07-20 1.2 Halved PCFG memory usage;
added support for Chinese
2003-03-25 1.1 Improved parsing speed; included GUI,
improved PCFG grammar
2002-12-05 1.0 Initial release

View File

@ -0,0 +1,296 @@
UNIVERSAL/STANFORD DEPENDENCIES. Stanford Parser v3.7.0
-----------------------------------------------------------
IMPORTANT: Starting with version 3.5.2 the default dependencies
representation output by the Stanford Parser is the new Universal
Dependencies Representation. Universal Dependencies were developed
with the goal of being a cross-linguistically valid representation.
Note that some constructions such as prepositional phrases are now
analyzed differently and that the set of relations was updated. The
online documentation of English Universal Dependencies at
http://www.universaldependencies.org
should be consulted for the current set of dependency relations.
The parser and converter also still support the original
Stanford Dependencies as described in the Stanford Dependencies
manual. Use the flag
-originalDependencies
to obtain the original Stanford Dependencies. Note, however, that we
are no longer maintaining the SD converter or representation and we
therefore recommend to use the Universal Dependencies representation
for any new projects.
The manual for the English version of the Stanford Dependencies
representation:
StanfordDependenciesManual.pdf
should be consulted for the set of dependency relations in the original
Stanford Dependencies representation and the correct commands for
generating Stanford Dependencies together with any of the Stanford Parser,
another parser, or a treebank.
A typed dependencies representation is also available for Chinese. For
the moment the documentation consists of the code, and a brief
presentation in this paper:
Pi-Chuan Chang, Huihsin Tseng, Dan Jurafsky, and Christopher
D. Manning. 2009. Discriminative Reordering with Chinese Grammatical
Relations Features. Third Workshop on Syntax and Structure in Statistical
Translation. http://nlp.stanford.edu/pubs/ssst09-chang.pdf
--------------------------------------
DEPENDENCIES SCHEMES
For an overview of the original English Universal Dependencies schemes, please look
at:
Marie-Catherine de Marneffe, Timothy Dozat, Natalia Silveira, Katri Haverinen,
Filip Ginter, Joakim Nivre, and Christopher D. Manning. 2014. Universal Stanford
dependencies: A cross-linguistic typology. 9th International Conference on
Language Resources and Evaluation (LREC 2014).
http://nlp.stanford.edu/~manning/papers/USD_LREC14_UD_revision.pdf
and
Joakim Nivre, Marie-Catherine de Marneffe, Filip Ginter, Yoav Goldberg, Jan Hajič,
Christopher D. Manning, Ryan McDonald, Slav Petrov, Sampo Pyysalo, Natalia Silveira,
Reut Tsarfaty, and Daniel Zeman. 2016. Universal Dependencies v1: A Multilingual
Treebank Collection. In Proceedings of the Tenth International Conference on Language
Resources and Evaluation (LREC 2016).
http://nlp.stanford.edu/pubs/nivre2016ud.pdf
Please note, though, that some of the relations discussed in the first paper
were subsequently updated and please refer to the online documentation at
http://www.universaldependencies.org
for up-to-date documentation of the set of relations.
For an overview of the enhanced and enhanced++ dependency representations, please look
at:
Sebastian Schuster and Christopher D. Manning. 2016. Enhanced English Universal
Dependencies: An Improved Representation for Natural Language Understanding Tasks.
In Proceedings of the Tenth International Conference on Language Resources and
Evaluation (LREC 2016).
http://nlp.stanford.edu/~sebschu/pubs/schuster-manning-lrec2016.pdf
For an overview of the original typed dependencies scheme, please look
at:
Marie-Catherine de Marneffe, Bill MacCartney, and Christopher D.
Manning. 2006. Generating Typed Dependency Parses from Phrase
Structure Parses. 5th International Conference on Language Resources
and Evaluation (LREC 2006).
http://nlp.stanford.edu/~manning/papers/LREC_2.pdf
For more discussion of the design principles, please see:
Marie-Catherine de Marneffe and Christopher D. Manning. 2008. The
Stanford typed dependencies representation. In Proceedings of the
workshop on Cross-Framework and Cross-Domain Parser Evaluation, pp. 1-8.
http://nlp.stanford.edu/~manning/papers/dependencies-coling08.pdf
These papers can be cited as references for the original English Stanford
Dependencies and English Universal Dependencies.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.7.0
Implementation of enhanced and enhanced++ dependency
representations as described in Schuster and Manning (2016).
Fixed concurrency issue.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.5.2
Switch to Universal Dependencies as the default representation.
Please see the Universal Dependencies documentation at
http://www.universaldependencies.org
for more information on the new relations.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.5.1
A couple of small fixes were made, leading to ccomp and advcl being
recognized in a couple of new environments.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.4
One major change was made to the dependency taxonomy:
- We decided to collapse together the two dependencies partmod and infmod,
since they have similar function and mainly differ in the form of the verbal
head, which is anyways recorded in the POS tag. Those two relations are
removed from the taxonomy, and a new relation vmod covering the union of both
was added.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.3.1
A couple of fixes/improvements were made in the dependency conversion,
and one change was made to the taxonomy of relations.
- The partmod and infmod relations were deleted, and replaced with
vmod for reduced, non-finite verbal modifiers. The distinction between
these two relations can be recovered from the POS tag of the dependent.
- A couple of improvements were made to the conversion, the largest
one being recognizing pobj inside a PP not headed by something tagged
as IN or TO.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.3
Some fixes/improvements were made in the dependency conversion, and one
change was made to the taxonomy of relations.
- For currency amount expressions with a currency symbol like "$", it
had previously been the case that "$" was the head, and then each
number word modified it as a number. We realized that this was
unnecessarily inconsistent. For the expression "two thousand dollars",
"dollars" is the head, but "thousand" is a num modifier of it, and
number is used for the parts of a number multi-word expression only.
This analysis is now also used for cases with a currency symbol. E.g.,
"for $ 52.7 million": prep(for, $) num($, million) number(million, 52.7).
Similarly, for "the $ 2.29 billion value", we changed the analysis from
num(value, $) number($, billion) to amod(value, $) num($, billion).
This corresponds to what you got for "a two dollar value".
This is actually the most common change (at least on WSJ newswire!).
- Remove the attr relation. Some cases disappear by making the question
phrase of WHNP be NP questions the root. Others (predicative NP
complements) become xcomp.
- Less aggressive labeling of participial form VPs as xcomp. More of them
are correctly labeled partmod (but occasionally a true xcomp is also
mislabeled as partmod).
- Small rule changes to recognize a few more ccomp and parataxis.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v3.2, JUNE 2013
An improved dependency conversion means that our dependency trees are
not always projective, one deletion was made from the taxonomy of
relations, and various small converter fixes were made:
- rel was removed. rel was originally used as the relation for an
overt relativizer in a relative clause. But it was never a real
grammatical relation, and we gradually started labeling easy cases
as nsubj or dobj. In this release, rel is removed, pobj cases are
also labeled, and the remaining hard cases are labeled as dep.
- As a result of correctly labeling a pobj in questions and relative
clauses, the converter now sometimes produces non-projective dependency
trees (ones with crossing dependencies, if the words are laid out in
their normal order in a line, and all dependency arcs are drawn above
them). This is not a bug, it's an improvement in the generated
dependencies, but you should be aware that Stanford Dependencies
trees are now occasionally non-projective. (Some simple dependency
parsing algorithms only produce projective dependency trees.)
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v2.0.5, MARCH 2013
We have begun a more major effort to improve the suitability and coverage of
Stanford Dependencies on less formal text types, and to clean up a couple of
the more quirky dependencies in the original set. These changes are still
ongoing, but in this first installment, we have removed 3 dependencies and
added 2:
- abbrev was removed, and is now viewed as just a case of appos.
- complm was removed, and is now viewed as just a case of mark.
(This is consistent with an HPSG-like usage of mark.)
- purpcl was removed, and is now viewed as just a case of advcl.
- discourse was added. The lack of a dependency type for
interjections was an omission even in the early versions, but it
became essential as we expanded our consideration of informal
text types. It is used for interjections, fillers, discourse markers
and emoticons.
- goeswith was added. In badly edited text, it is used to join the
two parts of a word.
A few other changes and improvements were also made, including improvements
in the recognition of advcl. There has been a reduction of "dep" dependencies
of about 14% on newswire (and higher on more informal text genres).
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v2.0.4, NOVEMBER 2012
A few minor changes and fixes were made: HYPH is now recognized, and treated
as punctuation and clausal complements of adjectives (including comparatives)
are recognized as ccomp.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v1.6.9
This version adds an explicit root dependency in the set of dependencies
returned. In the past, there had been no explicit representation of the
root of the sentence in the set of dependencies returned, except in the
CoNLL format output, which always showed the root. Now, there is always
an explicit extra dependency that marks the sentence root, using a fake
ROOT pseudoword with index 0. That is, the root is marked in this way:
root(ROOT-0, depends-3)
Otherwise there were only a couple of minute changes in the dependencies
produced (appositions are now recognized in WHNPs!).
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- v1.6.8
This version includes only small fixes, principally addressing some gaps
in the correct treatment of dependencies in inverted sentence (SQ and SINV)
constructions, and some errors in the treatment of copulas in the presence of
temporal NPs.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- NOVEMBER 2010 - JANUARY 2011
Two changes were made to the taxonomy of dependencies.
- measure (phrase modifier) was generalized and replaced by
npadvmod (noun phrase adverbial modifier) which includes measure
phrases and other adverbial uses of noun phrases. Temporal NPs
(tmod) are now a subtype of npadvmod in the dependency hierarchy.
- mwe (multi-word expression) is introduced for certain common
function word dependencies for which another good analysis isn't
easy to come by (and which were frequently dep before) such as
"instead of" or "rather than".
A new option has been added to allow the copula to be treated as
the head when it has an adjective or noun complement.
The conversion software will now work fairly well with the
David Vadas version of the treebank with extra noun phrase
structure. (A few rare cases that are handled with the standard
treebank aren't yet handled, but you will get better dependencies
for compound nouns and multiword adjectival modifiers, etc.)
Considerable improvements were made in the coverage of named
dependencies. You should expect to see only about half as many generic
"dep" dependencies as in version 1.6.4.
--------------------------------------
CHANGES IN ENGLISH TYPED DEPENDENCIES CODE -- JUNE-AUGUST 2010
No new dependency relations have been introduced.
There have been some significant improvements in the generated
dependencies, principally covering:
- Better resolution of nsubj and dobj long distance dependencies
(but v1.6.4 fixes the overpercolation of dobj in v1.6.3)
- Better handling of conjunction distribution in CCprocessed option
- Correction of bug in v1.6.2 that made certain verb dependents noun
dependents.
- Better dependencies are generated for question structures (v1.6.4)
- Other minor improvements in recognizing passives, adverbial
modifiers, etc.

View File

@ -0,0 +1,449 @@
\begin{thebibliography}{64}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
\providecommand{\doi}[1]{doi: #1}\else
\providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi
\bibitem[Abbott et~al.(2011)Abbott, Walker, Anand, Fox~Tree, Bowmani, and
King]{Abbott11}
Rob Abbott, Marilyn Walker, Pranav Anand, Jean~E. Fox~Tree, Robeson Bowmani,
and Joseph King.
\newblock How can you say such things?!?: Recognizing disagreement in informal
political argument.
\newblock In \emph{Proceedings of the Workshop on Languages in Social Media},
LSM '11, pages 2--11, 2011.
\bibitem[Adams et~al.(2007)Adams, Nicolae, Nicolae, and
Harabagiu]{adams-EtAl:2007:WTEP}
Rod Adams, Gabriel Nicolae, Cristina Nicolae, and Sanda Harabagiu.
\newblock Textual entailment through extended lexical overlap and
lexico-semantic matching.
\newblock In \emph{Proceedings of the ACL-PASCAL Workshop on Textual Entailment
and Paraphrasing}, pages 119--124, Prague, June 2007.
\bibitem[Airola et~al.(2008)Airola, Pyysalo, Bj{\"o}rne, Pahikkala, Ginter, and
Salakoski]{Pyysalo08}
Antti Airola, Sampo Pyysalo, Jari Bj{\"o}rne, Tapio Pahikkala, Filip Ginter,
and Tapio Salakoski.
\newblock A graph kernel for protein-protein interaction extraction.
\newblock In \emph{Proceedings of BioNLP 2008: Current Trends in Biomedical
Natural Language Processing (ACL08)}, 2008.
\bibitem[Banko et~al.(2007)Banko, Cafarella, Soderland, Broadhead, and
Etzioni]{Banko07}
Michele Banko, Michael~J. Cafarella, Stephen Soderland, Matt Broadhead, and
Oren Etzioni.
\newblock Open information extraction from the web.
\newblock In \emph{Proceedings of the 20th International Joint Conference on
Artificial Intelligence (IJCAI 2007)}, 2007.
\bibitem[Bj\"{o}rne and Salakoski(2011)]{Bjorne2011}
Jari Bj\"{o}rne and Tapio Salakoski.
\newblock Generalizing biomedical event extraction.
\newblock In \emph{Proceedings of the BioNLP Shared Task 2011 Workshop}, BioNLP
Shared Task '11, pages 183--191, 2011.
\bibitem[Bj{\"o}rne et~al.(2008)Bj{\"o}rne, Pyysalo, Ginter, and
Salakoski]{bjorne08how}
Jari Bj{\"o}rne, Sampo Pyysalo, Filip Ginter, and Tapio Salakoski.
\newblock How complex are complex protein-protein interactions?
\newblock In \emph{3rd International Symposium on Semantic Mining in
Biomedecine}, 2008.
\bibitem[Blake(2007)]{blake:2007:WTEP}
Catherine Blake.
\newblock The role of sentence structure in recognizing textual entailment.
\newblock In \emph{Proceedings of the ACL-PASCAL Workshop on Textual Entailment
and Paraphrasing}, pages 101--106, Prague, June 2007.
\bibitem[Chambers(2011)]{Chambers11}
Nathanael Chambers.
\newblock \emph{Inducing Event Schemas and their Participants from Unlabeled
Text}.
\newblock PhD thesis, Department of Computer Science, Stanford University,
2011.
\bibitem[Chambers et~al.(2007)Chambers, Cer, Grenager, Hall, Kiddon,
MacCartney, de~Marneffe, Ramage, Yeh, and Manning]{chambers-EtAl:2007:WTEP}
Nathanael Chambers, Daniel Cer, Trond Grenager, David Hall, Chloe Kiddon, Bill
MacCartney, Marie-Catherine de~Marneffe, Daniel Ramage, Eric Yeh, and
Christopher~D. Manning.
\newblock Learning alignments and leveraging natural logic.
\newblock In \emph{Proceedings of the ACL-PASCAL Workshop on Textual Entailment
and Paraphrasing}, pages 165--170, Prague, June 2007.
\bibitem[Chang et~al.(2009)Chang, Tseng, Jurafsky, and
Manning]{chang-tseng-jurafsky-manning:2009:SSST}
Pi-Chuan Chang, Huihsin Tseng, Dan Jurafsky, and Christopher~D. Manning.
\newblock Discriminative reordering with {C}hinese grammatical relations
features.
\newblock In \emph{Proceedings of the Third Workshop on Syntax and Structure in
Statistical Translation}, Boulder, Colorado, June 2009.
\newblock URL \url{pubs/ssst09-chang.pdf}.
\bibitem[Chaumartin(2007)]{chaumartin07knowledge}
François-Régis Chaumartin.
\newblock {UPAR7}: A knowledge-based system for headline sentiment tagging.
\newblock In \emph{Proceedings of the 4th International Workshop on Semantic
Evaluations (SemEval-2007)}, pages 422--425, 2007.
\bibitem[Chen and Eugenio(2012)]{Chen12}
Lin Chen and Barbara~Di Eugenio.
\newblock Co-reference via pointing and haptics in multi-modal dialogues.
\newblock In \emph{Conference of the North American Chapter of the Association
for Computational Linguistics: Human Language Technologies}, 2012.
\bibitem[Clegg(2008)]{Clegg08}
Andrew~B. Clegg.
\newblock \emph{Computational-Linguistic Approaches to Biological Text Mining}.
\newblock PhD thesis, School of Crystallography, Birkbeck, University of
London, 2008.
\bibitem[Clegg and Shepherd(2007)]{Clegg07}
Andrew~B. Clegg and Adrian~J. Shepherd.
\newblock Benchmarking natural-language parsers for biological applications
using dependency graphs.
\newblock \emph{BMC Bioinformatics}, 8:24, 2007.
\bibitem[{de Marneffe} and Manning(2008)]{demarneffe08stanford}
Marie-Catherine {de Marneffe} and Christopher~D. Manning.
\newblock The {S}tanford typed dependencies representation.
\newblock In \emph{COLING Workshop on Cross-framework and Cross-domain Parser
Evaluation}, 2008.
\bibitem[{de Marneffe} et~al.(2006){de Marneffe}, MacCartney, and
Manning]{demarneffe06generating}
Marie-Catherine {de Marneffe}, Bill MacCartney, and Christopher~D. Manning.
\newblock Generating typed dependency parses from phrase structure parses.
\newblock In \emph{5th International Conference on Language Resources and
Evaluation (LREC 2006)}, 2006.
\bibitem[de~Marneffe et~al.(2014)de~Marneffe, Dozat, Silveira, Haverinen,
Ginter, Nivre, and Manning]{marneffe14universal}
Marie-Catherine de~Marneffe, Timothy Dozat, Natalia Silveira, Katri Haverinen,
Filip Ginter, Joakim Nivre, and Christopher~D. Manning.
\newblock Universal stanford dependencies: A cross-linguistic typology.
\newblock In \emph{Proceedings of the Ninth International Conference on
Language Resources and Evaluation (LREC-2014)}, 2014.
\bibitem[{El Maarouf} and Villaneau(2012)]{Maarouf12}
Isma\"{i}l {El Maarouf} and Jeanne Villaneau.
\newblock A {F}rench fairy tale corpus syntactically and semantically
annotated.
\newblock In \emph{Proceedings of the Eight International Conference on
Language Resources and Evaluation}, 2012.
\bibitem[Erkan et~al.(2007)Erkan, Ozgur, and Radev]{Erkan07}
Gunes Erkan, Arzucan Ozgur, and Dragomir~R. Radev.
\newblock Semi-supervised classification for extracting protein interaction
sentences using dependency parsing.
\newblock In \emph{Proceedings of the 2007 Joint Conference on Empirical
Methods in Natural Language Processing and Computational Natural Language
Learning (EMNLP-CoNLL)}, 2007.
\bibitem[Fundel et~al.(2007)Fundel, K{\"u}ffner, and Zimmer]{Fundel07}
Katrin Fundel, Robert K{\"u}ffner, and Ralf Zimmer.
\newblock Rel{E}x -- relation extraction using dependency parse trees.
\newblock \emph{Bioinformatics}, 23, 2007.
\bibitem[Garten(2010)]{Yarten10}
Yael Garten.
\newblock \emph{Text mining of the scientific literature to identify
pharmacogenomic interactions}.
\newblock PhD thesis, Department of Biomedical Informatics, Stanford
University, 2010.
\bibitem[Genzel(2010)]{genzel10automatically}
Dmitriy Genzel.
\newblock Automatically learning source-side reordering rules for large scale
machine translation.
\newblock In \emph{COLING-2010}, 2010.
\bibitem[Giles and Wren(2008)]{giles08large}
Cory~B. Giles and Jonathan~D. Wren.
\newblock Large-scale directional relationship extraction and resolution.
\newblock \emph{BMC Bioinformatics}, 9\penalty0 (Suppl 9):\penalty0 S11, 2008.
\bibitem[Glinos(2010)]{Glinos2010}
Demetrios~G. Glinos.
\newblock System description for {SAIC} entry at {RTE}-6.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2010.
\bibitem[Greenwood and Stevenson(2007)]{Greenwood07}
Mark~A. Greenwood and Mark Stevenson.
\newblock A semi-supervised approach to learning relevant protein-protein
interaction articles.
\newblock In \emph{Proceedings of the Second BioCreAtIvE Challenge Workshop,
Madrid, Spain}, 2007.
\bibitem[Haghighi and Klein(2010)]{Haghighi10}
Aria Haghighi and Dan Klein.
\newblock An entity-level approach to information extraction.
\newblock In \emph{Proceedings of the ACL 2010 Conference Short Papers},
ACLShort '10, pages 291--295, 2010.
\bibitem[Harmeling(2007)]{harmeling:2007:WTEP}
Stefan Harmeling.
\newblock An extensible probabilistic transformation-based approach to the
third recognizing textual entailment challenge.
\newblock In \emph{Proceedings of the ACL-PASCAL Workshop on Textual Entailment
and Paraphrasing}, pages 137--142, Prague, June 2007.
\bibitem[Hassan et~al.(2010)Hassan, Qazvinian, and Radev]{Hassan10}
Ahmed Hassan, Vahed Qazvinian, and Dragomir Radev.
\newblock What's with the attitude?: identifying sentences with attitude in
online discussions.
\newblock In \emph{Proceedings of the 2010 Conference on Empirical Methods in
Natural Language Processing}, EMNLP '10, pages 1245--1255, 2010.
\bibitem[Haverinen et~al.(2010{\natexlab{a}})Haverinen, Ginter, Viljanen,
Laippala, and Salakoski]{Haverinen2010a}
Katri Haverinen, Filip Ginter, Timo Viljanen, Veronika Laippala, and Tapio
Salakoski.
\newblock Dependency-based propbanking of clinical {F}innish.
\newblock In \emph{Proceedings of the Fourth Linguistic Annotation Workshop},
LAW IV '10, pages 137--141, 2010{\natexlab{a}}.
\bibitem[Haverinen et~al.(2010{\natexlab{b}})Haverinen, Viljanen, Laippala,
Kohonen, Ginter, and Salakoski]{Haverinen2010b}
Katri Haverinen, Timo Viljanen, Veronika Laippala, Samuel Kohonen, Filip
Ginter, and Tapio Salakoski.
\newblock Treebanking {F}innish.
\newblock In \emph{Proceedings of the Ninth International Workshop on Treebanks
and Linguistic Theories (TLT)}, 2010{\natexlab{b}}.
\bibitem[Joshi et~al.(2010)Joshi, Das, Gimpel, and Smith]{Joshi2010}
Mahesh Joshi, Dipanjan Das, Kevin Gimpel, and Noah~A. Smith.
\newblock Movie reviews and revenues: an experiment in text regression.
\newblock In \emph{Human Language Technologies: The 2010 Annual Conference of
the North American Chapter of the Association for Computational Linguistics},
HLT '10, pages 293--296, 2010.
\bibitem[Kessler(2008)]{kessler08icwsm}
Jason~S. Kessler.
\newblock Polling the blogosphere: a rule-based approach to belief
classification.
\newblock In \emph{International Conference on Weblogs and Social Media}, 2008.
\bibitem[Kim et~al.(2009)Kim, Ohta, Pyysalo, Kano, and Tsujii]{kim09overview}
Jin-Dong Kim, Tomoko Ohta, Sampo Pyysalo, Yoshinobu Kano, and Jun'ichi Tsujii.
\newblock Overview of bionlp'09 shared task on event extraction.
\newblock In \emph{Proceedings of the BioNLP 2009 Workshop Companion Volume for
Shared Task}, pages 1--9, 2009.
\bibitem[Kim et~al.(2011)Kim, Pyysalo, Ohta, Bossy, Nguyen, and
Tsujii]{BioNLP11}
Jin-Dong Kim, Sampo Pyysalo, Tomoko Ohta, Robert Bossy, Ngan Nguyen, and
Jun'ichi Tsujii.
\newblock Overview of bionlp shared task 2011.
\newblock In \emph{Proceedings of the BioNLP Shared Task 2011 Workshop}, 2011.
\bibitem[Kl\"{u}wer et~al.(2010)Kl\"{u}wer, Uszkoreit, and Xu]{Kluwer10}
Tina Kl\"{u}wer, Hans Uszkoreit, and Feiyu Xu.
\newblock Using syntactic and semantic based relations for dialogue act
recognition.
\newblock In \emph{Proceedings of the 23rd International Conference on
Computational Linguistics}, COLING '10, pages 570--578, 2010.
\bibitem[Kouylekov et~al.(2010)Kouylekov, Mehdad, Negri, and
Cabrio]{Kouylekov2010}
Milen Kouylekov, Yashar Mehdad, Matteo Negri, and Elena Cabrio.
\newblock {FBK} participation in {RTE}-6: Main and {KBP} validation task.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2010.
\bibitem[Landeghem et~al.(2012)Landeghem, Bj\"{o}rne, Abeel, Baets, Salakoski,
and de~Peer]{Landeghem12}
Sofie~Van Landeghem, Jari Bj\"{o}rne, Thomas Abeel, Bernard~De Baets, Tapio
Salakoski, and Yves~Van de~Peer.
\newblock Semantically linking molecular entities in literature through entity
relationships.
\newblock \emph{BMC Bioinformatics}, 13, 2012.
\bibitem[Lau et~al.(2012)Lau, Cook, McCarthy, Newman, and Baldwin]{Lau12}
Jey~Han Lau, Paul Cook, Diana McCarthy, David Newman, and Timothy Baldwin.
\newblock Word sense induction for novel sense detection.
\newblock In \emph{Proceedings of the 13th Conference of the European Chapter
of the Association for Computational Linguistics}, 2012.
\bibitem[Malakasiotis(2009)]{Malakasiotis09}
Prodromos Malakasiotis.
\newblock {AUEB} at {TAC} 2009.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2009.
\bibitem[McClosky and Manning(2012)]{McClosky12}
David McClosky and Christopher~D. Manning.
\newblock Learning constraints for consistent timeline extraction.
\newblock In \emph{Proceedings of the 2012 Joint Conference on Empirical
Methods in Natural Language Processing and Computational Natural Language
Learning}, 2012.
\bibitem[McDonald et~al.(2013)McDonald, Nivre, Quirmbach-Brundage, Goldberg,
Das, Ganchev, Hall, Petrov, Zhang, T\"{a}ckstr\"{o}m, Bedini,
Bertomeu~Castell\'{o}, and Lee]{mcdonald-EtAl:2013:Short}
Ryan McDonald, Joakim Nivre, Yvonne Quirmbach-Brundage, Yoav Goldberg, Dipanjan
Das, Kuzman Ganchev, Keith Hall, Slav Petrov, Hao Zhang, Oscar
T\"{a}ckstr\"{o}m, Claudia Bedini, N\'{u}ria Bertomeu~Castell\'{o}, and
Jungmee Lee.
\newblock Universal dependency annotation for multilingual parsing.
\newblock In \emph{Proceedings of the 51st Annual Meeting of the Association
for Computational Linguistics (Volume 2: Short Papers)}, pages 92--97, 2013.
\bibitem[Meena and Prabhakar(2007)]{Meena07sentiment}
Arun Meena and T.~V. Prabhakar.
\newblock Sentence level sentiment analysis in the presence of conjuncts using
linguistic analysis.
\newblock In \emph{Advances in Information Retrieval}, volume 4425 of
\emph{Lecture Notes in Computer Science}. Springer, 2007.
\bibitem[Mehdad et~al.(2009)Mehdad, Negri, Cabrio, Kouylekov, and
Magnini]{Mehdad09}
Yashar Mehdad, Matteo Negri, Elena Cabrio, Milen Kouylekov, and Bernardo
Magnini.
\newblock Using lexical resources in a distance-based approach to {RTE}.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2009.
\bibitem[{\"O}zg{\"u}r et~al.(2008){\"O}zg{\"u}r, Vu, Erkan, and
Radev]{ozgur08identifying}
Arzucan {\"O}zg{\"u}r, Thuy Vu, G{\"u}nes Erkan, and Dragomir~R. Radev.
\newblock Identifying gene-disease associations using centrality on a
literature mined gene-interaction network.
\newblock \emph{Bioinformatics}, 24\penalty0 (13):\penalty0 i277--i285, 2008.
\bibitem[Pakray et~al.(2011)Pakray, Neogi, Bhaskar, Poria, Bandyopadhyay, and
Gelbukh]{Pakray2011a}
Partha Pakray, Snehasis Neogi, Pinaki Bhaskar, Soujanya Poria, Sivaji
Bandyopadhyay, and Alexander Gelbukh.
\newblock A textual entailment system using anaphora resolution.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2011.
\bibitem[Petrov and McDonald(2012)]{Petrov12}
Slav Petrov and Ryan McDonald.
\newblock Overview of the 2012 shared task on parsing the web.
\newblock In \emph{First Workshop on Syntactic Analysis of Non-Canonical
Language}, 2012.
\bibitem[Poon and Domingos(2009)]{poon09unsupervised}
Hoifung Poon and Pedro Domingos.
\newblock Unsupervised semantic parsing.
\newblock In \emph{Proceedings of the 2009 Conference on Empirical Methods in
Natural Language Processing (EMNLP 2009)}, pages 1--10, 2009.
\bibitem[Potisuk(2010)]{Potisuk10}
Siripong Potisuk.
\newblock Typed dependency relations for syntactic analysis of {T}hai
sentences.
\newblock In \emph{Proceedings of PACLIC 24 Pacific Asia Conference on
Language, Information and Computation}, 2010.
\bibitem[Pyysalo et~al.(2007)Pyysalo, Ginter, Haverinen, Heimonen, Salakoski,
and Laippala]{Pyysalo07}
Sampo Pyysalo, Filip Ginter, Katri Haverinen, Juho Heimonen, Tapio Salakoski,
and Veronika Laippala.
\newblock On the unification of syntactic annotations under the {S}tanford
dependency scheme: A case study on {B}io{I}nfer and {GENIA}.
\newblock In \emph{Proceedings of BioNLP 2007: Biological, translational, and
clinical language processing (ACL07)}, 2007.
\bibitem[Pyysalo et~al.(2011)Pyysalo, Ohta, and Tsujii]{Pyysalo11}
Sampo Pyysalo, Tomoko Ohta, and Junichi Tsujii.
\newblock An analysis of gene/protein associations at {P}ub{M}ed scale.
\newblock \emph{Journal of Biomedical Semantics}, 2, 2011.
\bibitem[Ramakrishnan et~al.(2008)Ramakrishnan, Mendes, Wang, and
Sheth]{ramakrishnan08discovery}
Cartic Ramakrishnan, Pablo~N. Mendes, Shaojun Wang, and Amit~P. Sheth.
\newblock Unsupervised discovery of compound entities for relationship
extraction.
\newblock In \emph{16th International Conference on Knowledge Engineering:
Practice and Patterns (EKAW 2008)}, pages 146--155, 2008.
\bibitem[Schuster and Manning(2016)]{schuster2016enhanced}
Sebastian Schuster and Christopher~D. Manning.
\newblock Enhanced {E}nglish {U}niversal {D}ependencies: An improved
representation for natural language understanding tasks.
\newblock In \emph{Proceedings of the Tenth International Conference on
Language Resources and Evaluation (LREC 2016)}, 2016.
\bibitem[Seraji et~al.(2012)Seraji, Megyesi, and Nivre]{Seraji12}
Mojgan Seraji, Be\'ata Megyesi, and Joakim Nivre.
\newblock A basic language resource kit for {P}ersian.
\newblock In \emph{Proceedings of the Eight International Conference on
Language Resources and Evaluation}, 2012.
\bibitem[Shivhare et~al.(2010)Shivhare, Nath, and Jain]{Shivare2010}
Himanshu Shivhare, Parul Nath, and Anusha Jain.
\newblock Semi cognitive approach to {RTE}-6 - using {F}rame{N}et for semantic
clustering.
\newblock In \emph{Proceedings of the Text Analysis Conference (TAC)}, 2010.
\bibitem[Sing and Bandyopadhyay(2010)]{singh10statistical}
Thoudam~Doren Sing and Sivaji Bandyopadhyay.
\newblock Statistical machine translation of {E}nglish -- {M}anipuri using
morpho-syntactic and semantic information.
\newblock In \emph{Proceedings of the Association for Machine Translation in
the Americas (AMTA 2010)}, 2010.
\bibitem[Tsarfaty(2013)]{tsarfaty:2013:Short}
Reut Tsarfaty.
\newblock A unified morpho-syntactic scheme of stanford dependencies.
\newblock In \emph{Proceedings of the 51st Annual Meeting of the Association
for Computational Linguistics (Volume 2: Short Papers)}, pages 578--584,
2013.
\bibitem[Urbain et~al.(2007)Urbain, Goharian, and Frieder]{Urbain07}
Jay Urbain, Nazli Goharian, and Ophir Frieder.
\newblock {IIT} {TREC} 2007 genomics track: Using concept-based semantics in
context for genomics literature passage retrieval.
\newblock In \emph{The Sixteenth Text REtrieval Conference (TREC 2007)
Proceedings}, 2007.
\bibitem[Wang and Neumann(2007)]{wang-neumann:2007:WTEP}
Rui Wang and G\"{u}nter Neumann.
\newblock Recognizing textual entailment using sentence similarity based on
dependency tree skeletons.
\newblock In \emph{Proceedings of the ACL-PASCAL Workshop on Textual Entailment
and Paraphrasing}, pages 36--41, Prague, June 2007.
\bibitem[Wu and Weld(2010)]{Wu2010}
Fei Wu and Daniel~S. Weld.
\newblock Open information extraction using {W}ikipedia.
\newblock In \emph{Proceedings of the 48th Annual Meeting of the Association
for Computational Linguistics}, ACL '10, 2010.
\bibitem[Xu et~al.(2009)Xu, Kang, Ringgaard, and Och]{xu09using}
Peng Xu, Jaeho Kang, Michael Ringgaard, and Franz Och.
\newblock Using a dependency parser to improve {SMT} for subject-object-verb
languages.
\newblock In \emph{NAACL 2009: Proceedings of Human Language Technologies, The
2009 Annual Conference of the North American Chapter of the Association for
Computational Linguistics}, pages 245--253, 2009.
\bibitem[Zhuang et~al.(2006)Zhuang, Jing, yan Zhu, and Zhang]{Zhuang06cikm}
Li~Zhuang, Feng Jing, Xiao yan Zhu, and Lei Zhang.
\newblock Movie review mining and summarization.
\newblock In \emph{Proc. ACM Conference on Information and Knowledge Management
(CIKM)}, pages 43--50, 2006.
\bibitem[Zouaq et~al.(2006)Zouaq, Nkambou, and Frasson]{Zouaq06tai}
Amal Zouaq, Roger Nkambou, and Claude Frasson.
\newblock The knowledge puzzle: An integrated approach of intelligent tutoring
systems and knowledge management.
\newblock In \emph{Proceedings of the 18th IEEE International Conference on
Tools with Artificial Intelligence (ICTAI 2006)}, pages 575--582, 2006.
\bibitem[Zouaq et~al.(2007)Zouaq, Nkambou, and Frasson]{Zouaq07tel}
Amal Zouaq, Roger Nkambou, and Claude Frasson.
\newblock Building domain ontologies from text for educational purposes.
\newblock In \emph{Proceedings of the Second European Conference on Technology
Enhanced Learning: Creating new learning experiences on a global scale},
2007.
\bibitem[Zouaq et~al.(2010)Zouaq, Gagnon, and Ozell]{Zouaq10}
Amal Zouaq, Michel Gagnon, and Beno\^{i}t Ozell.
\newblock Semantic analysis using dependency-based grammars and upper-level
ontologies.
\newblock \emph{International Journal of Computational Linguistics and
Applications}, 1\penalty0 (1-2), 2010.
\end{thebibliography}

View File

@ -0,0 +1,563 @@
@inproceedings{demarneffe06generating,
author = {{de Marneffe}, Marie-Catherine and Bill MacCartney and Christopher D. Manning},
year = 2006,
title = {Generating Typed Dependency Parses from Phrase Structure Parses},
booktitle = {5th International Conference on Language Resources and Evaluation (LREC 2006)},
}
@inproceedings{demarneffe08stanford,
author = {{de Marneffe}, Marie-Catherine and Christopher D. Manning},
year = 2008,
title = {The {S}tanford typed dependencies representation},
booktitle = {COLING Workshop on Cross-framework and Cross-domain Parser Evaluation},
}
@article{Clegg07,
author = {Andrew B. Clegg and Adrian J. Shepherd},
title={Benchmarking natural-language parsers for biological applications using dependency graphs},
journal ={BMC Bioinformatics},
volume ={8:24},
year={2007},
}
@inproceedings{Pyysalo07,
author = {Sampo Pyysalo and Filip Ginter and Katri Haverinen and Juho Heimonen and Tapio Salakoski and Veronika Laippala},
title = {On the unification of syntactic annotations under the {S}tanford
dependency scheme: A case study on {B}io{I}nfer and {GENIA}},
booktitle = {Proceedings of BioNLP 2007: Biological, translational, and clinical language processing (ACL07)},
year = {2007},
}
@InProceedings{Erkan07,
author = {Gunes Erkan and Arzucan Ozgur and Dragomir R. Radev},
title={Semi-Supervised Classification for Extracting Protein Interaction Sentences using Dependency Parsing},
booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP-CoNLL)},
year={2007},
}
@InProceedings{Greenwood07,
author = {Mark A. Greenwood and Mark Stevenson},
title={A Semi-Supervised Approach To Learning Relevant Protein-Protein Interaction Articles},
booktitle = {Proceedings of the Second BioCreAtIvE Challenge Workshop, Madrid, Spain},
year={2007},
}
@inproceedings{Urbain07,
author = {Jay Urbain and Nazli Goharian and Ophir Frieder},
title = {{IIT} {TREC} 2007 Genomics Track: Using Concept-Based Semantics in Context for Genomics Literature Passage Retrieval},
booktitle = {The Sixteenth Text REtrieval Conference (TREC 2007) Proceedings},
howpublished = {NIST Special Publication: SP 500-274},
year = 2007
}
@phdthesis{Clegg08,
author = {Andrew B. Clegg},
year = 2008,
title = {Computational-Linguistic Approaches to Biological Text Mining},
school = {School of Crystallography, Birkbeck, University of London}
}
@article{giles08large,
title = {Large-scale directional relationship extraction and resolution},
author = {Cory B. Giles and Jonathan D. Wren},
journal = {BMC Bioinformatics},
year = 2008,
volume = 9,
number = {Suppl 9},
pages = {S11},
}
@article{ozgur08identifying,
title = {Identifying gene-disease associations using centrality on a literature mined gene-interaction network},
author = {Arzucan {\"O}zg{\"u}r and Thuy Vu and G{\"u}nes Erkan and
Dragomir R. Radev},
journal = {Bioinformatics},
year = 2008,
volume = {24},
number = 13,
pages = {i277--i285},
}
@InProceedings{wang-neumann:2007:WTEP,
author = {Wang, Rui and Neumann, G\"{u}nter},
title = {Recognizing Textual Entailment Using Sentence Similarity based on Dependency Tree Skeletons},
booktitle = {Proceedings of the ACL-PASCAL Workshop on Textual Entailment and Paraphrasing},
month = {June},
year = {2007},
address = {Prague},
pages = {36--41},
}
@InProceedings{blake:2007:WTEP,
author = {Blake, Catherine},
title = {The Role of Sentence Structure in Recognizing Textual Entailment},
booktitle = {Proceedings of the ACL-PASCAL Workshop on Textual Entailment and Paraphrasing},
month = {June},
year = {2007},
address = {Prague},
pages = {101--106},
}
@InProceedings{harmeling:2007:WTEP,
author = {Harmeling, Stefan},
title = {An Extensible Probabilistic Transformation-based Approach to the Third Recognizing Textual Entailment Challenge},
booktitle = {Proceedings of the ACL-PASCAL Workshop on Textual Entailment and Paraphrasing},
month = {June},
year = {2007},
address = {Prague},
pages = {137--142},
}
@InProceedings{chambers-EtAl:2007:WTEP,
author = {Chambers, Nathanael and Cer, Daniel and Grenager, Trond and Hall, David and Kiddon, Chloe and MacCartney, Bill and de Marneffe, Marie-Catherine and Ramage, Daniel and Yeh, Eric and Manning, Christopher D.},
title = {Learning Alignments and Leveraging Natural Logic},
booktitle = {Proceedings of the ACL-PASCAL Workshop on Textual Entailment and Paraphrasing},
month = {June},
year = {2007},
address = {Prague},
pages = {165--170},
}
@inproceedings{chaumartin07knowledge,
title = {{UPAR7}: A knowledge-based system for headline sentiment
tagging},
author = {François-Régis Chaumartin},
booktitle = {Proceedings of the 4th International Workshop on Semantic Evaluations (SemEval-2007)},
pages = {422--425},
year = 2007
}
@inproceedings{Zhuang06cikm,
author = {Li Zhuang and Feng Jing and Xiao-yan Zhu and Lei Zhang},
title = {Movie Review Mining and Summarization},
booktitle = {Proc. ACM Conference on Information and Knowledge Management (CIKM)},
year = 2006,
pages = {43--50}
}
% Arlington, USA, November, 2006
@InProceedings{adams-EtAl:2007:WTEP,
author = {Adams, Rod and Nicolae, Gabriel and Nicolae, Cristina and Harabagiu, Sanda},
title = {Textual Entailment Through Extended Lexical Overlap and Lexico-Semantic Matching},
booktitle = {Proceedings of the ACL-PASCAL Workshop on Textual Entailment and Paraphrasing},
month = {June},
year = {2007},
address = {Prague},
pages = {119--124},
}
@inproceedings{ramakrishnan08discovery,
title = {Unsupervised Discovery of Compound Entities for
Relationship Extraction},
author = {Cartic Ramakrishnan and Pablo N. Mendes and Shaojun Wang
and Amit P. Sheth},
booktitle = {16th International Conference on Knowledge Engineering:
Practice and Patterns (EKAW 2008)},
year = 2008,
pages = {146--155}
}
@inproceedings{bjorne08how,
author = {Jari Bj{\"o}rne and Sampo Pyysalo and Filip Ginter and
Tapio Salakoski},
title = {How Complex are Complex Protein-protein Interactions?},
booktitle = {3rd International Symposium on Semantic Mining in
Biomedecine},
year = 2008
}
@inproceedings{kessler08icwsm,
author = {Jason S. Kessler},
title = {Polling the blogosphere: a rule-based approach to belief classification},
booktitle = {International Conference on Weblogs and Social Media},
year = {2008},
}
@article{Fundel07,
author = {Katrin Fundel and Robert K{\"u}ffner and Ralf Zimmer},
title={Rel{E}x -- Relation extraction using dependency parse trees},
journal ={Bioinformatics},
volume ={23},
year={2007},
}
@inproceedings{Pyysalo08,
author = {Antti Airola and Sampo Pyysalo and Jari Bj{\"o}rne and Tapio Pahikkala and Filip Ginter and Tapio Salakoski},
title={A graph kernel for protein-protein interaction extraction},
booktitle = {Proceedings of BioNLP 2008: Current Trends in Biomedical Natural Language Processing (ACL08)},
year = {2008},
}
@incollection{Meena07sentiment,
author = {Arun Meena and T. V. Prabhakar},
title = {Sentence Level Sentiment Analysis in the Presence of Conjuncts Using Linguistic Analysis},
year = 2007,
series = {Lecture Notes in Computer Science},
publisher = {Springer},
volume = {4425},
booktitle = {Advances in Information Retrieval}
}
@inproceedings{Banko07,
author= {Michele Banko and Michael J. Cafarella and Stephen Soderland and Matt Broadhead and Oren Etzioni},
title = {Open Information Extraction from the Web},
booktitle ={Proceedings of the 20th International Joint Conference on Artificial Intelligence (IJCAI 2007)},
year = {2007},
}
@inproceedings{Zouaq06tai,
author = {Amal Zouaq and Roger Nkambou and Claude Frasson},
title = {The Knowledge Puzzle: An Integrated Approach of Intelligent Tutoring Systems and Knowledge Management},
booktitle = {Proceedings of the 18th IEEE International Conference on Tools with Artificial Intelligence (ICTAI 2006)},
pages = {575--582},
year = 2006
}
@inproceedings{Zouaq07tel,
author = {Amal Zouaq and Roger Nkambou and Claude Frasson},
title = {Building Domain Ontologies From Text For Educational Purposes},
booktitle = {Proceedings of the Second European Conference on Technology Enhanced Learning: Creating new learning experiences on a global scale},
year = 2007
}
@InProceedings{kim09overview,
author = {Kim, Jin-Dong and Ohta, Tomoko and Pyysalo, Sampo and Kano, Yoshinobu and Tsujii, Jun'ichi},
title = {Overview of BioNLP'09 Shared Task on Event Extraction},
booktitle = {Proceedings of the BioNLP 2009 Workshop Companion Volume for Shared Task},
year = {2009},
pages = {1--9},
}
@inproceedings{xu09using,
title = {Using a dependency parser to improve {SMT} for subject-object-verb languages},
author = {Peng Xu and Jaeho Kang and Michael Ringgaard and Franz Och},
booktitle = {NAACL 2009: Proceedings of Human Language Technologies, The 2009 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
pages = {245--253},
year = 2009,
}
@inproceedings{genzel10automatically,
title = {Automatically Learning Source-side Reordering Rules for Large Scale Machine Translation},
author = {Dmitriy Genzel},
booktitle = {COLING-2010},
year = 2010,
}
@inproceedings{singh10statistical,
title = {Statistical Machine Translation of {E}nglish -- {M}anipuri using Morpho-syntactic and Semantic Information},
author = {Thoudam Doren Sing and Sivaji Bandyopadhyay},
booktitle = {Proceedings of the Association for Machine Translation in the Americas (AMTA 2010)},
year = 2010,
}
@inproceedings{McClosky2011,
author = {McClosky, David and Surdeanu, Mihai and Manning, Christopher D.},
title = {Event extraction as dependency parsing for BioNLP 2011},
booktitle = {Proceedings of the BioNLP Shared Task 2011 Workshop},
series = {BioNLP Shared Task '11},
year = {2011},
pages = {41--45},
}
@inproceedings{Bjorne2011,
author = {Bj\"{o}rne, Jari and Salakoski, Tapio},
title = {Generalizing biomedical event extraction},
booktitle = {Proceedings of the BioNLP Shared Task 2011 Workshop},
series = {BioNLP Shared Task '11},
year = {2011},
pages = {183--191},
}
@article{Landeghem12,
author = {Sofie Van Landeghem and Jari Bj\"{o}rne and Thomas Abeel and Bernard De Baets and Tapio Salakoski and Yves Van de Peer},
title = {Semantically linking molecular entities in literature through entity relationships},
journal = {BMC Bioinformatics},
volume = {13},
year = {2012}
}
@phdthesis{Yarten10,
author = {Yael Garten},
year = 2010,
title = {Text mining of the scientific literature to identify pharmacogenomic interactions},
school = {Department of Biomedical Informatics, Stanford University}
}
@article{Pyysalo11,
author = {Sampo Pyysalo and Tomoko Ohta and Jun'ichi Tsujii},
title = {An analysis of gene/protein associations at {P}ub{M}ed scale},
journal = {Journal of Biomedical Semantics},
volume = {2},
year = {2011}}
** Information extraction
@inproceedings{Wu2010,
author = {Wu, Fei and Weld, Daniel S.},
title = {Open information extraction using {W}ikipedia},
booktitle = {Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics},
series = {ACL '10},
year = {2010},
location = {Uppsala, Sweden},
}
@inproceedings{Haghighi10,
author = {Haghighi, Aria and Klein, Dan},
title = {An entity-level approach to information extraction},
booktitle = {Proceedings of the ACL 2010 Conference Short Papers},
series = {ACLShort '10},
year = {2010},
pages = {291--295},
}
** Sentiment analysis
@inproceedings{Joshi2010,
author = {Joshi, Mahesh and Das, Dipanjan and Gimpel, Kevin and Smith, Noah A.},
title = {Movie reviews and revenues: an experiment in text regression},
booktitle = {Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
series = {HLT '10},
year = {2010},
pages = {293--296},
}
@inproceedings{Hassan10,
author = {Hassan, Ahmed and Qazvinian, Vahed and Radev, Dragomir},
title = {What's with the attitude?: identifying sentences with attitude in online discussions},
booktitle = {Proceedings of the 2010 Conference on Empirical Methods in Natural Language Processing},
series = {EMNLP '10},
year = {2010},
location = {Cambridge, Massachusetts},
pages = {1245--1255},
}
** Dialogue
@inproceedings{Kluwer10,
author = {Kl\"{u}wer, Tina and Uszkoreit, Hans and Xu, Feiyu},
title = {Using syntactic and semantic based relations for dialogue act recognition},
booktitle = {Proceedings of the 23rd International Conference on Computational Linguistics},
series = {COLING '10},
year = {2010},
pages = {570--578},
}
** Co-reference
@inproceedings{Chen12,
author = {Lin Chen and Barbara Di Eugenio},
title = {Co-reference via Pointing and Haptics in Multi-Modal Dialogues},
booktitle = {Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
year = {2012}
}
** Time
@inproceedings{McClosky12,
author = {David McClosky and Christopher D. Manning},
title = {Learning Constraints for Consistent Timeline Extraction},
booktitle = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
year = 2012}
** Semantics
@article{Zouaq10,
author = {Amal Zouaq and Michel Gagnon and Beno\^{i}t Ozell},
title = {Semantic Analysis using Dependency-based Grammars and Upper-Level Ontologies},
journal = {International Journal of Computational Linguistics and Applications},
volume = 1,
number = {1-2},
year = 2010
}
** disagreement detection
@inproceedings{Abbott11,
author = {Abbott, Rob and Walker, Marilyn and Anand, Pranav and Fox Tree, Jean E. and Bowmani, Robeson and King, Joseph},
title = {How can you say such things?!?: Recognizing disagreement in informal political argument},
booktitle = {Proceedings of the Workshop on Languages in Social Media},
series = {LSM '11},
year = {2011},
pages = {2--11},
}
** word sense induction
@inproceedings{Lau12,
author = {Jey Han Lau and Paul Cook and Diana McCarthy and David Newman and Timothy Baldwin},
title = {Word Sense Induction for Novel Sense Detection},
booktitle = {Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics},
year = 2012
}
** textual entailment
@inproceedings{Pakray2011,
author = {Pakray, Partha},
title = {Answer validation through textual entailment},
booktitle = {Proceedings of the 16th international conference on Natural language processing and information systems},
series = {NLDB'11},
year = {2011},
pages = {324--329},
}
@inproceedings{Pakray2011a,
title = {A Textual Entailment System using Anaphora Resolution},
author = {Partha Pakray and Snehasis Neogi and Pinaki Bhaskar and Soujanya Poria and Sivaji Bandyopadhyay and Alexander Gelbukh},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2011}
}
@inproceedings{Kouylekov2010,
title = {{FBK} Participation in {RTE}-6: Main and {KBP} Validation Task},
author = {Milen Kouylekov and Yashar Mehdad and Matteo Negri and Elena Cabrio},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2010},
}
@inproceedings{Glinos2010,
title = {System Description for {SAIC} Entry at {RTE}-6},
author = {Demetrios G. Glinos},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2010},
}
@inproceedings{Shivare2010,
title = {Semi Cognitive approach to {RTE}-6 - Using {F}rame{N}et for Semantic Clustering},
author = {Himanshu Shivhare and Parul Nath and Anusha Jain},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2010},
}
@inproceedings{Mehdad09,
title = {Using Lexical Resources in a Distance-Based Approach to {RTE}},
author = {Yashar Mehdad and Matteo Negri and Elena Cabrio and Milen Kouylekov and Bernardo Magnini},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2009},
}
@inproceedings{Malakasiotis09,
title = {{AUEB} at {TAC} 2009},
author = {Prodromos Malakasiotis},
booktitle = {Proceedings of the Text Analysis Conference (TAC)},
year = {2009},
}
** Export to other languages
@inproceedings{Haverinen2010a,
author = {Haverinen, Katri and Ginter, Filip and Viljanen, Timo and Laippala, Veronika and Salakoski, Tapio},
title = {Dependency-based PropBanking of clinical {F}innish},
booktitle = {Proceedings of the Fourth Linguistic Annotation Workshop},
series = {LAW IV '10},
year = {2010},
pages = {137--141},
}
In this paper, we present a PropBank of clinical Finnish, an annotated corpus of verbal propositions and arguments. The clinical PropBank is created on top of a previously existing dependency treebank annotated in the Stanford Dependency (SD) scheme and covers 90% of all verb occurrences in the treebank.
We establish that the PropBank scheme is applicable to clinical Finnish as well as compatible with the SD scheme, with an overwhelming proportion of arguments being governed by the verb. This allows argument candidates to be restricted to direct verb dependents, substantially simplifying the PropBank construction.
The clinical Finnish PropBank is freely available at the address http://bionlp.utu.fi.
@inproceedings{Haverinen2010b,
author = {Haverinen, Katri and Viljanen, Timo and Laippala, Veronika and Kohonen, Samuel and Ginter, Filip and Salakoski, Tapio},
title = {Treebanking {F}innish},
booktitle = {Proceedings of the Ninth International Workshop on Treebanks and Linguistic Theories (TLT)},
year = 2010}
@inproceedings{Seraji12,
author = {Mojgan Seraji and Be\'ata Megyesi and Joakim Nivre},
title = {A Basic Language Resource Kit for {P}ersian},
booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation},
year = 2012}
@inproceedings{Potisuk10,
author = {Siripong Potisuk},
title = {Typed Dependency Relations for Syntactic Analysis of {T}hai Sentences},
booktitle = {Proceedings of PACLIC 24 Pacific Asia Conference on Language, Information and Computation},
year = 2010}
@inproceedings{Maarouf12,
author = {Isma\"{i}l {El Maarouf} and Jeanne Villaneau},
title = {A {F}rench Fairy Tale Corpus syntactically and semantically annotated},
booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation},
year = 2012}
**
@inproceedings{Petrov12,
author = {Slav Petrov and Ryan McDonald},
title = {Overview of the 2012 Shared Task on Parsing the Web},
booktitle = {First Workshop on Syntactic Analysis of Non-Canonical Language},
year = 2012}
"first shared task on parsing English web text"
@phdthesis{Chambers11,
author= {Nathanael Chambers},
title = {Inducing Event Schemas and their Participants from Unlabeled Text},
year = 2011,
school = {Department of Computer Science, Stanford University}
}
@inproceedings{BioNLP11,
author = {Jin-Dong Kim and Sampo Pyysalo and Tomoko Ohta and Robert Bossy and Ngan Nguyen and Jun'ichi Tsujii},
year = {2011},
title = {Overview of BioNLP Shared Task 2011},
booktitle = {Proceedings of the BioNLP Shared Task 2011 Workshop},
}
@inproceedings{poon09unsupervised,
author = {Poon, Hoifung and Domingos, Pedro},
title = {Unsupervised semantic parsing},
booktitle = {Proceedings of the 2009 Conference on Empirical Methods in Natural Language Processing (EMNLP 2009)},
year = {2009},
pages = {1--10},
}
% isbn = {978-1-932432-59-6},
% location = {Singapore},
@InProceedings{mcdonald-EtAl:2013:Short,
author = {McDonald, Ryan and Nivre, Joakim and Quirmbach-Brundage, Yvonne and Goldberg, Yoav and Das, Dipanjan and Ganchev, Kuzman and Hall, Keith and Petrov, Slav and Zhang, Hao and T\"{a}ckstr\"{o}m, Oscar and Bedini, Claudia and Bertomeu Castell\'{o}, N\'{u}ria and Lee, Jungmee},
title = {Universal Dependency Annotation for Multilingual Parsing},
booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
year = {2013},
pages = {92--97},
}
% month = {August},
% address = {Sofia, Bulgaria},
% publisher = {Association for Computational Linguistics},
% url = {http://www.aclweb.org/anthology/P13-2017}
@InProceedings{tsarfaty:2013:Short,
author = {Tsarfaty, Reut},
title = {A Unified Morpho-Syntactic Scheme of Stanford Dependencies},
booktitle = {Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
year = {2013},
pages = {578--584},
}
% month = {August},
% address = {Sofia, Bulgaria},
% publisher = {Association for Computational Linguistics},
% url = {http://www.aclweb.org/anthology/P13-2103}
@inproceedings{marneffe14universal,
year = {2014},
author = {Marie-Catherine de Marneffe and Timothy Dozat and Natalia Silveira and
Katri Haverinen and Filip Ginter and Joakim Nivre and Christopher D. Manning},
title = {Universal Stanford Dependencies: A cross-linguistic typology},
booktitle = {Proceedings of the Ninth International Conference on Language
Resources and Evaluation (LREC-2014)}
}
@InProceedings{chang-tseng-jurafsky-manning:2009:SSST,
author = {Chang, Pi-Chuan and Tseng, Huihsin and Jurafsky, Dan
and Manning, Christopher D.},
title = {Discriminative Reordering with {C}hinese Grammatical
Relations Features},
booktitle = {Proceedings of the Third Workshop on Syntax and
Structure in Statistical Translation},
month = {June},
year = {2009},
address = {Boulder, Colorado},
url = {pubs/ssst09-chang.pdf}
}
@inproceedings{schuster2016enhanced,
author = {Schuster, Sebastian and Manning, Christopher D.},
booktitle = {Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC 2016)},
title = {Enhanced {E}nglish {U}niversal {D}ependencies: An Improved Representation for Natural Language Understanding Tasks},
year = {2016}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -0,0 +1,190 @@
<!-- build.xml file for ant for JavaNLP -->
<!-- A "project" describes a set of targets that may be requested
when Ant is executed. The "default" attribute defines the
target which is executed if no specific target is requested,
and the "basedir" attribute defines the current working directory
from which Ant executes the requested task. This is normally
set to the current working directory.
-->
<project name="JavaNLP" default="compile" basedir=".">
<!-- Directory layout: every path below resolves against basedir (the checkout root). -->
<property name="build.home" value="${basedir}/classes"/>
<!-- NOTE(review): build.tests duplicates build.home and is not referenced by any target in this file; confirm before removing. -->
<property name="build.tests" value="${basedir}/classes"/>
<property name="docs.home" value="${basedir}/docs"/>
<property name="src.home" value="${basedir}/src"/>
<property name="javadoc.home" value="${basedir}/javadoc"/>
<!-- ==================== Compilation Control Options ==================== -->
<!--
These properties control option settings on the Javac compiler when it
is invoked using the <javac> task.
compile.debug Should compilation include the debug option?
compile.deprecation Should compilation include the deprecation option?
compile.optimize Should compilation include the optimize option?
compile.source Source version compatibility
compile.target Target class version compatibility
-->
<property name="compile.debug" value="true"/>
<property name="compile.deprecation" value="false"/>
<property name="compile.optimize" value="true"/>
<property name="compile.source" value="1.8" />
<property name="compile.target" value="1.8" />
<property name="compile.encoding" value="utf-8" />
<!-- ==================== All Target ====================================== -->
<!--
The "all" target is a shortcut for running the "clean" target followed
by the "compile" target, to force a complete recompile.
-->
<target name="all" depends="clean,compile"
description="Clean build and dist directories, then compile"/>
<!-- ==================== Clean Target ==================================== -->
<!--
The "clean" target deletes any previous "build" and "dist" directory,
so that you can be ensured the application can be built from scratch.
-->
<target name="clean" description="Delete old classes">
<!-- Only the edu package tree is removed; anything else under classes/ is kept. -->
<delete dir="${build.home}/edu"/>
</target>
<!-- ==================== Compile Target ================================== -->
<!--
The "compile" target transforms source files (from your "src" directory)
into object files in the appropriate location in the build directory.
This example assumes that you will be including your classes in an
unpacked directory hierarchy under "/WEB-INF/classes".
-->
<target name="compile" depends="prepare"
description="Compile Java sources">
<!-- Compile Java classes as necessary -->
<mkdir dir="${build.home}"/>
<javac srcdir="${src.home}"
destdir="${build.home}"
debug="${compile.debug}"
encoding="${compile.encoding}"
deprecation="${compile.deprecation}"
optimize="${compile.optimize}"
source="${compile.source}"
target="${compile.target}"
includeantruntime="false">
<!-- The two args form one javac option pair: stop reporting after 20 errors. -->
<compilerarg value="-Xmaxerrs"/>
<compilerarg value="20"/>
<!-- Compile against every jar in the project root except prebuilt stanford-parser jars. -->
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="stanford-parser*"/>
</fileset>
</classpath>
<!-- <compilerarg value="-Xlint"/> -->
</javac>
<!-- Copy application resources -->
<!--
<copy todir="${build.home}/WEB-INF/classes">
<fileset dir="${src.home}" excludes="**/*.java"/>
</copy>
-->
</target>
<!-- ==================== Javadoc Target ================================== -->
<!--
The "javadoc" target creates Javadoc API documentation for the Java
classes included in your application. Normally, this is only required
when preparing a distribution release, but is available as a separate
target in case the developer wants to create Javadocs independently.
-->
<target name="javadoc" depends="compile"
description="Create Javadoc API documentation">
<mkdir dir="${javadoc.home}"/>
<javadoc sourcepath="${src.home}"
destdir="${javadoc.home}"
maxmemory="1g"
author="true"
source="${compile.source}"
overview="${src.home}/edu/stanford/nlp/overview.html"
doctitle="Stanford JavaNLP API Documentation"
windowtitle="Stanford JavaNLP API"
encoding="${compile.encoding}"
docencoding="${compile.encoding}"
charset="${compile.encoding}"
packagenames="*">
<!-- Allow @generated, @modifiable and @ordered tags -->
<tag name="generated" scope="all" description="Generated" />
<tag name="modifiable" scope="all" description="Modifiable" />
<tag name="ordered" scope="all" description="Ordered" />
<!-- Depends on lib and classes folders -->
<classpath>
<fileset dir="${basedir}">
<include name="*.jar"/>
<exclude name="stanford-parser*"/>
</fileset>
<pathelement path="${build.home}" />
</classpath>
<bottom><![CDATA[<font size="2"><a href="https://nlp.stanford.edu" target="_top">Stanford NLP Group</a></font>]]></bottom>
<link href="https://docs.oracle.com/javase/8/docs/api/"/>
</javadoc>
</target>
<!-- ==================== Prepare Target ================================== -->
<!--
The "prepare" target is used to create the "build" destination directory,
and copy the static contents of your web application to it. If you need
to copy static files from external dependencies, you can customize the
contents of this task.
Normally, this task is executed indirectly when needed.
-->
<target name="prepare">
<!-- Create build directories as needed -->
<mkdir dir="${build.home}"/>
</target>
</project>

View File

@ -0,0 +1,95 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Maven coordinates for the Stanford Parser release artifact. -->
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-parser</artifactId>
<version>4.2.0</version>
<packaging>jar</packaging>
<name>Stanford Parser</name>
<description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
<url>https://nlp.stanford.edu/software/lex-parser.html</url>
<licenses>
<license>
<name>GNU General Public License Version 2</name>
<url>http://www.gnu.org/licenses/gpl-2.0.txt</url>
</license>
</licenses>
<!-- NOTE(review): scm conventionally holds a version-control locator (e.g. scm:git:...);
here both entries point at the release zip instead. Presumably intentional for this
source-drop distribution; confirm before publishing. -->
<scm>
<url>http://nlp.stanford.edu/software/stanford-parser-4.2.0.zip</url>
<connection>http://nlp.stanford.edu/software/stanford-parser-4.2.0.zip</connection>
</scm>
<developers>
<developer>
<id>christopher.manning</id>
<name>Christopher Manning</name>
<email>manning@stanford.edu</email>
</developer>
<developer>
<id>jason.bolton</id>
<name>Jason Bolton</name>
<email>jebolton@stanford.edu</email>
</developer>
<developer>
<id>spence.green</id>
<name>Spence Green</name>
<email>spenceg@stanford.edu</email>
</developer>
</developers>
<!-- Compile for Java 8; sources are UTF-8. -->
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<encoding>UTF-8</encoding>
</properties>
<!-- Runtime dependencies: EJML (linear algebra) split artifacts and the SLF4J logging API. -->
<dependencies>
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-core</artifactId>
<version>0.38</version>
</dependency>
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-ddense</artifactId>
<version>0.38</version>
</dependency>
<dependency>
<groupId>org.ejml</groupId>
<artifactId>ejml-simple</artifactId>
<version>0.38</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
</dependencies>
<build>
<!-- Sources live in src/ rather than Maven's default src/main/java. -->
<sourceDirectory>src</sourceDirectory>
<plugins>
<!-- During the package phase, attach the prebuilt models jar as a secondary
artifact with classifier "models" so it is installed/deployed alongside
the main jar. The file must already exist in the project root. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>1.7</version>
<executions>
<execution>
<id>attach-models</id>
<phase>package</phase>
<goals>
<goal>attach-artifact</goal>
</goals>
<configuration>
<artifacts>
<artifact>
<file>${project.basedir}/stanford-parser-4.2.0-models.jar</file>
<type>jar</type>
<classifier>models</classifier>
</artifact>
</artifacts>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

Some files were not shown because too many files have changed in this diff Show More