36 lines
1.4 KiB
Plaintext
|
## tagger training invoked at Tue Feb 25 04:59:36 PST 2014 with arguments:
|
||
|
model = wsj-0-18-bidirectional-distsim.tagger
|
||
|
arch = bidirectional5words,naacl2003unknowns,wordshapes(-1,1),distsim(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1),distsimconjunction(/u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters,-1,1)
|
||
|
wordFunction =
|
||
|
trainFile = /u/nlp/data/pos-tagger/english/train-wsj-0-18
|
||
|
closedClassTags =
|
||
|
closedClassTagThreshold = 40
|
||
|
curWordMinFeatureThresh = 2
|
||
|
debug = false
|
||
|
debugPrefix =
|
||
|
tagSeparator = _
|
||
|
encoding = UTF-8
|
||
|
iterations = 100
|
||
|
lang = english
|
||
|
learnClosedClassTags = false
|
||
|
minFeatureThresh = 2
|
||
|
openClassTags =
|
||
|
rareWordMinFeatureThresh = 5
|
||
|
rareWordThresh = 5
|
||
|
search = owlqn
|
||
|
sgml = false
|
||
|
sigmaSquared = 0.5
|
||
|
regL1 = 0.75
|
||
|
tagInside =
|
||
|
tokenize = true
|
||
|
tokenizerFactory =
|
||
|
tokenizerOptions =
|
||
|
verbose = false
|
||
|
verboseResults = true
|
||
|
veryCommonWordThresh = 250
|
||
|
xmlInput =
|
||
|
outputFile =
|
||
|
outputFormat = slashTags
|
||
|
outputFormatOptions =
|
||
|
nthreads = 1
|