CofeehousePy/services/corenlp/scripts/pos-tagger/arabic-train.tagger.props

36 lines
1.4 KiB
XML

# Training configuration for the model named "arabic-train.tagger".
# The header line below records when the training run was invoked; the
# key/value pairs that follow are the arguments it was invoked with.
# Keys with nothing after '=' are set to the empty string.
# NOTE(review): comments in a .properties file must sit on their own line;
# text after a value would become part of that value.
## tagger training invoked at Fri Feb 14 00:55:55 PST 2014 with arguments:
model = arabic-train.tagger
# Feature architecture: a comma-separated list of feature extractor specs.
# NOTE(review): exact meaning of each extractor and its numeric arguments is
# defined by the tagger implementation, not by this file - confirm there.
arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
wordFunction =
# Three inputs separated by ';', each declared as format=TREES.
# NOTE(review): going by the filenames, the Dev and Test splits are included
# alongside Train, so scores measured on those files would not be held-out -
# confirm this is intended for this model.
# NOTE(review): paths are absolute (/u/nlp/data/...) and will presumably need
# adjusting to retrain on another machine.
trainFile = format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt;format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Dev.utf8.txt;format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
# No explicit closed-class tag list is given and learnClosedClassTags (below)
# is false.
# NOTE(review): closedClassTagThreshold presumably only matters when closed
# class tags are learned - verify against the tagger documentation.
closedClassTags =
closedClassTagThreshold = 40
curWordMinFeatureThresh = 1
debug = false
debugPrefix =
# Separator between word and tag; matches outputFormat = slashTags below.
tagSeparator = /
encoding = UTF-8
iterations = 100
lang = arabic
learnClosedClassTags = false
minFeatureThresh = 3
openClassTags =
rareWordMinFeatureThresh = 3
rareWordThresh = 5
# Optimizer and regularization settings.
# NOTE(review): sigmaSquared is 0.0 while regL1 is 0.75; presumably the owlqn
# search uses regL1 and ignores sigmaSquared - TODO confirm.
search = owlqn
sgml = false
sigmaSquared = 0.0
regL1 = 0.75
tagInside =
# Tokenization is disabled; a whitespace tokenizer class is still named.
# NOTE(review): presumably input is expected to be pre-tokenized - confirm.
tokenize = false
tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
tokenizerOptions =
verbose = false
verboseResults = true
veryCommonWordThresh = 250
xmlInput =
# Output settings: no output file is set; format is slashTags with no options.
outputFile =
outputFormat = slashTags
outputFormatOptions =
nthreads = 1