2021-01-14 08:07:24 +01:00
|
|
|
## tagger training invoked at Mon Feb 24 14:01:33 PST 2014 with arguments:
|
|
|
|
model = arabic.tagger
|
|
|
|
arch = words(-2,2),order(1),prefix(6),suffix(6),unicodeshapes(1)
|
|
|
|
wordFunction =
|
|
|
|
trainFile = format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Train.utf8.txt;format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Dev.utf8.txt;format=TREES,/u/nlp/data/lexparser/trees/Arabic/2-Unvoc-Test.utf8.txt
|
|
|
|
closedClassTags =
|
|
|
|
closedClassTagThreshold = 40
|
|
|
|
curWordMinFeatureThresh = 1
|
|
|
|
debug = false
|
|
|
|
debugPrefix =
|
|
|
|
tagSeparator = /
|
|
|
|
encoding = UTF-8
|
|
|
|
iterations = 100
|
|
|
|
lang = arabic
|
|
|
|
learnClosedClassTags = false
|
|
|
|
minFeatureThresh = 3
|
|
|
|
openClassTags =
|
|
|
|
rareWordMinFeatureThresh = 3
|
|
|
|
rareWordThresh = 5
|
|
|
|
search = owlqn
|
|
|
|
sgml = false
|
|
|
|
sigmaSquared = 0.0
|
|
|
|
regL1 = 0.75
|
|
|
|
tagInside =
|
|
|
|
tokenize = false
|
|
|
|
tokenizerFactory = edu.stanford.nlp.process.WhitespaceTokenizer
|
|
|
|
tokenizerOptions =
|
|
|
|
verbose = false
|
|
|
|
verboseResults = true
|
|
|
|
veryCommonWordThresh = 250
|
|
|
|
xmlInput =
|
|
|
|
outputFile =
|
|
|
|
outputFormat = slashTags
|
|
|
|
outputFormatOptions =
|
|
|
|
nthreads = 1
|