17 lines
505 B
Properties
17 lines
505 B
Properties
|
# Pipeline options
|
||
|
# Annotators named for this pipeline, as a comma-separated list:
# tokenize, ssplit (sentence split), pos (part-of-speech tagging), parse.
# The other keys in this file are prefixed with one of these names
# (tokenize., ssplit., pos., parse.) or with "segment.".
annotators = tokenize, ssplit, pos, parse
|
||
|
|
||
|
# Segmentation (tokenizer language and segmenter model)
|
||
|
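# Disabled setting: would register a custom annotator class under the name "segment".
# NOTE(review): the reason it is commented out is not recorded here; confirm
# before re-enabling or deleting it.
#customAnnotatorClass.segment = edu.stanford.nlp.pipeline.ArabicSegmenterAnnotator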
|
||
|
# Language code handed to the tokenizer. "ar" presumably denotes Arabic, in
# line with the Arabic model paths used elsewhere in this file -- confirm
# against the consuming pipeline's documentation.
tokenize.language = ar
|
||
|
# Path of the Arabic segmenter model (a .ser.gz file; the path looks like a
# classpath resource rather than a filesystem path -- confirm).
# NOTE(review): "segment" does not appear in the annotators list; presumably
# the tokenizer loads this model when tokenize.language = ar -- confirm.
segment.model = edu/stanford/nlp/models/segmenter/arabic/arabic-segmenter-atb+bn+arztrain.ser.gz
|
||
|
|
||
|
# sentence split
|
||
|
# Regular expression for the sentence splitter's boundary tokens (presumably
# matched against whole tokens to decide where a sentence ends -- confirm).
# It has three alternatives:
#   1. a single period;
#   2. one or more characters drawn from '!' and '?';
#   3. one or more characters drawn from '!' and U+061F (ARABIC QUESTION MARK),
#      which is written below as a Unicode escape.
# '!' appears in both the second and third alternative; no single alternative
# accepts a run that mixes '?' with U+061F.
ssplit.boundaryTokenRegex = [.]|[!?]+|[!\u061F]+
|
||
|
|
||
|
# Part-of-speech (POS) tagging
|
||
|
# Path of the Arabic part-of-speech tagger model (looks like a classpath
# resource rather than a filesystem path -- confirm).
pos.model = edu/stanford/nlp/models/pos-tagger/arabic.tagger
|
||
|
|
||
|
# Parsing
|
||
|
# Path of the Arabic parser model; the file name indicates the "factored"
# lexparser model (looks like a classpath resource -- confirm).
parse.model = edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz
|