17 lines
505 B
Properties
17 lines
505 B
Properties
# Pipeline options
# Comma-separated list of annotator names for the pipeline. The remaining
# sections of this file hold the per-annotator settings (tokenize.*, ssplit.*,
# pos.*, parse.*) for the four names listed here.
|
|
annotators = tokenize, ssplit, pos, parse
|
|
|
|
# segment
# Tokenizer language and segmenter model.
# NOTE(review): "segment" is not in the annotators list, and the custom
# annotator registration below is commented out. segment.model is therefore
# presumably only read by the tokenizer (or not at all) -- confirm before
# removing or re-enabling anything here.
|
|
#customAnnotatorClass.segment = edu.stanford.nlp.pipeline.ArabicSegmenterAnnotator
|
|
# Language code handed to the tokenizer; "ar" matches the Arabic models
# referenced by the other keys in this file.
tokenize.language = ar
|
|
# Resource path of the serialized (gzipped) Arabic segmenter model.
# Presumably resolved from the classpath / models jar -- TODO confirm.
segment.model = edu/stanford/nlp/models/segmenter/arabic/arabic-segmenter-atb+bn+arztrain.ser.gz
|
|
|
|
# sentence split
# Regular expression with three alternatives:
#   [.]          a single literal full stop
#   [!?]+        one or more "!" or "?" characters
#   [!<U+061F>]+ one or more "!" or ARABIC QUESTION MARK characters (U+061F is
#                written in the value as a Unicode escape)
# Going by the key name, a token matching this pattern marks a sentence
# boundary for the ssplit annotator.
|
|
ssplit.boundaryTokenRegex = [.]|[!?]+|[!\u061F]+
|
|
|
|
# pos
# Resource path of the part-of-speech tagger model.
# NOTE(review): the tagger path layout has differed between model releases --
# verify this exact path exists in the models jar actually deployed.
|
|
pos.model = edu/stanford/nlp/models/pos-tagger/arabic.tagger
|
|
|
|
# parse
# Resource path of the serialized (gzipped) parser model used by the parse
# annotator. Presumably resolved from the classpath / models jar -- TODO confirm.
|
|
parse.model = edu/stanford/nlp/models/lexparser/arabicFactored.ser.gz
|