# Java-style .properties configuration for a Chinese NLP pipeline:
# the annotator list, followed by per-annotator model paths and options.
# Pipeline options: comma-separated list of annotators in the pipeline.
# lemma is a no-op for Chinese, but it must stay in the list because coref still
# demands it (a leftover of the old annotator-requirements system).
# NOTE(review): depparse, kbp and entitylink have settings further down in this file
# but are not listed here; presumably those settings only take effect when a caller
# adds the annotator itself - confirm.
|
||
annotators = tokenize, ssplit, pos, lemma, ner, parse, coref
|
||
|
||
# segment: tokenizer language plus the Chinese word-segmenter resources.
# All three segment.* paths point below edu/stanford/nlp/models/segmenter/chinese.
# NOTE(review): the paths look like classpath resources shipped in a models jar - confirm.
|
||
tokenize.language = zh
|
||
segment.model = edu/stanford/nlp/models/segmenter/chinese/ctb.gz
|
||
# Directory (no file name), unlike the two neighbouring segment.* paths.
segment.sighanCorporaDict = edu/stanford/nlp/models/segmenter/chinese
|
||
segment.serDictionary = edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz
|
||
segment.sighanPostProcessing = true
|
||
|
||
# sentence split
|
||
# A sentence ends at an ASCII '.' or an ideographic full stop (U+3002), or at a run
# of '!' / '?' in either ASCII or fullwidth form (U+FF01, U+FF1F). The fullwidth
# forms are the ones normally used in Chinese text, so they must be in the class.
ssplit.boundaryTokenRegex = [.。]|[!?！？]+
|
||
|
||
# pos: path of the part-of-speech tagger model used for Chinese.
|
||
pos.model = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger
|
||
|
||
# ner: named-entity recognition - language, model path, and two switches.
|
||
# NOTE(review): spelled "chinese" here while tokenize/coref/kbp use "zh"; presumably
# each annotator accepts its own set of language names - confirm before unifying.
ner.language = chinese
|
||
ner.model = edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz
|
||
# Numeric classifiers are switched on, SUTime is switched off.
ner.applyNumericClassifiers = true
|
||
ner.useSUTime = false
|
||
|
||
# regexner: fine-grained NER step configured under the ner.fine.regexner.* prefix.
|
||
# Tab-separated mapping file from the Chinese KBP gazetteers.
ner.fine.regexner.mapping = edu/stanford/nlp/models/kbp/chinese/gazetteers/cn_regexner_mapping.tab
|
||
# NOTE(review): presumably the labels that the mapping may not overwrite by
# default - confirm against the RegexNER documentation.
ner.fine.regexner.noDefaultOverwriteLabels = CITY,COUNTRY,STATE_OR_PROVINCE
|
||
|
||
# parse: path of the parser model for Chinese (the "parse" annotator listed in
# annotators above).
|
||
parse.model = edu/stanford/nlp/models/srparser/chineseSR.ser.gz
|
||
|
||
# depparse: dependency parser model and language.
# NOTE(review): depparse is not in the annotators list at the top of this file, so
# these values presumably only matter when a caller adds it - confirm.
|
||
depparse.model = edu/stanford/nlp/models/parser/nndep/UD_Chinese.gz
|
||
depparse.language = chinese
|
||
|
||
# coref: coreference resolution settings for Chinese (hybrid algorithm, rule-based
# mention detection, constituency trees enabled, semantics disabled).
|
||
# Comma-separated sieve names.
# NOTE(review): order presumably determines the order in which sieves run - confirm.
coref.sieves = ChineseHeadMatch, ExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, PronounMatch
|
||
coref.input.type = raw
|
||
coref.postprocessing = true
|
||
coref.calculateFeatureImportance = false
|
||
coref.useConstituencyTree = true
|
||
coref.useSemantics = false
|
||
coref.algorithm = hybrid
|
||
# Deliberately left empty in this file.
# NOTE(review): presumably means no word2vec vectors are loaded (consistent with
# coref.useSemantics = false) - confirm.
coref.path.word2vec =
|
||
# NOTE(review): "zh" here versus "chinese" for ner/depparse; do not unify without
# checking which spellings each annotator accepts.
coref.language = zh
|
||
coref.defaultPronounAgreement = true
|
||
coref.zh.dict = edu/stanford/nlp/models/dcoref/zh-attributes.txt.gz
|
||
# Mention-detection (md) options: logging off, rule-based detection, liberal
# Chinese mention detection off.
coref.print.md.log = false
|
||
coref.md.type = RULE
|
||
coref.md.liberalChineseMD = false
|
||
|
||
# kbp: rule resources (semgrex and tokensregex directories) for Chinese.
# NOTE(review): kbp is not in the annotators list at the top of this file, so these
# values presumably only matter when a caller adds it - confirm.
|
||
kbp.semgrex = edu/stanford/nlp/models/kbp/chinese/semgrex
|
||
kbp.tokensregex = edu/stanford/nlp/models/kbp/chinese/tokensregex
|
||
kbp.language = zh
|
||
# Literal value "none" rather than a model path.
# NOTE(review): presumably disables the statistical model so that only the rule
# resources above are used - confirm.
kbp.model = none
|
||
|
||
# entitylink: path of the gzipped TSV dictionary used for Chinese entity linking.
# NOTE(review): entitylink is not in the annotators list at the top of this file -
# confirm it is added by callers that need it.
|
||
entitylink.wikidict = edu/stanford/nlp/models/kbp/chinese/wikidict_chinese.tsv.gz
|