CofeehousePy/services/corenlp/classes/edu/stanford/nlp/pipeline/StanfordCoreNLP-chinese.pro...

58 lines
2.0 KiB
Properties
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Pipeline options - lemma is no-op for Chinese but currently needed because coref demands it (bad old requirements system)
annotators = tokenize, ssplit, pos, lemma, ner, parse, coref
# segment
tokenize.language = zh
segment.model = edu/stanford/nlp/models/segmenter/chinese/ctb.gz
segment.sighanCorporaDict = edu/stanford/nlp/models/segmenter/chinese
segment.serDictionary = edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz
segment.sighanPostProcessing = true
# sentence split
ssplit.boundaryTokenRegex = [.。]|[!?]+
# pos
pos.model = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger
# ner
ner.language = chinese
ner.model = edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz
ner.applyNumericClassifiers = true
ner.useSUTime = false
# regexner
ner.fine.regexner.mapping = edu/stanford/nlp/models/kbp/chinese/gazetteers/cn_regexner_mapping.tab
ner.fine.regexner.noDefaultOverwriteLabels = CITY,COUNTRY,STATE_OR_PROVINCE
# parse
parse.model = edu/stanford/nlp/models/srparser/chineseSR.ser.gz
# depparse
depparse.model = edu/stanford/nlp/models/parser/nndep/UD_Chinese.gz
depparse.language = chinese
# coref
coref.sieves = ChineseHeadMatch, ExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, PronounMatch
coref.input.type = raw
coref.postprocessing = true
coref.calculateFeatureImportance = false
coref.useConstituencyTree = true
coref.useSemantics = false
coref.algorithm = hybrid
coref.path.word2vec =
coref.language = zh
coref.defaultPronounAgreement = true
coref.zh.dict = edu/stanford/nlp/models/dcoref/zh-attributes.txt.gz
coref.print.md.log = false
coref.md.type = RULE
coref.md.liberalChineseMD = false
# kbp
kbp.semgrex = edu/stanford/nlp/models/kbp/chinese/semgrex
kbp.tokensregex = edu/stanford/nlp/models/kbp/chinese/tokensregex
kbp.language = zh
kbp.model = none
# entitylink
entitylink.wikidict = edu/stanford/nlp/models/kbp/chinese/wikidict_chinese.tsv.gz