# Stanford CoreNLP pipeline configuration for Chinese.
# Format: Java properties, one "key = value" pair per line, full-line "#" comments only.
# Model paths are classpath-style resource paths (edu/stanford/nlp/models/...).

# Pipeline options - lemma is a no-op for Chinese but is currently needed
# because coref demands it (bad old requirements system).
annotators = tokenize, ssplit, pos, lemma, ner, parse, coref

# segment
tokenize.language = zh
segment.model = edu/stanford/nlp/models/segmenter/chinese/ctb.gz
segment.sighanCorporaDict = edu/stanford/nlp/models/segmenter/chinese
segment.serDictionary = edu/stanford/nlp/models/segmenter/chinese/dict-chris6.ser.gz
segment.sighanPostProcessing = true

# sentence split
# A boundary token is an ASCII or ideographic full stop, or a run of
# ASCII / fullwidth exclamation and question marks.
ssplit.boundaryTokenRegex = [.。]|[!?！？]+

# pos
pos.model = edu/stanford/nlp/models/pos-tagger/chinese-distsim.tagger

# ner
ner.language = chinese
ner.model = edu/stanford/nlp/models/ner/chinese.misc.distsim.crf.ser.gz
ner.applyNumericClassifiers = true
ner.useSUTime = false

# regexner
ner.fine.regexner.mapping = edu/stanford/nlp/models/kbp/chinese/gazetteers/cn_regexner_mapping.tab
ner.fine.regexner.noDefaultOverwriteLabels = CITY,COUNTRY,STATE_OR_PROVINCE

# parse
parse.model = edu/stanford/nlp/models/srparser/chineseSR.ser.gz

# depparse
# NOTE: depparse is not in the annotators list above.
depparse.model = edu/stanford/nlp/models/parser/nndep/UD_Chinese.gz
depparse.language = chinese

# coref
# The value of coref.sieves is a comma-separated list.
coref.sieves = ChineseHeadMatch, ExactStringMatch, PreciseConstructs, StrictHeadMatch1, StrictHeadMatch2, StrictHeadMatch3, StrictHeadMatch4, PronounMatch
coref.input.type = raw
coref.postprocessing = true
coref.calculateFeatureImportance = false
coref.useConstituencyTree = true
coref.useSemantics = false
coref.algorithm = hybrid
# Explicitly empty: no word2vec path is configured.
coref.path.word2vec =
coref.language = zh
coref.defaultPronounAgreement = true
coref.zh.dict = edu/stanford/nlp/models/dcoref/zh-attributes.txt.gz
coref.print.md.log = false
coref.md.type = RULE
coref.md.liberalChineseMD = false

# kbp
# NOTE: kbp is not in the annotators list above.
kbp.semgrex = edu/stanford/nlp/models/kbp/chinese/semgrex
kbp.tokensregex = edu/stanford/nlp/models/kbp/chinese/tokensregex
kbp.language = zh
kbp.model = none

# entitylink
# NOTE: entitylink is not in the annotators list above.
entitylink.wikidict = edu/stanford/nlp/models/kbp/chinese/wikidict_chinese.tsv.gz