# CofeehousePy/services/corenlp/scripts/truecase/truecasing.fast.caseless.prop
#
# File-viewer metadata (not configuration): 50 lines, 1.0 KiB, Plaintext

# Training configuration for a caseless truecasing sequence model.
# Format: one key=value property per line; lines starting with '#' are comments.
# NOTE(review): presumably consumed by a Stanford CoreNLP sequence classifier
# (the class names below live under edu.stanford.nlp.*) -- confirm against the
# script that loads this file.
#
# --- Model output and data files ---
# NOTE(review): the output name contains ".qn." although useQN=false is set
# further down -- confirm the name is intentional.
serializeTo=truecasing.fast.caseless.qn.ser.gz
# NOTE(review): trainFileList and testFile are absolute paths under a specific
# user's home directory; they must be adjusted to run on another machine.
trainFileList=/home/horatio/truecase/train.4_5M.truecase.txt
# Disabled alternative training file.
# trainFileList=/home/john/truecase/train.orig.txt
testFile=/home/horatio/truecase/test.truecase.txt
# Disabled alternative test file.
# testFile=/scr/nlp/data/gale/AE-MT-eval-data/mt06/cased/ref0
# Column mapping for the data files: "word" is column 0, "answer" is column 1.
map=word=0,answer=1
# Function applied to each word; presumably lowercases the input so the model
# never sees the original casing (hence "caseless") -- TODO confirm.
wordFunction = edu.stanford.nlp.process.LowercaseFunction
#
# --- Feature switches ---
# NOTE(review): presumably interpreted by the featureFactory class named near
# the end of this file; see that class's documentation for exact semantics.
useClassFeature=true
useWord=true
# Character n-gram settings (maximum length 6).
useNGrams=true
noMidNGrams=true
maxNGramLeng=6
# Neighbouring-word features.
usePrev=true
useNext=true
# Label/type sequence features.
useLongSequences=true
useSequences=true
usePrevSequences=true
useTypeSeqs=true
useTypeSeqs2=true
useTypeySequences=true
useOccurrencePatterns=true
useLastRealWord=true
useNextRealWord=true
# Disjunctive features with a width of 5.
useDisjunctive=true
disjunctionWidth=5
# Name of the word-shape function to use.
wordShape=chris2useLC
# Position-related features.
usePosition=true
useBeginSent=true
useTitle=true
useObservedSequencesOnly=true
#
# --- Training / optimisation settings ---
saveFeatureIndexToDisk=true
normalize=true
# NOTE(review): useQN is disabled while QNSize is still set; QNSize is
# presumably ignored in that case -- confirm.
useQN=false
QNSize=25
maxLeft=1
# NOTE(review): presumably the L1 regularisation strength -- confirm.
l1reg=1.0
#
# --- Pluggable classes ---
# Reader/writer class used for the truecasing document format.
readerAndWriter=edu.stanford.nlp.sequences.TrueCasingForNISTDocumentReaderAndWriter
# Feature factory class that generates the features enabled above.
featureFactory=edu.stanford.nlp.ie.NERFeatureFactory
featureDiffThresh=0.02