CofeehousePy/services/corenlp/test/regression/test-postagger-left3words.rb

25 lines
745 B
Ruby

#!/bin/env ruby
# Unknown words (863) right: 788 (91.309386%); wrong: 75 (8.690614%).
# Regression check for the maxent POS tagger using the left3words architecture.
#
# Trains a model on the first TRAIN_LINES lines of TRAIN_FILE, runs the tagger
# test on the first TEST_LINES lines of TEST_FILE with that model, and prints
# PASS or FAIL depending on whether the reported score reaches MIN_SCORE.
TRAIN_FILE = "/u/nlp/data/pos-tagger/train-wsj-0-18"
TRAIN_LINES = 5000
TEST_FILE = "/u/nlp/data/pos-tagger/test-wsj-19-21"
TEST_LINES = 300
MIN_SCORE = 96.07

# Builds the shell command that runs one tagger entry point.
#
# @param main_class [String] class name under edu.stanford.nlp.tagger.maxent
#   ("Train" or "Test")
# @param file [String] path of the data file to read from
# @param lines [Integer] number of leading lines of +file+ to feed the tagger
# @return [String] a command line for the shell
def tagger_command(main_class, file, lines)
  # bash is invoked explicitly because the input is passed via process
  # substitution (`<(...)`), which is a bash feature.
  "bash -c 'java edu.stanford.nlp.tagger.maxent.#{main_class} -arch left3words " \
    "-file <(head -#{lines} #{file}) -model goat'"
end

# Training output is not inspected; only the exit status matters. Without this
# check a failed training run could go unnoticed and the test step would score
# whatever "goat" model happened to be left over from a previous run.
`#{tagger_command("Train", TRAIN_FILE, TRAIN_LINES)}`
raise "training command failed (#{$?})" unless $?.success?

test_output = `#{tagger_command("Test", TEST_FILE, TEST_LINES)}`

# The score is the parenthesised text that follows the "Total tags right" count.
score_match = test_output.match(/Total tags right: \d+ \((.+?)\)/)
raise "can't parse output" unless score_match

# to_f reads the leading number and ignores any trailing non-numeric text.
score = score_match[1].to_f

if score >= MIN_SCORE
  puts "PASS score #{score} >= min #{MIN_SCORE}"
else
  puts "FAIL score #{score} < min #{MIN_SCORE}"
end