25 lines
745 B
Ruby
25 lines
745 B
Ruby
#!/usr/bin/env ruby
# frozen_string_literal: true

# Regression check for the maxent POS tagger (edu.stanford.nlp.tagger.maxent).
#
# Trains a left3words model named "goat" on the first TRAIN_LINES lines of
# TRAIN_FILE, runs the tagger's Test class on the first TEST_LINES lines of
# TEST_FILE, extracts the "Total tags right" figure from the test output and
# compares it with MIN_SCORE.
#
# Prints "PASS ..." or "FAIL ..." on stdout. Exits with a non-zero status on
# FAIL (and on any raised error) so callers can detect a regression without
# parsing the output.
#
# Example of the kind of line the tagger prints (presumably from an earlier run):
# Unknown words (863) right: 788 (91.309386%); wrong: 75 (8.690614%).

TRAIN_FILE = "/u/nlp/data/pos-tagger/train-wsj-0-18"
TRAIN_LINES = 5000

TEST_FILE = "/u/nlp/data/pos-tagger/test-wsj-19-21"
TEST_LINES = 300

# Minimum acceptable value of the "Total tags right" figure.
MIN_SCORE = 96.07

# Builds the shell command that runs one of the tagger's main classes
# ("Train" or "Test") on the first +line_count+ lines of +data_file+.
# bash is invoked explicitly because the <( ... ) process substitution is
# not available in plain sh.
def tagger_command(main_class, data_file, line_count)
  "bash -c 'java edu.stanford.nlp.tagger.maxent.#{main_class} -arch left3words " \
    "-file <(head -#{line_count} #{data_file}) -model goat'"
end

# Train the model. Backticks capture (and discard) the trainer's stdout.
`#{tagger_command('Train', TRAIN_FILE, TRAIN_LINES)}`
# Stop here if training failed; otherwise the test below would score a stale
# or missing model.
raise "training failed (exit status #{$?.exitstatus.inspect})" unless $?.success?

test_output = `#{tagger_command('Test', TEST_FILE, TEST_LINES)}`

match = test_output.match(/Total tags right: \d+ \((.+?)\)/)
raise "can't parse output" unless match

# to_f reads the leading number and ignores any trailing text in the capture
# (e.g. a percent sign).
score = match[1].to_f

if score >= MIN_SCORE
  puts "PASS score #{score} >= min #{MIN_SCORE}"
else
  puts "FAIL score #{score} < min #{MIN_SCORE}"
  exit 1
end