# -*- coding: utf-8 -*-
"""
Tests for Brill tagger.
"""

import unittest

from nltk.tag import UnigramTagger, brill, brill_trainer
from nltk.tbl import Template
from nltk.corpus import treebank
from nltk.tbl import demo


class TestBrill(unittest.TestCase):
    """Tests for training a Brill tagger and tagging with the result."""

    def test_pos_template(self):
        """Train with a single ``Pos([-1])`` template and tag a sample sentence.

        A ``UnigramTagger`` built from the first 1000 tagged Treebank
        sentences is used as the initial tagger, and the Brill trainer is
        run over the same sentences.  The trained tagger's output for the
        sample sentence is compared against a fixed list of
        ``(token, tag)`` pairs, in which some tokens carry a ``None`` tag.
        """
        train_sents = treebank.tagged_sents()[:1000]
        tagger = UnigramTagger(train_sents)
        trainer = brill_trainer.BrillTaggerTrainer(
            tagger, [brill.Template(brill.Pos([-1]))]
        )
        brill_tagger = trainer.train(train_sents)
        # Example from https://github.com/nltk/nltk/issues/769
        result = brill_tagger.tag('This is a foo bar sentence'.split())
        expected = [
            ('This', 'DT'),
            ('is', 'VBZ'),
            ('a', 'DT'),
            ('foo', None),
            ('bar', 'NN'),
            ('sentence', None),
        ]
        self.assertEqual(result, expected)

    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
    def test_brill_demo(self):
        """Run the Brill tagger demo (skipped; see the skip reason)."""
        # ``from nltk.tbl import demo`` binds the ``nltk.tbl.demo`` module,
        # so the demo entry point is the ``demo`` function inside it; calling
        # the module object itself would raise TypeError if this test were
        # ever un-skipped.
        demo.demo()