NLP | Brill tagger

Code # 1: Training the BrillTagger class

# Loading libraries

from nltk.tag import brill, brill_trainer

 

def train_brill_tagger(initial_tagger, train_sents, **kwargs):
    """Train a Brill tagger on top of ``initial_tagger``.

    A fixed set of rule templates is handed to ``BrillTaggerTrainer``
    together with the initial tagger, and the trainer is then run on
    ``train_sents``.

    Args:
        initial_tagger: Already-trained tagger handed to the trainer as its
            starting point.
        train_sents: Tagged training sentences passed to ``trainer.train``.
        **kwargs: Forwarded unchanged to ``trainer.train`` (see the NLTK
            documentation for the options it accepts, e.g. ``max_rules``).

    Returns:
        Whatever ``BrillTaggerTrainer.train`` returns (the trained tagger).
    """
    # Each template names the context a candidate rule may inspect.
    # ``brill.Pos`` features look at tags and ``brill.Word`` features look at
    # words; the integers are token offsets relative to the current position
    # (negative = before, positive = after).
    templates = [
        # Tag-based context.
        brill.Template(brill.Pos([-1])),
        brill.Template(brill.Pos([1])),
        brill.Template(brill.Pos([-2])),
        brill.Template(brill.Pos([2])),
        brill.Template(brill.Pos([-2, -1])),
        brill.Template(brill.Pos([1, 2])),
        brill.Template(brill.Pos([-3, -2, -1])),
        brill.Template(brill.Pos([1, 2, 3])),
        brill.Template(brill.Pos([-1]), brill.Pos([1])),
        # Word-based context, mirroring the tag-based templates above.
        brill.Template(brill.Word([-1])),
        brill.Template(brill.Word([1])),
        brill.Template(brill.Word([-2])),
        brill.Template(brill.Word([2])),
        brill.Template(brill.Word([-2, -1])),
        brill.Template(brill.Word([1, 2])),
        brill.Template(brill.Word([-3, -2, -1])),
        brill.Template(brill.Word([1, 2, 3])),
        brill.Template(brill.Word([-1]), brill.Word([1])),
    ]

    # Using BrillTaggerTrainer to train.
    # NOTE(review): deterministic=True is expected to make rule selection
    # reproducible across runs -- confirm against the NLTK documentation.
    trainer = brill_trainer.BrillTaggerTrainer(
        initial_tagger, templates, deterministic=True
    )

    return trainer.train(train_sents, **kwargs)

Code # 2: Let's use a trained BrillTagger

from nltk.tag import brill, brill_trainer
from nltk.tag import DefaultTagger
from nltk.tag import UnigramTagger, BigramTagger, TrigramTagger
from nltk.corpus import treebank

# NOTE(review): backoff_tagger is used below but was never imported; it is
# assumed to live in tag_util next to train_brill_tagger -- confirm.
from tag_util import backoff_tagger, train_brill_tagger

# Initialization: the last-resort tagger, constructed with the tag "NN".
default_tag = DefaultTagger("NN")

# Start training and testing: the first 3000 treebank sentences are used for
# training and the remaining sentences are held out for evaluation.
train_data = treebank.tagged_sents()[:3000]
test_data = treebank.tagged_sents()[3000:]

# Build the initial tagger from the three n-gram tagger classes, passing the
# default tagger defined above as the backoff.
initial_tag = backoff_tagger(
    train_data,
    [UnigramTagger, BigramTagger, TrigramTagger],
    backoff=default_tag,
)

# Baseline score of the initial tagger on the held-out sentences.
a = initial_tag.evaluate(test_data)
print("Accuracy of Initial Tag:", a)

Output:

 Accuracy of Initial Tag: 0.8806820634578028 

Code # 3: Train the BrillTagger and evaluate its accuracy

# Train a Brill tagger on top of the initial tagger, using the training split.
brill_tag = train_brill_tagger(initial_tag, train_data)

# Score the resulting tagger on the held-out sentences and report it.
b = brill_tag.evaluate(test_data)
print("Accuracy of brill_tag:", b)

Output:

 Accuracy of brill_tag: 0.8827541549751781