NLP | WordNet for tagging



Code #1: Creating a WordNetTagger class.

from nltk. tag import SequentialBackoffTagger

from nltk.corpus import wordnet

from nltk.probability import FreqDist

 

class WordNetTagger(SequentialBackoffTagger):
    """Part-of-speech tagger that guesses a tag from WordNet synsets.

    For each token, the parts of speech of all WordNet synsets of the word
    are counted, and the most frequent one is translated to a Treebank-style
    tag through ``wordnet_tag_map``.

    >>> wt = WordNetTagger()
    >>> wt.tag(['food', 'is', 'great'])
    [('food', 'NN'), ('is', 'VB'), ('great', 'JJ')]
    """

    def __init__(self, *args, **kwargs):
        """Initialize the tagger.

        All positional and keyword arguments are forwarded unchanged to
        ``SequentialBackoffTagger.__init__`` (e.g. an optional backoff
        tagger).
        """
        super().__init__(*args, **kwargs)
        # WordNet part-of-speech code -> Treebank tag.
        # 's' is WordNet's "adjective satellite" code, so it shares the
        # adjective tag with 'a'.
        self.wordnet_tag_map = {
            'n': 'NN',
            's': 'JJ',
            'a': 'JJ',
            'r': 'RB',
            'v': 'VB',
        }

    def choose_tag(self, tokens, index, history):
        """Return the Treebank tag for ``tokens[index]``, or ``None``.

        Args:
            tokens: The sequence of words being tagged.
            index: Position in ``tokens`` of the word to tag.
            history: Tags already chosen for the preceding words (unused).

        Returns:
            The mapped tag for the most frequent WordNet part of speech of
            the word, or ``None`` when WordNet has no synsets for the word
            or the part of speech has no entry in ``wordnet_tag_map``.
        """
        word = tokens[index]
        fd = FreqDist()

        for synset in wordnet.synsets(word):
            fd[synset.pos()] += 1

        # FreqDist.max() raises ValueError on an empty distribution, so a
        # word unknown to WordNet must short-circuit here. Returning None
        # is how a sequential backoff tagger signals "no decision".
        if not fd:
            return None

        return self.wordnet_tag_map.get(fd.max())

This WordNetTagger class counts how many times each POS tag occurs among the synsets of a word, and then converts the most common one to a Treebank tag using an internal mapping.

Code #2: Using a simple WordNetTagger()

from nltk.corpus import treebank
from nltk.tag import DefaultTagger

from taggers import WordNetTagger

# Initialization
default_tag = DefaultTagger('NN')

# Set up the training and testing sets
train_data = treebank.tagged_sents()[:3000]
test_data = treebank.tagged_sents()[3000:]

# Evaluate the WordNet-based tagger on its own (no backoff tagger).
# NOTE: newer NLTK releases deprecate evaluate() in favor of accuracy().
wn_tagger = WordNetTagger()
a = wn_tagger.evaluate(test_data)

print("Accuracy of WordNetTagger: ", a)

Output:

Accuracy of WordNetTagger: 0.17914876598160262

Using Code #3, we can improve the accuracy.
Code #3: WordNetTagger class at the end of an NgramTagger backoff chain

from nltk.corpus import treebank
from nltk.tag import DefaultTagger, UnigramTagger, BigramTagger, TrigramTagger

from tag_util import backoff_tagger
from taggers import WordNetTagger

# Initialization
default_tag = DefaultTagger('NN')

# Set up the training and testing sets
train_data = treebank.tagged_sents()[:3000]
test_data = treebank.tagged_sents()[3000:]

# The WordNet tagger is the last resort at the end of the n-gram chain.
wn_tagger = WordNetTagger()

tagger = backoff_tagger(
    train_data,
    [UnigramTagger, BigramTagger, TrigramTagger],
    backoff=wn_tagger,
)

# NOTE: newer NLTK releases deprecate evaluate() in favor of accuracy().
a = tagger.evaluate(test_data)

print("Accuracy:", a)

Output:

 Accuracy: 0.8848262464925534