NLP | Chunking rules

The following are the steps for chunking:

  • Convert a sentence to a flat tree. 

  • Create a ChunkString using this tree.
  • Create a RegexpChunkParser by parsing grammar using RegexpParser.
  • Apply the created chunk rule to the ChunkString; the part of the sentence it matches becomes a chunk. 

  • Split the larger chunk into smaller chunks using the defined chink rules. 

  • The ChunkString is then converted back to a tree, which now has two chunk subtrees. 

Code # 1: ChunkString is modified by applying each rule.

# Loading libraries

from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule

from nltk.tree import Tree

 
# ChunkString() starts with a flat tree: an "S" node whose children are
# plain (word, POS-tag) pairs with no nested subtrees.
tree = Tree(
    "S",
    [
        ("the", "DT"),
        ("book", "NN"),
        ("has", "VBZ"),
        ("many", "JJ"),
        ("chapters", "NNS"),
    ],
)

# Initializing ChunkString()
chunk_string = ChunkString(tree)
print("Chunk String:", chunk_string)

# ChunkRule initialization. The tag pattern reads: a determiner, a noun,
# any run of tags, then a final noun. apply() modifies chunk_string in place.
chunk_rule = ChunkRule("<DT><NN.*><.*>*<NN.*>", "chunk determiners and nouns")
chunk_rule.apply(chunk_string)
print(" Applied ChunkRule: ", chunk_string)

# Another rule, this time a ChinkRule: it takes the verbs back out of the
# chunk, splitting the single large chunk into two smaller ones.
ir = ChinkRule("<VB.*>", "chink verbs")
ir.apply(chunk_string)
print("Applied ChinkRule:", chunk_string, "")

# Return to chunk subtree. The resulting Tree is only displayed when this is
# run in an interactive session; wrap the call in print() inside a script.
chunk_string.to_chunkstruct()

Output:

 Chunk String:  <DT>  <NN>  <VBZ>  <JJ>  <NNS>
 Applied ChunkRule: {<DT>  <NN>  <VBZ>  <JJ>  <NNS>}
 Applied ChinkRule: {<DT>  <NN>} <VBZ> {<JJ>  <NNS>}
 Tree('S', [Tree('CHUNK', [('the', 'DT'), ('book', 'NN')]), ('has', 'VBZ'), Tree('CHUNK', [('many', 'JJ'), ('chapters', 'NNS')])])

Note: This code works exactly as described in the chunking steps above.

Code # 2: How to perform this task directly with RegexpChunkParser.

# Loading libraries

from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule

from nltk.tree import Tree

from nltk.chunk import RegexpChunkParser

  
# Start with a flat tree: an "S" node whose children are plain
# (word, POS-tag) pairs with no nested subtrees.
tree = Tree(
    "S",
    [
        ("the", "DT"),
        ("book", "NN"),
        ("has", "VBZ"),
        ("many", "JJ"),
        ("chapters", "NNS"),
    ],
)

# ChunkRule initialization. The tag pattern reads: a determiner, a noun,
# any run of tags, then a final noun.
chunk_rule = ChunkRule("<DT><NN.*><.*>*<NN.*>", "chunk determiners and nouns")

# Other rule: a ChinkRule that takes the verbs back out of the chunk
chink_rule = ChinkRule("<VB.*>", "chink verbs")

# Applying RegexpChunkParser: the rules are passed as a list and parse()
# is called directly on the flat tree, so no ChunkString is built by hand.
chunker = RegexpChunkParser([chunk_rule, chink_rule])

# The resulting Tree is only displayed when this is run in an interactive
# session; wrap the call in print() inside a script.
chunker.parse(tree)

Output:

 Tree('S', [Tree('CHUNK', [('the', 'DT'), ('book', 'NN')]), ('has', 'VBZ'), Tree('CHUNK', [('many', 'JJ'), ('chapters', 'NNS')])])

Code # 3: Parse with a different ChunkType.

# Loading libraries

from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule

from nltk.tree import Tree

from nltk.chunk import RegexpChunkParser

 
# Start with a flat tree: an "S" node whose children are plain
# (word, POS-tag) pairs with no nested subtrees.
tree = Tree(
    "S",
    [
        ("the", "DT"),
        ("book", "NN"),
        ("has", "VBZ"),
        ("many", "JJ"),
        ("chapters", "NNS"),
    ],
)

# ChunkRule initialization. The tag pattern reads: a determiner, a noun,
# any run of tags, then a final noun.
chunk_rule = ChunkRule("<DT><NN.*><.*>*<NN.*>", "chunk determiners and nouns")

# Another rule: a ChinkRule that takes the verbs back out of the chunk
chink_rule = ChinkRule("<VB.*>", "chink verbs")

# Using RegexpChunkParser with a custom label: chunk_label="CP" is passed so
# the chunk subtrees are labelled "CP" rather than "CHUNK".
chunker = RegexpChunkParser([chunk_rule, chink_rule], chunk_label="CP")

# The resulting Tree is only displayed when this is run in an interactive
# session; wrap the call in print() inside a script.
chunker.parse(tree)

Output:

 Tree('S', [Tree('CP', [('the', 'DT'), ('book', 'NN')]), ('has', 'VBZ'), Tree('CP', [('many', 'JJ'), ('chapters', 'NNS')])])