NLP | Splitting and combining fragments



The MergeRule class merges two chunks based on the end of the first chunk and the beginning of the second chunk. It is written as <NN.*>{}<.*>, i.e. with the curly braces facing each other.

An example of how the steps are performed

  • Starting with the sentence tree

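  • The full sentence is chunked as a single chunk

  • That chunk is split into several chunks after each noun

  • The chunk containing a determiner is split again, just before the determiner

  • A chunk ending with a noun is merged with a following chunk that begins with a noun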

Code # 1 — Building a tree

from nltk.chunk import RegexpParser

# Grammar for the NP chunk type. The four rules, in order:
#   {<DT><.*>*<NN.*>}   chunk rule: chunk from a determiner through to a noun
#   <NN.*>}{<.*>        split rule: split a chunk after a noun
#   <.*>}{<DT>          split rule: split a chunk before a determiner
#   <NN.*>{}<NN.*>      merge rule: merge a chunk ending in a noun with a
#                       following chunk that starts with a noun
chunker = RegexpParser(r"""
NP:
{<DT><.*>*<NN.*>}
<NN.*>}{<.*>
<.*>}{<DT>
<NN.*>{}<NN.*>
""")

# Part-of-speech tagged sentence as (word, tag) pairs.
sent = [
    ("the", "DT"),
    ("sushi", "NN"),
    ("roll", "NN"),
    ("was", "VBD"),
    ("filled", "VBN"),
    ("with", "IN"),
    ("the", "DT"),
    ("fish", "NN"),
]

# In an interactive session this displays the resulting chunk tree.
chunker.parse(sent)

Output:

 Tree('S', [Tree('NP', [('the', 'DT'), ('sushi', 'NN'), ('roll', 'NN')]), Tree('NP', [('was', 'VBD'), ('filled', 'VBN'), ('with', 'IN')]), Tree('NP', [('the', 'DT'), ('fish', 'NN')])])

Code # 2 — Splitting and merging

# Loading libraries
from nltk.chunk.regexp import ChunkString, ChunkRule, ChinkRule
from nltk.tree import Tree
from nltk.chunk.regexp import MergeRule, SplitRule

# Chunk string built from the tagged sentence `sent` defined in Code #1.
# Each rule below is applied to this same object, which is printed after
# every step to show how the chunking changes.
chunk_string = ChunkString(Tree("S", sent))
print("Chunk String:", chunk_string)

# Apply chunk rule: chunk from a determiner through to a noun
ur = ChunkRule("<DT><.*>*<NN.*>", "chunk determiner to noun")
ur.apply(chunk_string)
print("Applied ChunkRule:", chunk_string)

# Splitting: split the chunk after a noun
sr1 = SplitRule("<NN.*>", "<.*>", "split after noun")
sr1.apply(chunk_string)
print("Splitting Chunk String:", chunk_string)

# Further splitting: split the chunk before a determiner
sr2 = SplitRule("<.*>", "<DT>", "split before determiner")
sr2.apply(chunk_string)
print("Further Splitting Chunk String:", chunk_string)

# Merge: join a chunk ending in a noun with a chunk starting with a noun
mr = MergeRule("<NN.*>", "<NN.*>", "merge nouns")
mr.apply(chunk_string)
print("Merging Chunk String:", chunk_string)

# Convert the chunk string back into a tree
chunk_string.to_chunkstruct()

Output:

 Chunk String: <DT> <NN> <NN> <VBD> <VBN> <IN> <DT> <NN>
 Applied ChunkRule: {<DT> <NN> <NN> <VBD> <VBN> <IN> <DT> <NN>}
 Splitting Chunk String: {<DT> <NN>} {<NN>} {<VBD> <VBN> <IN> <DT> <NN>}
 Further Splitting Chunk String: {<DT> <NN>} {<NN>} {<VBD> <VBN> <IN>} {<DT> <NN>}
 Merging Chunk String: {<DT> <NN> <NN>} {<VBD> <VBN> <IN>} {<DT> <NN>}
 Tree('S', [Tree('CHUNK', [('the', 'DT'), ('sushi', 'NN'), ('roll', 'NN')]), Tree('CHUNK', [('was', 'VBD'), ('filled', 'VBN'), ('with', 'IN')]), Tree('CHUNK', [('the', 'DT'), ('fish', 'NN')])])