ML | Generating text using Gated Recurrent Unit Networks

The data for this walkthrough is a collection of short, well-known poems by famous poets in .txt format. It can be downloaded here.

Step 1: Import the required libraries

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import GRU

from keras.optimizers import RMSprop

from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau

import random
import sys

Step 2: Load the data into a string

# Change the working directory to the location of the text file
cd C:\Users\Dev\Desktop\Kaggle\Poems
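
The cd line above is an IPython/Jupyter magic command. If you run the code as a plain Python script instead, a minimal equivalent would be the following sketch (the path is only an example and should point to wherever your poems.txt is stored):

import os

# Change the working directory to the folder containing poems.txt
os.chdir(r"C:\Users\Dev\Desktop\Kaggle\Poems")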

 
# Read the text file into a string
with open('poems.txt', 'r') as file:
    text = file.read()

# Preview the text file
print(text)
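
As an optional sanity check (not part of the original walkthrough), you can print how many characters were read; len(text) is reused later when building the training subsequences:

# Total number of characters in the corpus
print('Corpus length:', len(text))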

Step 3: Create a mapping of each unique character in the text with a unique number

# Store all unique characters in the text
vocabulary = sorted(list(set(text)))

# Create dictionaries mapping each character to an index and back
char_to_indices = dict((c, i) for i, c in enumerate(vocabulary))
indices_to_char = dict((i, c) for i, c in enumerate(vocabulary))

print(vocabulary)
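
To see the mapping in action, you can look up a character by hand; this is a minimal sketch, and the exact index depends on which characters occur in your poems.txt (it assumes the letter 'a' appears in the text):

# Map a character to its index and back again
idx = char_to_indices['a']
print(idx, indices_to_char[idx])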

Step 4: Data preprocessing

# Divide the text into subsequences of length max_length
# so that at each time step the network is fed max_length characters
max_length = 100
steps = 5
sentences = []
next_chars = []
for i in range(0, len(text) - max_length, steps):
    sentences.append(text[i: i + max_length])
    next_chars.append(text[i + max_length])

# One-hot encode each character into a boolean vector

# Initialize matrices of booleans, with each column representing
# the one-hot encoded representation of a character
X = np.zeros((len(sentences), max_length, len(vocabulary)), dtype=bool)
y = np.zeros((len(sentences), len(vocabulary)), dtype=bool)

# Put a 1 in the appropriate position of each vector
# to complete the one-hot encoding
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_indices[char]] = 1
    y[i, char_to_indices[next_chars[i]]] = 1
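
A quick way to confirm the encoding produced the expected tensors (an optional check; the first dimension depends on the length of your text):

# X: (number of sequences, max_length, vocabulary size)
# y: (number of sequences, vocabulary size)
print(X.shape, y.shape)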

Step 5: Building the GRU network

# Initialize the GRU network
model = Sequential()

# Define the cell type
model.add(GRU(128, input_shape=(max_length, len(vocabulary))))

# Define a fully connected (Dense) output layer
model.add(Dense(len(vocabulary)))

# Define the activation function for the output layer
model.add(Activation('softmax'))

# Define the optimization function
optimizer = RMSprop(lr=0.01)

# Configure the model for training
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
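
To inspect the resulting architecture, an optional step is to print a layer-by-layer summary (the exact parameter counts depend on your vocabulary size):

# Print the layers and parameter counts of the compiled model
model.summary()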

Step 6: Define some helper functions to be used during network training

Note that the first two functions below are adapted from the official text-generation example published by the Keras team.

a) Helper function for sampling the index of the next character:

# Helper function for sampling an index from an array of probabilities
def sample_index(preds, temperature=1.0):
    # temperature determines how much freedom the function has
    # when generating text

    # Convert the vector of predictions to a NumPy array
    preds = np.asarray(preds).astype('float64')

    # Normalize the prediction array
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)

    # Main sampling step. Creates an array of probabilities signifying
    # the probability of each character being the next one in
    # the generated text
    probas = np.random.multinomial(1, preds, 1)

    # Return the index of the character most likely to be next
    # in the generated text
    return np.argmax(probas)
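
A small sketch of how temperature changes the sampling behaviour; the toy distribution below is made up purely for illustration and is not derived from the poem data:

# Hypothetical probability distribution over a 4-character vocabulary
toy_preds = [0.6, 0.2, 0.15, 0.05]

# Low temperature -> sampling concentrates on the most likely index;
# high temperature -> sampling gets closer to uniform
print(sample_index(toy_preds, temperature=0.2))
print(sample_index(toy_preds, temperature=1.2))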

b) Helper function for generating text after each epoch:

# Helper function to generate text after each epoch
def on_epoch_end(epoch, logs):
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    # Choose a random starting index for the generated text
    start_index = random.randint(0, len(text) - max_length - 1)

    # Sample at different diversity (temperature) values
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''

        # Seed sentence
        sentence = text[start_index: start_index + max_length]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            # Initialize the prediction vector
            x_pred = np.zeros((1, max_length, len(vocabulary)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_to_indices[char]] = 1.

            # Generate predictions for the next character
            preds = model.predict(x_pred, verbose=0)[0]

            # Get the index of the most likely next character
            next_index = sample_index(preds, diversity)

            # Map the index back to a character using the mapping built earlier
            next_char = indices_to_char[next_index]

            # Extend the generated text and slide the seed window forward
            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

 
# Define a custom callback that prints the generated text
# at the end of each epoch
print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

c) Helper callback for saving the model after every epoch in which the loss decreases:

# Define a callback to save the model after each epoch
# in which the loss decreases
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')

d) Helper callback to reduce the learning rate whenever learning plateaus:

# Define a callback to decrease the learning rate each time
# the learning plateaus
reduce_alpha = ReduceLROnPlateau(monitor='loss', factor=0.2,
                                 patience=1, min_lr=0.001)
callbacks = [print_callback, checkpoint, reduce_alpha]

Step 7: Train the GRU model

# Train the GRU model
model.fit(X, y, batch_size=128, epochs=30, callbacks=callbacks)
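
If you restart the session later, you can reload the best checkpoint saved by ModelCheckpoint instead of retraining; a minimal sketch, assuming weights.hdf5 was written during training:

# Restore the weights with the lowest recorded loss
model.load_weights("weights.hdf5")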

Step 8: Generate new and random text

def generate_text(length, diversity):
    # Get a random starting point in the text
    start_index = random.randint(0, len(text) - max_length - 1)

    # Define the generated text and the seed sentence
    generated = ''
    sentence = text[start_index: start_index + max_length]
    generated += sentence

    # Generate new text of the given length
    for i in range(length):

        # Initialize the prediction vector
        x_pred = np.zeros((1, max_length, len(vocabulary)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_to_indices[char]] = 1.

        # Make the predictions
        preds = model.predict(x_pred, verbose=0)[0]

        # Get the index of the most likely next character
        next_index = sample_index(preds, diversity)

        # Map the index back to a character using the mapping built earlier
        next_char = indices_to_char[next_index]

        # Extend the generated text and slide the seed window forward
        generated += next_char
        sentence = sentence[1:] + next_char

    return generated
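
A short usage example of the function above; the length of 500 characters and the diversity of 0.5 are arbitrary illustrative choices:

# Generate and print 500 new characters of text
print(generate_text(500, 0.5))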