Generating text using a Long Short-Term Memory (LSTM) recurrent network



The data for this tutorial was downloaded from Kaggle. The dataset contains articles published in the New York Times from April 2017 to April 2018, split into one file per month of publication. Each file is a .csv that lists the URL of a published article along with other details. A random URL was chosen, the article text at that URL was copied into a text file, and that text file is used throughout the tutorial.
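If you prefer to script this selection step instead of copying a URL by hand, a minimal sketch is shown below. It assumes one of the monthly files is named ArticlesApril2017.csv and that it has a column named webURL holding the article link; both names are assumptions, so check them against the files you actually downloaded. The article body at the chosen URL is still saved manually into article1.txt, as in the rest of the tutorial.

# Sketch: pick a random article URL from one of the monthly CSV files.
# The file name and the webURL column name are assumptions; adjust them
# to match the dataset on disk.
import random
import pandas as pd

articles = pd.read_csv("ArticlesApril2017.csv")
url = random.choice(articles["webURL"].dropna().tolist())
print(url)  # visit this URL and save the article text as article1.txt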

Step 1: Import the required libraries

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM

from keras.optimizers import RMSprop

from keras.callbacks import LambdaCallback
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau

import random
import sys

Step 2: Load the data into a string

# Change the working directory to the location of the text file
# (IPython/Jupyter magic)
cd C:\Users\Dev\Desktop\Kaggle\New York Times

# Read the text file into a string
with open('article1.txt', 'r') as file:
    text = file.read()

# Preview the text
print(text)

Step 3: Create a mapping of each unique character in the text to a unique number

# Store all the unique characters of the text
vocabulary = sorted(list(set(text)))

# Create dictionaries that map each character to an index and back
char_to_indices = dict((c, i) for i, c in enumerate(vocabulary))
indices_to_char = dict((i, c) for i, c in enumerate(vocabulary))

print(vocabulary)
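Running the same three lines on a toy string makes the mapping easier to see; the vocabulary below is purely illustrative, the real one comes from the article text loaded above.

# Illustrative example on a toy string (not the article text)
toy = "hello"
toy_vocab = sorted(list(set(toy)))                       # ['e', 'h', 'l', 'o']
toy_c2i = dict((c, i) for i, c in enumerate(toy_vocab))
print(toy_c2i)                                           # {'e': 0, 'h': 1, 'l': 2, 'o': 3}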

Step 4: Data preprocessing

# Divide the text into overlapping subsequences of max_length characters,
# stepping forward by steps characters at a time, so the network is trained
# to predict the character that follows each max_length window
max_length = 100
steps = 5
sentences = []
next_chars = []
for i in range(0, len(text) - max_length, steps):
    sentences.append(text[i: i + max_length])
    next_chars.append(text[i + max_length])

# One-hot encode each character into a boolean vector
X = np.zeros((len(sentences), max_length, len(vocabulary)), dtype=bool)
y = np.zeros((len(sentences), len(vocabulary)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_indices[char]] = 1
    y[i, char_to_indices[next_chars[i]]] = 1
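A quick sanity check on the encoded tensors: X should have shape (number of sentences, max_length, vocabulary size) and y should have shape (number of sentences, vocabulary size), with exactly one active entry per time step.

# Sanity check on the encoded tensors
print(X.shape)      # (len(sentences), 100, len(vocabulary))
print(y.shape)      # (len(sentences), len(vocabulary))
print(X[0].sum())   # 100: one active character per time step in the first window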

Step 5: Build the LSTM network

# Build the LSTM network for the task
model = Sequential()
model.add(LSTM(128, input_shape=(max_length, len(vocabulary))))
model.add(Dense(len(vocabulary)))
model.add(Activation('softmax'))

optimizer = RMSprop(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
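Before training it can help to inspect the architecture with model.summary(); the exact parameter counts depend on the vocabulary size of your text file.

# Inspect the layer shapes and parameter counts
# (the LSTM layer has 4 * 128 * (128 + len(vocabulary) + 1) parameters)
model.summary()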

Step 6: Define some helper functions to be used during network training

Note that the helper functions below are adapted from the official text generation example published by the Keras team.

a) Helper function to sample the index of the next character

b) Helper function to generate text after each epoch

# Helper function for sampling an index from an array of probabilities
def sample_index(preds, temperature=1.0):
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
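The temperature argument controls how conservative the sampling is: dividing the log-probabilities by a small temperature sharpens the distribution towards the most likely character, a temperature of 1.0 leaves it unchanged, and larger values flatten it. A toy distribution (illustrative values only) makes this visible:

# Effect of temperature on a toy probability distribution
p = np.asarray([0.5, 0.3, 0.2]).astype('float64')
for temp in [0.2, 1.0, 1.2]:
    scaled = np.exp(np.log(p) / temp)
    scaled /= scaled.sum()
    print(temp, np.round(scaled, 3))
# Low temperatures put almost all of the mass on the most likely entry;
# higher temperatures spread it out, which is what the diversity values
# used during generation control.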

# Helper function to generate text after each epoch
def on_epoch_end(epoch, logs):
    print()
    print('----- Generating text after Epoch: %d' % epoch)

    start_index = random.randint(0, len(text) - max_length - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print('----- diversity:', diversity)

        generated = ''
        sentence = text[start_index: start_index + max_length]
        generated += sentence
        print('----- Generating with seed: "' + sentence + '"')
        sys.stdout.write(generated)

        for i in range(400):
            x_pred = np.zeros((1, max_length, len(vocabulary)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_to_indices[char]] = 1.

            preds = model.predict(x_pred, verbose=0)[0]
            next_index = sample_index(preds, diversity)
            next_char = indices_to_char[next_index]

            generated += next_char
            sentence = sentence[1:] + next_char

            sys.stdout.write(next_char)
            sys.stdout.flush()
        print()

print_callback = LambdaCallback(on_epoch_end=on_epoch_end)

c) Helper function to save the model after every epoch in which the loss decreases

# Define a helper function to save the model after every epoch
# in which the loss decreases
filepath = "weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss',
                             verbose=1, save_best_only=True,
                             mode='min')

d) Helper function to reduce the learning rate each time learning plateaus

# Define a helper function to reduce the learning rate each time
# learning plateaus
reduce_alpha = ReduceLROnPlateau(monitor='loss', factor=0.2,
                                 patience=1, min_lr=0.001)
callbacks = [print_callback, checkpoint, reduce_alpha]

Step 7: Train the LSTM

# Train the LSTM model
model.fit(X, y, batch_size=128, epochs=500, callbacks=callbacks)

Step 8: Generate new and random text

# Define a utility function to generate new and random text based on
# what the network has learned

def generate_text(length, diversity):
    # Get a random starting point in the text
    start_index = random.randint(0, len(text) - max_length - 1)
    generated = ''
    sentence = text[start_index: start_index + max_length]
    generated += sentence
    for i in range(length):
        x_pred = np.zeros((1, max_length, len(vocabulary)))
        for t, char in enumerate(sentence):
            x_pred[0, t, char_to_indices[char]] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample_index(preds, diversity)
        next_char = indices_to_char[next_index]

        generated += next_char
        sentence = sentence[1:] + next_char
    return generated

print(generate_text(500, 0.2))
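Since the ModelCheckpoint callback saves the best weights to weights.hdf5, text can also be generated in a later session without retraining, provided the model is rebuilt with the same architecture first:

# Reload the best weights saved during training and generate again
model.load_weights("weights.hdf5")
print(generate_text(500, 0.5))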