# my_answers.py

import numpy as np

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import keras


# TODO: fill out the function below that transforms the input series 
# and window-size into a set of input/output pairs for use with our RNN model
def window_transform_series(series, window_size):
    """Slice a series into sliding-window inputs and next-value targets.

    Window ``i`` is ``series[i:i + window_size]`` and its target is the value
    that immediately follows it, ``series[i + window_size]``.

    Args:
        series: Sequence (list or NumPy array) of values. A column vector of
            shape ``(n, 1)`` is also accepted.
        window_size: Number of consecutive values in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_pairs, window_size)`` and ``y`` has shape ``(n_pairs, 1)``, where
        ``n_pairs = max(len(series) - window_size, 0)``. When the series is
        not longer than the window both arrays have zero rows but keep their
        column count.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    # A (window, target) pair only exists when a value follows the window,
    # so a series no longer than the window yields no pairs at all.
    n_pairs = max(len(series) - window_size, 0)

    X = [series[start:start + window_size] for start in range(n_pairs)]
    y = [series[start + window_size] for start in range(n_pairs)]

    # reshape() (rather than assigning to .shape) keeps the column count even
    # when there are no rows, and squeezes the trailing axis of a column-vector
    # series.
    X = np.asarray(X).reshape(n_pairs, window_size)
    y = np.asarray(y).reshape(n_pairs, 1)

    return X, y

# TODO: build an RNN to perform regression on our time series input/output data
def build_part1_RNN(window_size):
    """Assemble the two-layer regression network for the time-series task.

    The network is an LSTM layer with 5 hidden units that takes input of shape
    ``(window_size, 1)``, followed by a fully connected layer with a single
    output unit. The model summary is printed before returning.

    Args:
        window_size: Number of time steps in each input window.

    Returns:
        The assembled ``Sequential`` model. It is not compiled here.
    """
    recurrent_layer = LSTM(5, input_shape=(window_size, 1))
    output_layer = Dense(1)

    regressor = Sequential([recurrent_layer, output_layer])
    regressor.summary()
    return regressor

### TODO: return the text input with only ascii lowercase and the punctuation given below included.
def cleaned_text(text):
    """Return ``text`` with every character outside the allowed set removed.

    The allowed characters are the ASCII lowercase letters, the space
    character, and the punctuation marks ``! , . : ; ?``. Anything else
    (uppercase letters, digits, newlines, other symbols) is dropped, not
    replaced.

    Args:
        text: String to filter.

    Returns:
        A new string containing the surviving characters in their original
        order.
    """
    # A frozenset gives constant-time membership checks per character instead
    # of a linear scan through a list of allowed characters.
    allowed = frozenset("!,.:;?" "abcdefghijklmnopqrstuvwxyz" " ")
    return ''.join(ch for ch in text if ch in allowed)

### TODO: fill out the function below that transforms the input text and window-size into a set of input/output pairs for use with our RNN model
def window_transform_text(text, window_size, step_size):
    """Cut ``text`` into fixed-length windows paired with the next character.

    Windows start at offsets ``0, step_size, 2 * step_size, ...`` strictly
    below ``len(text) - window_size``, so every window has a following
    character to use as its target.

    Args:
        text: Text to slice.
        window_size: Number of characters in each input window.
        step_size: Distance between the starts of consecutive windows.

    Returns:
        Tuple ``(inputs, outputs)`` of equal-length lists: ``inputs[k]`` is a
        window of ``window_size`` characters and ``outputs[k]`` is the single
        character that follows it in ``text``.
    """
    window_starts = range(0, len(text) - window_size, step_size)

    inputs = [text[start:start + window_size] for start in window_starts]
    outputs = [text[start + window_size] for start in window_starts]

    return inputs, outputs

# Build the required RNN model: a single LSTM hidden layer followed by a
# fully connected softmax output layer (for use with categorical_crossentropy loss)
def build_part2_RNN(window_size, num_chars):
    """Assemble the character-level classification network for the text task.

    The network is an LSTM layer with 200 hidden units that takes input of
    shape ``(window_size, num_chars)``, followed by a fully connected layer
    with ``num_chars`` units and softmax activation (one output per
    character class). The model summary is printed before returning.

    Args:
        window_size: Number of characters in each input window.
        num_chars: Number of unique characters in the cleaned text.

    Returns:
        The assembled ``Sequential`` model. It is not compiled here.
    """
    recurrent_layer = LSTM(200, input_shape=(window_size, num_chars))
    softmax_layer = Dense(num_chars, activation='softmax')

    classifier = Sequential([recurrent_layer, softmax_layer])
    classifier.summary()
    return classifier