import sys
import os
import pickle
import numpy as np
import tensorflow as tf
tf.compat.v1.disable_eager_execution()  # Run in graph mode; eager execution is disabled.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, SimpleRNN, GRU
from tensorflow.keras.callbacks import TensorBoard
import tensorflow.keras.utils as utils
import tensorflow.keras.backend as K
# python predictive_RNN.py [train/test] [model] [layer_size] [test_slice]
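# Example invocations (the layer size 128 is illustrative; valid models are LSTM, GRU, SRNN):
#   python predictive_RNN.py train LSTM 128
#   python predictive_RNN.py test LSTM 128 0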
# Constants for all models.
INPUT_FILE = 'cumlen20_long.txt' # 512*170
CORPUS = 'LEQ_20_LONG'
TEST_INPUT = './eval_data/LRD_leq_20.txt' # 512*17
EXPERIMENT = 'LRD'
SEQ_LENGTH = 1
EPOCHS = 50
BATCH = 512
VALIDATION = 0.00 # Running training without validation.
MODE = sys.argv[1]
NETWORK = sys.argv[2]
LAYER_SIZE = int(sys.argv[3])
if MODE == 'test':
    TEST_SLICE = int(sys.argv[4])
else:
    TEST_SLICE = 0
# Various callbacks for later model training/evaluation.
# Ran into an odd bug where tf could not save files when the path was specified with single backslashes ('\dir\dir\'); escaping them as '\\' works.
# Directory where the checkpoints will be saved.
checkpoint_dir = '.\\saved_models\\' + CORPUS + '\\' + str(NETWORK) + '\\' + str(LAYER_SIZE) + '\\'
# Name of the checkpoint files.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch:02d}-{loss:.4f}-{ignore_acc:.4f}-{categorical_accuracy:.4f}")
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True)
#stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0001, patience=3)
# Define the Keras TensorBoard callback.
# Note: tensorboard_callback is created here, but it only takes effect if added to the callbacks list passed to model.fit().
logdir = '.\\logs\\fit\\' + CORPUS + '\\' + str(NETWORK) + '\\' + str(LAYER_SIZE) + '\\'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)
# Preparing training/validation data.
with open(INPUT_FILE, encoding='utf8') as file:
    raw_text = file.read()
chars = sorted(list(set(raw_text)))
print(chars)  # Sanity check 1.
text_length = len(raw_text)
char_length = len(chars)
VOCABULARY = char_length
print("Text length = " + str(text_length))  # Sanity check 2.
print("No. of characters = " + str(char_length))  # Sanity check 3.
# Translating characters to integers for model input, and back again to interpret model output.
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = np.array(chars)
print(char_to_int)  # Sanity check 4.
input_strings = []
output_strings = []
def ignore_acc(y_true, y_pred):
    """Custom accuracy metric, based on the reasoning in Bernardy (2018).

    Only scores positions whose predicted class is neither an opening bracket nor the end-of-word marker '$'.
    """
    # Find the class index of the one-hot targets and the softmax output.
    y_true_class = K.argmax(y_true, axis=-1)
    y_pred_class = K.argmax(y_pred, axis=-1)
    # Mask the opening brackets and the end-of-word marker by comparing the predicted class index to the character indices.
    ignore_square = K.cast(K.not_equal(y_pred_class, char_to_int['[']), 'int32')
    ignore_curl = K.cast(K.not_equal(y_pred_class, char_to_int['{']), 'int32')
    ignore_EOS = K.cast(K.not_equal(y_pred_class, char_to_int['$']), 'int32')
    ignore_mask = ignore_square * ignore_curl * ignore_EOS
    # Only evaluate accuracy on unmasked positions.
    matches = K.cast(K.equal(y_true_class, y_pred_class), 'int32') * ignore_mask
    accuracy = K.sum(matches) / K.maximum(K.sum(ignore_mask), 1)
    return accuracy
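# Illustrative example of the metric (assumed decodings): if two targets decode to ']', '}'
# and the predictions decode to ']', '[', the second position is masked out because an
# opening bracket was predicted, so ignore_acc = 1 match / 1 unmasked position = 1.0.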
def buildmodel(vocabulary, layer_size, network):
    """Builds a neural network model with one hidden layer.

    Args:
        vocabulary: Size of the vocabulary the model is trained on. Determines the number of neurons in the Dense output layer.
        layer_size: Number of neurons in the hidden layer.
        network: Type of the hidden layer. Either LSTM, GRU or SRNN (simple RNN).
    Returns:
        model: Keras model with one hidden layer of the specified parameters, or None if `network` is invalid.
    """
    model = Sequential()
    if network == 'LSTM':
        model.add(LSTM(layer_size, input_shape=(SEQ_LENGTH, 1), return_sequences=False))
    elif network == 'GRU':
        model.add(GRU(layer_size, input_shape=(SEQ_LENGTH, 1), return_sequences=False))
    elif network == 'SRNN':
        model.add(SimpleRNN(layer_size, input_shape=(SEQ_LENGTH, 1), return_sequences=False))
    else:
        print("Could not build model. Provide a valid network argument in the command line.\n\tpython predictive_RNN.py [train/test] [LSTM/SRNN/GRU]")
        return None
    model.add(Dense(vocabulary, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=tf.optimizers.Adam(learning_rate=0.0001), metrics=[ignore_acc, 'categorical_accuracy'])
    model.summary()
    return model
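# Note: with input_shape=(SEQ_LENGTH, 1), each timestep is a single scaled character
# index in [0, 1) rather than a one-hot vector; only the Dense softmax output is
# vocabulary-sized.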
def evaluate(word, model, vocabulary):
    """Evaluate model accuracy according to Bernardy (2018).

    The model's prediction at index i of the input word is scored if and only if word[i+1] is a closing bracket.

    Args:
        word: Input word as a list of character indices (see char_to_int).
        model: Model to be evaluated, tf.keras model.
        vocabulary: Size of the training corpus vocabulary.
    Returns:
        accuracy_by_index: List of the accuracy measurements per character.
        predictions_by_index: List of the full output of the RNN per character.
    """
    correct_preds = 0
    closing_brackets = 0
    accuracy_by_index = []
    predictions_by_index = []
    for i in range(len(word) - 1):  # The EOW marker is never a closing bracket - irrelevant for scoring accuracy.
        # Transform the integer-encoded character into training-shaped input.
        X = np.reshape(word[i], (1, SEQ_LENGTH, 1))
        Y_pred = model.predict(X / float(vocabulary))
        predictions_by_index.append(Y_pred)
        index = np.argmax(Y_pred)
        if word[i+1] == char_to_int['}'] or word[i+1] == char_to_int[']']:
            closing_brackets += 1
            if index == word[i+1]:
                correct_preds += 1
        if closing_brackets:
            accuracy = correct_preds / float(closing_brackets)
        else:
            # By definition, accuracy cannot be lower than 1.0 before the first closing bracket is encountered.
            accuracy = 1.
        accuracy_by_index.append(accuracy)
    return accuracy_by_index, predictions_by_index
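# Both returned lists hold len(word) - 1 entries: accuracy_by_index[i] is the running
# accuracy over all closing brackets seen up to word[i+1], and predictions_by_index[i]
# is the softmax distribution output after reading word[i].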
def corpus_eval(words, splits):
    """Calculates accuracy on the chunked test corpus.

    Args:
        words: Preprocessed test corpus.
        splits: Maximum number of splits.
    Returns:
        avg_accuracy_by_index: List containing accuracy by index, averaged over every word in the corpus.
        corpus_accuracy_by_index: List containing a separate accuracy-by-index list for every word in the corpus.
    """
    # Evaluation is done by splitting the test corpus into `splits` chunks of `chunk`
    # words each (170 chunks of 51 words for this corpus), plus a final chunk for all
    # leftover words, evaluating one chunk per run and then averaging over all of them.
    # This is because otherwise, repeatedly calling model.predict() caused
    # a memory leak on my build:
    # https://github.com/keras-team/keras/issues/13118
    # K.clear_session() + reloading the model for a continuous evaluation
    # resulted in a slower, but still significant leak.
    pickle_dir = './pickle/' + str(NETWORK) + '/' + str(LAYER_SIZE) + '/' + CORPUS + '/' + EXPERIMENT + '/'
    os.makedirs(pickle_dir, exist_ok=True)  # Make sure the output directory exists.
    size = len(words)
    chunk = size // splits
    corpus_accuracy_by_index = []
    c = 0  # Solely for progress output.
    # Set start and end of the chunk being evaluated in this run.
    if TEST_SLICE == splits:
        # Final chunk: all leftover words.
        start = chunk * splits
        end = len(words)
    else:
        start = TEST_SLICE * chunk
        end = start + chunk
    predictions_by_word = []
    print(" ======= SPLIT {} ======= ".format(TEST_SLICE))
    for word in words[start:end]:
        c += 1
        result, predictions = evaluate(word, model, VOCABULARY)
        corpus_accuracy_by_index.append(result)
        predictions_by_word.append(predictions)
        if c % 10 == 0:
            print("{}/{}".format(c, chunk))
    # Save this chunk's results.
    with open(pickle_dir + "corpus_accuracy_p" + str(TEST_SLICE) + ".p", "wb") as f:
        pickle.dump(corpus_accuracy_by_index, f)
    with open(pickle_dir + "predictions_by_word_p" + str(TEST_SLICE) + ".p", "wb") as f:
        pickle.dump(predictions_by_word, f)
    # Aggregate the results of every chunk; assumes the pickles for slices 0 to `splits` have already been written.
    corpus_accuracy_by_index = []
    for i in range(splits + 1):
        with open(pickle_dir + "corpus_accuracy_p" + str(i) + ".p", "rb") as f:
            corpus_accuracy_by_index = corpus_accuracy_by_index + pickle.load(f)
    # Accuracy by index over the entire corpus is the average of the accuracy at each index across all words.
    avg_accuracy_by_index = [float(sum(i)) / size for i in zip(*corpus_accuracy_by_index)]
    return [avg_accuracy_by_index, corpus_accuracy_by_index]
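# Inferred workflow for the chunked evaluation: run the script once per slice
# (test_slice 0 through 170); only the final run with test_slice 170, where every
# chunk pickle exists, has its aggregated results saved by the 'test' branch below.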
if MODE == 'train':
    # Prepare input (X) and target (Y) data.
    for i in range(len(raw_text) - SEQ_LENGTH):
        X_text = raw_text[i:i + SEQ_LENGTH]
        X = [char_to_int[char] for char in X_text]
        input_strings.append(X)
        Y = raw_text[i + SEQ_LENGTH]
        output_strings.append(char_to_int[Y])
    input_strings = np.array(input_strings)
    input_strings = np.reshape(input_strings, (input_strings.shape[0], input_strings.shape[1], 1))
    input_strings = input_strings / float(VOCABULARY)  # Scale inputs to [0, 1); active for main training.
    output_strings = np.array(output_strings)
    output_strings = utils.to_categorical(output_strings)
    print(input_strings.shape)
    print(output_strings.shape)
    # Create and train the model.
    model = buildmodel(VOCABULARY, LAYER_SIZE, NETWORK)
    history = model.fit(input_strings, output_strings, validation_split=VALIDATION, epochs=EPOCHS, batch_size=BATCH, callbacks=[checkpoint_callback])
elif MODE == 'test':
    # Prepare the corpus to evaluate on.
    words = []
    with open(TEST_INPUT, 'r') as infile:
        for line in infile:
            raw_words = line.split('$')
            for raw_word in raw_words:
                raw_word = raw_word + '$'
                words.append([char_to_int[char] for char in raw_word])
    words = words[:-1]  # split() leaves a trailing empty string, which became a spurious '$'-only word - drop it.
    evaluated_words = len(words)
    print("Words in test corpus: {}".format(evaluated_words))
    # Build the model from the latest checkpoint.
    model = buildmodel(VOCABULARY, LAYER_SIZE, NETWORK)
    model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
    model.compile(loss='categorical_crossentropy', metrics=[ignore_acc, 'categorical_accuracy'])
    model.summary()
    avg_accuracy_by_index, accuracy_by_index = corpus_eval(words, 170)
    if TEST_SLICE == 170:  # Final slice of the test corpus processed - save the aggregated results.
        out_avg = './eval/' + str(NETWORK) + '/' + str(LAYER_SIZE) + '/' + CORPUS + '/' + EXPERIMENT + '/' + 'avg_acc_by_index.p'
        with open(out_avg, "wb") as f:
            pickle.dump(avg_accuracy_by_index, f)
        out_acc = './eval/' + str(NETWORK) + '/' + str(LAYER_SIZE) + '/' + CORPUS + '/' + EXPERIMENT + '/' + 'acc_by_index.p'
        with open(out_acc, "wb") as f:
            pickle.dump(accuracy_by_index, f)
        print(avg_accuracy_by_index)
else:
    print("Please specify a mode - train or test.")