# toyMlp.py
# Toy two-layer MLP trained with backpropagation, scaled down from an
# MNIST hand-written digit classifier to a tiny hard-coded dataset so
# each forward/backward pass can be traced by hand.
# CS545
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# number of epochs to run
epochs = 100
# batch size (declared but not used; updates below are per-example)
batch_size = 2
# momentum (declared but not used in this toy version)
alpha = 0.9
# number of input nodes (1 bias node + 2 inputs in this toy;
# the full MNIST version would use 1 + 28^2 = 785)
num_input_nodes = 3
# number of hidden nodes
num_hidden_nodes = 2
# learning rate
eta = 0.1
# number of output nodes, one per class (the MNIST version has one per digit)
digits = 2
# arrays to record per-epoch accuracy for plotting
accuracy = np.zeros(epochs, dtype=float)
test_accuracy = np.zeros(epochs, dtype=float)
# training data: each row of x is [bias input (always 1), input 1, input 2],
# and t holds the target class for that row (here, the logical OR of the inputs)
x = [[1, 1, 0], [1, 1, 1], [1, 0, 0], [1, 0, 1]]
t = [1, 1, 0, 1]
# number of training and test examples (testing is disabled in this toy version)
num_train_examples = 4
num_test_examples = 0
# starting weights: fixed values here so runs are reproducible;
# uncomment the two lines below to use small random weights instead
# input_weights is [num_hidden_nodes, num_input_nodes] = [2, 3]
# hidden_weights is [digits, num_hidden_nodes] = [2, 2]
# input_weights = np.random.uniform(low=-0.05, high=0.05, size=(num_hidden_nodes, num_input_nodes))
# hidden_weights = np.random.uniform(low=-0.05, high=0.05, size=(digits, num_hidden_nodes))
input_weights = np.array([[0.5, 0.1, -0.5], [-0.2, 0.3, 0.4]])
hidden_weights = np.array([[-0.2, 0.1], [-0.1, -0.5]])
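# Quick sanity check (added): the hard-coded weight matrices must match the
# dimensions configured above, since the shape comments elsewhere rely on it.
assert input_weights.shape == (num_hidden_nodes, num_input_nodes)
assert hidden_weights.shape == (digits, num_hidden_nodes)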
# store all predictions for the confusion matrix
test_predictions = np.zeros(num_test_examples)
epoch = 0
# train for the configured number of epochs
while epoch < epochs:
    i = 0
    correct = 0
    # go through all the training data, updating weights after each example
    while i < num_train_examples:
        # error terms for this training example
        output_error = np.zeros(digits)
        hidden_error = np.zeros(num_hidden_nodes)
        # forward pass, input -> hidden: the dot product multiplies each
        # input value by the weight for its node and sums the results
        # hidden_dot_products and hidden_activation are [num_hidden_nodes]
        hidden_dot_products = np.matmul(x[i], input_weights.transpose())
        hidden_activation = 1 / (1 + np.exp(-hidden_dot_products))
        # forward pass, hidden -> output: same computation on the hidden layer
        # output_dot_products and output_activation are [digits]
        output_dot_products = np.matmul(hidden_activation, hidden_weights.transpose())
        output_activation = 1 / (1 + np.exp(-output_dot_products))
        # the index of the largest output activation is the predicted class
        picked = np.argmax(output_activation)
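        # For reference (added): the forward pass above computes
        #   h = sigmoid(input_weights @ x)    with sigmoid(z) = 1 / (1 + e^(-z))
        #   o = sigmoid(hidden_weights @ h)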
        if picked == t[i]:
            correct += 1
        # build the target vector to compare against output_activation:
        # 0.9 for the correct class, 0.1 elsewhere; y_target is [digits]
        y_target = np.full(digits, 0.1)
        y_target[t[i]] = 0.9
        # backward pass: compute the error terms (deltas)
        # output_error is [digits]; hidden_error is [num_hidden_nodes]
        output_error = np.multiply(output_activation, np.multiply(1 - output_activation, y_target - output_activation))
        # propagate the output deltas back through the hidden-to-output weights
        back_sum = np.matmul(output_error, hidden_weights)
        hidden_error = np.multiply(hidden_activation, np.multiply(1 - hidden_activation, back_sum))
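        # For reference (added): these are the standard sigmoid-network deltas,
        #   delta_k = o_k * (1 - o_k) * (t_k - o_k)             (output unit k)
        #   delta_j = h_j * (1 - h_j) * sum_k(w_kj * delta_k)   (hidden unit j)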
        # update the weights: w += eta * delta * (activation feeding that weight);
        # the deltas already carry the (t - o) sign, so the update is an addition
        # h_to_o_delta_w is [digits, num_hidden_nodes]
        h_to_o_delta_w = eta * np.matmul(np.reshape(output_error, (digits, 1)),
                                         np.reshape(hidden_activation, (1, num_hidden_nodes)))
        hidden_weights += h_to_o_delta_w
        # i_to_h_delta_w is [num_hidden_nodes, num_input_nodes]
        i_to_h_delta_w = eta * np.matmul(np.reshape(hidden_error, (num_hidden_nodes, 1)),
                                         np.reshape(x[i], (1, num_input_nodes)))
        input_weights += i_to_h_delta_w
        i += 1
    accuracy[epoch] = correct / num_train_examples
    # run the network on the test data (disabled in this toy version)
    # test_accuracy[epoch] = run_test(num_test_examples, input_weights, hidden_weights, x_test, test_predictions, t_test)
    epoch += 1
print(accuracy)
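# matplotlib is imported but never used above; a minimal sketch (added) for
# plotting the per-epoch training accuracy recorded during the run:
plt.plot(accuracy)
plt.xlabel("epoch")
plt.ylabel("training accuracy")
plt.title("toy MLP training accuracy")
plt.show()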
# confusion-matrix reporting for the full MNIST version (disabled here,
# since no test predictions are collected in this toy script)
# target_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
# cm = confusion_matrix(t_test, test_predictions)
# disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=target_names)
# disp.plot()
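# The disabled evaluation above calls run_test, which is not defined in this
# file. A minimal sketch (added) of what it might look like, assuming the same
# two-layer sigmoid forward pass as the training loop; the name, signature,
# and the x_test/t_test arrays it expects are hypothetical, not from the
# original:
def run_test(num_examples, in_weights, hid_weights, x_data, predictions, targets):
    # run the forward pass on each test example and record the predicted class
    correct = 0
    for j in range(num_examples):
        hidden = 1 / (1 + np.exp(-np.matmul(x_data[j], in_weights.transpose())))
        output = 1 / (1 + np.exp(-np.matmul(hidden, hid_weights.transpose())))
        predictions[j] = np.argmax(output)
        if predictions[j] == targets[j]:
            correct += 1
    # avoid dividing by zero while testing is disabled (num_test_examples = 0)
    return correct / num_examples if num_examples > 0 else 0.0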