This repository was archived by the owner on Mar 30, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneuralnet.py
122 lines (84 loc) · 2.48 KB
/
neuralnet.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from gensim.models.word2vec import *
import numpy as np
import re
# hyperparams
max_len = 25
word2vec_size = 300
input_size = max_len * word2vec_size * 2
epochs = 100
learning_rate = 0.0000005
def process_def(definition):
if len(definition) == 1:
return definition
definition = re.sub("[^a-zA-Z\s]", " ", definition).split()
definition = [a for a in definition if len(a) > 2]
def_arr = np.array(definition)
word_count = len(def_arr)
stretch = ([max_len/word_count + 1] * (max_len % word_count))
stretch.extend([max_len/word_count] * (word_count - max_len % word_count))
def_arr = np.repeat(def_arr, stretch)
return def_arr
def getwv(word):
try:
return model.wv[word]
except KeyError:
return None
print('-- READING DATA --')
# read in defs
with open("definitions.txt") as f:
data = f.readlines()
# remove \n at end
data = [process_def(x.strip()) for x in data]
# reorganize into [[def 1, def 2, 1/0], [def 1, def 2, 1/0], ...]
data = np.reshape(data, (-1, 3)).T
x = data[:2].T
y = np.array([map(int, data[2:][0])])
model = Word2Vec.load("word2vec model/trained/trained.w2v")
x = [[[getwv(word) for word in defn] for defn in a] for a in x]
x = np.array(x)
# init weights and the bias
w = np.zeros((1, input_size))
b = 0
def sigmoid(f_a):
return 1 / (1 + np.exp(-f_a))
print('-- STARTING TRAINING --')
for i in range(epochs):
# init loss
j = 0
print(x)
print(w)
# multiply inputs by weights and and bias
z = x.dot(w.transpose()) + b
# take sigmoid
a = sigmoid(z)
# calculate the loss
j += -np.sum(np.dot(y, np.log(a)) + np.dot(1 - y, np.log(1 - a))) / len(x)
# derivative of z
dz = a - y
# update w and b
w -= learning_rate * (np.sum(np.dot(dz, x), axis=0) / len(x))
b -= learning_rate * np.sum(dz) / len(x)
# print(loss)
print("\r" + str(i) + "/" + str(epochs) +
" [" + ("#" * int((i * 10) / epochs)) + ("." * int(10 - ((i * 10) / epochs))) +
"] Loss: " + str(j))
print()
print('-- STARTING TESTING --')
print()
# repeat above steps for test data
test_x = x
test_y = y
z = test_x.dot(w.transpose()) + b
a = sigmoid(z)
print(a)
print()
print('Author Inputted Definitions')
print()
def1 = "cats"
def2 = "being guided by what is right"
test_x = [[process_def(def1)], [process_def(def2)]]
test_x = [[getwv(word) for word in a] for a in test_x]
test_x = np.append(test_x[0], test_x[1])
z = test_x.dot(w.transpose()) + b
a = sigmoid(z)
print(a)