# models.py — LSTM language models (92 lines / 3.73 KB in the original file).
# (The following header text and per-line number gutter were GitHub page
# residue from the scrape and have been collapsed into this comment.)
import torch
from torch import nn, optim
class SingleInputLSTMModel(nn.Module):
    """Word-level LSTM language model over a single token stream.

    Pipeline: embedding -> dropout -> (stacked) unidirectional LSTM ->
    linear projection to vocabulary logits -> log-softmax.

    Args:
        vocab_size: number of real tokens; the embedding table holds
            ``vocab_size + 1`` rows (one extra row, e.g. for padding).
        arch: hyper-parameter dict with keys ``embedding_size``,
            ``dropout_prob``, ``hidden_units_lstm``, ``num_layers_lstm``,
            and ``init_range``.
    """

    def __init__(self, vocab_size, arch):
        super(SingleInputLSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size + 1, arch['embedding_size'])
        self.dropout = nn.Dropout(p=arch['dropout_prob'])
        self.lstm1 = nn.LSTM(
            arch['embedding_size'],
            arch['hidden_units_lstm'],
            num_layers=arch['num_layers_lstm'],
            bidirectional=False
        )
        self.dense = nn.Linear(arch['hidden_units_lstm'], vocab_size)
        self.log_softmax = nn.LogSoftmax(-1)
        self.arch = arch

    def init_weights(self):
        """Initialize learnable weights uniformly in [-init_range, init_range].

        Bug fix: ``nn.LSTM`` has no single ``.weight`` attribute — its
        parameters are named ``weight_ih_l{k}`` / ``weight_hh_l{k}`` (plus
        biases), so the previous ``self.lstm1.weight`` raised
        ``AttributeError``. We now initialize every LSTM weight tensor
        individually (biases are left at PyTorch defaults, matching the
        dense layer's zeroed-bias convention below).
        """
        initrange = self.arch['init_range']
        for name, param in self.lstm1.named_parameters():
            if 'weight' in name:
                param.data.uniform_(-initrange, initrange)
        self.dense.bias.data.zero_()
        self.dense.weight.data.uniform_(-initrange, initrange)

    def init_hidden(self, batch_size):
        """Return a zeroed (hidden state, cell state) pair for the LSTM.

        Each tensor has shape (num_layers, batch_size, hidden_units).
        """
        return (
            torch.zeros(self.arch['num_layers_lstm'], batch_size, self.arch['hidden_units_lstm']).float(),
            torch.zeros(self.arch['num_layers_lstm'], batch_size, self.arch['hidden_units_lstm']).float()
        )

    def forward(self, input, hidden):
        """Run one forward pass.

        Args:
            input: LongTensor of token ids, shape (seq_len, batch) —
                the LSTM uses the default ``batch_first=False`` layout.
            hidden: (h_0, c_0) tuple, e.g. from :meth:`init_hidden`.

        Returns:
            Tuple of log-probabilities with shape
            (seq_len * batch, vocab_size) and the LSTM's (h_n, c_n).
        """
        embedded = self.embedding(input)
        dropped = self.dropout(embedded)
        output_seq, hidden_output = self.lstm1(dropped, hidden)
        # dense layer, and flatten batches/seq length so we can apply softmax to all output vectors
        dense_output = self.dense(output_seq)
        dense_output = dense_output.view(-1, dense_output.shape[-1])
        # log softmax for smoother training
        normalized_output = self.log_softmax(dense_output)
        return normalized_output, hidden_output
class DualInputLSTMModel(nn.Module):
    """LSTM language model over two parallel token streams.

    The two streams are embedded separately, concatenated feature-wise,
    then fed through dropout -> (stacked) unidirectional LSTM -> linear
    projection to the *first* vocabulary -> log-softmax (i.e. the model
    predicts stream-1 tokens conditioned on both streams).

    Args:
        vocab_size1: size of the predicted (stream-1) vocabulary; the
            embedding table holds ``vocab_size1 + 1`` rows.
        vocab_size2: size of the auxiliary (stream-2) vocabulary.
        arch: hyper-parameter dict with keys ``embedding_size_1``,
            ``embedding_size_2``, ``dropout_prob``, ``hidden_units_lstm``,
            ``num_layers_lstm``, and ``init_range``.
    """

    def __init__(self, vocab_size1, vocab_size2, arch):
        super(DualInputLSTMModel, self).__init__()
        self.embedding1 = nn.Embedding(vocab_size1 + 1, arch['embedding_size_1'])
        self.embedding2 = nn.Embedding(vocab_size2 + 1, arch['embedding_size_2'])
        self.dropout = nn.Dropout(p=arch['dropout_prob'])
        self.lstm1 = nn.LSTM(
            arch['embedding_size_1'] + arch['embedding_size_2'],
            arch['hidden_units_lstm'],
            num_layers=arch['num_layers_lstm'],
            bidirectional=False
        )
        self.dense = nn.Linear(arch['hidden_units_lstm'], vocab_size1)
        self.log_softmax = nn.LogSoftmax(-1)
        self.arch = arch

    def init_weights(self):
        """Initialize learnable weights uniformly in [-init_range, init_range].

        Bug fix: ``nn.LSTM`` has no single ``.weight`` attribute — its
        parameters are named ``weight_ih_l{k}`` / ``weight_hh_l{k}`` (plus
        biases), so the previous ``self.lstm1.weight`` raised
        ``AttributeError``. We now initialize every LSTM weight tensor
        individually (biases are left at PyTorch defaults, matching the
        dense layer's zeroed-bias convention below).
        """
        initrange = self.arch['init_range']
        for name, param in self.lstm1.named_parameters():
            if 'weight' in name:
                param.data.uniform_(-initrange, initrange)
        self.dense.bias.data.zero_()
        self.dense.weight.data.uniform_(-initrange, initrange)

    def init_hidden(self, batch_size):
        """Return a zeroed (hidden state, cell state) pair for the LSTM.

        Each tensor has shape (num_layers, batch_size, hidden_units).
        """
        return (
            torch.zeros(self.arch['num_layers_lstm'], batch_size, self.arch['hidden_units_lstm']).float(),
            torch.zeros(self.arch['num_layers_lstm'], batch_size, self.arch['hidden_units_lstm']).float()
        )

    def forward(self, input, hidden):
        """Run one forward pass on stacked dual-stream input.

        Args:
            input: LongTensor of shape (seq_len, batch, 2); the last axis
                holds one token id per stream, split apart below.
            hidden: (h_0, c_0) tuple, e.g. from :meth:`init_hidden`.

        Returns:
            Tuple of log-probabilities with shape
            (seq_len * batch, vocab_size1) and the LSTM's (h_n, c_n).
        """
        input_1, input_2 = torch.split(input, 1, -1)
        embedded_1 = self.embedding1(input_1.squeeze(-1))
        embedded_2 = self.embedding2(input_2.squeeze(-1))
        concatenated = torch.cat([embedded_1, embedded_2], -1)
        dropped = self.dropout(concatenated)
        output_seq, hidden_output = self.lstm1(dropped, hidden)
        # dense layer, and flatten batches/seq length so we can apply softmax to all output vectors
        dense_output = self.dense(output_seq)
        dense_output = dense_output.view(-1, dense_output.shape[-1])
        # log softmax for smoother training
        normalized_output = self.log_softmax(dense_output)
        return normalized_output, hidden_output