utils.py
# coding: utf-8
import torch
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.init import uniform, normal
SYM_PAD = 0
SYM_GO = 1
SYM_EOS = 2
SYM_UNK = 3
class ApproximateEmbeddingLayer(nn.Module):
    r"""
    Takes the LSTM output h_i of shape (batch_size, hid_size) and multiplies it by the
    projection matrix W_p of shape (hid_size, vocab_size), giving (batch_size, vocab_size).
    After normalization, softmax((h_i + z_i) * W_p + b_p), the resulting (batch_size, vocab_size)
    distribution is multiplied by the word embeddings of shape (vocab_size, emb_size),
    yielding (batch_size, emb_size): the approximate embedding for the current time step.
    """

    def __init__(self, hidden_dim, vocab_size):
        super(ApproximateEmbeddingLayer, self).__init__()
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.out = nn.Linear(hidden_dim, vocab_size)

    def reset_parameters(self):
        # The projection weights live on the inner Linear layer (self.out), not on this
        # module itself; the noise is sampled in forward() and is not a parameter.
        normal(self.out.weight.data)
        normal(self.out.bias.data)

    def forward(self, inputs, embeddings):
        # inputs shape: (batch_size, hid_size)
        # Gaussian noise z_i with mean 0 and std 0.1, added element-wise to each row of h_i.
        noise = Variable(torch.rand(inputs.size()).normal_(0., 0.1), requires_grad=False).cuda()
        # noise = Variable(torch.zeros(inputs.size()), requires_grad=False).cuda()  # noise-free variant
        score = self.out(inputs + noise)
        normalized_weights = F.log_softmax(score, dim=-1)
        # (batch_size, vocab_size) x (vocab_size, emb_size) -> (batch_size, emb_size)
        approximate_embeddings = torch.mm(F.softmax(score, dim=-1), embeddings.weight)
        return (normalized_weights, approximate_embeddings)

    def __repr__(self):
        return '{name}(hidden_dim={hidden_dim}, vocab_size={vocab_size})'.format(
            name=self.__class__.__name__,
            hidden_dim=self.hidden_dim,
            vocab_size=self.vocab_size)
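
# A minimal usage sketch (not part of the original file): the sizes below are assumed
# for illustration, and the nn.Embedding table stands in for the decoder's word
# embeddings. A GPU is required because forward() moves the noise tensor to CUDA.
if __name__ == '__main__':
    hidden_dim, vocab_size, emb_size, batch_size = 128, 5000, 300, 16

    layer = ApproximateEmbeddingLayer(hidden_dim, vocab_size).cuda()
    embeddings = nn.Embedding(vocab_size, emb_size).cuda()

    # h_i: one decoder hidden state per batch element.
    h = Variable(torch.randn(batch_size, hidden_dim)).cuda()

    log_probs, approx_emb = layer(h, embeddings)
    print(log_probs.size())   # (batch_size, vocab_size): log-softmax vocabulary scores
    print(approx_emb.size())  # (batch_size, emb_size): approximate embedding for the next step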