forked from Sarasra/models
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathseq2seq_lib.py
137 lines (118 loc) · 5.53 KB
/
seq2seq_lib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""seq2seq library code copied from elsewhere for customization."""
import tensorflow as tf
# Adapted to support sampled_softmax loss function, which accepts activations
# instead of logits.
def sequence_loss_by_example(inputs, targets, weights, loss_function,
                             average_across_timesteps=True, name=None):
  """Computes the weighted per-example loss summed over a sequence.

  `loss_function` is applied to each (input, target) pair, the result is
  scaled by the matching weight, and the scaled losses of all timesteps are
  added together.

  Args:
    inputs: List of 2D Tensors of shape [batch_size x hid_dim].
    targets: List of 1D batch-sized int32 Tensors of the same length as inputs.
    weights: List of 1D batch-sized float-Tensors of the same length as inputs.
    loss_function: Sampled softmax function (inputs, labels) -> loss
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    name: Optional name for this operation, default: 'sequence_loss_by_example'.

  Returns:
    1D batch-sized float Tensor: The log-perplexity for each sequence.

  Raises:
    ValueError: If len(inputs) is different from len(targets) or len(weights).
  """
  num_steps = len(inputs)
  if not (len(targets) == num_steps and len(weights) == num_steps):
    raise ValueError('Lengths of logits, weights, and targets must be the same '
                     '%d, %d, %d.' % (len(inputs), len(weights), len(targets)))

  with tf.name_scope(values=inputs + targets + weights, name=name,
                     default_name='sequence_loss_by_example'):
    # One weighted loss tensor per timestep.
    weighted_losses = [
        loss_function(step_input, step_target) * step_weight
        for step_input, step_target, step_weight
        in zip(inputs, targets, weights)
    ]
    summed_loss = tf.add_n(weighted_losses)
    if not average_across_timesteps:
      return summed_loss
    # The small epsilon keeps the division finite when every weight is 0.
    weight_total = tf.add_n(weights) + 1e-12
    return summed_loss / weight_total
def sampled_sequence_loss(inputs, targets, weights, loss_function,
                          average_across_timesteps=True,
                          average_across_batch=True, name=None):
  """Weighted sampled-softmax loss for a sequence of inputs, batch-collapsed.

  Sums the per-example losses from `sequence_loss_by_example` into a single
  scalar and optionally divides it by the batch size.

  Args:
    inputs: List of 2D Tensors of shape [batch_size x hid_dim].
    targets: List of 1D batch-sized int32 Tensors of the same length as inputs.
    weights: List of 1D batch-sized float-Tensors of the same length as inputs.
    loss_function: Sampled softmax function (inputs, labels) -> loss
    average_across_timesteps: If set, divide the returned cost by the total
      label weight.
    average_across_batch: If set, divide the returned cost by the batch size.
    name: Optional name for this operation, defaults to
      'sampled_sequence_loss'.

  Returns:
    A scalar float Tensor: The average log-perplexity per symbol (weighted).

  Raises:
    ValueError: If len(inputs) is different from len(targets) or len(weights)
      (raised by `sequence_loss_by_example`).
  """
  with tf.name_scope(values=inputs + targets + weights, name=name,
                     default_name='sampled_sequence_loss'):
    cost = tf.reduce_sum(sequence_loss_by_example(
        inputs, targets, weights, loss_function,
        average_across_timesteps=average_across_timesteps))
    if not average_across_batch:
      return cost
    # The batch size is read dynamically from the first target tensor.
    batch_size = tf.shape(targets[0])[0]
    # Cast to the cost's own dtype rather than a hard-coded float32 so the
    # division also works when the loss is computed in float16/float64.
    return cost / tf.cast(batch_size, cost.dtype)
def linear(args, output_size, bias, bias_start=0.0, scope=None):
  """Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.

  All arguments are concatenated along dimension 1 and multiplied by a single
  'Matrix' variable; an optional 'Bias' variable is added to the result. Both
  variables are created with the dtype of the first argument.

  Args:
    args: a 2D Tensor or a list of 2D, batch x n, Tensors.
    output_size: int, second dimension of W[i].
    bias: boolean, whether to add a bias term or not.
    bias_start: starting value to initialize the bias; 0 by default.
    scope: VariableScope for the created subgraph; defaults to "Linear".

  Returns:
    A 2D Tensor with shape [batch x output_size] equal to
    sum_i(args[i] * W[i]), where W[i]s are newly created matrices.

  Raises:
    ValueError: if some of the arguments has unspecified or wrong shape.
  """
  if args is None or (isinstance(args, (list, tuple)) and not args):
    raise ValueError('`args` must be specified')
  if not isinstance(args, (list, tuple)):
    args = [args]

  # Calculate the total size of arguments on dimension 1.
  total_arg_size = 0
  shapes = [a.get_shape().as_list() for a in args]
  for shape in shapes:
    if len(shape) != 2:
      raise ValueError('Linear is expecting 2D arguments: %s' % str(shapes))
    if not shape[1]:
      raise ValueError('Linear expects shape[1] of arguments: %s' % str(shapes))
    total_arg_size += shape[1]

  # Create the variables in the inputs' dtype. Without an explicit dtype the
  # variables fall back to the scope/default dtype, which makes the matmul and
  # the bias add fail for inputs of any other floating-point type.
  dtype = args[0].dtype.base_dtype

  # Now the computation.
  with tf.variable_scope(scope or 'Linear'):
    matrix = tf.get_variable(
        'Matrix', [total_arg_size, output_size], dtype=dtype)
    if len(args) == 1:
      res = tf.matmul(args[0], matrix)
    else:
      res = tf.matmul(tf.concat(axis=1, values=args), matrix)
    if not bias:
      return res
    bias_term = tf.get_variable(
        'Bias', [output_size],
        dtype=dtype,
        initializer=tf.constant_initializer(bias_start))
  return res + bias_term