-
Notifications
You must be signed in to change notification settings - Fork 0
/
integrated_gradients.py
78 lines (61 loc) · 2.84 KB
/
integrated_gradients.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import tensorflow as tf
import numpy as np
def get_sub_models(model_pretrained, model_embedding):
    """Split the pretrained model and copy shared weights into the embedding model.

    Args:
        model_pretrained: wrapper object exposing a Keras model via ``.model``;
            the model must contain a layer named ``'token_pos_embedding'``.
        model_embedding: wrapper object exposing a Keras model via ``.model``;
            every layer whose name also exists in the pretrained model
            receives that layer's weights.

    Returns:
        Tuple ``(model_pre, model_con, model_emb)``: the full pretrained
        Keras model, a sub-model truncated at the token/position embedding
        output, and the embedding model with synchronized weights.
    """
    model_pre = model_pretrained.model
    # Sub-model mapping the raw inputs to the token+position embedding output.
    model_con = tf.keras.Model(inputs=model_pre.inputs,
                               outputs=model_pre.get_layer('token_pos_embedding').output)
    model_emb = model_embedding.model
    for layer in model_pre.layers:
        # Keras raises ValueError for an unknown layer name; catch only that
        # (a bare `except:` would also hide real errors like typos or
        # interrupted execution) and skip layers the embedding model lacks.
        try:
            target = model_emb.get_layer(layer.name)
        except ValueError:
            continue
        target.set_weights(layer.get_weights())
    return model_pre, model_con, model_emb
def compute_gradients_model(model, data_points, target_class_idx):
    """Gradient of the target-class output with respect to the inputs.

    Converts ``data_points`` to a float32 tensor, watches it on a gradient
    tape, evaluates the model, and differentiates the output column for
    ``target_class_idx`` with respect to the inputs.
    NOTE(review): assumes the model output is already a probability /
    score per class along axis 1 — confirm against the model definition.

    Returns:
        numpy array of the same shape as ``data_points`` holding d(score)/d(input).
    """
    inputs = tf.convert_to_tensor(data_points, dtype=tf.float32)
    with tf.GradientTape() as tape:
        tape.watch(inputs)
        # Select the score column of the class being attributed.
        class_scores = model(inputs)[:, target_class_idx]
    return tape.gradient(class_scores, inputs).numpy()
def integral_approximation(gradients):
    """Approximate the path integral of ``gradients`` (Riemann trapezoidal rule).

    Averages each pair of consecutive gradient samples along axis 0 (the
    interpolation/alpha axis), then takes the mean over all trapezoids.

    Returns:
        Tensor with the alpha axis reduced away.
    """
    trapezoids = (gradients[:-1] + gradients[1:]) / tf.constant(2.0)
    return tf.math.reduce_mean(trapezoids, axis=0)
def interpolate_embeddings(example,
                           alphas,
                           embedding_size=128,
                           baseline_method='zeros'):
    """Build interpolation points between a baseline and ``example`` for IG.

    Args:
        example: embedding array for one batch (numpy array, as produced by
            ``model_con(...).numpy()`` in ``get_attributions``).
        alphas: 1-D sequence of interpolation coefficients in ``[0, 1]``.
        embedding_size: number of leading entries along axis 1 zeroed in the
            baseline. NOTE(review): this zeroes ``example[:, 0:embedding_size]``
            on axis 1 — confirm that axis is the intended embedding dimension.
        baseline_method: only ``'zeros'`` is implemented.

    Returns:
        Array/tensor of shape ``(len(alphas),) + example.shape`` with the
        points ``baseline + alpha * (example - baseline)``.

    Raises:
        ValueError: if ``baseline_method`` is not ``'zeros'``.
    """
    if baseline_method != 'zeros':
        # Fail loudly instead of printing and returning None, which would
        # only crash downstream with a confusing TypeError.
        raise ValueError(f"baseline_method {baseline_method!r} is not implemented")
    baseline = tf.identity(example).numpy()
    baseline[:, 0:embedding_size] = 0
    # Add two trailing axes so alphas broadcast over the example dimensions.
    alphas_x = alphas[:, tf.newaxis, tf.newaxis]
    delta = example - baseline
    return baseline + alphas_x * delta
def get_attributions(orig_data, model_con, model_emb, alphas,
                     embedding_size=128, baseline_method='zeros'):
    """Integrated-gradients attribution maps for every output class.

    Runs ``orig_data`` through the embedding sub-model, interpolates between
    a baseline and that embedding, then accumulates one IG map per class.

    Returns:
        numpy array of shape ``(num_classes, example.shape[1], example.shape[2])``.
    """
    example = model_con(orig_data)
    inter_embeddings = interpolate_embeddings(example.numpy(), alphas,
                                              embedding_size=embedding_size,
                                              baseline_method=baseline_method)
    num_classes = int(model_emb.output.shape[1])
    # Pre-allocate (float64) so each class map is written into its row.
    attributions = np.zeros([num_classes, example.shape[1], example.shape[2]])
    for class_idx in range(num_classes):
        grads = compute_gradients_model(model_emb, inter_embeddings, class_idx)
        attributions[class_idx] = integral_approximation(gradients=grads).numpy()
    return attributions
def min_max_scale_ig(values, use_max=1, use_min=0):
    """Min-max scale ``values`` into the range ``[use_min, use_max]``.

    Args:
        values: numpy array (any shape) of attribution scores.
        use_max: upper bound of the output range.
        use_min: lower bound of the output range.

    Returns:
        Array of the same shape, linearly rescaled so the smallest input maps
        to ``use_min`` and the largest to ``use_max``.  A constant input
        (``max == min``) maps everywhere to ``use_min`` instead of producing
        NaN from the 0/0 division the original code performed.
    """
    min_val = np.min(values)
    max_val = np.max(values)
    span = max_val - min_val
    if span == 0:
        # Degenerate case: all values identical; avoid division by zero.
        return np.full_like(np.asarray(values, dtype=float), use_min)
    scaled_std = (values - min_val) / span
    return scaled_std * (use_max - use_min) + use_min