""" Code to better evaluate the efficacy of our attacks/models.
AdversarialEvaluation class contains two main things:
1) evaluate_ensemble: convenient wrapper to try an ensemble of
attacks on a single pretrained model and
output the average accuracy of each at the
end (compared to the ground set too)
2) full_attack function: function that attacks each example in a DataLoader
and outputs all attacked examples to a numpy file
[USEFUL FOR GENERATING FILES NEEDED BY MADRY
CHALLENGE]
"""
from __future__ import print_function
import torch
from torch.autograd import Variable
import torch.nn as nn
import utils.pytorch_utils as utils
import utils.image_utils as img_utils
import custom_lpips.custom_dist_model as dm
import os
import config
import glob
import numpy as np
from skimage.measure import compare_ssim as ssim
import adversarial_attacks as aa
import math
import functools
###########################################################################
# #
# EVALUATION RESULT OBJECT #
# #
###########################################################################
class EvaluationResult(object):
""" Stores results of adversarial evaluations, will be used in the
output of AdversarialEvaluation
"""
def __init__(self, attack_params, to_eval=None,
manual_gpu=None):
""" to_eval is a dict of {str : toEval methods}.
"""
self.attack_params = attack_params
self.normalizer = attack_params.adv_attack_obj.normalizer
if manual_gpu is not None:
self.use_gpu = manual_gpu
else:
self.use_gpu = utils.use_gpu()
# First map shorthand strings to methods
shorthand_evals = {'top1': self.top1_accuracy,
'avg_successful_lpips': self.avg_successful_lpips,
'avg_successful_ssim': self.avg_successful_ssim,
'stash_perturbations': self.stash_perturbations,
'avg_loss_value': self.avg_loss_value}
if to_eval is None:
to_eval = {'top1': 'top1'}
to_eval = dict(to_eval.items())
for key, val in list(to_eval.items()):
if val in shorthand_evals:
to_eval[key] = shorthand_evals[val]
else:
assert callable(val)
to_eval[key] = functools.partial(val, self)
self.to_eval = to_eval
self.results = {k: None for k in self.to_eval}
self.params = {k: None for k in self.to_eval}
def set_gpu(self, use_gpu):
self.attack_params.set_gpu(use_gpu)
def eval(self, examples, labels):
attack_out = self.attack_params.attack(examples, labels)
for k, v in self.to_eval.items():
v(k, attack_out)
def _get_successful_attacks(self, attack_out):
''' Gets the (successful, corresponding-original) attacks '''
perturbation = attack_out[4]
return perturbation.collect_successful(self.attack_params.adv_attack_obj.classifier_net,
self.attack_params.adv_attack_obj.normalizer)
def top1_accuracy(self, eval_label, attack_out):
######################################################################
# First set up evaluation result if doesn't exist: #
######################################################################
if self.results[eval_label] is None:
self.results[eval_label] = utils.AverageMeter()
result = self.results[eval_label]
######################################################################
# Computes the top 1 accuracy and updates the averageMeter #
######################################################################
attack_examples = attack_out[0]
pre_adv_labels = attack_out[1]
num_examples = float(attack_examples.shape[0])
attack_accuracy_int = self.attack_params.eval_attack_only(
attack_examples,
pre_adv_labels, topk=1)
result.update(attack_accuracy_int / num_examples, n=int(num_examples))
self.results[eval_label] = result
def avg_successful_lpips(self, eval_label, attack_out):
######################################################################
# First set up evaluation result if doesn't exist: #
######################################################################
        if self.results[eval_label] is None:
            self.results[eval_label] = utils.AverageMeter()
result = self.results[eval_label]
if self.params[eval_label] is None:
dist_model = dm.DistModel(net='alex', manual_gpu=self.use_gpu)
self.params[eval_label] = {'dist_model': dist_model}
dist_model = self.params[eval_label]['dist_model']
######################################################################
# Compute which attacks were successful #
######################################################################
successful_pert, successful_orig = self._get_successful_attacks(
attack_out)
if successful_pert is None or successful_pert.numel() == 0:
return
successful_pert = Variable(successful_pert)
successful_orig = Variable(successful_orig)
num_successful = successful_pert.shape[0]
        # LPIPS expects inputs scaled to [-1, 1], so rescale from [0, 1]
        xform = lambda im: im * 2.0 - 1.0
        lpips_dist = dist_model.forward_var(xform(successful_pert),
                                            xform(successful_orig))
avg_lpips_dist = float(torch.mean(lpips_dist))
result.update(avg_lpips_dist, n=num_successful)
def avg_successful_ssim(self, eval_label, attack_out):
        # We actually compute (1 - SSIM) to better match the notion of a 'metric'
######################################################################
# First set up evaluation result if doesn't exist: #
######################################################################
if self.results[eval_label] is None:
self.results[eval_label] = utils.AverageMeter()
result = self.results[eval_label]
######################################################################
# Compute which attacks were successful #
######################################################################
successful_pert, successful_orig = self._get_successful_attacks(
attack_out)
if successful_pert is None or successful_pert.numel() == 0:
return
successful_pert = Variable(successful_pert)
successful_orig = Variable(successful_orig)
count = 0
runsum = 0
        for og, adv in zip(successful_orig, successful_pert):
            count += 1
            # Move the channel axis to the end (CHW -> WHC) so skimage's
            # multichannel SSIM treats the last axis as channels
            runsum += ssim(og.transpose(0, 2).cpu().numpy(),
                           adv.transpose(0, 2).cpu().numpy(), multichannel=True)
avg_minus_ssim = 1 - (runsum / float(count))
result.update(avg_minus_ssim, n=count)
def avg_loss_value(self, eval_label, attack_out):
""" Computes and keeps track of the average attack loss
"""
######################################################################
# First set up evaluation result if it doesn't exist #
######################################################################
if self.results[eval_label] is None:
self.results[eval_label] = utils.AverageMeter()
result = self.results[eval_label]
######################################################################
# Next collect the loss function and compute loss #
######################################################################
attack_obj = self.attack_params.adv_attack_obj
# Structure of loss objects varies based on which attack class used
if isinstance(attack_obj, (aa.FGSM, aa.PGD)):
attack_loss = attack_obj.loss_fxn
elif isinstance(attack_obj, aa.CarliniWagner):
attack_loss = attack_obj._construct_loss_fxn(1.0, 0.0)
attack_loss.setup_attack_batch(attack_out[0])
loss_val = attack_loss.forward(attack_out[0], attack_out[1],
perturbation=attack_out[4])
loss_val_sum = float(torch.sum(loss_val))
count = attack_out[0].shape[0]
result.update(loss_val_sum, n=count)
def stash_perturbations(self, eval_label, attack_out):
""" This will store the perturbations.
(TODO: make these tensors and store on CPU)
"""
######################################################################
# First set up evaluation result if it doesn't exist #
######################################################################
if self.results[eval_label] is None:
self.results[eval_label] = []
result = self.results[eval_label]
perturbation_obj = attack_out[4]
result.append(perturbation_obj)
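###########################################################################
#   EXAMPLE: custom to_eval entry (illustrative sketch only)              #
###########################################################################
# Custom callables passed via `to_eval` are bound with
# functools.partial(val, self), so they are invoked exactly like the
# shorthand methods above: fxn(eval_result, eval_label, attack_out). The
# helper below is hypothetical and only illustrates that signature and the
# AverageMeter bookkeeping; `pgd_params` is a placeholder
# AdversarialAttackParameters object.
#
#     def avg_adv_pixel_value(eval_result, eval_label, attack_out):
#         # attack_out[0] holds the attacked examples (same layout that
#         # top1_accuracy relies on)
#         if eval_result.results[eval_label] is None:
#             eval_result.results[eval_label] = utils.AverageMeter()
#         adv_examples = attack_out[0]
#         eval_result.results[eval_label].update(float(adv_examples.mean()),
#                                                n=adv_examples.shape[0])
#
#     eval_result = EvaluationResult(pgd_params,
#                                    to_eval={'top1': 'top1',
#                                             'avg_pix': avg_adv_pixel_value})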
class IdentityEvaluation(EvaluationResult):
""" Subclass of evaluation result that just computes top1 accuracy for the
ground truths (attack perturbation is the identity)
Constructor) ARGS:
classifier_net : nn.module - standard argument, is just the nn to eval
normalizer : DifferentiableNormalize - standard normalizing argument
        manual_gpu : boolean - if not None, explicitly sets whether we use
                     the gpu (otherwise we auto-detect)
loss_fxn : None or nn.module - if not None is a loss function that takes
in arguments of the shape (NxC), (N), where the second arg has
integer values in 0 <= val <= C - 1. See nn.CrossEntropyLoss()
                   as an example signature
        (A standalone usage sketch follows this class.)
    """
def __init__(self, classifier_net, normalizer, manual_gpu=None,
loss_fxn=None):
self.classifier_net = classifier_net
self.normalizer = normalizer
if manual_gpu is not None:
self.use_gpu = manual_gpu
else:
self.use_gpu = utils.use_gpu()
self.loss_fxn = loss_fxn or nn.CrossEntropyLoss()
self.results = {'top1': utils.AverageMeter(),
'avg_loss_value': utils.AverageMeter()}
def set_gpu(self, use_gpu):
pass
def eval(self, examples, labels):
assert list(self.results.keys()) == ['top1', 'avg_loss_value']
ground_output = self.classifier_net(self.normalizer(Variable(examples)))
minibatch = float(examples.shape[0])
# Compute accuracy
ground_avg = self.results['top1']
minibatch_accuracy_int = utils.accuracy_int(ground_output,
Variable(labels), topk=1)
ground_avg.update(minibatch_accuracy_int / minibatch,
n=int(minibatch))
# Compute loss
ground_avg_loss = self.results['avg_loss_value']
        minibatch_loss = float(self.loss_fxn(ground_output, Variable(labels)))
        ground_avg_loss.update(minibatch_loss, n=int(minibatch))
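###########################################################################
#   EXAMPLE: standalone IdentityEvaluation (illustrative sketch only)     #
###########################################################################
# evaluate_ensemble below builds one of these automatically under the key
# 'ground', so constructing it directly is mainly useful with a custom
# loss_fxn. `classifier_net`, `normalizer`, and `val_loader` are
# placeholders, and utils.AverageMeter is assumed to expose `.avg`.
#
#     ground_eval = IdentityEvaluation(classifier_net, normalizer,
#                                      loss_fxn=nn.CrossEntropyLoss())
#     for examples, labels in val_loader:
#         ground_eval.eval(examples, labels)
#     print(ground_eval.results['top1'].avg)            # clean top-1 accuracy
#     print(ground_eval.results['avg_loss_value'].avg)  # average clean loss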
############################################################################
# #
# EVALUATION OBJECT #
# #
############################################################################
class AdversarialEvaluation(object):
""" Wrapper for evaluation of NN's against adversarial examples
"""
def __init__(self, classifier_net, normalizer, manual_gpu=None):
self.classifier_net = classifier_net
self.normalizer = normalizer
if manual_gpu is not None:
self.use_gpu = manual_gpu
else:
self.use_gpu = utils.use_gpu()
def evaluate_ensemble(self, data_loader, attack_ensemble,
skip_ground=False, verbose=True,
num_minibatches=None):
""" Runs evaluation against attacks generated by attack ensemble over
the entire training set
ARGS:
data_loader : torch.utils.data.DataLoader - object that loads the
evaluation data
attack_ensemble : dict {string -> EvaluationResult}
is a dict of attacks that we want to make.
None of the strings can be 'ground'
skip_ground : bool - if True we don't evaluate the no-attack case
verbose : bool - if True, we print things
num_minibatches: int - if not None, we only validate on a fixed
number of minibatches
        RETURNS:
            a dict with the same keys as attack_ensemble, plus the key
            'ground' (unless skip_ground is True). The values are the
            EvaluationResult objects themselves; their .results attributes
            hold the accumulated metrics (e.g. utils.AverageMeter objects)
        """
######################################################################
# Setup input validations #
######################################################################
self.classifier_net.eval()
assert isinstance(data_loader, torch.utils.data.DataLoader)
if attack_ensemble is None:
attack_ensemble = {}
if not skip_ground:
assert 'ground' not in attack_ensemble
# Build ground result
ground_result = IdentityEvaluation(self.classifier_net,
self.normalizer,
manual_gpu=self.use_gpu)
attack_ensemble['ground'] = ground_result
# Do GPU checks
utils.cuda_assert(self.use_gpu)
if self.use_gpu:
self.classifier_net.cuda()
for eval_result in attack_ensemble.values():
eval_result.set_gpu(self.use_gpu)
######################################################################
# Loop through validation set and attack efficacy #
######################################################################
for i, data in enumerate(data_loader, 0):
if num_minibatches is not None and i >= num_minibatches:
break
if verbose:
print("Starting minibatch %s..." % i)
inputs, labels = data
if self.use_gpu:
inputs = inputs.cuda()
labels = labels.cuda()
for k, result in attack_ensemble.items():
if verbose:
print("\t (mb: %s) evaluating %s..." % (i, k))
result.eval(inputs, labels)
return attack_ensemble
def full_attack(self, data_loader, attack_parameters,
output_filename, num_minibatches=None,
continue_attack=True, checkpoint_minibatch=10,
verbose=True, save_xform=img_utils.nhwc255_xform):
""" Builds an attack on the data and outputs the resulting attacked
images into a .numpy file
ARGS:
data_loader : torch.utils.data.DataLoader - object that loads the
evaluation data.
NOTE: for Madry challenge this shouldn't be shuffled
attack_parameters : AdversarialAttackParameters object - wrapper to
contain the attack
output_filename : string - name of the file we want to output.
should just be the base name (extension is .npy)
num_minibatches : int - if not None, we only build attacks for this
many minibatches of data
continue_attack : bool - if True, we do the following :
1) check if output_filename exists. If it doesn't
exist, proceed to make full attack as usual.
2) if output_filename exists, figure out how many
minibatches it went through and skip to the
next minibatch in the data loader
                              This effectively acts as a checkpointing system
                              for attacks
checkpoint_minibatch: int - how many minibatches until we checkpoint
verbose: bool - if True, we print out which minibatch we're in out
of total number of minibatches
            save_xform: fxn, np.ndarray -> np.ndarray - function that
                        transforms our adv_example.data.numpy() into the form
                        we want to store in the .npy output file (see the
                        sketch at the end of this file)
RETURNS:
numpy array of attacked examples
"""
raise NotImplementedError("BROKEN!!!")
######################################################################
# Setup and assert things #
######################################################################
self.classifier_net.eval()
# Check if loader is shuffled. print warning if random
assert isinstance(data_loader, torch.utils.data.DataLoader)
if isinstance(data_loader.batch_sampler.sampler,
torch.utils.data.sampler.RandomSampler):
print("WARNING: data loader is shuffled!")
        # Use float division so math.ceil rounds up correctly on Python 2 too
        total_num_minibatches = int(math.ceil(len(data_loader.dataset) /
                                              float(data_loader.batch_size)))
minibatch_digits = len(str(total_num_minibatches))
# Do cuda stuff
utils.cuda_assert(self.use_gpu)
attack_parameters.set_gpu(self.use_gpu)
if self.use_gpu:
self.classifier_net.cuda()
# Check attack is attacking everything
assert attack_parameters.proportion_attacked == 1.0
# handle output_file + continue_attack stuff
assert os.path.basename(output_filename) == output_filename, \
"Provided output_filename was %s, should have been %s" % \
(output_filename, os.path.basename(output_filename))
output_file = os.path.join(config.OUTPUT_IMAGE_PATH,
output_filename + '.npy')
minibatch_attacks = [] # list of 4d numpy arrays
num_prev_minibatches = 0
if continue_attack and len(glob.glob(output_file)) != 0:
# load file and see how many minibatches we went through
saved_data = np.load(output_file)
saved_num_examples = saved_data.shape[0]
loader_batch_size = data_loader.batch_size
if saved_num_examples % loader_batch_size != 0:
print("WARNING: incomplete minibatch in previously saved attack")
minibatch_attacks.append(saved_data)
            num_prev_minibatches = saved_num_examples // loader_batch_size
if verbose:
def printer(num):
print("Minibatch %%0%dd/%s" % (minibatch_digits,
total_num_minibatches) % num)
else:
printer = lambda num: None
######################################################################
# Start attacking and saving #
######################################################################
for minibatch_num, data in enumerate(data_loader):
# Handle skippy cases
            if minibatch_num < num_prev_minibatches:  # careful about off-by-one errors here
continue
if num_minibatches is not None and minibatch_num >= num_minibatches:
break
printer(minibatch_num)
# Load data and build minibatch of attacked images
inputs, labels = data
if self.use_gpu:
inputs = inputs.cuda()
labels = labels.cuda()
adv_examples = attack_parameters.attack(inputs, labels)[0]
# Convert to numpy and append to our save buffer
adv_data = adv_examples.cpu().numpy()
minibatch_attacks.append(save_xform(adv_data))
# Perform checkpoint if necessary
if minibatch_num > 0 and minibatch_num % checkpoint_minibatch == 0:
minibatch_attacks = utils.checkpoint_incremental_array(
output_file, minibatch_attacks,
return_concat=True)
return utils.checkpoint_incremental_array(output_file,
minibatch_attacks,
return_concat=True)[0]
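###########################################################################
#   EXAMPLE: consuming full_attack output (illustrative sketch only)      #
###########################################################################
# full_attack is currently disabled (it raises NotImplementedError above),
# but when working it incrementally checkpoints the attacked examples to
# config.OUTPUT_IMAGE_PATH/<output_filename>.npy. The default save_xform,
# img_utils.nhwc255_xform, is assumed (based on its name) to store images
# as NHWC arrays scaled to [0, 255]; 'my_attack' is a placeholder filename.
#
#     adv_path = os.path.join(config.OUTPUT_IMAGE_PATH, 'my_attack.npy')
#     adv_images = np.load(adv_path)   # assumed shape (N, H, W, C)
#     print(adv_images.shape, adv_images.min(), adv_images.max())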