forked from verivital/nnv
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathverify_continuous.m
213 lines (154 loc) · 5.92 KB
/
verify_continuous.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
%% Robustness verification of a NN (L infinity adversarial attack)
% if f(x) = y, then forall x' in X s.t. ||x - x'||_{\infty} <= eps,
% then f(x') = y = f(x)
%% Section 1. Load Data
% Load feature analysis data
feature_info = readtable('data/bodmas-feature-analysis-NEW.csv');
% Load data
data = load('data/bodmas.mat');
% Assign data to independent variables
X = data.X;
y = data.y;
y = y'; % transpose
y = y+1; % matlab predictions are 1 or 2
% Scale the data-- > must be scaled! (same scaling as used for training)
X_scaled = zscore(X); % standard scaler equivalent
% Initialize variables for experiment
rng(0); % initialize random seed to select inputs to verfy
N = 5; % number of samples to verify (may be different for every model)
idxs = randperm(size(X_scaled,1),N); % select N random samples
% Select subset data to analyze
XData = X_scaled(idxs,:);
YData = y(idxs);
numClasses = 2;
%% Section 2. Load and select NN model
% Load model (choose any of these)
models = ["models/malware_bodmas_binary_scaled_none-2.onnx";
"models/malware_bodmas_binary_scaled_4-2.onnx";
"models/malware_bodmas_binary_scaled_16-2.onnx"];
% Verify every model (we'll just choose one for this tutorial)
modelNumber = 1; % 1, 2, or 3
% Select model
modelName = models(modelNumber);
% Name fix
saveName = split(modelName,'/');
saveName = split(saveName{end}, '.');
saveName = saveName{1};
disp(" ");
disp("Running analysis on model: "+saveName);
% load model and convert
netonnx = importNetworkFromONNX(modelName, "InputDataFormats", "BC");
net = matlab2nnv(netonnx); % convert to NNV
net.OutputSize = numClasses;
%% Section 3. Robustness verification
%
% Steps to verification approach
% 1) Counterexample search
% 2) Reachability analysis (verification)
% Define reachability options
reachOptions = struct;
reachOptions.reachMethod = 'approx-star';
% reachOptions.reachMethod = 'relax-star-area';
% reachOptions.relaxFactor = 0.5;
% Robustness analysis over different epsilon values
epsilon = [0.01; 0.05; 0.1]; % scale of attack
nE = length(epsilon); % number of different attacks to evaluate
% Initialize variables to store results
res = zeros(N,nE); % robust result
time = zeros(N,nE); % computation time
% number of random samples from input set (counterexample search)
nR = 100;
% Begin analysis
for e=1:nE
ep = epsilon(e); % choose epsilon value
for i=1:N
% Print to screen
disp("Verifying example: "+string(i));
% Create input set
IS = L_inf_attack_informed(XData(i,:), ep, feature_info);
% Begin tracking time for verification
t = tic;
% Attempt at falsification
xBox = Box(IS.im_lb', IS.im_ub');
xF = xBox.sample(nR); % create random samples
% Add bounds and original input
xF(:,nR+1) = XData(i,:); % add original image
xF(:,nR+2) = IS.im_lb'; % add lower bound
xF(:,nR+3) = IS.im_ub'; % add upper bound
% Classify all random inputs and look for counterexamples
predictedLabels = predict(netonnx, xF');
[~, predictedLabels] = max(predictedLabels,[],2);
% print to screen
disp("Time spent after simulation: " + string(toc(t))+" seconds");
% Any misclassified?
if any(predictedLabels ~= YData(i))
temp = 0; % counterexample found
else % attempt to verify using reachability analysis
temp = net.verify_robustness(IS, reachOptions, YData(i));
disp("Time spent after reachability: " + string(toc(t))+" seconds");
end
% Add individual results
res(i,e) = temp;
time(i, e) = toc(t);
% Print final (individual) result to screen
disp("Total time spent: " + string(toc(t))+" seconds");
disp("Verification result: " +string(temp));
disp(" ");
end
% Print Global Results to screen
disp("======== RESULTS e: "+string(epsilon(e))+" ========");
disp("Average computation time: "+string(sum(time(:,e))/N));
disp("Robust = "+string(sum(res(:,e)==1))+" out of " + string(N) + " images");
disp("===================================")
disp(" ");
disp(" ");
disp(" ");
end
%% Let's visualize the ranges for the last verification example
% Get output reachable set
R = net.reachSet{end};
% Get (overapproximate) ranges for each output index
[lb_out, ub_out] = R.getRanges;
lb_out = squeeze(lb_out);
ub_out = squeeze(ub_out);
% Get middle point for each output and range sizes
mid_range = (lb_out + ub_out)/2;
range_size = ub_out - mid_range;
% Label for x-axis
x = [0, 1]; % benign or malicious
% Visualize set ranges and evaluation points
fig = figure;
errorbar(x, mid_range, range_size, '.');
xlim([-0.5 1.5]);
xticks([0 1]);
xticklabels({'benign','malicious'})
xlabel("Class");
grid;
fontsize(fig, 16, "points");
%% Helper Functions
function IS = L_inf_attack_informed(x,epsilon,info)
% This uses epsilon to scale the range between a features scaled values.
% Just looking at continuous features right now
% Categories include: ['Continuous', 'Hash Categorical' ,
% 'Hash Categorical Discrete', 'Discrete with large range' ,
% 'Binary', 'Categorical' ,'Mem related']
mask = strcmp(info.Category, "Continuous");
indices = find(mask);
% get range for each feature
range = info.scaled_max - info.scaled_min;
diff = zeros(size(range));
% only use the continuous variables
diff(indices) = range(indices);
% apply epsilong to every value
diff = diff * epsilon;
% permute dimensions
diff = diff';
% --> lb = x - diff (diff is epsilon informed by info)
lb = x - diff;
lb = max(lb, info.scaled_min.'); % ensure no values < min value
% --> ub = x + diff (diff is epsilon informed by info)
ub = x + diff;
ub = min(ub, info.scaled_max.'); % ensure no values > max value
% Create input set based on bounds
IS = ImageStar(lb,ub);
end