-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathStrProxSGD_full_v5.m
429 lines (351 loc) · 11.4 KB
/
StrProxSGD_full_v5.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
function [T, it, EpochTime, Loss_real] = StrProxSGD_full_v5...
(C, flg_Img, Data_file, flg_rand_state, R, q, IT, g, ITperPStep, z_perc,...
precision)
% This function is implementing the Stratified Proximal Stochastic Gradient
% Descent Method from the paper:
%
%
% T. Papastergiou and V. Megalooikonomou, "A distributed proximal gradient descent method for tensor completion",
% 2017 IEEE International Conference on Big Data (Big Data), Boston, MA, 2017, pp. 2056-2065, doi: 10.1109/BigData.2017.8258152
%
% The training set is partitioned in d^2 independent strata
% and in each stratum the Proximal Operator is calculated in parallel.
% Arguments of the function
% C: Nr of workers
% flg_Img: if 1 then the data file is a image, if 2 the file is a hyperspectral image
% else the data file is a .mat file, that must contain a tensor named X
% Data_file: the full path to the file.
% flg_rand_state: if flg_rand_state>=0 rand state is put to flg_rand_state,
% if flg_rand_state == -1 then no rand state is used
% R: the rank of the PARAFAC Decomposition
% q: the exponent in the learning rate. the learning rate is calculated as
% follows: 1/(2^q)
% IT: the number of maximum iterations of the method
% g: the proximal coefficient, see the paper
% ITperPStep: SGD Iterations per proximal step, see the paper
% z_perc: the percentage of the missing values (applicable only in images
% and hyperspectral images
% precision: termination criterion of the algorithm
if flg_rand_state>=0
rand('state',flg_rand_state);
end
disp('loading and preprocessing data');
if flg_Img == 1
[I, J, K, X,~] = f_imgReadAndPreprossecing(Data_file,z_perc);
elseif flg_Img == 2
[X, IndexOfZeros, I, J, K] = f_specImgPrepross(Data_file, 1, 1280, 1, 307, z_perc);
else
load(Data_file);
f = X.size;
I = f(1);
J = f(2);
K = f(3);
end
[NNZ_i,NNZ_j,NNZ_k] = f_calculate_NNZ_i_v2_1(X);
execution_time = tic;
h = 1/(2^q);
disp('one time jobs:');
flg_X_sparse_i = 1;
flg_X_sparse_j = 1;
flg_X_sparse_k = 1;
X_un_subs_i = unique(X.subs(:,1));
X_un_subs_j = unique(X.subs(:,2));
X_un_subs_k = unique(X.subs(:,3));
if length(X_un_subs_i) == I
flg_X_sparse_i = 0;
disp('Dimension I is not sparse');
clear X_un_subs_i;
end
if length(X_un_subs_j) == J
flg_X_sparse_j = 0;
disp('Dimension J is not sparse');
clear X_un_subs_j;
end
if length(X_un_subs_k) == K
flg_X_sparse_k = 0;
disp('Dimension K is not sparse');
clear X_un_subs_k;
end
disp('start execution')
%partition of U0,V0,W0 in to C sequential blocks.
if flg_X_sparse_i == 0
U0_part = cell(1,C);
else
U0_part = cell(2,C);
end
if flg_X_sparse_j == 0
V0_part = cell(1,C);
else
V0_part = cell(2,C);
end
if flg_X_sparse_k == 0
W0_part = cell(1,C);
else
W0_part = cell(2,C);
end
index_part_X = floor(I/C);
index_part_Y = floor(J/C);
index_part_K = floor(K/C);
U0 = rand(I,R);
for i=0:C-1
if i==C-1
flg_mod_X = mod(I,C);
else
flg_mod_X = 0;
end
if flg_X_sparse_i==0
U0_part{i+1} = zeros(index_part_X+flg_mod_X,R);
U0_part{i+1} = U0(i*index_part_X+1:(i+1)*index_part_X + flg_mod_X,:);
else
[a,b] = findInSorted(X_un_subs_i, [i*index_part_X+1, (i+1)*index_part_X+flg_mod_X]);
U0_part{1,i+1} = zeros(b-a+1,R);
U0_part{1,i+1} = U0(X_un_subs_i(a:b),:);
U0_part{2,i+1} = X_un_subs_i(a:b);
end
end
clear U0;
clear X_un_subs_i;
V0 = rand(J,R);
for i=0:C-1
if i==C-1
flg_mod_Y = mod(J,C);
else
flg_mod_Y = 0;
end
if flg_X_sparse_j == 0
V0_part{i+1} = zeros(index_part_Y+flg_mod_Y,R);
V0_part{i+1} = V0(i*index_part_Y+1:(i+1)*index_part_Y+flg_mod_Y,:);
else
[a,b] = findInSorted(X_un_subs_j, [i*index_part_Y+1, (i+1)*index_part_Y+flg_mod_Y]);
V0_part{1,i+1} = zeros(b-a+1,R);
V0_part{1,i+1} = V0(X_un_subs_j(a:b),:);
V0_part{2,i+1} = X_un_subs_j(a:b);
end
end
clear V0;
clear X_un_subs_j;
W0 = rand(K,R);
for i=0:C-1
if i==C-1
flg_mod_K = mod(K,C);
else
flg_mod_K = 0;
end
if flg_X_sparse_k == 0
W0_part{i+1} = zeros(index_part_K+flg_mod_K,R);
W0_part{i+1} = W0(i*index_part_K+1:(i+1)*index_part_K + flg_mod_K,:);
else
[a,b] = findInSorted(X_un_subs_k , [i*index_part_K+1, (i+1)*index_part_K+flg_mod_K]);
W0_part{1,i+1} = zeros(b-a+1,R);
W0_part{1,i+1} = W0(X_un_subs_k(a:b),:);
W0_part{2,i+1} = X_un_subs_k(a:b);
end
end
clear W0;
clear X_un_subs_k;
%partitioning the tensor in strata
X_sp_part = cell(C,C^2);
for ip=0:C-1
for s=0:C^2-1
X_sp_part{ip+1,s+1} = sptensor([I J K]);
[X_sp_part{ip+1,s+1}] = f_find_a_stratum(s,ip,I,J,K,C,X);
end
end
norm_X = norm(X);
clear X;
Loss_real = zeros(1,IT);
EpochTime = zeros(1,IT);
%Composite Variables
U0_part_C = Composite(C);
V0_part_C = Composite(C);
W0_part_C = Composite(C);
U0_part_ind_C = Composite(C);
V0_part_ind_C = Composite(C);
W0_part_ind_C = Composite(C);
h_C = Composite(C);
g_C = Composite(C);
X_sp_part_C = Composite(C);
s_C = Composite(C);
it_C = Composite(C);
ITperPStep_C = Composite(C);
NNZ_i_C = Composite(C);
NNZ_j_C = Composite(C);
NNZ_k_C = Composite(C);
I_C = Composite(C);
J_C = Composite(C);
K_C = Composite(C);
C_C = Composite(C);
flg_X_sparse_i_C = Composite(C);
flg_X_sparse_j_C = Composite(C);
flg_X_sparse_k_C = Composite(C);
%send U0_part to the workers. send also h to the workers.
for i=1:C
if flg_X_sparse_i==1
U0_part_C{i} = U0_part{1,i};
U0_part_ind_C{i} = U0_part{2,i};
else
U0_part_C{i} = U0_part{i};
U0_part_ind_C{i} = [];
end
if flg_X_sparse_j==0
V0_part_ind_C{i} = [];
end
if flg_X_sparse_k==0
W0_part_ind_C{i} = [];
end
h_C{i} = h;
X_sp_part_C{i} = X_sp_part(i,:);
ITperPStep_C{i} = ITperPStep;
g_C{i} = g;
NNZ_i_C{i} = NNZ_i;
NNZ_j_C{i} = NNZ_j;
NNZ_k_C{i} = NNZ_k;
I_C{i} = I;
J_C{i} = J;
K_C{i} = K;
C_C{i} = C;
flg_X_sparse_i_C{i} = flg_X_sparse_i;
flg_X_sparse_j_C{i} = flg_X_sparse_j;
flg_X_sparse_k_C{i} = flg_X_sparse_k;
end
clear X_sp_part;
clear NNZ_i;
clear NNZ_j;
clear NNZ_k;
clear U0_part;
for it=1:IT
disp('epoch:')
disp(it)
%here begins an epoch
for s = 0:(C^2)-1
%here begins a subepoch
for ip=0:C-1
%send the appropriate blocks of V0_part and W0_part to the workers. The
%U0_part blocks have been allready sent to the workers since each
%worker needs only a unique part of U0
if flg_X_sparse_j==1
V0_part_C{ip+1} = V0_part{1,mod(ip+s,C)+1};
V0_part_ind_C{ip+1} = V0_part{2,mod(ip+s,C)+1};
else
V0_part_C{ip+1} = V0_part{mod(ip+s,C)+1};
end
if flg_X_sparse_k==1
W0_part_C{ip+1} = W0_part{1,mod(ip+floor(s/C),C)+1};
W0_part_ind_C{ip+1} = W0_part{2,mod(ip+floor(s/C),C)+1};
else
W0_part_C{ip+1} = W0_part{mod(ip+floor(s/C),C)+1};
end
%send also the stratum s to the workers
s_C{ip+1} = s;%this s is the stratum starting from 0 to C^2-1
it_C{ip+1} = it;
end
spmd(C)
if (it_C==1) && (s_C==0)
U0_internal = U0_part_C;
U0_internal_ind = U0_part_ind_C;
disp('read U0_C')
end
%run SGD on the Stratum Selected
[U0_internal,V0_internal,W0_internal] = f_SGDT_reg_forStrProximal_Itter_full_v3_0_noLossCom(...
NNZ_i_C,NNZ_j_C,NNZ_k_C,ITperPStep_C,X_sp_part_C{s_C+1},U0_internal,U0_internal_ind,V0_part_C,V0_part_ind_C,W0_part_C,W0_part_ind_C,...
h_C,g_C,flg_X_sparse_i_C,flg_X_sparse_j_C, flg_X_sparse_k_C,C_C, I_C, J_C, K_C);
end
% collect and concatenate the updates
for ip=0:C-1
if flg_X_sparse_j==1
V0_part{1,mod(ip+s,C)+1} = V0_internal{ip+1};
else
V0_part{mod(ip+s,C)+1} = V0_internal{ip+1};
end
if flg_X_sparse_k==1
W0_part{1,mod(ip+floor(s/C),C)+1} = W0_internal{ip+1};
else
W0_part{mod(ip+floor(s/C),C)+1} = W0_internal{ip+1};
end
end
%here ends a subepoch
end
% new parallel loss computing per stratum
for s = 0:(C^2)-1
%here begins a subepoch for loss computing
for ip=0:C-1
%send the appropriate blocks of V0_part and W0_part to the workers. The
%U0_part blocks have been allready sent to the workers since each
%worker needs only a unique part of U0
if flg_X_sparse_j==1
V0_part_C{ip+1} = V0_part{1,mod(ip+s,C)+1};
V0_part_ind_C{ip+1} = V0_part{2,mod(ip+s,C)+1};
else
V0_part_C{ip+1} = V0_part{mod(ip+s,C)+1};
end
if flg_X_sparse_k==1
W0_part_C{ip+1} = W0_part{1,mod(ip+floor(s/C),C)+1};
W0_part_ind_C{ip+1} = W0_part{2,mod(ip+floor(s/C),C)+1};
else
W0_part_C{ip+1} = W0_part{mod(ip+floor(s/C),C)+1};
end
%send also the stratum s to the workers
s_C{ip+1} = s;%this s is the stratum starting from 0 to C^2-1
end
spmd(C)
Loss_real_C = f_compute_real_Loss_StrSGD_v3...
(U0_internal,U0_internal_ind,V0_part_C,V0_part_ind_C,W0_part_C,W0_part_ind_C,...
X_sp_part_C{s_C+1},flg_X_sparse_i_C,flg_X_sparse_j_C, flg_X_sparse_k_C,...
C_C, I_C, J_C, K_C);
end
if (s==0)
Loss_real_tmp = 0;
end
for ip=1:C
Loss_real_tmp = Loss_real_tmp + Loss_real_C{ip};
end
%here ends a subepoch for loss computatuion
end
Loss_real(it) = sqrt(Loss_real_tmp)/norm_X;
disp(Loss_real(it));
% new parallel loss computing ends here
if (Loss_real(it)<=precision)
EpochTime(it) = toc(execution_time);
break;
end
EpochTime(it) = toc(execution_time);
%here ends an epoch
end
if flg_Img==1 || flg_Img==2
U0_new = zeros(I,R);
V0_new = zeros(J,R);
W0_new = zeros(K,R);
for i=0:C-1
if i==C-1
flg_mod_X = mod(I,C);
flg_mod_Y = mod(J,C);
flg_mod_K = mod(K,C);
else
flg_mod_X = 0;
flg_mod_Y = 0;
flg_mod_K = 0;
end
U0_new(i*index_part_X+1:(i+1)*index_part_X + flg_mod_X,:) = U0_internal{i+1};
V0_new(i*index_part_Y+1:(i+1)*index_part_Y + flg_mod_Y,:) = V0_part{i+1};
W0_new(i*index_part_K+1:(i+1)*index_part_K + flg_mod_K,:) = W0_part{i+1};
end
end
T = toc(execution_time);
figure('Name','Loss Plot','NumberTitle','off');
title(['StrPrSGD r=',num2str(R),' h=', num2str(h),' C=', num2str(C), ' g=',num2str(g),...
' ItPerPStep=', num2str(ITperPStep), 'Time=',num2str(T)]);
plot(Loss_real,'r'); %computed with tensors
%ploting the loss versus the wallclock time elapsed
figure('Name','Loss vs Time','NumberTitle','off');
title(['StrPrSGD r=',num2str(R),' h=', num2str(h),' C=', num2str(C), ' g=',num2str(g),...
' ItPerPStep=', num2str(ITperPStep), 'Time=',num2str(T)]);
plot(EpochTime,Loss_real,'r');
%output image
if flg_Img == 1
f_imgDisplay(U0_new, V0_new,W0_new);
end
if flg_Img == 2
f_spec_ImgDisp(U0_new, V0_new, W0_new, 63, 52, 36);
end
disp(T);
end