-
Notifications
You must be signed in to change notification settings - Fork 11
/
test_case_lcp_lemke_vdsp_multithread.h
475 lines (357 loc) · 16.5 KB
/
test_case_lcp_lemke_vdsp_multithread.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
#ifndef __TEST_CASE_LCP_LEMKE_VDSP_MULTITHREAD_H__
#define __TEST_CASE_LCP_LEMKE_VDSP_MULTITHREAD_H__
#include <iostream>
#include <vector>
#include <limits>
#include <string>
#include <Accelerate/Accelerate.h>
#include "thread_synchronizer.h"
#include "test_case_lcp.h"
template<class T, bool IS_COL_MAJOR>
class TestCaseLCP_lemke_vdsp_multithread : public TestCaseLCP<T, IS_COL_MAJOR> {
// Type of problem that can be solved:
//
// M z + q = w
//
// s.t. 0 <= z cmpl. w >= 0
//
// M need not be symmetric. (For the precise list of the types of matrices accepted,
// please see 2.3 "CONDITIONS UNDER WHICH THE COMPLEMENTARY PIVOT ALGORITHM WORKS"
// of "LINEAR COMPLEMENTARITY, LINEAR AND NONLINEAR PROGRAMMING" by Katta G. Murty.)
//
// - All the constraints must be unilateral constraints, i.e., 0<=z cmpl. w>=0.
//
// - Boxed constraints can be solved with the following reformulation of the problem.
// See NotesOnBoxedConstraintsForLemkeSolvers.md.
//
// - Mixed complementarity problem with some bilateral constraints can be solved as follows.
// See NotesOnBilateralConstraintsForLemkeSolvers.md.
//
// Description of the table
// ------------------------
//
// width: |    m_dim    |    m_dim    |      1      |      1      |
// -------+-------------+-------------+-------------+-------------+
//        |             |             |             |             |
// field  |      -1     |      -1     |      -1     |      -1     |
// type   |     B   I   |     B   M   |     B   e   |     B   q   |
//        |             |             |             |             |
// -------+-------------+-------------+-------------+-------------+
//        |             |             |             |             |
//        | ^^^^^^^^^^^ |
// desc-  | Columns for the slack variables.
// ription| The original columns constitute the identity matrix,
//        | and the up-to-date columns here represent the current
//        | inverse for the matrix B for the basic variables.
//        |             |
//        |             | ^^^^^^^^^^^
//        |             | Columns for the original variables z.
//        |             | The original columns correspond to the matrix M.
//        |             |             |
//        |             |             | ^^^^^^^^^^^ |
//        |             |             | Column for the artificial variable z_0
//        |             |             | It originally consists of -1s.
//        |             |             |             |             |
//        |             |             |             | ^^^^^^^^^^^ |
//        |             |             |             | The column for RHS q.
// The pivoting table. Allocated in the constructor with dim * m_num_columns elements
// and addressed throughout as m_table[ row * m_num_columns + column ].
T* m_table;
// array of indices for the rows of the table to keep track of the current basic variables.
int* m_basic_variables_along_rows;
// Number of columns of the table: dim + dim + 1 + 1 (set in the constructor).
const int m_num_columns;
// Column index of the artificial variable z_0: dim + dim (set in the constructor).
const int m_z0_index;
// Upper bound used by the pivot loop in run().
const int m_max_num_iterations;
// Synchronizer on which pivot() calls notify() and each worker thread calls wait( thread_index ).
WaitNotifyMultipleWaiters m_fan_out;
// Synchronizer on which each worker thread calls notify() and pivot() calls wait().
WaitNotifyMultipleNotifiers m_fan_in;
// Number of worker threads created in the constructor.
const int m_num_threads;
// The worker threads; each one repeatedly runs pivot_block() on its own range of rows.
vector<thread> m_threads;
// Column of the current pivot. Written by pivot() and read by pivot_block().
int m_entering_col_index;
// Row of the current pivot. Written by pivot() and read by pivot_block().
int m_leaving_row_index;
public:
// Allocates the pivoting table and starts the worker threads.
//
// @param dim                 dimension of the problem; the table gets dim rows and
//                            dim + dim + 1 + 1 columns.
// @param condition_num       forwarded to the base class TestCaseLCP.
// @param max_num_iterations  upper bound used by the pivot loop in run().
// @param epsilon             forwarded to the base class TestCaseLCP.
// @param num_threads         number of worker threads; must be positive.
// @param p_type              forwarded to the base class TestCaseLCP.
//
// Each worker owns a contiguous range of rows. It waits on m_fan_out, runs
// pivot_block() on its range, and then notifies m_fan_in, until one of the
// two synchronizers reports that it is terminating.
TestCaseLCP_lemke_vdsp_multithread( const int dim, const T condition_num, const int max_num_iterations, const T epsilon, const int num_threads, const LCPTestPatternType p_type )
    :TestCaseLCP<T, IS_COL_MAJOR>( dim, condition_num, epsilon, p_type )
    ,m_num_columns        ( dim + dim + 1 + 1 )
    ,m_z0_index           ( dim + dim )
    ,m_max_num_iterations ( max_num_iterations )
    ,m_fan_out            ( num_threads )
    ,m_fan_in             ( num_threads )
    ,m_num_threads        ( num_threads )
    ,m_entering_col_index ( -1 )
    ,m_leaving_row_index  ( -1 )
{
    // The table is addressed row by row, so only the row-major layout is supported.
    // (This check used to be assert(true), which can never fire.)
    assert( !IS_COL_MAJOR && "column major not supported for Lemke." );

    // The number of threads is used as a divisor below.
    assert( num_threads > 0 && "at least one worker thread is required." );

    m_table = new T [ dim * (m_num_columns ) ];

    m_basic_variables_along_rows = new int [ dim ];

    this->setCPPBlock( num_threads, 1 );

    this->setImplementationType( LEMKE_VDSP );

    // Rows are split evenly among the threads, rounding up.
    const int num_rows_per_thread = (this->m_dim + m_num_threads - 1) / m_num_threads;

    auto thread_lambda = [this, num_rows_per_thread ]( const size_t thread_index ) {

        const int row_begin = static_cast<int>( thread_index ) * num_rows_per_thread;
        // The last range is clipped to the number of rows.
        const int row_end   = min(this->m_dim, row_begin + num_rows_per_thread);

        while ( true ) {

            m_fan_out.wait( thread_index );
            if( m_fan_out.isTerminating() ) {
                break;
            }

            pivot_block( row_begin, row_end );

            m_fan_in.notify();
            if( m_fan_in.isTerminating() ) {
                break;
            }
        }
    };

    if ( m_num_threads > 0 ) {
        m_threads.reserve( static_cast<size_t>( m_num_threads ) );
    }
    for ( int i = 0; i < m_num_threads; i++ ) {
        m_threads.emplace_back( thread_lambda, static_cast<size_t>( i ) );
    }
}
// Stops the worker threads and releases the table.
//
// The workers read and write m_table in pivot_block(), so they are terminated
// and joined first; the buffers are freed only after no thread can touch them.
virtual ~TestCaseLCP_lemke_vdsp_multithread(){

    m_fan_out.terminate();
    m_fan_in.terminate();

    for ( auto& t : m_threads ) {
        if ( t.joinable() ) {
            t.join();
        }
    }

    delete[] m_basic_variables_along_rows;
    delete[] m_table;
}
virtual void run() {
int num_iterations = 0;
if ( find_min_q() >= 0.0 ) {
// already feasible.
memset( this->m_z, 0, sizeof(T) * this->m_dim );
memcpy( this->m_w, this->m_q, sizeof(T) * this->m_dim );
return;
}
// from here on we assume m_q has at least one negative element.
fill_initial_values();
const auto initial_entering_col_index = this->m_dim + this->m_dim; //z_0
const auto initial_leaving_row_index = find_initial_leaving_row_index();
// pivot
pivot( initial_entering_col_index, initial_leaving_row_index );
int entering_col_index = get_complementary_index( initial_leaving_row_index );
m_basic_variables_along_rows[initial_leaving_row_index] = initial_entering_col_index;
// pivot loop until z_0 becomes non-basic
while ( num_iterations++ < m_max_num_iterations && entering_col_index != m_z0_index ) {
int leaving_row_index = find_leaving_row_index( entering_col_index );
if ( leaving_row_index == -1 ) {
cerr << "ERROR: ray-termination\n";
break; // ray-termination
}
pivot( entering_col_index, leaving_row_index );
int new_entering_col_index = get_complementary_index( leaving_row_index );
m_basic_variables_along_rows[leaving_row_index] = entering_col_index;
entering_col_index = new_entering_col_index;
}
if ( num_iterations >= m_max_num_iterations ) {
cerr << "ERROR: max number of pivoting exceeded.\n";
}
// arrange solutions in m_z and m_w.
memset( this->m_z, 0, sizeof(T) * this->m_dim );
memset( this->m_w, 0, sizeof(T) * this->m_dim );
for ( int row_index = 0; row_index < this->m_dim; row_index++ ) {
auto col_index = m_basic_variables_along_rows[row_index];
if ( col_index < this->m_dim ) {
// slack variable w_i
this->m_w[col_index] = this->m_table[ row_index * m_num_columns + this->m_dim + this->m_dim + 1 ];
}
else if ( col_index < m_z0_index ) {
// real variable z_i
this->m_z[ col_index - this->m_dim ] = this->m_table[ row_index * m_num_columns + this->m_dim + this->m_dim + 1 ];
}
else {
cerr << "ERROR: final solution contains z0\n";
}
}
this->setIterations( num_iterations, 0, 0 );
}
// Returns the smallest element of m_q, computed with vDSP_minv (float)
// or vDSP_minvD (any other T) over m_dim elements.
inline T find_min_q() {

    T smallest;

    if constexpr ( !std::is_same<float, T>::value ) {
        vDSP_minvD( this->m_q, 1, &smallest, this->m_dim );
    }
    else {
        vDSP_minv( this->m_q, 1, &smallest, this->m_dim );
    }

    return smallest;
}
// Returns the column index of the variable complementary to the one that is
// currently basic along the given row:
//   w_i -> z_i,  z_i -> w_i,  and z_0 -> z_0.
int get_complementary_index( const int leaving_row ) {

    const int basic_col = m_basic_variables_along_rows[leaving_row];

    if ( basic_col >= m_z0_index ) {
        // the artificial variable z_0 has no complement.
        return m_z0_index;
    }

    if ( basic_col >= this->m_dim ) {
        // real variable z_i: its complement is the slack variable w_i.
        return basic_col - this->m_dim;
    }

    // slack variable w_i: its complement is z_i.
    return basic_col + this->m_dim;
}
void pivot( const int entering_col_index, const int leaving_row_index ) {
m_entering_col_index = entering_col_index;
m_leaving_row_index = leaving_row_index;
m_fan_out.notify();
m_fan_in.wait();
T* pivot_row = &(m_table[ m_leaving_row_index * m_num_columns ]);
const T pivot_denom = 1.0 / pivot_row[ m_entering_col_index ];
const int adjacent_row_index = (leaving_row_index == 0) ? 1 : (leaving_row_index - 1);
const T* dummy_row = &(m_table[ adjacent_row_index * m_num_columns ]);
if constexpr ( is_same<float, T>::value ) {
catlas_saxpby( m_num_columns, 0.0, dummy_row, 1, pivot_denom, pivot_row, 1 );
}
else {
catlas_daxpby( m_num_columns, 0.0, dummy_row, 1, pivot_denom, pivot_row, 1 );
}
pivot_row[ entering_col_index ] = 1.0;
}
void pivot_block( const int row_begin, const int row_end ) {
T* pivot_row = &(m_table[ m_leaving_row_index * m_num_columns ]);
const T pivot_denom = 1.0 / pivot_row[ m_entering_col_index ];
for ( size_t row_index = row_begin; row_index < row_end; row_index++ ) {
if ( row_index != m_leaving_row_index ) {
T* current_row = &(m_table[ row_index * m_num_columns ]);
const T coeff = -1.0 * current_row[ m_entering_col_index ] * pivot_denom;
if constexpr ( is_same<float, T>::value ) {
catlas_saxpby( m_num_columns, coeff, pivot_row, 1, 1.0, current_row, 1 );
}
else {
catlas_daxpby( m_num_columns, coeff, pivot_row, 1, 1.0, current_row, 1 );
}
current_row[ m_entering_col_index ] = 0.0;
}
}
}
void fill_initial_values() {
for ( int row_index = 0; row_index < this->m_dim; row_index++ ) {
// Slack part. Fill with 0.0 first.
memset( &(m_table[ row_index * m_num_columns ]), 0, sizeof(T) * this->m_dim );
// Slack part diagonal with 1.0
m_table[ row_index * m_num_columns + row_index ] = 1.0;
// -M part. m_table[ row ] := m_M[ row ] * -1.0
const T alpha = -1.0;
if constexpr ( is_same<float, T>::value ) {
vDSP_vsmul ( &(this->m_M[ row_index * this->m_dim ]), 1, &alpha,
&(m_table[ row_index * m_num_columns + this->m_dim ]), 1, this->m_dim );
}
else {
vDSP_vsmulD ( &(this->m_M[ row_index * this->m_dim ]), 1, &alpha,
&(m_table[ row_index * m_num_columns + this->m_dim ]), 1, this->m_dim );
}
// z_0 part. Fill with -1.0.
m_table[ row_index * m_num_columns + this->m_dim + this->m_dim ] = -1.0;
// q part.
m_table[ row_index * m_num_columns + this->m_dim + this->m_dim + 1 ] = this->m_q[ row_index ];
// initially, all the rows correspond to the slack variables.
m_basic_variables_along_rows[row_index] = row_index;
}
}
int find_initial_leaving_row_index() {
// find the minimum row.
// It's not necessarity the lexico minimum,
// but since it happens only once in the beginning,
// it does not cause a pivot cycle.
T min_q;
unsigned long min_q_index;
if constexpr ( is_same<float, T>::value ) {
vDSP_minvi( this->m_q, 1, &min_q, &min_q_index, this->m_dim );
}
else {
vDSP_minviD( this->m_q, 1, &min_q, &min_q_index, this->m_dim );
}
return (int)min_q_index;
}
// Finds the leaving row for the given entering column by the lexicographic
// minimum-ratio test.
//
// 1. Among the rows whose entry in the entering column exceeds m_epsilon, the
//    rows with the minimum ratio q / entry are collected. Ratios closer than
//    m_epsilon to the current minimum are treated as ties.
// 2. No such row: returns -1 (ray termination).
// 3. A tie that includes the row of z_0 is resolved in favor of z_0.
// 4. Any other tie is broken by comparing, column by column from the left, the
//    entries of the slack part of the table divided by the same denominators.
//
// @param entering_table_index  column index of the entering variable.
// @return the leaving row index, or -1 if no row qualifies.
int find_leaving_row_index(int entering_table_index) {

    std::vector<int> active_rows;

    T current_min = std::numeric_limits<T>::max();

    for ( int row_index = 0; row_index < this->m_dim; row_index++ ) {

        const T denom = m_table[ row_index * m_num_columns + entering_table_index ];

        if ( denom > this->m_epsilon ) {

            const T q = m_table[ row_index * m_num_columns + this->m_dim + this->m_dim + 1 ] / denom;

            if ( fabs(current_min - q) < this->m_epsilon ) {

                active_rows.push_back(row_index);
            }
            else if ( current_min > q ) {

                current_min = q;
                active_rows.clear();
                active_rows.push_back(row_index);
            }
        }
    }

    if ( active_rows.empty() ) {
        // ray termination.
        return -1;
    }
    if ( active_rows.size() == 1 ) {
        return active_rows.front();
    }

    // If there are multiple rows, check if they contain the row for z_0.
    for ( const int row_index : active_rows ) {

        if ( m_basic_variables_along_rows[row_index] == m_z0_index ) {

            return row_index;
        }
    }

    // look through the columns of the inverse of the basic matrix from left to right.
    // until the tie is broken.
    std::vector<int> candidates;

    for ( int col_index = 0; col_index < this->m_dim ; col_index++ ) {

        // Move the survivors of the previous column into candidates and start
        // over with an empty active_rows, without copying the vector.
        candidates.clear();
        candidates.swap( active_rows );

        current_min = std::numeric_limits<T>::max();

        for ( const int row_index : candidates ) {

            // denom must be positive here.
            const T denom = m_table[ row_index * m_num_columns + entering_table_index ];
            const T b_col = m_table[ row_index * m_num_columns + col_index ] / denom;

            if ( fabs(current_min - b_col) < this->m_epsilon ) {

                active_rows.push_back(row_index);
            }
            else if ( current_min > b_col ) {

                current_min = b_col;
                active_rows.clear();
                active_rows.push_back(row_index);
            }
        }

        if ( active_rows.size() == 1 ) {
            return active_rows.front();
        }
    }

    // must not reach here. (This used to be assert(true), which never fires.)
    assert( false && "lexicographic tie-break did not single out a row." );
    return -1;
}
// Returns a printable name for a column of the table:
// "w1".."wN" for the slack columns, "z1".."zN" for the columns of the
// original variables, "z0" for the artificial variable, and "q" otherwise.
string from_col_index_to_var_name(const int i) {

    if ( i > this->m_z0_index ) {
        return "q";
    }

    if ( i == this->m_z0_index ) {
        return "z0";
    }

    if ( i >= this->m_dim ) {
        return "z" + std::to_string( i + 1 - this->m_dim );
    }

    return "w" + std::to_string( i + 1 );
}
// Writes the table to os, tab-separated, with precision 3:
// a header line with the column names, then one line per row that starts
// with the name of the variable currently basic along that row.
void print_table(ostream& os) {

    os << std::setprecision(3);

    // header line
    os << "\n\t";
    for ( int c = 0; c < m_num_columns; c++ ) {
        os << from_col_index_to_var_name(c) << "\t";
    }
    os << "\n";

    // one line per row
    for ( int r = 0; r < this->m_dim; r++ ) {

        const int basic_col = m_basic_variables_along_rows[r];
        os << from_col_index_to_var_name(basic_col) << "\t";

        const int row_offset = r * this->m_num_columns;
        for ( int c = 0; c < m_num_columns; c++ ) {
            os << m_table[row_offset + c] << "\t";
        }
        os << "\n";
    }
}
// Writes the entering column and the leaving row of a pivot to os, each with
// its index and the name of the corresponding variable.
//
// @param os                  the stream to write to.
// @param entering_col_index  column index of the entering variable.
// @param leaving_row_index   row index of the leaving variable. A value outside
//                            [0, m_dim), such as the -1 that
//                            find_leaving_row_index() returns on ray
//                            termination, is printed as "(none)" instead of
//                            being used as an array index.
void print_enter_leave( ostream& os, const int entering_col_index , const int leaving_row_index ) {

    os << "entering_col_index: " << entering_col_index << " "
       << from_col_index_to_var_name(entering_col_index) << "\n";

    os << "leaving_row_index: " << leaving_row_index << " ";

    if ( leaving_row_index >= 0 && leaving_row_index < this->m_dim ) {
        os << from_col_index_to_var_name( m_basic_variables_along_rows[leaving_row_index] );
    }
    else {
        os << "(none)";
    }

    os << "\n";
}
};
#endif /*__TEST_CASE_LCP_LEMKE_VDSP_MULTITHREAD_H__*/