-
Notifications
You must be signed in to change notification settings - Fork 0
/
thread.c
483 lines (470 loc) · 19.1 KB
/
thread.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
#include "chess.h"
#include "data.h"
#include "epdglue.h"
/* modified 06/07/09 */
/*
*******************************************************************************
* *
* Thread() is the driver for the threaded parallel search in Crafty. The *
* basic idea is that whenever we notice that one (or more) threads are *
* in their idle loop, we drop into thread, from Search(), and begin a new *
* parallel search from this node. This is simply a problem of copying the *
* search state space for each thread working at this node, then sending *
* everyone off to SearchParallel() to search this node in parallel. *
* *
*******************************************************************************
*/
int Thread(TREE * RESTRICT tree) {
TREE *block;
int proc;
int nblocks = 0;
/*
************************************************************
* *
* First, we make sure that there are idle threads. It *
* is possible that we get here after they have all been *
* claimed by another thread. In that case we return. *
* *
************************************************************
*/
Lock(lock_smp);
for (proc = 0; proc < smp_max_threads && thread[proc]; proc++);
if (proc == smp_max_threads || tree->stop) {
Unlock(lock_smp);
return (0);
}
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d block %d ply %d parallel split\n", tree->thread_id,
FindBlockID(tree), tree->ply);
Print(128, "thread %d threads(s) idle:", tree->thread_id);
for (proc = 0; proc < smp_max_threads; proc++)
if (!thread[proc])
Print(128, " %d", proc);
Print(128, "\n");
Unlock(lock_io);
#endif
/*
************************************************************
* *
* Now we start copying the current "state" from this *
* thread into new thread blocks so that the threads can *
* search this position without interfering with each *
* other. As we copy, we link those blocks as siblings *
* of the parent node, and point each sibling back to the *
* parent so we can unwind this confusion as the threads *
* complete their work. *
* *
************************************************************
*/
thread[tree->thread_id] = 0;
tree->nprocs = 0;
for (proc = 0;
proc < smp_max_threads && (nblocks < smp_max_thread_group ||
tree->ply == 1); proc++) {
tree->siblings[proc] = 0;
if (thread[proc] == 0) {
block = CopyToChild(tree, proc);
if (!block)
continue;
nblocks++;
tree->siblings[proc] = block;
block->thread_id = proc;
block->parent = tree;
tree->nprocs++;
#if defined(DEBUGSMP)
Print(128, "thread %d block %d allocated at ply=%d\n", proc,
FindBlockID(block), tree->ply);
#endif
}
}
tree->search_value = tree->value;
if (!nblocks) {
Unlock(lock_smp);
thread[tree->thread_id] = tree;
return (0);
}
/*
************************************************************
* *
* Everything is set. Now we can stuff the address of *
* the thread blocks into thread[i] so that those idle *
* threads can begin the parallel search. *
* *
************************************************************
*/
parallel_splits++;
for (proc = 0; proc < smp_max_threads; proc++)
if (tree->siblings[proc])
thread[proc] = tree->siblings[proc];
/*
************************************************************
* *
* After the threads are kicked off to start the parallel *
* search using the idle threads, This thread has to be *
* inserted as well. However, since it is possible that *
* this thread may finish before any or all of the other *
* parallel threads, this thread is sent to ThreadWait() *
* which will immediately send it to SearchParallel() *
* like the other threads. Going to ThreadWait() allows *
* this thread to join others if it runs out of work to *
* do. We do pass ThreadWait() the address of the parent *
* thread block, so that if this thread becomes idle, and *
* this thread block shows no threads are still busy, *
* then this thread can return to here and then back up *
* into the previous ply as it should. Note that no *
* other thread can back up to the previous ply since *
* their recursive call stacks are not set for that. *
* *
************************************************************
*/
Unlock(lock_smp);
ThreadWait(tree->thread_id, tree);
#if defined(DEBUGSMP)
Print(128, "thread %d block %d ply %d parallel join\n", tree->thread_id,
FindBlockID(tree), tree->ply);
#endif
return (1);
}
/* modified 06/07/09 */
/*
*******************************************************************************
* *
* CopyFromChild() is used to copy data from a child thread to a parent *
* thread. This only copies the appropriate parts of the TREE structure to *
* avoid burning memory bandwidth by copying everything. *
* *
*******************************************************************************
*/
void CopyFromChild(TREE * RESTRICT p, TREE * RESTRICT c, int value) {
int i;
if (c->nodes_searched && !c->stop && value > p->search_value) {
p->pv[p->ply] = c->pv[p->ply];
p->search_value = value;
for (i = 1; i < MAXPLY; i++)
p->killers[i] = c->killers[i];
p->cutmove = c->curmv[p->ply];
}
p->nodes_searched += c->nodes_searched;
p->fail_high += c->fail_high;
p->fail_high_first += c->fail_high_first;
p->evaluations += c->evaluations;
p->egtb_probes += c->egtb_probes;
p->egtb_probes_successful += c->egtb_probes_successful;
p->extensions_done += c->extensions_done;
p->qchecks_done += c->qchecks_done;
p->reductions_done += c->reductions_done;
p->moves_pruned += c->moves_pruned;
c->used = 0;
}
/* modified 06/07/09 */
/*
*******************************************************************************
* *
* CopyToChild() is used to copy data from a parent thread to a particular *
* child thread. This only copies the appropriate parts of the TREE *
* structure to avoid burning memory bandwidth by copying everything. *
* *
*******************************************************************************
*/
TREE *CopyToChild(TREE * RESTRICT p, int thread) {
int i, j, max;
TREE *c;
static int warnings = 0;
int first = thread * MAX_BLOCKS_PER_CPU + 1;
int last = first + MAX_BLOCKS_PER_CPU;
int maxb = smp_max_threads * MAX_BLOCKS_PER_CPU + 1;
for (i = first; i < last && block[i]->used; i++);
if (i >= last) {
if (++warnings < 6)
Print(128,
"WARNING. optimal SMP block cannot be allocated, thread %d\n",
thread);
for (i = 1; i < maxb && block[i]->used; i++);
if (i >= maxb) {
if (warnings < 6)
Print(128, "ERROR. no SMP block can be allocated\n");
return (0);
}
}
max = 0;
for (j = 1; j < maxb; j++)
if (block[j]->used)
max++;
max_split_blocks = Max(max_split_blocks, max);
c = block[i];
c->used = 1;
c->stop = 0;
for (i = 0; i < smp_max_threads; i++)
c->siblings[i] = 0;
c->pos = p->pos;
c->pv[p->ply - 1] = p->pv[p->ply - 1];
c->pv[p->ply] = p->pv[p->ply];
c->next_status[p->ply] = p->next_status[p->ply];
c->save_hash_key[p->ply] = p->save_hash_key[p->ply];
c->save_pawn_hash_key[p->ply] = p->save_pawn_hash_key[p->ply];
for (i = 0; i < 2; i++)
c->rep_index[i] = p->rep_index[i];
for (i = 0; i < 2; i++)
for (j = 0; j < c->rep_index[i]; j++)
c->rep_list[i][j] = p->rep_list[i][j];
c->last[p->ply] = c->move_list;
c->hash_move[p->ply] = p->hash_move[p->ply];
for (i = 1; i <= p->ply + 1; i++) {
c->position[i] = p->position[i];
c->curmv[i] = p->curmv[i];
c->inchk[i] = p->inchk[i];
c->phase[i] = p->phase[i];
}
for (i = 1; i < MAXPLY; i++)
c->killers[i] = p->killers[i];
c->nodes_searched = 0;
c->fail_high = 0;
c->fail_high_first = 0;
c->evaluations = 0;
c->egtb_probes = 0;
c->egtb_probes_successful = 0;
c->extensions_done = 0;
c->qchecks_done = 0;
c->reductions_done = 0;
c->moves_pruned = 0;
c->alpha = p->alpha;
c->beta = p->beta;
c->value = p->value;
c->wtm = p->wtm;
c->ply = p->ply;
c->depth = p->depth;
c->search_value = 0;
strcpy(c->root_move_text, p->root_move_text);
strcpy(c->remaining_moves_text, p->remaining_moves_text);
return (c);
}
/*
*******************************************************************************
* *
* WaitForAllThreadsInitialized() waits until all smp_max_threads are *
* initialized. We have to initialize each thread and malloc() its split *
* blocks before we start the actual parallel search. Otherwise we will see *
* invalid memory accesses and crash instantly. *
* *
*******************************************************************************
*/
void WaitForAllThreadsInitialized(void) {
while (initialized_threads < smp_max_threads); /* Do nothing */
}
/* modified 06/07/09 */
/*
*******************************************************************************
* *
* ThreadInit() is called after a process is created. Its main task is to *
* initialize the process local memory so that it will fault in and be *
* allocated on the local node rather than the node where the original *
* (first) process was running. All threads will hang here via a custom *
* WaitForALlThreadsInitialized() procedure so that all the local thread *
* blocks are usable before the search actually begins. *
* *
*******************************************************************************
*/
void *STDCALL ThreadInit(void *tid) {
int i, j;
#if defined(_WIN32) || defined(_WIN64)
ThreadMalloc((int) tid);
#endif
for (i = 0; i < MAX_BLOCKS_PER_CPU; i++) {
memset((void *) block[(long) tid * MAX_BLOCKS_PER_CPU + i + 1], 0,
sizeof(TREE));
block[(long) tid * MAX_BLOCKS_PER_CPU + i + 1]->used = 0;
block[(long) tid * MAX_BLOCKS_PER_CPU + i + 1]->parent = NULL;
LockInit(block[(long) tid * MAX_BLOCKS_PER_CPU + i + 1]->lock);
for (j = 0; j < 64; j++)
block[(long) tid * MAX_BLOCKS_PER_CPU + i + 1]->cache_n[j] = ~0ULL;
}
Lock(lock_smp);
initialized_threads++;
Unlock(lock_smp);
WaitForAllThreadsInitialized();
ThreadWait((long) tid, (TREE *) 0);
Lock(lock_smp);
smp_threads--;
Unlock(lock_smp);
return (0);
}
#if defined (_WIN32) || defined (_WIN64)
/* modified 01/17/09 */
/*
*******************************************************************************
* *
* ThreadMalloc() is called from the ThreadInit() function. It malloc's the *
* split blocks in the local memory for the processor associated with the *
* specific thread that is calling this code. *
* *
*******************************************************************************
*/
extern void *WinMalloc(size_t, int);
void ThreadMalloc(int tid) {
int i, n = MAX_BLOCKS_PER_CPU;
for (i = MAX_BLOCKS_PER_CPU * ((int) tid) + 1; n; i++, n--) {
if (block[i] == NULL)
block[i] =
(TREE *) ((~(size_t) 127) & (127 + (size_t) WinMalloc(sizeof(TREE) +
127, tid)));
block[i]->used = 0;
block[i]->parent = NULL;
LockInit(block[i]->lock);
}
}
#endif
/* modified 01/17/09 */
/*
*******************************************************************************
* *
* ThreadStop() is called from SearchParallel() when it detects a beta *
* cutoff (fail high) at a node that is being searched in parallel. We need *
* to stop all threads here, and since this threading algorithm is recursive *
* it may be necessary to stop other threads that are helping search this *
* branch further down into the tree. This function simply sets tree->stop *
* to 1, which will stop that particular thread instantly and return it to *
* the idle loop in ThreadWait(). *
* *
*******************************************************************************
*/
void ThreadStop(TREE * RESTRICT tree) {
int proc;
Lock(tree->lock);
tree->stop = 1;
for (proc = 0; proc < smp_max_threads; proc++)
if (tree->siblings[proc])
ThreadStop(tree->siblings[proc]);
Unlock(tree->lock);
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d (block %d) being stopped by beta cutoff.\n",
tree->thread_id, FindBlockID(tree));
Unlock(lock_io);
#endif
}
/* modified 01/17/09 */
/*
*******************************************************************************
* *
* ThreadWait() is the idle loop for the N threads that are created at the *
* beginning when Crafty searches. Threads are "parked" here waiting on a *
* pointer to something they should search (a parameter block built in the *
* function Thread() in this case. When this pointer becomes non-zero, each *
* thread "parked" here will immediately call SearchParallel() and begin the *
* parallel search as directed. *
* *
*******************************************************************************
*/
int ThreadWait(long tid, TREE * RESTRICT waiting) {
int value;
/*
************************************************************
* *
* The N initial threads enter here and are kept penned *
* here forever. However, once a thread leaves here it *
* may well re-enter ThreadWait() from the top while it *
* waits for a parallel search to complete. While it *
* waits here it can also join in to help other busy *
* threads search their subtrees as well. *
* *
************************************************************
*/
while (1) {
Lock(lock_smp);
smp_idle++;
Unlock(lock_smp);
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d now idle (%d procs, %d idle).\n", tid,
smp_max_threads, smp_idle);
if (FindBlockID(waiting) >= 0)
Print(128,
"thread %d waiting on block %d, still %d threads busy there\n",
tid, FindBlockID(waiting), waiting->nprocs);
Unlock(lock_io);
#endif
/*
************************************************************
* *
* We can exit if our thread[i] is non-zero, or if we are *
* waiting on others to finish a block that *we* have to *
* return through. When the busy count on such a block *
* hits zero, we return immediately which unwinds the *
* search as it should be. *
* *
************************************************************
*/
while (!thread[tid] && (!waiting || waiting->nprocs));
Lock(lock_smp);
if (!thread[tid])
thread[tid] = waiting;
/*
************************************************************
* *
* We either have work to do, or threads we were waiting *
* on have finished their work. *
* *
************************************************************
*/
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d now has work at block %d.\n", tid,
FindBlockID(thread[tid]));
Unlock(lock_io);
#endif
smp_idle--;
/*
************************************************************
* *
* If we are waiting on a block and the busy count is now *
* zero, we simply return to finish up the bookkeeping at *
* that point. *
* *
************************************************************
*/
Unlock(lock_smp);
if (thread[tid] == waiting || thread[tid] == (TREE *) - 1)
return (0);
/*
************************************************************
* *
* Else our thread[i] pointer is non-zero, meaning we *
* have been assigned something to search. hi-ho, hi-ho, *
* off to work we go... *
* *
************************************************************
*/
value =
SearchParallel(thread[tid], thread[tid]->alpha, thread[tid]->beta,
thread[tid]->value, thread[tid]->wtm, thread[tid]->depth,
thread[tid]->ply);
Lock(lock_smp);
Lock(thread[tid]->parent->lock);
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d block %d marked free at ply %d\n", tid,
FindBlockID(thread[tid]), thread[tid]->ply);
Unlock(lock_io);
#endif
CopyFromChild((TREE *) thread[tid]->parent, thread[tid], value);
thread[tid]->parent->nprocs--;
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d decremented block %d nprocs=%d\n", tid,
FindBlockID(thread[tid]->parent), thread[tid]->parent->nprocs);
Unlock(lock_io);
#endif
thread[tid]->parent->siblings[tid] = 0;
Unlock(thread[tid]->parent->lock);
#if defined(DEBUGSMP)
Lock(lock_io);
Print(128, "thread %d block %d parent %d nprocs %d exit at ply %d\n",
tid, FindBlockID(thread[tid]), FindBlockID(thread[tid]->parent),
thread[tid]->parent->nprocs, thread[tid]->ply);
Unlock(lock_io);
#endif
thread[tid] = 0;
Unlock(lock_smp);
}
}