-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathParallel.h
118 lines (110 loc) · 3.41 KB
/
Parallel.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/* -*- C++ -*-
*
* Parallel.h
*
* Copyright (C) 2014-2019 jh10001 <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#pragma once
#if defined(USE_PARALLEL) && defined(USE_OMP_PARALLEL)
#undef USE_PARALLEL
#endif
#ifdef USE_PARALLEL
#include "SDL_cpuinfo.h"
#include "SDL_thread.h"
#include "SDL_timer.h"
#include "SDL_atomic.h"
#endif
#ifdef USE_OMP_PARALLEL
#include <omp.h>
#endif
#if defined(USE_PARALLEL) || defined(USE_OMP_PARALLEL)
#include <assert.h>
namespace parallel {
#ifdef USE_OMP_PARALLEL
// Thread-count ceiling used by thread_clamp() and For(): the value returned by
// omp_get_num_procs(). Declared static in a header, so every translation unit
// that includes this file gets its own copy, initialised during static init.
static const int thread_num = omp_get_num_procs();
#elif defined(USE_PARALLEL)
// Thread-count ceiling used by thread_clamp(), For() and ThreadPool: the value
// returned by SDL_GetCPUCount(). Declared static in a header, so every
// translation unit that includes this file gets its own copy.
static const int thread_num = SDL_GetCPUCount();
/*
 * Pool of SDL worker threads that For() hands chunks of work to.
 *
 * Member functions are only declared here; their definitions live in another
 * translation unit. The pool owns a raw array of thread slots and SDL handles,
 * so it is explicitly non-copyable: an implicit copy would alias the same
 * array, threads and semaphores.
 */
class ThreadPool {
	// One slot per potential worker thread.
	struct Thread {
		// Worker state values. NOTE(review): presumably what threadData.status
		// holds -- confirm against the implementation file.
		enum Status {
			IDLE, RUNNING, EXIT
		};
		SDL_Thread *thread;
		// Job description handed to a worker.
		struct ThreadData {
			void(*body)(void *data);  // routine to run; receives `data`
			void *data;               // opaque argument forwarded to `body`
			SDL_atomic_t status;
			SDL_sem *sem;
		} threadData;
	};
	// Slot array sized by thread_num, allocated when the pool is constructed.
	Thread *threads = new Thread[thread_num];
	int threadNum = 0, threadCreated = 0;
	void createThread();
public:
	// Declaring the deleted copy operations suppresses the implicit default
	// constructor, so it is requested explicitly; the in-class member
	// initialisers above still apply.
	ThreadPool() = default;
	ThreadPool(const ThreadPool&) = delete;
	ThreadPool& operator=(const ThreadPool&) = delete;
	~ThreadPool();
	// Submits func to the pool with threadData as its argument.
	// For() passes a pointer to per-chunk data that must stay alive until the
	// following sync() call.
	Thread* newThread(void(*func)(void *body), void* threadData);
	// For() calls this after submitting all chunks and before freeing their
	// data. NOTE(review): presumably blocks until every submitted job has
	// finished -- confirm against the implementation file.
	void sync();
};
// The single shared pool instance; defined in another translation unit.
extern ThreadPool threadPool;
#endif
// Limits a requested thread count to the range [1, thread_num].
// The upper bound is applied first and the lower bound last, so the result is
// never below 1.
static int thread_clamp(int threadnum) {
	const int capped = threadnum > thread_num ? thread_num : threadnum;
	return capped < 1 ? 1 : capped;
}
/*
 * Parallel counted loop: calls body(i) for i = first, first + step, ... while
 * i < last, distributing the iterations over several threads.
 *
 * first, last  Half-open index range [first, last). Nothing is executed when
 *              last <= first.
 * step         Stride between consecutive indices; must be > 0 (asserted).
 *              Honoured by both the OpenMP and the thread-pool back end.
 * body         Callable invoked as body(int). It may be invoked from several
 *              threads at once, in no particular order.
 * scale        Workload-size hint. When > 0 the thread count is
 *              thread_clamp(scale / MINSCALE); otherwise thread_num threads
 *              are requested.
 *
 * The call returns only after every iteration has run.
 */
template<typename Body>
void For(const int first, const int last, const int step, const Body &body, const int scale = -1) {
	assert(step > 0);
	if (last <= first) return;
	static const int MINSCALE = 65536 * 4;
#ifdef USE_OMP_PARALLEL
	omp_set_num_threads(scale > 0 ? thread_clamp(scale / MINSCALE) : thread_num);
#pragma omp parallel for
	for (int i = first; i < last; i += step) body(i);
#elif defined USE_PARALLEL
	// Per-worker job description: the index sub-range, the stride and the body.
	struct ThreadData {
		int lr[2];
		int step;
		const Body *body;
	};
	// Every worker runs the same routine, so one function pointer is enough.
	// A captureless lambda converts to the plain pointer newThread() expects.
	void(*const worker)(void*) = [](void *ptr) {
		const ThreadData &job = *static_cast<const ThreadData*>(ptr);
		for (int i = job.lr[0]; i < job.lr[1]; i += job.step) (*job.body)(i);
	};
	// Number of iterations in [first, last) with stride step. last > first is
	// guaranteed here, so the numerator is non-negative.
	const int count = (last - first - 1) / step + 1;
	int nthread = scale > 0 ? thread_clamp(scale / MINSCALE) : thread_num;
	if (nthread < 1) nthread = 1;          // guards the division below
	if (count < nthread) nthread = count;  // never more threads than iterations
	const int chunk = count / nthread;     // iterations per pool worker
	ThreadData *td = new ThreadData[nthread - 1];
	// Hand out chunks from the tail of the range. Chunk boundaries are always
	// first + k * step, so every worker visits exactly the indices the serial
	// loop would. The calling thread keeps the head plus any remainder.
	int lend = last;
	int iend = count;
	for (int t = nthread; t > 1; --t) {
		const int ibegin = iend - chunk;
		const int lstart = first + ibegin * step;
		td[t - 2] = { { lstart, lend }, step, &body };
		threadPool.newThread(worker, &td[t - 2]);
		lend = lstart;
		iend = ibegin;
	}
	for (int i = first; i < lend; i += step) body(i);
	// The job descriptions must outlive the workers: sync before freeing them.
	threadPool.sync();
	delete[] td;
#endif
}
}
#endif