-
Notifications
You must be signed in to change notification settings - Fork 0
/
affinity.hpp
140 lines (117 loc) · 5.01 KB
/
affinity.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#pragma once
#ifndef __HPC_AFFINITY_HPP__
#define __HPC_AFFINITY_HPP__
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

#include "cpu.hpp"
namespace hpc {
/// @brief Build the list of core ids that parallel work may be scheduled on.
///
/// Declared `inline` because this is a header-defined function: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
///
/// @param affinity When true, the list is whatever `get_thread_affinity(false)`
///        returns (NOTE(review): presumably the cores the calling thread is
///        allowed to run on — confirm against cpu.hpp). When false, the list is
///        0, 1, ..., get_hardware_concurrency() - 1.
/// @return The selected core ids.
inline std::vector<int> cal_parallel_cores(bool affinity = true) {
  if (affinity) {
    return get_thread_affinity(false);
  }

  // Normalise to int once so the loop below never mixes signed and unsigned.
  const int hardware_concurrency = static_cast<int>(get_hardware_concurrency());

  std::vector<int> cores;
  if (hardware_concurrency > 0) {
    cores.reserve(static_cast<std::size_t>(hardware_concurrency));
  }
  for (int core = 0; core < hardware_concurrency; ++core) {
    cores.push_back(core);
  }
  return cores;
}
/// @brief Resolve the requested thread count into an effective thread count.
///
/// Declared `inline` because this is a header-defined function (avoids
/// multiple-definition link errors when the header is included more than once).
///
/// @param thread_num Requested number of threads; 0 means "use `core_num`".
/// @param core_num   Number of cores available; only used when `thread_num` is 0.
/// @return `core_num` when `thread_num` is 0, otherwise `thread_num`.
/// @throws std::invalid_argument (a `std::exception`) if `thread_num` is
///         negative or greater than `MAX_THREADS`, or if `thread_num` is 0 and
///         `core_num` is negative.
inline std::size_t cal_thread_num(int thread_num, int core_num) {
  if (thread_num < 0 || thread_num > MAX_THREADS) {
    throw std::invalid_argument(
        "cal_thread_num: thread_num is negative or exceeds MAX_THREADS");
  }
  if (thread_num == 0) {
    // A negative count would otherwise wrap to a huge unsigned value.
    if (core_num < 0) {
      throw std::invalid_argument("cal_thread_num: core_num must not be negative");
    }
    return static_cast<std::size_t>(core_num);
  }
  return static_cast<std::size_t>(thread_num);
}
/// @brief Resolve the requested stream count into an effective stream count.
///
/// Declared `inline` because this is a header-defined function (avoids
/// multiple-definition link errors when the header is included more than once).
///
/// @param stream_num Requested number of streams; 0 means "use the value of
///        `get_hardware_sockets()`" (NOTE(review): presumably the number of CPU
///        sockets — confirm against cpu.hpp).
/// @return `stream_num` when it is non-zero, otherwise `get_hardware_sockets()`.
/// @throws std::invalid_argument if `stream_num` is negative; previously a
///         negative value silently wrapped to a huge unsigned count.
inline std::size_t cal_stream_num(int stream_num) {
  if (stream_num < 0) {
    throw std::invalid_argument("cal_stream_num: stream_num must not be negative");
  }
  if (stream_num != 0) {
    return static_cast<std::size_t>(stream_num);
  }
  return static_cast<std::size_t>(get_hardware_sockets());
}
/// @brief Distribute a list of core ids over `thread_num` threads.
///
/// Two regimes, chosen by comparing the thread count with `cores.size()`:
///  - threads <= cores: every thread receives a contiguous slice of `cores`.
///    Slices have `cores.size() / thread_num` entries, and the first
///    `cores.size() % thread_num` threads receive one extra core.
///  - threads >  cores: every thread receives exactly one core and cores are
///    shared. Each core hosts `thread_num / cores.size()` consecutive threads,
///    and the first `thread_num % cores.size()` cores host one extra thread.
///
/// Declared `inline` because this is a header-defined function (avoids
/// multiple-definition link errors when the header is included more than once).
///
/// @param thread_num Number of threads to produce affinity lists for.
/// @param cores      Core ids to distribute; the vector is only read.
/// @return One vector of core ids per thread (size == `thread_num`). If
///         `thread_num` is 0 the result is empty; if `cores` is empty every
///         thread gets an empty list (both cases previously divided by zero).
/// @throws std::invalid_argument if `thread_num` is negative.
inline std::vector<std::vector<int>> cal_thread_affinity(
    int thread_num, const std::vector<int>& cores) {
  if (thread_num < 0) {
    throw std::invalid_argument("cal_thread_affinity: thread_num must not be negative");
  }

  const std::size_t threads = static_cast<std::size_t>(thread_num);
  const std::size_t core_num = cores.size();
  std::vector<std::vector<int>> thread_affinity(threads);

  // Nothing to distribute: also keeps the divisions below well-defined.
  if (threads == 0 || core_num == 0) {
    return thread_affinity;
  }

  if (threads <= core_num) {
    // Several cores per thread: hand out contiguous slices of `cores`.
    const std::size_t base = core_num / threads;
    const std::size_t extra = core_num % threads;  // threads that get base + 1
    std::size_t core_idx = 0;
    for (std::size_t t = 0; t < threads; ++t) {
      const std::size_t count = base + (t < extra ? 1 : 0);
      std::vector<int>& assigned = thread_affinity[t];
      assigned.reserve(count);
      for (std::size_t k = 0; k < count; ++k) {
        assigned.push_back(cores[core_idx++]);
      }
    }
  } else {
    // Several threads per core: consecutive threads share the same core.
    const std::size_t base = threads / core_num;
    const std::size_t extra = threads % core_num;  // cores that host base + 1
    std::size_t thread_idx = 0;
    for (std::size_t c = 0; c < core_num; ++c) {
      const std::size_t count = base + (c < extra ? 1 : 0);
      for (std::size_t k = 0; k < count; ++k) {
        thread_affinity[thread_idx++].push_back(cores[c]);
      }
    }
  }
  return thread_affinity;
}
/// @brief Compute, for every stream, the core-affinity list of each of its threads.
///
/// Steps: pick the usable cores (`cal_parallel_cores`), resolve the thread and
/// stream counts (`cal_thread_num`, `cal_stream_num`), compute one affinity
/// list per thread (`cal_thread_affinity`), then split the threads over the
/// streams in order. Each stream receives `thread_num / stream_num` consecutive
/// threads; any remaining threads are appended to the last stream. Note that
/// when there are more streams than threads, the per-stream share is 0, so
/// every stream except the last is empty and the last one holds all threads.
///
/// Declared `inline` because this is a header-defined function (avoids
/// multiple-definition link errors when the header is included more than once).
///
/// @param streams  Requested stream count, forwarded to `cal_stream_num`.
/// @param threads  Requested thread count, forwarded to `cal_thread_num`.
/// @param affinity Forwarded to `cal_parallel_cores`.
/// @param verbose  When true, prints the resulting layout to `std::cout`.
/// @return Indexed as result[stream][thread] -> list of core ids.
/// @throws std::invalid_argument if `streams` is negative.
/// @throws std::runtime_error if the resolved stream count is 0 (this
///         previously caused a division by zero and an out-of-range access).
/// @throws Whatever the helper functions listed above throw.
inline std::vector<std::vector<std::vector<int>>> cal_streams_affinity(
    int streams, int threads, bool affinity, bool verbose) {
  if (streams < 0) {
    throw std::invalid_argument("cal_streams_affinity: streams must not be negative");
  }

  auto cores = cal_parallel_cores(affinity);
  const std::size_t thread_num =
      cal_thread_num(threads, static_cast<int>(cores.size()));
  const std::size_t stream_num = cal_stream_num(streams);
  if (stream_num == 0) {
    throw std::runtime_error("cal_streams_affinity: stream count resolved to zero");
  }

  const std::vector<std::vector<int>> thread_affinity =
      cal_thread_affinity(static_cast<int>(thread_num), cores);
  const std::size_t threads_per_stream = thread_num / stream_num;

  std::vector<std::vector<std::vector<int>>> streams_affinity;
  std::size_t thread_idx = 0;
  for (std::size_t s = 0; s < stream_num; ++s) {
    streams_affinity.emplace_back();
    std::vector<std::vector<int>>& stream = streams_affinity.back();
    stream.reserve(threads_per_stream);
    for (std::size_t k = 0; k < threads_per_stream; ++k) {
      stream.push_back(thread_affinity[thread_idx++]);
    }
  }
  // Threads left over by the integer division all go to the last stream.
  for (; thread_idx < thread_num; ++thread_idx) {
    streams_affinity.back().push_back(thread_affinity[thread_idx]);
  }

  if (verbose) {
    for (const auto& stream : streams_affinity) {
      std::cout << "stream affinity cores {";
      for (const auto& thread : stream) {
        std::cout << "[";
        for (const int core : thread) {
          std::cout << core << ",";
        }
        std::cout << "],";
      }
      std::cout << "}," << std::endl;
    }
  }
  return streams_affinity;
}
}
#endif // __HPC_AFFINITY_HPP__