-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.cpp
155 lines (132 loc) · 5.25 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//
// Created by Mikhail Gorshkov on 29.04.2022.
//
//
#include <pthread.h>
#include <sched.h>
#include <x86intrin.h>

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <exception>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <vector>
//#define DEBUG_LOG
/// Samples the processor time-stamp counter via the `__rdtsc` intrinsic.
///
/// @return the counter value exactly as the intrinsic reports it (in TSC ticks).
static inline std::uint64_t rdtsc() {
    const std::uint64_t ticks = __rdtsc();
    return ticks;
}
/// Restricts `thread` to run on the single logical CPU `cpu`.
///
/// Pinning is best effort: when `pthread_setaffinity_np` reports a failure the
/// error code is written to stderr and the function returns normally.
///
/// @param thread  handle of the thread to pin
/// @param cpu     index of the CPU to place in the affinity mask
static inline void setThreadAffinity(pthread_t thread, int cpu) {
    // Build a mask that contains exactly one CPU.
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);

    const int status = pthread_setaffinity_np(thread, sizeof(mask), &mask);
    if (status == 0) {
        return;
    }
    std::cerr << "Error calling pthread_setaffinity_np: " << status << std::endl;
}
// CACHELINE_SIZE is normally supplied by the build (e.g. -DCACHELINE_SIZE=64).
// Fall back to 64 bytes so the file still compiles when the flag is absent.
// NOTE(review): 64 is assumed to match the target CPU's cache line — confirm
// for the machine being measured.
#ifndef CACHELINE_SIZE
#define CACHELINE_SIZE 64
#endif

/// One message slot: exactly CACHELINE_SIZE bytes, aligned to CACHELINE_SIZE.
///
/// `timestamp` carries the payload the benchmark inspects; `data` only pads
/// the struct out to the full slot size.
struct alignas(CACHELINE_SIZE) CacheLine {
    uint64_t timestamp;
    unsigned char data[CACHELINE_SIZE - sizeof(uint64_t)];
};

static_assert(sizeof(CacheLine) == CACHELINE_SIZE,
              "CacheLine must occupy exactly CACHELINE_SIZE bytes");
/// Computes the median distance between consecutive published timestamps and
/// converts it from TSC ticks to nanoseconds.
///
/// The array is read as pairs of slots: slot `i` (even) holds a timestamp and
/// slot `i + 1` holds its copy. For every pair that has a successor, the
/// difference `slot[i + 2] - slot[i]` is collected; the median of those
/// differences is divided by `freq`.
///
/// @param cacheLineArray  the slots to analyse (ownership is taken)
/// @param size            number of slots in `cacheLineArray`
/// @param freq            ticks per nanosecond (the caller passes the CPU
///                        frequency in GHz)
/// @return the median difference in nanoseconds, or 0 when fewer than two
///         pairs are available and no difference can be formed.
static int64_t getReport(std::unique_ptr<CacheLine[]> cacheLineArray, std::size_t size, double freq) {
    const std::size_t pairCount = size / 2;

    std::vector<uint64_t> report;
    if (pairCount > 1) {
        report.reserve(pairCount - 1);
    }

    // `i + 1 < size` (rather than `i < size - 1`) stays correct for size == 0.
    for (std::size_t i = 0; i + 1 < size; i += 2) {
#ifdef DEBUG_LOG
        std::cout << i << " " << cacheLineArray[i].timestamp << " " << cacheLineArray[i + 1].timestamp << std::endl;
#endif
        if (cacheLineArray[i + 1].timestamp != cacheLineArray[i].timestamp)
            std::cerr << "Incorrect data, offset=" << i << std::endl;

        // The final pair has no successor; reading slot i + 2 there would run
        // past the end of the array.
        if (i + 2 < size) {
            report.push_back(cacheLineArray[i + 2].timestamp - cacheLineArray[i].timestamp);
        }
    }

    if (report.empty()) {
        return 0;
    }

    std::sort(report.begin(), report.end());
    const std::size_t reportSize = report.size();
    const std::size_t medianPos = reportSize / 2;
    // For an even count average the two middle values; written as
    // lo + (hi - lo) / 2 so the sum cannot overflow.
    const uint64_t median = reportSize % 2 == 0
        ? report[medianPos - 1] + (report[medianPos] - report[medianPos - 1]) / 2
        : report[medianPos];
#ifdef DEBUG_LOG
    std::cout << "Median=" << median << std::endl;
#endif
    auto roundtrip_nanoseconds = static_cast<int64_t>(static_cast<double>(median) / freq);
    return roundtrip_nanoseconds;
}
// One write/read loop iteration:
//------------------------------------------------------------
// stage 1 | stage 2 | stage 3 | stage 4 | stage 1
// publisher write | wait | wait | read | ...
// consumer wait | read | write | wait | ...
//-------------------------------------------------------------
// Data looks like this
// TS counter 1 (write)
// TS counter 1 (read)
// TS counter 2 (write)
// TS counter 2 (read)
// …
// TS counter 1000000 (write)
// TS counter 1000000 (read)
// "(read)" entries are the timestamps the consumer read and copied into the following slot
// Then we calculate the diffs
// TS diff 1 = TS counter 2 - TS counter 1
// TS diff 2 = TS counter 3 - TS counter 2
// …
// TS diff 999999 = TS counter 1000000 - TS counter 999999
static int64_t runBenchmark(int num_messages, int core1, int core2, double freq) {
const std::size_t size = num_messages * 2;
std::unique_ptr<CacheLine[]> cacheLineArray{new CacheLine[size]()};
const auto startPtr = &cacheLineArray[0];
const auto endPtr = startPtr + size;
// consumer
std::thread consumer{[startPtr, endPtr, core1] {
setThreadAffinity(pthread_self(), core1);
auto ptr = startPtr;
// same cacheline for read/write
while (ptr < endPtr) {
CacheLine cacheLine;
do {
std::memcpy(&cacheLine, ptr, sizeof(CacheLine)); // read
} while (cacheLine.timestamp == 0);
++ptr;
std::memcpy(ptr++, &cacheLine, sizeof(CacheLine)); // write
}
}};
setThreadAffinity(pthread_self(), core2);
// publisher
auto ptr = startPtr;
while (ptr < endPtr) {
CacheLine cacheLine{rdtsc()};
std::memcpy(ptr++, &cacheLine, sizeof(CacheLine)); // write
CacheLine cacheLineRead;
do {
std::memcpy(&cacheLineRead, ptr, sizeof(CacheLine)); // read
} while (cacheLineRead.timestamp != cacheLine.timestamp);
++ptr;
}
consumer.join();
return getReport(std::move(cacheLineArray), size, freq) / 2; // single trip
}
/// Entry point: prints the banner and usage to stderr, parses the optional
/// positional arguments and runs the benchmark.
///
/// Arguments (all optional, positional):
///   1. number of messages (default 1000000, must be >= 2)
///   2. consumer CPU id (default 0)
///   3. producer CPU id (default 1)
///   4. CPU frequency in GHz (default 3.0, must be > 0)
///
/// @return 0 after a successful run; 1 for `-h` or any invalid argument.
int main(int argc, char** argv) {
    const char* kAbout =
        "A tool for measuring latency of a single cache line-length message delivery between different cores of a CPU";
    const char* kUsage = "Usage: ./measure_cache_line_delivery_ns [<num_messages> (default 1M)] "
                         "[<consumer cpu id> (default 0)] [<producer cpu id> (default 1)] [<cpu freq (GHz)> (default 3.0)]\n"
                         "example: ./measure_cache_line_delivery_ns 1000 5 6 4.0";
    std::cerr << kAbout << std::endl;
    std::cerr << "Cache line size is " << CACHELINE_SIZE << " bytes" << std::endl;
    std::cerr << kUsage << std::endl;
    if (argc == 2 && std::strcmp(argv[1], "-h") == 0) {
        return 1;
    }

    int num_messages = 1000000;
    int core1 = 0;
    int core2 = 1;
    double freq = 3.0;
    try {
        // std::stoi / std::stod throw on non-numeric or out-of-range input;
        // report that as a usage error instead of terminating.
        if (argc > 1) num_messages = std::stoi(argv[1]);
        if (argc > 2) core1 = std::stoi(argv[2]);
        if (argc > 3) core2 = std::stoi(argv[3]);
        if (argc > 4) freq = std::stod(argv[4]);
    } catch (const std::exception& e) {
        std::cerr << "Invalid argument: " << e.what() << std::endl;
        return 1;
    }

    if (num_messages < 2) {
        std::cerr << "num_messages should be >= 2" << std::endl;
        return 1;
    }
    // The tick count is divided by freq; written as !(freq > 0) so NaN is
    // rejected as well.
    if (!(freq > 0.0)) {
        std::cerr << "cpu freq should be > 0" << std::endl;
        return 1;
    }

    std::cout << "Running the benchmark with parameters: num_messages=" << num_messages << ", core1=" << core1 <<
        ", core2=" << core2 << ", freq=" << freq << "GHz..." << std::endl;
    auto nanoseconds = runBenchmark(num_messages, core1, core2, freq);
    std::cout << "Single trip is " << nanoseconds << "ns " << std::endl;
    return 0;
}