-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathcpubenchmark.h
95 lines (75 loc) · 2.94 KB
/
cpubenchmark.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
* (c) Daniel Lemire, http://lemire.me/en/
*/
#ifndef CPUBENCHMARK_H_
#define CPUBENCHMARK_H_
#include "common.h"
namespace FastPForLib {
#if defined(__corei7__) // __amd64__ is untested
// start and stop are as recommended by
// Gabriele Paoloni, How to Benchmark Code Execution Times on Intel® IA-32 and
// IA-64 Instruction Set Architectures
// September 2010
// http://edc.intel.com/Link.aspx?id=3954
/**
 * Takes the counter reading that opens a measured interval
 * (CPUBenchmark::start() stores the value returned here).
 *
 * The asm sequence is: CPUID, then RDTSC, then two movs that copy EDX into
 * cycles_high (%0) and EAX into cycles_low (%1).
 *
 * @return cycles_high in the upper 32 bits combined with cycles_low in the
 *         lower 32 bits.
 */
static __inline__ unsigned long long startRDTSC(void) {
unsigned cycles_low, cycles_high;
// CPUID is issued ahead of RDTSC. rax, rbx, rcx and rdx are all declared as
// clobbered, so the "=r" outputs are never placed in those registers.
// NOTE(review): there is no input operand, so EAX is not set to a particular
// CPUID leaf before the instruction runs; CPUID appears to be used only for
// ordering here - confirm.
asm volatile("CPUID\n\t"
"RDTSC\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
: "=r"(cycles_high), "=r"(cycles_low)::"%rax", "%rbx", "%rcx",
"%rdx");
// Widen the high half before shifting so the shift happens in 64 bits.
return (static_cast<unsigned long long>(cycles_high) << 32) | cycles_low;
}
/**
 * Takes the counter reading that closes a measured interval
 * (CPUBenchmark::stop() subtracts the stored start value from it).
 *
 * The asm sequence is: RDTSCP, two movs that copy EDX into cycles_high (%0)
 * and EAX into cycles_low (%1), and only then CPUID.
 *
 * @return cycles_high in the upper 32 bits combined with cycles_low in the
 *         lower 32 bits.
 */
static __inline__ unsigned long long stopRDTSCP(void) {
unsigned cycles_low, cycles_high;
/// This should work fine on most machines; if the RDTSCP instruction
/// fails for you, use the rdtsc() call instead.
// CPUID runs after the two halves have been copied out, so the registers it
// overwrites no longer hold the reading. rax, rbx, rcx and rdx are declared
// as clobbered.
// NOTE(review): the trailing CPUID is presumably there to keep later
// instructions from running ahead of the read, as in the referenced Intel
// paper - confirm.
asm volatile("RDTSCP\n\t"
"mov %%edx, %0\n\t"
"mov %%eax, %1\n\t"
"CPUID\n\t"
: "=r"(cycles_high), "=r"(cycles_low)::"%rax", "%rbx", "%rcx",
"%rdx");
// Widen the high half before shifting so the shift happens in 64 bits.
return (static_cast<unsigned long long>(cycles_high) << 32) | cycles_low;
}
#elif defined(_MSC_VER)
// MSVC build: the start-of-interval reading is the value returned by
// __rdtsc().
static inline unsigned long long startRDTSC(void) {
  const unsigned long long tsc_at_start = __rdtsc();
  return tsc_at_start;
}
// MSVC build: the end-of-interval reading. Despite the "RDTSCP" in the name,
// this variant simply returns the value of __rdtsc().
static inline unsigned long long stopRDTSCP(void) {
  const unsigned long long tsc_at_stop = __rdtsc();
  return tsc_at_stop;
}
#elif defined(__i386__) || defined(__x86_64__)
// Taken from stackoverflow (see
// http://stackoverflow.com/questions/3830883/cpu-cycle-count-based-profiling-in-c-c-linux-x86-64)
// Can give nonsensical results on multi-core AMD processors.
/**
 * Reads the counter with a CPUID + RDTSC pair.
 *
 * EAX is loaded with 0 as the asm input, CPUID runs, then RDTSC runs; the
 * asm outputs are taken from EAX (lo) and EDX (hi).
 *
 * @return lo in the lower 32 bits combined with hi in the upper 32 bits.
 */
inline unsigned long long rdtsc() {
unsigned int lo, hi;
// EBX and ECX are listed as clobbers; EAX and EDX are the output registers.
asm volatile("cpuid \n" /* serializing */
"rdtsc"
: "=a"(lo), "=d"(hi) /* outputs */
: "a"(0) /* inputs */
: "%ebx", "%ecx"); /* clobbers */
// Both halves are widened to 64 bits before being combined.
return (static_cast<unsigned long long>(lo)) |
((static_cast<unsigned long long>(hi)) << 32);
}
// Generic x86 / x86-64 build: the start-of-interval reading is forwarded
// straight from rdtsc().
static __inline__ unsigned long long startRDTSC(void) {
  const unsigned long long stamp = rdtsc();
  return stamp;
}
// Generic x86 / x86-64 build: the end-of-interval reading. Despite the
// "RDTSCP" in the name, this variant forwards to rdtsc().
static __inline__ unsigned long long stopRDTSCP(void) {
  const unsigned long long stamp = rdtsc();
  return stamp;
}
#elif(defined(__arm__) || defined(__ppc__) || defined(__ppc64__))
// Fallback for ARM and PowerPC builds: no cycle counter is read here, so all
// three timing functions are stubs that return 0 (any interval computed from
// them is therefore 0 as well).
// For PPC we should be able to use tbl, but I could not find
// an equivalent to rdtsc for ARM.
//
// The stubs return unsigned long long, exactly like the x86 and MSVC
// variants, instead of relying on the non-standard `uint64` / `ticks` type
// names.
inline unsigned long long rdtsc() { return 0; }
static __inline__ unsigned long long startRDTSC(void) { return 0; }
static __inline__ unsigned long long stopRDTSCP(void) { return 0; }
#else
#error Unknown architecture
#endif
/**
 * Small stopwatch built on startRDTSC() / stopRDTSCP().
 *
 * Constructing an object starts a measurement immediately; start() can be
 * called again to restart it, and stop() reports the difference between a
 * fresh stopRDTSCP() reading and the stored start value.
 */
class CPUBenchmark {
public:
  // Reading captured by the most recent start() call.
  unsigned long long ticktime;

  // Zero-initialises the stored reading, then begins timing right away.
  CPUBenchmark() : ticktime(0) {
    this->start();
  }

  // Records a new start-of-interval reading.
  void start() {
    this->ticktime = startRDTSC();
  }

  // Returns the end reading minus the stored start reading. The stored
  // value is left unchanged, so stop() may be called repeatedly.
  unsigned long long stop() {
    const unsigned long long end_reading = stopRDTSCP();
    return end_reading - this->ticktime;
  }
};
} // namespace FastPForLib
#endif /* CPUBENCHMARK_H_ */