8#ifdef QTESTLIB_USE_PERF_EVENTS
12#include "../corelib/kernel/qcore_unix_p.h"
22#include <sys/syscall.h>
24#include "3rdparty/linux/perf_event_p.h"
// Config values for PERF_TYPE_HW_CACHE events.
// Each value ORs three fields together: the cache id in the low bits, the
// cache operation shifted left by 8 and the access result shifted left by 16.
// (`<<` binds tighter than `|`, so no inner parentheses are needed.)

// Accesses
#define CACHE_L1D_READ \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_L1D_WRITE \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_L1D_PREFETCH \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_L1I_READ \
    (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_L1I_PREFETCH \
    (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_LLC_READ \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_LLC_WRITE \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_LLC_PREFETCH \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)

// Misses
#define CACHE_L1D_READ_MISS \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_L1D_WRITE_MISS \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_L1D_PREFETCH_MISS \
    (PERF_COUNT_HW_CACHE_L1D | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_L1I_READ_MISS \
    (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_L1I_PREFETCH_MISS \
    (PERF_COUNT_HW_CACHE_L1I | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_LLC_READ_MISS \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_LLC_WRITE_MISS \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_WRITE << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
#define CACHE_LLC_PREFETCH_MISS \
    (PERF_COUNT_HW_CACHE_LL | PERF_COUNT_HW_CACHE_OP_PREFETCH << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)

// Branch prediction unit, exposed through the same cache-event encoding
#define CACHE_BRANCH_READ \
    (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
#define CACHE_BRANCH_READ_MISS \
    (PERF_COUNT_HW_CACHE_BPU | PERF_COUNT_HW_CACHE_OP_READ << 8 | PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
// Global list of the perf events to measure. setCounter() appends the events
// the user asked for; start() replaces an empty list with defaultCounters().
56Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes);
// Returns the counters measured when none were selected explicitly: the
// task-clock software counter plus the CPU-cycles, instructions and
// branch-instructions hardware counters.
58static QList<PerfEvent> defaultCounters()
61 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
62 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
63 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
64 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// Thin wrapper around the perf_event_open system call. When the libc headers
// define SYS_perf_event_open, the arguments are forwarded unchanged to
// syscall() and the result is cast to int, since callers treat it as a file
// descriptor (or -1 on failure).
90static int perf_event_open(perf_event_attr *attr, pid_t pid,
int cpu,
int group_fd,
unsigned long flags)
92#ifdef SYS_perf_event_open
94 return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags));
// Probes for perf events support by issuing perf_event_open() with a null
// attribute pointer. The call is expected to fail; support is reported when
// it fails with any errno other than ENOSYS (ENOSYS meaning the system call
// itself does not exist).
// NOTE(review): presumably a supporting kernel rejects the null pointer with
// EFAULT — confirm.
106bool QBenchmarkPerfEventsMeasurer::isAvailable()
110 return perf_event_open(
nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS;
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
202 QTest::QBenchmarkMetric metric;
// Event names packed back to back into a single char array; every name ends
// with an embedded '\0'. Rows of eventlist refer to a name by its byte offset
// into this array (`eventlist_strings + ev.offset`), so adding, removing or
// reordering a literal here shifts the offsets stored in that table.
206static const char eventlist_strings[] =
208 "branch-instructions\0"
209 "branch-load-misses\0"
211 "branch-mispredicts\0"
214 "branch-read-misses\0"
228 "idle-cycles-backend\0"
229 "idle-cycles-frontend\0"
231 "l1d-cache-load-misses\0"
233 "l1d-cache-prefetch-misses\0"
234 "l1d-cache-prefetches\0"
235 "l1d-cache-read-misses\0"
237 "l1d-cache-store-misses\0"
239 "l1d-cache-write-misses\0"
243 "l1d-prefetch-misses\0"
251 "l1i-cache-load-misses\0"
253 "l1i-cache-prefetch-misses\0"
254 "l1i-cache-prefetches\0"
255 "l1i-cache-read-misses\0"
259 "l1i-prefetch-misses\0"
263 "llc-cache-load-misses\0"
265 "llc-cache-prefetch-misses\0"
266 "llc-cache-prefetches\0"
267 "llc-cache-read-misses\0"
269 "llc-cache-store-misses\0"
271 "llc-cache-write-misses\0"
275 "llc-prefetch-misses\0"
288 "stalled-cycles-backend\0"
289 "stalled-cycles-frontend\0"
// Table of the events that can be selected by name. Each row holds, in order:
// the byte offset of the event's name inside eventlist_strings, the perf
// event type, the perf event config, and the QTest metric under which the
// result is reported. Several rows share one type/config pair: they are
// alternative spellings of the same event.
// NOTE(review): the offsets increase row by row, i.e. rows follow the order
// of the names in eventlist_strings; setCounter() stores a three-way compare
// result per row and presumably depends on that order — confirm before
// reordering.
293static const Events eventlist[] = {
294 { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
295 { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
296 { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
297 { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
298 { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
299 { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
300 { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
301 { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
302 { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
303 { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
304 { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
305 { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
306 { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
307 { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
308 { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeNanoseconds },
309 { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
310 { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
311 { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
312 { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
313 { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
314 { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
315 { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
316 { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
317 { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
318 { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
319 { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
320 { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
321 { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
322 { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
323 { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
324 { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
325 { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
326 { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
327 { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
328 { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
329 { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
330 { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
331 { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
332 { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
333 { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
334 { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
335 { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
336 { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
337 { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
338 { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
339 { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
340 { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
341 { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
342 { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
343 { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
344 { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
345 { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
346 { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
347 { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
348 { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
349 { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
350 { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
351 { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
352 { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
353 { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
354 { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
355 { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
356 { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
357 { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
358 { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
359 { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
360 { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
361 { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
362 { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
363 { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
364 { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
365 { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
366 { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
367 { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
368 { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
369 { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
370 { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
371 { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
372 { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
373 { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
374 { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles },
375 { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
376 { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
377 { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeNanoseconds },
// Maps a perf event to the QTest metric used to report it by scanning
// eventlist for a row whose type and config both match `counter`.
// QTest::Events is the generic fallback metric.
// NOTE(review): the return taken on a match is presumably the row's metric —
// confirm.
381static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter)
383 for (
const Events &ev : eventlist) {
384 if (ev.type == counter.type && ev.event_id == counter.config)
387 return QTest::Events;
// Selects the events to measure from a textual specification.
//
// `name` is a comma-separated list of event names. Everything from the first
// ':' onwards is discarded before parsing. Each name is compared against the
// names in eventlist_strings; a recognised name adds its (type, config) pair
// to eventTypes, an unrecognised one is reported on stderr.
390void QBenchmarkPerfEventsMeasurer::setCounter(
const char *name)
393 std::string_view input = name;
// find() yields npos when the character is absent; stored in the signed
// qsizetype that becomes negative, so `idx >= 0` means "found".
394 if (qsizetype idx = input.find(
':'); idx >= 0)
395 input = input.substr(0, idx);
// Consume one comma-separated name per iteration.
397 while (!input.empty()) {
398 std::string_view countername = input;
399 if (qsizetype idx = countername.find(
','); idx >= 0)
400 countername = countername.substr(0, idx);
402 for (
const Events &ev : eventlist) {
403 int c = countername.compare(eventlist_strings + ev.offset);
407 fprintf(stderr,
"ERROR: Performance counter type '%.*s' is unknown\n",
408 int(countername.size()), countername.data());
411 eventTypes->append({ ev.type, ev.event_id });
// Equal sizes mean no comma was found: this was the last name in the list.
415 if (countername.size() == input.size())
// Drop the name just handled together with the comma that follows it.
418 input.remove_prefix(countername.size() + 1);
// Prints the selectable event names to stdout, one per line, each followed by
// its category in brackets: "hardware", "software", "cache", or "other" for
// any remaining perf event type. When isAvailable() reports no support, a
// notice saying so is printed.
427void QBenchmarkPerfEventsMeasurer::listCounters()
429 if (!isAvailable()) {
430 printf(
"Performance counters are not available on this system\n");
434 printf(
"The following performance counters are available:\n");
435 for (
const Events &ev : eventlist) {
436 printf(
" %-30s [%s]\n", eventlist_strings + ev.offset,
437 ev.type == PERF_TYPE_HARDWARE ?
"hardware" :
438 ev.type == PERF_TYPE_SOFTWARE ?
"software" :
439 ev.type == PERF_TYPE_HW_CACHE ?
"cache" :
"other");
// Nothing is set up at construction time; the perf event descriptors are
// opened in start().
443QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() =
default;
// Visits every perf event descriptor stored in fds.
// NOTE(review): presumably each descriptor is closed here — confirm.
445QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
447 for (
int fd : std::as_const(fds))
// Opens one perf event descriptor per selected counter, then resets and
// enables the counters.
//
// A single perf_event_attr is shared by all counters; only its type and
// config change from one counter to the next. If no counter was selected
// through setCounter(), the list from defaultCounters() is used.
451void QBenchmarkPerfEventsMeasurer::start()
453 QT_WARNING_DISABLE_GCC(
"-Wmissing-field-initializers")
454 QT_WARNING_DISABLE_CLANG(
"-Wmissing-field-initializers")
455 perf_event_attr attr = {
// Have the kernel report the enabled and running times next to each count;
// rawReadValue() reads and compares them.
457 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING,
461 .inherit_stat =
true,
465 QList<PerfEvent> &counters = *eventTypes;
466 if (counters.isEmpty())
467 counters = defaultCounters();
472 int flags = PERF_FLAG_FD_CLOEXEC;
474 fds.reserve(counters.size());
475 for (PerfEvent counter : std::as_const(counters)) {
476 attr.type = counter.type;
477 attr.config = counter.config;
478 int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
// Second attempt with kernel and hypervisor events excluded.
// NOTE(review): presumably only taken when the first open failed — confirm.
481 attr.exclude_kernel =
true;
482 attr.exclude_hv =
true;
483 fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
486 perror(
"QBenchmarkPerfEventsMeasurer::start: perf_event_open");
// Reset every counter, then enable counting for the task.
495 for (
int fd : std::as_const(fds))
496 ::ioctl(fd, PERF_EVENT_IOC_RESET);
497 prctl(PR_TASK_PERF_EVENTS_ENABLE);
// Disables the task's perf counters, then reads each selected counter through
// readValue(). The result list has one Measurement per entry of eventTypes,
// in the same order.
500QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop()
503 prctl(PR_TASK_PERF_EVENTS_DISABLE);
505 const QList<PerfEvent> &counters = *eventTypes;
506 QList<Measurement> result(counters.size(), {});
507 for (qsizetype i = 0; i < counters.size(); ++i) {
508 result[i] = readValue(i);
// The Measurement parameter is unnamed, so the answer cannot depend on the
// measurement passed in.
513bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement)
// The suggested iteration count is an unnamed parameter, so the returned
// count cannot depend on it.
518int QBenchmarkPerfEventsMeasurer::adjustIterationCount(
int)
// The suggested median count is an unnamed parameter, so the returned count
// cannot depend on it.
523int QBenchmarkPerfEventsMeasurer::adjustMedianCount(
int)
// Reads one counter's record from the perf event descriptor `fd` and returns
// its count. The record carries the enabled and running times next to the
// value, matching the read_format requested in start().
528static quint64 rawReadValue(
int fd)
531
532
533
534
535
536
537
541 quint64 time_enabled;
542 quint64 time_running;
// Keep reading until the whole record has arrived; a single read may return
// fewer bytes than requested.
546 while (nread <
sizeof results) {
547 char *ptr =
reinterpret_cast<
char *>(&results);
548 qint64 r = qt_safe_read(fd, ptr + nread,
sizeof results - nread);
550 perror(
"QBenchmarkPerfEventsMeasurer::readValue: reading the results");
// The counter ran for the whole time it was enabled: the count is exact.
556 if (results.time_running == results.time_enabled)
557 return results.value;
// Otherwise the count is scaled by running/enabled; the double result is
// converted back to quint64 on return, dropping any fractional part.
// NOTE(review): perf tooling conventionally extrapolates a partially
// scheduled counter with enabled/running; this factor is the inverse and
// shrinks the count instead — confirm the intended direction.
560 return results.value * (
double(results.time_running) /
double(results.time_enabled));
// Builds the Measurement for the counter at position `idx`: the raw count is
// read from fds.at(idx), converted through qint64 to qreal, and paired with
// the metric that metricForEvent() gives for eventTypes->at(idx).
563QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx)
565 quint64 raw = rawReadValue(fds.at(idx));
566 return { qreal(qint64(raw)), metricForEvent(eventTypes->at(idx)) };