Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qbenchmarkperfevents.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 Intel Corporation.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
6#include "qbenchmark_p.h"
7
8#ifdef QTESTLIB_USE_PERF_EVENTS
9
10// include the qcore_unix_p.h without core-private
11// we only use inline functions anyway
12#include "../corelib/kernel/qcore_unix_p.h"
13
14#include <sys/types.h>
15#include <errno.h>
16#include <fcntl.h>
17#include <string.h>
18#include <stdio.h>
19
20#include <sys/ioctl.h>
21#include <sys/prctl.h>
22#include <sys/syscall.h>
23
24#include "3rdparty/linux/perf_event_p.h"
25
26// for PERF_TYPE_HW_CACHE, the config is a bitmask
27// lowest 8 bits: cache type
28// bits 8 to 15: cache operation
29// bits 16 to 23: cache result
// Builds a PERF_TYPE_HW_CACHE config value from the three enumerator suffixes:
// cache type in bits 0-7, cache operation in bits 8-15, cache result in bits 16-23.
#define QTEST_PERF_HW_CACHE_CONFIG(cache, op, result) \
    ((PERF_COUNT_HW_CACHE_ ## cache) \
     | ((PERF_COUNT_HW_CACHE_OP_ ## op) << 8) \
     | ((PERF_COUNT_HW_CACHE_RESULT_ ## result) << 16))

// Accesses
#define CACHE_L1D_READ          QTEST_PERF_HW_CACHE_CONFIG(L1D, READ, ACCESS)
#define CACHE_L1D_WRITE         QTEST_PERF_HW_CACHE_CONFIG(L1D, WRITE, ACCESS)
#define CACHE_L1D_PREFETCH      QTEST_PERF_HW_CACHE_CONFIG(L1D, PREFETCH, ACCESS)
#define CACHE_L1I_READ          QTEST_PERF_HW_CACHE_CONFIG(L1I, READ, ACCESS)
#define CACHE_L1I_PREFETCH      QTEST_PERF_HW_CACHE_CONFIG(L1I, PREFETCH, ACCESS)
#define CACHE_LLC_READ          QTEST_PERF_HW_CACHE_CONFIG(LL, READ, ACCESS)
#define CACHE_LLC_WRITE         QTEST_PERF_HW_CACHE_CONFIG(LL, WRITE, ACCESS)
#define CACHE_LLC_PREFETCH      QTEST_PERF_HW_CACHE_CONFIG(LL, PREFETCH, ACCESS)

// Misses
#define CACHE_L1D_READ_MISS     QTEST_PERF_HW_CACHE_CONFIG(L1D, READ, MISS)
#define CACHE_L1D_WRITE_MISS    QTEST_PERF_HW_CACHE_CONFIG(L1D, WRITE, MISS)
#define CACHE_L1D_PREFETCH_MISS QTEST_PERF_HW_CACHE_CONFIG(L1D, PREFETCH, MISS)
#define CACHE_L1I_READ_MISS     QTEST_PERF_HW_CACHE_CONFIG(L1I, READ, MISS)
#define CACHE_L1I_PREFETCH_MISS QTEST_PERF_HW_CACHE_CONFIG(L1I, PREFETCH, MISS)
#define CACHE_LLC_READ_MISS     QTEST_PERF_HW_CACHE_CONFIG(LL, READ, MISS)
#define CACHE_LLC_WRITE_MISS    QTEST_PERF_HW_CACHE_CONFIG(LL, WRITE, MISS)
#define CACHE_LLC_PREFETCH_MISS QTEST_PERF_HW_CACHE_CONFIG(LL, PREFETCH, MISS)

// Branch prediction unit
#define CACHE_BRANCH_READ       QTEST_PERF_HW_CACHE_CONFIG(BPU, READ, ACCESS)
#define CACHE_BRANCH_READ_MISS  QTEST_PERF_HW_CACHE_CONFIG(BPU, READ, MISS)
48
49QT_BEGIN_NAMESPACE
50
// Identifies one counter to monitor as a (type, config) pair; start() copies
// both fields into the perf_event_attr it passes to perf_event_open().
51struct PerfEvent
52{
53 quint32 type; // assigned to perf_event_attr::type
54 quint64 config; // assigned to perf_event_attr::config
55};
// Counters chosen through setCounter(); when this list is empty, start()
// fills it with defaultCounters().
56Q_GLOBAL_STATIC(QList<PerfEvent>, eventTypes);
57
58static QList<PerfEvent> defaultCounters()
59{
60 return {
61 { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
62 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
63 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
64 { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
65 };
66}
67
68// This class does not exist in the API so its qdoc comment marker was removed.
69
70/*
71 \class QBenchmarkPerfEvents
72 \brief The Linux perf events benchmark backend
73
74 This benchmark backend uses the Linux Performance Counters interface,
75 introduced with the Linux kernel v2.6.31. The interface is done by one
76 system call (perf_event_open) which takes an attribute structure and
77 returns a file descriptor.
78
79 More information:
80 \li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
81 \li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
82 (note: as of v3.3.1, the documentation is out-of-date with the kernel
83 interface, so reading the source code of existing tools is necessary)
84
85 This benchlib backend monitors the current process as well as any child
86 processes it launches. We do not try to benchmark in kernel or hypervisor mode, as that
87 usually requires elevated privileges.
88 */
89
90static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
91{
92#ifdef SYS_perf_event_open
93 // syscall() returns long, but perf_event_open() is used to get a file descriptor
94 return int(syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags));
95#else
96 Q_UNUSED(attr);
97 Q_UNUSED(pid);
98 Q_UNUSED(cpu);
99 Q_UNUSED(group_fd);
100 Q_UNUSED(flags);
101 errno = ENOSYS;
102 return -1;
103#endif
104}
105
106bool QBenchmarkPerfEventsMeasurer::isAvailable()
107{
108 // this generates an EFAULT because attr == NULL if perf_event_open is available
109 // if the kernel is too old, it generates ENOSYS
110 return perf_event_open(nullptr, 0, 0, 0, 0) == -1 && errno != ENOSYS;
111}
112
113/* Event list structure
114 The following table provides the list of supported events
115
116 Event type Event counter Unit Name and aliases
117 HARDWARE CPU_CYCLES CPUCycles cycles cpu-cycles
118 HARDWARE REF_CPU_CYCLES RefCPUCycles ref-cycles
119 HARDWARE INSTRUCTIONS Instructions instructions
120 HARDWARE CACHE_REFERENCES CacheReferences cache-references
121 HARDWARE CACHE_MISSES CacheMisses cache-misses
122 HARDWARE BRANCH_INSTRUCTIONS BranchInstructions branch-instructions branches
123 HARDWARE BRANCH_MISSES BranchMisses branch-misses
124 HARDWARE BUS_CYCLES BusCycles bus-cycles
125 HARDWARE STALLED_CYCLES_FRONTEND StalledCycles stalled-cycles-frontend idle-cycles-frontend
126 HARDWARE STALLED_CYCLES_BACKEND StalledCycles stalled-cycles-backend idle-cycles-backend
127 SOFTWARE CPU_CLOCK WalltimeNanoseconds cpu-clock
128 SOFTWARE TASK_CLOCK WalltimeNanoseconds task-clock
129 SOFTWARE PAGE_FAULTS PageFaults page-faults faults
130 SOFTWARE PAGE_FAULTS_MAJ MajorPageFaults major-faults
131 SOFTWARE PAGE_FAULTS_MIN MinorPageFaults minor-faults
132 SOFTWARE CONTEXT_SWITCHES ContextSwitches context-switches cs
133 SOFTWARE CPU_MIGRATIONS CPUMigrations cpu-migrations migrations
134 SOFTWARE ALIGNMENT_FAULTS AlignmentFaults alignment-faults
135 SOFTWARE EMULATION_FAULTS EmulationFaults emulation-faults
136 HW_CACHE L1D_READ CacheReads l1d-cache-reads l1d-cache-loads l1d-reads l1d-loads
137 HW_CACHE L1D_WRITE CacheWrites l1d-cache-writes l1d-cache-stores l1d-writes l1d-stores
138 HW_CACHE L1D_PREFETCH CachePrefetches l1d-cache-prefetches l1d-prefetches
139 HW_CACHE L1I_READ CacheReads l1i-cache-reads l1i-cache-loads l1i-reads l1i-loads
140 HW_CACHE L1I_PREFETCH CachePrefetches l1i-cache-prefetches l1i-prefetches
141 HW_CACHE LLC_READ CacheReads llc-cache-reads llc-cache-loads llc-loads llc-reads
142 HW_CACHE LLC_WRITE CacheWrites llc-cache-writes llc-cache-stores llc-writes llc-stores
143 HW_CACHE LLC_PREFETCH CachePrefetches llc-cache-prefetches llc-prefetches
144 HW_CACHE L1D_READ_MISS CacheReads l1d-cache-read-misses l1d-cache-load-misses l1d-read-misses l1d-load-misses
145 HW_CACHE L1D_WRITE_MISS CacheWrites l1d-cache-write-misses l1d-cache-store-misses l1d-write-misses l1d-store-misses
146 HW_CACHE L1D_PREFETCH_MISS CachePrefetches l1d-cache-prefetch-misses l1d-prefetch-misses
147 HW_CACHE L1I_READ_MISS CacheReads l1i-cache-read-misses l1i-cache-load-misses l1i-read-misses l1i-load-misses
148 HW_CACHE L1I_PREFETCH_MISS CachePrefetches l1i-cache-prefetch-misses l1i-prefetch-misses
149 HW_CACHE LLC_READ_MISS CacheReads llc-cache-read-misses llc-cache-load-misses llc-read-misses llc-load-misses
150 HW_CACHE LLC_WRITE_MISS CacheWrites llc-cache-write-misses llc-cache-store-misses llc-write-misses llc-store-misses
151 HW_CACHE LLC_PREFETCH_MISS CachePrefetches llc-cache-prefetch-misses llc-prefetch-misses
152 HW_CACHE BRANCH_READ BranchInstructions branch-reads branch-loads branch-predicts
153 HW_CACHE BRANCH_READ_MISS BranchMisses branch-mispredicts branch-read-misses branch-load-misses
154
155 Use the following Perl script to re-generate the list
156=== cut perl ===
157#!/usr/bin/env perl
158# Load all entries into %map
159while (<STDIN>) {
160 m/^\s*(.*)\s*$/;
161 @_ = split /\s+/, $1;
162 $type = shift @_;
163 $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" :
164 $type eq "SOFTWARE" ? "PERF_COUNT_SW_" :
165 $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_;
166 $unit = shift @_;
167
168 for $string (@_) {
169 die "$string was already seen!" if defined($map{$string});
170 $map{$string} = [-1, $type, $id, $unit];
171 push @strings, $string;
172 }
173}
174
175# sort the map and print the string list
176@strings = sort @strings;
177print "static const char eventlist_strings[] = \n";
178$counter = 0;
179for $entry (@strings) {
180 print " \"$entry\\0\"\n";
181 $map{$entry}[0] = $counter;
182 $counter += 1 + length $entry;
183}
184
185# print the table
186print " \"\\0\";\n\nstatic const Events eventlist[] = {\n";
187for $entry (sort @strings) {
188 printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n",
189 $map{$entry}[0],
190 $map{$entry}[1],
191 $map{$entry}[2],
192 $map{$entry}[3];
193}
194print "};\n";
195=== cut perl ===
196*/
197
// One row of the generated eventlist table: ties a counter name to the perf
// event it selects and to the benchmark metric reported for it.
198struct Events {
199 unsigned offset; // start of the NUL-terminated name inside eventlist_strings
200 quint32 type; // matched against PerfEvent::type
201 quint64 event_id; // matched against PerfEvent::config
202 QTest::QBenchmarkMetric metric; // value returned by metricForEvent() for this event
203};
204
205/* -- BEGIN GENERATED CODE -- */
// All accepted counter names, each NUL-terminated and concatenated in sorted
// order. Entries of eventlist refer to a name by its byte offset in this
// array. Generated by the Perl script in the comment above; do not edit by hand.
206static const char eventlist_strings[] =
207 "alignment-faults\0"
208 "branch-instructions\0"
209 "branch-load-misses\0"
210 "branch-loads\0"
211 "branch-mispredicts\0"
212 "branch-misses\0"
213 "branch-predicts\0"
214 "branch-read-misses\0"
215 "branch-reads\0"
216 "branches\0"
217 "bus-cycles\0"
218 "cache-misses\0"
219 "cache-references\0"
220 "context-switches\0"
221 "cpu-clock\0"
222 "cpu-cycles\0"
223 "cpu-migrations\0"
224 "cs\0"
225 "cycles\0"
226 "emulation-faults\0"
227 "faults\0"
228 "idle-cycles-backend\0"
229 "idle-cycles-frontend\0"
230 "instructions\0"
231 "l1d-cache-load-misses\0"
232 "l1d-cache-loads\0"
233 "l1d-cache-prefetch-misses\0"
234 "l1d-cache-prefetches\0"
235 "l1d-cache-read-misses\0"
236 "l1d-cache-reads\0"
237 "l1d-cache-store-misses\0"
238 "l1d-cache-stores\0"
239 "l1d-cache-write-misses\0"
240 "l1d-cache-writes\0"
241 "l1d-load-misses\0"
242 "l1d-loads\0"
243 "l1d-prefetch-misses\0"
244 "l1d-prefetches\0"
245 "l1d-read-misses\0"
246 "l1d-reads\0"
247 "l1d-store-misses\0"
248 "l1d-stores\0"
249 "l1d-write-misses\0"
250 "l1d-writes\0"
251 "l1i-cache-load-misses\0"
252 "l1i-cache-loads\0"
253 "l1i-cache-prefetch-misses\0"
254 "l1i-cache-prefetches\0"
255 "l1i-cache-read-misses\0"
256 "l1i-cache-reads\0"
257 "l1i-load-misses\0"
258 "l1i-loads\0"
259 "l1i-prefetch-misses\0"
260 "l1i-prefetches\0"
261 "l1i-read-misses\0"
262 "l1i-reads\0"
263 "llc-cache-load-misses\0"
264 "llc-cache-loads\0"
265 "llc-cache-prefetch-misses\0"
266 "llc-cache-prefetches\0"
267 "llc-cache-read-misses\0"
268 "llc-cache-reads\0"
269 "llc-cache-store-misses\0"
270 "llc-cache-stores\0"
271 "llc-cache-write-misses\0"
272 "llc-cache-writes\0"
273 "llc-load-misses\0"
274 "llc-loads\0"
275 "llc-prefetch-misses\0"
276 "llc-prefetches\0"
277 "llc-read-misses\0"
278 "llc-reads\0"
279 "llc-store-misses\0"
280 "llc-stores\0"
281 "llc-write-misses\0"
282 "llc-writes\0"
283 "major-faults\0"
284 "migrations\0"
285 "minor-faults\0"
286 "page-faults\0"
287 "ref-cycles\0"
288 "stalled-cycles-backend\0"
289 "stalled-cycles-frontend\0"
290 "task-clock\0"
291 "\0";
292
// Lookup table with one row per accepted counter name, in the same sorted
// order as eventlist_strings. Columns: offset of the name in
// eventlist_strings, perf event type, perf event config, reported metric.
// setCounter() depends on the rows being sorted by name. Generated by the
// Perl script in the comment above; do not edit by hand.
293static const Events eventlist[] = {
294 { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
295 { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
296 { 37, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
297 { 56, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
298 { 69, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
299 { 88, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
300 { 102, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
301 { 118, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ_MISS, QTest::BranchMisses },
302 { 137, PERF_TYPE_HW_CACHE, CACHE_BRANCH_READ, QTest::BranchInstructions },
303 { 150, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
304 { 159, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
305 { 170, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
306 { 183, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
307 { 200, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
308 { 217, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeNanoseconds },
309 { 227, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
310 { 238, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
311 { 253, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
312 { 256, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
313 { 263, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
314 { 280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
315 { 287, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
316 { 307, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
317 { 328, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
318 { 341, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
319 { 363, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
320 { 379, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
321 { 405, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
322 { 426, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
323 { 448, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
324 { 464, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
325 { 487, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
326 { 504, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
327 { 527, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
328 { 544, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
329 { 560, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
330 { 570, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH_MISS, QTest::CachePrefetches },
331 { 590, PERF_TYPE_HW_CACHE, CACHE_L1D_PREFETCH, QTest::CachePrefetches },
332 { 605, PERF_TYPE_HW_CACHE, CACHE_L1D_READ_MISS, QTest::CacheReads },
333 { 621, PERF_TYPE_HW_CACHE, CACHE_L1D_READ, QTest::CacheReads },
334 { 631, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
335 { 648, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
336 { 659, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE_MISS, QTest::CacheWrites },
337 { 676, PERF_TYPE_HW_CACHE, CACHE_L1D_WRITE, QTest::CacheWrites },
338 { 687, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
339 { 709, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
340 { 725, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
341 { 751, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
342 { 772, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
343 { 794, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
344 { 810, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
345 { 826, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
346 { 836, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH_MISS, QTest::CachePrefetches },
347 { 856, PERF_TYPE_HW_CACHE, CACHE_L1I_PREFETCH, QTest::CachePrefetches },
348 { 871, PERF_TYPE_HW_CACHE, CACHE_L1I_READ_MISS, QTest::CacheReads },
349 { 887, PERF_TYPE_HW_CACHE, CACHE_L1I_READ, QTest::CacheReads },
350 { 897, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
351 { 919, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
352 { 935, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
353 { 961, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
354 { 982, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
355 { 1004, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
356 { 1020, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
357 { 1043, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
358 { 1060, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
359 { 1083, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
360 { 1100, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
361 { 1116, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
362 { 1126, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH_MISS, QTest::CachePrefetches },
363 { 1146, PERF_TYPE_HW_CACHE, CACHE_LLC_PREFETCH, QTest::CachePrefetches },
364 { 1161, PERF_TYPE_HW_CACHE, CACHE_LLC_READ_MISS, QTest::CacheReads },
365 { 1177, PERF_TYPE_HW_CACHE, CACHE_LLC_READ, QTest::CacheReads },
366 { 1187, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
367 { 1204, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
368 { 1215, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE_MISS, QTest::CacheWrites },
369 { 1232, PERF_TYPE_HW_CACHE, CACHE_LLC_WRITE, QTest::CacheWrites },
370 { 1243, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
371 { 1256, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
372 { 1267, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
373 { 1280, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
374 { 1292, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, QTest::RefCPUCycles },
375 { 1303, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
376 { 1326, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
377 { 1350, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeNanoseconds },
378};
379/* -- END GENERATED CODE -- */
380
381static QTest::QBenchmarkMetric metricForEvent(PerfEvent counter)
382{
383 for (const Events &ev : eventlist) {
384 if (ev.type == counter.type && ev.event_id == counter.config)
385 return ev.metric;
386 }
387 return QTest::Events;
388}
389
390void QBenchmarkPerfEventsMeasurer::setCounter(const char *name)
391{
392 eventTypes->clear();
393 std::string_view input = name;
394 if (qsizetype idx = input.find(':'); idx >= 0)
395 input = input.substr(0, idx);
396
397 while (!input.empty()) {
398 std::string_view countername = input;
399 if (qsizetype idx = countername.find(','); idx >= 0)
400 countername = countername.substr(0, idx);
401
402 for (const Events &ev : eventlist) {
403 int c = countername.compare(eventlist_strings + ev.offset);
404 if (c > 0)
405 continue;
406 if (c < 0) {
407 fprintf(stderr, "ERROR: Performance counter type '%.*s' is unknown\n",
408 int(countername.size()), countername.data());
409 exit(1);
410 }
411 eventTypes->append({ ev.type, ev.event_id });
412 break;
413 }
414
415 if (countername.size() == input.size())
416 input = {};
417 else
418 input.remove_prefix(countername.size() + 1);
419 }
420
421 // We used to support attributes, but our code was the opposite of what
422 // perf(1) does, plus QBenchlib isn't exactly expected to be used to
423 // profile Linux kernel code or launch guest VMs as part of the workload.
424 // So we keep accepting the colon as a delimiter but ignore it.
425}
426
427void QBenchmarkPerfEventsMeasurer::listCounters()
428{
429 if (!isAvailable()) {
430 printf("Performance counters are not available on this system\n");
431 return;
432 }
433
434 printf("The following performance counters are available:\n");
435 for (const Events &ev : eventlist) {
436 printf(" %-30s [%s]\n", eventlist_strings + ev.offset,
437 ev.type == PERF_TYPE_HARDWARE ? "hardware" :
438 ev.type == PERF_TYPE_SOFTWARE ? "software" :
439 ev.type == PERF_TYPE_HW_CACHE ? "cache" : "other");
440 }
441}
442
443QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer() = default;
444
445QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
446{
447 for (int fd : std::as_const(fds))
448 qt_safe_close(fd);
449}
450
451void QBenchmarkPerfEventsMeasurer::start()
452{
453 QT_WARNING_DISABLE_GCC("-Wmissing-field-initializers")
454 QT_WARNING_DISABLE_CLANG("-Wmissing-field-initializers")
455 perf_event_attr attr = {
456 .size = sizeof attr,
457 .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING,
458 .disabled = true, // we'll enable later
459 .inherit = true, // let children processes inherit the monitoring
460 .pinned = true, // keep it running in the hardware
461 .inherit_stat = true, // aggregate all the info from child processes
462 .task = true, // trace fork/exits
463 };
464
465 QList<PerfEvent> &counters = *eventTypes;
466 if (counters.isEmpty())
467 counters = defaultCounters();
468 if (fds.isEmpty()) {
469 pid_t pid = 0; // attach to the current process only
470 int cpu = -1; // on any CPU
471 int group_fd = -1;
472 int flags = PERF_FLAG_FD_CLOEXEC;
473
474 fds.reserve(counters.size());
475 for (PerfEvent counter : std::as_const(counters)) {
476 attr.type = counter.type;
477 attr.config = counter.config;
478 int fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
479 if (fd == -1) {
480 // probably a paranoid kernel (/proc/sys/kernel/perf_event_paranoid)
481 attr.exclude_kernel = true;
482 attr.exclude_hv = true;
483 fd = perf_event_open(&attr, pid, cpu, group_fd, flags);
484 }
485 if (fd == -1) {
486 perror("QBenchmarkPerfEventsMeasurer::start: perf_event_open");
487 exit(1);
488 }
489
490 fds.append(fd);
491 }
492 }
493
494 // enable the counters
495 for (int fd : std::as_const(fds))
496 ::ioctl(fd, PERF_EVENT_IOC_RESET);
497 prctl(PR_TASK_PERF_EVENTS_ENABLE);
498}
499
500QList<QBenchmarkMeasurerBase::Measurement> QBenchmarkPerfEventsMeasurer::stop()
501{
502 // disable the counters
503 prctl(PR_TASK_PERF_EVENTS_DISABLE);
504
505 const QList<PerfEvent> &counters = *eventTypes;
506 QList<Measurement> result(counters.size(), {});
507 for (qsizetype i = 0; i < counters.size(); ++i) {
508 result[i] = readValue(i);
509 }
510 return result;
511}
512
513bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(Measurement)
514{
515 return true;
516}
517
518int QBenchmarkPerfEventsMeasurer::adjustIterationCount(int)
519{
520 return 1;
521}
522
523int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
524{
525 return 1;
526}
527
528static quint64 rawReadValue(int fd)
529{
530 /* from the kernel docs:
531 * struct read_format {
532 * { u64 value;
533 * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
534 * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
535 * { u64 id; } && PERF_FORMAT_ID
536 * } && !PERF_FORMAT_GROUP
537 */
538
539 struct read_format {
540 quint64 value;
541 quint64 time_enabled;
542 quint64 time_running;
543 } results;
544
545 size_t nread = 0;
546 while (nread < sizeof results) {
547 char *ptr = reinterpret_cast<char *>(&results);
548 qint64 r = qt_safe_read(fd, ptr + nread, sizeof results - nread);
549 if (r < 0) {
550 perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results");
551 exit(1);
552 }
553 nread += quint64(r);
554 }
555
556 if (results.time_running == results.time_enabled)
557 return results.value;
558
559 // scale the results, though this shouldn't happen!
560 return results.value * (double(results.time_running) / double(results.time_enabled));
561}
562
563QBenchmarkMeasurerBase::Measurement QBenchmarkPerfEventsMeasurer::readValue(qsizetype idx)
564{
565 quint64 raw = rawReadValue(fds.at(idx));
566 return { qreal(qint64(raw)), metricForEvent(eventTypes->at(idx)) };
567}
568
569QT_END_NAMESPACE
570
571#endif