Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qsimd.cpp
Go to the documentation of this file.
1// Copyright (C) 2021 The Qt Company Ltd.
2// Copyright (C) 2022 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5// we need ICC to define the prototype for _rdseed64_step
6#define __INTEL_COMPILER_USE_INTRINSIC_PROTOTYPES
7#undef _FORTIFY_SOURCE // otherwise, the always_inline from stdio.h fail to inline
8
9#include "qsimd_p.h"
10#include "qalgorithms.h"
11
12#include <stdio.h>
13#include <string.h>
14
15#if defined(QT_NO_DEBUG) && !defined(NDEBUG)
16# define NDEBUG
17#endif
18#include <assert.h>
19
20#ifdef Q_OS_LINUX
21# include "../testlib/3rdparty/valgrind/valgrind_p.h"
22#endif
23
24#define QT_FUNCTION_TARGET_BASELINE
25
26#if defined(Q_OS_WIN)
27# if !defined(Q_CC_GNU)
28# include <intrin.h>
29# endif
30# if defined(Q_PROCESSOR_ARM_64)
31# include <qt_windows.h>
32# include <processthreadsapi.h>
33# endif
34#elif defined(Q_OS_LINUX) && defined(Q_PROCESSOR_MIPS_32)
35# include "private/qcore_unix_p.h"
36#elif QT_CONFIG(getauxval) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_LOONGARCH))
37# include <sys/auxv.h>
38
39// the kernel header definitions for HWCAP_*
40// (the ones we need/may need anyway)
41
42// copied from <asm/hwcap.h> (ARM)
43#define HWCAP_NEON 4096
44
45// copied from <asm/hwcap.h> (ARM):
46#define HWCAP2_AES (1 << 0)
47#define HWCAP2_CRC32 (1 << 4)
48
49// copied from <asm/hwcap.h> (Aarch64)
50#define HWCAP_AES (1 << 3)
51#define HWCAP_CRC32 (1 << 7)
52#define HWCAP_SVE (1 << 22)
53
54// copied from <linux/auxvec.h>
55#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
56#define AT_HWCAP2 26 /* extension of AT_HWCAP */
57
58#elif defined(Q_CC_GHS)
59# include <INTEGRITY_types.h>
60#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
61# include <sys/sysctl.h>
62#endif
63
64QT_BEGIN_NAMESPACE
65
// Returns the element count N of the array passed by reference.
template <typename T, uint N> QT_FUNCTION_TARGET_BASELINE
uint arraysize(T (&)[N])
{
    // Same as std::size, but with QT_FUNCTION_TARGET_BASELINE,
    // otherwise some versions of GCC fail to compile.
    return N;
}
73
// Feature-name tables, one variant per architecture.
// features_string is a sequence of NUL-terminated names, each stored with a
// leading space; features_indices[i] is the offset into features_string of
// the name that belongs to feature bit i. In the hand-written tables below,
// offset 0 is an empty string, so bit 0 has no name.
#if defined(Q_PROCESSOR_ARM)
/* Data:
 neon
 crc32
 aes
 sve
 */
static const char features_string[] =
    "\0"
    " neon\0"
    " crc32\0"
    " aes\0"
    " sve\0";
static const int features_indices[] = { 0, 1, 7, 14, 19 };
#elif defined(Q_PROCESSOR_MIPS)
/* Data:
 dsp
 dspr2
*/
static const char features_string[] =
    "\0"
    " dsp\0"
    " dspr2\0";

static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_LOONGARCH)
/* Data:
 lsx
 lasx
*/
static const char features_string[] =
    "\0"
    " lsx\0"
    " lasx\0";

static const int features_indices[] = {
    0, 1, 6
};
#elif defined(Q_PROCESSOR_X86)
# include "qsimd_x86.cpp" // generated by util/x86simdgen
#else
// unknown architecture: no named features
static const char features_string[] = "";
static const int features_indices[] = { 0 };
#endif
120// end generated
121
122#if defined(Q_PROCESSOR_ARM)
// ARM: returns the CpuFeature* bits found on the running processor.
// The source of information depends on the platform:
//  - with getauxval(): the AT_HWCAP (and, on 32-bit, AT_HWCAP2) kernel bits;
//  - on Darwin: compiler macros, with sysctlbyname() for what the macros
//    do not settle;
//  - on Windows/ARM64: IsProcessorFeaturePresent();
//  - otherwise, or when getauxval() returns 0: compiler macros only.
static inline quint64 detectProcessorFeatures()
{
    quint64 features = 0;

#if QT_CONFIG(getauxval)
    unsigned long auxvHwCap = getauxval(AT_HWCAP);
    if (auxvHwCap != 0) {
#  if defined(Q_PROCESSOR_ARM_64)
        // For Aarch64:
        features |= CpuFeatureNEON; // NEON is always available
        if (auxvHwCap & HWCAP_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP_AES)
            features |= CpuFeatureAES;
        if (auxvHwCap & HWCAP_SVE)
            features |= CpuFeatureSVE;
#  else
        // For ARM32:
        if (auxvHwCap & HWCAP_NEON)
            features |= CpuFeatureNEON;
        // CRC32 and AES are reported in the second capability word
        auxvHwCap = getauxval(AT_HWCAP2);
        if (auxvHwCap & HWCAP2_CRC32)
            features |= CpuFeatureCRC32;
        if (auxvHwCap & HWCAP2_AES)
            features |= CpuFeatureAES;
#  endif
        return features;
    }
    // fall back to compile-time flags if getauxval failed
#elif defined(Q_OS_DARWIN) && defined(Q_PROCESSOR_ARM)
    // "feature" and "len" are only used by the sysctlbyname() queries below,
    // some or all of which may be compiled out
    unsigned feature;
    size_t len = sizeof(feature);
    Q_UNUSED(len);
#if defined(__ARM_NEON)
    features |= CpuFeatureNEON;
#else
    #error "Misconfiguration, NEON should always be enabled on Apple hardware"
#endif
#if defined(__ARM_FEATURE_CRC32)
    features |= CpuFeatureCRC32;
#elif defined(Q_OS_MACOS)
    #error "Misconfiguration, CRC32 should always be enabled on Apple desktop hardware"
#else
    if (sysctlbyname("hw.optional.armv8_crc32", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureCRC32 : 0;
#endif
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#elif defined(Q_OS_MACOS)
    #error "Misconfiguration, CRYPTO/AES should always be enabled on Apple desktop hardware"
#else
    if (sysctlbyname("hw.optional.arm.FEAT_AES", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureAES : 0;
#endif
#if defined(__ARM_FEATURE_SVE)
    features |= CpuFeatureSVE;
#else
    if (sysctlbyname("hw.optional.arm.FEAT_SVE", &feature, &len, nullptr, 0) == 0)
        features |= feature ? CpuFeatureSVE : 0;
#endif
    return features;
#elif defined(Q_OS_WIN) && defined(Q_PROCESSOR_ARM_64)
    features |= CpuFeatureNEON;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureCRC32;
    if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0)
        features |= CpuFeatureAES;
    return features;
#endif
    // Compile-time fallback: report what the compiler was told to target
#if defined(__ARM_NEON__)
    features |= CpuFeatureNEON;
#endif
#if defined(__ARM_FEATURE_CRC32)
    features |= CpuFeatureCRC32;
#endif
#if defined(__ARM_FEATURE_CRYPTO)
    features |= CpuFeatureAES;
#endif
#if defined(__ARM_FEATURE_SVE)
    features |= CpuFeatureSVE;
#endif

    return features;
}
207
208#elif defined(Q_PROCESSOR_LOONGARCH)
209static inline quint64 detectProcessorFeatures()
210{
211 quint64 features = 0;
212# if QT_CONFIG(getauxval)
213 quint64 hwcap = getauxval(AT_HWCAP);
214
215 if (hwcap & HWCAP_LOONGARCH_LSX)
216 features |= CpuFeatureLSX;
217 if (hwcap & HWCAP_LOONGARCH_LASX)
218 features |= CpuFeatureLASX;
219# else
220 enum LoongArchFeatures {
221 LOONGARCH_CFG2 = 0x2,
222 LOONGARCH_CFG2_LSX = (1 << 6),
223 LOONGARCH_CFG2_LASX = (1 << 7)
224 };
225
226 quint64 reg = 0;
227
228 __asm__ volatile(
229 "cpucfg %0, %1 \n\t"
230 : "+&r"(reg)
231 : "r"(LOONGARCH_CFG2)
232 );
233
234 if (reg & LOONGARCH_CFG2_LSX)
235 features |= CpuFeatureLSX;
236 if (reg & LOONGARCH_CFG2_LASX)
237 features |= CpuFeatureLASX;
238# endif
239 return features;
240}
241
242#elif defined(Q_PROCESSOR_X86)
243
244#ifdef Q_PROCESSOR_X86_32
245# define PICreg "%%ebx"
246#else
247# define PICreg "%%rbx"
248#endif
249#ifdef __SSE2_MATH__
250# define X86_BASELINE "no-sse3"
251#else
252# define X86_BASELINE "no-sse"
253#endif
254
#if defined(Q_CC_GNU) || defined(Q_CC_CLANG)
// lower the target for functions in this file
# undef QT_FUNCTION_TARGET_BASELINE
# define QT_FUNCTION_TARGET_BASELINE __attribute__((target(X86_BASELINE)))
// Target string combining the lowered baseline with RDRND. The definition
// spans two lines, so the first one must end in a line continuation.
# define QT_FUNCTION_TARGET_STRING_BASELINE_RDRND \
    X86_BASELINE "," QT_FUNCTION_TARGET_STRING_RDRND
#endif
262
263static bool checkRdrndWorks() noexcept;
264
// Returns the value that CPUID leaf 0 reports in EAX (the highest basic
// CPUID leaf, going by this function's name), a fixed 6 on Emscripten, or
// 0 when CPUID is not usable with this compiler or processor.
QT_FUNCTION_TARGET_BASELINE
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_EMSCRIPTEN)
    return 6; // All features supported by Emscripten
#elif defined(Q_CC_GNU)
    qregisterint tmp1;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported
    // (flip bit 0x00200000 of the flags register and see whether the
    // change sticks)
    long cpuid_supported;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (cpuid_supported), "=r" (tmp1)
         );
    if (!cpuid_supported)
        return 0;
# endif

    // PICreg is swapped with tmp1 around CPUID and swapped back afterwards,
    // so it holds its original value again once the asm statement ends
    int result;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
         : "=&a" (result), "=&r" (tmp1)
         : "0" (0)
         : "ecx", "edx");
    return result;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int info[4];
    __cpuid(info, 0);
    return info[0];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(0, info);
    return info[0];
#else
    return 0;
#endif
}
312
// Executes CPUID leaf 1 and stores the resulting ECX and EDX words in the
// two output parameters. On compilers without a known way to run CPUID,
// the outputs are left untouched.
QT_FUNCTION_TARGET_BASELINE
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    // PICreg is swapped with tmp1 around CPUID so that it is restored afterwards
    qregisterint tmp1;
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
         : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1)
         : "a" (1));
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUID(1, info);
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
338
#ifdef Q_OS_WIN
// Overload of __cpuidex taking (int[4], int, __int64) that zero-fills "info".
// NOTE(review): presumably a fallback selected only when the compiler does
// not provide the real (int[4], int, int) intrinsic -- confirm.
inline void __cpuidex(int info[4], int, __int64) { memset(info, 0, 4*sizeof(int));}
#endif
342
// Executes CPUID leaf 7, sub-leaf 0, and stores the resulting EBX, ECX and
// EDX words in the output parameters. On compilers without a known way to
// run CPUID, the outputs are left untouched.
QT_FUNCTION_TARGET_BASELINE
static void cpuidFeatures07_00(uint &ebx, uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)
    qregisteruint rbx; // in case it's 64-bit
    qregisteruint rcx = 0; // input: selects sub-leaf 0
    qregisteruint rdx = 0;
    // PICreg is swapped with rbx around CPUID, so rbx receives the EBX
    // result while PICreg is restored
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
         : "=&r" (rbx), "+&c" (rcx), "+&d" (rdx)
         : "a" (7));
    ebx = rbx;
    ecx = rcx;
    edx = rdx;
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#elif defined(Q_CC_GHS)
    unsigned int info[4];
    __CPUIDEX(7, 0, info);
    ebx = info[1];
    ecx = info[2];
    edx = info[3];
#else
    Q_UNUSED(ebx);
    Q_UNUSED(ecx);
    Q_UNUSED(edx);
#endif
}
376
// NOTE(review): when the fallback below is compiled in, the attribute macro
// on the next line textually precedes _xgetbv() rather than xgetbv() --
// confirm this is harmless for every compiler that takes that branch.
QT_FUNCTION_TARGET_BASELINE
#if defined(Q_OS_WIN) && !(defined(Q_CC_GNU) || defined(Q_CC_GHS))
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
inline quint64 _xgetbv(__int64) { return 0; }
#endif
// Executes XGETBV for extended control register number "in" and stores the
// low 32 bits of the result in "eax" and the high 32 bits in "edx". On
// compilers without a known way to run it, the outputs are left untouched.
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if (defined(Q_CC_GNU) && !defined(Q_CC_EMSCRIPTEN)) || defined(Q_CC_GHS)
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
         : "=a" (eax), "=d" (edx)
         : "c" (in));
#elif defined(Q_OS_WIN)
    quint64 result = _xgetbv(in);
    eax = result;
    edx = result >> 32;
#else
    Q_UNUSED(in);
    Q_UNUSED(eax);
    Q_UNUSED(edx);
#endif
}
398
399QT_FUNCTION_TARGET_BASELINE
400static quint64 adjustedXcr0(quint64 xcr0)
401{
402 /*
403 * Some OSes hide their capability of context-switching the AVX512 state in
404 * the XCR0 register. They do that so the first time we execute an
405 * instruction that may access the AVX512 state (requiring the EVEX prefix)
406 * they allocate the necessary context switch space.
407 *
408 * This behavior is deprecated with the XFD (Extended Feature Disable)
409 * register, but we can't change existing OSes.
410 */
411#ifdef Q_OS_DARWIN
412 // from <machine/cpu_capabilities.h> in xnu
413 // <https://github.com/apple/darwin-xnu/blob/xnu-4903.221.2/osfmk/i386/cpu_capabilities.h>
414 constexpr quint64 kHasAVX512F = Q_UINT64_C(0x0000004000000000);
415 constexpr quintptr commpage = sizeof(void *) > 4 ? Q_UINT64_C(0x00007fffffe00000) : 0xffff0000;
416 constexpr quintptr cpu_capabilities64 = commpage + 0x10;
417 quint64 capab = *reinterpret_cast<quint64 *>(cpu_capabilities64);
418 if (capab & kHasAVX512F)
419 xcr0 |= XSave_Avx512State;
420#endif
421
422 return xcr0;
423}
424
425QT_FUNCTION_TARGET_BASELINE
426static quint64 detectProcessorFeatures()
427{
428 quint64 features = 0;
429 int cpuidLevel = maxBasicCpuidSupported();
430#if Q_PROCESSOR_X86 < 5
431 if (cpuidLevel < 1)
432 return 0;
433#else
434 assert(cpuidLevel >= 1);
435#endif
436
437 uint results[X86CpuidMaxLeaf] = {};
438 cpuidFeatures01(results[Leaf01ECX], results[Leaf01EDX]);
439 if (cpuidLevel >= 7)
440 cpuidFeatures07_00(results[Leaf07_00EBX], results[Leaf07_00ECX], results[Leaf07_00EDX]);
441
442 // populate our feature list
443 for (uint i = 0; i < arraysize(x86_locators); ++i) {
444 uint word = x86_locators[i] / 32;
445 uint bit = 1U << (x86_locators[i] % 32);
446 quint64 feature = Q_UINT64_C(1) << i;
447 if (results[word] & bit)
448 features |= feature;
449 }
450
451 // now check the AVX state
452 quint64 xcr0 = 0;
453 if (results[Leaf01ECX] & (1u << 27)) {
454 // XGETBV enabled
455 uint xgetbvA = 0, xgetbvD = 0;
456 xgetbv(0, xgetbvA, xgetbvD);
457
458 xcr0 = xgetbvA;
459 if (sizeof(XSaveBits) > sizeof(xgetbvA))
460 xcr0 |= quint64(xgetbvD) << 32;
461 xcr0 = adjustedXcr0(xcr0);
462 }
463
464 for (auto req : xsave_requirements) {
465 if ((xcr0 & req.xsave_state) != req.xsave_state)
466 features &= ~req.cpu_features;
467 }
468
469 if (features & CpuFeatureRDRND && !checkRdrndWorks())
470 features &= ~(CpuFeatureRDRND | CpuFeatureRDSEED);
471
472 return features;
473}
474
475#elif defined(Q_PROCESSOR_MIPS_32)
476
477#if defined(Q_OS_LINUX)
478//
479// Do not use QByteArray: it could use SIMD instructions itself at
480// some point, thus creating a recursive dependency. Instead, use a
481// QSimpleBuffer, which has the bare minimum needed to use memory
482// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
483//
// Minimal growable byte buffer. "data" points to "alloc" bytes obtained from
// realloc(), of which the first "size" are in use. The buffer owns "data"
// and frees it on destruction.
struct QSimpleBuffer
{
    static const int chunk_size = 256;
    char *data;
    unsigned alloc;
    unsigned size;

    QSimpleBuffer() : data(nullptr), alloc(0), size(0) { }
    ~QSimpleBuffer() { ::free(data); }

    // "data" is owned: a member-wise copy would free it twice
    QSimpleBuffer(const QSimpleBuffer &) = delete;
    QSimpleBuffer &operator=(const QSimpleBuffer &) = delete;

    // Sets the used size to "newsize", growing the allocation when needed.
    // The allocation grows in multiples of chunk_size and never shrinks.
    // Aborts if memory cannot be obtained, because callers write to "data"
    // right after resizing and have no way to learn about a failure.
    void resize(unsigned newsize)
    {
        if (newsize > alloc) {
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) // the rounding above overflowed
                newalloc = newsize;
            if (newalloc != alloc) {
                // keep the old pointer until the new block is known to exist
                char *grown = static_cast<char *>(::realloc(data, newalloc));
                if (!grown)
                    ::abort();
                data = grown;
                alloc = newalloc;
            }
        }
        size = newsize;
    }
    // Appends the first "appendsize" bytes of "other" to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize)
    {
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }
    // Discards the first "amount" bytes, moving the rest to the front.
    void popleft(unsigned amount)
    {
        if (amount >= size)
            return resize(0);
        size -= amount;
        ::memmove(data, data + amount, size);
    }
    // Returns the contents as a NUL-terminated string. The terminator is
    // written just past the used bytes and is not counted in "size".
    char *cString()
    {
        if (size >= alloc) {
            // make room for the terminator without changing the contents' size
            const unsigned contentSize = size;
            resize(contentSize + 1);
            size = contentSize;
        }
        data[size] = '\0';
        return data;
    }
};
527
528//
529// Uses a scratch "buffer" (which must be used for all reads done in the
530// same file descriptor) to read chunks of data from a file, to read
531// one line at a time. Lines include the trailing newline character ('\n').
532// On EOF, line.size is zero.
533//
534static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
535{
536 for (;;) {
537 char *newline = static_cast<char *>(::memchr(buffer.data, '\n', buffer.size));
538 if (newline) {
539 unsigned piece_size = newline - buffer.data + 1;
540 line.append(buffer, piece_size);
541 buffer.popleft(piece_size);
542 line.resize(line.size - 1);
543 return;
544 }
545 if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
546 int oldsize = buffer.size;
547 buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
548 buffer.size = oldsize;
549 }
550 ssize_t read_bytes =
551 ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
552 if (read_bytes > 0)
553 buffer.size += read_bytes;
554 else
555 return;
556 }
557}
558
559//
560// Checks if any line with a given prefix from /proc/cpuinfo contains
561// a certain string, surrounded by spaces.
562//
563static bool procCpuinfoContains(const char *prefix, const char *string)
564{
565 int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
566 if (cpuinfo_fd == -1)
567 return false;
568
569 unsigned string_len = ::strlen(string);
570 unsigned prefix_len = ::strlen(prefix);
571 QSimpleBuffer line, buffer;
572 bool present = false;
573 do {
574 line.resize(0);
575 bufReadLine(cpuinfo_fd, line, buffer);
576 char *colon = static_cast<char *>(::memchr(line.data, ':', line.size));
577 if (colon && line.size > prefix_len + string_len) {
578 if (!::strncmp(prefix, line.data, prefix_len)) {
579 // prefix matches, next character must be ':' or space
580 if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
581 // Does it contain the string?
582 char *found = ::strstr(line.cString(), string);
583 if (found && ::isspace(found[-1]) &&
584 (::isspace(found[string_len]) || found[string_len] == '\0')) {
585 present = true;
586 break;
587 }
588 }
589 }
590 }
591 } while (line.size);
592
593 ::qt_safe_close(cpuinfo_fd);
594 return present;
595}
596#endif
597
598static inline quint64 detectProcessorFeatures()
599{
600 // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
601 quint64 flags = 0;
602
603#if defined __mips_dsp
604 flags |= CpuFeatureDSP;
605# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
606 flags |= CpuFeatureDSPR2;
607# elif defined(Q_OS_LINUX)
608 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
609 flags |= CpuFeatureDSPR2;
610# endif
611#elif defined(Q_OS_LINUX)
612 if (procCpuinfoContains("ASEs implemented", "dsp")) {
613 flags |= CpuFeatureDSP;
614 if (procCpuinfoContains("cpu model", "MIPS 74Kc") || procCpuinfoContains("cpu model", "MIPS 74Kf"))
615 flags |= CpuFeatureDSPR2;
616 }
617#endif
618
619 return flags;
620}
621
622#else
624{
625 return 0;
626}
627#endif
628
// record what CPU features were enabled by default in this Qt build
static const quint64 minFeature = qCompilerCpuFeatures;

// The most significant bit of QCpuFeatureType. It is not a processor
// feature: qDetectCpuFeatures() sets it in the value it stores and returns.
static constexpr auto SimdInitialized = QCpuFeatureType(1) << (sizeof(QCpuFeatureType) * 8 - 1);
// Where qDetectCpuFeatures() stores its result; zero until then.
Q_ATOMIC(QCpuFeatureType) QT_MANGLE_NAMESPACE(qt_cpu_features)[1] = { 0 };
634
637{
638 auto minFeatureTest = minFeature;
639#if defined(Q_PROCESSOR_X86_64) && defined(cpu_feature_shstk)
640 // Controlflow Enforcement Technology (CET) is an OS-assisted
641 // hardware-feature, meaning the CPUID bit may be disabled if the OS
642 // doesn't support it, but that's ok.
643 minFeatureTest &= ~CpuFeatureSHSTK;
644#endif
645 QCpuFeatureType f = detectProcessorFeatures();
646
647 // Intentionally NOT qgetenv (this code runs too early)
648 if (char *disable = getenv("QT_NO_CPU_FEATURE"); disable && *disable) {
649#if _POSIX_C_SOURCE >= 200112L
650 char *saveptr = nullptr;
651 auto strtok = [&saveptr](char *str, const char *delim) {
652 return ::strtok_r(str, delim, &saveptr);
653 };
654#endif
655 while (char *token = strtok(disable, " ")) {
656 disable = nullptr;
657 for (uint i = 0; i < arraysize(features_indices); ++i) {
658 if (strcmp(token, features_string + features_indices[i]) == 0)
659 f &= ~(Q_UINT64_C(1) << i);
660 }
661 }
662 }
663
664#ifdef RUNNING_ON_VALGRIND
665 bool runningOnValgrind = RUNNING_ON_VALGRIND;
666#else
667 bool runningOnValgrind = false;
668#endif
669 if (Q_UNLIKELY(!runningOnValgrind && minFeatureTest != 0 && (f & minFeatureTest) != minFeatureTest)) {
670 quint64 missing = minFeatureTest & ~quint64(f);
671 fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
672 for (uint i = 0; i < arraysize(features_indices); ++i) {
673 if (missing & (Q_UINT64_C(1) << i))
674 fprintf(stderr, "%s", features_string + features_indices[i]);
675 }
676 fprintf(stderr, "\n");
677 fflush(stderr);
678 qAbort();
679 }
680
681 assert((f & SimdInitialized) == 0);
682 f |= SimdInitialized;
683 std::atomic_store_explicit(QT_MANGLE_NAMESPACE(qt_cpu_features), f, std::memory_order_relaxed);
684 return f;
685}
686
689{
690 quint64 features = detectProcessorFeatures() & ~SimdInitialized;
691 printf("Processor features: ");
692 for (uint i = 0; i < arraysize(features_indices); ++i) {
693 if (features & (Q_UINT64_C(1) << i))
694 printf("%s%s", features_string + features_indices[i],
695 minFeature & (Q_UINT64_C(1) << i) ? "[required]" : "");
696 }
697 if ((features = (qCompilerCpuFeatures & ~features))) {
698 printf("\n!!!!!!!!!!!!!!!!!!!!\n!!! Missing required features:");
699 for (uint i = 0; i < arraysize(features_indices); ++i) {
700 if (features & (Q_UINT64_C(1) << i))
701 printf("%s", features_string + features_indices[i]);
702 }
703 printf("\n!!! Applications will likely crash with \"Invalid Instruction\"\n!!!!!!!!!!!!!!!!!!!!");
704 }
705 puts("");
706}
707
708#if defined(Q_PROCESSOR_X86) && QT_COMPILER_SUPPORTS_HERE(RDRND)
709
710# ifdef Q_PROCESSOR_X86_64
711# define _rdrandXX_step _rdrand64_step
712# define _rdseedXX_step _rdseed64_step
713# else
714# define _rdrandXX_step _rdrand32_step
715# define _rdseedXX_step _rdseed32_step
716# endif
717
718// The parameter to _rdrand64_step & _rdseed64_step is unsigned long long for
719// Clang and GCC but unsigned __int64 for MSVC and ICC, which is unsigned long
720// long on Windows, but unsigned long on Linux.
namespace {
// Yields, as Type, the pointee type T of a function of type int(T *).
template <typename F> struct ExtractParameter;
template <typename T> struct ExtractParameter<int (T *)> { using Type = T; };
// The integer type that _rdrandXX_step writes through its pointer argument
using randuint = ExtractParameter<decltype(_rdrandXX_step)>::Type;
}
726
727# if QT_COMPILER_SUPPORTS_HERE(RDSEED)
728static QT_FUNCTION_TARGET(RDSEED) unsigned *qt_random_rdseed(unsigned *ptr, unsigned *end) noexcept
729{
730 // Unlike for the RDRAND code below, the Intel whitepaper describing the
731 // use of the RDSEED instruction indicates we should not retry in a loop.
732 // If the independent bit generator used by RDSEED is out of entropy, it
733 // may take time to replenish.
734 // https://software.intel.com/en-us/articles/intel-digital-random-number-generator-drng-software-implementation-guide
735 while (ptr + sizeof(randuint) / sizeof(*ptr) <= end) {
736 if (_rdseedXX_step(reinterpret_cast<randuint *>(ptr)) == 0)
737 goto out;
738 ptr += sizeof(randuint) / sizeof(*ptr);
739 }
740
741 if (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
742 if (_rdseed32_step(ptr) == 0)
743 goto out;
744 ++ptr;
745 }
746
747out:
748 return ptr;
749}
750# else
751static unsigned *qt_random_rdseed(unsigned *ptr, unsigned *)
752{
753 return ptr;
754}
755# endif
756
757static QT_FUNCTION_TARGET(RDRND) unsigned *qt_random_rdrnd(unsigned *ptr, unsigned *end) noexcept
758{
759 int retries = 10;
760 while (ptr + sizeof(randuint)/sizeof(*ptr) <= end) {
761 if (_rdrandXX_step(reinterpret_cast<randuint *>(ptr)))
762 ptr += sizeof(randuint)/sizeof(*ptr);
763 else if (--retries == 0)
764 goto out;
765 }
766
767 while (sizeof(*ptr) != sizeof(randuint) && ptr != end) {
768 bool ok = _rdrand32_step(ptr);
769 if (!ok && --retries)
770 continue;
771 if (ok)
772 ++ptr;
773 break;
774 }
775
776out:
777 return ptr;
778}
779
780QT_FUNCTION_TARGET(BASELINE_RDRND) Q_DECL_COLD_FUNCTION
781static bool checkRdrndWorks() noexcept
782{
783 /*
784 * Some AMD CPUs (e.g. AMD A4-6250J and AMD Ryzen 3000-series) have a
785 * failing random generation instruction, which always returns
786 * 0xffffffff, even when generation was "successful".
787 *
788 * This code checks if hardware random generator generates four consecutive
789 * equal numbers. If it does, then we probably have a failing one and
790 * should disable it completely.
791 *
792 * https://bugreports.qt.io/browse/QTBUG-69423
793 */
794 constexpr qsizetype TestBufferSize = 4;
795 unsigned testBuffer[TestBufferSize] = {};
796
797 // But if the RDRND feature was statically enabled by the compiler, we
798 // assume that the RNG works. That's because the calls to qRandomCpu() will
799 // be guarded by qCpuHasFeature(RDRND) and that will be a constant true.
800 if (_compilerCpuFeatures & CpuFeatureRDRND)
801 return true;
802
803 unsigned *end = qt_random_rdrnd(testBuffer, testBuffer + TestBufferSize);
804 if (end < testBuffer + 3) {
805 // Random generation didn't produce enough data for us to make a
806 // determination whether it's working or not. Assume it isn't, but
807 // don't print a warning.
808 return false;
809 }
810
811 // Check the results for equality
812 if (testBuffer[0] == testBuffer[1]
813 && testBuffer[0] == testBuffer[2]
814 && (end < testBuffer + TestBufferSize || testBuffer[0] == testBuffer[3])) {
815 fprintf(stderr, "WARNING: CPU random generator seem to be failing, "
816 "disabling hardware random number generation\n"
817 "WARNING: RDRND generated:");
818 for (unsigned *ptr = testBuffer; ptr < end; ++ptr)
819 fprintf(stderr, " 0x%x", *ptr);
820 fprintf(stderr, "\n");
821 return false;
822 }
823
824 // We're good
825 return true;
826}
827
828QT_FUNCTION_TARGET(RDRND) qsizetype qRandomCpu(void *buffer, qsizetype count) noexcept
829{
830 unsigned *ptr = reinterpret_cast<unsigned *>(buffer);
831 unsigned *end = ptr + count;
832
833 if (qCpuHasFeature(RDSEED))
834 ptr = qt_random_rdseed(ptr, end);
835
836 // fill the buffer with RDRND if RDSEED didn't
837 ptr = qt_random_rdrnd(ptr, end);
838 return ptr - reinterpret_cast<unsigned *>(buffer);
839}
840#elif defined(Q_PROCESSOR_X86) && !defined(Q_PROCESSOR_ARM)
// RDRND cannot be compiled here, so it is always reported as not working
static bool checkRdrndWorks() noexcept { return false; }
842#endif // Q_PROCESSOR_X86 && RDRND
843
844#if QT_SUPPORTS_INIT_PRIORITY
namespace {
// Helper whose constructor runs the CPU feature detection.
struct QSimdInitializer
{
    inline QSimdInitializer() { QT_MANGLE_NAMESPACE(qDetectCpuFeatures)(); }
};
}

// This is intentionally a dynamic initialization of the variable
// (constructing it is what triggers the detection)
Q_DECL_INIT_PRIORITY(01) static QSimdInitializer initializer;
854#endif
855
856QT_END_NAMESPACE
#define assert
void qDumpCPUFeatures()
Definition qsimd.cpp:688
#define QT_FUNCTION_TARGET_BASELINE
Definition qsimd.cpp:24
QT_FUNCTION_TARGET_BASELINE uint64_t QT_MANGLE_NAMESPACE qDetectCpuFeatures()
Definition qsimd.cpp:636
static constexpr auto SimdInitialized
Definition qsimd.cpp:632
static const int features_indices[]
Definition qsimd.cpp:118
static uint detectProcessorFeatures()
Definition qsimd.cpp:623
static const char features_string[]
Definition qsimd.cpp:117
static const quint64 minFeature
Definition qsimd.cpp:630
#define QT_COMPILER_SUPPORTS_HERE(x)
Definition qsimd_p.h:139
static const uint64_t qCompilerCpuFeatures
Definition qsimd_p.h:395
unsigned QCpuFeatureType
Definition qsimd_p.h:448
#define Q_ATOMIC(T)
Definition qsimd_p.h:439