Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qfloat16.cpp
Go to the documentation of this file.
1// Copyright (C) 2020 The Qt Company Ltd.
2// Copyright (C) 2016 by Southwest Research Institute (R)
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4// Qt-Security score:significant reason:default
5
6#include "qfloat16.h"
7#include "private/qsimd_p.h"
8#include <cmath> // for fpclassify()'s return values
9
10#include <QtCore/qdatastream.h>
11#include <QtCore/qmetatype.h>
12#include <QtCore/qtextstream.h>
13
15
16#if QT_VERSION < QT_VERSION_CHECK(7, 0, 0)
18{
19 return QMetaType::Float16;
20}
21#endif
22
23/*!
24 \class qfloat16
25 \keyword 16-bit Floating Point Support
26 \ingroup funclists
27 \inmodule QtCore
28 \inheaderfile QFloat16
29 \brief Provides 16-bit floating point support.
30
31 \compares partial
32 \compareswith partial float double {long double} qint8 quint8 qint16 quint16 \
33 qint32 quint32 long {unsigned long} qint64 quint64
34 \endcompareswith
35 \compareswith partial qint128 quint128
36 Comparison with 128-bit integral types is only supported if Qt provides
37 these types.
38 \endcompareswith
39
40 The \c qfloat16 class provides support for half-precision (16-bit) floating
41 point data. It is fully compliant with IEEE 754 as a storage type. This
42 implies that any arithmetic operation on a \c qfloat16 instance results in
43 the value first being converted to a \c float. This conversion to and from
44 \c float is performed by hardware when possible, but on processors that do
45 not natively support half-precision, the conversion is performed through a
46 sequence of lookup table operations.
47
48 \c qfloat16 should be treated as if it were a POD (plain old data) type.
49 Consequently, none of the supported operations need any elaboration beyond
50 stating that it supports all arithmetic operators incident to floating point
51 types.
52
53 \note On x86 and x86-64, to get hardware accelerated conversions you must
54 compile with F16C or AVX2 enabled, or use qFloatToFloat16() and qFloatFromFloat16(),
55 which will detect F16C at runtime.
56
57 \since 5.9
58*/
59
60/*!
61 \fn qfloat16::qfloat16(Qt::Initialization)
62 \since 6.1
63
64 Constructs a qfloat16 without initializing the value.
65*/
66
67/*!
68 \fn bool qIsInf(qfloat16 f)
69 \relates qfloat16
70 \overload qIsInf(float)
71
72 Returns true if the \c qfloat16 \a {f} is equivalent to infinity.
73*/
74
75/*!
76 \fn bool qIsNaN(qfloat16 f)
77 \relates qfloat16
78 \overload qIsNaN(float)
79
80 Returns true if the \c qfloat16 \a {f} is not a number (NaN).
81*/
82
83/*!
84 \fn bool qIsFinite(qfloat16 f)
85 \relates qfloat16
86 \overload qIsFinite(float)
87
88 Returns true if the \c qfloat16 \a {f} is a finite number.
89*/
90
91/*!
92 \internal
93 \since 5.14
94 \fn bool qfloat16::isInf() const noexcept
95
96 Tests whether this \c qfloat16 value is an infinity.
97*/
98
99/*!
100 \internal
101 \since 5.14
102 \fn bool qfloat16::isNaN() const noexcept
103
104 Tests whether this \c qfloat16 value is "not a number".
105*/
106
107/*!
108 \since 5.14
109 \fn bool qfloat16::isNormal() const noexcept
110
111 Returns \c true if this \c qfloat16 value is finite and in normal form.
112
113 \sa qFpClassify()
114*/
115
116/*!
117 \internal
118 \since 5.14
119 \fn bool qfloat16::isFinite() const noexcept
120
121 Tests whether this \c qfloat16 value is finite.
122*/
123
124/*!
125 \since 5.15
126 \fn qfloat16 qfloat16::copySign(qfloat16 sign) const noexcept
127 \obsolete [6.11] Use the copysign() friend function instead.
128*/
129
130/*!
131 \since 6.11
132 \fn qfloat16 qfloat16::copysign(qfloat16 x, qfloat16 sign)
133
134 Returns a qfloat16 with the sign of \a sign but the rest of its value taken
135 from \a{x}. Serves as qfloat16's equivalent of std::copysign().
136
137 \sa signbit()
138*/
139
140/*!
141 \since 6.11
142 \fn bool qfloat16::signbit(qfloat16 x)
143
144 Returns true if qfloat16 \a x is negative, false otherwise. Note this
145 function returns true for negative zero, negative infinity, and negative
146 NaN values.
147
148 \sa copysign()
149*/
150
151/*!
152 \fn int qFpClassify(qfloat16 val)
153 \relates qfloat16
154 \since 5.14
155 \overload qFpClassify(float)
156
157 Returns the floating-point class of \a val.
158*/
159
160/*!
161 \internal
162 \since 5.14
163 Implements qFpClassify() for qfloat16.
164*/
165int qfloat16::fpClassify() const noexcept
166{
167 return isInf() ? FP_INFINITE : isNaN() ? FP_NAN
168 : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL;
169}
170
171/*! \fn int qRound(qfloat16 value)
172 \relates qfloat16
173 \overload qRound(float)
174
175 Rounds \a value to the nearest integer.
176*/
177
178/*! \fn qint64 qRound64(qfloat16 value)
179 \relates qfloat16
180 \overload qRound64(float)
181
182 Rounds \a value to the nearest 64-bit integer.
183*/
184
185/*! \fn bool qFuzzyCompare(qfloat16 p1, qfloat16 p2)
186 \relates qfloat16
187 \overload qFuzzyCompare(float, float)
188
189 Compares the floating point values \a p1 and \a p2 and
190 returns \c true if they are considered equal, otherwise \c false.
191
192 The two numbers are compared in a relative way, where the
193 exactness is stronger the smaller the numbers are.
194 */
195
196#if QT_COMPILER_SUPPORTS_HERE(F16C)
197static inline bool hasFastF16()
198{
199 // qsimd.cpp:detectProcessorFeatures() turns off this feature if AVX
200 // state-saving is not enabled by the OS
201 return qCpuHasFeature(F16C);
202}
203
204#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
205static bool hasFastF16Avx256()
206{
207 // 256-bit AVX512 don't have a performance penalty (see qstring.cpp for more info)
208 return qCpuHasFeature(ArchSkylakeAvx512);
209}
210
211static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
212void qFloatToFloat16_tail_avx256(quint16 *out, const float *in, qsizetype len) noexcept
213{
214 __mmask16 mask = _bzhi_u32(-1, len);
215 __m256 f32 = _mm256_maskz_loadu_ps(mask, in );
216 __m128i f16 = _mm256_maskz_cvtps_ph(mask, f32, _MM_FROUND_TO_NEAREST_INT);
217 _mm_mask_storeu_epi16(out, mask, f16);
218};
219
220static QT_FUNCTION_TARGET(ARCH_SKYLAKE_AVX512)
221void qFloatFromFloat16_tail_avx256(float *out, const quint16 *in, qsizetype len) noexcept
222{
223 __mmask16 mask = _bzhi_u32(-1, len);
224 __m128i f16 = _mm_maskz_loadu_epi16(mask, in);
225 __m256 f32 = _mm256_cvtph_ps(f16);
226 _mm256_mask_storeu_ps(out, mask, f32);
227};
228#endif
229
230QT_FUNCTION_TARGET(F16C)
231static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept
232{
233 constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
234 constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
235 qsizetype i = 0;
236
237 if (len >= Step) {
238 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
239 __m256 f32 = _mm256_loadu_ps(in + offset);
240 __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
241 _mm_storeu_si128(reinterpret_cast<__m128i *>(out + offset), f16);
242 };
243
244 // main loop: convert Step (8) floats per iteration
245 for ( ; i + Step < len; i += Step)
246 convertOneChunk(i);
247
248 // epilogue: convert the last chunk, possibly overlapping with the last
249 // iteration of the loop
250 return convertOneChunk(len - Step);
251 }
252
253#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
254 if (hasFastF16Avx256())
255 return qFloatToFloat16_tail_avx256(out, in, len);
256#endif
257
258 if (len >= HalfStep) {
259 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
260 __m128 f32 = _mm_loadu_ps(in + offset);
261 __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
262 _mm_storel_epi64(reinterpret_cast<__m128i *>(out + offset), f16);
263 };
264
265 // two conversions, possibly overlapping
266 convertOneChunk(0);
267 return convertOneChunk(len - HalfStep);
268 }
269
270 // Inlining "qfloat16::qfloat16(float f)":
271 for ( ; i < len; ++i)
272 out[i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(in[i]), 0), 0);
273}
274
275QT_FUNCTION_TARGET(F16C)
276static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept
277{
278 constexpr qsizetype Step = sizeof(__m256i) / sizeof(float);
279 constexpr qsizetype HalfStep = sizeof(__m128i) / sizeof(float);
280 qsizetype i = 0;
281
282 if (len >= Step) {
283 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
284 __m128i f16 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + offset));
285 __m256 f32 = _mm256_cvtph_ps(f16);
286 _mm256_storeu_ps(out + offset, f32);
287 };
288
289 // main loop: convert Step (8) floats per iteration
290 for ( ; i + Step < len; i += Step)
291 convertOneChunk(i);
292
293 // epilogue: convert the last chunk, possibly overlapping with the last
294 // iteration of the loop
295 return convertOneChunk(len - Step);
296 }
297
298#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
299 if (hasFastF16Avx256())
300 return qFloatFromFloat16_tail_avx256(out, in, len);
301#endif
302
303 if (len >= HalfStep) {
304 auto convertOneChunk = [=](qsizetype offset) QT_FUNCTION_TARGET(F16C) {
305 __m128i f16 = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(in + offset));
306 __m128 f32 = _mm_cvtph_ps(f16);
307 _mm_storeu_ps(out + offset, f32);
308 };
309
310 // two conversions, possibly overlapping
311 convertOneChunk(0);
312 return convertOneChunk(len - HalfStep);
313 }
314
315 // Inlining "qfloat16::operator float()":
316 for ( ; i < len; ++i)
317 out[i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(in[i])));
318}
319
320#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
// This variant is only compiled when the preprocessor has already established
// IEEE half-precision and NEON support, so no runtime detection is needed.
static inline bool hasFastF16()
{
    constexpr bool AlwaysAvailable = true;
    return AlwaysAvailable;
}
325
326static void qFloatToFloat16_fast(quint16 *out, const float *in, qsizetype len) noexcept
327{
328 __fp16 *out_f16 = reinterpret_cast<__fp16 *>(out);
329 qsizetype i = 0;
330 for (; i < len - 3; i += 4)
331 vst1_f16(out_f16 + i, vcvt_f16_f32(vld1q_f32(in + i)));
332 SIMD_EPILOGUE(i, len, 3)
333 out_f16[i] = __fp16(in[i]);
334}
335
336static void qFloatFromFloat16_fast(float *out, const quint16 *in, qsizetype len) noexcept
337{
338 const __fp16 *in_f16 = reinterpret_cast<const __fp16 *>(in);
339 qsizetype i = 0;
340 for (; i < len - 3; i += 4)
341 vst1q_f32(out + i, vcvt_f32_f16(vld1_f16(in_f16 + i)));
342 SIMD_EPILOGUE(i, len, 3)
343 out[i] = float(in_f16[i]);
344}
345#else
// Fallback for builds with neither the F16C nor the NEON code path: there is
// no accelerated conversion, so callers must use the element-wise loop.
static inline bool hasFastF16()
{
    constexpr bool NeverAvailable = false;
    return NeverAvailable;
}
350
351static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
352{
353 Q_UNREACHABLE();
354}
355
356static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
357{
358 Q_UNREACHABLE();
359}
360#endif
361/*!
362 \since 5.11
363 \relates qfloat16
364
365 Converts \a len floats from \a in to qfloat16 and stores them in \a out.
366 Both \a in and \a out must have \a len allocated entries.
367
368 This function is faster than converting values one by one, and will do runtime
369 F16C detection on x86 and x86-64 hardware.
370*/
371Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept
372{
373 if (hasFastF16())
374 return qFloatToFloat16_fast(reinterpret_cast<quint16 *>(out), in, len);
375
376 for (qsizetype i = 0; i < len; ++i)
377 out[i] = qfloat16(in[i]);
378}
379
380/*!
381 \since 5.11
382 \relates qfloat16
383
384 Converts \a len qfloat16 from \a in to floats and stores them in \a out.
385 Both \a in and \a out must have \a len allocated entries.
386
387 This function is faster than converting values one by one, and will do runtime
388 F16C detection on x86 and x86-64 hardware.
389*/
390Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept
391{
392 if (hasFastF16())
393 return qFloatFromFloat16_fast(out, reinterpret_cast<const quint16 *>(in), len);
394
395 for (qsizetype i = 0; i < len; ++i)
396 out[i] = float(in[i]);
397}
398
399/*!
400 \fn size_t qfloat16::qHash(qfloat16 key, size_t seed)
401 \since 6.5.3
402 \qhash{qfloat16}
403
404 \note In Qt versions before 6.5, this operation was provided by the
405 qHash(float) overload. In Qt versions 6.5.0 to 6.5.2, this functionality
406 was broken in various ways. In Qt versions 6.5.3 and 6.6 onwards, this
407 overload restores the Qt 6.4 behavior.
408*/
409
410#ifndef QT_NO_DATASTREAM
411/*!
412 \fn qfloat16::operator<<(QDataStream &ds, qfloat16 f)
413 \relates QDataStream
414 \since 5.9
415
416 Writes a floating point number, \a f, to the stream \a ds using
417 the standard IEEE 754 format. Returns a reference to the stream.
418
419 \note In Qt versions prior to 6.3, this was a member function on
420 QDataStream.
421*/
422QDataStream &operator<<(QDataStream &ds, qfloat16 f)
423{
424 return ds << f.b16;
425}
426
427/*!
428 \fn qfloat16::operator>>(QDataStream &ds, qfloat16 &f)
429 \relates QDataStream
430 \since 5.9
431
432 Reads a floating point number from the stream \a ds into \a f,
433 using the standard IEEE 754 format. Returns a reference to the
434 stream.
435
436 \note In Qt versions prior to 6.3, this was a member function on
437 QDataStream.
438*/
439QDataStream &operator>>(QDataStream &ds, qfloat16 &f)
440{
441 return ds >> f.b16;
442}
443#endif
444
445QTextStream &operator>>(QTextStream &ts, qfloat16 &f16)
446{
447 float f;
448 ts >> f;
449 f16 = qfloat16(f);
450 return ts;
451}
452
453QTextStream &operator<<(QTextStream &ts, qfloat16 f)
454{
455 return ts << float(f);
456}
457
458QT_END_NAMESPACE
459
\keyword 16-bit Floating Point Support\inmodule QtCore \inheaderfile QFloat16
Definition qfloat16.h:57
Q_CORE_EXPORT int fpClassify() const noexcept
Definition qfloat16.cpp:165
QTextStream & operator>>(QTextStream &ts, qfloat16 &f16)
Definition qfloat16.cpp:445
static bool hasFastF16()
Definition qfloat16.cpp:346
static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
Definition qfloat16.cpp:351
static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
Definition qfloat16.cpp:356
QDataStream & operator>>(QDataStream &ds, qfloat16 &f)
Definition qfloat16.cpp:439
Q_CORE_EXPORT void qFloatFromFloat16(float *, const qfloat16 *, qsizetype length) noexcept
Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *, const float *, qsizetype length) noexcept