6#include "private/qsimd_p.h"
9#include <QtCore/qdatastream.h>
10#include <QtCore/qmetatype.h>
11#include <QtCore/qtextstream.h>
143 return isInf() ? FP_INFINITE : isNaN() ? FP_NAN
144 : !(b16 & 0x7fff) ? FP_ZERO : isNormal() ? FP_NORMAL : FP_SUBNORMAL;
172#if QT_COMPILER_SUPPORTS_HERE(F16C)
180#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
181static bool hasFastF16Avx256()
190 __mmask16
mask = _bzhi_u32(-1,
len);
191 __m256 f32 = _mm256_maskz_loadu_ps(
mask,
in );
192 __m128i f16 = _mm256_maskz_cvtps_ph(
mask, f32, _MM_FROUND_TO_NEAREST_INT);
193 _mm_mask_storeu_epi16(
out,
mask, f16);
199 __mmask16
mask = _bzhi_u32(-1,
len);
200 __m128i f16 = _mm_maskz_loadu_epi16(
mask,
in);
201 __m256 f32 = _mm256_cvtph_ps(f16);
202 _mm256_mask_storeu_ps(
out,
mask, f32);
209 constexpr qsizetype Step =
sizeof(__m256i) /
sizeof(
float);
210 constexpr qsizetype HalfStep =
sizeof(__m128i) /
sizeof(
float);
215 __m256 f32 = _mm256_loadu_ps(
in +
offset);
216 __m128i f16 = _mm256_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
217 _mm_storeu_si128(
reinterpret_cast<__m128i *
>(
out +
offset), f16);
221 for ( ;
i + Step <
len;
i += Step)
226 return convertOneChunk(
len - Step);
229#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
230 if (hasFastF16Avx256())
231 return qFloatToFloat16_tail_avx256(
out,
in,
len);
234 if (
len >= HalfStep) {
236 __m128 f32 = _mm_loadu_ps(
in +
offset);
237 __m128i f16 = _mm_cvtps_ph(f32, _MM_FROUND_TO_NEAREST_INT);
238 _mm_storel_epi64(
reinterpret_cast<__m128i *
>(
out +
offset), f16);
243 return convertOneChunk(
len - HalfStep);
247 for ( ;
i <
len; ++
i)
248 out[
i] = _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(
in[
i]), 0), 0);
254 constexpr qsizetype Step =
sizeof(__m256i) /
sizeof(
float);
255 constexpr qsizetype HalfStep =
sizeof(__m128i) /
sizeof(
float);
260 __m128i f16 = _mm_loadu_si128(
reinterpret_cast<const __m128i *
>(
in +
offset));
261 __m256 f32 = _mm256_cvtph_ps(f16);
266 for ( ;
i + Step <
len;
i += Step)
271 return convertOneChunk(
len - Step);
274#if QT_COMPILER_SUPPORTS_HERE(AVX512VL) && QT_COMPILER_SUPPORTS_HERE(AVX512BW)
275 if (hasFastF16Avx256())
276 return qFloatFromFloat16_tail_avx256(
out,
in,
len);
279 if (
len >= HalfStep) {
281 __m128i f16 = _mm_loadl_epi64(
reinterpret_cast<const __m128i *
>(
in +
offset));
282 __m128 f32 = _mm_cvtph_ps(f16);
288 return convertOneChunk(
len - HalfStep);
292 for ( ;
i <
len; ++
i)
293 out[
i] = _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(
in[
i])));
296#elif defined(__ARM_FP16_FORMAT_IEEE) && defined(__ARM_NEON__) && (__ARM_FP & 2)
304 __fp16 *out_f16 =
reinterpret_cast<__fp16 *
>(
out);
306 for (;
i <
len - 3;
i += 4)
307 vst1_f16(out_f16 +
i, vcvt_f16_f32(vld1q_f32(
in +
i)));
309 out_f16[
i] = __fp16(
in[
i]);
314 const __fp16 *in_f16 =
reinterpret_cast<const __fp16 *
>(
in);
316 for (;
i <
len - 3;
i += 4)
317 vst1q_f32(
out +
i, vcvt_f32_f16(vld1_f16(in_f16 +
i)));
319 out[
i] =
float(in_f16[
i]);
388#ifndef QT_NO_DATASTREAM
433 return ts << float(
f);
\inmodule QtCore\reentrant
\keyword 16-bit Floating Point Support\inmodule QtCore \inheaderfile QFloat16
Q_CORE_EXPORT void qFloatFromFloat16(float *out, const qfloat16 *in, qsizetype len) noexcept
Q_CORE_EXPORT void qFloatToFloat16(qfloat16 *out, const float *in, qsizetype len) noexcept
Combined button and popup list for selecting options.
QDataStream & operator<<(QDataStream &ds, qfloat16 f)
static void qFloatToFloat16_fast(quint16 *, const float *, qsizetype) noexcept
static void qFloatFromFloat16_fast(float *, const quint16 *, qsizetype) noexcept
QDataStream & operator>>(QDataStream &ds, qfloat16 &f)
GLenum GLuint GLintptr offset
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
#define qCpuHasFeature(feature)
#define QT_FUNCTION_TARGET(x)
#define SIMD_EPILOGUE(i, length, max)
QTextStream out(stdout)
[7]