Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qcolortransform.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:default
4
7
8#include "qcmyk_p.h"
9#include "qcolorclut_p.h"
10#include "qcolormatrix_p.h"
11#include "qcolorspace_p.h"
12#include "qcolortrc_p.h"
13#include "qcolortrclut_p.h"
14
15#include <QtCore/qatomic.h>
16#include <QtCore/qmath.h>
17#include <QtGui/qcolor.h>
18#include <QtGui/qimage.h>
19#include <QtGui/qtransform.h>
20#include <QtCore/private/qsimd_p.h>
21
22#include <qdebug.h>
23
25
27{
28 if (colorSpaceIn->lut.generated.loadAcquire())
29 return;
30 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
31 if (colorSpaceIn->lut.generated.loadRelaxed())
32 return;
33
34 for (int i = 0; i < 3; ++i) {
35 if (!colorSpaceIn->trc[i].isValid())
36 return;
37 }
38
39 if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) {
40 colorSpaceIn->lut[0] = QColorTrcLut::fromTrc(colorSpaceIn->trc[0]);
41 colorSpaceIn->lut[1] = colorSpaceIn->lut[0];
42 colorSpaceIn->lut[2] = colorSpaceIn->lut[0];
43 } else {
44 for (int i = 0; i < 3; ++i)
45 colorSpaceIn->lut[i] = QColorTrcLut::fromTrc(colorSpaceIn->trc[i]);
46 }
47
48 colorSpaceIn->lut.generated.storeRelease(1);
49}
50
52{
53 if (colorSpaceOut->lut.generated.loadAcquire())
54 return;
55 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
56 if (colorSpaceOut->lut.generated.loadRelaxed())
57 return;
58 for (int i = 0; i < 3; ++i) {
59 if (!colorSpaceOut->trc[i].isValid())
60 return;
61 }
62
63 if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) {
64 colorSpaceOut->lut[0] = QColorTrcLut::fromTrc(colorSpaceOut->trc[0]);
65 colorSpaceOut->lut[1] = colorSpaceOut->lut[0];
66 colorSpaceOut->lut[2] = colorSpaceOut->lut[0];
67 } else {
68 for (int i = 0; i < 3; ++i)
69 colorSpaceOut->lut[i] = QColorTrcLut::fromTrc(colorSpaceOut->trc[i]);
70 }
71
72 colorSpaceOut->lut.generated.storeRelease(1);
73}
74
75/*!
76 \class QColorTransform
77 \brief The QColorTransform class is a transformation between color spaces.
78 \since 5.14
79
80 \ingroup painting
81 \ingroup appearance
82 \inmodule QtGui
83
84 QColorTransform is an instantiation of a transformation between color spaces.
85 It can be applied on color and pixels to convert them from one color space to
86 another.
87
88 To create a QColorTransform, use QColorSpace::transformationToColorSpace():
89
90 \code
91 QColorSpace sourceColorSpace(QColorSpace::SRgb);
92 QColorSpace targetColorSpace(QColorSpace::DisplayP3);
93 QColorTransform srgbToP3Transform = sourceColorSpace.transformationToColorSpace(targetColorSpace);
94 \endcode
95
96 Setting up a QColorTransform takes some preprocessing, so keeping around
97 QColorTransforms that you need often is recommended, instead of generating
98 them on the fly.
99*/
100
101
102QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept = default;
103
104QColorTransform::~QColorTransform() = default;
105
106QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QColorTransformPrivate)
107
108/*!
109 \since 6.4
110 Returns true if the color transform is the identity transform.
111*/
112bool QColorTransform::isIdentity() const noexcept
113{
114 return !d || d->isIdentity();
115}
116
117/*!
118 \fn bool QColorTransform::operator==(const QColorTransform &ct1, const QColorTransform &ct2)
119 \since 6.4
120 Returns true if \a ct1 defines the same color transformation as \a ct2.
121*/
122
123/*!
124 \fn bool QColorTransform::operator!=(const QColorTransform &ct1, const QColorTransform &ct2)
125 \since 6.4
126 Returns true if \a ct1 does not define the same transformation as \a ct2.
127*/
128
129/*! \internal
130*/
131bool QColorTransform::compare(const QColorTransform &other) const
132{
133 if (d == other.d)
134 return true;
135 if (bool(d) != bool(other.d))
136 return d ? d->isIdentity() : other.d->isIdentity();
137 if (d->colorMatrix != other.d->colorMatrix)
138 return false;
139 if (bool(d->colorSpaceIn) != bool(other.d->colorSpaceIn))
140 return false;
141 if (bool(d->colorSpaceOut) != bool(other.d->colorSpaceOut))
142 return false;
143 if (d->colorSpaceIn) {
144 if (d->colorSpaceIn->transformModel != other.d->colorSpaceIn->transformModel)
145 return false;
146 if (d->colorSpaceIn->isThreeComponentMatrix()) {
147 for (int i = 0; i < 3; ++i) {
148 if (d->colorSpaceIn && d->colorSpaceIn->trc[i] != other.d->colorSpaceIn->trc[i])
149 return false;
150 }
151 } else {
152 if (!d->colorSpaceIn->equals(other.d->colorSpaceIn.constData()))
153 return false;
154 }
155 }
156 if (d->colorSpaceOut) {
157 if (d->colorSpaceOut->transformModel != other.d->colorSpaceOut->transformModel)
158 return false;
159 if (d->colorSpaceOut->isThreeComponentMatrix()) {
160 for (int i = 0; i < 3; ++i) {
161 if (d->colorSpaceOut && d->colorSpaceOut->trc[i] != other.d->colorSpaceOut->trc[i])
162 return false;
163 }
164 } else {
165 if (!d->colorSpaceOut->equals(other.d->colorSpaceOut.constData()))
166 return false;
167 }
168 }
169 return true;
170}
171
172/*!
173 Applies the color transformation on the QRgb value \a argb.
174
175 The input should be opaque or unpremultiplied.
176*/
177QRgb QColorTransform::map(QRgb argb) const
178{
179 if (!d)
180 return argb;
181 constexpr float f = 1.0f / 255.0f;
182 QColorVector c = { qRed(argb) * f, qGreen(argb) * f, qBlue(argb) * f };
183 c = d->map(c);
184 return qRgba(c.x * 255 + 0.5f, c.y * 255 + 0.5f, c.z * 255 + 0.5f, qAlpha(argb));
185}
186
187/*!
188 Applies the color transformation on the QRgba64 value \a rgba64.
189
190 The input should be opaque or unpremultiplied.
191*/
193{
194 if (!d)
195 return rgba64;
196 constexpr float f = 1.0f / 65535.0f;
197 QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f };
198 c = d->map(c);
199 return QRgba64::fromRgba64(c.x * 65535.f + 0.5f, c.y * 65535.f + 0.5f, c.z * 65535.f + 0.5f, rgba64.alpha());
200}
201
202/*!
203 Applies the color transformation on the QRgbaFloat16 value \a rgbafp16.
204
205 The input should be opaque or unpremultiplied.
206 \since 6.4
207*/
208QRgbaFloat16 QColorTransform::map(QRgbaFloat16 rgbafp16) const
209{
210 if (!d)
211 return rgbafp16;
212 QColorVector c(rgbafp16.r, rgbafp16.g, rgbafp16.b);
213 c = d->mapExtended(c);
214 rgbafp16.r = qfloat16(c.x);
215 rgbafp16.g = qfloat16(c.y);
216 rgbafp16.b = qfloat16(c.z);
217 return rgbafp16;
218}
219
220/*!
221 Applies the color transformation on the QRgbaFloat32 value \a rgbafp32.
222
223 The input should be opaque or unpremultiplied.
224 \since 6.4
225*/
226QRgbaFloat32 QColorTransform::map(QRgbaFloat32 rgbafp32) const
227{
228 if (!d)
229 return rgbafp32;
230 QColorVector c(rgbafp32.r, rgbafp32.g, rgbafp32.b);
231 c = d->mapExtended(c);
232 rgbafp32.r = c.x;
233 rgbafp32.g = c.y;
234 rgbafp32.b = c.z;
235 return rgbafp32;
236}
237
238/*!
239 Applies the color transformation on the QColor value \a color.
240
241*/
242QColor QColorTransform::map(const QColor &color) const
243{
244 if (!d)
245 return color;
246 QColor clr = color;
247 if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Rgb) {
248 if (color.spec() != QColor::ExtendedRgb && color.spec() != QColor::Rgb)
249 clr = clr.toRgb();
250 } else if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Cmyk) {
251 if (color.spec() != QColor::Cmyk)
252 clr = clr.toCmyk();
253 }
254
255 QColorVector c =
256 (clr.spec() == QColor::Cmyk)
257 ? QColorVector(clr.cyanF(), clr.magentaF(), clr.yellowF(), clr.blackF())
258 : QColorVector(clr.redF(), clr.greenF(), clr.blueF());
259
260 c = d->mapExtended(c);
261
262 QColor out;
263 if (d->colorSpaceOut->colorModel == QColorSpace::ColorModel::Cmyk) {
264 c.x = std::clamp(c.x, 0.f, 1.f);
265 c.y = std::clamp(c.y, 0.f, 1.f);
266 c.z = std::clamp(c.z, 0.f, 1.f);
267 c.w = std::clamp(c.w, 0.f, 1.f);
268 out.setCmykF(c.x, c.y, c.z, c.w, color.alphaF());
269 } else {
270 out.setRgbF(c.x, c.y, c.z, color.alphaF());
271 }
272 return out;
273}
274
275// Optimized sub-routines for fast block based conversion:
276
281
282template<ApplyMatrixForm doClamp = DoClamp>
283static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix)
284{
285#if defined(__SSE2__)
286 const __m128 minV = _mm_set1_ps(0.0f);
287 const __m128 maxV = _mm_set1_ps(1.0f);
288 const __m128 xMat = _mm_loadu_ps(&colorMatrix.r.x);
289 const __m128 yMat = _mm_loadu_ps(&colorMatrix.g.x);
290 const __m128 zMat = _mm_loadu_ps(&colorMatrix.b.x);
291 for (qsizetype j = 0; j < len; ++j) {
292 __m128 c = _mm_loadu_ps(&buffer[j].x);
293 __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0));
294 __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1));
295 __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2));
296 cx = _mm_mul_ps(cx, xMat);
297 cy = _mm_mul_ps(cy, yMat);
298 cz = _mm_mul_ps(cz, zMat);
299 cx = _mm_add_ps(cx, cy);
300 cx = _mm_add_ps(cx, cz);
301 // Clamp:
302 if (doClamp) {
303 cx = _mm_min_ps(cx, maxV);
304 cx = _mm_max_ps(cx, minV);
305 }
306 _mm_storeu_ps(&buffer[j].x, cx);
307 }
308#elif defined(__ARM_NEON__)
309 const float32x4_t minV = vdupq_n_f32(0.0f);
310 const float32x4_t maxV = vdupq_n_f32(1.0f);
311 const float32x4_t xMat = vld1q_f32(&colorMatrix.r.x);
312 const float32x4_t yMat = vld1q_f32(&colorMatrix.g.x);
313 const float32x4_t zMat = vld1q_f32(&colorMatrix.b.x);
314 for (qsizetype j = 0; j < len; ++j) {
315 float32x4_t c = vld1q_f32(&buffer[j].x);
316 float32x4_t cx = vmulq_n_f32(xMat, vgetq_lane_f32(c, 0));
317 float32x4_t cy = vmulq_n_f32(yMat, vgetq_lane_f32(c, 1));
318 float32x4_t cz = vmulq_n_f32(zMat, vgetq_lane_f32(c, 2));
319 cx = vaddq_f32(cx, cy);
320 cx = vaddq_f32(cx, cz);
321 // Clamp:
322 if (doClamp) {
323 cx = vminq_f32(cx, maxV);
324 cx = vmaxq_f32(cx, minV);
325 }
326 vst1q_f32(&buffer[j].x, cx);
327 }
328#else
329 for (qsizetype j = 0; j < len; ++j) {
330 const QColorVector cv = colorMatrix.map(buffer[j]);
331 if (doClamp) {
332 buffer[j].x = std::clamp(cv.x, 0.f, 1.f);
333 buffer[j].y = std::clamp(cv.y, 0.f, 1.f);
334 buffer[j].z = std::clamp(cv.z, 0.f, 1.f);
335 } else {
336 buffer[j] = cv;
337 }
338 }
339#endif
340}
341
342template<ApplyMatrixForm doClamp = DoClamp>
343static void clampIfNeeded(QColorVector *buffer, const qsizetype len)
344{
345 if constexpr (doClamp != DoClamp)
346 return;
347#if defined(__SSE2__)
348 const __m128 minV = _mm_set1_ps(0.0f);
349 const __m128 maxV = _mm_set1_ps(1.0f);
350 for (qsizetype j = 0; j < len; ++j) {
351 __m128 c = _mm_loadu_ps(&buffer[j].x);
352 c = _mm_min_ps(c, maxV);
353 c = _mm_max_ps(c, minV);
354 _mm_storeu_ps(&buffer[j].x, c);
355 }
356#elif defined(__ARM_NEON__)
357 const float32x4_t minV = vdupq_n_f32(0.0f);
358 const float32x4_t maxV = vdupq_n_f32(1.0f);
359 for (qsizetype j = 0; j < len; ++j) {
360 float32x4_t c = vld1q_f32(&buffer[j].x);
361 c = vminq_f32(c, maxV);
362 c = vmaxq_f32(c, minV);
363 vst1q_f32(&buffer[j].x, c);
364 }
365#else
366 for (qsizetype j = 0; j < len; ++j) {
367 const QColorVector cv = buffer[j];
368 buffer[j].x = std::clamp(cv.x, 0.f, 1.f);
369 buffer[j].y = std::clamp(cv.y, 0.f, 1.f);
370 buffer[j].z = std::clamp(cv.z, 0.f, 1.f);
371 }
372#endif
373}
374
375#if defined(__SSE2__) || defined(__ARM_NEON__)
376template<typename T>
377static constexpr inline bool isArgb();
378template<>
379constexpr inline bool isArgb<QRgb>() { return true; }
380template<>
381constexpr inline bool isArgb<QRgba64>() { return false; }
382
383template<typename T>
384static inline int getAlpha(const T &p);
385template<>
386inline int getAlpha<QRgb>(const QRgb &p)
387{ return qAlpha(p); }
388template<>
389inline int getAlpha<QRgba64>(const QRgba64 &p)
390{ return p.alpha(); }
391
392template<typename T>
393static inline constexpr int getFactor();
394template<>
395inline constexpr int getFactor<QRgb>()
396{ return 255; }
397template<>
398inline constexpr int getFactor<QRgba64>()
399{ return 65535; }
400#endif
401
402template<typename T>
403static float getAlphaF(const T &);
404template<> float getAlphaF(const QRgb &r)
405{
406 return qAlpha(r) * (1.f / 255.f);
407}
408template<> float getAlphaF(const QCmyk32 &)
409{
410 return 1.f;
411}
412template<> float getAlphaF(const QRgba64 &r)
413{
414 return r.alpha() * (1.f / 65535.f);
415}
416template<> float getAlphaF(const QRgbaFloat32 &r)
417{
418 return r.a;
419}
420
421template<typename T>
422static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
423template<typename T>
424static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
425
426#if defined(__SSE2__)
427// Load to [0-alpha] in 4x32 SIMD
428template<typename T>
429static inline void loadP(const T &p, __m128i &v);
430
431template<>
432inline void loadP<QRgb>(const QRgb &p, __m128i &v)
433{
434 v = _mm_cvtsi32_si128(p);
435#if defined(__SSE4_1__)
436 v = _mm_cvtepu8_epi32(v);
437#else
438 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
439 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
440#endif
441}
442
443template<>
444inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v)
445{
446 v = _mm_loadl_epi64((const __m128i *)&p);
447#if defined(__SSE4_1__)
448 v = _mm_cvtepu16_epi32(v);
449#else
450 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
451#endif
452}
453
454template<typename T>
455static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
456{
457 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
458 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
459 constexpr bool isARGB = isArgb<T>();
460 const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
461 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
462 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
463 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
464 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
465 QColorTrcLut::Resolution);
466 for (qsizetype i = 0; i < len; ++i) {
467 __m128i v;
468 loadP<T>(src[i], v);
469 __m128 vf = _mm_cvtepi32_ps(v);
470 // Approximate 1/a:
471 __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3));
472 __m128 via = _mm_rcp_ps(va);
473 via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va)));
474 // v * (1/a)
475 vf = _mm_mul_ps(vf, via);
476
477 // Handle zero alpha
478 __m128 vAlphaMask = _mm_cmpeq_ps(va, _mm_set1_ps(0.0f));
479 vf = _mm_andnot_ps(vAlphaMask, vf);
480
481 // LUT
482 v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
483 const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0);
484 const int gidx = _mm_extract_epi16(v, 2);
485 const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4);
486 if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) {
487 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
488 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
489 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
490 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
491
492 _mm_storeu_ps(&buffer[i].x, vf);
493 } else {
494 constexpr float f = 1.f / QColorTrcLut::Resolution;
495 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
496 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
497 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
498 }
499 }
500}
501
502template<>
503void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
504{
505 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
506 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
507 const __m128 vZero = _mm_set1_ps(0.0f);
508 const float factor = 1.f / float(QColorTrcLut::Resolution);
509 const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor,
510 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor,
511 d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor,
512 INFINITY);
513 for (qsizetype i = 0; i < len; ++i) {
514 __m128 vf = _mm_loadu_ps(&src[i].r);
515 // Approximate 1/a:
516 __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3));
517 __m128 via = _mm_rcp_ps(va);
518 via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va)));
519 // v * (1/a)
520 vf = _mm_mul_ps(vf, via);
521
522 // Handle zero alpha
523 __m128 vAlphaMask = _mm_cmpeq_ps(va, vZero);
524 vf = _mm_andnot_ps(vAlphaMask, vf);
525
526 // LUT
527 const __m128 under = _mm_cmplt_ps(vf, vZero);
528 const __m128 over = _mm_cmpgt_ps(vf, vRangeMax);
529 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
530 // Within gamut
531 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
532 const int ridx = _mm_extract_epi16(v, 0);
533 const int gidx = _mm_extract_epi16(v, 2);
534 const int bidx = _mm_extract_epi16(v, 4);
535 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
536 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
537 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
538 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
539 _mm_storeu_ps(&buffer[i].x, vf);
540 } else {
541 // Outside 0.0->1.0 gamut
542 _mm_storeu_ps(&buffer[i].x, vf);
543 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(buffer[i].x);
544 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(buffer[i].y);
545 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(buffer[i].z);
546 }
547 }
548}
549
550// Load to [0->TrcResolution] in 4x32 SIMD
551template<typename T>
552static inline void loadPU(const T &p, __m128i &v);
553
554template<>
555inline void loadPU<QRgb>(const QRgb &p, __m128i &v)
556{
557 v = _mm_cvtsi32_si128(p);
558#if defined(__SSE4_1__)
559 v = _mm_cvtepu8_epi32(v);
560#else
561 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
562 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
563#endif
564 v = _mm_slli_epi32(v, QColorTrcLut::ShiftUp);
565}
566
567template<>
568inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v)
569{
570 v = _mm_loadl_epi64((const __m128i *)&p);
571 v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
572#if defined(__SSE4_1__)
573 v = _mm_cvtepu16_epi32(v);
574#else
575 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
576#endif
577 v = _mm_srli_epi32(v, QColorTrcLut::ShiftDown);
578}
579
580template<typename T>
581void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
582{
583 constexpr bool isARGB = isArgb<T>();
584 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
585 const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
586 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
587 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
588 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
589 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
590 QColorTrcLut::Resolution);
591 for (qsizetype i = 0; i < len; ++i) {
592 __m128i v;
593 loadPU<T>(src[i], v);
594 const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0);
595 const int gidx = _mm_extract_epi16(v, 2);
596 const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4);
597 if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) {
598 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
599 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
600 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
601 __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
602 _mm_storeu_ps(&buffer[i].x, vf);
603 } else {
604 constexpr float f = 1.f / QColorTrcLut::Resolution;
605 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
606 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
607 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
608 }
609 }
610}
611
612template<>
613void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
614{
615 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
616 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
617 const __m128 vZero = _mm_set1_ps(0.0f);
618 const float factor = 1.f / float(QColorTrcLut::Resolution);
619 const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor,
620 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor,
621 d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor,
622 INFINITY);
623 for (qsizetype i = 0; i < len; ++i) {
624 __m128 vf = _mm_loadu_ps(&src[i].r);
625 const __m128 under = _mm_cmplt_ps(vf, vZero);
626 const __m128 over = _mm_cmpgt_ps(vf, vRangeMax);
627 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
628 // Within gamut
629 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
630 const int ridx = _mm_extract_epi16(v, 0);
631 const int gidx = _mm_extract_epi16(v, 2);
632 const int bidx = _mm_extract_epi16(v, 4);
633 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
634 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
635 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
636 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
637 _mm_storeu_ps(&buffer[i].x, vf);
638 } else {
639 // Outside 0.0->1.0 gamut
640 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(src[i].r);
641 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(src[i].g);
642 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(src[i].b);
643 }
644 }
645}
646
647#elif defined(__ARM_NEON__)
648// Load to [0-alpha] in 4x32 SIMD
649template<typename T>
650static inline void loadP(const T &p, uint32x4_t &v);
651
652template<>
653inline void loadP<QRgb>(const QRgb &p, uint32x4_t &v)
654{
655 v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
656}
657
658template<>
659inline void loadP<QRgba64>(const QRgba64 &p, uint32x4_t &v)
660{
661 v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p))));
662}
663
664static inline bool test_all_zero(uint32x4_t p)
665{
666#if defined(Q_PROCESSOR_ARM_64)
667 return vaddvq_u32(p) == 0;
668#else
669 const uint32x2_t tmp = vpadd_u32(vget_low_u32(p), vget_high_u32(p));
670 return vget_lane_u32(vpadd_u32(tmp, tmp), 0) == 0;
671#endif
672}
673
674template<typename T>
675static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
676{
677 constexpr bool isARGB = isArgb<T>();
678 const float iFF00 = 1.0f / (255 * 256);
679 const uint32x4_t vRangeMax = qvsetq_n_u32(
680 isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
681 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
682 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
683 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
684 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
685 QColorTrcLut::Resolution);
686 for (qsizetype i = 0; i < len; ++i) {
687 uint32x4_t v;
688 loadP<T>(src[i], v);
689 float32x4_t vf = vcvtq_f32_u32(v);
690 // Approximate 1/a:
691 float32x4_t va = vdupq_n_f32(vgetq_lane_f32(vf, 3));
692 float32x4_t via = vrecpeq_f32(va); // estimate 1/a
693 via = vmulq_f32(vrecpsq_f32(va, via), via);
694
695 // v * (1/a)
696 vf = vmulq_f32(vf, via);
697
698 // Handle zero alpha
699#if defined(Q_PROCESSOR_ARM_64)
700 uint32x4_t vAlphaMask = vceqzq_f32(va);
701#else
702 uint32x4_t vAlphaMask = vceqq_f32(va, vdupq_n_f32(0.0));
703#endif
704 vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask));
705
706 // LUT
707 v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
708 const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0);
709 const int gidx = vgetq_lane_u32(v, 1);
710 const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2);
711 if (test_all_zero(vcgtq_u32(v, vRangeMax))) {
712 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0);
713 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1);
714 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2);
715 vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
716
717 vst1q_f32(&buffer[i].x, vf);
718 } else {
719 constexpr float f = 1.f / QColorTrcLut::Resolution;
720 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
721 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
722 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
723 }
724 }
725}
726
727// Load to [0->TrcResolution] in 4x32 SIMD
728template<typename T>
729static inline void loadPU(const T &p, uint32x4_t &v);
730
731template<>
732inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v)
733{
734 v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
735 v = vshlq_n_u32(v, QColorTrcLut::ShiftUp);
736}
737
738template<>
739inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v)
740{
741 uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)));
742 v16 = vsub_u16(v16, vshr_n_u16(v16, 8));
743 v = vmovl_u16(v16);
744 v = vshrq_n_u32(v, QColorTrcLut::ShiftDown);
745}
746
747template<typename T>
748void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
749{
750 constexpr bool isARGB = isArgb<T>();
751 const float iFF00 = 1.0f / (255 * 256);
752 const uint32x4_t vRangeMax = qvsetq_n_u32(
753 isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
754 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
755 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
756 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
757 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
758 QColorTrcLut::Resolution);
759 for (qsizetype i = 0; i < len; ++i) {
760 uint32x4_t v;
761 loadPU<T>(src[i], v);
762 const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0);
763 const int gidx = vgetq_lane_u32(v, 1);
764 const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2);
765 if (test_all_zero(vcgtq_u32(v, vRangeMax))) {
766 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0);
767 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1);
768 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2);
769 float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
770 vst1q_f32(&buffer[i].x, vf);
771 } else {
772 constexpr float f = 1.f / QColorTrcLut::Resolution;
773 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
774 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
775 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
776 }
777 }
778}
779#else
780template<>
781void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
782{
783 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
784 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
785 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
786 for (qsizetype i = 0; i < len; ++i) {
787 const uint p = src[i];
788 const int a = qAlpha(p);
789 if (a) {
790 const float ia = float(QColorTrcLut::Resolution) / a;
791 const int ridx = int(qRed(p) * ia + 0.5f);
792 const int gidx = int(qGreen(p) * ia + 0.5f);
793 const int bidx = int(qBlue(p) * ia + 0.5f);
794 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
795 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
796 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
797 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
798 } else {
799 constexpr float f = 1.f / QColorTrcLut::Resolution;
800 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
801 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
802 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
803 }
804 } else {
805 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
806 }
807 }
808}
809
810template<>
811void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
812{
813 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
814 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
815 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
816 for (qsizetype i = 0; i < len; ++i) {
817 const QRgba64 &p = src[i];
818 const int a = p.alpha();
819 if (a) {
820 const float ia = float(QColorTrcLut::Resolution) / a;
821 const int ridx = int(p.red() * ia + 0.5f);
822 const int gidx = int(p.green() * ia + 0.5f);
823 const int bidx = int(p.blue() * ia + 0.5f);
824 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
825 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
826 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
827 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
828 } else {
829 constexpr float f = 1.f / QColorTrcLut::Resolution;
830 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
831 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
832 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
833 }
834 } else {
835 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
836 }
837 }
838}
839
840template<>
841void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
842{
843 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
844 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
845 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
846 for (qsizetype i = 0; i < len; ++i) {
847 const uint p = src[i];
848 const int ridx = qRed(p) << QColorTrcLut::ShiftUp;
849 const int gidx = qGreen(p) << QColorTrcLut::ShiftUp;
850 const int bidx = qBlue(p) << QColorTrcLut::ShiftUp;
851 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
852 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
853 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
854 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
855 } else {
856 constexpr float f = 1.f / QColorTrcLut::Resolution;
857 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
858 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
859 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
860 }
861 }
862}
863
864static int u16toidx(int c)
865{
866 c -= c >> 8;
867 return c >> QColorTrcLut::ShiftDown;
868}
869
870template<>
871void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
872{
873 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
874 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
875 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
876 for (qsizetype i = 0; i < len; ++i) {
877 const QRgba64 &p = src[i];
878 const int ridx = u16toidx(p.red());
879 const int gidx = u16toidx(p.green());
880 const int bidx = u16toidx(p.blue());
881 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
882 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
883 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
884 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
885 } else {
886 constexpr float f = 1.f / QColorTrcLut::Resolution;
887 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
888 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
889 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
890 }
891 }
892}
893#endif
894#if !defined(__SSE2__)
895template<>
896void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
897{
898 for (qsizetype i = 0; i < len; ++i) {
899 const QRgbaFloat32 &p = src[i];
900 const float a = p.a;
901 if (a) {
902 const float ia = 1.0f / a;
903 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r * ia);
904 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g * ia);
905 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b * ia);
906 } else {
907 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
908 }
909 }
910}
911
912template<>
913void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
914{
915 for (qsizetype i = 0; i < len; ++i) {
916 const QRgbaFloat32 &p = src[i];
917 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r);
918 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g);
919 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b);
920 }
921}
922#endif
923
924#if defined(__SSE2__)
925template<typename T>
926static inline void storeP(T &p, __m128i &v, int a);
927template<>
928inline void storeP<QRgb>(QRgb &p, __m128i &v, int a)
929{
930 v = _mm_packs_epi32(v, v);
931 v = _mm_insert_epi16(v, a, 3);
932 p = _mm_cvtsi128_si32(_mm_packus_epi16(v, v));
933}
934template<>
935inline void storeP<QRgba64>(QRgba64 &p, __m128i &v, int a)
936{
937#if defined(__SSE4_1__)
938 v = _mm_packus_epi32(v, v);
939 v = _mm_insert_epi16(v, a, 3);
940 _mm_storel_epi64((__m128i *)&p, v);
941#else
942 const int r = _mm_extract_epi16(v, 0);
943 const int g = _mm_extract_epi16(v, 2);
944 const int b = _mm_extract_epi16(v, 4);
945 p = qRgba64(r, g, b, a);
946#endif
947}
948
949template<typename D, typename S,
950 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
951static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
952 const QColorTransformPrivate *d_ptr)
953{
954 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
955 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
956 constexpr bool isARGB = isArgb<D>();
957 static_assert(getFactor<D>() >= getFactor<S>());
958 for (qsizetype i = 0; i < len; ++i) {
959 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
960 __m128 vf = _mm_loadu_ps(&buffer[i].x);
961 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
962 __m128 va = _mm_mul_ps(_mm_set1_ps(a), iFF00);
963 const int ridx = _mm_extract_epi16(v, 0);
964 const int gidx = _mm_extract_epi16(v, 2);
965 const int bidx = _mm_extract_epi16(v, 4);
966 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 4 : 0);
967 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
968 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 4);
969 vf = _mm_cvtepi32_ps(v);
970 vf = _mm_mul_ps(vf, va);
971 v = _mm_cvtps_epi32(vf);
972 storeP<D>(dst[i], v, a);
973 }
974}
975
976template<typename S>
977static void storePremultiplied(QRgbaFloat32 *dst, const S *src,
978 const QColorVector *buffer, const qsizetype len,
979 const QColorTransformPrivate *d_ptr)
980{
981 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
982 const __m128 vZero = _mm_set1_ps(0.0f);
983 const __m128 vOne = _mm_set1_ps(1.0f);
984 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
985 for (qsizetype i = 0; i < len; ++i) {
986 const float a = getAlphaF<S>(src[i]);
987 __m128 va = _mm_set1_ps(a);
988 __m128 vf = _mm_loadu_ps(&buffer[i].x);
989 const __m128 under = _mm_cmplt_ps(vf, vZero);
990 const __m128 over = _mm_cmpgt_ps(vf, vOne);
991 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
992 // Within gamut
993 va = _mm_mul_ps(va, viFF00);
994 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
995 const int ridx = _mm_extract_epi16(v, 0);
996 const int gidx = _mm_extract_epi16(v, 2);
997 const int bidx = _mm_extract_epi16(v, 4);
998 v = _mm_setzero_si128();
999 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
1000 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1001 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1002 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), va);
1003 _mm_store_ps(&dst[i].r, vf);
1004 } else {
1005 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1006 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1007 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1008 vf = _mm_mul_ps(_mm_load_ps(&dst[i].r), va);
1009 _mm_store_ps(&dst[i].r, vf);
1010 }
1011 dst[i].a = a;
1012 }
1013}
1014
1015template<typename T>
1016static inline void storePU(T &p, __m128i &v, int a);
1017template<>
1018inline void storePU<QRgb>(QRgb &p, __m128i &v, int a)
1019{
1020 v = _mm_add_epi16(v, _mm_set1_epi16(0x80));
1021 v = _mm_srli_epi16(v, 8);
1022 v = _mm_insert_epi16(v, a, 3);
1023 p = _mm_cvtsi128_si32(_mm_packus_epi16(v, v));
1024}
1025template<>
1026inline void storePU<QRgba64>(QRgba64 &p, __m128i &v, int a)
1027{
1028 v = _mm_add_epi16(v, _mm_srli_epi16(v, 8));
1029 v = _mm_insert_epi16(v, a, 3);
1030 _mm_storel_epi64((__m128i *)&p, v);
1031}
1032
1033template<typename D, typename S,
1034 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1035static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1036 const QColorTransformPrivate *d_ptr)
1037{
1038 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1039 constexpr bool isARGB = isArgb<D>();
1040 static_assert(getFactor<D>() >= getFactor<S>());
1041 for (qsizetype i = 0; i < len; ++i) {
1042 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1043 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1044 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1045 const int ridx = _mm_extract_epi16(v, 0);
1046 const int gidx = _mm_extract_epi16(v, 2);
1047 const int bidx = _mm_extract_epi16(v, 4);
1048 v = _mm_setzero_si128();
1049 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0);
1050 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
1051 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2);
1052 storePU<D>(dst[i], v, a);
1053 }
1054}
1055
1056template<typename S>
1057void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src,
1058 const QColorVector *buffer, const qsizetype len,
1059 const QColorTransformPrivate *d_ptr)
1060{
1061 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1062 const __m128 vZero = _mm_set1_ps(0.0f);
1063 const __m128 vOne = _mm_set1_ps(1.0f);
1064 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
1065 for (qsizetype i = 0; i < len; ++i) {
1066 const float a = getAlphaF<S>(src[i]);
1067 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1068 const __m128 under = _mm_cmplt_ps(vf, vZero);
1069 const __m128 over = _mm_cmpgt_ps(vf, vOne);
1070 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
1071 // Within gamut
1072 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1073 const int ridx = _mm_extract_epi16(v, 0);
1074 const int gidx = _mm_extract_epi16(v, 2);
1075 const int bidx = _mm_extract_epi16(v, 4);
1076 v = _mm_setzero_si128();
1077 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
1078 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1079 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1080 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
1081 _mm_storeu_ps(&dst[i].r, vf);
1082 } else {
1083 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1084 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1085 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1086 }
1087 dst[i].a = a;
1088 }
1089}
1090
1091template<typename T>
1092static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
1093 const QColorTransformPrivate *d_ptr)
1094{
1095 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1096 constexpr bool isARGB = isArgb<T>();
1097 for (qsizetype i = 0; i < len; ++i) {
1098 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1099 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1100 const int ridx = _mm_extract_epi16(v, 0);
1101 const int gidx = _mm_extract_epi16(v, 2);
1102 const int bidx = _mm_extract_epi16(v, 4);
1103 v = _mm_setzero_si128();
1104 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0);
1105 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
1106 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2);
1107 storePU<T>(dst[i], v, isARGB ? 255 : 0xffff);
1108 }
1109}
1110
1111template<>
1112void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len,
1113 const QColorTransformPrivate *d_ptr)
1114{
1115 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1116 const __m128 vZero = _mm_set1_ps(0.0f);
1117 const __m128 vOne = _mm_set1_ps(1.0f);
1118 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
1119 for (qsizetype i = 0; i < len; ++i) {
1120 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1121 const __m128 under = _mm_cmplt_ps(vf, vZero);
1122 const __m128 over = _mm_cmpgt_ps(vf, vOne);
1123 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
1124 // Within gamut
1125 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1126 const int ridx = _mm_extract_epi16(v, 0);
1127 const int gidx = _mm_extract_epi16(v, 2);
1128 const int bidx = _mm_extract_epi16(v, 4);
1129 v = _mm_setzero_si128();
1130 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
1131 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1132 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1133 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
1134 _mm_store_ps(&dst[i].r, vf);
1135 } else {
1136 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1137 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1138 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1139 }
1140 dst[i].a = 1.0f;
1141 }
1142}
1143
1144#elif defined(__ARM_NEON__)
1145template<typename T>
1146static inline void storeP(T &p, const uint16x4_t &v);
1147template<>
1148inline void storeP<QRgb>(QRgb &p, const uint16x4_t &v)
1149{
1150 p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0);
1151}
1152template<>
1153inline void storeP<QRgba64>(QRgba64 &p, const uint16x4_t &v)
1154{
1155 vst1_u16((uint16_t *)&p, v);
1156}
1157
1158template<typename D, typename S,
1159 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1160static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1161 const QColorTransformPrivate *d_ptr)
1162{
1163 const float iFF00 = 1.0f / (255 * 256);
1164 constexpr bool isARGB = isArgb<D>();
1165 static_assert(getFactor<D>() >= getFactor<S>());
1166 for (qsizetype i = 0; i < len; ++i) {
1167 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1168 float32x4_t vf = vld1q_f32(&buffer[i].x);
1169 uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
1170 const int ridx = vgetq_lane_u32(v, 0);
1171 const int gidx = vgetq_lane_u32(v, 1);
1172 const int bidx = vgetq_lane_u32(v, 2);
1173 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1174 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1175 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1176 vf = vcvtq_f32_u32(v);
1177 vf = vmulq_n_f32(vf, a * iFF00);
1178 vf = vaddq_f32(vf, vdupq_n_f32(0.5f));
1179 v = vcvtq_u32_f32(vf);
1180 uint16x4_t v16 = vmovn_u32(v);
1181 v16 = vset_lane_u16(a, v16, 3);
1182 storeP<D>(dst[i], v16);
1183 }
1184}
1185
1186template<typename T>
1187static inline void storePU(T &p, uint16x4_t &v, int a);
1188template<>
1189inline void storePU<QRgb>(QRgb &p, uint16x4_t &v, int a)
1190{
1191 v = vadd_u16(v, vdup_n_u16(0x80));
1192 v = vshr_n_u16(v, 8);
1193 v = vset_lane_u16(a, v, 3);
1194 p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0);
1195}
1196template<>
1197inline void storePU<QRgba64>(QRgba64 &p, uint16x4_t &v, int a)
1198{
1199 v = vadd_u16(v, vshr_n_u16(v, 8));
1200 v = vset_lane_u16(a, v, 3);
1201 vst1_u16((uint16_t *)&p, v);
1202}
1203
1204template<typename D, typename S,
1205 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1206static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1207 const QColorTransformPrivate *d_ptr)
1208{
1209 constexpr bool isARGB = isArgb<D>();
1210 static_assert(getFactor<D>() >= getFactor<S>());
1211 for (qsizetype i = 0; i < len; ++i) {
1212 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1213 float32x4_t vf = vld1q_f32(&buffer[i].x);
1214 uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
1215 const int ridx = vget_lane_u16(v, 0);
1216 const int gidx = vget_lane_u16(v, 1);
1217 const int bidx = vget_lane_u16(v, 2);
1218 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1219 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1220 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1221 storePU<D>(dst[i], v, a);
1222 }
1223}
1224
1225template<typename T>
1226static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
1227 const QColorTransformPrivate *d_ptr)
1228{
1229 constexpr bool isARGB = isArgb<T>();
1230 for (qsizetype i = 0; i < len; ++i) {
1231 float32x4_t vf = vld1q_f32(&buffer[i].x);
1232 uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
1233 const int ridx = vget_lane_u16(v, 0);
1234 const int gidx = vget_lane_u16(v, 1);
1235 const int bidx = vget_lane_u16(v, 2);
1236 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1237 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1238 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1239 storePU<T>(dst[i], v, isARGB ? 255 : 0xffff);
1240 }
1241}
1242#else
1243static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
1244 const QColorTransformPrivate *d_ptr)
1245{
1246 for (qsizetype i = 0; i < len; ++i) {
1247 const int a = qAlpha(src[i]);
1248 const float fa = a / (255.0f * 256.0f);
1249 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
1250 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
1251 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
1252 dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
1253 }
1254}
1255
1256static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
1257 const QColorTransformPrivate *d_ptr)
1258{
1259 for (qsizetype i = 0; i < len; ++i) {
1260 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
1261 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
1262 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
1263 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1264 }
1265}
1266
1267static void storeOpaque(QRgb *dst, const QColorVector *buffer, const qsizetype len,
1268 const QColorTransformPrivate *d_ptr)
1269{
1270 for (qsizetype i = 0; i < len; ++i) {
1271 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
1272 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
1273 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
1274 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1275 }
1276}
1277
1278template<typename S>
1279static void storePremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1280 const QColorTransformPrivate *d_ptr)
1281{
1282 for (qsizetype i = 0; i < len; ++i) {
1283 const int a = getAlphaF(src[i]) * 65535.f;
1284 const float fa = a / (255.0f * 256.0f);
1285 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
1286 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
1287 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
1288 dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
1289 }
1290}
1291
1292template<typename S>
1293static void storeUnpremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1294 const QColorTransformPrivate *d_ptr)
1295{
1296 for (qsizetype i = 0; i < len; ++i) {
1297 const int a = getAlphaF(src[i]) * 65535.f;
1298 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
1299 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
1300 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
1301 dst[i] = qRgba64(r, g, b, a);
1302 }
1303}
1304
1305static void storeOpaque(QRgba64 *dst, const QColorVector *buffer, const qsizetype len,
1306 const QColorTransformPrivate *d_ptr)
1307{
1308 for (qsizetype i = 0; i < len; ++i) {
1309 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
1310 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
1311 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
1312 dst[i] = qRgba64(r, g, b, 0xFFFF);
1313 }
1314}
1315#endif
1316#if !defined(__SSE2__)
1317template<typename S>
1318static void storePremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer,
1319 const qsizetype len, const QColorTransformPrivate *d_ptr)
1320{
1321 for (qsizetype i = 0; i < len; ++i) {
1322 const float a = getAlphaF(src[i]);
1323 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x) * a;
1324 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y) * a;
1325 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z) * a;
1326 dst[i].a = a;
1327 }
1328}
1329
1330template<typename S>
1331static void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer,
1332 const qsizetype len, const QColorTransformPrivate *d_ptr)
1333{
1334 for (qsizetype i = 0; i < len; ++i) {
1335 const float a = getAlphaF(src[i]);
1336 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1337 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1338 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1339 dst[i].a = a;
1340 }
1341}
1342
1343static void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len,
1344 const QColorTransformPrivate *d_ptr)
1345{
1346 for (qsizetype i = 0; i < len; ++i) {
1347 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1348 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1349 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1350 dst[i].a = 1.0f;
1351 }
1352}
1353#endif
1354
1355static void loadGray(QColorVector *buffer, const quint8 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
1356{
1357 if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray ||
1358 (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] &&
1359 d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) {
1360 for (qsizetype i = 0; i < len; ++i) {
1361 const float y = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]);
1362 buffer[i] = d_ptr->colorSpaceIn->whitePoint * y;
1363 }
1364 } else {
1365 for (qsizetype i = 0; i < len; ++i) {
1366 QColorVector v;
1367 v.x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]);
1368 v.y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(src[i]);
1369 v.z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(src[i]);
1370 buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v);
1371 }
1372 }
1373}
1374
1375static void loadGray(QColorVector *buffer, const quint16 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
1376{
1377 if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray ||
1378 (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] &&
1379 d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) {
1380 for (qsizetype i = 0; i < len; ++i) {
1381 const float y = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]);
1382 buffer[i] = d_ptr->colorSpaceIn->whitePoint * y;
1383 }
1384 } else {
1385 for (qsizetype i = 0; i < len; ++i) {
1386 QColorVector v;
1387 v.x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]);
1388 v.y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(src[i]);
1389 v.z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(src[i]);
1390 buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v);
1391 }
1392 }
1393}
1394
1395static void storeOpaque(quint8 *dst, const QColorVector *buffer, const qsizetype len,
1396 const QColorTransformPrivate *d_ptr)
1397{
1398 for (qsizetype i = 0; i < len; ++i)
1399 dst[i] = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].y);
1400}
1401
1402static void storeOpaque(quint16 *dst, const QColorVector *buffer, const qsizetype len,
1403 const QColorTransformPrivate *d_ptr)
1404{
1405 for (qsizetype i = 0; i < len; ++i)
1406 dst[i] = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].y);
1407}
1408
1409static constexpr qsizetype WorkBlockSize = 256;
1410
1411template <typename T, int Count = 1>
1413{
1414public:
1415 operator T*() { return reinterpret_cast<T *>(this); }
1416private:
1417 alignas(T) char data[sizeof(T) * Count];
1418};
1419
1420void loadUnpremultipliedLUT(QColorVector *buffer, const uchar *src, const qsizetype len)
1421{
1422 const float f = 1.0f / 255.f;
1423 for (qsizetype i = 0; i < len; ++i) {
1424 const float p = src[i] * f;
1425 buffer[i].x = p;
1426 buffer[i].y = p;
1427 buffer[i].z = p;
1428 }
1429}
1430
1431void loadUnpremultipliedLUT(QColorVector *buffer, const quint16 *src, const qsizetype len)
1432{
1433 const float f = 1.0f / 65535.f;
1434 for (qsizetype i = 0; i < len; ++i) {
1435 const float p = src[i] * f;
1436 buffer[i].x = p;
1437 buffer[i].y = p;
1438 buffer[i].z = p;
1439 }
1440}
1441
1442void loadUnpremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len)
1443{
1444 const float f = 1.0f / 255.f;
1445 for (qsizetype i = 0; i < len; ++i) {
1446 const uint p = src[i];
1447 buffer[i].x = qRed(p) * f;
1448 buffer[i].y = qGreen(p) * f;
1449 buffer[i].z = qBlue(p) * f;
1450 }
1451}
1452
1453void loadUnpremultipliedLUT(QColorVector *buffer, const QCmyk32 *src, const qsizetype len)
1454{
1455 const float f = 1.0f / 255.f;
1456 for (qsizetype i = 0; i < len; ++i) {
1457 const QCmyk32 p = src[i];
1458 buffer[i].x = (p.cyan() * f);
1459 buffer[i].y = (p.magenta() * f);
1460 buffer[i].z = (p.yellow() * f);
1461 buffer[i].w = (p.black() * f);
1462 }
1463}
1464
1465void loadUnpremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
1466{
1467 const float f = 1.0f / 65535.f;
1468 for (qsizetype i = 0; i < len; ++i) {
1469 buffer[i].x = src[i].red() * f;
1470 buffer[i].y = src[i].green() * f;
1471 buffer[i].z = src[i].blue() * f;
1472 }
1473}
1474
1475void loadUnpremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
1476{
1477 for (qsizetype i = 0; i < len; ++i) {
1478 buffer[i].x = src[i].r;
1479 buffer[i].y = src[i].g;
1480 buffer[i].z = src[i].b;
1481 }
1482}
1483
1484void loadPremultipliedLUT(QColorVector *, const uchar *, const qsizetype)
1485{
1486 Q_UNREACHABLE();
1487}
1488
1489void loadPremultipliedLUT(QColorVector *, const quint16 *, const qsizetype)
1490{
1491 Q_UNREACHABLE();
1492}
1493
1494void loadPremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len)
1495{
1496 for (qsizetype i = 0; i < len; ++i) {
1497 const uint p = src[i];
1498 const float f = 1.0f / qAlpha(p);
1499 buffer[i].x = (qRed(p) * f);
1500 buffer[i].y = (qGreen(p) * f);
1501 buffer[i].z = (qBlue(p) * f);
1502 }
1503}
1504
1505void loadPremultipliedLUT(QColorVector *, const QCmyk32 *, const qsizetype)
1506{
1507 Q_UNREACHABLE();
1508}
1509
1510void loadPremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
1511{
1512 for (qsizetype i = 0; i < len; ++i) {
1513 const float f = 1.0f / src[i].alpha();
1514 buffer[i].x = (src[i].red() * f);
1515 buffer[i].y = (src[i].green() * f);
1516 buffer[i].z = (src[i].blue() * f);
1517 }
1518}
1519
1520void loadPremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
1521{
1522 for (qsizetype i = 0; i < len; ++i) {
1523 const float f = 1.0f / src[i].a;
1524 buffer[i].x = src[i].r * f;
1525 buffer[i].y = src[i].g * f;
1526 buffer[i].z = src[i].b * f;
1527 }
1528}
1529template<typename T>
1530static void storeUnpremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
1531{
1532 for (qsizetype i = 0; i < len; ++i) {
1533 const int r = buffer[i].x * 255.f;
1534 const int g = buffer[i].y * 255.f;
1535 const int b = buffer[i].z * 255.f;
1536 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1537 }
1538}
1539
1540template<>
1541void storeUnpremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1542{
1543 for (qsizetype i = 0; i < len; ++i) {
1544 const int r = buffer[i].x * 255.f;
1545 const int g = buffer[i].y * 255.f;
1546 const int b = buffer[i].z * 255.f;
1547 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1548 }
1549}
1550
1551
1552template<typename T>
1553void storeUnpremultipliedLUT(QCmyk32 *dst, const T *, const QColorVector *buffer, const qsizetype len)
1554{
1555 for (qsizetype i = 0; i < len; ++i) {
1556 const int c = buffer[i].x * 255.f;
1557 const int m = buffer[i].y * 255.f;
1558 const int y = buffer[i].z * 255.f;
1559 const int k = buffer[i].w * 255.f;
1560 dst[i] = QCmyk32(c, m, y, k);
1561 }
1562}
1563
1564template<typename T>
1565static void storeUnpremultipliedLUT(QRgba64 *dst, const T *,
1566 const QColorVector *buffer, const qsizetype len)
1567{
1568 for (qsizetype i = 0; i < len; ++i) {
1569 const int r = buffer[i].x * 65535.f;
1570 const int g = buffer[i].y * 65535.f;
1571 const int b = buffer[i].z * 65535.f;
1572 dst[i] = qRgba64(r, g, b, 65535);
1573 }
1574}
1575
1576template<>
1577void storeUnpremultipliedLUT(QRgba64 *dst, const QRgb *src,
1578 const QColorVector *buffer, const qsizetype len)
1579{
1580 for (qsizetype i = 0; i < len; ++i) {
1581 const int a = qAlpha(src[i]) * 257;
1582 const int r = buffer[i].x * 65535.f;
1583 const int g = buffer[i].y * 65535.f;
1584 const int b = buffer[i].z * 65535.f;
1585 dst[i] = qRgba64(r, g, b, a);
1586 }
1587}
1588
1589template<>
1591 const QColorVector *buffer, const qsizetype len)
1592{
1593 for (qsizetype i = 0; i < len; ++i) {
1594 const int r = buffer[i].x * 65535.f;
1595 const int g = buffer[i].y * 65535.f;
1596 const int b = buffer[i].z * 65535.f;
1597 dst[i] = qRgba64(r, g, b, src[i].alpha());
1598 }
1599}
1600
1601template<typename T>
1602static void storeUnpremultipliedLUT(QRgbaFloat32 *dst, const T *src,
1603 const QColorVector *buffer, const qsizetype len)
1604{
1605 for (qsizetype i = 0; i < len; ++i) {
1606 const float r = buffer[i].x;
1607 const float g = buffer[i].y;
1608 const float b = buffer[i].z;
1609 dst[i] = QRgbaFloat32{r, g, b, getAlphaF(src[i])};
1610 }
1611}
1612
1613template<typename T>
1614static void storePremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
1615{
1616 for (qsizetype i = 0; i < len; ++i) {
1617 const int r = buffer[i].x * 255.f;
1618 const int g = buffer[i].y * 255.f;
1619 const int b = buffer[i].z * 255.f;
1620 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1621 }
1622}
1623
1624template<>
1625void storePremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1626{
1627 for (qsizetype i = 0; i < len; ++i) {
1628 const int a = qAlpha(src[i]);
1629 const int r = buffer[i].x * a;
1630 const int g = buffer[i].y * a;
1631 const int b = buffer[i].z * a;
1632 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1633 }
1634}
1635
1636template<typename T>
1637static void storePremultipliedLUT(QCmyk32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
1638{
1639 storeUnpremultipliedLUT(dst, src, buffer, len);
1640}
1641
1642template<typename T>
1643static void storePremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
1644{
1645 for (qsizetype i = 0; i < len; ++i) {
1646 const int r = buffer[i].x * 65535.f;
1647 const int g = buffer[i].y * 65535.f;
1648 const int b = buffer[i].z * 65535.f;
1649 dst[i] = qRgba64(r, g, b, 65535);
1650 }
1651}
1652
1653template<>
1654void storePremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1655{
1656 for (qsizetype i = 0; i < len; ++i) {
1657 const int a = qAlpha(src[i]) * 257;
1658 const int r = buffer[i].x * a;
1659 const int g = buffer[i].y * a;
1660 const int b = buffer[i].z * a;
1661 dst[i] = qRgba64(r, g, b, a);
1662 }
1663}
1664
1665template<>
1666void storePremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
1667{
1668 for (qsizetype i = 0; i < len; ++i) {
1669 const int a = src[i].alpha();
1670 const int r = buffer[i].x * a;
1671 const int g = buffer[i].y * a;
1672 const int b = buffer[i].z * a;
1673 dst[i] = qRgba64(r, g, b, a);
1674 }
1675}
1676
1677template<typename T>
1678static void storePremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
1679{
1680 for (qsizetype i = 0; i < len; ++i) {
1681 const float a = getAlphaF(src[i]);
1682 const float r = buffer[i].x * a;
1683 const float g = buffer[i].y * a;
1684 const float b = buffer[i].z * a;
1685 dst[i] = QRgbaFloat32{r, g, b, a};
1686 }
1687}
1688
1689static void visitElement(const QColorSpacePrivate::TransferElement &element, QColorVector *buffer, const qsizetype len)
1690{
1691 const bool doW = element.trc[3].isValid();
1692 for (qsizetype i = 0; i < len; ++i) {
1693 buffer[i].x = element.trc[0].apply(buffer[i].x);
1694 buffer[i].y = element.trc[1].apply(buffer[i].y);
1695 buffer[i].z = element.trc[2].apply(buffer[i].z);
1696 if (doW)
1697 buffer[i].w = element.trc[3].apply(buffer[i].w);
1698 }
1699}
1700
1701static void visitElement(const QColorMatrix &element, QColorVector *buffer, const qsizetype len)
1702{
1703 for (qsizetype i = 0; i < len; ++i)
1704 buffer[i] = element.map(buffer[i]);
1705}
1706
1707static void visitElement(const QColorVector &offset, QColorVector *buffer, const qsizetype len)
1708{
1709 for (qsizetype i = 0; i < len; ++i)
1710 buffer[i] += offset;
1711}
1712
1713static void visitElement(const QColorCLUT &element, QColorVector *buffer, const qsizetype len)
1714{
1715 if (element.isEmpty())
1716 return;
1717 for (qsizetype i = 0; i < len; ++i)
1718 buffer[i] = element.apply(buffer[i]);
1719}
1720
1721/*!
1722 \internal
1723*/
1724QColorVector QColorTransformPrivate::map(QColorVector c) const
1725{
1726 if (colorSpaceIn->isThreeComponentMatrix()) {
1727 if (colorSpaceIn->lut.generated.loadAcquire()) {
1728 c.x = colorSpaceIn->lut[0]->toLinear(c.x);
1729 c.y = colorSpaceIn->lut[1]->toLinear(c.y);
1730 c.z = colorSpaceIn->lut[2]->toLinear(c.z);
1731 } else {
1732 c.x = colorSpaceIn->trc[0].apply(c.x);
1733 c.y = colorSpaceIn->trc[1].apply(c.y);
1734 c.z = colorSpaceIn->trc[2].apply(c.z);
1735 }
1736 c = colorMatrix.map(c);
1737 } else {
1738 // Do element based conversion
1739 for (auto &&element : colorSpaceIn->mAB)
1740 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1741 }
1742 c.x = std::clamp(c.x, 0.0f, 1.0f);
1743 c.y = std::clamp(c.y, 0.0f, 1.0f);
1744 c.z = std::clamp(c.z, 0.0f, 1.0f);
1745
1746 // Match Profile Connection Spaces (PCS):
1747 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab)
1748 c = c.xyzToLab();
1749 else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab)
1750 c = c.labToXyz();
1751
1752 if (colorSpaceOut->isThreeComponentMatrix()) {
1753 if (!colorSpaceIn->isThreeComponentMatrix()) {
1754 c = colorMatrix.map(c);
1755 c.x = std::clamp(c.x, 0.0f, 1.0f);
1756 c.y = std::clamp(c.y, 0.0f, 1.0f);
1757 c.z = std::clamp(c.z, 0.0f, 1.0f);
1758 }
1759 if (colorSpaceOut->lut.generated.loadAcquire()) {
1760 c.x = colorSpaceOut->lut[0]->fromLinear(c.x);
1761 c.y = colorSpaceOut->lut[1]->fromLinear(c.y);
1762 c.z = colorSpaceOut->lut[2]->fromLinear(c.z);
1763 } else {
1764 c.x = colorSpaceOut->trc[0].applyInverse(c.x);
1765 c.y = colorSpaceOut->trc[1].applyInverse(c.y);
1766 c.z = colorSpaceOut->trc[2].applyInverse(c.z);
1767 }
1768 } else {
1769 // Do element based conversion
1770 for (auto &&element : colorSpaceOut->mBA)
1771 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1772 c.x = std::clamp(c.x, 0.0f, 1.0f);
1773 c.y = std::clamp(c.y, 0.0f, 1.0f);
1774 c.z = std::clamp(c.z, 0.0f, 1.0f);
1775 }
1776 return c;
1777}
1778
1779/*!
1780 \internal
1781*/
1782QColorVector QColorTransformPrivate::mapExtended(QColorVector c) const
1783{
1784 if (colorSpaceIn->isThreeComponentMatrix()) {
1785 c.x = colorSpaceIn->trc[0].applyExtended(c.x);
1786 c.y = colorSpaceIn->trc[1].applyExtended(c.y);
1787 c.z = colorSpaceIn->trc[2].applyExtended(c.z);
1788 c = colorMatrix.map(c);
1789 } else {
1790 // Do element based conversion
1791 for (auto &&element : colorSpaceIn->mAB)
1792 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1793 }
1794
1795 // Match Profile Connection Spaces (PCS):
1796 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab)
1797 c = c.xyzToLab();
1798 else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab)
1799 c = c.labToXyz();
1800
1801 if (colorSpaceOut->isThreeComponentMatrix()) {
1802 if (!colorSpaceIn->isThreeComponentMatrix())
1803 c = colorMatrix.map(c);
1804 c.x = colorSpaceOut->trc[0].applyInverseExtended(c.x);
1805 c.y = colorSpaceOut->trc[1].applyInverseExtended(c.y);
1806 c.z = colorSpaceOut->trc[2].applyInverseExtended(c.z);
1807 } else {
1808 // Do element based conversion
1809 for (auto &&element : colorSpaceOut->mBA)
1810 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1811 }
1812 return c;
1813}
1814
// True when T is one of the two gray pixel types used in this file (uchar or quint16).
1815template<typename T>
1816constexpr bool IsGrayscale = std::is_same_v<T, uchar> || std::is_same_v<T, quint16>;
// NOTE(review): each of the three template headers below has lost the
// declaration that followed it in this listing (the embedded numbering skips
// 1818, 1820 and 1822). The code further down uses IsAlwaysOpaque,
// CanUseThreeComponent and UnclampedValues, so these are presumably their
// definitions - restore them from upstream; TODO confirm.
1817template<typename T>
1819template<typename T>
1821template<typename T>
1823
1824// Possible combos for data and color spaces:
1825// DataCM ColorSpaceCM ColorSpacePM Notes
1826// Gray Gray ThreeMatrix
1827// Gray Rgb ThreeMatrix Invalid colorMatrix
1828// Rgb Rgb ThreeMatrix
1829// Rgb Rgb ElementProc
1830// Gray Rgb ElementProc Only possible for input data
1831// Cmyk Cmyk ElementProc
1832//
1833// Gray data can be uchar, quint16, and is always Opaque
1834// Rgb data can be QRgb, QRgba64, or QRgbaFloat32, and is Unpremultiplied, Premultiplied, or Opaque
1835// Cmyk data can be Cmyk32, and is always Opaque
1836//
1837// colorMatrix is set up for Gray on Gray or Rgb on Rgb, but not for Gray data on an Rgb colorspace.
1838
1839template<typename S>
1840void QColorTransformPrivate::applyConvertIn(const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const
1841{
1842 if constexpr (IsGrayscale<S>) {
1843 if (colorSpaceIn->isThreeComponentMatrix()) {
1844 loadGray(buffer, src, len, this);
1845 if (!colorSpaceOut->isThreeComponentMatrix() || colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray) {
1846 if (!colorSpaceIn->chad.isNull())
1847 applyMatrix<DoClamp>(buffer, len, colorSpaceIn->chad);
1848 }
1849 return;
1850 }
1851 } else if constexpr (CanUseThreeComponent<S>) {
1852 if (colorSpaceIn->isThreeComponentMatrix()) {
1853 if (flags & InputPremultiplied)
1854 loadPremultiplied(buffer, src, len, this);
1855 else
1856 loadUnpremultiplied(buffer, src, len, this);
1857
1858 if (!colorSpaceOut->isThreeComponentMatrix())
1859 applyMatrix<DoClamp>(buffer, len, colorMatrix);
1860 return;
1861 }
1862 }
1863 Q_ASSERT(!colorSpaceIn->isThreeComponentMatrix());
1864
1865 if (flags & InputPremultiplied)
1866 loadPremultipliedLUT(buffer, src, len);
1867 else
1868 loadUnpremultipliedLUT(buffer, src, len);
1869
1870 // Do element based conversion
1871 for (auto &&element : colorSpaceIn->mAB)
1872 std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element);
1873}
1874
1875template<typename D, typename S>
1876void QColorTransformPrivate::applyConvertOut(D *dst, const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const
1877{
1878 constexpr ApplyMatrixForm doClamp = UnclampedValues<D> ? DoNotClamp : DoClamp;
1879 if constexpr (IsGrayscale<D>) {
1880 Q_UNUSED(src); // dealing with buggy warnings in gcc 9
1881 Q_UNUSED(flags);
1882 // Calculate the matrix for grayscale conversion
1883 QColorMatrix grayMatrix;
1884 if (colorSpaceIn == colorSpaceOut ||
1885 (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray &&
1886 colorSpaceOut->colorModel == QColorSpace::ColorModel::Gray)) {
1887 // colorMatrix already has the right form
1888 grayMatrix = colorMatrix;
1889 } else {
1890 if constexpr (IsGrayscale<S>) {
1891 if (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray)
1892 grayMatrix = colorSpaceIn->chad;
1893 else
1894 grayMatrix = QColorMatrix::identity(); // Otherwise already handled in applyConvertIn
1895 } else {
1896 if (colorSpaceIn->isThreeComponentMatrix())
1897 grayMatrix = colorSpaceIn->toXyz;
1898 else
1899 grayMatrix = QColorMatrix::identity();
1900 }
1901 if (!colorSpaceOut->chad.isNull())
1902 grayMatrix = colorSpaceOut->chad.inverted() * grayMatrix;
1903 }
1904
1905 applyMatrix<doClamp>(buffer, len, grayMatrix);
1906 storeOpaque(dst, buffer, len, this);
1907 return;
1908 } else if constexpr (CanUseThreeComponent<D>) {
1909 if (colorSpaceOut->isThreeComponentMatrix()) {
1910 if (IsGrayscale<S> && colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray)
1911 applyMatrix<doClamp>(buffer, len, colorSpaceOut->toXyz.inverted()); // colorMatrix wasnt prepared for gray input
1912 else
1913 applyMatrix<doClamp>(buffer, len, colorMatrix);
1914
1915 if constexpr (IsAlwaysOpaque<S>) {
1916 storeOpaque(dst, buffer, len, this);
1917 } else {
1918 if (flags & InputOpaque)
1919 storeOpaque(dst, buffer, len, this);
1920 else if (flags & OutputPremultiplied)
1921 storePremultiplied(dst, src, buffer, len, this);
1922 else
1923 storeUnpremultiplied(dst, src, buffer, len, this);
1924 }
1925 return;
1926 }
1927 }
1928 if constexpr (!IsGrayscale<D>) {
1929 Q_ASSERT(!colorSpaceOut->isThreeComponentMatrix());
1930
1931 // Do element based conversion
1932 for (auto &&element : colorSpaceOut->mBA)
1933 std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element);
1934
1935 clampIfNeeded<doClamp>(buffer, len);
1936
1937 if (flags & OutputPremultiplied)
1938 storePremultipliedLUT(dst, src, buffer, len);
1939 else
1940 storeUnpremultipliedLUT(dst, src, buffer, len);
1941 } else {
1942 Q_UNREACHABLE();
1943 }
1944}
1945
1946/*!
1947 \internal
1948 Adapt Profile Connection Spaces.
1949*/
1950void QColorTransformPrivate::pcsAdapt(QColorVector *buffer, qsizetype count) const
1951{
1952 // Match Profile Connection Spaces (PCS):
1953 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab) {
1954 for (qsizetype j = 0; j < count; ++j)
1955 buffer[j] = buffer[j].xyzToLab();
1956 } else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab) {
1957 for (qsizetype j = 0; j < count; ++j)
1958 buffer[j] = buffer[j].labToXyz();
1959 }
1960}
1961
1962/*!
1963 \internal
1964 Applies the color transformation on \a count S pixels starting from
1965 \a src and stores the result in \a dst as D pixels.
1966
1967 Assumes unpremultiplied data by default. Set \a flags to change defaults.
1968
 The pixels are processed in blocks of at most WorkBlockSize: each block is
 converted in, adapted between connection spaces, and converted out.

1969 \sa prepare()
1970*/
1971template<typename D, typename S>
1972void QColorTransformPrivate::apply(D *dst, const S *src, qsizetype count, TransformFlags flags) const
1973{
 // NOTE(review): the statement guarded by each of the two conditions below is
 // missing from this listing (the embedded numbering skips 1975 and 1977).
 // Presumably these are the lazy lookup-table generation helpers defined at
 // the top of this file - restore them from upstream; TODO confirm.
1974 if (colorSpaceIn->isThreeComponentMatrix())
1976 if (colorSpaceOut->isThreeComponentMatrix())
1978
 // Scratch storage for one block of intermediate colors, reused by every iteration.
1979 Q_DECL_UNINITIALIZED QUninitialized<QColorVector, WorkBlockSize> buffer;
1980 qsizetype i = 0;
1981 while (i < count) {
 // The last block may be shorter than WorkBlockSize.
1982 const qsizetype len = qMin(count - i, WorkBlockSize);
1983
 // Load the source pixels and run the input half of the conversion.
1984 applyConvertIn(src + i, buffer, len, flags);
1985
 // Convert between XYZ and Lab when the two color spaces disagree.
1986 pcsAdapt(buffer, len);
1987
 // Run the output half; the source block is passed along as well.
1988 applyConvertOut(dst + i, src + i, buffer, len, flags);
1989
1990 i += len;
1991 }
1992}
1993
1994/*!
1995 \internal
1996 \enum QColorTransformPrivate::TransformFlag
1997
1998 Defines how the transform should handle alpha values.
1999
2000 \value Unpremultiplied The input and output should both be unpremultiplied.
2001 \value InputOpaque The input is guaranteed to be opaque.
2002 \value InputPremultiplied The input is premultiplied.
2003 \value OutputPremultiplied The output should be premultiplied.
2004 \value Premultiplied Both input and output should be premultiplied.
2005*/
2006
2007/*!
2008 \internal
2009 Prepares a color transformation for fast application. You do not need to
2010 call this explicitly as it will be called implicitly on the first transforms, but
2011 if you want predictable performance on the first transforms, you can perform it
2012 in advance.
2013
2014 \sa QColorTransform::map(), apply()
2015*/
2021
2022// Only some versions increasing precision 14/36 combos
// Explicit instantiations of apply<D, S>() as <destination, source> pairs.
// NOTE(review): 24 pairs are listed below, which does not match the "14/36"
// in the note above - that figure looks stale; TODO confirm and update.
2023template void QColorTransformPrivate::apply<quint8, quint8>(quint8 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2024template void QColorTransformPrivate::apply<quint8, QRgb>(quint8 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2025template void QColorTransformPrivate::apply<quint8, QCmyk32>(quint8 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2026template void QColorTransformPrivate::apply<quint16, quint8>(quint16 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2027template void QColorTransformPrivate::apply<quint16, quint16>(quint16 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2028template void QColorTransformPrivate::apply<quint16, QCmyk32>(quint16 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2029template void QColorTransformPrivate::apply<quint16, QRgba64>(quint16 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2030template void QColorTransformPrivate::apply<QRgb, quint8>(QRgb *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2031template void QColorTransformPrivate::apply<QRgb, QRgb>(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2032template void QColorTransformPrivate::apply<QRgb, QCmyk32>(QRgb *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2033template void QColorTransformPrivate::apply<QCmyk32, quint8>(QCmyk32 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2034template void QColorTransformPrivate::apply<QCmyk32, quint16>(QCmyk32 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2035template void QColorTransformPrivate::apply<QCmyk32, QRgb>(QCmyk32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2036template void QColorTransformPrivate::apply<QCmyk32, QCmyk32>(QCmyk32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2037template void QColorTransformPrivate::apply<QCmyk32, QRgba64>(QCmyk32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2038template void QColorTransformPrivate::apply<QCmyk32, QRgbaFloat32>(QCmyk32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const;
2039template void QColorTransformPrivate::apply<QRgba64, quint16>(QRgba64 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2040template void QColorTransformPrivate::apply<QRgba64, QRgb>(QRgba64 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2041template void QColorTransformPrivate::apply<QRgba64, QCmyk32>(QRgba64 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2042template void QColorTransformPrivate::apply<QRgba64, QRgba64>(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2043template void QColorTransformPrivate::apply<QRgbaFloat32, QRgb>(QRgbaFloat32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2044template void QColorTransformPrivate::apply<QRgbaFloat32, QCmyk32>(QRgbaFloat32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2045template void QColorTransformPrivate::apply<QRgbaFloat32, QRgba64>(QRgbaFloat32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2046template void QColorTransformPrivate::apply<QRgbaFloat32, QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const;
2047
2048/*!
2049 \internal
 Returns \c true when the checks below find no difference between the input
 and output sides (same or equal color spaces, or an identity colorMatrix
 with matching or linear transfer functions), and \c false otherwise.
2050*/
// NOTE(review): the signature line of this function is missing from this
// listing (the embedded numbering skips 2051); presumably this is
// QColorTransformPrivate::isIdentity() - restore it from upstream; TODO confirm.
2052{
 // The very same color space object (or none at all) on both sides.
2053 if (colorSpaceIn == colorSpaceOut)
2054 return true;
 // Any non-identity matrix rules the transform out immediately.
2055 if (!colorMatrix.isIdentity())
2056 return false;
2057 if (colorSpaceIn && colorSpaceOut) {
 // Distinct objects that compare equal.
2058 if (colorSpaceIn->equals(colorSpaceOut.constData()))
2059 return true;
 // From here on only three-component-matrix color spaces can qualify.
2060 if (!colorSpaceIn->isThreeComponentMatrix() || !colorSpaceOut->isThreeComponentMatrix())
2061 return false;
2062 if (colorSpaceIn->transferFunction != colorSpaceOut->transferFunction)
2063 return false;
 // Custom transfer functions must additionally match curve by curve.
2064 if (colorSpaceIn->transferFunction == QColorSpace::TransferFunction::Custom) {
2065 return colorSpaceIn->trc[0] == colorSpaceOut->trc[0]
2066 && colorSpaceIn->trc[1] == colorSpaceOut->trc[1]
2067 && colorSpaceIn->trc[2] == colorSpaceOut->trc[2];
2068 }
2069 } else {
 // Only one side is set: it must be a three-component matrix with a linear transfer function.
2070 if (colorSpaceIn && !colorSpaceIn->isThreeComponentMatrix())
2071 return false;
2072 if (colorSpaceOut && !colorSpaceOut->isThreeComponentMatrix())
2073 return false;
2074 if (colorSpaceIn && colorSpaceIn->transferFunction != QColorSpace::TransferFunction::Linear)
2075 return false;
2076 if (colorSpaceOut && colorSpaceOut->transferFunction != QColorSpace::TransferFunction::Linear)
2077 return false;
2078 }
2079 return true;
2080}
2081
2082QT_END_NAMESPACE
constexpr QCmyk32(int cyan, int magenta, int yellow, int black)
Definition qcmyk_p.h:36
constexpr int black() const noexcept
Definition qcmyk_p.h:49
constexpr int magenta() const noexcept
Definition qcmyk_p.h:47
constexpr int yellow() const noexcept
Definition qcmyk_p.h:48
constexpr int cyan() const noexcept
Definition qcmyk_p.h:46
QColorVector apply(const QColorVector &v) const
bool isEmpty() const
static QColorMatrix identity()
QColorVector map(const QColorVector &c) const
Q_GUI_EXPORT void prepare()
void apply(D *dst, const S *src, qsizetype count, TransformFlags flags) const
QColorVector mapExtended(QColorVector color) const
The QColorTransform class is a transformation between color spaces.
Q_GUI_EXPORT ~QColorTransform()
Combined button and popup list for selecting options.
static void visitElement(const QColorSpacePrivate::TransferElement &element, QColorVector *buffer, const qsizetype len)
float getAlphaF(const QRgbaFloat32 &r)
static void storePremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void storePremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *, const QCmyk32 *, const qsizetype)
static void storeOpaque(quint8 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr)
static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix)
static void visitElement(const QColorCLUT &element, QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
static void storePremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
static float getAlphaF(const T &)
static void storePremultipliedLUT(QCmyk32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void storeUnpremultipliedLUT(QCmyk32 *dst, const T *, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const uchar *src, const qsizetype len)
static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
static void storeUnpremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
void storeUnpremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
static void storeUnpremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
static void clampIfNeeded(QColorVector *buffer, const qsizetype len)
void storePremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
float getAlphaF(const QCmyk32 &)
static constexpr qsizetype WorkBlockSize
void loadPremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
ApplyMatrixForm
@ DoClamp
@ DoNotClamp
static void loadGray(QColorVector *buffer, const quint8 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
float getAlphaF(const QRgba64 &r)
constexpr bool IsAlwaysOpaque
constexpr bool IsGrayscale
float getAlphaF(const QRgb &r)
void storeUnpremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
static void visitElement(const QColorVector &offset, QColorVector *buffer, const qsizetype len)
constexpr bool UnclampedValues
static void storePremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
static void visitElement(const QColorMatrix &element, QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *, const uchar *, const qsizetype)
void storeUnpremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QCmyk32 *src, const qsizetype len)
static void storeUnpremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
constexpr bool CanUseThreeComponent
void storePremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
QRgbaFloat< float > QRgbaFloat32
QRgbaFloat< qfloat16 > QRgbaFloat16