Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qcolortransform.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
6
7#include "qcmyk_p.h"
8#include "qcolorclut_p.h"
10#include "qcolorspace_p.h"
11#include "qcolortrc_p.h"
12#include "qcolortrclut_p.h"
13
14#include <QtCore/qatomic.h>
15#include <QtCore/qmath.h>
16#include <QtGui/qcolor.h>
17#include <QtGui/qimage.h>
18#include <QtGui/qtransform.h>
19#include <QtCore/private/qsimd_p.h>
20
21#include <qdebug.h>
22
24
// NOTE(review): the signature line that precedes this body is absent from this
// listing (the embedded numbering jumps from 24 to 26). The body uses the
// member colorSpaceIn unqualified, so it is presumably a member function of
// QColorTransformPrivate — confirm against the real source.
//
// Generates the three per-channel lookup tables colorSpaceIn->lut[0..2] from
// colorSpaceIn->trc[0..2]. The `generated` flag is tested once before and once
// after taking s_lutWriteLock, and is set only after the tables are stored.
26{
// Fast path: the tables have already been generated.
27 if (colorSpaceIn->lut.generated.loadAcquire())
28 return;
// Re-test under the write lock so only one caller builds the tables.
29 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
30 if (colorSpaceIn->lut.generated.loadRelaxed())
31 return;
32
// If any of the three curves is invalid, nothing is built and the flag stays unset.
33 for (int i = 0; i < 3; ++i) {
34 if (!colorSpaceIn->trc[i].isValid())
35 return;
36 }
37
// Identical curves share a single table; otherwise one table is built per channel.
38 if (colorSpaceIn->trc[0] == colorSpaceIn->trc[1] && colorSpaceIn->trc[0] == colorSpaceIn->trc[2]) {
39 colorSpaceIn->lut[0] = QColorTrcLut::fromTrc(colorSpaceIn->trc[0]);
40 colorSpaceIn->lut[1] = colorSpaceIn->lut[0];
41 colorSpaceIn->lut[2] = colorSpaceIn->lut[0];
42 } else {
43 for (int i = 0; i < 3; ++i)
44 colorSpaceIn->lut[i] = QColorTrcLut::fromTrc(colorSpaceIn->trc[i]);
45 }
46
// Publish the tables; pairs with the loadAcquire() at the top.
47 colorSpaceIn->lut.generated.storeRelease(1);
48}
49
// NOTE(review): the signature line that precedes this body is absent from this
// listing (the embedded numbering jumps from 49 to 51). The body uses the
// member colorSpaceOut unqualified, so it is presumably a member function of
// QColorTransformPrivate — confirm against the real source.
//
// Generates the three per-channel lookup tables colorSpaceOut->lut[0..2] from
// colorSpaceOut->trc[0..2]. The `generated` flag is tested once before and
// once after taking s_lutWriteLock, and is set only after the tables are stored.
51{
// Fast path: the tables have already been generated.
52 if (colorSpaceOut->lut.generated.loadAcquire())
53 return;
// Re-test under the write lock so only one caller builds the tables.
54 QMutexLocker lock(&QColorSpacePrivate::s_lutWriteLock);
55 if (colorSpaceOut->lut.generated.loadRelaxed())
56 return;
// If any of the three curves is invalid, nothing is built and the flag stays unset.
57 for (int i = 0; i < 3; ++i) {
58 if (!colorSpaceOut->trc[i].isValid())
59 return;
60 }
61
// Identical curves share a single table; otherwise one table is built per channel.
62 if (colorSpaceOut->trc[0] == colorSpaceOut->trc[1] && colorSpaceOut->trc[0] == colorSpaceOut->trc[2]) {
63 colorSpaceOut->lut[0] = QColorTrcLut::fromTrc(colorSpaceOut->trc[0]);
64 colorSpaceOut->lut[1] = colorSpaceOut->lut[0];
65 colorSpaceOut->lut[2] = colorSpaceOut->lut[0];
66 } else {
67 for (int i = 0; i < 3; ++i)
68 colorSpaceOut->lut[i] = QColorTrcLut::fromTrc(colorSpaceOut->trc[i]);
69 }
70
// Publish the tables; pairs with the loadAcquire() at the top.
71 colorSpaceOut->lut.generated.storeRelease(1);
72}
73
74/*!
75 \class QColorTransform
76 \brief The QColorTransform class is a transformation between color spaces.
77 \since 5.14
78
79 \ingroup painting
80 \ingroup appearance
81 \inmodule QtGui
82
83 QColorTransform is an instantiation of a transformation between color spaces.
84 It can be applied on color and pixels to convert them from one color space to
85 another.
86
87 To create a QColorTransform, use QColorSpace::transformationToColorSpace():
88
89 \code
90 QColorSpace sourceColorSpace(QColorSpace::SRgb);
91 QColorSpace targetColorSpace(QColorSpace::DisplayP3);
92 QColorTransform srgbToP3Transform = sourceColorSpace.transformationToColorSpace(targetColorSpace);
93 \endcode
94
95 Setting up a QColorTransform takes some preprocessing, so keeping around
96 QColorTransforms that you need often is recommended, instead of generating
97 them on the fly.
98*/
99
100
// The copy constructor and the destructor are both compiler-generated.
101QColorTransform::QColorTransform(const QColorTransform &colorTransform) noexcept = default;
102
103QColorTransform::~QColorTransform() = default;
104
// NOTE(review): presumably defines the destructor of the shared-data pointer
// specialization for QColorTransformPrivate — confirm against the macro definition.
105QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QColorTransformPrivate)
106
107/*!
108 \since 6.4
109 Returns true if the color transform is the identity transform.
110*/
111bool QColorTransform::isIdentity() const noexcept
112{
113 return !d || d->isIdentity();
114}
115
116/*!
117 \fn bool QColorTransform::operator==(const QColorTransform &ct1, const QColorTransform &ct2)
118 \since 6.4
119 Returns true if \a ct1 defines the same color transformation as \a ct2.
120*/
121
122/*!
123 \fn bool QColorTransform::operator!=(const QColorTransform &ct1, const QColorTransform &ct2)
124 \since 6.4
125 Returns true if \a ct1 does not define the same transformation as \a ct2.
126*/
127
128/*! \internal
129*/
130bool QColorTransform::compare(const QColorTransform &other) const
131{
132 if (d == other.d)
133 return true;
134 if (bool(d) != bool(other.d))
135 return d ? d->isIdentity() : other.d->isIdentity();
136 if (d->colorMatrix != other.d->colorMatrix)
137 return false;
138 if (bool(d->colorSpaceIn) != bool(other.d->colorSpaceIn))
139 return false;
140 if (bool(d->colorSpaceOut) != bool(other.d->colorSpaceOut))
141 return false;
142 if (d->colorSpaceIn) {
143 if (d->colorSpaceIn->transformModel != other.d->colorSpaceIn->transformModel)
144 return false;
145 if (d->colorSpaceIn->isThreeComponentMatrix()) {
146 for (int i = 0; i < 3; ++i) {
147 if (d->colorSpaceIn && d->colorSpaceIn->trc[i] != other.d->colorSpaceIn->trc[i])
148 return false;
149 }
150 } else {
151 if (!d->colorSpaceIn->equals(other.d->colorSpaceIn.constData()))
152 return false;
153 }
154 }
155 if (d->colorSpaceOut) {
156 if (d->colorSpaceOut->transformModel != other.d->colorSpaceOut->transformModel)
157 return false;
158 if (d->colorSpaceOut->isThreeComponentMatrix()) {
159 for (int i = 0; i < 3; ++i) {
160 if (d->colorSpaceOut && d->colorSpaceOut->trc[i] != other.d->colorSpaceOut->trc[i])
161 return false;
162 }
163 } else {
164 if (!d->colorSpaceOut->equals(other.d->colorSpaceOut.constData()))
165 return false;
166 }
167 }
168 return true;
169}
170
171/*!
172 Applies the color transformation on the QRgb value \a argb.
173
174 The input should be opaque or unpremultiplied.
175*/
176QRgb QColorTransform::map(QRgb argb) const
177{
178 if (!d)
179 return argb;
180 constexpr float f = 1.0f / 255.0f;
181 QColorVector c = { qRed(argb) * f, qGreen(argb) * f, qBlue(argb) * f };
182 c = d->map(c);
183 return qRgba(c.x * 255 + 0.5f, c.y * 255 + 0.5f, c.z * 255 + 0.5f, qAlpha(argb));
184}
185
186/*!
187 Applies the color transformation on the QRgba64 value \a rgba64.
188
189 The input should be opaque or unpremultiplied.
190*/
192{
193 if (!d)
194 return rgba64;
195 constexpr float f = 1.0f / 65535.0f;
196 QColorVector c = { rgba64.red() * f, rgba64.green() * f, rgba64.blue() * f };
197 c = d->map(c);
198 return QRgba64::fromRgba64(c.x * 65535.f + 0.5f, c.y * 65535.f + 0.5f, c.z * 65535.f + 0.5f, rgba64.alpha());
199}
200
201/*!
202 Applies the color transformation on the QRgbaFloat16 value \a rgbafp16.
203
204 The input should be opaque or unpremultiplied.
205 \since 6.4
206*/
207QRgbaFloat16 QColorTransform::map(QRgbaFloat16 rgbafp16) const
208{
209 if (!d)
210 return rgbafp16;
211 QColorVector c(rgbafp16.r, rgbafp16.g, rgbafp16.b);
212 c = d->mapExtended(c);
213 rgbafp16.r = qfloat16(c.x);
214 rgbafp16.g = qfloat16(c.y);
215 rgbafp16.b = qfloat16(c.z);
216 return rgbafp16;
217}
218
219/*!
220 Applies the color transformation on the QRgbaFloat32 value \a rgbafp32.
221
222 The input should be opaque or unpremultiplied.
223 \since 6.4
224*/
225QRgbaFloat32 QColorTransform::map(QRgbaFloat32 rgbafp32) const
226{
227 if (!d)
228 return rgbafp32;
229 QColorVector c(rgbafp32.r, rgbafp32.g, rgbafp32.b);
230 c = d->mapExtended(c);
231 rgbafp32.r = c.x;
232 rgbafp32.g = c.y;
233 rgbafp32.b = c.z;
234 return rgbafp32;
235}
236
237/*!
238 Applies the color transformation on the QColor value \a color.
239
240*/
241QColor QColorTransform::map(const QColor &color) const
242{
243 if (!d)
244 return color;
245 QColor clr = color;
246 if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Rgb) {
247 if (color.spec() != QColor::ExtendedRgb && color.spec() != QColor::Rgb)
248 clr = clr.toRgb();
249 } else if (d->colorSpaceIn->colorModel == QColorSpace::ColorModel::Cmyk) {
250 if (color.spec() != QColor::Cmyk)
251 clr = clr.toCmyk();
252 }
253
254 QColorVector c =
255 (clr.spec() == QColor::Cmyk)
256 ? QColorVector(clr.cyanF(), clr.magentaF(), clr.yellowF(), clr.blackF())
257 : QColorVector(clr.redF(), clr.greenF(), clr.blueF());
258
259 c = d->mapExtended(c);
260
261 QColor out;
262 if (d->colorSpaceOut->colorModel == QColorSpace::ColorModel::Cmyk) {
263 c.x = std::clamp(c.x, 0.f, 1.f);
264 c.y = std::clamp(c.y, 0.f, 1.f);
265 c.z = std::clamp(c.z, 0.f, 1.f);
266 c.w = std::clamp(c.w, 0.f, 1.f);
267 out.setCmykF(c.x, c.y, c.z, c.w, color.alphaF());
268 } else {
269 out.setRgbF(c.x, c.y, c.z, color.alphaF());
270 }
271 return out;
272}
273
274// Optimized sub-routines for fast block based conversion:
275
280
// Multiplies each of the `len` vectors in `buffer` by `colorMatrix`, in place.
// When `doClamp` is set the result is also clamped to [0, 1].
// One of three implementations is chosen at compile time: SSE2, NEON, or a
// portable fallback that goes through QColorMatrix::map().
281template<ApplyMatrixForm doClamp = DoClamp>
282static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix)
283{
284#if defined(__SSE2__)
285 const __m128 minV = _mm_set1_ps(0.0f);
286 const __m128 maxV = _mm_set1_ps(1.0f);
// Each matrix member (r, g, b) is loaded as four consecutive floats starting at its .x.
287 const __m128 xMat = _mm_loadu_ps(&colorMatrix.r.x);
288 const __m128 yMat = _mm_loadu_ps(&colorMatrix.g.x);
289 const __m128 zMat = _mm_loadu_ps(&colorMatrix.b.x);
290 for (qsizetype j = 0; j < len; ++j) {
291 __m128 c = _mm_loadu_ps(&buffer[j].x);
// Broadcast x, y and z across all lanes, then accumulate x*r + y*g + z*b.
292 __m128 cx = _mm_shuffle_ps(c, c, _MM_SHUFFLE(0, 0, 0, 0));
293 __m128 cy = _mm_shuffle_ps(c, c, _MM_SHUFFLE(1, 1, 1, 1));
294 __m128 cz = _mm_shuffle_ps(c, c, _MM_SHUFFLE(2, 2, 2, 2));
295 cx = _mm_mul_ps(cx, xMat);
296 cy = _mm_mul_ps(cy, yMat);
297 cz = _mm_mul_ps(cz, zMat);
298 cx = _mm_add_ps(cx, cy);
299 cx = _mm_add_ps(cx, cz);
300 // Clamp:
// All four lanes are clamped here, including the fourth one.
301 if (doClamp) {
302 cx = _mm_min_ps(cx, maxV);
303 cx = _mm_max_ps(cx, minV);
304 }
305 _mm_storeu_ps(&buffer[j].x, cx);
306 }
307#elif defined(__ARM_NEON__)
308 const float32x4_t minV = vdupq_n_f32(0.0f);
309 const float32x4_t maxV = vdupq_n_f32(1.0f);
310 const float32x4_t xMat = vld1q_f32(&colorMatrix.r.x);
311 const float32x4_t yMat = vld1q_f32(&colorMatrix.g.x);
312 const float32x4_t zMat = vld1q_f32(&colorMatrix.b.x);
313 for (qsizetype j = 0; j < len; ++j) {
314 float32x4_t c = vld1q_f32(&buffer[j].x);
// Scale each matrix member by the matching scalar lane, then sum the three products.
315 float32x4_t cx = vmulq_n_f32(xMat, vgetq_lane_f32(c, 0));
316 float32x4_t cy = vmulq_n_f32(yMat, vgetq_lane_f32(c, 1));
317 float32x4_t cz = vmulq_n_f32(zMat, vgetq_lane_f32(c, 2));
318 cx = vaddq_f32(cx, cy);
319 cx = vaddq_f32(cx, cz);
320 // Clamp:
321 if (doClamp) {
322 cx = vminq_f32(cx, maxV);
323 cx = vmaxq_f32(cx, minV);
324 }
325 vst1q_f32(&buffer[j].x, cx);
326 }
327#else
328 for (qsizetype j = 0; j < len; ++j) {
329 const QColorVector cv = colorMatrix.map(buffer[j]);
// When clamping, only x, y and z are written back; the fourth component keeps
// its previous value. Without clamping the whole vector is replaced.
330 if (doClamp) {
331 buffer[j].x = std::clamp(cv.x, 0.f, 1.f);
332 buffer[j].y = std::clamp(cv.y, 0.f, 1.f);
333 buffer[j].z = std::clamp(cv.z, 0.f, 1.f);
334 } else {
335 buffer[j] = cv;
336 }
337 }
338#endif
339}
340
// Clamps each of the `len` vectors in `buffer` to [0, 1] in place.
// Compiles to a no-op unless the template argument equals DoClamp.
341template<ApplyMatrixForm doClamp = DoClamp>
342static void clampIfNeeded(QColorVector *buffer, const qsizetype len)
343{
344 if constexpr (doClamp != DoClamp)
345 return;
346#if defined(__SSE2__)
347 const __m128 minV = _mm_set1_ps(0.0f);
348 const __m128 maxV = _mm_set1_ps(1.0f);
// The SIMD paths clamp all four lanes of each vector.
349 for (qsizetype j = 0; j < len; ++j) {
350 __m128 c = _mm_loadu_ps(&buffer[j].x);
351 c = _mm_min_ps(c, maxV);
352 c = _mm_max_ps(c, minV);
353 _mm_storeu_ps(&buffer[j].x, c);
354 }
355#elif defined(__ARM_NEON__)
356 const float32x4_t minV = vdupq_n_f32(0.0f);
357 const float32x4_t maxV = vdupq_n_f32(1.0f);
358 for (qsizetype j = 0; j < len; ++j) {
359 float32x4_t c = vld1q_f32(&buffer[j].x);
360 c = vminq_f32(c, maxV);
361 c = vmaxq_f32(c, minV);
362 vst1q_f32(&buffer[j].x, c);
363 }
364#else
// The portable path clamps x, y and z only.
365 for (qsizetype j = 0; j < len; ++j) {
366 const QColorVector cv = buffer[j];
367 buffer[j].x = std::clamp(cv.x, 0.f, 1.f);
368 buffer[j].y = std::clamp(cv.y, 0.f, 1.f);
369 buffer[j].z = std::clamp(cv.z, 0.f, 1.f);
370 }
371#endif
372}
373
374#if defined(__SSE2__) || defined(__ARM_NEON__)
// Compile-time trait: true for QRgb, false for QRgba64. The SIMD loaders in
// this file use it to pick which lane holds red and which holds blue.
375template<typename T>
376static constexpr inline bool isArgb();
377template<>
378constexpr inline bool isArgb<QRgb>() { return true; }
379template<>
380constexpr inline bool isArgb<QRgba64>() { return false; }
381
// Returns the integer alpha of a pixel: qAlpha() for QRgb, alpha() for QRgba64.
382template<typename T>
383static inline int getAlpha(const T &p);
384template<>
385inline int getAlpha<QRgb>(const QRgb &p)
386{ return qAlpha(p); }
387template<>
388inline int getAlpha<QRgba64>(const QRgba64 &p)
389{ return p.alpha(); }
390
// Per-format integer constant: 255 for QRgb, 65535 for QRgba64.
// NOTE(review): presumably the maximum channel value of each format — confirm at the call sites.
391template<typename T>
392static inline constexpr int getFactor();
393template<>
394inline constexpr int getFactor<QRgb>()
395{ return 255; }
396template<>
397inline constexpr int getFactor<QRgba64>()
398{ return 65535; }
399#endif
400
401template<typename T>
402static float getAlphaF(const T &);
403template<> float getAlphaF(const QRgb &r)
404{
405 return qAlpha(r) * (1.f / 255.f);
406}
407template<> float getAlphaF(const QCmyk32 &)
408{
409 return 1.f;
410}
411template<> float getAlphaF(const QRgba64 &r)
412{
413 return r.alpha() * (1.f / 65535.f);
414}
415template<> float getAlphaF(const QRgbaFloat32 &r)
416{
417 return r.a;
418}
419
// Forward declarations of the pixel loaders. The definitions and explicit
// specializations follow, selected per target (SSE2, NEON or portable).
// Each loader fills buffer[0..len) with linearised colour values read from src,
// using the TRC tables and curves of d_ptr->colorSpaceIn. The premultiplied
// variants divide the colour channels by alpha first.
420template<typename T>
421static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
422template<typename T>
423static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr);
424
425#if defined(__SSE2__)
426// Load to [0-alpha] in 4x32 SIMD
// Widens one pixel into four 32-bit integer lanes of v, without any scaling.
427template<typename T>
428static inline void loadP(const T &p, __m128i &v);
429
// QRgb: the 32-bit pixel goes into lane 0, then its four bytes are
// zero-extended to four 32-bit lanes (one SSE4.1 instruction, or two
// unpack-with-zero steps on plain SSE2).
430template<>
431inline void loadP<QRgb>(const QRgb &p, __m128i &v)
432{
433 v = _mm_cvtsi32_si128(p);
434#if defined(__SSE4_1__)
435 v = _mm_cvtepu8_epi32(v);
436#else
437 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
438 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
439#endif
440}
441
// QRgba64: the low 64 bits are loaded and the four 16-bit channels are
// zero-extended to four 32-bit lanes.
442template<>
443inline void loadP<QRgba64>(const QRgba64 &p, __m128i &v)
444{
445 v = _mm_loadl_epi64((const __m128i *)&p);
446#if defined(__SSE4_1__)
447 v = _mm_cvtepu16_epi32(v);
448#else
449 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
450#endif
451}
452
// SSE2 loader for premultiplied integer pixels (widened by loadP<T>).
// Each pixel is divided by its alpha, scaled to a table index, and linearised
// through colorSpaceIn->lut[]. If any lane exceeds its limit in vRangeMax, the
// pixel is linearised with trc[].applyExtended() instead.
453template<typename T>
454static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
455{
456 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
457 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
458 constexpr bool isARGB = isArgb<T>();
// Per-lane index limits. For ARGB input lanes 0 and 2 are swapped because blue,
// not red, sits in lane 0 (see the ridx/bidx extraction below).
459 const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
460 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
461 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
462 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
463 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
464 QColorTrcLut::Resolution);
465 for (qsizetype i = 0; i < len; ++i) {
466 __m128i v;
467 loadP<T>(src[i], v);
468 __m128 vf = _mm_cvtepi32_ps(v);
469 // Approximate 1/a:
// Alpha (lane 3) is broadcast; the hardware estimate is then refined once as
// 2*via - via*via*va.
470 __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3));
471 __m128 via = _mm_rcp_ps(va);
472 via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va)));
473 // v * (1/a)
474 vf = _mm_mul_ps(vf, via);
475
476 // Handle zero alpha
// Where alpha == 0 the mask is all-ones, so andnot clears every lane to 0.
477 __m128 vAlphaMask = _mm_cmpeq_ps(va, _mm_set1_ps(0.0f));
478 vf = _mm_andnot_ps(vAlphaMask, vf);
479
480 // LUT
// 16-bit extracts at positions 0, 2 and 4 read the low halves of 32-bit lanes 0, 1 and 2.
481 v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
482 const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0);
483 const int gidx = _mm_extract_epi16(v, 2);
484 const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4);
485 if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) {
// Table results are written in R, G, B order into lanes 0, 1, 2, then scaled to float.
486 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
487 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
488 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
489 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
490
491 _mm_storeu_ps(&buffer[i].x, vf);
492 } else {
// Out-of-table path: only x, y and z are written here.
493 constexpr float f = 1.f / QColorTrcLut::Resolution;
494 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
495 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
496 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
497 }
498 }
499}
500
// SSE2 loader for premultiplied float pixels.
// Each pixel is divided by its alpha. If every lane lies between 0 and its
// limit in vRangeMax, the channels are linearised through colorSpaceIn->lut[];
// otherwise trc[].applyExtended() is used.
501template<>
502void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
503{
504 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
505 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
506 const __m128 vZero = _mm_set1_ps(0.0f);
507 const float factor = 1.f / float(QColorTrcLut::Resolution);
// Upper limits are the table limits divided by Resolution; the fourth lane is
// INFINITY, so it can never count as "over".
508 const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor,
509 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor,
510 d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor,
511 INFINITY);
512 for (qsizetype i = 0; i < len; ++i) {
513 __m128 vf = _mm_loadu_ps(&src[i].r);
514 // Approximate 1/a:
// Alpha (lane 3) is broadcast; the hardware estimate is then refined once as
// 2*via - via*via*va.
515 __m128 va = _mm_shuffle_ps(vf, vf, _MM_SHUFFLE(3, 3, 3, 3));
516 __m128 via = _mm_rcp_ps(va);
517 via = _mm_sub_ps(_mm_add_ps(via, via), _mm_mul_ps(via, _mm_mul_ps(via, va)));
518 // v * (1/a)
519 vf = _mm_mul_ps(vf, via);
520
521 // Handle zero alpha
// Where alpha == 0 the mask is all-ones, so andnot clears every lane to 0.
522 __m128 vAlphaMask = _mm_cmpeq_ps(va, vZero);
523 vf = _mm_andnot_ps(vAlphaMask, vf);
524
525 // LUT
526 const __m128 under = _mm_cmplt_ps(vf, vZero);
527 const __m128 over = _mm_cmpgt_ps(vf, vRangeMax);
528 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
529 // Within gamut
// 16-bit extracts at positions 0, 2 and 4 read the low halves of 32-bit lanes 0, 1 and 2.
530 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
531 const int ridx = _mm_extract_epi16(v, 0);
532 const int gidx = _mm_extract_epi16(v, 2);
533 const int bidx = _mm_extract_epi16(v, 4);
534 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
535 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
536 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
537 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
538 _mm_storeu_ps(&buffer[i].x, vf);
539 } else {
540 // Outside 0.0->1.0 gamut
// The un-premultiplied vector is stored first, then x, y and z are replaced in place.
541 _mm_storeu_ps(&buffer[i].x, vf);
542 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(buffer[i].x);
543 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(buffer[i].y);
544 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(buffer[i].z);
545 }
546 }
547}
548
549// Load to [0->TrcResolution] in 4x32 SIMD
// Widens one unpremultiplied pixel into four 32-bit lanes of v, already scaled
// to table-index units.
550template<typename T>
551static inline void loadPU(const T &p, __m128i &v);
552
// QRgb: bytes are zero-extended to 32-bit lanes, then shifted left by ShiftUp.
553template<>
554inline void loadPU<QRgb>(const QRgb &p, __m128i &v)
555{
556 v = _mm_cvtsi32_si128(p);
557#if defined(__SSE4_1__)
558 v = _mm_cvtepu8_epi32(v);
559#else
560 v = _mm_unpacklo_epi8(v, _mm_setzero_si128());
561 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
562#endif
563 v = _mm_slli_epi32(v, QColorTrcLut::ShiftUp);
564}
565
// QRgba64: each 16-bit channel c becomes c - (c >> 8), is zero-extended to
// 32 bits, then shifted right by ShiftDown.
566template<>
567inline void loadPU<QRgba64>(const QRgba64 &p, __m128i &v)
568{
569 v = _mm_loadl_epi64((const __m128i *)&p);
570 v = _mm_sub_epi16(v, _mm_srli_epi16(v, 8));
571#if defined(__SSE4_1__)
572 v = _mm_cvtepu16_epi32(v);
573#else
574 v = _mm_unpacklo_epi16(v, _mm_setzero_si128());
575#endif
576 v = _mm_srli_epi32(v, QColorTrcLut::ShiftDown);
577}
578
// SSE2 loader for unpremultiplied integer pixels (converted to table indices
// by loadPU<T>). Channels are linearised through colorSpaceIn->lut[]; if any
// lane exceeds its limit in vRangeMax, trc[].applyExtended() is used instead.
579template<typename T>
580void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
581{
582 constexpr bool isARGB = isArgb<T>();
583 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
// Per-lane index limits. For ARGB input lanes 0 and 2 are swapped because blue,
// not red, sits in lane 0.
584 const __m128i vRangeMax = _mm_setr_epi32(isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
585 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
586 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
587 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
588 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
589 QColorTrcLut::Resolution);
590 for (qsizetype i = 0; i < len; ++i) {
591 __m128i v;
592 loadPU<T>(src[i], v);
// 16-bit extracts at positions 0, 2 and 4 read the low halves of 32-bit lanes 0, 1 and 2.
593 const int ridx = isARGB ? _mm_extract_epi16(v, 4) : _mm_extract_epi16(v, 0);
594 const int gidx = _mm_extract_epi16(v, 2);
595 const int bidx = isARGB ? _mm_extract_epi16(v, 0) : _mm_extract_epi16(v, 4);
596 if (_mm_movemask_epi8(_mm_cmpgt_epi32(v, vRangeMax)) == 0) {
// Table results are written in R, G, B order into lanes 0, 1, 2, then scaled to float.
597 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
598 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
599 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
600 __m128 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
601 _mm_storeu_ps(&buffer[i].x, vf);
602 } else {
// Out-of-table path: only x, y and z are written here.
603 constexpr float f = 1.f / QColorTrcLut::Resolution;
604 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
605 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
606 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
607 }
608 }
609}
610
// SSE2 loader for unpremultiplied float pixels.
// If every lane lies between 0 and its limit in vRangeMax, the channels are
// linearised through colorSpaceIn->lut[]; otherwise trc[].applyExtended() is
// applied to the source channels directly.
611template<>
612void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
613{
614 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
615 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
616 const __m128 vZero = _mm_set1_ps(0.0f);
617 const float factor = 1.f / float(QColorTrcLut::Resolution);
// Upper limits are the table limits divided by Resolution; the fourth lane is
// INFINITY, so it can never count as "over".
618 const __m128 vRangeMax = _mm_setr_ps(d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear * factor,
619 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear * factor,
620 d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear * factor,
621 INFINITY);
622 for (qsizetype i = 0; i < len; ++i) {
623 __m128 vf = _mm_loadu_ps(&src[i].r);
624 const __m128 under = _mm_cmplt_ps(vf, vZero);
625 const __m128 over = _mm_cmpgt_ps(vf, vRangeMax);
626 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
627 // Within gamut
// 16-bit extracts at positions 0, 2 and 4 read the low halves of 32-bit lanes 0, 1 and 2.
628 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
629 const int ridx = _mm_extract_epi16(v, 0);
630 const int gidx = _mm_extract_epi16(v, 2);
631 const int bidx = _mm_extract_epi16(v, 4);
632 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], 0);
633 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], 2);
634 v = _mm_insert_epi16(v, d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], 4);
635 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), iFF00);
636 _mm_storeu_ps(&buffer[i].x, vf);
637 } else {
638 // Outside 0.0->1.0 gamut
// Only x, y and z are written on this path.
639 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(src[i].r);
640 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(src[i].g);
641 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(src[i].b);
642 }
643 }
644}
645
646#elif defined(__ARM_NEON__)
647// Load to [0-alpha] in 4x32 SIMD
// Widens one pixel into four unsigned 32-bit lanes of v, without any scaling.
648template<typename T>
649static inline void loadP(const T &p, uint32x4_t &v);
650
// QRgb: the pixel's bytes are widened to 16 bits, then the low four to 32 bits.
651template<>
652inline void loadP<QRgb>(const QRgb &p, uint32x4_t &v)
653{
654 v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
655}
656
// QRgba64: the pixel is loaded as 64 bits and its four 16-bit channels are widened to 32 bits.
657template<>
658inline void loadP<QRgba64>(const QRgba64 &p, uint32x4_t &v)
659{
660 v = vmovl_u16(vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p))));
661}
662
// Returns true when the sum of the four lanes of p is zero: a single
// across-vector add on 64-bit ARM, two pairwise adds otherwise.
// The callers in this file pass comparison masks.
663static inline bool test_all_zero(uint32x4_t p)
664{
665#if defined(Q_PROCESSOR_ARM_64)
666 return vaddvq_u32(p) == 0;
667#else
668 const uint32x2_t tmp = vpadd_u32(vget_low_u32(p), vget_high_u32(p));
669 return vget_lane_u32(vpadd_u32(tmp, tmp), 0) == 0;
670#endif
671}
672
// NEON loader for premultiplied integer pixels (widened by loadP<T>).
// Each pixel is divided by its alpha, scaled to a table index, and linearised
// through colorSpaceIn->lut[]. If any lane exceeds its limit in vRangeMax, the
// pixel is linearised with trc[].applyExtended() instead.
673template<typename T>
674static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
675{
676 constexpr bool isARGB = isArgb<T>();
677 const float iFF00 = 1.0f / (255 * 256);
// Per-lane index limits. For ARGB input lanes 0 and 2 are swapped because blue,
// not red, sits in lane 0.
678 const uint32x4_t vRangeMax = qvsetq_n_u32(
679 isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
680 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
681 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
682 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
683 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
684 QColorTrcLut::Resolution);
685 for (qsizetype i = 0; i < len; ++i) {
686 uint32x4_t v;
687 loadP<T>(src[i], v);
688 float32x4_t vf = vcvtq_f32_u32(v);
689 // Approximate 1/a:
// Alpha (lane 3) is broadcast; the estimate is refined once using vrecpsq_f32.
690 float32x4_t va = vdupq_n_f32(vgetq_lane_f32(vf, 3));
691 float32x4_t via = vrecpeq_f32(va); // estimate 1/a
692 via = vmulq_f32(vrecpsq_f32(va, via), via);
693
694 // v * (1/a)
695 vf = vmulq_f32(vf, via);
696
697 // Handle zero alpha
// Where alpha == 0 the mask is all-ones, so the bit-clear zeroes every lane.
698#if defined(Q_PROCESSOR_ARM_64)
699 uint32x4_t vAlphaMask = vceqzq_f32(va);
700#else
701 uint32x4_t vAlphaMask = vceqq_f32(va, vdupq_n_f32(0.0));
702#endif
703 vf = vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(vf), vAlphaMask));
704
705 // LUT
// 0.5 is added before the float-to-unsigned conversion.
706 v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
707 const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0);
708 const int gidx = vgetq_lane_u32(v, 1);
709 const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2);
710 if (test_all_zero(vcgtq_u32(v, vRangeMax))) {
// Table results are written in R, G, B order into lanes 0, 1, 2, then scaled to float.
711 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0);
712 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1);
713 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2);
714 vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
715
716 vst1q_f32(&buffer[i].x, vf);
717 } else {
// Out-of-table path: only x, y and z are written here.
718 constexpr float f = 1.f / QColorTrcLut::Resolution;
719 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
720 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
721 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
722 }
723 }
724}
725
726// Load to [0->TrcResolution] in 4x32 SIMD
// Widens one unpremultiplied pixel into four unsigned 32-bit lanes of v,
// already scaled to table-index units.
727template<typename T>
728static inline void loadPU(const T &p, uint32x4_t &v);
729
// QRgb: bytes are widened to 32-bit lanes, then shifted left by ShiftUp.
730template<>
731inline void loadPU<QRgb>(const QRgb &p, uint32x4_t &v)
732{
733 v = vmovl_u16(vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(p)))));
734 v = vshlq_n_u32(v, QColorTrcLut::ShiftUp);
735}
736
// QRgba64: each 16-bit channel c becomes c - (c >> 8), is widened to 32 bits,
// then shifted right by ShiftDown.
737template<>
738inline void loadPU<QRgba64>(const QRgba64 &p, uint32x4_t &v)
739{
740 uint16x4_t v16 = vreinterpret_u16_u64(vld1_u64(reinterpret_cast<const uint64_t *>(&p)));
741 v16 = vsub_u16(v16, vshr_n_u16(v16, 8));
742 v = vmovl_u16(v16);
743 v = vshrq_n_u32(v, QColorTrcLut::ShiftDown);
744}
745
// NEON loader for unpremultiplied integer pixels (converted to table indices
// by loadPU<T>). Channels are linearised through colorSpaceIn->lut[]; if any
// lane exceeds its limit in vRangeMax, trc[].applyExtended() is used instead.
746template<typename T>
747void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
748{
749 constexpr bool isARGB = isArgb<T>();
750 const float iFF00 = 1.0f / (255 * 256);
// Per-lane index limits. For ARGB input lanes 0 and 2 are swapped because blue,
// not red, sits in lane 0.
751 const uint32x4_t vRangeMax = qvsetq_n_u32(
752 isARGB ? d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear
753 : d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear,
754 d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear,
755 isARGB ? d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear
756 : d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear,
757 QColorTrcLut::Resolution);
758 for (qsizetype i = 0; i < len; ++i) {
759 uint32x4_t v;
760 loadPU<T>(src[i], v);
761 const int ridx = isARGB ? vgetq_lane_u32(v, 2) : vgetq_lane_u32(v, 0);
762 const int gidx = vgetq_lane_u32(v, 1);
763 const int bidx = isARGB ? vgetq_lane_u32(v, 0) : vgetq_lane_u32(v, 2);
764 if (test_all_zero(vcgtq_u32(v, vRangeMax))) {
// Table results are written in R, G, B order into lanes 0, 1, 2, then scaled to float.
765 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx], v, 0);
766 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx], v, 1);
767 v = vsetq_lane_u32(d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx], v, 2);
768 float32x4_t vf = vmulq_n_f32(vcvtq_f32_u32(v), iFF00);
769 vst1q_f32(&buffer[i].x, vf);
770 } else {
// Out-of-table path: only x, y and z are written here.
771 constexpr float f = 1.f / QColorTrcLut::Resolution;
772 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
773 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
774 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
775 }
776 }
777}
778#else
779template<>
780void loadPremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
781{
782 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
783 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
784 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
785 for (qsizetype i = 0; i < len; ++i) {
786 const uint p = src[i];
787 const int a = qAlpha(p);
788 if (a) {
789 const float ia = float(QColorTrcLut::Resolution) / a;
790 const int ridx = int(qRed(p) * ia + 0.5f);
791 const int gidx = int(qGreen(p) * ia + 0.5f);
792 const int bidx = int(qBlue(p) * ia + 0.5f);
793 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
794 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
795 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
796 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
797 } else {
798 constexpr float f = 1.f / QColorTrcLut::Resolution;
799 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
800 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
801 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
802 }
803 } else {
804 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
805 }
806 }
807}
808
809template<>
810void loadPremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
811{
812 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
813 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
814 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
815 for (qsizetype i = 0; i < len; ++i) {
816 const QRgba64 &p = src[i];
817 const int a = p.alpha();
818 if (a) {
819 const float ia = float(QColorTrcLut::Resolution) / a;
820 const int ridx = int(p.red() * ia + 0.5f);
821 const int gidx = int(p.green() * ia + 0.5f);
822 const int bidx = int(p.blue() * ia + 0.5f);
823 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
824 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
825 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
826 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
827 } else {
828 constexpr float f = 1.f / QColorTrcLut::Resolution;
829 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
830 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
831 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
832 }
833 } else {
834 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
835 }
836 }
837}
838
839template<>
840void loadUnpremultiplied<QRgb>(QColorVector *buffer, const QRgb *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
841{
842 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
843 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
844 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
845 for (qsizetype i = 0; i < len; ++i) {
846 const uint p = src[i];
847 const int ridx = qRed(p) << QColorTrcLut::ShiftUp;
848 const int gidx = qGreen(p) << QColorTrcLut::ShiftUp;
849 const int bidx = qBlue(p) << QColorTrcLut::ShiftUp;
850 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
851 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
852 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
853 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
854 } else {
855 constexpr float f = 1.f / QColorTrcLut::Resolution;
856 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
857 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
858 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
859 }
860 }
861}
862
863static int u16toidx(int c)
864{
865 c -= c >> 8;
866 return c >> QColorTrcLut::ShiftDown;
867}
868
869template<>
870void loadUnpremultiplied<QRgba64>(QColorVector *buffer, const QRgba64 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
871{
872 const int rangeMaxR = d_ptr->colorSpaceIn->lut[0]->m_unclampedToLinear;
873 const int rangeMaxG = d_ptr->colorSpaceIn->lut[1]->m_unclampedToLinear;
874 const int rangeMaxB = d_ptr->colorSpaceIn->lut[2]->m_unclampedToLinear;
875 for (qsizetype i = 0; i < len; ++i) {
876 const QRgba64 &p = src[i];
877 const int ridx = u16toidx(p.red());
878 const int gidx = u16toidx(p.green());
879 const int bidx = u16toidx(p.blue());
880 if (ridx <= rangeMaxR && gidx <= rangeMaxG && bidx <= rangeMaxB) {
881 buffer[i].x = d_ptr->colorSpaceIn->lut[0]->m_toLinear[ridx] * (1.0f / (255 * 256));
882 buffer[i].y = d_ptr->colorSpaceIn->lut[1]->m_toLinear[gidx] * (1.0f / (255 * 256));
883 buffer[i].z = d_ptr->colorSpaceIn->lut[2]->m_toLinear[bidx] * (1.0f / (255 * 256));
884 } else {
885 constexpr float f = 1.f / QColorTrcLut::Resolution;
886 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(ridx * f);
887 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(gidx * f);
888 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(bidx * f);
889 }
890 }
891}
892#endif
893#if !defined(__SSE2__)
894template<>
895void loadPremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
896{
897 for (qsizetype i = 0; i < len; ++i) {
898 const QRgbaFloat32 &p = src[i];
899 const float a = p.a;
900 if (a) {
901 const float ia = 1.0f / a;
902 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r * ia);
903 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g * ia);
904 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b * ia);
905 } else {
906 buffer[i].x = buffer[i].y = buffer[i].z = 0.0f;
907 }
908 }
909}
910
911template<>
912void loadUnpremultiplied<QRgbaFloat32>(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
913{
914 for (qsizetype i = 0; i < len; ++i) {
915 const QRgbaFloat32 &p = src[i];
916 buffer[i].x = d_ptr->colorSpaceIn->trc[0].applyExtended(p.r);
917 buffer[i].y = d_ptr->colorSpaceIn->trc[1].applyExtended(p.g);
918 buffer[i].z = d_ptr->colorSpaceIn->trc[2].applyExtended(p.b);
919 }
920}
921#endif
922
923#if defined(__SSE2__)
924template<typename T>
925static inline void storeP(T &p, __m128i &v, int a);
926template<>
927inline void storeP<QRgb>(QRgb &p, __m128i &v, int a)
928{
929 v = _mm_packs_epi32(v, v);
930 v = _mm_insert_epi16(v, a, 3);
931 p = _mm_cvtsi128_si32(_mm_packus_epi16(v, v));
932}
933template<>
934inline void storeP<QRgba64>(QRgba64 &p, __m128i &v, int a)
935{
936#if defined(__SSE4_1__)
937 v = _mm_packus_epi32(v, v);
938 v = _mm_insert_epi16(v, a, 3);
939 _mm_storel_epi64((__m128i *)&p, v);
940#else
941 const int r = _mm_extract_epi16(v, 0);
942 const int g = _mm_extract_epi16(v, 2);
943 const int b = _mm_extract_epi16(v, 4);
944 p = qRgba64(r, g, b, a);
945#endif
946}
947
948template<typename D, typename S,
949 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
950static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
951 const QColorTransformPrivate *d_ptr)
952{
953 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
954 const __m128 iFF00 = _mm_set1_ps(1.0f / (255 * 256));
955 constexpr bool isARGB = isArgb<D>();
956 static_assert(getFactor<D>() >= getFactor<S>());
957 for (qsizetype i = 0; i < len; ++i) {
958 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
959 __m128 vf = _mm_loadu_ps(&buffer[i].x);
960 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
961 __m128 va = _mm_mul_ps(_mm_set1_ps(a), iFF00);
962 const int ridx = _mm_extract_epi16(v, 0);
963 const int gidx = _mm_extract_epi16(v, 2);
964 const int bidx = _mm_extract_epi16(v, 4);
965 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 4 : 0);
966 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
967 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 4);
968 vf = _mm_cvtepi32_ps(v);
969 vf = _mm_mul_ps(vf, va);
970 v = _mm_cvtps_epi32(vf);
971 storeP<D>(dst[i], v, a);
972 }
973}
974
975template<typename S>
976static void storePremultiplied(QRgbaFloat32 *dst, const S *src,
977 const QColorVector *buffer, const qsizetype len,
978 const QColorTransformPrivate *d_ptr)
979{
980 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
981 const __m128 vZero = _mm_set1_ps(0.0f);
982 const __m128 vOne = _mm_set1_ps(1.0f);
983 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
984 for (qsizetype i = 0; i < len; ++i) {
985 const float a = getAlphaF<S>(src[i]);
986 __m128 va = _mm_set1_ps(a);
987 __m128 vf = _mm_loadu_ps(&buffer[i].x);
988 const __m128 under = _mm_cmplt_ps(vf, vZero);
989 const __m128 over = _mm_cmpgt_ps(vf, vOne);
990 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
991 // Within gamut
992 va = _mm_mul_ps(va, viFF00);
993 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
994 const int ridx = _mm_extract_epi16(v, 0);
995 const int gidx = _mm_extract_epi16(v, 2);
996 const int bidx = _mm_extract_epi16(v, 4);
997 v = _mm_setzero_si128();
998 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
999 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1000 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1001 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), va);
1002 _mm_store_ps(&dst[i].r, vf);
1003 } else {
1004 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1005 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1006 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1007 vf = _mm_mul_ps(_mm_load_ps(&dst[i].r), va);
1008 _mm_store_ps(&dst[i].r, vf);
1009 }
1010 dst[i].a = a;
1011 }
1012}
1013
1014template<typename T>
1015static inline void storePU(T &p, __m128i &v, int a);
1016template<>
1017inline void storePU<QRgb>(QRgb &p, __m128i &v, int a)
1018{
1019 v = _mm_add_epi16(v, _mm_set1_epi16(0x80));
1020 v = _mm_srli_epi16(v, 8);
1021 v = _mm_insert_epi16(v, a, 3);
1022 p = _mm_cvtsi128_si32(_mm_packus_epi16(v, v));
1023}
1024template<>
1025inline void storePU<QRgba64>(QRgba64 &p, __m128i &v, int a)
1026{
1027 v = _mm_add_epi16(v, _mm_srli_epi16(v, 8));
1028 v = _mm_insert_epi16(v, a, 3);
1029 _mm_storel_epi64((__m128i *)&p, v);
1030}
1031
1032template<typename D, typename S,
1033 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1034static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1035 const QColorTransformPrivate *d_ptr)
1036{
1037 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1038 constexpr bool isARGB = isArgb<D>();
1039 static_assert(getFactor<D>() >= getFactor<S>());
1040 for (qsizetype i = 0; i < len; ++i) {
1041 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1042 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1043 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1044 const int ridx = _mm_extract_epi16(v, 0);
1045 const int gidx = _mm_extract_epi16(v, 2);
1046 const int bidx = _mm_extract_epi16(v, 4);
1047 v = _mm_setzero_si128();
1048 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0);
1049 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
1050 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2);
1051 storePU<D>(dst[i], v, a);
1052 }
1053}
1054
1055template<typename S>
1056void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src,
1057 const QColorVector *buffer, const qsizetype len,
1058 const QColorTransformPrivate *d_ptr)
1059{
1060 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1061 const __m128 vZero = _mm_set1_ps(0.0f);
1062 const __m128 vOne = _mm_set1_ps(1.0f);
1063 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
1064 for (qsizetype i = 0; i < len; ++i) {
1065 const float a = getAlphaF<S>(src[i]);
1066 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1067 const __m128 under = _mm_cmplt_ps(vf, vZero);
1068 const __m128 over = _mm_cmpgt_ps(vf, vOne);
1069 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
1070 // Within gamut
1071 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1072 const int ridx = _mm_extract_epi16(v, 0);
1073 const int gidx = _mm_extract_epi16(v, 2);
1074 const int bidx = _mm_extract_epi16(v, 4);
1075 v = _mm_setzero_si128();
1076 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
1077 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1078 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1079 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
1080 _mm_storeu_ps(&dst[i].r, vf);
1081 } else {
1082 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1083 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1084 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1085 }
1086 dst[i].a = a;
1087 }
1088}
1089
1090template<typename T>
1091static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
1092 const QColorTransformPrivate *d_ptr)
1093{
1094 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1095 constexpr bool isARGB = isArgb<T>();
1096 for (qsizetype i = 0; i < len; ++i) {
1097 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1098 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1099 const int ridx = _mm_extract_epi16(v, 0);
1100 const int gidx = _mm_extract_epi16(v, 2);
1101 const int bidx = _mm_extract_epi16(v, 4);
1102 v = _mm_setzero_si128();
1103 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], isARGB ? 2 : 0);
1104 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 1);
1105 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], isARGB ? 0 : 2);
1106 storePU<T>(dst[i], v, isARGB ? 255 : 0xffff);
1107 }
1108}
1109
1110template<>
1111void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len,
1112 const QColorTransformPrivate *d_ptr)
1113{
1114 const __m128 vTrcRes = _mm_set1_ps(float(QColorTrcLut::Resolution));
1115 const __m128 vZero = _mm_set1_ps(0.0f);
1116 const __m128 vOne = _mm_set1_ps(1.0f);
1117 const __m128 viFF00 = _mm_set1_ps(1.0f / (255 * 256));
1118 for (qsizetype i = 0; i < len; ++i) {
1119 __m128 vf = _mm_loadu_ps(&buffer[i].x);
1120 const __m128 under = _mm_cmplt_ps(vf, vZero);
1121 const __m128 over = _mm_cmpgt_ps(vf, vOne);
1122 if (_mm_movemask_ps(_mm_or_ps(under, over)) == 0) {
1123 // Within gamut
1124 __m128i v = _mm_cvtps_epi32(_mm_mul_ps(vf, vTrcRes));
1125 const int ridx = _mm_extract_epi16(v, 0);
1126 const int gidx = _mm_extract_epi16(v, 2);
1127 const int bidx = _mm_extract_epi16(v, 4);
1128 v = _mm_setzero_si128();
1129 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], 0);
1130 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], 2);
1131 v = _mm_insert_epi16(v, d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], 4);
1132 vf = _mm_mul_ps(_mm_cvtepi32_ps(v), viFF00);
1133 _mm_store_ps(&dst[i].r, vf);
1134 } else {
1135 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1136 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1137 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1138 }
1139 dst[i].a = 1.0f;
1140 }
1141}
1142
1143#elif defined(__ARM_NEON__)
1144template<typename T>
1145static inline void storeP(T &p, const uint16x4_t &v);
1146template<>
1147inline void storeP<QRgb>(QRgb &p, const uint16x4_t &v)
1148{
1149 p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0);
1150}
1151template<>
1152inline void storeP<QRgba64>(QRgba64 &p, const uint16x4_t &v)
1153{
1154 vst1_u16((uint16_t *)&p, v);
1155}
1156
1157template<typename D, typename S,
1158 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1159static void storePremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1160 const QColorTransformPrivate *d_ptr)
1161{
1162 const float iFF00 = 1.0f / (255 * 256);
1163 constexpr bool isARGB = isArgb<D>();
1164 static_assert(getFactor<D>() >= getFactor<S>());
1165 for (qsizetype i = 0; i < len; ++i) {
1166 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1167 float32x4_t vf = vld1q_f32(&buffer[i].x);
1168 uint32x4_t v = vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f)));
1169 const int ridx = vgetq_lane_u32(v, 0);
1170 const int gidx = vgetq_lane_u32(v, 1);
1171 const int bidx = vgetq_lane_u32(v, 2);
1172 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1173 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1174 v = vsetq_lane_u32(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1175 vf = vcvtq_f32_u32(v);
1176 vf = vmulq_n_f32(vf, a * iFF00);
1177 vf = vaddq_f32(vf, vdupq_n_f32(0.5f));
1178 v = vcvtq_u32_f32(vf);
1179 uint16x4_t v16 = vmovn_u32(v);
1180 v16 = vset_lane_u16(a, v16, 3);
1181 storeP<D>(dst[i], v16);
1182 }
1183}
1184
1185template<typename T>
1186static inline void storePU(T &p, uint16x4_t &v, int a);
1187template<>
1188inline void storePU<QRgb>(QRgb &p, uint16x4_t &v, int a)
1189{
1190 v = vadd_u16(v, vdup_n_u16(0x80));
1191 v = vshr_n_u16(v, 8);
1192 v = vset_lane_u16(a, v, 3);
1193 p = vget_lane_u32(vreinterpret_u32_u8(vmovn_u16(vcombine_u16(v, v))), 0);
1194}
1195template<>
1196inline void storePU<QRgba64>(QRgba64 &p, uint16x4_t &v, int a)
1197{
1198 v = vadd_u16(v, vshr_n_u16(v, 8));
1199 v = vset_lane_u16(a, v, 3);
1200 vst1_u16((uint16_t *)&p, v);
1201}
1202
1203template<typename D, typename S,
1204 typename = std::enable_if_t<!std::is_same_v<D, QRgbaFloat32>, void>>
1205static void storeUnpremultiplied(D *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1206 const QColorTransformPrivate *d_ptr)
1207{
1208 constexpr bool isARGB = isArgb<D>();
1209 static_assert(getFactor<D>() >= getFactor<S>());
1210 for (qsizetype i = 0; i < len; ++i) {
1211 const int a = getAlpha<S>(src[i]) * (getFactor<D>() / getFactor<S>());
1212 float32x4_t vf = vld1q_f32(&buffer[i].x);
1213 uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
1214 const int ridx = vget_lane_u16(v, 0);
1215 const int gidx = vget_lane_u16(v, 1);
1216 const int bidx = vget_lane_u16(v, 2);
1217 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1218 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1219 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1220 storePU<D>(dst[i], v, a);
1221 }
1222}
1223
1224template<typename T>
1225static void storeOpaque(T *dst, const QColorVector *buffer, const qsizetype len,
1226 const QColorTransformPrivate *d_ptr)
1227{
1228 constexpr bool isARGB = isArgb<T>();
1229 for (qsizetype i = 0; i < len; ++i) {
1230 float32x4_t vf = vld1q_f32(&buffer[i].x);
1231 uint16x4_t v = vmovn_u32(vcvtq_u32_f32(vaddq_f32(vmulq_n_f32(vf, float(QColorTrcLut::Resolution)), vdupq_n_f32(0.5f))));
1232 const int ridx = vget_lane_u16(v, 0);
1233 const int gidx = vget_lane_u16(v, 1);
1234 const int bidx = vget_lane_u16(v, 2);
1235 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[0]->m_fromLinear[ridx], v, isARGB ? 2 : 0);
1236 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[1]->m_fromLinear[gidx], v, 1);
1237 v = vset_lane_u16(d_ptr->colorSpaceOut->lut[2]->m_fromLinear[bidx], v, isARGB ? 0 : 2);
1238 storePU<T>(dst[i], v, isARGB ? 255 : 0xffff);
1239 }
1240}
1241#else
1242static void storePremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
1243 const QColorTransformPrivate *d_ptr)
1244{
1245 for (qsizetype i = 0; i < len; ++i) {
1246 const int a = qAlpha(src[i]);
1247 const float fa = a / (255.0f * 256.0f);
1248 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
1249 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
1250 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
1251 dst[i] = qRgba(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
1252 }
1253}
1254
1255static void storeUnpremultiplied(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len,
1256 const QColorTransformPrivate *d_ptr)
1257{
1258 for (qsizetype i = 0; i < len; ++i) {
1259 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
1260 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
1261 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
1262 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1263 }
1264}
1265
1266static void storeOpaque(QRgb *dst, const QColorVector *buffer, const qsizetype len,
1267 const QColorTransformPrivate *d_ptr)
1268{
1269 for (qsizetype i = 0; i < len; ++i) {
1270 const int r = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].x);
1271 const int g = d_ptr->colorSpaceOut->lut[1]->u8FromLinearF32(buffer[i].y);
1272 const int b = d_ptr->colorSpaceOut->lut[2]->u8FromLinearF32(buffer[i].z);
1273 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1274 }
1275}
1276
1277template<typename S>
1278static void storePremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1279 const QColorTransformPrivate *d_ptr)
1280{
1281 for (qsizetype i = 0; i < len; ++i) {
1282 const int a = getAlphaF(src[i]) * 65535.f;
1283 const float fa = a / (255.0f * 256.0f);
1284 const float r = d_ptr->colorSpaceOut->lut[0]->m_fromLinear[int(buffer[i].x * float(QColorTrcLut::Resolution) + 0.5f)];
1285 const float g = d_ptr->colorSpaceOut->lut[1]->m_fromLinear[int(buffer[i].y * float(QColorTrcLut::Resolution) + 0.5f)];
1286 const float b = d_ptr->colorSpaceOut->lut[2]->m_fromLinear[int(buffer[i].z * float(QColorTrcLut::Resolution) + 0.5f)];
1287 dst[i] = qRgba64(r * fa + 0.5f, g * fa + 0.5f, b * fa + 0.5f, a);
1288 }
1289}
1290
1291template<typename S>
1292static void storeUnpremultiplied(QRgba64 *dst, const S *src, const QColorVector *buffer, const qsizetype len,
1293 const QColorTransformPrivate *d_ptr)
1294{
1295 for (qsizetype i = 0; i < len; ++i) {
1296 const int a = getAlphaF(src[i]) * 65535.f;
1297 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
1298 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
1299 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
1300 dst[i] = qRgba64(r, g, b, a);
1301 }
1302}
1303
1304static void storeOpaque(QRgba64 *dst, const QColorVector *buffer, const qsizetype len,
1305 const QColorTransformPrivate *d_ptr)
1306{
1307 for (qsizetype i = 0; i < len; ++i) {
1308 const int r = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].x);
1309 const int g = d_ptr->colorSpaceOut->lut[1]->u16FromLinearF32(buffer[i].y);
1310 const int b = d_ptr->colorSpaceOut->lut[2]->u16FromLinearF32(buffer[i].z);
1311 dst[i] = qRgba64(r, g, b, 0xFFFF);
1312 }
1313}
1314#endif
1315#if !defined(__SSE2__)
1316template<typename S>
1317static void storePremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer,
1318 const qsizetype len, const QColorTransformPrivate *d_ptr)
1319{
1320 for (qsizetype i = 0; i < len; ++i) {
1321 const float a = getAlphaF(src[i]);
1322 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x) * a;
1323 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y) * a;
1324 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z) * a;
1325 dst[i].a = a;
1326 }
1327}
1328
1329template<typename S>
1330static void storeUnpremultiplied(QRgbaFloat32 *dst, const S *src, const QColorVector *buffer,
1331 const qsizetype len, const QColorTransformPrivate *d_ptr)
1332{
1333 for (qsizetype i = 0; i < len; ++i) {
1334 const float a = getAlphaF(src[i]);
1335 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1336 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1337 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1338 dst[i].a = a;
1339 }
1340}
1341
1342static void storeOpaque(QRgbaFloat32 *dst, const QColorVector *buffer, const qsizetype len,
1343 const QColorTransformPrivate *d_ptr)
1344{
1345 for (qsizetype i = 0; i < len; ++i) {
1346 dst[i].r = d_ptr->colorSpaceOut->trc[0].applyInverseExtended(buffer[i].x);
1347 dst[i].g = d_ptr->colorSpaceOut->trc[1].applyInverseExtended(buffer[i].y);
1348 dst[i].b = d_ptr->colorSpaceOut->trc[2].applyInverseExtended(buffer[i].z);
1349 dst[i].a = 1.0f;
1350 }
1351}
1352#endif
1353
1354static void loadGray(QColorVector *buffer, const quint8 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
1355{
1356 if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray ||
1357 (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] &&
1358 d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) {
1359 for (qsizetype i = 0; i < len; ++i) {
1360 const float y = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]);
1361 buffer[i] = d_ptr->colorSpaceIn->whitePoint * y;
1362 }
1363 } else {
1364 for (qsizetype i = 0; i < len; ++i) {
1365 QColorVector v;
1366 v.x = d_ptr->colorSpaceIn->lut[0]->u8ToLinearF32(src[i]);
1367 v.y = d_ptr->colorSpaceIn->lut[1]->u8ToLinearF32(src[i]);
1368 v.z = d_ptr->colorSpaceIn->lut[2]->u8ToLinearF32(src[i]);
1369 buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v);
1370 }
1371 }
1372}
1373
1374static void loadGray(QColorVector *buffer, const quint16 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
1375{
1376 if (d_ptr->colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray ||
1377 (d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[1] &&
1378 d_ptr->colorSpaceIn->lut[0] == d_ptr->colorSpaceIn->lut[2])) {
1379 for (qsizetype i = 0; i < len; ++i) {
1380 const float y = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]);
1381 buffer[i] = d_ptr->colorSpaceIn->whitePoint * y;
1382 }
1383 } else {
1384 for (qsizetype i = 0; i < len; ++i) {
1385 QColorVector v;
1386 v.x = d_ptr->colorSpaceIn->lut[0]->u16ToLinearF32(src[i]);
1387 v.y = d_ptr->colorSpaceIn->lut[1]->u16ToLinearF32(src[i]);
1388 v.z = d_ptr->colorSpaceIn->lut[2]->u16ToLinearF32(src[i]);
1389 buffer[i] = d_ptr->colorSpaceIn->toXyz.map(v);
1390 }
1391 }
1392}
1393
1394static void storeOpaque(quint8 *dst, const QColorVector *buffer, const qsizetype len,
1395 const QColorTransformPrivate *d_ptr)
1396{
1397 for (qsizetype i = 0; i < len; ++i)
1398 dst[i] = d_ptr->colorSpaceOut->lut[0]->u8FromLinearF32(buffer[i].y);
1399}
1400
1401static void storeOpaque(quint16 *dst, const QColorVector *buffer, const qsizetype len,
1402 const QColorTransformPrivate *d_ptr)
1403{
1404 for (qsizetype i = 0; i < len; ++i)
1405 dst[i] = d_ptr->colorSpaceOut->lut[0]->u16FromLinearF32(buffer[i].y);
1406}
1407
// Block length of 256 elements.
// NOTE(review): presumably the number of pixels converted per pass through a
// temporary QColorVector buffer - confirm against the transform loops that use it.
1408static constexpr qsizetype WorkBlockSize = 256;
1409
// Raw storage for Count objects of type T, aligned like T. The conversion
// operator hands the storage out as a T* without constructing any T, so the
// contents are whatever the caller writes into them.
template <typename T, int Count = 1>
class QUninitialized
{
public:
    // Reinterprets this object's own address as the start of the T array.
    operator T*() { return reinterpret_cast<T *>(this); }
private:
    alignas(T) char data[sizeof(T) * Count];
};
1418
1419void loadUnpremultipliedLUT(QColorVector *buffer, const uchar *src, const qsizetype len)
1420{
1421 const float f = 1.0f / 255.f;
1422 for (qsizetype i = 0; i < len; ++i) {
1423 const float p = src[i] * f;
1424 buffer[i].x = p;
1425 buffer[i].y = p;
1426 buffer[i].z = p;
1427 }
1428}
1429
1430void loadUnpremultipliedLUT(QColorVector *buffer, const quint16 *src, const qsizetype len)
1431{
1432 const float f = 1.0f / 65535.f;
1433 for (qsizetype i = 0; i < len; ++i) {
1434 const float p = src[i] * f;
1435 buffer[i].x = p;
1436 buffer[i].y = p;
1437 buffer[i].z = p;
1438 }
1439}
1440
1441void loadUnpremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len)
1442{
1443 const float f = 1.0f / 255.f;
1444 for (qsizetype i = 0; i < len; ++i) {
1445 const uint p = src[i];
1446 buffer[i].x = qRed(p) * f;
1447 buffer[i].y = qGreen(p) * f;
1448 buffer[i].z = qBlue(p) * f;
1449 }
1450}
1451
1452void loadUnpremultipliedLUT(QColorVector *buffer, const QCmyk32 *src, const qsizetype len)
1453{
1454 const float f = 1.0f / 255.f;
1455 for (qsizetype i = 0; i < len; ++i) {
1456 const QCmyk32 p = src[i];
1457 buffer[i].x = (p.cyan() * f);
1458 buffer[i].y = (p.magenta() * f);
1459 buffer[i].z = (p.yellow() * f);
1460 buffer[i].w = (p.black() * f);
1461 }
1462}
1463
1464void loadUnpremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
1465{
1466 const float f = 1.0f / 65535.f;
1467 for (qsizetype i = 0; i < len; ++i) {
1468 buffer[i].x = src[i].red() * f;
1469 buffer[i].y = src[i].green() * f;
1470 buffer[i].z = src[i].blue() * f;
1471 }
1472}
1473
1474void loadUnpremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
1475{
1476 for (qsizetype i = 0; i < len; ++i) {
1477 buffer[i].x = src[i].r;
1478 buffer[i].y = src[i].g;
1479 buffer[i].z = src[i].b;
1480 }
1481}
1482
// Premultiplied loading of uchar samples is never expected: the overload only
// exists so the call compiles, and reaching it hits Q_UNREACHABLE().
// NOTE(review): presumably because this sample type carries no alpha - confirm.
1483void loadPremultipliedLUT(QColorVector *, const uchar *, const qsizetype)
1484{
1485 Q_UNREACHABLE();
1486}
1487
// Premultiplied loading of quint16 samples is never expected: the overload only
// exists so the call compiles, and reaching it hits Q_UNREACHABLE().
// NOTE(review): presumably because this sample type carries no alpha - confirm.
1488void loadPremultipliedLUT(QColorVector *, const quint16 *, const qsizetype)
1489{
1490 Q_UNREACHABLE();
1491}
1492
1493void loadPremultipliedLUT(QColorVector *buffer, const QRgb *src, const qsizetype len)
1494{
1495 for (qsizetype i = 0; i < len; ++i) {
1496 const uint p = src[i];
1497 const float f = 1.0f / qAlpha(p);
1498 buffer[i].x = (qRed(p) * f);
1499 buffer[i].y = (qGreen(p) * f);
1500 buffer[i].z = (qBlue(p) * f);
1501 }
1502}
1503
// Premultiplied loading of QCmyk32 pixels is never expected: the overload only
// exists so the call compiles, and reaching it hits Q_UNREACHABLE().
// NOTE(review): presumably because this pixel type carries no alpha - confirm.
1504void loadPremultipliedLUT(QColorVector *, const QCmyk32 *, const qsizetype)
1505{
1506 Q_UNREACHABLE();
1507}
1508
1509void loadPremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
1510{
1511 for (qsizetype i = 0; i < len; ++i) {
1512 const float f = 1.0f / src[i].alpha();
1513 buffer[i].x = (src[i].red() * f);
1514 buffer[i].y = (src[i].green() * f);
1515 buffer[i].z = (src[i].blue() * f);
1516 }
1517}
1518
1519void loadPremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
1520{
1521 for (qsizetype i = 0; i < len; ++i) {
1522 const float f = 1.0f / src[i].a;
1523 buffer[i].x = src[i].r * f;
1524 buffer[i].y = src[i].g * f;
1525 buffer[i].z = src[i].b * f;
1526 }
1527}
1528template<typename T>
1529static void storeUnpremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
1530{
1531 for (qsizetype i = 0; i < len; ++i) {
1532 const int r = buffer[i].x * 255.f;
1533 const int g = buffer[i].y * 255.f;
1534 const int b = buffer[i].z * 255.f;
1535 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1536 }
1537}
1538
1539template<>
1540void storeUnpremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1541{
1542 for (qsizetype i = 0; i < len; ++i) {
1543 const int r = buffer[i].x * 255.f;
1544 const int g = buffer[i].y * 255.f;
1545 const int b = buffer[i].z * 255.f;
1546 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1547 }
1548}
1549
1550
1551template<typename T>
1552void storeUnpremultipliedLUT(QCmyk32 *dst, const T *, const QColorVector *buffer, const qsizetype len)
1553{
1554 for (qsizetype i = 0; i < len; ++i) {
1555 const int c = buffer[i].x * 255.f;
1556 const int m = buffer[i].y * 255.f;
1557 const int y = buffer[i].z * 255.f;
1558 const int k = buffer[i].w * 255.f;
1559 dst[i] = QCmyk32(c, m, y, k);
1560 }
1561}
1562
1563template<typename T>
1564static void storeUnpremultipliedLUT(QRgba64 *dst, const T *,
1565 const QColorVector *buffer, const qsizetype len)
1566{
1567 for (qsizetype i = 0; i < len; ++i) {
1568 const int r = buffer[i].x * 65535.f;
1569 const int g = buffer[i].y * 65535.f;
1570 const int b = buffer[i].z * 65535.f;
1571 dst[i] = qRgba64(r, g, b, 65535);
1572 }
1573}
1574
1575template<>
1576void storeUnpremultipliedLUT(QRgba64 *dst, const QRgb *src,
1577 const QColorVector *buffer, const qsizetype len)
1578{
1579 for (qsizetype i = 0; i < len; ++i) {
1580 const int a = qAlpha(src[i]) * 257;
1581 const int r = buffer[i].x * 65535.f;
1582 const int g = buffer[i].y * 65535.f;
1583 const int b = buffer[i].z * 65535.f;
1584 dst[i] = qRgba64(r, g, b, a);
1585 }
1586}
1587
1588template<>
1590 const QColorVector *buffer, const qsizetype len)
1591{
1592 for (qsizetype i = 0; i < len; ++i) {
1593 const int r = buffer[i].x * 65535.f;
1594 const int g = buffer[i].y * 65535.f;
1595 const int b = buffer[i].z * 65535.f;
1596 dst[i] = qRgba64(r, g, b, src[i].alpha());
1597 }
1598}
1599
1600template<typename T>
1601static void storeUnpremultipliedLUT(QRgbaFloat32 *dst, const T *src,
1602 const QColorVector *buffer, const qsizetype len)
1603{
1604 for (qsizetype i = 0; i < len; ++i) {
1605 const float r = buffer[i].x;
1606 const float g = buffer[i].y;
1607 const float b = buffer[i].z;
1608 dst[i] = QRgbaFloat32{r, g, b, getAlphaF(src[i])};
1609 }
1610}
1611
1612template<typename T>
1613static void storePremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
1614{
1615 for (qsizetype i = 0; i < len; ++i) {
1616 const int r = buffer[i].x * 255.f;
1617 const int g = buffer[i].y * 255.f;
1618 const int b = buffer[i].z * 255.f;
1619 dst[i] = 0xff000000 | (r << 16) | (g << 8) | (b << 0);
1620 }
1621}
1622
1623template<>
1624void storePremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1625{
1626 for (qsizetype i = 0; i < len; ++i) {
1627 const int a = qAlpha(src[i]);
1628 const int r = buffer[i].x * a;
1629 const int g = buffer[i].y * a;
1630 const int b = buffer[i].z * a;
1631 dst[i] = (src[i] & 0xff000000) | (r << 16) | (g << 8) | (b << 0);
1632 }
1633}
1634
1635template<typename T>
1636static void storePremultipliedLUT(QCmyk32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
1637{
1638 storeUnpremultipliedLUT(dst, src, buffer, len);
1639}
1640
1641template<typename T>
1642static void storePremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
1643{
1644 for (qsizetype i = 0; i < len; ++i) {
1645 const int r = buffer[i].x * 65535.f;
1646 const int g = buffer[i].y * 65535.f;
1647 const int b = buffer[i].z * 65535.f;
1648 dst[i] = qRgba64(r, g, b, 65535);
1649 }
1650}
1651
1652template<>
1653void storePremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
1654{
1655 for (qsizetype i = 0; i < len; ++i) {
1656 const int a = qAlpha(src[i]) * 257;
1657 const int r = buffer[i].x * a;
1658 const int g = buffer[i].y * a;
1659 const int b = buffer[i].z * a;
1660 dst[i] = qRgba64(r, g, b, a);
1661 }
1662}
1663
1664template<>
1665void storePremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
1666{
1667 for (qsizetype i = 0; i < len; ++i) {
1668 const int a = src[i].alpha();
1669 const int r = buffer[i].x * a;
1670 const int g = buffer[i].y * a;
1671 const int b = buffer[i].z * a;
1672 dst[i] = qRgba64(r, g, b, a);
1673 }
1674}
1675
1676template<typename T>
1677static void storePremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
1678{
1679 for (qsizetype i = 0; i < len; ++i) {
1680 const float a = getAlphaF(src[i]);
1681 const float r = buffer[i].x * a;
1682 const float g = buffer[i].y * a;
1683 const float b = buffer[i].z * a;
1684 dst[i] = QRgbaFloat32{r, g, b, a};
1685 }
1686}
1687
1688static void visitElement(const QColorSpacePrivate::TransferElement &element, QColorVector *buffer, const qsizetype len)
1689{
1690 const bool doW = element.trc[3].isValid();
1691 for (qsizetype i = 0; i < len; ++i) {
1692 buffer[i].x = element.trc[0].apply(buffer[i].x);
1693 buffer[i].y = element.trc[1].apply(buffer[i].y);
1694 buffer[i].z = element.trc[2].apply(buffer[i].z);
1695 if (doW)
1696 buffer[i].w = element.trc[3].apply(buffer[i].w);
1697 }
1698}
1699
1700static void visitElement(const QColorMatrix &element, QColorVector *buffer, const qsizetype len)
1701{
1702 for (qsizetype i = 0; i < len; ++i)
1703 buffer[i] = element.map(buffer[i]);
1704}
1705
1706static void visitElement(const QColorVector &offset, QColorVector *buffer, const qsizetype len)
1707{
1708 for (qsizetype i = 0; i < len; ++i)
1709 buffer[i] += offset;
1710}
1711
1712static void visitElement(const QColorCLUT &element, QColorVector *buffer, const qsizetype len)
1713{
1714 if (element.isEmpty())
1715 return;
1716 for (qsizetype i = 0; i < len; ++i)
1717 buffer[i] = element.apply(buffer[i]);
1718}
1719
1720/*!
1721 \internal
1722*/
1723QColorVector QColorTransformPrivate::map(QColorVector c) const
1724{
1725 if (colorSpaceIn->isThreeComponentMatrix()) {
1726 if (colorSpaceIn->lut.generated.loadAcquire()) {
1727 c.x = colorSpaceIn->lut[0]->toLinear(c.x);
1728 c.y = colorSpaceIn->lut[1]->toLinear(c.y);
1729 c.z = colorSpaceIn->lut[2]->toLinear(c.z);
1730 } else {
1731 c.x = colorSpaceIn->trc[0].apply(c.x);
1732 c.y = colorSpaceIn->trc[1].apply(c.y);
1733 c.z = colorSpaceIn->trc[2].apply(c.z);
1734 }
1735 c = colorMatrix.map(c);
1736 } else {
1737 // Do element based conversion
1738 for (auto &&element : colorSpaceIn->mAB)
1739 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1740 }
1741 c.x = std::clamp(c.x, 0.0f, 1.0f);
1742 c.y = std::clamp(c.y, 0.0f, 1.0f);
1743 c.z = std::clamp(c.z, 0.0f, 1.0f);
1744
1745 // Match Profile Connection Spaces (PCS):
1746 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab)
1747 c = c.xyzToLab();
1748 else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab)
1749 c = c.labToXyz();
1750
1751 if (colorSpaceOut->isThreeComponentMatrix()) {
1752 if (!colorSpaceIn->isThreeComponentMatrix()) {
1753 c = colorMatrix.map(c);
1754 c.x = std::clamp(c.x, 0.0f, 1.0f);
1755 c.y = std::clamp(c.y, 0.0f, 1.0f);
1756 c.z = std::clamp(c.z, 0.0f, 1.0f);
1757 }
1758 if (colorSpaceOut->lut.generated.loadAcquire()) {
1759 c.x = colorSpaceOut->lut[0]->fromLinear(c.x);
1760 c.y = colorSpaceOut->lut[1]->fromLinear(c.y);
1761 c.z = colorSpaceOut->lut[2]->fromLinear(c.z);
1762 } else {
1763 c.x = colorSpaceOut->trc[0].applyInverse(c.x);
1764 c.y = colorSpaceOut->trc[1].applyInverse(c.y);
1765 c.z = colorSpaceOut->trc[2].applyInverse(c.z);
1766 }
1767 } else {
1768 // Do element based conversion
1769 for (auto &&element : colorSpaceOut->mBA)
1770 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1771 c.x = std::clamp(c.x, 0.0f, 1.0f);
1772 c.y = std::clamp(c.y, 0.0f, 1.0f);
1773 c.z = std::clamp(c.z, 0.0f, 1.0f);
1774 }
1775 return c;
1776}
1777
1778/*!
1779 \internal
1780*/
1781QColorVector QColorTransformPrivate::mapExtended(QColorVector c) const
1782{
1783 if (colorSpaceIn->isThreeComponentMatrix()) {
1784 c.x = colorSpaceIn->trc[0].applyExtended(c.x);
1785 c.y = colorSpaceIn->trc[1].applyExtended(c.y);
1786 c.z = colorSpaceIn->trc[2].applyExtended(c.z);
1787 c = colorMatrix.map(c);
1788 } else {
1789 // Do element based conversion
1790 for (auto &&element : colorSpaceIn->mAB)
1791 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1792 }
1793
1794 // Match Profile Connection Spaces (PCS):
1795 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab)
1796 c = c.xyzToLab();
1797 else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab)
1798 c = c.labToXyz();
1799
1800 if (colorSpaceOut->isThreeComponentMatrix()) {
1801 if (!colorSpaceIn->isThreeComponentMatrix())
1802 c = colorMatrix.map(c);
1803 c.x = colorSpaceOut->trc[0].applyInverseExtended(c.x);
1804 c.y = colorSpaceOut->trc[1].applyInverseExtended(c.y);
1805 c.z = colorSpaceOut->trc[2].applyInverseExtended(c.z);
1806 } else {
1807 // Do element based conversion
1808 for (auto &&element : colorSpaceOut->mBA)
1809 std::visit([&c](auto &&elm) { visitElement(elm, &c, 1); }, element);
1810 }
1811 return c;
1812}
1813
1814template<typename T>
1815constexpr bool IsGrayscale = std::is_same_v<T, uchar> || std::is_same_v<T, quint16>;
1816template<typename T>
1818template<typename T>
1820template<typename T>
1822
1823// Possible combos for data and color spaces:
1824// DataCM ColorSpaceCM ColorSpacePM Notes
1825// Gray Gray ThreeMatrix
1826// Gray Rgb ThreeMatrix Invalid colorMatrix
1827// Rgb Rgb ThreeMatrix
1828// Rgb Rgb ElementProc
1829// Gray Rgb ElementProc Only possible for input data
1830// Cmyk Cmyk ElementProc
1831//
1832// Gray data can be uchar, quint16, and is always Opaque
1833// Rgb data can be QRgb, QRgba64, or QRgbaFloat32, and is Unpremultiplied, Premultiplied, or Opaque
1834// Cmyk data can be Cmyk32, and is always Opaque
1835//
1836// colorMatrix is set up for Gray on Gray or Rgb on Rgb, but not for Gray data on an Rgb colorspace.
1837
1838template<typename S>
1839void QColorTransformPrivate::applyConvertIn(const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const
1840{
1841 if constexpr (IsGrayscale<S>) {
1842 if (colorSpaceIn->isThreeComponentMatrix()) {
1843 loadGray(buffer, src, len, this);
1844 if (!colorSpaceOut->isThreeComponentMatrix() || colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray) {
1845 if (!colorSpaceIn->chad.isNull())
1846 applyMatrix<DoClamp>(buffer, len, colorSpaceIn->chad);
1847 }
1848 return;
1849 }
1850 } else if constexpr (CanUseThreeComponent<S>) {
1851 if (colorSpaceIn->isThreeComponentMatrix()) {
1852 if (flags & InputPremultiplied)
1853 loadPremultiplied(buffer, src, len, this);
1854 else
1855 loadUnpremultiplied(buffer, src, len, this);
1856
1857 if (!colorSpaceOut->isThreeComponentMatrix())
1858 applyMatrix<DoClamp>(buffer, len, colorMatrix);
1859 return;
1860 }
1861 }
1862 Q_ASSERT(!colorSpaceIn->isThreeComponentMatrix());
1863
1864 if (flags & InputPremultiplied)
1865 loadPremultipliedLUT(buffer, src, len);
1866 else
1867 loadUnpremultipliedLUT(buffer, src, len);
1868
1869 // Do element based conversion
1870 for (auto &&element : colorSpaceIn->mAB)
1871 std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element);
1872}
1873
1874template<typename D, typename S>
1875void QColorTransformPrivate::applyConvertOut(D *dst, const S *src, QColorVector *buffer, qsizetype len, TransformFlags flags) const
1876{
1877 constexpr ApplyMatrixForm doClamp = UnclampedValues<D> ? DoNotClamp : DoClamp;
1878 if constexpr (IsGrayscale<D>) {
1879 Q_UNUSED(src); // dealing with buggy warnings in gcc 9
1880 Q_UNUSED(flags);
1881 // Calculate the matrix for grayscale conversion
1882 QColorMatrix grayMatrix;
1883 if (colorSpaceIn == colorSpaceOut ||
1884 (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray &&
1885 colorSpaceOut->colorModel == QColorSpace::ColorModel::Gray)) {
1886 // colorMatrix already has the right form
1887 grayMatrix = colorMatrix;
1888 } else {
1889 if constexpr (IsGrayscale<S>) {
1890 if (colorSpaceIn->colorModel == QColorSpace::ColorModel::Gray)
1891 grayMatrix = colorSpaceIn->chad;
1892 else
1893 grayMatrix = QColorMatrix::identity(); // Otherwise already handled in applyConvertIn
1894 } else {
1895 if (colorSpaceIn->isThreeComponentMatrix())
1896 grayMatrix = colorSpaceIn->toXyz;
1897 else
1898 grayMatrix = QColorMatrix::identity();
1899 }
1900 if (!colorSpaceOut->chad.isNull())
1901 grayMatrix = colorSpaceOut->chad.inverted() * grayMatrix;
1902 }
1903
1904 applyMatrix<doClamp>(buffer, len, grayMatrix);
1905 storeOpaque(dst, buffer, len, this);
1906 return;
1907 } else if constexpr (CanUseThreeComponent<D>) {
1908 if (colorSpaceOut->isThreeComponentMatrix()) {
1909 if (IsGrayscale<S> && colorSpaceIn->colorModel != QColorSpace::ColorModel::Gray)
1910 applyMatrix<doClamp>(buffer, len, colorSpaceOut->toXyz.inverted()); // colorMatrix wasnt prepared for gray input
1911 else
1912 applyMatrix<doClamp>(buffer, len, colorMatrix);
1913
1914 if constexpr (IsAlwaysOpaque<S>) {
1915 storeOpaque(dst, buffer, len, this);
1916 } else {
1917 if (flags & InputOpaque)
1918 storeOpaque(dst, buffer, len, this);
1919 else if (flags & OutputPremultiplied)
1920 storePremultiplied(dst, src, buffer, len, this);
1921 else
1922 storeUnpremultiplied(dst, src, buffer, len, this);
1923 }
1924 return;
1925 }
1926 }
1927 if constexpr (!IsGrayscale<D>) {
1928 Q_ASSERT(!colorSpaceOut->isThreeComponentMatrix());
1929
1930 // Do element based conversion
1931 for (auto &&element : colorSpaceOut->mBA)
1932 std::visit([&buffer, len](auto &&elm) { visitElement(elm, buffer, len); }, element);
1933
1934 clampIfNeeded<doClamp>(buffer, len);
1935
1936 if (flags & OutputPremultiplied)
1937 storePremultipliedLUT(dst, src, buffer, len);
1938 else
1939 storeUnpremultipliedLUT(dst, src, buffer, len);
1940 } else {
1941 Q_UNREACHABLE();
1942 }
1943}
1944
1945/*!
1946 \internal
1947 Adapt Profile Connection Spaces.
1948*/
1949void QColorTransformPrivate::pcsAdapt(QColorVector *buffer, qsizetype count) const
1950{
1951 // Match Profile Connection Spaces (PCS):
1952 if (colorSpaceOut->isPcsLab && !colorSpaceIn->isPcsLab) {
1953 for (qsizetype j = 0; j < count; ++j)
1954 buffer[j] = buffer[j].xyzToLab();
1955 } else if (colorSpaceIn->isPcsLab && !colorSpaceOut->isPcsLab) {
1956 for (qsizetype j = 0; j < count; ++j)
1957 buffer[j] = buffer[j].labToXyz();
1958 }
1959}
1960
1961/*!
1962 \internal
1963 Applies the color transformation on \a count S pixels starting from
1964 \a src and stores the result in \a dst as D pixels .
1965
1966 Assumes unpremultiplied data by default. Set \a flags to change defaults.
1967
1968 \sa prepare()
1969*/
1970template<typename D, typename S>
1971void QColorTransformPrivate::apply(D *dst, const S *src, qsizetype count, TransformFlags flags) const
1972{
1973 if (colorSpaceIn->isThreeComponentMatrix())
1975 if (colorSpaceOut->isThreeComponentMatrix())
1977
1978 Q_DECL_UNINITIALIZED QUninitialized<QColorVector, WorkBlockSize> buffer;
1979 qsizetype i = 0;
1980 while (i < count) {
1981 const qsizetype len = qMin(count - i, WorkBlockSize);
1982
1983 applyConvertIn(src + i, buffer, len, flags);
1984
1985 pcsAdapt(buffer, len);
1986
1987 applyConvertOut(dst + i, src + i, buffer, len, flags);
1988
1989 i += len;
1990 }
1991}
1992
1993/*!
1994 \internal
1995 \enum QColorTransformPrivate::TransformFlag
1996
1997 Defines how the transform should handle alpha values.
1998
1999 \value Unpremultiplied The input and output should both be unpremultiplied.
2000 \value InputOpaque The input is guaranteed to be opaque.
2001 \value InputPremultiplied The input is premultiplied.
2002 \value OutputPremultiplied The output should be premultiplied.
2003 \value Premultiplied Both input and output should be premultiplied.
2004*/
2005
2006/*!
2007 \internal
2008 Prepares a color transformation for fast application. You do not need to
2009 call this explicitly as it will be called implicitly on the first transforms, but
2010 if you want predictable performance on the first transforms, you can perform it
2011 in advance.
2012
2013 \sa QColorTransform::map(), apply()
2014*/
2020
2021// Only some versions increasing precision 14/36 combos
2022template void QColorTransformPrivate::apply<quint8, quint8>(quint8 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2023template void QColorTransformPrivate::apply<quint8, QRgb>(quint8 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2024template void QColorTransformPrivate::apply<quint8, QCmyk32>(quint8 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2025template void QColorTransformPrivate::apply<quint16, quint8>(quint16 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2026template void QColorTransformPrivate::apply<quint16, quint16>(quint16 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2027template void QColorTransformPrivate::apply<quint16, QCmyk32>(quint16 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2028template void QColorTransformPrivate::apply<quint16, QRgba64>(quint16 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2029template void QColorTransformPrivate::apply<QRgb, quint8>(QRgb *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2030template void QColorTransformPrivate::apply<QRgb, QRgb>(QRgb *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2031template void QColorTransformPrivate::apply<QRgb, QCmyk32>(QRgb *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2032template void QColorTransformPrivate::apply<QCmyk32, quint8>(QCmyk32 *dst, const quint8 *src, qsizetype count, TransformFlags flags) const;
2033template void QColorTransformPrivate::apply<QCmyk32, quint16>(QCmyk32 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2034template void QColorTransformPrivate::apply<QCmyk32, QRgb>(QCmyk32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2035template void QColorTransformPrivate::apply<QCmyk32, QCmyk32>(QCmyk32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2036template void QColorTransformPrivate::apply<QCmyk32, QRgba64>(QCmyk32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2037template void QColorTransformPrivate::apply<QCmyk32, QRgbaFloat32>(QCmyk32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const;
2038template void QColorTransformPrivate::apply<QRgba64, quint16>(QRgba64 *dst, const quint16 *src, qsizetype count, TransformFlags flags) const;
2039template void QColorTransformPrivate::apply<QRgba64, QRgb>(QRgba64 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2040template void QColorTransformPrivate::apply<QRgba64, QCmyk32>(QRgba64 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2041template void QColorTransformPrivate::apply<QRgba64, QRgba64>(QRgba64 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2042template void QColorTransformPrivate::apply<QRgbaFloat32, QRgb>(QRgbaFloat32 *dst, const QRgb *src, qsizetype count, TransformFlags flags) const;
2043template void QColorTransformPrivate::apply<QRgbaFloat32, QCmyk32>(QRgbaFloat32 *dst, const QCmyk32 *src, qsizetype count, TransformFlags flags) const;
2044template void QColorTransformPrivate::apply<QRgbaFloat32, QRgba64>(QRgbaFloat32 *dst, const QRgba64 *src, qsizetype count, TransformFlags flags) const;
2045template void QColorTransformPrivate::apply<QRgbaFloat32, QRgbaFloat32>(QRgbaFloat32 *dst, const QRgbaFloat32 *src, qsizetype count, TransformFlags flags) const;
2046
2047/*!
2048 \internal
2049*/
2051{
2052 if (colorSpaceIn == colorSpaceOut)
2053 return true;
2054 if (!colorMatrix.isIdentity())
2055 return false;
2056 if (colorSpaceIn && colorSpaceOut) {
2057 if (colorSpaceIn->equals(colorSpaceOut.constData()))
2058 return true;
2059 if (!colorSpaceIn->isThreeComponentMatrix() || !colorSpaceOut->isThreeComponentMatrix())
2060 return false;
2061 if (colorSpaceIn->transferFunction != colorSpaceOut->transferFunction)
2062 return false;
2063 if (colorSpaceIn->transferFunction == QColorSpace::TransferFunction::Custom) {
2064 return colorSpaceIn->trc[0] == colorSpaceOut->trc[0]
2065 && colorSpaceIn->trc[1] == colorSpaceOut->trc[1]
2066 && colorSpaceIn->trc[2] == colorSpaceOut->trc[2];
2067 }
2068 } else {
2069 if (colorSpaceIn && !colorSpaceIn->isThreeComponentMatrix())
2070 return false;
2071 if (colorSpaceOut && !colorSpaceOut->isThreeComponentMatrix())
2072 return false;
2073 if (colorSpaceIn && colorSpaceIn->transferFunction != QColorSpace::TransferFunction::Linear)
2074 return false;
2075 if (colorSpaceOut && colorSpaceOut->transferFunction != QColorSpace::TransferFunction::Linear)
2076 return false;
2077 }
2078 return true;
2079}
2080
2081QT_END_NAMESPACE
constexpr QCmyk32(int cyan, int magenta, int yellow, int black)
Definition qcmyk_p.h:35
constexpr int black() const noexcept
Definition qcmyk_p.h:48
constexpr int magenta() const noexcept
Definition qcmyk_p.h:46
constexpr int yellow() const noexcept
Definition qcmyk_p.h:47
constexpr int cyan() const noexcept
Definition qcmyk_p.h:45
bool isEmpty() const
static QColorMatrix identity()
QColorVector map(const QColorVector &c) const
Q_GUI_EXPORT void prepare()
void apply(D *dst, const S *src, qsizetype count, TransformFlags flags) const
QColorVector mapExtended(QColorVector color) const
The QColorTransform class is a transformation between color spaces.
Q_GUI_EXPORT ~QColorTransform()
Combined button and popup list for selecting options.
static void visitElement(const QColorSpacePrivate::TransferElement &element, QColorVector *buffer, const qsizetype len)
float getAlphaF(const QRgbaFloat32 &r)
static void storePremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void storePremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *, const QCmyk32 *, const qsizetype)
static void storeOpaque(quint8 *dst, const QColorVector *buffer, const qsizetype len, const QColorTransformPrivate *d_ptr)
static void applyMatrix(QColorVector *buffer, const qsizetype len, const QColorMatrix &colorMatrix)
static void visitElement(const QColorCLUT &element, QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
static void storePremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
static float getAlphaF(const T &)
static void storePremultipliedLUT(QCmyk32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void storeUnpremultipliedLUT(QCmyk32 *dst, const T *, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const uchar *src, const qsizetype len)
static void loadPremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
static void storeUnpremultipliedLUT(QRgb *dst, const T *, const QColorVector *buffer, const qsizetype len)
void storeUnpremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
static void storeUnpremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
static void clampIfNeeded(QColorVector *buffer, const qsizetype len)
void storePremultipliedLUT(QRgb *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
float getAlphaF(const QCmyk32 &)
static constexpr qsizetype WorkBlockSize
void loadPremultipliedLUT(QColorVector *buffer, const QRgbaFloat32 *src, const qsizetype len)
static void loadUnpremultiplied(QColorVector *buffer, const T *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
ApplyMatrixForm
@ DoClamp
@ DoNotClamp
static void loadGray(QColorVector *buffer, const quint8 *src, const qsizetype len, const QColorTransformPrivate *d_ptr)
float getAlphaF(const QRgba64 &r)
constexpr bool IsAlwaysOpaque
constexpr bool IsGrayscale
float getAlphaF(const QRgb &r)
void storeUnpremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
static void visitElement(const QColorVector &offset, QColorVector *buffer, const qsizetype len)
constexpr bool UnclampedValues
static void storePremultipliedLUT(QRgba64 *dst, const T *, const QColorVector *buffer, const qsizetype len)
static void visitElement(const QColorMatrix &element, QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *, const uchar *, const qsizetype)
void storeUnpremultipliedLUT(QRgba64 *dst, const QRgba64 *src, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QCmyk32 *src, const qsizetype len)
static void storeUnpremultipliedLUT(QRgbaFloat32 *dst, const T *src, const QColorVector *buffer, const qsizetype len)
void loadUnpremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
constexpr bool CanUseThreeComponent
void storePremultipliedLUT(QRgba64 *dst, const QRgb *src, const QColorVector *buffer, const qsizetype len)
void loadPremultipliedLUT(QColorVector *buffer, const QRgba64 *src, const qsizetype len)
QRgbaFloat< float > QRgbaFloat32
QRgbaFloat< qfloat16 > QRgbaFloat16