48 QColorVector apply(
const QColorVector &v)
const
50 Q_ASSERT(table.size() == qsizetype(gridPointsX * gridPointsY * gridPointsZ * gridPointsW));
53 const __m128 minV = _mm_setzero_ps();
54 const __m128 maxV = _mm_set1_ps(1.0f);
55 const __m128i gridPointsInt = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(&gridPointsX));
56 const __m128 gridPointsV = _mm_cvtepi32_ps(_mm_add_epi32(gridPointsInt, _mm_set1_epi32(-1)));
57 __m128 c = _mm_loadu_ps(&v.x);
58 c = _mm_max_ps(c, minV);
59 c = _mm_min_ps(c, maxV);
60 c = _mm_mul_ps(c, gridPointsV);
61#if !defined(__SSE4_1__)
62 const __m128 clo = _mm_cvtepi32_ps(_mm_cvttps_epi32(c));
64 const __m128 clo = _mm_floor_ps(c);
66 __m128 chi = _mm_add_ps(clo, maxV);
67 chi = _mm_min_ps(chi, gridPointsV);
68 _mm_storeu_ps(
reinterpret_cast<
float *>(&frac), _mm_sub_ps(c, clo));
69 const __m128i ilo = _mm_cvtps_epi32(clo);
70 const __m128i ihi = _mm_cvtps_epi32(chi);
71 const uint32_t lox = _mm_cvtsi128_si32(ilo);
72 const uint32_t hix = _mm_cvtsi128_si32(ihi);
73#if !defined(__SSE4_1__)
74 const uint32_t loy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(1, 1, 1, 1)));
75 const uint32_t loz = _mm_cvtsi128_si32(_mm_unpackhi_epi32(ilo, ilo));
76 const uint32_t low = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(3, 3, 3, 3)));
77 const uint32_t hiy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(1, 1, 1, 1)));
78 const uint32_t hiz = _mm_cvtsi128_si32(_mm_unpackhi_epi32(ihi, ihi));
79 const uint32_t hiw = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(3, 3, 3, 3)));
81 const uint32_t loy = _mm_extract_epi32(ilo, 1);
82 const uint32_t loz = _mm_extract_epi32(ilo, 2);
83 const uint32_t low = _mm_extract_epi32(ilo, 3);
84 const uint32_t hiy = _mm_extract_epi32(ihi, 1);
85 const uint32_t hiz = _mm_extract_epi32(ihi, 2);
86 const uint32_t hiw = _mm_extract_epi32(ihi, 3);
89 const float x = std::clamp(v.x, 0.0f, 1.0f) * (gridPointsX - 1);
90 const float y = std::clamp(v.y, 0.0f, 1.0f) * (gridPointsY - 1);
91 const float z = std::clamp(v.z, 0.0f, 1.0f) * (gridPointsZ - 1);
92 const float w = std::clamp(v.w, 0.0f, 1.0f) * (gridPointsW - 1);
93 const uint32_t lox =
static_cast<uint32_t>(std::floor(x));
94 const uint32_t hix = std::min(lox + 1, gridPointsX - 1);
95 const uint32_t loy =
static_cast<uint32_t>(std::floor(y));
96 const uint32_t hiy = std::min(loy + 1, gridPointsY - 1);
97 const uint32_t loz =
static_cast<uint32_t>(std::floor(z));
98 const uint32_t hiz = std::min(loz + 1, gridPointsZ - 1);
99 const uint32_t low =
static_cast<uint32_t>(std::floor(w));
100 const uint32_t hiw = std::min(low + 1, gridPointsW - 1);
101 frac.x = x -
static_cast<
float>(lox);
102 frac.y = y -
static_cast<
float>(loy);
103 frac.z = z -
static_cast<
float>(loz);
104 frac.w = w -
static_cast<
float>(low);
106 if (gridPointsW > 1) {
107 auto index = [&](qsizetype x, qsizetype y, qsizetype z, qsizetype w) -> qsizetype {
108 return x * gridPointsW * gridPointsZ * gridPointsY
109 + y * gridPointsW * gridPointsZ
115 tmp[0] = interpolate(table[index(lox, loy, loz, low)],
116 table[index(lox, loy, loz, hiw)], frac.w);
117 tmp[1] = interpolate(table[index(lox, loy, hiz, low)],
118 table[index(lox, loy, hiz, hiw)], frac.w);
119 tmp[2] = interpolate(table[index(lox, hiy, loz, low)],
120 table[index(lox, hiy, loz, hiw)], frac.w);
121 tmp[3] = interpolate(table[index(lox, hiy, hiz, low)],
122 table[index(lox, hiy, hiz, hiw)], frac.w);
123 tmp[4] = interpolate(table[index(hix, loy, loz, low)],
124 table[index(hix, loy, loz, hiw)], frac.w);
125 tmp[5] = interpolate(table[index(hix, loy, hiz, low)],
126 table[index(hix, loy, hiz, hiw)], frac.w);
127 tmp[6] = interpolate(table[index(hix, hiy, loz, low)],
128 table[index(hix, hiy, loz, hiw)], frac.w);
129 tmp[7] = interpolate(table[index(hix, hiy, hiz, low)],
130 table[index(hix, hiy, hiz, hiw)], frac.w);
132 for (
int i = 0; i < 4; ++i)
133 interpolateIn(tmp[i * 2], tmp[i * 2 + 1], frac.z);
135 for (
int i = 0; i < 2; ++i)
136 interpolateIn(tmp[i * 4], tmp[i * 4 + 2], frac.y);
138 interpolateIn(tmp[0], tmp[4], frac.x);
141 auto index = [&](qsizetype x, qsizetype y, qsizetype z) -> qsizetype {
142 return x * gridPointsZ * gridPointsY
146 QColorVector tmp[8] = {
147 table[index(lox, loy, loz)],
148 table[index(lox, loy, hiz)],
149 table[index(lox, hiy, loz)],
150 table[index(lox, hiy, hiz)],
151 table[index(hix, loy, loz)],
152 table[index(hix, loy, hiz)],
153 table[index(hix, hiy, loz)],
154 table[index(hix, hiy, hiz)]
157 for (
int i = 0; i < 4; ++i)
158 interpolateIn(tmp[i * 2], tmp[i * 2 + 1], frac.z);
160 for (
int i = 0; i < 2; ++i)
161 interpolateIn(tmp[i * 4], tmp[i * 4 + 2], frac.y);
163 interpolateIn(tmp[0], tmp[4], frac.x);