49 Q_ASSERT(table.size() == qsizetype(gridPointsX * gridPointsY * gridPointsZ * gridPointsW));
52 const __m128 minV = _mm_setzero_ps();
53 const __m128 maxV = _mm_set1_ps(1.0f);
54 const __m128i gridPointsInt = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(&gridPointsX));
55 const __m128 gridPointsV = _mm_cvtepi32_ps(_mm_add_epi32(gridPointsInt, _mm_set1_epi32(-1)));
56 __m128 c = _mm_loadu_ps(&v.x);
57 c = _mm_max_ps(c, minV);
58 c = _mm_min_ps(c, maxV);
59 c = _mm_mul_ps(c, gridPointsV);
60#if !defined(__SSE4_1__)
61 const __m128 clo = _mm_cvtepi32_ps(_mm_cvttps_epi32(c));
63 const __m128 clo = _mm_floor_ps(c);
65 __m128 chi = _mm_add_ps(clo, maxV);
66 chi = _mm_min_ps(chi, gridPointsV);
67 _mm_storeu_ps(
reinterpret_cast<
float *>(&frac), _mm_sub_ps(c, clo));
68 const __m128i ilo = _mm_cvtps_epi32(clo);
69 const __m128i ihi = _mm_cvtps_epi32(chi);
70 const uint32_t lox = _mm_cvtsi128_si32(ilo);
71 const uint32_t hix = _mm_cvtsi128_si32(ihi);
72#if !defined(__SSE4_1__)
73 const uint32_t loy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(1, 1, 1, 1)));
74 const uint32_t loz = _mm_cvtsi128_si32(_mm_unpackhi_epi32(ilo, ilo));
75 const uint32_t low = _mm_cvtsi128_si32(_mm_shuffle_epi32(ilo, _MM_SHUFFLE(3, 3, 3, 3)));
76 const uint32_t hiy = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(1, 1, 1, 1)));
77 const uint32_t hiz = _mm_cvtsi128_si32(_mm_unpackhi_epi32(ihi, ihi));
78 const uint32_t hiw = _mm_cvtsi128_si32(_mm_shuffle_epi32(ihi, _MM_SHUFFLE(3, 3, 3, 3)));
80 const uint32_t loy = _mm_extract_epi32(ilo, 1);
81 const uint32_t loz = _mm_extract_epi32(ilo, 2);
82 const uint32_t low = _mm_extract_epi32(ilo, 3);
83 const uint32_t hiy = _mm_extract_epi32(ihi, 1);
84 const uint32_t hiz = _mm_extract_epi32(ihi, 2);
85 const uint32_t hiw = _mm_extract_epi32(ihi, 3);
88 const float x = std::clamp(v.x, 0.0f, 1.0f) * (gridPointsX - 1);
89 const float y = std::clamp(v.y, 0.0f, 1.0f) * (gridPointsY - 1);
90 const float z = std::clamp(v.z, 0.0f, 1.0f) * (gridPointsZ - 1);
91 const float w = std::clamp(v.w, 0.0f, 1.0f) * (gridPointsW - 1);
92 const uint32_t lox =
static_cast<uint32_t>(std::floor(x));
93 const uint32_t hix = std::min(lox + 1, gridPointsX - 1);
94 const uint32_t loy =
static_cast<uint32_t>(std::floor(y));
95 const uint32_t hiy = std::min(loy + 1, gridPointsY - 1);
96 const uint32_t loz =
static_cast<uint32_t>(std::floor(z));
97 const uint32_t hiz = std::min(loz + 1, gridPointsZ - 1);
98 const uint32_t low =
static_cast<uint32_t>(std::floor(w));
99 const uint32_t hiw = std::min(low + 1, gridPointsW - 1);
100 frac.x = x -
static_cast<
float>(lox);
101 frac.y = y -
static_cast<
float>(loy);
102 frac.z = z -
static_cast<
float>(loz);
103 frac.w = w -
static_cast<
float>(low);
105 if (gridPointsW > 1) {
106 auto index = [&](qsizetype x, qsizetype y, qsizetype z, qsizetype w) -> qsizetype {
107 return x * gridPointsW * gridPointsZ * gridPointsY
108 + y * gridPointsW * gridPointsZ
114 tmp[0] = interpolate(table[index(lox, loy, loz, low)],
115 table[index(lox, loy, loz, hiw)], frac.w);
116 tmp[1] = interpolate(table[index(lox, loy, hiz, low)],
117 table[index(lox, loy, hiz, hiw)], frac.w);
118 tmp[2] = interpolate(table[index(lox, hiy, loz, low)],
119 table[index(lox, hiy, loz, hiw)], frac.w);
120 tmp[3] = interpolate(table[index(lox, hiy, hiz, low)],
121 table[index(lox, hiy, hiz, hiw)], frac.w);
122 tmp[4] = interpolate(table[index(hix, loy, loz, low)],
123 table[index(hix, loy, loz, hiw)], frac.w);
124 tmp[5] = interpolate(table[index(hix, loy, hiz, low)],
125 table[index(hix, loy, hiz, hiw)], frac.w);
126 tmp[6] = interpolate(table[index(hix, hiy, loz, low)],
127 table[index(hix, hiy, loz, hiw)], frac.w);
128 tmp[7] = interpolate(table[index(hix, hiy, hiz, low)],
129 table[index(hix, hiy, hiz, hiw)], frac.w);
131 for (
int i = 0; i < 4; ++i)
132 interpolateIn(tmp[i * 2], tmp[i * 2 + 1], frac.z);
134 for (
int i = 0; i < 2; ++i)
135 interpolateIn(tmp[i * 4], tmp[i * 4 + 2], frac.y);
137 interpolateIn(tmp[0], tmp[4], frac.x);
140 auto index = [&](qsizetype x, qsizetype y, qsizetype z) -> qsizetype {
141 return x * gridPointsZ * gridPointsY
145 QColorVector tmp[8] = {
146 table[index(lox, loy, loz)],
147 table[index(lox, loy, hiz)],
148 table[index(lox, hiy, loz)],
149 table[index(lox, hiy, hiz)],
150 table[index(hix, loy, loz)],
151 table[index(hix, loy, hiz)],
152 table[index(hix, hiy, loz)],
153 table[index(hix, hiy, hiz)]
156 for (
int i = 0; i < 4; ++i)
157 interpolateIn(tmp[i * 2], tmp[i * 2 + 1], frac.z);
159 for (
int i = 0; i < 2; ++i)
160 interpolateIn(tmp[i * 4], tmp[i * 4 + 2], frac.y);
162 interpolateIn(tmp[0], tmp[4], frac.x);