6#include <private/qdrawhelper_x86_p.h>
7#include <private/qsimd_p.h>
9#if QT_CONFIG(qtgui_threadpool)
10#include <private/qlatch_p.h>
11#include <qthreadpool.h>
12#include <private/qguiapplication_p.h>
13#include <private/qthreadpool_p.h>
16#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
20using namespace QImageScale;
// Dispatches a row-range worker over destination rows, splitting the range
// into sections that are started on the Qt GUI thread pool when possible.
//
// isi          - scale info; only isi->sh and isi->sw are read here
//                (presumably the source height and width - confirm).
// dh           - number of rows to cover; also the upper bound on the
//                number of sections.
// scaleSection - callable invoked as scaleSection(first, last); the callers
//                in this file treat the range as half-open [first, last).
//
// NOTE(review): this listing appears to be missing lines of the body - `y`
// is used below without a visible declaration or update, `latch` is
// constructed but never visibly used, and no non-threaded path is visible.
// Confirm against the complete file before relying on these comments.
23static inline void multithread_pixels_function(QImageScaleInfo *isi,
int dh,
const T &scaleSection)
25#if QT_CONFIG(qtgui_threadpool)
// One section per 1 << 16 units of isi->sh * isi->sw; the product is
// computed in qsizetype before being narrowed to int.
26 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
// Never use more sections than there are rows.
27 segments = std::min(segments, dh);
28 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
// Only fan out when there is more than one section, a pool exists, and the
// calling thread is not itself one of the pool's threads.
29 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
// Latch initialised with the number of sections about to be started.
30 QLatch latch(segments);
32 for (
int i = 0; i < segments; ++i) {
// Divide the rows remaining after `y` evenly among the sections still to
// be started.
33 int yn = (dh - y) / (segments - i);
// `y` and `yn` are captured by value so each task keeps its own range;
// everything else (including scaleSection) is captured by reference.
34 threadPool->start([&, y, yn]() {
35 scaleSection(y, y + yn);
// Accumulates a weighted sum of 32-bit pixels, one 32-bit lane per byte of
// the pixel.
//
// pix   - first pixel to read.
// xyap  - weight applied to the first pixel (scalar form of vxyap).
// Cxy   - weight applied to each pixel loaded inside the loop (scalar form
//         of vCxy); also the loop's step for the remaining weight.
// step  - not referenced in the visible lines; presumably the element
//         distance by which `pix` is advanced between loads - TODO confirm.
// vxyap - xyap broadcast to all four lanes (see the callers).
// vCxy  - Cxy broadcast to all four lanes (see the callers).
//
// The weights used are xyap, then Cxy per loop pass, then the leftover `i`,
// which together add up to 1 << 14.
//
// NOTE(review): the declaration of `i`, any advance of `pix`, and the
// return statement are not visible in this listing.
47inline static __m128i Q_DECL_VECTORCALL
48qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
int step,
const __m128i vxyap,
const __m128i vCxy)
// Load one 32-bit pixel and zero-extend each of its four bytes to a 32-bit
// lane.
50 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
// First pixel contributes with weight xyap.
51 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
// `i` tracks the weight still to be handed out; each pass gives Cxy of it
// to one pixel while more than Cxy remains.
53 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
55 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
56 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
// Final pixel receives whatever weight is left in `i`.
59 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
60 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
// SSE4.1 scaler that accumulates source pixels vertically with
// qt_qimageScaleAARGBA_helper (stride `sow`) and blends two horizontally
// adjacent accumulations per destination pixel.
//
// isi  - scale info; supplies the ypoints, xpoints, xapoints and yapoints
//        tables read below.
// dest - destination pixel buffer.
// dw   - number of destination pixels written per row.
// dh   - number of destination rows, forwarded to
//        multithread_pixels_function.
// dow  - distance, in pixels, between consecutive destination rows.
// sow  - passed to the helper as its `step`; presumably the source row
//        stride in pixels - TODO confirm.
//
// NOTE(review): this listing appears to be missing lines - braces, any
// advance of `dptr`, and any use of the bool template argument implied by
// the explicit instantiations at the end of the file are not visible.
65void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi,
unsigned int *dest,
66 int dw,
int dh,
int dow,
int sow)
68 const unsigned int **ypoints = isi->ypoints;
69 const int *xpoints = isi->xpoints;
70 const int *xapoints = isi->xapoints;
71 const int *yapoints = isi->yapoints;
// Constant 256 in every lane; used to form the complementary x weight.
73 const __m128i v256 = _mm_set1_epi32(256);
// Worker for destination rows [yStart, yEnd); captures everything by
// reference.
76 auto scaleSection = [&] (
int yStart,
int yEnd) {
77 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: the high 16 bits (Cy) and the low 16 bits
// (yap); both are passed to the helper as its weights.
78 const int Cy = yapoints[y] >> 16;
79 const int yap = yapoints[y] & 0xffff;
80 const __m128i vCy = _mm_set1_epi32(Cy);
81 const __m128i vyap = _mm_set1_epi32(yap);
// Start of destination row y.
83 unsigned int *dptr = dest + (y * dow);
84 for (
int x = 0; x < dw; x++) {
85 const unsigned int *sptr = ypoints[y] + xpoints[x];
// Vertical accumulation starting at sptr.
86 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
88 const int xap = xapoints[x];
// NOTE(review): whether the blend below is conditional on xap cannot be
// seen from this listing.
90 const __m128i vxap = _mm_set1_epi32(xap);
91 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
// Same vertical accumulation for the pixel one to the right.
92 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
// vx = (vx * (256 - xap) + vr * xap) >> 8
94 vx = _mm_mullo_epi32(vx, vinvxap);
95 vr = _mm_mullo_epi32(vr, vxap);
96 vx = _mm_add_epi32(vx, vr);
97 vx = _mm_srli_epi32(vx, 8);
// Shift out the 1 << 14 weight total applied by the helper.
99 vx = _mm_srli_epi32(vx, 14);
// Narrow the four 32-bit lanes to bytes (32 -> 16 -> 8, saturating) and
// store the low 32 bits as one destination pixel.
100 vx = _mm_packus_epi32(vx, vx);
101 vx = _mm_packus_epi16(vx, vx);
102 *dptr = _mm_cvtsi128_si32(vx);
// Run the worker over dh rows, possibly split into sections.
109 multithread_pixels_function(isi, dh, scaleSection);
// SSE4.1 scaler that accumulates source pixels horizontally with
// qt_qimageScaleAARGBA_helper (stride 1) and blends two accumulations taken
// `sow` pixels apart per destination pixel.
//
// isi  - scale info; supplies the ypoints, xpoints, xapoints and yapoints
//        tables read below.
// dest - destination pixel buffer.
// dw   - number of destination pixels written per row.
// dh   - number of destination rows, forwarded to
//        multithread_pixels_function.
// dow  - distance, in pixels, between consecutive destination rows.
// sow  - offset, in pixels, from sptr to the second accumulated run;
//        presumably the source row stride - TODO confirm.
//
// NOTE(review): this listing appears to be missing lines - braces, any
// advance of `dptr`, and any use of the bool template argument implied by
// the explicit instantiations at the end of the file are not visible.
113void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi,
unsigned int *dest,
114 int dw,
int dh,
int dow,
int sow)
116 const unsigned int **ypoints = isi->ypoints;
117 int *xpoints = isi->xpoints;
118 int *xapoints = isi->xapoints;
119 int *yapoints = isi->yapoints;
// Constant 256 in every lane; used to form the complementary y weight.
121 const __m128i v256 = _mm_set1_epi32(256);
// Worker for destination rows [yStart, yEnd); captures everything by
// reference.
124 auto scaleSection = [&] (
int yStart,
int yEnd) {
125 for (
int y = yStart; y < yEnd; ++y) {
// Start of destination row y.
126 unsigned int *dptr = dest + (y * dow);
127 for (
int x = 0; x < dw; x++) {
// xapoints[x] packs two values: the high 16 bits (Cx) and the low 16 bits
// (xap); both are passed to the helper as its weights.
128 int Cx = xapoints[x] >> 16;
129 int xap = xapoints[x] & 0xffff;
130 const __m128i vCx = _mm_set1_epi32(Cx);
131 const __m128i vxap = _mm_set1_epi32(xap);
133 const unsigned int *sptr = ypoints[y] + xpoints[x];
// Horizontal accumulation starting at sptr.
134 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
136 int yap = yapoints[y];
// NOTE(review): whether the blend below is conditional on yap cannot be
// seen from this listing.
138 const __m128i vyap = _mm_set1_epi32(yap);
139 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
// Same horizontal accumulation starting `sow` pixels further on.
140 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
// vx = (vx * (256 - yap) + vr * yap) >> 8
142 vx = _mm_mullo_epi32(vx, vinvyap);
143 vr = _mm_mullo_epi32(vr, vyap);
144 vx = _mm_add_epi32(vx, vr);
145 vx = _mm_srli_epi32(vx, 8);
// Shift out the 1 << 14 weight total applied by the helper.
147 vx = _mm_srli_epi32(vx, 14);
// Narrow the four 32-bit lanes to bytes (32 -> 16 -> 8, saturating) and
// store the low 32 bits as one destination pixel.
148 vx = _mm_packus_epi32(vx, vx);
149 vx = _mm_packus_epi16(vx, vx);
150 *dptr = _mm_cvtsi128_si32(vx);
// Run the worker over dh rows, possibly split into sections.
157 multithread_pixels_function(isi, dh, scaleSection);
// SSE4.1 scaler that accumulates source pixels in both directions:
// horizontally through qt_qimageScaleAARGBA_helper (stride 1) and
// vertically through the weighted loop over `j` below.
//
// isi  - scale info; supplies the ypoints, xpoints, xapoints and yapoints
//        tables read below.
// dest - destination pixel buffer.
// dw   - number of destination pixels written per row.
// dh   - number of destination rows, forwarded to
//        multithread_pixels_function.
// dow  - distance, in pixels, between consecutive destination rows.
// sow  - not referenced in the visible lines; presumably the amount by
//        which `sptr` is advanced between helper calls - TODO confirm.
//
// NOTE(review): this listing appears to be missing lines - braces, the
// declaration of `j`, any advance of `sptr` or `dptr`, and any use of the
// bool template argument implied by the explicit instantiations at the end
// of the file are not visible.
161void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi,
unsigned int *dest,
162 int dw,
int dh,
int dow,
int sow)
164 const unsigned int **ypoints = isi->ypoints;
165 int *xpoints = isi->xpoints;
166 int *xapoints = isi->xapoints;
167 int *yapoints = isi->yapoints;
// Worker for destination rows [yStart, yEnd); captures everything by
// reference.
169 auto scaleSection = [&] (
int yStart,
int yEnd) {
170 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: the high 16 bits (Cy) and the low 16 bits
// (yap), used as the vertical weights below.
171 int Cy = yapoints[y] >> 16;
172 int yap = yapoints[y] & 0xffff;
173 const __m128i vCy = _mm_set1_epi32(Cy);
174 const __m128i vyap = _mm_set1_epi32(yap);
// Start of destination row y.
176 unsigned int *dptr = dest + (y * dow);
177 for (
int x = 0; x < dw; x++) {
// xapoints[x] is packed the same way: high 16 bits (Cx), low 16 bits (xap).
178 const int Cx = xapoints[x] >> 16;
179 const int xap = xapoints[x] & 0xffff;
180 const __m128i vCx = _mm_set1_epi32(Cx);
181 const __m128i vxap = _mm_set1_epi32(xap);
183 const unsigned int *sptr = ypoints[y] + xpoints[x];
// First horizontal accumulation, weighted vertically by yap. The >> 4
// reduces the helper's 1 << 14 scale to 1 << 10 before the vertical weight
// is multiplied in.
184 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
185 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
// `j` tracks the vertical weight still to be handed out; each pass gives
// Cy of it to one horizontal accumulation while more than Cy remains.
188 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
190 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
191 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
// Final accumulation receives whatever vertical weight is left in `j`.
194 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
// Shift out the combined scale: 1 << 10 from the horizontal pass times the
// 1 << 14 vertical weight total.
197 vr = _mm_srli_epi32(vr, 24);
// Narrow the four 32-bit lanes to bytes (32 -> 16 -> 8, saturating), with
// zeros in the upper half, and store the low 32 bits as one pixel.
198 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
199 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
200 *dptr = _mm_cvtsi128_si32(vr);
// Run the worker over dh rows, possibly split into sections.
207 multithread_pixels_function(isi, dh, scaleSection);
// Explicit instantiations of the three scaler function templates, one for
// each value (false and true) of their bool template argument.
// NOTE(review): the meaning of that argument is not visible in this
// listing; confirm against the template declarations.

// qt_qimageScaleAARGBA_up_x_down_y_sse4
210template void qt_qimageScaleAARGBA_up_x_down_y_sse4<
false>(QImageScaleInfo *isi,
unsigned int *dest,
211 int dw,
int dh,
int dow,
int sow);
213template void qt_qimageScaleAARGBA_up_x_down_y_sse4<
true>(QImageScaleInfo *isi,
unsigned int *dest,
214 int dw,
int dh,
int dow,
int sow);
// qt_qimageScaleAARGBA_down_x_up_y_sse4
216template void qt_qimageScaleAARGBA_down_x_up_y_sse4<
false>(QImageScaleInfo *isi,
unsigned int *dest,
217 int dw,
int dh,
int dow,
int sow);
219template void qt_qimageScaleAARGBA_down_x_up_y_sse4<
true>(QImageScaleInfo *isi,
unsigned int *dest,
220 int dw,
int dh,
int dow,
int sow);
// qt_qimageScaleAARGBA_down_xy_sse4
222template void qt_qimageScaleAARGBA_down_xy_sse4<
false>(QImageScaleInfo *isi,
unsigned int *dest,
223 int dw,
int dh,
int dow,
int sow);
225template void qt_qimageScaleAARGBA_down_xy_sse4<
true>(QImageScaleInfo *isi,
unsigned int *dest,
226 int dw,
int dh,
int dow,
int sow);