Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qimagescale_sse4.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qimage.h"
6#include <private/qdrawhelper_x86_p.h>
7#include <private/qsimd_p.h>
8
9#if QT_CONFIG(qtgui_threadpool)
10#include <private/qlatch_p.h>
11#include <qthreadpool.h>
12#include <private/qguiapplication_p.h>
13#include <private/qthreadpool_p.h>
14#endif
15
16#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
17
18QT_BEGIN_NAMESPACE
19
20using namespace QImageScale;
21
22template<typename T>
23static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
24{
25#if QT_CONFIG(qtgui_threadpool)
26 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27 segments = std::min(segments, dh);
28 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30 QLatch latch(segments);
31 int y = 0;
32 for (int i = 0; i < segments; ++i) {
33 int yn = (dh - y) / (segments - i);
34 threadPool->start([&, y, yn]() {
35 scaleSection(y, y + yn);
36 latch.countDown();
37 });
38 y += yn;
39 }
40 latch.wait();
41 return;
42 }
43#endif
44 scaleSection(0, dh);
45}
46
47inline static __m128i Q_DECL_VECTORCALL
48qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
49{
50 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
51 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
52 int i;
53 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
54 pix += step;
55 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
56 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
57 }
58 pix += step;
59 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
60 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
61 return vx;
62}
63
64template<bool RGB>
65void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
66 int dw, int dh, int dow, int sow)
67{
68 const unsigned int **ypoints = isi->ypoints;
69 const int *xpoints = isi->xpoints;
70 const int *xapoints = isi->xapoints;
71 const int *yapoints = isi->yapoints;
72
73 const __m128i v256 = _mm_set1_epi32(256);
74
75 /* go through every scanline in the output buffer */
76 auto scaleSection = [&] (int yStart, int yEnd) {
77 for (int y = yStart; y < yEnd; ++y) {
78 const int Cy = yapoints[y] >> 16;
79 const int yap = yapoints[y] & 0xffff;
80 const __m128i vCy = _mm_set1_epi32(Cy);
81 const __m128i vyap = _mm_set1_epi32(yap);
82
83 unsigned int *dptr = dest + (y * dow);
84 for (int x = 0; x < dw; x++) {
85 const unsigned int *sptr = ypoints[y] + xpoints[x];
86 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
87
88 const int xap = xapoints[x];
89 if (xap > 0) {
90 const __m128i vxap = _mm_set1_epi32(xap);
91 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
92 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
93
94 vx = _mm_mullo_epi32(vx, vinvxap);
95 vr = _mm_mullo_epi32(vr, vxap);
96 vx = _mm_add_epi32(vx, vr);
97 vx = _mm_srli_epi32(vx, 8);
98 }
99 vx = _mm_srli_epi32(vx, 14);
100 vx = _mm_packus_epi32(vx, vx);
101 vx = _mm_packus_epi16(vx, vx);
102 *dptr = _mm_cvtsi128_si32(vx);
103 if (RGB)
104 *dptr |= 0xff000000;
105 dptr++;
106 }
107 }
108 };
109 multithread_pixels_function(isi, dh, scaleSection);
110}
111
112template<bool RGB>
113void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
114 int dw, int dh, int dow, int sow)
115{
116 const unsigned int **ypoints = isi->ypoints;
117 int *xpoints = isi->xpoints;
118 int *xapoints = isi->xapoints;
119 int *yapoints = isi->yapoints;
120
121 const __m128i v256 = _mm_set1_epi32(256);
122
123 /* go through every scanline in the output buffer */
124 auto scaleSection = [&] (int yStart, int yEnd) {
125 for (int y = yStart; y < yEnd; ++y) {
126 unsigned int *dptr = dest + (y * dow);
127 for (int x = 0; x < dw; x++) {
128 int Cx = xapoints[x] >> 16;
129 int xap = xapoints[x] & 0xffff;
130 const __m128i vCx = _mm_set1_epi32(Cx);
131 const __m128i vxap = _mm_set1_epi32(xap);
132
133 const unsigned int *sptr = ypoints[y] + xpoints[x];
134 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
135
136 int yap = yapoints[y];
137 if (yap > 0) {
138 const __m128i vyap = _mm_set1_epi32(yap);
139 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
140 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
141
142 vx = _mm_mullo_epi32(vx, vinvyap);
143 vr = _mm_mullo_epi32(vr, vyap);
144 vx = _mm_add_epi32(vx, vr);
145 vx = _mm_srli_epi32(vx, 8);
146 }
147 vx = _mm_srli_epi32(vx, 14);
148 vx = _mm_packus_epi32(vx, vx);
149 vx = _mm_packus_epi16(vx, vx);
150 *dptr = _mm_cvtsi128_si32(vx);
151 if (RGB)
152 *dptr |= 0xff000000;
153 dptr++;
154 }
155 }
156 };
157 multithread_pixels_function(isi, dh, scaleSection);
158}
159
160template<bool RGB>
161void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
162 int dw, int dh, int dow, int sow)
163{
164 const unsigned int **ypoints = isi->ypoints;
165 int *xpoints = isi->xpoints;
166 int *xapoints = isi->xapoints;
167 int *yapoints = isi->yapoints;
168
169 auto scaleSection = [&] (int yStart, int yEnd) {
170 for (int y = yStart; y < yEnd; ++y) {
171 int Cy = yapoints[y] >> 16;
172 int yap = yapoints[y] & 0xffff;
173 const __m128i vCy = _mm_set1_epi32(Cy);
174 const __m128i vyap = _mm_set1_epi32(yap);
175
176 unsigned int *dptr = dest + (y * dow);
177 for (int x = 0; x < dw; x++) {
178 const int Cx = xapoints[x] >> 16;
179 const int xap = xapoints[x] & 0xffff;
180 const __m128i vCx = _mm_set1_epi32(Cx);
181 const __m128i vxap = _mm_set1_epi32(xap);
182
183 const unsigned int *sptr = ypoints[y] + xpoints[x];
184 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
185 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
186
187 int j;
188 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
189 sptr += sow;
190 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
191 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
192 }
193 sptr += sow;
194 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
196
197 vr = _mm_srli_epi32(vr, 24);
198 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
199 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
200 *dptr = _mm_cvtsi128_si32(vr);
201 if (RGB)
202 *dptr |= 0xff000000;
203 dptr++;
204 }
205 }
206 };
207 multithread_pixels_function(isi, dh, scaleSection);
208}
209
210template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
211 int dw, int dh, int dow, int sow);
212
213template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
214 int dw, int dh, int dow, int sow);
215
216template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
217 int dw, int dh, int dow, int sow);
218
219template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
220 int dw, int dh, int dow, int sow);
221
222template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
223 int dw, int dh, int dow, int sow);
224
225template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
226 int dw, int dh, int dow, int sow);
227
228QT_END_NAMESPACE
229
230#endif