Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qimagescale_sse4.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qimage.h"
6#include <private/qdrawhelper_x86_p.h>
7#include <private/qsimd_p.h>
8
9#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
10
11QT_BEGIN_NAMESPACE
12
13using namespace QImageScale;
14
15inline static __m128i Q_DECL_VECTORCALL
16qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
17{
18 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
19 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
20 int i;
21 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
22 pix += step;
23 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
24 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
25 }
26 pix += step;
27 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
28 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
29 return vx;
30}
31
32template<bool RGB>
33void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
34 int dw, int dh, int dow, int sow)
35{
36 const unsigned int **ypoints = isi->ypoints;
37 const int *xpoints = isi->xpoints;
38 const int *xapoints = isi->xapoints;
39 const int *yapoints = isi->yapoints;
40
41 const __m128i v256 = _mm_set1_epi32(256);
42
43 /* go through every scanline in the output buffer */
44 auto scaleSection = [&] (int yStart, int yEnd) {
45 for (int y = yStart; y < yEnd; ++y) {
46 const int Cy = yapoints[y] >> 16;
47 const int yap = yapoints[y] & 0xffff;
48 const __m128i vCy = _mm_set1_epi32(Cy);
49 const __m128i vyap = _mm_set1_epi32(yap);
50
51 unsigned int *dptr = dest + (y * dow);
52 for (int x = 0; x < dw; x++) {
53 const unsigned int *sptr = ypoints[y] + xpoints[x];
54 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
55
56 const int xap = xapoints[x];
57 if (xap > 0) {
58 const __m128i vxap = _mm_set1_epi32(xap);
59 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
60 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
61
62 vx = _mm_mullo_epi32(vx, vinvxap);
63 vr = _mm_mullo_epi32(vr, vxap);
64 vx = _mm_add_epi32(vx, vr);
65 vx = _mm_srli_epi32(vx, 8);
66 }
67 vx = _mm_srli_epi32(vx, 14);
68 vx = _mm_packus_epi32(vx, vx);
69 vx = _mm_packus_epi16(vx, vx);
70 *dptr = _mm_cvtsi128_si32(vx);
71 if (RGB)
72 *dptr |= 0xff000000;
73 dptr++;
74 }
75 }
76 };
77 multithread_pixels_function(isi, dh, scaleSection);
78}
79
80template<bool RGB>
81void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
82 int dw, int dh, int dow, int sow)
83{
84 const unsigned int **ypoints = isi->ypoints;
85 int *xpoints = isi->xpoints;
86 int *xapoints = isi->xapoints;
87 int *yapoints = isi->yapoints;
88
89 const __m128i v256 = _mm_set1_epi32(256);
90
91 /* go through every scanline in the output buffer */
92 auto scaleSection = [&] (int yStart, int yEnd) {
93 for (int y = yStart; y < yEnd; ++y) {
94 unsigned int *dptr = dest + (y * dow);
95 for (int x = 0; x < dw; x++) {
96 int Cx = xapoints[x] >> 16;
97 int xap = xapoints[x] & 0xffff;
98 const __m128i vCx = _mm_set1_epi32(Cx);
99 const __m128i vxap = _mm_set1_epi32(xap);
100
101 const unsigned int *sptr = ypoints[y] + xpoints[x];
102 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
103
104 int yap = yapoints[y];
105 if (yap > 0) {
106 const __m128i vyap = _mm_set1_epi32(yap);
107 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
108 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
109
110 vx = _mm_mullo_epi32(vx, vinvyap);
111 vr = _mm_mullo_epi32(vr, vyap);
112 vx = _mm_add_epi32(vx, vr);
113 vx = _mm_srli_epi32(vx, 8);
114 }
115 vx = _mm_srli_epi32(vx, 14);
116 vx = _mm_packus_epi32(vx, vx);
117 vx = _mm_packus_epi16(vx, vx);
118 *dptr = _mm_cvtsi128_si32(vx);
119 if (RGB)
120 *dptr |= 0xff000000;
121 dptr++;
122 }
123 }
124 };
125 multithread_pixels_function(isi, dh, scaleSection);
126}
127
128template<bool RGB>
129void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
130 int dw, int dh, int dow, int sow)
131{
132 const unsigned int **ypoints = isi->ypoints;
133 int *xpoints = isi->xpoints;
134 int *xapoints = isi->xapoints;
135 int *yapoints = isi->yapoints;
136
137 auto scaleSection = [&] (int yStart, int yEnd) {
138 for (int y = yStart; y < yEnd; ++y) {
139 int Cy = yapoints[y] >> 16;
140 int yap = yapoints[y] & 0xffff;
141 const __m128i vCy = _mm_set1_epi32(Cy);
142 const __m128i vyap = _mm_set1_epi32(yap);
143
144 unsigned int *dptr = dest + (y * dow);
145 for (int x = 0; x < dw; x++) {
146 const int Cx = xapoints[x] >> 16;
147 const int xap = xapoints[x] & 0xffff;
148 const __m128i vCx = _mm_set1_epi32(Cx);
149 const __m128i vxap = _mm_set1_epi32(xap);
150
151 const unsigned int *sptr = ypoints[y] + xpoints[x];
152 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
153 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
154
155 int j;
156 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
157 sptr += sow;
158 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
159 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
160 }
161 sptr += sow;
162 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
163 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
164
165 vr = _mm_srli_epi32(vr, 24);
166 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
167 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
168 *dptr = _mm_cvtsi128_si32(vr);
169 if (RGB)
170 *dptr |= 0xff000000;
171 dptr++;
172 }
173 }
174 };
175 multithread_pixels_function(isi, dh, scaleSection);
176}
177
178template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
179 int dw, int dh, int dow, int sow);
180
181template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
182 int dw, int dh, int dow, int sow);
183
184template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
185 int dw, int dh, int dow, int sow);
186
187template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
188 int dw, int dh, int dow, int sow);
189
190template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
191 int dw, int dh, int dow, int sow);
192
193template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
194 int dw, int dh, int dow, int sow);
195
196QT_END_NAMESPACE
197
198#endif