Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qimagescale_sse4.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:default
4
6#include "qimage.h"
7#include <private/qdrawhelper_x86_p.h>
8#include <private/qsimd_p.h>
9
10#if defined(QT_COMPILER_SUPPORTS_SSE4_1)
11
12QT_BEGIN_NAMESPACE
13
14using namespace QImageScale;
15
16inline static __m128i Q_DECL_VECTORCALL
17qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step, const __m128i vxyap, const __m128i vCxy)
18{
19 __m128i vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
20 __m128i vx = _mm_mullo_epi32(vpix, vxyap);
21 int i;
22 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
23 pix += step;
24 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
25 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, vCxy));
26 }
27 pix += step;
28 vpix = _mm_cvtepu8_epi32(_mm_cvtsi32_si128(*pix));
29 vx = _mm_add_epi32(vx, _mm_mullo_epi32(vpix, _mm_set1_epi32(i)));
30 return vx;
31}
32
33template<bool RGB>
34void qt_qimageScaleAARGBA_up_x_down_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
35 int dw, int dh, int dow, int sow)
36{
37 const unsigned int **ypoints = isi->ypoints;
38 const int *xpoints = isi->xpoints;
39 const int *xapoints = isi->xapoints;
40 const int *yapoints = isi->yapoints;
41
42 const __m128i v256 = _mm_set1_epi32(256);
43
44 /* go through every scanline in the output buffer */
45 auto scaleSection = [&] (int yStart, int yEnd) {
46 for (int y = yStart; y < yEnd; ++y) {
47 const int Cy = yapoints[y] >> 16;
48 const int yap = yapoints[y] & 0xffff;
49 const __m128i vCy = _mm_set1_epi32(Cy);
50 const __m128i vyap = _mm_set1_epi32(yap);
51
52 unsigned int *dptr = dest + (y * dow);
53 for (int x = 0; x < dw; x++) {
54 const unsigned int *sptr = ypoints[y] + xpoints[x];
55 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
56
57 const int xap = xapoints[x];
58 if (xap > 0) {
59 const __m128i vxap = _mm_set1_epi32(xap);
60 const __m128i vinvxap = _mm_sub_epi32(v256, vxap);
61 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
62
63 vx = _mm_mullo_epi32(vx, vinvxap);
64 vr = _mm_mullo_epi32(vr, vxap);
65 vx = _mm_add_epi32(vx, vr);
66 vx = _mm_srli_epi32(vx, 8);
67 }
68 vx = _mm_srli_epi32(vx, 14);
69 vx = _mm_packus_epi32(vx, vx);
70 vx = _mm_packus_epi16(vx, vx);
71 *dptr = _mm_cvtsi128_si32(vx);
72 if (RGB)
73 *dptr |= 0xff000000;
74 dptr++;
75 }
76 }
77 };
78 multithread_pixels_function(isi, dh, scaleSection);
79}
80
81template<bool RGB>
82void qt_qimageScaleAARGBA_down_x_up_y_sse4(QImageScaleInfo *isi, unsigned int *dest,
83 int dw, int dh, int dow, int sow)
84{
85 const unsigned int **ypoints = isi->ypoints;
86 int *xpoints = isi->xpoints;
87 int *xapoints = isi->xapoints;
88 int *yapoints = isi->yapoints;
89
90 const __m128i v256 = _mm_set1_epi32(256);
91
92 /* go through every scanline in the output buffer */
93 auto scaleSection = [&] (int yStart, int yEnd) {
94 for (int y = yStart; y < yEnd; ++y) {
95 unsigned int *dptr = dest + (y * dow);
96 for (int x = 0; x < dw; x++) {
97 int Cx = xapoints[x] >> 16;
98 int xap = xapoints[x] & 0xffff;
99 const __m128i vCx = _mm_set1_epi32(Cx);
100 const __m128i vxap = _mm_set1_epi32(xap);
101
102 const unsigned int *sptr = ypoints[y] + xpoints[x];
103 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
104
105 int yap = yapoints[y];
106 if (yap > 0) {
107 const __m128i vyap = _mm_set1_epi32(yap);
108 const __m128i vinvyap = _mm_sub_epi32(v256, vyap);
109 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
110
111 vx = _mm_mullo_epi32(vx, vinvyap);
112 vr = _mm_mullo_epi32(vr, vyap);
113 vx = _mm_add_epi32(vx, vr);
114 vx = _mm_srli_epi32(vx, 8);
115 }
116 vx = _mm_srli_epi32(vx, 14);
117 vx = _mm_packus_epi32(vx, vx);
118 vx = _mm_packus_epi16(vx, vx);
119 *dptr = _mm_cvtsi128_si32(vx);
120 if (RGB)
121 *dptr |= 0xff000000;
122 dptr++;
123 }
124 }
125 };
126 multithread_pixels_function(isi, dh, scaleSection);
127}
128
129template<bool RGB>
130void qt_qimageScaleAARGBA_down_xy_sse4(QImageScaleInfo *isi, unsigned int *dest,
131 int dw, int dh, int dow, int sow)
132{
133 const unsigned int **ypoints = isi->ypoints;
134 int *xpoints = isi->xpoints;
135 int *xapoints = isi->xapoints;
136 int *yapoints = isi->yapoints;
137
138 auto scaleSection = [&] (int yStart, int yEnd) {
139 for (int y = yStart; y < yEnd; ++y) {
140 int Cy = yapoints[y] >> 16;
141 int yap = yapoints[y] & 0xffff;
142 const __m128i vCy = _mm_set1_epi32(Cy);
143 const __m128i vyap = _mm_set1_epi32(yap);
144
145 unsigned int *dptr = dest + (y * dow);
146 for (int x = 0; x < dw; x++) {
147 const int Cx = xapoints[x] >> 16;
148 const int xap = xapoints[x] & 0xffff;
149 const __m128i vCx = _mm_set1_epi32(Cx);
150 const __m128i vxap = _mm_set1_epi32(xap);
151
152 const unsigned int *sptr = ypoints[y] + xpoints[x];
153 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
154 __m128i vr = _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vyap);
155
156 int j;
157 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
158 sptr += sow;
159 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
160 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), vCy));
161 }
162 sptr += sow;
163 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
164 vr = _mm_add_epi32(vr, _mm_mullo_epi32(_mm_srli_epi32(vx, 4), _mm_set1_epi32(j)));
165
166 vr = _mm_srli_epi32(vr, 24);
167 vr = _mm_packus_epi32(vr, _mm_setzero_si128());
168 vr = _mm_packus_epi16(vr, _mm_setzero_si128());
169 *dptr = _mm_cvtsi128_si32(vr);
170 if (RGB)
171 *dptr |= 0xff000000;
172 dptr++;
173 }
174 }
175 };
176 multithread_pixels_function(isi, dh, scaleSection);
177}
178
179template void qt_qimageScaleAARGBA_up_x_down_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
180 int dw, int dh, int dow, int sow);
181
182template void qt_qimageScaleAARGBA_up_x_down_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
183 int dw, int dh, int dow, int sow);
184
185template void qt_qimageScaleAARGBA_down_x_up_y_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
186 int dw, int dh, int dow, int sow);
187
188template void qt_qimageScaleAARGBA_down_x_up_y_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
189 int dw, int dh, int dow, int sow);
190
191template void qt_qimageScaleAARGBA_down_xy_sse4<false>(QImageScaleInfo *isi, unsigned int *dest,
192 int dw, int dh, int dow, int sow);
193
194template void qt_qimageScaleAARGBA_down_xy_sse4<true>(QImageScaleInfo *isi, unsigned int *dest,
195 int dw, int dh, int dow, int sow);
196
197QT_END_NAMESPACE
198
199#endif