Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qimagescale_lsx.cpp
Go to the documentation of this file.
1// Copyright (C) 2024 Loongson Technology Corporation Limited.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qimage.h"
6#include <private/qdrawhelper_loongarch64_p.h>
7#include <private/qsimd_p.h>
8
9#if QT_CONFIG(qtgui_threadpool)
10#include <qsemaphore.h>
11#include <private/qguiapplication_p.h>
12#include <private/qthreadpool_p.h>
13#endif
14
15#if defined(QT_COMPILER_SUPPORTS_LSX)
16
17QT_BEGIN_NAMESPACE
18
19using namespace QImageScale;
20
21template<typename T>
22static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
23{
24#if QT_CONFIG(qtgui_threadpool)
25 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
26 segments = std::min(segments, dh);
27 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
28 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
29 QSemaphore semaphore;
30 int y = 0;
31 for (int i = 0; i < segments; ++i) {
32 int yn = (dh - y) / (segments - i);
33 threadPool->start([&, y, yn]() {
34 scaleSection(y, y + yn);
35 semaphore.release(1);
36 });
37 y += yn;
38 }
39 semaphore.acquire(segments);
40 return;
41 }
42#else
43 Q_UNUSED(isi);
44#endif
45 scaleSection(0, dh);
46}
47
48inline static __m128i Q_DECL_VECTORCALL
49qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy,
50 int step, const __m128i vxyap, const __m128i vCxy)
51{
52 const __m128i shuffleMask = (__m128i)(v16i8){0, 16, 16, 16, 1, 16, 16, 16,
53 2, 16, 16, 16, 3, 16, 16, 16};
54 __m128i vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
55 __m128i vx = __lsx_vmul_w(vpix, vxyap);
56 int i;
57 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
58 pix += step;
59 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
60 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, vCxy));
61 }
62 pix += step;
63 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
64 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, __lsx_vreplgr2vr_w(i)));
65 return vx;
66}
67
68template<bool RGB>
69void qt_qimageScaleAARGBA_up_x_down_y_lsx(QImageScaleInfo *isi, unsigned int *dest,
70 int dw, int dh, int dow, int sow)
71{
72 const unsigned int **ypoints = isi->ypoints;
73 const int *xpoints = isi->xpoints;
74 const int *xapoints = isi->xapoints;
75 const int *yapoints = isi->yapoints;
76
77 const __m128i v256 = __lsx_vreplgr2vr_w(256);
78
79 /* go through every scanline in the output buffer */
80 auto scaleSection = [&] (int yStart, int yEnd) {
81 for (int y = yStart; y < yEnd; ++y) {
82 const int Cy = yapoints[y] >> 16;
83 const int yap = yapoints[y] & 0xffff;
84 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
85 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
86
87 unsigned int *dptr = dest + (y * dow);
88 for (int x = 0; x < dw; x++) {
89 const unsigned int *sptr = ypoints[y] + xpoints[x];
90 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
91
92 const int xap = xapoints[x];
93 if (xap > 0) {
94 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
95 const __m128i vinvxap = __lsx_vsub_w(v256, vxap);
96 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
97
98 vx = __lsx_vmul_w(vx, vinvxap);
99 vr = __lsx_vmul_w(vr, vxap);
100 vx = __lsx_vadd_w(vx, vr);
101 vx = __lsx_vsrli_w(vx, 8);
102 }
103 vx = __lsx_vsrli_w(vx, 14);
104 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
105 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
106 *dptr = __lsx_vpickve2gr_w(vx, 0);
107 if (RGB)
108 *dptr |= 0xff000000;
109 dptr++;
110 }
111 }
112 };
113 multithread_pixels_function(isi, dh, scaleSection);
114}
115
116template<bool RGB>
117void qt_qimageScaleAARGBA_down_x_up_y_lsx(QImageScaleInfo *isi, unsigned int *dest,
118 int dw, int dh, int dow, int sow)
119{
120 const unsigned int **ypoints = isi->ypoints;
121 int *xpoints = isi->xpoints;
122 int *xapoints = isi->xapoints;
123 int *yapoints = isi->yapoints;
124
125 const __m128i v256 = __lsx_vreplgr2vr_w(256);
126
127 /* go through every scanline in the output buffer */
128 auto scaleSection = [&] (int yStart, int yEnd) {
129 for (int y = yStart; y < yEnd; ++y) {
130 unsigned int *dptr = dest + (y * dow);
131 for (int x = 0; x < dw; x++) {
132 int Cx = xapoints[x] >> 16;
133 int xap = xapoints[x] & 0xffff;
134 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
135 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
136
137 const unsigned int *sptr = ypoints[y] + xpoints[x];
138 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
139
140 int yap = yapoints[y];
141 if (yap > 0) {
142 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
143 const __m128i vinvyap = __lsx_vsub_w(v256, vyap);
144 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
145
146 vx = __lsx_vmul_w(vx, vinvyap);
147 vr = __lsx_vmul_w(vr, vyap);
148 vx = __lsx_vadd_w(vx, vr);
149 vx = __lsx_vsrli_w(vx, 8);
150 }
151 vx = __lsx_vsrli_w(vx, 14);
152 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
153 vx = __lsx_vpickev_b(__lsx_vsat_wu(vx, 7), __lsx_vsat_hu(vx, 7));
154 *dptr = __lsx_vpickve2gr_w(vx, 0);
155 if (RGB)
156 *dptr |= 0xff000000;
157 dptr++;
158 }
159 }
160 };
161 multithread_pixels_function(isi, dh, scaleSection);
162}
163
164template<bool RGB>
165void qt_qimageScaleAARGBA_down_xy_lsx(QImageScaleInfo *isi, unsigned int *dest,
166 int dw, int dh, int dow, int sow)
167{
168 const unsigned int **ypoints = isi->ypoints;
169 int *xpoints = isi->xpoints;
170 int *xapoints = isi->xapoints;
171 int *yapoints = isi->yapoints;
172
173 auto scaleSection = [&] (int yStart, int yEnd) {
174 for (int y = yStart; y < yEnd; ++y) {
175 int Cy = yapoints[y] >> 16;
176 int yap = yapoints[y] & 0xffff;
177 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
178 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
179
180 unsigned int *dptr = dest + (y * dow);
181 for (int x = 0; x < dw; x++) {
182 const int Cx = xapoints[x] >> 16;
183 const int xap = xapoints[x] & 0xffff;
184 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
185 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
186
187 const unsigned int *sptr = ypoints[y] + xpoints[x];
188 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
189 __m128i vr = __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vyap);
190
191 int j;
192 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
193 sptr += sow;
194 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vCy));
196 }
197 sptr += sow;
198 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
199 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), __lsx_vreplgr2vr_w(j)));
200
201 vr = __lsx_vsrli_w(vr, 24);
202 vr = __lsx_vpickev_h(__lsx_vldi(0), __lsx_vsat_wu(vr, 15));
203 vr = __lsx_vpickev_b(__lsx_vldi(0), __lsx_vsat_hu(vr, 7));
204 *dptr = __lsx_vpickve2gr_w(vr, 0);
205 if (RGB)
206 *dptr |= 0xff000000;
207 dptr++;
208 }
209 }
210 };
211 multithread_pixels_function(isi, dh, scaleSection);
212}
213
214template void qt_qimageScaleAARGBA_up_x_down_y_lsx<false>(QImageScaleInfo *isi, unsigned int *dest,
215 int dw, int dh, int dow, int sow);
216
217template void qt_qimageScaleAARGBA_up_x_down_y_lsx<true>(QImageScaleInfo *isi, unsigned int *dest,
218 int dw, int dh, int dow, int sow);
219
220template void qt_qimageScaleAARGBA_down_x_up_y_lsx<false>(QImageScaleInfo *isi, unsigned int *dest,
221 int dw, int dh, int dow, int sow);
222
223template void qt_qimageScaleAARGBA_down_x_up_y_lsx<true>(QImageScaleInfo *isi, unsigned int *dest,
224 int dw, int dh, int dow, int sow);
225
226template void qt_qimageScaleAARGBA_down_xy_lsx<false>(QImageScaleInfo *isi, unsigned int *dest,
227 int dw, int dh, int dow, int sow);
228
229template void qt_qimageScaleAARGBA_down_xy_lsx<true>(QImageScaleInfo *isi, unsigned int *dest,
230 int dw, int dh, int dow, int sow);
231
232QT_END_NAMESPACE
233
234#endif