6#include <private/qdrawhelper_loongarch64_p.h>
7#include <private/qsimd_p.h>
9#if QT_CONFIG(qtgui_threadpool)
10#include <qsemaphore.h>
11#include <private/qguiapplication_p.h>
12#include <private/qthreadpool_p.h>
15#if defined(QT_COMPILER_SUPPORTS_LSX)
19using namespace QImageScale;
// Runs scaleSection over the destination rows, fanning the work out to the
// Qt GUI thread pool when the job is large enough.
//
//   isi          - scale info; only isi->sh and isi->sw are read here, to size the job.
//   dh           - number of destination rows; also caps the segment count.
//   scaleSection - callable invoked as scaleSection(yStart, yEnd).
//
// NOTE(review): the declarations of `y` and `semaphore`, the template header
// for T, and any non-threaded fallback are not visible in this excerpt;
// confirm against the full file.
22static inline void multithread_pixels_function(QImageScaleInfo *isi,
int dh,
const T &scaleSection)
24#if QT_CONFIG(qtgui_threadpool)
// One segment per 1<<16 units of isi->sh * isi->sw; the product is formed in
// qsizetype before the quotient is stored in an int.
25 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
// Never create more segments than there are destination rows.
26 segments = std::min(segments, dh);
27 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
// Fan out only if there is more than one segment, a pool exists, and the
// calling thread is not itself one of the pool's threads (presumably so a
// pool thread never blocks on the semaphore below; TODO confirm).
28 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
31 for (
int i = 0; i < segments; ++i) {
// Rows for this segment: the rows still remaining, divided evenly among the
// segments still remaining.
32 int yn = (dh - y) / (segments - i);
// y and yn are captured by value; everything else is captured by reference.
33 threadPool->start([&, y, yn]() {
34 scaleSection(y, y + yn);
// Blocks until `segments` permits are available (presumably one is released
// by each task; the release call is not visible here).
39 semaphore.acquire(segments);
// Accumulates a weighted sum of source pixels, keeping each of the pixel's
// four bytes in its own 32-bit lane.
//
//   pix   - first source pixel to sample.
//   xyap  - scalar weight of the first pixel; here it only seeds the loop counter.
//   Cxy   - scalar weight of each following full pixel; also the loop decrement.
//   step  - NOTE(review): presumably the element stride by which pix advances
//           between samples; the advancing statements are not visible here.
//   vxyap - vector weight multiplied with the first pixel.
//   vCxy  - vector weight multiplied with each pixel inside the loop.
//
// The weights applied (vxyap, vCxy per loop pass, then the remainder i) add up
// to 1 << 14 when vxyap/vCxy are broadcasts of xyap/Cxy, as in the callers
// visible in this file.
48inline static __m128i Q_DECL_VECTORCALL
49qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
50 int step,
const __m128i vxyap,
const __m128i vCxy)
// Mask entries 0..3 pick the four bytes of the replicated pixel; entry 16 picks
// a byte from the all-zero operand, so (per LSX vshuf.b semantics) every pixel
// byte ends up zero-extended in its own 32-bit lane.
52 const __m128i shuffleMask = (__m128i)(v16i8){0, 16, 16, 16, 1, 16, 16, 16,
53 2, 16, 16, 16, 3, 16, 16, 16};
54 __m128i vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
// First pixel is weighted by vxyap.
55 __m128i vx = __lsx_vmul_w(vpix, vxyap);
// i is the weight still to be handed out of a total of 1 << 14; each pass
// spends Cxy of it on one pixel. (The declaration of i is not visible here.)
57 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
59 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
60 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, vCxy));
// Last pixel receives whatever weight is left in i.
63 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
64 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, __lsx_vreplgr2vr_w(i)));
// LSX scaler that accumulates along the source column (helper called with
// step sow) and blends two neighbouring columns (sptr and sptr + 1) with the
// weight xapoints[x] out of 256.
//
//   isi  - scale info supplying ypoints, xpoints, xapoints and yapoints.
//   dest - destination pixel buffer; row y starts at dest + y * dow.
//   dw   - number of destination pixels produced per row.
//   dh   - number of destination rows.
//   dow  - destination row stride, in pixels.
//   sow  - passed to the helper as its step (presumably the source row stride
//          in pixels; TODO confirm).
//
// NOTE(review): some statements of this function (for example any condition
// around the blend and the dptr advance) are not visible in this excerpt.
69void qt_qimageScaleAARGBA_up_x_down_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
70 int dw,
int dh,
int dow,
int sow)
72 const unsigned int **ypoints = isi->ypoints;
73 const int *xpoints = isi->xpoints;
74 const int *xapoints = isi->xapoints;
75 const int *yapoints = isi->yapoints;
// Broadcast of 256, the denominator of the column blend.
77 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes destination rows [yStart, yEnd).
80 auto scaleSection = [&] (
int yStart,
int yEnd) {
81 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: Cy in the bits above 16, yap in the low 16 bits.
82 const int Cy = yapoints[y] >> 16;
83 const int yap = yapoints[y] & 0xffff;
84 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
85 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
87 unsigned int *dptr = dest + (y * dow);
88 for (
int x = 0; x < dw; x++) {
// Source sample: row base pointer plus column offset.
89 const unsigned int *sptr = ypoints[y] + xpoints[x];
90 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
92 const int xap = xapoints[x];
// Blend with the next column: vx = (vx * (256 - xap) + vr * xap) >> 8.
94 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
95 const __m128i vinvxap = __lsx_vsub_w(v256, vxap);
96 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
98 vx = __lsx_vmul_w(vx, vinvxap);
99 vr = __lsx_vmul_w(vr, vxap);
100 vx = __lsx_vadd_w(vx, vr);
101 vx = __lsx_vsrli_w(vx, 8);
// Remove the 1 << 14 weight scale introduced by the helper.
103 vx = __lsx_vsrli_w(vx, 14);
// Narrow 32 -> 16 -> 8 bits with unsigned saturation, then store the low
// 32 bits as one destination pixel.
104 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
105 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
106 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Run the section lambda over all dh rows, possibly on several threads.
113 multithread_pixels_function(isi, dh, scaleSection);
// LSX scaler that accumulates along the source row (helper called with
// step 1) and blends two samples that are sow elements apart (sptr and
// sptr + sow) with the weight yapoints[y] out of 256.
//
//   isi  - scale info supplying ypoints, xpoints, xapoints and yapoints.
//   dest - destination pixel buffer; row y starts at dest + y * dow.
//   dw   - number of destination pixels produced per row.
//   dh   - number of destination rows.
//   dow  - destination row stride, in pixels.
//   sow  - offset added to sptr to reach the second blend sample (presumably
//          the source row stride in pixels; TODO confirm).
//
// NOTE(review): some statements of this function (for example any condition
// around the blend and the dptr advance) are not visible in this excerpt.
117void qt_qimageScaleAARGBA_down_x_up_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
118 int dw,
int dh,
int dow,
int sow)
120 const unsigned int **ypoints = isi->ypoints;
// The lookup tables are only read here, so they are const-qualified like in
// qt_qimageScaleAARGBA_up_x_down_y_lsx.
121 const int *xpoints = isi->xpoints;
122 const int *xapoints = isi->xapoints;
123 const int *yapoints = isi->yapoints;
// Broadcast of 256, the denominator of the row blend.
125 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes destination rows [yStart, yEnd).
128 auto scaleSection = [&] (
int yStart,
int yEnd) {
129 for (
int y = yStart; y < yEnd; ++y) {
130 unsigned int *dptr = dest + (y * dow);
131 for (
int x = 0; x < dw; x++) {
// xapoints[x] packs two values: Cx in the bits above 16, xap in the low 16 bits.
132 const int Cx = xapoints[x] >> 16;
133 const int xap = xapoints[x] & 0xffff;
134 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
135 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// Source sample: row base pointer plus column offset.
137 const unsigned int *sptr = ypoints[y] + xpoints[x];
138 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
140 const int yap = yapoints[y];
// Blend with the sample sow elements further on:
// vx = (vx * (256 - yap) + vr * yap) >> 8.
142 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
143 const __m128i vinvyap = __lsx_vsub_w(v256, vyap);
144 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
146 vx = __lsx_vmul_w(vx, vinvyap);
147 vr = __lsx_vmul_w(vr, vyap);
148 vx = __lsx_vadd_w(vx, vr);
149 vx = __lsx_vsrli_w(vx, 8);
// Remove the 1 << 14 weight scale introduced by the helper.
151 vx = __lsx_vsrli_w(vx, 14);
// Narrow 32 -> 16 -> 8 bits with unsigned saturation. After the halfword pack
// the lanes are 16 bits wide, so both operands of the byte pack use the
// halfword saturate, matching the other scalers in this file.
152 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
153 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
// Store the low 32 bits as one destination pixel.
154 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Run the section lambda over all dh rows, possibly on several threads.
161 multithread_pixels_function(isi, dh, scaleSection);
// LSX scaler that accumulates in both directions: each sample is a horizontal
// weighted sum from the helper (step 1), and those sums are in turn combined
// with the vertical weights yap, Cy and the remainder j.
//
//   isi  - scale info supplying ypoints, xpoints, xapoints and yapoints.
//   dest - destination pixel buffer; row y starts at dest + y * dow.
//   dw   - number of destination pixels produced per row.
//   dh   - number of destination rows.
//   dow  - destination row stride, in pixels.
//   sow  - not referenced in the lines visible here (presumably used to advance
//          sptr between helper calls; TODO confirm).
//
// NOTE(review): the declaration of j, the statements that move sptr between
// helper calls, and the dptr advance are not visible in this excerpt.
165void qt_qimageScaleAARGBA_down_xy_lsx(QImageScaleInfo *isi,
unsigned int *dest,
166 int dw,
int dh,
int dow,
int sow)
168 const unsigned int **ypoints = isi->ypoints;
169 int *xpoints = isi->xpoints;
170 int *xapoints = isi->xapoints;
171 int *yapoints = isi->yapoints;
// Processes destination rows [yStart, yEnd).
173 auto scaleSection = [&] (
int yStart,
int yEnd) {
174 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: Cy in the bits above 16, yap in the low 16 bits.
175 int Cy = yapoints[y] >> 16;
176 int yap = yapoints[y] & 0xffff;
177 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
178 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
180 unsigned int *dptr = dest + (y * dow);
181 for (
int x = 0; x < dw; x++) {
// xapoints[x] is packed the same way: Cx above bit 16, xap in the low 16 bits.
182 const int Cx = xapoints[x] >> 16;
183 const int xap = xapoints[x] & 0xffff;
184 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
185 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// Source sample: row base pointer plus column offset.
187 const unsigned int *sptr = ypoints[y] + xpoints[x];
188 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
// The horizontal sum carries a 1 << 14 scale; dropping 4 bits leaves 1 << 10
// so that the product with a vertical weight (up to 1 << 14) fits in 32 bits.
189 __m128i vr = __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vyap);
// j is the vertical weight still to be handed out of a total of 1 << 14;
// each pass spends Cy of it on one horizontal sum.
192 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
194 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
195 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vCy));
// Last horizontal sum receives whatever vertical weight is left in j.
198 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
199 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), __lsx_vreplgr2vr_w(j)));
// Remove the combined scale: (1 << 10) horizontal times (1 << 14) vertical.
201 vr = __lsx_vsrli_w(vr, 24);
// Narrow 32 -> 16 -> 8 bits with unsigned saturation (the other pack operand
// is all zero), then store the low 32 bits as one destination pixel.
202 vr = __lsx_vpickev_h(__lsx_vldi(0), __lsx_vsat_wu(vr, 15));
203 vr = __lsx_vpickev_b(__lsx_vldi(0), __lsx_vsat_hu(vr, 7));
204 *dptr = __lsx_vpickve2gr_w(vr, 0);
// Run the section lambda over all dh rows, possibly on several threads.
211 multithread_pixels_function(isi, dh, scaleSection);
// Explicit instantiations of the three LSX scalers, once for each value of
// their bool template argument, so the definitions are emitted in this
// translation unit. (The template headers and the meaning of the bool
// argument are not visible in this excerpt.)

// up_x_down_y, template argument false.
214template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
215 int dw,
int dh,
int dow,
int sow);
// up_x_down_y, template argument true.
217template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
218 int dw,
int dh,
int dow,
int sow);
// down_x_up_y, template argument false.
220template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
221 int dw,
int dh,
int dow,
int sow);
// down_x_up_y, template argument true.
223template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
224 int dw,
int dh,
int dow,
int sow);
// down_xy, template argument false.
226template void qt_qimageScaleAARGBA_down_xy_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
227 int dw,
int dh,
int dow,
int sow);
// down_xy, template argument true.
229template void qt_qimageScaleAARGBA_down_xy_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
230 int dw,
int dh,
int dow,
int sow);