7#include <private/qdrawhelper_loongarch64_p.h>
8#include <private/qsimd_p.h>
10#if defined(QT_COMPILER_SUPPORTS_LSX)
14using namespace QImageScale;
// NOTE(review): this listing looks like a lossy extraction. Stray numbers are
// fused onto the start of lines and some lines (braces, the declaration of
// `i`, any pointer advance, the return) are not visible. The comments below
// describe only what the visible lines show; everything else is hedged.
//
// Accumulates a weighted sum of 32-bit source pixels into four 32-bit lanes.
//   pix    - source pixel pointer; *pix is loaded for every term of the sum
//   xyap   - scalar weight; the loop counter starts at (1 << 14) - xyap
//   Cxy    - scalar weight; used as the loop bound and decrement
//   step   - not referenced on the visible lines; presumably the stride by
//            which pix advances between samples - TODO confirm
//   vxyap  - vector weight applied to the first pixel (callers pass xyap
//            broadcast with __lsx_vreplgr2vr_w)
//   vCxy   - vector weight applied to each pixel inside the loop (callers
//            pass Cxy broadcast the same way)
16inline static __m128i Q_DECL_VECTORCALL
17qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
18 int step,
const __m128i vxyap,
const __m128i vCxy)
// Shuffle indices 0..3 land in the low byte of each 32-bit lane; index 16
// presumably selects a byte of the all-zero first operand, so each lane would
// hold one zero-extended byte of the pixel - TODO confirm operand order.
20 const __m128i shuffleMask = (__m128i)(v16i8){0, 16, 16, 16, 1, 16, 16, 16,
21 2, 16, 16, 16, 3, 16, 16, 16};
// First term: unpacked pixel times vxyap.
22 __m128i vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
23 __m128i vx = __lsx_vmul_w(vpix, vxyap);
// Middle terms: while the remaining weight i exceeds Cxy, add pixel * vCxy
// and reduce i by Cxy.
// NOTE(review): no advance of pix is visible between loads; presumably
// `pix += step` lines were dropped from this listing - TODO confirm.
25 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
27 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
28 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, vCxy));
// Last term: the leftover weight i is broadcast and applied to one more load.
31 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
32 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, __lsx_vreplgr2vr_w(i)));
// Scales into `dest` by accumulating source pixels vertically (helper called
// with step = sow) and blending two horizontally adjacent accumulations.
//   isi  - supplies the ypoints / xpoints / xapoints / yapoints tables
//   dest - destination pixels; row y starts at dest + y * dow
//   dw   - number of destination pixels processed per row
//   dh   - passed to multithread_pixels_function together with the row lambda
//   dow  - multiplier applied to y when locating a destination row
//   sow  - passed to the helper as its `step` argument
// NOTE(review): this listing appears to be missing lines (braces, the
// template header implied by the explicit instantiations later in the file,
// possibly conditionals and the dptr advance). Comments cover the visible
// lines only.
37void qt_qimageScaleAARGBA_up_x_down_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
38 int dw,
int dh,
int dow,
int sow)
40 const unsigned int **ypoints = isi->ypoints;
41 const int *xpoints = isi->xpoints;
42 const int *xapoints = isi->xapoints;
43 const int *yapoints = isi->yapoints;
// 256 is the full-scale value for the horizontal blend weight below.
45 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes destination rows in [yStart, yEnd).
48 auto scaleSection = [&] (
int yStart,
int yEnd) {
49 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: upper bits -> Cy, low 16 bits -> yap.
50 const int Cy = yapoints[y] >> 16;
51 const int yap = yapoints[y] & 0xffff;
52 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
53 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
55 unsigned int *dptr = dest + (y * dow);
56 for (
int x = 0; x < dw; x++) {
// Accumulation starting at the source pixel for (x, y).
57 const unsigned int *sptr = ypoints[y] + xpoints[x];
58 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
60 const int xap = xapoints[x];
// Blend with the accumulation one pixel to the right:
// vx = (vx * (256 - xap) + vr * xap) >> 8.
// NOTE(review): whether this blend is guarded by a condition on xap is
// not visible in this listing - TODO confirm.
62 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
63 const __m128i vinvxap = __lsx_vsub_w(v256, vxap);
64 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
66 vx = __lsx_vmul_w(vx, vinvxap);
67 vr = __lsx_vmul_w(vr, vxap);
68 vx = __lsx_vadd_w(vx, vr);
69 vx = __lsx_vsrli_w(vx, 8);
// Drop the 14 fractional bits carried by the helper's weights
// (its loop counter starts from 1 << 14).
71 vx = __lsx_vsrli_w(vx, 14);
// Narrow 32-bit lanes to 16-bit, then to 8-bit, saturating at each step;
// lane 0 of the result is written as the output pixel.
72 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
73 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
74 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Hands the row lambda to the shared dispatcher; presumably it splits
// [0, dh) into ranges, possibly across threads - TODO confirm.
81 multithread_pixels_function(isi, dh, scaleSection);
// Scales into `dest` by accumulating source pixels horizontally (helper called
// with step = 1) and blending two vertically adjacent accumulations.
//   isi  - supplies the ypoints / xpoints / xapoints / yapoints tables
//   dest - destination pixels; row y starts at dest + y * dow
//   dw   - number of destination pixels processed per row
//   dh   - passed to multithread_pixels_function together with the row lambda
//   dow  - multiplier applied to y when locating a destination row
//   sow  - offset added to sptr to reach the second accumulation's start pixel
// NOTE(review): this listing appears to be missing lines (braces, the
// template header implied by the explicit instantiations later in the file,
// possibly conditionals and the dptr advance). Apart from the saturation fix
// marked below, the visible tokens are left exactly as they are.
85void qt_qimageScaleAARGBA_down_x_up_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
86 int dw,
int dh,
int dow,
int sow)
88 const unsigned int **ypoints = isi->ypoints;
89 int *xpoints = isi->xpoints;
90 int *xapoints = isi->xapoints;
91 int *yapoints = isi->yapoints;
// 256 is the full-scale value for the vertical blend weight below.
93 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes destination rows in [yStart, yEnd).
96 auto scaleSection = [&] (
int yStart,
int yEnd) {
97 for (
int y = yStart; y < yEnd; ++y) {
98 unsigned int *dptr = dest + (y * dow);
99 for (
int x = 0; x < dw; x++) {
// xapoints[x] packs two values: upper bits -> Cx, low 16 bits -> xap.
100 int Cx = xapoints[x] >> 16;
101 int xap = xapoints[x] & 0xffff;
102 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
103 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// Accumulation starting at the source pixel for (x, y).
105 const unsigned int *sptr = ypoints[y] + xpoints[x];
106 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
108 int yap = yapoints[y];
// Blend with the accumulation starting sow pixels further on:
// vx = (vx * (256 - yap) + vr * yap) >> 8.
// NOTE(review): whether this blend is guarded by a condition on yap is
// not visible in this listing - TODO confirm.
110 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
111 const __m128i vinvyap = __lsx_vsub_w(v256, vyap);
112 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
114 vx = __lsx_vmul_w(vx, vinvyap);
115 vr = __lsx_vmul_w(vr, vyap);
116 vx = __lsx_vadd_w(vx, vr);
117 vx = __lsx_vsrli_w(vx, 8);
// Drop the 14 fractional bits carried by the helper's weights
// (its loop counter starts from 1 << 14).
119 vx = __lsx_vsrli_w(vx, 14);
// Narrow 32-bit lanes to 16-bit, then to 8-bit, saturating at each step.
120 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
// After the previous pack vx holds 16-bit lanes, so both operands saturate
// per halfword (the first operand previously used the word-wide
// __lsx_vsat_wu, which does not match the lane width of vx here).
121 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
// Lane 0 of the packed result is written as the output pixel.
122 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Hands the row lambda to the shared dispatcher; presumably it splits
// [0, dh) into ranges, possibly across threads - TODO confirm.
129 multithread_pixels_function(isi, dh, scaleSection);
// Scales into `dest` by accumulating source pixels in both directions: the
// helper produces a horizontal accumulation (step = 1) and this routine
// combines several such accumulations with the vertical weights.
//   isi  - supplies the ypoints / xpoints / xapoints / yapoints tables
//   dest - destination pixels; row y starts at dest + y * dow
//   dw   - number of destination pixels processed per row
//   dh   - passed to multithread_pixels_function together with the row lambda
//   dow  - multiplier applied to y when locating a destination row
//   sow  - not referenced on the visible lines; presumably the amount by
//          which sptr advances between accumulations - TODO confirm
// NOTE(review): this listing appears to be missing lines (braces, the
// template header implied by the explicit instantiations later in the file,
// the declaration of j, any sptr / dptr advance). Comments cover the visible
// lines only.
133void qt_qimageScaleAARGBA_down_xy_lsx(QImageScaleInfo *isi,
unsigned int *dest,
134 int dw,
int dh,
int dow,
int sow)
136 const unsigned int **ypoints = isi->ypoints;
137 int *xpoints = isi->xpoints;
138 int *xapoints = isi->xapoints;
139 int *yapoints = isi->yapoints;
// Processes destination rows in [yStart, yEnd).
141 auto scaleSection = [&] (
int yStart,
int yEnd) {
142 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: upper bits -> Cy, low 16 bits -> yap.
143 int Cy = yapoints[y] >> 16;
144 int yap = yapoints[y] & 0xffff;
145 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
146 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
148 unsigned int *dptr = dest + (y * dow);
149 for (
int x = 0; x < dw; x++) {
// xapoints[x] is packed the same way: upper bits -> Cx, low 16 bits -> xap.
150 const int Cx = xapoints[x] >> 16;
151 const int xap = xapoints[x] & 0xffff;
152 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
153 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// First term: accumulation, pre-shifted right by 4 before weighting by yap
// (presumably to leave headroom in the 32-bit lanes - TODO confirm).
155 const unsigned int *sptr = ypoints[y] + xpoints[x];
156 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
157 __m128i vr = __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vyap);
// Middle terms: while the remaining weight j exceeds Cy, add an accumulation
// weighted by Cy and reduce j by Cy.
// NOTE(review): no advance of sptr is visible between helper calls;
// presumably `sptr += sow` lines were dropped - TODO confirm.
160 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
162 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
163 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vCy));
// Last term: the leftover weight j is broadcast and applied to one more
// accumulation.
166 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
167 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), __lsx_vreplgr2vr_w(j)));
// Scale the sum back down, then narrow 32 -> 16 -> 8 bits with saturation.
// An all-zero vector is the other pack operand. Lane 0 is the output pixel.
169 vr = __lsx_vsrli_w(vr, 24);
170 vr = __lsx_vpickev_h(__lsx_vldi(0), __lsx_vsat_wu(vr, 15));
171 vr = __lsx_vpickev_b(__lsx_vldi(0), __lsx_vsat_hu(vr, 7));
172 *dptr = __lsx_vpickve2gr_w(vr, 0);
// Hands the row lambda to the shared dispatcher; presumably it splits
// [0, dh) into ranges, possibly across threads - TODO confirm.
179 multithread_pixels_function(isi, dh, scaleSection);
// Explicit instantiations of the three scaling routines for both values of
// their bool template argument (false and true).
// NOTE(review): the matching template headers are not visible on the
// definitions in this listing; what the bool selects cannot be read from the
// visible lines.
182template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
183 int dw,
int dh,
int dow,
int sow);
185template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
186 int dw,
int dh,
int dow,
int sow);
188template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
189 int dw,
int dh,
int dow,
int sow);
191template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
192 int dw,
int dh,
int dow,
int sow);
194template void qt_qimageScaleAARGBA_down_xy_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
195 int dw,
int dh,
int dow,
int sow);
197template void qt_qimageScaleAARGBA_down_xy_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
198 int dw,
int dh,
int dow,
int sow);