6#include <private/qdrawhelper_loongarch64_p.h>
7#include <private/qsimd_p.h>
9#if defined(QT_COMPILER_SUPPORTS_LSX)
13using namespace QImageScale;
// Builds a weighted per-channel sum over a run of source pixels and keeps it
// in a vector of 32-bit lanes.
//
// Parameters (as used in the visible lines):
//   pix    - source pixel pointer; dereferenced for every load.
//   xyap   - scalar weight of the first pixel; also seeds the loop counter
//            as (1 << 14) - xyap.
//   Cxy    - amount subtracted from the counter on every loop pass, and the
//            threshold the counter must exceed for the loop to continue.
//   step   - not referenced in any visible line (see the review note).
//   vxyap  - vector multiplied with the first pixel; the callers in this file
//            pass the xyap value replicated into every word lane.
//   vCxy   - vector multiplied with each pixel loaded inside the loop; the
//            callers in this file pass Cxy replicated into every word lane.
//
// NOTE(review): this listing looks damaged. The integers fused onto the start
// of many lines appear to be listing line numbers, and the gaps in them line
// up with missing code: there are no braces, `i` is never declared, `step` is
// never used, `pix` is dereferenced three times without a visible advance, and
// nothing is returned although the return type is __m128i. Presumably the
// dropped lines declared `i`, advanced `pix` by `step` before each reload and
// returned `vx` - confirm against the pristine file before relying on this.
15inline static __m128i Q_DECL_VECTORCALL
16qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
17 int step,
const __m128i vxyap,
const __m128i vCxy)
// Byte-shuffle control used to widen one pixel: indices 0..3 pick the four
// bytes of the pixel, index 16 is presumably a byte of the zero operand, so
// each pixel byte ends up zero-extended in its own 32-bit lane - TODO confirm
// the operand order of __lsx_vshuf_b.
19 const __m128i shuffleMask = (__m128i)(v16i8){0, 16, 16, 16, 1, 16, 16, 16,
20 2, 16, 16, 16, 3, 16, 16, 16};
// First pixel: replicate it into every word lane, shuffle, weight by vxyap.
21 __m128i vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
22 __m128i vx = __lsx_vmul_w(vpix, vxyap);
// Middle pixels: the counter starts at what is left of 1 << 14 after the
// first weight; every pass adds a pixel weighted by vCxy and spends Cxy.
24 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
26 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
27 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, vCxy));
// Last pixel: weighted by whatever remains in the counter.
30 vpix = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(*pix), shuffleMask);
31 vx = __lsx_vadd_w(vx, __lsx_vmul_w(vpix, __lsx_vreplgr2vr_w(i)));
// Scaler that accumulates several source rows per output pixel (the helper is
// stepped by `sow`) and blends each result with the neighbouring source column.
//
// Parameters (as used in the visible lines):
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables and is
//          forwarded to multithread_pixels_function.
//   dest - output buffer; row y starts at dest + y * dow.
//   dw   - number of output pixels produced per row (inner loop bound).
//   dh   - forwarded to multithread_pixels_function together with the row
//          lambda; presumably the number of output rows - TODO confirm.
//   dow  - distance between output rows, counted in unsigned ints.
//   sow  - step handed to the helper, i.e. the distance between the source
//          pixels it accumulates, counted in unsigned ints.
//
// NOTE(review): this listing looks damaged. The integers fused onto the start
// of many lines appear to be listing line numbers, and the gaps in them line
// up with missing code: no `template` header is visible although the file
// later explicitly instantiates this name with <false> and <true>, closing
// braces are absent, and `dptr` is never advanced inside the x loop. Confirm
// against the pristine file; the comments below cover only what is visible.
36void qt_qimageScaleAARGBA_up_x_down_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
37 int dw,
int dh,
int dow,
int sow)
39 const unsigned int **ypoints = isi->ypoints;
40 const int *xpoints = isi->xpoints;
41 const int *xapoints = isi->xapoints;
42 const int *yapoints = isi->yapoints;
// Constant 256 in every word lane; the horizontal weights below sum to it.
44 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes output rows [yStart, yEnd).
47 auto scaleSection = [&] (
int yStart,
int yEnd) {
48 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: the high bits are the per-row decrement
// given to the helper, the low 16 bits the weight of the first row.
49 const int Cy = yapoints[y] >> 16;
50 const int yap = yapoints[y] & 0xffff;
51 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
52 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
54 unsigned int *dptr = dest + (y * dow);
55 for (
int x = 0; x < dw; x++) {
// Vertical accumulation for the source column selected by xpoints[x].
56 const unsigned int *sptr = ypoints[y] + xpoints[x];
57 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow, vyap, vCy);
59 const int xap = xapoints[x];
// Blend with the column one pixel to the right: weights (256 - xap) and
// xap, then drop the 8 weight bits again.
// NOTE(review): upstream of this listing there may be a guard around this
// blend (one numbered line is missing just before it); as shown, sptr + 1
// is read for every x - confirm.
61 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
62 const __m128i vinvxap = __lsx_vsub_w(v256, vxap);
63 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow, vyap, vCy);
65 vx = __lsx_vmul_w(vx, vinvxap);
66 vr = __lsx_vmul_w(vr, vxap);
67 vx = __lsx_vadd_w(vx, vr);
68 vx = __lsx_vsrli_w(vx, 8);
// Remove the 14-bit weight scale used by the helper (its counter starts from
// 1 << 14), then saturate and pack the word lanes down to bytes and store
// the low 32 bits as the output pixel.
70 vx = __lsx_vsrli_w(vx, 14);
71 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
72 vx = __lsx_vpickev_b(__lsx_vsat_hu(vx, 7), __lsx_vsat_hu(vx, 7));
73 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Hand the row lambda to the shared dispatcher; how the rows are split is
// decided there and is not visible in this file.
80 multithread_pixels_function(isi, dh, scaleSection);
// Scaler that accumulates several adjacent source pixels of one row per output
// pixel (the helper is stepped by 1) and blends each result with the same
// accumulation taken one source row further down (sptr + sow).
//
// Parameters (as used in the visible lines):
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables and is
//          forwarded to multithread_pixels_function.
//   dest - output buffer; row y starts at dest + y * dow.
//   dw   - number of output pixels produced per row (inner loop bound).
//   dh   - forwarded to multithread_pixels_function together with the row
//          lambda; presumably the number of output rows - TODO confirm.
//   dow  - distance between output rows, counted in unsigned ints.
//   sow  - offset from a source pixel to the one below it, counted in
//          unsigned ints.
//
// NOTE(review): this listing looks damaged. The integers fused onto the start
// of many lines appear to be listing line numbers, and the gaps in them line
// up with missing code: no `template` header is visible although the file
// later explicitly instantiates this name with <false> and <true>, closing
// braces are absent, and `dptr` is never advanced inside the x loop. Confirm
// against the pristine file; the comments below cover only what is visible.
84void qt_qimageScaleAARGBA_down_x_up_y_lsx(QImageScaleInfo *isi,
unsigned int *dest,
85 int dw,
int dh,
int dow,
int sow)
87 const unsigned int **ypoints = isi->ypoints;
88 int *xpoints = isi->xpoints;
89 int *xapoints = isi->xapoints;
90 int *yapoints = isi->yapoints;
// Constant 256 in every word lane; the vertical weights below sum to it.
92 const __m128i v256 = __lsx_vreplgr2vr_w(256);
// Processes output rows [yStart, yEnd).
95 auto scaleSection = [&] (
int yStart,
int yEnd) {
96 for (
int y = yStart; y < yEnd; ++y) {
97 unsigned int *dptr = dest + (y * dow);
98 for (
int x = 0; x < dw; x++) {
// xapoints[x] packs two values: the high bits are the per-pixel decrement
// given to the helper, the low 16 bits the weight of the first pixel.
99 int Cx = xapoints[x] >> 16;
100 int xap = xapoints[x] & 0xffff;
101 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
102 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// Horizontal accumulation starting at the source pixel chosen for (x, y).
104 const unsigned int *sptr = ypoints[y] + xpoints[x];
105 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
107 int yap = yapoints[y];
// Blend with the row below: weights (256 - yap) and yap, then drop the 8
// weight bits again.
// NOTE(review): upstream of this listing there may be a guard around this
// blend (one numbered line is missing just before it); as shown, sptr + sow
// is read for every pixel - confirm.
109 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
110 const __m128i vinvyap = __lsx_vsub_w(v256, vyap);
111 __m128i vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1, vxap, vCx);
113 vx = __lsx_vmul_w(vx, vinvyap);
114 vr = __lsx_vmul_w(vr, vyap);
115 vx = __lsx_vadd_w(vx, vr);
116 vx = __lsx_vsrli_w(vx, 8);
// Remove the 14-bit weight scale used by the helper (its counter starts from
// 1 << 14), then saturate and pack the word lanes down to bytes and store
// the low 32 bits as the output pixel.
118 vx = __lsx_vsrli_w(vx, 14);
119 vx = __lsx_vpickev_h(__lsx_vsat_wu(vx, 15), __lsx_vsat_wu(vx, 15));
// NOTE(review): the first operand uses __lsx_vsat_wu, whereas the same step in
// qt_qimageScaleAARGBA_up_x_down_y_lsx uses __lsx_vsat_hu for both operands.
// Only word 0 of the result is stored below, which presumably comes from the
// second operand, so this may be harmless - confirm and align the two.
120 vx = __lsx_vpickev_b(__lsx_vsat_wu(vx, 7), __lsx_vsat_hu(vx, 7));
121 *dptr = __lsx_vpickve2gr_w(vx, 0);
// Hand the row lambda to the shared dispatcher; how the rows are split is
// decided there and is not visible in this file.
128 multithread_pixels_function(isi, dh, scaleSection);
// Scaler that accumulates in both directions: the helper sums a run of
// adjacent pixels within a row (step 1), and this function sums several such
// row results with a second set of weights.
//
// Parameters (as used in the visible lines):
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables and is
//          forwarded to multithread_pixels_function.
//   dest - output buffer; row y starts at dest + y * dow.
//   dw   - number of output pixels produced per row (inner loop bound).
//   dh   - forwarded to multithread_pixels_function together with the row
//          lambda; presumably the number of output rows - TODO confirm.
//   dow  - distance between output rows, counted in unsigned ints.
//   sow  - not referenced in any visible line (see the review note).
//
// NOTE(review): this listing looks damaged. The integers fused onto the start
// of many lines appear to be listing line numbers, and the gaps in them line
// up with missing code: no `template` header is visible although the file
// later explicitly instantiates this name with <false> and <true>, closing
// braces are absent, `j` is never declared, `sptr` is passed unchanged to all
// three helper calls, and `dptr` is never advanced. Presumably the dropped
// lines moved `sptr` down by `sow` before each further helper call - confirm
// against the pristine file; the comments below cover only what is visible.
132void qt_qimageScaleAARGBA_down_xy_lsx(QImageScaleInfo *isi,
unsigned int *dest,
133 int dw,
int dh,
int dow,
int sow)
135 const unsigned int **ypoints = isi->ypoints;
136 int *xpoints = isi->xpoints;
137 int *xapoints = isi->xapoints;
138 int *yapoints = isi->yapoints;
// Processes output rows [yStart, yEnd).
140 auto scaleSection = [&] (
int yStart,
int yEnd) {
141 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two values: the high bits are the per-row decrement of
// the vertical counter, the low 16 bits the weight of the first row.
142 int Cy = yapoints[y] >> 16;
143 int yap = yapoints[y] & 0xffff;
144 const __m128i vCy = __lsx_vreplgr2vr_w(Cy);
145 const __m128i vyap = __lsx_vreplgr2vr_w(yap);
147 unsigned int *dptr = dest + (y * dow);
148 for (
int x = 0; x < dw; x++) {
// xapoints[x] is packed the same way for the horizontal direction.
149 const int Cx = xapoints[x] >> 16;
150 const int xap = xapoints[x] & 0xffff;
151 const __m128i vCx = __lsx_vreplgr2vr_w(Cx);
152 const __m128i vxap = __lsx_vreplgr2vr_w(xap);
// First row: horizontal sum, reduced by 4 bits before the vertical weight is
// applied (presumably to keep the 32-bit lanes from overflowing - confirm).
154 const unsigned int *sptr = ypoints[y] + xpoints[x];
155 __m128i vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
156 __m128i vr = __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vyap);
// Middle rows: the counter starts at what is left of 1 << 14 after the first
// weight; every pass adds a row sum weighted by vCy and spends Cy.
159 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
161 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
162 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), vCy));
// Last row: weighted by whatever remains in the counter.
165 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1, vxap, vCx);
166 vr = __lsx_vadd_w(vr, __lsx_vmul_w(__lsx_vsrli_w(vx, 4), __lsx_vreplgr2vr_w(j)));
// Remove the remaining scale: two 14-bit weight factors minus the 4 bits
// already shifted out above leaves 24. Then saturate and pack the word lanes
// down to bytes and store the low 32 bits as the output pixel.
168 vr = __lsx_vsrli_w(vr, 24);
169 vr = __lsx_vpickev_h(__lsx_vldi(0), __lsx_vsat_wu(vr, 15));
170 vr = __lsx_vpickev_b(__lsx_vldi(0), __lsx_vsat_hu(vr, 7));
171 *dptr = __lsx_vpickve2gr_w(vr, 0);
// Hand the row lambda to the shared dispatcher; how the rows are split is
// decided there and is not visible in this file.
178 multithread_pixels_function(isi, dh, scaleSection);
181template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
182 int dw,
int dh,
int dow,
int sow);
184template void qt_qimageScaleAARGBA_up_x_down_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
185 int dw,
int dh,
int dow,
int sow);
187template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
188 int dw,
int dh,
int dow,
int sow);
190template void qt_qimageScaleAARGBA_down_x_up_y_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
191 int dw,
int dh,
int dow,
int sow);
193template void qt_qimageScaleAARGBA_down_xy_lsx<
false>(QImageScaleInfo *isi,
unsigned int *dest,
194 int dw,
int dh,
int dow,
int sow);
196template void qt_qimageScaleAARGBA_down_xy_lsx<
true>(QImageScaleInfo *isi,
unsigned int *dest,
197 int dw,
int dh,
int dow,
int sow);