6#include <private/qsimd_p.h>
8#if defined(__ARM_NEON__)
12using namespace QImageScale;
/*
 * Accumulates a fixed-point weighted sum over a run of packed 32-bit pixels,
 * keeping one 32-bit accumulator lane per 8-bit channel.
 *
 * pix   first pixel of the run
 * xyap  weight applied to the first pixel
 * Cxy   weight applied to each following pixel while more than Cxy of the
 *       total weight is still unassigned
 * step  distance, in pixels, between consecutive samples of the run
 *
 * The weights handed out always add up to 1 << 14: the first pixel gets
 * xyap, intermediate pixels get Cxy each, and the last pixel gets whatever
 * is left. At least two pixels (pix and pix + step) are always read.
 *
 * Returns the per-channel sums; the caller is responsible for shifting the
 * result back down to 8 bits.
 */
inline static uint32x4_t qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step)
{
    // Widen the four 8-bit channels of one packed pixel to 16 bits each.
    const auto unpack = [](const unsigned int *p) -> uint16x4_t {
        return vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vmov_n_u32(*p))));
    };

    uint32x4_t vx = vmull_n_u16(unpack(pix), xyap);

    // 'remaining' is the part of the 1 << 14 total weight not yet handed out.
    int remaining;
    for (remaining = (1 << 14) - xyap; remaining > Cxy; remaining -= Cxy) {
        pix += step;
        vx = vaddq_u32(vx, vmull_n_u16(unpack(pix), Cxy));
    }

    // The final sample receives the leftover weight.
    pix += step;
    vx = vaddq_u32(vx, vmull_n_u16(unpack(pix), remaining));
    return vx;
}
/*
 * Scales with the y axis box-filtered through qt_qimageScaleAARGBA_helper
 * (stepping by sow) and the x axis linearly interpolated between two
 * neighbouring source columns.
 *
 * isi   supplies the ypoints / xpoints / xapoints / yapoints tables
 * dest  destination buffer, addressed as dest + y * dow
 * dw    number of destination pixels written per row
 * dh    number of destination rows, handed to multithread_pixels_function
 * dow   destination row stride in pixels
 * sow   source row stride in pixels
 *
 * RGB   NOTE(review): assumed to select the format without an alpha channel,
 *       for which the top byte of every written pixel is forced to 0xff --
 *       confirm against the non-NEON implementation.
 */
template<bool RGB>
void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest,
                                           int dw, int dh, int dow, int sow)
{
    const unsigned int **ypoints = isi->ypoints;
    int *xpoints = isi->xpoints;
    int *xapoints = isi->xapoints;
    int *yapoints = isi->yapoints;

    /* handle one band [yStart, yEnd) of destination rows */
    auto scaleSection = [&] (int yStart, int yEnd) {
        for (int y = yStart; y < yEnd; ++y) {
            // yapoints packs two values: per-row weight in the high half,
            // weight of the first row in the low half.
            const int Cy = yapoints[y] >> 16;
            const int yap = yapoints[y] & 0xffff;

            unsigned int *dptr = dest + (y * dow);
            for (int x = 0; x < dw; x++) {
                const unsigned int *sptr = ypoints[y] + xpoints[x];
                uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);

                const int xap = xapoints[x];
                // With xap == 0 the blend below reduces to vx itself, so the
                // neighbouring column is neither needed nor read.
                if (xap > 0) {
                    uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);

                    vx = vmulq_n_u32(vx, 256 - xap);
                    vr = vmulq_n_u32(vr, xap);
                    vx = vaddq_u32(vx, vr);
                    vx = vshrq_n_u32(vx, 8);
                }

                // Drop the 14 fractional bits added by the helper's weights,
                // then narrow 32 -> 16 -> 8 bits and repack into one pixel.
                vx = vshrq_n_u32(vx, 14);
                const uint16x4_t vx16 = vmovn_u32(vx);
                const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
                *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
                if (RGB)
                    *dptr |= 0xff000000;
                dptr++;
            }
        }
    };
    multithread_pixels_function(isi, dh, scaleSection);
}
/*
 * Scales with the x axis box-filtered through qt_qimageScaleAARGBA_helper
 * (stepping by one pixel) and the y axis linearly interpolated between two
 * neighbouring source rows.
 *
 * isi   supplies the ypoints / xpoints / xapoints / yapoints tables
 * dest  destination buffer, addressed as dest + y * dow
 * dw    number of destination pixels written per row
 * dh    number of destination rows, handed to multithread_pixels_function
 * dow   destination row stride in pixels
 * sow   source row stride in pixels
 *
 * RGB   NOTE(review): assumed to select the format without an alpha channel,
 *       for which the top byte of every written pixel is forced to 0xff --
 *       confirm against the non-NEON implementation.
 */
template<bool RGB>
void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest,
                                           int dw, int dh, int dow, int sow)
{
    const unsigned int **ypoints = isi->ypoints;
    int *xpoints = isi->xpoints;
    int *xapoints = isi->xapoints;
    int *yapoints = isi->yapoints;

    /* handle one band [yStart, yEnd) of destination rows */
    auto scaleSection = [&] (int yStart, int yEnd) {
        for (int y = yStart; y < yEnd; ++y) {
            unsigned int *dptr = dest + (y * dow);
            for (int x = 0; x < dw; x++) {
                // xapoints packs two values: per-column weight in the high
                // half, weight of the first column in the low half.
                const int Cx = xapoints[x] >> 16;
                const int xap = xapoints[x] & 0xffff;

                const unsigned int *sptr = ypoints[y] + xpoints[x];
                uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);

                const int yap = yapoints[y];
                // With yap == 0 the blend below reduces to vx itself, so the
                // next source row is neither needed nor read.
                if (yap > 0) {
                    uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);

                    vx = vmulq_n_u32(vx, 256 - yap);
                    vr = vmulq_n_u32(vr, yap);
                    vx = vaddq_u32(vx, vr);
                    vx = vshrq_n_u32(vx, 8);
                }

                // Drop the 14 fractional bits added by the helper's weights,
                // then narrow 32 -> 16 -> 8 bits and repack into one pixel.
                vx = vshrq_n_u32(vx, 14);
                const uint16x4_t vx16 = vmovn_u32(vx);
                const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
                *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
                if (RGB)
                    *dptr |= 0xff000000;
                dptr++;
            }
        }
    };
    multithread_pixels_function(isi, dh, scaleSection);
}
/*
 * Scales with both axes box-filtered: each source row is reduced along x by
 * qt_qimageScaleAARGBA_helper, and the per-row results are then combined
 * along y with the same first / per-step / remainder weighting scheme.
 *
 * isi   supplies the ypoints / xpoints / xapoints / yapoints tables
 * dest  destination buffer, addressed as dest + y * dow
 * dw    number of destination pixels written per row
 * dh    number of destination rows, handed to multithread_pixels_function
 * dow   destination row stride in pixels
 * sow   source row stride in pixels
 *
 * RGB   NOTE(review): assumed to select the format without an alpha channel,
 *       for which the top byte of every written pixel is forced to 0xff --
 *       confirm against the non-NEON implementation.
 */
template<bool RGB>
void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
                                       int dw, int dh, int dow, int sow)
{
    const unsigned int **ypoints = isi->ypoints;
    int *xpoints = isi->xpoints;
    int *xapoints = isi->xapoints;
    int *yapoints = isi->yapoints;

    /* handle one band [yStart, yEnd) of destination rows */
    auto scaleSection = [&] (int yStart, int yEnd) {
        for (int y = yStart; y < yEnd; ++y) {
            const int Cy = yapoints[y] >> 16;
            const int yap = yapoints[y] & 0xffff;

            unsigned int *dptr = dest + (y * dow);
            for (int x = 0; x < dw; x++) {
                const int Cx = xapoints[x] >> 16;
                const int xap = xapoints[x] & 0xffff;

                // Each row sum carries 14 fractional bits; 4 of them are
                // dropped before applying the vertical weight (another 14
                // bits) so the running total still fits in 32 bits.
                const unsigned int *sptr = ypoints[y] + xpoints[x];
                uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
                vx = vshrq_n_u32(vx, 4);
                uint32x4_t vr = vmulq_n_u32(vx, yap);

                // 'j' is the part of the 1 << 14 vertical weight still to be
                // handed out; every step moves one source row down.
                int j;
                for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
                    sptr += sow;
                    vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
                    vx = vshrq_n_u32(vx, 4);
                    vx = vmulq_n_u32(vx, Cy);
                    vr = vaddq_u32(vr, vx);
                }
                // The last row receives the leftover weight.
                sptr += sow;
                vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
                vx = vshrq_n_u32(vx, 4);
                vx = vmulq_n_u32(vx, j);
                vr = vaddq_u32(vr, vx);

                // 14 - 4 + 14 = 24 fractional bits remain; remove them, then
                // narrow 32 -> 16 -> 8 bits and repack into one pixel.
                vx = vshrq_n_u32(vr, 24);
                const uint16x4_t vx16 = vmovn_u32(vx);
                const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
                *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
                if (RGB)
                    *dptr |= 0xff000000;
                dptr++;
            }
        }
    };
    multithread_pixels_function(isi, dh, scaleSection);
}
168template void qt_qimageScaleAARGBA_up_x_down_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
169 int dw,
int dh,
int dow,
int sow);
171template void qt_qimageScaleAARGBA_up_x_down_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
172 int dw,
int dh,
int dow,
int sow);
174template void qt_qimageScaleAARGBA_down_x_up_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
175 int dw,
int dh,
int dow,
int sow);
177template void qt_qimageScaleAARGBA_down_x_up_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
178 int dw,
int dh,
int dow,
int sow);
180template void qt_qimageScaleAARGBA_down_xy_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
181 int dw,
int dh,
int dow,
int sow);
183template void qt_qimageScaleAARGBA_down_xy_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
184 int dw,
int dh,
int dow,
int sow);