7#include <private/qsimd_p.h>
9#if defined(__ARM_NEON__)
13using namespace QImageScale;
15inline static uint32x4_t qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
int step)
17 uint32x2_t vpix32 = vmov_n_u32(*pix);
18 uint16x4_t vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
19 uint32x4_t vx = vmull_n_u16(vpix16, xyap);
21 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
23 vpix32 = vmov_n_u32(*pix);
24 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
25 vx = vaddq_u32(vx, vmull_n_u16(vpix16, Cxy));
28 vpix32 = vmov_n_u32(*pix);
29 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
30 vx = vaddq_u32(vx, vmull_n_u16(vpix16, i));
35void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi,
unsigned int *dest,
36 int dw,
int dh,
int dow,
int sow)
38 const unsigned int **ypoints = isi->ypoints;
39 int *xpoints = isi->xpoints;
40 int *xapoints = isi->xapoints;
41 int *yapoints = isi->yapoints;
44 auto scaleSection = [&] (
int yStart,
int yEnd) {
45 for (
int y = yStart; y < yEnd; ++y) {
46 int Cy = yapoints[y] >> 16;
47 int yap = yapoints[y] & 0xffff;
49 unsigned int *dptr = dest + (y * dow);
50 for (
int x = 0; x < dw; x++) {
51 const unsigned int *sptr = ypoints[y] + xpoints[x];
52 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
54 int xap = xapoints[x];
56 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
58 vx = vmulq_n_u32(vx, 256 - xap);
59 vr = vmulq_n_u32(vr, xap);
60 vx = vaddq_u32(vx, vr);
61 vx = vshrq_n_u32(vx, 8);
63 vx = vshrq_n_u32(vx, 14);
64 const uint16x4_t vx16 = vmovn_u32(vx);
65 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
66 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
73 multithread_pixels_function(isi, dh, scaleSection);
77void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi,
unsigned int *dest,
78 int dw,
int dh,
int dow,
int sow)
80 const unsigned int **ypoints = isi->ypoints;
81 int *xpoints = isi->xpoints;
82 int *xapoints = isi->xapoints;
83 int *yapoints = isi->yapoints;
86 auto scaleSection = [&] (
int yStart,
int yEnd) {
87 for (
int y = yStart; y < yEnd; ++y) {
88 unsigned int *dptr = dest + (y * dow);
89 for (
int x = 0; x < dw; x++) {
90 int Cx = xapoints[x] >> 16;
91 int xap = xapoints[x] & 0xffff;
93 const unsigned int *sptr = ypoints[y] + xpoints[x];
94 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
96 int yap = yapoints[y];
98 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
100 vx = vmulq_n_u32(vx, 256 - yap);
101 vr = vmulq_n_u32(vr, yap);
102 vx = vaddq_u32(vx, vr);
103 vx = vshrq_n_u32(vx, 8);
105 vx = vshrq_n_u32(vx, 14);
106 const uint16x4_t vx16 = vmovn_u32(vx);
107 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
108 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
115 multithread_pixels_function(isi, dh, scaleSection);
119void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi,
unsigned int *dest,
120 int dw,
int dh,
int dow,
int sow)
122 const unsigned int **ypoints = isi->ypoints;
123 int *xpoints = isi->xpoints;
124 int *xapoints = isi->xapoints;
125 int *yapoints = isi->yapoints;
127 auto scaleSection = [&] (
int yStart,
int yEnd) {
128 for (
int y = yStart; y < yEnd; ++y) {
129 int Cy = yapoints[y] >> 16;
130 int yap = yapoints[y] & 0xffff;
132 unsigned int *dptr = dest + (y * dow);
133 for (
int x = 0; x < dw; x++) {
134 const int Cx = xapoints[x] >> 16;
135 const int xap = xapoints[x] & 0xffff;
137 const unsigned int *sptr = ypoints[y] + xpoints[x];
138 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
139 vx = vshrq_n_u32(vx, 4);
140 uint32x4_t vr = vmulq_n_u32(vx, yap);
143 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
145 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
146 vx = vshrq_n_u32(vx, 4);
147 vx = vmulq_n_u32(vx, Cy);
148 vr = vaddq_u32(vr, vx);
151 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
152 vx = vshrq_n_u32(vx, 4);
153 vx = vmulq_n_u32(vx, j);
154 vr = vaddq_u32(vr, vx);
156 vx = vshrq_n_u32(vr, 24);
157 const uint16x4_t vx16 = vmovn_u32(vx);
158 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
159 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
166 multithread_pixels_function(isi, dh, scaleSection);
169template void qt_qimageScaleAARGBA_up_x_down_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
170 int dw,
int dh,
int dow,
int sow);
172template void qt_qimageScaleAARGBA_up_x_down_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
173 int dw,
int dh,
int dow,
int sow);
175template void qt_qimageScaleAARGBA_down_x_up_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
176 int dw,
int dh,
int dow,
int sow);
178template void qt_qimageScaleAARGBA_down_x_up_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
179 int dw,
int dh,
int dow,
int sow);
181template void qt_qimageScaleAARGBA_down_xy_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
182 int dw,
int dh,
int dow,
int sow);
184template void qt_qimageScaleAARGBA_down_xy_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
185 int dw,
int dh,
int dow,
int sow);