6#include <private/qtguiglobal_p.h>
7#include <private/qsimd_p.h>
9#if QT_CONFIG(qtgui_threadpool)
10#include <private/qlatch_p.h>
11#include <qthreadpool.h>
12#include <private/qguiapplication_p.h>
13#include <private/qthreadpool_p.h>
16#if defined(__ARM_NEON__)
20using namespace QImageScale;
// Runs scaleSection over the dh destination rows, handing row ranges to the
// Qt GUI thread pool when the source image is large enough to be worth it.
//
//   isi          - scale info; only isi->sh and isi->sw are read here, to size the work.
//   dh           - number of destination rows; also the upper bound on the segment count.
//   scaleSection - callable invoked as scaleSection(firstRow, endRow).
//
// NOTE(review): the embedded line numbers skip (22, 24, 31, 37 and later), so the
// template header for T, the declaration/update of `y`, the wait on the latch
// and the non-threaded path are not visible in this excerpt - confirm against
// the full file.
23static inline void multithread_pixels_function(QImageScaleInfo *isi,
int dh,
const T &scaleSection)
25#if QT_CONFIG(qtgui_threadpool)
// One segment per 1 << 16 (65536) source pixels; the product sh * sw is
// computed as qsizetype rather than int.
26 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
// Never create more segments than there are destination rows.
27 segments = std::min(segments, dh);
28 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
// Go parallel only if there is more than one segment, a pool is available,
// and the calling thread is not itself one of the pool's threads
// (presumably to avoid blocking a pool thread on its own pool - confirm).
29 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
// Latch starts at the segment count; every task counts it down once.
30 QLatch semaphore(segments);
32 for (
int i = 0; i < segments; ++i) {
// Rows for this segment: the rows not yet handed out (dh - y) divided by
// the segments not yet started. Assumes y is the first unassigned row;
// its declaration and increment are not visible here.
33 int yn = (dh - y) / (segments - i);
// y and yn are captured by value so each task keeps its own row range;
// scaleSection and the latch are captured by reference.
34 threadPool->start([&, y, yn]() {
35 scaleSection(y, y + yn);
36 semaphore.countDown();
// Accumulates a weighted sum of 32-bit pixels, keeping the four 8-bit channels
// in the four 32-bit lanes of the result.
//
//   pix  - first source pixel.
//   xyap - weight applied to the first pixel.
//   Cxy  - weight applied to each pixel consumed by the loop.
//   step - not used on any visible line; presumably pix is advanced by step
//          between loads on the skipped lines - confirm against the full file.
//
// The weights are xyap, then Cxy once per loop iteration, then the remainder
// i; since i starts at (1 << 14) - xyap and drops by Cxy each iteration, they
// always add up to 1 << 14.
//
// NOTE(review): the embedded line numbers skip (48, 52, 54, 58-59, 63 and
// later), so the declaration of `i`, the loop's closing brace and the return
// statement are not visible in this excerpt.
47inline static uint32x4_t qt_qimageScaleAARGBA_helper(
const unsigned int *pix,
int xyap,
int Cxy,
int step)
// Load one pixel, widen its four bytes to 16 bits each (low half of the
// widened vector), then multiply by the scalar weight into 32-bit lanes.
49 uint32x2_t vpix32 = vmov_n_u32(*pix);
50 uint16x4_t vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
51 uint32x4_t vx = vmull_n_u16(vpix16, xyap);
// While more than Cxy of the total weight remains, add a pixel at weight Cxy.
53 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
55 vpix32 = vmov_n_u32(*pix);
56 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
57 vx = vaddq_u32(vx, vmull_n_u16(vpix16, Cxy));
// Final pixel takes whatever weight is left over (i).
60 vpix32 = vmov_n_u32(*pix);
61 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
62 vx = vaddq_u32(vx, vmull_n_u16(vpix16, i));
// Scales 32-bit pixels into dest using a weighted accumulation in y (via
// qt_qimageScaleAARGBA_helper with step sow) and a two-pixel blend in x.
//
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables.
//   dest - destination pixels; row y starts at dest + y * dow.
//   dw   - number of x positions processed per row.
//   dh   - number of destination rows, passed to multithread_pixels_function.
//   dow  - distance between destination rows, in unsigned ints.
//   sow  - passed to the helper as its step (presumably the source row pitch
//          in pixels - confirm).
//
// NOTE(review): the embedded line numbers skip (66, 69, 74-75, 80, 85, 87, 89,
// 94, 99-104), so the template header, any guard around the x blend, the
// advance of dptr and the closing braces are not visible in this excerpt.
67void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi,
unsigned int *dest,
68 int dw,
int dh,
int dow,
int sow)
70 const unsigned int **ypoints = isi->ypoints;
71 int *xpoints = isi->xpoints;
72 int *xapoints = isi->xapoints;
73 int *yapoints = isi->yapoints;
// Processes destination rows yStart <= y < yEnd; invoked by
// multithread_pixels_function, possibly from several threads at once.
76 auto scaleSection = [&] (
int yStart,
int yEnd) {
77 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two weights: the high 16 bits (Cy) and the low 16 bits (yap).
78 int Cy = yapoints[y] >> 16;
79 int yap = yapoints[y] & 0xffff;
81 unsigned int *dptr = dest + (y * dow);
82 for (
int x = 0; x < dw; x++) {
83 const unsigned int *sptr = ypoints[y] + xpoints[x];
// Vertical accumulation for the left source column.
84 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
86 int xap = xapoints[x];
// Same accumulation for the neighbouring pixel (sptr + 1), then blend the
// two with weights (256 - xap) and xap and drop the 8 blend bits.
88 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
90 vx = vmulq_n_u32(vx, 256 - xap);
91 vr = vmulq_n_u32(vr, xap);
92 vx = vaddq_u32(vx, vr);
93 vx = vshrq_n_u32(vx, 8);
// The helper's weights sum to 1 << 14, so shifting by 14 returns each
// channel to the 8-bit range.
95 vx = vshrq_n_u32(vx, 14);
// Narrow the four 32-bit lanes to bytes and store them as one 32-bit pixel.
96 const uint16x4_t vx16 = vmovn_u32(vx);
97 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
98 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
105 multithread_pixels_function(isi, dh, scaleSection);
// Scales 32-bit pixels into dest using a weighted accumulation in x (via
// qt_qimageScaleAARGBA_helper with step 1) and a two-pixel blend in y.
//
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables.
//   dest - destination pixels; row y starts at dest + y * dow.
//   dw   - number of x positions processed per row.
//   dh   - number of destination rows, passed to multithread_pixels_function.
//   dow  - distance between destination rows, in unsigned ints.
//   sow  - offset from a source pixel to the second pixel used in the y blend
//          (presumably the source row pitch in pixels - confirm).
//
// NOTE(review): the embedded line numbers skip (108, 111, 116-117, 124, 127,
// 129, 131, 136, 141-146), so the template header, any guard around the y
// blend, the advance of dptr and the closing braces are not visible here.
109void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi,
unsigned int *dest,
110 int dw,
int dh,
int dow,
int sow)
112 const unsigned int **ypoints = isi->ypoints;
113 int *xpoints = isi->xpoints;
114 int *xapoints = isi->xapoints;
115 int *yapoints = isi->yapoints;
// Processes destination rows yStart <= y < yEnd; invoked by
// multithread_pixels_function, possibly from several threads at once.
118 auto scaleSection = [&] (
int yStart,
int yEnd) {
119 for (
int y = yStart; y < yEnd; ++y) {
120 unsigned int *dptr = dest + (y * dow);
121 for (
int x = 0; x < dw; x++) {
// xapoints[x] packs two weights: the high 16 bits (Cx) and the low 16 bits (xap).
122 int Cx = xapoints[x] >> 16;
123 int xap = xapoints[x] & 0xffff;
125 const unsigned int *sptr = ypoints[y] + xpoints[x];
// Horizontal accumulation starting at sptr.
126 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
128 int yap = yapoints[y];
// Same accumulation starting sow pixels further on, then blend the two
// with weights (256 - yap) and yap and drop the 8 blend bits.
130 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
132 vx = vmulq_n_u32(vx, 256 - yap);
133 vr = vmulq_n_u32(vr, yap);
134 vx = vaddq_u32(vx, vr);
135 vx = vshrq_n_u32(vx, 8);
// The helper's weights sum to 1 << 14, so shifting by 14 returns each
// channel to the 8-bit range.
137 vx = vshrq_n_u32(vx, 14);
// Narrow the four 32-bit lanes to bytes and store them as one 32-bit pixel.
138 const uint16x4_t vx16 = vmovn_u32(vx);
139 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
140 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
147 multithread_pixels_function(isi, dh, scaleSection);
// Scales 32-bit pixels into dest using weighted accumulation in both
// directions: qt_qimageScaleAARGBA_helper sums along x (step 1) and the loop
// below sums those results along y with weights yap, Cy and the remainder j.
//
//   isi  - supplies the ypoints, xpoints, xapoints and yapoints tables.
//   dest - destination pixels; row y starts at dest + y * dow.
//   dw   - number of x positions processed per row.
//   dh   - number of destination rows, passed to multithread_pixels_function.
//   dow  - distance between destination rows, in unsigned ints.
//   sow  - not used on any visible line; presumably sptr is advanced by sow
//          between the helper calls on the skipped lines - confirm.
//
// NOTE(review): the embedded line numbers skip (150, 153, 158, 163, 168,
// 173-174, 176, 181-182, 187, 192-197), so the template header, the
// declaration of `j`, the advance of sptr/dptr and the closing braces are not
// visible in this excerpt.
151void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi,
unsigned int *dest,
152 int dw,
int dh,
int dow,
int sow)
154 const unsigned int **ypoints = isi->ypoints;
155 int *xpoints = isi->xpoints;
156 int *xapoints = isi->xapoints;
157 int *yapoints = isi->yapoints;
// Processes destination rows yStart <= y < yEnd; invoked by
// multithread_pixels_function, possibly from several threads at once.
159 auto scaleSection = [&] (
int yStart,
int yEnd) {
160 for (
int y = yStart; y < yEnd; ++y) {
// yapoints[y] packs two weights: the high 16 bits (Cy) and the low 16 bits (yap).
161 int Cy = yapoints[y] >> 16;
162 int yap = yapoints[y] & 0xffff;
164 unsigned int *dptr = dest + (y * dow);
165 for (
int x = 0; x < dw; x++) {
// xapoints[x] is packed the same way: high 16 bits Cx, low 16 bits xap.
166 const int Cx = xapoints[x] >> 16;
167 const int xap = xapoints[x] & 0xffff;
169 const unsigned int *sptr = ypoints[y] + xpoints[x];
// First horizontal sum, weighted by yap. The >> 4 trims the 14-bit
// horizontal scale to 10 bits before the vertical multiply (presumably so
// the 32-bit lanes cannot overflow - confirm).
170 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
171 vx = vshrq_n_u32(vx, 4);
172 uint32x4_t vr = vmulq_n_u32(vx, yap);
// While more than Cy of the vertical weight remains, add a horizontal sum
// at weight Cy.
175 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
177 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
178 vx = vshrq_n_u32(vx, 4);
179 vx = vmulq_n_u32(vx, Cy);
180 vr = vaddq_u32(vr, vx);
// Final horizontal sum takes the leftover vertical weight j.
183 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
184 vx = vshrq_n_u32(vx, 4);
185 vx = vmulq_n_u32(vx, j);
186 vr = vaddq_u32(vr, vx);
// Total scale is 14 (horizontal) - 4 + 14 (vertical) = 24 bits.
188 vx = vshrq_n_u32(vr, 24);
// Narrow the four 32-bit lanes to bytes and store them as one 32-bit pixel.
189 const uint16x4_t vx16 = vmovn_u32(vx);
190 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
191 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
198 multithread_pixels_function(isi, dh, scaleSection);
// Explicit instantiations of the three NEON scalers for both values (false and
// true) of their boolean template argument.
// NOTE(review): the template headers are not visible in this excerpt, so the
// meaning of the boolean argument cannot be confirmed from here.
201template void qt_qimageScaleAARGBA_up_x_down_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
202 int dw,
int dh,
int dow,
int sow);
204template void qt_qimageScaleAARGBA_up_x_down_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
205 int dw,
int dh,
int dow,
int sow);
207template void qt_qimageScaleAARGBA_down_x_up_y_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
208 int dw,
int dh,
int dow,
int sow);
210template void qt_qimageScaleAARGBA_down_x_up_y_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
211 int dw,
int dh,
int dow,
int sow);
213template void qt_qimageScaleAARGBA_down_xy_neon<
false>(QImageScaleInfo *isi,
unsigned int *dest,
214 int dw,
int dh,
int dow,
int sow);
216template void qt_qimageScaleAARGBA_down_xy_neon<
true>(QImageScaleInfo *isi,
unsigned int *dest,
217 int dw,
int dh,
int dow,
int sow);