Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qimagescale_neon.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3
5#include "qimage.h"
6#include <private/qtguiglobal_p.h>
7#include <private/qsimd_p.h>
8
9#if QT_CONFIG(qtgui_threadpool)
10#include <private/qlatch_p.h>
11#include <qthreadpool.h>
12#include <private/qguiapplication_p.h>
13#include <private/qthreadpool_p.h>
14#endif
15
16#if defined(__ARM_NEON__)
17
18QT_BEGIN_NAMESPACE
19
20using namespace QImageScale;
21
22template<typename T>
23static inline void multithread_pixels_function(QImageScaleInfo *isi, int dh, const T &scaleSection)
24{
25#if QT_CONFIG(qtgui_threadpool)
26 int segments = (qsizetype(isi->sh) * isi->sw) / (1<<16);
27 segments = std::min(segments, dh);
28 QThreadPool *threadPool = QGuiApplicationPrivate::qtGuiThreadPool();
29 if (segments > 1 && threadPool && !threadPool->contains(QThread::currentThread())) {
30 QLatch semaphore(segments);
31 int y = 0;
32 for (int i = 0; i < segments; ++i) {
33 int yn = (dh - y) / (segments - i);
34 threadPool->start([&, y, yn]() {
35 scaleSection(y, y + yn);
36 semaphore.countDown();
37 });
38 y += yn;
39 }
40 semaphore.wait();
41 return;
42 }
43#endif
44 scaleSection(0, dh);
45}
46
47inline static uint32x4_t qt_qimageScaleAARGBA_helper(const unsigned int *pix, int xyap, int Cxy, int step)
48{
49 uint32x2_t vpix32 = vmov_n_u32(*pix);
50 uint16x4_t vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
51 uint32x4_t vx = vmull_n_u16(vpix16, xyap);
52 int i;
53 for (i = (1 << 14) - xyap; i > Cxy; i -= Cxy) {
54 pix += step;
55 vpix32 = vmov_n_u32(*pix);
56 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
57 vx = vaddq_u32(vx, vmull_n_u16(vpix16, Cxy));
58 }
59 pix += step;
60 vpix32 = vmov_n_u32(*pix);
61 vpix16 = vget_low_u16(vmovl_u8(vreinterpret_u8_u32(vpix32)));
62 vx = vaddq_u32(vx, vmull_n_u16(vpix16, i));
63 return vx;
64}
65
66template<bool RGB>
67void qt_qimageScaleAARGBA_up_x_down_y_neon(QImageScaleInfo *isi, unsigned int *dest,
68 int dw, int dh, int dow, int sow)
69{
70 const unsigned int **ypoints = isi->ypoints;
71 int *xpoints = isi->xpoints;
72 int *xapoints = isi->xapoints;
73 int *yapoints = isi->yapoints;
74
75 /* go through every scanline in the output buffer */
76 auto scaleSection = [&] (int yStart, int yEnd) {
77 for (int y = yStart; y < yEnd; ++y) {
78 int Cy = yapoints[y] >> 16;
79 int yap = yapoints[y] & 0xffff;
80
81 unsigned int *dptr = dest + (y * dow);
82 for (int x = 0; x < dw; x++) {
83 const unsigned int *sptr = ypoints[y] + xpoints[x];
84 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, yap, Cy, sow);
85
86 int xap = xapoints[x];
87 if (xap > 0) {
88 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + 1, yap, Cy, sow);
89
90 vx = vmulq_n_u32(vx, 256 - xap);
91 vr = vmulq_n_u32(vr, xap);
92 vx = vaddq_u32(vx, vr);
93 vx = vshrq_n_u32(vx, 8);
94 }
95 vx = vshrq_n_u32(vx, 14);
96 const uint16x4_t vx16 = vmovn_u32(vx);
97 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
98 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
99 if (RGB)
100 *dptr |= 0xff000000;
101 dptr++;
102 }
103 }
104 };
105 multithread_pixels_function(isi, dh, scaleSection);
106}
107
108template<bool RGB>
109void qt_qimageScaleAARGBA_down_x_up_y_neon(QImageScaleInfo *isi, unsigned int *dest,
110 int dw, int dh, int dow, int sow)
111{
112 const unsigned int **ypoints = isi->ypoints;
113 int *xpoints = isi->xpoints;
114 int *xapoints = isi->xapoints;
115 int *yapoints = isi->yapoints;
116
117 /* go through every scanline in the output buffer */
118 auto scaleSection = [&] (int yStart, int yEnd) {
119 for (int y = yStart; y < yEnd; ++y) {
120 unsigned int *dptr = dest + (y * dow);
121 for (int x = 0; x < dw; x++) {
122 int Cx = xapoints[x] >> 16;
123 int xap = xapoints[x] & 0xffff;
124
125 const unsigned int *sptr = ypoints[y] + xpoints[x];
126 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
127
128 int yap = yapoints[y];
129 if (yap > 0) {
130 uint32x4_t vr = qt_qimageScaleAARGBA_helper(sptr + sow, xap, Cx, 1);
131
132 vx = vmulq_n_u32(vx, 256 - yap);
133 vr = vmulq_n_u32(vr, yap);
134 vx = vaddq_u32(vx, vr);
135 vx = vshrq_n_u32(vx, 8);
136 }
137 vx = vshrq_n_u32(vx, 14);
138 const uint16x4_t vx16 = vmovn_u32(vx);
139 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
140 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
141 if (RGB)
142 *dptr |= 0xff000000;
143 dptr++;
144 }
145 }
146 };
147 multithread_pixels_function(isi, dh, scaleSection);
148}
149
150template<bool RGB>
151void qt_qimageScaleAARGBA_down_xy_neon(QImageScaleInfo *isi, unsigned int *dest,
152 int dw, int dh, int dow, int sow)
153{
154 const unsigned int **ypoints = isi->ypoints;
155 int *xpoints = isi->xpoints;
156 int *xapoints = isi->xapoints;
157 int *yapoints = isi->yapoints;
158
159 auto scaleSection = [&] (int yStart, int yEnd) {
160 for (int y = yStart; y < yEnd; ++y) {
161 int Cy = yapoints[y] >> 16;
162 int yap = yapoints[y] & 0xffff;
163
164 unsigned int *dptr = dest + (y * dow);
165 for (int x = 0; x < dw; x++) {
166 const int Cx = xapoints[x] >> 16;
167 const int xap = xapoints[x] & 0xffff;
168
169 const unsigned int *sptr = ypoints[y] + xpoints[x];
170 uint32x4_t vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
171 vx = vshrq_n_u32(vx, 4);
172 uint32x4_t vr = vmulq_n_u32(vx, yap);
173
174 int j;
175 for (j = (1 << 14) - yap; j > Cy; j -= Cy) {
176 sptr += sow;
177 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
178 vx = vshrq_n_u32(vx, 4);
179 vx = vmulq_n_u32(vx, Cy);
180 vr = vaddq_u32(vr, vx);
181 }
182 sptr += sow;
183 vx = qt_qimageScaleAARGBA_helper(sptr, xap, Cx, 1);
184 vx = vshrq_n_u32(vx, 4);
185 vx = vmulq_n_u32(vx, j);
186 vr = vaddq_u32(vr, vx);
187
188 vx = vshrq_n_u32(vr, 24);
189 const uint16x4_t vx16 = vmovn_u32(vx);
190 const uint8x8_t vx8 = vmovn_u16(vcombine_u16(vx16, vx16));
191 *dptr = vget_lane_u32(vreinterpret_u32_u8(vx8), 0);
192 if (RGB)
193 *dptr |= 0xff000000;
194 dptr++;
195 }
196 }
197 };
198 multithread_pixels_function(isi, dh, scaleSection);
199}
200
201template void qt_qimageScaleAARGBA_up_x_down_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
202 int dw, int dh, int dow, int sow);
203
204template void qt_qimageScaleAARGBA_up_x_down_y_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
205 int dw, int dh, int dow, int sow);
206
207template void qt_qimageScaleAARGBA_down_x_up_y_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
208 int dw, int dh, int dow, int sow);
209
210template void qt_qimageScaleAARGBA_down_x_up_y_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
211 int dw, int dh, int dow, int sow);
212
213template void qt_qimageScaleAARGBA_down_xy_neon<false>(QImageScaleInfo *isi, unsigned int *dest,
214 int dw, int dh, int dow, int sow);
215
216template void qt_qimageScaleAARGBA_down_xy_neon<true>(QImageScaleInfo *isi, unsigned int *dest,
217 int dw, int dh, int dow, int sow);
218
219QT_END_NAMESPACE
220
221#endif