Qt
Internal/Contributor docs for the Qt SDK. Note: These are NOT official API docs; those are found at https://doc.qt.io/
Loading...
Searching...
No Matches
qdrawingprimitive_lsx_p.h
Go to the documentation of this file.
1// Copyright (C) 2024 Loongson Technology Corporation Limited.
2// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
3// Qt-Security score:significant reason:default
4
5#ifndef QDRAWINGPRIMITIVE_LSX_P_H
6#define QDRAWINGPRIMITIVE_LSX_P_H
7
#include <QtGui/private/qtguiglobal_p.h>
#include <private/qsimd_p.h>
#include "qrgba64_p.h"

#include <string.h>
12
13#ifdef __loongarch_sx
14
15//
16// W A R N I N G
17// -------------
18//
19// This file is not part of the Qt API. It exists purely as an
20// implementation detail. This header file may change from version to
21// version without notice, or even be removed.
22//
23// We mean it.
24//
25
26QT_BEGIN_NAMESPACE
27
28/*
29 * Multiply the components of pixelVector by alphaChannel
30 * Each 32bits components of alphaChannel must be in the form 0x00AA00AA
31 * colorMask must have 0x00ff00ff on each 32 bits component
32 * half must have the value 128 (0x80) for each 32 bits component
33 */
inline static void Q_DECL_VECTORCALL
BYTE_MUL_LSX(__m128i &pixelVector, __m128i alphaChannel, __m128i colorMask, __m128i half)
{
    // In-place: every color component of the four ARGB32 pixels in
    // pixelVector is multiplied by alphaChannel and divided by 255
    // (with rounding), i.e. pixelVector = pixelVector * alpha / 255
    // per byte. See the preconditions on the parameters in the
    // comment above this function.

    /* 1. separate the colors in 2 vectors so each color is on 16 bits
       (in order to be multiplied by the alpha
       each 32 bit of dstVectorAG are in the form 0x00AA00GG
       each 32 bit of dstVectorRB are in the form 0x00RR00BB */
    __m128i pixelVectorAG = __lsx_vsrli_h(pixelVector, 8);
    __m128i pixelVectorRB = __lsx_vand_v(pixelVector, colorMask);

    /* 2. multiply the vectors by the alpha channel */
    pixelVectorAG = __lsx_vmul_h(pixelVectorAG, alphaChannel);
    pixelVectorRB = __lsx_vmul_h(pixelVectorRB, alphaChannel);

    /* 3. divide by 255, that's the tricky part.
       we do it like for BYTE_MUL(), with bit shift: X/255 ~= (X + X/256 + rounding)/256 */
    /** so first (X + X/256 + rounding) */
    pixelVectorRB = __lsx_vadd_h(pixelVectorRB, __lsx_vsrli_h(pixelVectorRB, 8));
    pixelVectorRB = __lsx_vadd_h(pixelVectorRB, half);
    pixelVectorAG = __lsx_vadd_h(pixelVectorAG, __lsx_vsrli_h(pixelVectorAG, 8));
    pixelVectorAG = __lsx_vadd_h(pixelVectorAG, half);

    /** second divide by 256 */
    pixelVectorRB = __lsx_vsrli_h(pixelVectorRB, 8);
    /** for AG, we could >> 8 to divide followed by << 8 to put the
        bytes in the correct position. By masking instead, we execute
        only one instruction */
    pixelVectorAG = __lsx_vandn_v(colorMask, pixelVectorAG);

    /* 4. combine the 2 pairs of colors */
    pixelVector = __lsx_vor_v(pixelVectorAG, pixelVectorRB);
}
66
67/*
68 * Each 32bits components of alphaChannel must be in the form 0x00AA00AA
69 * oneMinusAlphaChannel must be 255 - alpha for each 32 bits component
70 * colorMask must have 0x00ff00ff on each 32 bits component
71 * half must have the value 128 (0x80) for each 32 bits component
72 */
inline static void Q_DECL_VECTORCALL
INTERPOLATE_PIXEL_255_LSX(__m128i srcVector, __m128i &dstVector, __m128i alphaChannel,
                          __m128i oneMinusAlphaChannel, __m128i colorMask, __m128i half)
{
    // In-place on dstVector: per byte,
    //   dst = (src * alpha + dst * (255 - alpha)) / 255
    // using the same rounded divide-by-255 trick as BYTE_MUL_LSX
    // (X/255 ~= (X + X/256 + 0x80) / 256). AG and RB component pairs
    // are interpolated separately on 16-bit lanes, then recombined.

    /* interpolate AG */
    __m128i srcVectorAG = __lsx_vsrli_h(srcVector, 8);
    __m128i dstVectorAG = __lsx_vsrli_h(dstVector, 8);
    __m128i srcVectorAGalpha = __lsx_vmul_h(srcVectorAG, alphaChannel);
    __m128i dstVectorAGoneMinusAlphalpha = __lsx_vmul_h(dstVectorAG, oneMinusAlphaChannel);
    __m128i finalAG = __lsx_vadd_h(srcVectorAGalpha, dstVectorAGoneMinusAlphalpha);
    finalAG = __lsx_vadd_h(finalAG, __lsx_vsrli_h(finalAG, 8));
    finalAG = __lsx_vadd_h(finalAG, half);
    // Mask instead of ">> 8 then << 8": leaves A and G already in the
    // high byte of each 16-bit lane.
    finalAG = __lsx_vandn_v(colorMask, finalAG);

    /* interpolate RB */
    __m128i srcVectorRB = __lsx_vand_v(srcVector, colorMask);
    __m128i dstVectorRB = __lsx_vand_v(dstVector, colorMask);
    __m128i srcVectorRBalpha = __lsx_vmul_h(srcVectorRB, alphaChannel);
    __m128i dstVectorRBoneMinusAlphalpha = __lsx_vmul_h(dstVectorRB, oneMinusAlphaChannel);
    __m128i finalRB = __lsx_vadd_h(srcVectorRBalpha, dstVectorRBoneMinusAlphalpha);
    finalRB = __lsx_vadd_h(finalRB, __lsx_vsrli_h(finalRB, 8));
    finalRB = __lsx_vadd_h(finalRB, half);
    finalRB = __lsx_vsrli_h(finalRB, 8);

    /* combine */
    dstVector = __lsx_vor_v(finalAG, finalRB);
}
100
101// same as BLEND_SOURCE_OVER_ARGB32_LSX, but for one vector srcVector
// same as BLEND_SOURCE_OVER_ARGB32_LSX, but for one vector srcVector
// Blends four premultiplied ARGB32 source pixels over dst[x..x+3]:
//   result = src + dst * (1 - src_alpha)
// with fast paths when all four pixels are fully opaque (plain store)
// or fully transparent (no-op).
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_LSX_helper(quint32 *dst, int x, __m128i srcVector,
                                    __m128i nullVector, __m128i half, __m128i one,
                                    __m128i colorMask, __m128i alphaMask)
{
    const __m128i srcVectorAlpha = __lsx_vand_v(srcVector, alphaMask);
    __m128i vseq = __lsx_vseq_w(srcVectorAlpha, alphaMask);
    // __lsx_vmsknz_b collapses the byte-wise comparison result into a
    // 16-bit mask (one bit per byte) in element 0; 0xffff means the
    // compare held for all four 32-bit pixels.
    v4i32 vseq_res = (v4i32)__lsx_vmsknz_b(vseq);
    if (vseq_res[0] == (0x0000ffff)) {
        /* all opaque */
        __lsx_vst(srcVector, &dst[x], 0);
    } else {
        __m128i vseq_n = __lsx_vseq_w(srcVectorAlpha, nullVector);
        v4i32 vseq_n_res = (v4i32)__lsx_vmsknz_b(vseq_n);
        if (vseq_n_res[0] != (0x0000ffff)) {
            /* not fully transparent */
            /* extract the alpha channel on 2 x 16 bits */
            /* so we have room for the multiplication */
            /* each 32 bits will be in the form 0x00AA00AA */
            /* with A being the 1 - alpha */
            __m128i alphaChannel = __lsx_vsrli_w(srcVector, 24);
            alphaChannel = __lsx_vor_v(alphaChannel, __lsx_vslli_w(alphaChannel, 16));
            alphaChannel = __lsx_vsub_h(one, alphaChannel);

            __m128i dstVector = __lsx_vld(&dst[x], 0);
            BYTE_MUL_LSX(dstVector, alphaChannel, colorMask, half);

            /* result = s + d * (1-alpha) */
            const __m128i result = __lsx_vadd_b(srcVector, dstVector);
            __lsx_vst(result, &dst[x], 0);
        }
    }
}
135
136// Basically blend src over dst with the const alpha defined as constAlphaVector.
137// nullVector, half, one, colorMask are constant across the whole image/texture, and should be defined as:
138//const __m128i nullVector = __lsx_vreplgr2vr_w(0);
139//const __m128i half = __lsx_vreplgr2vr_h(0x80);
140//const __m128i one = __lsx_vreplgr2vr_h(0xff);
141//const __m128i colorMask = __lsx_vreplgr2vr_w(0x00ff00ff);
142//const __m128i alphaMask = __lsx_vreplgr2vr_w(0xff000000);
143//
144// The computation being done is:
145// result = s + d * (1-alpha)
146// with shortcuts if fully opaque or fully transparent.
// Source-over blend of `length` premultiplied ARGB32 pixels from src
// onto dst: dst = src + dst * (1 - src_alpha), four pixels at a time.
// Scalar blend_pixel handles the unaligned head and the <4-pixel tail.
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_LSX(quint32 *dst, const quint32 *src, int length)
{
    int x = 0;

    /* First, get dst aligned. */
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        blend_pixel(dst[x], src[x]);
    }

    // Loop-invariant constants; see the comment block above for their
    // expected values in the helper.
    const __m128i alphaMask = __lsx_vreplgr2vr_w(0xff000000);
    const __m128i nullVector = __lsx_vreplgr2vr_w(0);
    const __m128i half = __lsx_vreplgr2vr_h(0x80);
    const __m128i one = __lsx_vreplgr2vr_h(0xff);
    const __m128i colorMask = __lsx_vreplgr2vr_w(0x00ff00ff);

    for (; x < length-3; x += 4) {
        // src may be unaligned; __lsx_vld performs an unaligned load.
        const __m128i srcVector = __lsx_vld((const __m128i *)&src[x], 0);
        BLEND_SOURCE_OVER_ARGB32_LSX_helper(dst, x, srcVector, nullVector, half, one, colorMask, alphaMask);
    }
    SIMD_EPILOGUE(x, length, 3) {
        blend_pixel(dst[x], src[x]);
    }
}
171
172// Basically blend src over dst with the const alpha defined as constAlphaVector.
173// The computation being done is:
174// dest = (s + d * sia) * ca + d * cia
175// = s * ca + d * (sia * ca + cia)
176// = s * ca + d * (1 - sa*ca)
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_LSX(quint32 *dst, const quint32 *src, int length, uint const_alpha)
{
    // Source-over with an extra constant alpha (0..255):
    //   dst = src * const_alpha + dst * (1 - src_alpha * const_alpha)
    // implemented as: scale src by const_alpha first, then do a normal
    // source-over with the scaled source's alpha.
    int x = 0;

    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        blend_pixel(dst[x], src[x], const_alpha);
    }

    const __m128i nullVector = __lsx_vreplgr2vr_w(0);
    const __m128i half = __lsx_vreplgr2vr_h(0x80);
    const __m128i one = __lsx_vreplgr2vr_h(0xff);
    const __m128i colorMask = __lsx_vreplgr2vr_w(0x00ff00ff);
    const __m128i constAlphaVector = __lsx_vreplgr2vr_h(const_alpha);

    for (; x < length-3; x += 4) {
        __m128i srcVector = __lsx_vld((const __m128i *)&src[x], 0);
        // Skip the whole vector when all four source pixels are zero
        // (fully transparent premultiplied pixels are all-zero words).
        __m128i vseq = __lsx_vseq_w(srcVector, nullVector);
        v4i32 vseq_res = (v4i32)__lsx_vmsknz_b(vseq);
        if (vseq_res[0] != 0x0000ffff) {
            // src *= const_alpha (per byte, rounded /255)
            BYTE_MUL_LSX(srcVector, constAlphaVector, colorMask, half);

            // Build 0x00AA00AA per pixel with AA = 255 - scaled alpha.
            __m128i alphaChannel = __lsx_vsrli_w(srcVector, 24);
            alphaChannel = __lsx_vor_v(alphaChannel, __lsx_vslli_w(alphaChannel, 16));
            alphaChannel = __lsx_vsub_h(one, alphaChannel);

            __m128i dstVector = __lsx_vld((__m128i *)&dst[x], 0);
            BYTE_MUL_LSX(dstVector, alphaChannel, colorMask, half);

            const __m128i result = __lsx_vadd_b(srcVector, dstVector);
            __lsx_vst(result, &dst[x], 0);
        }
    }
    SIMD_EPILOGUE(x, length, 3) {
        blend_pixel(dst[x], src[x], const_alpha);
    }
}
214
// Bitwise float <-> int reinterpretation helper. Union type punning is
// well-defined in C and supported as a documented extension by GCC for
// C++ (the only visible use is reading .i right after writing .f).
typedef union
{
    int i;
    float f;
} FloatInt;
220
221/* float type data load instructions */
222static __m128 __lsx_vreplfr2vr_s(float val)
223{
224 FloatInt fi_tmpval = {.f = val};
225 return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
226}
227
// Computes mul / a, elementwise, using the hardware reciprocal
// approximation refined by one Newton-Raphson iteration.
Q_ALWAYS_INLINE __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(const __m128 a, float mul)
{
    __m128 ia = __lsx_vfrecip_s(a); // Approximate 1/a
    // Improve precision of ia using Newton-Raphson:
    //   x' = x * (2 - a*x) = 2x - x*x*a
    ia = __lsx_vfsub_s(__lsx_vfadd_s(ia, ia), __lsx_vfmul_s(ia, __lsx_vfmul_s(ia, a)));
    ia = __lsx_vfmul_s(ia, __lsx_vreplfr2vr_s(mul));
    return ia;
}
236
// Converts one premultiplied ARGB32 pixel to unpremultiplied ARGB32:
// each color channel becomes round(c * 255 / alpha); alpha unchanged.
inline QRgb qUnpremultiply_lsx(QRgb p)
{
    const uint alpha = qAlpha(p);
    // Fast paths: already unpremultiplied, or fully transparent.
    if (alpha == 255)
        return p;
    if (alpha == 0)
        return 0;
    const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(alpha));
    __m128 via = reciprocal_mul_ps(va, 255.0f); // Approximate 1/a
    // Expand the 4 pixel bytes into the low byte of each 32-bit lane;
    // index 16 selects from the first (zero) source, zero-filling the
    // upper bytes.
    const __m128i shuffleMask = (__m128i)(v16i8){0,16,16,16,1,16,16,16,2,16,16,16,3,16,16,16};
    __m128i vl = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(p), shuffleMask);
    // channels * (255/alpha), rounded to nearest even.
    vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl), via));
    vl = __lsx_vmaxi_w(vl, 0);
    // Saturating narrow 32 -> 16 bit.
    vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
    // Restore the original alpha in the A lane (lane 3 = byte 3 of ARGB32).
    vl = __lsx_vinsgr2vr_h(vl, alpha, 3);
    // Saturating narrow 16 -> 8 bit, then extract the pixel.
    vl = __lsx_vpickev_b(__lsx_vsat_hu(vl, 7), __lsx_vsat_hu(vl, 7));
    return __lsx_vpickve2gr_w(vl, 0);
}
255
// Converts one premultiplied ARGB32 pixel to A2RGB30 (2-bit alpha,
// 10-bit channels), unpremultiplying by the 8-bit alpha and
// re-premultiplying by the quantized 2-bit alpha in a single scale.
template<enum QtPixelOrder PixelOrder>
inline uint qConvertArgb32ToA2rgb30_lsx(QRgb p)
{
    const uint alpha = qAlpha(p);
    // Fast paths: opaque uses the plain 32->30 expansion; transparent is 0.
    if (alpha == 255)
        return qConvertRgb32ToRgb30<PixelOrder>(p);
    if (alpha == 0)
        return 0;
    // 1023/3: scale from 8-bit channel range to 10-bit per unit of
    // quantized alpha (255 >> 6 == 3 is the max 2-bit alpha).
    Q_CONSTEXPR float mult = 1023.0f / (255 >> 6);
    const uint newalpha = (alpha >> 6);
    const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(alpha));
    // Combined factor: (mult * newalpha) / alpha.
    __m128 via = reciprocal_mul_ps(va, mult * newalpha);
    // Expand the 4 pixel bytes to one 32-bit lane each (16 = zero fill).
    const __m128i shuffleMask = (__m128i)(v16i8){0,16,16,16,1,16,16,16,2,16,16,16,3,16,16,16};
    __m128i vl = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(p), shuffleMask);
    vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl), via));
    vl = __lsx_vmaxi_w(vl, 0);
    vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
    // Pack: 2-bit alpha in the top, then 10 bits per channel.
    // Lane 0 = blue byte, lane 1 = green, lane 2 = red (ARGB32 layout).
    uint rgb30 = (newalpha << 30);
    rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 1)) << 10;
    if (PixelOrder == PixelOrderRGB) {
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 2)) << 20;
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 0));
    } else {
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 0)) << 20;
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 2));
    }
    return rgb30;
}
284
// Converts one 16-bit-per-channel QRgba64 pixel to premultiplied 32-bit
// RGB32/BGR32: unpremultiplies if needed, optionally swaps R/B, then
// narrows each 16-bit channel to 8 bits with rounding (x/257).
template<enum QtPixelOrder PixelOrder>
inline uint qConvertRgba64ToRgb32_lsx(QRgba64 p)
{
    if (p.isTransparent())
        return 0;
    // Load the 64-bit pixel into the low half of the vector, zero the rest.
    __m128i vl = __lsx_vilvl_d(__lsx_vldi(0), __lsx_vldrepl_d(&p, 0));
    if (!p.isOpaque()) {
        // Unpremultiply: channel * 65535 / alpha, rounded, then put the
        // original 16-bit alpha back in lane 3.
        const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(p.alpha()));
        __m128 via = reciprocal_mul_ps(va, 65535.0f);
        vl = __lsx_vilvl_h(__lsx_vldi(0), vl);
        vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl) , via));
        vl = __lsx_vmaxi_w(vl, 0);
        vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
        vl = __lsx_vinsgr2vr_h(vl, p.alpha(), 3);
    }
    if (PixelOrder == PixelOrderBGR){
        // Swap the R and B 16-bit lanes (lanes 0 and 2).
        const __m128i shuffleMask = (__m128i)(v8i16){2, 1, 0, 3, 4, 5, 6, 7};
        vl = __lsx_vshuf_h(shuffleMask, __lsx_vldi(0), vl);
    }
    // Widen to 32 bits and narrow 16 -> 8 bit with rounding:
    // ((x + 128) - ((x + 128) >> 8)) >> 8 == round(x / 257).
    vl = __lsx_vilvl_h(__lsx_vldi(0), vl);
    vl = __lsx_vadd_w(vl, __lsx_vreplgr2vr_w(128));
    vl = __lsx_vsub_w(vl, __lsx_vsrli_w(vl, 8));
    vl = __lsx_vsrli_w(vl, 8);
    // Saturating packs 32 -> 16 -> 8 bit, then extract the ARGB32 word.
    vl = __lsx_vpickev_h(__lsx_vsat_w(vl, 15), __lsx_vsat_w(vl, 15));
    __m128i tmp = __lsx_vmaxi_h(vl, 0);
    vl = __lsx_vpickev_b(__lsx_vsat_hu(tmp, 7), __lsx_vsat_hu(tmp, 7));
    return __lsx_vpickve2gr_w(vl, 0);
}
313
314QT_END_NAMESPACE
315
316#endif // __loongarch_sx
317
318#endif // QDRAWINGPRIMITIVE_LSX_P_H
Combined button and popup list for selecting options.