#ifndef QDRAWINGPRIMITIVE_LSX_P_H
#define QDRAWINGPRIMITIVE_LSX_P_H

#include <QtGui/private/qtguiglobal_p.h>
#include <private/qsimd_p.h>
#ifdef __loongarch_sx

QT_BEGIN_NAMESPACE

/*
 * Multiply the components of pixelVector by alphaChannel.
 * Each 32-bit component of alphaChannel must be in the form 0x00AA00AA.
 * colorMask must be 0x00ff00ff in each 32-bit component.
 * half must be 0x0080 in each 16-bit lane.
 */
inline static void Q_DECL_VECTORCALL
BYTE_MUL_LSX(__m128i &pixelVector, __m128i alphaChannel, __m128i colorMask, __m128i half)
{
    /* 1. Separate the channels into two vectors so each channel gets 16 bits
       of headroom for the multiply: each 16-bit lane of pixelVectorAG holds
       0x00AA or 0x00GG, each lane of pixelVectorRB holds 0x00RR or 0x00BB. */
    __m128i pixelVectorAG = __lsx_vsrli_h(pixelVector, 8);
    __m128i pixelVectorRB = __lsx_vand_v(pixelVector, colorMask);

    /* 2. Multiply each 16-bit lane by the alpha channel. */
    pixelVectorAG = __lsx_vmul_h(pixelVectorAG, alphaChannel);
    pixelVectorRB = __lsx_vmul_h(pixelVectorRB, alphaChannel);

    /* 3. Divide by 255 using the shift approximation
       x/255 ~= (x + x/256 + 0x80) / 256. First compute (x + x/256 + 0x80)... */
    pixelVectorRB = __lsx_vadd_h(pixelVectorRB, __lsx_vsrli_h(pixelVectorRB, 8));
    pixelVectorRB = __lsx_vadd_h(pixelVectorRB, half);
    pixelVectorAG = __lsx_vadd_h(pixelVectorAG, __lsx_vsrli_h(pixelVectorAG, 8));
    pixelVectorAG = __lsx_vadd_h(pixelVectorAG, half);

    /* ...then divide by 256. */
    pixelVectorRB = __lsx_vsrli_h(pixelVectorRB, 8);
    /* For AG the result already sits in the high byte of each 16-bit lane, so
       masking with ~colorMask both divides by 256 and leaves the byte in its
       final position, in a single instruction. */
    pixelVectorAG = __lsx_vandn_v(colorMask, pixelVectorAG);

    /* 4. Recombine the two channel pairs. */
    pixelVector = __lsx_vor_v(pixelVectorAG, pixelVectorRB);
}
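
/* Illustrative scalar reference (hypothetical helper, not part of Qt): per
 * pixel, BYTE_MUL_LSX computes the same value as this byte-wise multiply with
 * the /255 approximation described above. */
inline uint byteMulScalarSketch(uint x, uint a)
{
    uint rb = (x & 0x00ff00ff) * a;
    rb = ((rb + ((rb >> 8) & 0x00ff00ff) + 0x00800080) >> 8) & 0x00ff00ff;
    uint ag = ((x >> 8) & 0x00ff00ff) * a;
    ag = (ag + ((ag >> 8) & 0x00ff00ff) + 0x00800080) & 0xff00ff00;
    return ag | rb;
}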

/*
 * Interpolate the pixels of srcVector and dstVector:
 * per byte, result = (srcVector * alpha + dstVector * (255 - alpha)) / 255.
 * Each 32-bit component of alphaChannel must be in the form 0x00AA00AA, and
 * oneMinusAlphaChannel must hold 255 - alpha in the same layout.
 * colorMask must be 0x00ff00ff in each 32-bit component;
 * half must be 0x0080 in each 16-bit lane.
 */
inline static void Q_DECL_VECTORCALL
INTERPOLATE_PIXEL_255_LSX(__m128i srcVector, __m128i &dstVector, __m128i alphaChannel,
                          __m128i oneMinusAlphaChannel, __m128i colorMask, __m128i half)
{
    /* interpolate AG */
    __m128i srcVectorAG = __lsx_vsrli_h(srcVector, 8);
    __m128i dstVectorAG = __lsx_vsrli_h(dstVector, 8);
    __m128i srcVectorAGalpha = __lsx_vmul_h(srcVectorAG, alphaChannel);
    __m128i dstVectorAGoneMinusAlpha = __lsx_vmul_h(dstVectorAG, oneMinusAlphaChannel);
    __m128i finalAG = __lsx_vadd_h(srcVectorAGalpha, dstVectorAGoneMinusAlpha);
    finalAG = __lsx_vadd_h(finalAG, __lsx_vsrli_h(finalAG, 8));
    finalAG = __lsx_vadd_h(finalAG, half);
    finalAG = __lsx_vandn_v(colorMask, finalAG);

    /* interpolate RB */
    __m128i srcVectorRB = __lsx_vand_v(srcVector, colorMask);
    __m128i dstVectorRB = __lsx_vand_v(dstVector, colorMask);
    __m128i srcVectorRBalpha = __lsx_vmul_h(srcVectorRB, alphaChannel);
    __m128i dstVectorRBoneMinusAlpha = __lsx_vmul_h(dstVectorRB, oneMinusAlphaChannel);
    __m128i finalRB = __lsx_vadd_h(srcVectorRBalpha, dstVectorRBoneMinusAlpha);
    finalRB = __lsx_vadd_h(finalRB, __lsx_vsrli_h(finalRB, 8));
    finalRB = __lsx_vadd_h(finalRB, half);
    finalRB = __lsx_vsrli_h(finalRB, 8);

    /* combine AG and RB */
    dstVector = __lsx_vor_v(finalAG, finalRB);
}
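
/* Illustrative scalar reference (hypothetical helper, not part of Qt): with
 * b == 255 - a, each byte becomes (x*a + y*b) / 255 using the same shift
 * approximation as above. The b == 255 - a precondition keeps the 16-bit
 * intermediate lanes from overflowing. */
inline uint interpolatePixel255ScalarSketch(uint x, uint a, uint y, uint b)
{
    uint rb = (x & 0x00ff00ff) * a + (y & 0x00ff00ff) * b;
    rb = ((rb + ((rb >> 8) & 0x00ff00ff) + 0x00800080) >> 8) & 0x00ff00ff;
    uint ag = ((x >> 8) & 0x00ff00ff) * a + ((y >> 8) & 0x00ff00ff) * b;
    ag = (ag + ((ag >> 8) & 0x00ff00ff) + 0x00800080) & 0xff00ff00;
    return ag | rb;
}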

// Blend the four premultiplied ARGB32 pixels in srcVector over dst[x..x+3].
// nullVector, half, one, colorMask and alphaMask are loop-invariant constants;
// see BLEND_SOURCE_OVER_ARGB32_LSX below for their expected values.
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_LSX_helper(quint32 *dst, int x, __m128i srcVector,
                                    __m128i nullVector, __m128i half, __m128i one,
                                    __m128i colorMask, __m128i alphaMask)
{
    const __m128i srcVectorAlpha = __lsx_vand_v(srcVector, alphaMask);
    __m128i vseq = __lsx_vseq_w(srcVectorAlpha, alphaMask);
    v4i32 vseq_res = (v4i32)__lsx_vmsknz_b(vseq);
    if (vseq_res[0] == 0x0000ffff) {
        /* All four alphas are 255: src is fully opaque, just copy it. */
        __lsx_vst(srcVector, &dst[x], 0);
    } else {
        __m128i vseq_n = __lsx_vseq_w(srcVectorAlpha, nullVector);
        v4i32 vseq_n_res = (v4i32)__lsx_vmsknz_b(vseq_n);
        if (vseq_n_res[0] != 0x0000ffff) {
            /* Not fully transparent: do the real blend. Expand 255 - alpha
               into the 0x00AA00AA layout expected by BYTE_MUL_LSX. */
            __m128i alphaChannel = __lsx_vsrli_w(srcVector, 24);
            alphaChannel = __lsx_vor_v(alphaChannel, __lsx_vslli_w(alphaChannel, 16));
            alphaChannel = __lsx_vsub_h(one, alphaChannel);

            __m128i dstVector = __lsx_vld(&dst[x], 0);
            BYTE_MUL_LSX(dstVector, alphaChannel, colorMask, half);

            /* result = src + dst * (255 - alpha) / 255 */
            const __m128i result = __lsx_vadd_b(srcVector, dstVector);
            __lsx_vst(result, &dst[x], 0);
        }
    }
}
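
/* Illustrative scalar form of the general case above (hypothetical helper,
 * not part of Qt): premultiplied source-over, d = s + d * (255 - s.alpha) / 255.
 * The vector version adds two fast paths on top: store src directly when all
 * four alphas are 255, and leave dst untouched when all four are 0. */
inline void blendSourceOverScalarSketch(quint32 &dst, quint32 src)
{
    dst = src + byteMulScalarSketch(dst, 255 - (src >> 24));
}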

// Blend a scanline of premultiplied ARGB32 src over dst (source-over).
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_LSX(quint32 *dst, const quint32 *src, int length)
{
    int x = 0;

    /* Use the scalar path until dst is 16-byte aligned. */
    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        blend_pixel(dst[x], src[x]);
    }

    const __m128i alphaMask = __lsx_vreplgr2vr_w(0xff000000);
    const __m128i nullVector = __lsx_vreplgr2vr_w(0);
    const __m128i half = __lsx_vreplgr2vr_h(0x80);
    const __m128i one = __lsx_vreplgr2vr_h(0xff);
    const __m128i colorMask = __lsx_vreplgr2vr_w(0x00ff00ff);

    for (; x < length - 3; x += 4) {
        const __m128i srcVector = __lsx_vld((const __m128i *)&src[x], 0);
        BLEND_SOURCE_OVER_ARGB32_LSX_helper(dst, x, srcVector, nullVector, half, one, colorMask, alphaMask);
    }
    SIMD_EPILOGUE(x, length, 3) {
        blend_pixel(dst[x], src[x]);
    }
}
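
/* The loop shape above is the usual Qt SIMD scanline pattern: a scalar
 * prologue until dst reaches 16-byte alignment, a 4-pixels-per-iteration
 * vector body, then a scalar epilogue for the last 0-3 pixels. A hypothetical
 * call site (illustrative only):
 *
 *     BLEND_SOURCE_OVER_ARGB32_LSX(dstScanline, srcScanline, width);
 */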

// Blend a scanline of premultiplied ARGB32 src over dst, with an extra
// constant alpha applied to src first.
inline static void Q_DECL_VECTORCALL
BLEND_SOURCE_OVER_ARGB32_WITH_CONST_ALPHA_LSX(quint32 *dst, const quint32 *src,
                                              int length, uint const_alpha)
{
    int x = 0;

    ALIGNMENT_PROLOGUE_16BYTES(dst, x, length) {
        blend_pixel(dst[x], src[x], const_alpha);
    }

    const __m128i nullVector = __lsx_vreplgr2vr_w(0);
    const __m128i half = __lsx_vreplgr2vr_h(0x80);
    const __m128i one = __lsx_vreplgr2vr_h(0xff);
    const __m128i colorMask = __lsx_vreplgr2vr_w(0x00ff00ff);
    const __m128i constAlphaVector = __lsx_vreplgr2vr_h(const_alpha);

    for (; x < length - 3; x += 4) {
        __m128i srcVector = __lsx_vld((const __m128i *)&src[x], 0);
        __m128i vseq = __lsx_vseq_w(srcVector, nullVector);
        v4i32 vseq_res = (v4i32)__lsx_vmsknz_b(vseq);
        /* If all four src pixels are fully transparent, dst is unchanged. */
        if (vseq_res[0] != 0x0000ffff) {
            BYTE_MUL_LSX(srcVector, constAlphaVector, colorMask, half);

            /* Expand 255 - alpha into the 0x00AA00AA layout for BYTE_MUL_LSX. */
            __m128i alphaChannel = __lsx_vsrli_w(srcVector, 24);
            alphaChannel = __lsx_vor_v(alphaChannel, __lsx_vslli_w(alphaChannel, 16));
            alphaChannel = __lsx_vsub_h(one, alphaChannel);

            __m128i dstVector = __lsx_vld((__m128i *)&dst[x], 0);
            BYTE_MUL_LSX(dstVector, alphaChannel, colorMask, half);

            const __m128i result = __lsx_vadd_b(srcVector, dstVector);
            __lsx_vst(result, &dst[x], 0);
        }
    }
    SIMD_EPILOGUE(x, length, 3) {
        blend_pixel(dst[x], src[x], const_alpha);
    }
}
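
/* Illustrative scalar form of the loop body above (hypothetical helper, not
 * part of Qt): attenuate src by const_alpha first, then do a normal
 * premultiplied source-over with the attenuated pixel's alpha. */
inline void blendConstAlphaScalarSketch(quint32 &dst, quint32 src, uint constAlpha)
{
    const quint32 s = byteMulScalarSketch(src, constAlpha);
    dst = s + byteMulScalarSketch(dst, 255 - (s >> 24));
}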

// Splat one float across all four 32-bit float lanes. LSX has no float splat,
// so the bits are moved through an int. (FloatInt is assumed to be the usual
// float/int punning union used alongside these helpers.)
typedef union {
    int i;
    float f;
} FloatInt;

static __m128 __lsx_vreplfr2vr_s(float val)
{
    FloatInt fi_tmpval = {.f = val};
    return (__m128)__lsx_vreplgr2vr_w(fi_tmpval.i);
}

// Compute mul / a in each float lane, refining the hardware reciprocal
// estimate with one Newton-Raphson step.
Q_ALWAYS_INLINE __m128 Q_DECL_VECTORCALL reciprocal_mul_ps(const __m128 a, float mul)
{
    __m128 ia = __lsx_vfrecip_s(a); // Hardware estimate of 1/a.
    // Newton-Raphson step: ia = ia * (2 - ia * a), written as 2*ia - ia*ia*a.
    ia = __lsx_vfsub_s(__lsx_vfadd_s(ia, ia), __lsx_vfmul_s(ia, __lsx_vfmul_s(ia, a)));
    ia = __lsx_vfmul_s(ia, __lsx_vreplfr2vr_s(mul));
    return ia;
}
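
/* Why one Newton-Raphson step suffices: if ia = (1/a)(1 + e) is the hardware
 * estimate, then ia * (2 - ia * a) = (1/a)(1 - e^2), so the relative error is
 * squared (e.g. ~2^-12 becomes ~2^-24, ample for 8- and 16-bit channel math).
 * A scalar sketch, with approxRecip standing in for the hardware estimate
 * (hypothetical, illustrative only):
 *
 *     float ia = approxRecip(a);     // ia ~= 1/a
 *     ia = 2.0f * ia - ia * ia * a;  // ~twice the precision
 *     return ia * mul;
 */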

inline QRgb qUnpremultiply_lsx(QRgb p)
{
    const uint alpha = qAlpha(p);
    if (alpha == 255)
        return p;
    if (alpha == 0)
        return 0;
    // via holds approximately 255 / alpha in each float lane.
    const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(alpha));
    __m128 via = reciprocal_mul_ps(va, 255.0f);
    // Zero-extend the four bytes of p into 32-bit lanes.
    const __m128i shuffleMask = (__m128i)(v16i8){0,16,16,16,1,16,16,16,2,16,16,16,3,16,16,16};
    __m128i vl = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(p), shuffleMask);
    // Scale each channel by 255/alpha, rounding to nearest.
    vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl), via));
    vl = __lsx_vmaxi_w(vl, 0);
    // Pack back to bytes with unsigned saturation and reinsert alpha.
    vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
    vl = __lsx_vinsgr2vr_h(vl, alpha, 3);
    vl = __lsx_vpickev_b(__lsx_vsat_hu(vl, 7), __lsx_vsat_hu(vl, 7));
    return __lsx_vpickve2gr_w(vl, 0);
}
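
/* Illustrative scalar equivalent (hypothetical helper, not part of Qt): each
 * color channel of the premultiplied pixel is rescaled by 255/alpha and
 * rounded, while the alpha byte is kept as-is. */
inline QRgb qUnpremultiplyScalarSketch(QRgb p)
{
    const uint alpha = qAlpha(p);
    if (alpha == 255)
        return p;
    if (alpha == 0)
        return 0;
    const float inv = 255.0f / alpha;
    return qRgba(uint(qRed(p) * inv + 0.5f),
                 uint(qGreen(p) * inv + 0.5f),
                 uint(qBlue(p) * inv + 0.5f), alpha);
}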

template<enum QtPixelOrder PixelOrder>
inline uint qConvertArgb32ToA2rgb30_lsx(QRgb p)
{
    const uint alpha = qAlpha(p);
    if (alpha == 255)
        return qConvertRgb32ToRgb30<PixelOrder>(p);
    if (alpha == 0)
        return 0;
    Q_CONSTEXPR float mult = 1023.0f / (255 >> 6);
    const uint newalpha = (alpha >> 6);
    // Scale factor that unpremultiplies by the old 8-bit alpha and
    // repremultiplies by the quantized 2-bit alpha, widening to 10 bits.
    const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(alpha));
    __m128 via = reciprocal_mul_ps(va, mult * newalpha);
    const __m128i shuffleMask = (__m128i)(v16i8){0,16,16,16,1,16,16,16,2,16,16,16,3,16,16,16};
    __m128i vl = __lsx_vshuf_b(__lsx_vldi(0), __lsx_vreplgr2vr_w(p), shuffleMask);
    vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl), via));
    vl = __lsx_vmaxi_w(vl, 0);
    vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
    uint rgb30 = (newalpha << 30);
    rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 1)) << 10;
    if (PixelOrder == PixelOrderRGB) {
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 2)) << 20;
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 0));
    } else {
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 0)) << 20;
        rgb30 |= ((uint)__lsx_vpickve2gr_h(vl, 2));
    }
    return rgb30;
}
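
/* Worked example of the scale factor above (illustrative): for alpha == 128,
 * newalpha == 2 and each channel is scaled by (1023/3) * 2 / 128 ~= 5.33, so a
 * fully saturated premultiplied channel value of 128 maps to 682, which is
 * exactly 1023 premultiplied by the new alpha 2/3. */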

template<enum QtPixelOrder PixelOrder>
inline uint qConvertRgba64ToRgb32_lsx(QRgba64 p)
{
    if (p.isTransparent())
        return 0;
    __m128i vl = __lsx_vilvl_d(__lsx_vldi(0), __lsx_vldrepl_d(&p, 0));
    if (!p.isOpaque()) {
        // Unpremultiply: scale the 16-bit channels by 65535/alpha.
        const __m128 va = __lsx_vffint_s_w(__lsx_vreplgr2vr_w(p.alpha()));
        __m128 via = reciprocal_mul_ps(va, 65535.0f);
        vl = __lsx_vilvl_h(__lsx_vldi(0), vl);
        vl = __lsx_vftintrne_w_s(__lsx_vfmul_s(__lsx_vffint_s_w(vl), via));
        vl = __lsx_vmaxi_w(vl, 0);
        vl = __lsx_vpickev_h(__lsx_vsat_wu(vl, 15), __lsx_vsat_wu(vl, 15));
        vl = __lsx_vinsgr2vr_h(vl, p.alpha(), 3);
    }

    if (PixelOrder == PixelOrderBGR) {
        /* Swap the R and B halfword lanes. */
        const __m128i shuffleMask = (__m128i)(v8i16){2, 1, 0, 3, 4, 5, 6, 7};
        vl = __lsx_vshuf_h(shuffleMask, __lsx_vldi(0), vl);
    }

    /* Convert the 16-bit channels down to 8 bits, with rounding. */
    vl = __lsx_vilvl_h(__lsx_vldi(0), vl);
    vl = __lsx_vadd_w(vl, __lsx_vreplgr2vr_w(128));
    vl = __lsx_vsub_w(vl, __lsx_vsrli_w(vl, 8));
    vl = __lsx_vsrli_w(vl, 8);
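
    /* The add/sub/shift trio above is a rounding division by 257, which maps
     * 16-bit channels onto 8 bits (65535 / 257 == 255). Scalar form
     * (illustrative only):
     *
     *     uint div257(uint x) { uint t = x + 128; return (t - (t >> 8)) >> 8; }
     */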
    vl = __lsx_vpickev_h(__lsx_vsat_w(vl, 15), __lsx_vsat_w(vl, 15));
    __m128i tmp = __lsx_vmaxi_h(vl, 0);
    vl = __lsx_vpickev_b(__lsx_vsat_hu(tmp, 7), __lsx_vsat_hu(tmp, 7));
    return __lsx_vpickve2gr_w(vl, 0);
}

QT_END_NAMESPACE

#endif // __loongarch_sx

#endif // QDRAWINGPRIMITIVE_LSX_P_H