6#ifdef QT_COMPILER_SUPPORTS_SSE2
12template<
int a,
int r,
int b,
int g>
13void convert_to_ARGB32_sse2(
const QVideoFrame &frame, uchar *output)
15 FETCH_INFO_PACKED(frame)
16 MERGE_LOOPS(width, height, stride, 4)
17 quint32 *argb =
reinterpret_cast<quint32*>(output);
19 const __m128i zero = _mm_setzero_si128();
20#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
21 const uchar shuffle = _MM_SHUFFLE(a, r, b, g);
23 const uchar shuffle = _MM_SHUFFLE(3-a, 3-r, 3-b, 3-g);
26 using Pixel =
const ArgbPixel<a, r, g, b>;
28 for (
int y = 0; y < height; ++y) {
29 auto *pixel =
reinterpret_cast<
const Pixel *>(src);
32 QT_MEDIA_ALIGN(16, argb, x, width) {
33 *argb = pixel->convert();
38 for (; x < width - 3; x += 4) {
39 __m128i pixelData = _mm_loadu_si128(
reinterpret_cast<
const __m128i*>(pixel));
41 __m128i lowPixels = _mm_unpacklo_epi8(pixelData, zero);
42 __m128i highPixels = _mm_unpackhi_epi8(pixelData, zero);
43 lowPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(lowPixels, shuffle), shuffle);
44 highPixels = _mm_shufflelo_epi16(_mm_shufflehi_epi16(highPixels, shuffle), shuffle);
45 pixelData = _mm_packus_epi16(lowPixels, highPixels);
46 _mm_store_si128(
reinterpret_cast<__m128i*>(argb), pixelData);
51 for (; x < width; ++x) {
52 *argb = pixel->convert();
63void QT_FASTCALL qt_convert_ARGB8888_to_ARGB32_sse2(
const QVideoFrame &frame, uchar *output)
65 convert_to_ARGB32_sse2<0, 1, 2, 3>(frame, output);
68void QT_FASTCALL qt_convert_ABGR8888_to_ARGB32_sse2(
const QVideoFrame &frame, uchar *output)
70 convert_to_ARGB32_sse2<0, 3, 2, 1>(frame, output);
73void QT_FASTCALL qt_convert_RGBA8888_to_ARGB32_sse2(
const QVideoFrame &frame, uchar *output)
75 convert_to_ARGB32_sse2<3, 0, 1, 2>(frame, output);
78void QT_FASTCALL qt_convert_BGRA8888_to_ARGB32_sse2(
const QVideoFrame &frame, uchar *output)
80 convert_to_ARGB32_sse2<3, 2, 1, 0>(frame, output);
83void QT_FASTCALL qt_copy_pixels_with_mask_sse2(uint32_t *dst,
const uint32_t *src, size_t size, uint32_t mask)
85 const auto mask128 = _mm_set_epi32(mask, mask, mask, mask);
89 QT_MEDIA_ALIGN(16, dst, x, size)
90 *(dst++) = *(src++) | mask;
92 for (; x < size - (4 * 4 - 1); x += 4 * 4) {
93 const auto srcData0 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(src));
94 const auto srcData1 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(src += 4));
95 const auto srcData2 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(src += 4));
96 const auto srcData3 = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(src += 4));
98 _mm_store_si128(
reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData0, mask128));
99 _mm_store_si128(
reinterpret_cast<__m128i *>(dst += 4), _mm_or_si128(srcData1, mask128));
100 _mm_store_si128(
reinterpret_cast<__m128i *>(dst += 4), _mm_or_si128(srcData2, mask128));
101 _mm_store_si128(
reinterpret_cast<__m128i *>(dst += 4), _mm_or_si128(srcData3, mask128));
107 for (; x < size - 3; x += 4) {
108 const auto srcData = _mm_loadu_si128(
reinterpret_cast<
const __m128i *>(src));
110 _mm_store_si128(
reinterpret_cast<__m128i *>(dst), _mm_or_si128(srcData, mask128));
117 for (; x < size; ++x)
118 *(dst++) = *(src++) | mask;