Qt
Internal/Contributor docs for the Qt SDK. <b>Note:</b> These are NOT official API docs; those are found <a href='https://doc.qt.io/'>here</a>.
Loading...
Searching...
No Matches
qendian.cpp
Go to the documentation of this file.
1// Copyright (C) 2016 The Qt Company Ltd.
2// Copyright (C) 2018 Intel Corporation.
3// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
4
5#include "qendian.h"
6
7#include "qalgorithms.h"
8#include <private/qsimd_p.h>
9
11
730#if defined(__SSSE3__)
731using ShuffleMask = uchar[16];
732alignas(16) static const ShuffleMask shuffleMasks[3] = {
733 // 16-bit
734 {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
735 // 32-bit
736 {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
737 // 64-bit
738 {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}
739};
740
741static size_t sseSwapLoop(const uchar *src, size_t bytes, uchar *dst,
742 const __m128i *shuffleMaskPtr) noexcept
743{
744 size_t i = 0;
745 const __m128i shuffleMask = _mm_load_si128(shuffleMaskPtr);
746
747# ifdef __AVX2__
748 const __m256i shuffleMask256 = _mm256_inserti128_si256(_mm256_castsi128_si256(shuffleMask), shuffleMask, 1);
749 for ( ; i + sizeof(__m256i) <= bytes; i += sizeof(__m256i)) {
750 __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i));
751 data = _mm256_shuffle_epi8(data, shuffleMask256);
752 _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + i), data);
753 }
754# else
755 for ( ; i + 2 * sizeof(__m128i) <= bytes; i += 2 * sizeof(__m128i)) {
756 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
757 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i) + 1);
758 data1 = _mm_shuffle_epi8(data1, shuffleMask);
759 data2 = _mm_shuffle_epi8(data2, shuffleMask);
760 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data1);
761 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i) + 1, data2);
762 }
763# endif
764
765 if (i + sizeof(__m128i) <= bytes) {
766 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
767 data = _mm_shuffle_epi8(data, shuffleMask);
768 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data);
769 i += sizeof(__m128i);
770 }
771
772 return i;
773}
774
775template <typename T> static Q_ALWAYS_INLINE
776size_t simdSwapLoop(const uchar *src, size_t bytes, uchar *dst) noexcept
777{
778 auto shuffleMaskPtr = reinterpret_cast<const __m128i *>(shuffleMasks[0]);
779 shuffleMaskPtr += qCountTrailingZeroBits(sizeof(T)) - 1;
780 size_t i = sseSwapLoop(src, bytes, dst, shuffleMaskPtr);
781
782 // epilogue
783 for (size_t _i = 0; i < bytes && _i < sizeof(__m128i); i += sizeof(T), _i += sizeof(T))
784 qbswap(qFromUnaligned<T>(src + i), dst + i);
785
786 // return the total, so the bswapLoop below does nothing
787 return bytes;
788}
789#elif defined(__SSE2__)
790template <typename T> static
791size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
792{
793 // no generic version: we can't do 32- and 64-bit swaps easily,
794 // so we won't try
795 return 0;
796}
797
798template <> size_t simdSwapLoop<quint16>(const uchar *src, size_t bytes, uchar *dst) noexcept
799{
800 auto swapEndian = [](__m128i &data) {
801 __m128i lows = _mm_srli_epi16(data, 8);
802 __m128i highs = _mm_slli_epi16(data, 8);
803 data = _mm_xor_si128(lows, highs);
804 };
805
806 size_t i = 0;
807 for ( ; i + 2 * sizeof(__m128i) <= bytes; i += 2 * sizeof(__m128i)) {
808 __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
809 __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i) + 1);
810 swapEndian(data1);
811 swapEndian(data2);
812 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data1);
813 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i) + 1, data2);
814 }
815
816 if (i + sizeof(__m128i) <= bytes) {
817 __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
818 swapEndian(data);
819 _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data);
820 i += sizeof(__m128i);
821 }
822
823 // epilogue
824 for (size_t _i = 0 ; i < bytes && _i < sizeof(__m128i); i += sizeof(quint16), _i += sizeof(quint16))
825 qbswap(qFromUnaligned<quint16>(src + i), dst + i);
826
827 // return the total, so the bswapLoop below does nothing
828 return bytes;
829}
830#else
831template <typename T> static Q_ALWAYS_INLINE
832size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
833{
834 return 0;
835}
836#endif
837
838template <typename T> static Q_ALWAYS_INLINE
839void *bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
840{
841 // Buffers cannot partially overlap: either they're identical or totally
842 // disjoint (note: they can be adjacent).
843 if (src != dst) {
846 if (s < d)
847 Q_ASSERT(s + n <= d);
848 else
849 Q_ASSERT(d + n <= s);
850 }
851
852 size_t i = simdSwapLoop<T>(src, n, dst);
853
854 for (; i < n; i += sizeof(T))
855 qbswap(qFromUnaligned<T>(src + i), dst + i);
856 return dst + i;
857}
858
859template<>
860void *qbswap<2>(const void *source, qsizetype n, void *dest) noexcept
861{
862 const uchar *src = reinterpret_cast<const uchar *>(source);
863 uchar *dst = reinterpret_cast<uchar *>(dest);
864
865 return bswapLoop<quint16>(src, n << 1, dst);
866}
867
868template<>
869void *qbswap<4>(const void *source, qsizetype n, void *dest) noexcept
870{
871 const uchar *src = reinterpret_cast<const uchar *>(source);
872 uchar *dst = reinterpret_cast<uchar *>(dest);
873
874 return bswapLoop<quint32>(src, n << 2, dst);
875}
876
877template<>
878void *qbswap<8>(const void *source, qsizetype n, void *dest) noexcept
879{
880 const uchar *src = reinterpret_cast<const uchar *>(source);
881 uchar *dst = reinterpret_cast<uchar *>(dest);
882
883 return bswapLoop<quint64>(src, n << 3, dst);
884}
885
Combined button and popup list for selecting options.
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
#define Q_ALWAYS_INLINE
void * qbswap< 8 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:878
void * qbswap< 4 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:869
static Q_ALWAYS_INLINE size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
Definition qendian.cpp:832
static Q_ALWAYS_INLINE void * bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
Definition qendian.cpp:839
void * qbswap< 2 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:860
constexpr T qbswap(T source)
Definition qendian.h:103
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLenum src
GLenum GLenum dst
GLfloat n
GLsizei GLsizei GLchar * source
GLdouble s
[6]
Definition qopenglext.h:235
#define Q_ASSERT(cond)
Definition qrandom.cpp:47
unsigned char uchar
Definition qtypes.h:32
unsigned short quint16
Definition qtypes.h:48
size_t quintptr
Definition qtypes.h:167
ptrdiff_t qsizetype
Definition qtypes.h:165