d7/def/qendian_8cpp_source.html

// Copyright (C) 2016 The Qt Company Ltd.

// Copyright (C) 2018 Intel Corporation.

// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only


#include "qendian.h"


#include "qalgorithms.h"

#include <private/qsimd_p.h>


QT_BEGIN_NAMESPACE


#if defined(__SSSE3__)

// One 16-byte shuffle control vector.
using ShuffleMask = uchar[16];

// Shuffle controls used by sseSwapLoop() with _mm_shuffle_epi8, one row per
// element width. Inside every group of 2, 4 or 8 bytes the byte indices are
// listed in reverse order, so applying a row reverses the bytes of each
// element held in the vector.
// alignas(16): the rows are read with the aligned load _mm_load_si128.
// Row order matters: simdSwapLoop() picks the row from sizeof(T).
alignas(16) static const ShuffleMask shuffleMasks[3] = {

    // 16-bit elements: swap the bytes of each pair

    {1, 0, 3, 2,  5, 4, 7, 6,  9, 8, 11, 10,  13, 12, 15, 14},

    // 32-bit elements: reverse each group of four bytes

    {3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12},

    // 64-bit elements: reverse each group of eight bytes

    {7, 6, 5, 4, 3, 2, 1, 0,   15, 14, 13, 12, 11, 10, 9, 8}

};


// Reverses the bytes of every element in the leading part of the buffer with
// byte-shuffle instructions, working on whole vectors only.
//
//   src, dst        input and output buffers; accessed with unaligned
//                   loads and stores
//   bytes           number of bytes available in each buffer
//   shuffleMaskPtr  16-byte aligned shuffle control that selects the
//                   element width
//
// Returns the number of bytes processed, i.e. bytes rounded down to a
// multiple of 16. Whatever is left over is the caller's job.
static size_t sseSwapLoop(const uchar *src, size_t bytes, uchar *dst,
                          const __m128i *shuffleMaskPtr) noexcept
{
    const __m128i mask128 = _mm_load_si128(shuffleMaskPtr);
    size_t done = 0;

#  ifdef __AVX2__
    // Put the same 128-bit control into both halves of the 256-bit register.
    const __m256i mask256 =
            _mm256_inserti128_si256(_mm256_castsi128_si256(mask128), mask128, 1);

    while (done + sizeof(__m256i) <= bytes) {
        const __m256i in = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + done));
        const __m256i out = _mm256_shuffle_epi8(in, mask256);
        _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + done), out);
        done += sizeof(__m256i);
    }
#  else
    // Without AVX2, handle two 128-bit vectors per iteration.
    while (done + 2 * sizeof(__m128i) <= bytes) {
        const __m128i *in = reinterpret_cast<const __m128i *>(src + done);
        __m128i *out = reinterpret_cast<__m128i *>(dst + done);

        const __m128i first = _mm_shuffle_epi8(_mm_loadu_si128(in), mask128);
        const __m128i second = _mm_shuffle_epi8(_mm_loadu_si128(in + 1), mask128);

        _mm_storeu_si128(out, first);
        _mm_storeu_si128(out + 1, second);
        done += 2 * sizeof(__m128i);
    }
#  endif

    // Fewer than 32 bytes remain here, so at most one more 128-bit vector fits.
    if (done + sizeof(__m128i) <= bytes) {
        const __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + done));
        const __m128i out = _mm_shuffle_epi8(in, mask128);
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + done), out);
        done += sizeof(__m128i);
    }

    return done;
}


// SSSE3 implementation: byte-swaps the whole buffer of T-sized elements.
// The vector part is delegated to sseSwapLoop(); the leftover (less than one
// 128-bit vector) is finished element by element.
// Always returns bytes, which tells bswapLoop() that nothing is left to do.
template <typename T> static Q_ALWAYS_INLINE
size_t simdSwapLoop(const uchar *src, size_t bytes, uchar *dst) noexcept
{
    // Row index in shuffleMasks: sizeof(T) of 2, 4, 8 maps to row 0, 1, 2.
    const auto firstMask = reinterpret_cast<const __m128i *>(shuffleMasks[0]);
    const __m128i *mask = firstMask + (qCountTrailingZeroBits(sizeof(T)) - 1);

    size_t done = sseSwapLoop(src, bytes, dst, mask);

    // Scalar tail; the second counter caps the loop at one vector's worth
    // of elements.
    size_t tail = 0;
    while (done < bytes && tail < sizeof(__m128i)) {
        qbswap(qFromUnaligned<T>(src + done), dst + done);
        done += sizeof(T);
        tail += sizeof(T);
    }

    // Report the full size so the generic loop in the caller is skipped.
    return bytes;
}

#elif defined(__SSE2__)

// Generic case for builds that have SSE2 but not SSSE3: performs no SIMD
// work at all. Returning 0 means "no bytes handled", so bswapLoop() runs its
// scalar loop over the entire buffer. Only quint16 has a specialization.
template <typename T> static

size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept

{

    // no generic version: we can't do 32- and 64-bit swaps easily,

    // so we won't try

    return 0;

}


// SSE2 specialization for 16-bit elements: byte-swaps the whole buffer.
// Whole vectors are processed with shifts; the leftover (less than one
// 128-bit vector) is finished element by element.
// Always returns bytes, which tells bswapLoop() that nothing is left to do.
template <> size_t simdSwapLoop<quint16>(const uchar *src, size_t bytes, uchar *dst) noexcept
{
    // Swap the two bytes of every 16-bit lane: the high byte is shifted down,
    // the low byte is shifted up, and the two halves are combined.
    const auto swapped = [](__m128i v) {
        const __m128i lows = _mm_srli_epi16(v, 8);
        const __m128i highs = _mm_slli_epi16(v, 8);
        return _mm_xor_si128(lows, highs);
    };

    size_t done = 0;

    // Two 128-bit vectors per iteration.
    while (done + 2 * sizeof(__m128i) <= bytes) {
        const __m128i *in = reinterpret_cast<const __m128i *>(src + done);
        __m128i *out = reinterpret_cast<__m128i *>(dst + done);

        const __m128i first = swapped(_mm_loadu_si128(in));
        const __m128i second = swapped(_mm_loadu_si128(in + 1));

        _mm_storeu_si128(out, first);
        _mm_storeu_si128(out + 1, second);
        done += 2 * sizeof(__m128i);
    }

    // Fewer than 32 bytes remain here, so at most one more vector fits.
    if (done + sizeof(__m128i) <= bytes) {
        const __m128i in = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + done));
        _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + done), swapped(in));
        done += sizeof(__m128i);
    }

    // Scalar tail; the second counter caps the loop at one vector's worth
    // of elements.
    size_t tail = 0;
    while (done < bytes && tail < sizeof(__m128i)) {
        qbswap(qFromUnaligned<quint16>(src + done), dst + done);
        done += sizeof(quint16);
        tail += sizeof(quint16);
    }

    // Report the full size so the generic loop in the caller is skipped.
    return bytes;
}

#else

// Fallback used when neither __SSSE3__ nor __SSE2__ is defined: there is no
// SIMD path. Returning 0 means "no bytes handled", so bswapLoop() runs its
// scalar loop over the entire buffer.
template <typename T> static Q_ALWAYS_INLINE


size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept

{

    return 0;

}


#endif


// Byte-swaps n bytes' worth of T-sized elements from src into dst.
// The SIMD helper runs first and reports how many bytes it handled; the
// scalar loop then covers whatever remains.
// Returns a pointer just past the last output byte written.
template <typename T> static Q_ALWAYS_INLINE
void *bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
{
    // Precondition, checked via Q_ASSERT: the two n-byte ranges either
    // coincide exactly or do not overlap at all (one may start right where
    // the other ends).
    if (src != dst) {
        const quintptr s = quintptr(src);
        const quintptr d = quintptr(dst);
        if (s < d) {
            Q_ASSERT(s + n <= d);
        } else {
            Q_ASSERT(d + n <= s);
        }
    }

    size_t done = simdSwapLoop<T>(src, n, dst);

    // Scalar path for everything the SIMD helper did not cover.
    while (done < n) {
        qbswap(qFromUnaligned<T>(src + done), dst + done);
        done += sizeof(T);
    }

    return dst + done;
}


// Byte-swaps n 16-bit elements read from source and stores them in dest.
// Returns a pointer just past the last byte written to dest.
template<>
void *qbswap<2>(const void *source, qsizetype n, void *dest) noexcept
{
    // The loop works on a byte count: n elements of 2 bytes each.
    const auto byteCount = n << 1;
    return bswapLoop<quint16>(static_cast<const uchar *>(source), byteCount,
                              static_cast<uchar *>(dest));
}


// Byte-swaps n 32-bit elements read from source and stores them in dest.
// Returns a pointer just past the last byte written to dest.
template<>
void *qbswap<4>(const void *source, qsizetype n, void *dest) noexcept
{
    // The loop works on a byte count: n elements of 4 bytes each.
    const auto byteCount = n << 2;
    return bswapLoop<quint32>(static_cast<const uchar *>(source), byteCount,
                              static_cast<uchar *>(dest));
}


// Byte-swaps n 64-bit elements read from source and stores them in dest.
// Returns a pointer just past the last byte written to dest.
template<>
void *qbswap<8>(const void *source, qsizetype n, void *dest) noexcept
{
    // The loop works on a byte count: n elements of 8 bytes each.
    const auto byteCount = n << 3;
    return bswapLoop<quint64>(static_cast<const uchar *>(source), byteCount,
                              static_cast<uchar *>(dest));
}


QT_END_NAMESPACE

i
i
[1]
Definition doc_src_containers.cpp:169

QT_BEGIN_NAMESPACE
Combined button and popup list for selecting options.
Definition qstandardpaths_haiku.cpp:21

QT_END_NAMESPACE
Definition qsharedpointer.cpp:1590

qalgorithms.h

qCountTrailingZeroBits
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
Definition qalgorithms.h:319

Q_ALWAYS_INLINE
#define Q_ALWAYS_INLINE
Definition qcompilerdetection.h:1318

qbswap< 8 >
void * qbswap< 8 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:878

qbswap< 4 >
void * qbswap< 4 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:869

simdSwapLoop
static Q_ALWAYS_INLINE size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
Definition qendian.cpp:832

bswapLoop
static Q_ALWAYS_INLINE void * bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
Definition qendian.cpp:839

qbswap< 2 >
void * qbswap< 2 >(const void *source, qsizetype n, void *dest) noexcept
Definition qendian.cpp:860

qendian.h

qbswap
constexpr T qbswap(T source)
Definition qendian.h:103

data
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
Definition qopengles2ext.h:206

src
GLenum src
Definition qopengles2ext.h:335

dst
GLenum GLenum dst
Definition qopengles2ext.h:335

n
GLfloat n
Definition qopengles2ext.h:795

source
GLsizei GLsizei GLchar * source
Definition qopengles2ext.h:952

s
GLdouble s
[6]
Definition qopenglext.h:235

Q_ASSERT
#define Q_ASSERT(cond)
Definition qrandom.cpp:47

uchar
unsigned char uchar
Definition qtypes.h:32

quint16
unsigned short quint16
Definition qtypes.h:48

quintptr
size_t quintptr
Definition qtypes.h:167

qsizetype
ptrdiff_t qsizetype
Definition qtypes.h:165

d
double d
Definition src_corelib_text_qlocale.cpp:9