2017-10-16 17:50:09 +02:00
|
|
|
#pragma once
|
|
|
|
|
2018-05-19 08:26:25 +01:00
|
|
|
#ifndef _MSC_VER
|
2019-09-05 09:01:33 -07:00
|
|
|
#if defined(__WINE__) && defined(__clang__)
|
|
|
|
#pragma push_macro("_WIN32")
|
|
|
|
#undef _WIN32
|
|
|
|
#endif
|
2018-05-09 20:37:49 +02:00
|
|
|
#include <x86intrin.h>
|
2019-09-05 09:01:33 -07:00
|
|
|
#if defined(__WINE__) && defined(__clang__)
|
|
|
|
#pragma pop_macro("_WIN32")
|
|
|
|
#endif
|
2018-05-19 08:26:25 +01:00
|
|
|
#else
|
|
|
|
#include <intrin.h>
|
|
|
|
#endif
|
2018-05-09 20:37:49 +02:00
|
|
|
|
2019-04-04 05:48:42 +02:00
|
|
|
#include "util_likely.h"
|
|
|
|
|
2017-10-16 17:50:09 +02:00
|
|
|
namespace dxvk::bit {
|
|
|
|
|
2017-10-21 17:58:58 +02:00
|
|
|
/**
 * \brief Extracts a range of bits from a value
 *
 * Returns bits \c fst through \c lst of \c value, both
 * inclusive and counted from the least significant bit,
 * shifted down so that bit \c fst ends up at position 0.
 * The caller must ensure that \c fst is smaller than the
 * bit width of \c T and that \c lst is not less than \c fst.
 * \param [in] value Value to extract bits from
 * \param [in] fst Index of the lowest bit to extract
 * \param [in] lst Index of the highest bit to extract
 * \returns The extracted bits
 */
template<typename T>
T extract(T value, uint32_t fst, uint32_t lst) {
  constexpr uint32_t Bits = 8 * sizeof(T);
  const uint32_t count = lst - fst + 1;

  // Shifting by the full width of the type is undefined behaviour
  // (and yields an empty mask on x86), so a range that covers all
  // remaining bits is returned without applying any mask.
  if (count >= Bits)
    return value >> fst;

  return (value >> fst) & ~(~T(0) << count);
}
|
2019-05-09 16:37:45 +02:00
|
|
|
|
|
|
|
/**
 * \brief Single reduction step of a parallel bit count
 *
 * Splits \c n into the bits selected by \c mask and the
 * remaining bits, moves the latter down by \c shift and
 * adds both halves together.
 * \param [in] n Partial sums to reduce
 * \param [in] mask Bits that stay in place
 * \param [in] shift Distance to move the other bits down by
 * \returns The combined partial sums
 */
inline uint32_t popcntStep(uint32_t n, uint32_t mask, uint32_t shift) {
  const uint32_t kept  = n & mask;
  const uint32_t moved = (n & ~mask) >> shift;
  return kept + moved;
}
|
2017-10-16 17:50:09 +02:00
|
|
|
|
2019-05-09 16:37:45 +02:00
|
|
|
/**
 * \brief Counts the number of set bits
 *
 * Repeatedly sums adjacent bit groups of doubling width
 * (1, 2, 4, 8 and 16 bits) using \c popcntStep.
 * \param [in] n Number to count bits of
 * \returns Number of bits set in \c n
 */
inline uint32_t popcnt(uint32_t n) {
  struct Step {
    uint32_t mask;
    uint32_t shift;
  };

  constexpr Step steps[] = {
    { 0x55555555u,  1u },
    { 0x33333333u,  2u },
    { 0x0F0F0F0Fu,  4u },
    { 0x00FF00FFu,  8u },
    { 0x0000FFFFu, 16u },
  };

  for (const Step& step : steps)
    n = popcntStep(n, step.mask, step.shift);

  return n;
}
|
|
|
|
|
2018-05-09 20:09:09 +02:00
|
|
|
/**
 * \brief Counts trailing zero bits
 *
 * \param [in] n Number to scan
 * \returns Index of the lowest set bit in \c n,
 *          or 32 if \c n is zero
 */
inline uint32_t tzcnt(uint32_t n) {
#if defined(_MSC_VER) && !defined(__clang__)
  // On processors without BMI1 the tzcnt encoding is executed
  // as bsf, which leaves the result undefined for a zero input.
  // Handle zero explicitly so that 32 is returned on every CPU.
  if (n == 0)
    return 32;

  return _tzcnt_u32(n);
#elif defined(__BMI__)
  return __tzcnt_u32(n);
#elif defined(__GNUC__) || defined(__clang__)
  // The builtin is undefined for zero, hence the explicit check.
  // Unlike hand-written assembly it is not tied to one instruction
  // set and remains visible to the optimizer.
  return n != 0 ? uint32_t(__builtin_ctz(n)) : 32u;
#else
  // Portable fallback: isolate the lowest set bit, then
  // narrow down its index with a binary search over masks.
  uint32_t r = 31;
  n &= 0u - n;
  r -= (n & 0x0000FFFF) ? 16 : 0;
  r -= (n & 0x00FF00FF) ?  8 : 0;
  r -= (n & 0x0F0F0F0F) ?  4 : 0;
  r -= (n & 0x33333333) ?  2 : 0;
  r -= (n & 0x55555555) ?  1 : 0;
  return n != 0 ? r : 32;
#endif
}
|
2019-04-04 05:48:42 +02:00
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
uint32_t pack(T& dst, uint32_t& shift, T src, uint32_t count) {
|
|
|
|
constexpr uint32_t Bits = 8 * sizeof(T);
|
|
|
|
if (likely(shift < Bits))
|
|
|
|
dst |= src << shift;
|
|
|
|
shift += count;
|
|
|
|
return shift > Bits ? shift - Bits : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
template<typename T>
|
|
|
|
uint32_t unpack(T& dst, T src, uint32_t& shift, uint32_t count) {
|
|
|
|
constexpr uint32_t Bits = 8 * sizeof(T);
|
|
|
|
if (likely(shift < Bits))
|
|
|
|
dst = (src >> shift) & ((T(1) << count) - 1);
|
|
|
|
shift += count;
|
|
|
|
return shift > Bits ? shift - Bits : 0;
|
|
|
|
}
|
2019-10-07 17:19:32 +02:00
|
|
|
|
|
|
|
/**
 * \brief Tests two aligned structs for bitwise equality
 *
 * The structs are read in chunks of 16 bytes using aligned
 * loads, which is why \c T must be at least 16-byte aligned.
 * \param [in] a First struct
 * \param [in] b Second struct
 * \returns \c true if the structs are equal
 */
template<typename T>
bool bcmpeq(const T* a, const T* b) {
  static_assert(alignof(T) >= 16);
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
  // Number of 16-byte vectors covered two at a time, and in total
  constexpr size_t PairedCount = 2 * (sizeof(T) / 32);
  constexpr size_t VectorCount = sizeof(T) / 16;

  // Byte mask produced when all 16 bytes of a vector compare equal
  constexpr int AllEqual = 0xFFFF;

  const __m128i* lhs = reinterpret_cast<const __m128i*>(a);
  const __m128i* rhs = reinterpret_cast<const __m128i*>(b);

  size_t idx = 0;

  // Ask the compiler not to unroll the main loop
#if defined(__clang__)
  #pragma nounroll
#elif defined(__GNUC__)
  #pragma GCC unroll 0
#endif
  for ( ; idx < PairedCount; idx += 2) {
    const __m128i loEq = _mm_cmpeq_epi8(
      _mm_load_si128(lhs + idx),
      _mm_load_si128(rhs + idx));
    const __m128i hiEq = _mm_cmpeq_epi8(
      _mm_load_si128(lhs + idx + 1),
      _mm_load_si128(rhs + idx + 1));

    // Merge both results so that one check covers 32 bytes
    const __m128i bothEq = _mm_and_si128(loEq, hiEq);

    if (_mm_movemask_epi8(bothEq) != AllEqual)
      return false;
  }

  // Compare whatever the paired loop did not cover
  for ( ; idx < VectorCount; idx++) {
    const __m128i tailEq = _mm_cmpeq_epi8(
      _mm_load_si128(lhs + idx),
      _mm_load_si128(rhs + idx));

    if (_mm_movemask_epi8(tailEq) != AllEqual)
      return false;
  }

  return true;
#else
  return std::memcmp(a, b, sizeof(T)) == 0;
#endif
}
|
2017-11-07 15:10:38 +01:00
|
|
|
|
2019-09-05 09:01:33 -07:00
|
|
|
}
|