swr: [rasterizer] add support for building avx512 version
Currently, most code paths between AVX2 and AVX512 are identical (see changes to knobs.h). Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com>
This commit is contained in:
parent
695af2a7e2
commit
b6d2c96851
|
@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a)
|
|||
INLINE
|
||||
UINT pdep_u32(UINT a, UINT mask)
|
||||
{
|
||||
#if KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#if KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
return _pdep_u32(a, mask);
|
||||
#else
|
||||
UINT result = 0;
|
||||
|
@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask)
|
|||
INLINE
|
||||
UINT pext_u32(UINT a, UINT mask)
|
||||
{
|
||||
#if KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#if KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
return _pext_u32(a, mask);
|
||||
#else
|
||||
UINT result = 0;
|
||||
|
|
|
@ -98,7 +98,7 @@ struct PackTraits<8, false>
|
|||
__m256i result = _mm256_castsi128_si256(resLo);
|
||||
result = _mm256_insertf128_si256(result, resHi, 1);
|
||||
return _mm256_castsi256_ps(result);
|
||||
#elif KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH>=KNOB_ARCH_AVX2
|
||||
return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
|
||||
#endif
|
||||
#else
|
||||
|
@ -161,7 +161,7 @@ struct PackTraits<8, true>
|
|||
__m256i result = _mm256_castsi128_si256(resLo);
|
||||
result = _mm256_insertf128_si256(result, resHi, 1);
|
||||
return _mm256_castsi256_ps(result);
|
||||
#elif KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH>=KNOB_ARCH_AVX2
|
||||
return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
|
||||
#endif
|
||||
#else
|
||||
|
@ -223,7 +223,7 @@ struct PackTraits<16, false>
|
|||
__m256i result = _mm256_castsi128_si256(resLo);
|
||||
result = _mm256_insertf128_si256(result, resHi, 1);
|
||||
return _mm256_castsi256_ps(result);
|
||||
#elif KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH>=KNOB_ARCH_AVX2
|
||||
return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
|
||||
#endif
|
||||
#else
|
||||
|
@ -285,7 +285,7 @@ struct PackTraits<16, true>
|
|||
__m256i result = _mm256_castsi128_si256(resLo);
|
||||
result = _mm256_insertf128_si256(result, resHi, 1);
|
||||
return _mm256_castsi256_ps(result);
|
||||
#elif KNOB_ARCH==KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH>=KNOB_ARCH_AVX2
|
||||
return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in))));
|
||||
#endif
|
||||
#else
|
||||
|
|
|
@ -52,11 +52,16 @@
|
|||
#define KNOB_SIMD_WIDTH 8
|
||||
#define KNOB_SIMD_BYTES 32
|
||||
#elif (KNOB_ARCH == KNOB_ARCH_AVX512)
|
||||
#define KNOB_ARCH_ISA AVX512F
|
||||
#define KNOB_ARCH_STR "AVX512"
|
||||
#define KNOB_SIMD_WIDTH 16
|
||||
#define KNOB_SIMD_BYTES 64
|
||||
#error "AVX512 not yet supported"
|
||||
#define KNOB_ARCH_ISA AVX2
|
||||
#define KNOB_ARCH_STR "AVX2"
|
||||
#define KNOB_SIMD_WIDTH 8
|
||||
#define KNOB_SIMD_BYTES 32
|
||||
// Disable AVX512 for now...
|
||||
//#define KNOB_ARCH_ISA AVX512F
|
||||
//#define KNOB_ARCH_STR "AVX512"
|
||||
//#define KNOB_SIMD_WIDTH 16
|
||||
//#define KNOB_SIMD_BYTES 64
|
||||
//#error "AVX512 not yet supported"
|
||||
#else
|
||||
#error "Unknown architecture"
|
||||
#endif
|
||||
|
|
|
@ -336,7 +336,7 @@ static void ConvertPixelFromFloat(
|
|||
// Convert from 32-bit float to 16-bit float using _mm_cvtps_ph
|
||||
// @todo 16bit float instruction support is orthogonal to avx support. need to
|
||||
// add check for F16C support instead.
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
#if KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
__m128 src128 = _mm_set1_ps(src);
|
||||
__m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC);
|
||||
UINT value = _mm_extract_epi16(srci128, 0);
|
||||
|
@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat(
|
|||
float dst;
|
||||
if (FormatTraits<SrcFormat>::GetBPC(comp) == 16)
|
||||
{
|
||||
#if KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
#if KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
// Convert from 16-bit float to 32-bit float using _mm_cvtph_ps
|
||||
// @todo 16bit float instruction support is orthogonal to avx support. need to
|
||||
// add check for F16C support instead.
|
||||
|
|
|
@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst
|
|||
__m256i final = _mm256_castsi128_si256(vRow00);
|
||||
final = _mm256_insertf128_si256(final, vRow10, 1);
|
||||
|
||||
#elif KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
|
||||
// logic is as above, only wider
|
||||
src1 = _mm256_slli_si256(src1, 1);
|
||||
|
@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_
|
|||
__m256i final = _mm256_castsi128_si256(vRow00);
|
||||
final = _mm256_insertf128_si256(final, vRow10, 1);
|
||||
|
||||
#elif KNOB_ARCH == KNOB_ARCH_AVX2
|
||||
#elif KNOB_ARCH >= KNOB_ARCH_AVX2
|
||||
|
||||
// logic is as above, only wider
|
||||
src1 = _mm256_slli_si256(src1, 1);
|
||||
|
|
Loading…
Reference in New Issue