From b6d2c9685154a6bed5c42d90af39213e9c274b59 Mon Sep 17 00:00:00 2001 From: Tim Rowley Date: Tue, 31 May 2016 20:01:40 -0600 Subject: [PATCH] swr: [rasterizer] add support for building avx512 version Currently, most code paths between AVX2 and AVX512 are identical (see changes to knobs.h). Reviewed-by: Bruce Cherniak --- .../drivers/swr/rasterizer/common/simdintrin.h | 4 ++-- .../drivers/swr/rasterizer/core/format_types.h | 8 ++++---- src/gallium/drivers/swr/rasterizer/core/knobs.h | 15 ++++++++++----- .../drivers/swr/rasterizer/memory/Convert.h | 4 ++-- .../drivers/swr/rasterizer/memory/StoreTile.cpp | 4 ++-- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h index 5ec1f719348..cc29b5d6a93 100644 --- a/src/gallium/drivers/swr/rasterizer/common/simdintrin.h +++ b/src/gallium/drivers/swr/rasterizer/common/simdintrin.h @@ -1002,7 +1002,7 @@ static INLINE simdscalar _simd_abs_ps(simdscalar a) INLINE UINT pdep_u32(UINT a, UINT mask) { -#if KNOB_ARCH==KNOB_ARCH_AVX2 +#if KNOB_ARCH >= KNOB_ARCH_AVX2 return _pdep_u32(a, mask); #else UINT result = 0; @@ -1035,7 +1035,7 @@ UINT pdep_u32(UINT a, UINT mask) INLINE UINT pext_u32(UINT a, UINT mask) { -#if KNOB_ARCH==KNOB_ARCH_AVX2 +#if KNOB_ARCH >= KNOB_ARCH_AVX2 return _pext_u32(a, mask); #else UINT result = 0; diff --git a/src/gallium/drivers/swr/rasterizer/core/format_types.h b/src/gallium/drivers/swr/rasterizer/core/format_types.h index afb63378c0d..6612c83beb4 100644 --- a/src/gallium/drivers/swr/rasterizer/core/format_types.h +++ b/src/gallium/drivers/swr/rasterizer/core/format_types.h @@ -98,7 +98,7 @@ struct PackTraits<8, false> __m256i result = _mm256_castsi128_si256(resLo); result = _mm256_insertf128_si256(result, resHi, 1); return _mm256_castsi256_ps(result); -#elif KNOB_ARCH==KNOB_ARCH_AVX2 +#elif KNOB_ARCH>=KNOB_ARCH_AVX2 return _mm256_castsi256_ps(_mm256_cvtepu8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in)))); #endif #else @@ -161,7 +161,7 @@ struct PackTraits<8, true> __m256i result = _mm256_castsi128_si256(resLo); result = _mm256_insertf128_si256(result, resHi, 1); return _mm256_castsi256_ps(result); -#elif KNOB_ARCH==KNOB_ARCH_AVX2 +#elif KNOB_ARCH>=KNOB_ARCH_AVX2 return _mm256_castsi256_ps(_mm256_cvtepi8_epi32(_mm_castps_si128(_mm256_castps256_ps128(in)))); #endif #else @@ -223,7 +223,7 @@ struct PackTraits<16, false> __m256i result = _mm256_castsi128_si256(resLo); result = _mm256_insertf128_si256(result, resHi, 1); return _mm256_castsi256_ps(result); -#elif KNOB_ARCH==KNOB_ARCH_AVX2 +#elif KNOB_ARCH>=KNOB_ARCH_AVX2 return _mm256_castsi256_ps(_mm256_cvtepu16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in)))); #endif #else @@ -285,7 +285,7 @@ struct PackTraits<16, true> __m256i result = _mm256_castsi128_si256(resLo); result = _mm256_insertf128_si256(result, resHi, 1); return _mm256_castsi256_ps(result); -#elif KNOB_ARCH==KNOB_ARCH_AVX2 +#elif KNOB_ARCH>=KNOB_ARCH_AVX2 return _mm256_castsi256_ps(_mm256_cvtepi16_epi32(_mm_castps_si128(_mm256_castps256_ps128(in)))); #endif #else diff --git a/src/gallium/drivers/swr/rasterizer/core/knobs.h b/src/gallium/drivers/swr/rasterizer/core/knobs.h index 55a22a67f4c..2629276ed59 100644 --- a/src/gallium/drivers/swr/rasterizer/core/knobs.h +++ b/src/gallium/drivers/swr/rasterizer/core/knobs.h @@ -52,11 +52,16 @@ #define KNOB_SIMD_WIDTH 8 #define KNOB_SIMD_BYTES 32 #elif (KNOB_ARCH == KNOB_ARCH_AVX512) -#define KNOB_ARCH_ISA AVX512F -#define KNOB_ARCH_STR "AVX512" -#define KNOB_SIMD_WIDTH 16 -#define KNOB_SIMD_BYTES 64 -#error "AVX512 not yet supported" +#define KNOB_ARCH_ISA AVX2 +#define KNOB_ARCH_STR "AVX2" +#define KNOB_SIMD_WIDTH 8 +#define KNOB_SIMD_BYTES 32 +// Disable AVX512 for now... +//#define KNOB_ARCH_ISA AVX512F +//#define KNOB_ARCH_STR "AVX512" +//#define KNOB_SIMD_WIDTH 16 +//#define KNOB_SIMD_BYTES 64 +//#error "AVX512 not yet supported" #else #error "Unknown architecture" #endif diff --git a/src/gallium/drivers/swr/rasterizer/memory/Convert.h b/src/gallium/drivers/swr/rasterizer/memory/Convert.h index 42b973c13fc..b790d35e49d 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/Convert.h +++ b/src/gallium/drivers/swr/rasterizer/memory/Convert.h @@ -336,7 +336,7 @@ static void ConvertPixelFromFloat( // Convert from 32-bit float to 16-bit float using _mm_cvtps_ph // @todo 16bit float instruction support is orthogonal to avx support. need to // add check for F16C support instead. -#if KNOB_ARCH == KNOB_ARCH_AVX2 +#if KNOB_ARCH >= KNOB_ARCH_AVX2 __m128 src128 = _mm_set1_ps(src); __m128i srci128 = _mm_cvtps_ph(src128, _MM_FROUND_TRUNC); UINT value = _mm_extract_epi16(srci128, 0); @@ -519,7 +519,7 @@ INLINE static void ConvertPixelToFloat( float dst; if (FormatTraits::GetBPC(comp) == 16) { -#if KNOB_ARCH == KNOB_ARCH_AVX2 +#if KNOB_ARCH >= KNOB_ARCH_AVX2 // Convert from 16-bit float to 32-bit float using _mm_cvtph_ps // @todo 16bit float instruction support is orthogonal to avx support. need to // add check for F16C support instead. diff --git a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp index 2ab29362a52..8a26ff63595 100644 --- a/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp +++ b/src/gallium/drivers/swr/rasterizer/memory/StoreTile.cpp @@ -454,7 +454,7 @@ INLINE static void FlatConvert(const uint8_t* pSrc, uint8_t* pDst, uint8_t* pDst __m256i final = _mm256_castsi128_si256(vRow00); final = _mm256_insertf128_si256(final, vRow10, 1); -#elif KNOB_ARCH == KNOB_ARCH_AVX2 +#elif KNOB_ARCH >= KNOB_ARCH_AVX2 // logic is as above, only wider src1 = _mm256_slli_si256(src1, 1); @@ -542,7 +542,7 @@ INLINE static void FlatConvertNoAlpha(const uint8_t* pSrc, uint8_t* pDst, uint8_ __m256i final = _mm256_castsi128_si256(vRow00); final = _mm256_insertf128_si256(final, vRow10, 1); -#elif KNOB_ARCH == KNOB_ARCH_AVX2 +#elif KNOB_ARCH >= KNOB_ARCH_AVX2 // logic is as above, only wider src1 = _mm256_slli_si256(src1, 1);