gallivm: don't use pavg.b intrinsic on llvm >= 6.0
This intrinsic disppeared with llvm 6.0, using it ends up in segfaults
(due to llvm issuing call to NULL address in the jited shaders).
Add code doing the same thing as the autoupgrade code in llvm so it
can be matched and replaced back with a pavgb.
While here, also improve lp_test_format, so it tests both with and without
cache (as it was, it tested the cache versions only, whereas cache is
actually disabled in llvmpipe, and in any case even with it enabled
vertex and geometry shaders wouldn't use it). (Although at least for
the unorm8 uncached fetch, the code is still quite different to what
llvmpipe is using, since that would use unorm8x16 type, whereas
the test code is using unorm8x4 type, hence disabling some intrinsic
paths.)
Fixes: 6f4083143b
("gallivm: use llvm jit code for decoding s3tc")
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
This commit is contained in:
parent
a8d020c3dc
commit
f3b1acff48
|
@ -457,6 +457,50 @@ color_expand_565_to_8888(struct gallivm_state *gallivm,
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
* Average two byte vectors. (Will always round up.)
|
||||
*/
|
||||
static LLVMValueRef
|
||||
lp_build_pavgb(struct lp_build_context *bld8,
|
||||
LLVMValueRef v0,
|
||||
LLVMValueRef v1)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld8->gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
assert(bld8->type.width == 8);
|
||||
assert(bld8->type.length == 16 || bld8->type.length == 32);
|
||||
if (HAVE_LLVM < 0x0600) {
|
||||
LLVMValueRef intrargs[2];
|
||||
char *intr_name = bld8->type.length == 32 ? "llvm.x86.avx2.pavg.b" :
|
||||
"llvm.x86.sse2.pavg.b";
|
||||
intrargs[0] = v0;
|
||||
intrargs[1] = v1;
|
||||
return lp_build_intrinsic(builder, intr_name,
|
||||
bld8->vec_type, intrargs, 2, 0);
|
||||
} else {
|
||||
/*
|
||||
* Must match llvm's autoupgrade of pavg.b intrinsic to be useful.
|
||||
* You better hope the backend code manages to detect the pattern, and
|
||||
* the pattern doesn't change there...
|
||||
*/
|
||||
struct lp_type type_ext = bld8->type;
|
||||
LLVMTypeRef vec_type_ext;
|
||||
LLVMValueRef res;
|
||||
LLVMValueRef ext_one;
|
||||
type_ext.width = 16;
|
||||
vec_type_ext = lp_build_vec_type(gallivm, type_ext);
|
||||
ext_one = lp_build_const_vec(gallivm, type_ext, 1);
|
||||
|
||||
v0 = LLVMBuildZExt(builder, v0, vec_type_ext, "");
|
||||
v1 = LLVMBuildZExt(builder, v1, vec_type_ext, "");
|
||||
res = LLVMBuildAdd(builder, v0, v1, "");
|
||||
res = LLVMBuildAdd(builder, res, ext_one, "");
|
||||
res = LLVMBuildLShr(builder, res, ext_one, "");
|
||||
res = LLVMBuildTrunc(builder, res, bld8->vec_type, "");
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate 1/3(v1-v0) + v0
|
||||
* and 2*1/3(v1-v0) + v0
|
||||
|
@ -602,13 +646,7 @@ s3tc_dxt1_full_to_rgba_aos(struct gallivm_state *gallivm,
|
|||
*/
|
||||
if ((util_cpu_caps.has_sse2 && n == 4) ||
|
||||
(util_cpu_caps.has_avx2 && n == 8)) {
|
||||
LLVMValueRef intrargs[2];
|
||||
char *intr_name = n == 8 ? "llvm.x86.avx2.pavg.b" :
|
||||
"llvm.x86.sse2.pavg.b";
|
||||
intrargs[0] = colors0;
|
||||
intrargs[1] = colors1;
|
||||
color2_2 = lp_build_intrinsic(builder, intr_name,
|
||||
bld8.vec_type, intrargs, 2, 0);
|
||||
color2_2 = lp_build_pavgb(&bld8, colors0, colors1);
|
||||
color2_2 = LLVMBuildBitCast(builder, color2_2, bld32.vec_type, "");
|
||||
}
|
||||
else {
|
||||
|
@ -1278,8 +1316,7 @@ s3tc_decode_block_dxt1(struct gallivm_state *gallivm,
|
|||
/* same interleave as for lerp23 - correct result in 2nd element */
|
||||
intrargs[1] = lp_build_interleave2(gallivm, type32, color01, color01, 0);
|
||||
intrargs[1] = LLVMBuildBitCast(builder, intrargs[1], bld8.vec_type, "");
|
||||
color2_2 = lp_build_intrinsic(builder, "llvm.x86.sse2.pavg.b",
|
||||
bld8.vec_type, intrargs, 2, 0);
|
||||
color2_2 = lp_build_pavgb(&bld8, intrargs[0], intrargs[1]);
|
||||
}
|
||||
else {
|
||||
LLVMValueRef v01, v0, v1, vhalf;
|
||||
|
|
|
@ -44,8 +44,6 @@
|
|||
|
||||
#include "lp_test.h"
|
||||
|
||||
#define USE_TEXTURE_CACHE 1
|
||||
|
||||
static struct lp_build_format_cache *cache_ptr;
|
||||
|
||||
void
|
||||
|
@ -80,7 +78,8 @@ typedef void
|
|||
static LLVMValueRef
|
||||
add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
||||
const struct util_format_description *desc,
|
||||
struct lp_type type)
|
||||
struct lp_type type,
|
||||
unsigned use_cache)
|
||||
{
|
||||
char name[256];
|
||||
LLVMContextRef context = gallivm->context;
|
||||
|
@ -114,7 +113,7 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
i = LLVMGetParam(func, 2);
|
||||
j = LLVMGetParam(func, 3);
|
||||
|
||||
if (cache_ptr) {
|
||||
if (use_cache) {
|
||||
cache = LLVMGetParam(func, 4);
|
||||
}
|
||||
|
||||
|
@ -137,7 +136,8 @@ add_fetch_rgba_test(struct gallivm_state *gallivm, unsigned verbose,
|
|||
PIPE_ALIGN_STACK
|
||||
static boolean
|
||||
test_format_float(unsigned verbose, FILE *fp,
|
||||
const struct util_format_description *desc)
|
||||
const struct util_format_description *desc,
|
||||
unsigned use_cache)
|
||||
{
|
||||
LLVMContextRef context;
|
||||
struct gallivm_state *gallivm;
|
||||
|
@ -152,7 +152,8 @@ test_format_float(unsigned verbose, FILE *fp,
|
|||
context = LLVMContextCreate();
|
||||
gallivm = gallivm_create("test_module_float", context);
|
||||
|
||||
fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_float32_vec4_type());
|
||||
fetch = add_fetch_rgba_test(gallivm, verbose, desc,
|
||||
lp_float32_vec4_type(), use_cache);
|
||||
|
||||
gallivm_compile_module(gallivm);
|
||||
|
||||
|
@ -181,7 +182,7 @@ test_format_float(unsigned verbose, FILE *fp,
|
|||
|
||||
memset(unpacked, 0, sizeof unpacked);
|
||||
|
||||
fetch_ptr(unpacked, packed, j, i, cache_ptr);
|
||||
fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
|
||||
|
||||
for(k = 0; k < 4; ++k) {
|
||||
if (util_double_inf_sign(test->unpacked[i][j][k]) != util_inf_sign(unpacked[k])) {
|
||||
|
@ -236,7 +237,8 @@ test_format_float(unsigned verbose, FILE *fp,
|
|||
PIPE_ALIGN_STACK
|
||||
static boolean
|
||||
test_format_unorm8(unsigned verbose, FILE *fp,
|
||||
const struct util_format_description *desc)
|
||||
const struct util_format_description *desc,
|
||||
unsigned use_cache)
|
||||
{
|
||||
LLVMContextRef context;
|
||||
struct gallivm_state *gallivm;
|
||||
|
@ -251,7 +253,8 @@ test_format_unorm8(unsigned verbose, FILE *fp,
|
|||
context = LLVMContextCreate();
|
||||
gallivm = gallivm_create("test_module_unorm8", context);
|
||||
|
||||
fetch = add_fetch_rgba_test(gallivm, verbose, desc, lp_unorm8_vec4_type());
|
||||
fetch = add_fetch_rgba_test(gallivm, verbose, desc,
|
||||
lp_unorm8_vec4_type(), use_cache);
|
||||
|
||||
gallivm_compile_module(gallivm);
|
||||
|
||||
|
@ -280,7 +283,7 @@ test_format_unorm8(unsigned verbose, FILE *fp,
|
|||
|
||||
memset(unpacked, 0, sizeof unpacked);
|
||||
|
||||
fetch_ptr(unpacked, packed, j, i, cache_ptr);
|
||||
fetch_ptr(unpacked, packed, j, i, use_cache ? cache_ptr : NULL);
|
||||
|
||||
match = TRUE;
|
||||
for(k = 0; k < 4; ++k) {
|
||||
|
@ -335,15 +338,16 @@ test_format_unorm8(unsigned verbose, FILE *fp,
|
|||
|
||||
static boolean
|
||||
test_one(unsigned verbose, FILE *fp,
|
||||
const struct util_format_description *format_desc)
|
||||
const struct util_format_description *format_desc,
|
||||
unsigned use_cache)
|
||||
{
|
||||
boolean success = TRUE;
|
||||
|
||||
if (!test_format_float(verbose, fp, format_desc)) {
|
||||
if (!test_format_float(verbose, fp, format_desc, use_cache)) {
|
||||
success = FALSE;
|
||||
}
|
||||
|
||||
if (!test_format_unorm8(verbose, fp, format_desc)) {
|
||||
if (!test_format_unorm8(verbose, fp, format_desc, use_cache)) {
|
||||
success = FALSE;
|
||||
}
|
||||
|
||||
|
@ -356,49 +360,52 @@ test_all(unsigned verbose, FILE *fp)
|
|||
{
|
||||
enum pipe_format format;
|
||||
boolean success = TRUE;
|
||||
unsigned use_cache;
|
||||
|
||||
#if USE_TEXTURE_CACHE
|
||||
cache_ptr = align_malloc(sizeof(struct lp_build_format_cache), 16);
|
||||
#endif
|
||||
|
||||
for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
|
||||
const struct util_format_description *format_desc;
|
||||
for (use_cache = 0; use_cache < 2; use_cache++) {
|
||||
for (format = 1; format < PIPE_FORMAT_COUNT; ++format) {
|
||||
const struct util_format_description *format_desc;
|
||||
|
||||
format_desc = util_format_description(format);
|
||||
if (!format_desc) {
|
||||
continue;
|
||||
}
|
||||
format_desc = util_format_description(format);
|
||||
if (!format_desc) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: test more
|
||||
*/
|
||||
|
||||
/*
|
||||
* TODO: test more
|
||||
*/
|
||||
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
continue;
|
||||
}
|
||||
if (util_format_is_pure_integer(format))
|
||||
continue;
|
||||
|
||||
if (util_format_is_pure_integer(format))
|
||||
continue;
|
||||
/* only have util fetch func for etc1 */
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
|
||||
format != PIPE_FORMAT_ETC1_RGB8) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* only have util fetch func for etc1 */
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
|
||||
format != PIPE_FORMAT_ETC1_RGB8) {
|
||||
continue;
|
||||
}
|
||||
/* missing fetch funcs */
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* missing fetch funcs */
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC) {
|
||||
continue;
|
||||
}
|
||||
/* only test twice with formats which can use cache */
|
||||
if (format_desc->layout != UTIL_FORMAT_LAYOUT_S3TC && use_cache) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!test_one(verbose, fp, format_desc)) {
|
||||
success = FALSE;
|
||||
if (!test_one(verbose, fp, format_desc, use_cache)) {
|
||||
success = FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
#if USE_TEXTURE_CACHE
|
||||
align_free(cache_ptr);
|
||||
#endif
|
||||
|
||||
return success;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue