radeonsi/gfx9: fix VM fault with fetched instance divisors
We need to account for SGPR locations in merged shaders.
This case is exercised by KHR-GL45.enhanced_layouts.vertex_attrib_locations.
Fixes: 79c2e7388c ("radeonsi/gfx9: use SPI_SHADER_USER_DATA_COMMON")
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
This commit is contained in:
parent
3a32858fc3
commit
df5ebe0c26
|
@ -5883,11 +5883,13 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
|
||||||
key->vs_prolog.num_input_sgprs = num_input_sgprs;
|
key->vs_prolog.num_input_sgprs = num_input_sgprs;
|
||||||
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
|
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
|
||||||
key->vs_prolog.as_ls = shader_out->key.as_ls;
|
key->vs_prolog.as_ls = shader_out->key.as_ls;
|
||||||
|
key->vs_prolog.as_es = shader_out->key.as_es;
|
||||||
|
|
||||||
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
|
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
|
||||||
key->vs_prolog.as_ls = 1;
|
key->vs_prolog.as_ls = 1;
|
||||||
key->vs_prolog.num_merged_next_stage_vgprs = 2;
|
key->vs_prolog.num_merged_next_stage_vgprs = 2;
|
||||||
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
|
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
|
||||||
|
key->vs_prolog.as_es = 1;
|
||||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6768,6 +6770,8 @@ si_get_shader_part(struct si_screen *sscreen,
|
||||||
|
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case PIPE_SHADER_VERTEX:
|
case PIPE_SHADER_VERTEX:
|
||||||
|
shader.key.as_ls = key->vs_prolog.as_ls;
|
||||||
|
shader.key.as_es = key->vs_prolog.as_es;
|
||||||
break;
|
break;
|
||||||
case PIPE_SHADER_TESS_CTRL:
|
case PIPE_SHADER_TESS_CTRL:
|
||||||
assert(!prolog);
|
assert(!prolog);
|
||||||
|
@ -6810,10 +6814,15 @@ out:
|
||||||
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
|
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
|
||||||
{
|
{
|
||||||
LLVMValueRef ptr[2], list;
|
LLVMValueRef ptr[2], list;
|
||||||
|
bool is_merged_shader =
|
||||||
|
ctx->screen->b.chip_class >= GFX9 &&
|
||||||
|
(ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||||
|
ctx->type == PIPE_SHADER_GEOMETRY ||
|
||||||
|
ctx->shader->key.as_ls || ctx->shader->key.as_es);
|
||||||
|
|
||||||
/* Get the pointer to rw buffers. */
|
/* Get the pointer to rw buffers. */
|
||||||
ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
|
ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
|
||||||
ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
|
ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI);
|
||||||
list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
|
list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
|
||||||
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
|
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
|
||||||
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
|
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
|
||||||
|
|
|
@ -149,9 +149,6 @@ struct nir_shader;
|
||||||
|
|
||||||
/* SGPR user data indices */
|
/* SGPR user data indices */
|
||||||
enum {
|
enum {
|
||||||
/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
|
|
||||||
* and these two are used for other purposes.
|
|
||||||
*/
|
|
||||||
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
|
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
|
||||||
SI_SGPR_RW_BUFFERS_HI,
|
SI_SGPR_RW_BUFFERS_HI,
|
||||||
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
|
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
|
||||||
|
@ -455,6 +452,7 @@ union si_shader_part_key {
|
||||||
unsigned num_merged_next_stage_vgprs:3;
|
unsigned num_merged_next_stage_vgprs:3;
|
||||||
unsigned last_input:4;
|
unsigned last_input:4;
|
||||||
unsigned as_ls:1;
|
unsigned as_ls:1;
|
||||||
|
unsigned as_es:1;
|
||||||
/* Prologs for monolithic shaders shouldn't set EXEC. */
|
/* Prologs for monolithic shaders shouldn't set EXEC. */
|
||||||
unsigned is_monolithic:1;
|
unsigned is_monolithic:1;
|
||||||
} vs_prolog;
|
} vs_prolog;
|
||||||
|
|
Loading…
Reference in New Issue