diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 91f1b0a7420..9d83cb3a736 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -348,6 +348,7 @@ static bool si_switch_compute_shader(struct si_context *sctx, lds_blocks += align(program->local_size, 512) >> 9; } + /* TODO: use si_multiwave_lds_size_workaround */ assert(lds_blocks <= 0xFF); config->rsrc2 &= C_00B84C_LDS_SIZE; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index b19c61e99d2..80c063aaeec 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -8198,11 +8198,31 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen, return true; } -static void si_fix_num_sgprs(struct si_shader *shader) +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size) +{ + /* SPI barrier management bug: + * Make sure we have at least 4k of LDS in use to avoid the bug. + * It applies to workgroup sizes of more than one wavefront. + */ + if (sscreen->b.family == CHIP_BONAIRE || + sscreen->b.family == CHIP_KABINI || + sscreen->b.family == CHIP_MULLINS) + *lds_size = MAX2(*lds_size, 8); +} + +static void si_fix_resource_usage(struct si_screen *sscreen, + struct si_shader *shader) { unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */ shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs); + + if (shader->selector->type == PIPE_SHADER_COMPUTE && + si_get_max_workgroup_size(shader) > 64) { + si_multiwave_lds_size_workaround(sscreen, + &shader->config.lds_size); + } } int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, @@ -8297,7 +8317,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm, } } - si_fix_num_sgprs(shader); + si_fix_resource_usage(sscreen, shader); si_shader_dump(sscreen, shader, debug, sel->info.processor, stderr); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index d4bc47b3878..129e571aa73 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -547,6 +547,8 @@ int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader) void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader, struct pipe_debug_callback *debug, unsigned processor, FILE *f); +void si_multiwave_lds_size_workaround(struct si_screen *sscreen, + unsigned *lds_size); void si_shader_apply_scratch_relocs(struct si_context *sctx, struct si_shader *shader, struct si_shader_config *config, diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index e904164fafa..10073ef9d74 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -176,11 +176,13 @@ static void si_emit_derived_tess_state(struct si_context *sctx, if (sctx->b.chip_class >= CIK) { assert(lds_size <= 65536); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 512) / 512); + lds_size = align(lds_size, 512) / 512; } else { assert(lds_size <= 32768); - ls_rsrc2 |= S_00B52C_LDS_SIZE(align(lds_size, 256) / 256); + lds_size = align(lds_size, 256) / 256; } + si_multiwave_lds_size_workaround(sctx->screen, &lds_size); + ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); /* Due to a hw bug, RSRC2_LS must be written twice with another * LS register written in between. */