From df62c8fbeac1742a606d39239085e4f1d771253c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Fri, 2 Oct 2020 10:29:27 +0200 Subject: [PATCH] aco/ngg: Place workgroup barrier outside control flow for NGG GS. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merged shaders have a workgroup barrier which makes sure that the first half is completed in every wave before the 2nd half is started. This barrier is located in divergent control flow, so that waves that don't have any invocations in the 2nd half can finish as early as possible. This is problematic for NGG GS because it has more workgroup barriers after the 2nd half. So, for NGG GS we need to put the barrier outside control flow because otherwise the waves that have 0 GS threads won't be able to wait for the waves which have non-zero GS threads. Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_instruction_selection.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index efe06a89124..e2624b268c9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -11381,6 +11381,13 @@ void select_program(Program *program, bool check_merged_wave_info = ctx.tcs_in_out_eq ? i == 0 : ((shader_count >= 2 && !empty_shader) || ngg_no_gs); bool endif_merged_wave_info = ctx.tcs_in_out_eq ? i == 1 : check_merged_wave_info; + + if (i && ngg_gs) { + /* NGG GS waves need to wait for each other after the GS half is done. */ + Builder bld(ctx.program, ctx.block); + create_workgroup_barrier(bld); + } + if (check_merged_wave_info) { Temp cond = merged_wave_info_to_mask(&ctx, i); begin_divergent_if_then(&ctx, &ic_merged_wave_info, cond); @@ -11389,7 +11396,8 @@ void select_program(Program *program, if (i) { Builder bld(ctx.program, ctx.block); - create_workgroup_barrier(bld); + if (!ngg_gs) + create_workgroup_barrier(bld); if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) { ctx.gs_wave_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, m0), bld.def(s1, scc), get_arg(&ctx, args->merged_wave_info), Operand((8u << 16) | 16u));