From 4ee6d68d1f25adb59a469d67d8a29adc9905b80b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Wed, 2 Dec 2020 12:06:28 +0100 Subject: [PATCH] aco: Wait for stores when NGG or legacy VS can finish early. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there are no param exports in an NGG (or legacy VS) shader, NO_PC_EXPORT=1 is set, which means PS waves can launch before the current stage finishes. If the current stage has any stores, we need to make sure to wait for those before we allow PS waves to start, so that PS can read what these instructions stored. Fossil DB results on Navi 10: Totals from 45 (0.03% of 136420) affected shaders: CodeSize: 87224 -> 87404 (+0.21%) Instrs: 16750 -> 16795 (+0.27%) Cycles: 69580 -> 69760 (+0.26%) VMEM: 8022 -> 8167 (+1.81%) Signed-off-by: Timur Kristóf Reviewed-by: Rhys Perry Part-of: --- src/amd/compiler/aco_insert_waitcnt.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/amd/compiler/aco_insert_waitcnt.cpp b/src/amd/compiler/aco_insert_waitcnt.cpp index 87a7e0926f4..5adbc1fbd26 100644 --- a/src/amd/compiler/aco_insert_waitcnt.cpp +++ b/src/amd/compiler/aco_insert_waitcnt.cpp @@ -28,6 +28,7 @@ #include #include "aco_ir.h" +#include "sid.h" namespace aco { @@ -530,6 +531,24 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) } } + if (ctx.program->early_rast && + instr->opcode == aco_opcode::exp) { + + Export_instruction *exp = static_cast(instr); + if (exp->dest >= V_008DFC_SQ_EXP_POS && + exp->dest < V_008DFC_SQ_EXP_PRIM) { + + /* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export. + * Wait for all stores (and atomics) to complete, so PS can read them. + * TODO: This only really applies to DONE pos exports. Consider setting the DONE bit earlier. 
+ */ + if (ctx.vs_cnt > 0) + imm.vs = 0; + if (ctx.vm_cnt > 0) + imm.vm = 0; + } + } + if (instr->opcode == aco_opcode::p_barrier) imm.combine(perform_barrier(ctx, static_cast(instr)->sync, semantic_acqrel)); else