aco: Wait for stores when NGG or legacy VS can finish early.

When there are no param exports in an NGG (or legacy VS) shader,
NO_PC_EXPORT=1 is set, which means PS waves can launch before
the current stage finishes.

If the current stage has any stores, we need to make sure to wait for
those before we allow PS waves to start, so that PS can read what
these instructions stored.

Fossil DB results on Navi 10:
Totals from 45 (0.03% of 136420) affected shaders:
CodeSize: 87224 -> 87404 (+0.21%)
Instrs: 16750 -> 16795 (+0.27%)
Cycles: 69580 -> 69760 (+0.26%)
VMEM: 8022 -> 8167 (+1.81%)

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7868>
This commit is contained in:
Timur Kristóf 2020-12-02 12:06:28 +01:00 committed by Marge Bot
parent 38da379b3e
commit 4ee6d68d1f
1 changed file with 19 additions and 0 deletions

View File

@ -28,6 +28,7 @@
#include <math.h>
#include "aco_ir.h"
#include "sid.h"
namespace aco {
@ -530,6 +531,24 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info)
}
}
if (ctx.program->early_rast &&
instr->opcode == aco_opcode::exp) {
Export_instruction *exp = static_cast<Export_instruction *>(instr);
if (exp->dest >= V_008DFC_SQ_EXP_POS &&
exp->dest < V_008DFC_SQ_EXP_PRIM) {
/* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos export.
* Wait for all stores (and atomics) to complete, so PS can read them.
* TODO: This only really applies to DONE pos exports. Consider setting the DONE bit earlier.
*/
if (ctx.vs_cnt > 0)
imm.vs = 0;
if (ctx.vm_cnt > 0)
imm.vm = 0;
}
}
if (instr->opcode == aco_opcode::p_barrier)
imm.combine(perform_barrier(ctx, static_cast<Pseudo_barrier_instruction *>(instr)->sync, semantic_acqrel));
else