From e2066032598bf5ca1cf785e2c144505db813064a Mon Sep 17 00:00:00 2001
From: Rhys Perry
Date: Fri, 11 Feb 2022 19:19:45 +0000
Subject: [PATCH] aco: don't move exec reads around exec writes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes flickering and blocky plants in Jedi: Fallen Order. Also fixes
flickering squares in The Last of Us Part 1.

fossil-db (navi21):
Totals from 92 (0.07% of 135636) affected shaders:
Instrs: 35324 -> 35354 (+0.08%); split: -0.03%, +0.11%
CodeSize: 189568 -> 189668 (+0.05%); split: -0.03%, +0.08%
Latency: 345305 -> 346529 (+0.35%); split: -0.02%, +0.37%
InvThroughput: 78632 -> 78625 (-0.01%)
SClause: 1955 -> 1972 (+0.87%); split: -0.61%, +1.48%
Copies: 1311 -> 1304 (-0.53%); split: -0.69%, +0.15%

Signed-off-by: Rhys Perry
Reviewed-by: Timur Kristóf
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8883
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8878
Part-of:
(cherry picked from commit 5e20fbd424543d2c919c8baae247f0d909659640)
---
 .pick_status.json                  | 2 +-
 src/amd/compiler/aco_scheduler.cpp | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/.pick_status.json b/.pick_status.json
index aa21b670c4ece..26b67028b9e24 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -31,7 +31,7 @@
         "description": "aco: don't move exec reads around exec writes",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp
index 4ab13fe7c30b7..75c1e676b63a1 100644
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -459,6 +459,7 @@ struct hazard_query {
    bool contains_spill;
    bool contains_sendmsg;
    bool uses_exec;
+   bool writes_exec;
    memory_event_set mem_events;
    unsigned aliasing_storage;      /* storage classes which are accessed (non-SMEM) */
    unsigned aliasing_storage_smem; /* storage classes which are accessed (SMEM) */
@@ -471,6 +472,7 @@ init_hazard_query(const sched_ctx& ctx, hazard_query* query)
    query->contains_spill = false;
    query->contains_sendmsg = false;
    query->uses_exec = false;
+   query->writes_exec = false;
    memset(&query->mem_events, 0, sizeof(query->mem_events));
    query->aliasing_storage = 0;
    query->aliasing_storage_smem = 0;
@@ -515,6 +517,10 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)
       query->contains_spill = true;
    query->contains_sendmsg |= instr->opcode == aco_opcode::s_sendmsg;
    query->uses_exec |= needs_exec_mask(instr);
+   for (const Definition& def : instr->definitions) {
+      if (def.isFixed() && def.physReg() == exec)
+         query->writes_exec = true;
+   }
 
    memory_sync_info sync = get_sync_info_with_hack(instr);
 
@@ -560,6 +566,8 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
             return hazard_fail_exec;
       }
    }
+   if (query->writes_exec && needs_exec_mask(instr))
+      return hazard_fail_exec;
 
    /* don't move exports so that they stay closer together */
    if (instr->isEXP())