From 37939e9c5462b871b0b9b00a43c5c9bec1e10e9d Mon Sep 17 00:00:00 2001 From: Danylo Piliaiev Date: Fri, 25 Mar 2022 15:26:52 +0200 Subject: [PATCH] turnip: Fix the lack of WFM before indirect draws We have to add WFM to the pending bits when flushing into CP, so that indirect draws know when they should apply the WFM workaround. Fixes CTS tests: dEQP-VK.draw.renderpass.indirect_draw.*_data_from_compute.indirect_draw_count* Fixes: abf0ae014a878d063132a4bf2f2515dc7052f069 ("tu: Properly handle waiting on an earlier pipeline stage") Signed-off-by: Danylo Piliaiev Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 0879de306c5..a1e47a5007f 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -2868,6 +2868,9 @@ tu_flush_for_stage(struct tu_cache_state *cache, * for any WFI's to finish. This is already done for draw calls, including * before indirect param reads, for the most part, so we just need to WFI. * + * However, some indirect draw opcodes, depending on firmware, don't have + * implicit CP_WAIT_FOR_ME so we have to handle it manually. + * * Transform feedback counters are read via CP_MEM_TO_REG, which implicitly * does CP_WAIT_FOR_ME, but we still need a WFI if the GPU writes it. * @@ -2879,8 +2882,11 @@ tu_flush_for_stage(struct tu_cache_state *cache, * future, or if CP_DRAW_PRED_SET grows the capability to do 32-bit * comparisons, then this will have to be dealt with. */ - if (src_stage > dst_stage) + if (src_stage > dst_stage) { cache->flush_bits |= TU_CMD_FLAG_WAIT_FOR_IDLE; + if (dst_stage == TU_STAGE_CP) + cache->pending_flush_bits |= TU_CMD_FLAG_WAIT_FOR_ME; + } } static enum tu_cmd_access_mask