From ad61eac250120eec44a305e6872a63c21ec683cc Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 1 Jun 2017 05:24:34 +0100 Subject: [PATCH] radv: factor out eop event writing code. (v2) In prep for GFX9 refactor some of the eop event writing code out. This changes behaviour, but aligns with what radeonsi does, it does double emits on CIK/VI, whereas previously it only did this on CIK. v2: bump the size checks. Reviewed-by: Bas Nieuwenhuizen Signed-off-by: Dave Airlie --- src/amd/vulkan/radv_cmd_buffer.c | 24 ++++---------- src/amd/vulkan/radv_private.h | 9 +++++ src/amd/vulkan/radv_query.c | 57 ++++++++++---------------------- src/amd/vulkan/si_cmd_buffer.c | 57 ++++++++++++++++++++++++++++---- 4 files changed, 82 insertions(+), 65 deletions(-) diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index c4d3d7bde79..64eab2d5734 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -3228,28 +3228,16 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer, cmd_buffer->device->ws->cs_add_buffer(cs, event->bo, 8); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18); /* TODO: this is overkill. Probably should figure something out from * the stage mask. 
*/ - if (cmd_buffer->device->physical_device->rad_info.chip_class == CIK) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, va); - radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 2); - radeon_emit(cs, 0); - } - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, va); - radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, value); - radeon_emit(cs, 0); + si_cs_emit_write_event_eop(cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + false, + EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, + 1, va, 2, value); assert(cmd_buffer->cs->cdw <= cdw_max); } diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h index 54bec4bd659..16f95c40ef1 100644 --- a/src/amd/vulkan/radv_private.h +++ b/src/amd/vulkan/radv_private.h @@ -838,6 +838,15 @@ void si_write_scissors(struct radeon_winsys_cs *cs, int first, uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, bool indirect_draw, uint32_t draw_vertex_count); +void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, + enum chip_class chip_class, + bool is_mec, + unsigned event, unsigned event_flags, + unsigned data_sel, + uint64_t va, + uint32_t old_fence, + uint32_t new_fence); + void si_emit_wait_fence(struct radeon_winsys_cs *cs, uint64_t va, uint32_t ref, uint32_t mask); diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c index 910eedd833c..55e4185772e 100644 --- a/src/amd/vulkan/radv_query.c +++ b/src/amd/vulkan/radv_query.c @@ -1144,7 +1144,7 @@ void radv_CmdEndQuery( break; case VK_QUERY_TYPE_PIPELINE_STATISTICS: - radeon_check_space(cmd_buffer->device->ws, cs, 10); + radeon_check_space(cmd_buffer->device->ws, cs, 16); va += pipelinestat_block_size; @@ -1153,13 +1153,11 @@ void radv_CmdEndQuery( radeon_emit(cs, 
va); radeon_emit(cs, va >> 32); - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, avail_va); - radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1)); - radeon_emit(cs, 1); - radeon_emit(cs, 0); + si_cs_emit_write_event_eop(cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + false, + EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, + 1, avail_va, 0, 1); break; default: unreachable("ending unhandled query type"); @@ -1182,7 +1180,7 @@ void radv_CmdWriteTimestamp( cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5); - MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 14); + MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); switch(pipelineStage) { case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT: @@ -1204,37 +1202,16 @@ void radv_CmdWriteTimestamp( radeon_emit(cs, 1); break; default: - if (mec) { - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, 3 << 29); - radeon_emit(cs, query_va); - radeon_emit(cs, query_va >> 32); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - - radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, 1 << 29); - radeon_emit(cs, avail_va); - radeon_emit(cs, avail_va >> 32); - radeon_emit(cs, 1); - radeon_emit(cs, 0); - } else { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, query_va); - radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5)); - radeon_emit(cs, avail_va); - radeon_emit(cs, (1 << 29) | ((avail_va >> 32) & 0xFFFF)); - radeon_emit(cs, 1); - 
radeon_emit(cs, 0); - } + si_cs_emit_write_event_eop(cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 3, query_va, 0, 0); + si_cs_emit_write_event_eop(cs, + cmd_buffer->device->physical_device->rad_info.chip_class, + mec, + V_028A90_BOTTOM_OF_PIPE_TS, 0, + 1, avail_va, 0, 1); break; } diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c index 3bf1d391cf3..2b0ae5c7696 100644 --- a/src/amd/vulkan/si_cmd_buffer.c +++ b/src/amd/vulkan/si_cmd_buffer.c @@ -762,6 +762,51 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, } +void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs, + enum chip_class chip_class, + bool is_mec, + unsigned event, unsigned event_flags, + unsigned data_sel, + uint64_t va, + uint32_t old_fence, + uint32_t new_fence) +{ + unsigned op = EVENT_TYPE(event) | + EVENT_INDEX(5) | + event_flags; + + if (is_mec) { + radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0)); + radeon_emit(cs, op); + radeon_emit(cs, EOP_DATA_SEL(data_sel)); + radeon_emit(cs, va); /* address lo */ + radeon_emit(cs, va >> 32); /* address hi */ + radeon_emit(cs, new_fence); /* immediate data lo */ + radeon_emit(cs, 0); /* immediate data hi */ + } else { + if (chip_class == CIK || + chip_class == VI) { + /* Two EOP events are required to make all engines go idle + * (and optional cache flushes executed) before the timestamp + * is written. 
+ */ + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, op); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel)); + radeon_emit(cs, old_fence); /* immediate data */ + radeon_emit(cs, 0); /* unused */ + } + + radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); + radeon_emit(cs, op); + radeon_emit(cs, va); + radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel)); + radeon_emit(cs, new_fence); /* immediate data */ + radeon_emit(cs, 0); /* unused */ + } +} + void si_emit_wait_fence(struct radeon_winsys_cs *cs, uint64_t va, uint32_t ref, @@ -826,13 +871,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs, /* Necessary for DCC */ if (chip_class >= VI) { - radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); - radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) | - EVENT_INDEX(5)); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, 0); - radeon_emit(cs, 0); + si_cs_emit_write_event_eop(cs, + chip_class, + is_mec, + V_028A90_FLUSH_AND_INV_CB_DATA_TS, + 0, 0, 0, 0, 0); } }