From c6d12a510cd9ad5681c411f5f94243296d06cee0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timur=20Krist=C3=B3f?=
Date: Thu, 26 May 2022 13:35:24 +0200
Subject: [PATCH] ac/nir/taskmesh: Use task shader lowering from common NIR
 code.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This also implements task_payload atomics for RADV.

Signed-off-by: Timur Kristóf
Reviewed-by: Rhys Perry
Part-of:
---
 .../common/ac_nir_lower_taskmesh_io_to_mem.c | 92 +++++++++----------
 1 file changed, 43 insertions(+), 49 deletions(-)

diff --git a/src/amd/common/ac_nir_lower_taskmesh_io_to_mem.c b/src/amd/common/ac_nir_lower_taskmesh_io_to_mem.c
index a3a6eefee90..2ef11e3c05b 100644
--- a/src/amd/common/ac_nir_lower_taskmesh_io_to_mem.c
+++ b/src/amd/common/ac_nir_lower_taskmesh_io_to_mem.c
@@ -236,36 +236,48 @@ task_write_draw_ring(nir_builder *b,
 }
 
 static bool
-filter_task_output_or_payload(const nir_instr *instr,
-                              UNUSED const void *state)
+filter_task_intrinsics(const nir_instr *instr,
+                       UNUSED const void *state)
 {
    if (instr->type != nir_instr_type_intrinsic)
       return false;
 
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-   return intrin->intrinsic == nir_intrinsic_store_output ||
+   return intrin->intrinsic == nir_intrinsic_launch_mesh_workgroups ||
           intrin->intrinsic == nir_intrinsic_store_task_payload ||
           intrin->intrinsic == nir_intrinsic_load_task_payload;
 }
 
 static nir_ssa_def *
-lower_task_output_store(nir_builder *b,
-                        nir_intrinsic_instr *intrin,
-                        lower_tsms_io_state *s)
+lower_task_launch_mesh_workgroups(nir_builder *b,
+                                  nir_intrinsic_instr *intrin,
+                                  lower_tsms_io_state *s)
 {
-   /* NV_mesh_shader:
-    * Task shaders should only have 1 output: TASK_COUNT
-    * which is the number of launched mesh shader workgroups in 1D.
-    *
-    * Task count is one dimensional, but the HW needs X, Y, Z.
-    * Use the shader's value for X, and write Y=1, Z=1.
+   /* This intrinsic must be always in uniform control flow,
+    * so we assume that all invocations are active here.
     */
-   nir_ssa_def *store_val = nir_vec3(b, intrin->src[0].ssa,
-                                     nir_imm_int(b, 1),
-                                     nir_imm_int(b, 1));
 
+   /* Wait for all necessary stores to finish. */
+   nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP,
+                         .memory_scope = NIR_SCOPE_WORKGROUP,
+                         .memory_semantics = NIR_MEMORY_ACQ_REL,
+                         .memory_modes = nir_var_mem_task_payload | nir_var_shader_out |
+                                         nir_var_mem_ssbo | nir_var_mem_global);
+
+   /* On the first invocation, write the full draw ring entry. */
+   nir_ssa_def *invocation_index = nir_load_local_invocation_index(b);
+   nir_if *if_invocation_index_zero = nir_push_if(b, nir_ieq_imm(b, invocation_index, 0));
+   {
+      nir_ssa_def *dimensions = intrin->src[0].ssa;
+      nir_ssa_def *x = nir_channel(b, dimensions, 0);
+      nir_ssa_def *y = nir_channel(b, dimensions, 1);
+      nir_ssa_def *z = nir_channel(b, dimensions, 2);
+      nir_ssa_def *rdy = task_draw_ready_bit(b, s);
+      nir_ssa_def *store_val = nir_vec4(b, x, y, z, rdy);
+      task_write_draw_ring(b, store_val, 0, s);
+   }
+   nir_pop_if(b, if_invocation_index_zero);
 
-   task_write_draw_ring(b, store_val, 0, s);
    return NIR_LOWER_INSTR_PROGRESS_REPLACE;
 }
 
@@ -321,37 +333,16 @@ lower_task_intrinsics(nir_builder *b,
    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
    lower_tsms_io_state *s = (lower_tsms_io_state *)state;
 
-   if (intrin->intrinsic == nir_intrinsic_store_output)
-      return lower_task_output_store(b, intrin, s);
-   else if (intrin->intrinsic == nir_intrinsic_store_task_payload)
-      return lower_task_payload_store(b, intrin, s);
-   else if (intrin->intrinsic == nir_intrinsic_load_task_payload)
-      return lower_taskmesh_payload_load(b, intrin, s);
-   else
-      unreachable("unsupported task shader intrinsic");
-}
-
-static void
-emit_task_finale(nir_builder *b, lower_tsms_io_state *s)
-{
-   /* We assume there is always a single end block in the shader. */
-   b->cursor = nir_after_block(nir_impl_last_block(b->impl));
-
-   /* Wait for all task_payload, output, SSBO and global stores to finish. */
-   nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP,
-                         .memory_scope = NIR_SCOPE_WORKGROUP,
-                         .memory_semantics = NIR_MEMORY_ACQ_REL,
-                         .memory_modes = nir_var_mem_task_payload | nir_var_shader_out |
-                                         nir_var_mem_ssbo | nir_var_mem_global);
-
-   nir_ssa_def *invocation_index = nir_load_local_invocation_index(b);
-   nir_if *if_invocation_index_zero = nir_push_if(b, nir_ieq_imm(b, invocation_index, 0));
-   {
-      /* Write ready bit. */
-      nir_ssa_def *ready_bit = task_draw_ready_bit(b, s);
-      task_write_draw_ring(b, ready_bit, 12, s);
+   switch (intrin->intrinsic) {
+   case nir_intrinsic_store_task_payload:
+      return lower_task_payload_store(b, intrin, s);
+   case nir_intrinsic_load_task_payload:
+      return lower_taskmesh_payload_load(b, intrin, s);
+   case nir_intrinsic_launch_mesh_workgroups:
+      return lower_task_launch_mesh_workgroups(b, intrin, s);
+   default:
+      unreachable("unsupported task shader intrinsic");
    }
-   nir_pop_if(b, if_invocation_index_zero);
 }
 
 void
@@ -361,6 +352,11 @@ ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
 {
    assert(util_is_power_of_two_nonzero(task_num_entries));
 
+   nir_lower_task_shader_options lower_ts_opt = {
+      .payload_to_shared_for_atomics = true,
+   };
+   NIR_PASS(_, shader, nir_lower_task_shader, lower_ts_opt);
+
    lower_tsms_io_state state = {
       .draw_entry_bytes = 16,
       .payload_entry_bytes = task_payload_entry_bytes,
@@ -373,13 +369,11 @@ ac_nir_lower_task_outputs_to_mem(nir_shader *shader,
    nir_builder_init(b, impl);
 
    nir_shader_lower_instructions(shader,
-                                 filter_task_output_or_payload,
+                                 filter_task_intrinsics,
                                  lower_task_intrinsics,
                                  &state);
 
-   emit_task_finale(b, &state);
    nir_metadata_preserve(impl, nir_metadata_none);
-
    nir_validate_shader(shader, "after lowering task shader outputs to memory stores");
 }
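Note on the draw ring entry written above: the pass sets .draw_entry_bytes = 16,
and the new lower_task_launch_mesh_workgroups() packs the three dispatch
dimensions and the ready bit into a single vec4 store at offset 0 (the old
code wrote the ready bit separately at offset 12). A hypothetical C view of
one entry, for illustration only; the field names are assumptions, and only
the offsets and the 16-byte size follow from the patch:

   /* Hypothetical layout of one 16-byte task draw ring entry.
    * Not taken from the Mesa source; field names are invented. */
   #include <stdint.h>

   struct task_draw_ring_entry {
      uint32_t num_groups_x; /* dimensions channel 0, offset  0 */
      uint32_t num_groups_y; /* dimensions channel 1, offset  4 */
      uint32_t num_groups_z; /* dimensions channel 2, offset  8 */
      uint32_t draw_ready;   /* task_draw_ready_bit(), offset 12 */
   };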
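Note on the assertion: ac_nir_lower_task_outputs_to_mem() asserts
util_is_power_of_two_nonzero(task_num_entries). A plausible reason, which is
an assumption on my part and not spelled out in the patch, is that a
power-of-two ring size lets the entry index wrap with a cheap mask instead of
an integer modulo:

   /* Hypothetical helper sketching the mask-based wrap that a power-of-two
    * ring size enables; ring_entry_offset() is not a function in the Mesa
    * source. */
   #include <stdint.h>

   static inline uint32_t
   ring_entry_offset(uint32_t ring_id, uint32_t num_entries,
                     uint32_t entry_bytes)
   {
      /* num_entries must be a nonzero power of two for the mask to be a
       * correct modulo. */
      return (ring_id & (num_entries - 1u)) * entry_bytes;
   }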