From 5b7446d74cbb9406dd369d4c2eec2b3fab716d6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timur=20Krist=C3=B3f?= Date: Tue, 24 Aug 2021 08:20:32 +0200 Subject: [PATCH] radv, ac, aco: Use indices 0-2 of gs_vtx_offset argument array on GFX9+. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, indices 0, 2, 4 were used. This worked, but it was somewhat unintuitive. This commit changes it to use indices 0, 1, 2 instead, which makes the code easier to understand. Signed-off-by: Timur Kristóf Reviewed-by: Samuel Pitoiset Reviewed-by: Marek Olšák Part-of: --- src/amd/common/ac_nir_lower_esgs_io_to_mem.c | 4 ++-- src/amd/common/ac_nir_lower_ngg.c | 5 ++--- src/amd/common/ac_shader_args.h | 2 +- src/amd/compiler/aco_instruction_selection.cpp | 2 ++ src/amd/vulkan/radv_nir_to_llvm.c | 4 ++-- src/amd/vulkan/radv_shader_args.c | 4 ++-- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c index cd7f7cba701..0dbe7107a27 100644 --- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c @@ -169,8 +169,8 @@ gs_per_vertex_input_vertex_offset_gfx9(nir_builder *b, nir_src *vertex_src) { if (nir_src_is_const(*vertex_src)) { unsigned vertex = nir_src_as_uint(*vertex_src); - return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u * 2u), - nir_imm_int(b, (vertex % 2u) * 16u), nir_imm_int(b, 16u)); + return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u), + nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u)); } nir_ssa_def *vertex_offset = nir_build_load_gs_vertex_offset_amd(b, .base = 0); diff --git a/src/amd/common/ac_nir_lower_ngg.c b/src/amd/common/ac_nir_lower_ngg.c index 75728b707f3..2cf976b5e97 100644 --- a/src/amd/common/ac_nir_lower_ngg.c +++ b/src/amd/common/ac_nir_lower_ngg.c @@ -289,9 +289,8 @@ emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives, static nir_ssa_def * ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex) { - /* TODO: This is RADV specific. We'll need to refactor RADV and/or RadeonSI to match. */ - return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u * 2u), - nir_imm_int(b, (vertex % 2u) * 16u), nir_imm_int(b, 16u)); + return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u), + nir_imm_int(b, (vertex & 1u) * 16u), nir_imm_int(b, 16u)); } static nir_ssa_def * diff --git a/src/amd/common/ac_shader_args.h b/src/amd/common/ac_shader_args.h index 4da9d06612e..9d8112b409f 100644 --- a/src/amd/common/ac_shader_args.h +++ b/src/amd/common/ac_shader_args.h @@ -107,7 +107,7 @@ struct ac_shader_args { struct ac_arg es2gs_offset; /* separate legacy ES */ struct ac_arg gs2vs_offset; /* legacy GS */ struct ac_arg gs_wave_id; /* legacy GS */ - struct ac_arg gs_vtx_offset[6]; /* separate legacy GS */ + struct ac_arg gs_vtx_offset[6]; /* GFX6-8: [0-5], GFX9+: [0-2] packed */ struct ac_arg gs_prim_id; struct ac_arg gs_invocation_id; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index ccad2f46904..eb01a77e266 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -8901,7 +8901,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } case nir_intrinsic_load_gs_vertex_offset_amd: { + /* GFX6-8 uses 6 separate args, while GFX9+ packs these into only 3 args. */ unsigned b = nir_intrinsic_base(instr); + assert(b <= (ctx->program->chip_class >= GFX9 ? 2 : 5)); bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), get_arg(ctx, ctx->args->ac.gs_vtx_offset[b])); break; diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c index dff9f635756..04a8753bb03 100644 --- a/src/amd/vulkan/radv_nir_to_llvm.c +++ b/src/amd/vulkan/radv_nir_to_llvm.c @@ -2097,7 +2097,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx) LLVMValueRef vtxindex[] = { ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 0, 16), ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]), 16, 16), - ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[2]), 0, 16), + ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[1]), 0, 16), }; /* Determine the number of vertices per primitive. */ @@ -2940,7 +2940,7 @@ prepare_gs_input_vgprs(struct radv_shader_context *ctx, bool merged) if (merged) { for (int i = 5; i >= 0; --i) { ctx->gs_vtx_offset[i] = ac_unpack_param( - &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i & ~1]), (i & 1) * 16, 16); + &ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[i / 2]), (i & 1) * 16, 16); } ctx->gs_wave_id = diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c index 773a2364565..fe89bf863a5 100644 --- a/src/amd/vulkan/radv_shader_args.c +++ b/src/amd/vulkan/radv_shader_args.c @@ -637,10 +637,10 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage, } ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[1]); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_prim_id); ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_invocation_id); - ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[4]); + ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]); if (previous_stage == MESA_SHADER_VERTEX) { declare_vs_input_vgprs(args);