From 46802f7b608b7e6f809033f671aedce8e93064a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 11 Aug 2021 00:48:17 -0400 Subject: [PATCH] radeonsi: interleave si_shader_info::input_* in memory for faster emit_spi_map Reviewed-by: Pierre-Eric Pelloux-Prayer Part-of: --- src/gallium/drivers/radeonsi/si_shader.h | 15 ++++++++++---- .../drivers/radeonsi/si_shader_llvm_gs.c | 2 +- .../drivers/radeonsi/si_shader_llvm_tess.c | 4 ++-- .../drivers/radeonsi/si_shader_llvm_vs.c | 4 ++-- src/gallium/drivers/radeonsi/si_shader_nir.c | 20 +++++++++---------- .../drivers/radeonsi/si_state_shaders.c | 12 +++++------ 6 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index b14a9a27f28..292bd0d2a07 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -323,6 +323,16 @@ enum si_color_output_type { SI_TYPE_UINT16, }; +union si_input_info { + struct { + ubyte semantic; + ubyte interpolate; + ubyte fp16_lo_hi_valid; + ubyte usage_mask; + }; + uint32_t _unused; /* this just forces 4-byte alignment */ +}; + struct si_shader_info { shader_info base; @@ -330,10 +340,7 @@ struct si_shader_info { ubyte num_inputs; ubyte num_outputs; - ubyte input_semantic[PIPE_MAX_SHADER_INPUTS]; - ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS]; - ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS]; - ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS]; + union si_input_info input[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS]; char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1]; ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS]; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index b4a3b8a8aad..a9ab0c549f3 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -52,7 +52,7 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in unsigned param; LLVMValueRef value; - param = si_shader_io_get_unique_index(info->input_semantic[input_index], false); + param = si_shader_io_get_unique_index(info->input[input_index].semantic, false); /* GFX9 has the ESGS ring in LDS. */ if (ctx->screen->info.chip_class >= GFX9) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c index c79c475506d..68e3fc18e21 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c @@ -390,7 +390,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType ubyte semantic; if (load_input) { - semantic = info->input_semantic[driver_location]; + semantic = info->input[driver_location].semantic; } else { semantic = info->output_semantic[driver_location]; } @@ -448,7 +448,7 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef struct si_shader_info *info = &ctx->shader->selector->info; LLVMValueRef base, addr; - ubyte semantic = info->input_semantic[driver_location]; + ubyte semantic = info->input[driver_location].semantic; assert((semantic >= VARYING_SLOT_PATCH0 || semantic == VARYING_SLOT_TESS_LEVEL_INNER || diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index b6bfa6fe09d..cf57a6e77e8 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -107,7 +107,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L * ... which is what we must prevent at all cost. */ const bool can_speculate = false; - unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32; + unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32; LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32; LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32; unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs; @@ -157,7 +157,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L return; } - unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]); + unsigned required_channels = util_last_bit(info->input[input_index].usage_mask); if (required_channels == 0) { for (unsigned i = 0; i < 4; ++i) out[i] = LLVMGetUndef(ctx->ac.f32); diff --git a/src/gallium/drivers/radeonsi/si_shader_nir.c b/src/gallium/drivers/radeonsi/si_shader_nir.c index 0da9054b561..5de678b62d4 100644 --- a/src/gallium/drivers/radeonsi/si_shader_nir.c +++ b/src/gallium/drivers/radeonsi/si_shader_nir.c @@ -108,25 +108,25 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1; if (is_input) { - assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask)); + assert(driver_location + num_slots <= ARRAY_SIZE(info->input)); for (unsigned i = 0; i < num_slots; i++) { unsigned loc = driver_location + i; - info->input_semantic[loc] = semantic + i; + info->input[loc].semantic = semantic + i; if (semantic == SYSTEM_VALUE_PRIMITIVE_ID) - info->input_interpolate[loc] = INTERP_MODE_FLAT; + info->input[loc].interpolate = INTERP_MODE_FLAT; else - info->input_interpolate[loc] = interp; + info->input[loc].interpolate = interp; if (mask) { - info->input_usage_mask[loc] |= mask; + info->input[loc].usage_mask |= mask; if (bit_size == 16) { if (nir_intrinsic_io_semantics(intr).high_16bits) - info->input_fp16_lo_hi_valid[loc] |= 0x2; + info->input[loc].fp16_lo_hi_valid |= 0x2; else - info->input_fp16_lo_hi_valid[loc] |= 0x1; + info->input[loc].fp16_lo_hi_valid |= 0x1; } info->num_inputs = MAX2(info->num_inputs, loc + 1); } @@ -517,9 +517,9 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf if (nir->info.stage == MESA_SHADER_FRAGMENT) { for (unsigned i = 0; i < 2; i++) { if ((info->colors_read >> (i * 4)) & 0xf) { - info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i; - info->input_interpolate[info->num_inputs] = info->color_interpolate[i]; - info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4); + info->input[info->num_inputs].semantic = VARYING_SLOT_COL0 + i; + info->input[info->num_inputs].interpolate = info->color_interpolate[i]; + info->input[info->num_inputs].usage_mask = info->colors_read >> (i * 4); info->num_inputs++; } } diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index c8235ddf15f..8d9068bf843 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2962,7 +2962,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx, case MESA_SHADER_FRAGMENT: for (i = 0; i < sel->info.num_inputs; i++) { - unsigned semantic = sel->info.input_semantic[i]; + unsigned semantic = sel->info.input[i].semantic; if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) && semantic != VARYING_SLOT_PNTC) { @@ -2975,9 +2975,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx, sel->colors_written_4bit |= 0xf << (4 * i); for (i = 0; i < sel->info.num_inputs; i++) { - if (sel->info.input_semantic[i] == VARYING_SLOT_COL0) + if (sel->info.input[i].semantic == VARYING_SLOT_COL0) sel->color_attr_index[0] = i; - else if (sel->info.input_semantic[i] == VARYING_SLOT_COL1) + else if (sel->info.input[i].semantic == VARYING_SLOT_COL1) sel->color_attr_index[1] = i; } break; @@ -3605,9 +3605,9 @@ static void si_emit_spi_map(struct si_context *sctx) assert(num_interp > 0); for (i = 0; i < psinfo->num_inputs; i++) { - unsigned semantic = psinfo->input_semantic[i]; - unsigned interpolate = psinfo->input_interpolate[i]; - ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i]; + unsigned semantic = psinfo->input[i].semantic; + unsigned interpolate = psinfo->input[i].interpolate; + ubyte fp16_lo_hi_mask = psinfo->input[i].fp16_lo_hi_valid; spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate, fp16_lo_hi_mask);