radeonsi: interleave si_shader_info::input_* in memory for faster emit_spi_map

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
Marek Olšák 2021-08-11 00:48:17 -04:00 committed by Marge Bot
parent 11d1309d82
commit 46802f7b60
6 changed files with 32 additions and 25 deletions

View File

@ -323,6 +323,16 @@ enum si_color_output_type {
SI_TYPE_UINT16,
};
/**
 * Per-input shader information.
 *
 * The properties of one shader input are grouped into a single element so
 * that code iterating over inputs (e.g. si_emit_spi_map, which reads
 * semantic, interpolate and fp16_lo_hi_valid for every input) touches one
 * contiguous record per input instead of several separate arrays.
 */
union si_input_info {
struct {
/* Semantic of the input (e.g. a VARYING_SLOT_* value). */
ubyte semantic;
/* Interpolation mode of the input (e.g. INTERP_MODE_FLAT). */
ubyte interpolate;
/* 16-bit usage flags: 0x1 is set when the low 16 bits are loaded,
 * 0x2 when the high 16 bits are loaded.
 */
ubyte fp16_lo_hi_valid;
/* Bitmask of the components of this input that the shader reads. */
ubyte usage_mask;
};
uint32_t _unused; /* this just forces 4-byte alignment */
};
struct si_shader_info {
shader_info base;
@ -330,10 +340,7 @@ struct si_shader_info {
ubyte num_inputs;
ubyte num_outputs;
ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS];
union si_input_info input[PIPE_MAX_SHADER_INPUTS];
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];

View File

@ -52,7 +52,7 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
unsigned param;
LLVMValueRef value;
param = si_shader_io_get_unique_index(info->input_semantic[input_index], false);
param = si_shader_io_get_unique_index(info->input[input_index].semantic, false);
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->info.chip_class >= GFX9) {

View File

@ -390,7 +390,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
ubyte semantic;
if (load_input) {
semantic = info->input_semantic[driver_location];
semantic = info->input[driver_location].semantic;
} else {
semantic = info->output_semantic[driver_location];
}
@ -448,7 +448,7 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
struct si_shader_info *info = &ctx->shader->selector->info;
LLVMValueRef base, addr;
ubyte semantic = info->input_semantic[driver_location];
ubyte semantic = info->input[driver_location].semantic;
assert((semantic >= VARYING_SLOT_PATCH0 ||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||

View File

@ -107,7 +107,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
* ... which is what we must prevent at all cost.
*/
const bool can_speculate = false;
unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32;
unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32;
LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
@ -157,7 +157,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
return;
}
unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]);
unsigned required_channels = util_last_bit(info->input[input_index].usage_mask);
if (required_channels == 0) {
for (unsigned i = 0; i < 4; ++i)
out[i] = LLVMGetUndef(ctx->ac.f32);

View File

@ -108,25 +108,25 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;
if (is_input) {
assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask));
assert(driver_location + num_slots <= ARRAY_SIZE(info->input));
for (unsigned i = 0; i < num_slots; i++) {
unsigned loc = driver_location + i;
info->input_semantic[loc] = semantic + i;
info->input[loc].semantic = semantic + i;
if (semantic == SYSTEM_VALUE_PRIMITIVE_ID)
info->input_interpolate[loc] = INTERP_MODE_FLAT;
info->input[loc].interpolate = INTERP_MODE_FLAT;
else
info->input_interpolate[loc] = interp;
info->input[loc].interpolate = interp;
if (mask) {
info->input_usage_mask[loc] |= mask;
info->input[loc].usage_mask |= mask;
if (bit_size == 16) {
if (nir_intrinsic_io_semantics(intr).high_16bits)
info->input_fp16_lo_hi_valid[loc] |= 0x2;
info->input[loc].fp16_lo_hi_valid |= 0x2;
else
info->input_fp16_lo_hi_valid[loc] |= 0x1;
info->input[loc].fp16_lo_hi_valid |= 0x1;
}
info->num_inputs = MAX2(info->num_inputs, loc + 1);
}
@ -517,9 +517,9 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
for (unsigned i = 0; i < 2; i++) {
if ((info->colors_read >> (i * 4)) & 0xf) {
info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i;
info->input_interpolate[info->num_inputs] = info->color_interpolate[i];
info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4);
info->input[info->num_inputs].semantic = VARYING_SLOT_COL0 + i;
info->input[info->num_inputs].interpolate = info->color_interpolate[i];
info->input[info->num_inputs].usage_mask = info->colors_read >> (i * 4);
info->num_inputs++;
}
}

View File

@ -2962,7 +2962,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
case MESA_SHADER_FRAGMENT:
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned semantic = sel->info.input_semantic[i];
unsigned semantic = sel->info.input[i].semantic;
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
semantic != VARYING_SLOT_PNTC) {
@ -2975,9 +2975,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
sel->colors_written_4bit |= 0xf << (4 * i);
for (i = 0; i < sel->info.num_inputs; i++) {
if (sel->info.input_semantic[i] == VARYING_SLOT_COL0)
if (sel->info.input[i].semantic == VARYING_SLOT_COL0)
sel->color_attr_index[0] = i;
else if (sel->info.input_semantic[i] == VARYING_SLOT_COL1)
else if (sel->info.input[i].semantic == VARYING_SLOT_COL1)
sel->color_attr_index[1] = i;
}
break;
@ -3605,9 +3605,9 @@ static void si_emit_spi_map(struct si_context *sctx)
assert(num_interp > 0);
for (i = 0; i < psinfo->num_inputs; i++) {
unsigned semantic = psinfo->input_semantic[i];
unsigned interpolate = psinfo->input_interpolate[i];
ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i];
unsigned semantic = psinfo->input[i].semantic;
unsigned interpolate = psinfo->input[i].interpolate;
ubyte fp16_lo_hi_mask = psinfo->input[i].fp16_lo_hi_valid;
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate,
fp16_lo_hi_mask);