radeonsi: interleave si_shader_info::input_* in memory for faster emit_spi_map
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12343>
This commit is contained in:
parent
11d1309d82
commit
46802f7b60
|
@ -323,6 +323,16 @@ enum si_color_output_type {
|
|||
SI_TYPE_UINT16,
|
||||
};
|
||||
|
||||
/* Per-input metadata for one shader input slot.
 *
 * The four byte-sized fields that used to live in separate
 * si_shader_info::input_* arrays are grouped here so that everything
 * describing a single input is adjacent in memory (the commit subject gives
 * faster si_emit_spi_map as the motivation). The struct is anonymous, so
 * users access the fields directly, e.g. info->input[i].semantic.
 */
union si_input_info {
|
||||
struct {
|
||||
ubyte semantic; /* slot/semantic value of the input, e.g. VARYING_SLOT_COL0 + i */
|
||||
ubyte interpolate; /* interpolation mode; INTERP_MODE_FLAT is used for SYSTEM_VALUE_PRIMITIVE_ID */
|
||||
ubyte fp16_lo_hi_valid; /* 16-bit loads only: bit 0x1 = low half used, bit 0x2 = high half (high_16bits) used */
|
||||
ubyte usage_mask; /* OR of the channel masks read from this input */
|
||||
};
|
||||
uint32_t _unused; /* this just forces 4-byte alignment */
|
||||
};
|
||||
|
||||
struct si_shader_info {
|
||||
shader_info base;
|
||||
|
||||
|
@ -330,10 +340,7 @@ struct si_shader_info {
|
|||
|
||||
ubyte num_inputs;
|
||||
ubyte num_outputs;
|
||||
ubyte input_semantic[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte input_fp16_lo_hi_valid[PIPE_MAX_SHADER_INPUTS];
|
||||
union si_input_info input[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
|
||||
char output_semantic_to_slot[VARYING_SLOT_VAR15_16BIT + 1];
|
||||
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
|
||||
|
|
|
@ -52,7 +52,7 @@ static LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi, unsigned in
|
|||
unsigned param;
|
||||
LLVMValueRef value;
|
||||
|
||||
param = si_shader_io_get_unique_index(info->input_semantic[input_index], false);
|
||||
param = si_shader_io_get_unique_index(info->input[input_index].semantic, false);
|
||||
|
||||
/* GFX9 has the ESGS ring in LDS. */
|
||||
if (ctx->screen->info.chip_class >= GFX9) {
|
||||
|
|
|
@ -390,7 +390,7 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
|
|||
ubyte semantic;
|
||||
|
||||
if (load_input) {
|
||||
semantic = info->input_semantic[driver_location];
|
||||
semantic = info->input[driver_location].semantic;
|
||||
} else {
|
||||
semantic = info->output_semantic[driver_location];
|
||||
}
|
||||
|
@ -448,7 +448,7 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
|
|||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
LLVMValueRef base, addr;
|
||||
|
||||
ubyte semantic = info->input_semantic[driver_location];
|
||||
ubyte semantic = info->input[driver_location].semantic;
|
||||
|
||||
assert((semantic >= VARYING_SLOT_PATCH0 ||
|
||||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
|
||||
|
|
|
@ -107,7 +107,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
|||
* ... which is what we must prevent at all cost.
|
||||
*/
|
||||
const bool can_speculate = false;
|
||||
unsigned bit_size = info->input_fp16_lo_hi_valid[input_index] & 0x1 ? 16 : 32;
|
||||
unsigned bit_size = info->input[input_index].fp16_lo_hi_valid & 0x1 ? 16 : 32;
|
||||
LLVMTypeRef int_type = bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32;
|
||||
LLVMTypeRef float_type = bit_size == 16 ? ctx->ac.f16 : ctx->ac.f32;
|
||||
unsigned num_vbos_in_user_sgprs = ctx->shader->selector->num_vbos_in_user_sgprs;
|
||||
|
@ -157,7 +157,7 @@ static void load_input_vs(struct si_shader_context *ctx, unsigned input_index, L
|
|||
return;
|
||||
}
|
||||
|
||||
unsigned required_channels = util_last_bit(info->input_usage_mask[input_index]);
|
||||
unsigned required_channels = util_last_bit(info->input[input_index].usage_mask);
|
||||
if (required_channels == 0) {
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
out[i] = LLVMGetUndef(ctx->ac.f32);
|
||||
|
|
|
@ -108,25 +108,25 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
|||
unsigned num_slots = indirect ? nir_intrinsic_io_semantics(intr).num_slots : 1;
|
||||
|
||||
if (is_input) {
|
||||
assert(driver_location + num_slots <= ARRAY_SIZE(info->input_usage_mask));
|
||||
assert(driver_location + num_slots <= ARRAY_SIZE(info->input));
|
||||
|
||||
for (unsigned i = 0; i < num_slots; i++) {
|
||||
unsigned loc = driver_location + i;
|
||||
|
||||
info->input_semantic[loc] = semantic + i;
|
||||
info->input[loc].semantic = semantic + i;
|
||||
|
||||
if (semantic == SYSTEM_VALUE_PRIMITIVE_ID)
|
||||
info->input_interpolate[loc] = INTERP_MODE_FLAT;
|
||||
info->input[loc].interpolate = INTERP_MODE_FLAT;
|
||||
else
|
||||
info->input_interpolate[loc] = interp;
|
||||
info->input[loc].interpolate = interp;
|
||||
|
||||
if (mask) {
|
||||
info->input_usage_mask[loc] |= mask;
|
||||
info->input[loc].usage_mask |= mask;
|
||||
if (bit_size == 16) {
|
||||
if (nir_intrinsic_io_semantics(intr).high_16bits)
|
||||
info->input_fp16_lo_hi_valid[loc] |= 0x2;
|
||||
info->input[loc].fp16_lo_hi_valid |= 0x2;
|
||||
else
|
||||
info->input_fp16_lo_hi_valid[loc] |= 0x1;
|
||||
info->input[loc].fp16_lo_hi_valid |= 0x1;
|
||||
}
|
||||
info->num_inputs = MAX2(info->num_inputs, loc + 1);
|
||||
}
|
||||
|
@ -517,9 +517,9 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
|||
if (nir->info.stage == MESA_SHADER_FRAGMENT) {
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
if ((info->colors_read >> (i * 4)) & 0xf) {
|
||||
info->input_semantic[info->num_inputs] = VARYING_SLOT_COL0 + i;
|
||||
info->input_interpolate[info->num_inputs] = info->color_interpolate[i];
|
||||
info->input_usage_mask[info->num_inputs] = info->colors_read >> (i * 4);
|
||||
info->input[info->num_inputs].semantic = VARYING_SLOT_COL0 + i;
|
||||
info->input[info->num_inputs].interpolate = info->color_interpolate[i];
|
||||
info->input[info->num_inputs].usage_mask = info->colors_read >> (i * 4);
|
||||
info->num_inputs++;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2962,7 +2962,7 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
for (i = 0; i < sel->info.num_inputs; i++) {
|
||||
unsigned semantic = sel->info.input_semantic[i];
|
||||
unsigned semantic = sel->info.input[i].semantic;
|
||||
|
||||
if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
|
||||
semantic != VARYING_SLOT_PNTC) {
|
||||
|
@ -2975,9 +2975,9 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
|
|||
sel->colors_written_4bit |= 0xf << (4 * i);
|
||||
|
||||
for (i = 0; i < sel->info.num_inputs; i++) {
|
||||
if (sel->info.input_semantic[i] == VARYING_SLOT_COL0)
|
||||
if (sel->info.input[i].semantic == VARYING_SLOT_COL0)
|
||||
sel->color_attr_index[0] = i;
|
||||
else if (sel->info.input_semantic[i] == VARYING_SLOT_COL1)
|
||||
else if (sel->info.input[i].semantic == VARYING_SLOT_COL1)
|
||||
sel->color_attr_index[1] = i;
|
||||
}
|
||||
break;
|
||||
|
@ -3605,9 +3605,9 @@ static void si_emit_spi_map(struct si_context *sctx)
|
|||
assert(num_interp > 0);
|
||||
|
||||
for (i = 0; i < psinfo->num_inputs; i++) {
|
||||
unsigned semantic = psinfo->input_semantic[i];
|
||||
unsigned interpolate = psinfo->input_interpolate[i];
|
||||
ubyte fp16_lo_hi_mask = psinfo->input_fp16_lo_hi_valid[i];
|
||||
unsigned semantic = psinfo->input[i].semantic;
|
||||
unsigned interpolate = psinfo->input[i].interpolate;
|
||||
ubyte fp16_lo_hi_mask = psinfo->input[i].fp16_lo_hi_valid;
|
||||
|
||||
spi_ps_input_cntl[num_written++] = si_get_ps_input_cntl(sctx, vs, semantic, interpolate,
|
||||
fp16_lo_hi_mask);
|
||||
|
|
Loading…
Reference in New Issue