radeonsi: optimize out the loop in si_get_ps_input_cntl
Use a remap table from a semantic to an output slot index instead of searching the outputs for the matching index.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6340>
This commit is contained in:
parent
6ecb8b6899
commit
98e866c669
|
@ -327,6 +327,7 @@ struct si_shader_info {
|
|||
ubyte input_interpolate[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte input_usage_mask[PIPE_MAX_SHADER_INPUTS];
|
||||
ubyte output_semantic[PIPE_MAX_SHADER_OUTPUTS];
|
||||
char output_semantic_to_slot[VARYING_SLOT_TESS_MAX];
|
||||
ubyte output_usagemask[PIPE_MAX_SHADER_OUTPUTS];
|
||||
ubyte output_readmask[PIPE_MAX_SHADER_OUTPUTS];
|
||||
ubyte output_streams[PIPE_MAX_SHADER_OUTPUTS];
|
||||
|
|
|
@ -143,12 +143,14 @@ static void scan_io_usage(struct si_shader_info *info, nir_intrinsic_instr *intr
|
|||
} else {
|
||||
/* Outputs. */
|
||||
assert(driver_location + num_slots <= ARRAY_SIZE(info->output_usagemask));
|
||||
assert(semantic + num_slots < ARRAY_SIZE(info->output_semantic_to_slot));
|
||||
|
||||
for (unsigned i = 0; i < num_slots; i++) {
|
||||
unsigned loc = driver_location + i;
|
||||
unsigned slot_mask = (dual_slot && i % 2 ? mask >> 4 : mask) & 0xf;
|
||||
|
||||
info->output_semantic[loc] = semantic + i;
|
||||
info->output_semantic_to_slot[semantic + i] = loc;
|
||||
|
||||
if (is_output_load) {
|
||||
/* Output loads have only a few things that we need to track. */
|
||||
|
@ -556,6 +558,8 @@ void si_nir_scan_shader(const struct nir_shader *nir, struct si_shader_info *inf
|
|||
info->tessfactors_are_def_in_all_invocs = ac_are_tessfactors_def_in_all_invocs(nir);
|
||||
}
|
||||
|
||||
memset(info->output_semantic_to_slot, -1, sizeof(info->output_semantic_to_slot));
|
||||
|
||||
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
||||
nir_foreach_block (block, func->impl) {
|
||||
nir_foreach_instr (instr, block)
|
||||
|
|
|
@ -3171,7 +3171,7 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
|
|||
unsigned semantic, enum glsl_interp_mode interpolate)
|
||||
{
|
||||
struct si_shader_info *vsinfo = &vs->selector->info;
|
||||
unsigned j, offset, ps_input_cntl = 0;
|
||||
unsigned offset, ps_input_cntl = 0;
|
||||
|
||||
if (interpolate == INTERP_MODE_FLAT ||
|
||||
(interpolate == INTERP_MODE_COLOR && sctx->flatshade) ||
|
||||
|
@ -3184,43 +3184,42 @@ static unsigned si_get_ps_input_cntl(struct si_context *sctx, struct si_shader *
|
|||
ps_input_cntl |= S_028644_PT_SPRITE_TEX(1);
|
||||
}
|
||||
|
||||
/* TODO: This search can be removed if we add a lookup table from semantic to index. */
|
||||
for (j = 0; j < vsinfo->num_outputs; j++) {
|
||||
if (semantic == vsinfo->output_semantic[j]) {
|
||||
offset = vs->info.vs_output_param_offset[j];
|
||||
int vs_slot = vsinfo->output_semantic_to_slot[semantic];
|
||||
if (vs_slot >= 0) {
|
||||
offset = vs->info.vs_output_param_offset[vs_slot];
|
||||
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
||||
/* The input is loaded from parameter memory. */
|
||||
ps_input_cntl |= S_028644_OFFSET(offset);
|
||||
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||
if (offset == AC_EXP_PARAM_UNDEFINED) {
|
||||
/* This can happen with depth-only rendering. */
|
||||
offset = 0;
|
||||
} else {
|
||||
/* The input is a DEFAULT_VAL constant. */
|
||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
|
||||
}
|
||||
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
|
||||
if (offset <= AC_EXP_PARAM_OFFSET_31) {
|
||||
/* The input is loaded from parameter memory. */
|
||||
ps_input_cntl |= S_028644_OFFSET(offset);
|
||||
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||
if (offset == AC_EXP_PARAM_UNDEFINED) {
|
||||
/* This can happen with depth-only rendering. */
|
||||
offset = 0;
|
||||
} else {
|
||||
/* The input is a DEFAULT_VAL constant. */
|
||||
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
|
||||
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
|
||||
offset -= AC_EXP_PARAM_DEFAULT_VAL_0000;
|
||||
}
|
||||
break;
|
||||
|
||||
ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset);
|
||||
}
|
||||
} else {
|
||||
/* VS output not found. */
|
||||
if (semantic == VARYING_SLOT_PRIMITIVE_ID) {
|
||||
/* PrimID is written after the last output when HW VS is used. */
|
||||
ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]);
|
||||
} else if (!G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||
/* No corresponding output found, load defaults into input.
|
||||
* Don't set any other bits.
|
||||
* (FLAT_SHADE=1 completely changes behavior) */
|
||||
ps_input_cntl = S_028644_OFFSET(0x20);
|
||||
/* D3D 9 behaviour. GL is undefined */
|
||||
if (semantic == VARYING_SLOT_COL0)
|
||||
ps_input_cntl |= S_028644_DEFAULT_VAL(3);
|
||||
}
|
||||
}
|
||||
|
||||
if (j == vsinfo->num_outputs && semantic == VARYING_SLOT_PRIMITIVE_ID)
|
||||
/* PrimID is written after the last output when HW VS is used. */
|
||||
ps_input_cntl |= S_028644_OFFSET(vs->info.vs_output_param_offset[vsinfo->num_outputs]);
|
||||
else if (j == vsinfo->num_outputs && !G_028644_PT_SPRITE_TEX(ps_input_cntl)) {
|
||||
/* No corresponding output found, load defaults into input.
|
||||
* Don't set any other bits.
|
||||
* (FLAT_SHADE=1 completely changes behavior) */
|
||||
ps_input_cntl = S_028644_OFFSET(0x20);
|
||||
/* D3D 9 behaviour. GL is undefined */
|
||||
if (semantic == VARYING_SLOT_COL0)
|
||||
ps_input_cntl |= S_028644_DEFAULT_VAL(3);
|
||||
}
|
||||
return ps_input_cntl;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue