broadcom/compiler: don't emit redundant ldunif

If we emit a new uniform and that uniform has already been emitted
in the same block we can just reuse that.

There is a balancing act here between reducing ldunif instructions
and not increasing register pressure too much, so we put
a limit on how far back we are willing to look for a previous
definition of the uniform. Based on shader-db results, a limit of
20 instructions produces the best results.

total instructions in shared programs: 14928266 -> 14907432 (-0.14%)
instructions in affected programs: 6431841 -> 6411007 (-0.32%)
helped: 15270
HURT: 10772
Instructions are helped.

total uniforms in shared programs: 3944672 -> 3840276 (-2.65%)
uniforms in affected programs: 1827184 -> 1722788 (-5.71%)
helped: 30423
HURT: 845
Uniforms are helped.

total inst-and-stalls in shared programs: 14957813 -> 14936873 (-0.14%)
inst-and-stalls in affected programs: 6475349 -> 6454409 (-0.32%)
helped: 15287
HURT: 10852
Inst-and-stalls are helped.

v2 (Eric):
 - consider ldunifrf too
 - check that no other instruction writes to the register

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9077>
This commit is contained in:
Iago Toral Quiroga 2021-02-16 09:07:04 +01:00
parent 8dd54778fa
commit 14af7b3085
3 changed files with 67 additions and 1 deletions

View File

@ -643,6 +643,13 @@ struct v3d_compile {
*/
bool disable_tmu_pipelining;
/* Emits ldunif for each new uniform, even if the uniform was already
* emitted in the same block. Useful to compile shaders with high
* register pressure or to disable the optimization during uniform
* spills.
*/
bool disable_ldunif_opt;
/* State for whether we're executing on each channel currently. 0 if
* yes, otherwise a block number + 1 that the channel jumped to.
*/

View File

@ -1403,14 +1403,67 @@ vir_get_uniform_index(struct v3d_compile *c,
return uniform;
}
/* Searches backwards through the current block for an earlier ldunif or
 * ldunifrf that loaded the uniform at 'index'. On success the destination
 * register of that instruction is stored in 'unif' and true is returned;
 * otherwise false is returned and 'unif' is left untouched.
 *
 * Reusing an older load can impact register pressure and lead to worse
 * code, so the search gives up after a fixed number of instructions.
 */
static bool
try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif)
{
        /* How many instructions we are still willing to look back through. */
        uint32_t remaining = 20;
        struct qinst *load = NULL;

        vir_for_each_inst_rev(inst, c->cur_block) {
                const bool loads_uniform =
                        inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf;

                if (loads_uniform && inst->uniform == index) {
                        load = inst;
                        break;
                }

                remaining--;
                if (remaining == 0)
                        break;
        }

        if (load == NULL)
                return false;

        /* Walk forward from the instruction right after the load: if any
         * of them writes the same destination register, the loaded value
         * is gone and cannot be reused.
         */
        list_for_each_entry_from(struct qinst, inst, load->link.next,
                                 &c->cur_block->instructions, link) {
                if (inst->dst.file != load->dst.file)
                        continue;

                if (inst->dst.index == load->dst.index)
                        return false;
        }

        *unif = load->dst;
        return true;
}
struct qreg
vir_uniform(struct v3d_compile *c,
enum quniform_contents contents,
uint32_t data)
{
const int num_uniforms = c->num_uniforms;
const int index = vir_get_uniform_index(c, contents, data);
/* If this is not the first time we see this uniform try to reuse the
* result of the last ldunif that loaded it.
*/
const bool is_new_uniform = num_uniforms != c->num_uniforms;
if (!is_new_uniform && !c->disable_ldunif_opt) {
struct qreg ldunif_dst;
if (try_opt_ldunif(c, index, &ldunif_dst))
return ldunif_dst;
}
struct qinst *inst = vir_NOP(c);
inst->qpu.sig.ldunif = true;
inst->uniform = vir_get_uniform_index(c, contents, data);
inst->uniform = index;
inst->dst = vir_get_temp(c);
c->defs[inst->dst.index] = inst;
return inst->dst;

View File

@ -259,6 +259,10 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
uniform_index = orig_unif->uniform;
}
/* We must disable the ldunif optimization if we are spilling uniforms */
bool had_disable_ldunif_opt = c->disable_ldunif_opt;
c->disable_ldunif_opt = true;
struct qinst *start_of_tmu_sequence = NULL;
struct qinst *postponed_spill = NULL;
vir_for_each_block(block, c) {
@ -360,6 +364,8 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
*/
for (int i = start_num_temps; i < c->num_temps; i++)
BITSET_CLEAR(c->spillable, i);
c->disable_ldunif_opt = had_disable_ldunif_opt;
}
struct v3d_ra_select_callback_data {