broadcom/compiler: don't emit redundant ldunif
If we emit a new uniform and that uniform has already been emitted in the same block, we can just reuse the earlier load.

There is a balancing game here between reducing ldunif instructions and not increasing register pressure too much, though, so we put a limit on how far back we are willing to look for a previous definition of the uniform. Based on shader-db results, looking back 20 instructions produces the best results.

total instructions in shared programs: 14928266 -> 14907432 (-0.14%)
instructions in affected programs: 6431841 -> 6411007 (-0.32%)
helped: 15270
HURT: 10772
Instructions are helped.

total uniforms in shared programs: 3944672 -> 3840276 (-2.65%)
uniforms in affected programs: 1827184 -> 1722788 (-5.71%)
helped: 30423
HURT: 845
Uniforms are helped.

total inst-and-stalls in shared programs: 14957813 -> 14936873 (-0.14%)
inst-and-stalls in affected programs: 6475349 -> 6454409 (-0.32%)
helped: 15287
HURT: 10852
Inst-and-stalls are helped.

v2 (Eric):
- consider ldunifrf too
- check that no other instruction writes to the register

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9077>
This commit is contained in:
parent
8dd54778fa
commit
14af7b3085
|
@ -643,6 +643,13 @@ struct v3d_compile {
|
|||
*/
|
||||
bool disable_tmu_pipelining;
|
||||
|
||||
/* Emits ldunif for each new uniform, even if the uniform was already
|
||||
* emitted in the same block. Useful to compile shaders with high
|
||||
* register pressure or to disable the optimization during uniform
|
||||
* spills.
|
||||
*/
|
||||
bool disable_ldunif_opt;
|
||||
|
||||
/* State for whether we're executing on each channel currently. 0 if
|
||||
* yes, otherwise a block number + 1 that the channel jumped to.
|
||||
*/
|
||||
|
|
|
@ -1403,14 +1403,67 @@ vir_get_uniform_index(struct v3d_compile *c,
|
|||
return uniform;
|
||||
}
|
||||
|
||||
/* Looks back into the current block to find the ldunif that wrote the uniform
|
||||
* at the requested index. If it finds it, it returns true and writes the
|
||||
* destination register of the ldunif instruction to 'unif'.
|
||||
*
|
||||
* This can impact register pressure and end up leading to worse code, so we
|
||||
* limit the number of instructions we are willing to look back through to
|
||||
* strike a good balance.
|
||||
*/
|
||||
static bool
|
||||
try_opt_ldunif(struct v3d_compile *c, uint32_t index, struct qreg *unif)
|
||||
{
|
||||
uint32_t count = 20;
|
||||
struct qinst *prev_inst = NULL;
|
||||
vir_for_each_inst_rev(inst, c->cur_block) {
|
||||
if ((inst->qpu.sig.ldunif || inst->qpu.sig.ldunifrf) &&
|
||||
inst->uniform == index) {
|
||||
prev_inst = inst;
|
||||
break;
|
||||
}
|
||||
|
||||
if (--count == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!prev_inst)
|
||||
return false;
|
||||
|
||||
|
||||
list_for_each_entry_from(struct qinst, inst, prev_inst->link.next,
|
||||
&c->cur_block->instructions, link) {
|
||||
if (inst->dst.file == prev_inst->dst.file &&
|
||||
inst->dst.index == prev_inst->dst.index) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
*unif = prev_inst->dst;
|
||||
return true;
|
||||
}
|
||||
|
||||
struct qreg
|
||||
vir_uniform(struct v3d_compile *c,
|
||||
enum quniform_contents contents,
|
||||
uint32_t data)
|
||||
{
|
||||
const int num_uniforms = c->num_uniforms;
|
||||
const int index = vir_get_uniform_index(c, contents, data);
|
||||
|
||||
/* If this is not the first time we see this uniform try to reuse the
|
||||
* result of the last ldunif that loaded it.
|
||||
*/
|
||||
const bool is_new_uniform = num_uniforms != c->num_uniforms;
|
||||
if (!is_new_uniform && !c->disable_ldunif_opt) {
|
||||
struct qreg ldunif_dst;
|
||||
if (try_opt_ldunif(c, index, &ldunif_dst))
|
||||
return ldunif_dst;
|
||||
}
|
||||
|
||||
struct qinst *inst = vir_NOP(c);
|
||||
inst->qpu.sig.ldunif = true;
|
||||
inst->uniform = vir_get_uniform_index(c, contents, data);
|
||||
inst->uniform = index;
|
||||
inst->dst = vir_get_temp(c);
|
||||
c->defs[inst->dst.index] = inst;
|
||||
return inst->dst;
|
||||
|
|
|
@ -259,6 +259,10 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
|
|||
uniform_index = orig_unif->uniform;
|
||||
}
|
||||
|
||||
/* We must disable the ldunif optimization if we are spilling uniforms */
|
||||
bool had_disable_ldunif_opt = c->disable_ldunif_opt;
|
||||
c->disable_ldunif_opt = true;
|
||||
|
||||
struct qinst *start_of_tmu_sequence = NULL;
|
||||
struct qinst *postponed_spill = NULL;
|
||||
vir_for_each_block(block, c) {
|
||||
|
@ -360,6 +364,8 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
|
|||
*/
|
||||
for (int i = start_num_temps; i < c->num_temps; i++)
|
||||
BITSET_CLEAR(c->spillable, i);
|
||||
|
||||
c->disable_ldunif_opt = had_disable_ldunif_opt;
|
||||
}
|
||||
|
||||
struct v3d_ra_select_callback_data {
|
||||
|
|
Loading…
Reference in New Issue