broadcom/compiler: skip unnecessary unifa writes

If a new UBO load happens to read exactly at the offset right after the
previous UBO load (something that is fairly common, for example when
reading a matrix), we can skip the unifa write (with its 3 delay slots)
and just keep calling ldunifa to read the consecutive addresses.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9128>
This commit is contained in:
Iago Toral Quiroga 2021-02-18 08:32:13 +01:00 committed by Marge Bot
parent e1cf2406da
commit 54c17e45ae
2 changed files with 37 additions and 10 deletions

View File

@ -2589,17 +2589,34 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
index++;
struct qreg base_offset =
vir_uniform(c, QUNIFORM_UBO_ADDR,
v3d_unit_data_create(index, const_offset));
const_offset = 0;
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
if (!dynamic_src) {
vir_MOV_dest(c, unifa, base_offset);
/* We can only keep track of the last unifa address we used with
* constant offset loads.
*/
bool skip_unifa = false;
if (dynamic_src) {
c->last_unifa_block = NULL;
} else if (c->cur_block == c->last_unifa_block &&
c->last_unifa_index == index &&
c->last_unifa_offset == const_offset) {
skip_unifa = true;
} else {
vir_ADD_dest(c, unifa, base_offset,
ntq_get_src(c, instr->src[1], 0));
c->last_unifa_block = c->cur_block;
c->last_unifa_index = index;
c->last_unifa_offset = const_offset;
}
if (!skip_unifa) {
struct qreg base_offset =
vir_uniform(c, QUNIFORM_UBO_ADDR,
v3d_unit_data_create(index, const_offset));
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
if (!dynamic_src) {
vir_MOV_dest(c, unifa, base_offset);
} else {
vir_ADD_dest(c, unifa, base_offset,
ntq_get_src(c, instr->src[1], 0));
}
}
for (uint32_t i = 0; i < nir_intrinsic_dest_components(instr); i++) {
@ -2608,6 +2625,7 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
ldunifa->qpu.sig.ldunifa = true;
struct qreg data = vir_emit_def(c, ldunifa);
ntq_store_dest(c, &instr->dest, i, vir_MOV(c, data));
c->last_unifa_offset += 4;
}
}

View File

@ -650,6 +650,15 @@ struct v3d_compile {
*/
bool disable_ldunif_opt;
/* Last UBO index and offset used with a unifa/ldunifa sequence and the
* block where it was emitted. This is used to skip unifa writes (and
* their 3 delay slots) when the next UBO load reads right after the
* previous one in the same block.
*/
struct qblock *last_unifa_block;
int32_t last_unifa_index;
uint32_t last_unifa_offset;
/* State for whether we're executing on each channel currently. 0 if
* yes, otherwise a block number + 1 that the channel jumped to.
*/