broadcom/compiler: skip unnecessary unifa writes
If a new UBO load reads exactly at the offset right after the end of the previous UBO load (something that is fairly common, for example when reading a matrix), we can skip the unifa write (with its 3 delay slots) and just keep issuing ldunifa to read the consecutive addresses. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9128>
This commit is contained in:
parent
e1cf2406da
commit
54c17e45ae
|
@ -2589,17 +2589,34 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
|
||||
index++;
|
||||
|
||||
struct qreg base_offset =
|
||||
vir_uniform(c, QUNIFORM_UBO_ADDR,
|
||||
v3d_unit_data_create(index, const_offset));
|
||||
const_offset = 0;
|
||||
|
||||
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
|
||||
if (!dynamic_src) {
|
||||
vir_MOV_dest(c, unifa, base_offset);
|
||||
/* We can only keep track of the last unifa address we used with
|
||||
* constant offset loads.
|
||||
*/
|
||||
bool skip_unifa = false;
|
||||
if (dynamic_src) {
|
||||
c->last_unifa_block = NULL;
|
||||
} else if (c->cur_block == c->last_unifa_block &&
|
||||
c->last_unifa_index == index &&
|
||||
c->last_unifa_offset == const_offset) {
|
||||
skip_unifa = true;
|
||||
} else {
|
||||
vir_ADD_dest(c, unifa, base_offset,
|
||||
ntq_get_src(c, instr->src[1], 0));
|
||||
c->last_unifa_block = c->cur_block;
|
||||
c->last_unifa_index = index;
|
||||
c->last_unifa_offset = const_offset;
|
||||
}
|
||||
|
||||
if (!skip_unifa) {
|
||||
struct qreg base_offset =
|
||||
vir_uniform(c, QUNIFORM_UBO_ADDR,
|
||||
v3d_unit_data_create(index, const_offset));
|
||||
|
||||
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
|
||||
if (!dynamic_src) {
|
||||
vir_MOV_dest(c, unifa, base_offset);
|
||||
} else {
|
||||
vir_ADD_dest(c, unifa, base_offset,
|
||||
ntq_get_src(c, instr->src[1], 0));
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < nir_intrinsic_dest_components(instr); i++) {
|
||||
|
@ -2608,6 +2625,7 @@ ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
ldunifa->qpu.sig.ldunifa = true;
|
||||
struct qreg data = vir_emit_def(c, ldunifa);
|
||||
ntq_store_dest(c, &instr->dest, i, vir_MOV(c, data));
|
||||
c->last_unifa_offset += 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -650,6 +650,15 @@ struct v3d_compile {
|
|||
*/
|
||||
bool disable_ldunif_opt;
|
||||
|
||||
/* Last UBO index and offset used with a unifa/ldunifa sequence and the
|
||||
* block where it was emitted. This is used to skip unifa writes (and
|
||||
* their 3 delay slots) when the next UBO load reads right after the
|
||||
* previous one in the same block.
|
||||
*/
|
||||
struct qblock *last_unifa_block;
|
||||
int32_t last_unifa_index;
|
||||
uint32_t last_unifa_offset;
|
||||
|
||||
/* State for whether we're executing on each channel currently. 0 if
|
||||
* yes, otherwise a block number + 1 that the channel jumped to.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue