broadcom/compiler: rework simultaneous peripheral access checks

This was not quite correct: our checks for the allowed cases did not
verify that there were no peripheral accesses other than the ones
allowed.

For example, we allowed a wrtmuc signal together with a TMU write other
than TMUC, and we also allowed a TMU read together with a VPM read/write.
But we cannot allow wrtmuc with a TMU write other than TMUC and, at the
same time, a VPM write. So we can't just check whether we have a
combination of allowed peripherals; we still need to check that those
are the only ones in use by the combined instructions.

Another example is that even if we allow a TMU write (other than TMUC)
with a wrtmuc signal, the resulting instruction must still have just
one TMU write other than TMUC. However, we were allowing the merge if one
instruction signaled wrtmuc and the other wrote to a TMU register other
than TMUC, without testing whether the combined result would have two TMU
writes.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13527>
This commit is contained in:
Iago Toral Quiroga 2021-10-26 11:30:15 +02:00 committed by Marge Bot
parent 2c61d89d36
commit 3fbd6662b7
3 changed files with 62 additions and 41 deletions

View File

@ -648,48 +648,56 @@ get_instruction_priority(const struct v3d_device_info *devinfo,
return baseline_score;
}
/**
 * Tells whether a magic write address is classified as a peripheral by any
 * of the v3d_qpu_magic_waddr_is_{tmu,sfu,tlb,vpm,tsy}() helpers.
 *
 * The helpers are queried in that order and the first match wins; only the
 * TMU helper takes devinfo.
 */
static bool
qpu_magic_waddr_is_periph(const struct v3d_device_info *devinfo,
                          enum v3d_qpu_waddr waddr)
{
        if (v3d_qpu_magic_waddr_is_tmu(devinfo, waddr))
                return true;

        if (v3d_qpu_magic_waddr_is_sfu(waddr))
                return true;

        if (v3d_qpu_magic_waddr_is_tlb(waddr))
                return true;

        if (v3d_qpu_magic_waddr_is_vpm(waddr))
                return true;

        return v3d_qpu_magic_waddr_is_tsy(waddr);
}
/* Peripheral access flags. Every enumerator is a distinct single bit, so
 * several of them can be OR-ed together into one mask.
 */
enum {
        V3D_PERIPHERAL_VPM_READ       = 0x001, /* bit 0 */
        V3D_PERIPHERAL_VPM_WRITE      = 0x002, /* bit 1 */
        V3D_PERIPHERAL_VPM_WAIT       = 0x004, /* bit 2 */
        V3D_PERIPHERAL_SFU            = 0x008, /* bit 3 */
        V3D_PERIPHERAL_TMU_WRITE      = 0x010, /* bit 4 */
        V3D_PERIPHERAL_TMU_READ       = 0x020, /* bit 5 */
        V3D_PERIPHERAL_TMU_WAIT       = 0x040, /* bit 6 */
        V3D_PERIPHERAL_TMU_WRTMUC_SIG = 0x080, /* bit 7 */
        V3D_PERIPHERAL_TSY            = 0x100, /* bit 8 */
        V3D_PERIPHERAL_TLB            = 0x200, /* bit 9 */
};
/* NOTE(review): this span is a diff hunk rendered without its +/- markers,
 * so lines from two versions of the function are interleaved:
 *
 *  - qpu_accesses_peripheral(), returning bool, with `return true` early
 *    exits and a final OR of the ld* / wrtmuc signal bits;
 *  - qpu_peripherals(), returning uint32_t, which ORs one V3D_PERIPHERAL_*
 *    flag into `result` for each kind of access it detects on `inst` and
 *    returns that mask.
 *
 * Presumably the bool variant is the removed side and the mask variant the
 * added side -- confirm against the real diff before reading this as code.
 */
static bool
qpu_accesses_peripheral(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
static uint32_t
qpu_peripherals(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *inst)
{
if (v3d_qpu_uses_vpm(inst))
return true;
/* Mask of V3D_PERIPHERAL_* flags accumulated below. */
uint32_t result = 0;
if (v3d_qpu_reads_vpm(inst))
result |= V3D_PERIPHERAL_VPM_READ;
if (v3d_qpu_writes_vpm(inst))
result |= V3D_PERIPHERAL_VPM_WRITE;
if (v3d_qpu_waits_vpm(inst))
result |= V3D_PERIPHERAL_VPM_WAIT;
if (v3d_qpu_writes_tmu(devinfo, inst))
result |= V3D_PERIPHERAL_TMU_WRITE;
/* The ldtmu and wrtmuc signals get their own flags, separate from TMU
 * register writes.
 */
if (inst->sig.ldtmu)
result |= V3D_PERIPHERAL_TMU_READ;
if (inst->sig.wrtmuc)
result |= V3D_PERIPHERAL_TMU_WRTMUC_SIG;
if (v3d_qpu_uses_sfu(inst))
return true;
result |= V3D_PERIPHERAL_SFU;
if (v3d_qpu_uses_tlb(inst))
result |= V3D_PERIPHERAL_TLB;
/* TSY magic writes and TMUWT are only looked for on ALU instructions. */
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
if (inst->alu.add.op != V3D_QPU_A_NOP &&
inst->alu.add.magic_write &&
qpu_magic_waddr_is_periph(devinfo, inst->alu.add.waddr)) {
return true;
v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr)) {
result |= V3D_PERIPHERAL_TSY;
}
if (inst->alu.add.op == V3D_QPU_A_TMUWT)
return true;
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
inst->alu.mul.magic_write &&
qpu_magic_waddr_is_periph(devinfo, inst->alu.mul.waddr)) {
return true;
}
result |= V3D_PERIPHERAL_TMU_WAIT;
}
return (inst->sig.ldvpm ||
inst->sig.ldtmu ||
inst->sig.ldtlb ||
inst->sig.ldtlbu ||
inst->sig.wrtmuc);
return result;
}
static bool
@ -697,26 +705,38 @@ qpu_compatible_peripheral_access(const struct v3d_device_info *devinfo,
const struct v3d_qpu_instr *a,
const struct v3d_qpu_instr *b)
{
const bool a_uses_peripheral = qpu_accesses_peripheral(devinfo, a);
const bool b_uses_peripheral = qpu_accesses_peripheral(devinfo, b);
const uint32_t a_peripherals = qpu_peripherals(devinfo, a);
const uint32_t b_peripherals = qpu_peripherals(devinfo, b);
/* We can always do one peripheral access per instruction. */
if (!a_uses_peripheral || !b_uses_peripheral)
if (util_bitcount(a_peripherals) + util_bitcount(b_peripherals) <= 1)
return true;
if (devinfo->ver < 41)
return false;
/* V3D 4.1 and later allow TMU read along with a VPM read or write, and
* WRTMUC with a TMU magic register write (other than tmuc).
/* V3D 4.1+ allow WRTMUC signal with TMU register write (other than
* tmuc).
*/
if ((a->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(b)) ||
(b->sig.ldtmu && v3d_qpu_reads_or_writes_vpm(a))) {
return true;
if (a_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG &&
b_peripherals == V3D_PERIPHERAL_TMU_WRITE) {
return v3d_qpu_writes_tmu_not_tmuc(devinfo, b);
}
if ((a->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, b)) ||
(b->sig.wrtmuc && v3d_qpu_writes_tmu_not_tmuc(devinfo, a))) {
if (a_peripherals == V3D_PERIPHERAL_TMU_WRITE &&
b_peripherals == V3D_PERIPHERAL_TMU_WRTMUC_SIG) {
return v3d_qpu_writes_tmu_not_tmuc(devinfo, a);
}
/* V3D 4.1+ allows TMU read with VPM read/write. */
if (a_peripherals == V3D_PERIPHERAL_TMU_READ &&
(b_peripherals == V3D_PERIPHERAL_VPM_READ ||
b_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
return true;
}
if (b_peripherals == V3D_PERIPHERAL_TMU_READ &&
(a_peripherals == V3D_PERIPHERAL_VPM_READ ||
a_peripherals == V3D_PERIPHERAL_VPM_WRITE)) {
return true;
}

View File

@ -778,7 +778,7 @@ v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
return false;
}
static bool
bool
v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
{
return inst->type == V3D_QPU_INSTR_TYPE_ALU &&

View File

@ -468,6 +468,7 @@ bool v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;