broadcom/compiler: fix scoreboard locking checks

According to the spec, the hardware locks the scoreboard on the first
or last thread switch (selected via shader state), and any TLB accesses
executed before that point are not synchronized by hardware.

This change updates the logic to respect this requirement and to stop
assuming that the lock is acquired automatically on the first TLB
access, which has not been valid since at least V3D 4.1.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13910>
This commit is contained in:
Iago Toral Quiroga 2021-11-22 11:27:41 +01:00 committed by Marge Bot
parent bd7584c16b
commit 7fec4f4135
1 changed files with 31 additions and 16 deletions

View File

@ -492,7 +492,8 @@ struct choose_scoreboard {
int last_thrsw_tick; int last_thrsw_tick;
int last_branch_tick; int last_branch_tick;
int last_setmsf_tick; int last_setmsf_tick;
bool tlb_locked; bool first_thrsw_emitted;
bool last_thrsw_emitted;
bool fixup_ldvary; bool fixup_ldvary;
int ldvary_count; int ldvary_count;
}; };
@ -576,10 +577,26 @@ writes_too_soon_after_write(const struct v3d_device_info *devinfo,
} }
static bool static bool
pixel_scoreboard_too_soon(struct choose_scoreboard *scoreboard, scoreboard_is_locked(struct choose_scoreboard *scoreboard,
bool lock_scoreboard_on_first_thrsw)
{
if (lock_scoreboard_on_first_thrsw) {
return scoreboard->first_thrsw_emitted &&
scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
}
return scoreboard->last_thrsw_emitted &&
scoreboard->tick - scoreboard->last_thrsw_tick >= 3;
}
static bool
pixel_scoreboard_too_soon(struct v3d_compile *c,
struct choose_scoreboard *scoreboard,
const struct v3d_qpu_instr *inst) const struct v3d_qpu_instr *inst)
{ {
return (scoreboard->tick == 0 && qpu_inst_is_tlb(inst)); return qpu_inst_is_tlb(inst) &&
!scoreboard_is_locked(scoreboard,
c->lock_scoreboard_on_first_thrsw);
} }
static bool static bool
@ -1080,12 +1097,12 @@ retry:
if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst)) if (writes_too_soon_after_write(c->devinfo, scoreboard, n->inst))
continue; continue;
/* "A scoreboard wait must not occur in the first two /* "Before doing a TLB access a scoreboard wait must have been
* instructions of a fragment shader. This is either the * done. This happens either on the first or last thread
* explicit Wait for Scoreboard signal or an implicit wait * switch, depending on a setting (scb_wait_on_first_thrsw) in
* with the first tile-buffer read or write instruction." * the shader state."
*/ */
if (pixel_scoreboard_too_soon(scoreboard, inst)) if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue; continue;
/* ldunif and ldvary both write r5, but ldunif does so a tick /* ldunif and ldvary both write r5, but ldunif does so a tick
@ -1158,12 +1175,10 @@ retry:
continue; continue;
} }
/* Don't merge in something that will lock the TLB. /* Don't merge TLB instructions before we have acquired
* Hopwefully what we have in inst will release some * the scoreboard lock.
* other instructions, allowing us to delay the
* TLB-locking instruction until later.
*/ */
if (!scoreboard->tlb_locked && qpu_inst_is_tlb(inst)) if (pixel_scoreboard_too_soon(c, scoreboard, inst))
continue; continue;
/* When we succesfully pair up an ldvary we then try /* When we succesfully pair up an ldvary we then try
@ -1300,9 +1315,6 @@ update_scoreboard_for_chosen(struct choose_scoreboard *scoreboard,
if (inst->sig.ldvary) if (inst->sig.ldvary)
scoreboard->last_ldvary_tick = scoreboard->tick; scoreboard->last_ldvary_tick = scoreboard->tick;
if (qpu_inst_is_tlb(inst))
scoreboard->tlb_locked = true;
} }
static void static void
@ -1787,6 +1799,8 @@ emit_thrsw(struct v3d_compile *c,
merge_inst = inst; merge_inst = inst;
} }
scoreboard->first_thrsw_emitted = true;
/* If we're emitting the last THRSW (other than program end), then /* If we're emitting the last THRSW (other than program end), then
* signal that to the HW by emitting two THRSWs in a row. * signal that to the HW by emitting two THRSWs in a row.
*/ */
@ -1798,6 +1812,7 @@ emit_thrsw(struct v3d_compile *c,
struct qinst *second_inst = struct qinst *second_inst =
(struct qinst *)merge_inst->link.next; (struct qinst *)merge_inst->link.next;
second_inst->qpu.sig.thrsw = true; second_inst->qpu.sig.thrsw = true;
scoreboard->last_thrsw_emitted = true;
} }
/* Make sure the thread end executes within the program lifespan */ /* Make sure the thread end executes within the program lifespan */