ir3: Use (ss) for instructions writing shared regs
The blob uses *both* nops and (ss). It turns out that in some rare cases the hardware does take more than 6 cycles, at least for movmsk, so (ss) is needed for correctness, but adding nops on top of it is unnecessary. I believe the extra nops are only there due to the immaturity of the blob's implementation of subgroup ops, so we don't have to copy them - just handle shared reg producers the same as SFU instructions. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14246>
This commit is contained in:
parent
d45678cac4
commit
cb45120556
|
@ -1670,6 +1670,10 @@ is_local_mem_load(struct ir3_instruction *instr)
|
|||
static inline bool
|
||||
is_ss_producer(struct ir3_instruction *instr)
|
||||
{
|
||||
foreach_dst (dst, instr) {
|
||||
if (dst->flags & IR3_REG_SHARED)
|
||||
return true;
|
||||
}
|
||||
return is_sfu(instr) || is_local_mem_load(instr);
|
||||
}
|
||||
|
||||
|
@ -1687,7 +1691,13 @@ soft_ss_delay(struct ir3_instruction *instr)
|
|||
* and so on. Not quite sure where it tapers out (ie. how many warps share an
|
||||
* SFU unit). But 10 seems like a reasonable # to choose:
|
||||
*/
|
||||
return 10;
|
||||
if (is_sfu(instr) || is_local_mem_load(instr))
|
||||
return 10;
|
||||
|
||||
/* The blob adds 6 nops between shared producers and consumers, and before we
|
||||
* used (ss) this was sufficient in most cases.
|
||||
*/
|
||||
return 6;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
|
|
|
@ -76,7 +76,7 @@ ir3_delayslots(struct ir3_instruction *assigner,
|
|||
|
||||
/* assigner must be alu: */
|
||||
if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
|
||||
is_mem(consumer) || (assigner->dsts[0]->flags & IR3_REG_SHARED)) {
|
||||
is_mem(consumer)) {
|
||||
return 6;
|
||||
} else {
|
||||
/* In mergedregs mode, there is an extra 2-cycle penalty when half of
|
||||
|
|
|
@ -255,6 +255,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
|
|||
if (is_sfu(n))
|
||||
regmask_set(&state->needs_ss, n->dsts[0]);
|
||||
|
||||
foreach_dst (dst, n) {
|
||||
if (dst->flags & IR3_REG_SHARED)
|
||||
regmask_set(&state->needs_ss, dst);
|
||||
}
|
||||
|
||||
if (is_tex_or_prefetch(n)) {
|
||||
regmask_set(&state->needs_sy, n->dsts[0]);
|
||||
if (n->opc == OPC_META_TEX_PREFETCH)
|
||||
|
|
Loading…
Reference in New Issue