ir3/delay: Ignore earlier definitions to the same register

We have a situation in some skia shaders like:

add.f r0.x, ...
(rpt2)nop
mul.f ..., r0.x
sam (xyzw) r0.x, ...
rcp ..., r0.x

Notice that rcp uses the result of the sam instruction, not the add.f,
but we didn't keep track of which instructions kill the sources in
ir3_delay, so we'd add an extra nop, resulting in a disagreement between
ir3_delay and the scheduling graph. Since postsched is correct, fix
ir3_delay. This only results in some very slight shader-db changes but
keeps the next commit from changing things.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13722>
This commit is contained in:
Connor Abbott 2021-11-08 17:20:39 +01:00 committed by Marge Bot
parent a54e7baa65
commit 140e117f2b
3 changed files with 43 additions and 4 deletions

View File

@ -2341,6 +2341,20 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg)
}
}
/**
 * Counterpart of regmask_set(): invoke __regmask_clear() on every register
 * component that @reg refers to.
 *
 * For a relative (IR3_REG_RELATIV) register, all reg->size slots starting at
 * reg->array.base are cleared.  Otherwise only the components whose bit is
 * set in reg->wrmask are cleared, with bit 0 corresponding to reg->num.
 * The half/full selection is taken from IR3_REG_HALF in reg->flags.
 */
static inline void
regmask_clear(regmask_t *regmask, struct ir3_register *reg)
{
   bool is_half = reg->flags & IR3_REG_HALF;

   if (!(reg->flags & IR3_REG_RELATIV)) {
      /* Walk the write mask one bit at a time; each set bit names one
       * consecutive component starting at reg->num.
       */
      unsigned pending = reg->wrmask;
      unsigned comp = reg->num;

      while (pending) {
         if (pending & 1)
            __regmask_clear(regmask, is_half, comp);
         pending >>= 1;
         comp++;
      }
      return;
   }

   /* Relative access: every element of the array range is covered. */
   for (unsigned off = 0; off < reg->size; off++)
      __regmask_clear(regmask, is_half, reg->array.base + off);
}
static inline bool
regmask_get(regmask_t *regmask, struct ir3_register *reg)
{

View File

@ -237,8 +237,11 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
static unsigned
delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
struct ir3_instruction *consumer, unsigned distance,
bool soft, bool pred, bool mergedregs)
bool soft, bool pred, regmask_t *in_mask, bool mergedregs)
{
regmask_t mask;
memcpy(&mask, in_mask, sizeof(mask));
unsigned delay = 0;
/* Search backwards starting at the instruction before start, unless it's
* NULL then search backwards from the block end.
@ -261,6 +264,8 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
foreach_dst_n (dst, dst_n, assigner) {
if (dst->wrmask == 0)
continue;
if (!regmask_get(&mask, dst))
continue;
foreach_src_n (src, src_n, consumer) {
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
continue;
@ -269,6 +274,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
assigner, consumer, dst_n, src_n, soft, mergedregs);
new_delay = MAX2(new_delay, src_delay);
}
regmask_clear(&mask, dst);
}
new_delay = new_delay > distance ? new_delay - distance : 0;
@ -298,7 +304,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
for (unsigned i = 0; i < block->predecessors_count; i++) {
struct ir3_block *pred = block->predecessors[i];
unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
soft, pred, mergedregs);
soft, pred, &mask, mergedregs);
delay = MAX2(delay, pred_delay);
}
@ -323,7 +329,14 @@ unsigned
ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
bool soft, bool mergedregs)
{
return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
regmask_t mask;
regmask_init(&mask, mergedregs);
foreach_src (src, instr) {
if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
regmask_set(&mask, src);
}
return delay_calc_postra(block, NULL, instr, 0, soft, false, &mask, mergedregs);
}
/**
@ -334,7 +347,14 @@ unsigned
ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
bool mergedregs)
{
return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
regmask_t mask;
regmask_init(&mask, mergedregs);
foreach_src (src, instr) {
if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
regmask_set(&mask, src);
}
return delay_calc_postra(block, NULL, instr, 0, false, true, &mask, mergedregs);
}
/**

View File

@ -61,6 +61,11 @@ static const struct test {
mov.f32f32 r0.z, c0.z
mad.f32 r0.x, r0.x, r0.y, r0.z
),
TEST(0,
mov.f32f32 r0.x, c0.x
rcp r0.x, r0.y
add.f r0.x, r0.x, c0.x
),
TEST(2,
mov.f32f32 r0.x, c0.x
mov.f32f32 r0.y, c0.y