ir3/delay: Ignore earlier definitions to the same register
We have a situation in some skia shaders like: add.f r0.x, ... (rpt2)nop mul.f ..., r0.x sam (xyzw) r0.x, ... rcp ..., r0.x Notice that rcp uses the result of the sam instruction, not the add.f, but we didn't keep track of which instructions kill the sources in ir3_delay, so we'd add an extra nop, resulting in a disagreement betwen ir3_delay and the scheduling graph. Since postsched is correct, fix ir3_delay. This only results in some very slight shader-db changes but keeps the next commit from changing things. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13722>
This commit is contained in:
parent
a54e7baa65
commit
140e117f2b
|
@ -2341,6 +2341,20 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg)
|
|||
}
|
||||
}
|
||||
|
||||
static inline void
|
||||
regmask_clear(regmask_t *regmask, struct ir3_register *reg)
|
||||
{
|
||||
bool half = reg->flags & IR3_REG_HALF;
|
||||
if (reg->flags & IR3_REG_RELATIV) {
|
||||
for (unsigned i = 0; i < reg->size; i++)
|
||||
__regmask_clear(regmask, half, reg->array.base + i);
|
||||
} else {
|
||||
for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
|
||||
if (mask & 1)
|
||||
__regmask_clear(regmask, half, n);
|
||||
}
|
||||
}
|
||||
|
||||
static inline bool
|
||||
regmask_get(regmask_t *regmask, struct ir3_register *reg)
|
||||
{
|
||||
|
|
|
@ -237,8 +237,11 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner,
|
|||
static unsigned
|
||||
delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
|
||||
struct ir3_instruction *consumer, unsigned distance,
|
||||
bool soft, bool pred, bool mergedregs)
|
||||
bool soft, bool pred, regmask_t *in_mask, bool mergedregs)
|
||||
{
|
||||
regmask_t mask;
|
||||
memcpy(&mask, in_mask, sizeof(mask));
|
||||
|
||||
unsigned delay = 0;
|
||||
/* Search backwards starting at the instruction before start, unless it's
|
||||
* NULL then search backwards from the block end.
|
||||
|
@ -261,6 +264,8 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
|
|||
foreach_dst_n (dst, dst_n, assigner) {
|
||||
if (dst->wrmask == 0)
|
||||
continue;
|
||||
if (!regmask_get(&mask, dst))
|
||||
continue;
|
||||
foreach_src_n (src, src_n, consumer) {
|
||||
if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST))
|
||||
continue;
|
||||
|
@ -269,6 +274,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
|
|||
assigner, consumer, dst_n, src_n, soft, mergedregs);
|
||||
new_delay = MAX2(new_delay, src_delay);
|
||||
}
|
||||
regmask_clear(&mask, dst);
|
||||
}
|
||||
|
||||
new_delay = new_delay > distance ? new_delay - distance : 0;
|
||||
|
@ -298,7 +304,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start,
|
|||
for (unsigned i = 0; i < block->predecessors_count; i++) {
|
||||
struct ir3_block *pred = block->predecessors[i];
|
||||
unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance,
|
||||
soft, pred, mergedregs);
|
||||
soft, pred, &mask, mergedregs);
|
||||
delay = MAX2(delay, pred_delay);
|
||||
}
|
||||
|
||||
|
@ -323,7 +329,14 @@ unsigned
|
|||
ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr,
|
||||
bool soft, bool mergedregs)
|
||||
{
|
||||
return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs);
|
||||
regmask_t mask;
|
||||
regmask_init(&mask, mergedregs);
|
||||
foreach_src (src, instr) {
|
||||
if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
|
||||
regmask_set(&mask, src);
|
||||
}
|
||||
|
||||
return delay_calc_postra(block, NULL, instr, 0, soft, false, &mask, mergedregs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -334,7 +347,14 @@ unsigned
|
|||
ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr,
|
||||
bool mergedregs)
|
||||
{
|
||||
return delay_calc_postra(block, NULL, instr, 0, false, true, mergedregs);
|
||||
regmask_t mask;
|
||||
regmask_init(&mask, mergedregs);
|
||||
foreach_src (src, instr) {
|
||||
if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST)))
|
||||
regmask_set(&mask, src);
|
||||
}
|
||||
|
||||
return delay_calc_postra(block, NULL, instr, 0, false, true, &mask, mergedregs);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -61,6 +61,11 @@ static const struct test {
|
|||
mov.f32f32 r0.z, c0.z
|
||||
mad.f32 r0.x, r0.x, r0.y, r0.z
|
||||
),
|
||||
TEST(0,
|
||||
mov.f32f32 r0.x, c0.x
|
||||
rcp r0.x, r0.y
|
||||
add.f r0.x, r0.x, c0.x
|
||||
),
|
||||
TEST(2,
|
||||
mov.f32f32 r0.x, c0.x
|
||||
mov.f32f32 r0.y, c0.y
|
||||
|
|
Loading…
Reference in New Issue