From 140e117f2b0c45439a913efe0a4fd1df0842fdfb Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Mon, 8 Nov 2021 17:20:39 +0100 Subject: [PATCH] ir3/delay: Ignore earlier definitions to the same register We have a situation in some skia shaders like: add.f r0.x, ... (rpt2)nop mul.f ..., r0.x sam (xyzw) r0.x, ... rcp ..., r0.x Notice that rcp uses the result of the sam instruction, not the add.f, but we didn't keep track of which instructions kill the sources in ir3_delay, so we'd add an extra nop, resulting in a disagreement between ir3_delay and the scheduling graph. Since postsched is correct, fix ir3_delay. This only results in some very slight shader-db changes but keeps the next commit from changing things. Part-of: --- src/freedreno/ir3/ir3.h | 14 ++++++++++++++ src/freedreno/ir3/ir3_delay.c | 28 ++++++++++++++++++++++++---- src/freedreno/ir3/tests/delay.c | 5 +++++ 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h index 0c3b54d7ba9..6e4e81270a2 100644 --- a/src/freedreno/ir3/ir3.h +++ b/src/freedreno/ir3/ir3.h @@ -2341,6 +2341,20 @@ regmask_set(regmask_t *regmask, struct ir3_register *reg) } } +static inline void +regmask_clear(regmask_t *regmask, struct ir3_register *reg) +{ + bool half = reg->flags & IR3_REG_HALF; + if (reg->flags & IR3_REG_RELATIV) { + for (unsigned i = 0; i < reg->size; i++) + __regmask_clear(regmask, half, reg->array.base + i); + } else { + for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++) + if (mask & 1) + __regmask_clear(regmask, half, n); + } +} + static inline bool regmask_get(regmask_t *regmask, struct ir3_register *reg) { diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c index 8a5a57dc756..f4a748cc3eb 100644 --- a/src/freedreno/ir3/ir3_delay.c +++ b/src/freedreno/ir3/ir3_delay.c @@ -237,8 +237,11 @@ delay_calc_srcn_postra(struct ir3_instruction *assigner, static unsigned delay_calc_postra(struct ir3_block *block, struct
ir3_instruction *start, struct ir3_instruction *consumer, unsigned distance, - bool soft, bool pred, bool mergedregs) + bool soft, bool pred, regmask_t *in_mask, bool mergedregs) { + regmask_t mask; + memcpy(&mask, in_mask, sizeof(mask)); + unsigned delay = 0; /* Search backwards starting at the instruction before start, unless it's * NULL then search backwards from the block end. @@ -261,6 +264,8 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, foreach_dst_n (dst, dst_n, assigner) { if (dst->wrmask == 0) continue; + if (!regmask_get(&mask, dst)) + continue; foreach_src_n (src, src_n, consumer) { if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) continue; @@ -269,6 +274,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, assigner, consumer, dst_n, src_n, soft, mergedregs); new_delay = MAX2(new_delay, src_delay); } + regmask_clear(&mask, dst); } new_delay = new_delay > distance ? new_delay - distance : 0; @@ -298,7 +304,7 @@ delay_calc_postra(struct ir3_block *block, struct ir3_instruction *start, for (unsigned i = 0; i < block->predecessors_count; i++) { struct ir3_block *pred = block->predecessors[i]; unsigned pred_delay = delay_calc_postra(pred, NULL, consumer, distance, - soft, pred, mergedregs); + soft, pred, &mask, mergedregs); delay = MAX2(delay, pred_delay); } @@ -323,7 +329,14 @@ unsigned ir3_delay_calc_postra(struct ir3_block *block, struct ir3_instruction *instr, bool soft, bool mergedregs) { - return delay_calc_postra(block, NULL, instr, 0, soft, false, mergedregs); + regmask_t mask; + regmask_init(&mask, mergedregs); + foreach_src (src, instr) { + if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) + regmask_set(&mask, src); + } + + return delay_calc_postra(block, NULL, instr, 0, soft, false, &mask, mergedregs); } /** @@ -334,7 +347,14 @@ unsigned ir3_delay_calc_exact(struct ir3_block *block, struct ir3_instruction *instr, bool mergedregs) { - return delay_calc_postra(block, NULL, instr, 0, false, 
true, mergedregs); + regmask_t mask; + regmask_init(&mask, mergedregs); + foreach_src (src, instr) { + if (!(src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) + regmask_set(&mask, src); + } + + return delay_calc_postra(block, NULL, instr, 0, false, true, &mask, mergedregs); } /** diff --git a/src/freedreno/ir3/tests/delay.c b/src/freedreno/ir3/tests/delay.c index 018ade53a96..4f8e072ef6b 100644 --- a/src/freedreno/ir3/tests/delay.c +++ b/src/freedreno/ir3/tests/delay.c @@ -61,6 +61,11 @@ static const struct test { mov.f32f32 r0.z, c0.z mad.f32 r0.x, r0.x, r0.y, r0.z ), + TEST(0, + mov.f32f32 r0.x, c0.x + rcp r0.x, r0.y + add.f r0.x, r0.x, c0.x + ), TEST(2, mov.f32f32 r0.x, c0.x mov.f32f32 r0.y, c0.y