pan/bi: Rework FAU lowering

Move and reshape bi_lower_fau to bi_schedule.c. This generalizes the
pass for FAU reads, allowing copyprop to work with FAU without problems.

The pass must run immediately before scheduling. Its post-conditions are
directly specified as the scheduler's pre-conditions. It will shortly
depend on internal scheduler predicates. It is, for all intents and
purposes, part of the scheduler. Keep it all together.

Finally, adjust the handling of constant zero to avoid a move, at the
expense of constrained scheduling of something like
`FADD.v2f16.clamp_0_1 u0, #0`.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
This commit is contained in:
Alyssa Rosenzweig 2021-02-10 11:43:18 -05:00 committed by Marge Bot
parent 6106fb5d8d
commit 0acc6b564e
2 changed files with 73 additions and 56 deletions

View File

@ -1403,11 +1403,84 @@ bi_schedule_block(bi_context *ctx, bi_block *block)
bi_free_worklist(st);
}
/*
 * Decide whether source `s` of `ins` may be left untouched, given the inline
 * constants and FAU index already accepted for the earlier sources of the
 * same instruction.
 *
 * `constants` / `cwords` track the distinct constant words accepted so far
 * (at most two), and `fau` tracks the FAU index in use (null if none). Both
 * are updated when the source is accepted.
 *
 * Returns true if the source is fine as-is, false if it has to be lowered
 * (bi_lower_fau copies rejected sources through a move).
 */
static bool
bi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau)
{
        bi_index src = ins->src[s];
        bool is_constant = (src.type == BI_INDEX_CONSTANT);
        bool is_fau = (src.type == BI_INDEX_FAU);

        /* Staging registers can't have FAU accesses */
        if (s == 0 && bi_opcode_props[ins->op].sr_read)
                return !(is_constant || is_fau);

        if (is_fau) {
                /* Inline constants were already accepted for this
                 * instruction, so an FAU read cannot be added */
                if (*cwords != 0)
                        return false;

                /* Can only read from one pair of FAU words */
                if (!bi_is_null(*fau) && (src.value != fau->value))
                        return false;

                /* If there is a target, we'll need a PC-relative constant */
                if (ins->branch_target)
                        return false;

                *fau = src;
                return true;
        }

        /* Anything that is neither a constant nor FAU is always accepted */
        if (!is_constant)
                return true;

        /* Allow fast zero */
        if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins))
                return true;

        /* An FAU index is already claimed, so no constant can be inlined */
        if (!bi_is_null(*fau))
                return false;

        /* Else, try to inline a constant: reuse a word we already have... */
        for (unsigned i = 0; i < *cwords; ++i) {
                if (src.value == constants[i])
                        return true;
        }

        /* ...or claim a fresh one if there is still room */
        if (*cwords >= 2)
                return false;

        constants[*cwords] = src.value;
        *cwords += 1;

        return true;
}
/*
 * For every instruction in `block`, check each source with
 * bi_check_fau_src(). A rejected source is copied with a 32-bit move
 * inserted right before the instruction, and the instruction is rewritten
 * to read the move's result instead.
 */
static void
bi_lower_fau(bi_context *ctx, bi_block *block)
{
        /* The initial cursor is a placeholder; it is repositioned before
         * every insertion below */
        bi_builder builder = bi_init_builder(ctx, bi_after_block(ctx->current_block));

        bi_foreach_instr_in_block_safe(block, _ins) {
                bi_instr *ins = (bi_instr *) _ins;

                /* Constant/FAU bookkeeping is tracked per instruction */
                uint32_t constants[2];
                unsigned cwords = 0;

                /* ATEST must have the ATEST datum encoded, not any other
                 * uniform. See to it this is the case. */
                bi_index fau = (ins->op == BI_OPCODE_ATEST) ? ins->src[2] : bi_null();

                bi_foreach_src(ins, s) {
                        if (!bi_check_fau_src(ins, s, constants, &cwords, &fau)) {
                                builder.cursor = bi_before_instr(ins);

                                bi_index moved = bi_mov_i32(&builder, ins->src[s]);
                                ins->src[s] = bi_replace_index(ins->src[s], moved);
                        }
                }
        }
}
void
bi_schedule(bi_context *ctx)
{
bi_foreach_block(ctx, block) {
bi_block *bblock = (bi_block *) block;
bi_lower_fau(ctx, bblock);
bi_schedule_block(ctx, bblock);
bi_opt_dead_code_eliminate(ctx, bblock, true);
}

View File

@ -2272,57 +2272,6 @@ glsl_type_size(const struct glsl_type *type, bool bindless)
return glsl_count_attribute_slots(type, false);
}
/*
 * Handle the constant in source `s` of `ins`.
 *
 * `accum` holds the `cwords` constant words already inlined for this
 * instruction. If `allow_constant` permits it, the constant is kept inline
 * when it matches a word already accumulated, when it is zero on an opcode
 * whose `add` property is unset, or when a free word remains (fewer than
 * two in use). Otherwise the constant is materialized with a move inserted
 * before `ins`, and the source is redirected to the move's result.
 *
 * Returns the updated number of constant words in `accum`.
 */
static unsigned
bi_lower_constant(bi_builder *b, bi_instr *ins, unsigned s, uint32_t *accum, unsigned cwords, bool allow_constant)
{
        uint32_t value = ins->src[s].value;

        /* Staging registers can't have constants */
        if (s == 0 && bi_opcode_props[ins->op].sr_read)
                allow_constant = false;

        /* If we're allowed any inline constants, see if this one works */
        if (allow_constant) {
                /* Already accumulated: nothing new to reserve */
                for (unsigned w = 0; w < cwords; ++w) {
                        if (accum[w] == value)
                                return cwords;
                }

                /* Zero on an opcode without the add property is left
                 * in place without consuming a word */
                if (value == 0 && !bi_opcode_props[ins->op].add)
                        return cwords;

                /* Claim a fresh word if one is still free */
                if (cwords < 2) {
                        accum[cwords] = value;
                        return cwords + 1;
                }
        }

        /* should be const folded */
        assert(!ins->src[s].abs && !ins->src[s].neg);

        /* Fall back to a move, carrying the original swizzle over to the
         * replacement source */
        enum bi_swizzle saved_swizzle = ins->src[s].swizzle;

        b->cursor = bi_before_instr(ins);
        ins->src[s] = bi_mov_i32(b, bi_imm_u32(value));
        ins->src[s].swizzle = saved_swizzle;

        return cwords;
}
/*
 * Run bi_lower_constant() over every constant source of every instruction
 * in `block`. The set of inlined constant words is tracked per instruction.
 */
static void
bi_lower_fau(bi_context *ctx, bi_block *block)
{
        bi_builder builder = bi_init_builder(ctx, bi_after_block(ctx->current_block));

        bi_foreach_instr_in_block_safe(block, _ins) {
                bi_instr *ins = (bi_instr *) _ins;

                /* Constant words accumulated so far for this instruction */
                uint32_t constants[2];
                unsigned cwords = 0;

                bi_foreach_src(ins, s) {
                        /* Only constant sources are of interest here */
                        if (ins->src[s].type != BI_INDEX_CONSTANT)
                                continue;

                        cwords = bi_lower_constant(&builder, ins, s, constants, cwords, true);
                }
        }
}
static void
bi_optimize_nir(nir_shader *nir)
{
@ -2575,11 +2524,6 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
}
} while(progress);
bi_foreach_block(ctx, _block) {
bi_block *block = (bi_block *) _block;
bi_lower_fau(ctx, block);
}
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
bi_print_shader(ctx, stdout);
bi_schedule(ctx);