pan/bi: Rework FAU lowering
Move and reshape bi_lower_fau to bi_schedule.c. This generalizes the pass for FAU reads, allowing copyprop to work with FAU without problems. The pass must run immediately before scheduling: its post-conditions are directly specified as the scheduler's pre-conditions, and it will temporarily depend on internal scheduler predicates. It is, for all intents and purposes, part of the scheduler, so keep it all together. Finally, adjust the handling of 0 to avoid a move, at the expense of constrained scheduling of something like `FADD.v2f16.clamp_0_1 u0, #0`. Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8973>
This commit is contained in:
parent
6106fb5d8d
commit
0acc6b564e
|
@ -1403,11 +1403,84 @@ bi_schedule_block(bi_context *ctx, bi_block *block)
|
|||
bi_free_worklist(st);
|
||||
}
|
||||
|
||||
/* Checks whether source s of ins can be encoded as-is (an inline constant or
 * a FAU slot read) under the scheduler's per-instruction constraints, given
 * the constants and FAU slot already claimed by earlier sources of the same
 * instruction. Returns true if the source is acceptable, updating the
 * accumulated state (*cwords constant words in constants[], and *fau);
 * returns false if the caller must lower the source to a move first.
 *
 * constants[] must have room for 2 words (at most two constant words may be
 * accessed per instruction — see the *cwords >= 2 check below).
 *
 * NOTE(review): the ordering of checks is significant — e.g. the fast-zero
 * path is taken before the FAU conflict test, so a zero never claims a
 * constant word. Preserve the order when modifying. */
static bool
bi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau)
{
        bi_index src = ins->src[s];

        /* Staging registers can't have FAU accesses, so a staging source is
         * acceptable exactly when it is neither a constant nor a FAU read */
        if (s == 0 && bi_opcode_props[ins->op].sr_read)
                return (src.type != BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU);

        if (src.type == BI_INDEX_CONSTANT) {
                /* Allow fast zero: FMA units can read zero directly without
                 * consuming a constant word, provided the instruction reads
                 * the value as a true zero (bi_reads_zero) */
                if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins))
                        return true;

                /* Constants and a FAU slot cannot be mixed on one instruction */
                if (!bi_is_null(*fau))
                        return false;

                /* Else, try to inline a constant: reuse an already-claimed
                 * word if the value matches */
                for (unsigned i = 0; i < *cwords; ++i) {
                        if (src.value == constants[i])
                                return true;
                }

                /* At most two constant words per instruction */
                if (*cwords >= 2)
                        return false;

                /* Claim a fresh constant word for this value */
                constants[(*cwords)++] = src.value;
        } else if (src.type == BI_INDEX_FAU) {
                /* A FAU read cannot coexist with inline constants */
                if (*cwords != 0)
                        return false;

                /* Can only read from one pair of FAU words */
                if (!bi_is_null(*fau) && (src.value != fau->value))
                        return false;

                /* If there is a target, we'll need a PC-relative constant,
                 * which would conflict with the FAU read */
                if (ins->branch_target)
                        return false;

                *fau = src;
        }

        /* Registers and other index types are always fine */
        return true;
}
|
||||
|
||||
static void
|
||||
bi_lower_fau(bi_context *ctx, bi_block *block)
|
||||
{
|
||||
bi_builder b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
|
||||
|
||||
bi_foreach_instr_in_block_safe(block, _ins) {
|
||||
bi_instr *ins = (bi_instr *) _ins;
|
||||
|
||||
uint32_t constants[2];
|
||||
unsigned cwords = 0;
|
||||
bi_index fau = bi_null();
|
||||
|
||||
/* ATEST must have the ATEST datum encoded, not any other
|
||||
* uniform. See to it this is the case. */
|
||||
if (ins->op == BI_OPCODE_ATEST)
|
||||
fau = ins->src[2];
|
||||
|
||||
bi_foreach_src(ins, s) {
|
||||
if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) continue;
|
||||
|
||||
b.cursor = bi_before_instr(ins);
|
||||
bi_index copy = bi_mov_i32(&b, ins->src[s]);
|
||||
ins->src[s] = bi_replace_index(ins->src[s], copy);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
bi_schedule(bi_context *ctx)
|
||||
{
|
||||
bi_foreach_block(ctx, block) {
|
||||
bi_block *bblock = (bi_block *) block;
|
||||
bi_lower_fau(ctx, bblock);
|
||||
bi_schedule_block(ctx, bblock);
|
||||
bi_opt_dead_code_eliminate(ctx, bblock, true);
|
||||
}
|
||||
|
|
|
@ -2272,57 +2272,6 @@ glsl_type_size(const struct glsl_type *type, bool bindless)
|
|||
return glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
bi_lower_constant(bi_builder *b, bi_instr *ins, unsigned s, uint32_t *accum, unsigned cwords, bool allow_constant)
|
||||
{
|
||||
uint32_t value = ins->src[s].value;
|
||||
|
||||
/* Staging registers can't have constants */
|
||||
allow_constant &= !(s == 0 && bi_opcode_props[ins->op].sr_read);
|
||||
|
||||
/* If we're allowed any inline constants, see if this one works */
|
||||
if (allow_constant) {
|
||||
for (unsigned i = 0; i < cwords; ++i) {
|
||||
if (value == accum[i])
|
||||
return cwords;
|
||||
}
|
||||
|
||||
if (value == 0 && !bi_opcode_props[ins->op].add)
|
||||
return cwords;
|
||||
|
||||
if (cwords < 2) {
|
||||
accum[cwords] = value;
|
||||
return cwords + 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* should be const folded */
|
||||
assert(!ins->src[s].abs && !ins->src[s].neg);
|
||||
enum bi_swizzle old_swizzle = ins->src[s].swizzle;
|
||||
|
||||
b->cursor = bi_before_instr(ins);
|
||||
ins->src[s] = bi_mov_i32(b, bi_imm_u32(value));
|
||||
ins->src[s].swizzle = old_swizzle;
|
||||
return cwords;
|
||||
}
|
||||
|
||||
static void
|
||||
bi_lower_fau(bi_context *ctx, bi_block *block)
|
||||
{
|
||||
bi_builder b = bi_init_builder(ctx, bi_after_block(ctx->current_block));
|
||||
|
||||
bi_foreach_instr_in_block_safe(block, _ins) {
|
||||
bi_instr *ins = (bi_instr *) _ins;
|
||||
uint32_t constants[2];
|
||||
unsigned cwords = 0;
|
||||
|
||||
bi_foreach_src(ins, s) {
|
||||
if (ins->src[s].type == BI_INDEX_CONSTANT)
|
||||
cwords = bi_lower_constant(&b, ins, s, constants, cwords, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bi_optimize_nir(nir_shader *nir)
|
||||
{
|
||||
|
@ -2575,11 +2524,6 @@ bifrost_compile_shader_nir(void *mem_ctx, nir_shader *nir,
|
|||
}
|
||||
} while(progress);
|
||||
|
||||
bi_foreach_block(ctx, _block) {
|
||||
bi_block *block = (bi_block *) _block;
|
||||
bi_lower_fau(ctx, block);
|
||||
}
|
||||
|
||||
if (bifrost_debug & BIFROST_DBG_SHADERS && !skip_internal)
|
||||
bi_print_shader(ctx, stdout);
|
||||
bi_schedule(ctx);
|
||||
|
|
Loading…
Reference in New Issue