aco: remove block_kind_discard

The case handled by block_kind_discard (a uniform discard that ends a loop)
doesn't seem to happen in practice, so there is no need to micro-optimize it.

This patch merges instruction selection for discard/discard_if.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>
This commit is contained in:
Daniel Schürmann 2022-01-31 15:11:22 +01:00 committed by Marge Bot
parent b67092e685
commit 08b8500dfb
6 changed files with 28 additions and 106 deletions

View File

@ -317,13 +317,11 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
exec_ctx.info[i].block_needs |= Exact;
/* if discard is used somewhere in nested CF, we need to preserve the WQM mask */
if ((block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if) &&
ever_again_needs & WQM)
if (block.kind & block_kind_uses_discard_if && ever_again_needs & WQM)
exec_ctx.info[i].block_needs |= Preserve_WQM;
ever_again_needs |= exec_ctx.info[i].block_needs & ~Exact_Branch;
if (block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if ||
block.kind & block_kind_uses_demote)
if (block.kind & block_kind_uses_discard_if || block.kind & block_kind_uses_demote)
ever_again_needs |= Exact;
/* don't propagate WQM preservation further than the next top_level block */
@ -890,8 +888,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
Block& loop_block = ctx.program->blocks[i];
needs |= ctx.info[i].block_needs;
if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_discard ||
loop_block.kind & block_kind_uses_demote)
if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_uses_demote)
has_discard = true;
if (loop_block.loop_nest_depth != loop_nest_depth)
continue;
@ -931,40 +928,6 @@ add_branch_code(exec_ctx& ctx, Block* block)
*/
Operand break_cond = Operand(exec, bld.lm);
if (block->kind & block_kind_discard) {
assert(block->instructions.back()->isBranch());
aco_ptr<Instruction> branch = std::move(block->instructions.back());
block->instructions.pop_back();
/* create a discard_if() instruction with the exec mask as condition */
unsigned num = 0;
if (ctx.loop.size()) {
/* if we're in a loop, only discard from the outer exec masks */
num = ctx.loop.back().num_exec_masks;
} else {
num = ctx.info[idx].exec.size() - 1;
}
Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
for (int i = num - 1; i >= 0; i--) {
Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
get_exec_op(ctx.info[block->index].exec[i].first), cond);
if (i == (int)ctx.info[idx].exec.size() - 1)
andn2->definitions[0] = Definition(exec, bld.lm);
if (i == 0)
bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
ctx.info[block->index].exec[i].first = Operand(andn2->definitions[0].getTemp());
}
assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
break_cond = Operand(cond);
bld.insert(std::move(branch));
/* no return here as it can be followed by a divergent break */
}
if (block->kind & block_kind_continue_or_break) {
assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind &
block_kind_loop_header);

View File

@ -5708,58 +5708,6 @@ visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr)
load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
}
/* Instruction selection for a conditional discard.
 *
 * Emits a p_discard_if pseudo-instruction whose condition is instr->src[0]
 * ANDed with the exec mask, and records on the block, the program and the
 * control-flow info that a discard was emitted.
 *
 * ctx:   instruction-selection context; ctx->block receives the new instructions.
 * instr: the NIR intrinsic; its first source is the discard condition.
 */
void
visit_discard_if(isel_context* ctx, nir_intrinsic_instr* instr)
{
/* Set exec_potentially_empty_discard when the discard is emitted inside a loop
 * or under a divergent if. */
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
ctx->cf_info.exec_potentially_empty_discard = true;
ctx->program->needs_exact = true;
// TODO: optimize uniform conditions
Builder bld(ctx->program, ctx->block);
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
/* The condition is expected to already be a lane mask. */
assert(src.regClass() == bld.lm);
/* AND the condition with exec (presumably so only currently active lanes are
 * discarded -- confirm against the p_discard_if lowering). */
src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
bld.pseudo(aco_opcode::p_discard_if, src);
/* Mark the block so later passes can test for block_kind_uses_discard_if. */
ctx->block->kind |= block_kind_uses_discard_if;
return;
}
/* Instruction selection for an unconditional discard.
 *
 * Two paths:
 *  - Inside a loop, when the discard is the last instruction of its NIR block
 *    and neither the parent if nor a loop continue is divergent, the discard is
 *    emitted as a branch to the loop exit and the block is tagged
 *    block_kind_discard | block_kind_uniform.
 *  - Otherwise a p_discard_if with an all-ones constant condition is emitted
 *    and the block is tagged block_kind_uses_discard_if.
 *
 * ctx:   instruction-selection context; ctx->block receives the new instructions.
 * instr: the NIR intrinsic being selected.
 */
void
visit_discard(isel_context* ctx, nir_intrinsic_instr* instr)
{
Builder bld(ctx->program, ctx->block);
/* Set exec_potentially_empty_discard when the discard is emitted inside a loop
 * or under a divergent if. */
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
ctx->cf_info.exec_potentially_empty_discard = true;
/* Divergent here means: divergent parent if, or the parent loop has a
 * divergent continue. */
bool divergent =
ctx->cf_info.parent_if.is_divergent || ctx->cf_info.parent_loop.has_divergent_continue;
if (ctx->block->loop_nest_depth && (nir_instr_is_last(&instr->instr) && !divergent)) {
/* we handle discards the same way as jump instructions */
append_logical_end(ctx->block);
/* in loops, discard behaves like break */
Block* linear_target = ctx->cf_info.parent_loop.exit;
ctx->block->kind |= block_kind_discard;
/* uniform discard - loop ends here */
/* (this assert repeats the check already made in the enclosing condition) */
assert(nir_instr_is_last(&instr->instr));
ctx->block->kind |= block_kind_uniform;
ctx->cf_info.has_branch = true;
bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
/* Only a linear edge to the loop exit is added here. */
add_linear_edge(ctx->block->index, linear_target);
/* Note: this path returns without setting needs_exact and without emitting
 * p_discard_if. */
return;
}
/* General path: discard with a constant all-ones (-1u) condition. */
ctx->program->needs_exact = true;
bld.pseudo(aco_opcode::p_discard_if, Operand::c32(-1u));
ctx->block->kind |= block_kind_uses_discard_if;
return;
}
enum aco_descriptor_type {
ACO_DESC_IMAGE,
ACO_DESC_FMASK,
@ -8129,10 +8077,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
case nir_intrinsic_terminate:
case nir_intrinsic_discard: visit_discard(ctx, instr); break;
case nir_intrinsic_terminate_if:
case nir_intrinsic_discard_if: visit_discard_if(ctx, instr); break;
case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
case nir_intrinsic_shared_atomic_add:
@ -8742,6 +8686,27 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
ctx->program->needs_exact = true;
break;
}
case nir_intrinsic_terminate:
case nir_intrinsic_terminate_if:
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
Operand cond = Operand::c32(-1u);
if (instr->intrinsic == nir_intrinsic_discard_if ||
instr->intrinsic == nir_intrinsic_terminate_if) {
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
assert(src.regClass() == bld.lm);
cond =
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
}
bld.pseudo(aco_opcode::p_discard_if, cond);
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
ctx->cf_info.exec_potentially_empty_discard = true;
ctx->block->kind |= block_kind_uses_discard_if;
ctx->program->needs_exact = true;
break;
}
case nir_intrinsic_first_invocation: {
emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
get_ssa_temp(ctx, &instr->dest.ssa));

View File

@ -1811,10 +1811,9 @@ enum block_kind {
block_kind_continue = 1 << 5,
block_kind_break = 1 << 6,
block_kind_continue_or_break = 1 << 7,
block_kind_discard = 1 << 8,
block_kind_branch = 1 << 9,
block_kind_merge = 1 << 10,
block_kind_invert = 1 << 11,
block_kind_branch = 1 << 8,
block_kind_merge = 1 << 9,
block_kind_invert = 1 << 10,
block_kind_uses_discard_if = 1 << 12,
block_kind_needs_lowering = 1 << 13,
block_kind_uses_demote = 1 << 14,

View File

@ -2036,8 +2036,6 @@ lower_to_hw_instr(Program* program)
bld.reset(&ctx.instructions);
}
// TODO: exec can be zero here with block_kind_discard
assert(instr->operands[0].physReg() == scc);
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0],
discard_block->index);

View File

@ -476,8 +476,7 @@ value_numbering(Program* program)
/* increment exec_id when entering nested control flow */
if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader ||
block.kind & block_kind_break || block.kind & block_kind_continue ||
block.kind & block_kind_discard)
block.kind & block_kind_break || block.kind & block_kind_continue)
ctx.exec_id++;
else if (block.kind & block_kind_continue_or_break)
ctx.exec_id += 2;

View File

@ -744,8 +744,6 @@ print_block_kind(uint16_t kind, FILE* output)
fprintf(output, "break, ");
if (kind & block_kind_continue_or_break)
fprintf(output, "continue_or_break, ");
if (kind & block_kind_discard)
fprintf(output, "discard, ");
if (kind & block_kind_branch)
fprintf(output, "branch, ");
if (kind & block_kind_merge)