aco: remove block_kind_discard
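The case handled by block_kind_discard (a uniform discard as the last instruction of a block inside a loop, which was treated like a break) doesn't seem to happen in practice, so there is no need to micro-optimize it. This patch removes that block kind and merges instruction selection for discard/discard_if. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14805>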
This commit is contained in:
parent
b67092e685
commit
08b8500dfb
|
@ -317,13 +317,11 @@ calculate_wqm_needs(exec_ctx& exec_ctx)
|
|||
exec_ctx.info[i].block_needs |= Exact;
|
||||
|
||||
/* if discard is used somewhere in nested CF, we need to preserve the WQM mask */
|
||||
if ((block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if) &&
|
||||
ever_again_needs & WQM)
|
||||
if (block.kind & block_kind_uses_discard_if && ever_again_needs & WQM)
|
||||
exec_ctx.info[i].block_needs |= Preserve_WQM;
|
||||
|
||||
ever_again_needs |= exec_ctx.info[i].block_needs & ~Exact_Branch;
|
||||
if (block.kind & block_kind_discard || block.kind & block_kind_uses_discard_if ||
|
||||
block.kind & block_kind_uses_demote)
|
||||
if (block.kind & block_kind_uses_discard_if || block.kind & block_kind_uses_demote)
|
||||
ever_again_needs |= Exact;
|
||||
|
||||
/* don't propagate WQM preservation further than the next top_level block */
|
||||
|
@ -890,8 +888,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
|
|||
Block& loop_block = ctx.program->blocks[i];
|
||||
needs |= ctx.info[i].block_needs;
|
||||
|
||||
if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_discard ||
|
||||
loop_block.kind & block_kind_uses_demote)
|
||||
if (loop_block.kind & block_kind_uses_discard_if || loop_block.kind & block_kind_uses_demote)
|
||||
has_discard = true;
|
||||
if (loop_block.loop_nest_depth != loop_nest_depth)
|
||||
continue;
|
||||
|
@ -931,40 +928,6 @@ add_branch_code(exec_ctx& ctx, Block* block)
|
|||
*/
|
||||
Operand break_cond = Operand(exec, bld.lm);
|
||||
|
||||
if (block->kind & block_kind_discard) {
|
||||
|
||||
assert(block->instructions.back()->isBranch());
|
||||
aco_ptr<Instruction> branch = std::move(block->instructions.back());
|
||||
block->instructions.pop_back();
|
||||
|
||||
/* create a discard_if() instruction with the exec mask as condition */
|
||||
unsigned num = 0;
|
||||
if (ctx.loop.size()) {
|
||||
/* if we're in a loop, only discard from the outer exec masks */
|
||||
num = ctx.loop.back().num_exec_masks;
|
||||
} else {
|
||||
num = ctx.info[idx].exec.size() - 1;
|
||||
}
|
||||
|
||||
Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
|
||||
Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
|
||||
|
||||
for (int i = num - 1; i >= 0; i--) {
|
||||
Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
|
||||
get_exec_op(ctx.info[block->index].exec[i].first), cond);
|
||||
if (i == (int)ctx.info[idx].exec.size() - 1)
|
||||
andn2->definitions[0] = Definition(exec, bld.lm);
|
||||
if (i == 0)
|
||||
bld.pseudo(aco_opcode::p_exit_early_if, bld.scc(andn2->definitions[1].getTemp()));
|
||||
ctx.info[block->index].exec[i].first = Operand(andn2->definitions[0].getTemp());
|
||||
}
|
||||
assert(!ctx.handle_wqm || (ctx.info[block->index].exec[0].second & mask_type_wqm) == 0);
|
||||
|
||||
break_cond = Operand(cond);
|
||||
bld.insert(std::move(branch));
|
||||
/* no return here as it can be followed by a divergent break */
|
||||
}
|
||||
|
||||
if (block->kind & block_kind_continue_or_break) {
|
||||
assert(ctx.program->blocks[ctx.program->blocks[block->linear_succs[1]].linear_succs[0]].kind &
|
||||
block_kind_loop_header);
|
||||
|
|
|
@ -5708,58 +5708,6 @@ visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0);
|
||||
}
|
||||
|
||||
/* Instruction selection for a conditional discard.
 *
 * Emits a p_discard_if pseudo instruction whose condition is the intrinsic's
 * first source ANDed with exec, and tags the current block with
 * block_kind_uses_discard_if.
 *
 * ctx:   instruction selection context; its current block receives the code.
 * instr: the NIR intrinsic; src[0] is read as the discard condition.
 */
void
|
||||
visit_discard_if(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
/* inside a loop or under a divergent if, record that exec may become empty due to the discard */
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = true;
|
||||
|
||||
ctx->program->needs_exact = true;
|
||||
|
||||
// TODO: optimize uniform conditions
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
/* the condition is expected to already be in the lane-mask register class */
assert(src.regClass() == bld.lm);
|
||||
/* mask the condition with exec before handing it to p_discard_if */
src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
|
||||
bld.pseudo(aco_opcode::p_discard_if, src);
|
||||
ctx->block->kind |= block_kind_uses_discard_if;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Instruction selection for an unconditional discard.
 *
 * Two paths:
 *  - Inside a loop, when the discard is the last instruction of its NIR block
 *    and control flow is not divergent: the block is ended, tagged
 *    block_kind_discard | block_kind_uniform, and a p_branch with a linear
 *    edge to the parent loop's exit is emitted.
 *  - Otherwise: a p_discard_if with an all-ones constant operand is emitted
 *    and the block is tagged block_kind_uses_discard_if.
 *
 * ctx:   instruction selection context; its current block receives the code.
 * instr: the NIR intrinsic; only its position within the NIR block is examined.
 */
void
|
||||
visit_discard(isel_context* ctx, nir_intrinsic_instr* instr)
|
||||
{
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
|
||||
/* inside a loop or under a divergent if, record that exec may become empty due to the discard */
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = true;
|
||||
|
||||
/* divergent if either the parent if is divergent or the parent loop has a divergent continue */
bool divergent =
|
||||
ctx->cf_info.parent_if.is_divergent || ctx->cf_info.parent_loop.has_divergent_continue;
|
||||
|
||||
if (ctx->block->loop_nest_depth && (nir_instr_is_last(&instr->instr) && !divergent)) {
|
||||
/* we handle discards the same way as jump instructions */
|
||||
append_logical_end(ctx->block);
|
||||
|
||||
/* in loops, discard behaves like break */
|
||||
Block* linear_target = ctx->cf_info.parent_loop.exit;
|
||||
ctx->block->kind |= block_kind_discard;
|
||||
|
||||
/* uniform discard - loop ends here */
|
||||
/* NOTE: this assertion repeats a check already made by the enclosing condition */
assert(nir_instr_is_last(&instr->instr));
|
||||
ctx->block->kind |= block_kind_uniform;
|
||||
ctx->cf_info.has_branch = true;
|
||||
bld.branch(aco_opcode::p_branch, bld.hint_vcc(bld.def(s2)));
|
||||
add_linear_edge(ctx->block->index, linear_target);
|
||||
/* NOTE(review): this path returns without setting program->needs_exact, unlike the path below */
return;
|
||||
}
|
||||
|
||||
ctx->program->needs_exact = true;
|
||||
/* unconditional discard expressed as p_discard_if with a constant all-ones (-1u) operand */
bld.pseudo(aco_opcode::p_discard_if, Operand::c32(-1u));
|
||||
ctx->block->kind |= block_kind_uses_discard_if;
|
||||
return;
|
||||
}
|
||||
|
||||
enum aco_descriptor_type {
|
||||
ACO_DESC_IMAGE,
|
||||
ACO_DESC_FMASK,
|
||||
|
@ -8129,10 +8077,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
case nir_intrinsic_load_push_constant: visit_load_push_constant(ctx, instr); break;
|
||||
case nir_intrinsic_load_constant: visit_load_constant(ctx, instr); break;
|
||||
case nir_intrinsic_vulkan_resource_index: visit_load_resource(ctx, instr); break;
|
||||
case nir_intrinsic_terminate:
|
||||
case nir_intrinsic_discard: visit_discard(ctx, instr); break;
|
||||
case nir_intrinsic_terminate_if:
|
||||
case nir_intrinsic_discard_if: visit_discard_if(ctx, instr); break;
|
||||
case nir_intrinsic_load_shared: visit_load_shared(ctx, instr); break;
|
||||
case nir_intrinsic_store_shared: visit_store_shared(ctx, instr); break;
|
||||
case nir_intrinsic_shared_atomic_add:
|
||||
|
@ -8742,6 +8686,27 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
ctx->program->needs_exact = true;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_terminate:
|
||||
case nir_intrinsic_terminate_if:
|
||||
case nir_intrinsic_discard:
|
||||
case nir_intrinsic_discard_if: {
|
||||
Operand cond = Operand::c32(-1u);
|
||||
if (instr->intrinsic == nir_intrinsic_discard_if ||
|
||||
instr->intrinsic == nir_intrinsic_terminate_if) {
|
||||
Temp src = get_ssa_temp(ctx, instr->src[0].ssa);
|
||||
assert(src.regClass() == bld.lm);
|
||||
cond =
|
||||
bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm));
|
||||
}
|
||||
|
||||
bld.pseudo(aco_opcode::p_discard_if, cond);
|
||||
|
||||
if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent)
|
||||
ctx->cf_info.exec_potentially_empty_discard = true;
|
||||
ctx->block->kind |= block_kind_uses_discard_if;
|
||||
ctx->program->needs_exact = true;
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_first_invocation: {
|
||||
emit_wqm(bld, bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)),
|
||||
get_ssa_temp(ctx, &instr->dest.ssa));
|
||||
|
|
|
@ -1811,10 +1811,9 @@ enum block_kind {
|
|||
block_kind_continue = 1 << 5,
|
||||
block_kind_break = 1 << 6,
|
||||
block_kind_continue_or_break = 1 << 7,
|
||||
block_kind_discard = 1 << 8,
|
||||
block_kind_branch = 1 << 9,
|
||||
block_kind_merge = 1 << 10,
|
||||
block_kind_invert = 1 << 11,
|
||||
block_kind_branch = 1 << 8,
|
||||
block_kind_merge = 1 << 9,
|
||||
block_kind_invert = 1 << 10,
|
||||
block_kind_uses_discard_if = 1 << 12,
|
||||
block_kind_needs_lowering = 1 << 13,
|
||||
block_kind_uses_demote = 1 << 14,
|
||||
|
|
|
@ -2036,8 +2036,6 @@ lower_to_hw_instr(Program* program)
|
|||
bld.reset(&ctx.instructions);
|
||||
}
|
||||
|
||||
// TODO: exec can be zero here with block_kind_discard
|
||||
|
||||
assert(instr->operands[0].physReg() == scc);
|
||||
bld.sopp(aco_opcode::s_cbranch_scc0, Definition(exec, s2), instr->operands[0],
|
||||
discard_block->index);
|
||||
|
|
|
@ -476,8 +476,7 @@ value_numbering(Program* program)
|
|||
|
||||
/* increment exec_id when entering nested control flow */
|
||||
if (block.kind & block_kind_branch || block.kind & block_kind_loop_preheader ||
|
||||
block.kind & block_kind_break || block.kind & block_kind_continue ||
|
||||
block.kind & block_kind_discard)
|
||||
block.kind & block_kind_break || block.kind & block_kind_continue)
|
||||
ctx.exec_id++;
|
||||
else if (block.kind & block_kind_continue_or_break)
|
||||
ctx.exec_id += 2;
|
||||
|
|
|
@ -744,8 +744,6 @@ print_block_kind(uint16_t kind, FILE* output)
|
|||
fprintf(output, "break, ");
|
||||
if (kind & block_kind_continue_or_break)
|
||||
fprintf(output, "continue_or_break, ");
|
||||
if (kind & block_kind_discard)
|
||||
fprintf(output, "discard, ");
|
||||
if (kind & block_kind_branch)
|
||||
fprintf(output, "branch, ");
|
||||
if (kind & block_kind_merge)
|
||||
|
|
Loading…
Reference in New Issue