aco: Remember when exec mask is const, and restore the const then.

Previously, we would store even the constant -1 exec mask from the
beginning of every merged shader. With this change it is no longer
necessary because we can restore to constant exec mask directly.

Hence, this frees up a register pair (single register for Wave32)
in every merged shader.

No Fossil DB changes.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10691>
This commit is contained in:
Timur Kristóf 2021-05-07 15:02:58 +02:00 committed by Marge Bot
parent 04f90db9a0
commit c850af936a
1 changed files with 10 additions and 5 deletions

View File

@ -75,7 +75,7 @@ struct loop_info {
};
struct block_info {
std::vector<std::pair<Operand, uint8_t>> exec;
std::vector<std::pair<Operand, uint8_t>> exec; /* Vector of exec masks. Either a temporary or const -1. */
std::vector<WQMState> instr_needs;
uint8_t block_needs;
uint8_t ever_again_needs;
@ -399,13 +399,16 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
assert(startpgm->opcode == aco_opcode::p_startpgm);
bld.insert(std::move(startpgm));
Operand start_exec(bld.lm);
/* exec seems to need to be manually initialized with combined shaders */
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
bld.copy(Definition(exec, bld.lm), Operand(UINT32_MAX, bld.lm == s2));
start_exec = Operand(-1u, bld.lm == s2);
bld.copy(Definition(exec, bld.lm), start_exec);
}
if (ctx.handle_wqm) {
ctx.info[0].exec.emplace_back(Operand(bld.lm), mask_type_global | mask_type_exact);
ctx.info[0].exec.emplace_back(start_exec, mask_type_global | mask_type_exact);
/* if this block only needs WQM, initialize already */
if (ctx.info[0].block_needs == WQM)
transition_to_WQM(ctx, bld, 0);
@ -417,7 +420,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
} else {
mask |= mask_type_exact;
}
ctx.info[0].exec.emplace_back(Operand(bld.lm), mask);
ctx.info[0].exec.emplace_back(start_exec, mask);
}
return 1;
@ -641,7 +644,9 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
if (block->kind & block_kind_merge && !ctx.info[idx].exec.back().first.isUndefined()) {
Operand restore = ctx.info[idx].exec.back().first;
assert(restore.size() == bld.lm.size());
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
if (!restore.isConstant())
ctx.info[idx].exec.back().first = Operand(bld.lm);
}
return i;