aco: Remember when exec mask is const, and restore the const then.
Previously, we would store even the constant -1 exec mask from the beginning of every merged shader. With this change it is no longer necessary because we can restore to constant exec mask directly. Hence, this frees up a register pair (single register for Wave32) in every merged shader. No Fossil DB changes. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10691>
This commit is contained in:
parent
04f90db9a0
commit
c850af936a
|
@ -75,7 +75,7 @@ struct loop_info {
|
|||
};
|
||||
|
||||
struct block_info {
|
||||
std::vector<std::pair<Operand, uint8_t>> exec;
|
||||
std::vector<std::pair<Operand, uint8_t>> exec; /* Vector of exec masks. Either a temporary or const -1. */
|
||||
std::vector<WQMState> instr_needs;
|
||||
uint8_t block_needs;
|
||||
uint8_t ever_again_needs;
|
||||
|
@ -399,13 +399,16 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
assert(startpgm->opcode == aco_opcode::p_startpgm);
|
||||
bld.insert(std::move(startpgm));
|
||||
|
||||
Operand start_exec(bld.lm);
|
||||
|
||||
/* exec seems to need to be manually initialized with combined shaders */
|
||||
if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
|
||||
bld.copy(Definition(exec, bld.lm), Operand(UINT32_MAX, bld.lm == s2));
|
||||
start_exec = Operand(-1u, bld.lm == s2);
|
||||
bld.copy(Definition(exec, bld.lm), start_exec);
|
||||
}
|
||||
|
||||
if (ctx.handle_wqm) {
|
||||
ctx.info[0].exec.emplace_back(Operand(bld.lm), mask_type_global | mask_type_exact);
|
||||
ctx.info[0].exec.emplace_back(start_exec, mask_type_global | mask_type_exact);
|
||||
/* if this block only needs WQM, initialize already */
|
||||
if (ctx.info[0].block_needs == WQM)
|
||||
transition_to_WQM(ctx, bld, 0);
|
||||
|
@ -417,7 +420,7 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
} else {
|
||||
mask |= mask_type_exact;
|
||||
}
|
||||
ctx.info[0].exec.emplace_back(Operand(bld.lm), mask);
|
||||
ctx.info[0].exec.emplace_back(start_exec, mask);
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
@ -641,7 +644,9 @@ unsigned add_coupling_code(exec_ctx& ctx, Block* block,
|
|||
if (block->kind & block_kind_merge && !ctx.info[idx].exec.back().first.isUndefined()) {
|
||||
Operand restore = ctx.info[idx].exec.back().first;
|
||||
assert(restore.size() == bld.lm.size());
|
||||
ctx.info[idx].exec.back().first = bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), restore);
|
||||
if (!restore.isConstant())
|
||||
ctx.info[idx].exec.back().first = Operand(bld.lm);
|
||||
}
|
||||
|
||||
return i;
|
||||
|
|
Loading…
Reference in New Issue