aco: coalesce constant copies
fossil-db (Navi): Totals from 20108 (14.49% of 138791) affected shaders: CodeSize: 117835376 -> 117830512 (-0.00%) Instrs: 22813722 -> 22733245 (-0.35%) Cycles: 1009135584 -> 1008543628 (-0.06%) VMEM: 5401668 -> 5391247 (-0.19%) SMEM: 1286824 -> 1283663 (-0.25%) Copies: 1742154 -> 1661686 (-4.62%) Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7798>
This commit is contained in:
parent
f53d4e5f60
commit
7610630124
|
@ -1331,6 +1331,51 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
|
||||||
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
|
bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void try_coalesce_copies(lower_context *ctx,
|
||||||
|
std::map<PhysReg, copy_operation>& copy_map,
|
||||||
|
copy_operation& copy)
|
||||||
|
{
|
||||||
|
// TODO try more relaxed alignment for subdword copies
|
||||||
|
unsigned next_def_align = util_next_power_of_two(copy.bytes + 1);
|
||||||
|
unsigned next_op_align = next_def_align;
|
||||||
|
if (copy.def.regClass().type() == RegType::vgpr)
|
||||||
|
next_def_align = MIN2(next_def_align, 4);
|
||||||
|
if (copy.op.regClass().type() == RegType::vgpr)
|
||||||
|
next_op_align = MIN2(next_op_align, 4);
|
||||||
|
|
||||||
|
if (copy.bytes >= 8 || copy.def.physReg().reg_b % next_def_align ||
|
||||||
|
(!copy.op.isConstant() && copy.op.physReg().reg_b % next_op_align))
|
||||||
|
return;
|
||||||
|
|
||||||
|
auto other = copy_map.find(copy.def.physReg().advance(copy.bytes));
|
||||||
|
if (other == copy_map.end() || copy.bytes + other->second.bytes > 8 ||
|
||||||
|
copy.op.isConstant() != other->second.op.isConstant())
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* don't create 64-bit copies before GFX10 */
|
||||||
|
if (copy.bytes >= 4 && copy.def.regClass().type() == RegType::vgpr &&
|
||||||
|
ctx->program->chip_class < GFX10)
|
||||||
|
return;
|
||||||
|
|
||||||
|
unsigned new_size = copy.bytes + other->second.bytes;
|
||||||
|
if (copy.op.isConstant()) {
|
||||||
|
uint64_t val = copy.op.constantValue64() |
|
||||||
|
(other->second.op.constantValue64() << (copy.bytes * 8u));
|
||||||
|
if (!Operand::is_constant_representable(val, copy.bytes + other->second.bytes, true,
|
||||||
|
copy.def.regClass().type() == RegType::vgpr))
|
||||||
|
return;
|
||||||
|
copy.op = Operand::get_const(ctx->program->chip_class, val, new_size);
|
||||||
|
} else {
|
||||||
|
if (other->second.op.physReg() != copy.op.physReg().advance(copy.bytes))
|
||||||
|
return;
|
||||||
|
copy.op = Operand(copy.op.physReg(), RegClass::get(copy.op.regClass().type(), new_size));
|
||||||
|
}
|
||||||
|
|
||||||
|
copy.bytes = new_size;
|
||||||
|
copy.def = Definition(copy.def.physReg(), RegClass::get(copy.def.regClass().type(), copy.bytes));
|
||||||
|
copy_map.erase(other);
|
||||||
|
}
|
||||||
|
|
||||||
void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi)
|
void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi)
|
||||||
{
|
{
|
||||||
Builder bld(ctx->program, &ctx->instructions);
|
Builder bld(ctx->program, &ctx->instructions);
|
||||||
|
@ -1368,32 +1413,7 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
it->second.bytes = 8;
|
it->second.bytes = 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* try to coalesce copies */
|
try_coalesce_copies(ctx, copy_map, it->second);
|
||||||
unsigned next_def_align = util_next_power_of_two(it->second.bytes + 1);
|
|
||||||
unsigned next_op_align = next_def_align;
|
|
||||||
if (it->second.def.regClass().type() == RegType::vgpr)
|
|
||||||
next_def_align = MIN2(next_def_align, 4);
|
|
||||||
if (it->second.op.regClass().type() == RegType::vgpr)
|
|
||||||
next_op_align = MIN2(next_op_align, 4);
|
|
||||||
|
|
||||||
if (it->second.bytes < 8 && !it->second.op.isConstant() &&
|
|
||||||
it->first.reg_b % next_def_align == 0 &&
|
|
||||||
it->second.op.physReg().reg_b % next_op_align == 0) {
|
|
||||||
// TODO try more relaxed alignment for subdword copies
|
|
||||||
PhysReg other_def_reg = it->first;
|
|
||||||
other_def_reg.reg_b += it->second.bytes;
|
|
||||||
PhysReg other_op_reg = it->second.op.physReg();
|
|
||||||
other_op_reg.reg_b += it->second.bytes;
|
|
||||||
std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg);
|
|
||||||
if (other != copy_map.end() &&
|
|
||||||
other->second.op.physReg() == other_op_reg &&
|
|
||||||
it->second.bytes + other->second.bytes <= 8) {
|
|
||||||
it->second.bytes += other->second.bytes;
|
|
||||||
it->second.def = Definition(it->first, RegClass::get(it->second.def.regClass().type(), it->second.bytes));
|
|
||||||
it->second.op = Operand(it->second.op.physReg(), RegClass::get(it->second.op.regClass().type(), it->second.bytes));
|
|
||||||
copy_map.erase(other);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check if the definition reg is used by another copy operation */
|
/* check if the definition reg is used by another copy operation */
|
||||||
for (std::pair<const PhysReg, copy_operation>& copy : copy_map) {
|
for (std::pair<const PhysReg, copy_operation>& copy : copy_map) {
|
||||||
|
|
Loading…
Reference in New Issue