aco: small refactoring of shuffle code lowering

Uses now bytes instead of 32bit size

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-By: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>
This commit is contained in:
Daniel Schürmann 2020-02-20 11:34:40 +01:00
parent 0680b258f4
commit 9f779a2518
1 changed files with 48 additions and 41 deletions

View File

@ -697,7 +697,7 @@ struct copy_operation {
Operand op; Operand op;
Definition def; Definition def;
unsigned uses; unsigned uses;
unsigned size; unsigned bytes;
}; };
void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi) void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi)
@ -743,28 +743,28 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
if (it->second.uses == 0) { if (it->second.uses == 0) {
/* try to coalesce 32-bit sgpr copies to 64-bit copies */ /* try to coalesce 32-bit sgpr copies to 64-bit copies */
if (it->second.def.getTemp().type() == RegType::sgpr && it->second.size == 1 && if (it->second.def.getTemp().type() == RegType::sgpr && it->second.bytes == 4 &&
!it->second.op.isConstant() && it->first % 2 == it->second.op.physReg() % 2) { !it->second.op.isConstant() && it->first % 2 == it->second.op.physReg() % 2) {
PhysReg other_def_reg = PhysReg{it->first % 2 ? it->first - 1 : it->first + 1}; PhysReg other_def_reg = PhysReg{it->first % 2 ? it->first - 1 : it->first + 1};
PhysReg other_op_reg = PhysReg{it->first % 2 ? it->second.op.physReg() - 1 : it->second.op.physReg() + 1}; PhysReg other_op_reg = PhysReg{it->first % 2 ? it->second.op.physReg() - 1 : it->second.op.physReg() + 1};
std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg); std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg);
if (other != copy_map.end() && !other->second.uses && other->second.size == 1 && if (other != copy_map.end() && !other->second.uses && other->second.bytes == 4 &&
other->second.op.physReg() == other_op_reg && !other->second.op.isConstant()) { other->second.op.physReg() == other_op_reg && !other->second.op.isConstant()) {
std::map<PhysReg, copy_operation>::iterator to_erase = it->first % 2 ? it : other; std::map<PhysReg, copy_operation>::iterator to_erase = it->first % 2 ? it : other;
it = it->first % 2 ? other : it; it = it->first % 2 ? other : it;
copy_map.erase(to_erase); copy_map.erase(to_erase);
it->second.size = 2; it->second.bytes = 8;
} }
} }
if (it->second.def.physReg() == scc) { if (it->second.def.physReg() == scc) {
bld.sopc(aco_opcode::s_cmp_lg_i32, it->second.def, it->second.op, Operand(0u)); bld.sopc(aco_opcode::s_cmp_lg_i32, it->second.def, it->second.op, Operand(0u));
preserve_scc = true; preserve_scc = true;
} else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) { } else if (it->second.bytes == 8 && it->second.def.getTemp().type() == RegType::sgpr) {
bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2)); bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
} else if (it->second.size == 2 && it->second.op.isConstant()) { } else if (it->second.bytes == 8 && it->second.op.isConstant()) {
uint64_t val = it->second.op.constantValue64(); uint64_t val = it->second.op.constantValue64();
bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val)); bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1), bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
@ -775,8 +775,8 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
/* reduce the number of uses of the operand reg by one */ /* reduce the number of uses of the operand reg by one */
if (!it->second.op.isConstant()) { if (!it->second.op.isConstant()) {
for (unsigned i = 0; i < it->second.size; i++) { for (unsigned i = 0; i < it->second.bytes; i += 4) {
target = copy_map.find(PhysReg{it->second.op.physReg() + i}); target = copy_map.find(PhysReg{it->second.op.physReg() + i/4});
if (target != copy_map.end()) if (target != copy_map.end())
target->second.uses--; target->second.uses--;
} }
@ -892,16 +892,18 @@ void lower_to_hw_instr(Program* program)
{ {
case aco_opcode::p_extract_vector: case aco_opcode::p_extract_vector:
{ {
unsigned reg = instr->operands[0].physReg() + instr->operands[1].constantValue() * instr->definitions[0].size(); PhysReg reg = instr->operands[0].physReg();
RegClass rc = RegClass(instr->operands[0].getTemp().type(), 1); reg.reg_b += instr->operands[1].constantValue() * instr->definitions[0].bytes();
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
if (reg == instr->definitions[0].physReg()) if (reg == instr->definitions[0].physReg())
break; break;
std::map<PhysReg, copy_operation> copy_operations; std::map<PhysReg, copy_operation> copy_operations;
RegClass rc = RegClass(instr->operands[0].getTemp().type(), 1);
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
for (unsigned i = 0; i < instr->definitions[0].size(); i++) { for (unsigned i = 0; i < instr->definitions[0].size(); i++) {
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, rc_def); Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, rc_def);
copy_operations[def.physReg()] = {Operand(PhysReg{reg + i}, rc), def, 0, 1}; copy_operations[def.physReg()] = {Operand(PhysReg{reg + i}, rc), def, 0, 4};
} }
handle_operands(copy_operations, &ctx, program->chip_class, pi); handle_operands(copy_operations, &ctx, program->chip_class, pi);
break; break;
@ -910,24 +912,28 @@ void lower_to_hw_instr(Program* program)
{ {
std::map<PhysReg, copy_operation> copy_operations; std::map<PhysReg, copy_operation> copy_operations;
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1); RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
unsigned reg_idx = 0; PhysReg reg = instr->definitions[0].physReg();
for (const Operand& op : instr->operands) { for (const Operand& op : instr->operands) {
if (op.isConstant()) { if (op.isConstant()) {
const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx}; const Definition def = Definition(reg, RegClass(instr->definitions[0].getTemp().type(), op.size()));
const Definition def = Definition(reg, rc_def); copy_operations[reg] = {op, def, 0, op.bytes()};
copy_operations[reg] = {op, def, 0, op.size()}; reg.reg_b += op.bytes();
reg_idx++; continue;
}
if (op.isUndefined()) {
reg.reg_b += op.bytes();
continue; continue;
} }
RegClass rc_op = RegClass(op.getTemp().type(), 1); RegClass rc_op = RegClass(op.getTemp().type(), 1);
for (unsigned j = 0; j < op.size(); j++) PhysReg def_reg = reg;
{ for (unsigned j = 0; j < op.size(); j++) {
const Operand copy_op = Operand(PhysReg{op.physReg() + j}, rc_op); const Operand copy_op = Operand(PhysReg{op.physReg() + j}, rc_op);
const Definition def = Definition(PhysReg{instr->definitions[0].physReg() + reg_idx}, rc_def); const Definition def = Definition(def_reg, rc_def);
copy_operations[def.physReg()] = {copy_op, def, 0, 1}; copy_operations[def.physReg()] = {copy_op, def, 0, 4};
reg_idx++; def_reg.reg_b += 4;
} }
reg.reg_b += op.bytes();
} }
handle_operands(copy_operations, &ctx, program->chip_class, pi); handle_operands(copy_operations, &ctx, program->chip_class, pi);
break; break;
@ -935,15 +941,19 @@ void lower_to_hw_instr(Program* program)
case aco_opcode::p_split_vector: case aco_opcode::p_split_vector:
{ {
std::map<PhysReg, copy_operation> copy_operations; std::map<PhysReg, copy_operation> copy_operations;
RegClass rc_op = instr->operands[0].isConstant() ? s1 : RegClass(instr->operands[0].regClass().type(), 1); RegClass rc_op = RegClass(instr->operands[0].regClass().type(), 1);
for (unsigned i = 0; i < instr->definitions.size(); i++) { PhysReg reg = instr->operands[0].physReg();
unsigned k = instr->definitions[i].size();
RegClass rc_def = RegClass(instr->definitions[i].getTemp().type(), 1); for (const Definition& def : instr->definitions) {
for (unsigned j = 0; j < k; j++) { RegClass rc_def = RegClass(def.getTemp().type(), 1);
Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op); PhysReg op_reg = reg;
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def); for (unsigned j = 0; j < def.size(); j++) {
copy_operations[def.physReg()] = {op, def, 0, op.size()}; const Operand op = Operand(op_reg, rc_op);
const Definition copy_def = Definition(PhysReg{def.physReg() + j}, rc_def);
copy_operations[copy_def.physReg()] = {op, copy_def, 0, 4};
op_reg.reg_b += 4;
} }
reg.reg_b += def.bytes();
} }
handle_operands(copy_operations, &ctx, program->chip_class, pi); handle_operands(copy_operations, &ctx, program->chip_class, pi);
break; break;
@ -952,20 +962,18 @@ void lower_to_hw_instr(Program* program)
case aco_opcode::p_wqm: case aco_opcode::p_wqm:
{ {
std::map<PhysReg, copy_operation> copy_operations; std::map<PhysReg, copy_operation> copy_operations;
for (unsigned i = 0; i < instr->operands.size(); i++) for (unsigned i = 0; i < instr->operands.size(); i++) {
{
Operand operand = instr->operands[i]; Operand operand = instr->operands[i];
if (operand.isConstant() || operand.size() == 1) { if (operand.isConstant() || operand.size() == 1) {
assert(instr->definitions[i].size() == operand.size()); assert(instr->definitions[i].bytes() == operand.bytes());
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()}; copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.bytes()};
} else { } else {
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1); RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
RegClass op_rc = RegClass(operand.getTemp().type(), 1); RegClass op_rc = RegClass(operand.getTemp().type(), 1);
for (unsigned j = 0; j < operand.size(); j++) for (unsigned j = 0; j < operand.size(); j++) {
{
Operand op = Operand(PhysReg{instr->operands[i].physReg() + j}, op_rc); Operand op = Operand(PhysReg{instr->operands[i].physReg() + j}, op_rc);
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, def_rc); Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, def_rc);
copy_operations[def.physReg()] = {op, def, 0, 1}; copy_operations[def.physReg()] = {op, def, 0, 4};
} }
} }
} }
@ -1049,13 +1057,12 @@ void lower_to_hw_instr(Program* program)
Operand operand = instr->operands[0]; Operand operand = instr->operands[0];
if (operand.isConstant() || operand.size() == 1) { if (operand.isConstant() || operand.size() == 1) {
assert(instr->definitions[0].size() == 1); assert(instr->definitions[0].size() == 1);
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()}; copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.bytes()};
} else { } else {
for (unsigned i = 0; i < operand.size(); i++) for (unsigned i = 0; i < operand.size(); i++) {
{
Operand op = Operand(PhysReg{operand.physReg() + i}, s1); Operand op = Operand(PhysReg{operand.physReg() + i}, s1);
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, s1); Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, s1);
copy_operations[def.physReg()] = {op, def, 0, 1}; copy_operations[def.physReg()] = {op, def, 0, 4};
} }
} }