aco: small refactoring of shuffle code lowering
Copy sizes are now tracked in bytes instead of in 32-bit units. Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-By: Timur Kristóf <timur.kristof@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4002>
This commit is contained in:
parent
0680b258f4
commit
9f779a2518
|
@ -697,7 +697,7 @@ struct copy_operation {
|
||||||
Operand op;
|
Operand op;
|
||||||
Definition def;
|
Definition def;
|
||||||
unsigned uses;
|
unsigned uses;
|
||||||
unsigned size;
|
unsigned bytes;
|
||||||
};
|
};
|
||||||
|
|
||||||
void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi)
|
void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context* ctx, chip_class chip_class, Pseudo_instruction *pi)
|
||||||
|
@ -743,28 +743,28 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
if (it->second.uses == 0) {
|
if (it->second.uses == 0) {
|
||||||
|
|
||||||
/* try to coalesce 32-bit sgpr copies to 64-bit copies */
|
/* try to coalesce 32-bit sgpr copies to 64-bit copies */
|
||||||
if (it->second.def.getTemp().type() == RegType::sgpr && it->second.size == 1 &&
|
if (it->second.def.getTemp().type() == RegType::sgpr && it->second.bytes == 4 &&
|
||||||
!it->second.op.isConstant() && it->first % 2 == it->second.op.physReg() % 2) {
|
!it->second.op.isConstant() && it->first % 2 == it->second.op.physReg() % 2) {
|
||||||
|
|
||||||
PhysReg other_def_reg = PhysReg{it->first % 2 ? it->first - 1 : it->first + 1};
|
PhysReg other_def_reg = PhysReg{it->first % 2 ? it->first - 1 : it->first + 1};
|
||||||
PhysReg other_op_reg = PhysReg{it->first % 2 ? it->second.op.physReg() - 1 : it->second.op.physReg() + 1};
|
PhysReg other_op_reg = PhysReg{it->first % 2 ? it->second.op.physReg() - 1 : it->second.op.physReg() + 1};
|
||||||
std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg);
|
std::map<PhysReg, copy_operation>::iterator other = copy_map.find(other_def_reg);
|
||||||
|
|
||||||
if (other != copy_map.end() && !other->second.uses && other->second.size == 1 &&
|
if (other != copy_map.end() && !other->second.uses && other->second.bytes == 4 &&
|
||||||
other->second.op.physReg() == other_op_reg && !other->second.op.isConstant()) {
|
other->second.op.physReg() == other_op_reg && !other->second.op.isConstant()) {
|
||||||
std::map<PhysReg, copy_operation>::iterator to_erase = it->first % 2 ? it : other;
|
std::map<PhysReg, copy_operation>::iterator to_erase = it->first % 2 ? it : other;
|
||||||
it = it->first % 2 ? other : it;
|
it = it->first % 2 ? other : it;
|
||||||
copy_map.erase(to_erase);
|
copy_map.erase(to_erase);
|
||||||
it->second.size = 2;
|
it->second.bytes = 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (it->second.def.physReg() == scc) {
|
if (it->second.def.physReg() == scc) {
|
||||||
bld.sopc(aco_opcode::s_cmp_lg_i32, it->second.def, it->second.op, Operand(0u));
|
bld.sopc(aco_opcode::s_cmp_lg_i32, it->second.def, it->second.op, Operand(0u));
|
||||||
preserve_scc = true;
|
preserve_scc = true;
|
||||||
} else if (it->second.size == 2 && it->second.def.getTemp().type() == RegType::sgpr) {
|
} else if (it->second.bytes == 8 && it->second.def.getTemp().type() == RegType::sgpr) {
|
||||||
bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
|
bld.sop1(aco_opcode::s_mov_b64, it->second.def, Operand(it->second.op.physReg(), s2));
|
||||||
} else if (it->second.size == 2 && it->second.op.isConstant()) {
|
} else if (it->second.bytes == 8 && it->second.op.isConstant()) {
|
||||||
uint64_t val = it->second.op.constantValue64();
|
uint64_t val = it->second.op.constantValue64();
|
||||||
bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
|
bld.vop1(aco_opcode::v_mov_b32, it->second.def, Operand((uint32_t)val));
|
||||||
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
|
bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{it->second.def.physReg() + 1}, v1),
|
||||||
|
@ -775,8 +775,8 @@ void handle_operands(std::map<PhysReg, copy_operation>& copy_map, lower_context*
|
||||||
|
|
||||||
/* reduce the number of uses of the operand reg by one */
|
/* reduce the number of uses of the operand reg by one */
|
||||||
if (!it->second.op.isConstant()) {
|
if (!it->second.op.isConstant()) {
|
||||||
for (unsigned i = 0; i < it->second.size; i++) {
|
for (unsigned i = 0; i < it->second.bytes; i += 4) {
|
||||||
target = copy_map.find(PhysReg{it->second.op.physReg() + i});
|
target = copy_map.find(PhysReg{it->second.op.physReg() + i/4});
|
||||||
if (target != copy_map.end())
|
if (target != copy_map.end())
|
||||||
target->second.uses--;
|
target->second.uses--;
|
||||||
}
|
}
|
||||||
|
@ -892,16 +892,18 @@ void lower_to_hw_instr(Program* program)
|
||||||
{
|
{
|
||||||
case aco_opcode::p_extract_vector:
|
case aco_opcode::p_extract_vector:
|
||||||
{
|
{
|
||||||
unsigned reg = instr->operands[0].physReg() + instr->operands[1].constantValue() * instr->definitions[0].size();
|
PhysReg reg = instr->operands[0].physReg();
|
||||||
RegClass rc = RegClass(instr->operands[0].getTemp().type(), 1);
|
reg.reg_b += instr->operands[1].constantValue() * instr->definitions[0].bytes();
|
||||||
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
|
|
||||||
if (reg == instr->definitions[0].physReg())
|
if (reg == instr->definitions[0].physReg())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
std::map<PhysReg, copy_operation> copy_operations;
|
std::map<PhysReg, copy_operation> copy_operations;
|
||||||
|
RegClass rc = RegClass(instr->operands[0].getTemp().type(), 1);
|
||||||
|
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
|
||||||
for (unsigned i = 0; i < instr->definitions[0].size(); i++) {
|
for (unsigned i = 0; i < instr->definitions[0].size(); i++) {
|
||||||
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, rc_def);
|
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, rc_def);
|
||||||
copy_operations[def.physReg()] = {Operand(PhysReg{reg + i}, rc), def, 0, 1};
|
copy_operations[def.physReg()] = {Operand(PhysReg{reg + i}, rc), def, 0, 4};
|
||||||
}
|
}
|
||||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||||
break;
|
break;
|
||||||
|
@ -910,24 +912,28 @@ void lower_to_hw_instr(Program* program)
|
||||||
{
|
{
|
||||||
std::map<PhysReg, copy_operation> copy_operations;
|
std::map<PhysReg, copy_operation> copy_operations;
|
||||||
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
|
RegClass rc_def = RegClass(instr->definitions[0].getTemp().type(), 1);
|
||||||
unsigned reg_idx = 0;
|
PhysReg reg = instr->definitions[0].physReg();
|
||||||
for (const Operand& op : instr->operands) {
|
for (const Operand& op : instr->operands) {
|
||||||
if (op.isConstant()) {
|
if (op.isConstant()) {
|
||||||
const PhysReg reg = PhysReg{instr->definitions[0].physReg() + reg_idx};
|
const Definition def = Definition(reg, RegClass(instr->definitions[0].getTemp().type(), op.size()));
|
||||||
const Definition def = Definition(reg, rc_def);
|
copy_operations[reg] = {op, def, 0, op.bytes()};
|
||||||
copy_operations[reg] = {op, def, 0, op.size()};
|
reg.reg_b += op.bytes();
|
||||||
reg_idx++;
|
continue;
|
||||||
|
}
|
||||||
|
if (op.isUndefined()) {
|
||||||
|
reg.reg_b += op.bytes();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegClass rc_op = RegClass(op.getTemp().type(), 1);
|
RegClass rc_op = RegClass(op.getTemp().type(), 1);
|
||||||
for (unsigned j = 0; j < op.size(); j++)
|
PhysReg def_reg = reg;
|
||||||
{
|
for (unsigned j = 0; j < op.size(); j++) {
|
||||||
const Operand copy_op = Operand(PhysReg{op.physReg() + j}, rc_op);
|
const Operand copy_op = Operand(PhysReg{op.physReg() + j}, rc_op);
|
||||||
const Definition def = Definition(PhysReg{instr->definitions[0].physReg() + reg_idx}, rc_def);
|
const Definition def = Definition(def_reg, rc_def);
|
||||||
copy_operations[def.physReg()] = {copy_op, def, 0, 1};
|
copy_operations[def.physReg()] = {copy_op, def, 0, 4};
|
||||||
reg_idx++;
|
def_reg.reg_b += 4;
|
||||||
}
|
}
|
||||||
|
reg.reg_b += op.bytes();
|
||||||
}
|
}
|
||||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||||
break;
|
break;
|
||||||
|
@ -935,15 +941,19 @@ void lower_to_hw_instr(Program* program)
|
||||||
case aco_opcode::p_split_vector:
|
case aco_opcode::p_split_vector:
|
||||||
{
|
{
|
||||||
std::map<PhysReg, copy_operation> copy_operations;
|
std::map<PhysReg, copy_operation> copy_operations;
|
||||||
RegClass rc_op = instr->operands[0].isConstant() ? s1 : RegClass(instr->operands[0].regClass().type(), 1);
|
RegClass rc_op = RegClass(instr->operands[0].regClass().type(), 1);
|
||||||
for (unsigned i = 0; i < instr->definitions.size(); i++) {
|
PhysReg reg = instr->operands[0].physReg();
|
||||||
unsigned k = instr->definitions[i].size();
|
|
||||||
RegClass rc_def = RegClass(instr->definitions[i].getTemp().type(), 1);
|
for (const Definition& def : instr->definitions) {
|
||||||
for (unsigned j = 0; j < k; j++) {
|
RegClass rc_def = RegClass(def.getTemp().type(), 1);
|
||||||
Operand op = Operand(PhysReg{instr->operands[0].physReg() + (i*k+j)}, rc_op);
|
PhysReg op_reg = reg;
|
||||||
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, rc_def);
|
for (unsigned j = 0; j < def.size(); j++) {
|
||||||
copy_operations[def.physReg()] = {op, def, 0, op.size()};
|
const Operand op = Operand(op_reg, rc_op);
|
||||||
|
const Definition copy_def = Definition(PhysReg{def.physReg() + j}, rc_def);
|
||||||
|
copy_operations[copy_def.physReg()] = {op, copy_def, 0, 4};
|
||||||
|
op_reg.reg_b += 4;
|
||||||
}
|
}
|
||||||
|
reg.reg_b += def.bytes();
|
||||||
}
|
}
|
||||||
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
handle_operands(copy_operations, &ctx, program->chip_class, pi);
|
||||||
break;
|
break;
|
||||||
|
@ -952,20 +962,18 @@ void lower_to_hw_instr(Program* program)
|
||||||
case aco_opcode::p_wqm:
|
case aco_opcode::p_wqm:
|
||||||
{
|
{
|
||||||
std::map<PhysReg, copy_operation> copy_operations;
|
std::map<PhysReg, copy_operation> copy_operations;
|
||||||
for (unsigned i = 0; i < instr->operands.size(); i++)
|
for (unsigned i = 0; i < instr->operands.size(); i++) {
|
||||||
{
|
|
||||||
Operand operand = instr->operands[i];
|
Operand operand = instr->operands[i];
|
||||||
if (operand.isConstant() || operand.size() == 1) {
|
if (operand.isConstant() || operand.size() == 1) {
|
||||||
assert(instr->definitions[i].size() == operand.size());
|
assert(instr->definitions[i].bytes() == operand.bytes());
|
||||||
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.size()};
|
copy_operations[instr->definitions[i].physReg()] = {operand, instr->definitions[i], 0, operand.bytes()};
|
||||||
} else {
|
} else {
|
||||||
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
|
RegClass def_rc = RegClass(instr->definitions[i].regClass().type(), 1);
|
||||||
RegClass op_rc = RegClass(operand.getTemp().type(), 1);
|
RegClass op_rc = RegClass(operand.getTemp().type(), 1);
|
||||||
for (unsigned j = 0; j < operand.size(); j++)
|
for (unsigned j = 0; j < operand.size(); j++) {
|
||||||
{
|
|
||||||
Operand op = Operand(PhysReg{instr->operands[i].physReg() + j}, op_rc);
|
Operand op = Operand(PhysReg{instr->operands[i].physReg() + j}, op_rc);
|
||||||
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, def_rc);
|
Definition def = Definition(PhysReg{instr->definitions[i].physReg() + j}, def_rc);
|
||||||
copy_operations[def.physReg()] = {op, def, 0, 1};
|
copy_operations[def.physReg()] = {op, def, 0, 4};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1049,13 +1057,12 @@ void lower_to_hw_instr(Program* program)
|
||||||
Operand operand = instr->operands[0];
|
Operand operand = instr->operands[0];
|
||||||
if (operand.isConstant() || operand.size() == 1) {
|
if (operand.isConstant() || operand.size() == 1) {
|
||||||
assert(instr->definitions[0].size() == 1);
|
assert(instr->definitions[0].size() == 1);
|
||||||
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.size()};
|
copy_operations[instr->definitions[0].physReg()] = {operand, instr->definitions[0], 0, operand.bytes()};
|
||||||
} else {
|
} else {
|
||||||
for (unsigned i = 0; i < operand.size(); i++)
|
for (unsigned i = 0; i < operand.size(); i++) {
|
||||||
{
|
|
||||||
Operand op = Operand(PhysReg{operand.physReg() + i}, s1);
|
Operand op = Operand(PhysReg{operand.physReg() + i}, s1);
|
||||||
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, s1);
|
Definition def = Definition(PhysReg{instr->definitions[0].physReg() + i}, s1);
|
||||||
copy_operations[def.physReg()] = {op, def, 0, 1};
|
copy_operations[def.physReg()] = {op, def, 0, 4};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue