aco: remove explicit dst_preserve flag
Instead, we can rely on the fact that subdword definitions must preserve the unused bits, while dword definitions either pad or sign-extend.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12640>
This commit is contained in:
parent
9e3ff06c38
commit
0988f7b9ba
|
@ -704,7 +704,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
|
||||||
} else {
|
} else {
|
||||||
encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
|
encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8;
|
||||||
uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
|
uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0;
|
||||||
if (sdwa.dst_preserve)
|
if (instr->definitions[0].bytes() < 4) /* dst_preserve */
|
||||||
dst_u = 2;
|
dst_u = 2;
|
||||||
encoding |= dst_u << 11;
|
encoding |= dst_u << 11;
|
||||||
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
encoding |= (sdwa.clamp ? 1 : 0) << 13;
|
||||||
|
|
|
@ -646,7 +646,6 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
|
||||||
sdwa->definitions[0] = Definition(tmp);
|
sdwa->definitions[0] = Definition(tmp);
|
||||||
sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
|
sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend);
|
||||||
sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
|
sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword;
|
||||||
sdwa->dst_preserve = tmp.bytes() == 2;
|
|
||||||
bld.insert(std::move(sdwa));
|
bld.insert(std::move(sdwa));
|
||||||
} else {
|
} else {
|
||||||
assert(src_bits < 32);
|
assert(src_bits < 32);
|
||||||
|
|
|
@ -280,7 +280,6 @@ convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr)
|
||||||
}
|
}
|
||||||
|
|
||||||
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
|
sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false);
|
||||||
sdwa.dst_preserve = sdwa.dst_sel.size() < 4;
|
|
||||||
|
|
||||||
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
|
if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8)
|
||||||
instr->definitions[0].setFixed(vcc);
|
instr->definitions[0].setFixed(vcc);
|
||||||
|
|
|
@ -1469,10 +1469,9 @@ struct SDWA_instruction : public Instruction {
|
||||||
SubdwordSel dst_sel;
|
SubdwordSel dst_sel;
|
||||||
bool neg[2];
|
bool neg[2];
|
||||||
bool abs[2];
|
bool abs[2];
|
||||||
bool dst_preserve : 1;
|
|
||||||
bool clamp : 1;
|
bool clamp : 1;
|
||||||
uint8_t omod : 2; /* GFX9+ */
|
uint8_t omod : 2; /* GFX9+ */
|
||||||
uint8_t padding : 4;
|
uint8_t padding : 5;
|
||||||
};
|
};
|
||||||
static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
|
||||||
|
|
||||||
|
|
|
@ -2097,7 +2097,6 @@ lower_to_hw_instr(Program* program)
|
||||||
sdwa->definitions[0] = dst;
|
sdwa->definitions[0] = dst;
|
||||||
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
|
sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext);
|
||||||
sdwa->dst_sel = SubdwordSel::uword;
|
sdwa->dst_sel = SubdwordSel::uword;
|
||||||
sdwa->dst_preserve = true;
|
|
||||||
bld.insert(std::move(sdwa));
|
bld.insert(std::move(sdwa));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -180,7 +180,6 @@ class Format(Enum):
|
||||||
for i in range(min(num_operands, 2)):
|
for i in range(min(num_operands, 2)):
|
||||||
res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
|
res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i)
|
||||||
res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
|
res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n'
|
||||||
res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -187,8 +187,8 @@ struct InstrPred {
|
||||||
return aSDWA.sel[0] == bSDWA.sel[0] && aSDWA.sel[1] == bSDWA.sel[1] &&
|
return aSDWA.sel[0] == bSDWA.sel[0] && aSDWA.sel[1] == bSDWA.sel[1] &&
|
||||||
aSDWA.dst_sel == bSDWA.dst_sel && aSDWA.abs[0] == bSDWA.abs[0] &&
|
aSDWA.dst_sel == bSDWA.dst_sel && aSDWA.abs[0] == bSDWA.abs[0] &&
|
||||||
aSDWA.abs[1] == bSDWA.abs[1] && aSDWA.neg[0] == bSDWA.neg[0] &&
|
aSDWA.abs[1] == bSDWA.abs[1] && aSDWA.neg[0] == bSDWA.neg[0] &&
|
||||||
aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.dst_preserve == bSDWA.dst_preserve &&
|
aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.clamp == bSDWA.clamp &&
|
||||||
aSDWA.clamp == bSDWA.clamp && aSDWA.omod == bSDWA.omod;
|
aSDWA.omod == bSDWA.omod;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (a->format) {
|
switch (a->format) {
|
||||||
|
|
|
@ -2079,7 +2079,6 @@ combine_inverse_comparison(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||||
memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
|
memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel));
|
||||||
memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
|
memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg));
|
||||||
new_sdwa->dst_sel = cmp_sdwa.dst_sel;
|
new_sdwa->dst_sel = cmp_sdwa.dst_sel;
|
||||||
new_sdwa->dst_preserve = cmp_sdwa.dst_preserve;
|
|
||||||
new_sdwa->clamp = cmp_sdwa.clamp;
|
new_sdwa->clamp = cmp_sdwa.clamp;
|
||||||
new_sdwa->omod = cmp_sdwa.omod;
|
new_sdwa->omod = cmp_sdwa.omod;
|
||||||
new_instr = new_sdwa;
|
new_instr = new_sdwa;
|
||||||
|
|
|
@ -638,7 +638,7 @@ print_instr_format_specific(const Instruction* instr, FILE* output)
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (sdwa.dst_preserve)
|
if (instr->definitions[0].bytes() < 4)
|
||||||
fprintf(output, " dst_preserve");
|
fprintf(output, " dst_preserve");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -174,8 +174,11 @@ validate_ir(Program* program)
|
||||||
"SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
|
"SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
|
||||||
check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
|
check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
|
||||||
instr.get());
|
instr.get());
|
||||||
check(def.bytes() == 4 || sdwa.dst_preserve,
|
check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
|
||||||
"SDWA subdword definition needs dst_preserve", instr.get());
|
"SDWA dst_sel size must be definition size for subdword definitions",
|
||||||
|
instr.get());
|
||||||
|
check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
|
||||||
|
"SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
|
for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
|
||||||
|
|
|
@ -37,9 +37,7 @@ BEGIN_TEST(validate.sdwa.allow)
|
||||||
SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
||||||
sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
|
sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
|
||||||
|
|
||||||
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]).instr->sdwa();
|
||||||
sdwa->dst_preserve = true;
|
|
||||||
sdwa->dst_sel = SubdwordSel::ubyte0;
|
|
||||||
|
|
||||||
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa();
|
||||||
sdwa->sel[0] = SubdwordSel::sbyte2;
|
sdwa->sel[0] = SubdwordSel::sbyte2;
|
||||||
|
|
Loading…
Reference in New Issue