diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp index 3ae5860d857..e520da50fec 100644 --- a/src/amd/compiler/aco_assembler.cpp +++ b/src/amd/compiler/aco_assembler.cpp @@ -704,7 +704,7 @@ emit_instruction(asm_context& ctx, std::vector& out, Instruction* inst } else { encoding |= sdwa.dst_sel.to_sdwa_sel(instr->definitions[0].physReg().byte()) << 8; uint32_t dst_u = sdwa.dst_sel.sign_extend() ? 1 : 0; - if (sdwa.dst_preserve) + if (instr->definitions[0].bytes() < 4) /* dst_preserve */ dst_u = 2; encoding |= dst_u << 11; encoding |= (sdwa.clamp ? 1 : 0) << 13; diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index f8d16f0337d..32b54c8d14f 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -646,7 +646,6 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign sdwa->definitions[0] = Definition(tmp); sdwa->sel[0] = SubdwordSel(src_bits / 8, 0, sign_extend); sdwa->dst_sel = tmp.bytes() == 2 ? SubdwordSel::uword : SubdwordSel::dword; - sdwa->dst_preserve = tmp.bytes() == 2; bld.insert(std::move(sdwa)); } else { assert(src_bits < 32); diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp index 5f51181da3d..6a8f8d2b808 100644 --- a/src/amd/compiler/aco_ir.cpp +++ b/src/amd/compiler/aco_ir.cpp @@ -280,7 +280,6 @@ convert_to_SDWA(chip_class chip, aco_ptr& instr) } sdwa.dst_sel = SubdwordSel(instr->definitions[0].bytes(), 0, false); - sdwa.dst_preserve = sdwa.dst_sel.size() < 4; if (instr->definitions[0].getTemp().type() == RegType::sgpr && chip == GFX8) instr->definitions[0].setFixed(vcc); diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index 624b8ff23a1..525f6c21a85 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -1469,10 +1469,9 @@ struct SDWA_instruction : public Instruction { SubdwordSel dst_sel; bool neg[2]; bool abs[2]; - bool dst_preserve : 1; bool clamp : 1; uint8_t omod : 2; /* GFX9+ */ - uint8_t padding : 4; + uint8_t padding : 5; }; static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index e4af6fd153b..8de0c5c2fb0 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -2097,7 +2097,6 @@ lower_to_hw_instr(Program* program) sdwa->definitions[0] = dst; sdwa->sel[0] = SubdwordSel(1, op.physReg().byte() + offset / 8, signext); sdwa->dst_sel = SubdwordSel::uword; - sdwa->dst_preserve = true; bld.insert(std::move(sdwa)); } break; diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index 33ba9346682..426c97c2069 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -180,7 +180,6 @@ class Format(Enum): for i in range(min(num_operands, 2)): res += 'instr->sel[{0}] = SubdwordSel(op{0}.op.bytes(), 0, false);'.format(i) res += 'instr->dst_sel = SubdwordSel(def0.bytes(), 0, false);\n' - res += 'if (def0.bytes() < 4) instr->dst_preserve = true;' return res diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp index 32c0bd8a120..d650665e0d0 100644 --- a/src/amd/compiler/aco_opt_value_numbering.cpp +++ b/src/amd/compiler/aco_opt_value_numbering.cpp @@ -187,8 +187,8 @@ struct InstrPred { return aSDWA.sel[0] == bSDWA.sel[0] && aSDWA.sel[1] == bSDWA.sel[1] && aSDWA.dst_sel == bSDWA.dst_sel && aSDWA.abs[0] == bSDWA.abs[0] && aSDWA.abs[1] == bSDWA.abs[1] && aSDWA.neg[0] == bSDWA.neg[0] && - aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.dst_preserve == bSDWA.dst_preserve && - aSDWA.clamp == bSDWA.clamp && aSDWA.omod == bSDWA.omod; + aSDWA.neg[1] == bSDWA.neg[1] && aSDWA.clamp == bSDWA.clamp && + aSDWA.omod == bSDWA.omod; } switch (a->format) { diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp index f461932bfeb..933d56894bf 100644 --- a/src/amd/compiler/aco_optimizer.cpp +++ b/src/amd/compiler/aco_optimizer.cpp @@ -2079,7 +2079,6 @@ combine_inverse_comparison(opt_ctx& ctx, aco_ptr& instr) memcpy(new_sdwa->sel, cmp_sdwa.sel, sizeof(new_sdwa->sel)); memcpy(new_sdwa->neg, cmp_sdwa.neg, sizeof(new_sdwa->neg)); new_sdwa->dst_sel = cmp_sdwa.dst_sel; - new_sdwa->dst_preserve = cmp_sdwa.dst_preserve; new_sdwa->clamp = cmp_sdwa.clamp; new_sdwa->omod = cmp_sdwa.omod; new_instr = new_sdwa; diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp index 15e504feb2a..618f8b6b03b 100644 --- a/src/amd/compiler/aco_print_ir.cpp +++ b/src/amd/compiler/aco_print_ir.cpp @@ -638,7 +638,7 @@ print_instr_format_specific(const Instruction* instr, FILE* output) default: break; } } - if (sdwa.dst_preserve) + if (instr->definitions[0].bytes() < 4) fprintf(output, " dst_preserve"); } } diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp index 022b2fd6bab..3e1603e6ca1 100644 --- a/src/amd/compiler/aco_validate.cpp +++ b/src/amd/compiler/aco_validate.cpp @@ -174,8 +174,11 @@ validate_ir(Program* program) "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get()); check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset", instr.get()); - check(def.bytes() == 4 || sdwa.dst_preserve, - "SDWA subdword definition needs dst_preserve", instr.get()); + check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(), + "SDWA dst_sel size must be definition size for subdword definitions", + instr.get()); + check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0, + "SDWA dst_sel offset must be 0 for subdword definitions", instr.get()); } for (unsigned i = 0; i < std::min(2, instr->operands.size()); i++) { diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp index 2bd002ed56b..de46ee755e1 100644 --- a/src/amd/compiler/tests/test_sdwa.cpp +++ b/src/amd/compiler/tests/test_sdwa.cpp @@ -37,9 +37,7 @@ BEGIN_TEST(validate.sdwa.allow) SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa(); sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true; - sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa(); - sdwa->dst_preserve = true; - sdwa->dst_sel = SubdwordSel::ubyte0; + sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]).instr->sdwa(); sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]).instr->sdwa(); sdwa->sel[0] = SubdwordSel::sbyte2;