diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index ec83f8a2ffc5e..aeee5111955b1 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1371,43 +1371,6 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres bld.sop1(aco_opcode::s_mov_b32, def, op); } else if (def.regClass() == s2) { bld.sop1(aco_opcode::s_mov_b64, def, op); - } else if (def.regClass().is_subdword() && ctx->program->gfx_level < GFX8) { - if (op.physReg().byte()) { - assert(def.physReg().byte() == 0); - bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(op.physReg().byte() * 8), op); - } else if (def.physReg().byte()) { - assert(op.physReg().byte() == 0); - /* preserve the target's lower half */ - uint32_t bits = def.physReg().byte() * 8; - PhysReg lo_reg = PhysReg(def.physReg().reg()); - Definition lo_half = - Definition(lo_reg, RegClass::get(RegType::vgpr, def.physReg().byte())); - Definition dst = - Definition(lo_reg, RegClass::get(RegType::vgpr, lo_half.bytes() + op.bytes())); - - if (def.physReg().reg() == op.physReg().reg()) { - bld.vop2(aco_opcode::v_and_b32, lo_half, Operand::c32((1 << bits) - 1u), - Operand(lo_reg, lo_half.regClass())); - if (def.physReg().byte() == 1) { - bld.vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32((1 << bits) + 1u), op); - } else if (def.physReg().byte() == 2) { - bld.vop3(aco_opcode::v_cvt_pk_u16_u32, dst, Operand(lo_reg, v2b), op); - } else if (def.physReg().byte() == 3) { - bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1), - Operand::c32((1 << bits) + 1u)); - bld.vop3(aco_opcode::v_mul_lo_u32, dst, Operand(scratch_sgpr, s1), op); - } - } else { - lo_half.setFixed(lo_half.physReg().advance(4 - def.physReg().byte())); - bld.vop2(aco_opcode::v_lshlrev_b32, lo_half, Operand::c32(32 - bits), - Operand(lo_reg, lo_half.regClass())); - bld.vop3(aco_opcode::v_alignbyte_b32, dst, op, - Operand(lo_half.physReg(), lo_half.regClass()), - Operand::c32(4 - def.physReg().byte())); - } - } else { - bld.vop1(aco_opcode::v_mov_b32, def, op); - } } else if (def.regClass() == v1b && ctx->program->gfx_level >= GFX11) { uint8_t swiz[] = {4, 5, 6, 7}; swiz[def.physReg().byte()] = op.physReg().byte(); @@ -1568,6 +1531,8 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese void do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Operand hi) { + assert(ctx->program->gfx_level >= GFX8); + if (lo.isConstant() && hi.isConstant()) { copy_constant(ctx, bld, def, Operand::c32(lo.constantValue() | (hi.constantValue() << 16))); return; @@ -1651,35 +1616,12 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera emit_v_mov_b16(bld, def_hi, hi); else emit_v_mov_b16(bld, def_lo, lo); - return; - } else if (ctx->program->gfx_level >= GFX8) { + } else { if (lo.physReg().reg() == def.physReg().reg()) bld.vop1_sdwa(aco_opcode::v_mov_b32, def_hi, hi); else bld.vop1_sdwa(aco_opcode::v_mov_b32, def_lo, lo); - return; } - - /* alignbyte needs the operands in the following way: - * | xx hi | lo xx | >> 2 byte */ - if (lo.physReg().byte() != hi.physReg().byte()) { - /* | xx lo | hi xx | => | lo hi | lo hi | */ - assert(lo.physReg().byte() == 0 && hi.physReg().byte() == 2); - bld.vop3(aco_opcode::v_alignbyte_b32, def, lo, hi, Operand::c32(2u)); - lo = Operand(def_hi.physReg(), v2b); - hi = Operand(def_lo.physReg(), v2b); - } else if (lo.physReg().byte() == 0) { - /* | xx hi | xx lo | => | xx hi | lo 00 | */ - bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), lo); - lo = Operand(def_hi.physReg(), v2b); - } else { - /* | hi xx | lo xx | => | 00 hi | lo xx | */ - assert(hi.physReg().byte() == 2); - bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), hi); - hi = Operand(def_lo.physReg(), v2b); - } - /* perform the alignbyte */ - bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u)); } void @@ -1833,53 +1775,6 @@ handle_operands(std::map& copy_map, lower_context* ctx, } } - /* on GFX6/7, we need some small workarounds as there is no - * SDWA instruction to do partial register writes */ - if (ctx->program->gfx_level < GFX8 && it->second.bytes < 4) { - if (it->first.byte() == 0 && it->second.op.physReg().byte() == 0 && !it->second.is_used && - pi->opcode == aco_opcode::p_split_vector) { - /* Other operations might overwrite the high bits, so change all users - * of the high bits to the new target where they are still available. - * This mechanism depends on also emitting dead definitions. */ - PhysReg reg_hi = it->second.op.physReg().advance(it->second.bytes); - while (reg_hi != PhysReg(it->second.op.physReg().reg() + 1)) { - std::map::iterator other = copy_map.begin(); - for (other = copy_map.begin(); other != copy_map.end(); other++) { - /* on GFX6/7, if the high bits are used as operand, they cannot be a target */ - if (other->second.op.physReg() == reg_hi) { - other->second.op.setFixed(it->first.advance(reg_hi.byte())); - break; /* break because an operand can only be used once */ - } - } - reg_hi = reg_hi.advance(it->second.bytes); - } - } else if (it->first.byte()) { - assert(pi->opcode == aco_opcode::p_create_vector); - /* on GFX6/7, if we target an upper half where the lower half hasn't yet been handled, - * move to the target operand's high bits. This is save to do as it cannot be an operand - */ - PhysReg lo = PhysReg(it->first.reg()); - std::map::iterator other = copy_map.find(lo); - if (other != copy_map.end()) { - assert(other->second.bytes == it->first.byte()); - PhysReg new_reg_hi = other->second.op.physReg().advance(it->first.byte()); - it->second.def = Definition(new_reg_hi, it->second.def.regClass()); - it->second.is_used = 0; - other->second.bytes += it->second.bytes; - other->second.def.setTemp(Temp(other->second.def.tempId(), - RegClass::get(RegType::vgpr, other->second.bytes))); - other->second.op.setTemp(Temp(other->second.op.tempId(), - RegClass::get(RegType::vgpr, other->second.bytes))); - /* if the new target's high bits are also a target, change uses */ - std::map::iterator target = copy_map.find(new_reg_hi); - if (target != copy_map.end()) { - for (unsigned i = 0; i < it->second.bytes; i++) - target->second.uses[i]++; - } - } - } - } - /* find portions where the target reg is not used as operand for any other copy */ if (it->second.is_used) { if (it->second.op.isConstant() || skip_partial_copies) { @@ -1899,8 +1794,7 @@ handle_operands(std::map& copy_map, lower_context* ctx, * a partial copy allows further copies, it should be done instead. */ bool partial_copy = (has_zero_use_bytes == 0xf) || (has_zero_use_bytes == 0xf0); for (std::pair& copy : copy_map) { - /* on GFX6/7, we can only do copies with full registers */ - if (partial_copy || ctx->program->gfx_level <= GFX7) + if (partial_copy) break; for (uint16_t i = 0; i < copy.second.bytes; i++) { /* distance might underflow */ @@ -2026,8 +1920,7 @@ handle_operands(std::map& copy_map, lower_context* ctx, } /* GFX6-7 can only swap full registers */ - if (ctx->program->gfx_level <= GFX7) - swap.bytes = align(swap.bytes, 4); + assert (ctx->program->gfx_level > GFX7 || (swap.bytes % 4) == 0); do_swap(ctx, bld, swap, preserve_scc, pi); @@ -2588,8 +2481,7 @@ lower_to_hw_instr(Program* program) bld.sop2(signext ? aco_opcode::s_bfe_i32 : aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op, Operand::c32((bits << 16) | offset)); } - } else if ((dst.regClass() == v1 && op.physReg().byte() == 0) || - ctx.program->gfx_level <= GFX7) { + } else if (dst.regClass() == v1 && op.physReg().byte() == 0) { assert(op.physReg().byte() == 0 && dst.physReg().byte() == 0); if (offset == (32 - bits) && op.regClass() != s1) { bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst, @@ -2667,10 +2559,9 @@ lower_to_hw_instr(Program* program) Operand(dst.physReg(), s1), Operand::c32(offset)); } } else if (dst.regClass() == v1 || !has_sdwa) { - if (offset == (dst.bytes() * 8u - bits) && - (dst.regClass() == v1 || program->gfx_level <= GFX7)) { + if (offset == (dst.bytes() * 8u - bits) && dst.regClass() == v1) { bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op); - } else if (offset == 0 && (dst.regClass() == v1 || program->gfx_level <= GFX7)) { + } else if (offset == 0 && dst.regClass() == v1) { bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits)); } else if (has_sdwa && (op.regClass() != s1 || program->gfx_level >= GFX9)) { bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa().dst_sel = diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index e78893b245999..85b3f06d634d2 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -16,8 +16,6 @@ BEGIN_TEST(to_hw_instr.swap_subdword) PhysReg v1_hi{257}; PhysReg v1_b1{257}; PhysReg v1_b3{257}; - PhysReg v2_lo{258}; - PhysReg v3_lo{259}; v0_hi.reg_b += 2; v1_hi.reg_b += 2; v0_b1.reg_b += 1; @@ -25,164 +23,6 @@ BEGIN_TEST(to_hw_instr.swap_subdword) v0_b3.reg_b += 3; v1_b3.reg_b += 3; - for (unsigned i = GFX6; i <= GFX7; i++) { - if (!setup_cs(NULL, (amd_gfx_level)i)) - continue; - - //~gfx[67]>> p_unit_test 0 - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); - bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b), - Operand(v1_lo, v2b), Operand(v0_lo, v2b)); - - //~gfx[67]! p_unit_test 1 - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b), - Operand(v0_lo, v2b)); - - //~gfx[67]! p_unit_test 2 - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b), - Operand(v0_lo, v2b), Operand(v2_lo, v2b)); - - //~gfx[67]! p_unit_test 3 - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16] - //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b), - Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b)); - - //~gfx[67]! p_unit_test 4 - //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16] - //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2 - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2 - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b), - Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b)); - - //~gfx[67]! p_unit_test 5 - //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] - //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), - Operand(v0_lo, v1)); - - //~gfx[67]! p_unit_test 6 - //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16] - //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] - //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), - Definition(v2_lo, v2b), Operand(v0_lo, v6b)); - - //~gfx[67]! p_unit_test 7 - //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16] - //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] - //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), - Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2)); - - //~gfx[67]! p_unit_test 8 - //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32] - //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b), - Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2)); - - //~gfx[67]! p_unit_test 9 - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] - //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); - bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b), - Operand(v1_lo, v1b), Operand(v0_lo, v1b)); - - //~gfx[67]! p_unit_test 10 - //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8] - //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3 - //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b), - Operand(v0_lo, v1b)); - - //~gfx[67]! p_unit_test 11 - //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8] - //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3 - //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b), - Operand(v0_lo, v1b), Operand(v2_lo, v1b)); - - //~gfx[67]! p_unit_test 12 - //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8] - //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3 - //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] - //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] - //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2 - //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24] - //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1 - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b), - Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b)); - - //~gfx[67]! p_unit_test 13 - //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8] - //~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8] - //~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16] - //~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8] - //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] - //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001 - //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); - Instruction* pseudo = - bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b), - Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b)); - pseudo->pseudo().scratch_sgpr = m0; - - //~gfx[67]! p_unit_test 14 - //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] - //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b), - Operand(v0_lo, v2b)); - - //~gfx[67]! p_unit_test 15 - //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] - //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16] - //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24] - //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32] - bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); - bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b), - Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1)); - - //~gfx[67]! s_endpgm - - finish_to_hw_instr_test(); - } - for (amd_gfx_level lvl : {GFX8, GFX9, GFX11}) { if (!setup_cs(NULL, lvl)) continue; @@ -621,23 +461,23 @@ BEGIN_TEST(to_hw_instr.extract) //>> p_unit_test 4 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); - //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0) //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 - EXT(0, 0) + if (lvl != GFX7) + EXT(0, 0) //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2) //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04 if (lvl != GFX7) EXT(0, 2) - //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8 //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1) //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801 - EXT(1, 0) + if (lvl != GFX7) + EXT(1, 0) //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3) //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03 //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903 @@ -721,10 +561,10 @@ BEGIN_TEST(to_hw_instr.insert) //>> p_unit_test 2 bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); - //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8 //~gfx(8|9)! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:ubyte0 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060c00 - INS(0, 0, 0) + if (lvl != GFX7) + INS(0, 0, 0) //~gfx(8|9)! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:ubyte0 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc000504 if (lvl != GFX7) @@ -737,12 +577,12 @@ BEGIN_TEST(to_hw_instr.insert) //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0xc020504 if (lvl != GFX7) INS(0, 2, 2) - //~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16] //~gfx8! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte0 //~gfx8! v2b: %0:v[0][0:16] = v_and_b32 0xffffff00, %0:v[1] //~gfx9! v2b: %0:v[0][0:16] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:dword src1_sel:ubyte0 //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x706000c - INS(1, 0, 0) + if (lvl != GFX7) + INS(1, 0, 0) //~gfx8! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][0:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte0 //~gfx8! v2b: %0:v[0][16:32] = v_and_b32 0xff00ffff, %0:v[1] //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 8, %0:v[1][0:16] dst_sel:uword1 dst_preserve src0_sel:dword src1_sel:ubyte0