diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 40798da9597..6e9606c47d3 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -1452,7 +1452,11 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 && (!hi.isConstant() || !Operand::c32(hi.constantValue()).isLiteral() || ctx->program->gfx_level >= GFX10)) { - bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u)); + if (hi.isConstant()) + bld.vop3(aco_opcode::v_alignbyte_b32, def, Operand::c32(hi.constantValue()), lo, + Operand::c32(2u)); + else + bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u)); return; } diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index d4799d6cdb5..38a95eacc1e 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -838,3 +838,27 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr) finish_to_hw_instr_test(); END_TEST + +BEGIN_TEST(to_hw_instr.pack2x16_alignbyte_constant) + PhysReg v0_lo{256}; + PhysReg v0_hi{256}; + PhysReg v1_hi{257}; + v0_hi.reg_b += 2; + v1_hi.reg_b += 2; + + if (!setup_cs(NULL, GFX10)) + return; + + /* Flush denormals to prevent usage of v_pack_b32_f16, so v_alignbyte_b32 is emitted instead. */ + program->blocks[0].fp_mode.denorm16_64 = fp_denorm_flush; + + //>> p_unit_test 0 + //! v1: %_:v[0] = v_alignbyte_b32 0x3800, %_:v[1][16:32], 2 + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand(v1_hi, v2b), Operand::c16(0x3800)); + + //! s_endpgm + + finish_to_hw_instr_test(); +END_TEST