diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index de435ff7834..6fb6e664edd 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -136,8 +136,11 @@ Temp emit_mbcnt(isel_context *ctx, Definition dst, if (ctx->program->wave_size == 32) { return thread_id_lo; + } else if (ctx->program->chip_class <= GFX7) { + Temp thread_id_hi = bld.vop2(aco_opcode::v_mbcnt_hi_u32_b32, dst, mask_hi, thread_id_lo); + return thread_id_hi; } else { - Temp thread_id_hi = bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32, dst, mask_hi, thread_id_lo); + Temp thread_id_hi = bld.vop3(aco_opcode::v_mbcnt_hi_u32_b32_e64, dst, mask_hi, thread_id_lo); return thread_id_hi; } } diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py index a0ecc9c5788..bee3186ebae 100644 --- a/src/amd/compiler/aco_opcodes.py +++ b/src/amd/compiler/aco_opcodes.py @@ -672,6 +672,7 @@ VOP2 = { (0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True), (0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False), (0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False), + (0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False), (0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA (0x26, 0x26, 0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA (0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA @@ -1042,7 +1043,7 @@ VOP3 = { ( -1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False), (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False), (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False), - (0x124, 0x124, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32", False, False), + ( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False), ( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False), ( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False), ( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False),