aco: use v_perm_b32 to copy 0xff00/0x00ff/0xff/0x00
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16595>
This commit is contained in:
parent
dae1629778
commit
e68e6c75ca
|
@ -1085,6 +1085,10 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
assert(dst.regClass() == v1b || dst.regClass() == v2b);
|
||||
|
||||
bool use_sdwa = ctx->program->gfx_level >= GFX9 && ctx->program->gfx_level < GFX11;
|
||||
/* The v_perm_b32 selector is a literal, and VOP3 instructions only accept literals on GFX10+. */
|
||||
bool can_use_perm = ctx->program->gfx_level >= GFX10 &&
|
||||
(op.constantEquals(0) || op.constantEquals(0xff) ||
|
||||
op.constantEquals(0xffff) || op.constantEquals(0xff00));
|
||||
if (dst.regClass() == v1b && use_sdwa) {
|
||||
uint8_t val = op.constantValue();
|
||||
Operand op32 = Operand::c32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
|
||||
|
@ -1118,6 +1122,12 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
|
|||
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
|
||||
instr->vop3().opsel = 2;
|
||||
}
|
||||
} else if (can_use_perm) {
|
||||
uint8_t swiz[] = {4, 5, 6, 7};
|
||||
swiz[dst.physReg().byte()] = op.constantValue() & 0xff ? bperm_255 : bperm_0;
|
||||
if (dst.bytes() == 2)
|
||||
swiz[dst.physReg().byte() + 1] = op.constantValue() >> 8 ? bperm_255 : bperm_0;
|
||||
create_bperm(bld, swiz, dst, Operand::zero());
|
||||
} else {
|
||||
uint32_t offset = dst.physReg().byte() * 8u;
|
||||
uint32_t mask = ((1u << (dst.bytes() * 8)) - 1) << offset;
|
||||
|
|
|
@ -464,6 +464,50 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
|
|||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
|
||||
Operand::zero(), Operand::zero(1));
|
||||
|
||||
bld.reset(program->create_and_insert_block());
|
||||
program->blocks[0].linear_succs.push_back(1);
|
||||
program->blocks[1].linear_preds.push_back(0);
|
||||
|
||||
/* Prevent usage of v_pack_b32_f16, so we use v_perm_b32 instead. */
|
||||
program->blocks[1].fp_mode.denorm16_64 = fp_denorm_flush;
|
||||
|
||||
//>> p_unit_test 13
|
||||
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
|
||||
//~gfx9! v1: %_:v[0] = v_or_b32 0xff, %_:v[0]
|
||||
//~gfx(10|11)! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060c0d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
|
||||
Operand::c16(0x00ff));
|
||||
|
||||
//! p_unit_test 14
|
||||
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
|
||||
//~gfx9! v1: %_:v[0] = v_or_b32 0xff000000, %_:v[0]
|
||||
//~gfx(10|11)! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0xd0c0504
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
|
||||
Operand::c16(0xff00));
|
||||
|
||||
//! p_unit_test 15
|
||||
//~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x7060c0c
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
|
||||
Operand::zero(2));
|
||||
|
||||
//! p_unit_test 16
|
||||
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x706050d
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
|
||||
Operand::c8(0xff));
|
||||
|
||||
//! p_unit_test 17
|
||||
//~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
|
||||
//~gfx11! v1: %_:v[0] = v_perm_b32 0, %_:v[0], 0x706050c
|
||||
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
|
||||
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
|
||||
Operand::zero(1));
|
||||
|
||||
//! s_endpgm
|
||||
|
||||
finish_to_hw_instr_test();
|
||||
|
|
Loading…
Reference in New Issue