aco: sign-extend input/identity for 32-bit reduce ops on GFX10
Because some 16-bit instructions are already VOP3 on GFX10, we use the 32-bit variants to remove the temporary VGPR and to use DPP with the arithmetic instructions. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5148>
This commit is contained in:
parent
83dcd1690b
commit
e22567089c
|
@ -529,6 +529,20 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
|
|||
sdwa->sel[0] = sdwa_ubyte;
|
||||
sdwa->dst_sel = sdwa_udword;
|
||||
bld.insert(std::move(sdwa));
|
||||
} else if (src.regClass() == v2b) {
|
||||
if (ctx->program->chip_class >= GFX10 &&
|
||||
(reduce_op == iadd16 || reduce_op == imax16 ||
|
||||
reduce_op == imin16 || reduce_op == umin16 || reduce_op == umax16)) {
|
||||
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
|
||||
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
|
||||
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
|
||||
if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
|
||||
sdwa->sel[0] = sdwa_sword;
|
||||
else
|
||||
sdwa->sel[0] = sdwa_uword;
|
||||
sdwa->dst_sel = sdwa_udword;
|
||||
bld.insert(std::move(sdwa));
|
||||
}
|
||||
}
|
||||
|
||||
bool reduction_needs_last_op = false;
|
||||
|
|
Loading…
Reference in New Issue