aco: sign-extend input/identity for 16-bit reduce ops on GFX10

Because some 16-bit instructions are already VOP3 on GFX10, we use
the 32-bit variants to remove the temporary VGPR and to use DPP with
the arithmetic instructions.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5148>
This commit is contained in:
Samuel Pitoiset 2020-05-28 09:09:49 +02:00 committed by Marge Bot
parent 83dcd1690b
commit e22567089c
1 changed files with 14 additions and 0 deletions

View File

@ -529,6 +529,20 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
sdwa->sel[0] = sdwa_ubyte;
sdwa->dst_sel = sdwa_udword;
bld.insert(std::move(sdwa));
} else if (src.regClass() == v2b) {
if (ctx->program->chip_class >= GFX10 &&
(reduce_op == iadd16 || reduce_op == imax16 ||
reduce_op == imin16 || reduce_op == umin16 || reduce_op == umax16)) {
aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
sdwa->operands[0] = Operand(PhysReg{tmp}, v1);
sdwa->definitions[0] = Definition(PhysReg{tmp}, v1);
if (reduce_op == imin16 || reduce_op == imax16 || reduce_op == iadd16)
sdwa->sel[0] = sdwa_sword;
else
sdwa->sel[0] = sdwa_uword;
sdwa->dst_sel = sdwa_udword;
bld.insert(std::move(sdwa));
}
}
bool reduction_needs_last_op = false;