aco: Implement 64bit uadd_sat.
Signed-off-by: Georg Lehmann <dadschoorse@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15421>
This commit is contained in:
parent
16be909936
commit
989f2b1785
|
@ -1830,6 +1830,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||||
bld.sop2(aco_opcode::s_add_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1);
|
bld.sop2(aco_opcode::s_add_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1);
|
||||||
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp,
|
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp,
|
||||||
bld.scc(carry));
|
bld.scc(carry));
|
||||||
|
break;
|
||||||
} else if (dst.regClass() == v2b) {
|
} else if (dst.regClass() == v2b) {
|
||||||
Instruction* add_instr;
|
Instruction* add_instr;
|
||||||
if (ctx->program->chip_class >= GFX10) {
|
if (ctx->program->chip_class >= GFX10) {
|
||||||
|
@ -1841,8 +1842,59 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
||||||
bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
|
bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
|
||||||
}
|
}
|
||||||
add_instr->vop3().clamp = 1;
|
add_instr->vop3().clamp = 1;
|
||||||
|
break;
|
||||||
} else if (dst.regClass() == v1) {
|
} else if (dst.regClass() == v1) {
|
||||||
uadd32_sat(bld, Definition(dst), src0, src1);
|
uadd32_sat(bld, Definition(dst), src0, src1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(src0.size() == 2 && src1.size() == 2);
|
||||||
|
|
||||||
|
Temp src00 = bld.tmp(src0.type(), 1);
|
||||||
|
Temp src01 = bld.tmp(src0.type(), 1);
|
||||||
|
bld.pseudo(aco_opcode::p_split_vector, Definition(src00), Definition(src01), src0);
|
||||||
|
Temp src10 = bld.tmp(src1.type(), 1);
|
||||||
|
Temp src11 = bld.tmp(src1.type(), 1);
|
||||||
|
bld.pseudo(aco_opcode::p_split_vector, Definition(src10), Definition(src11), src1);
|
||||||
|
|
||||||
|
if (dst.regClass() == s2) {
|
||||||
|
Temp carry0 = bld.tmp(s1);
|
||||||
|
Temp carry1 = bld.tmp(s1);
|
||||||
|
|
||||||
|
Temp no_sat0 =
|
||||||
|
bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry0)), src00, src10);
|
||||||
|
Temp no_sat1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.scc(Definition(carry1)),
|
||||||
|
src01, src11, bld.scc(carry0));
|
||||||
|
|
||||||
|
Temp no_sat = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), no_sat0, no_sat1);
|
||||||
|
|
||||||
|
bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c64(-1), no_sat,
|
||||||
|
bld.scc(carry1));
|
||||||
|
} else if (dst.regClass() == v2) {
|
||||||
|
Temp no_sat0 = bld.tmp(v1);
|
||||||
|
Temp dst0 = bld.tmp(v1);
|
||||||
|
Temp dst1 = bld.tmp(v1);
|
||||||
|
|
||||||
|
Temp carry0 = bld.vadd32(Definition(no_sat0), src00, src10, true).def(1).getTemp();
|
||||||
|
Temp carry1;
|
||||||
|
|
||||||
|
if (ctx->program->chip_class >= GFX8) {
|
||||||
|
carry1 = bld.tmp(bld.lm);
|
||||||
|
bld.vop2_e64(aco_opcode::v_addc_co_u32, Definition(dst1),
|
||||||
|
bld.hint_vcc(Definition(carry1)), as_vgpr(ctx, src01), as_vgpr(ctx, src11),
|
||||||
|
carry0)
|
||||||
|
.instr->vop3()
|
||||||
|
.clamp = 1;
|
||||||
|
} else {
|
||||||
|
Temp no_sat1 = bld.tmp(v1);
|
||||||
|
carry1 = bld.vadd32(Definition(no_sat1), src01, src11, true, carry0).def(1).getTemp();
|
||||||
|
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst1), no_sat1, Operand::c32(-1),
|
||||||
|
carry1);
|
||||||
|
}
|
||||||
|
|
||||||
|
bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst0), no_sat0, Operand::c32(-1),
|
||||||
|
carry1);
|
||||||
|
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), dst0, dst1);
|
||||||
} else {
|
} else {
|
||||||
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue