gallivm: Altivec vector add/sub intrinsics
This patch adds the correct vector addition and subtraction intrinsics when using Altivec on PPC. The current code uses the default path, and the LLVM backend ends up issuing carry-out arithmetic instructions where saturating ones are expected. It also includes a fix for PowerPC, where char is unsigned by default, which resulted in bogus values for vector shifting. Reviewed-by: Roland Scheidegger <sroland@vmware.com> Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
parent
2ea7d3dabd
commit
dd5c580816
|
@ -365,13 +365,19 @@ lp_build_add(struct lp_build_context *bld,
|
|||
if(a == bld->one || b == bld->one)
|
||||
return bld->one;
|
||||
|
||||
if(util_cpu_caps.has_sse2 &&
|
||||
type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
|
||||
if (type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.b" : "llvm.x86.sse2.paddus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.padds.w" : "llvm.x86.sse2.paddus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddsbs" : "llvm.ppc.altivec.vaddubs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vaddsws" : "llvm.ppc.altivec.vadduws";
|
||||
}
|
||||
}
|
||||
|
||||
if(intrinsic)
|
||||
|
@ -636,13 +642,19 @@ lp_build_sub(struct lp_build_context *bld,
|
|||
if(b == bld->one)
|
||||
return bld->zero;
|
||||
|
||||
if(util_cpu_caps.has_sse2 &&
|
||||
type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
|
||||
if (type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if (util_cpu_caps.has_sse2) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.b" : "llvm.x86.sse2.psubus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.psubs.w" : "llvm.x86.sse2.psubus.w";
|
||||
} else if (util_cpu_caps.has_altivec) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubsbs" : "llvm.ppc.altivec.vsububs";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.ppc.altivec.vsubsws" : "llvm.ppc.altivec.vsubuws";
|
||||
}
|
||||
}
|
||||
|
||||
if(intrinsic)
|
||||
|
|
|
@ -246,7 +246,7 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
|
|||
* YYYY YYYY .... YYYY <= output
|
||||
*/
|
||||
struct lp_type type4;
|
||||
const char shifts[4][2] = {
|
||||
const int shifts[4][2] = {
|
||||
{ 1, 2},
|
||||
{-1, 2},
|
||||
{ 1, -2},
|
||||
|
|
Loading…
Reference in New Issue