llvmpipe: Specialize arithmetic operations.
This commit is contained in:
parent
272dadbe4e
commit
a6622e6c54
|
@ -53,7 +53,7 @@
|
|||
LLVMTypeRef
|
||||
lp_build_elem_type(union lp_type type)
|
||||
{
|
||||
if (type.kind == LP_TYPE_FLOAT) {
|
||||
if (type.floating) {
|
||||
assert(type.sign);
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
|
@ -90,9 +90,15 @@ lp_build_vec_type(union lp_type type)
|
|||
boolean
|
||||
lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
|
||||
{
|
||||
LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type);
|
||||
LLVMTypeKind elem_kind;
|
||||
|
||||
if (type.kind == LP_TYPE_FLOAT) {
|
||||
assert(elem_type);
|
||||
if(!elem_type)
|
||||
return FALSE;
|
||||
|
||||
elem_kind = LLVMGetTypeKind(elem_type);
|
||||
|
||||
if (type.floating) {
|
||||
switch(type.width) {
|
||||
case 32:
|
||||
if(elem_kind != LLVMFloatTypeKind)
|
||||
|
@ -124,6 +130,10 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
|
|||
{
|
||||
LLVMTypeRef elem_type;
|
||||
|
||||
assert(vec_type);
|
||||
if(!vec_type)
|
||||
return FALSE;
|
||||
|
||||
if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
|
||||
return FALSE;
|
||||
|
||||
|
@ -136,6 +146,73 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
|
|||
}
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_value(union lp_type type, LLVMValueRef val)
|
||||
{
|
||||
LLVMTypeRef vec_type;
|
||||
|
||||
assert(val);
|
||||
if(!val)
|
||||
return FALSE;
|
||||
|
||||
vec_type = LLVMTypeOf(val);
|
||||
|
||||
return lp_check_vec_type(type, vec_type);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_undef(union lp_type type)
|
||||
{
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
return LLVMGetUndef(vec_type);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_zero(union lp_type type)
|
||||
{
|
||||
LLVMTypeRef vec_type = lp_build_vec_type(type);
|
||||
return LLVMConstNull(vec_type);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_one(union lp_type type)
|
||||
{
|
||||
LLVMTypeRef elem_type;
|
||||
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i;
|
||||
|
||||
assert(type.length < LP_MAX_VECTOR_LENGTH);
|
||||
|
||||
elem_type = lp_build_elem_type(type);
|
||||
|
||||
if(type.floating)
|
||||
elems[0] = LLVMConstReal(elem_type, 1.0);
|
||||
else if(type.fixed)
|
||||
elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
|
||||
else if(!type.norm)
|
||||
elems[0] = LLVMConstInt(elem_type, 1, 0);
|
||||
else {
|
||||
/* special case' -- 1.0 for normalized types is more easily attained if
|
||||
* we start with a vector consisting of all bits set */
|
||||
LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
|
||||
LLVMValueRef vec = LLVMConstAllOnes(vec_type);
|
||||
|
||||
if(type.sign)
|
||||
vec = LLVMConstLShr(vec, LLVMConstInt(LLVMInt32Type(), 1, 0));
|
||||
|
||||
return vec;
|
||||
}
|
||||
|
||||
for(i = 1; i < type.length; ++i)
|
||||
elems[i] = elems[0];
|
||||
|
||||
return LLVMConstVector(elems, type.length);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_aos(union lp_type type,
|
||||
double r, double g, double b, double a,
|
||||
|
@ -154,20 +231,18 @@ lp_build_const_aos(union lp_type type,
|
|||
if(swizzle == NULL)
|
||||
swizzle = default_swizzle;
|
||||
|
||||
if(type.kind == LP_TYPE_FLOAT) {
|
||||
for(i = 0; i < type.length; i += 4) {
|
||||
elems[i + swizzle[0]] = LLVMConstReal(elem_type, r);
|
||||
elems[i + swizzle[1]] = LLVMConstReal(elem_type, g);
|
||||
elems[i + swizzle[2]] = LLVMConstReal(elem_type, b);
|
||||
elems[i + swizzle[3]] = LLVMConstReal(elem_type, a);
|
||||
}
|
||||
if(type.floating) {
|
||||
elems[swizzle[0]] = LLVMConstReal(elem_type, r);
|
||||
elems[swizzle[1]] = LLVMConstReal(elem_type, g);
|
||||
elems[swizzle[2]] = LLVMConstReal(elem_type, b);
|
||||
elems[swizzle[3]] = LLVMConstReal(elem_type, a);
|
||||
}
|
||||
else {
|
||||
unsigned shift;
|
||||
long long llscale;
|
||||
double dscale;
|
||||
|
||||
if(type.kind == LP_TYPE_FIXED)
|
||||
if(type.fixed)
|
||||
shift = type.width/2;
|
||||
else if(type.norm)
|
||||
shift = type.sign ? type.width - 1 : type.width;
|
||||
|
@ -178,14 +253,15 @@ lp_build_const_aos(union lp_type type,
|
|||
dscale = (double)llscale;
|
||||
assert((long long)dscale == llscale);
|
||||
|
||||
for(i = 0; i < type.length; i += 4) {
|
||||
elems[i + swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
|
||||
elems[i + swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
|
||||
elems[i + swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
|
||||
elems[i + swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
|
||||
}
|
||||
elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
|
||||
elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
|
||||
elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
|
||||
elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
|
||||
}
|
||||
|
||||
for(i = 4; i < type.length; ++i)
|
||||
elems[i] = elems[i % 4];
|
||||
|
||||
return LLVMConstVector(elems, type.length);
|
||||
}
|
||||
|
||||
|
@ -219,165 +295,261 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder,
|
|||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_add(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero)
|
||||
static LLVMValueRef
|
||||
lp_build_min_simple(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
if(a == zero)
|
||||
return b;
|
||||
else if(b == zero)
|
||||
return a;
|
||||
else if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
return LLVMConstAdd(a, b);
|
||||
const union lp_type type = bld->type;
|
||||
const char *intrinsic = NULL;
|
||||
LLVMValueRef cond;
|
||||
|
||||
/* TODO: optimize the constant case */
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(type.width * type.length == 128) {
|
||||
if(type.floating)
|
||||
if(type.width == 32)
|
||||
intrinsic = "llvm.x86.sse.min.ps";
|
||||
if(type.width == 64)
|
||||
intrinsic = "llvm.x86.sse2.min.pd";
|
||||
else {
|
||||
if(type.width == 8 && !type.sign)
|
||||
intrinsic = "llvm.x86.sse2.pminu.b";
|
||||
if(type.width == 16 && type.sign)
|
||||
intrinsic = "llvm.x86.sse2.pmins.w";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if(intrinsic)
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
|
||||
|
||||
if(type.floating)
|
||||
cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
|
||||
else
|
||||
return LLVMBuildAdd(builder, a, b, "");
|
||||
cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
|
||||
return LLVMBuildSelect(bld->builder, cond, a, b, "");
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_max_simple(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
const char *intrinsic = NULL;
|
||||
LLVMValueRef cond;
|
||||
|
||||
/* TODO: optimize the constant case */
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(type.width * type.length == 128) {
|
||||
if(type.floating)
|
||||
if(type.width == 32)
|
||||
intrinsic = "llvm.x86.sse.max.ps";
|
||||
if(type.width == 64)
|
||||
intrinsic = "llvm.x86.sse2.max.pd";
|
||||
else {
|
||||
if(type.width == 8 && !type.sign)
|
||||
intrinsic = "llvm.x86.sse2.pmaxu.b";
|
||||
if(type.width == 16 && type.sign)
|
||||
intrinsic = "llvm.x86.sse2.pmaxs.w";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if(intrinsic)
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
|
||||
|
||||
if(type.floating)
|
||||
cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
|
||||
else
|
||||
cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
|
||||
return LLVMBuildSelect(bld->builder, cond, b, a, "");
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sub(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero)
|
||||
lp_build_comp(struct lp_build_context *bld,
|
||||
LLVMValueRef a)
|
||||
{
|
||||
if(b == zero)
|
||||
return a;
|
||||
else if(a == b)
|
||||
return zero;
|
||||
else if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
return LLVMConstSub(a, b);
|
||||
const union lp_type type = bld->type;
|
||||
|
||||
if(a == bld->one)
|
||||
return bld->zero;
|
||||
if(a == bld->zero)
|
||||
return bld->one;
|
||||
|
||||
if(type.norm && !type.floating && !type.fixed && !type.sign) {
|
||||
if(LLVMIsConstant(a))
|
||||
return LLVMConstNot(a);
|
||||
else
|
||||
return LLVMBuildNot(bld->builder, a, "");
|
||||
}
|
||||
|
||||
if(LLVMIsConstant(a))
|
||||
return LLVMConstSub(bld->one, a);
|
||||
else
|
||||
return LLVMBuildSub(builder, a, b, "");
|
||||
return LLVMBuildSub(bld->builder, bld->one, a, "");
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_mul(LLVMBuilderRef builder,
|
||||
lp_build_add(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one)
|
||||
LLVMValueRef b)
|
||||
{
|
||||
if(a == zero)
|
||||
return zero;
|
||||
else if(a == one)
|
||||
const union lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
|
||||
if(a == bld->zero)
|
||||
return b;
|
||||
else if(b == zero)
|
||||
return zero;
|
||||
else if(b == one)
|
||||
if(b == bld->zero)
|
||||
return a;
|
||||
else if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
|
||||
if(bld->type.norm) {
|
||||
const char *intrinsic = NULL;
|
||||
|
||||
if(a == bld->one || b == bld->one)
|
||||
return bld->one;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.adds.b" : "llvm.x86.sse2.addus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.adds.w" : "llvm.x86.sse2.addus.w";
|
||||
}
|
||||
#endif
|
||||
|
||||
if(intrinsic)
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
|
||||
}
|
||||
|
||||
if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
res = LLVMConstAdd(a, b);
|
||||
else
|
||||
res = LLVMBuildAdd(bld->builder, a, b, "");
|
||||
|
||||
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
|
||||
res = lp_build_min_simple(bld, res, bld->one);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sub(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
const union lp_type type = bld->type;
|
||||
LLVMValueRef res;
|
||||
|
||||
if(b == bld->zero)
|
||||
return a;
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
if(a == b)
|
||||
return bld->zero;
|
||||
|
||||
if(bld->type.norm) {
|
||||
const char *intrinsic = NULL;
|
||||
|
||||
if(b == bld->one)
|
||||
return bld->zero;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(type.width * type.length == 128 &&
|
||||
!type.floating && !type.fixed) {
|
||||
if(type.width == 8)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.subs.b" : "llvm.x86.sse2.subus.b";
|
||||
if(type.width == 16)
|
||||
intrinsic = type.sign ? "llvm.x86.sse2.subs.w" : "llvm.x86.sse2.subus.w";
|
||||
}
|
||||
#endif
|
||||
|
||||
if(intrinsic)
|
||||
return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
|
||||
}
|
||||
|
||||
if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
res = LLVMConstSub(a, b);
|
||||
else
|
||||
res = LLVMBuildSub(bld->builder, a, b, "");
|
||||
|
||||
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
|
||||
res = lp_build_max_simple(bld, res, bld->zero);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_mul(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
if(a == bld->zero)
|
||||
return bld->zero;
|
||||
if(a == bld->one)
|
||||
return b;
|
||||
if(b == bld->zero)
|
||||
return bld->zero;
|
||||
if(b == bld->one)
|
||||
return a;
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
|
||||
if(LLVMIsConstant(a) && LLVMIsConstant(b))
|
||||
return LLVMConstMul(a, b);
|
||||
else
|
||||
return LLVMBuildMul(builder, a, b, "");
|
||||
|
||||
return LLVMBuildMul(bld->builder, a, b, "");
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_min(LLVMBuilderRef builder,
|
||||
lp_build_min(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
/* TODO: optimize the constant case */
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(bld->type.norm) {
|
||||
if(a == bld->zero || b == bld->zero)
|
||||
return bld->zero;
|
||||
if(a == bld->one)
|
||||
return b;
|
||||
if(b == bld->one)
|
||||
return a;
|
||||
}
|
||||
|
||||
return lp_build_intrinsic_binary(builder, "llvm.x86.sse.min.ps", a, b);
|
||||
|
||||
#else
|
||||
|
||||
LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, "");
|
||||
return LLVMBuildSelect(values->builder, cond, a, b, "");
|
||||
|
||||
#endif
|
||||
return lp_build_min_simple(bld, a, b);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_max(LLVMBuilderRef builder,
|
||||
lp_build_max(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b)
|
||||
{
|
||||
/* TODO: optimize the constant case */
|
||||
if(a == bld->undef || b == bld->undef)
|
||||
return bld->undef;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
if(bld->type.norm) {
|
||||
if(a == bld->one || b == bld->one)
|
||||
return bld->one;
|
||||
if(a == bld->zero)
|
||||
return b;
|
||||
if(b == bld->zero)
|
||||
return a;
|
||||
}
|
||||
|
||||
return lp_build_intrinsic_binary(builder, "llvm.x86.sse.max.ps", a, b);
|
||||
|
||||
#else
|
||||
|
||||
LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, "");
|
||||
return LLVMBuildSelect(values->builder, cond, b, a, "");
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_add_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one)
|
||||
{
|
||||
if(a == zero)
|
||||
return b;
|
||||
else if(b == zero)
|
||||
return a;
|
||||
else if(a == one || b == one)
|
||||
return one;
|
||||
else
|
||||
return lp_build_min(builder, lp_build_add(builder, a, b, zero), one);
|
||||
}
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sub_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one)
|
||||
{
|
||||
if(b == zero)
|
||||
return a;
|
||||
else if(b == one)
|
||||
return zero;
|
||||
else
|
||||
return lp_build_max(builder, lp_build_sub(builder, a, b, zero), zero);
|
||||
}
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_min_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one)
|
||||
{
|
||||
if(a == zero || b == zero)
|
||||
return zero;
|
||||
else if(a == one)
|
||||
return b;
|
||||
else if(b == one)
|
||||
return a;
|
||||
else
|
||||
return lp_build_min(builder, a, b);
|
||||
}
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_max_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one)
|
||||
{
|
||||
if(a == zero)
|
||||
return b;
|
||||
else if(b == zero)
|
||||
return a;
|
||||
else if(a == one || b == one)
|
||||
return one;
|
||||
else
|
||||
return lp_build_max(builder, a, b);
|
||||
return lp_build_max_simple(bld, a, b);
|
||||
}
|
||||
|
|
|
@ -66,7 +66,13 @@ union lp_type {
|
|||
* Integer. floating-point, or fixed point as established by the
|
||||
* lp_build_type_kind enum above.
|
||||
*/
|
||||
unsigned kind:2;
|
||||
unsigned floating:1;
|
||||
|
||||
/**
|
||||
* Integer. floating-point, or fixed point as established by the
|
||||
* lp_build_type_kind enum above.
|
||||
*/
|
||||
unsigned fixed:1;
|
||||
|
||||
/**
|
||||
* Whether it can represent negative values or not.
|
||||
|
@ -79,9 +85,11 @@ union lp_type {
|
|||
* Whether values are normalized to fit [0, 1] interval, or [-1, 1] interval for
|
||||
* signed types.
|
||||
*
|
||||
* For integer types
|
||||
* For integer types it means the representable integer range should be
|
||||
* interpreted as the interval above.
|
||||
*
|
||||
* It makes no sense to use this with fixed point values.
|
||||
* For floating and fixed point formats it means the values should be
|
||||
* clamped to the interval above.
|
||||
*/
|
||||
unsigned norm:1;
|
||||
|
||||
|
@ -123,10 +131,27 @@ boolean
|
|||
lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
|
||||
|
||||
|
||||
boolean
|
||||
lp_check_value(union lp_type type, LLVMValueRef val);
|
||||
|
||||
|
||||
/*
|
||||
* Constants
|
||||
*/
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_undef(union lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_zero(union lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_one(union lp_type type);
|
||||
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_const_aos(union lp_type type,
|
||||
double r, double g, double b, double a,
|
||||
|
@ -136,66 +161,52 @@ lp_build_const_aos(union lp_type type,
|
|||
* Basic arithmetic
|
||||
*/
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_add(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sub(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_mul(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_min(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_max(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
/*
|
||||
* Satured arithmetic
|
||||
/**
|
||||
*/
|
||||
/*
 * State shared by the arithmetic build helpers (lp_build_add, lp_build_sub,
 * lp_build_mul, ...), which all take a pointer to this structure.
 */
struct lp_build_context
{
   /* Builder the helpers pass to the LLVMBuild* calls. */
   LLVMBuilderRef builder;

   /* Type of the values being operated on; the helpers read it to pick
    * type-specific code paths. */
   union lp_type type;

   /* Cached constants of the type above.  The helpers compare operands
    * against these by pointer to take shortcuts. */
   LLVMValueRef undef;
   LLVMValueRef zero;
   LLVMValueRef one;
};
|
||||
|
||||
|
||||
/**
|
||||
* Complement, i.e., 1 - a.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_comp(struct lp_build_context *bld,
|
||||
LLVMValueRef a);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_add_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one);
|
||||
lp_build_add(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_sub_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one);
|
||||
lp_build_sub(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_min_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one);
|
||||
lp_build_mul(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_max_sat(LLVMBuilderRef builder,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b,
|
||||
LLVMValueRef zero,
|
||||
LLVMValueRef one);
|
||||
lp_build_min(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
LLVMValueRef
|
||||
lp_build_max(struct lp_build_context *bld,
|
||||
LLVMValueRef a,
|
||||
LLVMValueRef b);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_ARIT_H */
|
||||
|
|
|
@ -44,18 +44,14 @@
|
|||
|
||||
|
||||
/**
|
||||
* We may the same values several times, so we keep them here to avoid
|
||||
* recomputing them. Also reusing the values allows us to do simplifications
|
||||
* We may use the same values several times, so we keep them here to avoid
|
||||
* recomputing them. Also reusing the values allows us to do simplifications
|
||||
* that LLVM optimization passes wouldn't normally be able to do.
|
||||
*/
|
||||
struct lp_build_blend_values
|
||||
struct lp_build_blend_context
|
||||
{
|
||||
LLVMBuilderRef builder;
|
||||
struct lp_build_context base;
|
||||
|
||||
LLVMValueRef undef;
|
||||
LLVMValueRef zero;
|
||||
LLVMValueRef one;
|
||||
|
||||
LLVMValueRef src;
|
||||
LLVMValueRef dst;
|
||||
LLVMValueRef const_;
|
||||
|
@ -73,62 +69,62 @@ struct lp_build_blend_values
|
|||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_blend_factor_unswizzled(struct lp_build_blend_values *values,
|
||||
lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
|
||||
unsigned factor,
|
||||
boolean alpha)
|
||||
{
|
||||
switch (factor) {
|
||||
case PIPE_BLENDFACTOR_ZERO:
|
||||
return values->zero;
|
||||
return bld->base.zero;
|
||||
case PIPE_BLENDFACTOR_ONE:
|
||||
return values->one;
|
||||
return bld->base.one;
|
||||
case PIPE_BLENDFACTOR_SRC_COLOR:
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA:
|
||||
return values->src;
|
||||
return bld->src;
|
||||
case PIPE_BLENDFACTOR_DST_COLOR:
|
||||
case PIPE_BLENDFACTOR_DST_ALPHA:
|
||||
return values->dst;
|
||||
return bld->dst;
|
||||
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
|
||||
if(alpha)
|
||||
return values->one;
|
||||
return bld->base.one;
|
||||
else {
|
||||
if(!values->inv_dst)
|
||||
values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero);
|
||||
if(!values->saturate)
|
||||
values->saturate = lp_build_min_sat(values->builder, values->src, values->inv_dst, values->zero, values->one);
|
||||
return values->saturate;
|
||||
if(!bld->inv_dst)
|
||||
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
|
||||
if(!bld->saturate)
|
||||
bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
|
||||
return bld->saturate;
|
||||
}
|
||||
case PIPE_BLENDFACTOR_CONST_COLOR:
|
||||
case PIPE_BLENDFACTOR_CONST_ALPHA:
|
||||
return values->const_;
|
||||
return bld->const_;
|
||||
case PIPE_BLENDFACTOR_SRC1_COLOR:
|
||||
case PIPE_BLENDFACTOR_SRC1_ALPHA:
|
||||
/* TODO */
|
||||
assert(0);
|
||||
return values->zero;
|
||||
return bld->base.zero;
|
||||
case PIPE_BLENDFACTOR_INV_SRC_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
|
||||
if(!values->inv_src)
|
||||
values->inv_src = lp_build_sub(values->builder, values->one, values->src, values->zero);
|
||||
return values->inv_src;
|
||||
if(!bld->inv_src)
|
||||
bld->inv_src = lp_build_comp(&bld->base, bld->src);
|
||||
return bld->inv_src;
|
||||
case PIPE_BLENDFACTOR_INV_DST_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_DST_ALPHA:
|
||||
if(!values->inv_dst)
|
||||
values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero);
|
||||
return values->inv_dst;
|
||||
if(!bld->inv_dst)
|
||||
bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
|
||||
return bld->inv_dst;
|
||||
case PIPE_BLENDFACTOR_INV_CONST_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
|
||||
if(!values->inv_const)
|
||||
values->inv_const = lp_build_sub(values->builder, values->one, values->const_, values->zero);
|
||||
return values->inv_const;
|
||||
if(!bld->inv_const)
|
||||
bld->inv_const = lp_build_comp(&bld->base, bld->const_);
|
||||
return bld->inv_const;
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
|
||||
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
|
||||
/* TODO */
|
||||
assert(0);
|
||||
return values->zero;
|
||||
return bld->base.zero;
|
||||
default:
|
||||
assert(0);
|
||||
return values->zero;
|
||||
return bld->base.zero;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -175,13 +171,13 @@ lp_build_blend_factor_swizzle(unsigned factor)
|
|||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_blend_swizzle(struct lp_build_blend_values *values,
|
||||
lp_build_blend_swizzle(struct lp_build_blend_context *bld,
|
||||
LLVMValueRef rgb,
|
||||
LLVMValueRef alpha,
|
||||
enum lp_build_blend_swizzle rgb_swizzle,
|
||||
unsigned alpha_swizzle,
|
||||
unsigned n)
|
||||
unsigned alpha_swizzle)
|
||||
{
|
||||
const unsigned n = bld->base.type.length;
|
||||
LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
|
||||
unsigned i, j;
|
||||
|
||||
|
@ -189,14 +185,14 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values,
|
|||
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
|
||||
return rgb;
|
||||
|
||||
alpha = values->undef;
|
||||
alpha = bld->base.undef;
|
||||
}
|
||||
|
||||
for(j = 0; j < n; j += 4) {
|
||||
for(i = 0; i < 4; ++i) {
|
||||
unsigned swizzle;
|
||||
|
||||
if(i == alpha_swizzle && alpha != values->undef) {
|
||||
if(i == alpha_swizzle && alpha != bld->base.undef) {
|
||||
/* Take the alpha from the second shuffle argument */
|
||||
swizzle = n + j + alpha_swizzle;
|
||||
}
|
||||
|
@ -212,55 +208,54 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values,
|
|||
}
|
||||
}
|
||||
|
||||
return LLVMBuildShuffleVector(values->builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
|
||||
return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_blend_factor(struct lp_build_blend_values *values,
|
||||
lp_build_blend_factor(struct lp_build_blend_context *bld,
|
||||
LLVMValueRef factor1,
|
||||
unsigned rgb_factor,
|
||||
unsigned alpha_factor,
|
||||
unsigned alpha_swizzle,
|
||||
unsigned n)
|
||||
unsigned alpha_swizzle)
|
||||
{
|
||||
LLVMValueRef rgb_factor_;
|
||||
LLVMValueRef alpha_factor_;
|
||||
LLVMValueRef factor2;
|
||||
enum lp_build_blend_swizzle rgb_swizzle;
|
||||
|
||||
rgb_factor_ = lp_build_blend_factor_unswizzled(values, rgb_factor, FALSE);
|
||||
alpha_factor_ = lp_build_blend_factor_unswizzled(values, alpha_factor, TRUE);
|
||||
rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
|
||||
alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
|
||||
|
||||
rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
|
||||
|
||||
factor2 = lp_build_blend_swizzle(values, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, n);
|
||||
factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
|
||||
|
||||
return lp_build_mul(values->builder, factor1, factor2, values->zero, values->one);
|
||||
return lp_build_mul(&bld->base, factor1, factor2);
|
||||
}
|
||||
|
||||
|
||||
static LLVMValueRef
|
||||
lp_build_blend_func(struct lp_build_blend_values *values,
|
||||
lp_build_blend_func(struct lp_build_blend_context *bld,
|
||||
unsigned func,
|
||||
LLVMValueRef term1,
|
||||
LLVMValueRef term2)
|
||||
{
|
||||
switch (func) {
|
||||
case PIPE_BLEND_ADD:
|
||||
return lp_build_add_sat(values->builder, term1, term2, values->zero, values->one);
|
||||
return lp_build_add(&bld->base, term1, term2);
|
||||
break;
|
||||
case PIPE_BLEND_SUBTRACT:
|
||||
return lp_build_sub_sat(values->builder, term1, term2, values->zero, values->one);
|
||||
return lp_build_sub(&bld->base, term1, term2);
|
||||
case PIPE_BLEND_REVERSE_SUBTRACT:
|
||||
return lp_build_sub_sat(values->builder, term2, term1, values->zero, values->one);
|
||||
return lp_build_sub(&bld->base, term2, term1);
|
||||
case PIPE_BLEND_MIN:
|
||||
return lp_build_min_sat(values->builder, term1, term2, values->zero, values->one);
|
||||
return lp_build_min(&bld->base, term1, term2);
|
||||
case PIPE_BLEND_MAX:
|
||||
return lp_build_max_sat(values->builder, term1, term2, values->zero, values->one);
|
||||
return lp_build_max(&bld->base, term1, term2);
|
||||
default:
|
||||
assert(0);
|
||||
return values->zero;
|
||||
return bld->base.zero;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -274,35 +269,33 @@ lp_build_blend(LLVMBuilderRef builder,
|
|||
LLVMValueRef const_,
|
||||
unsigned alpha_swizzle)
|
||||
{
|
||||
struct lp_build_blend_values values;
|
||||
struct lp_build_blend_context bld;
|
||||
LLVMValueRef src_term;
|
||||
LLVMValueRef dst_term;
|
||||
LLVMTypeRef vec_type;
|
||||
|
||||
vec_type = lp_build_vec_type(type);
|
||||
/* It makes no sense to blend unless values are normalized */
|
||||
assert(type.norm);
|
||||
|
||||
/*
|
||||
* Compute constants
|
||||
*/
|
||||
memset(&values, 0, sizeof values);
|
||||
values.builder = builder;
|
||||
values.undef = LLVMGetUndef(vec_type);
|
||||
values.zero = LLVMConstNull(vec_type);
|
||||
values.one = lp_build_const_aos(type, 1.0, 1.0, 1.0, 1.0, NULL);
|
||||
|
||||
values.src = src;
|
||||
values.dst = dst;
|
||||
values.const_ = const_;
|
||||
/* Setup build context */
|
||||
memset(&bld, 0, sizeof bld);
|
||||
bld.base.builder = builder;
|
||||
bld.base.type = type;
|
||||
bld.base.undef = lp_build_undef(type);
|
||||
bld.base.zero = lp_build_zero(type);
|
||||
bld.base.one = lp_build_one(type);
|
||||
bld.src = src;
|
||||
bld.dst = dst;
|
||||
bld.const_ = const_;
|
||||
|
||||
/* TODO: There are still a few optimization opportunities here. For certain
|
||||
* combinations it is possible to reorder the operations, thereby saving
|
||||
* some instructions. */
|
||||
|
||||
src_term = lp_build_blend_factor(&values, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle, type.length);
|
||||
dst_term = lp_build_blend_factor(&values, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle, type.length);
|
||||
src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
|
||||
dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
|
||||
|
||||
if(blend->rgb_func == blend->alpha_func) {
|
||||
return lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term);
|
||||
return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
|
||||
}
|
||||
else {
|
||||
/* Separate RGB / A functions */
|
||||
|
@ -310,9 +303,9 @@ lp_build_blend(LLVMBuilderRef builder,
|
|||
LLVMValueRef rgb;
|
||||
LLVMValueRef alpha;
|
||||
|
||||
rgb = lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term);
|
||||
alpha = lp_build_blend_func(&values, blend->alpha_func, src_term, dst_term);
|
||||
rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
|
||||
alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
|
||||
|
||||
return lp_build_blend_swizzle(&values, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle, type.length);
|
||||
return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -81,7 +81,7 @@ add_blend_test(LLVMModuleRef module,
|
|||
LLVMValueRef res;
|
||||
|
||||
type.value = 0;
|
||||
type.kind = LP_TYPE_FLOAT;
|
||||
type.floating = TRUE;
|
||||
type.sign = TRUE;
|
||||
type.norm = TRUE;
|
||||
type.width = 32;
|
||||
|
|
Loading…
Reference in New Issue