llvmpipe: Specialize arithmetic operations.

This commit is contained in:
José Fonseca 2009-08-02 17:54:53 +01:00
parent 272dadbe4e
commit a6622e6c54
4 changed files with 446 additions and 270 deletions

View File

@ -53,7 +53,7 @@
LLVMTypeRef LLVMTypeRef
lp_build_elem_type(union lp_type type) lp_build_elem_type(union lp_type type)
{ {
if (type.kind == LP_TYPE_FLOAT) { if (type.floating) {
assert(type.sign); assert(type.sign);
switch(type.width) { switch(type.width) {
case 32: case 32:
@ -90,9 +90,15 @@ lp_build_vec_type(union lp_type type)
boolean boolean
lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type)
{ {
LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type); LLVMTypeKind elem_kind;
if (type.kind == LP_TYPE_FLOAT) { assert(elem_type);
if(!elem_type)
return FALSE;
elem_kind = LLVMGetTypeKind(elem_type);
if (type.floating) {
switch(type.width) { switch(type.width) {
case 32: case 32:
if(elem_kind != LLVMFloatTypeKind) if(elem_kind != LLVMFloatTypeKind)
@ -124,6 +130,10 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
{ {
LLVMTypeRef elem_type; LLVMTypeRef elem_type;
assert(vec_type);
if(!vec_type)
return FALSE;
if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind)
return FALSE; return FALSE;
@ -136,6 +146,73 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type)
} }
/**
 * Check that a value has the vector type described by type.
 *
 * A null value trips the assert in debug builds and yields FALSE otherwise.
 */
boolean
lp_check_value(union lp_type type, LLVMValueRef val)
{
   assert(val);

   /* Guard clause: nothing to query on a null value */
   if(!val)
      return FALSE;

   return lp_check_vec_type(type, LLVMTypeOf(val));
}
/**
 * Build an undefined value of the vector type described by type.
 */
LLVMValueRef
lp_build_undef(union lp_type type)
{
   return LLVMGetUndef(lp_build_vec_type(type));
}
/**
 * Build the null (all zeros) constant of the vector type described by type.
 */
LLVMValueRef
lp_build_zero(union lp_type type)
{
   return LLVMConstNull(lp_build_vec_type(type));
}
/**
 * Build a constant vector holding the representation of 1.0 in every element.
 *
 * The encoding depends on the type:
 * - floating point: the real value 1.0;
 * - fixed point: 1 << (width/2);
 * - plain (non-normalized) integer: the integer 1;
 * - normalized integer: all bits set for unsigned types, all bits except the
 *   top one for signed types.
 *
 * @param type  description of the vector type; type.length must not exceed
 *              LP_MAX_VECTOR_LENGTH.
 * @return the constant vector.
 */
LLVMValueRef
lp_build_one(union lp_type type)
{
   LLVMTypeRef elem_type;
   LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
   unsigned i;

   /* elems has LP_MAX_VECTOR_LENGTH slots, so a full-length vector fits */
   assert(type.length <= LP_MAX_VECTOR_LENGTH);

   elem_type = lp_build_elem_type(type);

   if(type.floating)
      elems[0] = LLVMConstReal(elem_type, 1.0);
   else if(type.fixed)
      elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0);
   else if(!type.norm)
      elems[0] = LLVMConstInt(elem_type, 1, 0);
   else {
      /* special case -- 1.0 for normalized types is more easily attained if
       * we start with a vector consisting of all bits set */
      LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length);
      LLVMValueRef vec = LLVMConstAllOnes(vec_type);

      if(type.sign) {
         /* Clear the sign bit. Both operands of lshr must have the same
          * type, so the shift count is a vector of ones of the element type
          * rather than a scalar i32. */
         for(i = 0; i < type.length; ++i)
            elems[i] = LLVMConstInt(elem_type, 1, 0);
         vec = LLVMConstLShr(vec, LLVMConstVector(elems, type.length));
      }

      return vec;
   }

   /* Replicate the scalar into the remaining elements */
   for(i = 1; i < type.length; ++i)
      elems[i] = elems[0];

   return LLVMConstVector(elems, type.length);
}
LLVMValueRef LLVMValueRef
lp_build_const_aos(union lp_type type, lp_build_const_aos(union lp_type type,
double r, double g, double b, double a, double r, double g, double b, double a,
@ -154,20 +231,18 @@ lp_build_const_aos(union lp_type type,
if(swizzle == NULL) if(swizzle == NULL)
swizzle = default_swizzle; swizzle = default_swizzle;
if(type.kind == LP_TYPE_FLOAT) { if(type.floating) {
for(i = 0; i < type.length; i += 4) { elems[swizzle[0]] = LLVMConstReal(elem_type, r);
elems[i + swizzle[0]] = LLVMConstReal(elem_type, r); elems[swizzle[1]] = LLVMConstReal(elem_type, g);
elems[i + swizzle[1]] = LLVMConstReal(elem_type, g); elems[swizzle[2]] = LLVMConstReal(elem_type, b);
elems[i + swizzle[2]] = LLVMConstReal(elem_type, b); elems[swizzle[3]] = LLVMConstReal(elem_type, a);
elems[i + swizzle[3]] = LLVMConstReal(elem_type, a);
}
} }
else { else {
unsigned shift; unsigned shift;
long long llscale; long long llscale;
double dscale; double dscale;
if(type.kind == LP_TYPE_FIXED) if(type.fixed)
shift = type.width/2; shift = type.width/2;
else if(type.norm) else if(type.norm)
shift = type.sign ? type.width - 1 : type.width; shift = type.sign ? type.width - 1 : type.width;
@ -178,14 +253,15 @@ lp_build_const_aos(union lp_type type,
dscale = (double)llscale; dscale = (double)llscale;
assert((long long)dscale == llscale); assert((long long)dscale == llscale);
for(i = 0; i < type.length; i += 4) { elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0);
elems[i + swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0); elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0);
elems[i + swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0); elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0);
elems[i + swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0); elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
elems[i + swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0);
}
} }
for(i = 4; i < type.length; ++i)
elems[i] = elems[i % 4];
return LLVMConstVector(elems, type.length); return LLVMConstVector(elems, type.length);
} }
@ -219,165 +295,261 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder,
} }
/**
 * Generate min(a, b) without any simplification of special operands.
 *
 * On x86 a 128-bit vector maps to an SSE/SSE2 intrinsic where one matches
 * the element type; otherwise the result is a compare followed by a select.
 * The floating point compare is LLVMRealULT (unordered or less than).
 *
 * @param bld  build context supplying the builder and the vector type.
 * @param a    first operand.
 * @param b    second operand.
 * @return the value holding the element-wise minimum.
 */
static LLVMValueRef
lp_build_min_simple(struct lp_build_context *bld,
                    LLVMValueRef a,
                    LLVMValueRef b)
{
   const union lp_type type = bld->type;
   const char *intrinsic = NULL;
   LLVMValueRef cond;

   /* TODO: optimize the constant case */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      /* Braces matter here: without them the else pairs with the 64-bit
       * test, and integer types of width 64 would select min.pd. */
      if(type.floating) {
         if(type.width == 32)
            intrinsic = "llvm.x86.sse.min.ps";
         if(type.width == 64)
            intrinsic = "llvm.x86.sse2.min.pd";
      }
      else {
         if(type.width == 8 && !type.sign)
            intrinsic = "llvm.x86.sse2.pminu.b";
         if(type.width == 16 && type.sign)
            intrinsic = "llvm.x86.sse2.pmins.w";
      }
   }
#endif

   if(intrinsic)
      return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);

   /* Generic path: select a where a < b, b elsewhere */
   if(type.floating)
      cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
   else
      cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
   return LLVMBuildSelect(bld->builder, cond, a, b, "");
}
/**
 * Generate max(a, b) without any simplification of special operands.
 *
 * On x86 a 128-bit vector maps to an SSE/SSE2 intrinsic where one matches
 * the element type; otherwise the result is a compare followed by a select.
 * The floating point compare is LLVMRealULT (unordered or less than).
 *
 * @param bld  build context supplying the builder and the vector type.
 * @param a    first operand.
 * @param b    second operand.
 * @return the value holding the element-wise maximum.
 */
static LLVMValueRef
lp_build_max_simple(struct lp_build_context *bld,
                    LLVMValueRef a,
                    LLVMValueRef b)
{
   const union lp_type type = bld->type;
   const char *intrinsic = NULL;
   LLVMValueRef cond;

   /* TODO: optimize the constant case */

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
   if(type.width * type.length == 128) {
      /* Braces matter here: without them the else pairs with the 64-bit
       * test, and integer types of width 64 would select max.pd. */
      if(type.floating) {
         if(type.width == 32)
            intrinsic = "llvm.x86.sse.max.ps";
         if(type.width == 64)
            intrinsic = "llvm.x86.sse2.max.pd";
      }
      else {
         if(type.width == 8 && !type.sign)
            intrinsic = "llvm.x86.sse2.pmaxu.b";
         if(type.width == 16 && type.sign)
            intrinsic = "llvm.x86.sse2.pmaxs.w";
      }
   }
#endif

   if(intrinsic)
      return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);

   /* Generic path: select b where a < b, a elsewhere */
   if(type.floating)
      cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, "");
   else
      cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, "");
   return LLVMBuildSelect(bld->builder, cond, b, a, "");
}
LLVMValueRef LLVMValueRef
lp_build_sub(LLVMBuilderRef builder, lp_build_comp(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a)
LLVMValueRef b,
LLVMValueRef zero)
{ {
if(b == zero) const union lp_type type = bld->type;
return a;
else if(a == b) if(a == bld->one)
return zero; return bld->zero;
else if(LLVMIsConstant(a) && LLVMIsConstant(b)) if(a == bld->zero)
return LLVMConstSub(a, b); return bld->one;
if(type.norm && !type.floating && !type.fixed && !type.sign) {
if(LLVMIsConstant(a))
return LLVMConstNot(a);
else else
return LLVMBuildSub(builder, a, b, ""); return LLVMBuildNot(bld->builder, a, "");
}
if(LLVMIsConstant(a))
return LLVMConstSub(bld->one, a);
else
return LLVMBuildSub(bld->builder, bld->one, a, "");
} }
LLVMValueRef LLVMValueRef
lp_build_mul(LLVMBuilderRef builder, lp_build_add(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b, LLVMValueRef b)
LLVMValueRef zero,
LLVMValueRef one)
{ {
if(a == zero) const union lp_type type = bld->type;
return zero; LLVMValueRef res;
else if(a == one)
if(a == bld->zero)
return b; return b;
else if(b == zero) if(b == bld->zero)
return zero;
else if(b == one)
return a; return a;
else if(LLVMIsConstant(a) && LLVMIsConstant(b)) if(a == bld->undef || b == bld->undef)
return bld->undef;
if(bld->type.norm) {
const char *intrinsic = NULL;
if(a == bld->one || b == bld->one)
return bld->one;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
if(type.width * type.length == 128 &&
!type.floating && !type.fixed) {
if(type.width == 8)
intrinsic = type.sign ? "llvm.x86.sse2.adds.b" : "llvm.x86.sse2.addus.b";
if(type.width == 16)
intrinsic = type.sign ? "llvm.x86.sse2.adds.w" : "llvm.x86.sse2.addus.w";
}
#endif
if(intrinsic)
return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
}
if(LLVMIsConstant(a) && LLVMIsConstant(b))
res = LLVMConstAdd(a, b);
else
res = LLVMBuildAdd(bld->builder, a, b, "");
if(bld->type.norm && (bld->type.floating || bld->type.fixed))
res = lp_build_min_simple(bld, res, bld->one);
return res;
}
/**
 * Generate a - b.
 *
 * Trivial operands are resolved without emitting code: subtracting
 * bld->zero gives a, an undefined operand gives bld->undef, and a value
 * minus itself gives bld->zero. For normalized types, subtracting bld->one
 * gives bld->zero, 128-bit 8/16-bit integer vectors on x86 use the SSE2
 * saturating subtract intrinsics, and floating/fixed point results are
 * clamped from below at bld->zero.
 */
LLVMValueRef
lp_build_sub(struct lp_build_context *bld,
             LLVMValueRef a,
             LLVMValueRef b)
{
   const union lp_type type = bld->type;
   LLVMValueRef diff;

   if(b == bld->zero)
      return a;

   if(a == bld->undef || b == bld->undef)
      return bld->undef;

   if(a == b)
      return bld->zero;

   if(type.norm) {
      const char *intrinsic = NULL;

      if(b == bld->one)
         return bld->zero;

#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
      if(type.width * type.length == 128 &&
         !type.floating && !type.fixed) {
         switch(type.width) {
         case 8:
            intrinsic = type.sign ? "llvm.x86.sse2.subs.b" : "llvm.x86.sse2.subus.b";
            break;
         case 16:
            intrinsic = type.sign ? "llvm.x86.sse2.subs.w" : "llvm.x86.sse2.subus.w";
            break;
         default:
            break;
         }
      }
#endif

      if(intrinsic)
         return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b);
   }

   /* Fold at build time when both operands are constants */
   diff = (LLVMIsConstant(a) && LLVMIsConstant(b))
      ? LLVMConstSub(a, b)
      : LLVMBuildSub(bld->builder, a, b, "");

   if(type.norm && (type.floating || type.fixed))
      diff = lp_build_max_simple(bld, diff, bld->zero);

   return diff;
}
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b)
{
if(a == bld->zero)
return bld->zero;
if(a == bld->one)
return b;
if(b == bld->zero)
return bld->zero;
if(b == bld->one)
return a;
if(a == bld->undef || b == bld->undef)
return bld->undef;
if(LLVMIsConstant(a) && LLVMIsConstant(b))
return LLVMConstMul(a, b); return LLVMConstMul(a, b);
else
return LLVMBuildMul(builder, a, b, ""); return LLVMBuildMul(bld->builder, a, b, "");
} }
LLVMValueRef LLVMValueRef
lp_build_min(LLVMBuilderRef builder, lp_build_min(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b) LLVMValueRef b)
{ {
/* TODO: optimize the constant case */ if(a == bld->undef || b == bld->undef)
return bld->undef;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(bld->type.norm) {
if(a == bld->zero || b == bld->zero)
return bld->zero;
if(a == bld->one)
return b;
if(b == bld->one)
return a;
}
return lp_build_intrinsic_binary(builder, "llvm.x86.sse.min.ps", a, b); return lp_build_min_simple(bld, a, b);
#else
LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, "");
return LLVMBuildSelect(values->builder, cond, a, b, "");
#endif
} }
LLVMValueRef LLVMValueRef
lp_build_max(LLVMBuilderRef builder, lp_build_max(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b) LLVMValueRef b)
{ {
/* TODO: optimize the constant case */ if(a == bld->undef || b == bld->undef)
return bld->undef;
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) if(bld->type.norm) {
if(a == bld->one || b == bld->one)
return lp_build_intrinsic_binary(builder, "llvm.x86.sse.max.ps", a, b); return bld->one;
if(a == bld->zero)
#else
LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, "");
return LLVMBuildSelect(values->builder, cond, b, a, "");
#endif
}
LLVMValueRef
lp_build_add_sat(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero,
LLVMValueRef one)
{
if(a == zero)
return b; return b;
else if(b == zero) if(b == bld->zero)
return a; return a;
else if(a == one || b == one)
return one;
else
return lp_build_min(builder, lp_build_add(builder, a, b, zero), one);
} }
LLVMValueRef return lp_build_max_simple(bld, a, b);
lp_build_sub_sat(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero,
LLVMValueRef one)
{
if(b == zero)
return a;
else if(b == one)
return zero;
else
return lp_build_max(builder, lp_build_sub(builder, a, b, zero), zero);
}
LLVMValueRef
lp_build_min_sat(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero,
LLVMValueRef one)
{
if(a == zero || b == zero)
return zero;
else if(a == one)
return b;
else if(b == one)
return a;
else
return lp_build_min(builder, a, b);
}
LLVMValueRef
lp_build_max_sat(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero,
LLVMValueRef one)
{
if(a == zero)
return b;
else if(b == zero)
return a;
else if(a == one || b == one)
return one;
else
return lp_build_max(builder, a, b);
} }

View File

@ -66,7 +66,13 @@ union lp_type {
* Integer. floating-point, or fixed point as established by the * Integer. floating-point, or fixed point as established by the
* lp_build_type_kind enum above. * lp_build_type_kind enum above.
*/ */
unsigned kind:2; unsigned floating:1;
/**
* Whether the values are fixed-point, i.e., integers carrying an implied
* binary point (the constant builders in this commit place it at width/2
* bits).
*/
unsigned fixed:1;
/** /**
* Whether it can represent negative values or not. * Whether it can represent negative values or not.
@ -79,9 +85,11 @@ union lp_type {
* Whether values are normalized to fit [0, 1] interval, or [-1, 1] interval for * Whether values are normalized to fit [0, 1] interval, or [-1, 1] interval for
* signed types. * signed types.
* *
* For integer types * For integer types it means the representable integer range should be
* interpreted as the interval above.
* *
* It makes no sense to use this with fixed point values. * For floating and fixed point formats it means the values should be
* clamped to the interval above.
*/ */
unsigned norm:1; unsigned norm:1;
@ -123,10 +131,27 @@ boolean
lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type);
/**
 * Check that a value has the vector type described by type.
 *
 * Returns FALSE for a null value; otherwise returns what lp_check_vec_type
 * reports for the value's LLVM type.
 */
boolean
lp_check_value(union lp_type type, LLVMValueRef val);
/* /*
* Constants * Constants
*/ */
/** Undefined value of the vector type described by type. */
LLVMValueRef
lp_build_undef(union lp_type type);
/** Null (all zeros) constant of the vector type described by type. */
LLVMValueRef
lp_build_zero(union lp_type type);
/**
 * Constant vector holding the representation of 1.0 in every element:
 * 1.0 for floating point, 1 << (width/2) for fixed point, 1 for plain
 * integers, and the largest representable value for normalized integers.
 */
LLVMValueRef
lp_build_one(union lp_type type);
LLVMValueRef LLVMValueRef
lp_build_const_aos(union lp_type type, lp_build_const_aos(union lp_type type,
double r, double g, double b, double a, double r, double g, double b, double a,
@ -136,66 +161,52 @@ lp_build_const_aos(union lp_type type,
* Basic arithmetic * Basic arithmetic
*/ */
LLVMValueRef
lp_build_add(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero);
LLVMValueRef /**
lp_build_sub(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero);
LLVMValueRef
lp_build_mul(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b,
LLVMValueRef zero,
LLVMValueRef one);
LLVMValueRef
lp_build_min(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef
lp_build_max(LLVMBuilderRef builder,
LLVMValueRef a,
LLVMValueRef b);
/*
* Satured arithmetic
*/ */
/**
 * State shared by the arithmetic builders (lp_build_add, lp_build_sub, ...).
 *
 * The undef, zero and one members are compared by pointer against the
 * operands so that trivial cases can be resolved without emitting
 * instructions.
 */
struct lp_build_context
{
   /** Builder handed to the LLVMBuild* calls */
   LLVMBuilderRef builder;
   /** Description of the vector type the operands are expected to have */
   union lp_type type;
   /** Undefined value; lp_build_blend fills it with lp_build_undef(type) */
   LLVMValueRef undef;
   /** Zero constant; lp_build_blend fills it with lp_build_zero(type) */
   LLVMValueRef zero;
   /** One constant; lp_build_blend fills it with lp_build_one(type) */
   LLVMValueRef one;
};
/**
 * Complement, i.e., 1 - a.
 *
 * Returns bld->zero when a is bld->one and bld->one when a is bld->zero.
 * Unsigned normalized integer types are complemented with a bitwise NOT;
 * every other type subtracts a from bld->one.
 */
LLVMValueRef
lp_build_comp(struct lp_build_context *bld,
              LLVMValueRef a);
LLVMValueRef LLVMValueRef
lp_build_add_sat(LLVMBuilderRef builder, lp_build_add(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b, LLVMValueRef b);
LLVMValueRef zero,
LLVMValueRef one);
LLVMValueRef LLVMValueRef
lp_build_sub_sat(LLVMBuilderRef builder, lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b, LLVMValueRef b);
LLVMValueRef zero,
LLVMValueRef one);
LLVMValueRef LLVMValueRef
lp_build_min_sat(LLVMBuilderRef builder, lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b, LLVMValueRef b);
LLVMValueRef zero,
LLVMValueRef one);
LLVMValueRef LLVMValueRef
lp_build_max_sat(LLVMBuilderRef builder, lp_build_min(struct lp_build_context *bld,
LLVMValueRef a, LLVMValueRef a,
LLVMValueRef b, LLVMValueRef b);
LLVMValueRef zero,
LLVMValueRef one); LLVMValueRef
lp_build_max(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
#endif /* !LP_BLD_ARIT_H */ #endif /* !LP_BLD_ARIT_H */

View File

@ -44,17 +44,13 @@
/** /**
* We may the same values several times, so we keep them here to avoid * We may use the same values several times, so we keep them here to avoid
* recomputing them. Also reusing the values allows us to do simplifications * recomputing them. Also reusing the values allows us to do simplifications
* that LLVM optimization passes wouldn't normally be able to do. * that LLVM optimization passes wouldn't normally be able to do.
*/ */
struct lp_build_blend_values struct lp_build_blend_context
{ {
LLVMBuilderRef builder; struct lp_build_context base;
LLVMValueRef undef;
LLVMValueRef zero;
LLVMValueRef one;
LLVMValueRef src; LLVMValueRef src;
LLVMValueRef dst; LLVMValueRef dst;
@ -73,62 +69,62 @@ struct lp_build_blend_values
static LLVMValueRef static LLVMValueRef
lp_build_blend_factor_unswizzled(struct lp_build_blend_values *values, lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld,
unsigned factor, unsigned factor,
boolean alpha) boolean alpha)
{ {
switch (factor) { switch (factor) {
case PIPE_BLENDFACTOR_ZERO: case PIPE_BLENDFACTOR_ZERO:
return values->zero; return bld->base.zero;
case PIPE_BLENDFACTOR_ONE: case PIPE_BLENDFACTOR_ONE:
return values->one; return bld->base.one;
case PIPE_BLENDFACTOR_SRC_COLOR: case PIPE_BLENDFACTOR_SRC_COLOR:
case PIPE_BLENDFACTOR_SRC_ALPHA: case PIPE_BLENDFACTOR_SRC_ALPHA:
return values->src; return bld->src;
case PIPE_BLENDFACTOR_DST_COLOR: case PIPE_BLENDFACTOR_DST_COLOR:
case PIPE_BLENDFACTOR_DST_ALPHA: case PIPE_BLENDFACTOR_DST_ALPHA:
return values->dst; return bld->dst;
case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
if(alpha) if(alpha)
return values->one; return bld->base.one;
else { else {
if(!values->inv_dst) if(!bld->inv_dst)
values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero); bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
if(!values->saturate) if(!bld->saturate)
values->saturate = lp_build_min_sat(values->builder, values->src, values->inv_dst, values->zero, values->one); bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst);
return values->saturate; return bld->saturate;
} }
case PIPE_BLENDFACTOR_CONST_COLOR: case PIPE_BLENDFACTOR_CONST_COLOR:
case PIPE_BLENDFACTOR_CONST_ALPHA: case PIPE_BLENDFACTOR_CONST_ALPHA:
return values->const_; return bld->const_;
case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_COLOR:
case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_SRC1_ALPHA:
/* TODO */ /* TODO */
assert(0); assert(0);
return values->zero; return bld->base.zero;
case PIPE_BLENDFACTOR_INV_SRC_COLOR: case PIPE_BLENDFACTOR_INV_SRC_COLOR:
case PIPE_BLENDFACTOR_INV_SRC_ALPHA: case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
if(!values->inv_src) if(!bld->inv_src)
values->inv_src = lp_build_sub(values->builder, values->one, values->src, values->zero); bld->inv_src = lp_build_comp(&bld->base, bld->src);
return values->inv_src; return bld->inv_src;
case PIPE_BLENDFACTOR_INV_DST_COLOR: case PIPE_BLENDFACTOR_INV_DST_COLOR:
case PIPE_BLENDFACTOR_INV_DST_ALPHA: case PIPE_BLENDFACTOR_INV_DST_ALPHA:
if(!values->inv_dst) if(!bld->inv_dst)
values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero); bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
return values->inv_dst; return bld->inv_dst;
case PIPE_BLENDFACTOR_INV_CONST_COLOR: case PIPE_BLENDFACTOR_INV_CONST_COLOR:
case PIPE_BLENDFACTOR_INV_CONST_ALPHA: case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
if(!values->inv_const) if(!bld->inv_const)
values->inv_const = lp_build_sub(values->builder, values->one, values->const_, values->zero); bld->inv_const = lp_build_comp(&bld->base, bld->const_);
return values->inv_const; return bld->inv_const;
case PIPE_BLENDFACTOR_INV_SRC1_COLOR: case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
/* TODO */ /* TODO */
assert(0); assert(0);
return values->zero; return bld->base.zero;
default: default:
assert(0); assert(0);
return values->zero; return bld->base.zero;
} }
} }
@ -175,13 +171,13 @@ lp_build_blend_factor_swizzle(unsigned factor)
static LLVMValueRef static LLVMValueRef
lp_build_blend_swizzle(struct lp_build_blend_values *values, lp_build_blend_swizzle(struct lp_build_blend_context *bld,
LLVMValueRef rgb, LLVMValueRef rgb,
LLVMValueRef alpha, LLVMValueRef alpha,
enum lp_build_blend_swizzle rgb_swizzle, enum lp_build_blend_swizzle rgb_swizzle,
unsigned alpha_swizzle, unsigned alpha_swizzle)
unsigned n)
{ {
const unsigned n = bld->base.type.length;
LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH]; LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH];
unsigned i, j; unsigned i, j;
@ -189,14 +185,14 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values,
if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA)
return rgb; return rgb;
alpha = values->undef; alpha = bld->base.undef;
} }
for(j = 0; j < n; j += 4) { for(j = 0; j < n; j += 4) {
for(i = 0; i < 4; ++i) { for(i = 0; i < 4; ++i) {
unsigned swizzle; unsigned swizzle;
if(i == alpha_swizzle && alpha != values->undef) { if(i == alpha_swizzle && alpha != bld->base.undef) {
/* Take the alpha from the second shuffle argument */ /* Take the alpha from the second shuffle argument */
swizzle = n + j + alpha_swizzle; swizzle = n + j + alpha_swizzle;
} }
@ -212,55 +208,54 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values,
} }
} }
return LLVMBuildShuffleVector(values->builder, rgb, alpha, LLVMConstVector(swizzles, n), ""); return LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), "");
} }
static LLVMValueRef static LLVMValueRef
lp_build_blend_factor(struct lp_build_blend_values *values, lp_build_blend_factor(struct lp_build_blend_context *bld,
LLVMValueRef factor1, LLVMValueRef factor1,
unsigned rgb_factor, unsigned rgb_factor,
unsigned alpha_factor, unsigned alpha_factor,
unsigned alpha_swizzle, unsigned alpha_swizzle)
unsigned n)
{ {
LLVMValueRef rgb_factor_; LLVMValueRef rgb_factor_;
LLVMValueRef alpha_factor_; LLVMValueRef alpha_factor_;
LLVMValueRef factor2; LLVMValueRef factor2;
enum lp_build_blend_swizzle rgb_swizzle; enum lp_build_blend_swizzle rgb_swizzle;
rgb_factor_ = lp_build_blend_factor_unswizzled(values, rgb_factor, FALSE); rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE);
alpha_factor_ = lp_build_blend_factor_unswizzled(values, alpha_factor, TRUE); alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor);
factor2 = lp_build_blend_swizzle(values, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, n); factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle);
return lp_build_mul(values->builder, factor1, factor2, values->zero, values->one); return lp_build_mul(&bld->base, factor1, factor2);
} }
static LLVMValueRef static LLVMValueRef
lp_build_blend_func(struct lp_build_blend_values *values, lp_build_blend_func(struct lp_build_blend_context *bld,
unsigned func, unsigned func,
LLVMValueRef term1, LLVMValueRef term1,
LLVMValueRef term2) LLVMValueRef term2)
{ {
switch (func) { switch (func) {
case PIPE_BLEND_ADD: case PIPE_BLEND_ADD:
return lp_build_add_sat(values->builder, term1, term2, values->zero, values->one); return lp_build_add(&bld->base, term1, term2);
break; break;
case PIPE_BLEND_SUBTRACT: case PIPE_BLEND_SUBTRACT:
return lp_build_sub_sat(values->builder, term1, term2, values->zero, values->one); return lp_build_sub(&bld->base, term1, term2);
case PIPE_BLEND_REVERSE_SUBTRACT: case PIPE_BLEND_REVERSE_SUBTRACT:
return lp_build_sub_sat(values->builder, term2, term1, values->zero, values->one); return lp_build_sub(&bld->base, term2, term1);
case PIPE_BLEND_MIN: case PIPE_BLEND_MIN:
return lp_build_min_sat(values->builder, term1, term2, values->zero, values->one); return lp_build_min(&bld->base, term1, term2);
case PIPE_BLEND_MAX: case PIPE_BLEND_MAX:
return lp_build_max_sat(values->builder, term1, term2, values->zero, values->one); return lp_build_max(&bld->base, term1, term2);
default: default:
assert(0); assert(0);
return values->zero; return bld->base.zero;
} }
} }
@ -274,35 +269,33 @@ lp_build_blend(LLVMBuilderRef builder,
LLVMValueRef const_, LLVMValueRef const_,
unsigned alpha_swizzle) unsigned alpha_swizzle)
{ {
struct lp_build_blend_values values; struct lp_build_blend_context bld;
LLVMValueRef src_term; LLVMValueRef src_term;
LLVMValueRef dst_term; LLVMValueRef dst_term;
LLVMTypeRef vec_type;
vec_type = lp_build_vec_type(type); /* It makes no sense to blend unless values are normalized */
assert(type.norm);
/* /* Setup build context */
* Compute constants memset(&bld, 0, sizeof bld);
*/ bld.base.builder = builder;
memset(&values, 0, sizeof values); bld.base.type = type;
values.builder = builder; bld.base.undef = lp_build_undef(type);
values.undef = LLVMGetUndef(vec_type); bld.base.zero = lp_build_zero(type);
values.zero = LLVMConstNull(vec_type); bld.base.one = lp_build_one(type);
values.one = lp_build_const_aos(type, 1.0, 1.0, 1.0, 1.0, NULL); bld.src = src;
bld.dst = dst;
values.src = src; bld.const_ = const_;
values.dst = dst;
values.const_ = const_;
/* TODO: There are still a few optimization oportunities here. For certain /* TODO: There are still a few optimization oportunities here. For certain
* combinations it is possible to reorder the operations and therefor saving * combinations it is possible to reorder the operations and therefor saving
* some instructions. */ * some instructions. */
src_term = lp_build_blend_factor(&values, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle, type.length); src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle);
dst_term = lp_build_blend_factor(&values, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle, type.length); dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle);
if(blend->rgb_func == blend->alpha_func) { if(blend->rgb_func == blend->alpha_func) {
return lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term); return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
} }
else { else {
/* Seperate RGB / A functions */ /* Seperate RGB / A functions */
@ -310,9 +303,9 @@ lp_build_blend(LLVMBuilderRef builder,
LLVMValueRef rgb; LLVMValueRef rgb;
LLVMValueRef alpha; LLVMValueRef alpha;
rgb = lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term); rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term);
alpha = lp_build_blend_func(&values, blend->alpha_func, src_term, dst_term); alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term);
return lp_build_blend_swizzle(&values, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle, type.length); return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle);
} }
} }

View File

@ -81,7 +81,7 @@ add_blend_test(LLVMModuleRef module,
LLVMValueRef res; LLVMValueRef res;
type.value = 0; type.value = 0;
type.kind = LP_TYPE_FLOAT; type.floating = TRUE;
type.sign = TRUE; type.sign = TRUE;
type.norm = TRUE; type.norm = TRUE;
type.width = 32; type.width = 32;