From a6622e6c544d3530a463d6a274a15bfae58f7ccc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Fonseca?= Date: Sun, 2 Aug 2009 17:54:53 +0100 Subject: [PATCH] llvmpipe: Specialize arithmetic operations. --- src/gallium/drivers/llvmpipe/lp_bld_arit.c | 458 +++++++++++++------ src/gallium/drivers/llvmpipe/lp_bld_arit.h | 117 ++--- src/gallium/drivers/llvmpipe/lp_bld_blend.c | 139 +++--- src/gallium/drivers/llvmpipe/lp_test_blend.c | 2 +- 4 files changed, 446 insertions(+), 270 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.c b/src/gallium/drivers/llvmpipe/lp_bld_arit.c index b5b4148ac2d..f45b7d82f14 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.c @@ -53,7 +53,7 @@ LLVMTypeRef lp_build_elem_type(union lp_type type) { - if (type.kind == LP_TYPE_FLOAT) { + if (type.floating) { assert(type.sign); switch(type.width) { case 32: @@ -90,9 +90,15 @@ lp_build_vec_type(union lp_type type) boolean lp_check_elem_type(union lp_type type, LLVMTypeRef elem_type) { - LLVMTypeKind elem_kind = LLVMGetTypeKind(elem_type); + LLVMTypeKind elem_kind; - if (type.kind == LP_TYPE_FLOAT) { + assert(elem_type); + if(!elem_type) + return FALSE; + + elem_kind = LLVMGetTypeKind(elem_type); + + if (type.floating) { switch(type.width) { case 32: if(elem_kind != LLVMFloatTypeKind) @@ -124,6 +130,10 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) { LLVMTypeRef elem_type; + assert(vec_type); + if(!vec_type) + return FALSE; + if(LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) return FALSE; @@ -136,6 +146,73 @@ lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type) } +boolean +lp_check_value(union lp_type type, LLVMValueRef val) +{ + LLVMTypeRef vec_type; + + assert(val); + if(!val) + return FALSE; + + vec_type = LLVMTypeOf(val); + + return lp_check_vec_type(type, vec_type); +} + + +LLVMValueRef +lp_build_undef(union lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + return 
LLVMGetUndef(vec_type); +} + + +LLVMValueRef +lp_build_zero(union lp_type type) +{ + LLVMTypeRef vec_type = lp_build_vec_type(type); + return LLVMConstNull(vec_type); +} + + +LLVMValueRef +lp_build_one(union lp_type type) +{ + LLVMTypeRef elem_type; + LLVMValueRef elems[LP_MAX_VECTOR_LENGTH]; + unsigned i; + + assert(type.length < LP_MAX_VECTOR_LENGTH); + + elem_type = lp_build_elem_type(type); + + if(type.floating) + elems[0] = LLVMConstReal(elem_type, 1.0); + else if(type.fixed) + elems[0] = LLVMConstInt(elem_type, 1LL << (type.width/2), 0); + else if(!type.norm) + elems[0] = LLVMConstInt(elem_type, 1, 0); + else { + /* special case -- 1.0 for normalized types is more easily attained if + * we start with a vector consisting of all bits set */ + LLVMTypeRef vec_type = LLVMVectorType(elem_type, type.length); + LLVMValueRef vec = LLVMConstAllOnes(vec_type); + + if(type.sign) + vec = LLVMConstLShr(vec, LLVMConstInt(LLVMInt32Type(), 1, 0)); + + return vec; + } + + for(i = 1; i < type.length; ++i) + elems[i] = elems[0]; + + return LLVMConstVector(elems, type.length); +} + + LLVMValueRef lp_build_const_aos(union lp_type type, double r, double g, double b, double a, @@ -154,20 +231,18 @@ lp_build_const_aos(union lp_type type, if(swizzle == NULL) swizzle = default_swizzle; - if(type.kind == LP_TYPE_FLOAT) { - for(i = 0; i < type.length; i += 4) { - elems[i + swizzle[0]] = LLVMConstReal(elem_type, r); - elems[i + swizzle[1]] = LLVMConstReal(elem_type, g); - elems[i + swizzle[2]] = LLVMConstReal(elem_type, b); - elems[i + swizzle[3]] = LLVMConstReal(elem_type, a); - } + if(type.floating) { + elems[swizzle[0]] = LLVMConstReal(elem_type, r); + elems[swizzle[1]] = LLVMConstReal(elem_type, g); + elems[swizzle[2]] = LLVMConstReal(elem_type, b); + elems[swizzle[3]] = LLVMConstReal(elem_type, a); } else { unsigned shift; long long llscale; double dscale; - if(type.kind == LP_TYPE_FIXED) + if(type.fixed) shift = type.width/2; else if(type.norm) shift = type.sign ? 
type.width - 1 : type.width; @@ -178,14 +253,15 @@ lp_build_const_aos(union lp_type type, dscale = (double)llscale; assert((long long)dscale == llscale); - for(i = 0; i < type.length; i += 4) { - elems[i + swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0); - elems[i + swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0); - elems[i + swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0); - elems[i + swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0); - } + elems[swizzle[0]] = LLVMConstInt(elem_type, r*dscale + 0.5, 0); + elems[swizzle[1]] = LLVMConstInt(elem_type, g*dscale + 0.5, 0); + elems[swizzle[2]] = LLVMConstInt(elem_type, b*dscale + 0.5, 0); + elems[swizzle[3]] = LLVMConstInt(elem_type, a*dscale + 0.5, 0); } + for(i = 4; i < type.length; ++i) + elems[i] = elems[i % 4]; + return LLVMConstVector(elems, type.length); } @@ -219,165 +295,261 @@ lp_build_intrinsic_binary(LLVMBuilderRef builder, } -LLVMValueRef -lp_build_add(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero) +static LLVMValueRef +lp_build_min_simple(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) { - if(a == zero) - return b; - else if(b == zero) - return a; - else if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstAdd(a, b); + const union lp_type type = bld->type; + const char *intrinsic = NULL; + LLVMValueRef cond; + + /* TODO: optimize the constant case */ + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128) { + if(type.floating) + if(type.width == 32) + intrinsic = "llvm.x86.sse.min.ps"; + if(type.width == 64) + intrinsic = "llvm.x86.sse2.min.pd"; + else { + if(type.width == 8 && !type.sign) + intrinsic = "llvm.x86.sse2.pminu.b"; + if(type.width == 16 && type.sign) + intrinsic = "llvm.x86.sse2.pmins.w"; + } + } +#endif + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b); + + if(type.floating) + cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, 
b, ""); else - return LLVMBuildAdd(builder, a, b, ""); + cond = LLVMBuildICmp(bld->builder, type.sign ? LLVMIntSLT : LLVMIntULT, a, b, ""); + return LLVMBuildSelect(bld->builder, cond, a, b, ""); +} + + +static LLVMValueRef +lp_build_max_simple(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const union lp_type type = bld->type; + const char *intrinsic = NULL; + LLVMValueRef cond; + + /* TODO: optimize the constant case */ + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128) { + if(type.floating) + if(type.width == 32) + intrinsic = "llvm.x86.sse.max.ps"; + if(type.width == 64) + intrinsic = "llvm.x86.sse2.max.pd"; + else { + if(type.width == 8 && !type.sign) + intrinsic = "llvm.x86.sse2.pmaxu.b"; + if(type.width == 16 && type.sign) + intrinsic = "llvm.x86.sse2.pmaxs.w"; + } + } +#endif + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b); + + if(type.floating) + cond = LLVMBuildFCmp(bld->builder, LLVMRealULT, a, b, ""); + else + cond = LLVMBuildICmp(bld->builder, type.sign ? 
LLVMIntSLT : LLVMIntULT, a, b, ""); + return LLVMBuildSelect(bld->builder, cond, b, a, ""); } LLVMValueRef -lp_build_sub(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero) +lp_build_comp(struct lp_build_context *bld, + LLVMValueRef a) { - if(b == zero) - return a; - else if(a == b) - return zero; - else if(LLVMIsConstant(a) && LLVMIsConstant(b)) - return LLVMConstSub(a, b); + const union lp_type type = bld->type; + + if(a == bld->one) + return bld->zero; + if(a == bld->zero) + return bld->one; + + if(type.norm && !type.floating && !type.fixed && !type.sign) { + if(LLVMIsConstant(a)) + return LLVMConstNot(a); + else + return LLVMBuildNot(bld->builder, a, ""); + } + + if(LLVMIsConstant(a)) + return LLVMConstSub(bld->one, a); else - return LLVMBuildSub(builder, a, b, ""); + return LLVMBuildSub(bld->builder, bld->one, a, ""); } LLVMValueRef -lp_build_mul(LLVMBuilderRef builder, +lp_build_add(struct lp_build_context *bld, LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one) + LLVMValueRef b) { - if(a == zero) - return zero; - else if(a == one) + const union lp_type type = bld->type; + LLVMValueRef res; + + if(a == bld->zero) return b; - else if(b == zero) - return zero; - else if(b == one) + if(b == bld->zero) return a; - else if(LLVMIsConstant(a) && LLVMIsConstant(b)) + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(bld->type.norm) { + const char *intrinsic = NULL; + + if(a == bld->one || b == bld->one) + return bld->one; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128 && + !type.floating && !type.fixed) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.adds.b" : "llvm.x86.sse2.addus.b"; + if(type.width == 16) + intrinsic = type.sign ? 
"llvm.x86.sse2.adds.w" : "llvm.x86.sse2.addus.w"; + } +#endif + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b); + } + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) + res = LLVMConstAdd(a, b); + else + res = LLVMBuildAdd(bld->builder, a, b, ""); + + if(bld->type.norm && (bld->type.floating || bld->type.fixed)) + res = lp_build_min_simple(bld, res, bld->one); + + return res; +} + + +LLVMValueRef +lp_build_sub(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + const union lp_type type = bld->type; + LLVMValueRef res; + + if(b == bld->zero) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + if(a == b) + return bld->zero; + + if(bld->type.norm) { + const char *intrinsic = NULL; + + if(b == bld->one) + return bld->zero; + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(type.width * type.length == 128 && + !type.floating && !type.fixed) { + if(type.width == 8) + intrinsic = type.sign ? "llvm.x86.sse2.subs.b" : "llvm.x86.sse2.subus.b"; + if(type.width == 16) + intrinsic = type.sign ? 
"llvm.x86.sse2.subs.w" : "llvm.x86.sse2.subus.w"; + } +#endif + + if(intrinsic) + return lp_build_intrinsic_binary(bld->builder, intrinsic, a, b); + } + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) + res = LLVMConstSub(a, b); + else + res = LLVMBuildSub(bld->builder, a, b, ""); + + if(bld->type.norm && (bld->type.floating || bld->type.fixed)) + res = lp_build_max_simple(bld, res, bld->zero); + + return res; +} + + +LLVMValueRef +lp_build_mul(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b) +{ + if(a == bld->zero) + return bld->zero; + if(a == bld->one) + return b; + if(b == bld->zero) + return bld->zero; + if(b == bld->one) + return a; + if(a == bld->undef || b == bld->undef) + return bld->undef; + + if(LLVMIsConstant(a) && LLVMIsConstant(b)) return LLVMConstMul(a, b); - else - return LLVMBuildMul(builder, a, b, ""); + + return LLVMBuildMul(bld->builder, a, b, ""); } LLVMValueRef -lp_build_min(LLVMBuilderRef builder, +lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - /* TODO: optimize the constant case */ + if(a == bld->undef || b == bld->undef) + return bld->undef; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(bld->type.norm) { + if(a == bld->zero || b == bld->zero) + return bld->zero; + if(a == bld->one) + return b; + if(b == bld->one) + return a; + } - return lp_build_intrinsic_binary(builder, "llvm.x86.sse.min.ps", a, b); - -#else - - LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, ""); - return LLVMBuildSelect(values->builder, cond, a, b, ""); - -#endif + return lp_build_min_simple(bld, a, b); } LLVMValueRef -lp_build_max(LLVMBuilderRef builder, +lp_build_max(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b) { - /* TODO: optimize the constant case */ + if(a == bld->undef || b == bld->undef) + return bld->undef; -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + if(bld->type.norm) { + if(a == bld->one || b == bld->one) + return bld->one; + if(a == bld->zero) 
+ return b; + if(b == bld->zero) + return a; + } - return lp_build_intrinsic_binary(builder, "llvm.x86.sse.max.ps", a, b); - -#else - - LLVMValueRef cond = LLVMBuildFCmp(values->builder, LLVMRealULT, a, b, ""); - return LLVMBuildSelect(values->builder, cond, b, a, ""); - -#endif -} - - -LLVMValueRef -lp_build_add_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one) -{ - if(a == zero) - return b; - else if(b == zero) - return a; - else if(a == one || b == one) - return one; - else - return lp_build_min(builder, lp_build_add(builder, a, b, zero), one); -} - -LLVMValueRef -lp_build_sub_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one) -{ - if(b == zero) - return a; - else if(b == one) - return zero; - else - return lp_build_max(builder, lp_build_sub(builder, a, b, zero), zero); -} - -LLVMValueRef -lp_build_min_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one) -{ - if(a == zero || b == zero) - return zero; - else if(a == one) - return b; - else if(b == one) - return a; - else - return lp_build_min(builder, a, b); -} - - -LLVMValueRef -lp_build_max_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one) -{ - if(a == zero) - return b; - else if(b == zero) - return a; - else if(a == one || b == one) - return one; - else - return lp_build_max(builder, a, b); + return lp_build_max_simple(bld, a, b); } diff --git a/src/gallium/drivers/llvmpipe/lp_bld_arit.h b/src/gallium/drivers/llvmpipe/lp_bld_arit.h index f9a61be5168..795b8165071 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_arit.h +++ b/src/gallium/drivers/llvmpipe/lp_bld_arit.h @@ -66,7 +66,13 @@ union lp_type { * Integer. floating-point, or fixed point as established by the * lp_build_type_kind enum above. */ - unsigned kind:2; + unsigned floating:1; + + /** + * Integer. 
floating-point, or fixed point as established by the + * lp_build_type_kind enum above. + */ + unsigned fixed:1; /** * Whether it can represent negative values or not. @@ -79,9 +85,11 @@ union lp_type { * Whether values are normalized to fit [0, 1] interval, or [-1, 1] interval for * signed types. * - * For integer types + * For integer types it means the representable integer range should be + * interpreted as the interval above. * - * It makes no sense to use this with fixed point values. + * For floating and fixed point formats it means the values should be + * clamped to the interval above. */ unsigned norm:1; @@ -123,10 +131,27 @@ boolean lp_check_vec_type(union lp_type type, LLVMTypeRef vec_type); +boolean +lp_check_value(union lp_type type, LLVMValueRef val); + + /* * Constants */ + +LLVMValueRef +lp_build_undef(union lp_type type); + + +LLVMValueRef +lp_build_zero(union lp_type type); + + +LLVMValueRef +lp_build_one(union lp_type type); + + LLVMValueRef lp_build_const_aos(union lp_type type, double r, double g, double b, double a, @@ -136,66 +161,52 @@ lp_build_const_aos(union lp_type type, * Basic arithmetic */ -LLVMValueRef -lp_build_add(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero); -LLVMValueRef -lp_build_sub(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero); - -LLVMValueRef -lp_build_mul(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one); - -LLVMValueRef -lp_build_min(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b); - -LLVMValueRef -lp_build_max(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b); - -/* - * Satured arithmetic +/** */ +struct lp_build_context +{ + LLVMBuilderRef builder; + + union lp_type type; + + LLVMValueRef undef; + LLVMValueRef zero; + LLVMValueRef one; +}; + + +/** + * Complement, i.e., 1 - a. 
+ */ +LLVMValueRef +lp_build_comp(struct lp_build_context *bld, + LLVMValueRef a); LLVMValueRef -lp_build_add_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one); +lp_build_add(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef -lp_build_sub_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one); +lp_build_sub(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef -lp_build_min_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one); +lp_build_mul(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); LLVMValueRef -lp_build_max_sat(LLVMBuilderRef builder, - LLVMValueRef a, - LLVMValueRef b, - LLVMValueRef zero, - LLVMValueRef one); +lp_build_min(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); + +LLVMValueRef +lp_build_max(struct lp_build_context *bld, + LLVMValueRef a, + LLVMValueRef b); #endif /* !LP_BLD_ARIT_H */ diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c index 48d1e0028ae..ce8408b79bd 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c @@ -44,18 +44,14 @@ /** - * We may the same values several times, so we keep them here to avoid - * recomputing them. Also reusing the values allows us to do simplifications + * We may use the same values several times, so we keep them here to avoid + * recomputing them. Also reusing the values allows us to do simplifications * that LLVM optimization passes wouldn't normally be able to do.
*/ -struct lp_build_blend_values +struct lp_build_blend_context { - LLVMBuilderRef builder; + struct lp_build_context base; - LLVMValueRef undef; - LLVMValueRef zero; - LLVMValueRef one; - LLVMValueRef src; LLVMValueRef dst; LLVMValueRef const_; @@ -73,62 +69,62 @@ struct lp_build_blend_values static LLVMValueRef -lp_build_blend_factor_unswizzled(struct lp_build_blend_values *values, +lp_build_blend_factor_unswizzled(struct lp_build_blend_context *bld, unsigned factor, boolean alpha) { switch (factor) { case PIPE_BLENDFACTOR_ZERO: - return values->zero; + return bld->base.zero; case PIPE_BLENDFACTOR_ONE: - return values->one; + return bld->base.one; case PIPE_BLENDFACTOR_SRC_COLOR: case PIPE_BLENDFACTOR_SRC_ALPHA: - return values->src; + return bld->src; case PIPE_BLENDFACTOR_DST_COLOR: case PIPE_BLENDFACTOR_DST_ALPHA: - return values->dst; + return bld->dst; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: if(alpha) - return values->one; + return bld->base.one; else { - if(!values->inv_dst) - values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero); - if(!values->saturate) - values->saturate = lp_build_min_sat(values->builder, values->src, values->inv_dst, values->zero, values->one); - return values->saturate; + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + if(!bld->saturate) + bld->saturate = lp_build_min(&bld->base, bld->src, bld->inv_dst); + return bld->saturate; } case PIPE_BLENDFACTOR_CONST_COLOR: case PIPE_BLENDFACTOR_CONST_ALPHA: - return values->const_; + return bld->const_; case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: /* TODO */ assert(0); - return values->zero; + return bld->base.zero; case PIPE_BLENDFACTOR_INV_SRC_COLOR: case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - if(!values->inv_src) - values->inv_src = lp_build_sub(values->builder, values->one, values->src, values->zero); - return values->inv_src; + if(!bld->inv_src) + bld->inv_src = lp_build_comp(&bld->base, bld->src); + return 
bld->inv_src; case PIPE_BLENDFACTOR_INV_DST_COLOR: case PIPE_BLENDFACTOR_INV_DST_ALPHA: - if(!values->inv_dst) - values->inv_dst = lp_build_sub(values->builder, values->one, values->dst, values->zero); - return values->inv_dst; + if(!bld->inv_dst) + bld->inv_dst = lp_build_comp(&bld->base, bld->dst); + return bld->inv_dst; case PIPE_BLENDFACTOR_INV_CONST_COLOR: case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - if(!values->inv_const) - values->inv_const = lp_build_sub(values->builder, values->one, values->const_, values->zero); - return values->inv_const; + if(!bld->inv_const) + bld->inv_const = lp_build_comp(&bld->base, bld->const_); + return bld->inv_const; case PIPE_BLENDFACTOR_INV_SRC1_COLOR: case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: /* TODO */ assert(0); - return values->zero; + return bld->base.zero; default: assert(0); - return values->zero; + return bld->base.zero; } } @@ -175,13 +171,13 @@ lp_build_blend_factor_swizzle(unsigned factor) static LLVMValueRef -lp_build_blend_swizzle(struct lp_build_blend_values *values, +lp_build_blend_swizzle(struct lp_build_blend_context *bld, LLVMValueRef rgb, LLVMValueRef alpha, enum lp_build_blend_swizzle rgb_swizzle, - unsigned alpha_swizzle, - unsigned n) + unsigned alpha_swizzle) { + const unsigned n = bld->base.type.length; LLVMValueRef swizzles[LP_MAX_VECTOR_LENGTH]; unsigned i, j; @@ -189,14 +185,14 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values, if(rgb_swizzle == LP_BUILD_BLEND_SWIZZLE_RGBA) return rgb; - alpha = values->undef; + alpha = bld->base.undef; } for(j = 0; j < n; j += 4) { for(i = 0; i < 4; ++i) { unsigned swizzle; - if(i == alpha_swizzle && alpha != values->undef) { + if(i == alpha_swizzle && alpha != bld->base.undef) { /* Take the alpha from the second shuffle argument */ swizzle = n + j + alpha_swizzle; } @@ -212,55 +208,54 @@ lp_build_blend_swizzle(struct lp_build_blend_values *values, } } - return LLVMBuildShuffleVector(values->builder, rgb, alpha, LLVMConstVector(swizzles, n), ""); + return 
LLVMBuildShuffleVector(bld->base.builder, rgb, alpha, LLVMConstVector(swizzles, n), ""); } static LLVMValueRef -lp_build_blend_factor(struct lp_build_blend_values *values, +lp_build_blend_factor(struct lp_build_blend_context *bld, LLVMValueRef factor1, unsigned rgb_factor, unsigned alpha_factor, - unsigned alpha_swizzle, - unsigned n) + unsigned alpha_swizzle) { LLVMValueRef rgb_factor_; LLVMValueRef alpha_factor_; LLVMValueRef factor2; enum lp_build_blend_swizzle rgb_swizzle; - rgb_factor_ = lp_build_blend_factor_unswizzled(values, rgb_factor, FALSE); - alpha_factor_ = lp_build_blend_factor_unswizzled(values, alpha_factor, TRUE); + rgb_factor_ = lp_build_blend_factor_unswizzled(bld, rgb_factor, FALSE); + alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE); rgb_swizzle = lp_build_blend_factor_swizzle(rgb_factor); - factor2 = lp_build_blend_swizzle(values, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, n); + factor2 = lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle); - return lp_build_mul(values->builder, factor1, factor2, values->zero, values->one); + return lp_build_mul(&bld->base, factor1, factor2); } static LLVMValueRef -lp_build_blend_func(struct lp_build_blend_values *values, +lp_build_blend_func(struct lp_build_blend_context *bld, unsigned func, LLVMValueRef term1, LLVMValueRef term2) { switch (func) { case PIPE_BLEND_ADD: - return lp_build_add_sat(values->builder, term1, term2, values->zero, values->one); + return lp_build_add(&bld->base, term1, term2); break; case PIPE_BLEND_SUBTRACT: - return lp_build_sub_sat(values->builder, term1, term2, values->zero, values->one); + return lp_build_sub(&bld->base, term1, term2); case PIPE_BLEND_REVERSE_SUBTRACT: - return lp_build_sub_sat(values->builder, term2, term1, values->zero, values->one); + return lp_build_sub(&bld->base, term2, term1); case PIPE_BLEND_MIN: - return lp_build_min_sat(values->builder, term1, term2, values->zero, values->one); + 
return lp_build_min(&bld->base, term1, term2); case PIPE_BLEND_MAX: - return lp_build_max_sat(values->builder, term1, term2, values->zero, values->one); + return lp_build_max(&bld->base, term1, term2); default: assert(0); - return values->zero; + return bld->base.zero; } } @@ -274,35 +269,33 @@ lp_build_blend(LLVMBuilderRef builder, LLVMValueRef const_, unsigned alpha_swizzle) { - struct lp_build_blend_values values; + struct lp_build_blend_context bld; LLVMValueRef src_term; LLVMValueRef dst_term; - LLVMTypeRef vec_type; - vec_type = lp_build_vec_type(type); + /* It makes no sense to blend unless values are normalized */ + assert(type.norm); - /* - * Compute constants - */ - memset(&values, 0, sizeof values); - values.builder = builder; - values.undef = LLVMGetUndef(vec_type); - values.zero = LLVMConstNull(vec_type); - values.one = lp_build_const_aos(type, 1.0, 1.0, 1.0, 1.0, NULL); - - values.src = src; - values.dst = dst; - values.const_ = const_; + /* Setup build context */ + memset(&bld, 0, sizeof bld); + bld.base.builder = builder; + bld.base.type = type; + bld.base.undef = lp_build_undef(type); + bld.base.zero = lp_build_zero(type); + bld.base.one = lp_build_one(type); + bld.src = src; + bld.dst = dst; + bld.const_ = const_; /* TODO: There are still a few optimization oportunities here. For certain * combinations it is possible to reorder the operations and therefor saving * some instructions. 
*/ - src_term = lp_build_blend_factor(&values, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle, type.length); - dst_term = lp_build_blend_factor(&values, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle, type.length); + src_term = lp_build_blend_factor(&bld, src, blend->rgb_src_factor, blend->alpha_src_factor, alpha_swizzle); + dst_term = lp_build_blend_factor(&bld, dst, blend->rgb_dst_factor, blend->alpha_dst_factor, alpha_swizzle); if(blend->rgb_func == blend->alpha_func) { - return lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term); + return lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); } else { /* Seperate RGB / A functions */ @@ -310,9 +303,9 @@ lp_build_blend(LLVMBuilderRef builder, LLVMValueRef rgb; LLVMValueRef alpha; - rgb = lp_build_blend_func(&values, blend->rgb_func, src_term, dst_term); - alpha = lp_build_blend_func(&values, blend->alpha_func, src_term, dst_term); + rgb = lp_build_blend_func(&bld, blend->rgb_func, src_term, dst_term); + alpha = lp_build_blend_func(&bld, blend->alpha_func, src_term, dst_term); - return lp_build_blend_swizzle(&values, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle, type.length); + return lp_build_blend_swizzle(&bld, rgb, alpha, LP_BUILD_BLEND_SWIZZLE_RGBA, alpha_swizzle); } } diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c index 3becac18717..5f46bb5f36f 100644 --- a/src/gallium/drivers/llvmpipe/lp_test_blend.c +++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c @@ -81,7 +81,7 @@ add_blend_test(LLVMModuleRef module, LLVMValueRef res; type.value = 0; - type.kind = LP_TYPE_FLOAT; + type.floating = TRUE; type.sign = TRUE; type.norm = TRUE; type.width = 32;