diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 33b8f6dc9fe..4c6fd9ef7ca 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -45,6 +45,8 @@
  */
 
 
+#include <float.h>
+
 #include "util/u_memory.h"
 #include "util/u_debug.h"
 #include "util/u_math.h"
@@ -58,9 +60,9 @@
 #include "lp_bld_logic.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_debug.h"
+#include "lp_bld_bitarit.h"
 #include "lp_bld_arit.h"
 
-#include "float.h"
 
 
 #define EXP_POLY_DEGREE 5
@@ -679,8 +681,12 @@ lp_build_sub(struct lp_build_context *bld,
 }
 
 
+
 /**
- * Normalized 8bit multiplication.
+ * Normalized multiplication.
+ *
+ * There are several approaches here (using 8-bit normalized multiplication as
+ * an example):
  *
  * - alpha plus one
  *
@@ -721,66 +727,56 @@ lp_build_sub(struct lp_build_context *bld,
  * http://www.stereopsis.com/doubleblend.html
  */
 static LLVMValueRef
-lp_build_mul_u8n(struct gallivm_state *gallivm,
-                 struct lp_type i16_type,
-                 LLVMValueRef a, LLVMValueRef b)
+lp_build_mul_norm(struct gallivm_state *gallivm,
+                  struct lp_type wide_type,
+                  LLVMValueRef a, LLVMValueRef b)
 {
    LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef c8;
+   struct lp_build_context bld;
+   unsigned bits;
+   LLVMValueRef shift;
+   LLVMValueRef half;
    LLVMValueRef ab;
 
-   assert(!i16_type.floating);
-   assert(lp_check_value(i16_type, a));
-   assert(lp_check_value(i16_type, b));
+   assert(!wide_type.floating);
+   assert(lp_check_value(wide_type, a));
+   assert(lp_check_value(wide_type, b));
+
+   lp_build_context_init(&bld, gallivm, wide_type);
+
+   bits = wide_type.width / 2;
+   if (wide_type.sign) {
+      --bits;
+   }
+
+   shift = lp_build_const_int_vec(gallivm, wide_type, bits);
 
-   c8 = lp_build_const_int_vec(gallivm, i16_type, 8);
-
 #if 0
 
    /* a*b/255 ~= (a*(b + 1)) >> 256 */
-   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallium, i16_type, 1), "");
+   /* XXX: This would not work for signed types */
+   assert(!wide_type.sign);
+   b = LLVMBuildAdd(builder, b, lp_build_const_int_vec(gallivm, wide_type, 1), "");
    ab = LLVMBuildMul(builder, a, b, "");
 
 #else
 
    /* ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8 */
    ab = LLVMBuildMul(builder, a, b, "");
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c8, ""), "");
-   ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(gallivm, i16_type, 0x80), "");
+   ab = LLVMBuildAdd(builder, ab, lp_build_shr(&bld, ab, shift), "");
+
+   /* Add rounding term */
+   half = lp_build_const_int_vec(gallivm, wide_type, 1 << (bits - 1));
+   if (wide_type.sign) {
+      LLVMValueRef minus_half = LLVMBuildNeg(builder, half, "");
+      LLVMValueRef sign = lp_build_shr_imm(&bld, ab, wide_type.width - 1);
+      half = lp_build_select(&bld, sign, minus_half, half);
+   }
+   ab = LLVMBuildAdd(builder, ab, half, "");
 
 #endif
 
-   ab = LLVMBuildLShr(builder, ab, c8, "");
-
-   return ab;
-}
-
-
-/**
- * Normalized 16bit multiplication.
- *
- * Utilises same principle as above code.
- */
-static LLVMValueRef
-lp_build_mul_u16n(struct gallivm_state *gallivm,
-                  struct lp_type i32_type,
-                  LLVMValueRef a, LLVMValueRef b)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef c16;
-   LLVMValueRef ab;
-
-   assert(!i32_type.floating);
-   assert(lp_check_value(i32_type, a));
-   assert(lp_check_value(i32_type, b));
-
-   c16 = lp_build_const_int_vec(gallivm, i32_type, 16);
-
-   /* ab/65535 ~= (ab + (ab >> 16) + 0x8000) >> 16 */
-   ab = LLVMBuildMul(builder, a, b, "");
-   ab = LLVMBuildAdd(builder, ab, LLVMBuildLShr(builder, ab, c16, ""), "");
-   ab = LLVMBuildAdd(builder, ab, lp_build_const_int_vec(gallivm, i32_type, 0x8000), "");
-
-   ab = LLVMBuildLShr(builder, ab, c16, "");
+   ab = lp_build_shr(&bld, ab, shift);
 
    return ab;
 }
@@ -812,41 +808,20 @@ lp_build_mul(struct lp_build_context *bld,
    if(a == bld->undef || b == bld->undef)
       return bld->undef;
 
-   if(!type.floating && !type.fixed && type.norm) {
-      if(type.width == 8) {
-         struct lp_type i16_type = lp_wider_type(type);
-         LLVMValueRef al, ah, bl, bh, abl, abh, ab;
+   if (!type.floating && !type.fixed && type.norm) {
+      struct lp_type wide_type = lp_wider_type(type);
+      LLVMValueRef al, ah, bl, bh, abl, abh, ab;
 
-         lp_build_unpack2(bld->gallivm, type, i16_type, a, &al, &ah);
-         lp_build_unpack2(bld->gallivm, type, i16_type, b, &bl, &bh);
+      lp_build_unpack2(bld->gallivm, type, wide_type, a, &al, &ah);
+      lp_build_unpack2(bld->gallivm, type, wide_type, b, &bl, &bh);
 
-         /* PMULLW, PSRLW, PADDW */
-         abl = lp_build_mul_u8n(bld->gallivm, i16_type, al, bl);
-         abh = lp_build_mul_u8n(bld->gallivm, i16_type, ah, bh);
+      /* PMULLW, PSRLW, PADDW */
+      abl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
+      abh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
 
-         ab = lp_build_pack2(bld->gallivm, i16_type, type, abl, abh);
-
-         return ab;
-      }
+      ab = lp_build_pack2(bld->gallivm, wide_type, type, abl, abh);
 
-      if(type.width == 16) {
-         struct lp_type i32_type = lp_wider_type(type);
-         LLVMValueRef al, ah, bl, bh, abl, abh, ab;
-
-         lp_build_unpack2(bld->gallivm, type, i32_type, a, &al, &ah);
-         lp_build_unpack2(bld->gallivm, type, i32_type, b, &bl, &bh);
-
-         /* PMULLW, PSRLW, PADDW */
-         abl = lp_build_mul_u16n(bld->gallivm, i32_type, al, bl);
-         abh = lp_build_mul_u16n(bld->gallivm, i32_type, ah, bh);
-
-         ab = lp_build_pack2(bld->gallivm, i32_type, type, abl, abh);
-
-         return ab;
-      }
-
-      /* FIXME */
-      assert(0);
+      return ab;
    }
 
    if(type.fixed)
@@ -988,7 +963,10 @@ lp_build_div(struct lp_build_context *bld,
 
 
 /**
- * Linear interpolation -- without any checks.
+ * Linear interpolation helper.
+ *
+ * @param normalized whether we are interpolating normalized values,
+ *        encoded in normalized integers, twice as wide.
  *
  * @sa http://www.stereopsis.com/doubleblend.html
  */
@@ -996,8 +974,10 @@ static INLINE LLVMValueRef
 lp_build_lerp_simple(struct lp_build_context *bld,
                      LLVMValueRef x,
                      LLVMValueRef v0,
-                     LLVMValueRef v1)
+                     LLVMValueRef v1,
+                     bool normalized)
 {
+   unsigned half_width = bld->type.width/2;
    LLVMBuilderRef builder = bld->gallivm->builder;
    LLVMValueRef delta;
    LLVMValueRef res;
@@ -1010,14 +990,19 @@ lp_build_lerp_simple(struct lp_build_context *bld,
 
    res = lp_build_mul(bld, x, delta);
 
+   if (normalized) {
+      if (bld->type.sign) {
+         res = lp_build_shr_imm(bld, res, half_width - 1);
+      } else {
+         res = lp_build_shr_imm(bld, res, half_width);
+      }
+   }
+
    res = lp_build_add(bld, v0, res);
 
-   if (bld->type.fixed) {
-      /* XXX: This step is necessary for lerping 8bit colors stored on 16bits,
-       * but it will be wrong for other uses. Basically we need a more
-       * powerful lp_type, capable of further distinguishing the values
-       * interpretation from the value storage. */
-      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1 << bld->type.width/2) - 1), "");
+   if (normalized && !bld->type.sign) {
+      /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */
+      res = LLVMBuildAnd(builder, res, lp_build_const_int_vec(bld->gallivm, bld->type, (1 << half_width) - 1), "");
    }
 
    return res;
@@ -1045,17 +1030,17 @@ lp_build_lerp(struct lp_build_context *bld,
       struct lp_type wide_type;
       struct lp_build_context wide_bld;
       LLVMValueRef xl, xh, v0l, v0h, v1l, v1h, resl, resh;
+      unsigned bits;
       LLVMValueRef shift;
 
      assert(type.length >= 2);
-      assert(!type.sign);
 
       /*
-       * Create a wider type, enough to hold the intermediate result of the
-       * multiplication.
+       * Create a wider integer type, enough to hold the
+       * intermediate result of the multiplication.
        */
       memset(&wide_type, 0, sizeof wide_type);
-      wide_type.fixed  = TRUE;
+      wide_type.sign   = type.sign;
       wide_type.width  = type.width*2;
      wide_type.length = type.length/2;
 
@@ -1069,7 +1054,12 @@ lp_build_lerp(struct lp_build_context *bld,
        * Scale x from [0, 255] to [0, 256]
        */
 
-      shift = lp_build_const_int_vec(bld->gallivm, wide_type, type.width - 1);
+      bits = type.width - 1;
+      if (type.sign) {
+         --bits;
+      }
+
+      shift = lp_build_const_int_vec(bld->gallivm, wide_type, bits);
 
       xl = lp_build_add(&wide_bld, xl,
                         LLVMBuildAShr(builder, xl, shift, ""));
@@ -1080,12 +1070,12 @@ lp_build_lerp(struct lp_build_context *bld,
        * Lerp both halves.
        */
-      resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l);
-      resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h);
+      resl = lp_build_lerp_simple(&wide_bld, xl, v0l, v1l, TRUE);
+      resh = lp_build_lerp_simple(&wide_bld, xh, v0h, v1h, TRUE);
 
       res = lp_build_pack2(bld->gallivm, wide_type, type, resl, resh);
    }
    else {
-      res = lp_build_lerp_simple(bld, x, v0, v1);
+      res = lp_build_lerp_simple(bld, x, v0, v1, FALSE);
    }
 
    return res;
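For reference, the geometric-series approximation used by lp_build_mul_norm, ab/255 ~= (ab + (ab >> 8) + 0x80) >> 8, can be checked exhaustively on the CPU. The following standalone C program is illustrative only (not part of the patch); it compares the shifted form against exact round-to-nearest of ab/255 for every 8-bit unorm pair and prints the worst-case error:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
   uint32_t max_err = 0;

   for (uint32_t a = 0; a <= 255; ++a) {
      for (uint32_t b = 0; b <= 255; ++b) {
         uint32_t ab = a * b;

         /* Shift-based approximation from the patch */
         uint32_t approx = (ab + (ab >> 8) + 0x80) >> 8;

         /* Exact round-to-nearest of ab/255; 2*ab + 255 is odd,
          * so there are never any ties to break */
         uint32_t exact = (2 * ab + 255) / 510;

         uint32_t err = approx > exact ? approx - exact : exact - approx;
         if (err > max_err)
            max_err = err;
      }
   }

   printf("max |approx - round(ab/255)| = %u\n", max_err);
   return 0;
}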
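Similarly, the weight scaling in lp_build_lerp (x is widened from [0, 255] to [0, 256] via x += x >> 7, so that a weight of 255 selects v1 exactly) and the shift and mask steps in lp_build_lerp_simple can be modeled in scalar code. A minimal sketch of the unsigned 8-bit path, assuming a hypothetical helper name lerp_unorm8 and an arithmetic right shift on negative values (mirroring AShr):

#include <stdio.h>
#include <stdint.h>

/* Hypothetical scalar model of the unsigned 8-bit path; not Mesa code. */
static uint8_t lerp_unorm8(uint8_t x, uint8_t v0, uint8_t v1)
{
   /* Scale the weight from [0, 255] to [0, 256], mirroring
    * xl += xl >> (type.width - 1) in lp_build_lerp */
   int32_t w = x + (x >> 7);

   /* delta may be negative, so compute in a wider signed type;
    * the >> is the arithmetic shift lp_build_lerp_simple emits */
   int32_t delta = (int32_t)v1 - (int32_t)v0;
   int32_t res = v0 + ((w * delta) >> 8);

   /* Mask out the high-order bits, mirroring the final LLVMBuildAnd */
   return (uint8_t)(res & 0xff);
}

int main(void)
{
   /* Endpoints are exact: x = 0 yields v0, x = 255 yields v1 */
   printf("%d %d %d\n",
          lerp_unorm8(0, 10, 200),    /* 10 */
          lerp_unorm8(255, 10, 200),  /* 200 */
          lerp_unorm8(128, 0, 255));  /* ~128 */
   return 0;
}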