gallivm: Add code for rgb9e5 shared exponent format to float conversion

And use this (and the code for r11g11b10 packed float to float conversion)
in the soa texturing code (the generated code looks quite good).
This is probably an order of magnitude faster than using the fallback
(not measured).
Tested with piglit texwrap GL_EXT_packed_float and
GL_EXT_texture_shared_exponent respectively (didn't find much else using
it).

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
This commit is contained in:
Roland Scheidegger 2013-03-23 02:05:54 +01:00
parent 3e10ab6b22
commit b50e362dbb
3 changed files with 118 additions and 3 deletions

View File

@ -397,6 +397,92 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
}
/**
 * Extract one 9-bit component from packed rgb9e5 value(s) and turn it into
 * a float by multiplying with the (already computed) shared scale factor.
 *
 * @param f32_type        float type of the result (its length also sizes
 *                        the int32 type used for the integer ops)
 * @param src             packed rgb9e5 values (as (vector) int32)
 * @param scale           float scale factor the component is multiplied by
 * @param mantissa_start  bit position of the component's lowest bit in src
 * @return (float)component_bits * scale
 */
static LLVMValueRef
lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
                              struct lp_type f32_type,
                              LLVMValueRef src,
                              LLVMValueRef scale,
                              unsigned mantissa_start)
{
   struct lp_type int_type = lp_type_int_vec(32, 32 * f32_type.length);
   struct lp_build_context int_bld, flt_bld;
   LLVMValueRef start_bit, mantissa_mask;
   LLVMValueRef mantissa, mantissa_f;

   lp_build_context_init(&int_bld, gallivm, int_type);
   lp_build_context_init(&flt_bld, gallivm, f32_type);

   /*
    * Compared to the other odd float formats this one is simple: there is
    * no sign and no Inf/NaN, and normals/denormals need no special
    * treatment either (the mantissa has no implied leading one, so every
    * value behaves like a denormal).
    * Hence the result is just (float)comp_bits * scale.
    */
   start_bit = lp_build_const_int_vec(gallivm, int_type, mantissa_start);
   mantissa_mask = lp_build_const_int_vec(gallivm, int_type, 0x1ff);

   /* move the component down to bit 0, then keep only its 9 bits */
   mantissa = lp_build_shr(&int_bld, src, start_bit);
   mantissa = lp_build_and(&int_bld, mantissa, mantissa_mask);

   mantissa_f = lp_build_int_to_float(&flt_bld, mantissa);
   return lp_build_mul(&flt_bld, mantissa_f, scale);
}
/**
 * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
 *
 * The three 9-bit components share the 5-bit exponent stored in the top
 * bits of each packed value; alpha is always set to one.
 *
 * @param src   packed AoS rgb9e5 values (as (vector) int32)
 * @param dst   pointer to the SoA result values (dst[0..3] are written)
 */
void
lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
                         LLVMValueRef src,
                         LLVMValueRef *dst)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef packed_type = LLVMTypeOf(src);
   struct lp_type int_type, uint_type, float_type;
   struct lp_build_context int_bld, uint_bld, float_bld;
   LLVMValueRef exp_shift, exp_bias, mant_shift;
   LLVMValueRef exponent, scale;
   unsigned num_elems = 1;
   unsigned chan;

   /* scalar input counts as a single element */
   if (LLVMGetTypeKind(packed_type) == LLVMVectorTypeKind) {
      num_elems = LLVMGetVectorSize(packed_type);
   }

   int_type = lp_type_int_vec(32, 32 * num_elems);
   uint_type = lp_type_uint_vec(32, 32 * num_elems);
   float_type = lp_type_float_vec(32, 32 * num_elems);

   lp_build_context_init(&int_bld, gallivm, int_type);
   lp_build_context_init(&uint_bld, gallivm, uint_type);
   lp_build_context_init(&float_bld, gallivm, float_type);

   exp_shift = lp_build_const_int_vec(gallivm, int_type, 27);
   exp_bias = lp_build_const_int_vec(gallivm, int_type, 127 - (15 + 9));
   mant_shift = lp_build_const_int_vec(gallivm, int_type, 23);

   /*
    * Extract the exponent. The shift has to be an unsigned one,
    * otherwise an additional mask would be required.
    */
   exponent = lp_build_shr(&uint_bld, src, exp_shift);

   /*
    * The scale factor is 2 ^ (exp - bias), additionally corrected for the
    * number of mantissa bits. It is not computed with integer shifts since
    * a) vector shifts are unavailable in a lot of cases
    * b) the shift direction changes, so two shifts plus a conditional
    *    would be needed (or a rotate instruction, which is even more rare,
    *    for instance XOP)
    * Instead build the float 2 ^ x directly by placing the rebiased
    * exponent into the exponent field of a float (this also saves the
    * float conversion at the end).
    */
   scale = lp_build_add(&int_bld, exponent, exp_bias);
   scale = lp_build_shl(&int_bld, scale, mant_shift);
   scale = LLVMBuildBitCast(builder, scale, float_bld.vec_type, "");

   /* r, g, b components start at bits 0, 9 and 18 respectively */
   for (chan = 0; chan < 3; chan++) {
      dst[chan] = lp_build_rgb9_to_float_helper(gallivm, float_type,
                                                src, scale, chan * 9);
   }

   /* Just set alpha to one */
   dst[3] = float_bld.one;
}
/**
* Converts int16 half-float to float32
* Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)

View File

@ -70,6 +70,11 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
LLVMValueRef src,
LLVMValueRef *dst);
/*
 * Convert packed shared exponent (rgb9e5) value(s), given as (vector) int32
 * in src, to rgba float SoA values written to dst[0..3].
 */
void
lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
LLVMValueRef src,
LLVMValueRef *dst);
LLVMValueRef
lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
struct lp_type src_type,

View File

@ -310,9 +310,10 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
* \param type the desired return type for 'rgba'. The vector length
* is the number of texels to fetch
*
* \param base_ptr points to start of the texture image block. For non-
* compressed formats, this simply points to the texel.
* For compressed formats, it points to the start of the
* \param base_ptr points to the base of the texture mip tree.
* \param offset offset to start of the texture image block. For non-
* compressed formats, this simply is an offset to the texel.
* For compressed formats, it is an offset to the start of the
* compressed data block.
*
* \param i, j the sub-block pixel coordinates. For non-compressed formats
@ -368,6 +369,29 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
return;
}
/*
 * Fast path for the packed float formats (r11g11b10 and rgb9e5): gather
 * the packed texels and convert them directly to SoA floats.
 */
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
/*
* similar conceptually to above but requiring special
* AoS packed -> SoA float conversion code.
*/
LLVMValueRef packed;
/* this path only handles a 32-bit float result type */
assert(type.floating);
assert(type.width == 32);
/* fetch one packed value of block.bits bits per requested texel */
packed = lp_build_gather(gallivm, type.length,
format_desc->block.bits,
type.width, base_ptr, offset);
/* pick the converter matching the format; it writes to rgba_out */
if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
}
else {
lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
}
/* handled here, skip the generic paths below */
return;
}
/*
* Try calling lp_build_fetch_rgba_aos for all pixels.
*/