gallivm: Compute the 4 texel offsets for linear filtering together.

This commit is contained in:
José Fonseca 2010-06-07 12:05:18 +01:00
parent ccd8b935e4
commit 0a6c908e0d
3 changed files with 277 additions and 123 deletions

View File

@ -123,6 +123,52 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
}
/**
 * Split a pixel coordinate along one axis into the byte offset of the pixel
 * block that contains it and the coordinate of the pixel inside that block.
 *
 * @param bld           build context used to emit the arithmetic
 * @param block_length  number of pixels in a pixel block along the coordinate
 *                      axis
 * @param coord         coordinate in pixels
 * @param stride        value the block coordinate is multiplied by to obtain
 *                      the offset (bytes between successive pixel blocks)
 * @param out_offset    receives the relative offset of the pixel block in
 *                      bytes; must not be NULL
 * @param out_subcoord  receives the sub-block pixel coordinate; must not be
 *                      NULL
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord)
{
   /* Defaults cover the single-pixel block case: nothing to split. */
   LLVMValueRef block_coord = coord;
   LLVMValueRef within_block = bld->zero;
   LLVMValueRef byte_offset;

   if (block_length != 1) {
      /*
       * Pixel blocks have power of two dimensions, so LLVM should turn this
       * rem/div pair into bit arithmetic.
       * TODO: Verify this.
       */
      LLVMValueRef divisor = lp_build_const_int_vec(bld->type, block_length);

      within_block = LLVMBuildURem(bld->builder, coord, divisor, "");
      block_coord = LLVMBuildUDiv(bld->builder, coord, divisor, "");
   }

   byte_offset = lp_build_mul(bld, block_coord, stride);

   assert(out_offset);
   assert(out_subcoord);

   *out_offset = byte_offset;
   *out_subcoord = within_block;
}
/**
* Compute the offset of a pixel block.
*
@ -144,48 +190,35 @@ lp_build_sample_offset(struct lp_build_context *bld,
{
LLVMValueRef x_stride;
LLVMValueRef offset;
LLVMValueRef i;
LLVMValueRef j;
/*
* Describe the coordinates in terms of pixel blocks.
*
* TODO: pixel blocks are power of two. LLVM should convert rem/div to
* bit arithmetic. Verify this.
*/
if (format_desc->block.width == 1) {
i = bld->zero;
}
else {
LLVMValueRef block_width = lp_build_const_int_vec(bld->type, format_desc->block.width);
i = LLVMBuildURem(bld->builder, x, block_width, "");
x = LLVMBuildUDiv(bld->builder, x, block_width, "");
}
if (format_desc->block.height == 1) {
j = bld->zero;
}
else {
LLVMValueRef block_height = lp_build_const_int_vec(bld->type, format_desc->block.height);
j = LLVMBuildURem(bld->builder, y, block_height, "");
y = LLVMBuildUDiv(bld->builder, y, block_height, "");
}
x_stride = lp_build_const_vec(bld->type, format_desc->block.bits/8);
offset = lp_build_mul(bld, x, x_stride);
lp_build_sample_partial_offset(bld,
format_desc->block.width,
x, x_stride,
&offset, out_i);
if (y && y_stride) {
LLVMValueRef y_offset = lp_build_mul(bld, y, y_stride);
LLVMValueRef y_offset;
lp_build_sample_partial_offset(bld,
format_desc->block.height,
y, y_stride,
&y_offset, out_j);
offset = lp_build_add(bld, offset, y_offset);
}
else {
*out_j = bld->zero;
}
if (z && z_stride) {
LLVMValueRef z_offset = lp_build_mul(bld, z, z_stride);
LLVMValueRef z_offset;
LLVMValueRef k;
lp_build_sample_partial_offset(bld,
1, /* pixel blocks are always 2D */
z, z_stride,
&z_offset, &k);
offset = lp_build_add(bld, offset, z_offset);
}
*out_offset = offset;
*out_i = i;
*out_j = j;
}

View File

@ -148,6 +148,15 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
const struct pipe_sampler_state *sampler);
/*
 * Compute the partial byte offset of a pixel block along one axis, together
 * with the sub-block pixel coordinate of the given pixel coordinate.
 */
void
lp_build_sample_partial_offset(struct lp_build_context *bld,
                               unsigned block_length,
                               LLVMValueRef coord,
                               LLVMValueRef stride,
                               LLVMValueRef *out_offset,
                               LLVMValueRef *out_subcoord);
void
lp_build_sample_offset(struct lp_build_context *bld,
const struct util_format_description *format_desc,

View File

@ -322,59 +322,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
}
/**
 * Fetch the texels at the given coordinates as <4n x i8> in AoS form.
 *
 * \param x           texel x coordinates
 * \param y           texel y coordinates
 * \param y_stride    stride passed on to lp_build_sample_offset() for the y
 *                    axis
 * \param data_array  array from which the mipmap level 0 pointer is obtained
 * \return the fetched texels; rgba8 variants are gathered as-is, without
 *         swizzling
 */
static LLVMValueRef
lp_build_sample_packed(struct lp_build_sample_context *bld,
                       LLVMValueRef x,
                       LLVMValueRef y,
                       LLVMValueRef y_stride,
                       LLVMValueRef data_array)
{
   LLVMValueRef byte_offset;
   LLVMValueRef sub_x, sub_y;
   LLVMValueRef level0_ptr;
   struct lp_type aos_type;

   /*
    * Turn the x,y coords into a linear offset from the start of the texture,
    * in bytes.  No z coordinate or z stride is supplied.
    */
   lp_build_sample_offset(&bld->uint_coord_bld,
                          bld->format_desc,
                          x, y, NULL, y_stride, NULL,
                          &byte_offset, &sub_x, &sub_y);

   /* Pointer to the mipmap level 0 data. */
   level0_ptr = lp_build_get_const_mipmap_level(bld, data_array, 0);

   if (util_format_is_rgba8_variant(bld->format_desc)) {
      /* Read the data directly; no swizzling happens here. */
      assert(bld->format_desc->block.width == 1);
      assert(bld->format_desc->block.height == 1);
      assert(bld->format_desc->block.bits <= bld->texel_type.width);

      return lp_build_gather(bld->builder,
                             bld->texel_type.length,
                             bld->format_desc->block.bits,
                             bld->texel_type.width,
                             level0_ptr, byte_offset);
   }

   /* Any other format goes through the generic AoS fetch as normalized i8. */
   assert(bld->texel_type.width == 32);

   memset(&aos_type, 0, sizeof aos_type);
   aos_type.width = 8;
   aos_type.length = bld->texel_type.length*4;
   aos_type.norm = TRUE;

   return lp_build_fetch_rgba_aos(bld->builder, bld->format_desc, aos_type,
                                  level0_ptr, byte_offset, sub_x, sub_y);
}
/**
* Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
*/
@ -409,7 +356,7 @@ lp_build_coord_mirror(struct lp_build_sample_context *bld,
/**
* We only support a few wrap modes in lp_build_sample_wrap_int() at this time.
* We only support a few wrap modes in lp_build_sample_wrap_linear_int() at this time.
* Return whether the given mode is supported by that function.
*/
static boolean
@ -431,13 +378,18 @@ is_simple_wrap_mode(unsigned mode)
* \param length the texture size along one dimension
* \param is_pot if TRUE, length is a power of two
* \param wrap_mode one of PIPE_TEX_WRAP_x
* \param out_offset resulting relative offset for coord
* \param out_i resulting sub-block pixel coordinate for coord
*/
static LLVMValueRef
lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
LLVMValueRef coord,
LLVMValueRef length,
boolean is_pot,
unsigned wrap_mode)
static void
lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld,
unsigned block_length,
LLVMValueRef coord,
LLVMValueRef length,
LLVMValueRef stride,
boolean is_pot,
unsigned wrap_mode,
LLVMValueRef *out_offset,
LLVMValueRef *out_i)
{
struct lp_build_context *uint_coord_bld = &bld->uint_coord_bld;
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
@ -470,7 +422,134 @@ lp_build_sample_wrap_int(struct lp_build_sample_context *bld,
assert(0);
}
return coord;
lp_build_sample_partial_offset(uint_coord_bld, block_length, coord, stride,
out_offset, out_i);
}
/**
 * Build LLVM code that applies the texture wrap mode to a scaled integer
 * texcoord and to its successor (coord0 + 1), producing the relative offsets
 * of both texels along one axis.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are handled by
 * the scalar-pixel path; any other mode asserts and yields zero offsets.
 *
 * \param block_length  length of the pixel block along the coordinate axis
 * \param coord0  the incoming texcoord (s,t,r or q) scaled to the texture size
 * \param length  the texture size along one dimension
 * \param stride  pixel stride along the coordinate axis
 * \param is_pot  if TRUE, length is a power of two
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0  resulting sub-block pixel coordinate for coord0
 * \param i1  resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *ubld = &bld->uint_coord_bld;
   struct lp_build_context *ibld = &bld->int_coord_bld;
   LLVMValueRef last;

   if (block_length != 1) {
      /*
       * With multi-pixel blocks offset1 cannot easily be derived from
       * offset0, so wrap coord0 and coord0 + 1 independently.
       */
      LLVMValueRef coord1;

      lp_build_sample_wrap_nearest_int(bld, block_length,
                                       coord0, length, stride,
                                       is_pot, wrap_mode,
                                       offset0, i0);

      coord1 = lp_build_add(ibld, coord0, ibld->one);

      lp_build_sample_wrap_nearest_int(bld, block_length,
                                       coord1, length, stride,
                                       is_pot, wrap_mode,
                                       offset1, i1);
      return;
   }

   /*
    * Scalar pixels: there is no sub-block coordinate, and both offsets can be
    * derived from a single stride multiplication.
    */
   *i0 = ubld->zero;
   *i1 = ubld->zero;

   last = lp_build_sub(ibld, length, ibld->one);

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT: {
      LLVMValueRef not_last;
      LLVMValueRef next;

      if (is_pot) {
         coord0 = LLVMBuildAnd(bld->builder, coord0, last, "");
      }
      else {
         /* Unsigned remainder is used because the signed one gives wrong
          * results for negative dividends. */
         coord0 = LLVMBuildURem(bld->builder, coord0, length, "");
      }

      not_last = lp_build_compare(bld->builder, ibld->type,
                                  PIPE_FUNC_NOTEQUAL, coord0, last);

      *offset0 = lp_build_mul(ubld, coord0, stride);

      /*
       * The next texel is one stride further on, unless coord0 is the last
       * texel, where the AND clears the offset (presumably the comparison
       * yields an all-ones / all-zeros mask -- confirm in lp_build_compare).
       */
      next = lp_build_add(ubld, *offset0, stride);
      *offset1 = LLVMBuildAnd(bld->builder, next, not_last, "");
      break;
   }

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: {
      LLVMValueRef ge_zero;
      LLVMValueRef lt_last;
      LLVMValueRef interior;
      LLVMValueRef step;

      ge_zero = lp_build_compare(ibld->builder, ibld->type,
                                 PIPE_FUNC_GEQUAL, coord0, ibld->zero);
      lt_last = lp_build_compare(ibld->builder, ibld->type,
                                 PIPE_FUNC_LESS, coord0, last);

      /* Clamp coord0 to [0, length - 1]. */
      coord0 = lp_build_select(ibld, ge_zero, coord0, ibld->zero);
      coord0 = lp_build_select(ibld, lt_last, coord0, last);

      interior = LLVMBuildAnd(bld->builder, ge_zero, lt_last, "");

      *offset0 = lp_build_mul(ubld, coord0, stride);

      /* Advance by one stride only where coord0 was not clamped. */
      step = LLVMBuildAnd(bld->builder, stride, interior, "");
      *offset1 = lp_build_add(ubld, *offset0, step);
      break;
   }

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* Unsupported here: flag it, but still hand back defined values. */
      assert(0);
      *offset0 = ubld->zero;
      *offset1 = ubld->zero;
      break;
   }
}
@ -1741,14 +1820,18 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef i32_c8, i32_c128, i32_c255;
LLVMValueRef s_ipart, s_fpart, s_fpart_lo, s_fpart_hi;
LLVMValueRef t_ipart, t_fpart, t_fpart_lo, t_fpart_hi;
LLVMValueRef x0, x1;
LLVMValueRef y0, y1;
LLVMValueRef neighbors[2][2];
LLVMValueRef data_ptr;
LLVMValueRef x_stride, y_stride;
LLVMValueRef x_offset0, x_offset1;
LLVMValueRef y_offset0, y_offset1;
LLVMValueRef offset[2][2];
LLVMValueRef x_subcoord[2], y_subcoord[2];
LLVMValueRef neighbors_lo[2][2];
LLVMValueRef neighbors_hi[2][2];
LLVMValueRef packed, packed_lo, packed_hi;
LLVMValueRef unswizzled[4];
LLVMValueRef stride;
const unsigned level = 0;
unsigned i, j;
assert(bld->static_state->target == PIPE_TEXTURE_2D
|| bld->static_state->target == PIPE_TEXTURE_RECT);
@ -1795,21 +1878,30 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
x0 = s_ipart;
y0 = t_ipart;
x_stride = lp_build_const_vec(bld->uint_coord_bld.type,
bld->format_desc->block.bits/8);
x1 = lp_build_add(&bld->int_coord_bld, x0, bld->int_coord_bld.one);
y1 = lp_build_add(&bld->int_coord_bld, y0, bld->int_coord_bld.one);
y_stride = lp_build_get_const_level_stride_vec(bld, stride_array, level);
x0 = lp_build_sample_wrap_int(bld, x0, width, bld->static_state->pot_width,
bld->static_state->wrap_s);
y0 = lp_build_sample_wrap_int(bld, y0, height, bld->static_state->pot_height,
bld->static_state->wrap_t);
lp_build_sample_wrap_linear_int(bld,
bld->format_desc->block.width,
s_ipart, width, x_stride,
bld->static_state->pot_width,
bld->static_state->wrap_s,
&x_offset0, &x_offset1,
&x_subcoord[0], &x_subcoord[1]);
lp_build_sample_wrap_linear_int(bld,
bld->format_desc->block.height,
t_ipart, height, y_stride,
bld->static_state->pot_height,
bld->static_state->wrap_t,
&y_offset0, &y_offset1,
&y_subcoord[0], &y_subcoord[1]);
x1 = lp_build_sample_wrap_int(bld, x1, width, bld->static_state->pot_width,
bld->static_state->wrap_s);
y1 = lp_build_sample_wrap_int(bld, y1, height, bld->static_state->pot_height,
bld->static_state->wrap_t);
offset[0][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset0);
offset[0][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset0);
offset[1][0] = lp_build_add(&bld->uint_coord_bld, x_offset0, y_offset1);
offset[1][1] = lp_build_add(&bld->uint_coord_bld, x_offset1, y_offset1);
/*
* Transform 4 x i32 in
@ -1838,7 +1930,6 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
LLVMValueRef shuffles_hi[LP_MAX_VECTOR_LENGTH];
LLVMValueRef shuffle_lo;
LLVMValueRef shuffle_hi;
unsigned i, j;
for(j = 0; j < h16.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
@ -1866,7 +1957,10 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
t_fpart_hi = LLVMBuildShuffleVector(builder, t_fpart, h16.undef, shuffle_hi, "");
}
stride = lp_build_get_const_level_stride_vec(bld, stride_array, 0);
/*
* get pointer to mipmap level 0 data
*/
data_ptr = lp_build_get_const_mipmap_level(bld, data_array, level);
/*
* Fetch the pixels as 4 x 32bit (rgba order might differ):
@ -1885,20 +1979,38 @@ lp_build_sample_2d_linear_aos(struct lp_build_sample_context *bld,
* The higher 8 bits of the resulting elements will be zero.
*/
neighbors[0][0] = lp_build_sample_packed(bld, x0, y0, stride, data_array);
neighbors[0][1] = lp_build_sample_packed(bld, x1, y0, stride, data_array);
neighbors[1][0] = lp_build_sample_packed(bld, x0, y1, stride, data_array);
neighbors[1][1] = lp_build_sample_packed(bld, x1, y1, stride, data_array);
for (j = 0; j < 2; ++j) {
for (i = 0; i < 2; ++i) {
LLVMValueRef rgba8;
neighbors[0][0] = LLVMBuildBitCast(builder, neighbors[0][0], u8n_vec_type, "");
neighbors[0][1] = LLVMBuildBitCast(builder, neighbors[0][1], u8n_vec_type, "");
neighbors[1][0] = LLVMBuildBitCast(builder, neighbors[1][0], u8n_vec_type, "");
neighbors[1][1] = LLVMBuildBitCast(builder, neighbors[1][1], u8n_vec_type, "");
if (util_format_is_rgba8_variant(bld->format_desc)) {
/*
* Given the format is a rgba8, just read the pixels as is,
* without any swizzling. Swizzling will be done later.
*/
rgba8 = lp_build_gather(bld->builder,
bld->texel_type.length,
bld->format_desc->block.bits,
bld->texel_type.width,
data_ptr, offset[j][i]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][0], &neighbors_lo[0][0], &neighbors_hi[0][0]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[0][1], &neighbors_lo[0][1], &neighbors_hi[0][1]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][0], &neighbors_lo[1][0], &neighbors_hi[1][0]);
lp_build_unpack2(builder, u8n.type, h16.type, neighbors[1][1], &neighbors_lo[1][1], &neighbors_hi[1][1]);
rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
}
else {
rgba8 = lp_build_fetch_rgba_aos(bld->builder,
bld->format_desc,
u8n.type,
data_ptr, offset[j][i],
x_subcoord[i],
y_subcoord[j]);
}
lp_build_unpack2(builder, u8n.type, h16.type,
rgba8,
&neighbors_lo[j][i], &neighbors_hi[j][i]);
}
}
/*
* Linear interpolate with 8.8 fixed point.