gallivm: Fall back to calling util_format_description::fetch_float for any format we can't generate LLVM IR for directly.
This commit is contained in:
parent
306835cc0f
commit
728741a3bd
|
@ -80,4 +80,15 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
|
|||
LLVMValueRef *rgba);
|
||||
|
||||
|
||||
void
|
||||
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offsets,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef *rgba);
|
||||
|
||||
|
||||
#endif /* !LP_BLD_FORMAT_H */
|
||||
|
|
|
@ -27,10 +27,14 @@
|
|||
|
||||
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
#include "lp_bld_type.h"
|
||||
#include "lp_bld_const.h"
|
||||
#include "lp_bld_conv.h"
|
||||
#include "lp_bld_sample.h" /* for lp_build_gather */
|
||||
#include "lp_bld_init.h"
|
||||
#include "lp_bld_format.h"
|
||||
|
||||
|
||||
|
@ -240,3 +244,139 @@ lp_build_unpack_rgba_soa(LLVMBuilderRef builder,
|
|||
|
||||
lp_build_format_swizzle_soa(format_desc, type, inputs, rgba);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Fetch a pixel into a SoA.
|
||||
*
|
||||
* i and j are the sub-block pixel coordinates.
|
||||
*/
|
||||
void
|
||||
lp_build_fetch_rgba_soa(LLVMBuilderRef builder,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef i,
|
||||
LLVMValueRef j,
|
||||
LLVMValueRef *rgba)
|
||||
{
|
||||
|
||||
if (format_desc->block.width == 1 &&
|
||||
format_desc->block.height == 1 &&
|
||||
format_desc->block.bits <= type.width)
|
||||
{
|
||||
/*
|
||||
* The packed pixel fits into an element of the destination format. Put
|
||||
* the packed pixels into a vector and estract each component for all
|
||||
* vector elements in parallel.
|
||||
*/
|
||||
|
||||
LLVMValueRef packed;
|
||||
|
||||
/*
|
||||
* gather the texels from the texture
|
||||
*/
|
||||
packed = lp_build_gather(builder,
|
||||
type.length,
|
||||
format_desc->block.bits,
|
||||
type.width,
|
||||
base_ptr, offset);
|
||||
|
||||
/*
|
||||
* convert texels to float rgba
|
||||
*/
|
||||
lp_build_unpack_rgba_soa(builder,
|
||||
format_desc,
|
||||
type,
|
||||
packed, rgba);
|
||||
}
|
||||
else {
|
||||
/*
|
||||
* Fallback to calling util_format_description::fetch_float for each
|
||||
* pixel.
|
||||
*
|
||||
* This is definitely not the most efficient way of fetching pixels, as
|
||||
* we miss the opportunity to do vectorization, but this it is a
|
||||
* convenient for formats or scenarios for which there was no opportunity
|
||||
* or incentive to optimize.
|
||||
*/
|
||||
|
||||
LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(LLVMGetInsertBlock(builder)));
|
||||
char name[256];
|
||||
LLVMValueRef function;
|
||||
LLVMValueRef tmp;
|
||||
unsigned k, chan;
|
||||
|
||||
assert(type.floating);
|
||||
|
||||
util_snprintf(name, sizeof name, "util_format_%s_fetch_float", format_desc->short_name);
|
||||
|
||||
/*
|
||||
* Declare and bind format_desc->fetch_float().
|
||||
*/
|
||||
|
||||
function = LLVMGetNamedFunction(module, name);
|
||||
if (!function) {
|
||||
LLVMTypeRef ret_type;
|
||||
LLVMTypeRef arg_types[4];
|
||||
LLVMTypeRef function_type;
|
||||
|
||||
ret_type = LLVMVoidType();
|
||||
arg_types[0] = LLVMPointerType(LLVMFloatType(), 0);
|
||||
arg_types[1] = LLVMPointerType(LLVMInt8Type(), 0);
|
||||
arg_types[3] = arg_types[2] = LLVMIntType(sizeof(unsigned) * 8);
|
||||
function_type = LLVMFunctionType(ret_type, arg_types, Elements(arg_types), 0);
|
||||
function = LLVMAddFunction(module, name, function_type);
|
||||
|
||||
LLVMSetFunctionCallConv(function, LLVMCCallConv);
|
||||
LLVMSetLinkage(function, LLVMExternalLinkage);
|
||||
|
||||
assert(LLVMIsDeclaration(function));
|
||||
|
||||
LLVMAddGlobalMapping(lp_build_engine, function, format_desc->fetch_float);
|
||||
}
|
||||
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
rgba[chan] = lp_build_undef(type);
|
||||
}
|
||||
|
||||
tmp = LLVMBuildArrayAlloca(builder,
|
||||
LLVMFloatType(),
|
||||
LLVMConstInt(LLVMInt32Type(), 4, 0),
|
||||
"");
|
||||
|
||||
/*
|
||||
* Invoke format_desc->fetch_float() for each pixel and insert the result
|
||||
* in the SoA vectors.
|
||||
*/
|
||||
|
||||
for(k = 0; k < type.length; ++k) {
|
||||
LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), k, 0);
|
||||
LLVMValueRef offset_elem;
|
||||
LLVMValueRef ptr;
|
||||
LLVMValueRef i_elem, j_elem;
|
||||
LLVMValueRef args[4];
|
||||
|
||||
offset_elem = LLVMBuildExtractElement(builder, offset, index, "");
|
||||
ptr = LLVMBuildGEP(builder, base_ptr, &offset_elem, 1, "");
|
||||
|
||||
i_elem = LLVMBuildExtractElement(builder, i, index, "");
|
||||
j_elem = LLVMBuildExtractElement(builder, j, index, "");
|
||||
|
||||
args[0] = tmp;
|
||||
args[1] = ptr;
|
||||
args[2] = i_elem;
|
||||
args[3] = j_elem;
|
||||
|
||||
LLVMBuildCall(builder, function, args, 4, "");
|
||||
|
||||
for (chan = 0; chan < 4; ++chan) {
|
||||
LLVMValueRef chan_val = LLVMConstInt(LLVMInt32Type(), chan, 0),
|
||||
tmp_chan = LLVMBuildGEP(builder, tmp, &chan_val, 1, "");
|
||||
tmp_chan = LLVMBuildLoad(builder, tmp_chan, "");
|
||||
rgba[chan] = LLVMBuildInsertElement(builder, rgba[chan], tmp_chan, index, "");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,9 +157,10 @@ lp_build_gather(LLVMBuilderRef builder,
|
|||
|
||||
|
||||
/**
|
||||
* Compute the offset of a pixel.
|
||||
* Compute the offset of a pixel block.
|
||||
*
|
||||
* x, y, z, y_stride, z_stride are vectors
|
||||
* x, y, z, y_stride, z_stride are vectors, and they refer to pixel blocks, as
|
||||
* per format description, and not individual pixels.
|
||||
*/
|
||||
LLVMValueRef
|
||||
lp_build_sample_offset(struct lp_build_context *bld,
|
||||
|
|
|
@ -211,7 +211,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
|
|||
const int dims = texture_dims(bld->static_state->target);
|
||||
struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
|
||||
LLVMValueRef offset;
|
||||
LLVMValueRef packed;
|
||||
LLVMValueRef i, j;
|
||||
LLVMValueRef use_border = NULL;
|
||||
|
||||
/* use_border = x < 0 || x >= width || y < 0 || y >= height */
|
||||
|
@ -248,6 +248,43 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Describe the coordinates in terms of pixel blocks.
|
||||
*
|
||||
* TODO: pixel blocks are power of two. LLVM should convert rem/div to
|
||||
* bit arithmetic. Verify this.
|
||||
*/
|
||||
|
||||
if (bld->format_desc->block.width == 1) {
|
||||
i = bld->uint_coord_bld.zero;
|
||||
}
|
||||
else {
|
||||
LLVMValueRef block_width = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.width);
|
||||
i = LLVMBuildURem(bld->builder, x, block_width, "");
|
||||
x = LLVMBuildUDiv(bld->builder, x, block_width, "");
|
||||
}
|
||||
|
||||
if (bld->format_desc->block.height == 1) {
|
||||
j = bld->uint_coord_bld.zero;
|
||||
}
|
||||
else {
|
||||
LLVMValueRef block_height = lp_build_const_int_vec(bld->uint_coord_bld.type, bld->format_desc->block.height);
|
||||
j = LLVMBuildURem(bld->builder, y, block_height, "");
|
||||
y = LLVMBuildUDiv(bld->builder, y, block_height, "");
|
||||
}
|
||||
|
||||
/* convert x,y,z coords to linear offset from start of texture, in bytes */
|
||||
offset = lp_build_sample_offset(&bld->uint_coord_bld,
|
||||
bld->format_desc,
|
||||
x, y, z, y_stride, z_stride);
|
||||
|
||||
lp_build_fetch_rgba_soa(bld->builder,
|
||||
bld->format_desc,
|
||||
bld->texel_type,
|
||||
data_ptr, offset,
|
||||
i, j,
|
||||
texel);
|
||||
|
||||
/*
|
||||
* Note: if we find an app which frequently samples the texture border
|
||||
* we might want to implement a true conditional here to avoid sampling
|
||||
|
@ -263,30 +300,6 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
|
|||
* the texel color results with the border color.
|
||||
*/
|
||||
|
||||
/* convert x,y,z coords to linear offset from start of texture, in bytes */
|
||||
offset = lp_build_sample_offset(&bld->uint_coord_bld,
|
||||
bld->format_desc,
|
||||
x, y, z, y_stride, z_stride);
|
||||
|
||||
assert(bld->format_desc->block.width == 1);
|
||||
assert(bld->format_desc->block.height == 1);
|
||||
assert(bld->format_desc->block.bits <= bld->texel_type.width);
|
||||
|
||||
/* gather the texels from the texture */
|
||||
packed = lp_build_gather(bld->builder,
|
||||
bld->texel_type.length,
|
||||
bld->format_desc->block.bits,
|
||||
bld->texel_type.width,
|
||||
data_ptr, offset);
|
||||
|
||||
texel[0] = texel[1] = texel[2] = texel[3] = NULL;
|
||||
|
||||
/* convert texels to float rgba */
|
||||
lp_build_unpack_rgba_soa(bld->builder,
|
||||
bld->format_desc,
|
||||
bld->texel_type,
|
||||
packed, texel);
|
||||
|
||||
if (use_border) {
|
||||
/* select texel color or border color depending on use_border */
|
||||
int chan;
|
||||
|
|
Loading…
Reference in New Issue