gallivm: add image load/store/atomic support
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
This commit is contained in:
parent
15f7688ac9
commit
ceb8d0ac5a
|
@ -151,6 +151,16 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
LLVMValueRef cache,
|
||||
LLVMValueRef rgba_out[4]);
|
||||
|
||||
void
|
||||
lp_build_store_rgba_soa(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef exec_mask,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef out_of_bounds,
|
||||
const LLVMValueRef rgba_in[4]);
|
||||
|
||||
/*
|
||||
* YUV
|
||||
*/
|
||||
|
|
|
@ -42,7 +42,9 @@
|
|||
#include "lp_bld_format.h"
|
||||
#include "lp_bld_arit.h"
|
||||
#include "lp_bld_pack.h"
|
||||
|
||||
#include "lp_bld_flow.h"
|
||||
#include "lp_bld_printf.h"
|
||||
#include "lp_bld_intr.h"
|
||||
|
||||
static void
|
||||
convert_to_soa(struct gallivm_state *gallivm,
|
||||
|
@ -858,3 +860,230 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
|||
convert_to_soa(gallivm, aos_fetch, rgba_out, type);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lp_build_insert_soa_chan(struct lp_build_context *bld,
|
||||
unsigned blockbits,
|
||||
struct util_format_channel_description chan_desc,
|
||||
LLVMValueRef *output,
|
||||
LLVMValueRef rgba)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld->gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
struct lp_type type = bld->type;
|
||||
const unsigned width = chan_desc.size;
|
||||
const unsigned start = chan_desc.shift;
|
||||
const unsigned stop = start + width;
|
||||
LLVMValueRef chan;
|
||||
switch(chan_desc.type) {
|
||||
case UTIL_FORMAT_TYPE_UNSIGNED:
|
||||
|
||||
if (chan_desc.pure_integer)
|
||||
chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
|
||||
else if (type.floating) {
|
||||
if (chan_desc.normalized)
|
||||
chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba);
|
||||
else
|
||||
chan = LLVMBuildFPToSI(builder, rgba, bld->vec_type, "");
|
||||
}
|
||||
if (start)
|
||||
chan = LLVMBuildShl(builder, chan,
|
||||
lp_build_const_int_vec(gallivm, type, start), "");
|
||||
if (!*output)
|
||||
*output = chan;
|
||||
else
|
||||
*output = LLVMBuildOr(builder, *output, chan, "");
|
||||
break;
|
||||
case UTIL_FORMAT_TYPE_SIGNED:
|
||||
if (chan_desc.pure_integer)
|
||||
chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
|
||||
else if (type.floating) {
|
||||
uint32_t mask_val = (1UL << chan_desc.size) - 1;
|
||||
if (chan_desc.normalized) {
|
||||
char intrin[32];
|
||||
double scale = ((1 << (chan_desc.size - 1)) - 1);
|
||||
LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale);
|
||||
rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one);
|
||||
rgba = LLVMBuildFMul(builder, rgba, scale_val, "");
|
||||
lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type);
|
||||
rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba);
|
||||
}
|
||||
chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, "");
|
||||
chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, mask_val), "");
|
||||
}
|
||||
if (start)
|
||||
chan = LLVMBuildShl(builder, chan,
|
||||
lp_build_const_int_vec(gallivm, type, start), "");
|
||||
if (!*output)
|
||||
*output = chan;
|
||||
else
|
||||
*output = LLVMBuildOr(builder, *output, chan, "");
|
||||
break;
|
||||
case UTIL_FORMAT_TYPE_FLOAT:
|
||||
if (type.floating) {
|
||||
if (chan_desc.size == 16) {
|
||||
chan = lp_build_float_to_half(gallivm, rgba);
|
||||
chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, "");
|
||||
if (start)
|
||||
chan = LLVMBuildShl(builder, chan,
|
||||
lp_build_const_int_vec(gallivm, type, start), "");
|
||||
if (!*output)
|
||||
*output = chan;
|
||||
else
|
||||
*output = LLVMBuildOr(builder, *output, chan, "");
|
||||
} else {
|
||||
assert(start == 0);
|
||||
assert(stop == 32);
|
||||
assert(type.width == 32);
|
||||
*output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, "");
|
||||
}
|
||||
} else
|
||||
assert(0);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
*output = bld->undef;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lp_build_pack_rgba_soa(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
const LLVMValueRef rgba_in[4],
|
||||
LLVMValueRef *packed)
|
||||
{
|
||||
unsigned chan;
|
||||
struct lp_build_context bld;
|
||||
assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN);
|
||||
assert(format_desc->block.width == 1);
|
||||
assert(format_desc->block.height == 1);
|
||||
assert(format_desc->block.bits <= type.width);
|
||||
/* FIXME: Support more output types */
|
||||
assert(type.width == 32);
|
||||
|
||||
lp_build_context_init(&bld, gallivm, type);
|
||||
for (chan = 0; chan < format_desc->nr_channels; ++chan) {
|
||||
struct util_format_channel_description chan_desc = format_desc->channel[chan];
|
||||
|
||||
lp_build_insert_soa_chan(&bld, format_desc->block.bits,
|
||||
chan_desc,
|
||||
packed,
|
||||
rgba_in[chan]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lp_build_store_rgba_soa(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef exec_mask,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef out_of_bounds,
|
||||
const LLVMValueRef rgba_in[4])
|
||||
{
|
||||
enum pipe_format format = format_desc->format;
|
||||
LLVMValueRef packed[4] = {};
|
||||
unsigned num_stores;
|
||||
|
||||
if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
|
||||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
|
||||
format_desc->block.width == 1 &&
|
||||
format_desc->block.height == 1 &&
|
||||
format_desc->block.bits <= type.width &&
|
||||
(format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT ||
|
||||
format_desc->channel[0].size == 32 ||
|
||||
format_desc->channel[0].size == 16))
|
||||
{
|
||||
lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]);
|
||||
|
||||
num_stores = 1;
|
||||
} else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN &&
|
||||
(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) &&
|
||||
format_desc->block.width == 1 &&
|
||||
format_desc->block.height == 1 &&
|
||||
format_desc->block.bits > type.width &&
|
||||
((format_desc->block.bits <= type.width * type.length &&
|
||||
format_desc->channel[0].size <= type.width) ||
|
||||
(format_desc->channel[0].size == 64 &&
|
||||
format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&
|
||||
type.floating)))
|
||||
{
|
||||
/*
|
||||
* Similar to above, but the packed pixel is larger than what fits
|
||||
* into an element of the destination format. The packed pixels will be
|
||||
* shuffled into SoA vectors appropriately, and then the extraction will
|
||||
* be done in parallel as much as possible.
|
||||
* Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so
|
||||
* the gathered vectors can be shuffled easily (even with avx).
|
||||
* 64xn float -> 32xn float is handled too but it's a bit special as
|
||||
* it does the conversion pre-shuffle.
|
||||
*/
|
||||
struct lp_build_context bld;
|
||||
|
||||
lp_build_context_init(&bld, gallivm, type);
|
||||
assert(type.width == 32);
|
||||
assert(format_desc->block.bits > type.width);
|
||||
|
||||
unsigned store_width = util_next_power_of_two(format_desc->block.bits);
|
||||
num_stores = store_width / type.width;
|
||||
for (unsigned i = 0; i < format_desc->nr_channels; i++) {
|
||||
struct util_format_channel_description chan_desc = format_desc->channel[i];
|
||||
unsigned blockbits = type.width;
|
||||
unsigned vec_nr;
|
||||
|
||||
vec_nr = chan_desc.shift / type.width;
|
||||
chan_desc.shift %= type.width;
|
||||
|
||||
lp_build_insert_soa_chan(&bld, blockbits,
|
||||
chan_desc,
|
||||
&packed[vec_nr],
|
||||
rgba_in[i]);
|
||||
}
|
||||
|
||||
assert(num_stores == 4 || num_stores == 2);
|
||||
/* we can transpose and store at the same time */
|
||||
} else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
|
||||
packed[0] = lp_build_float_to_r11g11b10(gallivm, rgba_in);
|
||||
num_stores = 1;
|
||||
} else
|
||||
assert(0);
|
||||
|
||||
assert(exec_mask);
|
||||
|
||||
LLVMTypeRef int32_ptr_type = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0);
|
||||
LLVMTypeRef int16_ptr_type = LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0);
|
||||
LLVMTypeRef int8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
|
||||
|
||||
LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
|
||||
should_store_mask = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
|
||||
for (unsigned i = 0; i < num_stores; i++) {
|
||||
struct lp_build_loop_state loop_state;
|
||||
|
||||
LLVMValueRef store_offset = LLVMBuildAdd(gallivm->builder, offset, lp_build_const_int_vec(gallivm, type, i * 4), "");
|
||||
store_offset = LLVMBuildGEP(gallivm->builder, base_ptr, &store_offset, 1, "");
|
||||
|
||||
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
|
||||
|
||||
struct lp_build_if_state ifthen;
|
||||
LLVMValueRef cond = LLVMBuildExtractElement(gallivm->builder, should_store_mask, loop_state.counter, "");
|
||||
lp_build_if(&ifthen, gallivm, cond);
|
||||
|
||||
LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed[i], loop_state.counter, "");
|
||||
LLVMValueRef this_offset = LLVMBuildExtractElement(gallivm->builder, store_offset, loop_state.counter, "");
|
||||
|
||||
if (format_desc->block.bits == 8) {
|
||||
this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int8_ptr_type, "");
|
||||
data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt8TypeInContext(gallivm->context), "");
|
||||
} else if (format_desc->block.bits == 16) {
|
||||
this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int16_ptr_type, "");
|
||||
data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt16TypeInContext(gallivm->context), "");
|
||||
} else
|
||||
this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int32_ptr_type, "");
|
||||
LLVMBuildStore(gallivm->builder, data, this_offset);
|
||||
lp_build_endif(&ifthen);
|
||||
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
|
||||
NULL, LLVMIntUGE);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -125,6 +125,41 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state,
|
|||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize lp_sampler_static_texture_state object with the gallium
|
||||
* texture/sampler_view state (this contains the parts which are
|
||||
* considered static).
|
||||
*/
|
||||
void
|
||||
lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
|
||||
const struct pipe_image_view *view)
|
||||
{
|
||||
const struct pipe_resource *resource;
|
||||
|
||||
memset(state, 0, sizeof *state);
|
||||
|
||||
if (!view || !view->resource)
|
||||
return;
|
||||
|
||||
resource = view->resource;
|
||||
|
||||
state->format = view->format;
|
||||
state->swizzle_r = PIPE_SWIZZLE_X;
|
||||
state->swizzle_g = PIPE_SWIZZLE_Y;
|
||||
state->swizzle_b = PIPE_SWIZZLE_Z;
|
||||
state->swizzle_a = PIPE_SWIZZLE_W;
|
||||
|
||||
state->target = view->resource->target;
|
||||
state->pot_width = util_is_power_of_two_or_zero(resource->width0);
|
||||
state->pot_height = util_is_power_of_two_or_zero(resource->height0);
|
||||
state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
|
||||
state->level_zero_only = 0;
|
||||
|
||||
/*
|
||||
* the layer / element / level parameters are all either dynamic
|
||||
* state or handled transparently wrt execution.
|
||||
*/
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize lp_sampler_static_sampler_state object with the gallium sampler
|
||||
|
|
|
@ -49,6 +49,7 @@ extern "C" {
|
|||
struct pipe_resource;
|
||||
struct pipe_sampler_view;
|
||||
struct pipe_sampler_state;
|
||||
struct pipe_image_view;
|
||||
struct util_format_description;
|
||||
struct lp_type;
|
||||
struct lp_build_context;
|
||||
|
@ -122,6 +123,27 @@ struct lp_sampler_size_query_params
|
|||
LLVMValueRef explicit_lod;
|
||||
LLVMValueRef *sizes_out;
|
||||
};
|
||||
|
||||
#define LP_IMG_LOAD 0
|
||||
#define LP_IMG_STORE 1
|
||||
#define LP_IMG_ATOMIC 2
|
||||
#define LP_IMG_ATOMIC_CAS 3
|
||||
|
||||
struct lp_img_params
|
||||
{
|
||||
struct lp_type type;
|
||||
unsigned image_index;
|
||||
unsigned img_op;
|
||||
unsigned target;
|
||||
LLVMAtomicRMWBinOp op;
|
||||
LLVMValueRef exec_mask;
|
||||
LLVMValueRef context_ptr;
|
||||
LLVMValueRef thread_data_ptr;
|
||||
const LLVMValueRef *coords;
|
||||
LLVMValueRef indata[4];
|
||||
LLVMValueRef indata2[4];
|
||||
LLVMValueRef *outdata;
|
||||
};
|
||||
/**
|
||||
* Texture static state.
|
||||
*
|
||||
|
@ -489,6 +511,9 @@ void
|
|||
lp_sampler_static_texture_state(struct lp_static_texture_state *state,
|
||||
const struct pipe_sampler_view *view);
|
||||
|
||||
/**
 * Fill in the static texture state for an image view; the state is
 * zeroed when no view or resource is supplied.
 */
void
lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
                                      const struct pipe_image_view *view);
|
||||
|
||||
void
|
||||
lp_build_lod_selector(struct lp_build_sample_context *bld,
|
||||
|
@ -639,6 +664,12 @@ lp_build_minify(struct lp_build_context *bld,
|
|||
LLVMValueRef level,
|
||||
boolean lod_scalar);
|
||||
|
||||
/**
 * Emit code for one image load, store or atomic operation, as selected
 * by params->img_op.
 */
void
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_img_params *params);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -61,6 +61,7 @@
|
|||
#include "lp_bld_quad.h"
|
||||
#include "lp_bld_pack.h"
|
||||
#include "lp_bld_intr.h"
|
||||
#include "lp_bld_misc.h"
|
||||
|
||||
|
||||
/**
|
||||
|
@ -3947,3 +3948,185 @@ lp_build_size_query_soa(struct gallivm_state *gallivm,
|
|||
num_levels);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lp_build_do_atomic_soa(struct gallivm_state *gallivm,
|
||||
const struct util_format_description *format_desc,
|
||||
struct lp_type type,
|
||||
LLVMValueRef exec_mask,
|
||||
LLVMValueRef base_ptr,
|
||||
LLVMValueRef offset,
|
||||
LLVMValueRef out_of_bounds,
|
||||
unsigned img_op,
|
||||
LLVMAtomicRMWBinOp op,
|
||||
const LLVMValueRef rgba_in[4],
|
||||
const LLVMValueRef rgba2_in[4],
|
||||
LLVMValueRef atomic_result[4])
|
||||
{
|
||||
enum pipe_format format = format_desc->format;
|
||||
|
||||
if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT)
|
||||
return;
|
||||
|
||||
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
||||
LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length), "");
|
||||
|
||||
offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
|
||||
struct lp_build_loop_state loop_state;
|
||||
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
|
||||
struct lp_build_if_state ifthen;
|
||||
LLVMValueRef cond;
|
||||
LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];
|
||||
|
||||
LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask");
|
||||
assert(exec_mask);
|
||||
|
||||
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), "");
|
||||
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
|
||||
lp_build_if(&ifthen, gallivm, cond);
|
||||
|
||||
LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
|
||||
LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
|
||||
cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
|
||||
data = LLVMBuildBitCast(gallivm->builder, data, LLVMInt32TypeInContext(gallivm->context), "");
|
||||
|
||||
if (img_op == LP_IMG_ATOMIC_CAS) {
|
||||
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, packed2, loop_state.counter, "");
|
||||
LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, LLVMInt32TypeInContext(gallivm->context), "");
|
||||
data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
|
||||
cas_src,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
false);
|
||||
data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
|
||||
} else {
|
||||
data = LLVMBuildAtomicRMW(gallivm->builder, op,
|
||||
cast_base_ptr, data,
|
||||
LLVMAtomicOrderingSequentiallyConsistent,
|
||||
false);
|
||||
}
|
||||
|
||||
LLVMValueRef temp_res = LLVMBuildLoad(gallivm->builder, atom_res, "");
|
||||
temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data, loop_state.counter, "");
|
||||
LLVMBuildStore(gallivm->builder, temp_res, atom_res);
|
||||
|
||||
lp_build_endif(&ifthen);
|
||||
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length),
|
||||
NULL, LLVMIntUGE);
|
||||
atomic_result[0] = LLVMBuildLoad(gallivm->builder, atom_res, "");
|
||||
}
|
||||
|
||||
void
|
||||
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
|
||||
struct lp_sampler_dynamic_state *dynamic_state,
|
||||
struct gallivm_state *gallivm,
|
||||
const struct lp_img_params *params)
|
||||
{
|
||||
unsigned target = params->target;
|
||||
unsigned dims = texture_dims(target);
|
||||
/** regular scalar int type */
|
||||
struct lp_type int_type, int_coord_type;
|
||||
struct lp_build_context int_bld, int_coord_bld;
|
||||
const struct util_format_description *format_desc = util_format_description(static_texture_state->format);
|
||||
LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2];
|
||||
LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
|
||||
int_type = lp_type_int(32);
|
||||
int_coord_type = lp_int_type(params->type);
|
||||
lp_build_context_init(&int_bld, gallivm, int_type);
|
||||
lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);
|
||||
|
||||
LLVMValueRef offset, i, j;
|
||||
|
||||
LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm,
|
||||
params->context_ptr, params->image_index);
|
||||
boolean layer_coord = has_layer_coord(target);
|
||||
|
||||
width = lp_build_broadcast_scalar(&int_coord_bld, width);
|
||||
if (dims >= 2) {
|
||||
height = lp_build_broadcast_scalar(&int_coord_bld, height);
|
||||
row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
|
||||
}
|
||||
if (dims >= 3 || layer_coord) {
|
||||
depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
|
||||
img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
|
||||
}
|
||||
|
||||
LLVMValueRef out_of_bounds = int_coord_bld.zero;
|
||||
LLVMValueRef out1;
|
||||
out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
|
||||
out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
|
||||
|
||||
if (dims >= 2) {
|
||||
out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
|
||||
out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
|
||||
}
|
||||
if (dims >= 3) {
|
||||
out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
|
||||
out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
|
||||
}
|
||||
lp_build_sample_offset(&int_coord_bld,
|
||||
format_desc,
|
||||
x, y, z, row_stride_vec, img_stride_vec,
|
||||
&offset, &i, &j);
|
||||
|
||||
if (params->img_op == LP_IMG_LOAD) {
|
||||
struct lp_type texel_type = params->type;
|
||||
if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
|
||||
format_desc->channel[0].pure_integer) {
|
||||
if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
|
||||
texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length);
|
||||
} else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
|
||||
texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length);
|
||||
}
|
||||
}
|
||||
|
||||
if (static_texture_state->format == PIPE_FORMAT_NONE) {
|
||||
/*
|
||||
* If there's nothing bound, format is NONE, and we must return
|
||||
* all zero as mandated by d3d10 in this case.
|
||||
*/
|
||||
unsigned chan;
|
||||
LLVMValueRef zero = lp_build_zero(gallivm, params->type);
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
params->outdata[chan] = zero;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
|
||||
struct lp_build_context texel_bld;
|
||||
lp_build_context_init(&texel_bld, gallivm, texel_type);
|
||||
lp_build_fetch_rgba_soa(gallivm,
|
||||
format_desc,
|
||||
texel_type, TRUE,
|
||||
base_ptr, offset,
|
||||
i, j,
|
||||
NULL,
|
||||
params->outdata);
|
||||
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
params->outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
|
||||
texel_bld.zero, params->outdata[chan]);
|
||||
}
|
||||
} else if (params->img_op == LP_IMG_STORE) {
|
||||
if (static_texture_state->format == PIPE_FORMAT_NONE)
|
||||
return;
|
||||
lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
|
||||
params->indata);
|
||||
} else {
|
||||
if (static_texture_state->format == PIPE_FORMAT_NONE)
|
||||
return;
|
||||
lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds,
|
||||
params->img_op, params->op, params->indata, params->indata2, params->outdata);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -309,6 +309,8 @@ analyse_instruction(struct analysis_context *ctx,
|
|||
continue;
|
||||
} else if (dst->File == TGSI_FILE_BUFFER) {
|
||||
continue;
|
||||
} else if (dst->File == TGSI_FILE_IMAGE) {
|
||||
continue;
|
||||
} else {
|
||||
assert(0);
|
||||
continue;
|
||||
|
|
|
@ -3392,6 +3392,79 @@ lod_emit(
|
|||
FALSE, LP_SAMPLER_OP_LODQ, emit_data->output);
|
||||
}
|
||||
|
||||
static void target_to_dims_layer(unsigned target,
|
||||
unsigned *dims,
|
||||
unsigned *layer_coord)
|
||||
{
|
||||
*layer_coord = 0;
|
||||
switch (target) {
|
||||
case TGSI_TEXTURE_1D:
|
||||
case TGSI_TEXTURE_BUFFER:
|
||||
*dims = 1;
|
||||
break;
|
||||
case TGSI_TEXTURE_1D_ARRAY:
|
||||
*layer_coord = 1;
|
||||
*dims = 1;
|
||||
break;
|
||||
case TGSI_TEXTURE_2D:
|
||||
case TGSI_TEXTURE_RECT:
|
||||
*dims = 2;
|
||||
break;
|
||||
case TGSI_TEXTURE_2D_ARRAY:
|
||||
*layer_coord = 2;
|
||||
*dims = 2;
|
||||
break;
|
||||
case TGSI_TEXTURE_3D:
|
||||
case TGSI_TEXTURE_CUBE:
|
||||
case TGSI_TEXTURE_CUBE_ARRAY:
|
||||
*dims = 3;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
img_load_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
struct lp_img_params params;
|
||||
LLVMValueRef coords[5];
|
||||
LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
|
||||
unsigned dims;
|
||||
unsigned target = emit_data->inst->Memory.Texture;
|
||||
unsigned layer_coord;
|
||||
|
||||
target_to_dims_layer(target, &dims, &layer_coord);
|
||||
|
||||
for (unsigned i = 0; i < dims; i++) {
|
||||
coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
|
||||
}
|
||||
for (unsigned i = dims; i < 5; i++) {
|
||||
coords[i] = coord_undef;
|
||||
}
|
||||
if (layer_coord)
|
||||
coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
|
||||
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
|
||||
params.type = bld->bld_base.base.type;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.coords = coords;
|
||||
params.outdata = emit_data->output;
|
||||
params.target = tgsi_to_pipe_tex_target(target);
|
||||
params.image_index = emit_data->inst->Src[0].Register.Index;
|
||||
params.img_op = LP_IMG_LOAD;
|
||||
bld->image->emit_op(bld->image,
|
||||
bld->bld_base.base.gallivm,
|
||||
¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
load_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
|
@ -3403,10 +3476,12 @@ load_emit(
|
|||
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
|
||||
const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
|
||||
unsigned buf = bufreg->Register.Index;
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER);
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
|
||||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||
|
||||
if (0) {
|
||||
if (bufreg->Register.File == TGSI_FILE_IMAGE)
|
||||
img_load_emit(action, bld_base, emit_data);
|
||||
else if (0) {
|
||||
/* for indirect support with ARB_gpu_shader5 */
|
||||
} else {
|
||||
LLVMValueRef index;
|
||||
|
@ -3461,6 +3536,48 @@ load_emit(
|
|||
}
|
||||
}
|
||||
|
||||
static void
|
||||
img_store_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
struct lp_img_params params;
|
||||
LLVMValueRef coords[5];
|
||||
LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
|
||||
unsigned dims;
|
||||
unsigned target = emit_data->inst->Memory.Texture;
|
||||
unsigned layer_coord;
|
||||
|
||||
target_to_dims_layer(target, &dims, &layer_coord);
|
||||
for (unsigned i = 0; i < dims; i++) {
|
||||
coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
|
||||
}
|
||||
for (unsigned i = dims; i < 5; i++) {
|
||||
coords[i] = coord_undef;
|
||||
}
|
||||
if (layer_coord)
|
||||
coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
|
||||
params.type = bld->bld_base.base.type;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.coords = coords;
|
||||
params.outdata = NULL;
|
||||
params.exec_mask = mask_vec(bld_base);
|
||||
params.target = tgsi_to_pipe_tex_target(target);
|
||||
params.image_index = emit_data->inst->Dst[0].Register.Index;
|
||||
params.img_op = LP_IMG_STORE;
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
|
||||
|
||||
bld->image->emit_op(bld->image,
|
||||
bld->bld_base.base.gallivm,
|
||||
¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
store_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
|
@ -3473,9 +3590,11 @@ store_emit(
|
|||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||
const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
|
||||
unsigned buf = bufreg->Register.Index;
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER);
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
|
||||
|
||||
if (0) {
|
||||
if (bufreg->Register.File == TGSI_FILE_IMAGE) {
|
||||
img_store_emit(action, bld_base, emit_data);
|
||||
} else if (0) {
|
||||
|
||||
} else {
|
||||
LLVMValueRef index; /* index into the const buffer */
|
||||
|
@ -3539,11 +3658,74 @@ resq_emit(
|
|||
const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
|
||||
|
||||
unsigned buf = bufreg->Register.Index;
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER);
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
|
||||
|
||||
LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
|
||||
if (bufreg->Register.File == TGSI_FILE_IMAGE) {
|
||||
unsigned target = emit_data->inst->Memory.Texture;
|
||||
struct lp_sampler_size_query_params params = { 0 };
|
||||
params.int_type = bld->bld_base.int_bld.type;
|
||||
params.texture_unit = buf;
|
||||
params.target = tgsi_to_pipe_tex_target(target);
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.sizes_out = emit_data->output;
|
||||
|
||||
emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
|
||||
bld->image->emit_size_query(bld->image,
|
||||
bld->bld_base.base.gallivm,
|
||||
¶ms);
|
||||
} else {
|
||||
LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
|
||||
|
||||
emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
img_atomic_emit(
|
||||
const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_tgsi_context * bld_base,
|
||||
struct lp_build_emit_data * emit_data,
|
||||
LLVMAtomicRMWBinOp op)
|
||||
{
|
||||
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
|
||||
struct lp_img_params params;
|
||||
LLVMValueRef coords[5];
|
||||
LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
|
||||
unsigned dims;
|
||||
unsigned layer_coord;
|
||||
unsigned target = emit_data->inst->Memory.Texture;
|
||||
|
||||
target_to_dims_layer(target, &dims, &layer_coord);
|
||||
|
||||
for (unsigned i = 0; i < dims; i++) {
|
||||
coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
|
||||
}
|
||||
for (unsigned i = dims; i < 5; i++) {
|
||||
coords[i] = coord_undef;
|
||||
}
|
||||
if (layer_coord)
|
||||
coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
|
||||
memset(¶ms, 0, sizeof(params));
|
||||
|
||||
params.type = bld->bld_base.base.type;
|
||||
params.context_ptr = bld->context_ptr;
|
||||
params.thread_data_ptr = bld->thread_data_ptr;
|
||||
params.exec_mask = mask_vec(bld_base);
|
||||
params.image_index = emit_data->inst->Src[0].Register.Index;
|
||||
params.coords = coords;
|
||||
params.target = tgsi_to_pipe_tex_target(target);
|
||||
params.op = op;
|
||||
params.outdata = emit_data->output;
|
||||
params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
|
||||
if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
|
||||
}
|
||||
bld->image->emit_op(bld->image,
|
||||
bld->bld_base.base.gallivm,
|
||||
¶ms);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -3558,7 +3740,7 @@ atomic_emit(
|
|||
struct lp_build_context *uint_bld = &bld_base->uint_bld;
|
||||
const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
|
||||
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER);
|
||||
assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
|
||||
unsigned buf = bufreg->Register.Index;
|
||||
|
||||
LLVMAtomicRMWBinOp op;
|
||||
|
@ -3597,7 +3779,9 @@ atomic_emit(
|
|||
return;
|
||||
}
|
||||
|
||||
if (0) {
|
||||
if (bufreg->Register.File == TGSI_FILE_IMAGE) {
|
||||
img_atomic_emit(action, bld_base, emit_data, op);
|
||||
} else if (0) {
|
||||
} else {
|
||||
LLVMValueRef index; /* index into the const buffer */
|
||||
LLVMValueRef scalar, scalar_ptr;
|
||||
|
|
Loading…
Reference in New Issue