From ceb8d0ac5ac665a1cbb2537586d192374ab1e327 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 19 Jul 2019 19:06:48 +1000 Subject: [PATCH] gallivm: add image load/store/atomic support Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/gallivm/lp_bld_format.h | 10 + .../auxiliary/gallivm/lp_bld_format_soa.c | 231 +++++++++++++++++- src/gallium/auxiliary/gallivm/lp_bld_sample.c | 35 +++ src/gallium/auxiliary/gallivm/lp_bld_sample.h | 31 +++ .../auxiliary/gallivm/lp_bld_sample_soa.c | 183 ++++++++++++++ .../auxiliary/gallivm/lp_bld_tgsi_info.c | 2 + .../auxiliary/gallivm/lp_bld_tgsi_soa.c | 202 ++++++++++++++- 7 files changed, 684 insertions(+), 10 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index ade8825e1fb..cd1eaec9e5f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -151,6 +151,16 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef cache, LLVMValueRef rgba_out[4]); +void +lp_build_store_rgba_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef exec_mask, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef out_of_bounds, + const LLVMValueRef rgba_in[4]); + /* * YUV */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 22c19b10dbd..a6ec91c4a3f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -42,7 +42,9 @@ #include "lp_bld_format.h" #include "lp_bld_arit.h" #include "lp_bld_pack.h" - +#include "lp_bld_flow.h" +#include "lp_bld_printf.h" +#include "lp_bld_intr.h" static void convert_to_soa(struct gallivm_state *gallivm, @@ -858,3 +860,230 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, convert_to_soa(gallivm, aos_fetch, rgba_out, type); } } + +static void 
+lp_build_insert_soa_chan(struct lp_build_context *bld, + unsigned blockbits, + struct util_format_channel_description chan_desc, + LLVMValueRef *output, + LLVMValueRef rgba) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type type = bld->type; + const unsigned width = chan_desc.size; + const unsigned start = chan_desc.shift; + const unsigned stop = start + width; + LLVMValueRef chan; + switch(chan_desc.type) { + case UTIL_FORMAT_TYPE_UNSIGNED: + + if (chan_desc.pure_integer) + chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + else if (type.floating) { + if (chan_desc.normalized) + chan = lp_build_clamped_float_to_unsigned_norm(gallivm, type, width, rgba); + else + chan = LLVMBuildFPToSI(builder, rgba, bld->vec_type, ""); + } + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = LLVMBuildOr(builder, *output, chan, ""); + break; + case UTIL_FORMAT_TYPE_SIGNED: + if (chan_desc.pure_integer) + chan = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + else if (type.floating) { + uint32_t mask_val = (1UL << chan_desc.size) - 1; + if (chan_desc.normalized) { + char intrin[32]; + double scale = ((1 << (chan_desc.size - 1)) - 1); + LLVMValueRef scale_val = lp_build_const_vec(gallivm, type, scale); + rgba = lp_build_clamp(bld, rgba, lp_build_negate(bld, bld->one), bld->one); + rgba = LLVMBuildFMul(builder, rgba, scale_val, ""); + lp_format_intrinsic(intrin, sizeof intrin, "llvm.rint", bld->vec_type); + rgba = lp_build_intrinsic_unary(builder, intrin, bld->vec_type, rgba); + } + chan = LLVMBuildFPToSI(builder, rgba, bld->int_vec_type, ""); + chan = LLVMBuildAnd(builder, chan, lp_build_const_int_vec(gallivm, type, mask_val), ""); + } + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = 
LLVMBuildOr(builder, *output, chan, ""); + break; + case UTIL_FORMAT_TYPE_FLOAT: + if (type.floating) { + if (chan_desc.size == 16) { + chan = lp_build_float_to_half(gallivm, rgba); + chan = LLVMBuildZExt(builder, chan, bld->int_vec_type, ""); + if (start) + chan = LLVMBuildShl(builder, chan, + lp_build_const_int_vec(gallivm, type, start), ""); + if (!*output) + *output = chan; + else + *output = LLVMBuildOr(builder, *output, chan, ""); + } else { + assert(start == 0); + assert(stop == 32); + assert(type.width == 32); + *output = LLVMBuildBitCast(builder, rgba, bld->int_vec_type, ""); + } + } else + assert(0); + break; + default: + assert(0); + *output = bld->undef; + } +} + +static void +lp_build_pack_rgba_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + const LLVMValueRef rgba_in[4], + LLVMValueRef *packed) +{ + unsigned chan; + struct lp_build_context bld; + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN); + assert(format_desc->block.width == 1); + assert(format_desc->block.height == 1); + assert(format_desc->block.bits <= type.width); + /* FIXME: Support more output types */ + assert(type.width == 32); + + lp_build_context_init(&bld, gallivm, type); + for (chan = 0; chan < format_desc->nr_channels; ++chan) { + struct util_format_channel_description chan_desc = format_desc->channel[chan]; + + lp_build_insert_soa_chan(&bld, format_desc->block.bits, + chan_desc, + packed, + rgba_in[chan]); + } +} + +void +lp_build_store_rgba_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef exec_mask, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef out_of_bounds, + const LLVMValueRef rgba_in[4]) +{ + enum pipe_format format = format_desc->format; + LLVMValueRef packed[4] = {}; + unsigned num_stores; + + if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && + 
format_desc->block.width == 1 && + format_desc->block.height == 1 && + format_desc->block.bits <= type.width && + (format_desc->channel[0].type != UTIL_FORMAT_TYPE_FLOAT || + format_desc->channel[0].size == 32 || + format_desc->channel[0].size == 16)) + { + lp_build_pack_rgba_soa(gallivm, format_desc, type, rgba_in, &packed[0]); + + num_stores = 1; + } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && + (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB) && + format_desc->block.width == 1 && + format_desc->block.height == 1 && + format_desc->block.bits > type.width && + ((format_desc->block.bits <= type.width * type.length && + format_desc->channel[0].size <= type.width) || + (format_desc->channel[0].size == 64 && + format_desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT && + type.floating))) + { + /* + * Similar to above, but the packed pixel is larger than what fits + * into an element of the destination format. The packed pixels will be + * shuffled into SoA vectors appropriately, and then the extraction will + * be done in parallel as much as possible. + * Good for 16xn (n > 2) and 32xn (n > 1) formats, care is taken so + * the gathered vectors can be shuffled easily (even with avx). + * 64xn float -> 32xn float is handled too but it's a bit special as + * it does the conversion pre-shuffle. 
+ */ + struct lp_build_context bld; + + lp_build_context_init(&bld, gallivm, type); + assert(type.width == 32); + assert(format_desc->block.bits > type.width); + + unsigned store_width = util_next_power_of_two(format_desc->block.bits); + num_stores = store_width / type.width; + for (unsigned i = 0; i < format_desc->nr_channels; i++) { + struct util_format_channel_description chan_desc = format_desc->channel[i]; + unsigned blockbits = type.width; + unsigned vec_nr; + + vec_nr = chan_desc.shift / type.width; + chan_desc.shift %= type.width; + + lp_build_insert_soa_chan(&bld, blockbits, + chan_desc, + &packed[vec_nr], + rgba_in[i]); + } + + assert(num_stores == 4 || num_stores == 2); + /* we can transpose and store at the same time */ + } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { + packed[0] = lp_build_float_to_r11g11b10(gallivm, rgba_in); + num_stores = 1; + } else + assert(0); + + assert(exec_mask); + + LLVMTypeRef int32_ptr_type = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); + LLVMTypeRef int16_ptr_type = LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0); + LLVMTypeRef int8_ptr_type = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); + + LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask"); + should_store_mask = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), ""); + for (unsigned i = 0; i < num_stores; i++) { + struct lp_build_loop_state loop_state; + + LLVMValueRef store_offset = LLVMBuildAdd(gallivm->builder, offset, lp_build_const_int_vec(gallivm, type, i * 4), ""); + store_offset = LLVMBuildGEP(gallivm->builder, base_ptr, &store_offset, 1, ""); + + lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0)); + + struct lp_build_if_state ifthen; + LLVMValueRef cond = LLVMBuildExtractElement(gallivm->builder, should_store_mask, loop_state.counter, ""); + 
lp_build_if(&ifthen, gallivm, cond); + + LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed[i], loop_state.counter, ""); + LLVMValueRef this_offset = LLVMBuildExtractElement(gallivm->builder, store_offset, loop_state.counter, ""); + + if (format_desc->block.bits == 8) { + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int8_ptr_type, ""); + data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt8TypeInContext(gallivm->context), ""); + } else if (format_desc->block.bits == 16) { + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int16_ptr_type, ""); + data = LLVMBuildTrunc(gallivm->builder, data, LLVMInt16TypeInContext(gallivm->context), ""); + } else + this_offset = LLVMBuildBitCast(gallivm->builder, this_offset, int32_ptr_type, ""); + LLVMBuildStore(gallivm->builder, data, this_offset); + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length), + NULL, LLVMIntUGE); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 81cb5060711..e49ae810a5e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -125,6 +125,41 @@ lp_sampler_static_texture_state(struct lp_static_texture_state *state, */ } +/** + * Initialize lp_static_texture_state object with the gallium + * image view state (this contains the parts which are + * considered static). 
+ */ +void +lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, + const struct pipe_image_view *view) +{ + const struct pipe_resource *resource; + + memset(state, 0, sizeof *state); + + if (!view || !view->resource) + return; + + resource = view->resource; + + state->format = view->format; + state->swizzle_r = PIPE_SWIZZLE_X; + state->swizzle_g = PIPE_SWIZZLE_Y; + state->swizzle_b = PIPE_SWIZZLE_Z; + state->swizzle_a = PIPE_SWIZZLE_W; + + state->target = view->resource->target; + state->pot_width = util_is_power_of_two_or_zero(resource->width0); + state->pot_height = util_is_power_of_two_or_zero(resource->height0); + state->pot_depth = util_is_power_of_two_or_zero(resource->depth0); + state->level_zero_only = 0; + + /* + * the layer / element / level parameters are all either dynamic + * state or handled transparently wrt execution. + */ +} /** * Initialize lp_sampler_static_sampler_state object with the gallium sampler diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.h b/src/gallium/auxiliary/gallivm/lp_bld_sample.h index c00997b8983..8509179d08f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h @@ -49,6 +49,7 @@ extern "C" { struct pipe_resource; struct pipe_sampler_view; struct pipe_sampler_state; +struct pipe_image_view; struct util_format_description; struct lp_type; struct lp_build_context; @@ -122,6 +123,27 @@ struct lp_sampler_size_query_params LLVMValueRef explicit_lod; LLVMValueRef *sizes_out; }; + +#define LP_IMG_LOAD 0 +#define LP_IMG_STORE 1 +#define LP_IMG_ATOMIC 2 +#define LP_IMG_ATOMIC_CAS 3 + +struct lp_img_params +{ + struct lp_type type; + unsigned image_index; + unsigned img_op; + unsigned target; + LLVMAtomicRMWBinOp op; + LLVMValueRef exec_mask; + LLVMValueRef context_ptr; + LLVMValueRef thread_data_ptr; + const LLVMValueRef *coords; + LLVMValueRef indata[4]; + LLVMValueRef indata2[4]; + LLVMValueRef *outdata; +}; /** * Texture static state. 
* @@ -489,6 +511,9 @@ void lp_sampler_static_texture_state(struct lp_static_texture_state *state, const struct pipe_sampler_view *view); +void +lp_sampler_static_texture_state_image(struct lp_static_texture_state *state, + const struct pipe_image_view *view); void lp_build_lod_selector(struct lp_build_sample_context *bld, @@ -639,6 +664,12 @@ lp_build_minify(struct lp_build_context *bld, LLVMValueRef level, boolean lod_scalar); +void +lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct gallivm_state *gallivm, + const struct lp_img_params *params); + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c index d5bd9161119..adb6adf143a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c @@ -61,6 +61,7 @@ #include "lp_bld_quad.h" #include "lp_bld_pack.h" #include "lp_bld_intr.h" +#include "lp_bld_misc.h" /** @@ -3947,3 +3948,185 @@ lp_build_size_query_soa(struct gallivm_state *gallivm, num_levels); } } + +static void +lp_build_do_atomic_soa(struct gallivm_state *gallivm, + const struct util_format_description *format_desc, + struct lp_type type, + LLVMValueRef exec_mask, + LLVMValueRef base_ptr, + LLVMValueRef offset, + LLVMValueRef out_of_bounds, + unsigned img_op, + LLVMAtomicRMWBinOp op, + const LLVMValueRef rgba_in[4], + const LLVMValueRef rgba2_in[4], + LLVMValueRef atomic_result[4]) +{ + enum pipe_format format = format_desc->format; + + if (format != PIPE_FORMAT_R32_UINT && format != PIPE_FORMAT_R32_SINT && format != PIPE_FORMAT_R32_FLOAT) + return; + + LLVMValueRef atom_res = lp_build_alloca(gallivm, + LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), type.length), ""); + + offset = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, ""); + struct lp_build_loop_state loop_state; + lp_build_loop_begin(&loop_state, 
gallivm, lp_build_const_int32(gallivm, 0)); + struct lp_build_if_state ifthen; + LLVMValueRef cond; + LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0]; + + LLVMValueRef should_store_mask = LLVMBuildAnd(gallivm->builder, exec_mask, LLVMBuildNot(gallivm->builder, out_of_bounds, ""), "store_mask"); + assert(exec_mask); + + cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask, lp_build_const_int_vec(gallivm, type, 0), ""); + cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, ""); + lp_build_if(&ifthen, gallivm, cond); + + LLVMValueRef data = LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, ""); + LLVMValueRef cast_base_ptr = LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, ""); + cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), ""); + data = LLVMBuildBitCast(gallivm->builder, data, LLVMInt32TypeInContext(gallivm->context), ""); + + if (img_op == LP_IMG_ATOMIC_CAS) { + LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, packed2, loop_state.counter, ""); + LLVMValueRef cas_src = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, LLVMInt32TypeInContext(gallivm->context), ""); + data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data, + cas_src, + LLVMAtomicOrderingSequentiallyConsistent, + LLVMAtomicOrderingSequentiallyConsistent, + false); + data = LLVMBuildExtractValue(gallivm->builder, data, 0, ""); + } else { + data = LLVMBuildAtomicRMW(gallivm->builder, op, + cast_base_ptr, data, + LLVMAtomicOrderingSequentiallyConsistent, + false); + } + + LLVMValueRef temp_res = LLVMBuildLoad(gallivm->builder, atom_res, ""); + temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data, loop_state.counter, ""); + LLVMBuildStore(gallivm->builder, temp_res, atom_res); + + lp_build_endif(&ifthen); + lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, type.length), + NULL, 
LLVMIntUGE); + atomic_result[0] = LLVMBuildLoad(gallivm->builder, atom_res, ""); +} + +void +lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state, + struct lp_sampler_dynamic_state *dynamic_state, + struct gallivm_state *gallivm, + const struct lp_img_params *params) +{ + unsigned target = params->target; + unsigned dims = texture_dims(target); + /** regular scalar int type */ + struct lp_type int_type, int_coord_type; + struct lp_build_context int_bld, int_coord_bld; + const struct util_format_description *format_desc = util_format_description(static_texture_state->format); + LLVMValueRef x = params->coords[0], y = params->coords[1], z = params->coords[2]; + LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; + int_type = lp_type_int(32); + int_coord_type = lp_int_type(params->type); + lp_build_context_init(&int_bld, gallivm, int_type); + lp_build_context_init(&int_coord_bld, gallivm, int_coord_type); + + LLVMValueRef offset, i, j; + + LLVMValueRef row_stride = dynamic_state->row_stride(dynamic_state, gallivm, + params->context_ptr, params->image_index); + LLVMValueRef img_stride = dynamic_state->img_stride(dynamic_state, gallivm, + params->context_ptr, params->image_index); + LLVMValueRef base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, + params->context_ptr, params->image_index); + LLVMValueRef width = dynamic_state->width(dynamic_state, gallivm, + params->context_ptr, params->image_index); + LLVMValueRef height = dynamic_state->height(dynamic_state, gallivm, + params->context_ptr, params->image_index); + LLVMValueRef depth = dynamic_state->depth(dynamic_state, gallivm, + params->context_ptr, params->image_index); + boolean layer_coord = has_layer_coord(target); + + width = lp_build_broadcast_scalar(&int_coord_bld, width); + if (dims >= 2) { + height = lp_build_broadcast_scalar(&int_coord_bld, height); + row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride); + } + if (dims >= 3 || layer_coord) { + depth 
= lp_build_broadcast_scalar(&int_coord_bld, depth); + img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride); + } + + LLVMValueRef out_of_bounds = int_coord_bld.zero; + LLVMValueRef out1; + out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width); + out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); + + if (dims >= 2) { + out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height); + out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); + } + if (dims >= 3) { + out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); + out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1); + } + lp_build_sample_offset(&int_coord_bld, + format_desc, + x, y, z, row_stride_vec, img_stride_vec, + &offset, &i, &j); + + if (params->img_op == LP_IMG_LOAD) { + struct lp_type texel_type = params->type; + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && + format_desc->channel[0].pure_integer) { + if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + texel_type = lp_type_int_vec(params->type.width, params->type.width * params->type.length); + } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { + texel_type = lp_type_uint_vec(params->type.width, params->type.width * params->type.length); + } + } + + if (static_texture_state->format == PIPE_FORMAT_NONE) { + /* + * If there's nothing bound, format is NONE, and we must return + * all zero as mandated by d3d10 in this case. 
+ */ + unsigned chan; + LLVMValueRef zero = lp_build_zero(gallivm, params->type); + for (chan = 0; chan < 4; chan++) { + params->outdata[chan] = zero; + } + return; + } + + offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds); + struct lp_build_context texel_bld; + lp_build_context_init(&texel_bld, gallivm, texel_type); + lp_build_fetch_rgba_soa(gallivm, + format_desc, + texel_type, TRUE, + base_ptr, offset, + i, j, + NULL, + params->outdata); + + for (unsigned chan = 0; chan < 4; chan++) { + params->outdata[chan] = lp_build_select(&texel_bld, out_of_bounds, + texel_bld.zero, params->outdata[chan]); + } + } else if (params->img_op == LP_IMG_STORE) { + if (static_texture_state->format == PIPE_FORMAT_NONE) + return; + lp_build_store_rgba_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds, + params->indata); + } else { + if (static_texture_state->format == PIPE_FORMAT_NONE) + return; + lp_build_do_atomic_soa(gallivm, format_desc, params->type, params->exec_mask, base_ptr, offset, out_of_bounds, + params->img_op, params->op, params->indata, params->indata2, params->outdata); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c index 7871dce9103..672d3a503ca 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_info.c @@ -309,6 +309,8 @@ analyse_instruction(struct analysis_context *ctx, continue; } else if (dst->File == TGSI_FILE_BUFFER) { continue; + } else if (dst->File == TGSI_FILE_IMAGE) { + continue; } else { assert(0); continue; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index ab78e6bb50c..d6625c42276 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -3392,6 +3392,79 @@ lod_emit( FALSE, LP_SAMPLER_OP_LODQ, emit_data->output); } +static void 
target_to_dims_layer(unsigned target, + unsigned *dims, + unsigned *layer_coord) +{ + *layer_coord = 0; + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_BUFFER: + *dims = 1; + break; + case TGSI_TEXTURE_1D_ARRAY: + *layer_coord = 1; + *dims = 1; + break; + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + *dims = 2; + break; + case TGSI_TEXTURE_2D_ARRAY: + *layer_coord = 2; + *dims = 2; + break; + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + case TGSI_TEXTURE_CUBE_ARRAY: + *dims = 3; + break; + default: + assert(0); + return; + } +} + +static void +img_load_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct lp_img_params params; + LLVMValueRef coords[5]; + LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); + unsigned dims; + unsigned target = emit_data->inst->Memory.Texture; + unsigned layer_coord; + + target_to_dims_layer(target, &dims, &layer_coord); + + for (unsigned i = 0; i < dims; i++) { + coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); + } + for (unsigned i = dims; i < 5; i++) { + coords[i] = coord_undef; + } + if (layer_coord) + coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord); + + memset(&params, 0, sizeof(params)); + + params.type = bld->bld_base.base.type; + params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; + params.coords = coords; + params.outdata = emit_data->output; + params.target = tgsi_to_pipe_tex_target(target); + params.image_index = emit_data->inst->Src[0].Register.Index; + params.img_op = LP_IMG_LOAD; + bld->image->emit_op(bld->image, + bld->bld_base.base.gallivm, + &params); +} + static void load_emit( const struct lp_build_tgsi_action * action, @@ -3403,10 +3476,12 @@ load_emit( LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; const struct 
tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; unsigned buf = bufreg->Register.Index; - assert(bufreg->Register.File == TGSI_FILE_BUFFER); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); struct lp_build_context *uint_bld = &bld_base->uint_bld; - if (0) { + if (bufreg->Register.File == TGSI_FILE_IMAGE) + img_load_emit(action, bld_base, emit_data); + else if (0) { /* for indirect support with ARB_gpu_shader5 */ } else { LLVMValueRef index; @@ -3461,6 +3536,48 @@ load_emit( } } +static void +img_store_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct lp_img_params params; + LLVMValueRef coords[5]; + LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); + unsigned dims; + unsigned target = emit_data->inst->Memory.Texture; + unsigned layer_coord; + + target_to_dims_layer(target, &dims, &layer_coord); + for (unsigned i = 0; i < dims; i++) { + coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i); + } + for (unsigned i = dims; i < 5; i++) { + coords[i] = coord_undef; + } + if (layer_coord) + coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord); + memset(&params, 0, sizeof(params)); + + params.type = bld->bld_base.base.type; + params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; + params.coords = coords; + params.outdata = NULL; + params.exec_mask = mask_vec(bld_base); + params.target = tgsi_to_pipe_tex_target(target); + params.image_index = emit_data->inst->Dst[0].Register.Index; + params.img_op = LP_IMG_STORE; + for (unsigned i = 0; i < 4; i++) + params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); + + bld->image->emit_op(bld->image, + bld->bld_base.base.gallivm, + &params); +} + static void store_emit( const struct 
lp_build_tgsi_action * action, @@ -3473,9 +3590,11 @@ store_emit( struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0]; unsigned buf = bufreg->Register.Index; - assert(bufreg->Register.File == TGSI_FILE_BUFFER); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); - if (0) { + if (bufreg->Register.File == TGSI_FILE_IMAGE) { + img_store_emit(action, bld_base, emit_data); + } else if (0) { } else { LLVMValueRef index; /* index into the const buffer */ @@ -3539,11 +3658,74 @@ resq_emit( const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; unsigned buf = bufreg->Register.Index; - assert(bufreg->Register.File == TGSI_FILE_BUFFER); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); - LLVMValueRef num_ssbo = bld->ssbo_sizes[buf]; + if (bufreg->Register.File == TGSI_FILE_IMAGE) { + unsigned target = emit_data->inst->Memory.Texture; + struct lp_sampler_size_query_params params = { 0 }; + params.int_type = bld->bld_base.int_bld.type; + params.texture_unit = buf; + params.target = tgsi_to_pipe_tex_target(target); + params.context_ptr = bld->context_ptr; + params.sizes_out = emit_data->output; - emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo); + bld->image->emit_size_query(bld->image, + bld->bld_base.base.gallivm, + &params); + } else { + LLVMValueRef num_ssbo = bld->ssbo_sizes[buf]; + + emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo); + } +} + +static void +img_atomic_emit( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data, + LLVMAtomicRMWBinOp op) +{ + struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); + struct lp_img_params params; + LLVMValueRef coords[5]; + LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type); + 
unsigned dims; + unsigned layer_coord; + unsigned target = emit_data->inst->Memory.Texture; + + target_to_dims_layer(target, &dims, &layer_coord); + + for (unsigned i = 0; i < dims; i++) { + coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i); + } + for (unsigned i = dims; i < 5; i++) { + coords[i] = coord_undef; + } + if (layer_coord) + coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord); + memset(&params, 0, sizeof(params)); + + params.type = bld->bld_base.base.type; + params.context_ptr = bld->context_ptr; + params.thread_data_ptr = bld->thread_data_ptr; + params.exec_mask = mask_vec(bld_base); + params.image_index = emit_data->inst->Src[0].Register.Index; + params.coords = coords; + params.target = tgsi_to_pipe_tex_target(target); + params.op = op; + params.outdata = emit_data->output; + params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC; + + for (unsigned i = 0; i < 4; i++) + params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i); + if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { + for (unsigned i = 0; i < 4; i++) + params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i); + } + bld->image->emit_op(bld->image, + bld->bld_base.base.gallivm, + &params); } static void @@ -3558,7 +3740,7 @@ atomic_emit( struct lp_build_context *uint_bld = &bld_base->uint_bld; const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0]; - assert(bufreg->Register.File == TGSI_FILE_BUFFER); + assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE); unsigned buf = bufreg->Register.Index; LLVMAtomicRMWBinOp op; @@ -3597,7 +3779,9 @@ atomic_emit( return; } - if (0) { + if (bufreg->Register.File == TGSI_FILE_IMAGE) { + img_atomic_emit(action, bld_base, emit_data, op); + } else if (0) { } else { LLVMValueRef index; /* index into the const buffer */ LLVMValueRef scalar, 
scalar_ptr;