From 89ec36457ead92410b8799f79268dceb5727696b Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 30 Dec 2020 11:59:15 -0800 Subject: [PATCH] gallium/tgsi_exec: Reuse the atomic helper for SSBO atomics. I needed to refactor SSBOs to deal with a buffer overflow, and it's easier to just delete the SSBO atomic code from the interface. Reviewed-by: Dave Airlie Part-of: --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 93 +++--------- src/gallium/auxiliary/tgsi/tgsi_exec.h | 8 +- src/gallium/drivers/softpipe/sp_buffer.c | 177 +++-------------------- 3 files changed, 42 insertions(+), 236 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1238d273229..47bd5776509 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -4230,67 +4230,8 @@ exec_atomop_img(struct tgsi_exec_machine *mach, } static void -exec_atomop_buf(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4]; - union tgsi_exec_channel value[4], value2[4]; - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; - struct tgsi_buffer_params params; - int i, j; - uint unit, chan; - int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; - - unit = fetch_sampler_unit(mach, inst, 0); - - params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; - params.unit = unit; - params.writemask = inst->Dst[0].Register.WriteMask; - - IFETCH(&r[0], 1, TGSI_CHAN_X); - - for (i = 0; i < 4; i++) { - FETCH(&value[i], 2, TGSI_CHAN_X + i); - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) - FETCH(&value2[i], 3, TGSI_CHAN_X + i); - } - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba[0][j] = value[0].f[j]; - rgba[1][j] = value[1].f[j]; - rgba[2][j] = value[2].f[j]; - rgba[3][j] = value[3].f[j]; - } - if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - rgba2[0][j] = 
value2[0].f[j]; - rgba2[1][j] = value2[1].f[j]; - rgba2[2][j] = value2[2].f[j]; - rgba2[3][j] = value2[3].f[j]; - } - } - - mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode, - r[0].i, - rgba, rgba2); - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - r[0].f[j] = rgba[0][j]; - r[1].f[j] = rgba[1][j]; - r[2].f[j] = rgba[2][j]; - r[3].f[j] = rgba[3][j]; - } - for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { - if (inst->Dst[0].Register.WriteMask & (1 << chan)) { - store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); - } - } -} - -static void -exec_atomop_mem(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_atomop_membuf(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { union tgsi_exec_channel offset, r0, r1; uint chan, i; @@ -4302,11 +4243,25 @@ exec_atomop_mem(struct tgsi_exec_machine *mach, return; void *ptr[TGSI_QUAD_SIZE]; - for (i = 0; i < TGSI_QUAD_SIZE; i++) { - if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4)) - ptr[i] = (char *)mach->LocalMem + offset.u[i]; - else - ptr[i] = NULL; + if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { + uint32_t unit = fetch_sampler_unit(mach, inst, 0); + uint32_t size; + char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size); + for (int i = 0; i < TGSI_QUAD_SIZE; i++) { + if (likely(size >= 4 && offset.u[i] <= size - 4)) + ptr[i] = buffer + offset.u[i]; + else + ptr[i] = NULL; + } + } else { + assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY); + + for (i = 0; i < TGSI_QUAD_SIZE; i++) { + if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4)) + ptr[i] = (char *)mach->LocalMem + offset.u[i]; + else + ptr[i] = NULL; + } } FETCH(&r0, 2, TGSI_CHAN_X); @@ -4381,10 +4336,8 @@ exec_atomop(struct tgsi_exec_machine *mach, { if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) exec_atomop_img(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) - 
exec_atomop_buf(mach, inst); - else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) - exec_atomop_mem(mach, inst); + else + exec_atomop_membuf(mach, inst); } static void diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 2f03798d5ef..9f42cb0a97f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -165,12 +165,8 @@ struct tgsi_buffer { const int s[TGSI_QUAD_SIZE], float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); - void (*op)(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - enum tgsi_opcode opcode, - const int s[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]); + void *(*lookup)(const struct tgsi_buffer *buffer, + uint32_t unit, uint32_t *size); void (*get_dims)(const struct tgsi_buffer *buffer, const struct tgsi_buffer_params *params, diff --git a/src/gallium/drivers/softpipe/sp_buffer.c b/src/gallium/drivers/softpipe/sp_buffer.c index 97e52008a8b..e0bd0efd687 100644 --- a/src/gallium/drivers/softpipe/sp_buffer.c +++ b/src/gallium/drivers/softpipe/sp_buffer.c @@ -142,169 +142,26 @@ sp_tgsi_store(const struct tgsi_buffer *buffer, } } -/* - * Implement atomic operations on unsigned integers. - */ -static void -handle_op_atomic(const struct pipe_shader_buffer *bview, - bool just_read, - unsigned char *data_ptr, - uint qi, - enum tgsi_opcode opcode, - unsigned writemask, - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) -{ - uint c = 0; /* SSBO atomics are always on the .x channel. 
*/ - unsigned sdata; - - memcpy(&sdata, data_ptr + (c * 4), 4); - - if (just_read) { - ((uint32_t *)rgba[c])[qi] = sdata; - return; - } - - switch (opcode) { - case TGSI_OPCODE_ATOMUADD: { - unsigned temp = sdata; - sdata += ((uint32_t *)rgba[c])[qi]; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMXCHG: { - unsigned temp = sdata; - sdata = ((uint32_t *)rgba[c])[qi]; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMCAS: { - unsigned dst_x = sdata; - unsigned cmp_x = ((uint32_t *)rgba[c])[qi]; - unsigned src_x = ((uint32_t *)rgba2[c])[qi]; - unsigned temp = sdata; - sdata = (dst_x == cmp_x) ? src_x : dst_x; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMAND: { - unsigned temp = sdata; - sdata &= ((uint32_t *)rgba[c])[qi]; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMOR: { - unsigned temp = sdata; - sdata |= ((uint32_t *)rgba[c])[qi]; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMXOR: { - unsigned temp = sdata; - sdata ^= ((uint32_t *)rgba[c])[qi]; - ((uint32_t *)rgba[c])[qi] = temp; - break; - } - case TGSI_OPCODE_ATOMUMIN: { - unsigned dst_x = sdata; - unsigned src_x = ((uint32_t *)rgba[c])[qi]; - sdata = MIN2(dst_x, src_x); - ((uint32_t *)rgba[c])[qi] = dst_x; - break; - } - case TGSI_OPCODE_ATOMUMAX: { - unsigned dst_x = sdata; - unsigned src_x = ((uint32_t *)rgba[c])[qi]; - sdata = MAX2(dst_x, src_x); - ((uint32_t *)rgba[c])[qi] = dst_x; - break; - } - case TGSI_OPCODE_ATOMIMIN: { - int dst_x = sdata; - int src_x = ((uint32_t *)rgba[c])[qi]; - sdata = MIN2(dst_x, src_x); - ((uint32_t *)rgba[c])[qi] = dst_x; - break; - } - case TGSI_OPCODE_ATOMIMAX: { - int dst_x = sdata; - int src_x = ((uint32_t *)rgba[c])[qi]; - sdata = MAX2(dst_x, src_x); - ((uint32_t *)rgba[c])[qi] = dst_x; - break; - } - case TGSI_OPCODE_ATOMFADD: { - float temp = uif(sdata); - sdata = fui(temp + rgba[c][qi]); - rgba[c][qi] = temp; - break; - } - default: - 
assert(!"Unexpected TGSI opcode in sp_tgsi_op"); - break; - } - - if (writemask & TGSI_WRITEMASK_X) { - memcpy(data_ptr + (c * 4), &sdata, 4); - } -} - -/* - * Implement atomic buffer operations. - */ -static void -sp_tgsi_op(const struct tgsi_buffer *buffer, - const struct tgsi_buffer_params *params, - enum tgsi_opcode opcode, - const int s[TGSI_QUAD_SIZE], - float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE], - float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]) +static void * +sp_tgsi_ssbo_lookup(const struct tgsi_buffer *buffer, + uint32_t unit, + uint32_t *size) { struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer; - struct pipe_shader_buffer *bview; - struct softpipe_resource *spr; - unsigned width; - int j, c; - unsigned char *data_ptr; - if (params->unit >= PIPE_MAX_SHADER_BUFFERS) - return; - - bview = &sp_buf->sp_bview[params->unit]; - spr = softpipe_resource(bview->buffer); - if (!spr) - goto fail_write_all_zero; - - if (!get_dimensions(bview, spr, &width)) - goto fail_write_all_zero; - - for (j = 0; j < TGSI_QUAD_SIZE; j++) { - int s_coord; - bool just_read = false; - - s_coord = s[j]; - if (s_coord >= width) { - for (c = 0; c < 4; c++) { - rgba[c][j] = 0; - } - continue; - } - - /* just readback the value for atomic if execmask isn't set */ - if (!(params->execmask & (1 << j))) { - just_read = true; - } - - data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord; - /* we should see atomic operations on r32 formats */ - - handle_op_atomic(bview, just_read, data_ptr, j, - opcode, params->writemask, rgba, rgba2); + if (unit >= PIPE_MAX_SHADER_BUFFERS) { + *size = 0; + return NULL; } - return; -fail_write_all_zero: - memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4); - return; + + struct pipe_shader_buffer *bview = &sp_buf->sp_bview[unit]; + struct softpipe_resource *spr = softpipe_resource(bview->buffer); + if (!spr || !get_dimensions(bview, spr, size)) { + *size = 0; + return NULL; + } + + return (char *)spr->data + 
bview->buffer_offset; } /* @@ -339,7 +196,7 @@ sp_create_tgsi_buffer(void) buf->base.load = sp_tgsi_load; buf->base.store = sp_tgsi_store; - buf->base.op = sp_tgsi_op; + buf->base.lookup = sp_tgsi_ssbo_lookup; buf->base.get_dims = sp_tgsi_get_dims; return buf; };