gallium/tgsi_exec: Reuse the atomic helper for SSBO atomics.
I needed to refactor SSBOs to deal with a buffer overflow, and it's easier to just delete the SSBO atomic code from the interface. Reviewed-by: Dave Airlie <airlied@redhat.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8276>
This commit is contained in:
parent
85be1f867c
commit
89ec36457e
|
@ -4230,67 +4230,8 @@ exec_atomop_img(struct tgsi_exec_machine *mach,
|
|||
}
|
||||
|
||||
static void
|
||||
exec_atomop_buf(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
union tgsi_exec_channel r[4];
|
||||
union tgsi_exec_channel value[4], value2[4];
|
||||
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
|
||||
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
|
||||
struct tgsi_buffer_params params;
|
||||
int i, j;
|
||||
uint unit, chan;
|
||||
int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
|
||||
|
||||
unit = fetch_sampler_unit(mach, inst, 0);
|
||||
|
||||
params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
|
||||
params.unit = unit;
|
||||
params.writemask = inst->Dst[0].Register.WriteMask;
|
||||
|
||||
IFETCH(&r[0], 1, TGSI_CHAN_X);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
FETCH(&value[i], 2, TGSI_CHAN_X + i);
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
|
||||
FETCH(&value2[i], 3, TGSI_CHAN_X + i);
|
||||
}
|
||||
|
||||
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
||||
rgba[0][j] = value[0].f[j];
|
||||
rgba[1][j] = value[1].f[j];
|
||||
rgba[2][j] = value[2].f[j];
|
||||
rgba[3][j] = value[3].f[j];
|
||||
}
|
||||
if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
|
||||
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
||||
rgba2[0][j] = value2[0].f[j];
|
||||
rgba2[1][j] = value2[1].f[j];
|
||||
rgba2[2][j] = value2[2].f[j];
|
||||
rgba2[3][j] = value2[3].f[j];
|
||||
}
|
||||
}
|
||||
|
||||
mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode,
|
||||
r[0].i,
|
||||
rgba, rgba2);
|
||||
|
||||
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
||||
r[0].f[j] = rgba[0][j];
|
||||
r[1].f[j] = rgba[1][j];
|
||||
r[2].f[j] = rgba[2][j];
|
||||
r[3].f[j] = rgba[3][j];
|
||||
}
|
||||
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
|
||||
if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
|
||||
store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
exec_atomop_mem(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
exec_atomop_membuf(struct tgsi_exec_machine *mach,
|
||||
const struct tgsi_full_instruction *inst)
|
||||
{
|
||||
union tgsi_exec_channel offset, r0, r1;
|
||||
uint chan, i;
|
||||
|
@ -4302,11 +4243,25 @@ exec_atomop_mem(struct tgsi_exec_machine *mach,
|
|||
return;
|
||||
|
||||
void *ptr[TGSI_QUAD_SIZE];
|
||||
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
|
||||
if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
|
||||
ptr[i] = (char *)mach->LocalMem + offset.u[i];
|
||||
else
|
||||
ptr[i] = NULL;
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
|
||||
uint32_t unit = fetch_sampler_unit(mach, inst, 0);
|
||||
uint32_t size;
|
||||
char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);
|
||||
for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
|
||||
if (likely(size >= 4 && offset.u[i] <= size - 4))
|
||||
ptr[i] = buffer + offset.u[i];
|
||||
else
|
||||
ptr[i] = NULL;
|
||||
}
|
||||
} else {
|
||||
assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);
|
||||
|
||||
for (i = 0; i < TGSI_QUAD_SIZE; i++) {
|
||||
if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
|
||||
ptr[i] = (char *)mach->LocalMem + offset.u[i];
|
||||
else
|
||||
ptr[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
FETCH(&r0, 2, TGSI_CHAN_X);
|
||||
|
@ -4381,10 +4336,8 @@ exec_atomop(struct tgsi_exec_machine *mach,
|
|||
{
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
|
||||
exec_atomop_img(mach, inst);
|
||||
else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
|
||||
exec_atomop_buf(mach, inst);
|
||||
else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
|
||||
exec_atomop_mem(mach, inst);
|
||||
else
|
||||
exec_atomop_membuf(mach, inst);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -165,12 +165,8 @@ struct tgsi_buffer {
|
|||
const int s[TGSI_QUAD_SIZE],
|
||||
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
|
||||
|
||||
void (*op)(const struct tgsi_buffer *buffer,
|
||||
const struct tgsi_buffer_params *params,
|
||||
enum tgsi_opcode opcode,
|
||||
const int s[TGSI_QUAD_SIZE],
|
||||
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
|
||||
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
|
||||
void *(*lookup)(const struct tgsi_buffer *buffer,
|
||||
uint32_t unit, uint32_t *size);
|
||||
|
||||
void (*get_dims)(const struct tgsi_buffer *buffer,
|
||||
const struct tgsi_buffer_params *params,
|
||||
|
|
|
@ -142,169 +142,26 @@ sp_tgsi_store(const struct tgsi_buffer *buffer,
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Implement atomic operations on unsigned integers.
|
||||
*/
|
||||
static void
|
||||
handle_op_atomic(const struct pipe_shader_buffer *bview,
|
||||
bool just_read,
|
||||
unsigned char *data_ptr,
|
||||
uint qi,
|
||||
enum tgsi_opcode opcode,
|
||||
unsigned writemask,
|
||||
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
|
||||
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
|
||||
{
|
||||
uint c = 0; /* SSBO atomics are always on the .x channel. */
|
||||
unsigned sdata;
|
||||
|
||||
memcpy(&sdata, data_ptr + (c * 4), 4);
|
||||
|
||||
if (just_read) {
|
||||
((uint32_t *)rgba[c])[qi] = sdata;
|
||||
return;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case TGSI_OPCODE_ATOMUADD: {
|
||||
unsigned temp = sdata;
|
||||
sdata += ((uint32_t *)rgba[c])[qi];
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMXCHG: {
|
||||
unsigned temp = sdata;
|
||||
sdata = ((uint32_t *)rgba[c])[qi];
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMCAS: {
|
||||
unsigned dst_x = sdata;
|
||||
unsigned cmp_x = ((uint32_t *)rgba[c])[qi];
|
||||
unsigned src_x = ((uint32_t *)rgba2[c])[qi];
|
||||
unsigned temp = sdata;
|
||||
sdata = (dst_x == cmp_x) ? src_x : dst_x;
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMAND: {
|
||||
unsigned temp = sdata;
|
||||
sdata &= ((uint32_t *)rgba[c])[qi];
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMOR: {
|
||||
unsigned temp = sdata;
|
||||
sdata |= ((uint32_t *)rgba[c])[qi];
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMXOR: {
|
||||
unsigned temp = sdata;
|
||||
sdata ^= ((uint32_t *)rgba[c])[qi];
|
||||
((uint32_t *)rgba[c])[qi] = temp;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMUMIN: {
|
||||
unsigned dst_x = sdata;
|
||||
unsigned src_x = ((uint32_t *)rgba[c])[qi];
|
||||
sdata = MIN2(dst_x, src_x);
|
||||
((uint32_t *)rgba[c])[qi] = dst_x;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMUMAX: {
|
||||
unsigned dst_x = sdata;
|
||||
unsigned src_x = ((uint32_t *)rgba[c])[qi];
|
||||
sdata = MAX2(dst_x, src_x);
|
||||
((uint32_t *)rgba[c])[qi] = dst_x;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMIMIN: {
|
||||
int dst_x = sdata;
|
||||
int src_x = ((uint32_t *)rgba[c])[qi];
|
||||
sdata = MIN2(dst_x, src_x);
|
||||
((uint32_t *)rgba[c])[qi] = dst_x;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMIMAX: {
|
||||
int dst_x = sdata;
|
||||
int src_x = ((uint32_t *)rgba[c])[qi];
|
||||
sdata = MAX2(dst_x, src_x);
|
||||
((uint32_t *)rgba[c])[qi] = dst_x;
|
||||
break;
|
||||
}
|
||||
case TGSI_OPCODE_ATOMFADD: {
|
||||
float temp = uif(sdata);
|
||||
sdata = fui(temp + rgba[c][qi]);
|
||||
rgba[c][qi] = temp;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
assert(!"Unexpected TGSI opcode in sp_tgsi_op");
|
||||
break;
|
||||
}
|
||||
|
||||
if (writemask & TGSI_WRITEMASK_X) {
|
||||
memcpy(data_ptr + (c * 4), &sdata, 4);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Implement atomic buffer operations.
|
||||
*/
|
||||
static void
|
||||
sp_tgsi_op(const struct tgsi_buffer *buffer,
|
||||
const struct tgsi_buffer_params *params,
|
||||
enum tgsi_opcode opcode,
|
||||
const int s[TGSI_QUAD_SIZE],
|
||||
float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
|
||||
float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
|
||||
static void *
|
||||
sp_tgsi_ssbo_lookup(const struct tgsi_buffer *buffer,
|
||||
uint32_t unit,
|
||||
uint32_t *size)
|
||||
{
|
||||
struct sp_tgsi_buffer *sp_buf = (struct sp_tgsi_buffer *)buffer;
|
||||
struct pipe_shader_buffer *bview;
|
||||
struct softpipe_resource *spr;
|
||||
unsigned width;
|
||||
int j, c;
|
||||
unsigned char *data_ptr;
|
||||
|
||||
if (params->unit >= PIPE_MAX_SHADER_BUFFERS)
|
||||
return;
|
||||
|
||||
bview = &sp_buf->sp_bview[params->unit];
|
||||
spr = softpipe_resource(bview->buffer);
|
||||
if (!spr)
|
||||
goto fail_write_all_zero;
|
||||
|
||||
if (!get_dimensions(bview, spr, &width))
|
||||
goto fail_write_all_zero;
|
||||
|
||||
for (j = 0; j < TGSI_QUAD_SIZE; j++) {
|
||||
int s_coord;
|
||||
bool just_read = false;
|
||||
|
||||
s_coord = s[j];
|
||||
if (s_coord >= width) {
|
||||
for (c = 0; c < 4; c++) {
|
||||
rgba[c][j] = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
/* just readback the value for atomic if execmask isn't set */
|
||||
if (!(params->execmask & (1 << j))) {
|
||||
just_read = true;
|
||||
}
|
||||
|
||||
data_ptr = (unsigned char *)spr->data + bview->buffer_offset + s_coord;
|
||||
/* we should see atomic operations on r32 formats */
|
||||
|
||||
handle_op_atomic(bview, just_read, data_ptr, j,
|
||||
opcode, params->writemask, rgba, rgba2);
|
||||
if (unit >= PIPE_MAX_SHADER_BUFFERS) {
|
||||
*size = 0;
|
||||
return NULL;
|
||||
}
|
||||
return;
|
||||
fail_write_all_zero:
|
||||
memset(rgba, 0, TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE * 4);
|
||||
return;
|
||||
|
||||
struct pipe_shader_buffer *bview = &sp_buf->sp_bview[unit];
|
||||
struct softpipe_resource *spr = softpipe_resource(bview->buffer);
|
||||
if (!spr || !get_dimensions(bview, spr, size)) {
|
||||
*size = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (char *)spr->data + bview->buffer_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -339,7 +196,7 @@ sp_create_tgsi_buffer(void)
|
|||
|
||||
buf->base.load = sp_tgsi_load;
|
||||
buf->base.store = sp_tgsi_store;
|
||||
buf->base.op = sp_tgsi_op;
|
||||
buf->base.lookup = sp_tgsi_ssbo_lookup;
|
||||
buf->base.get_dims = sp_tgsi_get_dims;
|
||||
return buf;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue