r600/shader: add support for load/store/atomic ops on images.

This adds support to the shader assembler for load/store/atomic
ops on images, which are handled via RAT operations.

Tested-By: Gert Wollny <gw.fossdev@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Dave Airlie 2017-11-15 10:01:56 +10:00
parent a6b3792843
commit aa38bf658f
1 changed file with 315 additions and 4 deletions

View File

@ -969,6 +969,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
case TGSI_FILE_SAMPLER:
case TGSI_FILE_SAMPLER_VIEW:
case TGSI_FILE_ADDRESS:
case TGSI_FILE_IMAGE:
break;
case TGSI_FILE_HW_ATOMIC:
@ -3074,6 +3075,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
case PIPE_SHADER_FRAGMENT:
shader->two_side = key.ps.color_two_side;
shader->atomic_base = key.ps.first_atomic_counter;
shader->rat_base = key.ps.nr_cbufs;
break;
default:
break;
@ -7775,14 +7777,321 @@ static int tgsi_load_gds(struct r600_shader_ctx *ctx)
return 0;
}
/* this fixes up 1D arrays properly */
static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r, i;
struct r600_bytecode_alu alu;
int temp_reg = r600_get_temp(ctx);
for (i = 0; i < 4; i++) {
bool def_val = true, write_zero = false;
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
alu.dst.sel = temp_reg;
alu.dst.chan = i;
switch (inst->Memory.Texture) {
case TGSI_TEXTURE_BUFFER:
case TGSI_TEXTURE_1D:
if (i == 1 || i == 2 || i == 3) {
write_zero = true;
}
break;
case TGSI_TEXTURE_1D_ARRAY:
if (i == 1 || i == 3)
write_zero = true;
else if (i == 2) {
r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
def_val = false;
}
break;
case TGSI_TEXTURE_2D:
if (i == 2 || i == 3)
write_zero = true;
break;
default:
if (i == 3)
write_zero = true;
break;
}
if (write_zero) {
alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
alu.src[0].value = 0;
} else if (def_val) {
r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
}
if (i == 3)
alu.last = 1;
alu.dst.write = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
*idx_gpr = temp_reg;
return 0;
}
/*
 * Translate a TGSI LOAD whose resource operand (Src[0]) is an image.
 *
 * Emitted sequence:
 *   1. the index register built by load_index_src() from Src[1];
 *   2. optionally an index-register load when the resource is indirectly
 *      addressed (rat_index_mode != 0);
 *   3. a MEM_RAT CF instruction using V_RAT_INST_NOP_RTN, marked and
 *      followed by WAIT_ACK;
 *   4. a vertex fetch from buffer R600_IMAGE_IMMED_RESOURCE_OFFSET + image
 *      index into the destination register, swizzled according to the
 *      instruction's memory format.
 *
 * Returns 0 on success, or the first non-zero status reported by one of the
 * bytecode helpers.
 */
static int tgsi_load_rat(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	/* have to work out the offset into the RAT immediate return buffer */
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_cf *cf;
	int r;
	int idx_gpr;
	unsigned format, num_format, format_comp, endian;
	const struct util_format_description *desc;
	unsigned rat_index_mode;
	unsigned immed_base;

	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE

	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
	r = load_index_src(ctx, 1, &idx_gpr);
	if (r)
		return r;

	if (rat_index_mode) {
		r = egcm_load_index_reg(ctx->bc, 1, false);
		if (r)
			return r;
	}

	/* Propagate a failed CF allocation rather than configuring whatever
	 * cf_last pointed at before the call. */
	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
	if (r)
		return r;
	cf = ctx->bc->cf_last;

	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
	cf->rat.inst = V_RAT_INST_NOP_RTN;
	cf->rat.index_mode = rat_index_mode;
	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
	cf->output.gpr = ctx->thread_id_gpr;
	cf->output.index_gpr = idx_gpr;
	cf->output.comp_mask = 0xf;
	cf->output.burst_count = 1;
	cf->vpm = 1;
	cf->barrier = 1;
	cf->mark = 1;
	cf->output.elem_size = 0;

	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
	if (r)
		return r;
	cf = ctx->bc->cf_last;
	cf->barrier = 1;

	desc = util_format_description(inst->Memory.Format);
	r600_vertex_data_type(inst->Memory.Format,
			      &format, &num_format, &format_comp, &endian);

	/* Fetch the returned texel into the TGSI destination register. */
	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
	vtx.buffer_index_mode = rat_index_mode;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = ctx->thread_id_gpr;
	vtx.src_sel_x = 1;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	vtx.dst_sel_x = desc->swizzle[0];
	vtx.dst_sel_y = desc->swizzle[1];
	vtx.dst_sel_z = desc->swizzle[2];
	vtx.dst_sel_w = desc->swizzle[3];
	vtx.srf_mode_all = 1;
	vtx.data_format = format;
	vtx.num_format_all = num_format;
	vtx.format_comp_all = format_comp;
	vtx.endian = endian;
	vtx.offset = 0;
	vtx.mega_fetch_count = 3;
	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
	if (r)
		return r;
	cf = ctx->bc->cf_last;
	cf->barrier = 1;
	return 0;
}
/*
 * TGSI LOAD entry point: dispatch on the register file of the resource
 * operand (Src[0]).  Image resources go through the RAT path, hardware
 * atomic counters through the GDS path.  Any other file emits nothing and
 * reports success (0).
 */
static int tgsi_load(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;

	switch (inst->Src[0].Register.File) {
	case TGSI_FILE_IMAGE:
		return tgsi_load_rat(ctx);
	case TGSI_FILE_HW_ATOMIC:
		return tgsi_load_gds(ctx);
	default:
		return 0;
	}
}
/*
 * Translate a TGSI STORE to an image (Dst[0]) into a typed RAT store.
 *
 * Emitted sequence:
 *   1. the index register built by load_index_src() from Src[0];
 *   2. if the value operand (Src[1]) is not a TGSI temporary, four MOVs
 *      copying it into ctx->temp_reg; otherwise the value is taken from the
 *      GPR reported by tgsi_tex_get_src_gpr();
 *   3. optionally an index-register load when the resource is indirectly
 *      addressed (rat_index_mode != 0);
 *   4. a MEM_RAT CF instruction using V_RAT_INST_STORE_TYPED.
 *
 * Returns 0 on success, or the first non-zero status reported by one of the
 * bytecode helpers.
 */
static int tgsi_store_rat(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	struct r600_bytecode_cf *cf;
	int val_gpr, idx_gpr;
	int r, i;
	unsigned rat_index_mode;

	rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE

	r = load_index_src(ctx, 0, &idx_gpr);
	if (r)
		return r;

	if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY) {
		/* The value does not live in a temporary: stage all four
		 * channels in ctx->temp_reg as a single ALU group. */
		struct r600_bytecode_alu alu;

		for (i = 0; i < 4; i++) {
			memset(&alu, 0, sizeof(struct r600_bytecode_alu));
			alu.op = ALU_OP1_MOV;
			alu.dst.sel = ctx->temp_reg;
			alu.dst.chan = i;

			r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
			if (i == 3)
				alu.last = 1;
			alu.dst.write = 1;
			r = r600_bytecode_add_alu(ctx->bc, &alu);
			if (r)
				return r;
		}
		val_gpr = ctx->temp_reg;
	} else {
		val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
	}

	if (rat_index_mode) {
		r = egcm_load_index_reg(ctx->bc, 1, false);
		if (r)
			return r;
	}

	/* Propagate a failed CF allocation rather than configuring whatever
	 * cf_last pointed at before the call. */
	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
	if (r)
		return r;
	cf = ctx->bc->cf_last;

	cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
	cf->rat.inst = V_RAT_INST_STORE_TYPED;
	cf->rat.index_mode = rat_index_mode;
	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
	cf->output.gpr = val_gpr;
	cf->output.index_gpr = idx_gpr;
	cf->output.comp_mask = 0xf;
	cf->output.burst_count = 1;
	cf->vpm = 1;
	cf->barrier = 1;
	cf->output.elem_size = 0;
	return 0;
}
/*
 * TGSI STORE entry point.  Every store is forwarded unconditionally to the
 * RAT path; no check of the destination register file is made here.
 * Returns whatever tgsi_store_rat() returns.
 */
static int tgsi_store(struct r600_shader_ctx *ctx)
{
return tgsi_store_rat(ctx);
}
/*
 * Emit a one-instruction ALU group that copies channel 0 of TGSI source
 * operand src_index into channel dst_chan of the thread-id GPR.
 * Returns the status of r600_bytecode_add_alu().
 */
static int rat_atomic_mov_operand(struct r600_shader_ctx *ctx,
				  unsigned dst_chan, unsigned src_index)
{
	struct r600_bytecode_alu alu;

	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
	alu.op = ALU_OP1_MOV;
	alu.dst.sel = ctx->thread_id_gpr;
	alu.dst.chan = dst_chan;
	alu.dst.write = 1;
	r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 0);
	alu.last = 1;
	return r600_bytecode_add_alu(ctx->bc, &alu);
}

/*
 * Translate a TGSI atomic opcode whose resource operand (Src[0]) is an image.
 * The RAT instruction to issue is taken from ctx->inst_info->op.
 *
 * Emitted sequence:
 *   1. the index register built by load_index_src() from Src[1];
 *   2. the operand(s) moved into the thread-id GPR: for
 *      V_RAT_INST_CMPXCHG_INT_RTN, Src[3].x goes to channel 0 and Src[2].x
 *      to channel 3; for every other op, Src[2].x goes to channel 0;
 *   3. optionally an index-register load when the resource is indirectly
 *      addressed (rat_index_mode != 0);
 *   4. a marked MEM_RAT CF instruction followed by WAIT_ACK;
 *   5. a vertex fetch from buffer R600_IMAGE_IMMED_RESOURCE_OFFSET + image
 *      index that writes only the x channel of the destination register.
 *
 * Returns 0 on success, or the first non-zero status reported by one of the
 * bytecode helpers.
 */
static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
{
	struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
	/* have to work out the offset into the RAT immediate return buffer */
	struct r600_bytecode_vtx vtx;
	struct r600_bytecode_cf *cf;
	int r;
	int idx_gpr;
	unsigned format, num_format, format_comp, endian;
	const struct util_format_description *desc;
	unsigned rat_index_mode;
	unsigned immed_base;

	immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;

	assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE);
	rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE

	r = load_index_src(ctx, 1, &idx_gpr);
	if (r)
		return r;

	if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
		r = rat_atomic_mov_operand(ctx, 0, 3);
		if (r)
			return r;

		r = rat_atomic_mov_operand(ctx, 3, 2);
		if (r)
			return r;
	} else {
		r = rat_atomic_mov_operand(ctx, 0, 2);
		if (r)
			return r;
	}

	if (rat_index_mode) {
		r = egcm_load_index_reg(ctx->bc, 1, false);
		if (r)
			return r;
	}

	/* Propagate a failed CF allocation rather than configuring whatever
	 * cf_last pointed at before the call. */
	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
	if (r)
		return r;
	cf = ctx->bc->cf_last;

	cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
	cf->rat.inst = ctx->inst_info->op;
	cf->rat.index_mode = rat_index_mode;
	cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
	cf->output.gpr = ctx->thread_id_gpr;
	cf->output.index_gpr = idx_gpr;
	cf->output.comp_mask = 0xf;
	cf->output.burst_count = 1;
	cf->vpm = 1;
	cf->barrier = 1;
	cf->mark = 1;
	cf->output.elem_size = 0;

	r = r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
	if (r)
		return r;
	cf = ctx->bc->cf_last;
	cf->barrier = 1;
	cf->cf_addr = 1;

	desc = util_format_description(inst->Memory.Format);
	r600_vertex_data_type(inst->Memory.Format,
			      &format, &num_format, &format_comp, &endian);

	/* Fetch the returned value; only x is written, y/z/w are masked (7). */
	memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
	vtx.op = FETCH_OP_VFETCH;
	vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
	vtx.buffer_index_mode = rat_index_mode;
	vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
	vtx.src_gpr = ctx->thread_id_gpr;
	vtx.src_sel_x = 1;
	vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
	vtx.dst_sel_x = desc->swizzle[0];
	vtx.dst_sel_y = 7;
	vtx.dst_sel_z = 7;
	vtx.dst_sel_w = 7;
	vtx.use_const_fields = 0;
	vtx.srf_mode_all = 1;
	vtx.data_format = format;
	vtx.num_format_all = num_format;
	vtx.format_comp_all = format_comp;
	vtx.endian = endian;
	vtx.offset = 0;
	vtx.mega_fetch_count = 0xf;
	r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
	if (r)
		return r;
	cf = ctx->bc->cf_last;
	cf->vpm = 1;
	cf->barrier = 1;
	return 0;
}
static int get_gds_op(int opcode)
{
switch (opcode) {
@ -7881,6 +8190,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
return tgsi_atomic_op_rat(ctx);
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
return tgsi_atomic_op_gds(ctx);
return 0;
@ -9540,7 +9851,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
[115] = { ALU_OP0_NOP, tgsi_unsupported},
@ -9590,7 +9901,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
[163] = { ALU_OP0_NOP, tgsi_unsupported},
[164] = { ALU_OP0_NOP, tgsi_unsupported},
[165] = { ALU_OP0_NOP, tgsi_unsupported},
@ -9763,7 +10074,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
[113] = { ALU_OP0_NOP, tgsi_unsupported},
[114] = { ALU_OP0_NOP, tgsi_unsupported},
[115] = { ALU_OP0_NOP, tgsi_unsupported},
@ -9813,7 +10124,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
[163] = { ALU_OP0_NOP, tgsi_unsupported},
[164] = { ALU_OP0_NOP, tgsi_unsupported},
[165] = { ALU_OP0_NOP, tgsi_unsupported},