r600/shader: add support for load/store/atomic ops on images.
This adds support to the shader assembler for load/store/atomic ops on images, which are handled via RAT operations.
Tested-By: Gert Wollny <gw.fossdev@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
a6b3792843
commit
aa38bf658f
|
@ -969,6 +969,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx)
|
|||
case TGSI_FILE_SAMPLER:
|
||||
case TGSI_FILE_SAMPLER_VIEW:
|
||||
case TGSI_FILE_ADDRESS:
|
||||
case TGSI_FILE_IMAGE:
|
||||
break;
|
||||
|
||||
case TGSI_FILE_HW_ATOMIC:
|
||||
|
@ -3074,6 +3075,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
|||
case PIPE_SHADER_FRAGMENT:
|
||||
shader->two_side = key.ps.color_two_side;
|
||||
shader->atomic_base = key.ps.first_atomic_counter;
|
||||
shader->rat_base = key.ps.nr_cbufs;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -7775,14 +7777,321 @@ static int tgsi_load_gds(struct r600_shader_ctx *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* this fixes up 1D arrays properly */
|
||||
static int load_index_src(struct r600_shader_ctx *ctx, int src_index, int *idx_gpr)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
int r, i;
|
||||
struct r600_bytecode_alu alu;
|
||||
int temp_reg = r600_get_temp(ctx);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
bool def_val = true, write_zero = false;
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = temp_reg;
|
||||
alu.dst.chan = i;
|
||||
|
||||
switch (inst->Memory.Texture) {
|
||||
case TGSI_TEXTURE_BUFFER:
|
||||
case TGSI_TEXTURE_1D:
|
||||
if (i == 1 || i == 2 || i == 3) {
|
||||
write_zero = true;
|
||||
}
|
||||
break;
|
||||
case TGSI_TEXTURE_1D_ARRAY:
|
||||
if (i == 1 || i == 3)
|
||||
write_zero = true;
|
||||
else if (i == 2) {
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[src_index], 1);
|
||||
def_val = false;
|
||||
}
|
||||
break;
|
||||
case TGSI_TEXTURE_2D:
|
||||
if (i == 2 || i == 3)
|
||||
write_zero = true;
|
||||
break;
|
||||
default:
|
||||
if (i == 3)
|
||||
write_zero = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (write_zero) {
|
||||
alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
|
||||
alu.src[0].value = 0;
|
||||
} else if (def_val) {
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[src_index], i);
|
||||
}
|
||||
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
alu.dst.write = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
*idx_gpr = temp_reg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_load_rat(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
/* have to work out the offset into the RAT immediate return buffer */
|
||||
struct r600_bytecode_vtx vtx;
|
||||
struct r600_bytecode_cf *cf;
|
||||
int r;
|
||||
int idx_gpr;
|
||||
unsigned format, num_format, format_comp, endian;
|
||||
const struct util_format_description *desc;
|
||||
unsigned rat_index_mode;
|
||||
unsigned immed_base;
|
||||
|
||||
rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
|
||||
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
|
||||
r = load_index_src(ctx, 1, &idx_gpr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (rat_index_mode)
|
||||
egcm_load_index_reg(ctx->bc, 1, false);
|
||||
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
|
||||
cf = ctx->bc->cf_last;
|
||||
|
||||
cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
|
||||
cf->rat.inst = V_RAT_INST_NOP_RTN;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
|
||||
cf->output.gpr = ctx->thread_id_gpr;
|
||||
cf->output.index_gpr = idx_gpr;
|
||||
cf->output.comp_mask = 0xf;
|
||||
cf->output.burst_count = 1;
|
||||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
cf->mark = 1;
|
||||
cf->output.elem_size = 0;
|
||||
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
|
||||
desc = util_format_description(inst->Memory.Format);
|
||||
r600_vertex_data_type(inst->Memory.Format,
|
||||
&format, &num_format, &format_comp, &endian);
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
vtx.op = FETCH_OP_VFETCH;
|
||||
vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
|
||||
vtx.buffer_index_mode = rat_index_mode;
|
||||
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
|
||||
vtx.src_gpr = ctx->thread_id_gpr;
|
||||
vtx.src_sel_x = 1;
|
||||
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
|
||||
vtx.dst_sel_x = desc->swizzle[0];
|
||||
vtx.dst_sel_y = desc->swizzle[1];
|
||||
vtx.dst_sel_z = desc->swizzle[2];
|
||||
vtx.dst_sel_w = desc->swizzle[3];
|
||||
vtx.srf_mode_all = 1;
|
||||
vtx.data_format = format;
|
||||
vtx.num_format_all = num_format;
|
||||
vtx.format_comp_all = format_comp;
|
||||
vtx.endian = endian;
|
||||
vtx.offset = 0;
|
||||
vtx.mega_fetch_count = 3;
|
||||
r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
|
||||
if (r)
|
||||
return r;
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_load(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
|
||||
return tgsi_load_rat(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
|
||||
return tgsi_load_gds(ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tgsi_store_rat(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
struct r600_bytecode_cf *cf;
|
||||
bool src_requires_loading = false;
|
||||
int val_gpr, idx_gpr;
|
||||
int r, i;
|
||||
unsigned rat_index_mode;
|
||||
|
||||
rat_index_mode = inst->Dst[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
|
||||
r = load_index_src(ctx, 0, &idx_gpr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (inst->Src[1].Register.File != TGSI_FILE_TEMPORARY)
|
||||
src_requires_loading = true;
|
||||
|
||||
if (src_requires_loading) {
|
||||
struct r600_bytecode_alu alu;
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = ctx->temp_reg;
|
||||
alu.dst.chan = i;
|
||||
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[1], i);
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
alu.dst.write = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
val_gpr = ctx->temp_reg;
|
||||
} else
|
||||
val_gpr = tgsi_tex_get_src_gpr(ctx, 1);
|
||||
if (rat_index_mode)
|
||||
egcm_load_index_reg(ctx->bc, 1, false);
|
||||
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
|
||||
cf = ctx->bc->cf_last;
|
||||
|
||||
cf->rat.id = ctx->shader->rat_base + inst->Dst[0].Register.Index;
|
||||
cf->rat.inst = V_RAT_INST_STORE_TYPED;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
|
||||
cf->output.gpr = val_gpr;
|
||||
cf->output.index_gpr = idx_gpr;
|
||||
cf->output.comp_mask = 0xf;
|
||||
cf->output.burst_count = 1;
|
||||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
cf->output.elem_size = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * TGSI STORE handler. Every store is forwarded unconditionally to the
 * image (RAT) path; the result of tgsi_store_rat() is returned as-is.
 */
static int tgsi_store(struct r600_shader_ctx *ctx)
{
	const int status = tgsi_store_rat(ctx);

	return status;
}
|
||||
|
||||
static int tgsi_atomic_op_rat(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
/* have to work out the offset into the RAT immediate return buffer */
|
||||
struct r600_bytecode_alu alu;
|
||||
struct r600_bytecode_vtx vtx;
|
||||
struct r600_bytecode_cf *cf;
|
||||
int r;
|
||||
int idx_gpr;
|
||||
unsigned format, num_format, format_comp, endian;
|
||||
const struct util_format_description *desc;
|
||||
unsigned rat_index_mode;
|
||||
unsigned immed_base;
|
||||
|
||||
immed_base = R600_IMAGE_IMMED_RESOURCE_OFFSET;
|
||||
|
||||
assert (inst->Src[0].Register.File == TGSI_FILE_IMAGE);
|
||||
rat_index_mode = inst->Src[0].Indirect.Index == 2 ? 2 : 0; // CF_INDEX_1 : CF_INDEX_NONE
|
||||
|
||||
r = load_index_src(ctx, 1, &idx_gpr);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (ctx->inst_info->op == V_RAT_INST_CMPXCHG_INT_RTN) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = ctx->thread_id_gpr;
|
||||
alu.dst.chan = 0;
|
||||
alu.dst.write = 1;
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[3], 0);
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = ctx->thread_id_gpr;
|
||||
alu.dst.chan = 3;
|
||||
alu.dst.write = 1;
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
} else {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.dst.sel = ctx->thread_id_gpr;
|
||||
alu.dst.chan = 0;
|
||||
alu.dst.write = 1;
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[2], 0);
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
if (rat_index_mode)
|
||||
egcm_load_index_reg(ctx->bc, 1, false);
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_MEM_RAT);
|
||||
cf = ctx->bc->cf_last;
|
||||
|
||||
cf->rat.id = ctx->shader->rat_base + inst->Src[0].Register.Index;
|
||||
cf->rat.inst = ctx->inst_info->op;
|
||||
cf->rat.index_mode = rat_index_mode;
|
||||
cf->output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND;
|
||||
cf->output.gpr = ctx->thread_id_gpr;
|
||||
cf->output.index_gpr = idx_gpr;
|
||||
cf->output.comp_mask = 0xf;
|
||||
cf->output.burst_count = 1;
|
||||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
cf->mark = 1;
|
||||
cf->output.elem_size = 0;
|
||||
r600_bytecode_add_cfinst(ctx->bc, CF_OP_WAIT_ACK);
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->barrier = 1;
|
||||
cf->cf_addr = 1;
|
||||
|
||||
desc = util_format_description(inst->Memory.Format);
|
||||
r600_vertex_data_type(inst->Memory.Format,
|
||||
&format, &num_format, &format_comp, &endian);
|
||||
memset(&vtx, 0, sizeof(struct r600_bytecode_vtx));
|
||||
vtx.op = FETCH_OP_VFETCH;
|
||||
vtx.buffer_id = immed_base + inst->Src[0].Register.Index;
|
||||
vtx.buffer_index_mode = rat_index_mode;
|
||||
vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
|
||||
vtx.src_gpr = ctx->thread_id_gpr;
|
||||
vtx.src_sel_x = 1;
|
||||
vtx.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + inst->Dst[0].Register.Index;
|
||||
vtx.dst_sel_x = desc->swizzle[0];
|
||||
vtx.dst_sel_y = 7;
|
||||
vtx.dst_sel_z = 7;
|
||||
vtx.dst_sel_w = 7;
|
||||
vtx.use_const_fields = 0;
|
||||
vtx.srf_mode_all = 1;
|
||||
vtx.data_format = format;
|
||||
vtx.num_format_all = num_format;
|
||||
vtx.format_comp_all = format_comp;
|
||||
vtx.endian = endian;
|
||||
vtx.offset = 0;
|
||||
vtx.mega_fetch_count = 0xf;
|
||||
r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx);
|
||||
if (r)
|
||||
return r;
|
||||
cf = ctx->bc->cf_last;
|
||||
cf->vpm = 1;
|
||||
cf->barrier = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_gds_op(int opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
|
@ -7881,6 +8190,8 @@ static int tgsi_atomic_op_gds(struct r600_shader_ctx *ctx)
|
|||
static int tgsi_atomic_op(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
|
||||
return tgsi_atomic_op_rat(ctx);
|
||||
if (inst->Src[0].Register.File == TGSI_FILE_HW_ATOMIC)
|
||||
return tgsi_atomic_op_gds(ctx);
|
||||
return 0;
|
||||
|
@ -9540,7 +9851,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
|
||||
[113] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[114] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[115] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@ -9590,7 +9901,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
|
||||
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
|
||||
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
|
||||
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
|
||||
[163] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[164] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[165] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@ -9763,7 +10074,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_FSGE] = { ALU_OP2_SETGE_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_FSLT] = { ALU_OP2_SETGT_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_FSNE] = { ALU_OP2_SETNE_DX10, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_MEMBAR] = { ALU_OP0_GROUP_BARRIER, tgsi_barrier},
|
||||
[113] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[114] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[115] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
@ -9813,7 +10124,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
|||
[TGSI_OPCODE_IABS] = { 0, tgsi_iabs},
|
||||
[TGSI_OPCODE_ISSG] = { 0, tgsi_issg},
|
||||
[TGSI_OPCODE_LOAD] = { ALU_OP0_NOP, tgsi_load},
|
||||
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[TGSI_OPCODE_STORE] = { ALU_OP0_NOP, tgsi_store},
|
||||
[163] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[164] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
[165] = { ALU_OP0_NOP, tgsi_unsupported},
|
||||
|
|
Loading…
Reference in New Issue