aco: reorder VMEM operands in ACO IR

For all VMEM instructions, the resource constant is now
in operands[0]. For MIMG instructions, operands[1] holds either
the sampler or, if the instruction writes memory, the write data.
Moving the VADDR to be the last operand for MIMG is the first step
towards supporting the Navi NSA encoding.

Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3602>
This commit is contained in:
Daniel Schürmann 2020-01-16 16:54:35 +01:00 committed by Marge Bot
parent 8548fe19f0
commit 71440ba0f5
11 changed files with 153 additions and 132 deletions

View File

@ -329,10 +329,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mubuf->tfe ? 1 : 0) << 23;
encoding |= (instr->operands[1].physReg() >> 2) << 16;
encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
encoding |= (0xFF & instr->operands[0].physReg());
encoding |= (0xFF & instr->operands[1].physReg());
out.push_back(encoding);
break;
}
@ -362,10 +362,10 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
encoding |= instr->operands[2].physReg() << 24;
encoding |= (mtbuf->tfe ? 1 : 0) << 23;
encoding |= (mtbuf->slc ? 1 : 0) << 22;
encoding |= (instr->operands[1].physReg() >> 2) << 16;
encoding |= (instr->operands[0].physReg() >> 2) << 16;
unsigned reg = instr->operands.size() > 3 ? instr->operands[3].physReg() : instr->definitions[0].physReg();
encoding |= (0xFF & reg) << 8;
encoding |= (0xFF & instr->operands[0].physReg());
encoding |= (0xFF & instr->operands[1].physReg());
if (ctx.chip_class >= GFX10) {
encoding |= (((opcode & 0x08) >> 4) << 21); /* MSB of 4-bit OPCODE */
@ -395,15 +395,15 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
}
encoding |= (0xF & mimg->dmask) << 8;
out.push_back(encoding);
encoding = (0xFF & instr->operands[0].physReg()); /* VADDR */
encoding = (0xFF & instr->operands[2].physReg()); /* VADDR */
if (!instr->definitions.empty()) {
encoding |= (0xFF & instr->definitions[0].physReg()) << 8; /* VDATA */
} else if (instr->operands.size() == 4) {
encoding |= (0xFF & instr->operands[3].physReg()) << 8; /* VDATA */
} else if (instr->operands[1].regClass().type() == RegType::vgpr) {
encoding |= (0xFF & instr->operands[1].physReg()) << 8; /* VDATA */
}
encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 16; /* T# (resource) */
if (instr->operands.size() > 2)
encoding |= (0x1F & (instr->operands[2].physReg() >> 2)) << 21; /* sampler */
encoding |= (0x1F & (instr->operands[0].physReg() >> 2)) << 16; /* T# (resource) */
if (instr->operands[1].regClass().type() == RegType::sgpr)
encoding |= (0x1F & (instr->operands[1].physReg() >> 2)) << 21; /* sampler */
assert(!mimg->d16 || ctx.chip_class >= GFX9);
encoding |= mimg->d16 ? 1 << 15 : 0;

View File

@ -473,7 +473,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
("ds", [Format.DS], 'DS_instruction', [(1, 1), (1, 2), (0, 3), (0, 4)]),
("mubuf", [Format.MUBUF], 'MUBUF_instruction', [(0, 4), (1, 3)]),
("mtbuf", [Format.MTBUF], 'MTBUF_instruction', [(0, 4), (1, 3)]),
("mimg", [Format.MIMG], 'MIMG_instruction', [(0, 4), (1, 3), (0, 3), (1, 2)]), #TODO(pendingchaos): less shapes?
("mimg", [Format.MIMG], 'MIMG_instruction', [(0, 3), (1, 3)]),
("exp", [Format.EXP], 'Export_instruction', [(0, 4)]),
("branch", [Format.PSEUDO_BRANCH], 'Pseudo_branch_instruction', itertools.product([0], [0, 1])),
("barrier", [Format.PSEUDO_BARRIER], 'Pseudo_barrier_instruction', [(0, 0)]),

View File

@ -325,9 +325,9 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
pred->operands[2].physReg() >= 128;
/* MIMG store with a 128-bit T# with more than two bits set in dmask (making it a >64-bit store) */
bool consider_mimg = pred->format == Format::MIMG &&
pred->operands.size() == 4 &&
pred->operands[3].size() > 2 &&
pred->operands[1].size() != 8;
pred->operands[1].regClass().type() == RegType::vgpr &&
pred->operands[1].size() > 2 &&
pred->operands[0].size() == 4;
/* FLAT/GLOBAL/SCRATCH store with >64-bit data */
bool consider_flat = (pred->isFlatOrGlobal() || pred->format == Format::SCRATCH) &&
pred->operands.size() == 3 &&
@ -376,6 +376,7 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
/* If the VALU writes the SGPR that is used by a VMEM, the user must add five wait states. */
for (int pred_idx = new_idx - 1; pred_idx >= 0 && pred_idx >= new_idx - 5; pred_idx--) {
aco_ptr<Instruction>& pred = new_instructions[pred_idx];
// TODO: break if something else writes the SGPR
if (!(pred->isVALU() && VALU_writes_sgpr(pred)))
continue;
@ -383,16 +384,10 @@ int handle_instruction_gfx8_9(NOP_ctx_gfx8_9& ctx, aco_ptr<Instruction>& instr,
if (def.physReg() > 102)
continue;
if (instr->operands.size() > 1 &&
regs_intersect(instr->operands[1].physReg(), instr->operands[1].size(),
def.physReg(), def.size())) {
for (const Operand& op : instr->operands) {
if (regs_intersect(op.physReg(), op.size(), def.physReg(), def.size()))
return 5 + pred_idx - new_idx + 1;
}
if (instr->operands.size() > 2 &&
regs_intersect(instr->operands[2].physReg(), instr->operands[2].size(),
def.physReg(), def.size())) {
return 5 + pred_idx - new_idx + 1;
}
}
}

View File

@ -689,11 +689,20 @@ void gen(Instruction* instr, wait_ctx& ctx)
if (!instr->definitions.empty())
insert_wait_entry(ctx, instr->definitions[0], ev);
if (instr->operands.size() == 4 && ctx.chip_class == GFX6) {
if (ctx.chip_class == GFX6 &&
instr->format != Format::MIMG &&
instr->operands.size() == 4) {
ctx.exp_cnt++;
update_counters(ctx, event_vmem_gpr_lock);
insert_wait_entry(ctx, instr->operands[3], event_vmem_gpr_lock);
} else if (ctx.chip_class == GFX6 &&
instr->format == Format::MIMG &&
instr->operands[1].regClass().type() == RegType::vgpr) {
ctx.exp_cnt++;
update_counters(ctx, event_vmem_gpr_lock);
insert_wait_entry(ctx, instr->operands[1], event_vmem_gpr_lock);
}
break;
}
case Format::SOPP: {

View File

@ -2929,8 +2929,8 @@ void visit_store_vsgs_output(isel_context *ctx, nir_intrinsic_instr *instr)
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
mtbuf->operands[0] = vaddr_offset;
mtbuf->operands[1] = Operand(esgs_ring);
mtbuf->operands[0] = Operand(esgs_ring);
mtbuf->operands[1] = vaddr_offset;
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->es2gs_offset));
mtbuf->operands[3] = Operand(elem);
mtbuf->offen = !vaddr_offset.isUndefined();
@ -3288,12 +3288,12 @@ void visit_load_input(isel_context *ctx, nir_intrinsic_instr *instr)
if (use_mubuf) {
Instruction *mubuf = bld.mubuf(opcode,
Definition(fetch_dst), fetch_index, list, soffset,
Definition(fetch_dst), list, fetch_index, soffset,
fetch_offset, false, true).instr;
static_cast<MUBUF_instruction*>(mubuf)->can_reorder = true;
} else {
Instruction *mtbuf = bld.mtbuf(opcode,
Definition(fetch_dst), fetch_index, list, soffset,
Definition(fetch_dst), list, fetch_index, soffset,
fetch_dfmt, nfmt, fetch_offset, false, true).instr;
static_cast<MTBUF_instruction*>(mtbuf)->can_reorder = true;
}
@ -3487,8 +3487,8 @@ void visit_load_per_vertex_input(isel_context *ctx, nir_intrinsic_instr *instr)
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = bld.def(v1);
subelems[j] = mubuf->definitions[0].getTemp();
mubuf->operands[0] = Operand(offset);
mubuf->operands[1] = Operand(esgs_ring);
mubuf->operands[0] = Operand(esgs_ring);
mubuf->operands[1] = Operand(offset);
mubuf->operands[2] = Operand(soffset);
mubuf->offen = true;
mubuf->offset = const_offset % 4096u;
@ -3616,8 +3616,8 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
lower = bld.tmp(v4);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = Definition(lower);
mubuf->operands[0] = vaddr;
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = glc;
@ -3651,8 +3651,8 @@ void load_buffer(isel_context *ctx, unsigned num_components, Temp dst,
unreachable("Load SSBO not implemented for this size.");
}
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = vaddr;
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = vaddr;
mubuf->operands[2] = soffset;
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = glc;
@ -4228,9 +4228,10 @@ static Temp adjust_sample_index_using_fmask(isel_context *ctx, bool da, Temp coo
? ac_get_sampler_dim(ctx->options->chip_class, GLSL_SAMPLER_DIM_2D, da)
: 0;
aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(aco_opcode::image_load, Format::MIMG, 2, 1)};
load->operands[0] = Operand(coords);
load->operands[1] = Operand(fmask_desc_ptr);
aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(aco_opcode::image_load, Format::MIMG, 3, 1)};
load->operands[0] = Operand(fmask_desc_ptr);
load->operands[1] = Operand(s4); /* no sampler */
load->operands[2] = Operand(coords);
load->definitions[0] = Definition(fmask);
load->glc = false;
load->dlc = false;
@ -4374,8 +4375,8 @@ void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable(">4 channel buffer image load");
}
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 3, 1)};
load->operands[0] = Operand(vindex);
load->operands[1] = Operand(rsrc);
load->operands[0] = Operand(rsrc);
load->operands[1] = Operand(vindex);
load->operands[2] = Operand((uint32_t) 0);
Temp tmp;
if (num_channels == instr->dest.ssa.num_components && dst.type() == RegType::vgpr)
@ -4407,9 +4408,10 @@ void visit_image_load(isel_context *ctx, nir_intrinsic_instr *instr)
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
aco_opcode opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 2, 1)};
load->operands[0] = Operand(coords);
load->operands[1] = Operand(resource);
aco_ptr<MIMG_instruction> load{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1)};
load->operands[0] = Operand(resource);
load->operands[1] = Operand(s4); /* no sampler */
load->operands[2] = Operand(coords);
load->definitions[0] = Definition(tmp);
load->glc = var->data.access & (ACCESS_VOLATILE | ACCESS_COHERENT) ? 1 : 0;
load->dlc = load->glc && ctx->options->chip_class >= GFX10;
@ -4455,8 +4457,8 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr)
unreachable(">4 channel buffer image store");
}
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
store->operands[0] = Operand(vindex);
store->operands[1] = Operand(rsrc);
store->operands[0] = Operand(rsrc);
store->operands[1] = Operand(vindex);
store->operands[2] = Operand((uint32_t) 0);
store->operands[3] = Operand(data);
store->idxen = true;
@ -4476,11 +4478,10 @@ void visit_image_store(isel_context *ctx, nir_intrinsic_instr *instr)
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
aco_opcode opcode = level_zero ? aco_opcode::image_store : aco_opcode::image_store_mip;
aco_ptr<MIMG_instruction> store{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 4, 0)};
store->operands[0] = Operand(coords);
store->operands[1] = Operand(resource);
store->operands[2] = Operand(s4);
store->operands[3] = Operand(data);
aco_ptr<MIMG_instruction> store{create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 0)};
store->operands[0] = Operand(resource);
store->operands[1] = Operand(data);
store->operands[2] = Operand(coords);
store->glc = glc;
store->dlc = false;
store->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
@ -4572,8 +4573,8 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_BUFFER, nullptr, true, true);
//assert(ctx->options->chip_class < GFX9 && "GFX9 stride size workaround not yet implemented.");
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = Operand(vindex);
mubuf->operands[1] = Operand(resource);
mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(vindex);
mubuf->operands[2] = Operand((uint32_t)0);
mubuf->operands[3] = Operand(data);
if (return_previous)
@ -4591,11 +4592,10 @@ void visit_image_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
Temp coords = get_image_coords(ctx, instr, type);
Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), ACO_DESC_IMAGE, nullptr, true, true);
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(image_op, Format::MIMG, 4, return_previous ? 1 : 0)};
mimg->operands[0] = Operand(coords);
mimg->operands[1] = Operand(resource);
mimg->operands[2] = Operand(s4); /* no sampler */
mimg->operands[3] = Operand(data);
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(image_op, Format::MIMG, 3, return_previous ? 1 : 0)};
mimg->operands[0] = Operand(resource);
mimg->operands[1] = Operand(data);
mimg->operands[2] = Operand(coords);
if (return_previous)
mimg->definitions[0] = Definition(dst);
mimg->glc = return_previous;
@ -4661,9 +4661,10 @@ void visit_image_size(isel_context *ctx, nir_intrinsic_instr *instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1)};
mimg->operands[0] = Operand(lod);
mimg->operands[1] = Operand(resource);
aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1)};
mimg->operands[0] = Operand(resource);
mimg->operands[1] = Operand(s4); /* no sampler */
mimg->operands[2] = Operand(lod);
uint8_t& dmask = mimg->dmask;
mimg->dim = ac_get_image_dim(ctx->options->chip_class, dim, is_array);
mimg->dmask = (1 << instr->dest.ssa.num_components) - 1;
@ -4823,8 +4824,8 @@ void visit_store_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
}
} else {
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(vmem_op, Format::MUBUF, 4, 0)};
store->operands[0] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
store->operands[1] = Operand(rsrc);
store->operands[0] = Operand(rsrc);
store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t) 0);
store->operands[3] = Operand(write_data);
store->offset = start * elem_size_bytes;
@ -4912,8 +4913,8 @@ void visit_atomic_ssbo(isel_context *ctx, nir_intrinsic_instr *instr)
}
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1);
mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t) 0);
mubuf->operands[3] = Operand(data);
if (return_previous)
@ -5021,8 +5022,8 @@ void visit_load_global(isel_context *ctx, nir_intrinsic_instr *instr)
Temp rsrc = get_gfx6_global_rsrc(bld, addr);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->glc = glc;
mubuf->dlc = false;
@ -5202,8 +5203,8 @@ void visit_store_global(isel_context *ctx, nir_intrinsic_instr *instr)
Temp rsrc = get_gfx6_global_rsrc(bld, addr);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, 0)};
mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->operands[3] = Operand(write_data);
mubuf->glc = glc;
@ -5360,8 +5361,8 @@ void visit_global_atomic(isel_context *ctx, nir_intrinsic_instr *instr)
aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 4, return_previous ? 1 : 0)};
mubuf->operands[0] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[1] = Operand(rsrc);
mubuf->operands[0] = Operand(rsrc);
mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1);
mubuf->operands[2] = Operand(0u);
mubuf->operands[3] = Operand(data);
if (return_previous)
@ -5589,12 +5590,12 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
case 8: {
std::array<Temp,NIR_MAX_VEC_COMPONENTS> elems;
Temp lower = bld.mubuf(aco_opcode::buffer_load_dwordx4,
bld.def(v4), offset, rsrc,
bld.def(v4), rsrc, offset,
ctx->program->scratch_offset, 0, true);
Temp upper = bld.mubuf(dst.size() == 6 ? aco_opcode::buffer_load_dwordx2 :
aco_opcode::buffer_load_dwordx4,
dst.size() == 6 ? bld.def(v2) : bld.def(v4),
offset, rsrc, ctx->program->scratch_offset, 16, true);
rsrc, offset, ctx->program->scratch_offset, 16, true);
emit_split_vector(ctx, lower, 2);
elems[0] = emit_extract_vector(ctx, lower, 0, v2);
elems[1] = emit_extract_vector(ctx, lower, 1, v2);
@ -5619,7 +5620,7 @@ void visit_load_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
unreachable("Wrong dst size for nir_intrinsic_load_scratch");
}
bld.mubuf(op, Definition(dst), offset, rsrc, ctx->program->scratch_offset, 0, true);
bld.mubuf(op, Definition(dst), rsrc, offset, ctx->program->scratch_offset, 0, true);
emit_split_vector(ctx, dst, instr->num_components);
}
@ -5680,7 +5681,7 @@ void visit_store_scratch(isel_context *ctx, nir_intrinsic_instr *instr) {
unreachable("Invalid data size for nir_intrinsic_store_scratch.");
}
bld.mubuf(op, offset, rsrc, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
bld.mubuf(op, rsrc, offset, ctx->program->scratch_offset, write_data, start * elem_size_bytes, true);
}
}
@ -5784,8 +5785,8 @@ void visit_emit_vertex_with_counter(isel_context *ctx, nir_intrinsic_instr *inst
}
aco_ptr<MTBUF_instruction> mtbuf{create_instruction<MTBUF_instruction>(aco_opcode::tbuffer_store_format_x, Format::MTBUF, 4, 0)};
mtbuf->operands[0] = vaddr_offset;
mtbuf->operands[1] = Operand(gsvs_ring);
mtbuf->operands[0] = Operand(gsvs_ring);
mtbuf->operands[1] = vaddr_offset;
mtbuf->operands[2] = Operand(get_arg(ctx, ctx->args->gs2vs_offset));
mtbuf->operands[3] = Operand(ctx->outputs.outputs[i][j]);
mtbuf->offen = !vaddr_offset.isUndefined();
@ -6110,8 +6111,8 @@ void visit_intrinsic(isel_context *ctx, nir_intrinsic_instr *instr)
aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dwordx2, Format::MUBUF, 3, 1)};
load->definitions[0] = Definition(sample_pos);
load->operands[0] = Operand(addr);
load->operands[1] = Operand(rsrc);
load->operands[0] = Operand(rsrc);
load->operands[1] = Operand(addr);
load->operands[2] = Operand(0u);
load->offset = sample_pos_offset;
load->offen = 0;
@ -7340,9 +7341,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
if (tmp_dst.id() == dst.id() && div_by_6)
tmp_dst = bld.tmp(tmp_dst.regClass());
tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1));
tex->operands[0] = Operand(as_vgpr(ctx,lod));
tex->operands[1] = Operand(resource);
tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(s4); /* no sampler */
tex->operands[2] = Operand(as_vgpr(ctx,lod));
if (ctx->options->chip_class == GFX9 &&
instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
@ -7380,9 +7382,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
Temp tg4_compare_cube_wa64 = Temp();
if (tg4_integer_workarounds) {
tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 2, 1));
tex->operands[0] = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
tex->operands[1] = Operand(resource);
tex.reset(create_instruction<MIMG_instruction>(aco_opcode::image_get_resinfo, Format::MIMG, 3, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(s4); /* no sampler */
tex->operands[2] = bld.vop1(aco_opcode::v_mov_b32, bld.def(v1), Operand(0u));
tex->dim = dim;
tex->dmask = 0x3;
tex->da = da;
@ -7537,8 +7540,8 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
tmp_dst = bld.tmp(RegType::vgpr, last_bit);
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(op, Format::MUBUF, 3, 1)};
mubuf->operands[0] = Operand(coords);
mubuf->operands[1] = Operand(resource);
mubuf->operands[0] = Operand(resource);
mubuf->operands[1] = Operand(coords);
mubuf->operands[2] = Operand((uint32_t) 0);
mubuf->definitions[0] = Definition(tmp_dst);
mubuf->idxen = true;
@ -7556,9 +7559,10 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
instr->op == nir_texop_fragment_fetch ||
instr->op == nir_texop_fragment_mask_fetch) {
aco_opcode op = level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS || instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ? aco_opcode::image_load : aco_opcode::image_load_mip;
tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 2, 1));
tex->operands[0] = Operand(arg);
tex->operands[1] = Operand(resource);
tex.reset(create_instruction<MIMG_instruction>(op, Format::MIMG, 3, 1));
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(s4); /* no sampler */
tex->operands[2] = Operand(arg);
tex->dim = dim;
tex->dmask = dmask;
tex->unrm = true;
@ -7644,9 +7648,9 @@ void visit_tex(isel_context *ctx, nir_tex_instr *instr)
}
tex.reset(create_instruction<MIMG_instruction>(opcode, Format::MIMG, 3, 1));
tex->operands[0] = Operand(arg);
tex->operands[1] = Operand(resource);
tex->operands[2] = Operand(sampler);
tex->operands[0] = Operand(resource);
tex->operands[1] = Operand(sampler);
tex->operands[2] = Operand(arg);
tex->dim = dim;
tex->dmask = dmask;
tex->da = da;
@ -8753,8 +8757,8 @@ static void emit_stream_output(isel_context *ctx,
}
aco_ptr<MUBUF_instruction> store{create_instruction<MUBUF_instruction>(opcode, Format::MUBUF, 4, 0)};
store->operands[0] = Operand(so_write_offset[buf]);
store->operands[1] = Operand(so_buffers[buf]);
store->operands[0] = Operand(so_buffers[buf]);
store->operands[1] = Operand(so_write_offset[buf]);
store->operands[2] = Operand((uint32_t) 0);
store->operands[3] = Operand(write_data);
if (offset > 4095) {
@ -9118,8 +9122,8 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = bld.def(v1);
mubuf->operands[0] = Operand(voffset);
mubuf->operands[1] = Operand(gsvs_ring);
mubuf->operands[0] = Operand(gsvs_ring);
mubuf->operands[1] = Operand(voffset);
mubuf->operands[2] = Operand(0u);
mubuf->offen = true;
mubuf->offset = const_offset;

View File

@ -781,8 +781,8 @@ struct DS_instruction : public Instruction {
/**
* Vector Memory Untyped-buffer Instructions
* Operand(0): VADDR - Address source. Can carry an index and/or offset
* Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
* Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
* Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
@ -804,8 +804,8 @@ struct MUBUF_instruction : public Instruction {
/**
* Vector Memory Typed-buffer Instructions
* Operand(0): VADDR - Address source. Can carry an index and/or offset
* Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
* Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
* Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
@ -827,10 +827,11 @@ struct MTBUF_instruction : public Instruction {
/**
* Vector Memory Image Instructions
* Operand(0): VADDR - Address source. Can carry an offset or an index.
* Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
* Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
* Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
* Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
* Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
* or VDATA - Vector GPR for write data.
* Operand(2): VADDR - Address source. Can carry an offset or an index.
* Definition(0): VDATA - Vector GPR for read result.
*
*/
struct MIMG_instruction : public Instruction {

View File

@ -528,9 +528,9 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
/* only covers special cases */
bool can_accept_constant(aco_ptr<Instruction>& instr, unsigned operand)
bool alu_can_accept_constant(aco_opcode opcode, unsigned operand)
{
switch (instr->opcode) {
switch (opcode) {
case aco_opcode::v_interp_p2_f32:
case aco_opcode::v_mac_f32:
case aco_opcode::v_writelane_b32:
@ -547,12 +547,6 @@ bool can_accept_constant(aco_ptr<Instruction>& instr, unsigned operand)
case aco_opcode::v_readfirstlane_b32:
return operand != 0;
default:
if ((instr->format == Format::MUBUF ||
instr->format == Format::MIMG) &&
instr->definitions.size() == 1 &&
instr->operands.size() == 4) {
return operand != 3;
}
return true;
}
}
@ -719,7 +713,8 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
break;
}
}
if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) && !instr->operands[i].isFixed() && can_accept_constant(instr, i)) {
if ((info.is_constant() || info.is_constant_64bit() || (info.is_literal() && instr->format == Format::PSEUDO)) &&
!instr->operands[i].isFixed() && alu_can_accept_constant(instr->opcode, i)) {
instr->operands[i] = get_constant_op(ctx, info.val, info.is_constant_64bit());
continue;
}
@ -754,7 +749,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
continue;
}
if ((info.is_constant() || info.is_constant_64bit()) && can_accept_constant(instr, i)) {
if ((info.is_constant() || info.is_constant_64bit()) && alu_can_accept_constant(instr->opcode, i)) {
Operand op = get_constant_op(ctx, info.val, info.is_constant_64bit());
perfwarn(instr->opcode == aco_opcode::v_cndmask_b32 && i == 2, "v_cndmask_b32 with a constant selector", instr.get());
if (i == 0 || instr->opcode == aco_opcode::v_readlane_b32 || instr->opcode == aco_opcode::v_writelane_b32) {
@ -780,9 +775,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
while (info.is_temp())
info = ctx.info[info.temp.id()];
if (mubuf->offen && i == 0 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) {
if (mubuf->offen && i == 1 && info.is_constant_or_literal() && mubuf->offset + info.val < 4096) {
assert(!mubuf->idxen);
instr->operands[i] = Operand(v1);
instr->operands[1] = Operand(v1);
mubuf->offset += info.val;
mubuf->offen = false;
continue;
@ -790,9 +785,9 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
instr->operands[2] = Operand((uint32_t) 0);
mubuf->offset += info.val;
continue;
} else if (mubuf->offen && i == 0 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) {
} else if (mubuf->offen && i == 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == v1 && mubuf->offset + offset < 4096) {
assert(!mubuf->idxen);
instr->operands[i].setTemp(base);
instr->operands[1].setTemp(base);
mubuf->offset += offset;
continue;
} else if (i == 2 && parse_base_offset(ctx, instr.get(), i, &base, &offset) && base.regClass() == s1 && mubuf->offset + offset < 4096) {
@ -2698,7 +2693,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
continue;
}
if (!can_accept_constant(instr, i))
if (!alu_can_accept_constant(instr->opcode, i))
continue;
if (ctx.uses[op.tempId()] < literal_uses) {

View File

@ -1532,11 +1532,14 @@ void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_
} else if (instr->opcode == aco_opcode::s_addk_i32 ||
instr->opcode == aco_opcode::s_mulk_i32) {
instr->definitions[0].setFixed(instr->operands[0].physReg());
} else if ((instr->format == Format::MUBUF ||
instr->format == Format::MIMG) &&
instr->definitions.size() == 1 &&
instr->operands.size() == 4) {
} else if (instr->format == Format::MUBUF &&
instr->definitions.size() == 1 &&
instr->operands.size() == 4) {
instr->definitions[0].setFixed(instr->operands[3].physReg());
} else if (instr->format == Format::MIMG &&
instr->definitions.size() == 1 &&
instr->operands[1].regClass() == instr->definitions[0].regClass()) {
instr->definitions[0].setFixed(instr->operands[1].physReg());
}
ctx.defs_done.reset();

View File

@ -548,7 +548,7 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
if (current->isVMEM() == candidate->isVMEM()) {
bool same_resource = true;
if (current->isVMEM())
same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
same_resource = candidate->operands[0].tempId() == current->operands[0].tempId();
bool can_reorder = can_reorder_vmem || can_reorder_candidate;
int grab_dist = clause_insert_idx - candidate_idx;
/* We can't easily tell how much this will decrease the def-to-use

View File

@ -1575,9 +1575,9 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
split->definitions[i] = bld.def(v1);
bld.insert(split);
for (unsigned i = 0; i < temp.size(); i++)
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
bld.mubuf(opcode, scratch_rsrc, Operand(), scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
} else {
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
bld.mubuf(opcode, scratch_rsrc, Operand(), scratch_offset, temp, offset, false);
}
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
@ -1641,11 +1641,11 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
for (unsigned i = 0; i < def.size(); i++) {
Temp tmp = bld.tmp(v1);
vec->operands[i] = Operand(tmp);
bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
bld.mubuf(opcode, Definition(tmp), scratch_rsrc, Operand(), scratch_offset, offset + i * 4, false);
}
bld.insert(vec);
} else {
bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
bld.mubuf(opcode, def, scratch_rsrc, Operand(), scratch_offset, offset, false);
}
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
uint32_t spill_slot = sgpr_slot[spill_id];

View File

@ -99,8 +99,8 @@ void validate(Program* program, FILE * output)
bool flat = instr->format == Format::FLAT || instr->format == Format::SCRATCH || instr->format == Format::GLOBAL;
bool can_be_undef = is_phi(instr) || instr->format == Format::EXP ||
instr->format == Format::PSEUDO_REDUCTION ||
(flat && i == 1) || (instr->format == Format::MIMG && i == 2) ||
((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 0);
(flat && i == 1) || (instr->format == Format::MIMG && i == 1) ||
((instr->format == Format::MUBUF || instr->format == Format::MTBUF) && i == 1);
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
}
}
@ -229,15 +229,29 @@ void validate(Program* program, FILE * output)
break;
}
case Format::MTBUF:
case Format::MUBUF:
case Format::MIMG: {
case Format::MUBUF: {
check(instr->operands.size() > 1, "VMEM instructions must have at least one operand", instr.get());
check(instr->operands[0].hasRegClass() && instr->operands[0].regClass().type() == RegType::vgpr,
check(instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr,
"VADDR must be in vgpr for VMEM instructions", instr.get());
check(instr->operands[1].isTemp() && instr->operands[1].regClass().type() == RegType::sgpr, "VMEM resource constant must be sgpr", instr.get());
check(instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr, "VMEM resource constant must be sgpr", instr.get());
check(instr->operands.size() < 4 || (instr->operands[3].isTemp() && instr->operands[3].regClass().type() == RegType::vgpr), "VMEM write data must be vgpr", instr.get());
break;
}
case Format::MIMG: {
check(instr->operands.size() == 3, "MIMG instructions must have exactly 3 operands", instr.get());
check(instr->operands[0].hasRegClass() && (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
"MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::sgpr)
check(instr->operands[1].regClass() == s4, "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
else if (instr->operands[1].hasRegClass() && instr->operands[1].regClass().type() == RegType::vgpr)
check(instr->definitions.empty() || instr->definitions[0].regClass() == instr->operands[1].regClass(),
"MIMG operands[1] (VDATA) must be the same as definitions[0] for atomics", instr.get());
check(instr->operands[2].hasRegClass() && instr->operands[2].regClass().type() == RegType::vgpr,
"MIMG operands[2] (VADDR) must be VGPR", instr.get());
check(instr->definitions.empty() || (instr->definitions[0].isTemp() && instr->definitions[0].regClass().type() == RegType::vgpr),
"MIMG definitions[0] (VDATA) must be VGPR", instr.get());
break;
}
case Format::DS: {
for (const Operand& op : instr->operands) {
check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,