aco: use mubuf helper in select_gs_copy_shader

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6103>
This commit is contained in:
Rhys Perry 2020-07-28 13:50:08 +01:00 committed by Marge Bot
parent ec7ecfe9cb
commit 483657de32
1 changed file with 9 additions and 20 deletions

View File

@ -2938,6 +2938,7 @@ struct LoadEmitInfo {
unsigned align_offset = 0;
bool glc = false;
bool slc = false;
unsigned swizzle_component_size = 0;
memory_sync_info sync;
Temp soffset = Temp(0, s1);
@ -3358,6 +3359,7 @@ Temp mubuf_load_callback(Builder& bld, const LoadEmitInfo &info,
mubuf->offen = (offset.type() == RegType::vgpr);
mubuf->glc = info.glc;
mubuf->dlc = info.glc && bld.program->chip_class >= GFX10;
mubuf->slc = info.slc;
mubuf->sync = info.sync;
mubuf->offset = const_offset;
mubuf->swizzled = info.swizzle_component_size != 0;
@ -3862,7 +3864,8 @@ void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset
void load_vmem_mubuf(isel_context *ctx, Temp dst, Temp descriptor, Temp voffset, Temp soffset,
unsigned base_const_offset, unsigned elem_size_bytes, unsigned num_components,
unsigned stride = 0u, bool allow_combining = true, bool allow_reorder = true)
unsigned stride = 0u, bool allow_combining = true, bool allow_reorder = true,
bool slc = false)
{
assert(elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8);
assert((num_components * elem_size_bytes) == dst.bytes());
@ -3873,6 +3876,7 @@ void load_vmem_mubuf(isel_context *ctx, Temp dst, Temp descriptor, Temp voffset,
LoadEmitInfo info = {Operand(voffset), dst, num_components, elem_size_bytes, descriptor};
info.component_stride = allow_combining ? 0 : stride;
info.glc = true;
info.slc = slc;
info.swizzle_component_size = allow_combining ? 0 : 4;
info.align_mul = MIN2(elem_size_bytes, 4);
info.align_offset = 0;
@ -11750,28 +11754,13 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
if (!(output_usage_mask & (1 << j)))
continue;
Temp val = bld.tmp(v1);
unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4;
Temp voffset = vtx_offset;
if (const_offset >= 4096u) {
voffset = bld.vadd32(bld.def(v1), Operand(const_offset / 4096u * 4096u), voffset);
const_offset %= 4096u;
}
aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
mubuf->definitions[0] = bld.def(v1);
mubuf->operands[0] = Operand(gsvs_ring);
mubuf->operands[1] = Operand(voffset);
mubuf->operands[2] = Operand(0u);
mubuf->offen = true;
mubuf->offset = const_offset;
mubuf->glc = true;
mubuf->slc = true;
mubuf->dlc = args->options->chip_class >= GFX10;
load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1,
0u, true, true, true);
ctx.outputs.mask[i] |= 1 << j;
ctx.outputs.temps[i * 4u + j] = mubuf->definitions[0].getTemp();
bld.insert(std::move(mubuf));
ctx.outputs.temps[i * 4u + j] = val;
offset++;
}