aco: use mubuf helper in select_gs_copy_shader

Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6103>
2020-07-28 13:50:08 +01:00 · 2020-07-28 13:50:08 +01:00 · 483657de32
parent ec7ecfe9cb
commit 483657de32
1 changed file with 9 additions and 20 deletions
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2938,6 +2938,7 @@ struct LoadEmitInfo {
   unsigned align_offset = 0;

   bool glc = false;
+   bool slc = false;
   unsigned swizzle_component_size = 0;
   memory_sync_info sync;
   Temp soffset = Temp(0, s1);
@@ -3358,6 +3359,7 @@ Temp mubuf_load_callback(Builder& bld, const LoadEmitInfo &info,
   mubuf->offen = (offset.type() == RegType::vgpr);
   mubuf->glc = info.glc;
   mubuf->dlc = info.glc && bld.program->chip_class >= GFX10;
+   mubuf->slc = info.slc;
   mubuf->sync = info.sync;
   mubuf->offset = const_offset;
   mubuf->swizzled = info.swizzle_component_size != 0;
@@ -3862,7 +3864,8 @@ void store_vmem_mubuf(isel_context *ctx, Temp src, Temp descriptor, Temp voffset

 void load_vmem_mubuf(isel_context *ctx, Temp dst, Temp descriptor, Temp voffset, Temp soffset,
                     unsigned base_const_offset, unsigned elem_size_bytes, unsigned num_components,
-                     unsigned stride = 0u, bool allow_combining = true, bool allow_reorder = true)
+                     unsigned stride = 0u, bool allow_combining = true, bool allow_reorder = true,
+                     bool slc = false)
 {
   assert(elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8);
   assert((num_components * elem_size_bytes) == dst.bytes());
@@ -3873,6 +3876,7 @@ void load_vmem_mubuf(isel_context *ctx, Temp dst, Temp descriptor, Temp voffset,
   LoadEmitInfo info = {Operand(voffset), dst, num_components, elem_size_bytes, descriptor};
   info.component_stride = allow_combining ? 0 : stride;
   info.glc = true;
+   info.slc = slc;
   info.swizzle_component_size = allow_combining ? 0 : 4;
   info.align_mul = MIN2(elem_size_bytes, 4);
   info.align_offset = 0;
@@ -11750,28 +11754,13 @@ void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
            if (!(output_usage_mask & (1 << j)))
               continue;

+            Temp val = bld.tmp(v1);
            unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4;
-            Temp voffset = vtx_offset;
-            if (const_offset >= 4096u) {
-               voffset = bld.vadd32(bld.def(v1), Operand(const_offset / 4096u * 4096u), voffset);
-               const_offset %= 4096u;
-            }
-
-            aco_ptr<MUBUF_instruction> mubuf{create_instruction<MUBUF_instruction>(aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
-            mubuf->definitions[0] = bld.def(v1);
-            mubuf->operands[0] = Operand(gsvs_ring);
-            mubuf->operands[1] = Operand(voffset);
-            mubuf->operands[2] = Operand(0u);
-            mubuf->offen = true;
-            mubuf->offset = const_offset;
-            mubuf->glc = true;
-            mubuf->slc = true;
-            mubuf->dlc = args->options->chip_class >= GFX10;
+            load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1,
+                            0u, true, true, true);

            ctx.outputs.mask[i] |= 1 << j;
-            ctx.outputs.temps[i * 4u + j] = mubuf->definitions[0].getTemp();
-
-            bld.insert(std::move(mubuf));
+            ctx.outputs.temps[i * 4u + j] = val;

            offset++;
         }