ac/nir/esgs: Slightly refactor emitting IO loads and stores.

No functional changes, just reorganize the code a little bit in preparation for the next commits.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28768>
2024-04-15 00:03:12 +02:00 · 2024-04-15 00:03:12 +02:00 · 55757ce03b
parent 86cce0e677
commit 55757ce03b
1 changed files with 14 additions and 14 deletions
--- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
+++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c
@@ -37,8 +37,8 @@ typedef struct {
 } lower_esgs_io_state;

 static nir_def *
-emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s_off,
-                       unsigned component_stride, unsigned num_components, unsigned bit_size)
+emit_split_buffer_load(nir_builder *b, unsigned num_components, unsigned bit_size,
+                       unsigned component_stride, nir_def *desc, nir_def *v_off, nir_def *s_off)
 {
   unsigned total_bytes = num_components * bit_size / 8u;
   unsigned full_dwords = total_bytes / 4u;
@@ -71,8 +71,7 @@ emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s

 static void
 emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_off, nir_def *s_off,
-                        unsigned component_stride, unsigned num_components, unsigned bit_size,
-                        unsigned writemask, bool swizzled, bool slc)
+                        unsigned bit_size, unsigned const_offset, unsigned writemask, bool swizzled, bool slc)
 {
   nir_def *zero = nir_imm_int(b, 0);

@@ -93,7 +92,7 @@ emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_of

         nir_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u);
         nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero,
-                              .base = start_byte, .memory_modes = nir_var_shader_out,
+                              .base = start_byte + const_offset, .memory_modes = nir_var_shader_out,
                              .access = ACCESS_COHERENT |
                                        (slc ? ACCESS_NON_TEMPORAL : 0) |
                                        (swizzled ? ACCESS_IS_SWIZZLED_AMD : 0));
@@ -138,30 +137,31 @@ lower_es_output_store(nir_builder *b,
    *
    * So writes to those outputs in ES are simply ignored.
    */
-   unsigned semantic = nir_intrinsic_io_semantics(intrin).location;
-   if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT) {
+   const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin);
+   if (io_sem.location == VARYING_SLOT_LAYER || io_sem.location == VARYING_SLOT_VIEWPORT) {
      nir_instr_remove(&intrin->instr);
      return true;
   }

   lower_esgs_io_state *st = (lower_esgs_io_state *) state;
-   unsigned write_mask = nir_intrinsic_write_mask(intrin);
+   const unsigned write_mask = nir_intrinsic_write_mask(intrin);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io);
+   nir_def *store_val = intrin->src[0].ssa;

   if (st->gfx_level <= GFX8) {
      /* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */
      nir_def *ring = nir_load_ring_esgs_amd(b);
      nir_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b);
-      emit_split_buffer_store(b, intrin->src[0].ssa, ring, io_off, es2gs_off, 4u,
-                              intrin->src[0].ssa->num_components, intrin->src[0].ssa->bit_size,
-                              write_mask, true, true);
+      emit_split_buffer_store(b, store_val, ring, io_off, es2gs_off,
+                              store_val->bit_size,
+                              0, write_mask, true, true);
   } else {
      /* GFX9+: ES is merged into GS, data is passed through LDS. */
      nir_def *vertex_idx = nir_load_local_invocation_index(b);
      nir_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off);
-      nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask);
+      nir_store_shared(b, store_val, off, .write_mask = write_mask);
   }

   nir_instr_remove(&intrin->instr);
@@ -271,8 +271,8 @@ lower_gs_per_vertex_input_load(nir_builder *b,

   unsigned wave_size = 64u; /* GFX6-8 only support wave64 */
   nir_def *ring = nir_load_ring_esgs_amd(b);
-   return emit_split_buffer_load(b, ring, off, nir_imm_zero(b, 1, 32), 4u * wave_size,
-                                 intrin->def.num_components, intrin->def.bit_size);
+   return emit_split_buffer_load(b, intrin->def.num_components, intrin->def.bit_size,
+                                 4u * wave_size, ring, off, nir_imm_int(b, 0));
 }

 static bool