diff --git a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c index 0c9368f414a65..0238634dfdf70 100644 --- a/src/amd/common/ac_nir_lower_esgs_io_to_mem.c +++ b/src/amd/common/ac_nir_lower_esgs_io_to_mem.c @@ -37,8 +37,8 @@ typedef struct { } lower_esgs_io_state; static nir_def * -emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s_off, - unsigned component_stride, unsigned num_components, unsigned bit_size) +emit_split_buffer_load(nir_builder *b, unsigned num_components, unsigned bit_size, + unsigned component_stride, nir_def *desc, nir_def *v_off, nir_def *s_off) { unsigned total_bytes = num_components * bit_size / 8u; unsigned full_dwords = total_bytes / 4u; @@ -71,8 +71,7 @@ emit_split_buffer_load(nir_builder *b, nir_def *desc, nir_def *v_off, nir_def *s static void emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_off, nir_def *s_off, - unsigned component_stride, unsigned num_components, unsigned bit_size, - unsigned writemask, bool swizzled, bool slc) + unsigned bit_size, unsigned const_offset, unsigned writemask, bool swizzled, bool slc) { nir_def *zero = nir_imm_int(b, 0); @@ -93,7 +92,7 @@ emit_split_buffer_store(nir_builder *b, nir_def *d, nir_def *desc, nir_def *v_of nir_def *store_val = nir_extract_bits(b, &d, 1, start_byte * 8u, 1, store_bytes * 8u); nir_store_buffer_amd(b, store_val, desc, v_off, s_off, zero, - .base = start_byte, .memory_modes = nir_var_shader_out, + .base = start_byte + const_offset, .memory_modes = nir_var_shader_out, .access = ACCESS_COHERENT | (slc ? ACCESS_NON_TEMPORAL : 0) | (swizzled ? ACCESS_IS_SWIZZLED_AMD : 0)); @@ -138,30 +137,31 @@ lower_es_output_store(nir_builder *b, * * So writes to those outputs in ES are simply ignored. */ - unsigned semantic = nir_intrinsic_io_semantics(intrin).location; - if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT) { + const nir_io_semantics io_sem = nir_intrinsic_io_semantics(intrin); + if (io_sem.location == VARYING_SLOT_LAYER || io_sem.location == VARYING_SLOT_VIEWPORT) { nir_instr_remove(&intrin->instr); return true; } lower_esgs_io_state *st = (lower_esgs_io_state *) state; - unsigned write_mask = nir_intrinsic_write_mask(intrin); + const unsigned write_mask = nir_intrinsic_write_mask(intrin); b->cursor = nir_before_instr(&intrin->instr); nir_def *io_off = ac_nir_calc_io_offset(b, intrin, nir_imm_int(b, 16u), 4u, st->map_io); + nir_def *store_val = intrin->src[0].ssa; if (st->gfx_level <= GFX8) { /* GFX6-8: ES is a separate HW stage, data is passed from ES to GS in VRAM. */ nir_def *ring = nir_load_ring_esgs_amd(b); nir_def *es2gs_off = nir_load_ring_es2gs_offset_amd(b); - emit_split_buffer_store(b, intrin->src[0].ssa, ring, io_off, es2gs_off, 4u, - intrin->src[0].ssa->num_components, intrin->src[0].ssa->bit_size, - write_mask, true, true); + emit_split_buffer_store(b, store_val, ring, io_off, es2gs_off, + store_val->bit_size, + 0, write_mask, true, true); } else { /* GFX9+: ES is merged into GS, data is passed through LDS. */ nir_def *vertex_idx = nir_load_local_invocation_index(b); nir_def *off = nir_iadd(b, nir_imul_imm(b, vertex_idx, st->esgs_itemsize), io_off); - nir_store_shared(b, intrin->src[0].ssa, off, .write_mask = write_mask); + nir_store_shared(b, store_val, off, .write_mask = write_mask); } nir_instr_remove(&intrin->instr); @@ -271,8 +271,8 @@ lower_gs_per_vertex_input_load(nir_builder *b, unsigned wave_size = 64u; /* GFX6-8 only support wave64 */ nir_def *ring = nir_load_ring_esgs_amd(b); - return emit_split_buffer_load(b, ring, off, nir_imm_zero(b, 1, 32), 4u * wave_size, - intrin->def.num_components, intrin->def.bit_size); + return emit_split_buffer_load(b, intrin->def.num_components, intrin->def.bit_size, + 4u * wave_size, ring, off, nir_imm_int(b, 0)); } static bool