diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 49d61484d73..8371ae9e6dc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1499,7 +1499,7 @@ typedef struct {
 
 #include "nir_intrinsics.h"
 
-#define NIR_INTRINSIC_MAX_CONST_INDEX 4
+#define NIR_INTRINSIC_MAX_CONST_INDEX 5
 
 /** Represents an intrinsic
  *
@@ -1744,10 +1744,24 @@ typedef enum {
     */
    NIR_INTRINSIC_EXECUTION_SCOPE,
 
+   /**
+    * Packed nir_io_semantics value; read it with nir_intrinsic_io_semantics().
+    */
+   NIR_INTRINSIC_IO_SEMANTICS,
+
    NIR_INTRINSIC_NUM_INDEX_FLAGS,
 
 } nir_intrinsic_index_flag;
 
+typedef struct {
+   unsigned location:7; /* gl_vert_attrib, gl_varying_slot, or gl_frag_result */
+   unsigned num_slots:6; /* max 32, may be pessimistic with const indexing */
+   unsigned dual_source_blend_index:1; /* nir_lower_io copies var->data.index here on stores */
+   unsigned fb_fetch_output:1; /* for GL_KHR_blend_equation_advanced */
+   unsigned gs_streams:8; /* xxyyzzww: 2-bit stream index for each component */
+   unsigned _pad:9; /* unused; brings the bit-fields to 32 bits, zeroed by the setter */
+} nir_io_semantics;
+
 #define NIR_INTRINSIC_MAX_INPUTS 5
 
 typedef struct {
@@ -1917,6 +1931,30 @@ nir_intrinsic_align(const nir_intrinsic_instr *intrin)
    return align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;
 }
 
+static inline void
+nir_intrinsic_set_io_semantics(nir_intrinsic_instr *intrin,
+                               nir_io_semantics semantics)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+   assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0);
+   STATIC_ASSERT(sizeof(nir_io_semantics) == sizeof(intrin->const_index[0])); /* must fill exactly one const_index slot */
+   semantics._pad = 0; /* zero the padding so no stale caller bits are copied into const_index */
+   memcpy(&intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1],
+          &semantics, sizeof(semantics));
+}
+
+static inline nir_io_semantics
+nir_intrinsic_io_semantics(const nir_intrinsic_instr *intrin)
+{
+   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
+   assert(info->index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0);
+   nir_io_semantics semantics; /* fully overwritten by the memcpy below */
+   memcpy(&semantics,
+          &intrin->const_index[info->index_map[NIR_INTRINSIC_IO_SEMANTICS] - 1],
+          sizeof(semantics));
+   return semantics;
+}
+
 unsigned
 nir_image_intrinsic_coord_components(const nir_intrinsic_instr *instr);
 
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py
index 8d00446a2a2..e9cc0da6472 100644
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -134,6 +134,7 @@ MEMORY_MODES = "NIR_INTRINSIC_MEMORY_MODES"
 MEMORY_SCOPE = "NIR_INTRINSIC_MEMORY_SCOPE"
 # Scope of a control barrier
 EXECUTION_SCOPE = "NIR_INTRINSIC_EXECUTION_SCOPE"
+IO_SEMANTICS = "NIR_INTRINSIC_IO_SEMANTICS"
 
 #
 # Possible flags:
@@ -738,22 +739,22 @@ load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], flags=[CAN_ELIMINATE, CA
 # src[] = { buffer_index, offset in vec4 units }
 load("ubo_vec4", [-1, 1], [ACCESS, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
 # src[] = { offset }.
-load("input", [1], [BASE, COMPONENT, TYPE], [CAN_ELIMINATE, CAN_REORDER])
+load("input", [1], [BASE, COMPONENT, TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { vertex_id, offset }.
-load("input_vertex", [1, 1], [BASE, COMPONENT, TYPE], [CAN_ELIMINATE, CAN_REORDER])
+load("input_vertex", [1, 1], [BASE, COMPONENT, TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { vertex, offset }.
-load("per_vertex_input", [1, 1], [BASE, COMPONENT], [CAN_ELIMINATE, CAN_REORDER])
+load("per_vertex_input", [1, 1], [BASE, COMPONENT, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { barycoord, offset }.
-load("interpolated_input", [2, 1], [BASE, COMPONENT], [CAN_ELIMINATE, CAN_REORDER])
+load("interpolated_input", [2, 1], [BASE, COMPONENT, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])
 
 # src[] = { buffer_index, offset }.
 load("ssbo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { buffer_index }
 load("ssbo_address", [1], [], [CAN_ELIMINATE, CAN_REORDER])
 # src[] = { offset }.
-load("output", [1], [BASE, COMPONENT], flags=[CAN_ELIMINATE])
+load("output", [1], [BASE, COMPONENT, IO_SEMANTICS], flags=[CAN_ELIMINATE])
 # src[] = { vertex, offset }.
-load("per_vertex_output", [1, 1], [BASE, COMPONENT], [CAN_ELIMINATE])
+load("per_vertex_output", [1, 1], [BASE, COMPONENT, IO_SEMANTICS], [CAN_ELIMINATE])
 # src[] = { offset }.
 load("shared", [1], [BASE, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
 # src[] = { offset }.
@@ -777,9 +778,9 @@ def store(name, srcs, indices=[], flags=[]):
     intrinsic("store_" + name, [0] + srcs, indices=indices, flags=flags)
 
 # src[] = { value, offset }.
-store("output", [1], [BASE, WRMASK, COMPONENT, TYPE])
+store("output", [1], [BASE, WRMASK, COMPONENT, TYPE, IO_SEMANTICS])
 # src[] = { value, vertex, offset }.
-store("per_vertex_output", [1, 1], [BASE, WRMASK, COMPONENT])
+store("per_vertex_output", [1, 1], [BASE, WRMASK, COMPONENT, IO_SEMANTICS])
 # src[] = { value, block_index, offset }
 store("ssbo", [-1, 1], [WRMASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
 # src[] = { value, offset }.
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 2e17ad70b36..7f20b96fb0c 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -159,6 +159,19 @@ nir_is_per_vertex_io(const nir_variable *var, gl_shader_stage stage)
    return false;
 }
 
+static unsigned get_number_of_slots(struct lower_io_state *state,
+                                    const nir_variable *var)
+{
+   const struct glsl_type *type = var->type;
+
+   if (nir_is_per_vertex_io(var, state->builder.shader->info.stage)) {
+      assert(glsl_type_is_array(type));
+      type = glsl_get_array_element(type); /* size one array element, not the whole per-vertex array */
+   }
+
+   return state->type_size(type, var->data.bindless); /* NOTE(review): callers store this in 6-bit num_slots; confirm it cannot exceed 63 */
+}
+
 static nir_ssa_def *
 get_io_offset(nir_builder *b, nir_deref_instr *deref,
               nir_ssa_def **vertex_index,
@@ -291,6 +304,14 @@ emit_load(struct lower_io_state *state,
        load->intrinsic == nir_intrinsic_load_uniform)
       nir_intrinsic_set_type(load, type);
 
+   if (load->intrinsic != nir_intrinsic_load_uniform) { /* presumably load_uniform has no IO_SEMANTICS index; confirm */
+      nir_io_semantics semantics = {0};
+      semantics.location = var->data.location;
+      semantics.num_slots = get_number_of_slots(state, var);
+      semantics.fb_fetch_output = var->data.fb_fetch_output;
+      nir_intrinsic_set_io_semantics(load, semantics);
+   }
+
    if (vertex_index) {
       load->src[0] = nir_src_for_ssa(vertex_index);
       load->src[1] = nir_src_for_ssa(offset);
@@ -393,6 +414,25 @@ emit_store(struct lower_io_state *state, nir_ssa_def *data,
 
    store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);
 
+   unsigned gs_streams = 0;
+   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
+      if (var->data.stream & NIR_STREAM_PACKED) {
+         gs_streams = var->data.stream & ~NIR_STREAM_PACKED; /* strip the flag bit, keep the remaining bits */
+      } else {
+         assert(var->data.stream < 4);
+         gs_streams = 0; /* NOTE(review): redundant with the initializer above */
+         for (unsigned i = 0; i < num_components; ++i)
+            gs_streams |= var->data.stream << (2 * i); /* repeat the stream index at 2 bits per component */
+      }
+   }
+
+   nir_io_semantics semantics = {0};
+   semantics.location = var->data.location;
+   semantics.num_slots = get_number_of_slots(state, var);
+   semantics.dual_source_blend_index = var->data.index;
+   semantics.gs_streams = gs_streams;
+   nir_intrinsic_set_io_semantics(store, semantics);
+
    nir_builder_instr_insert(b, &store->instr);
 }
 
@@ -519,6 +559,11 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
    nir_intrinsic_set_base(load, var->data.driver_location);
    nir_intrinsic_set_component(load, component);
 
+   nir_io_semantics semantics = {0}; /* only location and num_slots are set; other fields stay zero */
+   semantics.location = var->data.location;
+   semantics.num_slots = get_number_of_slots(state, var);
+   nir_intrinsic_set_io_semantics(load, semantics);
+
    load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
    load->src[1] = nir_src_for_ssa(offset);
 