nir: add transform feedback info into nir_intrinsic_store_output
With this information, transform feedback varyings are no longer tied to varying slots, which allows them to be compacted. It also makes transform feedback info available to all NIR passes after IO is lowered. It's meant to replace pipe_stream_output_info. Other intrinsics are not used with transform feedback. Reviewed-by: Emma Anholt <emma@anholt.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14388>
This commit is contained in:
parent
2c6e41bfe1
commit
4636fa7f38
|
@ -3383,3 +3383,30 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src)
|
|||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return which components are written into transform feedback buffers.
|
||||
* The result is relative to 0, not "component".
|
||||
*/
|
||||
unsigned
|
||||
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr)
|
||||
{
|
||||
unsigned mask = 0;
|
||||
|
||||
if (nir_intrinsic_has_io_xfb(instr)) {
|
||||
unsigned wr_mask = nir_intrinsic_write_mask(instr) <<
|
||||
nir_intrinsic_component(instr);
|
||||
assert((wr_mask & ~0xf) == 0); /* only 4 components allowed */
|
||||
|
||||
unsigned iter_mask = wr_mask;
|
||||
while (iter_mask) {
|
||||
unsigned i = u_bit_scan(&iter_mask);
|
||||
nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) :
|
||||
nir_intrinsic_io_xfb2(instr);
|
||||
if (xfb.out[i % 2].num_components)
|
||||
mask |= BITFIELD_RANGE(i, xfb.out[i % 2].num_components) & wr_mask;
|
||||
}
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
|
|
@ -1651,7 +1651,7 @@ typedef struct {
|
|||
|
||||
#include "nir_intrinsics.h"
|
||||
|
||||
#define NIR_INTRINSIC_MAX_CONST_INDEX 6
|
||||
#define NIR_INTRINSIC_MAX_CONST_INDEX 7
|
||||
|
||||
/** Represents an intrinsic
|
||||
*
|
||||
|
@ -1770,6 +1770,25 @@ typedef struct nir_io_semantics {
|
|||
unsigned _pad:5;
|
||||
} nir_io_semantics;
|
||||
|
||||
/* Transform feedback (xfb) info for 2 outputs.
 *
 * nir_intrinsic_store_output contains this structure twice (io_xfb and
 * io_xfb2) to support up to 4 outputs. The structure is limited to 32 bits
 * because it's stored in nir_intrinsic_instr::const_index[].
 *
 * For every out[] entry:
 *  - start_component is equal to the index of out[] (add 2 for io_xfb2) and
 *    is not relative to nir_intrinsic_component;
 *  - the stream index is taken from nir_io_semantics.
 */
typedef struct nir_io_xfb {
   struct {
      /* Component count, max 4; if this is 0, xfb is disabled. */
      uint8_t num_components:4;

      /* Buffer index, max 3. */
      uint8_t buffer:4;

      /* Transform feedback buffer offset in dwords, max (1K - 4) bytes. */
      uint8_t offset;
   } out[2];
} nir_io_xfb;
|
||||
|
||||
unsigned
|
||||
nir_instr_xfb_write_mask(nir_intrinsic_instr *instr);
|
||||
|
||||
#define NIR_INTRINSIC_MAX_INPUTS 11
|
||||
|
||||
typedef struct {
|
||||
|
|
|
@ -248,6 +248,10 @@ index("nir_scope", "execution_scope")
|
|||
# Semantics of an IO instruction
|
||||
index("struct nir_io_semantics", "io_semantics")
|
||||
|
||||
# Transform feedback info
|
||||
index("struct nir_io_xfb", "io_xfb")
|
||||
index("struct nir_io_xfb", "io_xfb2")
|
||||
|
||||
# Rounding mode for conversions
|
||||
index("nir_rounding_mode", "rounding_mode")
|
||||
|
||||
|
@ -1001,7 +1005,7 @@ def store(name, srcs, indices=[], flags=[]):
|
|||
intrinsic("store_" + name, [0] + srcs, indices=indices, flags=flags)
|
||||
|
||||
# src[] = { value, offset }.
|
||||
store("output", [1], [BASE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS])
|
||||
store("output", [1], [BASE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS, IO_XFB, IO_XFB2])
|
||||
# src[] = { value, vertex, offset }.
|
||||
store("per_vertex_output", [1, 1], [BASE, WRITE_MASK, COMPONENT, SRC_TYPE, IO_SEMANTICS])
|
||||
# src[] = { value, primitive, offset }.
|
||||
|
|
|
@ -54,7 +54,9 @@ nir_intrinsic_set_${name}(nir_intrinsic_instr *instr, ${data_type} val)
|
|||
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
|
||||
assert(info->index_map[${enum}] > 0);
|
||||
% if "struct" in data_type:
|
||||
% if name == "io_semantics":
|
||||
val._pad = 0; /* clear padding bits */
|
||||
% endif
|
||||
STATIC_ASSERT(sizeof(instr->const_index[0]) == sizeof(val));
|
||||
memcpy(&instr->const_index[info->index_map[${enum}] - 1], &val, sizeof(val));
|
||||
% else:
|
||||
|
|
|
@ -1078,6 +1078,36 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
|||
break;
|
||||
}
|
||||
|
||||
case NIR_INTRINSIC_IO_XFB:
|
||||
case NIR_INTRINSIC_IO_XFB2: {
|
||||
/* This prints both IO_XFB and IO_XFB2. */
|
||||
fprintf(fp, "xfb%s(", idx == NIR_INTRINSIC_IO_XFB ? "" : "2");
|
||||
bool first = true;
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
unsigned start_comp = (idx == NIR_INTRINSIC_IO_XFB ? 0 : 2) + i;
|
||||
nir_io_xfb xfb = start_comp < 2 ? nir_intrinsic_io_xfb(instr) :
|
||||
nir_intrinsic_io_xfb2(instr);
|
||||
|
||||
if (!xfb.out[i].num_components)
|
||||
continue;
|
||||
|
||||
if (!first)
|
||||
fprintf(fp, ", ");
|
||||
first = false;
|
||||
|
||||
if (xfb.out[i].num_components > 1) {
|
||||
fprintf(fp, "components=%u..%u",
|
||||
start_comp, start_comp + xfb.out[i].num_components - 1);
|
||||
} else {
|
||||
fprintf(fp, "component=%u", start_comp);
|
||||
}
|
||||
fprintf(fp, " buffer=%u offset=%u",
|
||||
xfb.out[i].buffer, (uint32_t)xfb.out[i].offset * 4);
|
||||
}
|
||||
fprintf(fp, ")");
|
||||
break;
|
||||
}
|
||||
|
||||
case NIR_INTRINSIC_ROUNDING_MODE: {
|
||||
fprintf(fp, "rounding_mode=");
|
||||
switch (nir_intrinsic_rounding_mode(instr)) {
|
||||
|
|
|
@ -826,6 +826,20 @@ validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state)
|
|||
unsigned component_mask = BITFIELD_MASK(instr->num_components);
|
||||
validate_assert(state, (nir_intrinsic_write_mask(instr) & ~component_mask) == 0);
|
||||
}
|
||||
|
||||
if (nir_intrinsic_has_io_xfb(instr)) {
|
||||
unsigned used_mask = 0;
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
nir_io_xfb xfb = i < 2 ? nir_intrinsic_io_xfb(instr) :
|
||||
nir_intrinsic_io_xfb2(instr);
|
||||
unsigned xfb_mask = BITFIELD_RANGE(i, xfb.out[i % 2].num_components);
|
||||
|
||||
/* Each component can be used only once by transform feedback info. */
|
||||
validate_assert(state, (xfb_mask & used_mask) == 0);
|
||||
used_mask |= xfb_mask;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
Loading…
Reference in New Issue