nir: Add a "compact array" flag and IO lowering code.
Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[], gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar arrays.

Normal scalar arrays are sparse — each array element usually occupies a whole vec4 slot. However, most hardware assumes these built-in arrays are tightly packed.

The new var->data.compact flag indicates that a scalar array should be tightly packed, so a float[4] array would take up a single vec4 slot, and a float[8] array would take up two slots. They are still arrays, not vec4s, however. nir_lower_io will generate intrinsics using ARB_enhanced_layouts style component qualifiers.

v2: Add nir_validate code to enforce type restrictions.

Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
This commit is contained in:
parent
f395e3445d
commit
663b2e9a92
|
@ -329,6 +329,7 @@ nir_visitor::visit(ir_variable *ir)
|
|||
var->data.explicit_index = ir->data.explicit_index;
|
||||
var->data.explicit_binding = ir->data.explicit_binding;
|
||||
var->data.has_initializer = ir->data.has_initializer;
|
||||
var->data.compact = false;
|
||||
var->data.location_frac = ir->data.location_frac;
|
||||
|
||||
switch (ir->data.depth_layout) {
|
||||
|
|
|
@ -229,6 +229,13 @@ typedef struct nir_variable {
|
|||
*/
|
||||
unsigned location_frac:2;
|
||||
|
||||
/**
|
||||
* If true, this variable represents an array of scalars that should
|
||||
* be tightly packed. In other words, consecutive array elements
|
||||
* should be stored one component apart, rather than one slot apart.
|
||||
*/
|
||||
bool compact:1;
|
||||
|
||||
/**
|
||||
* Whether this is a fragment shader output implicitly initialized with
|
||||
* the previous contents of the specified render target at the
|
||||
|
|
|
@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable *var)
|
|||
var->data.mode == nir_var_shader_in)
|
||||
is_vertex_input = true;
|
||||
|
||||
set_io_mask(shader, var, 0,
|
||||
glsl_count_attribute_slots(type, is_vertex_input));
|
||||
const unsigned slots =
|
||||
var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
|
||||
: glsl_count_attribute_slots(type, is_vertex_input);
|
||||
|
||||
set_io_mask(shader, var, 0, slots);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
|
@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref)
|
|||
* here marking the entire variable as used.
|
||||
*/
|
||||
if (!(glsl_type_is_matrix(type) ||
|
||||
(glsl_type_is_array(type) &&
|
||||
(glsl_type_is_array(type) && !var->data.compact &&
|
||||
(glsl_type_is_numeric(glsl_without_array(type)) ||
|
||||
glsl_type_is_boolean(glsl_without_array(type)))))) {
|
||||
|
||||
|
|
|
@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b,
|
|||
if (!deref_has_indirect(intrin->variables[0]))
|
||||
continue;
|
||||
|
||||
/* Only lower variables whose mode is in the mask */
|
||||
if (!(modes & intrin->variables[0]->var->data.mode))
|
||||
/* Only lower variables whose mode is in the mask, or compact
|
||||
* array variables. (We can't handle indirects on tightly packed
|
||||
* scalar arrays, so we need to lower them regardless.)
|
||||
*/
|
||||
if (!(modes & intrin->variables[0]->var->data.mode) &&
|
||||
!intrin->variables[0]->var->data.compact)
|
||||
continue;
|
||||
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
|
|
@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage)
|
|||
static nir_ssa_def *
|
||||
get_io_offset(nir_builder *b, nir_deref_var *deref,
|
||||
nir_ssa_def **vertex_index,
|
||||
int (*type_size)(const struct glsl_type *))
|
||||
int (*type_size)(const struct glsl_type *),
|
||||
unsigned *component)
|
||||
{
|
||||
nir_deref *tail = &deref->deref;
|
||||
|
||||
|
@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
|
|||
*vertex_index = vtx;
|
||||
}
|
||||
|
||||
if (deref->var->data.compact) {
|
||||
assert(tail->child->deref_type == nir_deref_type_array);
|
||||
assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
|
||||
nir_deref_array *deref_array = nir_deref_as_array(tail->child);
|
||||
/* We always lower indirect dereferences for "compact" array vars. */
|
||||
assert(deref_array->deref_array_type == nir_deref_array_type_direct);
|
||||
|
||||
const unsigned total_offset = *component + deref_array->base_offset;
|
||||
const unsigned slot_offset = total_offset / 4;
|
||||
*component = total_offset % 4;
|
||||
return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
|
||||
}
|
||||
|
||||
/* Just emit code and let constant-folding go to town */
|
||||
nir_ssa_def *offset = nir_imm_int(b, 0);
|
||||
|
||||
|
@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref,
|
|||
|
||||
static nir_intrinsic_instr *
|
||||
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
||||
nir_ssa_def *vertex_index, nir_ssa_def *offset)
|
||||
nir_ssa_def *vertex_index, nir_ssa_def *offset,
|
||||
unsigned component)
|
||||
{
|
||||
const nir_shader *nir = state->builder.shader;
|
||||
nir_variable *var = intrin->variables[0]->var;
|
||||
|
@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
|
||||
nir_intrinsic_set_base(load, var->data.driver_location);
|
||||
if (mode == nir_var_shader_in || mode == nir_var_shader_out)
|
||||
nir_intrinsic_set_component(load, var->data.location_frac);
|
||||
nir_intrinsic_set_component(load, component);
|
||||
|
||||
if (load->intrinsic == nir_intrinsic_load_uniform)
|
||||
nir_intrinsic_set_range(load, state->type_size(var->type));
|
||||
|
@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
|
||||
static nir_intrinsic_instr *
|
||||
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
||||
nir_ssa_def *vertex_index, nir_ssa_def *offset)
|
||||
nir_ssa_def *vertex_index, nir_ssa_def *offset,
|
||||
unsigned component)
|
||||
{
|
||||
nir_variable *var = intrin->variables[0]->var;
|
||||
nir_variable_mode mode = var->data.mode;
|
||||
|
@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
nir_intrinsic_set_base(store, var->data.driver_location);
|
||||
|
||||
if (mode == nir_var_shader_out)
|
||||
nir_intrinsic_set_component(store, var->data.location_frac);
|
||||
nir_intrinsic_set_component(store, component);
|
||||
|
||||
nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));
|
||||
|
||||
|
@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
|
||||
static nir_intrinsic_instr *
|
||||
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
||||
nir_ssa_def *offset)
|
||||
nir_ssa_def *offset, unsigned component)
|
||||
{
|
||||
nir_variable *var = intrin->variables[0]->var;
|
||||
|
||||
|
@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
|
||||
/* Ignore interpolateAt() for flat variables - flat is flat. */
|
||||
if (var->data.interpolation == INTERP_MODE_FLAT)
|
||||
return lower_load(intrin, state, NULL, offset);
|
||||
return lower_load(intrin, state, NULL, offset, component);
|
||||
|
||||
nir_intrinsic_op bary_op;
|
||||
switch (intrin->intrinsic) {
|
||||
|
@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
|
|||
load->num_components = intrin->num_components;
|
||||
|
||||
nir_intrinsic_set_base(load, var->data.driver_location);
|
||||
nir_intrinsic_set_component(load, var->data.location_frac);
|
||||
nir_intrinsic_set_component(load, component);
|
||||
|
||||
load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
|
||||
load->src[1] = nir_src_for_ssa(offset);
|
||||
|
@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block,
|
|||
|
||||
nir_ssa_def *offset;
|
||||
nir_ssa_def *vertex_index = NULL;
|
||||
unsigned component_offset = var->data.location_frac;
|
||||
|
||||
offset = get_io_offset(b, intrin->variables[0],
|
||||
per_vertex ? &vertex_index : NULL,
|
||||
state->type_size);
|
||||
state->type_size, &component_offset);
|
||||
|
||||
nir_intrinsic_instr *replacement;
|
||||
|
||||
switch (intrin->intrinsic) {
|
||||
case nir_intrinsic_load_var:
|
||||
replacement = lower_load(intrin, state, vertex_index, offset);
|
||||
replacement = lower_load(intrin, state, vertex_index, offset,
|
||||
component_offset);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_var:
|
||||
replacement = lower_store(intrin, state, vertex_index, offset);
|
||||
replacement = lower_store(intrin, state, vertex_index, offset,
|
||||
component_offset);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_var_atomic_add:
|
||||
|
@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block,
|
|||
case nir_intrinsic_interp_var_at_sample:
|
||||
case nir_intrinsic_interp_var_at_offset:
|
||||
assert(vertex_index == NULL);
|
||||
replacement = lower_interpolate_at(intrin, state, offset);
|
||||
replacement = lower_interpolate_at(intrin, state, offset,
|
||||
component_offset);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -432,7 +432,8 @@ print_var_decl(nir_variable *var, print_state *state)
|
|||
loc = buf;
|
||||
}
|
||||
|
||||
fprintf(fp, " (%s, %u)", loc, var->data.driver_location);
|
||||
fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location,
|
||||
var->data.compact ? " compact" : "");
|
||||
}
|
||||
|
||||
if (var->constant_initializer) {
|
||||
|
|
|
@ -942,6 +942,19 @@ validate_var_decl(nir_variable *var, bool is_global, validate_state *state)
|
|||
/* Must have exactly one mode set */
|
||||
validate_assert(state, util_bitcount(var->data.mode) == 1);
|
||||
|
||||
if (var->data.compact) {
|
||||
/* The "compact" flag is only valid on arrays of scalars. */
|
||||
assert(glsl_type_is_array(var->type));
|
||||
|
||||
const struct glsl_type *type = glsl_get_array_element(var->type);
|
||||
if (nir_is_per_vertex_io(var, state->shader->stage)) {
|
||||
assert(glsl_type_is_array(type));
|
||||
assert(glsl_type_is_scalar(glsl_get_array_element(type)));
|
||||
} else {
|
||||
assert(glsl_type_is_scalar(type));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO validate some things ir_validate.cpp does (requires more GLSL type
|
||||
* support)
|
||||
|
|
Loading…
Reference in New Issue