mesa/src/broadcom/compiler/v3d_nir_lower_io.c

/*
* Copyright © 2015 Broadcom
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"
#include "util/u_helpers.h"
/**
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its I/O
* intrinsics into something amenable to the V3D architecture.
*
* Most of the work is turning the VS's store_output intrinsics from working
* on a base representing the gallium-level vec4 driver_location to an offset
* within the VPM, and emitting the header that's read by the fixed function
* hardware between the VS and FS.
*
* We also adjust the offsets on uniform loads to be in bytes, since that's
* what we need for indirect addressing with general TMU access.
*/
struct v3d_nir_lower_io_state {
int pos_vpm_offset;
int vp_vpm_offset;
int zs_vpm_offset;
int rcp_wc_vpm_offset;
int psiz_vpm_offset;
int varyings_vpm_offset;
/* Geometry shader state */
struct {
/* VPM offset for the current vertex data output */
nir_variable *output_offset_var;
/* VPM offset for the current vertex header */
nir_variable *header_offset_var;
/* VPM header for the current vertex */
nir_variable *header_var;
/* Size of the complete VPM output header */
uint32_t output_header_size;
/* Size of the output data for a single vertex */
uint32_t output_vertex_data_size;
} gs;
BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];
nir_ssa_def *pos[4];
};
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state);
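/* Helper to emit a single-channel store_output at the given VPM offset,
* with an optional dynamic offset (used by the GS, where the target vertex
* is not known at compile time).
*/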
static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
nir_ssa_def *chan)
{
if (offset) {
/* When generating the VIR instruction, the base and the offset
* are just going to get added together with an ADD instruction
* so we might as well do the add here at the NIR level instead
* and let the constant folding do its magic.
*/
offset = nir_iadd_imm(b, offset, base);
base = 0;
} else {
offset = nir_imm_int(b, 0);
}
nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0);
}
/* Convert the uniform offset to bytes. If it happens to be a constant,
* constant-folding will clean up the shift for us.
*/
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
/* On SPIR-V/Vulkan we are already getting our offsets in
* bytes.
*/
if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
return;
b->cursor = nir_before_instr(&intr->instr);
nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
nir_instr_rewrite_src(&intr->instr,
&intr->src[0],
nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
nir_imm_int(b, 4))));
}
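/* Returns the index of the (location, component) pair in the used_outputs
* table of the shader key, which is also its offset (in 32-bit slots) from
* the start of the varyings section of the VPM, or -1 if the following
* stage doesn't read that component.
*/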
static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location, unsigned component)
{
uint32_t num_used_outputs = 0;
struct v3d_varying_slot *used_outputs = NULL;
switch (c->s->info.stage) {
case MESA_SHADER_VERTEX:
num_used_outputs = c->vs_key->num_used_outputs;
used_outputs = c->vs_key->used_outputs;
break;
case MESA_SHADER_GEOMETRY:
num_used_outputs = c->gs_key->num_used_outputs;
used_outputs = c->gs_key->used_outputs;
break;
default:
unreachable("Unsupported shader stage");
}
for (int i = 0; i < num_used_outputs; i++) {
struct v3d_varying_slot slot = used_outputs[i];
if (v3d_slot_get_slot(slot) == location &&
v3d_slot_get_component(slot) == component) {
return i;
}
}
return -1;
}
/* Lowers a store_output(gallium driver location) to a series of store_outputs
* with a driver_location equal to the offset in the VPM.
*
* For geometry shaders we need to emit multiple vertices so the VPM offsets
* need to be computed in the shader code based on the current vertex index.
*/
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr,
struct v3d_nir_lower_io_state *state)
{
b->cursor = nir_before_instr(&intr->instr);
/* If this is a geometry shader we need to emit our outputs
* to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
int start_comp = nir_intrinsic_component(intr);
unsigned location = nir_intrinsic_io_semantics(intr).location;
nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
intr->num_components);
/* Save off the components of the position for the setup of VPM inputs
* read by fixed function HW.
*/
if (location == VARYING_SLOT_POS) {
for (int i = 0; i < intr->num_components; i++) {
state->pos[start_comp + i] = nir_channel(b, src, i);
}
}
/* Just write psiz to its slot in the FF header right now. */
if (location == VARYING_SLOT_PSIZ &&
state->psiz_vpm_offset != -1) {
v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
}
if (location == VARYING_SLOT_LAYER) {
assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));
* From the GLES 3.2 spec:
*
* "When fragments are written to a layered framebuffer, the
* fragment's layer number selects an image from the array
* of images at each attachment (...). If the fragment's
* layer number is negative, or greater than or equal to
* the minimum number of layers of any attachment, the
* effects of the fragment on the framebuffer contents are
* undefined."
*
* This suggests we can just ignore that situation, however,
* for V3D an out-of-bounds layer index means that the binner
* might do out-of-bounds writes to the tile state. The
* simulator has an assert to catch this, so we play it safe
* here and make sure that doesn't happen by setting gl_Layer
* to 0 in that case (we always allocate tile state for at
* least one layer).
*/
nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
nir_ssa_def *cond = nir_ige(b, src, fb_layers);
nir_ssa_def *layer_id =
nir_bcsel(b, cond,
nir_imm_int(b, 0),
nir_ishl(b, src, nir_imm_int(b, 16)));
header = nir_ior(b, header, layer_id);
nir_store_var(b, state->gs.header_var, header, 0x1);
}
/* Scalarize outputs if it hasn't happened already, since we want to
* schedule each VPM write individually. We can skip any output
* components not read by the FS.
*/
for (int i = 0; i < intr->num_components; i++) {
int vpm_offset =
v3d_varying_slot_vpm_offset(c, location, start_comp + i);
if (vpm_offset == -1)
continue;
if (nir_src_is_const(intr->src[1]))
vpm_offset += nir_src_as_uint(intr->src[1]) * 4;
BITSET_SET(state->varyings_stored, vpm_offset);
v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
offset_reg, nir_channel(b, src, i));
}
nir_instr_remove(&intr->instr);
}
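/* Initializes the GS per-vertex VPM header: bit 0 is the New Primitive
* flag and bits 8..15 hold the vertex data length in 32-bit VPM words.
*/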
static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
const uint8_t NEW_PRIMITIVE_OFFSET = 0;
const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
uint32_t vertex_data_size = state->gs.output_vertex_data_size;
assert((vertex_data_size & 0xffffff00) == 0);
uint32_t header;
header = 1 << NEW_PRIMITIVE_OFFSET;
header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}
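/* Lowers emit_vertex: writes the fixed-function outputs and the header for
* the current vertex to the VPM, then advances the data/header offsets and
* clears the New Primitive bit for the next vertex.
*/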
static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *instr,
struct v3d_nir_lower_io_state *state)
{
b->cursor = nir_before_instr(&instr->instr);
nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);
/* Emit fixed function outputs */
v3d_nir_emit_ff_vpm_outputs(c, b, state);
/* Emit vertex header */
v3d_nir_store_output(b, 0, header_offset, header);
/* Update VPM offset for next vertex output data and header */
output_offset =
nir_iadd(b, output_offset,
nir_imm_int(b, state->gs.output_vertex_data_size));
header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));
/* Reset the New Primitive bit */
header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));
nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
nir_store_var(b, state->gs.header_var, header, 0x1);
nir_instr_remove(&instr->instr);
}
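/* Lowers end_primitive: resetting the header sets the New Primitive bit
* again, so the next emitted vertex starts a new primitive.
*/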
static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *instr,
struct v3d_nir_lower_io_state *state)
{
assert(state->gs.header_var);
b->cursor = nir_before_instr(&instr->instr);
reset_gs_header(b, state);
nir_instr_remove(&instr->instr);
}
/* Some vertex attribute formats may require a swizzle, but the hardware
* doesn't provide means to do that, so we need to apply the swizzle in the
* vertex shader.
*
* This is required at least in Vulkan to support the mandatory vertex
* attribute format VK_FORMAT_B8G8R8A8_UNORM.
*/
static void
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *instr)
{
assert(c->s->info.stage == MESA_SHADER_VERTEX);
if (!c->vs_key->va_swap_rb_mask)
return;
const uint32_t location = nir_intrinsic_io_semantics(instr).location;
if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
return;
assert(instr->num_components == 1);
const uint32_t comp = nir_intrinsic_component(instr);
if (comp == 0 || comp == 2)
nir_intrinsic_set_component(instr, (comp + 2) % 4);
}
/* Sometimes the origin of gl_PointCoord is in the upper left rather than the
* lower left so we need to flip it.
*
* This is only needed for Vulkan; Gallium uses lower_wpos_pntc instead.
*/
static void
v3d_nir_lower_fragment_input(struct v3d_compile *c, nir_builder *b,
nir_intrinsic_instr *intr)
{
assert(c->s->info.stage == MESA_SHADER_FRAGMENT);
/* Gallium uses lower_wpos_pntc */
if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
return;
b->cursor = nir_after_instr(&intr->instr);
int comp = nir_intrinsic_component(intr);
nir_variable *input_var =
nir_find_variable_with_driver_location(c->s,
nir_var_shader_in,
nir_intrinsic_base(intr));
if (input_var && util_varying_is_point_coord(input_var->data.location,
c->fs_key->point_sprite_mask)) {
assert(intr->num_components == 1);
nir_ssa_def *result = &intr->dest.ssa;
switch (comp) {
case 0:
case 1:
if (!c->fs_key->is_points)
result = nir_imm_float(b, 0.0);
break;
case 2:
result = nir_imm_float(b, 0.0);
break;
case 3:
result = nir_imm_float(b, 1.0);
break;
}
if (c->fs_key->point_coord_upper_left && comp == 1)
result = nir_fsub(b, nir_imm_float(b, 1.0), result);
if (result != &intr->dest.ssa) {
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
result,
result->parent_instr);
}
}
}
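/* Dispatches each I/O intrinsic to the appropriate per-stage lowering
* above.
*/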
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
struct nir_instr *instr,
struct v3d_nir_lower_io_state *state)
{
if (instr->type != nir_instr_type_intrinsic)
return;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
switch (intr->intrinsic) {
case nir_intrinsic_load_input:
if (c->s->info.stage == MESA_SHADER_VERTEX)
v3d_nir_lower_vertex_input(c, b, intr);
else if (c->s->info.stage == MESA_SHADER_FRAGMENT)
v3d_nir_lower_fragment_input(c, b, intr);
break;
case nir_intrinsic_load_uniform:
v3d_nir_lower_uniform(c, b, intr);
break;
case nir_intrinsic_store_output:
if (c->s->info.stage == MESA_SHADER_VERTEX ||
c->s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_vpm_output(c, b, intr, state);
}
break;
case nir_intrinsic_emit_vertex:
v3d_nir_lower_emit_vertex(c, b, intr, state);
break;
case nir_intrinsic_end_primitive:
v3d_nir_lower_end_primitive(c, b, intr, state);
break;
default:
break;
}
}
/* Remap the output var's .driver_location. This is purely for
* nir_print_shader() so that store_output can map back to a variable name.
*/
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
nir_foreach_shader_out_variable_safe(var, c->s) {
if (var->data.location == VARYING_SLOT_POS &&
state->pos_vpm_offset != -1) {
var->data.driver_location = state->pos_vpm_offset;
continue;
}
if (var->data.location == VARYING_SLOT_PSIZ &&
state->psiz_vpm_offset != -1) {
var->data.driver_location = state->psiz_vpm_offset;
continue;
}
int vpm_offset =
v3d_varying_slot_vpm_offset(c,
var->data.location,
var->data.location_frac);
if (vpm_offset != -1) {
var->data.driver_location =
state->varyings_vpm_offset + vpm_offset;
} else {
/* If we couldn't find a mapping for the var, delete
* it so that its old .driver_location doesn't confuse
* nir_print_shader().
*/
exec_node_remove(&var->node);
}
}
}
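/* Computes the layout of the VS VPM output segment. The fixed-function
* header is only present in the last geometry stage and consists of, in
* this order: clip coords (4 slots, coord shaders only), screen-space XY
* (2 slots), Zs and 1/Wc (1 slot each, render shaders only), and point
* size (1 slot, if written per-vertex). The varyings follow.
*
* For example, a non-coord VS with per-vertex point size lays out as:
* slots 0-1 XY, slot 2 Zs, slot 3 1/Wc, slot 4 psiz, slot 5+ varyings.
*/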
static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
uint32_t vpm_offset = 0;
state->pos_vpm_offset = -1;
state->vp_vpm_offset = -1;
state->zs_vpm_offset = -1;
state->rcp_wc_vpm_offset = -1;
state->psiz_vpm_offset = -1;
bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
if (needs_ff_outputs) {
if (c->vs_key->is_coord) {
state->pos_vpm_offset = vpm_offset;
vpm_offset += 4;
}
state->vp_vpm_offset = vpm_offset;
vpm_offset += 2;
if (!c->vs_key->is_coord) {
state->zs_vpm_offset = vpm_offset++;
state->rcp_wc_vpm_offset = vpm_offset++;
}
if (c->vs_key->per_vertex_point_size)
state->psiz_vpm_offset = vpm_offset++;
}
state->varyings_vpm_offset = vpm_offset;
c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
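/* Computes the layout of the GS VPM output segment: a header section with
* 1 global slot plus 1 slot per output vertex, followed by one data
* section per vertex (fixed-function outputs, then varyings).
*/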
static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
struct v3d_nir_lower_io_state *state)
{
/* 1 header slot for number of output vertices */
uint32_t vpm_offset = 1;
/* 1 header slot per output vertex */
const uint32_t num_vertices = c->s->info.gs.vertices_out;
vpm_offset += num_vertices;
state->gs.output_header_size = vpm_offset;
/* Vertex data: here we only compute offsets into a generic vertex data
* element. When it is time to actually write a particular vertex to the
* VPM, we will add that vertex's base offset in the VPM output to these
* offsets.
*
* If geometry shaders are present, they are always the last shader
* stage before rasterization, so we always emit fixed function outputs.
*/
vpm_offset = 0;
if (c->gs_key->is_coord) {
state->pos_vpm_offset = vpm_offset;
vpm_offset += 4;
} else {
state->pos_vpm_offset = -1;
}
state->vp_vpm_offset = vpm_offset;
vpm_offset += 2;
if (!c->gs_key->is_coord) {
state->zs_vpm_offset = vpm_offset++;
state->rcp_wc_vpm_offset = vpm_offset++;
} else {
state->zs_vpm_offset = -1;
state->rcp_wc_vpm_offset = -1;
}
/* Mesa enables OES_geometry_shader_point_size automatically with
* OES_geometry_shader so we always need to handle point size
* writes if present.
*/
if (c->gs_key->per_vertex_point_size)
state->psiz_vpm_offset = vpm_offset++;
state->varyings_vpm_offset = vpm_offset;
state->gs.output_vertex_data_size =
state->varyings_vpm_offset + c->gs_key->num_used_outputs;
c->vpm_output_size =
state->gs.output_header_size +
state->gs.output_vertex_data_size * num_vertices;
}
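/* Writes the fixed-function VPM outputs (clip position, screen-space XY,
* Zs, 1/Wc) computed from the gl_Position channels saved by
* v3d_nir_lower_vpm_output, plus zero-fills for any varyings the next
* stage reads but this shader never stored.
*/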
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state)
{
/* If this is a geometry shader we need to emit our fixed function
* outputs to the current vertex offset in the VPM.
*/
nir_ssa_def *offset_reg =
c->s->info.stage == MESA_SHADER_GEOMETRY ?
nir_load_var(b, state->gs.output_offset_var) : NULL;
for (int i = 0; i < 4; i++) {
if (!state->pos[i])
state->pos[i] = nir_ssa_undef(b, 1, 32);
}
nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);
if (state->pos_vpm_offset != -1) {
for (int i = 0; i < 4; i++) {
v3d_nir_store_output(b, state->pos_vpm_offset + i,
offset_reg, state->pos[i]);
}
}
if (state->vp_vpm_offset != -1) {
for (int i = 0; i < 2; i++) {
nir_ssa_def *pos;
nir_ssa_def *scale;
pos = state->pos[i];
if (i == 0)
scale = nir_load_viewport_x_scale(b);
else
scale = nir_load_viewport_y_scale(b);
pos = nir_fmul(b, pos, scale);
pos = nir_fmul(b, pos, rcp_wc);
/* Pre-V3D 4.3 hardware has a quirk where it expects XY
* coordinates in .8 fixed-point format, but then it
* will internally round it to .6 fixed-point,
* introducing a double rounding. The double rounding
* can cause very slight differences in triangle
* rasterization coverage that can actually be noticed by
* some CTS tests.
*
* The correct fix for this as recommended by Broadcom
* is to convert to .8 fixed-point with ffloor().
*/
pos = nir_f2i32(b, nir_ffloor(b, pos));
v3d_nir_store_output(b, state->vp_vpm_offset + i,
offset_reg, pos);
}
}
if (state->zs_vpm_offset != -1) {
nir_ssa_def *z = state->pos[2];
z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
z = nir_fmul(b, z, rcp_wc);
z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
}
if (state->rcp_wc_vpm_offset != -1) {
v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
offset_reg, rcp_wc);
}
/* Store 0 to varyings requested by the FS but not stored by the
* previous stage. This should be undefined behavior, but
* glsl-routing seems to rely on it.
*/
uint32_t num_used_outputs;
switch (c->s->info.stage) {
case MESA_SHADER_VERTEX:
num_used_outputs = c->vs_key->num_used_outputs;
break;
case MESA_SHADER_GEOMETRY:
num_used_outputs = c->gs_key->num_used_outputs;
break;
default:
unreachable("Unsupported shader stage");
}
for (int i = 0; i < num_used_outputs; i++) {
if (!BITSET_TEST(state->varyings_stored, i)) {
v3d_nir_store_output(b, state->varyings_vpm_offset + i,
offset_reg, nir_imm_int(b, 0));
}
}
}
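/* Creates and initializes the GS bookkeeping variables: the data offset
* starts right after the header section, and the header offset starts at
* 1, since VPM slot 0 holds the global output header.
*/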
static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
nir_function_impl *impl,
struct v3d_nir_lower_io_state *state)
{
nir_block *first = nir_start_block(impl);
b->cursor = nir_before_block(first);
const struct glsl_type *uint_type = glsl_uint_type();
assert(!state->gs.output_offset_var);
state->gs.output_offset_var =
nir_local_variable_create(impl, uint_type, "output_offset");
nir_store_var(b, state->gs.output_offset_var,
nir_imm_int(b, state->gs.output_header_size), 0x1);
assert(!state->gs.header_offset_var);
state->gs.header_offset_var =
nir_local_variable_create(impl, uint_type, "header_offset");
nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
assert(!state->gs.header_var);
state->gs.header_var =
nir_local_variable_create(impl, uint_type, "header");
reset_gs_header(b, state);
}
static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
struct v3d_nir_lower_io_state *state)
{
const uint8_t VERTEX_COUNT_OFFSET = 16;
/* Our GS header has 1 generic header slot (at VPM offset 0) and then
* one slot per output vertex after it. This means we don't need to
* have a variable just to keep track of the number of vertices we
* emitted and instead we can just compute it here from the header
* offset variable by removing the one generic header slot that always
* goes at the beginning of our header.
*/
nir_ssa_def *header_offset =
nir_load_var(b, state->gs.header_offset_var);
nir_ssa_def *vertex_count =
nir_isub(b, header_offset, nir_imm_int(b, 1));
nir_ssa_def *header =
nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
nir_ishl(b, vertex_count,
nir_imm_int(b, VERTEX_COUNT_OFFSET)));
v3d_nir_store_output(b, 0, NULL, header);
}
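/* Entry point: sets up the VPM layout for the stage, lowers every I/O
* intrinsic in the shader, and finally emits the fixed-function outputs
* (VS) or the global VPM output header (GS) at the end of the last block.
*/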
bool
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
struct v3d_nir_lower_io_state state = { 0 };
/* Set up the layout of the VPM outputs. */
switch (s->info.stage) {
case MESA_SHADER_VERTEX:
v3d_nir_setup_vpm_layout_vs(c, &state);
break;
case MESA_SHADER_GEOMETRY:
v3d_nir_setup_vpm_layout_gs(c, &state);
break;
case MESA_SHADER_FRAGMENT:
case MESA_SHADER_COMPUTE:
break;
default:
unreachable("Unsupported shader stage");
}
nir_foreach_function(function, s) {
if (function->impl) {
nir_builder b;
nir_builder_init(&b, function->impl);
if (c->s->info.stage == MESA_SHADER_GEOMETRY)
emit_gs_prolog(c, &b, function->impl, &state);
nir_foreach_block(block, function->impl) {
nir_foreach_instr_safe(instr, block)
v3d_nir_lower_io_instr(c, &b, instr,
&state);
}
nir_block *last = nir_impl_last_block(function->impl);
b.cursor = nir_after_block(last);
if (s->info.stage == MESA_SHADER_VERTEX) {
v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
} else if (s->info.stage == MESA_SHADER_GEOMETRY) {
emit_gs_vpm_output_header_prolog(c, &b, &state);
}
nir_metadata_preserve(function->impl,
nir_metadata_block_index |
nir_metadata_dominance);
}
}
if (s->info.stage == MESA_SHADER_VERTEX ||
s->info.stage == MESA_SHADER_GEOMETRY) {
v3d_nir_lower_io_update_output_var_base(c, &state);
}
/* It is really unlikely that this pass makes no progress, and filtering
* out those cases would make the code more complex, but we are still
* interested in running this lowering through NIR_PASS.
*/
return true;
}