755 lines
29 KiB
C
755 lines
29 KiB
C
/*
|
||
* Copyright © 2015 Broadcom
|
||
*
|
||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||
* copy of this software and associated documentation files (the "Software"),
|
||
* to deal in the Software without restriction, including without limitation
|
||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||
* and/or sell copies of the Software, and to permit persons to whom the
|
||
* Software is furnished to do so, subject to the following conditions:
|
||
*
|
||
* The above copyright notice and this permission notice (including the next
|
||
* paragraph) shall be included in all copies or substantial portions of the
|
||
* Software.
|
||
*
|
||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||
* IN THE SOFTWARE.
|
||
*/
|
||
|
||
#include "compiler/v3d_compiler.h"
|
||
#include "compiler/nir/nir_builder.h"
|
||
|
||
#include "util/u_helpers.h"
|
||
|
||
/**
|
||
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
|
||
* intrinsics into something amenable to the V3D architecture.
|
||
*
|
||
* Most of the work is turning the VS's store_output intrinsics from working
|
||
* on a base representing the gallium-level vec4 driver_location to an offset
|
||
* within the VPM, and emitting the header that's read by the fixed function
|
||
* hardware between the VS and FS.
|
||
*
|
||
* We also adjust the offsets on uniform loads to be in bytes, since that's
|
||
* what we need for indirect addressing with general TMU access.
|
||
*/
|
||
|
||
/* Per-shader state for the IO lowering pass: the VPM layout computed up
 * front, plus bookkeeping filled in while walking the shader.
 */
struct v3d_nir_lower_io_state {
        /* VPM offsets for the fixed-function outputs, or -1 when that
         * output is not present in the layout for this shader.
         */
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        /* VPM offset where the varyings read by the next stage begin */
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        /* Tracks which varying VPM slots have been written by the shader,
         * so unwritten ones can be zero-filled at the end of the pass.
         */
        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        /* Components of the position output, saved while lowering the
         * VARYING_SLOT_POS store so the fixed-function outputs can be
         * derived from them later.
         */
        nir_ssa_def *pos[4];
};
|
||
|
||
/* Forward declaration: the definition is below, but this is also called
 * while lowering emit_vertex in geometry shaders.
 */
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);
|
||
|
||
static void
|
||
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
|
||
nir_ssa_def *chan)
|
||
{
|
||
if (offset) {
|
||
/* When generating the VIR instruction, the base and the offset
|
||
* are just going to get added together with an ADD instruction
|
||
* so we might as well do the add here at the NIR level instead
|
||
* and let the constant folding do its magic.
|
||
*/
|
||
offset = nir_iadd_imm(b, offset, base);
|
||
base = 0;
|
||
} else {
|
||
offset = nir_imm_int(b, 0);
|
||
}
|
||
|
||
nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0);
|
||
}
|
||
|
||
/* Convert the uniform offset to bytes. If it happens to be a constant,
|
||
* constant-folding will clean up the shift for us.
|
||
*/
|
||
static void
|
||
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
|
||
nir_intrinsic_instr *intr)
|
||
{
|
||
/* On SPIR-V/Vulkan we are already getting our offsets in
|
||
* bytes.
|
||
*/
|
||
if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
|
||
return;
|
||
|
||
b->cursor = nir_before_instr(&intr->instr);
|
||
|
||
nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);
|
||
|
||
nir_instr_rewrite_src(&intr->instr,
|
||
&intr->src[0],
|
||
nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
|
||
nir_imm_int(b, 4))));
|
||
}
|
||
|
||
static int
|
||
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location, unsigned component)
|
||
{
|
||
uint32_t num_used_outputs = 0;
|
||
struct v3d_varying_slot *used_outputs = NULL;
|
||
switch (c->s->info.stage) {
|
||
case MESA_SHADER_VERTEX:
|
||
num_used_outputs = c->vs_key->num_used_outputs;
|
||
used_outputs = c->vs_key->used_outputs;
|
||
break;
|
||
case MESA_SHADER_GEOMETRY:
|
||
num_used_outputs = c->gs_key->num_used_outputs;
|
||
used_outputs = c->gs_key->used_outputs;
|
||
break;
|
||
default:
|
||
unreachable("Unsupported shader stage");
|
||
}
|
||
|
||
for (int i = 0; i < num_used_outputs; i++) {
|
||
struct v3d_varying_slot slot = used_outputs[i];
|
||
|
||
if (v3d_slot_get_slot(slot) == location &&
|
||
v3d_slot_get_component(slot) == component) {
|
||
return i;
|
||
}
|
||
}
|
||
|
||
return -1;
|
||
}
|
||
|
||
/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);
        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just psiz to the position in the FF header right now. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
        }

        /* gl_Layer is packed into bits 16..23 of the GS vertex header
         * rather than written as a regular varying.
         */
        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation, however,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes access to the tile state. The
                 * simulator has an assert to catch this, so we play safe here
                 * and we make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location, start_comp + i);

                if (vpm_offset == -1)
                        continue;

                /* Account for a constant array index on the output
                 * (4 scalar slots per array element).
                 */
                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}
|
||
|
||
static inline void
|
||
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
|
||
{
|
||
const uint8_t NEW_PRIMITIVE_OFFSET = 0;
|
||
const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
|
||
|
||
uint32_t vertex_data_size = state->gs.output_vertex_data_size;
|
||
assert((vertex_data_size & 0xffffff00) == 0);
|
||
|
||
uint32_t header;
|
||
header = 1 << NEW_PRIMITIVE_OFFSET;
|
||
header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
|
||
nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
|
||
}
|
||
|
||
/* Lowers emit_vertex: writes the fixed-function outputs and this vertex's
 * header to the VPM, then advances the per-vertex data/header offsets and
 * clears the New Primitive bit for the next vertex.
 */
static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        /* Load current header/offset state before emitting anything */
        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
        nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd(b, output_offset,
                         nir_imm_int(b, state->gs.output_vertex_data_size));

        header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));

        /* Reset the New Primitive bit */
        header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}
|
||
|
||
static void
|
||
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
|
||
nir_intrinsic_instr *instr,
|
||
struct v3d_nir_lower_io_state *state)
|
||
{
|
||
assert(state->gs.header_var);
|
||
b->cursor = nir_before_instr(&instr->instr);
|
||
reset_gs_header(b, state);
|
||
|
||
nir_instr_remove(&instr->instr);
|
||
}
|
||
|
||
/* Some vertex attribute formats may require to apply a swizzle but the hardware
|
||
* doesn't provide means to do that, so we need to apply the swizzle in the
|
||
* vertex shader.
|
||
*
|
||
* This is required at least in Vulkan to support madatory vertex attribute
|
||
* format VK_FORMAT_B8G8R8A8_UNORM.
|
||
*/
|
||
static void
|
||
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
|
||
nir_intrinsic_instr *instr)
|
||
{
|
||
assert(c->s->info.stage == MESA_SHADER_VERTEX);
|
||
|
||
if (!c->vs_key->va_swap_rb_mask)
|
||
return;
|
||
|
||
const uint32_t location = nir_intrinsic_io_semantics(instr).location;
|
||
|
||
if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
|
||
return;
|
||
|
||
assert(instr->num_components == 1);
|
||
const uint32_t comp = nir_intrinsic_component(instr);
|
||
if (comp == 0 || comp == 2)
|
||
nir_intrinsic_set_component(instr, (comp + 2) % 4);
|
||
}
|
||
|
||
/* Sometimes the origin of gl_PointCoord is in the upper left rather than the
 * lower left so we need to flip it.
 *
 * This is needed for Vulkan, Gallium uses lower_wpos_pntc.
 */
static void
v3d_nir_lower_fragment_input(struct v3d_compile *c, nir_builder *b,
                             nir_intrinsic_instr *intr)
{
        assert(c->s->info.stage == MESA_SHADER_FRAGMENT);

        /* Gallium uses lower_wpos_pntc */
        if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
                return;

        /* We insert fixups after the load so we can rewrite its users */
        b->cursor = nir_after_instr(&intr->instr);

        int comp = nir_intrinsic_component(intr);

        /* Map the intrinsic's base back to its input variable so we can
         * tell whether this is a point-coordinate varying.
         */
        nir_variable *input_var =
                nir_find_variable_with_driver_location(c->s,
                                                       nir_var_shader_in,
                                                       nir_intrinsic_base(intr));

        if (input_var && util_varying_is_point_coord(input_var->data.location,
                                                     c->fs_key->point_sprite_mask)) {
                assert(intr->num_components == 1);

                nir_ssa_def *result = &intr->dest.ssa;

                switch (comp) {
                case 0:
                case 1:
                        /* X/Y are only meaningful when drawing points;
                         * otherwise force them to 0.
                         */
                        if (!c->fs_key->is_points)
                                result = nir_imm_float(b, 0.0);
                        break;
                case 2:
                        /* Z is always 0 for point coords */
                        result = nir_imm_float(b, 0.0);
                        break;
                case 3:
                        /* W is always 1 for point coords */
                        result = nir_imm_float(b, 1.0);
                        break;
                }

                /* Flip Y when the origin is the upper left */
                if (c->fs_key->point_coord_upper_left && comp == 1)
                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);

                /* Redirect users of the original load to the fixed-up
                 * value, but only past the fixup instructions themselves.
                 */
                if (result != &intr->dest.ssa) {
                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
                                                       result,
                                                       result->parent_instr);
                }
        }
}
|
||
|
||
static void
|
||
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
|
||
struct nir_instr *instr,
|
||
struct v3d_nir_lower_io_state *state)
|
||
{
|
||
if (instr->type != nir_instr_type_intrinsic)
|
||
return;
|
||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||
|
||
switch (intr->intrinsic) {
|
||
case nir_intrinsic_load_input:
|
||
if (c->s->info.stage == MESA_SHADER_VERTEX)
|
||
v3d_nir_lower_vertex_input(c, b, intr);
|
||
else if (c->s->info.stage == MESA_SHADER_FRAGMENT)
|
||
v3d_nir_lower_fragment_input(c, b, intr);
|
||
break;
|
||
|
||
case nir_intrinsic_load_uniform:
|
||
v3d_nir_lower_uniform(c, b, intr);
|
||
break;
|
||
|
||
case nir_intrinsic_store_output:
|
||
if (c->s->info.stage == MESA_SHADER_VERTEX ||
|
||
c->s->info.stage == MESA_SHADER_GEOMETRY) {
|
||
v3d_nir_lower_vpm_output(c, b, intr, state);
|
||
}
|
||
break;
|
||
|
||
case nir_intrinsic_emit_vertex:
|
||
v3d_nir_lower_emit_vertex(c, b, intr, state);
|
||
break;
|
||
|
||
case nir_intrinsic_end_primitive:
|
||
v3d_nir_lower_end_primitive(c, b, intr, state);
|
||
break;
|
||
|
||
default:
|
||
break;
|
||
}
|
||
}
|
||
|
||
/* Remap the output var's .driver_location. This is purely for
|
||
* nir_print_shader() so that store_output can map back to a variable name.
|
||
*/
|
||
static void
|
||
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
|
||
struct v3d_nir_lower_io_state *state)
|
||
{
|
||
nir_foreach_shader_out_variable_safe(var, c->s) {
|
||
if (var->data.location == VARYING_SLOT_POS &&
|
||
state->pos_vpm_offset != -1) {
|
||
var->data.driver_location = state->pos_vpm_offset;
|
||
continue;
|
||
}
|
||
|
||
if (var->data.location == VARYING_SLOT_PSIZ &&
|
||
state->psiz_vpm_offset != -1) {
|
||
var->data.driver_location = state->psiz_vpm_offset;
|
||
continue;
|
||
}
|
||
|
||
int vpm_offset =
|
||
v3d_varying_slot_vpm_offset(c,
|
||
var->data.location,
|
||
var->data.location_frac);
|
||
if (vpm_offset != -1) {
|
||
var->data.driver_location =
|
||
state->varyings_vpm_offset + vpm_offset;
|
||
} else {
|
||
/* If we couldn't find a mapping for the var, delete
|
||
* it so that its old .driver_location doesn't confuse
|
||
* nir_print_shader().
|
||
*/
|
||
exec_node_remove(&var->node);
|
||
}
|
||
}
|
||
}
|
||
|
||
/* Computes the VPM output layout for a vertex shader: fixed-function
 * outputs first (only when the VS is the last geometry stage before
 * rasterization), followed by the varyings read by the FS.
 */
static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        /* -1 marks a fixed-function output as absent from the layout */
        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                /* Coordinate shaders also store the full 4-component
                 * position.
                 */
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                /* Viewport-transformed X/Y, always present */
                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        /* Never report a zero-sized VPM output segment */
        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
|
||
|
||
static void
|
||
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
|
||
struct v3d_nir_lower_io_state *state)
|
||
{
|
||
/* 1 header slot for number of output vertices */
|
||
uint32_t vpm_offset = 1;
|
||
|
||
/* 1 header slot per output vertex */
|
||
const uint32_t num_vertices = c->s->info.gs.vertices_out;
|
||
vpm_offset += num_vertices;
|
||
|
||
state->gs.output_header_size = vpm_offset;
|
||
|
||
/* Vertex data: here we only compute offsets into a generic vertex data
|
||
* elements. When it is time to actually write a particular vertex to
|
||
* the VPM, we will add the offset for that vertex into the VPM output
|
||
* to these offsets.
|
||
*
|
||
* If geometry shaders are present, they are always the last shader
|
||
* stage before rasterization, so we always emit fixed function outputs.
|
||
*/
|
||
vpm_offset = 0;
|
||
if (c->gs_key->is_coord) {
|
||
state->pos_vpm_offset = vpm_offset;
|
||
vpm_offset += 4;
|
||
} else {
|
||
state->pos_vpm_offset = -1;
|
||
}
|
||
|
||
state->vp_vpm_offset = vpm_offset;
|
||
vpm_offset += 2;
|
||
|
||
if (!c->gs_key->is_coord) {
|
||
state->zs_vpm_offset = vpm_offset++;
|
||
state->rcp_wc_vpm_offset = vpm_offset++;
|
||
} else {
|
||
state->zs_vpm_offset = -1;
|
||
state->rcp_wc_vpm_offset = -1;
|
||
}
|
||
|
||
/* Mesa enables OES_geometry_shader_point_size automatically with
|
||
* OES_geometry_shader so we always need to handle point size
|
||
* writes if present.
|
||
*/
|
||
if (c->gs_key->per_vertex_point_size)
|
||
state->psiz_vpm_offset = vpm_offset++;
|
||
|
||
state->varyings_vpm_offset = vpm_offset;
|
||
|
||
state->gs.output_vertex_data_size =
|
||
state->varyings_vpm_offset + c->gs_key->num_used_outputs;
|
||
|
||
c->vpm_output_size =
|
||
state->gs.output_header_size +
|
||
state->gs.output_vertex_data_size * num_vertices;
|
||
}
|
||
|
||
/* Emits the fixed-function VPM outputs (clip position, viewport-space XY,
 * Z, 1/Wc, point size) from the saved position components, and zero-fills
 * any varyings the FS reads that the shader never stored.
 */
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        /* Fill in undefs for any position components that were never
         * written, so the code below can reference all four.
         */
        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_ssa_undef(b, 1, 32);
        }

        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_ssa_def *pos;
                        nir_ssa_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects XY
                         * coordinates in .8 fixed-point format, but then it
                         * will internally round it to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be noticed by
                         * some CTS tests.
                         *
                         * The correct fix for this as recommended by Broadcom
                         * is to convert to .8 fixed-point with ffloor().
                         */
                        pos = nir_f2i32(b, nir_ffloor(b, pos));
                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                /* Z is scaled and offset into the viewport depth range */
                nir_ssa_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}
|
||
|
||
static void
|
||
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
|
||
nir_function_impl *impl,
|
||
struct v3d_nir_lower_io_state *state)
|
||
{
|
||
nir_block *first = nir_start_block(impl);
|
||
b->cursor = nir_before_block(first);
|
||
|
||
const struct glsl_type *uint_type = glsl_uint_type();
|
||
|
||
assert(!state->gs.output_offset_var);
|
||
state->gs.output_offset_var =
|
||
nir_local_variable_create(impl, uint_type, "output_offset");
|
||
nir_store_var(b, state->gs.output_offset_var,
|
||
nir_imm_int(b, state->gs.output_header_size), 0x1);
|
||
|
||
assert(!state->gs.header_offset_var);
|
||
state->gs.header_offset_var =
|
||
nir_local_variable_create(impl, uint_type, "header_offset");
|
||
nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);
|
||
|
||
assert(!state->gs.header_var);
|
||
state->gs.header_var =
|
||
nir_local_variable_create(impl, uint_type, "header");
|
||
reset_gs_header(b, state);
|
||
}
|
||
|
||
/* Emits, at the end of the GS, the general VPM output header slot: the
 * header size in the low bits and the emitted vertex count in bits 16+.
 */
static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need to
         * have a variable just to keep track of the number of vertices we
         * emitted and instead we can just compute it here from the header
         * offset variable by removing the one generic header slot that always
         * goes at the beginning of our header.
         */
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *vertex_count =
                nir_isub(b, header_offset, nir_imm_int(b, 1));
        nir_ssa_def *header =
                nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
                        nir_ishl(b, vertex_count,
                                 nir_imm_int(b, VERTEX_COUNT_OFFSET)));

        /* The general header always lives at VPM offset 0 */
        v3d_nir_store_output(b, 0, NULL, header);
}
|
||
|
||
/* Pass entry point: computes the VPM output layout for the stage, lowers
 * all IO intrinsics, appends the end-of-shader fixed-function/header
 * writes, and remaps output variable locations for debug printing.
 *
 * Always reports progress (see the comment before the return).
 */
bool
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                /* No VPM output layout for these stages; only the
                 * per-instruction lowerings below apply.
                 */
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_builder b;
                        nir_builder_init(&b, function->impl);

                        /* GS needs its offset/header variables set up
                         * before any instruction is lowered.
                         */
                        if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                                emit_gs_prolog(c, &b, function->impl, &state);

                        nir_foreach_block(block, function->impl) {
                                nir_foreach_instr_safe(instr, block)
                                        v3d_nir_lower_io_instr(c, &b, instr,
                                                               &state);
                        }

                        /* Epilogue at the end of the shader: VS emits the
                         * fixed-function outputs; GS writes the general
                         * VPM output header.
                         */
                        nir_block *last = nir_impl_last_block(function->impl);
                        b.cursor = nir_after_block(last);
                        if (s->info.stage == MESA_SHADER_VERTEX) {
                                v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                        } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                                emit_gs_vpm_output_header_prolog(c, &b, &state);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }

        /* It is really unlikely that we don't get progress here, and fully
         * filtering when not would make code more complex, but we are still
         * interested on getting this lowering going through NIR_PASS
         */
        return true;
}
|