1053 lines
34 KiB
C
1053 lines
34 KiB
C
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
|
|
|
|
#include "compiler/nir/nir_builder.h"
|
|
#include "ir3_compiler.h"
|
|
#include "ir3_nir.h"
|
|
|
|
struct state {
|
|
uint32_t topology;
|
|
|
|
struct primitive_map {
|
|
/* +POSITION, +PSIZE, ... - see shader_io_get_unique_index */
|
|
unsigned loc[12 + 32];
|
|
unsigned stride;
|
|
} map;
|
|
|
|
nir_ssa_def *header;
|
|
|
|
nir_variable *vertex_count_var;
|
|
nir_variable *emitted_vertex_var;
|
|
nir_variable *vertex_flags_out;
|
|
|
|
struct exec_list old_outputs;
|
|
struct exec_list new_outputs;
|
|
struct exec_list emit_outputs;
|
|
|
|
/* tess ctrl shader on a650 gets the local primitive id at different bits: */
|
|
unsigned local_primitive_id_start;
|
|
};
|
|
|
|
static nir_ssa_def *
|
|
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
|
|
{
|
|
return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
|
|
nir_imm_int(b, mask));
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_invocation_id(nir_builder *b, struct state *state)
|
|
{
|
|
return bitfield_extract(b, state->header, 11, 31);
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_vertex_id(nir_builder *b, struct state *state)
|
|
{
|
|
return bitfield_extract(b, state->header, 6, 31);
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_local_primitive_id(nir_builder *b, struct state *state)
|
|
{
|
|
return bitfield_extract(b, state->header, state->local_primitive_id_start,
|
|
63);
|
|
}
|
|
|
|
static bool
|
|
is_tess_levels(gl_varying_slot slot)
|
|
{
|
|
return (slot == VARYING_SLOT_PRIMITIVE_ID ||
|
|
slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
|
|
slot == VARYING_SLOT_TESS_LEVEL_INNER);
|
|
}
|
|
|
|
/* Return a deterministic index for varyings. We can't rely on driver_location
 * to be correct without linking the different stages first, so we create
 * "primitive maps" where the producer decides on the location of each varying
 * slot and then exports a per-slot array to the consumer. This compacts the
 * gl_varying_slot space down a bit so that the primitive maps aren't too
 * large.
 *
 * Note: per-patch varyings are currently handled separately, without any
 * compacting.
 *
 * TODO: We could probably use the driver_location's directly in the non-SSO
 * (Vulkan) case.
 */
|
|
|
|
static unsigned
|
|
shader_io_get_unique_index(gl_varying_slot slot)
|
|
{
|
|
switch (slot) {
|
|
case VARYING_SLOT_POS: return 0;
|
|
case VARYING_SLOT_PSIZ: return 1;
|
|
case VARYING_SLOT_COL0: return 2;
|
|
case VARYING_SLOT_COL1: return 3;
|
|
case VARYING_SLOT_BFC0: return 4;
|
|
case VARYING_SLOT_BFC1: return 5;
|
|
case VARYING_SLOT_FOGC: return 6;
|
|
case VARYING_SLOT_CLIP_DIST0: return 7;
|
|
case VARYING_SLOT_CLIP_DIST1: return 8;
|
|
case VARYING_SLOT_CLIP_VERTEX: return 9;
|
|
case VARYING_SLOT_LAYER: return 10;
|
|
case VARYING_SLOT_VIEWPORT: return 11;
|
|
case VARYING_SLOT_VAR0 ... VARYING_SLOT_VAR31: {
|
|
struct state state = {};
|
|
STATIC_ASSERT(ARRAY_SIZE(state.map.loc) - 1 ==
|
|
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
|
struct ir3_shader_variant v = {};
|
|
STATIC_ASSERT(ARRAY_SIZE(v.output_loc) - 1 ==
|
|
(12 + VARYING_SLOT_VAR31 - VARYING_SLOT_VAR0));
|
|
return 12 + (slot - VARYING_SLOT_VAR0);
|
|
}
|
|
default:
|
|
unreachable("illegal slot in get unique index\n");
|
|
}
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_local_offset(nir_builder *b, struct state *state, nir_ssa_def *vertex,
|
|
uint32_t location, uint32_t comp, nir_ssa_def *offset)
|
|
{
|
|
nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
|
|
nir_ssa_def *primitive_offset =
|
|
nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
|
|
nir_ssa_def *attr_offset;
|
|
nir_ssa_def *vertex_stride;
|
|
unsigned index = shader_io_get_unique_index(location);
|
|
|
|
switch (b->shader->info.stage) {
|
|
case MESA_SHADER_VERTEX:
|
|
case MESA_SHADER_TESS_EVAL:
|
|
vertex_stride = nir_imm_int(b, state->map.stride * 4);
|
|
attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);
|
|
break;
|
|
case MESA_SHADER_TESS_CTRL:
|
|
case MESA_SHADER_GEOMETRY:
|
|
vertex_stride = nir_load_vs_vertex_stride_ir3(b);
|
|
attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
|
|
nir_imm_int(b, comp * 4));
|
|
break;
|
|
default:
|
|
unreachable("bad shader stage");
|
|
}
|
|
|
|
nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);
|
|
|
|
return nir_iadd(
|
|
b, nir_iadd(b, primitive_offset, vertex_offset),
|
|
nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));
|
|
}
|
|
|
|
static nir_intrinsic_instr *
|
|
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
|
|
nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1,
|
|
nir_ssa_def *src2)
|
|
{
|
|
nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, op);
|
|
|
|
new_intr->src[0] = nir_src_for_ssa(src0);
|
|
if (src1)
|
|
new_intr->src[1] = nir_src_for_ssa(src1);
|
|
if (src2)
|
|
new_intr->src[2] = nir_src_for_ssa(src2);
|
|
|
|
new_intr->num_components = intr->num_components;
|
|
|
|
if (nir_intrinsic_infos[op].has_dest)
|
|
nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, intr->num_components,
|
|
intr->dest.ssa.bit_size, NULL);
|
|
|
|
nir_builder_instr_insert(b, &new_intr->instr);
|
|
|
|
if (nir_intrinsic_infos[op].has_dest)
|
|
nir_ssa_def_rewrite_uses(&intr->dest.ssa, &new_intr->dest.ssa);
|
|
|
|
nir_instr_remove(&intr->instr);
|
|
|
|
return new_intr;
|
|
}
|
|
|
|
static void
|
|
build_primitive_map(nir_shader *shader, struct primitive_map *map)
|
|
{
|
|
/* All interfaces except the TCS <-> TES interface use ldlw, which takes
|
|
* an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses
|
|
* ldg, which takes an offset in dwords, but each per-vertex slot has
|
|
* space for every vertex, and there's space at the beginning for
|
|
* per-patch varyings.
|
|
*/
|
|
unsigned slot_size = 16, start = 0;
|
|
if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
|
|
slot_size = shader->info.tess.tcs_vertices_out * 4;
|
|
start = util_last_bit(shader->info.patch_outputs_written) * 4;
|
|
}
|
|
|
|
uint64_t mask = shader->info.outputs_written;
|
|
unsigned loc = start;
|
|
while (mask) {
|
|
int location = u_bit_scan64(&mask);
|
|
if (is_tess_levels(location))
|
|
continue;
|
|
|
|
unsigned index = shader_io_get_unique_index(location);
|
|
map->loc[index] = loc;
|
|
loc += slot_size;
|
|
}
|
|
|
|
map->stride = loc;
|
|
/* Use units of dwords for the stride. */
|
|
if (shader->info.stage != MESA_SHADER_TESS_CTRL)
|
|
map->stride /= 4;
|
|
}
|
|
|
|
/* For shader stages that receive a primitive map, calculate how big it should
 * be.
 */
|
|
|
|
static unsigned
|
|
calc_primitive_map_size(nir_shader *shader)
|
|
{
|
|
uint64_t mask = shader->info.inputs_read;
|
|
unsigned max_index = 0;
|
|
while (mask) {
|
|
int location = u_bit_scan64(&mask);
|
|
|
|
if (is_tess_levels(location))
|
|
continue;
|
|
|
|
unsigned index = shader_io_get_unique_index(location);
|
|
max_index = MAX2(max_index, index + 1);
|
|
}
|
|
|
|
return max_index;
|
|
}
|
|
|
|
static void
|
|
lower_block_to_explicit_output(nir_block *block, nir_builder *b,
|
|
struct state *state)
|
|
{
|
|
nir_foreach_instr_safe (instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_store_output: {
|
|
// src[] = { value, offset }.
|
|
|
|
/* nir_lower_io_to_temporaries replaces all access to output
|
|
* variables with temp variables and then emits a nir_copy_var at
|
|
* the end of the shader. Thus, we should always get a full wrmask
|
|
* here.
|
|
*/
|
|
assert(
|
|
util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
|
|
|
b->cursor = nir_instr_remove(&intr->instr);
|
|
|
|
nir_ssa_def *vertex_id = build_vertex_id(b, state);
|
|
nir_ssa_def *offset = build_local_offset(
|
|
b, state, vertex_id, nir_intrinsic_io_semantics(intr).location,
|
|
nir_intrinsic_component(intr), intr->src[1].ssa);
|
|
|
|
nir_store_shared_ir3(b, intr->src[0].ssa, offset);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
local_thread_id(nir_builder *b)
|
|
{
|
|
return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
|
|
}
|
|
|
|
void
|
|
ir3_nir_lower_to_explicit_output(nir_shader *shader,
|
|
struct ir3_shader_variant *v,
|
|
unsigned topology)
|
|
{
|
|
struct state state = {};
|
|
|
|
build_primitive_map(shader, &state.map);
|
|
memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
assert(impl);
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, impl);
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
|
|
if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
|
|
state.header = nir_load_tcs_header_ir3(&b);
|
|
else
|
|
state.header = nir_load_gs_header_ir3(&b);
|
|
|
|
nir_foreach_block_safe (block, impl)
|
|
lower_block_to_explicit_output(block, &b, &state);
|
|
|
|
nir_metadata_preserve(impl,
|
|
nir_metadata_block_index | nir_metadata_dominance);
|
|
|
|
v->output_size = state.map.stride;
|
|
}
|
|
|
|
static void
|
|
lower_block_to_explicit_input(nir_block *block, nir_builder *b,
|
|
struct state *state)
|
|
{
|
|
nir_foreach_instr_safe (instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_per_vertex_input: {
|
|
// src[] = { vertex, offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *offset = build_local_offset(
|
|
b, state,
|
|
intr->src[0].ssa, // this is typically gl_InvocationID
|
|
nir_intrinsic_io_semantics(intr).location,
|
|
nir_intrinsic_component(intr), intr->src[1].ssa);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL,
|
|
NULL);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_load_invocation_id: {
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *iid = build_invocation_id(b, state);
|
|
nir_ssa_def_rewrite_uses(&intr->dest.ssa, iid);
|
|
nir_instr_remove(&intr->instr);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
ir3_nir_lower_to_explicit_input(nir_shader *shader,
|
|
struct ir3_shader_variant *v)
|
|
{
|
|
struct state state = {};
|
|
|
|
/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
|
|
* HS uses a different primitive id, which starts at bit 16 in the header
|
|
*/
|
|
if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
|
|
v->compiler->tess_use_shared)
|
|
state.local_primitive_id_start = 16;
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
assert(impl);
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, impl);
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
|
|
if (shader->info.stage == MESA_SHADER_GEOMETRY)
|
|
state.header = nir_load_gs_header_ir3(&b);
|
|
else
|
|
state.header = nir_load_tcs_header_ir3(&b);
|
|
|
|
nir_foreach_block_safe (block, impl)
|
|
lower_block_to_explicit_input(block, &b, &state);
|
|
|
|
v->input_size = calc_primitive_map_size(shader);
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_tcs_out_vertices(nir_builder *b)
|
|
{
|
|
if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
|
|
return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
|
|
else
|
|
return nir_load_patch_vertices_in(b);
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_per_vertex_offset(nir_builder *b, struct state *state,
|
|
nir_ssa_def *vertex, uint32_t location, uint32_t comp,
|
|
nir_ssa_def *offset)
|
|
{
|
|
nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
|
|
nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
|
|
nir_ssa_def *patch_offset = nir_imul24(b, patch_id, patch_stride);
|
|
nir_ssa_def *attr_offset;
|
|
|
|
if (nir_src_is_const(nir_src_for_ssa(offset))) {
|
|
location += nir_src_as_uint(nir_src_for_ssa(offset));
|
|
offset = nir_imm_int(b, 0);
|
|
} else {
|
|
/* Offset is in vec4's, but we need it in unit of components for the
|
|
* load/store_global_ir3 offset.
|
|
*/
|
|
offset = nir_ishl(b, offset, nir_imm_int(b, 2));
|
|
}
|
|
|
|
nir_ssa_def *vertex_offset;
|
|
if (vertex) {
|
|
unsigned index = shader_io_get_unique_index(location);
|
|
switch (b->shader->info.stage) {
|
|
case MESA_SHADER_TESS_CTRL:
|
|
attr_offset = nir_imm_int(b, state->map.loc[index] + comp);
|
|
break;
|
|
case MESA_SHADER_TESS_EVAL:
|
|
attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
|
|
nir_imm_int(b, comp));
|
|
break;
|
|
default:
|
|
unreachable("bad shader state");
|
|
}
|
|
|
|
attr_offset = nir_iadd(b, attr_offset,
|
|
nir_imul24(b, offset, build_tcs_out_vertices(b)));
|
|
vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));
|
|
} else {
|
|
assert(location >= VARYING_SLOT_PATCH0 &&
|
|
location <= VARYING_SLOT_TESS_MAX);
|
|
unsigned index = location - VARYING_SLOT_PATCH0;
|
|
attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);
|
|
vertex_offset = nir_imm_int(b, 0);
|
|
}
|
|
|
|
return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_patch_offset(nir_builder *b, struct state *state, uint32_t base,
|
|
uint32_t comp, nir_ssa_def *offset)
|
|
{
|
|
return build_per_vertex_offset(b, state, NULL, base, comp, offset);
|
|
}
|
|
|
|
static void
|
|
tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
|
|
{
|
|
switch (state->topology) {
|
|
case IR3_TESS_TRIANGLES:
|
|
*inner = 1;
|
|
*outer = 3;
|
|
break;
|
|
case IR3_TESS_QUADS:
|
|
*inner = 2;
|
|
*outer = 4;
|
|
break;
|
|
case IR3_TESS_ISOLINES:
|
|
*inner = 0;
|
|
*outer = 2;
|
|
break;
|
|
default:
|
|
unreachable("bad");
|
|
}
|
|
}
|
|
|
|
static nir_ssa_def *
|
|
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, uint32_t comp,
|
|
struct state *state)
|
|
{
|
|
uint32_t inner_levels, outer_levels;
|
|
tess_level_components(state, &inner_levels, &outer_levels);
|
|
|
|
const uint32_t patch_stride = 1 + inner_levels + outer_levels;
|
|
|
|
nir_ssa_def *patch_id = nir_load_rel_patch_id_ir3(b);
|
|
|
|
nir_ssa_def *patch_offset =
|
|
nir_imul24(b, patch_id, nir_imm_int(b, patch_stride));
|
|
|
|
uint32_t offset;
|
|
switch (slot) {
|
|
case VARYING_SLOT_PRIMITIVE_ID:
|
|
offset = 0;
|
|
break;
|
|
case VARYING_SLOT_TESS_LEVEL_OUTER:
|
|
offset = 1;
|
|
break;
|
|
case VARYING_SLOT_TESS_LEVEL_INNER:
|
|
offset = 1 + outer_levels;
|
|
break;
|
|
default:
|
|
unreachable("bad");
|
|
}
|
|
|
|
return nir_iadd(b, patch_offset, nir_imm_int(b, offset + comp));
|
|
}
|
|
|
|
static void
|
|
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
|
|
{
|
|
nir_foreach_instr_safe (instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_per_vertex_output: {
|
|
// src[] = { vertex, offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
|
nir_ssa_def *offset = build_per_vertex_offset(
|
|
b, state, intr->src[0].ssa,
|
|
nir_intrinsic_io_semantics(intr).location,
|
|
nir_intrinsic_component(intr), intr->src[1].ssa);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
|
|
offset, NULL);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_store_per_vertex_output: {
|
|
// src[] = { value, vertex, offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
/* sparse writemask not supported */
|
|
assert(
|
|
util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
|
|
|
nir_ssa_def *value = intr->src[0].ssa;
|
|
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
|
nir_ssa_def *offset = build_per_vertex_offset(
|
|
b, state, intr->src[1].ssa,
|
|
nir_intrinsic_io_semantics(intr).location,
|
|
nir_intrinsic_component(intr), intr->src[2].ssa);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value,
|
|
address, offset);
|
|
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_load_output: {
|
|
// src[] = { offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *address, *offset;
|
|
|
|
/* note if vectorization of the tess level loads ever happens:
|
|
* "ldg" across 16-byte boundaries can behave incorrectly if results
|
|
* are never used. most likely some issue with (sy) not properly
|
|
* syncing with values coming from a second memory transaction.
|
|
*/
|
|
gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
|
|
if (is_tess_levels(location)) {
|
|
assert(intr->dest.ssa.num_components == 1);
|
|
address = nir_load_tess_factor_base_ir3(b);
|
|
offset = build_tessfactor_base(
|
|
b, location, nir_intrinsic_component(intr), state);
|
|
} else {
|
|
address = nir_load_tess_param_base_ir3(b);
|
|
offset = build_patch_offset(b, state, location,
|
|
nir_intrinsic_component(intr),
|
|
intr->src[0].ssa);
|
|
}
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
|
|
offset, NULL);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_store_output: {
|
|
// src[] = { value, offset }.
|
|
|
|
/* write patch output to bo */
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
/* sparse writemask not supported */
|
|
assert(
|
|
util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));
|
|
|
|
gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
|
|
if (is_tess_levels(location)) {
|
|
uint32_t inner_levels, outer_levels, levels;
|
|
tess_level_components(state, &inner_levels, &outer_levels);
|
|
|
|
assert(intr->src[0].ssa->num_components == 1);
|
|
|
|
nir_if *nif = NULL;
|
|
if (location != VARYING_SLOT_PRIMITIVE_ID) {
|
|
/* with tess levels are defined as float[4] and float[2],
|
|
* but tess factor BO has smaller sizes for tris/isolines,
|
|
* so we have to discard any writes beyond the number of
|
|
* components for inner/outer levels
|
|
*/
|
|
if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
|
|
levels = outer_levels;
|
|
else
|
|
levels = inner_levels;
|
|
|
|
nir_ssa_def *offset = nir_iadd_imm(
|
|
b, intr->src[1].ssa, nir_intrinsic_component(intr));
|
|
nif = nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));
|
|
}
|
|
|
|
nir_ssa_def *offset = build_tessfactor_base(
|
|
b, location, nir_intrinsic_component(intr), state);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
|
|
intr->src[0].ssa,
|
|
nir_load_tess_factor_base_ir3(b),
|
|
nir_iadd(b, intr->src[1].ssa, offset));
|
|
|
|
if (location != VARYING_SLOT_PRIMITIVE_ID) {
|
|
nir_pop_if(b, nif);
|
|
}
|
|
} else {
|
|
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
|
nir_ssa_def *offset = build_patch_offset(
|
|
b, state, location, nir_intrinsic_component(intr),
|
|
intr->src[1].ssa);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
|
|
intr->src[0].ssa, address, offset);
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void
|
|
emit_tess_epilouge(nir_builder *b, struct state *state)
|
|
{
|
|
/* Insert endpatch instruction:
|
|
*
|
|
* TODO we should re-work this to use normal flow control.
|
|
*/
|
|
|
|
nir_end_patch_ir3(b);
|
|
}
|
|
|
|
void
|
|
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
|
|
unsigned topology)
|
|
{
|
|
struct state state = {.topology = topology};
|
|
|
|
if (shader_debug_enabled(shader->info.stage)) {
|
|
mesa_logi("NIR (before tess lowering) for %s shader:",
|
|
_mesa_shader_stage_to_string(shader->info.stage));
|
|
nir_log_shaderi(shader);
|
|
}
|
|
|
|
build_primitive_map(shader, &state.map);
|
|
memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
|
|
v->output_size = state.map.stride;
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
assert(impl);
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, impl);
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
|
|
state.header = nir_load_tcs_header_ir3(&b);
|
|
|
|
/* If required, store gl_PrimitiveID. */
|
|
if (v->key.tcs_store_primid) {
|
|
b.cursor = nir_after_cf_list(&impl->body);
|
|
|
|
nir_store_output(&b, nir_load_primitive_id(&b), nir_imm_int(&b, 0),
|
|
.io_semantics = {
|
|
.location = VARYING_SLOT_PRIMITIVE_ID,
|
|
.num_slots = 1
|
|
});
|
|
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
}
|
|
|
|
nir_foreach_block_safe (block, impl)
|
|
lower_tess_ctrl_block(block, &b, &state);
|
|
|
|
/* Now move the body of the TCS into a conditional:
|
|
*
|
|
* if (gl_InvocationID < num_vertices)
|
|
* // body
|
|
*
|
|
*/
|
|
|
|
nir_cf_list body;
|
|
nir_cf_extract(&body, nir_before_cf_list(&impl->body),
|
|
nir_after_cf_list(&impl->body));
|
|
|
|
b.cursor = nir_after_cf_list(&impl->body);
|
|
|
|
/* Re-emit the header, since the old one got moved into the if branch */
|
|
state.header = nir_load_tcs_header_ir3(&b);
|
|
nir_ssa_def *iid = build_invocation_id(&b, &state);
|
|
|
|
const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
|
|
nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));
|
|
|
|
nir_if *nif = nir_push_if(&b, cond);
|
|
|
|
nir_cf_reinsert(&body, b.cursor);
|
|
|
|
b.cursor = nir_after_cf_list(&nif->then_list);
|
|
|
|
/* Insert conditional exit for threads invocation id != 0 */
|
|
nir_ssa_def *iid0_cond = nir_ieq_imm(&b, iid, 0);
|
|
nir_cond_end_ir3(&b, iid0_cond);
|
|
|
|
emit_tess_epilouge(&b, &state);
|
|
|
|
nir_pop_if(&b, nif);
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
|
}
|
|
|
|
static void
|
|
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
|
|
{
|
|
nir_foreach_instr_safe (instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_load_tess_coord: {
|
|
b->cursor = nir_after_instr(&intr->instr);
|
|
nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
|
|
nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
|
|
nir_ssa_def *z;
|
|
|
|
if (state->topology == IR3_TESS_TRIANGLES)
|
|
z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
|
|
else
|
|
z = nir_imm_float(b, 0.0f);
|
|
|
|
nir_ssa_def *coord = nir_vec3(b, x, y, z);
|
|
|
|
nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, coord,
|
|
b->cursor.instr);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_load_per_vertex_input: {
|
|
// src[] = { vertex, offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
|
|
nir_ssa_def *offset = build_per_vertex_offset(
|
|
b, state, intr->src[0].ssa,
|
|
nir_intrinsic_io_semantics(intr).location,
|
|
nir_intrinsic_component(intr), intr->src[1].ssa);
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
|
|
offset, NULL);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_load_input: {
|
|
// src[] = { offset }.
|
|
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
|
|
nir_ssa_def *address, *offset;
|
|
|
|
/* note if vectorization of the tess level loads ever happens:
|
|
* "ldg" across 16-byte boundaries can behave incorrectly if results
|
|
* are never used. most likely some issue with (sy) not properly
|
|
* syncing with values coming from a second memory transaction.
|
|
*/
|
|
gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
|
|
if (is_tess_levels(location)) {
|
|
assert(intr->dest.ssa.num_components == 1);
|
|
address = nir_load_tess_factor_base_ir3(b);
|
|
offset = build_tessfactor_base(
|
|
b, location, nir_intrinsic_component(intr), state);
|
|
} else {
|
|
address = nir_load_tess_param_base_ir3(b);
|
|
offset = build_patch_offset(b, state, location,
|
|
nir_intrinsic_component(intr),
|
|
intr->src[0].ssa);
|
|
}
|
|
|
|
replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
|
|
offset, NULL);
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,
|
|
unsigned topology)
|
|
{
|
|
struct state state = {.topology = topology};
|
|
|
|
if (shader_debug_enabled(shader->info.stage)) {
|
|
mesa_logi("NIR (before tess lowering) for %s shader:",
|
|
_mesa_shader_stage_to_string(shader->info.stage));
|
|
nir_log_shaderi(shader);
|
|
}
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
assert(impl);
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, impl);
|
|
|
|
nir_foreach_block_safe (block, impl)
|
|
lower_tess_eval_block(block, &b, &state);
|
|
|
|
v->input_size = calc_primitive_map_size(shader);
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
|
}
|
|
|
|
static void
|
|
copy_vars(nir_builder *b, struct exec_list *dests, struct exec_list *srcs)
|
|
{
|
|
foreach_two_lists (dest_node, dests, src_node, srcs) {
|
|
nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
|
|
nir_variable *src = exec_node_data(nir_variable, src_node, node);
|
|
nir_copy_var(b, dest, src);
|
|
}
|
|
}
|
|
|
|
static void
|
|
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
|
|
{
|
|
nir_foreach_instr_safe (instr, block) {
|
|
if (instr->type != nir_instr_type_intrinsic)
|
|
continue;
|
|
|
|
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
|
|
|
switch (intr->intrinsic) {
|
|
case nir_intrinsic_end_primitive: {
|
|
/* Note: This ignores the stream, which seems to match the blob
|
|
* behavior. I'm guessing the HW ignores any extraneous cut
|
|
* signals from an EndPrimitive() that doesn't correspond to the
|
|
* rasterized stream.
|
|
*/
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
|
|
nir_instr_remove(&intr->instr);
|
|
break;
|
|
}
|
|
|
|
case nir_intrinsic_emit_vertex: {
|
|
/* Load the vertex count */
|
|
b->cursor = nir_before_instr(&intr->instr);
|
|
nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
|
|
|
|
nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));
|
|
|
|
unsigned stream = nir_intrinsic_stream_id(intr);
|
|
/* vertex_flags_out |= stream */
|
|
nir_store_var(b, state->vertex_flags_out,
|
|
nir_ior(b, nir_load_var(b, state->vertex_flags_out),
|
|
nir_imm_int(b, stream)),
|
|
0x1 /* .x */);
|
|
|
|
copy_vars(b, &state->emit_outputs, &state->old_outputs);
|
|
|
|
nir_instr_remove(&intr->instr);
|
|
|
|
nir_store_var(b, state->emitted_vertex_var,
|
|
nir_iadd(b, nir_load_var(b, state->emitted_vertex_var),
|
|
nir_imm_int(b, 1)),
|
|
0x1);
|
|
|
|
nir_pop_if(b, NULL);
|
|
|
|
/* Increment the vertex count by 1 */
|
|
nir_store_var(b, state->vertex_count_var,
|
|
nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
|
|
nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
ir3_nir_lower_gs(nir_shader *shader)
|
|
{
|
|
struct state state = {};
|
|
|
|
/* Don't lower multiple times: */
|
|
nir_foreach_shader_out_variable (var, shader)
|
|
if (var->data.location == VARYING_SLOT_GS_VERTEX_FLAGS_IR3)
|
|
return;
|
|
|
|
if (shader_debug_enabled(shader->info.stage)) {
|
|
mesa_logi("NIR (before gs lowering):");
|
|
nir_log_shaderi(shader);
|
|
}
|
|
|
|
/* Create an output var for vertex_flags. This will be shadowed below,
|
|
* same way regular outputs get shadowed, and this variable will become a
|
|
* temporary.
|
|
*/
|
|
state.vertex_flags_out = nir_variable_create(
|
|
shader, nir_var_shader_out, glsl_uint_type(), "vertex_flags");
|
|
state.vertex_flags_out->data.driver_location = shader->num_outputs++;
|
|
state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
|
|
state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;
|
|
|
|
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
|
|
assert(impl);
|
|
|
|
nir_builder b;
|
|
nir_builder_init(&b, impl);
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
|
|
state.header = nir_load_gs_header_ir3(&b);
|
|
|
|
/* Generate two set of shadow vars for the output variables. The first
|
|
* set replaces the real outputs and the second set (emit_outputs) we'll
|
|
* assign in the emit_vertex conditionals. Then at the end of the shader
|
|
* we copy the emit_outputs to the real outputs, so that we get
|
|
* store_output in uniform control flow.
|
|
*/
|
|
exec_list_make_empty(&state.old_outputs);
|
|
nir_foreach_shader_out_variable_safe (var, shader) {
|
|
exec_node_remove(&var->node);
|
|
exec_list_push_tail(&state.old_outputs, &var->node);
|
|
}
|
|
exec_list_make_empty(&state.new_outputs);
|
|
exec_list_make_empty(&state.emit_outputs);
|
|
nir_foreach_variable_in_list (var, &state.old_outputs) {
|
|
/* Create a new output var by cloning the original output var and
|
|
* stealing the name.
|
|
*/
|
|
nir_variable *output = nir_variable_clone(var, shader);
|
|
exec_list_push_tail(&state.new_outputs, &output->node);
|
|
|
|
/* Rewrite the original output to be a shadow variable. */
|
|
var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
|
|
var->data.mode = nir_var_shader_temp;
|
|
|
|
/* Clone the shadow variable to create the emit shadow variable that
|
|
* we'll assign in the emit conditionals.
|
|
*/
|
|
nir_variable *emit_output = nir_variable_clone(var, shader);
|
|
emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
|
|
exec_list_push_tail(&state.emit_outputs, &emit_output->node);
|
|
}
|
|
|
|
/* During the shader we'll keep track of which vertex we're currently
|
|
* emitting for the EmitVertex test and how many vertices we emitted so we
|
|
* know to discard if didn't emit any. In most simple shaders, this can
|
|
* all be statically determined and gets optimized away.
|
|
*/
|
|
state.vertex_count_var =
|
|
nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
|
|
state.emitted_vertex_var =
|
|
nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");
|
|
|
|
/* Initialize to 0. */
|
|
b.cursor = nir_before_cf_list(&impl->body);
|
|
nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
|
|
nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
|
|
nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);
|
|
|
|
nir_foreach_block_safe (block, impl)
|
|
lower_gs_block(block, &b, &state);
|
|
|
|
/* Note: returns are lowered, so there should be only one block before the
|
|
* end block. If we had real returns, we would probably want to redirect
|
|
* them to this new if statement, rather than emitting this code at every
|
|
* return statement.
|
|
*/
|
|
assert(impl->end_block->predecessors->entries == 1);
|
|
nir_block *block = nir_impl_last_block(impl);
|
|
b.cursor = nir_after_block_before_jump(block);
|
|
|
|
/* If we haven't emitted any vertex we need to copy the shadow (old)
|
|
* outputs to emit outputs here.
|
|
*
|
|
* Also some piglit GS tests[1] don't have EndPrimitive() so throw
|
|
* in an extra vertex_flags write for good measure. If unneeded it
|
|
* will be optimized out.
|
|
*
|
|
* [1] ex, tests/spec/glsl-1.50/execution/compatibility/clipping/gs-clip-vertex-const-accept.shader_test
|
|
*/
|
|
nir_ssa_def *cond =
|
|
nir_ieq_imm(&b, nir_load_var(&b, state.emitted_vertex_var), 0);
|
|
nir_push_if(&b, cond);
|
|
nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);
|
|
copy_vars(&b, &state.emit_outputs, &state.old_outputs);
|
|
nir_pop_if(&b, NULL);
|
|
|
|
nir_discard_if(&b, cond);
|
|
|
|
copy_vars(&b, &state.new_outputs, &state.emit_outputs);
|
|
|
|
exec_list_append(&shader->variables, &state.old_outputs);
|
|
exec_list_append(&shader->variables, &state.emit_outputs);
|
|
exec_list_append(&shader->variables, &state.new_outputs);
|
|
|
|
nir_metadata_preserve(impl, nir_metadata_none);
|
|
|
|
nir_lower_global_vars_to_local(shader);
|
|
nir_split_var_copies(shader);
|
|
nir_lower_var_copies(shader);
|
|
|
|
nir_fixup_deref_modes(shader);
|
|
|
|
if (shader_debug_enabled(shader->info.stage)) {
|
|
mesa_logi("NIR (after gs lowering):");
|
|
nir_log_shaderi(shader);
|
|
}
|
|
}
|