ac: Add new NIR pass to lower NGG VS/TES.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10740>
This commit is contained in:
Timur Kristóf 2021-04-09 16:56:57 +02:00 committed by Marge Bot
parent 89a76ff786
commit 3d589b8b46
3 changed files with 290 additions and 0 deletions

View File

@ -86,6 +86,23 @@ bool
ac_nir_lower_indirect_derefs(nir_shader *shader,
enum chip_class chip_class);
typedef struct
{
bool can_cull;
bool passthrough;
} ac_nir_ngg_config;
ac_nir_ngg_config
ac_nir_lower_ngg_nogs(nir_shader *shader,
unsigned max_num_es_vertices,
unsigned num_vertices_per_primitive,
unsigned max_workgroup_size,
unsigned wave_size,
bool consider_culling,
bool consider_passthrough,
bool export_prim_id,
bool provoking_vtx_last);
#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,272 @@
/*
* Copyright © 2021 Valve Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#include "ac_nir.h"
#include "nir_builder.h"
#include "u_math.h"
typedef struct
{
nir_variable *position_value_var;
nir_variable *prim_exp_arg_var;
bool passthrough;
bool export_prim_id;
bool early_prim_export;
unsigned max_num_waves;
unsigned num_vertices_per_primitives;
unsigned provoking_vtx_idx;
unsigned max_es_num_vertices;
unsigned total_lds_bytes;
} lower_ngg_nogs_state;
static nir_ssa_def *
pervertex_lds_addr(nir_builder *b, nir_ssa_def *vertex_idx, unsigned per_vtx_bytes)
{
return nir_imul_imm(b, vertex_idx, per_vtx_bytes);
}
static nir_ssa_def *
emit_pack_ngg_prim_exp_arg(nir_builder *b, unsigned num_vertices_per_primitives,
nir_ssa_def *vertex_indices[3], nir_ssa_def *is_null_prim)
{
nir_ssa_def *arg = vertex_indices[0];
for (unsigned i = 0; i < num_vertices_per_primitives; ++i) {
assert(vertex_indices[i]);
if (i)
arg = nir_ior(b, arg, nir_ishl(b, vertex_indices[i], nir_imm_int(b, 10u * i)));
if (b->shader->info.stage == MESA_SHADER_VERTEX) {
nir_ssa_def *edgeflag = nir_build_load_initial_edgeflag_amd(b, 32, nir_imm_int(b, i));
arg = nir_ior(b, arg, nir_ishl(b, edgeflag, nir_imm_int(b, 10u * i + 9u)));
}
}
if (is_null_prim) {
if (is_null_prim->bit_size == 1)
is_null_prim = nir_b2i32(b, is_null_prim);
assert(is_null_prim->bit_size == 32);
arg = nir_ior(b, arg, nir_ishl(b, is_null_prim, nir_imm_int(b, 31u)));
}
return arg;
}
static nir_ssa_def *
ngg_input_primitive_vertex_index(nir_builder *b, unsigned vertex)
{
/* TODO: This is RADV specific. We'll need to refactor RADV and/or RadeonSI to match. */
return nir_ubfe(b, nir_build_load_gs_vertex_offset_amd(b, .base = vertex / 2u * 2u),
nir_imm_int(b, (vertex % 2u) * 16u), nir_imm_int(b, 16u));
}
static nir_ssa_def *
emit_ngg_nogs_prim_exp_arg(nir_builder *b, lower_ngg_nogs_state *st)
{
if (st->passthrough) {
assert(!st->export_prim_id || b->shader->info.stage != MESA_SHADER_VERTEX);
return nir_build_load_packed_passthrough_primitive_amd(b);
} else {
nir_ssa_def *vtx_idx[3] = {0};
vtx_idx[0] = ngg_input_primitive_vertex_index(b, 0);
vtx_idx[1] = st->num_vertices_per_primitives >= 2
? ngg_input_primitive_vertex_index(b, 1)
: nir_imm_zero(b, 1, 32);
vtx_idx[2] = st->num_vertices_per_primitives >= 3
? ngg_input_primitive_vertex_index(b, 2)
: nir_imm_zero(b, 1, 32);
return emit_pack_ngg_prim_exp_arg(b, st->num_vertices_per_primitives, vtx_idx, NULL);
}
}
static void
emit_ngg_nogs_prim_export(nir_builder *b, lower_ngg_nogs_state *st, nir_ssa_def *arg)
{
nir_if *if_gs_thread = nir_push_if(b, nir_build_has_input_primitive_amd(b));
{
if (!arg)
arg = emit_ngg_nogs_prim_exp_arg(b, st);
if (st->export_prim_id && b->shader->info.stage == MESA_SHADER_VERTEX) {
/* Copy Primitive IDs from GS threads to the LDS address corresponding to the ES thread of the provoking vertex. */
nir_ssa_def *prim_id = nir_build_load_primitive_id(b);
nir_ssa_def *provoking_vtx_idx = ngg_input_primitive_vertex_index(b, st->provoking_vtx_idx);
nir_ssa_def *addr = pervertex_lds_addr(b, provoking_vtx_idx, 4u);
nir_build_store_shared(b, prim_id, addr, .write_mask = 1u, .align_mul = 4u);
}
nir_build_export_primitive_amd(b, arg);
}
nir_pop_if(b, if_gs_thread);
}
static void
emit_store_ngg_nogs_es_primitive_id(nir_builder *b)
{
nir_ssa_def *prim_id = NULL;
if (b->shader->info.stage == MESA_SHADER_VERTEX) {
/* Workgroup barrier - wait for GS threads to store primitive ID in LDS. */
nir_scoped_barrier(b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_WORKGROUP,
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_shared);
/* LDS address where the primitive ID is stored */
nir_ssa_def *thread_id_in_threadgroup = nir_build_load_local_invocation_index(b);
nir_ssa_def *addr = pervertex_lds_addr(b, thread_id_in_threadgroup, 4u);
/* Load primitive ID from LDS */
prim_id = nir_build_load_shared(b, 1, 32, addr, .align_mul = 4u);
} else if (b->shader->info.stage == MESA_SHADER_TESS_EVAL) {
/* Just use tess eval primitive ID, which is the same as the patch ID. */
prim_id = nir_build_load_primitive_id(b);
}
nir_io_semantics io_sem = {
.location = VARYING_SLOT_PRIMITIVE_ID,
.num_slots = 1,
};
nir_build_store_output(b, prim_id, nir_imm_zero(b, 1, 32),
.base = io_sem.location,
.write_mask = 1u, .src_type = nir_type_uint32, .io_semantics = io_sem);
}
ac_nir_ngg_config
ac_nir_lower_ngg_nogs(nir_shader *shader,
unsigned max_num_es_vertices,
unsigned num_vertices_per_primitives,
unsigned max_workgroup_size,
unsigned wave_size,
bool consider_culling,
bool consider_passthrough,
bool export_prim_id,
bool provoking_vtx_last)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
assert(impl);
assert(max_num_es_vertices && max_workgroup_size && wave_size);
bool can_cull = false; /* TODO */
bool passthrough = consider_passthrough && !can_cull &&
!(shader->info.stage == MESA_SHADER_VERTEX && export_prim_id);
nir_variable *position_value_var = nir_local_variable_create(impl, glsl_vec4_type(), "position_value");
nir_variable *prim_exp_arg_var = nir_local_variable_create(impl, glsl_uint_type(), "prim_exp_arg");
lower_ngg_nogs_state state = {
.passthrough = passthrough,
.export_prim_id = export_prim_id,
.early_prim_export = exec_list_is_singular(&impl->body),
.num_vertices_per_primitives = num_vertices_per_primitives,
.provoking_vtx_idx = provoking_vtx_last ? (num_vertices_per_primitives - 1) : 0,
.position_value_var = position_value_var,
.prim_exp_arg_var = prim_exp_arg_var,
.max_num_waves = DIV_ROUND_UP(max_workgroup_size, wave_size),
.max_es_num_vertices = max_num_es_vertices,
};
/* We need LDS space when VS needs to export the primitive ID. */
if (shader->info.stage == MESA_SHADER_VERTEX && export_prim_id)
state.total_lds_bytes = max_num_es_vertices * 4u;
nir_cf_list extracted;
nir_cf_extract(&extracted, nir_before_cf_list(&impl->body), nir_after_cf_list(&impl->body));
nir_builder builder;
nir_builder *b = &builder; /* This is to avoid the & */
nir_builder_init(b, impl);
b->cursor = nir_before_cf_list(&impl->body);
if (!can_cull) {
/* Allocate export space on wave 0 - confirm to the HW that we want to use all possible space */
nir_if *if_wave_0 = nir_push_if(b, nir_ieq(b, nir_build_load_subgroup_id(b), nir_imm_int(b, 0)));
{
nir_ssa_def *vtx_cnt = nir_build_load_workgroup_num_input_vertices_amd(b);
nir_ssa_def *prim_cnt = nir_build_load_workgroup_num_input_primitives_amd(b);
nir_build_alloc_vertices_and_primitives_amd(b, vtx_cnt, prim_cnt);
}
nir_pop_if(b, if_wave_0);
/* Take care of early primitive export, otherwise just pack the primitive export argument */
if (state.early_prim_export)
emit_ngg_nogs_prim_export(b, &state, NULL);
else
nir_store_var(b, prim_exp_arg_var, emit_ngg_nogs_prim_exp_arg(b, &state), 0x1u);
} else {
abort(); /* TODO */
}
nir_if *if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b));
{
if (can_cull) {
nir_ssa_def *pos_val = nir_load_var(b, state.position_value_var);
nir_io_semantics io_sem = { .location = VARYING_SLOT_POS, .num_slots = 1 };
nir_build_store_output(b, pos_val, nir_imm_int(b, 0), .base = VARYING_SLOT_POS, .component = 0, .io_semantics = io_sem, .write_mask = 0xfu);
}
/* Run the actual shader */
nir_cf_reinsert(&extracted, b->cursor);
b->cursor = nir_after_cf_list(&if_es_thread->then_list);
/* Export all vertex attributes (except primitive ID) */
nir_build_export_vertex_amd(b);
/* Export primitive ID (in case of early primitive export or TES) */
if (state.export_prim_id && (state.early_prim_export || shader->info.stage != MESA_SHADER_VERTEX))
emit_store_ngg_nogs_es_primitive_id(b);
}
nir_pop_if(b, if_es_thread);
/* Take care of late primitive export */
if (!state.early_prim_export) {
emit_ngg_nogs_prim_export(b, &state, nir_load_var(b, prim_exp_arg_var));
if (state.export_prim_id && shader->info.stage == MESA_SHADER_VERTEX) {
if_es_thread = nir_push_if(b, nir_build_has_input_vertex_amd(b));
emit_store_ngg_nogs_es_primitive_id(b);
nir_pop_if(b, if_es_thread);
}
}
nir_metadata_preserve(impl, nir_metadata_none);
nir_validate_shader(shader, "after emitting NGG VS/TES");
/* Cleanup */
nir_lower_vars_to_ssa(shader);
nir_remove_dead_variables(shader, nir_var_function_temp, NULL);
nir_opt_undef(shader);
shader->info.shared_size = state.total_lds_bytes;
ac_nir_ngg_config ret = {
.can_cull = can_cull,
.passthrough = passthrough,
};
return ret;
}

View File

@ -91,6 +91,7 @@ amd_common_files = files(
'ac_nir.h',
'ac_nir_lower_esgs_io_to_mem.c',
'ac_nir_lower_tess_io_to_mem.c',
'ac_nir_lower_ngg.c',
'amd_family.c',
)