radeonsi: replace llvm tcs output with nir lower pass

Remove the store_tcs_outputs ABI callback; the common output ABI can be
used instead to pass the tess factors as VGPRs.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16705>
This commit is contained in:
Qiang Yu 2022-05-28 17:52:35 +08:00 committed by Marge Bot
parent d00845faf4
commit 7598bfd768
6 changed files with 51 additions and 161 deletions

View File

@ -2367,12 +2367,9 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
unsigned component = nir_intrinsic_component(instr);
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
nir_src offset = *nir_get_io_offset_src(instr);
LLVMValueRef indir_index = NULL;
if (nir_src_is_const(offset))
assert(nir_src_as_uint(offset) == 0);
else
indir_index = get_src(ctx, offset);
/* No indirect indexing is allowed here. */
assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
case 16:
@ -2388,19 +2385,6 @@ static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *
writemask <<= component;
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
nir_src *vertex_index_src = nir_get_io_arrayed_index_src(instr);
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
unsigned location = nir_intrinsic_io_semantics(instr).location;
ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,
writemask, component, location, base);
return;
}
/* No indirect indexing is allowed after this point. */
assert(!indir_index);
for (unsigned chan = 0; chan < 8; chan++) {
if (!(writemask & (1 << chan)))
continue;

View File

@ -69,12 +69,6 @@ struct ac_shader_abi {
unsigned driver_location, unsigned component,
unsigned num_components, bool load_inputs);
void (*store_tcs_outputs)(struct ac_shader_abi *abi,
LLVMValueRef vertex_index, LLVMValueRef param_index,
LLVMValueRef src, unsigned writemask,
unsigned component, unsigned location, unsigned driver_location);
LLVMValueRef (*load_ubo)(struct ac_shader_abi *abi, LLVMValueRef index);
/**

View File

@ -1491,13 +1491,20 @@ static bool si_nir_kill_outputs(nir_shader *nir, const union si_shader_key *key)
/* Map a varying slot (semantic) to the driver location handed to the
 * ac_nir_lower_*_to_mem I/O lowering passes.
 *
 * Generic per-patch slots (VARYING_SLOT_PATCH0 up to, but not including,
 * VARYING_SLOT_TESS_MAX) and the two tess level slots are mapped with
 * si_shader_io_get_unique_index_patch(); every other slot is mapped with
 * si_shader_io_get_unique_index().
 */
static unsigned si_map_io_driver_location(unsigned semantic)
{
if ((semantic >= VARYING_SLOT_PATCH0 && semantic < VARYING_SLOT_TESS_MAX) ||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
return si_shader_io_get_unique_index_patch(semantic);
/* NOTE(review): the meaning of the 'false' argument is not visible here —
 * confirm against si_shader_io_get_unique_index(). */
return si_shader_io_get_unique_index(semantic, false);
}
static bool si_lower_io_to_mem(const union si_shader_key *key,
nir_shader *nir,
static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
uint64_t tcs_vgpr_only_inputs)
{
struct si_shader_selector *sel = shader->selector;
const union si_shader_key *key = &shader->key;
if (nir->info.stage == MESA_SHADER_VERTEX) {
if (key->ge.as_ls) {
NIR_PASS_V(nir, ac_nir_lower_ls_outputs_to_mem, si_map_io_driver_location,
@ -1507,6 +1514,17 @@ static bool si_lower_io_to_mem(const union si_shader_key *key,
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS_V(nir, ac_nir_lower_hs_inputs_to_mem, si_map_io_driver_location,
key->ge.opt.same_patch_vertices);
NIR_PASS_V(nir, ac_nir_lower_hs_outputs_to_mem, si_map_io_driver_location,
sel->screen->info.gfx_level,
false, /* does not matter as we disabled final tess factor write */
~0ULL, ~0ULL, /* no TES inputs filter */
util_last_bit64(sel->info.outputs_written),
util_last_bit64(sel->info.patch_outputs_written),
shader->wave_size,
/* ALL TCS inputs are passed by register. */
key->ge.opt.same_patch_vertices &&
!(sel->info.base.inputs_read & ~sel->info.tcs_vgpr_only_inputs),
sel->info.tessfactors_are_def_in_all_invocs, false);
return true;
}
@ -1633,7 +1651,7 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
*/
progress2 |= ac_nir_lower_indirect_derefs(nir, sel->screen->info.gfx_level);
bool opt_offsets = si_lower_io_to_mem(key, nir, tcs_vgpr_only_inputs);
bool opt_offsets = si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs);
if (progress2 || opt_offsets)
si_nir_opts(sel->screen, nir, false);

View File

@ -144,7 +144,6 @@ struct si_shader_context {
LLVMValueRef gsvs_ring[4];
LLVMValueRef tess_offchip_ring;
LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
LLVMValueRef gs_next_vertex[4];
LLVMValueRef gs_curprim_verts[4];
LLVMValueRef gs_generated_prims[4];

View File

@ -834,11 +834,6 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
case MESA_SHADER_TESS_CTRL:
si_llvm_init_tcs_callbacks(ctx);
si_llvm_preload_tess_rings(ctx);
if (sel->info.tessfactors_are_def_in_all_invocs) {
for (unsigned i = 0; i < 6; i++)
ctx->invoc0_tess_factors[i] = ac_build_alloca_undef(&ctx->ac, ctx->ac.i32, "");
}
break;
case MESA_SHADER_TESS_EVAL:

View File

@ -383,42 +383,18 @@ static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMType
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
ubyte semantic = info->input[driver_location].semantic;
/* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(semantic, false) * 4;
LLVMValueRef value[4];
if (load_input) {
assert(ctx->shader->key.ge.opt.same_patch_vertices && !param_index);
ubyte semantic = info->input[driver_location].semantic;
/* Load the TCS input from a VGPR. */
unsigned func_param = ctx->args.tcs_rel_ids.arg_index + 1 +
si_shader_io_get_unique_index(semantic, false) * 4;
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}
} else {
ubyte semantic = info->output_semantic[driver_location];
bool is_patch = vertex_index == NULL;
assert((semantic >= VARYING_SLOT_PATCH0 ||
semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);
LLVMValueRef dw_addr, stride;
if (is_patch) {
stride = NULL;
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
} else {
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
}
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index,
param_index, semantic);
for (unsigned i = component; i < component + num_components; i++)
value[i] = lshs_lds_load(ctx, type, i, dw_addr);
for (unsigned i = component; i < component + num_components; i++) {
value[i] = LLVMGetParam(ctx->main_fn, func_param + i);
value[i] = LLVMBuildBitCast(ctx->ac.builder, value[i], type, "");
}
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
@ -455,96 +431,6 @@ static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef
return ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
}
/* Store one TCS output: the implementation of the store_tcs_outputs ABI
 * callback.
 *
 * Depending on the output, each written channel goes to LDS
 * (lshs_lds_store), to the off-chip tess ring (ac_build_buffer_store_dword
 * on ctx->tess_offchip_ring) and/or to the invoc0_tess_factors variables.
 *
 * abi             - ABI object; the owning si_shader_context is derived from it.
 * vertex_index    - output vertex index, or NULL for a per-patch output.
 * param_index     - indirect index, or NULL when the index is constant.
 * src             - value to store; element (chan - component) is written to
 *                   channel chan.
 * writemask       - channel mask, tested with absolute channel numbers
 *                   (bit 'chan').
 * component       - first channel covered by src.
 * location        - not referenced in this body.
 * driver_location - index into info->output_semantic and
 *                   info->output_readmask.
 */
static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
LLVMValueRef vertex_index, LLVMValueRef param_index,
LLVMValueRef src, unsigned writemask,
unsigned component, unsigned location, unsigned driver_location)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_info *info = &ctx->shader->selector->info;
LLVMValueRef dw_addr, stride;
LLVMValueRef buffer, base, addr;
/* Only entries below 4 are ever filled by the loop further down. */
LLVMValueRef values[8];
bool is_tess_factor = false, is_tess_inner = false;
ubyte semantic = info->output_semantic[driver_location];
const bool is_const = !param_index;
const bool is_patch = vertex_index == NULL;
/* Invalid SPIR-V can cause this. */
/* I.e. a patch semantic with a vertex index, or a per-vertex semantic without one. */
if ((semantic >= VARYING_SLOT_PATCH0 || semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) != is_patch)
return;
/* Compute the LDS dword address of the output. */
if (!is_patch) {
stride = get_tcs_out_vertex_dw_stride(ctx);
dw_addr = get_tcs_out_current_patch_offset(ctx);
dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
semantic);
} else {
dw_addr = get_tcs_out_current_patch_data_offset(ctx);
dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index,
semantic);
/* Tess factors are only recognized when the index is constant. */
if (is_const) {
/* Shadows the outer 'semantic'; it is read from the same array entry. */
int semantic = info->output_semantic[driver_location];
/* Always write tess factors into LDS for the TCS epilog. */
if (semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
semantic == VARYING_SLOT_TESS_LEVEL_OUTER) {
is_tess_factor = true;
is_tess_inner = semantic == VARYING_SLOT_TESS_LEVEL_INNER;
}
}
}
/* Off-chip ring descriptor, its base offset argument and the output's address in it. */
buffer = ctx->tess_offchip_ring;
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
addr =
get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);
for (unsigned chan = component; chan < 4; chan++) {
if (!(writemask & (1 << chan)))
continue;
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
/* Skip LDS stores if there is no LDS read of this output. */
if (info->output_readmask[driver_location] & (1 << chan) ||
/* The epilog reads LDS if invocation 0 doesn't define tess factors. */
(is_tess_factor &&
!ctx->shader->selector->info.tessfactors_are_def_in_all_invocs))
lshs_lds_store(ctx, chan, dw_addr, value);
value = ac_to_integer(&ctx->ac, value);
values[chan] = value;
/* Partial write of a non-tess-factor output: store this dword on its own
 * at addr + 4 * chan. */
if (writemask != 0xF && !is_tess_factor) {
LLVMValueRef voffset = LLVMBuildAdd(ctx->ac.builder, addr,
LLVMConstInt(ctx->ac.i32, 4 * chan, 0), "");
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, voffset, base, ac_glc);
}
/* Write tess factors into VGPRs for the epilog. */
/* Outer factors use slots 0..3, inner factors slots 4..5; inner channels
 * 2 and 3 are dropped. */
if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
if (!is_tess_inner) {
LLVMBuildStore(ctx->ac.builder, value, /* outer */
ctx->invoc0_tess_factors[chan]);
} else if (chan < 2) {
LLVMBuildStore(ctx->ac.builder, value, /* inner */
ctx->invoc0_tess_factors[4 + chan]);
}
}
}
/* Full write of a non-tess-factor output: store all four channels with a
 * single call. */
if (writemask == 0xF && !is_tess_factor) {
LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
ac_build_buffer_store_dword(&ctx->ac, buffer, value, NULL, addr, base, ac_glc);
}
}
static void si_write_tess_factors(struct si_shader_context *ctx, union si_shader_part_key *key,
LLVMValueRef rel_patch_id, LLVMValueRef invocation_id,
LLVMValueRef tcs_out_current_patch_data_offset,
@ -769,10 +655,25 @@ void si_llvm_tcs_build_end(struct si_shader_context *ctx)
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
struct si_shader_info *info = &ctx->shader->selector->info;
if (info->tessfactors_are_def_in_all_invocs) {
vgpr++; /* skip the tess factor LDS offset */
/* get tess factor driver location */
int outer_loc = -1;
int inner_loc = -1;
for (int i = 0; i < info->num_outputs; i++) {
unsigned semantic = info->output_semantic[i];
if (semantic == VARYING_SLOT_TESS_LEVEL_OUTER)
outer_loc = i;
else if (semantic == VARYING_SLOT_TESS_LEVEL_INNER)
inner_loc = i;
}
for (unsigned i = 0; i < 6; i++) {
LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
int loc = i < 4 ? outer_loc : inner_loc;
LLVMValueRef value = loc < 0 ? LLVMGetUndef(ctx->ac.f32) :
LLVMBuildLoad(builder, ctx->abi.outputs[loc * 4 + i % 4], "");
value = ac_to_float(&ctx->ac, value);
ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
}
@ -920,7 +821,6 @@ void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_par
/* Install the TCS-specific callbacks on the shader ABI embedded in ctx.
 *
 * NOTE(review): this listing is a diff hunk that shrinks the function by one
 * line; the store_tcs_outputs assignment is presumably the line being
 * removed, since the commit removes that ABI callback — confirm against the
 * tree.
 */
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
}
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)