radeonsi: assign param export indices before compilation

This moves the logic out of LLVM-specific codepaths.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
commit 3777a5d715 (parent 11c28d9798)
Author: Marek Olšák, 2021-12-12 20:50:58 -05:00 (committed by Marge Bot)
5 changed files with 123 additions and 89 deletions
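In short: parameter-export indices for VS/TES/NGG-GS outputs are now computed up front on the NIR shader and stored in shader->info (vs_output_param_offset, keyed by varying semantic, plus the new vs_output_param_mask keyed by output "base"), so the LLVM backend only materializes the exports. A minimal standalone sketch of that bookkeeping, with hypothetical stand-ins (NUM_SLOTS, UNASSIGNED, assign()) for the driver's names — illustrative only, not Mesa code:

   /* sketch.c -- model of the new param-export bookkeeping. */
   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>

   #define NUM_SLOTS  64   /* stand-in for NUM_TOTAL_VARYING_SLOTS */
   #define UNASSIGNED 0xff /* stand-in for AC_EXP_PARAM_DEFAULT_VAL_0000 */

   struct info {
      uint8_t  param_offset[NUM_SLOTS]; /* param index, keyed by semantic */
      uint64_t param_mask;              /* which output slots ("base") export */
      uint8_t  nr_param_exports;
   };

   /* Record that the output at slot `base`, carrying `semantic`, needs a
    * param export; each semantic gets at most one param index. */
   static void assign(struct info *info, unsigned semantic, unsigned base)
   {
      if (info->param_offset[semantic] == UNASSIGNED)
         info->param_offset[semantic] = info->nr_param_exports++;
      info->param_mask |= 1ull << base;
   }

   int main(void)
   {
      struct info info = {0};
      memset(info.param_offset, UNASSIGNED, sizeof(info.param_offset));

      assign(&info, /*semantic=*/32, /*base=*/0); /* e.g. VAR0 */
      assign(&info, /*semantic=*/33, /*base=*/1); /* e.g. VAR1 */

      printf("%u exports, mask 0x%llx\n", (unsigned)info.nr_param_exports,
             (unsigned long long)info.param_mask); /* 2 exports, mask 0x3 */
      return 0;
   }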

@@ -1580,6 +1580,52 @@ void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
    shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
 }
 
+static void si_nir_assign_param_offsets(nir_shader *nir, const struct si_shader_info *info,
+                                        int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS],
+                                        uint8_t *num_param_exports, uint64_t *output_param_mask,
+                                        uint8_t vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS])
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(nir);
+   assert(impl);
+
+   nir_foreach_block(block, impl) {
+      nir_foreach_instr_safe(instr, block) {
+         if (instr->type != nir_instr_type_intrinsic)
+            continue;
+
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+         if (intr->intrinsic != nir_intrinsic_store_output)
+            continue;
+
+         /* No indirect indexing allowed. */
+         ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
+         assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
+
+         assert(intr->num_components == 1); /* only scalar stores expected */
+         nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
+
+         /* Assign the param index if it's unassigned. */
+         if (nir_slot_is_varying(sem.location) && !sem.no_varying &&
+             (sem.gs_streams & 0x3) == 0 &&
+             vs_output_param_offset[sem.location] == AC_EXP_PARAM_DEFAULT_VAL_0000) {
+            /* The semantic and the base should be the same as in si_shader_info. */
+            assert(sem.location == info->output_semantic[nir_intrinsic_base(intr)]);
+            /* It must not be remapped (duplicated). */
+            assert(slot_remap[sem.location] == -1);
+            vs_output_param_offset[sem.location] = (*num_param_exports)++;
+            *output_param_mask |= BITFIELD64_BIT(nir_intrinsic_base(intr));
+         }
+      }
+   }
+
+   /* Duplicated outputs are redirected here. */
+   for (unsigned i = 0; i < NUM_TOTAL_VARYING_SLOTS; i++) {
+      if (slot_remap[i] >= 0)
+         vs_output_param_offset[i] = vs_output_param_offset[slot_remap[i]];
+   }
+}
+
 bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                        struct si_shader *shader, struct util_debug_callback *debug)
 {
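The trailing loop is what makes output deduplication work: ac_nir_optimize_outputs records in slot_remap that a duplicated semantic should reuse another semantic's export, and the loop copies the surviving slot's param index over. A hedged standalone illustration with simplified names (offset, remap), not the driver code:

   #include <stdint.h>
   #include <stdio.h>
   #include <string.h>

   #define NUM_SLOTS  64   /* stand-in for NUM_TOTAL_VARYING_SLOTS */
   #define UNASSIGNED 0xff /* stand-in for AC_EXP_PARAM_DEFAULT_VAL_0000 */

   int main(void)
   {
      uint8_t offset[NUM_SLOTS];
      int8_t  remap[NUM_SLOTS];
      memset(offset, UNASSIGNED, sizeof(offset));
      memset(remap, -1, sizeof(remap));

      /* Suppose the optimizer found that semantic 33 always stores the same
       * values as semantic 32, so 33 was rewritten to reuse 32's export. */
      remap[33] = 32;

      offset[32] = 0; /* assigned by the store_output walk */

      /* Duplicated outputs are redirected here (same as the new pass). */
      for (unsigned i = 0; i < NUM_SLOTS; i++) {
         if (remap[i] >= 0)
            offset[i] = offset[remap[i]];
      }

      printf("offset[33] = %u\n", (unsigned)offset[33]); /* 0: reuses 32's export */
      return 0;
   }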
@@ -1587,6 +1633,42 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
    bool free_nir;
    struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
 
+   /* Assign param export indices. */
+   if ((sel->stage == MESA_SHADER_VERTEX ||
+        sel->stage == MESA_SHADER_TESS_EVAL ||
+        (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) &&
+       !shader->key.ge.as_ls && !shader->key.ge.as_es) {
+      /* Initialize this first. */
+      shader->info.nr_param_exports = 0;
+      shader->info.vs_output_param_mask = 0;
+
+      STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
+      memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
+             sizeof(shader->info.vs_output_param_offset));
+
+      /* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be
+       * mapped to multiple fragment shader inputs.
+       */
+      int8_t slot_remap[NUM_TOTAL_VARYING_SLOTS];
+      memset(slot_remap, -1, NUM_TOTAL_VARYING_SLOTS);
+
+      /* This sets DEFAULT_VAL for constant outputs in vs_output_param_offset. */
+      /* TODO: This doesn't affect GS. */
+      NIR_PASS_V(nir, ac_nir_optimize_outputs, false, slot_remap,
+                 shader->info.vs_output_param_offset);
+
+      /* Assign the non-constant outputs. */
+      /* TODO: Use this for the GS copy shader too. */
+      si_nir_assign_param_offsets(nir, &sel->info, slot_remap, &shader->info.nr_param_exports,
+                                  &shader->info.vs_output_param_mask,
+                                  shader->info.vs_output_param_offset);
+
+      if (shader->key.ge.mono.u.vs_export_prim_id) {
+         shader->info.vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = shader->info.nr_param_exports++;
+         shader->info.vs_output_param_mask |= BITFIELD64_BIT(sel->info.num_outputs);
+      }
+   }
+
    struct pipe_stream_output_info so = {};
    if (sel->info.enabled_streamout_buffer_mask)
       nir_gather_stream_output_info(nir, &so);
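Ordering matters in this hunk: ac_nir_optimize_outputs runs first and can leave constant outputs at one of the AC_EXP_PARAM_DEFAULT_VAL_* codes (the fragment shader then gets the constant via its input control rather than a real export), only the remaining outputs consume param indices, and the forced primitive-ID export is appended last. A sketch of the two cases, using hypothetical helper names (needs_param_export, append_prim_id), not driver code:

   #include <stdbool.h>
   #include <stdint.h>
   #include <stdio.h>

   #define PARAM_OFFSET_31 31 /* stand-in for AC_EXP_PARAM_OFFSET_31 */

   /* Offsets 0..31 address a real param export; larger values are the
    * DEFAULT_VAL codes, which need no export at all. */
   static bool needs_param_export(uint8_t offset)
   {
      return offset <= PARAM_OFFSET_31;
   }

   /* Mirror of the prim-id block above: next free param index, and a mask
    * bit one past the last regular output (sel->info.num_outputs). */
   static void append_prim_id(uint8_t *prim_id_offset, uint8_t *nr_param_exports,
                              uint64_t *mask, unsigned num_outputs)
   {
      *prim_id_offset = (*nr_param_exports)++;
      *mask |= 1ull << num_outputs;
   }

   int main(void)
   {
      uint8_t nr = 2, prim_id = 0xff; /* two varyings already assigned */
      uint64_t mask = 0x3;

      append_prim_id(&prim_id, &nr, &mask, 2);
      printf("prim id export %u of %u, mask 0x%llx, real export: %d\n",
             (unsigned)prim_id, (unsigned)nr, (unsigned long long)mask,
             needs_param_export(prim_id)); /* 2 of 3, mask 0x7, real export: 1 */
      return 0;
   }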
@@ -1635,13 +1717,14 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
    if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
       vs_output_param_offset = shader->gs_copy_shader->info.vs_output_param_offset;
 
    /* We must use the original shader info before the removal of duplicated shader outputs. */
    /* VS and TES should also set primitive ID output if it's used. */
    unsigned num_outputs_with_prim_id = sel->info.num_outputs +
                                        shader->key.ge.mono.u.vs_export_prim_id;
 
    for (unsigned i = 0; i < num_outputs_with_prim_id; i++) {
       unsigned semantic = sel->info.output_semantic[i];
-      unsigned offset = vs_output_param_offset[i];
+      unsigned offset = vs_output_param_offset[semantic];
       unsigned ps_input_cntl;
 
       if (offset <= AC_EXP_PARAM_OFFSET_31) {
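The one-line change here follows from the array resize in si_shader.h below: vs_output_param_offset is now sized NUM_TOTAL_VARYING_SLOTS and keyed by varying semantic rather than by the shader's output index, so the lookup must translate through output_semantic[i] first. An illustrative helper (not driver code) making the new indexing explicit:

   #include <stdint.h>

   /* The offset table is keyed by semantic, so translate the output
    * index to its semantic before indexing. */
   static unsigned lookup_param_offset(const uint8_t offset_by_semantic[],
                                       const uint8_t output_semantic[], unsigned i)
   {
      return offset_by_semantic[output_semantic[i]];
   }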

@@ -739,7 +739,8 @@ union si_shader_key {
 
 /* GCN-specific shader info. */
 struct si_shader_binary_info {
-   ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
+   ubyte vs_output_param_offset[NUM_TOTAL_VARYING_SLOTS];
+   uint64_t vs_output_param_mask; /* which params to export, indexed by "base" */
    uint32_t vs_output_ps_input_cntl[NUM_TOTAL_VARYING_SLOTS];
    ubyte num_input_sgprs;
    ubyte num_input_vgprs;

@@ -1061,31 +1061,6 @@ static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
    return sel->stage == MESA_SHADER_COMPUTE && sel->info.num_memory_stores > 1000;
 }
 
-static void si_optimize_vs_outputs(struct si_shader_context *ctx)
-{
-   struct si_shader *shader = ctx->shader;
-   struct si_shader_info *info = &shader->selector->info;
-   unsigned skip_vs_optim_mask = 0;
-
-   if ((ctx->stage != MESA_SHADER_VERTEX && ctx->stage != MESA_SHADER_TESS_EVAL) ||
-       shader->key.ge.as_ls || shader->key.ge.as_es)
-      return;
-
-   /* Optimizing these outputs is not possible, since they might be overriden
-    * at runtime with S_028644_PT_SPRITE_TEX. */
-   for (int i = 0; i < info->num_outputs; i++) {
-      if (info->output_semantic[i] == VARYING_SLOT_PNTC ||
-          (info->output_semantic[i] >= VARYING_SLOT_TEX0 &&
-           info->output_semantic[i] <= VARYING_SLOT_TEX7)) {
-         skip_vs_optim_mask |= 1u << shader->info.vs_output_param_offset[i];
-      }
-   }
-
-   ac_optimize_vs_outputs(&ctx->ac, ctx->main_fn, shader->info.vs_output_param_offset,
-                          info->num_outputs, skip_vs_optim_mask,
-                          &shader->info.nr_param_exports);
-}
-
 bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
                             struct si_shader *shader, const struct pipe_stream_output_info *so,
                             struct util_debug_callback *debug, struct nir_shader *nir,
@@ -1295,9 +1270,6 @@ bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
    si_llvm_optimize_module(&ctx);
 
-   /* Post-optimization transformations and analysis. */
-   si_optimize_vs_outputs(&ctx);
-
    /* Make sure the input is a pointer and not integer followed by inttoptr. */
    assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) == LLVMPointerTypeKind);

@@ -22,6 +22,7 @@
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include "ac_nir.h"
 #include "si_pipe.h"
 #include "si_shader_internal.h"
 #include "sid.h"
@@ -444,6 +445,25 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
    shader->is_gs_copy_shader = true;
    shader->wave_size = si_determine_wave_size(sscreen, shader);
 
+   STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
+   memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
+          sizeof(shader->info.vs_output_param_offset));
+
+   for (unsigned i = 0; i < gsinfo->num_outputs; i++) {
+      unsigned semantic = gsinfo->output_semantic[i];
+
+      /* Skip if no channel writes to stream 0. */
+      if (!nir_slot_is_varying(semantic) ||
+          (gsinfo->output_streams[i] & 0x03 &&
+           gsinfo->output_streams[i] & 0x0c &&
+           gsinfo->output_streams[i] & 0x30 &&
+           gsinfo->output_streams[i] & 0xc0))
+         continue;
+
+      shader->info.vs_output_param_offset[semantic] = shader->info.nr_param_exports++;
+      shader->info.vs_output_param_mask |= BITFIELD64_BIT(i);
+   }
+
    si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
 
    ctx.shader = shader;
    ctx.stage = MESA_SHADER_VERTEX;
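output_streams[i] packs one 2-bit stream selector per channel (x in bits 0-1 through w in bits 6-7), so the chained mask test is a compact way of asking "does any channel go to stream 0?": the output can only be skipped when every 2-bit field is nonzero. An equivalent, more explicit form, a sketch assuming that same 2-bits-per-channel packing:

   #include <stdbool.h>
   #include <stdint.h>

   /* Per-channel stream selectors: 2 bits each, x..w from LSB to MSB. */
   static unsigned channel_stream(uint8_t streams, unsigned chan)
   {
      return (streams >> (2 * chan)) & 0x3;
   }

   /* True iff no channel goes to stream 0, i.e. every 2-bit field is
    * nonzero -- the same result as
    * (s & 0x03) && (s & 0x0c) && (s & 0x30) && (s & 0xc0). */
   static bool skip_for_stream0(uint8_t streams)
   {
      for (unsigned chan = 0; chan < 4; chan++) {
         if (channel_stream(streams, chan) == 0)
            return false;
      }
      return true;
   }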

@@ -438,61 +438,6 @@ static void si_llvm_init_vs_export_args(struct si_shader_context *ctx, const LLVMValueRef *values,
    memcpy(&args->out[0], values, sizeof(values[0]) * 4);
 }
 
-static void si_prepare_param_exports(struct si_shader_context *ctx,
-                                     const struct si_shader_output_values *outputs, unsigned noutput,
-                                     struct ac_export_args exports[32])
-{
-   struct si_shader *shader = ctx->shader;
-   unsigned param_count = 0;
-
-   memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
-          sizeof(shader->info.vs_output_param_offset));
-
-   for (unsigned i = 0; i < noutput; i++) {
-      unsigned semantic = outputs[i].semantic;
-
-      /* Skip if no channel writes to stream 0. */
-      if (outputs[i].vertex_streams & 0x03 &&
-          outputs[i].vertex_streams & 0x0c &&
-          outputs[i].vertex_streams & 0x30 &&
-          outputs[i].vertex_streams & 0xc0)
-         continue;
-
-      switch (semantic) {
-      case VARYING_SLOT_LAYER:
-      case VARYING_SLOT_VIEWPORT:
-      case VARYING_SLOT_CLIP_DIST0:
-      case VARYING_SLOT_CLIP_DIST1:
-      case VARYING_SLOT_COL0:
-      case VARYING_SLOT_COL1:
-      case VARYING_SLOT_BFC0:
-      case VARYING_SLOT_BFC1:
-      case VARYING_SLOT_PRIMITIVE_ID:
-      case VARYING_SLOT_FOGC:
-         break;
-      default:
-         if ((semantic >= VARYING_SLOT_TEX0 && semantic <= VARYING_SLOT_TEX7) ||
-             semantic >= VARYING_SLOT_VAR0)
-            break;
-         else
-            continue;
-      }
-
-      if ((semantic <= VARYING_SLOT_VAR31 || semantic >= VARYING_SLOT_VAR0_16BIT) &&
-          shader->key.ge.opt.kill_outputs &
-          (1ull << si_shader_io_get_unique_index(semantic, true)))
-         continue;
-
-      si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + param_count,
-                                  &exports[param_count]);
-
-      assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
-      shader->info.vs_output_param_offset[i] = param_count++;
-   }
-
-   shader->info.nr_param_exports = param_count;
-}
-
 /**
  * Vertex color clamping.
  *
@@ -576,9 +521,6 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
    si_vertex_color_clamping(ctx, outputs, noutput);
 
-   struct ac_export_args param_exports[32];
-   si_prepare_param_exports(ctx, outputs, noutput, param_exports);
-
    /* Build position exports. */
    for (i = 0; i < noutput; i++) {
       switch (outputs[i].semantic) {
@@ -747,7 +689,23 @@ void si_llvm_build_vs_exports(struct si_shader_context *ctx,
       ac_build_export(&ctx->ac, &pos_args[i]);
    }
 
-   /* Build parameter exports. */
+   /* Build parameter exports. Use 2 loops to export params in ascending order.
+    * 32 is the maximum number of parameter exports.
+    */
+   struct ac_export_args param_exports[32] = {};
+   uint64_t vs_output_param_mask = shader->info.vs_output_param_mask;
+
+   while (vs_output_param_mask) {
+      unsigned i = u_bit_scan64(&vs_output_param_mask);
+      unsigned offset = shader->info.vs_output_param_offset[outputs[i].semantic];
+
+      assert(offset <= AC_EXP_PARAM_OFFSET_31);
+      assert(!param_exports[offset].enabled_channels);
+
+      si_llvm_init_vs_export_args(ctx, outputs[i].values, V_008DFC_SQ_EXP_PARAM + offset,
+                                  &param_exports[offset]);
+   }
+
+   for (unsigned i = 0; i < shader->info.nr_param_exports; i++)
+      ac_build_export(&ctx->ac, &param_exports[i]);
 }
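u_bit_scan64 (Mesa's util helper) pops the lowest set bit of the mask and returns its index, so the first loop visits outputs in "base" order while scattering each export into its param-offset slot; the second loop then emits offsets 0..nr_param_exports-1 in ascending order. A portable standalone sketch of the same iteration pattern, using a GCC/Clang builtin instead of the util helper:

   #include <stdint.h>
   #include <stdio.h>

   /* Same contract as util's u_bit_scan64: return the index of the lowest
    * set bit and clear it from *mask. */
   static unsigned bit_scan64(uint64_t *mask)
   {
      unsigned i = (unsigned)__builtin_ctzll(*mask); /* GCC/Clang builtin */
      *mask &= *mask - 1;                            /* clear lowest set bit */
      return i;
   }

   int main(void)
   {
      uint64_t mask = 0x29; /* outputs 0, 3 and 5 need param exports */

      while (mask) {
         unsigned i = bit_scan64(&mask);
         printf("emit export for output %u\n", i); /* prints 0, 3, 5 */
      }
      return 0;
   }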