radeonsi: add pipe_stream_output_info into si_shader_context
to reduce deltas for the next commit

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14414>
This commit is contained in:
parent
db92451e23
commit
a7b906264a
|
@ -218,7 +218,7 @@ static void build_streamout_vertex(struct si_shader_context *ctx, LLVMValueRef *
|
|||
LLVMValueRef offset_vtx, LLVMValueRef vertexptr)
|
||||
{
|
||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
|
||||
struct pipe_stream_output_info *so = &ctx->so;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef offset[4] = {};
|
||||
LLVMValueRef tmp;
|
||||
|
@ -274,7 +274,7 @@ struct ngg_streamout {
|
|||
static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout *nggso)
|
||||
{
|
||||
struct si_shader_info *info = &ctx->shader->selector->info;
|
||||
struct pipe_stream_output_info *so = &ctx->shader->selector->so;
|
||||
struct pipe_stream_output_info *so = &ctx->so;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
|
||||
LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx);
|
||||
|
@ -1418,7 +1418,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
|
||||
LLVMValueRef vertex_ptr = NULL;
|
||||
|
||||
if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
|
||||
if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
|
||||
vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx));
|
||||
|
||||
for (unsigned i = 0; i < info->num_outputs; i++) {
|
||||
|
@ -1430,7 +1430,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
/* TODO: we may store more outputs than streamout needs,
|
||||
* but streamout performance isn't that important.
|
||||
*/
|
||||
if (sel->so.num_outputs) {
|
||||
if (ctx->so.num_outputs) {
|
||||
tmp = ac_build_gep0(&ctx->ac, vertex_ptr, LLVMConstInt(ctx->ac.i32, 4 * i + j, false));
|
||||
tmp2 = LLVMBuildLoad(builder, addrs[4 * i + j], "");
|
||||
tmp2 = ac_to_integer(&ctx->ac, tmp2);
|
||||
|
@ -1452,7 +1452,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
}
|
||||
|
||||
bool unterminated_es_if_block =
|
||||
!sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) &&
|
||||
!ctx->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) &&
|
||||
!ctx->screen->use_ngg_streamout && /* no query buffer */
|
||||
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.ge.mono.u.vs_export_prim_id);
|
||||
|
||||
|
@ -1478,7 +1478,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
/* Streamout */
|
||||
LLVMValueRef emitted_prims = NULL;
|
||||
|
||||
if (sel->so.num_outputs) {
|
||||
if (ctx->so.num_outputs) {
|
||||
assert(!unterminated_es_if_block);
|
||||
|
||||
struct ngg_streamout nggso = {};
|
||||
|
@ -1498,7 +1498,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
assert(!unterminated_es_if_block);
|
||||
|
||||
/* Streamout already inserted the barrier, so don't insert it again. */
|
||||
if (!sel->so.num_outputs)
|
||||
if (!ctx->so.num_outputs)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
|
||||
|
@ -1522,7 +1522,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
assert(!unterminated_es_if_block);
|
||||
|
||||
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
|
||||
if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
|
||||
if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
|
||||
|
@ -1550,7 +1550,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, "");
|
||||
ac_build_ifcc(&ctx->ac, tmp, 5030);
|
||||
tmp = LLVMBuildICmp(builder, LLVMIntULE, ac_get_thread_id(&ctx->ac),
|
||||
sel->so.num_outputs ? ctx->ac.i32_1 : ctx->ac.i32_0, "");
|
||||
ctx->so.num_outputs ? ctx->ac.i32_1 : ctx->ac.i32_0, "");
|
||||
ac_build_ifcc(&ctx->ac, tmp, 5031);
|
||||
{
|
||||
LLVMValueRef args[] = {
|
||||
|
@ -1561,7 +1561,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
|
|||
ctx->ac.i32_0, /* cachepolicy */
|
||||
};
|
||||
|
||||
if (sel->so.num_outputs) {
|
||||
if (ctx->so.num_outputs) {
|
||||
args[0] = ac_build_writelane(&ctx->ac, args[0], emitted_prims, ctx->ac.i32_1);
|
||||
args[2] = ac_build_writelane(&ctx->ac, args[2], LLVMConstInt(ctx->ac.i32, 24, false),
|
||||
ctx->ac.i32_1);
|
||||
|
@ -1896,7 +1896,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
|||
LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx);
|
||||
|
||||
/* Streamout */
|
||||
if (sel->so.num_outputs) {
|
||||
if (ctx->so.num_outputs) {
|
||||
struct ngg_streamout nggso = {};
|
||||
|
||||
nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false);
|
||||
|
@ -1927,17 +1927,17 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
|||
tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1);
|
||||
tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, "");
|
||||
ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */
|
||||
unsigned num_query_comps = sel->so.num_outputs ? 8 : 4;
|
||||
unsigned num_query_comps = ctx->so.num_outputs ? 8 : 4;
|
||||
tmp = LLVMBuildICmp(builder, LLVMIntULT, tid,
|
||||
LLVMConstInt(ctx->ac.i32, num_query_comps, false), "");
|
||||
ac_build_ifcc(&ctx->ac, tmp, 5110);
|
||||
{
|
||||
LLVMValueRef offset;
|
||||
tmp = tid;
|
||||
if (sel->so.num_outputs)
|
||||
if (ctx->so.num_outputs)
|
||||
tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->ac.i32, 3, false), "");
|
||||
offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->ac.i32, 32, false), "");
|
||||
if (sel->so.num_outputs) {
|
||||
if (ctx->so.num_outputs) {
|
||||
tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->ac.i32, 2, false), "");
|
||||
tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->ac.i32, 8, false), "");
|
||||
offset = LLVMBuildAdd(builder, offset, tmp, "");
|
||||
|
@ -1967,7 +1967,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
|
|||
LLVMValueRef prim_enable = LLVMBuildAnd(builder, live, is_emit, "");
|
||||
|
||||
/* Wait for streamout to finish before we kill primitives. */
|
||||
if (sel->so.num_outputs)
|
||||
if (ctx->so.num_outputs)
|
||||
ac_build_s_barrier(&ctx->ac);
|
||||
|
||||
ac_build_ifcc(&ctx->ac, prim_enable, 0);
|
||||
|
|
|
@ -409,7 +409,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits);
|
||||
|
||||
if (ctx->shader->is_gs_copy_shader) {
|
||||
declare_streamout_params(ctx, &shader->selector->so);
|
||||
declare_streamout_params(ctx, &ctx->so);
|
||||
/* VGPRs */
|
||||
declare_vs_input_vgprs(ctx, &num_prolog_vgprs);
|
||||
break;
|
||||
|
@ -425,7 +425,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
} else if (shader->key.ge.as_ls) {
|
||||
/* no extra parameters */
|
||||
} else {
|
||||
declare_streamout_params(ctx, &shader->selector->so);
|
||||
declare_streamout_params(ctx, &ctx->so);
|
||||
}
|
||||
|
||||
/* VGPRs */
|
||||
|
@ -626,7 +626,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader)
|
|||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset);
|
||||
} else {
|
||||
declare_streamout_params(ctx, &shader->selector->so);
|
||||
declare_streamout_params(ctx, &ctx->so);
|
||||
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
|
||||
}
|
||||
|
||||
|
@ -1501,12 +1501,14 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
bool free_nir;
|
||||
struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir);
|
||||
|
||||
struct pipe_stream_output_info so = sel->so;
|
||||
|
||||
/* Dump NIR before doing NIR->LLVM conversion in case the
|
||||
* conversion fails. */
|
||||
if (si_can_dump_shader(sscreen, sel->info.stage) &&
|
||||
!(sscreen->debug_flags & DBG(NO_NIR))) {
|
||||
nir_print_shader(nir, stderr);
|
||||
si_dump_streamout(&sel->so);
|
||||
si_dump_streamout(&so);
|
||||
}
|
||||
|
||||
/* Initialize vs_output_ps_input_cntl to default. */
|
||||
|
@ -1523,7 +1525,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi
|
|||
* with PS and NGG VS), but monolithic shaders should be compiled
|
||||
* by LLVM due to more complicated compilation.
|
||||
*/
|
||||
if (!si_llvm_compile_shader(sscreen, compiler, shader, debug, nir, free_nir))
|
||||
if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
|
||||
return false;
|
||||
|
||||
/* The GS copy shader is compiled next. */
|
||||
|
|
|
@ -41,6 +41,7 @@ struct si_shader_context {
|
|||
struct si_shader *shader;
|
||||
struct si_shader_selector *next_shader_sel;
|
||||
struct si_screen *screen;
|
||||
struct pipe_stream_output_info so;
|
||||
|
||||
gl_shader_stage stage;
|
||||
|
||||
|
@ -226,8 +227,9 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part
|
|||
bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader,
|
||||
struct nir_shader *nir, bool free_nir, bool ngg_cull_shader);
|
||||
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
||||
struct si_shader *shader, struct util_debug_callback *debug,
|
||||
struct nir_shader *nir, bool free_nir);
|
||||
struct si_shader *shader, const struct pipe_stream_output_info *so,
|
||||
struct util_debug_callback *debug, struct nir_shader *nir,
|
||||
bool free_nir);
|
||||
|
||||
/* si_shader_llvm_gs.c */
|
||||
LLVMValueRef si_is_es_thread(struct si_shader_context *ctx);
|
||||
|
|
|
@ -933,7 +933,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad
|
|||
/* This is really only needed when streamout and / or vertex
|
||||
* compaction is enabled.
|
||||
*/
|
||||
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.ge.opt.ngg_culling)) {
|
||||
if (!ctx->gs_ngg_scratch && (ctx->so.num_outputs || shader->key.ge.opt.ngg_culling)) {
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch =
|
||||
LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
|
||||
|
@ -1087,13 +1087,15 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx)
|
|||
}
|
||||
|
||||
bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler,
|
||||
struct si_shader *shader, struct util_debug_callback *debug,
|
||||
struct nir_shader *nir, bool free_nir)
|
||||
struct si_shader *shader, const struct pipe_stream_output_info *so,
|
||||
struct util_debug_callback *debug, struct nir_shader *nir,
|
||||
bool free_nir)
|
||||
{
|
||||
struct si_shader_selector *sel = shader->selector;
|
||||
struct si_shader_context ctx;
|
||||
|
||||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
||||
ctx.so = *so;
|
||||
|
||||
LLVMValueRef ngg_cull_main_fn = NULL;
|
||||
if (sel->info.stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) {
|
||||
|
|
|
@ -446,9 +446,11 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size);
|
||||
ctx.shader = shader;
|
||||
ctx.stage = MESA_SHADER_VERTEX;
|
||||
ctx.so = gs_selector->so;
|
||||
|
||||
builder = ctx.ac.builder;
|
||||
|
||||
/* Build the main function. */
|
||||
si_llvm_create_main_func(&ctx, false);
|
||||
|
||||
LLVMValueRef buf_ptr = ac_get_arg(&ctx.ac, ctx.internal_bindings);
|
||||
|
@ -461,7 +463,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
/* Fetch the vertex stream ID.*/
|
||||
LLVMValueRef stream_id;
|
||||
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs)
|
||||
if (!sscreen->use_ngg_streamout && ctx.so.num_outputs)
|
||||
stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2);
|
||||
else
|
||||
stream_id = ctx.ac.i32_0;
|
||||
|
@ -485,7 +487,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
if (!gsinfo->num_stream_output_components[stream])
|
||||
continue;
|
||||
|
||||
if (stream > 0 && !gs_selector->so.num_outputs)
|
||||
if (stream > 0 && !ctx.so.num_outputs)
|
||||
continue;
|
||||
|
||||
bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out");
|
||||
|
@ -513,7 +515,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
|
|||
}
|
||||
|
||||
/* Streamout and exports. */
|
||||
if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) {
|
||||
if (!sscreen->use_ngg_streamout && ctx.so.num_outputs) {
|
||||
si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream);
|
||||
}
|
||||
|
||||
|
|
|
@ -315,8 +315,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef
|
|||
void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs,
|
||||
unsigned noutput, unsigned stream)
|
||||
{
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
struct pipe_stream_output_info *so = &sel->so;
|
||||
struct pipe_stream_output_info *so = &ctx->so;
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
int i;
|
||||
|
||||
|
@ -775,7 +774,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi)
|
|||
}
|
||||
}
|
||||
|
||||
if (!ctx->screen->use_ngg_streamout && ctx->shader->selector->so.num_outputs)
|
||||
if (!ctx->screen->use_ngg_streamout && ctx->so.num_outputs)
|
||||
si_llvm_emit_streamout(ctx, outputs, i, 0);
|
||||
|
||||
/* Export PrimitiveID. */
|
||||
|
|
Loading…
Reference in New Issue