diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index b8e064fb94b..02545073f64 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -218,7 +218,7 @@ static void build_streamout_vertex(struct si_shader_context *ctx, LLVMValueRef * LLVMValueRef offset_vtx, LLVMValueRef vertexptr) { struct si_shader_info *info = &ctx->shader->selector->info; - struct pipe_stream_output_info *so = &ctx->shader->selector->so; + struct pipe_stream_output_info *so = &ctx->so; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef offset[4] = {}; LLVMValueRef tmp; @@ -274,7 +274,7 @@ struct ngg_streamout { static void build_streamout(struct si_shader_context *ctx, struct ngg_streamout *nggso) { struct si_shader_info *info = &ctx->shader->selector->info; - struct pipe_stream_output_info *so = &ctx->shader->selector->so; + struct pipe_stream_output_info *so = &ctx->so; LLVMBuilderRef builder = ctx->ac.builder; LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings); LLVMValueRef tid = gfx10_get_thread_id_in_tg(ctx); @@ -1418,7 +1418,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) LLVMValueRef vertex_ptr = NULL; - if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) + if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) vertex_ptr = ngg_nogs_vertex_ptr(ctx, gfx10_get_thread_id_in_tg(ctx)); for (unsigned i = 0; i < info->num_outputs; i++) { @@ -1430,7 +1430,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) /* TODO: we may store more outputs than streamout needs, * but streamout performance isn't that important. */ - if (sel->so.num_outputs) { + if (ctx->so.num_outputs) { tmp = ac_build_gep0(&ctx->ac, vertex_ptr, LLVMConstInt(ctx->ac.i32, 4 * i + j, false)); tmp2 = LLVMBuildLoad(builder, addrs[4 * i + j], ""); tmp2 = ac_to_integer(&ctx->ac, tmp2); @@ -1452,7 +1452,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) } bool unterminated_es_if_block = - !sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) && + !ctx->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) && !ctx->screen->use_ngg_streamout && /* no query buffer */ (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.ge.mono.u.vs_export_prim_id); @@ -1478,7 +1478,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) /* Streamout */ LLVMValueRef emitted_prims = NULL; - if (sel->so.num_outputs) { + if (ctx->so.num_outputs) { assert(!unterminated_es_if_block); struct ngg_streamout nggso = {}; @@ -1498,7 +1498,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) assert(!unterminated_es_if_block); /* Streamout already inserted the barrier, so don't insert it again. */ - if (!sel->so.num_outputs) + if (!ctx->so.num_outputs) ac_build_s_barrier(&ctx->ac); ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); @@ -1522,7 +1522,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) assert(!unterminated_es_if_block); /* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */ - if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) + if (ctx->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) ac_build_s_barrier(&ctx->ac); ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); @@ -1550,7 +1550,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) tmp = LLVMBuildICmp(builder, LLVMIntEQ, get_wave_id_in_tg(ctx), ctx->ac.i32_0, ""); ac_build_ifcc(&ctx->ac, tmp, 5030); tmp = LLVMBuildICmp(builder, LLVMIntULE, ac_get_thread_id(&ctx->ac), - sel->so.num_outputs ? ctx->ac.i32_1 : ctx->ac.i32_0, ""); + ctx->so.num_outputs ? ctx->ac.i32_1 : ctx->ac.i32_0, ""); ac_build_ifcc(&ctx->ac, tmp, 5031); { LLVMValueRef args[] = { @@ -1561,7 +1561,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) ctx->ac.i32_0, /* cachepolicy */ }; - if (sel->so.num_outputs) { + if (ctx->so.num_outputs) { args[0] = ac_build_writelane(&ctx->ac, args[0], emitted_prims, ctx->ac.i32_1); args[2] = ac_build_writelane(&ctx->ac, args[2], LLVMConstInt(ctx->ac.i32, 24, false), ctx->ac.i32_1); @@ -1896,7 +1896,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) LLVMValueRef num_emit_threads = ngg_get_prim_cnt(ctx); /* Streamout */ - if (sel->so.num_outputs) { + if (ctx->so.num_outputs) { struct ngg_streamout nggso = {}; nggso.num_vertices = LLVMConstInt(ctx->ac.i32, verts_per_prim, false); @@ -1927,17 +1927,17 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) tmp = si_unpack_param(ctx, ctx->vs_state_bits, 6, 1); tmp = LLVMBuildTrunc(builder, tmp, ctx->ac.i1, ""); ac_build_ifcc(&ctx->ac, tmp, 5109); /* if (STREAMOUT_QUERY_ENABLED) */ - unsigned num_query_comps = sel->so.num_outputs ? 8 : 4; + unsigned num_query_comps = ctx->so.num_outputs ? 8 : 4; tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, LLVMConstInt(ctx->ac.i32, num_query_comps, false), ""); ac_build_ifcc(&ctx->ac, tmp, 5110); { LLVMValueRef offset; tmp = tid; - if (sel->so.num_outputs) + if (ctx->so.num_outputs) tmp = LLVMBuildAnd(builder, tmp, LLVMConstInt(ctx->ac.i32, 3, false), ""); offset = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->ac.i32, 32, false), ""); - if (sel->so.num_outputs) { + if (ctx->so.num_outputs) { tmp = LLVMBuildLShr(builder, tid, LLVMConstInt(ctx->ac.i32, 2, false), ""); tmp = LLVMBuildNUWMul(builder, tmp, LLVMConstInt(ctx->ac.i32, 8, false), ""); offset = LLVMBuildAdd(builder, offset, tmp, ""); @@ -1967,7 +1967,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx) LLVMValueRef prim_enable = LLVMBuildAnd(builder, live, is_emit, ""); /* Wait for streamout to finish before we kill primitives. */ - if (sel->so.num_outputs) + if (ctx->so.num_outputs) ac_build_s_barrier(&ctx->ac); ac_build_ifcc(&ctx->ac, prim_enable, 0); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 3344774523c..c60909833fa 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -409,7 +409,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->vs_state_bits); if (ctx->shader->is_gs_copy_shader) { - declare_streamout_params(ctx, &shader->selector->so); + declare_streamout_params(ctx, &ctx->so); /* VGPRs */ declare_vs_input_vgprs(ctx, &num_prolog_vgprs); break; @@ -425,7 +425,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) } else if (shader->key.ge.as_ls) { /* no extra parameters */ } else { - declare_streamout_params(ctx, &shader->selector->so); + declare_streamout_params(ctx, &ctx->so); } /* VGPRs */ @@ -626,7 +626,7 @@ void si_init_shader_args(struct si_shader_context *ctx, bool ngg_cull_shader) ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.es2gs_offset); } else { - declare_streamout_params(ctx, &shader->selector->so); + declare_streamout_params(ctx, &ctx->so); ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset); } @@ -1501,12 +1501,14 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi bool free_nir; struct nir_shader *nir = si_get_nir_shader(sel, &shader->key, &free_nir); + struct pipe_stream_output_info so = sel->so; + /* Dump NIR before doing NIR->LLVM conversion in case the * conversion fails. */ if (si_can_dump_shader(sscreen, sel->info.stage) && !(sscreen->debug_flags & DBG(NO_NIR))) { nir_print_shader(nir, stderr); - si_dump_streamout(&sel->so); + si_dump_streamout(&so); } /* Initialize vs_output_ps_input_cntl to default. */ @@ -1523,7 +1525,7 @@ bool si_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compi * with PS and NGG VS), but monolithic shaders should be compiled * by LLVM due to more complicated compilation. */ - if (!si_llvm_compile_shader(sscreen, compiler, shader, debug, nir, free_nir)) + if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir)) return false; /* The GS copy shader is compiled next. */ diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h index 103d78a9c52..a9747c15b7f 100644 --- a/src/gallium/drivers/radeonsi/si_shader_internal.h +++ b/src/gallium/drivers/radeonsi/si_shader_internal.h @@ -41,6 +41,7 @@ struct si_shader_context { struct si_shader *shader; struct si_shader_selector *next_shader_sel; struct si_screen *screen; + struct pipe_stream_output_info so; gl_shader_stage stage; @@ -226,8 +227,9 @@ void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *part bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shader, struct nir_shader *nir, bool free_nir, bool ngg_cull_shader); bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, - struct si_shader *shader, struct util_debug_callback *debug, - struct nir_shader *nir, bool free_nir); + struct si_shader *shader, const struct pipe_stream_output_info *so, + struct util_debug_callback *debug, struct nir_shader *nir, + bool free_nir); /* si_shader_llvm_gs.c */ LLVMValueRef si_is_es_thread(struct si_shader_context *ctx); diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index e8546d1748d..101d2fb116e 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -933,7 +933,7 @@ bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shader *shad /* This is really only needed when streamout and / or vertex * compaction is enabled. */ - if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.ge.opt.ngg_culling)) { + if (!ctx->gs_ngg_scratch && (ctx->so.num_outputs || shader->key.ge.opt.ngg_culling)) { LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader)); ctx->gs_ngg_scratch = LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS); @@ -1087,13 +1087,15 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx) } bool si_llvm_compile_shader(struct si_screen *sscreen, struct ac_llvm_compiler *compiler, - struct si_shader *shader, struct util_debug_callback *debug, - struct nir_shader *nir, bool free_nir) + struct si_shader *shader, const struct pipe_stream_output_info *so, + struct util_debug_callback *debug, struct nir_shader *nir, + bool free_nir) { struct si_shader_selector *sel = shader->selector; struct si_shader_context ctx; si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size); + ctx.so = *so; LLVMValueRef ngg_cull_main_fn = NULL; if (sel->info.stage <= MESA_SHADER_TESS_EVAL && shader->key.ge.opt.ngg_culling) { diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c index ec77d2fc65d..9cfca4dc5dc 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c @@ -446,9 +446,11 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, si_llvm_context_init(&ctx, sscreen, compiler, shader->wave_size); ctx.shader = shader; ctx.stage = MESA_SHADER_VERTEX; + ctx.so = gs_selector->so; builder = ctx.ac.builder; + /* Build the main function. */ si_llvm_create_main_func(&ctx, false); LLVMValueRef buf_ptr = ac_get_arg(&ctx.ac, ctx.internal_bindings); @@ -461,7 +463,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, /* Fetch the vertex stream ID.*/ LLVMValueRef stream_id; - if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) + if (!sscreen->use_ngg_streamout && ctx.so.num_outputs) stream_id = si_unpack_param(&ctx, ctx.args.streamout_config, 24, 2); else stream_id = ctx.ac.i32_0; @@ -485,7 +487,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, if (!gsinfo->num_stream_output_components[stream]) continue; - if (stream > 0 && !gs_selector->so.num_outputs) + if (stream > 0 && !ctx.so.num_outputs) continue; bb = LLVMInsertBasicBlockInContext(ctx.ac.context, end_bb, "out"); @@ -513,7 +515,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen, } /* Streamout and exports. */ - if (!sscreen->use_ngg_streamout && gs_selector->so.num_outputs) { + if (!sscreen->use_ngg_streamout && ctx.so.num_outputs) { si_llvm_emit_streamout(&ctx, outputs, gsinfo->num_outputs, stream); } diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c index 06b049652d3..d0c699cf924 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c @@ -315,8 +315,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef void si_llvm_emit_streamout(struct si_shader_context *ctx, struct si_shader_output_values *outputs, unsigned noutput, unsigned stream) { - struct si_shader_selector *sel = ctx->shader->selector; - struct pipe_stream_output_info *so = &sel->so; + struct pipe_stream_output_info *so = &ctx->so; LLVMBuilderRef builder = ctx->ac.builder; int i; @@ -775,7 +774,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi) } } - if (!ctx->screen->use_ngg_streamout && ctx->shader->selector->so.num_outputs) + if (!ctx->screen->use_ngg_streamout && ctx->so.num_outputs) si_llvm_emit_streamout(ctx, outputs, i, 0); /* Export PrimitiveID. */