diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c index e5f54de2c78..139639a71d0 100644 --- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c +++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c @@ -114,7 +114,8 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader) assert(shader->key.as_ngg && !shader->key.as_es); - return sel->info.stage != MESA_SHADER_GEOMETRY && !sel->info.writes_edgeflag; + return sel->info.stage != MESA_SHADER_GEOMETRY && + !gfx10_ngg_writes_user_edgeflags(shader); } void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx) @@ -146,7 +147,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use /* This is only used with NGG culling, which returns the NGG * passthrough prim export encoding. */ - if (ctx->shader->selector->info.writes_edgeflag) { + if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) { unsigned all_bits_no_edgeflags = ~SI_NGG_PRIM_EDGE_FLAG_BITS; LLVMValueRef edgeflags = LLVMConstInt(ctx->ac.i32, all_bits_no_edgeflags, 0); @@ -179,8 +180,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use prim.isnull = ctx->ac.i1false; - if (ctx->stage == MESA_SHADER_VERTEX && - !ctx->shader->selector->info.base.vs.blit_sgprs_amd) + if (gfx10_edgeflags_have_effect(ctx->shader)) prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args); else prim.edgeflags = ctx->ac.i32_0; @@ -188,7 +188,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use for (unsigned i = 0; i < prim.num_vertices; ++i) prim.index[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16); - if (ctx->shader->selector->info.writes_edgeflag) { + if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) { LLVMValueRef edgeflags = ctx->ac.i32_0; for (unsigned i = 0; i < prim.num_vertices; ++i) { @@ -603,7 +603,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader) * used for 
padding to reduce LDS bank conflicts. */ if (shader->selector->so.num_outputs) lds_vertex_size = 4 * shader->selector->info.num_outputs + 1; - if (shader->selector->info.writes_edgeflag) + if (gfx10_ngg_writes_user_edgeflags(shader)) lds_vertex_size = MAX2(lds_vertex_size, 1); /* LDS size for passing data from GS to ES. @@ -1161,7 +1161,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) prim.num_vertices = num_vertices; prim.isnull = ctx->ac.i1false; - if (ctx->stage == MESA_SHADER_VERTEX) + if (gfx10_edgeflags_have_effect(shader)) prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args); else prim.edgeflags = ctx->ac.i32_0; @@ -1272,7 +1272,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi) } /* These two also use LDS. */ - if (sel->info.writes_edgeflag || + if (gfx10_ngg_writes_user_edgeflags(shader) || (ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)) ac_build_s_barrier(&ctx->ac); @@ -1297,7 +1297,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) LLVMValueRef vertex_ptr = NULL; - if (sel->so.num_outputs || sel->info.writes_edgeflag) + if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx)); for (unsigned i = 0; i < info->num_outputs; i++) { @@ -1318,7 +1318,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) } /* Store the edgeflag at the end (if streamout is enabled) */ - if (info->output_semantic[i] == VARYING_SLOT_EDGE && sel->info.writes_edgeflag) { + if (info->output_semantic[i] == VARYING_SLOT_EDGE && gfx10_ngg_writes_user_edgeflags(ctx->shader)) { LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], ""); /* The output is a float, but the hw expects a 1-bit integer. 
*/ edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, ""); @@ -1331,7 +1331,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) } bool unterminated_es_if_block = - !sel->so.num_outputs && !sel->info.writes_edgeflag && + !sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) && !ctx->screen->use_ngg_streamout && /* no query buffer */ (ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id); @@ -1373,7 +1373,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) LLVMValueRef user_edgeflags[3] = {}; - if (sel->info.writes_edgeflag) { + if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) { assert(!unterminated_es_if_block); /* Streamout already inserted the barrier, so don't insert it again. */ @@ -1401,7 +1401,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi) assert(!unterminated_es_if_block); /* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */ - if (sel->so.num_outputs || sel->info.writes_edgeflag) + if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader)) ac_build_s_barrier(&ctx->ac); ac_build_ifcc(&ctx->ac, is_gs_thread, 5400); diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h index fa32c8ed705..c55461fdaa4 100644 --- a/src/gallium/drivers/radeonsi/si_shader.h +++ b/src/gallium/drivers/radeonsi/si_shader.h @@ -960,6 +960,21 @@ static inline bool si_shader_uses_bindless_images(struct si_shader_selector *sel return selector ? 
selector->info.uses_bindless_images : false; } +static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader) /* Returns true only when the shader's selector is a vertex-stage shader that does not set vs.blit_sgprs_amd; callers in this patch use the result to decide whether edge flags are packed into the primitive export and whether INDEX_BUF_EDGE_FLAG_ENA is set in pa_cl_ngg_cntl. */ +{ + if (shader->selector->info.stage == MESA_SHADER_VERTEX && + !shader->selector->info.base.vs.blit_sgprs_amd) /* NOTE(review): presumably blit vertex shaders never need edge flags - confirm against the blit shader setup. */ + return true; + + return false; +} + +static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader) /* Returns true when gfx10_edgeflags_have_effect() holds for this shader and the selector's info.writes_edgeflag is set; false otherwise. */ +{ + return gfx10_edgeflags_have_effect(shader) && + shader->selector->info.writes_edgeflag; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index ae10ec4c28f..c69e70b2ec3 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1193,7 +1193,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader * for the GL_LINE polygon mode to skip rendering lines on inner edges. */ if (gs_info->uses_invocationid || - (gs_stage == MESA_SHADER_VERTEX && !gfx10_is_ngg_passthrough(shader))) + (gfx10_edgeflags_have_effect(shader) && !gfx10_is_ngg_passthrough(shader))) gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */ else if ((gs_stage == MESA_SHADER_GEOMETRY && gs_info->uses_primid) || (gs_stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id)) @@ -1276,13 +1276,12 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) | S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(shader->ngg.max_vert_out_per_gs_instance); - /* Always output hw-generated edge flags and pass them via the prim + /* Output hw-generated edge flags if needed and pass them via the prim * export to prevent drawing lines on internal edges of decomposed - * primitives (such as quads) with polygon mode = lines. Only VS needs - * this. + * primitives (such as quads) with polygon mode = lines. 
*/ shader->ctx_reg.ngg.pa_cl_ngg_cntl = - S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) | + S_028838_INDEX_BUF_EDGE_FLAG_ENA(gfx10_edgeflags_have_effect(shader)) | /* Reuse for NGG. */ S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0); shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true);