radeonsi: add gfx10 helpers for determining whether edgeflags are enabled

They will return false when culling lines.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13048>
This commit is contained in:
Marek Olšák 2021-09-26 12:49:00 -04:00 committed by Marge Bot
parent 6058b7381e
commit 0030bdf9a6
3 changed files with 32 additions and 18 deletions

View File

@ -114,7 +114,8 @@ bool gfx10_ngg_export_prim_early(struct si_shader *shader)
assert(shader->key.as_ngg && !shader->key.as_es);
return sel->info.stage != MESA_SHADER_GEOMETRY && !sel->info.writes_edgeflag;
return sel->info.stage != MESA_SHADER_GEOMETRY &&
!gfx10_ngg_writes_user_edgeflags(shader);
}
void gfx10_ngg_build_sendmsg_gs_alloc_req(struct si_shader_context *ctx)
@ -146,7 +147,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
/* This is only used with NGG culling, which returns the NGG
* passthrough prim export encoding.
*/
if (ctx->shader->selector->info.writes_edgeflag) {
if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) {
unsigned all_bits_no_edgeflags = ~SI_NGG_PRIM_EDGE_FLAG_BITS;
LLVMValueRef edgeflags = LLVMConstInt(ctx->ac.i32, all_bits_no_edgeflags, 0);
@ -179,8 +180,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
prim.isnull = ctx->ac.i1false;
if (ctx->stage == MESA_SHADER_VERTEX &&
!ctx->shader->selector->info.base.vs.blit_sgprs_amd)
if (gfx10_edgeflags_have_effect(ctx->shader))
prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args);
else
prim.edgeflags = ctx->ac.i32_0;
@ -188,7 +188,7 @@ void gfx10_ngg_build_export_prim(struct si_shader_context *ctx, LLVMValueRef use
for (unsigned i = 0; i < prim.num_vertices; ++i)
prim.index[i] = si_unpack_param(ctx, ctx->args.gs_vtx_offset[i / 2], (i & 1) * 16, 16);
if (ctx->shader->selector->info.writes_edgeflag) {
if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) {
LLVMValueRef edgeflags = ctx->ac.i32_0;
for (unsigned i = 0; i < prim.num_vertices; ++i) {
@ -603,7 +603,7 @@ static unsigned ngg_nogs_vertex_size(struct si_shader *shader)
* used for padding to reduce LDS bank conflicts. */
if (shader->selector->so.num_outputs)
lds_vertex_size = 4 * shader->selector->info.num_outputs + 1;
if (shader->selector->info.writes_edgeflag)
if (gfx10_ngg_writes_user_edgeflags(shader))
lds_vertex_size = MAX2(lds_vertex_size, 1);
/* LDS size for passing data from GS to ES.
@ -1161,7 +1161,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
prim.num_vertices = num_vertices;
prim.isnull = ctx->ac.i1false;
if (ctx->stage == MESA_SHADER_VERTEX)
if (gfx10_edgeflags_have_effect(shader))
prim.edgeflags = ac_pack_edgeflags_for_export(&ctx->ac, &ctx->args);
else
prim.edgeflags = ctx->ac.i32_0;
@ -1272,7 +1272,7 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi)
}
/* These two also use LDS. */
if (sel->info.writes_edgeflag ||
if (gfx10_ngg_writes_user_edgeflags(shader) ||
(ctx->stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
ac_build_s_barrier(&ctx->ac);
@ -1297,7 +1297,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
LLVMValueRef vertex_ptr = NULL;
if (sel->so.num_outputs || sel->info.writes_edgeflag)
if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
vertex_ptr = ngg_nogs_vertex_ptr(ctx, get_thread_id_in_tg(ctx));
for (unsigned i = 0; i < info->num_outputs; i++) {
@ -1318,7 +1318,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
}
/* Store the edgeflag at the end (if streamout is enabled) */
if (info->output_semantic[i] == VARYING_SLOT_EDGE && sel->info.writes_edgeflag) {
if (info->output_semantic[i] == VARYING_SLOT_EDGE && gfx10_ngg_writes_user_edgeflags(ctx->shader)) {
LLVMValueRef edgeflag = LLVMBuildLoad(builder, addrs[4 * i], "");
/* The output is a float, but the hw expects a 1-bit integer. */
edgeflag = LLVMBuildFPToUI(ctx->ac.builder, edgeflag, ctx->ac.i32, "");
@ -1331,7 +1331,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
}
bool unterminated_es_if_block =
!sel->so.num_outputs && !sel->info.writes_edgeflag &&
!sel->so.num_outputs && !gfx10_ngg_writes_user_edgeflags(ctx->shader) &&
!ctx->screen->use_ngg_streamout && /* no query buffer */
(ctx->stage != MESA_SHADER_VERTEX || !ctx->shader->key.mono.u.vs_export_prim_id);
@ -1373,7 +1373,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
LLVMValueRef user_edgeflags[3] = {};
if (sel->info.writes_edgeflag) {
if (gfx10_ngg_writes_user_edgeflags(ctx->shader)) {
assert(!unterminated_es_if_block);
/* Streamout already inserted the barrier, so don't insert it again. */
@ -1401,7 +1401,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
assert(!unterminated_es_if_block);
/* Streamout and edge flags use LDS. Make it idle, so that we can reuse it. */
if (sel->so.num_outputs || sel->info.writes_edgeflag)
if (sel->so.num_outputs || gfx10_ngg_writes_user_edgeflags(ctx->shader))
ac_build_s_barrier(&ctx->ac);
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);

View File

@ -960,6 +960,21 @@ static inline bool si_shader_uses_bindless_images(struct si_shader_selector *sel
return selector ? selector->info.uses_bindless_images : false;
}
static inline bool gfx10_edgeflags_have_effect(struct si_shader *shader)
{
if (shader->selector->info.stage == MESA_SHADER_VERTEX &&
!shader->selector->info.base.vs.blit_sgprs_amd)
return true;
return false;
}
/**
 * Tell whether the shader writes edge flags that actually take effect.
 *
 * Returns true only when gfx10_edgeflags_have_effect(shader) holds and the
 * shader's selector reports info.writes_edgeflag; otherwise returns false.
 */
static inline bool gfx10_ngg_writes_user_edgeflags(struct si_shader *shader)
{
   /* A written edge flag is irrelevant if edge flags have no effect. */
   if (!gfx10_edgeflags_have_effect(shader))
      return false;

   return shader->selector->info.writes_edgeflag;
}
#ifdef __cplusplus
}
#endif

View File

@ -1193,7 +1193,7 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
* for the GL_LINE polygon mode to skip rendering lines on inner edges.
*/
if (gs_info->uses_invocationid ||
(gs_stage == MESA_SHADER_VERTEX && !gfx10_is_ngg_passthrough(shader)))
(gfx10_edgeflags_have_effect(shader) && !gfx10_is_ngg_passthrough(shader)))
gs_vgpr_comp_cnt = 3; /* VGPR3 contains InvocationID, edge flags. */
else if ((gs_stage == MESA_SHADER_GEOMETRY && gs_info->uses_primid) ||
(gs_stage == MESA_SHADER_VERTEX && shader->key.mono.u.vs_export_prim_id))
@ -1276,13 +1276,12 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) |
S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(shader->ngg.max_vert_out_per_gs_instance);
/* Always output hw-generated edge flags and pass them via the prim
/* Output hw-generated edge flags if needed and pass them via the prim
* export to prevent drawing lines on internal edges of decomposed
* primitives (such as quads) with polygon mode = lines. Only VS needs
* this.
* primitives (such as quads) with polygon mode = lines.
*/
shader->ctx_reg.ngg.pa_cl_ngg_cntl =
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gs_stage == MESA_SHADER_VERTEX) |
S_028838_INDEX_BUF_EDGE_FLAG_ENA(gfx10_edgeflags_have_effect(shader)) |
/* Reuse for NGG. */
S_028838_VERTEX_REUSE_DEPTH(sscreen->info.chip_class >= GFX10_3 ? 30 : 0);
shader->pa_cl_vs_out_cntl = si_get_vs_out_cntl(shader->selector, shader, true);