radeonsi: if shader culling culls all vertices, cull the primitive exports too
This was overlooked. It benefits triangle strips the most due to GS fast launch.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11509>
This commit is contained in:
parent
defc7c92d9
commit
593f3b3a5a
|
@ -1113,9 +1113,15 @@ void gfx10_emit_ngg_culling_epilogue(struct ac_shader_abi *abi, unsigned max_out
|
|||
}
|
||||
ac_build_endif(&ctx->ac, 16009);
|
||||
|
||||
/* If all vertices are culled, set the primitive count to 0, so that all waves are culled here. */
|
||||
LLVMValueRef num_primitives = ngg_get_prim_cnt(ctx);
|
||||
num_primitives = LLVMBuildSelect(builder,
|
||||
LLVMBuildICmp(builder, LLVMIntEQ, new_num_es_threads,
|
||||
ctx->ac.i32_0, ""),
|
||||
ctx->ac.i32_0, num_primitives, "");
|
||||
/* Kill waves that have inactive threads. */
|
||||
kill_wave = LLVMBuildICmp(builder, LLVMIntULE,
|
||||
ac_build_imax(&ctx->ac, new_num_es_threads, ngg_get_prim_cnt(ctx)),
|
||||
ac_build_imax(&ctx->ac, new_num_es_threads, num_primitives),
|
||||
LLVMBuildMul(builder, get_wave_id_in_tg(ctx),
|
||||
LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, 0), ""),
|
||||
"");
|
||||
|
|
Loading…
Reference in New Issue