anv: Do depth/stencil optimization for dynamic depth/stencil

When we made depth/stencil dynamic, we lost the optimization.  This is
particularly important for cases where the stencil test is enabled but
never writes anything as certain combinations with discard can cause
the stencil write (which doesn't do anything) to get moved late which
can be a measurable perf hit.  According to 028e1137e6 ("anv/pipeline:
Be smarter about depth/stencil state"), it was a couple percent for DOTA2
on Broadwell back in the day.  No idea how it affects current titles.

This may also improve the depth/stencil PMA workarounds on Gen8 and Gen9
since they're now looking at optimized depth/stencil state.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17564>
This commit is contained in:
Jason Ekstrand 2022-07-14 21:02:35 -05:00 committed by Marge Bot
parent 07f5f15963
commit 9ecc26ff27
4 changed files with 93 additions and 126 deletions

View File

@ -3106,10 +3106,6 @@ struct anv_graphics_pipeline {
uint32_t view_mask;
uint32_t instance_multiplier;
bool writes_depth;
bool depth_test_enable;
bool writes_stencil;
bool stencil_test_enable;
bool depth_clamp_enable;
bool depth_clip_enable;
bool kill_pixel;

View File

@ -974,38 +974,6 @@ const uint32_t genX(vk_to_intel_primitive_type)[] = {
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
/* Record the pipeline's depth/stencil test and write enables.
 *
 * pipeline: graphics pipeline whose writes_stencil, stencil_test_enable,
 *           writes_depth and depth_test_enable flags are set here.
 * ds_in:    depth/stencil state to summarize; may be NULL, in which case
 *           all four flags are cleared.
 * rp:       render pass state used to work out which depth/stencil aspects
 *           have an attachment format; may be NULL (no aspects assumed).
 */
static void
emit_ds_state(struct anv_graphics_pipeline *pipeline,
const struct vk_depth_stencil_state *ds_in,
const struct vk_render_pass_state *rp)
{
if (ds_in == NULL) {
/* We're going to OR this together with the dynamic state. We need
* to make sure it's initialized to something useful.
*/
pipeline->writes_stencil = false;
pipeline->stencil_test_enable = false;
pipeline->writes_depth = false;
pipeline->depth_test_enable = false;
return;
}
/* Collect the aspects that have a defined attachment format. With no
 * render pass state the mask stays empty.
 */
VkImageAspectFlags ds_aspects = 0;
if (rp != NULL) {
if (rp->depth_attachment_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (rp->stencil_attachment_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
}
/* Optimize a local copy so the caller's state is left untouched. */
struct vk_depth_stencil_state ds = *ds_in;
/* NOTE(review): the meaning of the trailing `false` argument is not
 * visible here -- confirm against vk_optimize_depth_stencil_state().
 */
vk_optimize_depth_stencil_state(&ds, ds_aspects, false);
/* Cache the optimized enables on the pipeline. */
pipeline->writes_stencil = ds.stencil.write_enable;
pipeline->stencil_test_enable = ds.stencil.test_enable;
pipeline->writes_depth = ds.depth.write_enable;
pipeline->depth_test_enable = ds.depth.test_enable;
}
static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
@ -2336,7 +2304,6 @@ genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
urb_deref_block_size);
emit_ms_state(pipeline, state->ms);
emit_ds_state(pipeline, state->ds, state->rp);
emit_cb_state(pipeline, state->cb, state->ms);
compute_kill_pixel(pipeline, state->ms, state->rp);

View File

@ -145,33 +145,37 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
uint32_t depth_stencil_dw[GENX(DEPTH_STENCIL_STATE_length)];
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
struct GENX(DEPTH_STENCIL_STATE) depth_stencil = {
.DoubleSidedStencilEnable = true,
.StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff,
.StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff,
.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff,
.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff,
.BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff,
.BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff,
.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff,
.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff,
.StencilBufferWriteEnable =
(dyn->ds.stencil.back.write_mask ||
dyn->ds.stencil.front.write_mask) &&
dyn->ds.stencil.test_enable,
.DepthTestEnable = dyn->ds.depth.test_enable,
.DepthBufferWriteEnable = dyn->ds.depth.test_enable &&
dyn->ds.depth.write_enable,
.DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op],
.StencilTestEnable = dyn->ds.stencil.test_enable,
.StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail],
.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass],
.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail],
.StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare],
.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail],
.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass],
.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail],
.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare],
.DepthTestEnable = opt_ds.depth.test_enable,
.DepthBufferWriteEnable = opt_ds.depth.write_enable,
.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op],
.StencilTestEnable = opt_ds.stencil.test_enable,
.StencilBufferWriteEnable = opt_ds.stencil.write_enable,
.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail],
.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass],
.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail],
.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare],
.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail],
.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass],
.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail],
.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare],
};
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);

View File

@ -107,7 +107,8 @@ genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
}
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
assert(GFX_VER == 8);
@ -174,7 +175,7 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
*/
/* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
if (!pipeline->depth_test_enable)
if (!ds->depth.test_enable)
return false;
/* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
@ -190,17 +191,15 @@ want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
* (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
*/
return (pipeline->kill_pixel && (pipeline->writes_depth ||
pipeline->writes_stencil)) ||
return (pipeline->kill_pixel && (ds->depth.write_enable ||
ds->stencil.write_enable)) ||
wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
const struct vk_depth_stencil_state *ds)
{
const struct vk_dynamic_graphics_state *dyn =
&cmd_buffer->vk.dynamic_graphics_state;
if (GFX_VER > 9)
return false;
assert(GFX_VER == 9);
@ -286,19 +285,13 @@ want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
*/
const bool stc_test_en =
cmd_buffer->state.gfx.stencil_att.iview != NULL &&
pipeline->stencil_test_enable;
const bool stc_test_en = ds->stencil.test_enable;
/* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
* (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
* 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
*/
const bool stc_write_en =
cmd_buffer->state.gfx.stencil_att.iview != NULL &&
(dyn->ds.stencil.front.write_mask ||
dyn->ds.stencil.back.write_mask) &&
pipeline->writes_stencil;
const bool stc_write_en = ds->stencil.write_enable;
/* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
@ -433,37 +426,41 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.StencilBufferWriteEnable =
(dyn->ds.stencil.front.write_mask ||
dyn->ds.stencil.back.write_mask) &&
dyn->ds.stencil.test_enable;
ds.DepthTestEnable = dyn->ds.depth.test_enable;
ds.DepthBufferWriteEnable = dyn->ds.depth.test_enable &&
dyn->ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op];
ds.StencilTestEnable = dyn->ds.stencil.test_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare];
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
want_depth_pma_fix(cmd_buffer));
const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#else
if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
@ -495,41 +492,44 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
VkImageAspectFlags ds_aspects = 0;
if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
struct vk_depth_stencil_state opt_ds = dyn->ds;
vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
ds.DoubleSidedStencilEnable = true;
ds.StencilTestMask = dyn->ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = dyn->ds.stencil.front.write_mask & 0xff;
ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
ds.BackfaceStencilTestMask = dyn->ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = dyn->ds.stencil.back.write_mask & 0xff;
ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
ds.StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff;
ds.BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff;
ds.StencilBufferWriteEnable =
(dyn->ds.stencil.front.write_mask ||
dyn->ds.stencil.back.write_mask) &&
dyn->ds.stencil.test_enable;
ds.DepthTestEnable = dyn->ds.depth.test_enable;
ds.DepthBufferWriteEnable = dyn->ds.depth.test_enable &&
dyn->ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.depth.compare_op];
ds.StencilTestEnable = dyn->ds.stencil.test_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[dyn->ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[dyn->ds.stencil.back.op.compare];
ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
ds.DepthTestEnable = opt_ds.depth.test_enable;
ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
ds.StencilTestEnable = opt_ds.stencil.test_enable;
ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
}
genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
want_stencil_pma_fix(cmd_buffer));
const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
}
#endif