mirror of https://gitlab.freedesktop.org/mesa/mesa
turnip: Treat non-d/s-write pipelines as not having d/s feedback loops.
A subpass in gfxbench has the depth buffer present but not written to, in a render pass that uses the depth buffer as an input attachment. In that case we can skip single-prim-mode and the associated "oh no, don't use sysmem" fallback.

Improves gfxbench vk-5-normal perf by 1.56193% +/- 0.0743035% (n=14).

Part of #6327.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18241>
This commit is contained in:
parent
14911e8f83
commit
3ef13ef234
|
@ -513,18 +513,13 @@ tu_autotune_use_bypass(struct tu_autotune *at,
|
|||
const struct tu_render_pass *pass = cmd_buffer->state.pass;
|
||||
const struct tu_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
|
||||
|
||||
for (unsigned i = 0; i < pass->subpass_count; i++) {
|
||||
const struct tu_subpass *subpass = &pass->subpasses[i];
|
||||
/* GMEM works much faster in this case */
|
||||
if (subpass->raster_order_attachment_access)
|
||||
return false;
|
||||
|
||||
/* Would be very slow in sysmem mode because we have to enable
|
||||
* SINGLE_PRIM_MODE(FLUSH_PER_OVERLAP_AND_OVERWRITE)
|
||||
*/
|
||||
if (subpass->feedback_loop_color || subpass->feedback_loop_ds)
|
||||
return false;
|
||||
}
|
||||
/* If a feedback loop in the subpass caused one of the pipelines used to set
|
||||
* SINGLE_PRIM_MODE(FLUSH_PER_OVERLAP_AND_OVERWRITE) or even
|
||||
* SINGLE_PRIM_MODE(FLUSH), then that should cause significantly increased
|
||||
* sysmem bandwidth (though we haven't quantified it).
|
||||
*/
|
||||
if (cmd_buffer->state.rp.sysmem_single_prim_mode)
|
||||
return false;
|
||||
|
||||
/* For VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT buffers
|
||||
* we would have to allocate GPU memory at the submit time and copy
|
||||
|
|
|
@ -2489,6 +2489,7 @@ tu_CmdBindPipeline(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
cmd->state.rp.disable_gmem = true;
|
||||
}
|
||||
cmd->state.rp.sysmem_single_prim_mode |= pipeline->sysmem_single_prim_mode;
|
||||
|
||||
struct tu_cs *cs = &cmd->draw_cs;
|
||||
|
||||
|
@ -3440,6 +3441,7 @@ tu_render_pass_state_merge(struct tu_render_pass_state *dst,
|
|||
dst->has_tess |= src->has_tess;
|
||||
dst->has_prim_generated_query_in_rp |= src->has_prim_generated_query_in_rp;
|
||||
dst->disable_gmem |= src->disable_gmem;
|
||||
dst->sysmem_single_prim_mode |= src->sysmem_single_prim_mode;
|
||||
dst->draw_cs_writes_to_cond_pred |= src->draw_cs_writes_to_cond_pred;
|
||||
|
||||
dst->drawcall_count += src->drawcall_count;
|
||||
|
|
|
@ -262,6 +262,7 @@ struct tu_render_pass_state
|
|||
bool has_tess;
|
||||
bool has_prim_generated_query_in_rp;
|
||||
bool disable_gmem;
|
||||
bool sysmem_single_prim_mode;
|
||||
|
||||
/* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
|
||||
bool draw_cs_writes_to_cond_pred;
|
||||
|
|
|
@ -3840,6 +3840,7 @@ tu_pipeline_builder_parse_rasterization_order(
|
|||
*/
|
||||
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
||||
gmem_prim_mode = FLUSH_PER_OVERLAP;
|
||||
pipeline->sysmem_single_prim_mode = true;
|
||||
} else {
|
||||
/* If there is a feedback loop, then the shader can read the previous value
|
||||
* of a pixel being written out. It can also write some components and then
|
||||
|
@ -3850,8 +3851,10 @@ tu_pipeline_builder_parse_rasterization_order(
|
|||
* for advanced_blend in sysmem mode if a feedback loop is detected.
|
||||
*/
|
||||
if (builder->subpass_feedback_loop_color ||
|
||||
builder->subpass_feedback_loop_ds) {
|
||||
(builder->subpass_feedback_loop_ds &&
|
||||
(ds_info->depthWriteEnable || ds_info->stencilTestEnable))) {
|
||||
sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE;
|
||||
pipeline->sysmem_single_prim_mode = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -192,6 +192,8 @@ struct tu_pipeline
|
|||
bool raster_order_attachment_access;
|
||||
bool subpass_feedback_loop_ds;
|
||||
bool feedback_loop_may_involve_textures;
|
||||
/* If the pipeline sets SINGLE_PRIM_MODE for sysmem. */
|
||||
bool sysmem_single_prim_mode;
|
||||
|
||||
bool z_negative_one_to_one;
|
||||
|
||||
|
|
Loading…
Reference in New Issue