turnip: Skip load/stores for tiles with no geometry
When HW binning is used, tile loads/stores can be skipped if there is no geometry in the tile.

Loads can be skipped when:
- The attachment won't be resolved; otherwise, if the load were skipped, there would be holes in the resolved attachment;
- There is no vkCmdClearAttachments afterwards, since it is likely a partial clear done via a 2D blit (a 2D blit doesn't produce geometry).

Stores can be skipped when:
- The attachment was not cleared, which may happen via load_op or vkCmdClearAttachments;
- The store is not a resolve.

I chose to predicate each load/store separately so that they can be skipped even when only some attachments are cleared or resolved.

GMEM loads are moved into a separate cs because whether to emit CP_COND_REG_EXEC depends on HW binning being enabled and on the usage of vkCmdClearAttachments.

The CP_COND_REG_EXEC predicate can be changed during draw_cs only by a perf query; in that case the predicate must be re-emitted. (At the moment it is always re-emitted before stores.)

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15974>
This commit is contained in:
parent
d5debf0d8a
commit
0c489f18cb
|
@ -2280,6 +2280,8 @@ tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
|
||||||
s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
|
s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cmd->state.attachment_cmd_clear[a] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We may not know the multisample count if there are no attachments, so
|
/* We may not know the multisample count if there are no attachments, so
|
||||||
|
@ -2551,6 +2553,8 @@ tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
|
||||||
if (a == VK_ATTACHMENT_UNUSED)
|
if (a == VK_ATTACHMENT_UNUSED)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
cmd->state.attachment_cmd_clear[a] = true;
|
||||||
|
|
||||||
tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
|
tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
|
||||||
&attachments[j].clearValue);
|
&attachments[j].clearValue);
|
||||||
}
|
}
|
||||||
|
@ -2799,24 +2803,64 @@ blit_can_resolve(VkFormat format)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
|
||||||
|
struct tu_cs *cs, bool load)
|
||||||
|
{
|
||||||
|
tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Close the conditional-execution region that was opened in `cs` by
 * tu_begin_load_store_cond_exec().
 *
 * `cmd` and `load` are not used by the current implementation; they mirror
 * the signature of the begin helper.
 */
static void
tu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd,
                            struct tu_cs *cs, bool load)
{
   tu_cond_exec_end(cs);
}
|
||||||
|
|
||||||
void
|
void
|
||||||
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
uint32_t a,
|
uint32_t a,
|
||||||
|
bool cond_exec_allowed,
|
||||||
bool force_load)
|
bool force_load)
|
||||||
{
|
{
|
||||||
const struct tu_image_view *iview = cmd->state.attachments[a];
|
const struct tu_image_view *iview = cmd->state.attachments[a];
|
||||||
const struct tu_render_pass_attachment *attachment =
|
const struct tu_render_pass_attachment *attachment =
|
||||||
&cmd->state.pass->attachments[a];
|
&cmd->state.pass->attachments[a];
|
||||||
|
|
||||||
|
bool load_common = attachment->load || force_load;
|
||||||
|
bool load_stencil =
|
||||||
|
attachment->load_stencil ||
|
||||||
|
(attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load);
|
||||||
|
|
||||||
|
if (!load_common && !load_stencil)
|
||||||
|
return;
|
||||||
|
|
||||||
trace_start_gmem_load(&cmd->trace, cs);
|
trace_start_gmem_load(&cmd->trace, cs);
|
||||||
|
|
||||||
if (attachment->load || force_load)
|
/* If attachment will be cleared by vkCmdClearAttachments - it is likely
|
||||||
|
* that it would be partially cleared, and since it is done by 2d blit
|
||||||
|
* it doesn't produce geometry, so we have to unconditionally load.
|
||||||
|
*
|
||||||
|
* To simplify conditions treat partially cleared separate DS as fully
|
||||||
|
* cleared and don't emit cond_exec.
|
||||||
|
*/
|
||||||
|
bool cond_exec = cond_exec_allowed &&
|
||||||
|
!attachment->clear_mask &&
|
||||||
|
!cmd->state.attachment_cmd_clear[a] &&
|
||||||
|
!attachment->will_be_resolved;
|
||||||
|
if (cond_exec)
|
||||||
|
tu_begin_load_store_cond_exec(cmd, cs, true);
|
||||||
|
|
||||||
|
if (load_common)
|
||||||
tu_emit_blit(cmd, cs, iview, attachment, false, false);
|
tu_emit_blit(cmd, cs, iview, attachment, false, false);
|
||||||
|
|
||||||
if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
|
if (load_stencil)
|
||||||
tu_emit_blit(cmd, cs, iview, attachment, false, true);
|
tu_emit_blit(cmd, cs, iview, attachment, false, true);
|
||||||
|
|
||||||
|
if (cond_exec)
|
||||||
|
tu_end_load_store_cond_exec(cmd, cs, true);
|
||||||
|
|
||||||
trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load);
|
trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2919,7 +2963,8 @@ void
|
||||||
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
uint32_t a,
|
uint32_t a,
|
||||||
uint32_t gmem_a)
|
uint32_t gmem_a,
|
||||||
|
bool cond_exec_allowed)
|
||||||
{
|
{
|
||||||
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
struct tu_physical_device *phys_dev = cmd->device->physical_device;
|
||||||
const VkRect2D *render_area = &cmd->state.render_area;
|
const VkRect2D *render_area = &cmd->state.render_area;
|
||||||
|
@ -2930,6 +2975,15 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
if (!dst->store && !dst->store_stencil)
|
if (!dst->store && !dst->store_stencil)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
bool was_cleared = src->clear_mask || cmd->state.attachment_cmd_clear[a];
|
||||||
|
/* Unconditional store should happen only if attachment was cleared,
|
||||||
|
* which could have happened either by load_op or via vkCmdClearAttachments.
|
||||||
|
*/
|
||||||
|
bool cond_exec = cond_exec_allowed && !was_cleared;
|
||||||
|
if (cond_exec) {
|
||||||
|
tu_begin_load_store_cond_exec(cmd, cs, false);
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t x1 = render_area->offset.x;
|
uint32_t x1 = render_area->offset.x;
|
||||||
uint32_t y1 = render_area->offset.y;
|
uint32_t y1 = render_area->offset.y;
|
||||||
uint32_t x2 = x1 + render_area->extent.width;
|
uint32_t x2 = x1 + render_area->extent.width;
|
||||||
|
@ -2971,6 +3025,10 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
if (store_separate_stencil)
|
if (store_separate_stencil)
|
||||||
tu_emit_blit(cmd, cs, iview, src, true, true);
|
tu_emit_blit(cmd, cs, iview, src, true, true);
|
||||||
|
|
||||||
|
if (cond_exec) {
|
||||||
|
tu_end_load_store_cond_exec(cmd, cs, false);
|
||||||
|
}
|
||||||
|
|
||||||
trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
|
trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -3011,5 +3069,9 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cond_exec) {
|
||||||
|
tu_end_load_store_cond_exec(cmd, cs, false);
|
||||||
|
}
|
||||||
|
|
||||||
trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned);
|
trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned);
|
||||||
}
|
}
|
||||||
|
|
|
@ -632,6 +632,25 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd,
|
||||||
return use_sysmem;
|
return use_sysmem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Optimization: there is no reason to load gmem if there is no
|
||||||
|
* geometry to process. COND_REG_EXEC predicate is set here,
|
||||||
|
* but the actual skip happens in tile_load_cs and tile_store_cs,
|
||||||
|
* for each blit separately.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
tu6_emit_cond_for_load_stores(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||||
|
uint32_t pipe, uint32_t slot, bool wfm)
|
||||||
|
{
|
||||||
|
if (use_hw_binning(cmd)) {
|
||||||
|
tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
|
||||||
|
tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(pipe)) |
|
||||||
|
A6XX_CP_REG_TEST_0_BIT(slot) |
|
||||||
|
COND(wfm, A6XX_CP_REG_TEST_0_WAIT_FOR_ME));
|
||||||
|
} else {
|
||||||
|
/* COND_REG_EXECs are not emitted in non-binning case */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
|
@ -664,6 +683,8 @@ tu6_emit_tile_select(struct tu_cmd_buffer *cmd,
|
||||||
tu_cs_emit(cs, pipe * 4);
|
tu_cs_emit(cs, pipe * 4);
|
||||||
tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch);
|
tu_cs_emit(cs, pipe * cmd->vsc_prim_strm_pitch);
|
||||||
|
|
||||||
|
tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, true);
|
||||||
|
|
||||||
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
|
tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
|
||||||
tu_cs_emit(cs, 0x0);
|
tu_cs_emit(cs, 0x0);
|
||||||
|
|
||||||
|
@ -740,6 +761,15 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
tu6_emit_tile_load(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
|
{
|
||||||
|
tu6_emit_blit_scissor(cmd, cs, true);
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||||
|
tu_load_gmem_attachment(cmd, cs, i, use_hw_binning(cmd), false);
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
{
|
{
|
||||||
|
@ -756,7 +786,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
|
|
||||||
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
|
for (uint32_t a = 0; a < pass->attachment_count; ++a) {
|
||||||
if (pass->attachments[a].gmem_offset >= 0)
|
if (pass->attachments[a].gmem_offset >= 0)
|
||||||
tu_store_gmem_attachment(cmd, cs, a, a);
|
tu_store_gmem_attachment(cmd, cs, a, a, use_hw_binning(cmd));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (subpass->resolve_attachments) {
|
if (subpass->resolve_attachments) {
|
||||||
|
@ -764,7 +794,7 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
uint32_t a = subpass->resolve_attachments[i].attachment;
|
uint32_t a = subpass->resolve_attachments[i].attachment;
|
||||||
if (a != VK_ATTACHMENT_UNUSED) {
|
if (a != VK_ATTACHMENT_UNUSED) {
|
||||||
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
|
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
|
||||||
tu_store_gmem_attachment(cmd, cs, a, gmem_a);
|
tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1220,11 +1250,6 @@ tu_emit_renderpass_begin(struct tu_cmd_buffer *cmd,
|
||||||
|
|
||||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||||
|
|
||||||
tu6_emit_blit_scissor(cmd, cs, true);
|
|
||||||
|
|
||||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
|
||||||
tu_load_gmem_attachment(cmd, cs, i, false);
|
|
||||||
|
|
||||||
tu6_emit_blit_scissor(cmd, cs, false);
|
tu6_emit_blit_scissor(cmd, cs, false);
|
||||||
|
|
||||||
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; ++i)
|
||||||
|
@ -1356,8 +1381,10 @@ tu6_tile_render_begin(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
|
||||||
|
uint32_t pipe, uint32_t slot)
|
||||||
{
|
{
|
||||||
|
tu_cs_emit_call(cs, &cmd->tile_load_cs);
|
||||||
tu_cs_emit_call(cs, &cmd->draw_cs);
|
tu_cs_emit_call(cs, &cmd->draw_cs);
|
||||||
|
|
||||||
if (use_hw_binning(cmd)) {
|
if (use_hw_binning(cmd)) {
|
||||||
|
@ -1365,6 +1392,10 @@ tu6_render_tile(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||||
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
|
tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Predicate is changed in draw_cs so we have to re-emit it */
|
||||||
|
if (cmd->state.draw_cs_writes_to_cond_pred)
|
||||||
|
tu6_emit_cond_for_load_stores(cmd, cs, pipe, slot, false);
|
||||||
|
|
||||||
tu_cs_emit_call(cs, &cmd->tile_store_cs);
|
tu_cs_emit_call(cs, &cmd->tile_store_cs);
|
||||||
|
|
||||||
if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) {
|
if (!u_trace_iterator_equal(cmd->trace_renderpass_start, cmd->trace_renderpass_end)) {
|
||||||
|
@ -1418,7 +1449,7 @@ tu_cmd_render_tiles(struct tu_cmd_buffer *cmd,
|
||||||
tu6_emit_tile_select(cmd, &cmd->cs, tx, ty, pipe, slot);
|
tu6_emit_tile_select(cmd, &cmd->cs, tx, ty, pipe, slot);
|
||||||
|
|
||||||
trace_start_draw_ib_gmem(&cmd->trace, &cmd->cs);
|
trace_start_draw_ib_gmem(&cmd->trace, &cmd->cs);
|
||||||
tu6_render_tile(cmd, &cmd->cs);
|
tu6_render_tile(cmd, &cmd->cs, pipe, slot);
|
||||||
trace_end_draw_ib_gmem(&cmd->trace, &cmd->cs);
|
trace_end_draw_ib_gmem(&cmd->trace, &cmd->cs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1491,6 +1522,7 @@ tu_create_cmd_buffer(struct tu_device *device,
|
||||||
list_inithead(&cmd_buffer->renderpass_autotune_results);
|
list_inithead(&cmd_buffer->renderpass_autotune_results);
|
||||||
|
|
||||||
tu_cs_init(&cmd_buffer->cs, device, TU_CS_MODE_GROW, 4096);
|
tu_cs_init(&cmd_buffer->cs, device, TU_CS_MODE_GROW, 4096);
|
||||||
|
tu_cs_init(&cmd_buffer->tile_load_cs, device, TU_CS_MODE_GROW, 2048);
|
||||||
tu_cs_init(&cmd_buffer->draw_cs, device, TU_CS_MODE_GROW, 4096);
|
tu_cs_init(&cmd_buffer->draw_cs, device, TU_CS_MODE_GROW, 4096);
|
||||||
tu_cs_init(&cmd_buffer->tile_store_cs, device, TU_CS_MODE_GROW, 2048);
|
tu_cs_init(&cmd_buffer->tile_store_cs, device, TU_CS_MODE_GROW, 2048);
|
||||||
tu_cs_init(&cmd_buffer->draw_epilogue_cs, device, TU_CS_MODE_GROW, 4096);
|
tu_cs_init(&cmd_buffer->draw_epilogue_cs, device, TU_CS_MODE_GROW, 4096);
|
||||||
|
@ -1507,11 +1539,14 @@ tu_cmd_buffer_destroy(struct tu_cmd_buffer *cmd_buffer)
|
||||||
list_del(&cmd_buffer->pool_link);
|
list_del(&cmd_buffer->pool_link);
|
||||||
|
|
||||||
tu_cs_finish(&cmd_buffer->cs);
|
tu_cs_finish(&cmd_buffer->cs);
|
||||||
|
tu_cs_finish(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_finish(&cmd_buffer->draw_cs);
|
tu_cs_finish(&cmd_buffer->draw_cs);
|
||||||
tu_cs_finish(&cmd_buffer->tile_store_cs);
|
tu_cs_finish(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_finish(&cmd_buffer->draw_epilogue_cs);
|
||||||
tu_cs_finish(&cmd_buffer->sub_cs);
|
tu_cs_finish(&cmd_buffer->sub_cs);
|
||||||
|
|
||||||
|
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
|
||||||
|
|
||||||
u_trace_fini(&cmd_buffer->trace);
|
u_trace_fini(&cmd_buffer->trace);
|
||||||
|
|
||||||
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
|
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
|
||||||
|
@ -1535,11 +1570,15 @@ tu_reset_cmd_buffer(struct tu_cmd_buffer *cmd_buffer)
|
||||||
cmd_buffer->record_result = VK_SUCCESS;
|
cmd_buffer->record_result = VK_SUCCESS;
|
||||||
|
|
||||||
tu_cs_reset(&cmd_buffer->cs);
|
tu_cs_reset(&cmd_buffer->cs);
|
||||||
|
tu_cs_reset(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_reset(&cmd_buffer->draw_cs);
|
tu_cs_reset(&cmd_buffer->draw_cs);
|
||||||
tu_cs_reset(&cmd_buffer->tile_store_cs);
|
tu_cs_reset(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_reset(&cmd_buffer->draw_epilogue_cs);
|
||||||
tu_cs_reset(&cmd_buffer->sub_cs);
|
tu_cs_reset(&cmd_buffer->sub_cs);
|
||||||
|
|
||||||
|
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
|
||||||
|
cmd_buffer->state.attachment_cmd_clear = NULL;
|
||||||
|
|
||||||
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
|
tu_autotune_free_results(cmd_buffer->device, &cmd_buffer->renderpass_autotune_results);
|
||||||
|
|
||||||
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
|
for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
|
||||||
|
@ -1678,6 +1717,7 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
||||||
cmd_buffer->usage_flags = pBeginInfo->flags;
|
cmd_buffer->usage_flags = pBeginInfo->flags;
|
||||||
|
|
||||||
tu_cs_begin(&cmd_buffer->cs);
|
tu_cs_begin(&cmd_buffer->cs);
|
||||||
|
tu_cs_begin(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_begin(&cmd_buffer->draw_cs);
|
tu_cs_begin(&cmd_buffer->draw_cs);
|
||||||
tu_cs_begin(&cmd_buffer->tile_store_cs);
|
tu_cs_begin(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_begin(&cmd_buffer->draw_epilogue_cs);
|
||||||
|
@ -1710,6 +1750,14 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
||||||
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
|
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
|
||||||
cmd_buffer->state.subpass =
|
cmd_buffer->state.subpass =
|
||||||
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
|
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
|
||||||
|
/* vkCmdClearAttachments is allowed in a secondary cmdbuf and we have to
|
||||||
|
* track it as in primary cmdbuf.
|
||||||
|
*/
|
||||||
|
cmd_buffer->state.attachment_cmd_clear =
|
||||||
|
vk_zalloc(&cmd_buffer->pool->vk.alloc,
|
||||||
|
cmd_buffer->state.pass->attachment_count *
|
||||||
|
sizeof(cmd_buffer->state.attachment_cmd_clear[0]),
|
||||||
|
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||||
} else {
|
} else {
|
||||||
/* When executing in the middle of another command buffer, the CCU
|
/* When executing in the middle of another command buffer, the CCU
|
||||||
* state is unknown.
|
* state is unknown.
|
||||||
|
@ -2245,6 +2293,7 @@ tu_EndCommandBuffer(VkCommandBuffer commandBuffer)
|
||||||
}
|
}
|
||||||
|
|
||||||
tu_cs_end(&cmd_buffer->cs);
|
tu_cs_end(&cmd_buffer->cs);
|
||||||
|
tu_cs_end(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_end(&cmd_buffer->draw_cs);
|
tu_cs_end(&cmd_buffer->draw_cs);
|
||||||
tu_cs_end(&cmd_buffer->tile_store_cs);
|
tu_cs_end(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||||
|
@ -3061,7 +3110,7 @@ vk2tu_src_stage(VkPipelineStageFlags vk_stages)
|
||||||
{
|
{
|
||||||
enum tu_stage stage = TU_STAGE_CP;
|
enum tu_stage stage = TU_STAGE_CP;
|
||||||
u_foreach_bit (bit, vk_stages) {
|
u_foreach_bit (bit, vk_stages) {
|
||||||
enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, false);
|
enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, false);
|
||||||
stage = MAX2(stage, new_stage);
|
stage = MAX2(stage, new_stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3073,7 +3122,7 @@ vk2tu_dst_stage(VkPipelineStageFlags vk_stages)
|
||||||
{
|
{
|
||||||
enum tu_stage stage = TU_STAGE_PS;
|
enum tu_stage stage = TU_STAGE_PS;
|
||||||
u_foreach_bit (bit, vk_stages) {
|
u_foreach_bit (bit, vk_stages) {
|
||||||
enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, true);
|
enum tu_stage new_stage = vk2tu_single_stage(1ull << bit, true);
|
||||||
stage = MIN2(stage, new_stage);
|
stage = MIN2(stage, new_stage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3130,6 +3179,14 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
||||||
cmd->state.has_subpass_predication = true;
|
cmd->state.has_subpass_predication = true;
|
||||||
if (secondary->state.disable_gmem)
|
if (secondary->state.disable_gmem)
|
||||||
cmd->state.disable_gmem = true;
|
cmd->state.disable_gmem = true;
|
||||||
|
|
||||||
|
cmd->state.draw_cs_writes_to_cond_pred |=
|
||||||
|
secondary->state.draw_cs_writes_to_cond_pred;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < cmd->state.pass->attachment_count; i++) {
|
||||||
|
cmd->state.attachment_cmd_clear[i] |=
|
||||||
|
secondary->state.attachment_cmd_clear[i];
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
assert(tu_cs_is_empty(&secondary->draw_cs));
|
assert(tu_cs_is_empty(&secondary->draw_cs));
|
||||||
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
|
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
|
||||||
|
@ -3307,6 +3364,18 @@ tu_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cmd->state.attachment_cmd_clear =
|
||||||
|
vk_zalloc(&cmd->pool->vk.alloc, pass->attachment_count *
|
||||||
|
sizeof(cmd->state.attachment_cmd_clear[0]), 8,
|
||||||
|
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||||
|
|
||||||
|
if (!cmd->state.attachment_cmd_clear) {
|
||||||
|
cmd->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
cmd->state.draw_cs_writes_to_cond_pred = false;
|
||||||
|
|
||||||
for (unsigned i = 0; i < pass->attachment_count; i++) {
|
for (unsigned i = 0; i < pass->attachment_count; i++) {
|
||||||
cmd->state.attachments[i] = pAttachmentInfo ?
|
cmd->state.attachments[i] = pAttachmentInfo ?
|
||||||
tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) :
|
tu_image_view_from_handle(pAttachmentInfo->pAttachments[i]) :
|
||||||
|
@ -3400,7 +3469,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
|
||||||
|
|
||||||
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
|
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
|
||||||
|
|
||||||
tu_store_gmem_attachment(cmd, cs, a, gmem_a);
|
tu_store_gmem_attachment(cmd, cs, a, gmem_a, false);
|
||||||
|
|
||||||
if (pass->attachments[a].gmem_offset < 0)
|
if (pass->attachments[a].gmem_offset < 0)
|
||||||
continue;
|
continue;
|
||||||
|
@ -3410,7 +3479,7 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
|
||||||
* if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM..
|
* if it is, should be doing a GMEM->GMEM resolve instead of GMEM->MEM->GMEM..
|
||||||
*/
|
*/
|
||||||
tu_finishme("missing GMEM->GMEM resolve path\n");
|
tu_finishme("missing GMEM->GMEM resolve path\n");
|
||||||
tu_load_gmem_attachment(cmd, cs, a, true);
|
tu_load_gmem_attachment(cmd, cs, a, false, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4627,8 +4696,15 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
{
|
{
|
||||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
TU_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, commandBuffer);
|
||||||
|
|
||||||
|
/* GMEM loads are created after draw_cs in the separate cs
|
||||||
|
* because they need to know whether to allow their conditional
|
||||||
|
* execution, which is tied to a state that is known only at
|
||||||
|
* the end of the renderpass.
|
||||||
|
*/
|
||||||
|
tu6_emit_tile_load(cmd_buffer, &cmd_buffer->tile_load_cs);
|
||||||
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
|
tu6_emit_tile_store(cmd_buffer, &cmd_buffer->tile_store_cs);
|
||||||
|
|
||||||
|
tu_cs_end(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_end(&cmd_buffer->draw_cs);
|
tu_cs_end(&cmd_buffer->draw_cs);
|
||||||
tu_cs_end(&cmd_buffer->tile_store_cs);
|
tu_cs_end(&cmd_buffer->tile_store_cs);
|
||||||
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
tu_cs_end(&cmd_buffer->draw_epilogue_cs);
|
||||||
|
@ -4649,6 +4725,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
|
|
||||||
/* discard draw_cs and draw_epilogue_cs entries now that the tiles are
|
/* discard draw_cs and draw_epilogue_cs entries now that the tiles are
|
||||||
rendered */
|
rendered */
|
||||||
|
tu_cs_discard_entries(&cmd_buffer->tile_load_cs);
|
||||||
|
tu_cs_begin(&cmd_buffer->tile_load_cs);
|
||||||
tu_cs_discard_entries(&cmd_buffer->draw_cs);
|
tu_cs_discard_entries(&cmd_buffer->draw_cs);
|
||||||
tu_cs_begin(&cmd_buffer->draw_cs);
|
tu_cs_begin(&cmd_buffer->draw_cs);
|
||||||
tu_cs_discard_entries(&cmd_buffer->tile_store_cs);
|
tu_cs_discard_entries(&cmd_buffer->tile_store_cs);
|
||||||
|
@ -4661,6 +4739,8 @@ tu_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
|
||||||
tu_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier, true);
|
tu_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier, true);
|
||||||
|
|
||||||
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
|
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
|
||||||
|
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachment_cmd_clear);
|
||||||
|
cmd_buffer->state.attachment_cmd_clear = NULL;
|
||||||
|
|
||||||
cmd_buffer->state.pass = NULL;
|
cmd_buffer->state.pass = NULL;
|
||||||
cmd_buffer->state.subpass = NULL;
|
cmd_buffer->state.subpass = NULL;
|
||||||
|
|
|
@ -800,6 +800,12 @@ tu_CreateRenderPass2(VkDevice _device,
|
||||||
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
|
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
|
||||||
subpass->resolve_attachments[j].attachment =
|
subpass->resolve_attachments[j].attachment =
|
||||||
desc->pResolveAttachments[j].attachment;
|
desc->pResolveAttachments[j].attachment;
|
||||||
|
|
||||||
|
uint32_t src_a = desc->pColorAttachments[j].attachment;
|
||||||
|
if (src_a != VK_ATTACHMENT_UNUSED) {
|
||||||
|
pass->attachments[src_a].will_be_resolved =
|
||||||
|
desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -808,6 +814,11 @@ tu_CreateRenderPass2(VkDevice _device,
|
||||||
subpass->resolve_count++;
|
subpass->resolve_count++;
|
||||||
uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
|
uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
|
||||||
subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
|
subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
|
||||||
|
|
||||||
|
uint32_t src_a = desc->pDepthStencilAttachment->attachment;
|
||||||
|
if (src_a != VK_ATTACHMENT_UNUSED) {
|
||||||
|
pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t a = desc->pDepthStencilAttachment ?
|
uint32_t a = desc->pDepthStencilAttachment ?
|
||||||
|
|
|
@ -1196,6 +1196,10 @@ struct tu_cmd_state
|
||||||
VkRect2D render_area;
|
VkRect2D render_area;
|
||||||
|
|
||||||
const struct tu_image_view **attachments;
|
const struct tu_image_view **attachments;
|
||||||
|
/* Tracks whether attachment was cleared by vkCmdClearAttachments */
|
||||||
|
bool *attachment_cmd_clear;
|
||||||
|
/* Track whether conditional predicate for COND_REG_EXEC is changed in draw_cs */
|
||||||
|
bool draw_cs_writes_to_cond_pred;
|
||||||
|
|
||||||
bool xfb_used;
|
bool xfb_used;
|
||||||
bool has_tess;
|
bool has_tess;
|
||||||
|
@ -1290,6 +1294,7 @@ struct tu_cmd_buffer
|
||||||
VkResult record_result;
|
VkResult record_result;
|
||||||
|
|
||||||
struct tu_cs cs;
|
struct tu_cs cs;
|
||||||
|
struct tu_cs tile_load_cs;
|
||||||
struct tu_cs draw_cs;
|
struct tu_cs draw_cs;
|
||||||
struct tu_cs tile_store_cs;
|
struct tu_cs tile_store_cs;
|
||||||
struct tu_cs draw_epilogue_cs;
|
struct tu_cs draw_epilogue_cs;
|
||||||
|
@ -1576,6 +1581,7 @@ void
|
||||||
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
uint32_t a,
|
uint32_t a,
|
||||||
|
bool cond_exec_allowed,
|
||||||
bool force_load);
|
bool force_load);
|
||||||
|
|
||||||
/* expose this function to be able to emit load without checking LOAD_OP */
|
/* expose this function to be able to emit load without checking LOAD_OP */
|
||||||
|
@ -1587,7 +1593,8 @@ void
|
||||||
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
|
||||||
struct tu_cs *cs,
|
struct tu_cs *cs,
|
||||||
uint32_t a,
|
uint32_t a,
|
||||||
uint32_t gmem_a);
|
uint32_t gmem_a,
|
||||||
|
bool cond_exec_allowed);
|
||||||
|
|
||||||
enum pipe_format tu_vk_format_to_pipe_format(VkFormat vk_format);
|
enum pipe_format tu_vk_format_to_pipe_format(VkFormat vk_format);
|
||||||
|
|
||||||
|
@ -1857,6 +1864,7 @@ struct tu_render_pass_attachment
|
||||||
bool load;
|
bool load;
|
||||||
bool store;
|
bool store;
|
||||||
int32_t gmem_offset;
|
int32_t gmem_offset;
|
||||||
|
bool will_be_resolved;
|
||||||
/* for D32S8 separate stencil: */
|
/* for D32S8 separate stencil: */
|
||||||
bool load_stencil;
|
bool load_stencil;
|
||||||
bool store_stencil;
|
bool store_stencil;
|
||||||
|
|
|
@ -874,6 +874,10 @@ emit_begin_perf_query(struct tu_cmd_buffer *cmdbuf,
|
||||||
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
|
struct tu_cs *cs = cmdbuf->state.pass ? &cmdbuf->draw_cs : &cmdbuf->cs;
|
||||||
uint32_t last_pass = ~0;
|
uint32_t last_pass = ~0;
|
||||||
|
|
||||||
|
if (cmdbuf->state.pass) {
|
||||||
|
cmdbuf->state.draw_cs_writes_to_cond_pred = true;
|
||||||
|
}
|
||||||
|
|
||||||
/* Querying perf counters happens in these steps:
|
/* Querying perf counters happens in these steps:
|
||||||
*
|
*
|
||||||
* 0) There's a scratch reg to set a pass index for perf counters query.
|
* 0) There's a scratch reg to set a pass index for perf counters query.
|
||||||
|
|
Loading…
Reference in New Issue