tu: Implement VK_EXT_conditional_rendering
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6009>
This commit is contained in:
parent
f226b198f5
commit
ee2c58dde4
|
@ -327,6 +327,13 @@ r2d_setup(struct tu_cmd_buffer *cmd,
|
|||
r2d_setup_common(cmd, cs, vk_format, aspect_mask, rotation, clear, ubwc, false);
|
||||
}
|
||||
|
||||
/* Teardown hook for the 2D blit path.
 *
 * This is the function installed in the .teardown slot of r2d_ops. The 2D
 * path has no state to restore after a blit, so the hook is intentionally
 * empty; it exists so callers can invoke ops->teardown() unconditionally.
 */
static void
r2d_teardown(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs)
{
   /* Both arguments are deliberately unused. */
   (void) cmd;
   (void) cs;
}
|
||||
|
||||
static void
|
||||
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
|
@ -803,6 +810,11 @@ r3d_setup(struct tu_cmd_buffer *cmd,
|
|||
.component_enable = aspect_write_mask(vk_format, aspect_mask)));
|
||||
tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
|
||||
tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
|
||||
|
||||
if (cmd->state.predication_active) {
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -816,6 +828,15 @@ r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
|||
tu_cs_emit(cs, 2); /* vertex count */
|
||||
}
|
||||
|
||||
static void
|
||||
r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
|
||||
{
|
||||
if (cmd->state.predication_active) {
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
|
||||
tu_cs_emit(cs, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* blit ops - common interface for 2d/shader paths */
|
||||
|
||||
struct blit_ops {
|
||||
|
@ -844,6 +865,8 @@ struct blit_ops {
|
|||
bool clear,
|
||||
bool ubwc);
|
||||
void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
|
||||
void (*teardown)(struct tu_cmd_buffer *cmd,
|
||||
struct tu_cs *cs);
|
||||
};
|
||||
|
||||
static const struct blit_ops r2d_ops = {
|
||||
|
@ -855,6 +878,7 @@ static const struct blit_ops r2d_ops = {
|
|||
.dst_buffer = r2d_dst_buffer,
|
||||
.setup = r2d_setup,
|
||||
.run = r2d_run,
|
||||
.teardown = r2d_teardown,
|
||||
};
|
||||
|
||||
static const struct blit_ops r3d_ops = {
|
||||
|
@ -866,6 +890,7 @@ static const struct blit_ops r3d_ops = {
|
|||
.dst_buffer = r3d_dst_buffer,
|
||||
.setup = r3d_setup,
|
||||
.run = r3d_run,
|
||||
.teardown = r3d_teardown,
|
||||
};
|
||||
|
||||
/* passthrough set coords from 3D extents */
|
||||
|
@ -1061,6 +1086,8 @@ tu6_blit_image(struct tu_cmd_buffer *cmd,
|
|||
ops->src(cmd, cs, &src, i, filter);
|
||||
ops->run(cmd, cs);
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1170,6 +1197,8 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
|
|||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1243,6 +1272,8 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
|
|||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1464,6 +1495,8 @@ tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
|
|||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1514,6 +1547,8 @@ copy_buffer(struct tu_cmd_buffer *cmd,
|
|||
dst_va += width * block_size;
|
||||
blocks -= width;
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1595,6 +1630,8 @@ tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
|
|||
dst_va += width * 4;
|
||||
blocks -= width;
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1637,6 +1674,8 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer,
|
|||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1663,6 +1702,8 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
|
|||
ops->dst(cs, dst, i);
|
||||
ops->run(cmd, cs);
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -1714,6 +1755,8 @@ clear_image(struct tu_cmd_buffer *cmd,
|
|||
ops->run(cmd, cs);
|
||||
}
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -2050,6 +2093,22 @@ tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
|
|||
*/
|
||||
tu_emit_cache_flush_renderpass(cmd, cs);
|
||||
|
||||
/* vkCmdClearAttachments is supposed to respect the predicate if active.
|
||||
* The easiest way to do this is to always use the 3d path, which always
|
||||
* works even with GMEM because it's just a simple draw using the existing
|
||||
* attachment state. However it seems that IGNORE_VISIBILITY draws must be
|
||||
* skipped in the binning pass, since otherwise they produce binning data
|
||||
* which isn't consumed and leads to the wrong binning data being read, so
|
||||
* condition on GMEM | SYSMEM.
|
||||
*/
|
||||
if (cmd->state.predication_active) {
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM |
|
||||
CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
|
||||
tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
|
||||
tu_cond_exec_end(cs);
|
||||
return;
|
||||
}
|
||||
|
||||
tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
|
||||
tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
|
||||
tu_cond_exec_end(cs);
|
||||
|
@ -2089,6 +2148,8 @@ clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
|
|||
}
|
||||
ops->run(cmd, cs);
|
||||
}
|
||||
|
||||
ops->teardown(cmd, cs);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "adreno_common.xml.h"
|
||||
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
#include "tu_cs.h"
|
||||
|
||||
|
@ -568,6 +569,29 @@ use_hw_binning(struct tu_cmd_buffer *cmd)
|
|||
if (cmd->state.xfb_used)
|
||||
return true;
|
||||
|
||||
/* Some devices have a newer a630_sqe.fw in which, only in CP_DRAW_INDX and
|
||||
* CP_DRAW_INDX_OFFSET, visibility-based skipping happens *before*
|
||||
* predication-based skipping. It seems this breaks predication, because
|
||||
* draws skipped by predication will not be executed in the binning phase,
|
||||
* and therefore won't have an entry in the draw stream, but the
|
||||
* visibility-based skipping will expect it to have an entry. The result is
|
||||
* a GPU hang when actually executing the first non-predicated draw.
|
||||
* However, it seems that things still work if the whole renderpass is
|
||||
* predicated. Affected tests are
|
||||
* dEQP-VK.conditional_rendering.draw_clear.draw.case_2 as well as a few
|
||||
* other case_N.
|
||||
*
|
||||
* Broken FW version: 016ee181
|
||||
* linux-firmware (working) FW version: 016ee176
|
||||
*
|
||||
* All known a650_sqe.fw versions don't have this bug.
|
||||
*
|
||||
* TODO: we should do version detection of the FW so that devices using the
|
||||
* linux-firmware version of a630_sqe.fw don't need this workaround.
|
||||
*/
|
||||
if (cmd->state.has_subpass_predication && cmd->device->physical_device->gpu_id != 650)
|
||||
return false;
|
||||
|
||||
if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_NOBIN))
|
||||
return false;
|
||||
|
||||
|
@ -583,6 +607,13 @@ use_sysmem_rendering(struct tu_cmd_buffer *cmd)
|
|||
if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_SYSMEM))
|
||||
return true;
|
||||
|
||||
/* If hw binning is required because of XFB but doesn't work because of the
|
||||
* conditional rendering bug, fall back to sysmem.
|
||||
*/
|
||||
if (cmd->state.xfb_used && cmd->state.has_subpass_predication &&
|
||||
cmd->device->physical_device->gpu_id != 650)
|
||||
return true;
|
||||
|
||||
/* can't fit attachments into gmem */
|
||||
if (!cmd->state.pass->gmem_pixels)
|
||||
return true;
|
||||
|
@ -1591,8 +1622,21 @@ tu_BeginCommandBuffer(VkCommandBuffer commandBuffer,
|
|||
break;
|
||||
}
|
||||
} else if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
|
||||
assert(pBeginInfo->pInheritanceInfo);
|
||||
|
||||
vk_foreach_struct(ext, pBeginInfo->pInheritanceInfo) {
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT: {
|
||||
const VkCommandBufferInheritanceConditionalRenderingInfoEXT *cond_rend = (void *) ext;
|
||||
cmd_buffer->state.predication_active = cond_rend->conditionalRenderingEnable;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
|
||||
assert(pBeginInfo->pInheritanceInfo);
|
||||
cmd_buffer->state.pass = tu_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
|
||||
cmd_buffer->state.subpass =
|
||||
&cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
|
||||
|
@ -2356,10 +2400,19 @@ vk2tu_access(VkAccessFlags flags, bool gmem)
|
|||
*
|
||||
* Transform feedback counters are read via CP_MEM_TO_REG, which implicitly
|
||||
* does CP_WAIT_FOR_ME, but we still need a WFI if the GPU writes it.
|
||||
*
|
||||
* Currently we read the draw predicate using CP_MEM_TO_MEM, which
|
||||
* also implicitly does CP_WAIT_FOR_ME. However CP_DRAW_PRED_SET does *not*
|
||||
* implicitly do CP_WAIT_FOR_ME, it seems to only wait for counters to
|
||||
* complete since it's written for DX11 where you can only predicate on the
|
||||
* result of a query object. So if we implement 64-bit comparisons in the
|
||||
* future, or if CP_DRAW_PRED_SET grows the capability to do 32-bit
|
||||
* comparisons, then this will have to be dealt with.
|
||||
*/
|
||||
if (flags &
|
||||
(VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_READ_BIT_EXT |
|
||||
VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT |
|
||||
VK_ACCESS_MEMORY_READ_BIT)) {
|
||||
mask |= TU_ACCESS_WFI_READ;
|
||||
}
|
||||
|
@ -2531,6 +2584,8 @@ tu_CmdExecuteCommands(VkCommandBuffer commandBuffer,
|
|||
|
||||
if (secondary->state.has_tess)
|
||||
cmd->state.has_tess = true;
|
||||
if (secondary->state.has_subpass_predication)
|
||||
cmd->state.has_subpass_predication = true;
|
||||
} else {
|
||||
assert(tu_cs_is_empty(&secondary->draw_cs));
|
||||
assert(tu_cs_is_empty(&secondary->draw_epilogue_cs));
|
||||
|
@ -3671,6 +3726,7 @@ tu_CmdEndRenderPass(VkCommandBuffer commandBuffer)
|
|||
cmd_buffer->state.subpass = NULL;
|
||||
cmd_buffer->state.framebuffer = NULL;
|
||||
cmd_buffer->state.has_tess = false;
|
||||
cmd_buffer->state.has_subpass_predication = false;
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -3870,3 +3926,64 @@ tu_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
|
|||
{
|
||||
/* No-op */
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
tu_CmdBeginConditionalRenderingEXT(VkCommandBuffer commandBuffer,
|
||||
const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
cmd->state.predication_active = true;
|
||||
if (cmd->state.pass)
|
||||
cmd->state.has_subpass_predication = true;
|
||||
|
||||
struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_GLOBAL, 1);
|
||||
tu_cs_emit(cs, 1);
|
||||
|
||||
/* Wait for any writes to the predicate to land */
|
||||
if (cmd->state.pass)
|
||||
tu_emit_cache_flush_renderpass(cmd, cs);
|
||||
else
|
||||
tu_emit_cache_flush(cmd, cs);
|
||||
|
||||
TU_FROM_HANDLE(tu_buffer, buf, pConditionalRenderingBegin->buffer);
|
||||
uint64_t iova = tu_buffer_iova(buf) + pConditionalRenderingBegin->offset;
|
||||
|
||||
/* qcom doesn't support 32-bit reference values, only 64-bit, but Vulkan
|
||||
* mandates 32-bit comparisons. Our workaround is to copy the the reference
|
||||
* value to the low 32-bits of a location where the high 32 bits are known
|
||||
* to be 0 and then compare that.
|
||||
*/
|
||||
tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 5);
|
||||
tu_cs_emit(cs, 0);
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, predicate));
|
||||
tu_cs_emit_qw(cs, iova);
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
|
||||
tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
|
||||
|
||||
bool inv = pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_SET, 3);
|
||||
tu_cs_emit(cs, CP_DRAW_PRED_SET_0_SRC(PRED_SRC_MEM) |
|
||||
CP_DRAW_PRED_SET_0_TEST(inv ? EQ_0_PASS : NE_0_PASS));
|
||||
tu_cs_emit_qw(cs, global_iova(cmd, predicate));
|
||||
|
||||
tu_bo_list_add(&cmd->bo_list, buf->bo, MSM_SUBMIT_BO_READ);
|
||||
}
|
||||
|
||||
void
|
||||
tu_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
|
||||
|
||||
cmd->state.predication_active = false;
|
||||
|
||||
struct tu_cs *cs = cmd->state.pass ? &cmd->draw_cs : &cmd->cs;
|
||||
|
||||
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_GLOBAL, 1);
|
||||
tu_cs_emit(cs, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -793,8 +793,8 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
|
||||
VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
|
||||
(VkPhysicalDeviceConditionalRenderingFeaturesEXT *) ext;
|
||||
features->conditionalRendering = false;
|
||||
features->inheritedConditionalRendering = false;
|
||||
features->conditionalRendering = true;
|
||||
features->inheritedConditionalRendering = true;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
|
||||
|
@ -1354,8 +1354,10 @@ tu_CreateDevice(VkPhysicalDevice physicalDevice,
|
|||
if (result != VK_SUCCESS)
|
||||
goto fail_global_bo_map;
|
||||
|
||||
memcpy(device->global_bo.map + gb_offset(border_color), border_color, sizeof(border_color));
|
||||
tu_init_clear_blit_shaders(device->global_bo.map);
|
||||
struct tu6_global *global = device->global_bo.map;
|
||||
memcpy(global->border_color, border_color, sizeof(border_color));
|
||||
global->predicate = 0;
|
||||
tu_init_clear_blit_shaders(global);
|
||||
|
||||
VkPipelineCacheCreateInfo ci;
|
||||
ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
|
||||
|
|
|
@ -90,6 +90,7 @@ EXTENSIONS = [
|
|||
Extension('VK_EXT_depth_clip_enable', 1, True),
|
||||
Extension('VK_KHR_draw_indirect_count', 1, True),
|
||||
Extension('VK_EXT_4444_formats', 1, True),
|
||||
Extension('VK_EXT_conditional_rendering', 1, True),
|
||||
]
|
||||
|
||||
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
|
||||
|
|
|
@ -368,7 +368,8 @@ struct tu6_global
|
|||
volatile uint32_t vsc_draw_overflow;
|
||||
uint32_t _pad1;
|
||||
volatile uint32_t vsc_prim_overflow;
|
||||
uint32_t _pad2[3];
|
||||
uint32_t _pad2;
|
||||
uint64_t predicate;
|
||||
|
||||
/* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
|
||||
struct {
|
||||
|
@ -923,6 +924,8 @@ struct tu_cmd_state
|
|||
|
||||
bool xfb_used;
|
||||
bool has_tess;
|
||||
bool has_subpass_predication;
|
||||
bool predication_active;
|
||||
};
|
||||
|
||||
struct tu_cmd_pool
|
||||
|
|
Loading…
Reference in New Issue