turnip: implement VK_KHR_depth_stencil_resolve support

Signed-off-by: Samuel Iglesias Gonsálvez <siglesias@igalia.com>
Reviewed-by: Jonathan Marek <jonathan@marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6884>
This commit is contained in:
Samuel Iglesias Gonsálvez 2020-09-23 12:09:25 +02:00 committed by Marge Bot
parent 13b2beb415
commit 1200f6da0b
8 changed files with 179 additions and 29 deletions

View File

@ -85,3 +85,18 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d,Fail
dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_depth_2d_array,Fail
dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d,Fail
dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_depth_2d_array,Fail
# sysmem clear/blit hangs on a630 for d24_s8 msaa images.
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat_s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_17_1.samples_2.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_17_1.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_2.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_32_32.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_49_13.samples_2.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_49_13.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_5_1.samples_2.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_5_1.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_8_32.samples_2.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_8_32.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail

View File

@ -120,3 +120,38 @@ KHR-GL30.transform_feedback.draw_xfb_feedbackk_test,Crash
KHR-GL30.transform_feedback.draw_xfb_instanced_test,Crash
KHR-GL30.transform_feedback.draw_xfb_stream_instanced_test,Crash
KHR-GL30.transform_feedback.draw_xfb_test,Crash
# clear/blit hangs on a630 for d24_s8 msaa images.
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d16_unorm.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.x8_d24_unorm_pack32.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.x8_d24_unorm_pack32.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint_separate_layouts.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint_separate_layouts.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint_separate_layouts.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat_s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat_s8_uint_separate_layouts.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d32_sfloat_s8_uint_separate_layouts.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d16_unorm.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.x8_d24_unorm_pack32.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.x8_d24_unorm_pack32.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint_separate_layouts.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint_separate_layouts.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d24_unorm_s8_uint_separate_layouts.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint_separate_layouts.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint_separate_layouts.stencil_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_2.d24_unorm_s8_uint.depth_zero_unused_resolve,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_16_64_6.samples_4.d32_sfloat_s8_uint.depth_zero,Fail
dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_49_13.samples_4.d32_sfloat.depth_zero_stencil_zero_testing_depth,Fail

View File

@ -156,6 +156,21 @@ r2d_src(struct tu_cmd_buffer *cmd,
tu_cs_image_flag_ref(cs, iview, layer);
}
/*
 * Program the 2D engine's source registers to read from the stencil plane
 * of an image view.
 *
 * cs     command stream the register writes are appended to
 * iview  image view supplying the stencil base address, per-layer size,
 *        pitch and the SRC_INFO/SRC_SIZE register values
 * layer  layer index; selects the layer by adding
 *        stencil_layer_size * layer to stencil_base_addr
 *
 * `cmd` and `filter` are not referenced in this body.
 * NOTE(review): presumably they exist so the signature matches the other
 * r2d source callbacks - confirm against struct blit_ops.
 */
static void
r2d_src_stencil(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
const struct tu_image_view *iview,
uint32_t layer,
VkFilter filter)
{
/* Header for a 5-value write starting at SP_PS_2D_SRC_INFO; the emits
 * below supply the payload in order (tu_cs_emit_qw presumably accounts
 * for two of the five values with the 64-bit address - TODO confirm).
 */
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
/* Stencil variant of SRC_INFO with the FLAGS bit masked off. */
tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS);
tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
/* Address of the requested layer within the stencil plane. */
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
/* SP_PS_2D_SRC_PITCH has shifted pitch field */
tu_cs_emit(cs, iview->stencil_PITCH << 9);
}
static void
r2d_src_buffer(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@ -1671,6 +1686,37 @@ tu_CmdResolveImage(VkCommandBuffer commandBuffer,
layer++) \
if (!layer_mask || (layer_mask & BIT(layer)))
/*
 * Resolve `src` into `dst` with the r2d blit backend, one blit per layer.
 *
 * cmd, cs           command buffer / command stream the blit is recorded in
 * format            format handed to the backend's setup hook
 * src, dst          source and destination image views
 * layer_mask,
 * layers            forwarded to for_each_layer() to pick the layers to blit
 * rect              region to resolve; rect->offset is used for both the
 *                   source and the destination coordinates
 * separate_stencil  when true, source/destination are programmed through
 *                   r2d_src_stencil()/r2d_dst_stencil() instead of the
 *                   backend's generic src/dst hooks
 */
static void
resolve_sysmem(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat format,
               struct tu_image_view *src,
               struct tu_image_view *dst,
               uint32_t layer_mask,
               uint32_t layers,
               const VkRect2D *rect,
               bool separate_stencil)
{
   const struct blit_ops *blit = &r2d_ops;

   /* One-time state for the whole resolve: format, aspect, no rotation,
    * no clear, and the destination's UBWC setting.
    */
   blit->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT,
               ROTATE_0, false, dst->ubwc_enabled);
   blit->coords(cs, &rect->offset, &rect->offset, &rect->extent);

   for_each_layer(layer_idx, layer_mask, layers) {
      if (!separate_stencil) {
         blit->src(cmd, cs, src, layer_idx, VK_FILTER_NEAREST);
         blit->dst(cs, dst, layer_idx);
      } else {
         r2d_src_stencil(cmd, cs, src, layer_idx, VK_FILTER_NEAREST);
         r2d_dst_stencil(cs, dst, layer_idx);
      }

      blit->run(cmd, cs);
   }

   blit->teardown(cmd, cs);
}
void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
struct tu_cs *cs,
@ -1680,21 +1726,17 @@ tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
uint32_t layers,
const VkRect2D *rect)
{
const struct blit_ops *ops = &r2d_ops;
assert(src->image->vk_format == dst->image->vk_format);
ops->setup(cmd, cs, dst->image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
ROTATE_0, false, dst->ubwc_enabled);
ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
for_each_layer(i, layer_mask, layers) {
ops->src(cmd, cs, src, i, VK_FILTER_NEAREST);
ops->dst(cs, dst, i);
ops->run(cmd, cs);
if (dst->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT,
src, dst, layer_mask, layers, rect, false);
resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT,
src, dst, layer_mask, layers, rect, true);
} else {
resolve_sysmem(cmd, cs, dst->image->vk_format,
src, dst, layer_mask, layers, rect, false);
}
ops->teardown(cmd, cs);
}
static void
@ -2382,7 +2424,8 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
.tile_mode = TILE6_2,
.srgb = vk_format_is_srgb(format),
.samples = tu_msaa_samples(samples),
.samples_average = !vk_format_is_int(format),
.samples_average = !vk_format_is_int(format) &&
!vk_format_is_depth_or_stencil(format),
.unk20 = 1,
.unk22 = 1),
/* note: src size does not matter when not scaling */

View File

@ -632,10 +632,11 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
* Commands":
*
* End-of-subpass multisample resolves are treated as color
* attachment writes for the purposes of synchronization. That is,
* they are considered to execute in the
* VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT pipeline stage and
* their writes are synchronized with
* attachment writes for the purposes of synchronization.
* This applies to resolve operations for both color and
* depth/stencil attachments. That is, they are considered to
* execute in the VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
* pipeline stage and their writes are synchronized with
* VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT. Synchronization between
* rendering within a subpass and any resolve operations at the end
* of the subpass occurs automatically, without need for explicit
@ -649,18 +650,22 @@ tu6_emit_sysmem_resolves(struct tu_cmd_buffer *cmd,
* last sentence and the fact that we're in sysmem mode.
*/
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
if (subpass->resolve_depth_stencil)
tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
/* Wait for the flushes to land before using the 2D engine */
tu_cs_emit_wfi(cs);
for (unsigned i = 0; i < subpass->color_count; i++) {
for (unsigned i = 0; i < subpass->resolve_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu6_emit_sysmem_resolve(cmd, cs, subpass->multiview_mask, a,
subpass->color_attachments[i].attachment);
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
tu6_emit_sysmem_resolve(cmd, cs, subpass->multiview_mask, a, gmem_a);
}
}
}
@ -692,11 +697,12 @@ tu6_emit_tile_store(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
}
if (subpass->resolve_attachments) {
for (unsigned i = 0; i < subpass->color_count; i++) {
for (unsigned i = 0; i < subpass->resolve_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a != VK_ATTACHMENT_UNUSED)
tu_store_gmem_attachment(cmd, cs, a,
subpass->color_attachments[i].attachment);
if (a != VK_ATTACHMENT_UNUSED) {
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
tu_store_gmem_attachment(cmd, cs, a, gmem_a);
}
}
}
}
@ -2965,13 +2971,14 @@ tu_CmdNextSubpass2(VkCommandBuffer commandBuffer,
if (subpass->resolve_attachments) {
tu6_emit_blit_scissor(cmd, cs, true);
for (unsigned i = 0; i < subpass->color_count; i++) {
for (unsigned i = 0; i < subpass->resolve_count; i++) {
uint32_t a = subpass->resolve_attachments[i].attachment;
if (a == VK_ATTACHMENT_UNUSED)
continue;
tu_store_gmem_attachment(cmd, cs, a,
subpass->color_attachments[i].attachment);
uint32_t gmem_a = tu_subpass_get_attachment_to_resolve(subpass, i);
tu_store_gmem_attachment(cmd, cs, a, gmem_a);
if (pass->attachments[a].gmem_offset < 0)
continue;

View File

@ -858,6 +858,15 @@ tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
VkPhysicalDeviceDepthStencilResolveProperties *props =
(VkPhysicalDeviceDepthStencilResolveProperties *)ext;
props->independentResolve = false;
props->independentResolveNone = false;
props->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
props->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
break;
}
default:
break;
}

View File

@ -100,6 +100,7 @@ EXTENSIONS = [
Extension('VK_KHR_push_descriptor', 1, True),
Extension('VK_KHR_incremental_present', 1, 'TU_HAS_SURFACE'),
Extension('VK_KHR_image_format_list', 1, True),
Extension('VK_KHR_depth_stencil_resolve', 1, True),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)

View File

@ -475,6 +475,17 @@ attachment_set_ops(struct tu_render_pass_attachment *att,
}
}
/*
 * Tell whether a subpass actually requests a depth/stencil resolve.
 *
 * Returns true only when the structure is present, it carries a resolve
 * attachment reference, and that reference is not VK_ATTACHMENT_UNUSED.
 * A NULL `depth_stencil_resolve` is accepted and yields false.
 */
static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
{
   if (!depth_stencil_resolve)
      return false;

   if (!depth_stencil_resolve->pDepthStencilResolveAttachment)
      return false;

   return depth_stencil_resolve->pDepthStencilResolveAttachment->attachment !=
          VK_ATTACHMENT_UNUSED;
}
VkResult
tu_CreateRenderPass2(VkDevice _device,
const VkRenderPassCreateInfo2KHR *pCreateInfo,
@ -526,10 +537,13 @@ tu_CreateRenderPass2(VkDevice _device,
struct tu_subpass_attachment *p;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
subpass_attachment_count +=
desc->inputAttachmentCount + desc->colorAttachmentCount +
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
(is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
}
if (subpass_attachment_count) {
@ -547,10 +561,14 @@ tu_CreateRenderPass2(VkDevice _device,
p = pass->subpass_attachments;
for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
struct tu_subpass *subpass = &pass->subpasses[i];
subpass->input_count = desc->inputAttachmentCount;
subpass->color_count = desc->colorAttachmentCount;
subpass->resolve_count = 0;
subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
subpass->samples = 0;
subpass->srgb_cntl = 0;
@ -588,15 +606,22 @@ tu_CreateRenderPass2(VkDevice _device,
}
}
subpass->resolve_attachments = desc->pResolveAttachments ? p : NULL;
subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
if (desc->pResolveAttachments) {
p += desc->colorAttachmentCount;
subpass->resolve_count += desc->colorAttachmentCount;
for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
subpass->resolve_attachments[j].attachment =
desc->pResolveAttachments[j].attachment;
}
}
if (subpass->resolve_depth_stencil) {
p++;
subpass->resolve_count++;
uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
}
uint32_t a = desc->pDepthStencilAttachment ?
desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
@ -668,3 +693,13 @@ tu_GetRenderAreaGranularity(VkDevice _device,
pGranularity->width = device->physical_device->info.gmem_align_w;
pGranularity->height = device->physical_device->info.gmem_align_h;
}
/*
 * Map a resolve-attachment slot to the attachment that gets resolved.
 *
 * When the subpass has a depth/stencil resolve, the last resolve slot
 * (index == resolve_count - 1) maps to the subpass' depth/stencil
 * attachment. Every other slot maps to the color attachment with the
 * same index.
 *
 * subpass  subpass whose attachments are looked up
 * index    position in the subpass' resolve attachment list
 *
 * Returns the attachment index of the source to resolve.
 */
uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
{
   /* Anything but the trailing depth/stencil slot is a color resolve. */
   if (!subpass->resolve_depth_stencil ||
       index != (subpass->resolve_count - 1))
      return subpass->color_attachments[index].attachment;

   return subpass->depth_stencil_attachment.attachment;
}

View File

@ -1448,6 +1448,8 @@ struct tu_subpass
{
uint32_t input_count;
uint32_t color_count;
uint32_t resolve_count;
bool resolve_depth_stencil;
struct tu_subpass_attachment *input_attachments;
struct tu_subpass_attachment *color_attachments;
struct tu_subpass_attachment *resolve_attachments;
@ -1502,6 +1504,9 @@ struct tu_query_pool
struct tu_bo bo;
};
uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);
void
tu_update_descriptor_sets(VkDescriptorSet overrideSet,
uint32_t descriptorWriteCount,