From f3ea0cf8289ebdbb45d7122095919fb6752eb433 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 16 Jun 2019 21:21:16 -0500 Subject: [PATCH] anv: Add stencil texturing support for gen7 Intel hardware didn't get support for sampling from W-tiled (required for stencil) images until Broadwell so we can't directly sample from stencil. Instead, if we want to support stencil texturing on gen7 hardware, we have to keep a texture-capable shadow copy around and use BLORP to update when stencil changes. The one thing this commit does not implement is self-dependencies with stencil input attachments. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99493 Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_formats.c | 4 +- src/intel/vulkan/anv_image.c | 23 +++++++-- src/intel/vulkan/genX_cmd_buffer.c | 76 ++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 7 deletions(-) diff --git a/src/intel/vulkan/anv_formats.c b/src/intel/vulkan/anv_formats.c index 70543e0cfdf..d46da754ba3 100644 --- a/src/intel/vulkan/anv_formats.c +++ b/src/intel/vulkan/anv_formats.c @@ -514,14 +514,12 @@ anv_get_image_format_features(const struct gen_device_info *devinfo, return 0; flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT | VK_FORMAT_FEATURE_TRANSFER_SRC_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT; - if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT || devinfo->gen >= 8) - flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT; - if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && devinfo->gen >= 9) flags |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT; diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c index 038a01f8a39..f405aa8067f 100644 --- a/src/intel/vulkan/anv_image.c +++ b/src/intel/vulkan/anv_image.c @@ -336,6 +336,12 @@ make_surface(const struct anv_device *dev, needs_shadow = true; } + if (dev->info.gen <= 7 && + aspect == 
VK_IMAGE_ASPECT_STENCIL_BIT && + (image->usage & VK_IMAGE_USAGE_SAMPLED_BIT)) { + needs_shadow = true; + } + ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl, .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, @@ -359,12 +365,11 @@ make_surface(const struct anv_device *dev, /* If an image is created as BLOCK_TEXEL_VIEW_COMPATIBLE, then we need to * create an identical tiled shadow surface for use while texturing so we - * don't get garbage performance. + * don't get garbage performance. If we're on gen7 and the image contains + * stencil, then we need to maintain a shadow because we can't texture from + * W-tiled images. */ if (needs_shadow) { - assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT); - assert(tiling_flags == ISL_TILING_LINEAR_BIT); - ok = isl_surf_init(&dev->isl_dev, &image->planes[plane].shadow_surface.isl, .dim = vk_to_isl_surf_dim[image->type], .format = plane_format.isl_format, @@ -1275,6 +1280,16 @@ anv_image_fill_surface_state(struct anv_device *device, surface = &image->planes[plane].shadow_surface; } + /* For texturing from stencil on gen7, we have to sample from a shadow + * surface because we don't support W-tiling in the sampler. 
+ */ + if (image->planes[plane].shadow_surface.isl.size_B > 0 && + aspect == VK_IMAGE_ASPECT_STENCIL_BIT) { + assert(device->info.gen == 7); + assert(view_usage & ISL_SURF_USAGE_TEXTURE_BIT); + surface = &image->planes[plane].shadow_surface; + } + if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT) view.swizzle = anv_swizzle_for_render(view.swizzle); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 2ee06618d07..a580fda6c2c 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -474,6 +474,13 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer, 0, 0, 1, hiz_op); } +static inline bool +vk_image_layout_stencil_write_optimal(VkImageLayout layout) +{ + return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL || + layout == VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL; +} + /* Transitions a HiZ-enabled depth buffer from one layout to another. Unless * the initial layout is undefined, the HiZ buffer and depth buffer will * represent the same data at the end of this operation. @@ -486,6 +493,34 @@ transition_stencil_buffer(struct anv_cmd_buffer *cmd_buffer, VkImageLayout initial_layout, VkImageLayout final_layout) { +#if GEN_GEN == 7 + uint32_t plane = anv_image_aspect_to_plane(image->aspects, + VK_IMAGE_ASPECT_STENCIL_BIT); + + /* On gen7, we have to store a texturable version of the stencil buffer in + * a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and + * forth at strategic points. Stencil writes are only allowed in four + * layouts: + * + * - VK_IMAGE_LAYOUT_GENERAL + * - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + * - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + * - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL + * + * For general, we have no nice opportunity to transition so we do the copy + * to the shadow unconditionally at the end of the subpass. 
For transfer + * destinations, we can update it as part of the transfer op. For the + * other two, we delay the copy until a transition into some other layout. + */ + if (image->planes[plane].shadow_surface.isl.size_B > 0 && + vk_image_layout_stencil_write_optimal(initial_layout) && + !vk_image_layout_stencil_write_optimal(final_layout)) { + anv_image_copy_to_shadow(cmd_buffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + base_level, level_count, + base_layer, layer_count); + } +#endif /* GEN_GEN == 7 */ } #define MI_PREDICATE_SRC0 0x2400 @@ -4503,6 +4538,47 @@ cmd_buffer_end_subpass(struct anv_cmd_buffer *cmd_buffer) } } +#if GEN_GEN == 7 + /* On gen7, we have to store a texturable version of the stencil buffer in + * a shadow whenever VK_IMAGE_USAGE_SAMPLED_BIT is set and copy back and + * forth at strategic points. Stencil writes are only allowed in four + * layouts: + * + * - VK_IMAGE_LAYOUT_GENERAL + * - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL + * - VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL + * - VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL + * + * For general, we have no nice opportunity to transition so we do the copy + * to the shadow unconditionally at the end of the subpass. For transfer + * destinations, we can update it as part of the transfer op. For the + * other two, we delay the copy until a transition into some other layout. 
+ */ + if (subpass->depth_stencil_attachment) { + uint32_t a = subpass->depth_stencil_attachment->attachment; + assert(a != VK_ATTACHMENT_UNUSED); + + struct anv_attachment_state *att_state = &cmd_state->attachments[a]; + struct anv_image_view *iview = fb->attachments[a]; + const struct anv_image *image = iview->image; + + if (image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { + uint32_t plane = anv_image_aspect_to_plane(image->aspects, + VK_IMAGE_ASPECT_STENCIL_BIT); + + if (image->planes[plane].shadow_surface.isl.size_B > 0 && + att_state->current_layout == VK_IMAGE_LAYOUT_GENERAL) { + assert(image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT); + anv_image_copy_to_shadow(cmd_buffer, image, + VK_IMAGE_ASPECT_STENCIL_BIT, + iview->planes[plane].isl.base_level, 1, + iview->planes[plane].isl.base_array_layer, + fb->layers); + } + } + } +#endif /* GEN_GEN == 7 */ + for (uint32_t i = 0; i < subpass->attachment_count; ++i) { const uint32_t a = subpass->attachments[i].attachment; if (a == VK_ATTACHMENT_UNUSED)