From 104603fa763c52e98a79785dd514beab949546db Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Fri, 9 Apr 2021 10:23:48 -0400 Subject: [PATCH] zink: create separate linear tiling image for scanout. Rendering onto a linear-tiled image is unbelievably slow if any sort of blending is enabled, so instead always render to optimal tiling and then copy to linear for scanout. This doubles performance for now and can be deleted in its entirety, along with the rest of the related hacks, once real wsi support is implemented. Reviewed-by: Erik Faye-Lund Part-of: --- src/gallium/drivers/zink/zink_batch.c | 2 +- src/gallium/drivers/zink/zink_context.c | 143 +++++++++++++++++++++-- src/gallium/drivers/zink/zink_context.h | 1 + src/gallium/drivers/zink/zink_resource.c | 34 ++++-- src/gallium/drivers/zink/zink_resource.h | 1 + 5 files changed, 163 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index e0a8decda94..8ae115933e7 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -338,7 +338,7 @@ submit_queue(void *data, int thread_index) }; if (bs->flush_res) { - mem_signal.memory = bs->flush_res->obj->mem; + mem_signal.memory = bs->flush_res->scanout_obj ? 
bs->flush_res->scanout_obj->mem : bs->flush_res->obj->mem; si.pNext = &mem_signal; } diff --git a/src/gallium/drivers/zink/zink_context.c b/src/gallium/drivers/zink/zink_context.c index 4260d028007..36e93f43831 100644 --- a/src/gallium/drivers/zink/zink_context.c +++ b/src/gallium/drivers/zink/zink_context.c @@ -1687,6 +1687,125 @@ equals_gfx_program(const void *a, const void *b) return memcmp(a, b, sizeof(struct zink_shader *) * (ZINK_SHADER_COUNT)) == 0; } +/* TODO: remove for wsi */ +static void +copy_scanout(struct zink_context *ctx, struct zink_resource *res) +{ + VkImageCopy region = {}; + struct pipe_box box = {0, 0, 0, + u_minify(res->base.b.width0, 0), + u_minify(res->base.b.height0, 0), res->base.b.array_size}; + box.depth = util_num_layers(&res->base.b, 0); + struct pipe_box *src_box = &box; + unsigned dstz = 0; + + region.srcSubresource.aspectMask = res->aspect; + region.srcSubresource.mipLevel = 0; + switch (res->base.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + /* these use layer */ + region.srcSubresource.baseArrayLayer = src_box->z; + region.srcSubresource.layerCount = src_box->depth; + region.srcOffset.z = 0; + region.extent.depth = 1; + break; + case PIPE_TEXTURE_3D: + /* this uses depth */ + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcOffset.z = src_box->z; + region.extent.depth = src_box->depth; + break; + default: + /* these must only copy one layer */ + region.srcSubresource.baseArrayLayer = 0; + region.srcSubresource.layerCount = 1; + region.srcOffset.z = 0; + region.extent.depth = 1; + } + + region.srcOffset.x = src_box->x; + region.srcOffset.y = src_box->y; + + region.dstSubresource.aspectMask = res->aspect; + region.dstSubresource.mipLevel = 0; + switch (res->base.b.target) { + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_1D_ARRAY: + /* these 
use layer */ + region.dstSubresource.baseArrayLayer = dstz; + region.dstSubresource.layerCount = src_box->depth; + region.dstOffset.z = 0; + break; + case PIPE_TEXTURE_3D: + /* this uses depth */ + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstOffset.z = dstz; + break; + default: + /* these must only copy one layer */ + region.dstSubresource.baseArrayLayer = 0; + region.dstSubresource.layerCount = 1; + region.dstOffset.z = 0; + } + + region.dstOffset.x = 0; + region.dstOffset.y = 0; + region.extent.width = src_box->width; + region.extent.height = src_box->height; + zink_resource_image_barrier(ctx, NULL, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT); + + VkImageSubresourceRange isr = { + res->aspect, + 0, VK_REMAINING_MIP_LEVELS, + 0, VK_REMAINING_ARRAY_LAYERS + }; + VkImageMemoryBarrier imb = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + NULL, + 0, + VK_ACCESS_TRANSFER_WRITE_BIT, + VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, + res->scanout_obj->image, + isr + }; + vkCmdPipelineBarrier( + ctx->batch.state->cmdbuf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, + 0, NULL, + 0, NULL, + 1, &imb + ); + + vkCmdCopyImage(ctx->batch.state->cmdbuf, res->obj->image, res->layout, + res->scanout_obj->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, &region); + imb.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + imb.dstAccessMask = 0; + imb.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR; + vkCmdPipelineBarrier( + ctx->batch.state->cmdbuf, + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + 0, + 0, NULL, + 0, NULL, + 1, &imb + ); +} + static void zink_flush(struct pipe_context *pctx, struct pipe_fence_handle **pfence, @@ -1704,14 +1823,14 @@ zink_flush(struct pipe_context *pctx, 
zink_begin_render_pass(ctx, batch); } - if (flags & PIPE_FLUSH_END_OF_FRAME && ctx->fb_state.nr_cbufs) { + if (flags & PIPE_FLUSH_END_OF_FRAME) { zink_end_render_pass(ctx, batch); - for (int i = 0; i < ctx->fb_state.nr_cbufs; i++) - zink_resource_image_barrier(ctx, batch, - ctx->fb_state.cbufs[i] ? zink_resource(ctx->fb_state.cbufs[i]->texture) : NULL, - VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, 0); - if (zink_screen(pctx->screen)->needs_mesa_flush_wsi && ctx->fb_state.cbufs[0]) - batch->state->flush_res = zink_resource(ctx->fb_state.cbufs[0]->texture); + if (ctx->flush_res) { + copy_scanout(ctx, ctx->flush_res); + if (zink_screen(pctx->screen)->needs_mesa_flush_wsi) + batch->state->flush_res = ctx->flush_res; + ctx->flush_res = NULL; + } } if (!batch->has_work) { @@ -2047,9 +2166,15 @@ zink_memory_barrier(struct pipe_context *pctx, unsigned flags) } static void -zink_flush_resource(struct pipe_context *pipe, - struct pipe_resource *resource) +zink_flush_resource(struct pipe_context *pctx, + struct pipe_resource *pres) { + struct zink_context *ctx = zink_context(pctx); + /* TODO: this is not futureproof and should be updated once proper + * WSI support is added + */ + if (pres->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT)) + ctx->flush_res = zink_resource(pres); } void diff --git a/src/gallium/drivers/zink/zink_context.h b/src/gallium/drivers/zink/zink_context.h index e4de6448e32..b79467c0b30 100644 --- a/src/gallium/drivers/zink/zink_context.h +++ b/src/gallium/drivers/zink/zink_context.h @@ -181,6 +181,7 @@ struct zink_context { struct primconvert_context *primconvert; + struct zink_resource *flush_res; struct zink_framebuffer *framebuffer; struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1]; uint16_t clears_enabled; diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index f1c5f6f64d9..fc3a29ac5e8 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -162,6 
+162,7 @@ zink_resource_destroy(struct pipe_screen *pscreen, util_range_destroy(&res->valid_buffer_range); zink_resource_object_reference(screen, &res->obj, NULL); + zink_resource_object_reference(screen, &res->scanout_obj, NULL); threaded_resource_deinit(pres); FREE(res); } @@ -248,7 +249,7 @@ get_image_usage(struct zink_screen *screen, VkImageTiling tiling, const struct p usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT; if (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT) usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT; - if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) + if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT && !((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)) == (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT))) usage |= VK_IMAGE_USAGE_SAMPLED_BIT; if ((templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample) && @@ -359,6 +360,8 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t VkMemoryRequirements reqs = {}; VkMemoryPropertyFlags flags; + bool scanout = templ->bind & PIPE_BIND_SCANOUT; + bool shared = templ->bind & PIPE_BIND_SHARED; pipe_reference_init(&obj->reference, 1); util_dynarray_init(&obj->desc_set_refs.refs, NULL); @@ -382,8 +385,13 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; ici.pNext = &emici; - /* TODO: deal with DRM modifiers here */ - ici.tiling = VK_IMAGE_TILING_LINEAR; + if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) { + // TODO: remove for wsi + ici.pNext = NULL; + scanout = false; + shared = false; + } + } if (optimal_tiling) @@ -417,7 +425,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t .scanout = true, }; - if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) { + if (screen->needs_mesa_wsi && scanout) { image_wsi_info.pNext = ici.pNext; ici.pNext = &image_wsi_info; } @@ -460,7 +468,7 @@ resource_object_create(struct zink_screen *screen, const struct 
pipe_resource *t } VkExportMemoryAllocateInfo emai = {}; - if (templ->bind & PIPE_BIND_SHARED) { + if (templ->bind & PIPE_BIND_SHARED && shared) { emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; emai.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; @@ -487,7 +495,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t NULL, }; - if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) { + if (screen->needs_mesa_wsi && scanout) { memory_wsi_info.implicit_sync = true; memory_wsi_info.pNext = mai.pNext; @@ -574,6 +582,13 @@ resource_create(struct pipe_screen *pscreen, res->layout = VK_IMAGE_LAYOUT_UNDEFINED; res->optimal_tiling = optimal_tiling; res->aspect = aspect_from_format(templ->format); + if (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED) && optimal_tiling) { + // TODO: remove for wsi + struct pipe_resource templ2 = res->base.b; + templ2.bind = (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) | PIPE_BIND_LINEAR; + res->scanout_obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling); + assert(!optimal_tiling); + } } if (screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) { @@ -606,6 +621,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen, { struct zink_resource *res = zink_resource(tex); struct zink_screen *screen = zink_screen(pscreen); + //TODO: remove for wsi + struct zink_resource_object *obj = res->scanout_obj ? 
res->scanout_obj : res->obj; if (res->base.b.target != PIPE_BUFFER) { VkImageSubresource sub_res = {}; @@ -613,7 +630,7 @@ zink_resource_get_handle(struct pipe_screen *pscreen, sub_res.aspectMask = res->aspect; - vkGetImageSubresourceLayout(screen->dev, res->obj->image, &sub_res, &sub_res_layout); + vkGetImageSubresourceLayout(screen->dev, obj->image, &sub_res, &sub_res_layout); whandle->stride = sub_res_layout.rowPitch; } @@ -623,7 +640,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen, VkMemoryGetFdInfoKHR fd_info = {}; int fd; fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; - fd_info.memory = res->obj->mem; + //TODO: remove for wsi + fd_info.memory = obj->mem; fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; VkResult result = (*screen->vk_GetMemoryFdKHR)(screen->dev, &fd_info, &fd); if (result != VK_SUCCESS) diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h index 64b9f84ab06..1bf959916ef 100644 --- a/src/gallium/drivers/zink/zink_resource.h +++ b/src/gallium/drivers/zink/zink_resource.h @@ -88,6 +88,7 @@ struct zink_resource { VkAccessFlags access; struct zink_resource_object *obj; + struct zink_resource_object *scanout_obj; //TODO: remove for wsi union { struct util_range valid_buffer_range; struct {