zink: create separate linear tiling image for scanout

rendering onto a linear-tiled image is unbelievably slow if any sort of
blending is enabled, so instead always render to optimal tiling and then
copy to linear for scanout

this doubles performance for now and can be deleted in its entirety along
with the rest of the related hacks once real wsi support is implemented

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10180>
This commit is contained in:
Mike Blumenkrantz 2021-04-09 10:23:48 -04:00 committed by Marge Bot
parent e7f4f1b582
commit 104603fa76
5 changed files with 163 additions and 18 deletions

View File

@ -338,7 +338,7 @@ submit_queue(void *data, int thread_index)
};
if (bs->flush_res) {
mem_signal.memory = bs->flush_res->obj->mem;
mem_signal.memory = bs->flush_res->scanout_obj ? bs->flush_res->scanout_obj->mem : bs->flush_res->obj->mem;
si.pNext = &mem_signal;
}

View File

@ -1687,6 +1687,125 @@ equals_gfx_program(const void *a, const void *b)
return memcmp(a, b, sizeof(struct zink_shader *) * (ZINK_SHADER_COUNT)) == 0;
}
/* TODO: remove for wsi */
static void
copy_scanout(struct zink_context *ctx, struct zink_resource *res)
{
VkImageCopy region = {};
struct pipe_box box = {0, 0, 0,
u_minify(res->base.b.width0, 0),
u_minify(res->base.b.height0, 0), res->base.b.array_size};
box.depth = util_num_layers(&res->base.b, 0);
struct pipe_box *src_box = &box;
unsigned dstz = 0;
region.srcSubresource.aspectMask = res->aspect;
region.srcSubresource.mipLevel = 0;
switch (res->base.b.target) {
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY:
/* these use layer */
region.srcSubresource.baseArrayLayer = src_box->z;
region.srcSubresource.layerCount = src_box->depth;
region.srcOffset.z = 0;
region.extent.depth = 1;
break;
case PIPE_TEXTURE_3D:
/* this uses depth */
region.srcSubresource.baseArrayLayer = 0;
region.srcSubresource.layerCount = 1;
region.srcOffset.z = src_box->z;
region.extent.depth = src_box->depth;
break;
default:
/* these must only copy one layer */
region.srcSubresource.baseArrayLayer = 0;
region.srcSubresource.layerCount = 1;
region.srcOffset.z = 0;
region.extent.depth = 1;
}
region.srcOffset.x = src_box->x;
region.srcOffset.y = src_box->y;
region.dstSubresource.aspectMask = res->aspect;
region.dstSubresource.mipLevel = 0;
switch (res->base.b.target) {
case PIPE_TEXTURE_CUBE:
case PIPE_TEXTURE_CUBE_ARRAY:
case PIPE_TEXTURE_2D_ARRAY:
case PIPE_TEXTURE_1D_ARRAY:
/* these use layer */
region.dstSubresource.baseArrayLayer = dstz;
region.dstSubresource.layerCount = src_box->depth;
region.dstOffset.z = 0;
break;
case PIPE_TEXTURE_3D:
/* this uses depth */
region.dstSubresource.baseArrayLayer = 0;
region.dstSubresource.layerCount = 1;
region.dstOffset.z = dstz;
break;
default:
/* these must only copy one layer */
region.dstSubresource.baseArrayLayer = 0;
region.dstSubresource.layerCount = 1;
region.dstOffset.z = 0;
}
region.dstOffset.x = 0;
region.dstOffset.y = 0;
region.extent.width = src_box->width;
region.extent.height = src_box->height;
zink_resource_image_barrier(ctx, NULL, res, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_ACCESS_TRANSFER_READ_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
VkImageSubresourceRange isr = {
res->aspect,
0, VK_REMAINING_MIP_LEVELS,
0, VK_REMAINING_ARRAY_LAYERS
};
VkImageMemoryBarrier imb = {
VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
NULL,
0,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
res->scanout_obj->image,
isr
};
vkCmdPipelineBarrier(
ctx->batch.state->cmdbuf,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
0, NULL,
0, NULL,
1, &imb
);
vkCmdCopyImage(ctx->batch.state->cmdbuf, res->obj->image, res->layout,
res->scanout_obj->image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1, &region);
imb.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
imb.dstAccessMask = 0;
imb.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
vkCmdPipelineBarrier(
ctx->batch.state->cmdbuf,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
0,
0, NULL,
0, NULL,
1, &imb
);
}
static void
zink_flush(struct pipe_context *pctx,
struct pipe_fence_handle **pfence,
@ -1704,14 +1823,14 @@ zink_flush(struct pipe_context *pctx,
zink_begin_render_pass(ctx, batch);
}
if (flags & PIPE_FLUSH_END_OF_FRAME && ctx->fb_state.nr_cbufs) {
if (flags & PIPE_FLUSH_END_OF_FRAME) {
zink_end_render_pass(ctx, batch);
for (int i = 0; i < ctx->fb_state.nr_cbufs; i++)
zink_resource_image_barrier(ctx, batch,
ctx->fb_state.cbufs[i] ? zink_resource(ctx->fb_state.cbufs[i]->texture) : NULL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, 0, 0);
if (zink_screen(pctx->screen)->needs_mesa_flush_wsi && ctx->fb_state.cbufs[0])
batch->state->flush_res = zink_resource(ctx->fb_state.cbufs[0]->texture);
if (ctx->flush_res) {
copy_scanout(ctx, ctx->flush_res);
if (zink_screen(pctx->screen)->needs_mesa_flush_wsi)
batch->state->flush_res = ctx->flush_res;
ctx->flush_res = NULL;
}
}
if (!batch->has_work) {
@ -2047,9 +2166,15 @@ zink_memory_barrier(struct pipe_context *pctx, unsigned flags)
}
static void
zink_flush_resource(struct pipe_context *pipe,
struct pipe_resource *resource)
zink_flush_resource(struct pipe_context *pctx,
struct pipe_resource *pres)
{
struct zink_context *ctx = zink_context(pctx);
/* TODO: this is not futureproof and should be updated once proper
* WSI support is added
*/
if (pres->bind & (PIPE_BIND_SHARED | PIPE_BIND_SCANOUT))
ctx->flush_res = zink_resource(pres);
}
void

View File

@ -181,6 +181,7 @@ struct zink_context {
struct primconvert_context *primconvert;
struct zink_resource *flush_res;
struct zink_framebuffer *framebuffer;
struct zink_framebuffer_clear fb_clears[PIPE_MAX_COLOR_BUFS + 1];
uint16_t clears_enabled;

View File

@ -162,6 +162,7 @@ zink_resource_destroy(struct pipe_screen *pscreen,
util_range_destroy(&res->valid_buffer_range);
zink_resource_object_reference(screen, &res->obj, NULL);
zink_resource_object_reference(screen, &res->scanout_obj, NULL);
threaded_resource_deinit(pres);
FREE(res);
}
@ -248,7 +249,7 @@ get_image_usage(struct zink_screen *screen, VkImageTiling tiling, const struct p
usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
if (feats & VK_FORMAT_FEATURE_TRANSFER_DST_BIT)
usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)
if (feats & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT && !((bind & (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)) == (PIPE_BIND_LINEAR | PIPE_BIND_SCANOUT)))
usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
if ((templ->nr_samples <= 1 || screen->info.feats.features.shaderStorageImageMultisample) &&
@ -359,6 +360,8 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
VkMemoryRequirements reqs = {};
VkMemoryPropertyFlags flags;
bool scanout = templ->bind & PIPE_BIND_SCANOUT;
bool shared = templ->bind & PIPE_BIND_SHARED;
pipe_reference_init(&obj->reference, 1);
util_dynarray_init(&obj->desc_set_refs.refs, NULL);
@ -382,8 +385,13 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
emici.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
ici.pNext = &emici;
/* TODO: deal with DRM modifiers here */
ici.tiling = VK_IMAGE_TILING_LINEAR;
if (ici.tiling == VK_IMAGE_TILING_OPTIMAL) {
// TODO: remove for wsi
ici.pNext = NULL;
scanout = false;
shared = false;
}
}
if (optimal_tiling)
@ -417,7 +425,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
.scanout = true,
};
if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) {
if (screen->needs_mesa_wsi && scanout) {
image_wsi_info.pNext = ici.pNext;
ici.pNext = &image_wsi_info;
}
@ -460,7 +468,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
}
VkExportMemoryAllocateInfo emai = {};
if (templ->bind & PIPE_BIND_SHARED) {
if (templ->bind & PIPE_BIND_SHARED && shared) {
emai.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
emai.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
@ -487,7 +495,7 @@ resource_object_create(struct zink_screen *screen, const struct pipe_resource *t
NULL,
};
if (screen->needs_mesa_wsi && (templ->bind & PIPE_BIND_SCANOUT)) {
if (screen->needs_mesa_wsi && scanout) {
memory_wsi_info.implicit_sync = true;
memory_wsi_info.pNext = mai.pNext;
@ -574,6 +582,13 @@ resource_create(struct pipe_screen *pscreen,
res->layout = VK_IMAGE_LAYOUT_UNDEFINED;
res->optimal_tiling = optimal_tiling;
res->aspect = aspect_from_format(templ->format);
if (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED) && optimal_tiling) {
// TODO: remove for wsi
struct pipe_resource templ2 = res->base.b;
templ2.bind = (res->base.b.bind & (PIPE_BIND_SCANOUT | PIPE_BIND_SHARED)) | PIPE_BIND_LINEAR;
res->scanout_obj = resource_object_create(screen, &templ2, whandle, &optimal_tiling);
assert(!optimal_tiling);
}
}
if (screen->winsys && (templ->bind & PIPE_BIND_DISPLAY_TARGET)) {
@ -606,6 +621,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
{
struct zink_resource *res = zink_resource(tex);
struct zink_screen *screen = zink_screen(pscreen);
//TODO: remove for wsi
struct zink_resource_object *obj = res->scanout_obj ? res->scanout_obj : res->obj;
if (res->base.b.target != PIPE_BUFFER) {
VkImageSubresource sub_res = {};
@ -613,7 +630,7 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
sub_res.aspectMask = res->aspect;
vkGetImageSubresourceLayout(screen->dev, res->obj->image, &sub_res, &sub_res_layout);
vkGetImageSubresourceLayout(screen->dev, obj->image, &sub_res, &sub_res_layout);
whandle->stride = sub_res_layout.rowPitch;
}
@ -623,7 +640,8 @@ zink_resource_get_handle(struct pipe_screen *pscreen,
VkMemoryGetFdInfoKHR fd_info = {};
int fd;
fd_info.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR;
fd_info.memory = res->obj->mem;
//TODO: remove for wsi
fd_info.memory = obj->mem;
fd_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
VkResult result = (*screen->vk_GetMemoryFdKHR)(screen->dev, &fd_info, &fd);
if (result != VK_SUCCESS)

View File

@ -88,6 +88,7 @@ struct zink_resource {
VkAccessFlags access;
struct zink_resource_object *obj;
struct zink_resource_object *scanout_obj; //TODO: remove for wsi
union {
struct util_range valid_buffer_range;
struct {