zink: add caching for descriptor sets

this is a lot of churn that more or less amounts to hashing the descriptor
state at draw time and then using that hash to look up and reuse an existing
descriptor set instead of allocating a new one

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9348>
Mike Blumenkrantz 2020-10-02 13:40:40 -04:00 committed by Marge Bot
parent 840ea21fa1
commit 6c4c995836
7 changed files with 141 additions and 31 deletions
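At a high level, the change turns descriptor-set allocation into a cache lookup keyed on a hash of the bound descriptor state. Below is a minimal standalone model of that pattern, not the commit's actual code: the fixed-size direct-mapped table and all names are illustrative stand-ins for the per-program hash table the diffs introduce.

/* model: hash the descriptor state, then reuse a cached set on a hit */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define CACHE_SIZE 64

struct cached_set {
   bool     in_use;
   uint32_t hash;   /* hash of the descriptor state that filled this set */
   int      vk_set; /* stand-in for a VkDescriptorSet handle */
};

static struct cached_set cache[CACHE_SIZE];
static int next_set;

/* returns a set for this state hash; *cache_hit tells the caller whether the
 * (comparatively expensive) vkUpdateDescriptorSets step can be skipped */
static int
get_desc_set(uint32_t hash, bool *cache_hit)
{
   struct cached_set *slot = &cache[hash % CACHE_SIZE];
   if (slot->in_use && slot->hash == hash) {
      *cache_hit = true;
      return slot->vk_set;
   }
   *cache_hit = false;
   slot->in_use = true;
   slot->hash = hash;
   slot->vk_set = next_set++; /* "allocate" a fresh set */
   return slot->vk_set;
}

int main(void)
{
   bool hit;
   get_desc_set(0xdeadbeef, &hit); printf("a: hit=%d\n", hit); /* miss */
   get_desc_set(0xdeadbeef, &hit); printf("a: hit=%d\n", hit); /* hit */
   get_desc_set(0xcafef00d, &hit); printf("b: hit=%d\n", hit); /* miss */
   return 0;
}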

src/gallium/drivers/zink/zink_batch.c

@@ -52,16 +52,16 @@ zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch)
    hash_table_foreach(batch->programs, entry) {
       struct zink_program *pg = (struct zink_program*)entry->key;
-      struct set *desc_sets = (struct set*)entry->data;
-      set_foreach(desc_sets, sentry) {
-         struct zink_descriptor_set *zds = (void*)sentry->key;
+      struct hash_table *desc_sets = (struct hash_table*)entry->data;
+      hash_table_foreach(desc_sets, sentry) {
+         struct zink_descriptor_set *zds = (void*)sentry->data;
          /* reset descriptor pools when no batch is using this program to avoid
           * having some inactive program hogging a billion descriptors
           */
          pipe_reference(&zds->reference, NULL);
-         zink_program_invalidate_desc_set(pg, zds);
+         zink_program_recycle_desc_set(pg, sentry->hash, zds);
       }
-      _mesa_set_destroy(desc_sets, NULL);
+      _mesa_hash_table_destroy(desc_sets, NULL);
       if (batch->batch_id == ZINK_COMPUTE_BATCH_ID) {
          struct zink_compute_program *comp = (struct zink_compute_program*)entry->key;
          zink_compute_program_reference(screen, &comp, NULL);
@@ -228,21 +228,21 @@ zink_batch_reference_program(struct zink_batch *batch,
 {
    struct hash_entry *entry = _mesa_hash_table_search(batch->programs, pg);
    if (!entry) {
-      entry = _mesa_hash_table_insert(batch->programs, pg, _mesa_pointer_set_create(NULL));
+      entry = _mesa_hash_table_insert(batch->programs, pg, _mesa_hash_table_create(batch->programs, NULL, _mesa_key_pointer_equal));
       pipe_reference(NULL, &pg->reference);
    }
    batch->has_work = true;
 }

 bool
-zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, struct zink_descriptor_set *zds)
+zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds)
 {
    struct hash_entry *entry = _mesa_hash_table_search(batch->programs, pg);
    assert(entry);
-   struct set *desc_sets = (void*)entry->data;
-   if (!_mesa_set_search(desc_sets, zds)) {
+   struct hash_table *desc_sets = (void*)entry->data;
+   if (!_mesa_hash_table_search_pre_hashed(desc_sets, hash, (void*)(uintptr_t)hash)) {
       pipe_reference(NULL, &zds->reference);
-      _mesa_set_add(desc_sets, zds);
+      _mesa_hash_table_insert_pre_hashed(desc_sets, hash, (void*)(uintptr_t)hash, zds);
       return true;
    }
    return false;
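The per-batch bookkeeping above boils down to: each batch remembers which descriptor sets it already references, keyed by the state hash, and takes a reference only on first sight. A simplified standalone model follows; a linear array stands in for mesa's _mesa_hash_table, and all names are illustrative.

/* model of the "add a desc set to a batch once" logic */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_SETS 32

struct desc_set { int refcount; };

struct batch {
   uint32_t hashes[MAX_SETS];        /* hashes of sets this batch holds */
   struct desc_set *sets[MAX_SETS];
   int num_sets;
};

/* returns true only on first insertion, mirroring the bool return the commit
 * uses to bump batch->descs_used exactly once per set */
static bool
batch_add_desc_set(struct batch *b, uint32_t hash, struct desc_set *zds)
{
   for (int i = 0; i < b->num_sets; i++)
      if (b->hashes[i] == hash)
         return false;          /* already tracked: no new reference */
   zds->refcount++;             /* the batch now keeps the set alive */
   b->hashes[b->num_sets] = hash;
   b->sets[b->num_sets++] = zds;
   return true;
}

int main(void)
{
   struct batch b = {0};
   struct desc_set zds = { .refcount = 1 };
   printf("%d\n", batch_add_desc_set(&b, 42, &zds)); /* 1: first use */
   printf("%d\n", batch_add_desc_set(&b, 42, &zds)); /* 0: duplicate */
   printf("refcount=%d\n", zds.refcount);            /* 2 */
   return 0;
}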

src/gallium/drivers/zink/zink_batch.h

@@ -97,5 +97,5 @@ zink_batch_reference_surface(struct zink_batch *batch,
                              struct zink_surface *surface);
 bool
-zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, struct zink_descriptor_set *zds);
+zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds);
 #endif

src/gallium/drivers/zink/zink_context.c

@@ -224,12 +224,12 @@ get_gfx_program(struct zink_context *ctx)
 }

 static struct zink_descriptor_set *
-get_descriptor_set(struct zink_context *ctx, bool is_compute)
+get_descriptor_set(struct zink_context *ctx, bool is_compute, uint32_t desc_hash, bool *cache_hit)
 {
    struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program;
    struct zink_batch *batch = is_compute ? &ctx->compute_batch : zink_curr_batch(ctx);
    zink_batch_reference_program(batch, pg);
-   return zink_program_allocate_desc_set(ctx, batch, pg);
+   return zink_program_allocate_desc_set(ctx, batch, pg, desc_hash, cache_hit);
 }

 struct zink_transition {
@@ -321,6 +321,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
    struct zink_transition transitions[MAX_DESCRIPTORS];
    int num_transitions = 0;
    struct set *ht = _mesa_set_create(NULL, transition_hash, transition_equals);
+   uint32_t desc_hash = 0;

    for (int i = 0; i < num_stages; i++) {
       struct zink_shader *shader = stages[i];
@@ -352,6 +353,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
                                  zink_resource(ctx->dummy_vertex_buffer)->buffer);
             buffer_infos[num_buffer_info].offset = res ? ctx->ubos[stage][index].buffer_offset : 0;
             buffer_infos[num_buffer_info].range = res ? ctx->ubos[stage][index].buffer_size : VK_WHOLE_SIZE;
+            desc_hash = XXH32(&buffer_infos[num_buffer_info], sizeof(VkDescriptorBufferInfo), desc_hash);
             if (res)
                add_transition(res, 0, VK_ACCESS_UNIFORM_READ_BIT, stage, &transitions[num_transitions], &num_transitions, ht);
             wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
@@ -378,6 +380,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
                buffer_infos[num_buffer_info].offset = 0;
                buffer_infos[num_buffer_info].range = VK_WHOLE_SIZE;
             }
+            desc_hash = XXH32(&buffer_infos[num_buffer_info], sizeof(VkDescriptorBufferInfo), desc_hash);
             wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info;
             ++num_buffer_info;
          } else {
@@ -396,9 +399,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
             res = psampler_view ? zink_resource(psampler_view->texture) : NULL;
             if (!res)
                break;
-            if (res->base.target == PIPE_BUFFER)
+            if (res->base.target == PIPE_BUFFER) {
                wds[num_wds].pTexelBufferView = &sampler_view->buffer_view;
-            else {
+               desc_hash = XXH32(&sampler_view->base.u.buf, sizeof(sampler_view->base.u.buf), desc_hash);
+            } else {
                imageview = sampler_view->image_view;
                layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
                sampler = ctx->samplers[stage][index + k];
@@ -414,10 +418,12 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
             assert(image_view);
             surface_refs[num_surface_refs++] = image_view->surface;
             res = zink_resource(image_view->base.resource);
+            if (!res)
+               break;
             if (image_view->base.resource->target == PIPE_BUFFER) {
                wds[num_wds].pTexelBufferView = &image_view->buffer_view;
+               desc_hash = XXH32(&image_view->base.u.buf, sizeof(image_view->base.u.buf), desc_hash);
             } else {
                imageview = image_view->surface->image_view;
                layout = VK_IMAGE_LAYOUT_GENERAL;
@@ -448,6 +454,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
             case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
             case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                wds[num_wds].pTexelBufferView = &buffer_view[0];
+               desc_hash = XXH32(&buffer_view[0], sizeof(VkBufferView), desc_hash);
                break;
             case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
             case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -456,6 +463,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
                image_infos[num_image_info].sampler = sampler;
                if (!k)
                   wds[num_wds].pImageInfo = image_infos + num_image_info;
+               desc_hash = XXH32(&image_infos[num_image_info], sizeof(VkDescriptorImageInfo), desc_hash);
                ++num_image_info;
                break;
             default:
@@ -468,8 +476,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
                image_infos[num_image_info].sampler = ctx->samplers[stage][index + k];
                if (!k)
                   wds[num_wds].pImageInfo = image_infos + num_image_info;
+               desc_hash = XXH32(&image_infos[num_image_info], sizeof(VkDescriptorImageInfo), desc_hash);
                ++num_image_info;
             }
-         }
+         } else
+            desc_hash = XXH32(&res->buffer, sizeof(VkBuffer), desc_hash);
       }
    }
@@ -492,8 +502,9 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
       }
    }

+   bool cache_hit = false;
    struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base;
-   struct zink_descriptor_set *zds = get_descriptor_set(ctx, is_compute);
+   struct zink_descriptor_set *zds = get_descriptor_set(ctx, is_compute, desc_hash, &cache_hit);
    assert(zds != VK_NULL_HANDLE);

    batch = is_compute ? &ctx->compute_batch : zink_curr_batch(ctx);
@@ -506,8 +517,17 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is_compute)
       if (res) {
          need_flush |= zink_batch_reference_resource_rw(batch, res, resources[i].write) == check_flush_id;
       }
+      /* if we got a cache hit, we have to verify that the cached set is still valid;
+       * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a
+       * hash table on every resource with the associated descriptor sets that then needs to be iterated through
+       * whenever a resource is destroyed
+       */
+      cache_hit = cache_hit && zds->resources[i] == res;
+      if (zds->resources[i] != res)
+         zink_resource_desc_set_add(res, zds, i);
    }
-   vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);
+   if (!cache_hit)
+      vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL);

    for (int i = 0; i < num_surface_refs; i++) {
       if (surface_refs[i])
          zink_batch_reference_surface(batch, surface_refs[i]);
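The desc_hash accumulation above works by feeding the running hash back in as the XXH32 seed, so the final value covers every descriptor write in order. A small standalone illustration of that chaining: the struct here is a padding-free stand-in for VkDescriptorBufferInfo, and the include assumes an xxhash.h on the include path (mesa vendors one under util/), so treat the header name as an assumption.

/* sketch: chaining XXH32 over each descriptor's write data */
#include <stdint.h>
#include <stdio.h>
#include "xxhash.h"

struct buffer_info { uint64_t buffer; uint64_t offset; uint64_t range; };

int main(void)
{
   struct buffer_info infos[2] = {
      { .buffer = 1, .offset = 0,   .range = 256 },
      { .buffer = 2, .offset = 128, .range = 64  },
   };
   /* each call seeds with the previous result, so the final hash depends on
    * every descriptor and on their order -- same trick as desc_hash above */
   uint32_t desc_hash = 0;
   for (int i = 0; i < 2; i++)
      desc_hash = XXH32(&infos[i], sizeof(infos[i]), desc_hash);
   printf("state hash: 0x%08x\n", (unsigned)desc_hash);
   return 0;
}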

src/gallium/drivers/zink/zink_program.c

@@ -563,6 +563,10 @@ zink_create_gfx_program(struct zink_context *ctx,
    if (!prog->layout)
       goto fail;

+   prog->base.desc_sets = _mesa_hash_table_create(NULL, NULL, _mesa_key_pointer_equal);
+   if (!prog->base.desc_sets)
+      goto fail;
+
    util_dynarray_init(&prog->base.alloc_desc_sets, NULL);

    return prog;
@@ -664,6 +668,10 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader)
    if (!comp->layout)
       goto fail;

+   comp->base.desc_sets = _mesa_hash_table_create(NULL, NULL, _mesa_key_pointer_equal);
+   if (!comp->base.desc_sets)
+      goto fail;
+
    util_dynarray_init(&comp->base.alloc_desc_sets, NULL);

    return comp;
@@ -674,6 +682,14 @@ fail:
    return NULL;
 }

+static inline void
+desc_set_invalidate_resources(struct zink_program *pg, struct zink_descriptor_set *zds)
+{
+   for (unsigned i = 0; i < pg->num_descriptors; i++)
+      zds->resources[i] = NULL;
+   zds->valid = false;
+}
+
 static struct zink_descriptor_set *
 allocate_desc_set(struct zink_screen *screen, struct zink_program *pg, unsigned descs_used)
 {
@@ -700,9 +716,13 @@ allocate_desc_set(struct zink_screen *screen, struct zink_program *pg, unsigned descs_used)
    struct zink_descriptor_set *alloc = ralloc_array(pg, struct zink_descriptor_set, bucket_size);
    assert(alloc);
+   struct zink_resource **resources = rzalloc_array(pg, struct zink_resource*, pg->num_descriptors * bucket_size);
+   assert(resources);
    for (unsigned i = 0; i < bucket_size; i ++) {
       struct zink_descriptor_set *zds = &alloc[i];
       pipe_reference_init(&zds->reference, 1);
+      zds->valid = false;
+      zds->resources = &resources[i * pg->num_descriptors];
       zds->desc_set = desc_set[i];
       if (i > 0)
          util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds);
@@ -713,39 +733,80 @@
 struct zink_descriptor_set *
 zink_program_allocate_desc_set(struct zink_context *ctx,
                                struct zink_batch *batch,
-                               struct zink_program *pg)
+                               struct zink_program *pg,
+                               uint32_t hash,
+                               bool *cache_hit)
 {
+   *cache_hit = false;
    struct zink_descriptor_set *zds;
    struct zink_screen *screen = zink_screen(ctx->base.screen);
+   struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash);
+   if (he) {
+      zds = (void*)he->data;
+      /* this shouldn't happen, but if we somehow get a cache hit on an invalidated, active desc set then
+       * we probably should just crash here rather than later
+       */
+      assert(zds->valid);
+   }
+   if (he) {
+      zds = (void*)he->data;
+      *cache_hit = zds->valid;
+      if (zink_batch_add_desc_set(batch, pg, hash, zds))
+         batch->descs_used += pg->num_descriptors;
+      return zds;
+   }
+
    if (util_dynarray_num_elements(&pg->alloc_desc_sets, struct zink_descriptor_set *)) {
       /* grab one off the allocated array */
       zds = util_dynarray_pop(&pg->alloc_desc_sets, struct zink_descriptor_set *);
+      //printf("POP%u %p %u\n", batch->batch_id, pg, hash);
      goto out;
    }

-   unsigned descs_used = pg->descs_used;
+   unsigned descs_used = _mesa_hash_table_num_entries(pg->desc_sets);
    if (descs_used + pg->num_descriptors > ZINK_DEFAULT_MAX_DESCS) {
       batch = zink_flush_batch(ctx, batch);
       zink_batch_reference_program(batch, pg);
-      return zink_program_allocate_desc_set(ctx, batch, pg);
+      return zink_program_allocate_desc_set(ctx, batch, pg, hash, cache_hit);
    }

    zds = allocate_desc_set(screen, pg, descs_used);
+   //printf("NEW%u %p %u (%u total - %u / %u)\n", batch->batch_id, pg, hash,
+   //_mesa_hash_table_num_entries(pg->desc_sets),
+   //_mesa_hash_table_num_entries(pg->desc_sets));
 out:
-   if (zink_batch_add_desc_set(batch, pg, zds))
+   zds->valid = true;
+   _mesa_hash_table_insert_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash, zds);
+   if (zink_batch_add_desc_set(batch, pg, hash, zds))
       batch->descs_used += pg->num_descriptors;
    return zds;
 }

 void
-zink_program_invalidate_desc_set(struct zink_program *pg, struct zink_descriptor_set *zds)
+zink_program_recycle_desc_set(struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds)
 {
+   /* if desc set is still in use by a batch, don't recache */
    uint32_t refcount = p_atomic_read(&zds->reference.count);
    /* refcount > 1 means this is currently in use, so we can't recycle it yet */
-   if (refcount == 1)
-      util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds);
+   if (refcount != 1)
+      return;
+
+   struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash);
+   if (!he)
+      /* desc sets can be used multiple times in the same batch */
+      return;
+   _mesa_hash_table_remove(pg->desc_sets, he);
+   util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds);
+}
+
+static void
+zink_program_clear_desc_sets(struct zink_program *pg, struct hash_table *ht)
+{
+   //printf("CLEAR %p\n", pg);
+   hash_table_foreach(ht, entry) {
+      struct zink_descriptor_set *zds = entry->data;
+      desc_set_invalidate_resources(pg, zds);
+   }
+   _mesa_hash_table_clear(ht, NULL);
 }

 static void
@@ -786,6 +847,8 @@ zink_destroy_gfx_program(struct zink_screen *screen,
    }
    zink_shader_cache_reference(screen, &prog->shader_cache, NULL);

+   zink_program_clear_desc_sets((struct zink_program*)prog, prog->base.desc_sets);
+   _mesa_hash_table_destroy(prog->base.desc_sets, NULL);
    if (prog->base.descpool)
       vkDestroyDescriptorPool(screen->dev, prog->base.descpool, NULL);
@@ -818,6 +881,8 @@ zink_destroy_compute_program(struct zink_screen *screen,
    _mesa_hash_table_destroy(comp->pipelines, NULL);
    zink_shader_cache_reference(screen, &comp->shader_cache, NULL);

+   zink_program_clear_desc_sets((struct zink_program*)comp, comp->base.desc_sets);
+   _mesa_hash_table_destroy(comp->base.desc_sets, NULL);
    if (comp->base.descpool)
       vkDestroyDescriptorPool(screen->dev, comp->base.descpool, NULL);
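The recycle path above is the subtle part of the lifecycle: a set returns to the free list only when the refcount shows no batch still holds it, and only if it is still present in the program's table (a set the table no longer knows about has nothing to recycle). A simplified model of that gate follows; the types and flag are illustrative, not the driver's structures.

/* model of the recycle gate: refcount check, then table membership check */
#include <stdio.h>

struct desc_set { int refcount; int in_cache; };

#define FREE_CAP 16
static struct desc_set *free_list[FREE_CAP];
static int num_free;

static void
recycle_desc_set(struct desc_set *zds)
{
   if (zds->refcount != 1)
      return;            /* a batch still holds it; a later reset retries */
   if (!zds->in_cache)
      return;            /* already removed from the lookup table */
   zds->in_cache = 0;    /* drop it from the table... */
   if (num_free < FREE_CAP)
      free_list[num_free++] = zds; /* ...so allocation can pop it later */
}

int main(void)
{
   struct desc_set zds = { .refcount = 2, .in_cache = 1 };
   recycle_desc_set(&zds);              /* no-op: still referenced */
   zds.refcount = 1;
   recycle_desc_set(&zds);              /* now lands on the free list */
   printf("free sets: %d\n", num_free); /* 1 */
   return 0;
}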

src/gallium/drivers/zink/zink_program.h

@@ -66,16 +66,18 @@ struct zink_shader_cache {
 struct zink_descriptor_set {
    struct pipe_reference reference; //incremented for batch usage
    VkDescriptorSet desc_set;
+   bool valid;
+   struct zink_resource **resources;
 };

 struct zink_program {
    struct pipe_reference reference;
+   struct hash_table *desc_sets;
    struct util_dynarray alloc_desc_sets;
    VkDescriptorPool descpool;
    VkDescriptorSetLayout dsl;
    unsigned num_descriptors;
-   unsigned descs_used;
 };

 struct zink_gfx_program {
@@ -166,11 +168,12 @@ zink_get_compute_pipeline(struct zink_screen *screen,
                           struct zink_compute_program *comp,
                           struct zink_compute_pipeline_state *state);

-void
-zink_program_invalidate_desc_set(struct zink_program *pg, struct zink_descriptor_set *zds);
 struct zink_descriptor_set *
 zink_program_allocate_desc_set(struct zink_context *ctx,
                                struct zink_batch *batch,
-                               struct zink_program *pg);
+                               struct zink_program *pg,
+                               uint32_t desc_hash,
+                               bool *cache_hit);
+
+void
+zink_program_recycle_desc_set(struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds);
 #endif

src/gallium/drivers/zink/zink_resource.c

@@ -25,6 +25,7 @@
 #include "zink_batch.h"
 #include "zink_context.h"
+#include "zink_program.h"
 #include "zink_screen.h"

 #include "vulkan/wsi/wsi_common.h"
@@ -78,6 +79,12 @@ zink_resource_destroy(struct pipe_screen *pscreen,
    } else
       vkDestroyImage(screen->dev, res->image, NULL);

+   util_dynarray_foreach(&res->desc_set_refs, struct zink_resource **, ref) {
+      if (**ref == res)
+         **ref = NULL;
+   }
+   util_dynarray_fini(&res->desc_set_refs);
+
    vkFreeMemory(screen->dev, res->mem, NULL);
    FREE(res);
 }
@@ -391,6 +398,7 @@ resource_create(struct pipe_screen *pscreen,
                                  &res->dt_stride);
    }

+   util_dynarray_init(&res->desc_set_refs, NULL);
    return &res->base;

 fail:
@@ -827,6 +835,13 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resource *res)
    }
 }

+void
+zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx)
+{
+   zds->resources[idx] = res;
+   util_dynarray_append(&res->desc_set_refs, struct zink_resource**, &zds->resources[idx]);
+}
+
 void
 zink_get_depth_stencil_resources(struct pipe_resource *res,
                                  struct zink_resource **out_z,
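desc_set_refs is the cheap inverse index that the comment in update_descriptors alludes to: instead of keeping a per-resource table of descriptor sets, each resource records the addresses of the zds->resources[] slots that point at it and NULLs them when it is destroyed, so a later cache hit fails the zds->resources[i] == res check. A standalone sketch of the idea, with simplified types and illustrative names:

/* model of desc_set_refs: a dying resource NULLs every slot pointing at it */
#include <stdio.h>
#include <stdlib.h>

struct resource { struct resource ***refs; int num_refs; int cap; };

static void
resource_desc_set_add(struct resource *res, struct resource **slot)
{
   *slot = res;
   if (res->num_refs == res->cap) {
      res->cap = res->cap ? res->cap * 2 : 4;
      res->refs = realloc(res->refs, res->cap * sizeof(*res->refs));
   }
   res->refs[res->num_refs++] = slot; /* remember where we're referenced */
}

static void
resource_destroy(struct resource *res)
{
   for (int i = 0; i < res->num_refs; i++)
      if (*res->refs[i] == res)
         *res->refs[i] = NULL; /* the cached slot now reads NULL != res */
   free(res->refs);
}

int main(void)
{
   struct resource res = {0};
   struct resource *set_slot = NULL; /* one slot of a set's resources[] */
   resource_desc_set_add(&res, &set_slot);
   resource_destroy(&res);
   printf("slot after destroy: %p\n", (void*)set_slot); /* (nil) */
   return 0;
}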

src/gallium/drivers/zink/zink_resource.h

@@ -28,9 +28,11 @@ struct pipe_screen;
 struct sw_displaytarget;
 struct zink_batch;
 struct zink_context;
+struct zink_descriptor_set;

 #include "util/u_transfer.h"
 #include "util/u_range.h"
+#include "util/u_dynarray.h"

 #include <vulkan/vulkan.h>
@@ -65,6 +67,8 @@ struct zink_resource {
    unsigned dt_stride;
    unsigned persistent_maps; //if nonzero, requires vkFlushMappedMemoryRanges during batch use

+   struct util_dynarray desc_set_refs;
+
    /* this has to be atomic for fence access, so we can't use a bitmask and make everything neat */
    uint8_t batch_uses[5]; //ZINK_NUM_BATCHES
 };
@@ -96,4 +100,7 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resource *res)
 uint32_t
 zink_get_resource_usage(struct zink_resource *res);

+void
+zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx);
+
 #endif