From 6c4c995836664963a8cf2bfaed8ae1611d5ec075 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Fri, 2 Oct 2020 13:40:40 -0400 Subject: [PATCH] zink: add caching for descriptor sets this is a lot of churn that more or less amounts to hashing the descriptor state during draw and then performing lookups with this to determine whether we can reuse an existing descriptor set instead of allocating one Reviewed-by: Bas Nieuwenhuizen Part-of: --- src/gallium/drivers/zink/zink_batch.c | 20 +++--- src/gallium/drivers/zink/zink_batch.h | 2 +- src/gallium/drivers/zink/zink_draw.c | 34 ++++++++-- src/gallium/drivers/zink/zink_program.c | 81 +++++++++++++++++++++--- src/gallium/drivers/zink/zink_program.h | 13 ++-- src/gallium/drivers/zink/zink_resource.c | 15 +++++ src/gallium/drivers/zink/zink_resource.h | 7 ++ 7 files changed, 141 insertions(+), 31 deletions(-) diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index 676149ee12a..23eb773a895 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -52,16 +52,16 @@ zink_reset_batch(struct zink_context *ctx, struct zink_batch *batch) hash_table_foreach(batch->programs, entry) { struct zink_program *pg = (struct zink_program*)entry->key; - struct set *desc_sets = (struct set*)entry->data; - set_foreach(desc_sets, sentry) { - struct zink_descriptor_set *zds = (void*)sentry->key; + struct hash_table *desc_sets = (struct hash_table*)entry->data; + hash_table_foreach(desc_sets, sentry) { + struct zink_descriptor_set *zds = (void*)sentry->data; /* reset descriptor pools when no batch is using this program to avoid * having some inactive program hogging a billion descriptors */ pipe_reference(&zds->reference, NULL); - zink_program_invalidate_desc_set(pg, zds); + zink_program_recycle_desc_set(pg, sentry->hash, zds); } - _mesa_set_destroy(desc_sets, NULL); + _mesa_hash_table_destroy(desc_sets, NULL); if (batch->batch_id == ZINK_COMPUTE_BATCH_ID) { struct zink_compute_program *comp = (struct zink_compute_program*)entry->key; zink_compute_program_reference(screen, &comp, NULL); @@ -228,21 +228,21 @@ zink_batch_reference_program(struct zink_batch *batch, { struct hash_entry *entry = _mesa_hash_table_search(batch->programs, pg); if (!entry) { - entry = _mesa_hash_table_insert(batch->programs, pg, _mesa_pointer_set_create(NULL)); + entry = _mesa_hash_table_insert(batch->programs, pg, _mesa_hash_table_create(batch->programs, NULL, _mesa_key_pointer_equal)); pipe_reference(NULL, &pg->reference); } batch->has_work = true; } bool -zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, struct zink_descriptor_set *zds) +zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds) { struct hash_entry *entry = _mesa_hash_table_search(batch->programs, pg); assert(entry); - struct set *desc_sets = (void*)entry->data; - if (!_mesa_set_search(desc_sets, zds)) { + struct hash_table *desc_sets = (void*)entry->data; + if (!_mesa_hash_table_search_pre_hashed(desc_sets, hash, (void*)(uintptr_t)hash)) { pipe_reference(NULL, &zds->reference); - _mesa_set_add(desc_sets, zds); + _mesa_hash_table_insert_pre_hashed(desc_sets, hash, (void*)(uintptr_t)hash, zds); return true; } return false; diff --git a/src/gallium/drivers/zink/zink_batch.h b/src/gallium/drivers/zink/zink_batch.h index 989a50473a3..6645f53e986 100644 --- a/src/gallium/drivers/zink/zink_batch.h +++ b/src/gallium/drivers/zink/zink_batch.h @@ -97,5 +97,5 @@ zink_batch_reference_surface(struct zink_batch *batch, struct zink_surface *surface); bool -zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, struct zink_descriptor_set *zds); +zink_batch_add_desc_set(struct zink_batch *batch, struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds); #endif diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c index a94317f1ffa..935439e5ac4 100644 --- a/src/gallium/drivers/zink/zink_draw.c +++ b/src/gallium/drivers/zink/zink_draw.c @@ -224,12 +224,12 @@ get_gfx_program(struct zink_context *ctx) } static struct zink_descriptor_set * -get_descriptor_set(struct zink_context *ctx, bool is_compute) +get_descriptor_set(struct zink_context *ctx, bool is_compute, uint32_t desc_hash, bool *cache_hit) { struct zink_program *pg = is_compute ? (struct zink_program *)ctx->curr_compute : (struct zink_program *)ctx->curr_program; struct zink_batch *batch = is_compute ? &ctx->compute_batch : zink_curr_batch(ctx); zink_batch_reference_program(batch, pg); - return zink_program_allocate_desc_set(ctx, batch, pg); + return zink_program_allocate_desc_set(ctx, batch, pg, desc_hash, cache_hit); } struct zink_transition { @@ -321,6 +321,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is struct zink_transition transitions[MAX_DESCRIPTORS]; int num_transitions = 0; struct set *ht = _mesa_set_create(NULL, transition_hash, transition_equals); + uint32_t desc_hash = 0; for (int i = 0; i < num_stages; i++) { struct zink_shader *shader = stages[i]; @@ -352,6 +353,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is zink_resource(ctx->dummy_vertex_buffer)->buffer); buffer_infos[num_buffer_info].offset = res ? ctx->ubos[stage][index].buffer_offset : 0; buffer_infos[num_buffer_info].range = res ? ctx->ubos[stage][index].buffer_size : VK_WHOLE_SIZE; + desc_hash = XXH32(&buffer_infos[num_buffer_info], sizeof(VkDescriptorBufferInfo), desc_hash); if (res) add_transition(res, 0, VK_ACCESS_UNIFORM_READ_BIT, stage, &transitions[num_transitions], &num_transitions, ht); wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info; @@ -378,6 +380,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is buffer_infos[num_buffer_info].offset = 0; buffer_infos[num_buffer_info].range = VK_WHOLE_SIZE; } + desc_hash = XXH32(&buffer_infos[num_buffer_info], sizeof(VkDescriptorBufferInfo), desc_hash); wds[num_wds].pBufferInfo = buffer_infos + num_buffer_info; ++num_buffer_info; } else { @@ -396,9 +399,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is res = psampler_view ? zink_resource(psampler_view->texture) : NULL; if (!res) break; - if (res->base.target == PIPE_BUFFER) + if (res->base.target == PIPE_BUFFER) { wds[num_wds].pTexelBufferView = &sampler_view->buffer_view; - else { + desc_hash = XXH32(&sampler_view->base.u.buf, sizeof(sampler_view->base.u.buf), desc_hash); + } else { imageview = sampler_view->image_view; layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; sampler = ctx->samplers[stage][index + k]; @@ -414,10 +418,12 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is assert(image_view); surface_refs[num_surface_refs++] = image_view->surface; res = zink_resource(image_view->base.resource); + if (!res) break; if (image_view->base.resource->target == PIPE_BUFFER) { wds[num_wds].pTexelBufferView = &image_view->buffer_view; + desc_hash = XXH32(&image_view->base.u.buf, sizeof(image_view->base.u.buf), desc_hash); } else { imageview = image_view->surface->image_view; layout = VK_IMAGE_LAYOUT_GENERAL; @@ -448,6 +454,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: wds[num_wds].pTexelBufferView = &buffer_view[0]; + desc_hash = XXH32(&buffer_view[0], sizeof(VkBufferView), desc_hash); break; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -456,6 +463,7 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is image_infos[num_image_info].sampler = sampler; if (!k) wds[num_wds].pImageInfo = image_infos + num_image_info; + desc_hash = XXH32(&image_infos[num_image_info], sizeof(VkDescriptorImageInfo), desc_hash); ++num_image_info; break; default: @@ -468,8 +476,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is image_infos[num_image_info].sampler = ctx->samplers[stage][index + k]; if (!k) wds[num_wds].pImageInfo = image_infos + num_image_info; + desc_hash = XXH32(&image_infos[num_image_info], sizeof(VkDescriptorImageInfo), desc_hash); ++num_image_info; - } + } else + desc_hash = XXH32(&res->buffer, sizeof(VkBuffer), desc_hash); } } @@ -492,8 +502,9 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is } } + bool cache_hit = false; struct zink_program *pg = is_compute ? &ctx->curr_compute->base : &ctx->curr_program->base; - struct zink_descriptor_set *zds = get_descriptor_set(ctx, is_compute); + struct zink_descriptor_set *zds = get_descriptor_set(ctx, is_compute, desc_hash, &cache_hit); assert(zds != VK_NULL_HANDLE); batch = is_compute ? &ctx->compute_batch : zink_curr_batch(ctx); @@ -506,8 +517,17 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is if (res) { need_flush |= zink_batch_reference_resource_rw(batch, res, resources[i].write) == check_flush_id; } + /* if we got a cache hit, we have to verify that the cached set is still valid; + * we store the vk resource to the set here to avoid a more complex and costly mechanism of maintaining a + * hash table on every resource with the associated descriptor sets that then needs to be iterated through + * whenever a resource is destroyed + */ + cache_hit = cache_hit && zds->resources[i] == res; + if (zds->resources[i] != res) + zink_resource_desc_set_add(res, zds, i); } - vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL); + if (!cache_hit) + vkUpdateDescriptorSets(screen->dev, num_wds, wds, 0, NULL); for (int i = 0; i < num_surface_refs; i++) { if (surface_refs[i]) zink_batch_reference_surface(batch, surface_refs[i]); diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index 9fd5d4cc29f..1f3796757e5 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -563,6 +563,10 @@ zink_create_gfx_program(struct zink_context *ctx, if (!prog->layout) goto fail; + prog->base.desc_sets = _mesa_hash_table_create(NULL, NULL, _mesa_key_pointer_equal); + if (!prog->base.desc_sets) + goto fail; + util_dynarray_init(&prog->base.alloc_desc_sets, NULL); return prog; @@ -664,6 +668,10 @@ zink_create_compute_program(struct zink_context *ctx, struct zink_shader *shader if (!comp->layout) goto fail; + comp->base.desc_sets = _mesa_hash_table_create(NULL, NULL, _mesa_key_pointer_equal); + if (!comp->base.desc_sets) + goto fail; + util_dynarray_init(&comp->base.alloc_desc_sets, NULL); return comp; @@ -674,6 +682,14 @@ fail: return NULL; } +static inline void +desc_set_invalidate_resources(struct zink_program *pg, struct zink_descriptor_set *zds) +{ + for (unsigned i = 0; i < pg->num_descriptors; i++) + zds->resources[i] = NULL; + zds->valid = false; +} + static struct zink_descriptor_set * allocate_desc_set(struct zink_screen *screen, struct zink_program *pg, unsigned descs_used) { @@ -700,9 +716,13 @@ allocate_desc_set(struct zink_screen *screen, struct zink_program *pg, unsigned struct zink_descriptor_set *alloc = ralloc_array(pg, struct zink_descriptor_set, bucket_size); assert(alloc); + struct zink_resource **resources = rzalloc_array(pg, struct zink_resource*, pg->num_descriptors * bucket_size); + assert(resources); for (unsigned i = 0; i < bucket_size; i ++) { struct zink_descriptor_set *zds = &alloc[i]; pipe_reference_init(&zds->reference, 1); + zds->valid = false; + zds->resources = &resources[i * pg->num_descriptors]; zds->desc_set = desc_set[i]; if (i > 0) util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds); @@ -713,39 +733,80 @@ allocate_desc_set(struct zink_screen *screen, struct zink_program *pg, unsigned struct zink_descriptor_set * zink_program_allocate_desc_set(struct zink_context *ctx, struct zink_batch *batch, - struct zink_program *pg) + struct zink_program *pg, + uint32_t hash, + bool *cache_hit) { + *cache_hit = false; struct zink_descriptor_set *zds; struct zink_screen *screen = zink_screen(ctx->base.screen); + struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash); + if (he) { + zds = (void*)he->data; + /* this shouldn't happen, but if we somehow get a cache hit on an invalidated, active desc set then + * we probably should just crash here rather than later + */ + assert(zds->valid); + } + if (he) { + zds = (void*)he->data; + *cache_hit = zds->valid; + if (zink_batch_add_desc_set(batch, pg, hash, zds)) + batch->descs_used += pg->num_descriptors; + return zds; + } if (util_dynarray_num_elements(&pg->alloc_desc_sets, struct zink_descriptor_set *)) { /* grab one off the allocated array */ zds = util_dynarray_pop(&pg->alloc_desc_sets, struct zink_descriptor_set *); + //printf("POP%u %p %u\n", batch->batch_id, pg, hash); goto out; } - unsigned descs_used = pg->descs_used; + unsigned descs_used = _mesa_hash_table_num_entries(pg->desc_sets); if (descs_used + pg->num_descriptors > ZINK_DEFAULT_MAX_DESCS) { batch = zink_flush_batch(ctx, batch); zink_batch_reference_program(batch, pg); - return zink_program_allocate_desc_set(ctx, batch, pg); + return zink_program_allocate_desc_set(ctx, batch, pg, hash, cache_hit); } zds = allocate_desc_set(screen, pg, descs_used); + //printf("NEW%u %p %u (%u total - %u / %u)\n", batch->batch_id, pg, hash, + //_mesa_hash_table_num_entries(pg->desc_sets), + //_mesa_hash_table_num_entries(pg->desc_sets)); out: - if (zink_batch_add_desc_set(batch, pg, zds)) + zds->valid = true; + _mesa_hash_table_insert_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash, zds); + if (zink_batch_add_desc_set(batch, pg, hash, zds)) batch->descs_used += pg->num_descriptors; return zds; } void -zink_program_invalidate_desc_set(struct zink_program *pg, struct zink_descriptor_set *zds) +zink_program_recycle_desc_set(struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds) { + /* if desc set is still in use by a batch, don't recache */ uint32_t refcount = p_atomic_read(&zds->reference.count); - /* refcount > 1 means this is currently in use, so we can't recycle it yet */ - if (refcount == 1) - util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds); + if (refcount != 1) + return; + struct hash_entry *he = _mesa_hash_table_search_pre_hashed(pg->desc_sets, hash, (void*)(uintptr_t)hash); + if (!he) + /* desc sets can be used multiple times in the same batch */ + return; + _mesa_hash_table_remove(pg->desc_sets, he); + util_dynarray_append(&pg->alloc_desc_sets, struct zink_descriptor_set *, zds); +} + +static void +zink_program_clear_desc_sets(struct zink_program *pg, struct hash_table *ht) +{ + //printf("CLEAR %p\n", pg); + hash_table_foreach(ht, entry) { + struct zink_descriptor_set *zds = entry->data; + desc_set_invalidate_resources(pg, zds); + } + _mesa_hash_table_clear(ht, NULL); } static void @@ -786,6 +847,8 @@ zink_destroy_gfx_program(struct zink_screen *screen, } zink_shader_cache_reference(screen, &prog->shader_cache, NULL); + zink_program_clear_desc_sets((struct zink_program*)prog, prog->base.desc_sets); + _mesa_hash_table_destroy(prog->base.desc_sets, NULL); if (prog->base.descpool) vkDestroyDescriptorPool(screen->dev, prog->base.descpool, NULL); @@ -818,6 +881,8 @@ zink_destroy_compute_program(struct zink_screen *screen, _mesa_hash_table_destroy(comp->pipelines, NULL); zink_shader_cache_reference(screen, &comp->shader_cache, NULL); + zink_program_clear_desc_sets((struct zink_program*)comp, comp->base.desc_sets); + _mesa_hash_table_destroy(comp->base.desc_sets, NULL); if (comp->base.descpool) vkDestroyDescriptorPool(screen->dev, comp->base.descpool, NULL); diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index e00505b5fa4..ce3fa6f3fa4 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -66,16 +66,18 @@ struct zink_shader_cache { struct zink_descriptor_set { struct pipe_reference reference; //incremented for batch usage VkDescriptorSet desc_set; + bool valid; + struct zink_resource **resources; }; struct zink_program { struct pipe_reference reference; + struct hash_table *desc_sets; struct util_dynarray alloc_desc_sets; VkDescriptorPool descpool; VkDescriptorSetLayout dsl; unsigned num_descriptors; - unsigned descs_used; }; struct zink_gfx_program { @@ -166,11 +168,12 @@ zink_get_compute_pipeline(struct zink_screen *screen, struct zink_compute_program *comp, struct zink_compute_pipeline_state *state); -void -zink_program_invalidate_desc_set(struct zink_program *pg, struct zink_descriptor_set *zds); - struct zink_descriptor_set * zink_program_allocate_desc_set(struct zink_context *ctx, struct zink_batch *batch, - struct zink_program *pg); + struct zink_program *pg, + uint32_t desc_hash, + bool *cache_hit); +void +zink_program_recycle_desc_set(struct zink_program *pg, uint32_t hash, struct zink_descriptor_set *zds); #endif diff --git a/src/gallium/drivers/zink/zink_resource.c b/src/gallium/drivers/zink/zink_resource.c index f98d8b853a3..0c1f368b341 100644 --- a/src/gallium/drivers/zink/zink_resource.c +++ b/src/gallium/drivers/zink/zink_resource.c @@ -25,6 +25,7 @@ #include "zink_batch.h" #include "zink_context.h" +#include "zink_program.h" #include "zink_screen.h" #include "vulkan/wsi/wsi_common.h" @@ -78,6 +79,12 @@ zink_resource_destroy(struct pipe_screen *pscreen, } else vkDestroyImage(screen->dev, res->image, NULL); + util_dynarray_foreach(&res->desc_set_refs, struct zink_resource **, ref) { + if (**ref == res) + **ref = NULL; + } + util_dynarray_fini(&res->desc_set_refs); + vkFreeMemory(screen->dev, res->mem, NULL); FREE(res); } @@ -391,6 +398,7 @@ resource_create(struct pipe_screen *pscreen, &res->dt_stride); } + util_dynarray_init(&res->desc_set_refs, NULL); return &res->base; fail: @@ -827,6 +835,13 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resou } } +void +zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx) +{ + zds->resources[idx] = res; + util_dynarray_append(&res->desc_set_refs, struct zink_resource**, &zds->resources[idx]); +} + void zink_get_depth_stencil_resources(struct pipe_resource *res, struct zink_resource **out_z, diff --git a/src/gallium/drivers/zink/zink_resource.h b/src/gallium/drivers/zink/zink_resource.h index a731c50812f..3e115c11666 100644 --- a/src/gallium/drivers/zink/zink_resource.h +++ b/src/gallium/drivers/zink/zink_resource.h @@ -28,9 +28,11 @@ struct pipe_screen; struct sw_displaytarget; struct zink_batch; struct zink_context; +struct zink_descriptor_set; #include "util/u_transfer.h" #include "util/u_range.h" +#include "util/u_dynarray.h" #include @@ -65,6 +67,8 @@ struct zink_resource { unsigned dt_stride; unsigned persistent_maps; //if nonzero, requires vkFlushMappedMemoryRanges during batch use + struct util_dynarray desc_set_refs; + /* this has to be atomic for fence access, so we can't use a bitmask and make everything neat */ uint8_t batch_uses[5]; //ZINK_NUM_BATCHES }; @@ -96,4 +100,7 @@ zink_resource_setup_transfer_layouts(struct zink_context *ctx, struct zink_resou uint32_t zink_get_resource_usage(struct zink_resource *res); + +void +zink_resource_desc_set_add(struct zink_resource *res, struct zink_descriptor_set *zds, unsigned idx); #endif