From 6d233e74ad01439c5472b748354c2ef918570893 Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Thu, 1 Oct 2020 16:51:49 -0400 Subject: [PATCH] zink: use dynamic offsets for first ubo this lets us avoid invalidating the ubo descriptor state, which reduces our cache overhead Reviewed-by: Bas Nieuwenhuizen Part-of: --- .../drivers/zink/nir_to_spirv/nir_to_spirv.c | 5 ++- src/gallium/drivers/zink/zink_compiler.c | 6 +-- src/gallium/drivers/zink/zink_draw.c | 40 +++++++++++++++++-- src/gallium/drivers/zink/zink_program.c | 8 +++- src/gallium/drivers/zink/zink_program.h | 1 + 5 files changed, 50 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c index 2c176fe1c28..fdc269932c1 100644 --- a/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c +++ b/src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c @@ -677,6 +677,7 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index) switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: assert(index < PIPE_MAX_CONSTANT_BUFFERS); return stage_offset + index; @@ -978,7 +979,9 @@ emit_bo(struct ntv_context *ctx, struct nir_variable *var) spirv_builder_emit_descriptor_set(&ctx->builder, var_id, ssbo ? ZINK_DESCRIPTOR_TYPE_SSBO : ZINK_DESCRIPTOR_TYPE_UBO); int binding = zink_binding(ctx->stage, - ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : + /* only make the first ubo dynamic to stay within driver limits */ + (ctx->num_ubos == 1) ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, var->data.binding + i); spirv_builder_emit_binding(&ctx->builder, var_id, binding); } diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index fa580f582aa..7ff8b30d88f 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -639,13 +639,13 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir, (also it's just easier) */ for (unsigned i = 0; i < (ubo_array ? glsl_get_aoa_size(var->type) : 1); i++) { - + VkDescriptorType vktype = !ubo_index ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; int binding = zink_binding(nir->info.stage, - VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + vktype, cur_ubo++); ret->bindings[ztype][ret->num_bindings[ztype]].index = ubo_index++; ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding; - ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; + ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype; ret->bindings[ztype][ret->num_bindings[ztype]].size = 1; ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index); ret->num_bindings[ztype]++; diff --git a/src/gallium/drivers/zink/zink_draw.c b/src/gallium/drivers/zink/zink_draw.c index c7a025e9f84..f7fbfdf7bbd 100644 --- a/src/gallium/drivers/zink/zink_draw.c +++ b/src/gallium/drivers/zink/zink_draw.c @@ -281,6 +281,13 @@ add_transition(struct zink_resource *res, VkImageLayout layout, VkAccessFlags fl t->stage |= pipeline; } +static int +cmp_dynamic_offset_binding(const void *a, const void *b) +{ + const uint32_t *binding_a = a, *binding_b = b; + return *binding_a - *binding_b; +} + struct zink_descriptor_resource { struct zink_resource *res; bool write; @@ -318,6 +325,12 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is unsigned num_image_info[ZINK_DESCRIPTOR_TYPES] = {0}; unsigned num_surface_refs = 0; struct zink_shader **stages; + struct { + uint32_t binding; + uint32_t offset; + } dynamic_buffers[PIPE_MAX_CONSTANT_BUFFERS]; + uint32_t dynamic_offsets[PIPE_MAX_CONSTANT_BUFFERS]; + unsigned dynamic_offset_idx = 0; unsigned num_stages = is_compute ? 1 : ZINK_SHADER_COUNT; if (is_compute) @@ -341,7 +354,8 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is for (int j = 0; j < shader->num_bindings[h]; j++) { int index = shader->bindings[h][j].index; - if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || + shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { assert(ctx->ubos[stage][index].buffer_size <= screen->info.props.limits.maxUniformBufferRange); struct zink_resource *res = zink_resource(ctx->ubos[stage][index].buffer); assert(num_wds[h] < num_bindings); @@ -353,7 +367,13 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is (screen->info.rb2_feats.nullDescriptor ? VK_NULL_HANDLE : zink_resource(ctx->dummy_vertex_buffer)->buffer); - buffer_infos[h][num_buffer_info[h]].offset = res ? ctx->ubos[stage][index].buffer_offset : 0; + if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) { + buffer_infos[h][num_buffer_info[h]].offset = 0; + /* we're storing this to qsort later */ + dynamic_buffers[dynamic_offset_idx].binding = shader->bindings[h][j].binding; + dynamic_buffers[dynamic_offset_idx++].offset = res ? ctx->ubos[stage][index].buffer_offset : 0; + } else + buffer_infos[h][num_buffer_info[h]].offset = res ? ctx->ubos[stage][index].buffer_offset : 0; buffer_infos[h][num_buffer_info[h]].range = res ? ctx->ubos[stage][index].buffer_size : VK_WHOLE_SIZE; desc_hash[h] = XXH32(&buffer_infos[h][num_buffer_info[h]], sizeof(VkDescriptorBufferInfo), desc_hash[h]); if (res) @@ -506,6 +526,18 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is } _mesa_set_destroy(ht, NULL); + /* Values are taken from pDynamicOffsets in an order such that all entries for set N come before set N+1; + * within a set, entries are ordered by the binding numbers in the descriptor set layouts + * - vkCmdBindDescriptorSets spec + * + * because of this, we have to sort all the dynamic offsets by their associated binding to ensure they + * match what the driver expects + */ + if (dynamic_offset_idx > 1) + qsort(dynamic_buffers, dynamic_offset_idx, sizeof(uint32_t) * 2, cmp_dynamic_offset_binding); + for (int i = 0; i < dynamic_offset_idx; i++) + dynamic_offsets[i] = dynamic_buffers[i].offset; + struct zink_batch *batch = NULL; bool cache_hit[ZINK_DESCRIPTOR_TYPES]; struct zink_descriptor_set *zds[ZINK_DESCRIPTOR_TYPES]; @@ -543,10 +575,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is if (is_compute) vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE, - ctx->curr_compute->layout, h, 1, &zds[h]->desc_set, 0, NULL); + ctx->curr_compute->layout, h, 1, &zds[h]->desc_set, h == ZINK_DESCRIPTOR_TYPE_UBO ? dynamic_offset_idx : 0, dynamic_offsets); else vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - ctx->curr_program->layout, h, 1, &zds[h]->desc_set, 0, NULL); + ctx->curr_program->layout, h, 1, &zds[h]->desc_set, h == ZINK_DESCRIPTOR_TYPE_UBO ? dynamic_offset_idx : 0, dynamic_offsets); for (int i = 0; i < num_stages; i++) { struct zink_shader *shader = stages[i]; diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index e91dcfaacd3..18d427ff3e9 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -212,8 +212,12 @@ create_desc_set_layout(VkDevice dev, switch (i) { case ZINK_DESCRIPTOR_TYPE_UBO: if (type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] != -1) { - num_type_sizes = 1; - type_sizes[0] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]]; + type_sizes[num_type_sizes] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]]; + num_type_sizes++; + } + if (type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] != -1) { + type_sizes[num_type_sizes] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC]]; + num_type_sizes++; } break; case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW: diff --git a/src/gallium/drivers/zink/zink_program.h b/src/gallium/drivers/zink/zink_program.h index fc97ba40258..c95a51149a4 100644 --- a/src/gallium/drivers/zink/zink_program.h +++ b/src/gallium/drivers/zink/zink_program.h @@ -112,6 +112,7 @@ zink_desc_type_from_vktype(VkDescriptorType type) { switch (type) { case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: return ZINK_DESCRIPTOR_TYPE_UBO; case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: