zink: use dynamic offsets for first ubo

this lets us avoid invalidating the ubo descriptor state, which reduces our
cache overhead

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9348>
Mike Blumenkrantz 2020-10-01 16:51:49 -04:00 committed by Marge Bot
parent d4304ab0d2
commit 6d233e74ad
5 changed files with 50 additions and 10 deletions
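
Note (not part of the commit): the saving comes from how Vulkan handles dynamic uniform buffers. With VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC the buffer offset is not baked into the descriptor write; it is supplied at bind time through pDynamicOffsets, so a new offset never forces the descriptor set to be rewritten or re-hashed. A minimal sketch of that API shape, with cmdbuf, pipeline_layout, and desc_set as hypothetical handles:

   /* the descriptor was written once with offset 0; the real offset is
    * passed here, so changing it never touches the set itself */
   uint32_t ubo_offset = 256; /* hypothetical suballocation offset */
   vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
                           pipeline_layout,
                           0,               /* firstSet */
                           1, &desc_set,    /* one descriptor set */
                           1, &ubo_offset); /* one dynamic offset */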

@@ -677,6 +677,7 @@ zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
    switch (type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       assert(index < PIPE_MAX_CONSTANT_BUFFERS);
       return stage_offset + index;
@@ -978,7 +979,9 @@ emit_bo(struct ntv_context *ctx, struct nir_variable *var)
    spirv_builder_emit_descriptor_set(&ctx->builder, var_id, ssbo ? ZINK_DESCRIPTOR_TYPE_SSBO : ZINK_DESCRIPTOR_TYPE_UBO);
    int binding = zink_binding(ctx->stage,
-                              ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+                              ssbo ? VK_DESCRIPTOR_TYPE_STORAGE_BUFFER :
+                              /* only make the first ubo dynamic to stay within driver limits */
+                              (ctx->num_ubos == 1) ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                               var->data.binding + i);
    spirv_builder_emit_binding(&ctx->builder, var_id, binding);
 }
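
Note (not part of the commit): the "driver limits" in the comment above refer to the separate, much smaller budget Vulkan gives dynamic uniform buffers; VkPhysicalDeviceLimits::maxDescriptorSetUniformBuffersDynamic is only required to be 8, while plain uniform buffers get a far larger cap, so only the first ubo per stage is promoted to the dynamic type. A hedged sketch of how those limits are queried, with pdev as a hypothetical VkPhysicalDevice:

   VkPhysicalDeviceProperties props;
   vkGetPhysicalDeviceProperties(pdev, &props);
   /* often as low as 8; every dynamic ubo in a set counts against it */
   uint32_t max_dynamic_ubos = props.limits.maxDescriptorSetUniformBuffersDynamic;
   /* plain ubos have a much larger budget */
   uint32_t max_plain_ubos = props.limits.maxDescriptorSetUniformBuffers;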

@@ -639,13 +639,13 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
            (also it's just easier)
          */
         for (unsigned i = 0; i < (ubo_array ? glsl_get_aoa_size(var->type) : 1); i++) {
+           VkDescriptorType vktype = !ubo_index ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = zink_binding(nir->info.stage,
-                                      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+                                      vktype,
                                       cur_ubo++);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = ubo_index++;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
-           ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+           ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;

@@ -281,6 +281,13 @@ add_transition(struct zink_resource *res, VkImageLayout layout, VkAccessFlags fl
    t->stage |= pipeline;
 }
 
+static int
+cmp_dynamic_offset_binding(const void *a, const void *b)
+{
+   const uint32_t *binding_a = a, *binding_b = b;
+   return *binding_a - *binding_b;
+}
+
 struct zink_descriptor_resource {
    struct zink_resource *res;
    bool write;
@@ -318,6 +325,12 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is
    unsigned num_image_info[ZINK_DESCRIPTOR_TYPES] = {0};
    unsigned num_surface_refs = 0;
    struct zink_shader **stages;
+   struct {
+      uint32_t binding;
+      uint32_t offset;
+   } dynamic_buffers[PIPE_MAX_CONSTANT_BUFFERS];
+   uint32_t dynamic_offsets[PIPE_MAX_CONSTANT_BUFFERS];
+   unsigned dynamic_offset_idx = 0;
 
    unsigned num_stages = is_compute ? 1 : ZINK_SHADER_COUNT;
    if (is_compute)
@@ -341,7 +354,8 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is
       for (int j = 0; j < shader->num_bindings[h]; j++) {
          int index = shader->bindings[h][j].index;
-         if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
+         if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ||
+             shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
             assert(ctx->ubos[stage][index].buffer_size <= screen->info.props.limits.maxUniformBufferRange);
             struct zink_resource *res = zink_resource(ctx->ubos[stage][index].buffer);
             assert(num_wds[h] < num_bindings);
@@ -353,7 +367,13 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is
                                                   (screen->info.rb2_feats.nullDescriptor ?
                                                    VK_NULL_HANDLE :
                                                    zink_resource(ctx->dummy_vertex_buffer)->buffer);
-            buffer_infos[h][num_buffer_info[h]].offset = res ? ctx->ubos[stage][index].buffer_offset : 0;
+            if (shader->bindings[h][j].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
+               buffer_infos[h][num_buffer_info[h]].offset = 0;
+               /* we're storing this to qsort later */
+               dynamic_buffers[dynamic_offset_idx].binding = shader->bindings[h][j].binding;
+               dynamic_buffers[dynamic_offset_idx++].offset = res ? ctx->ubos[stage][index].buffer_offset : 0;
+            } else
+               buffer_infos[h][num_buffer_info[h]].offset = res ? ctx->ubos[stage][index].buffer_offset : 0;
             buffer_infos[h][num_buffer_info[h]].range = res ? ctx->ubos[stage][index].buffer_size : VK_WHOLE_SIZE;
             desc_hash[h] = XXH32(&buffer_infos[h][num_buffer_info[h]], sizeof(VkDescriptorBufferInfo), desc_hash[h]);
             if (res)
@@ -506,6 +526,18 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is
    }
    _mesa_set_destroy(ht, NULL);
 
+   /* Values are taken from pDynamicOffsets in an order such that all entries for set N come before set N+1;
+    * within a set, entries are ordered by the binding numbers in the descriptor set layouts
+    * - vkCmdBindDescriptorSets spec
+    *
+    * because of this, we have to sort all the dynamic offsets by their associated binding to ensure they
+    * match what the driver expects
+    */
+   if (dynamic_offset_idx > 1)
+      qsort(dynamic_buffers, dynamic_offset_idx, sizeof(uint32_t) * 2, cmp_dynamic_offset_binding);
+   for (int i = 0; i < dynamic_offset_idx; i++)
+      dynamic_offsets[i] = dynamic_buffers[i].offset;
+
    struct zink_batch *batch = NULL;
    bool cache_hit[ZINK_DESCRIPTOR_TYPES];
    struct zink_descriptor_set *zds[ZINK_DESCRIPTOR_TYPES];
@@ -543,10 +575,10 @@ update_descriptors(struct zink_context *ctx, struct zink_screen *screen, bool is
          if (is_compute)
             vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_COMPUTE,
-                                    ctx->curr_compute->layout, h, 1, &zds[h]->desc_set, 0, NULL);
+                                    ctx->curr_compute->layout, h, 1, &zds[h]->desc_set, h == ZINK_DESCRIPTOR_TYPE_UBO ? dynamic_offset_idx : 0, dynamic_offsets);
          else
             vkCmdBindDescriptorSets(batch->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
-                                    ctx->curr_program->layout, h, 1, &zds[h]->desc_set, 0, NULL);
+                                    ctx->curr_program->layout, h, 1, &zds[h]->desc_set, h == ZINK_DESCRIPTOR_TYPE_UBO ? dynamic_offset_idx : 0, dynamic_offsets);
    for (int i = 0; i < num_stages; i++) {
       struct zink_shader *shader = stages[i];
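
Note (not part of the commit): the spec language quoted in the hunk above means pDynamicOffsets must be ordered by binding number within the set, not by the order the offsets were gathered while walking the shader stages, hence the qsort. A small worked example with hypothetical bindings and offsets (assumes <stdlib.h> and the cmp_dynamic_offset_binding helper added above):

   struct { uint32_t binding, offset; } bufs[] = {
      { 5, 768 }, { 0, 0 }, { 2, 256 },   /* gathered in stage-iteration order */
   };
   qsort(bufs, 3, sizeof(bufs[0]), cmp_dynamic_offset_binding);
   /* bufs is now {0,0}, {2,256}, {5,768}; copying out the offset fields
    * gives pDynamicOffsets = {0, 256, 768}, matching the layout's binding order */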

@@ -212,8 +212,12 @@ create_desc_set_layout(VkDevice dev,
       switch (i) {
       case ZINK_DESCRIPTOR_TYPE_UBO:
          if (type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER] != -1) {
-            num_type_sizes = 1;
-            type_sizes[0] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]];
+            type_sizes[num_type_sizes] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]];
+            num_type_sizes++;
+         }
+         if (type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] != -1) {
+            type_sizes[num_type_sizes] = sizes[type_map[VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC]];
+            num_type_sizes++;
          }
          break;
       case ZINK_DESCRIPTOR_TYPE_SAMPLER_VIEW:
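
Note (not part of the commit): with the change above, the UBO descriptor set's layout and pool bookkeeping track two distinct descriptor types instead of one. Conceptually the pool sizes end up along these lines (a sketch only; the counts are hypothetical, not taken from zink):

   VkDescriptorPoolSize type_sizes[] = {
      { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC,  1 },  /* first ubo per stage */
      { VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,         12 },  /* remaining ubos */
   };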

@@ -112,6 +112,7 @@ zink_desc_type_from_vktype(VkDescriptorType type)
 {
    switch (type) {
    case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
       return ZINK_DESCRIPTOR_TYPE_UBO;
    case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
    case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: