From f9d9c0172a7ae534ad05a1583fb1ab4d0f8775b9 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 23 Feb 2022 16:52:38 +0100 Subject: [PATCH] tu: Add an extra storage descriptor for isam Based on a workaround the blob does. Part-of: --- src/freedreno/vulkan/tu_cmd_buffer.c | 15 +++-- src/freedreno/vulkan/tu_descriptor_set.c | 77 +++++++++++++++--------- src/freedreno/vulkan/tu_pipeline.c | 8 ++- src/freedreno/vulkan/tu_private.h | 8 ++- src/freedreno/vulkan/tu_shader.c | 25 ++++++-- 5 files changed, 87 insertions(+), 46 deletions(-) diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c index 2608c529133..a91d4ea7645 100644 --- a/src/freedreno/vulkan/tu_cmd_buffer.c +++ b/src/freedreno/vulkan/tu_cmd_buffer.c @@ -1864,11 +1864,16 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, dst[0] = va; dst[1] = va >> 32; } else { - /* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */ - uint64_t va = dst[4] | ((uint64_t)dst[5] << 32); - va += offset; - dst[4] = va; - dst[5] = va >> 32; + uint32_t *dst_desc = dst; + for (unsigned i = 0; + i < binding->size / (4 * A6XX_TEX_CONST_DWORDS); + i++, dst_desc += A6XX_TEX_CONST_DWORDS) { + /* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */ + uint64_t va = dst_desc[4] | ((uint64_t)dst_desc[5] << 32); + va += offset; + dst_desc[4] = va; + dst_desc[5] = va >> 32; + } } dst += binding->size / 4; diff --git a/src/freedreno/vulkan/tu_descriptor_set.c b/src/freedreno/vulkan/tu_descriptor_set.c index a9e56ea797e..e7150ecafd3 100644 --- a/src/freedreno/vulkan/tu_descriptor_set.c +++ b/src/freedreno/vulkan/tu_descriptor_set.c @@ -54,7 +54,7 @@ pool_base(struct tu_descriptor_pool *pool) } static uint32_t -descriptor_size(VkDescriptorType type) +descriptor_size(struct tu_device *dev, VkDescriptorType type) { switch (type) { case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: @@ -69,6 +69,16 @@ descriptor_size(VkDescriptorType type) * and samplers are actually two descriptors, so they have size 2. 
*/ return A6XX_TEX_CONST_DWORDS * 4 * 2; + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: + /* When we support 16-bit storage, we need an extra descriptor setup as + * a 32-bit array for isam to work. + */ + if (dev->physical_device->info->a6xx.storage_16bit) { + return A6XX_TEX_CONST_DWORDS * 4 * 2; + } else { + return A6XX_TEX_CONST_DWORDS * 4; + } default: return A6XX_TEX_CONST_DWORDS * 4; } @@ -82,12 +92,12 @@ is_dynamic(VkDescriptorType type) } static uint32_t -mutable_descriptor_size(const VkMutableDescriptorTypeListVALVE *list) +mutable_descriptor_size(struct tu_device *dev, const VkMutableDescriptorTypeListVALVE *list) { uint32_t max_size = 0; for (uint32_t i = 0; i < list->descriptorTypeCount; i++) { - uint32_t size = descriptor_size(list->pDescriptorTypes[i]); + uint32_t size = descriptor_size(dev, list->pDescriptorTypes[i]); max_size = MAX2(max_size, size); } @@ -188,9 +198,9 @@ tu_CreateDescriptorSetLayout( * largest descriptor type that the binding can mutate to. 
*/ set_layout->binding[b].size = - mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[j]); + mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[j]); } else { - set_layout->binding[b].size = descriptor_size(binding->descriptorType); + set_layout->binding[b].size = descriptor_size(device, binding->descriptorType); } if (variable_flags && binding->binding < variable_flags->bindingCount && @@ -281,10 +291,12 @@ tu_descriptor_set_layout_destroy(struct tu_device *device, VKAPI_ATTR void VKAPI_CALL tu_GetDescriptorSetLayoutSupport( - VkDevice device, + VkDevice _device, const VkDescriptorSetLayoutCreateInfo *pCreateInfo, VkDescriptorSetLayoutSupport *pSupport) { + TU_FROM_HANDLE(tu_device, device, _device); + VkDescriptorSetLayoutBinding *bindings = NULL; VkResult result = vk_create_sorted_bindings( pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings); @@ -334,9 +346,9 @@ tu_GetDescriptorSetLayoutSupport( } descriptor_sz = - mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]); + mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]); } else { - descriptor_sz = descriptor_size(binding->descriptorType); + descriptor_sz = descriptor_size(device, binding->descriptorType); } uint64_t descriptor_alignment = 8; @@ -613,14 +625,11 @@ tu_CreateDescriptorPool(VkDevice _device, if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount && mutable_info->pMutableDescriptorTypeLists[i].descriptorTypeCount > 0) { bo_size += - mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]) * + mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]) * pCreateInfo->pPoolSizes[i].descriptorCount; } else { - /* Allocate the maximum size possible. - * Since we don't support VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER for - * mutable descriptors, we can set the default size of descriptor types. 
- */ - bo_size += A6XX_TEX_CONST_DWORDS * 4 * + /* Allocate the maximum size possible. */ + bo_size += 2 * A6XX_TEX_CONST_DWORDS * 4 * pCreateInfo->pPoolSizes[i].descriptorCount; } continue; @@ -628,7 +637,7 @@ tu_CreateDescriptorPool(VkDevice _device, break; } - bo_size += descriptor_size(pCreateInfo->pPoolSizes[i].type) * + bo_size += descriptor_size(device, pCreateInfo->pPoolSizes[i].type) * pCreateInfo->pPoolSizes[i].descriptorCount; } @@ -842,8 +851,14 @@ write_buffer_descriptor(const struct tu_device *device, uint32_t *dst, const VkDescriptorBufferInfo *buffer_info) { + bool storage_16bit = device->physical_device->info->a6xx.storage_16bit; + /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit + * access, but we need to keep a 32-bit descriptor for readonly access via + * isam. + */ + unsigned descriptors = storage_16bit ? 2 : 1; if (buffer_info->buffer == VK_NULL_HANDLE) { - memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t)); + memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t)); return; } @@ -853,21 +868,23 @@ write_buffer_descriptor(const struct tu_device *device, uint64_t va = buffer->iova + buffer_info->offset; uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range); - /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */ - if (device->physical_device->info->a6xx.storage_16bit) { - dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT); - dst[1] = DIV_ROUND_UP(range, 2); - } else { - dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT); - dst[1] = DIV_ROUND_UP(range, 4); + for (unsigned i = 0; i < descriptors; i++) { + if (storage_16bit && i == 0) { + dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT); + dst[1] = DIV_ROUND_UP(range, 2); + } else { + dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT); + dst[1] = 
DIV_ROUND_UP(range, 4); + } + dst[2] = + A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER); + dst[3] = 0; + dst[4] = A6XX_TEX_CONST_4_BASE_LO(va); + dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32); + for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++) + dst[j] = 0; + dst += A6XX_TEX_CONST_DWORDS; } - dst[2] = - A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER); - dst[3] = 0; - dst[4] = A6XX_TEX_CONST_4_BASE_LO(va); - dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32); - for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++) - dst[i] = 0; } static void diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 5d58d0b1d42..4e9131523e8 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -178,17 +178,19 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute) FALLTHROUGH; case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: - case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { + unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4); /* IBO-backed resources only need one packet for all graphics stages */ if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) { emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO, - base, offset, count); + base, offset, count * mul); } if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, SB6_CS_SHADER, - base, offset, count); + base, offset, count * mul); } break; + } case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: /* nothing - input attachment doesn't use bindless */ diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 0d342343aa4..7d1441475b9 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -119,8 +119,10 @@ typedef uint32_t xcb_window_t; #define MAX_PUSH_DESCRIPTORS 32 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16 #define 
MAX_DYNAMIC_STORAGE_BUFFERS 8 -#define MAX_DYNAMIC_BUFFERS \ - (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS) +#define MAX_DYNAMIC_BUFFERS_SIZE \ + ((MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \ + A6XX_TEX_CONST_DWORDS) /* in dwords; storage buffers may take two descriptors each */ + #define TU_MAX_DRM_DEVICES 8 #define MAX_VIEWS 16 #define MAX_BIND_POINTS 2 /* compute + graphics */ @@ -807,7 +809,7 @@ struct tu_descriptor_state { struct tu_descriptor_set *sets[MAX_SETS]; struct tu_descriptor_set push_set; - uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS]; + uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE]; }; enum tu_cmd_dirty_bits diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index abe8a71a9c3..ef6689c868c 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -257,7 +257,8 @@ lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin) } static void -lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) +lower_ssbo_ubo_intrinsic(struct tu_device *dev, + nir_builder *b, nir_intrinsic_instr *intrin) { const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic]; @@ -278,6 +279,16 @@ lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin) nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0); nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1); + /* For isam, we need to use the appropriate descriptor if 16-bit storage is + * enabled. Descriptor 0 is the 16-bit one, descriptor 1 is the 32-bit one. 
+ */ + if (dev->physical_device->info->a6xx.storage_16bit && + intrin->intrinsic == nir_intrinsic_load_ssbo && + (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) && + intrin->dest.ssa.bit_size > 16) { + descriptor_idx = nir_iadd(b, descriptor_idx, nir_imm_int(b, 1)); + } + nir_ssa_def *results[MAX_SETS + 1] = { NULL }; if (nir_ssa_scalar_is_const(scalar_idx)) { @@ -409,6 +420,7 @@ lower_image_deref(nir_builder *b, static bool lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, + struct tu_device *dev, struct tu_shader *shader, const struct tu_pipeline_layout *layout) { @@ -446,7 +458,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, case nir_intrinsic_ssbo_atomic_fmax: case nir_intrinsic_ssbo_atomic_fcomp_swap: case nir_intrinsic_get_ssbo_size: - lower_ssbo_ubo_intrinsic(b, instr); + lower_ssbo_ubo_intrinsic(dev, b, instr); return true; case nir_intrinsic_image_deref_load: @@ -560,6 +572,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex, } struct lower_instr_params { + struct tu_device *dev; struct tu_shader *shader; const struct tu_pipeline_layout *layout; }; @@ -573,7 +586,7 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data) case nir_instr_type_tex: return lower_tex(b, nir_instr_as_tex(instr), params->shader, params->layout); case nir_instr_type_intrinsic: - return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->shader, params->layout); + return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout); default: return false; } @@ -626,12 +639,14 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader) } static bool -tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader, +tu_lower_io(nir_shader *shader, struct tu_device *dev, + struct tu_shader *tu_shader, const struct tu_pipeline_layout *layout) { gather_push_constants(shader, tu_shader); struct lower_instr_params params = { + .dev = dev, .shader = tu_shader, .layout = layout, }; @@ -808,7 +823,7 @@ 
tu_shader_create(struct tu_device *dev, nir->info.stage == MESA_SHADER_GEOMETRY) tu_gather_xfb_info(nir, &so_info); - NIR_PASS_V(nir, tu_lower_io, shader, layout); + NIR_PASS_V(nir, tu_lower_io, dev, shader, layout); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));