tu: Add an extra storage descriptor for isam

Based on a workaround the blob does.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15288>
Author: Connor Abbott, 2022-02-23 16:52:38 +01:00 (committed by Marge Bot)
parent 1ec3d39407
commit f9d9c0172a
5 changed files with 87 additions and 46 deletions
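
Background for the change below: turnip implements read-only SSBO loads with
the a6xx isam instruction, which (per the comments in this patch) only works
with a 32-bit texture descriptor, while newer a6xx otherwise wants a 16-bit
descriptor for combined 16-/32-bit storage access. The fix is to make every
storage-buffer binding twice as large on storage_16bit hardware and write two
descriptors back to back. A rough sketch of that layout, assuming
A6XX_TEX_CONST_DWORDS == 16 as in the a6xx headers (the enum names are
illustrative, not from the source):

   #define A6XX_TEX_CONST_DWORDS 16

   /* dword offset of each descriptor within one storage-buffer binding */
   enum {
      TU_STORAGE_DESC_16BIT = 0,                      /* FMT6_16_UINT, normal access */
      TU_STORAGE_DESC_32BIT = A6XX_TEX_CONST_DWORDS,  /* FMT6_32_UINT, isam loads    */
   };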

src/freedreno/vulkan/tu_cmd_buffer.c

@@ -1864,11 +1864,16 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
             dst[0] = va;
             dst[1] = va >> 32;
          } else {
-            /* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
-            uint64_t va = dst[4] | ((uint64_t)dst[5] << 32);
-            va += offset;
-            dst[4] = va;
-            dst[5] = va >> 32;
+            uint32_t *dst_desc = dst;
+            for (unsigned i = 0;
+                 i < binding->size / (4 * A6XX_TEX_CONST_DWORDS);
+                 i++, dst_desc += A6XX_TEX_CONST_DWORDS) {
+               /* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
+               uint64_t va = dst_desc[4] | ((uint64_t)dst_desc[5] << 32);
+               va += offset;
+               dst_desc[4] = va;
+               dst_desc[5] = va >> 32;
+            }
          }
          dst += binding->size / 4;
 
src/freedreno/vulkan/tu_descriptor_set.c

@@ -54,7 +54,7 @@ pool_base(struct tu_descriptor_pool *pool)
 }
 
 static uint32_t
-descriptor_size(VkDescriptorType type)
+descriptor_size(struct tu_device *dev, VkDescriptorType type)
 {
    switch (type) {
    case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
@@ -69,6 +69,16 @@ descriptor_size(VkDescriptorType type)
        * and samplers are actually two descriptors, so they have size 2.
        */
       return A6XX_TEX_CONST_DWORDS * 4 * 2;
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+      /* When we support 16-bit storage, we need an extra descriptor set up as
+       * a 32-bit array for isam to work.
+       */
+      if (dev->physical_device->info->a6xx.storage_16bit) {
+         return A6XX_TEX_CONST_DWORDS * 4 * 2;
+      } else {
+         return A6XX_TEX_CONST_DWORDS * 4;
+      }
    default:
       return A6XX_TEX_CONST_DWORDS * 4;
    }
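
Doubling descriptor_size() for storage buffers means binding->size, the
set-layout offsets, pool sizing, and load-state counts below all scale off one
value. A trivial check of the arithmetic, assuming A6XX_TEX_CONST_DWORDS == 16
as in the a6xx headers (a sketch, not part of the patch):

   #include <assert.h>

   #define A6XX_TEX_CONST_DWORDS 16

   int main(void)
   {
      unsigned one_desc  = A6XX_TEX_CONST_DWORDS * 4;     /* 64 bytes  */
      unsigned desc_pair = A6XX_TEX_CONST_DWORDS * 4 * 2; /* 128 bytes */
      assert(desc_pair == 2 * one_desc);
      return 0;
   }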
@@ -82,12 +92,12 @@ is_dynamic(VkDescriptorType type)
 }
 
 static uint32_t
-mutable_descriptor_size(const VkMutableDescriptorTypeListVALVE *list)
+mutable_descriptor_size(struct tu_device *dev, const VkMutableDescriptorTypeListVALVE *list)
 {
    uint32_t max_size = 0;
 
    for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
-      uint32_t size = descriptor_size(list->pDescriptorTypes[i]);
+      uint32_t size = descriptor_size(dev, list->pDescriptorTypes[i]);
       max_size = MAX2(max_size, size);
    }
 
@@ -188,9 +198,9 @@ tu_CreateDescriptorSetLayout(
           * largest descriptor type that the binding can mutate to.
           */
          set_layout->binding[b].size =
-            mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[j]);
+            mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[j]);
       } else {
-         set_layout->binding[b].size = descriptor_size(binding->descriptorType);
+         set_layout->binding[b].size = descriptor_size(device, binding->descriptorType);
       }
 
       if (variable_flags && binding->binding < variable_flags->bindingCount &&
if (variable_flags && binding->binding < variable_flags->bindingCount &&
@@ -281,10 +291,12 @@ tu_descriptor_set_layout_destroy(struct tu_device *device,
 
 VKAPI_ATTR void VKAPI_CALL
 tu_GetDescriptorSetLayoutSupport(
-   VkDevice device,
+   VkDevice _device,
    const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
    VkDescriptorSetLayoutSupport *pSupport)
 {
+   TU_FROM_HANDLE(tu_device, device, _device);
+
    VkDescriptorSetLayoutBinding *bindings = NULL;
    VkResult result = vk_create_sorted_bindings(
       pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
@@ -334,9 +346,9 @@ tu_GetDescriptorSetLayoutSupport(
          }
 
          descriptor_sz =
-            mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]);
+            mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]);
       } else {
-         descriptor_sz = descriptor_size(binding->descriptorType);
+         descriptor_sz = descriptor_size(device, binding->descriptorType);
       }
 
       uint64_t descriptor_alignment = 8;
@@ -613,14 +625,11 @@ tu_CreateDescriptorPool(VkDevice _device,
          if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount &&
             mutable_info->pMutableDescriptorTypeLists[i].descriptorTypeCount > 0) {
            bo_size +=
-              mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]) *
+              mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]) *
                  pCreateInfo->pPoolSizes[i].descriptorCount;
         } else {
-           /* Allocate the maximum size possible.
-            * Since we don't support VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER for
-            * mutable descriptors, we can set the default size of descriptor types.
-            */
-           bo_size += A6XX_TEX_CONST_DWORDS * 4 *
+           /* Allocate the maximum size possible. */
+           bo_size += 2 * A6XX_TEX_CONST_DWORDS * 4 *
              pCreateInfo->pPoolSizes[i].descriptorCount;
         }
         continue;
@@ -628,7 +637,7 @@ tu_CreateDescriptorPool(VkDevice _device,
         break;
      }
 
-     bo_size += descriptor_size(pCreateInfo->pPoolSizes[i].type) *
+     bo_size += descriptor_size(device, pCreateInfo->pPoolSizes[i].type) *
                 pCreateInfo->pPoolSizes[i].descriptorCount;
   }
@@ -842,8 +851,14 @@ write_buffer_descriptor(const struct tu_device *device,
                         uint32_t *dst,
                         const VkDescriptorBufferInfo *buffer_info)
 {
+   bool storage_16bit = device->physical_device->info->a6xx.storage_16bit;
+   /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit
+    * access, but we need to keep a 32-bit descriptor for readonly access via
+    * isam.
+    */
+   unsigned descriptors = storage_16bit ? 2 : 1;
    if (buffer_info->buffer == VK_NULL_HANDLE) {
-      memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
+      memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
       return;
    }
@@ -853,21 +868,23 @@ write_buffer_descriptor(const struct tu_device *device,
    uint64_t va = buffer->iova + buffer_info->offset;
    uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range);
 
-   /* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */
-   if (device->physical_device->info->a6xx.storage_16bit) {
-      dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
-      dst[1] = DIV_ROUND_UP(range, 2);
-   } else {
-      dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
-      dst[1] = DIV_ROUND_UP(range, 4);
+   for (unsigned i = 0; i < descriptors; i++) {
+      if (storage_16bit && i == 0) {
+         dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
+         dst[1] = DIV_ROUND_UP(range, 2);
+      } else {
+         dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
+         dst[1] = DIV_ROUND_UP(range, 4);
+      }
+      dst[2] =
+         A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
+      dst[3] = 0;
+      dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
+      dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
+      for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
+         dst[j] = 0;
+      dst += A6XX_TEX_CONST_DWORDS;
    }
-   dst[2] =
-      A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
-   dst[3] = 0;
-   dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
-   dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
-   for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
-      dst[i] = 0;
 }
 
 static void
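
A worked example of what the loop above emits (numbers only, not patch code):
with storage_16bit set and buffer_info->range == 100 bytes, descriptors == 2,
so the loop runs twice:

   /* i == 0: dst[0] selects FMT6_16_UINT, dst[1] = DIV_ROUND_UP(100, 2) == 50
    *         (the buffer viewed as 50 16-bit texels)
    * i == 1: dst[0] selects FMT6_32_UINT, dst[1] = DIV_ROUND_UP(100, 4) == 25
    *         (the same buffer viewed as 25 32-bit texels, for isam)
    *
    * Both iterations write identical BASE_LO/BASE_HI (dwords 4 and 5), so the
    * two descriptors alias the same memory at different element widths.
    */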

src/freedreno/vulkan/tu_pipeline.c

@@ -178,17 +178,19 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
          FALLTHROUGH;
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
-      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
+         unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4);
          /* IBO-backed resources only need one packet for all graphics stages */
          if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
             emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO,
-                            base, offset, count);
+                            base, offset, count * mul);
          }
          if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
             emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, SB6_CS_SHADER,
-                            base, offset, count);
+                            base, offset, count * mul);
          }
          break;
+      }
       case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
       case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
          /* nothing - input attachment doesn't use bindless */
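
CP_LOAD_STATE6 preloads `count` consecutive descriptors, and `count` here
counts elements of the binding, so it has to be scaled by how many descriptors
each element now occupies. A minimal sketch of that unit conversion
(hypothetical helper, same arithmetic as the hunk above):

   /* descriptors to preload for `count` elements of a binding */
   static unsigned
   load_state_count(unsigned binding_size_bytes, unsigned count)
   {
      unsigned mul = binding_size_bytes / (A6XX_TEX_CONST_DWORDS * 4);
      return count * mul; /* mul == 2 for storage buffers on storage_16bit HW */
   }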

src/freedreno/vulkan/tu_private.h

@@ -119,8 +119,10 @@ typedef uint32_t xcb_window_t;
 #define MAX_PUSH_DESCRIPTORS 32
 #define MAX_DYNAMIC_UNIFORM_BUFFERS 16
 #define MAX_DYNAMIC_STORAGE_BUFFERS 8
-#define MAX_DYNAMIC_BUFFERS \
-   (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
+#define MAX_DYNAMIC_BUFFERS_SIZE \
+   (MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \
+   A6XX_TEX_CONST_DWORDS
 #define TU_MAX_DRM_DEVICES 8
 #define MAX_VIEWS 16
 #define MAX_BIND_POINTS 2 /* compute + graphics */

@@ -807,7 +809,7 @@ struct tu_descriptor_state
 {
    struct tu_descriptor_set *sets[MAX_SETS];
    struct tu_descriptor_set push_set;
-   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
+   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
 };
 
 enum tu_cmd_dirty_bits
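
Plugging the limits above into the new macro (with A6XX_TEX_CONST_DWORDS == 16
from the a6xx headers) shows what the rename buys: dynamic storage buffers now
reserve two descriptors each. A sketch:

   /* (16 uniform + 2 * 8 storage) descriptors * 16 dwords each = 512 dwords,
    * i.e. dynamic_descriptors[] is 2 KiB per bind point. */
   _Static_assert((16 + 2 * 8) * 16 == 512, "MAX_DYNAMIC_BUFFERS_SIZE sketch");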

src/freedreno/vulkan/tu_shader.c

@@ -257,7 +257,8 @@ lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
 }
 
 static void
-lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
+lower_ssbo_ubo_intrinsic(struct tu_device *dev,
+                         nir_builder *b, nir_intrinsic_instr *intrin)
 {
    const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
 
@@ -278,6 +279,16 @@ lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
    nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0);
    nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);
 
+   /* For isam, we need to use the appropriate descriptor if 16-bit storage is
+    * enabled. Descriptor 0 is the 16-bit one, descriptor 1 is the 32-bit one.
+    */
+   if (dev->physical_device->info->a6xx.storage_16bit &&
+       intrin->intrinsic == nir_intrinsic_load_ssbo &&
+       (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
+       intrin->dest.ssa.bit_size > 16) {
+      descriptor_idx = nir_iadd(b, descriptor_idx, nir_imm_int(b, 1));
+   }
+
    nir_ssa_def *results[MAX_SETS + 1] = { NULL };
 
    if (nir_ssa_scalar_is_const(scalar_idx)) {
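
The index bump above targets exactly the loads the compiler may turn into
isam: read-only (ACCESS_CAN_REORDER) SSBO loads wider than 16 bits; 16-bit
loads and all writes keep descriptor 0. The predicate, restated as a
stand-alone helper (hypothetical name, same NIR accessors as the hunk):

   static bool
   needs_isam_descriptor(const nir_intrinsic_instr *intrin)
   {
      return intrin->intrinsic == nir_intrinsic_load_ssbo &&
             (nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
             intrin->dest.ssa.bit_size > 16; /* 16-bit loads use descriptor 0 */
   }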
@@ -409,6 +420,7 @@ lower_image_deref(nir_builder *b,
 
 static bool
 lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
+                struct tu_device *dev,
                 struct tu_shader *shader,
                 const struct tu_pipeline_layout *layout)
 {
@@ -446,7 +458,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
    case nir_intrinsic_ssbo_atomic_fmax:
    case nir_intrinsic_ssbo_atomic_fcomp_swap:
    case nir_intrinsic_get_ssbo_size:
-      lower_ssbo_ubo_intrinsic(b, instr);
+      lower_ssbo_ubo_intrinsic(dev, b, instr);
       return true;
 
    case nir_intrinsic_image_deref_load:
@@ -560,6 +572,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
 }
 
 struct lower_instr_params {
+   struct tu_device *dev;
    struct tu_shader *shader;
    const struct tu_pipeline_layout *layout;
 };
@@ -573,7 +586,7 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
    case nir_instr_type_tex:
       return lower_tex(b, nir_instr_as_tex(instr), params->shader, params->layout);
    case nir_instr_type_intrinsic:
-      return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->shader, params->layout);
+      return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout);
    default:
       return false;
    }
@@ -626,12 +639,14 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
 }
 
 static bool
-tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
+tu_lower_io(nir_shader *shader, struct tu_device *dev,
+            struct tu_shader *tu_shader,
             const struct tu_pipeline_layout *layout)
 {
    gather_push_constants(shader, tu_shader);
 
    struct lower_instr_params params = {
+      .dev = dev,
       .shader = tu_shader,
       .layout = layout,
    };
@@ -808,7 +823,7 @@ tu_shader_create(struct tu_device *dev,
        nir->info.stage == MESA_SHADER_GEOMETRY)
       tu_gather_xfb_info(nir, &so_info);
 
-   NIR_PASS_V(nir, tu_lower_io, shader, layout);
+   NIR_PASS_V(nir, tu_lower_io, dev, shader, layout);
 
    nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));