tu: Add an extra storage descriptor for isam
Based on a workaround that the proprietary (blob) driver uses. Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15288>
This commit is contained in:
parent
1ec3d39407
commit
f9d9c0172a
|
@ -1864,11 +1864,16 @@ tu_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
|
|||
dst[0] = va;
|
||||
dst[1] = va >> 32;
|
||||
} else {
|
||||
/* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
|
||||
uint64_t va = dst[4] | ((uint64_t)dst[5] << 32);
|
||||
va += offset;
|
||||
dst[4] = va;
|
||||
dst[5] = va >> 32;
|
||||
uint32_t *dst_desc = dst;
|
||||
for (unsigned i = 0;
|
||||
i < binding->size / (4 * A6XX_TEX_CONST_DWORDS);
|
||||
i++, dst_desc += A6XX_TEX_CONST_DWORDS) {
|
||||
/* Note: A6XX_TEX_CONST_5_DEPTH is always 0 */
|
||||
uint64_t va = dst_desc[4] | ((uint64_t)dst_desc[5] << 32);
|
||||
va += offset;
|
||||
dst_desc[4] = va;
|
||||
dst_desc[5] = va >> 32;
|
||||
}
|
||||
}
|
||||
|
||||
dst += binding->size / 4;
|
||||
|
|
|
@ -54,7 +54,7 @@ pool_base(struct tu_descriptor_pool *pool)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
descriptor_size(VkDescriptorType type)
|
||||
descriptor_size(struct tu_device *dev, VkDescriptorType type)
|
||||
{
|
||||
switch (type) {
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
|
@ -69,6 +69,16 @@ descriptor_size(VkDescriptorType type)
|
|||
* and samplers are actually two descriptors, so they have size 2.
|
||||
*/
|
||||
return A6XX_TEX_CONST_DWORDS * 4 * 2;
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
|
||||
/* When we support 16-bit storage, we need an extra descriptor setup as
|
||||
* a 32-bit array for isam to work.
|
||||
*/
|
||||
if (dev->physical_device->info->a6xx.storage_16bit) {
|
||||
return A6XX_TEX_CONST_DWORDS * 4 * 2;
|
||||
} else {
|
||||
return A6XX_TEX_CONST_DWORDS * 4;
|
||||
}
|
||||
default:
|
||||
return A6XX_TEX_CONST_DWORDS * 4;
|
||||
}
|
||||
|
@ -82,12 +92,12 @@ is_dynamic(VkDescriptorType type)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
mutable_descriptor_size(const VkMutableDescriptorTypeListVALVE *list)
|
||||
mutable_descriptor_size(struct tu_device *dev, const VkMutableDescriptorTypeListVALVE *list)
|
||||
{
|
||||
uint32_t max_size = 0;
|
||||
|
||||
for (uint32_t i = 0; i < list->descriptorTypeCount; i++) {
|
||||
uint32_t size = descriptor_size(list->pDescriptorTypes[i]);
|
||||
uint32_t size = descriptor_size(dev, list->pDescriptorTypes[i]);
|
||||
max_size = MAX2(max_size, size);
|
||||
}
|
||||
|
||||
|
@ -188,9 +198,9 @@ tu_CreateDescriptorSetLayout(
|
|||
* largest descriptor type that the binding can mutate to.
|
||||
*/
|
||||
set_layout->binding[b].size =
|
||||
mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[j]);
|
||||
mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[j]);
|
||||
} else {
|
||||
set_layout->binding[b].size = descriptor_size(binding->descriptorType);
|
||||
set_layout->binding[b].size = descriptor_size(device, binding->descriptorType);
|
||||
}
|
||||
|
||||
if (variable_flags && binding->binding < variable_flags->bindingCount &&
|
||||
|
@ -281,10 +291,12 @@ tu_descriptor_set_layout_destroy(struct tu_device *device,
|
|||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
tu_GetDescriptorSetLayoutSupport(
|
||||
VkDevice device,
|
||||
VkDevice _device,
|
||||
const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
|
||||
VkDescriptorSetLayoutSupport *pSupport)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_device, device, _device);
|
||||
|
||||
VkDescriptorSetLayoutBinding *bindings = NULL;
|
||||
VkResult result = vk_create_sorted_bindings(
|
||||
pCreateInfo->pBindings, pCreateInfo->bindingCount, &bindings);
|
||||
|
@ -334,9 +346,9 @@ tu_GetDescriptorSetLayoutSupport(
|
|||
}
|
||||
|
||||
descriptor_sz =
|
||||
mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]);
|
||||
mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]);
|
||||
} else {
|
||||
descriptor_sz = descriptor_size(binding->descriptorType);
|
||||
descriptor_sz = descriptor_size(device, binding->descriptorType);
|
||||
}
|
||||
uint64_t descriptor_alignment = 8;
|
||||
|
||||
|
@ -613,14 +625,11 @@ tu_CreateDescriptorPool(VkDevice _device,
|
|||
if (mutable_info && i < mutable_info->mutableDescriptorTypeListCount &&
|
||||
mutable_info->pMutableDescriptorTypeLists[i].descriptorTypeCount > 0) {
|
||||
bo_size +=
|
||||
mutable_descriptor_size(&mutable_info->pMutableDescriptorTypeLists[i]) *
|
||||
mutable_descriptor_size(device, &mutable_info->pMutableDescriptorTypeLists[i]) *
|
||||
pCreateInfo->pPoolSizes[i].descriptorCount;
|
||||
} else {
|
||||
/* Allocate the maximum size possible.
|
||||
* Since we don't support VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER for
|
||||
* mutable descriptors, we can set the default size of descriptor types.
|
||||
*/
|
||||
bo_size += A6XX_TEX_CONST_DWORDS * 4 *
|
||||
/* Allocate the maximum size possible. */
|
||||
bo_size += 2 * A6XX_TEX_CONST_DWORDS * 4 *
|
||||
pCreateInfo->pPoolSizes[i].descriptorCount;
|
||||
}
|
||||
continue;
|
||||
|
@ -628,7 +637,7 @@ tu_CreateDescriptorPool(VkDevice _device,
|
|||
break;
|
||||
}
|
||||
|
||||
bo_size += descriptor_size(pCreateInfo->pPoolSizes[i].type) *
|
||||
bo_size += descriptor_size(device, pCreateInfo->pPoolSizes[i].type) *
|
||||
pCreateInfo->pPoolSizes[i].descriptorCount;
|
||||
}
|
||||
|
||||
|
@ -842,8 +851,14 @@ write_buffer_descriptor(const struct tu_device *device,
|
|||
uint32_t *dst,
|
||||
const VkDescriptorBufferInfo *buffer_info)
|
||||
{
|
||||
bool storage_16bit = device->physical_device->info->a6xx.storage_16bit;
|
||||
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit
|
||||
* access, but we need to keep a 32-bit descriptor for readonly access via
|
||||
* isam.
|
||||
*/
|
||||
unsigned descriptors = storage_16bit ? 2 : 1;
|
||||
if (buffer_info->buffer == VK_NULL_HANDLE) {
|
||||
memset(dst, 0, A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
|
||||
memset(dst, 0, descriptors * A6XX_TEX_CONST_DWORDS * sizeof(uint32_t));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -853,21 +868,23 @@ write_buffer_descriptor(const struct tu_device *device,
|
|||
uint64_t va = buffer->iova + buffer_info->offset;
|
||||
uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range);
|
||||
|
||||
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */
|
||||
if (device->physical_device->info->a6xx.storage_16bit) {
|
||||
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 2);
|
||||
} else {
|
||||
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 4);
|
||||
for (unsigned i = 0; i < descriptors; i++) {
|
||||
if (storage_16bit && i == 0) {
|
||||
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_16_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 2);
|
||||
} else {
|
||||
dst[0] = A6XX_TEX_CONST_0_TILE_MODE(TILE6_LINEAR) | A6XX_TEX_CONST_0_FMT(FMT6_32_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 4);
|
||||
}
|
||||
dst[2] =
|
||||
A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
|
||||
dst[3] = 0;
|
||||
dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
|
||||
dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
|
||||
for (int j = 6; j < A6XX_TEX_CONST_DWORDS; j++)
|
||||
dst[j] = 0;
|
||||
dst += A6XX_TEX_CONST_DWORDS;
|
||||
}
|
||||
dst[2] =
|
||||
A6XX_TEX_CONST_2_BUFFER | A6XX_TEX_CONST_2_TYPE(A6XX_TEX_BUFFER);
|
||||
dst[3] = 0;
|
||||
dst[4] = A6XX_TEX_CONST_4_BASE_LO(va);
|
||||
dst[5] = A6XX_TEX_CONST_5_BASE_HI(va >> 32);
|
||||
for (int i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
|
||||
dst[i] = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -178,17 +178,19 @@ tu6_emit_load_state(struct tu_pipeline *pipeline, bool compute)
|
|||
FALLTHROUGH;
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: {
|
||||
unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4);
|
||||
/* IBO-backed resources only need one packet for all graphics stages */
|
||||
if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO,
|
||||
base, offset, count);
|
||||
base, offset, count * mul);
|
||||
}
|
||||
if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, SB6_CS_SHADER,
|
||||
base, offset, count);
|
||||
base, offset, count * mul);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
|
||||
case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
|
||||
/* nothing - input attachment doesn't use bindless */
|
||||
|
|
|
@ -119,8 +119,10 @@ typedef uint32_t xcb_window_t;
|
|||
#define MAX_PUSH_DESCRIPTORS 32
|
||||
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
|
||||
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
|
||||
#define MAX_DYNAMIC_BUFFERS \
|
||||
(MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
|
||||
#define MAX_DYNAMIC_BUFFERS_SIZE \
|
||||
(MAX_DYNAMIC_UNIFORM_BUFFERS + 2 * MAX_DYNAMIC_STORAGE_BUFFERS) * \
|
||||
A6XX_TEX_CONST_DWORDS
|
||||
|
||||
#define TU_MAX_DRM_DEVICES 8
|
||||
#define MAX_VIEWS 16
|
||||
#define MAX_BIND_POINTS 2 /* compute + graphics */
|
||||
|
@ -807,7 +809,7 @@ struct tu_descriptor_state
|
|||
{
|
||||
struct tu_descriptor_set *sets[MAX_SETS];
|
||||
struct tu_descriptor_set push_set;
|
||||
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
|
||||
uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS_SIZE];
|
||||
};
|
||||
|
||||
enum tu_cmd_dirty_bits
|
||||
|
|
|
@ -257,7 +257,8 @@ lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin)
|
|||
}
|
||||
|
||||
static void
|
||||
lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
|
||||
lower_ssbo_ubo_intrinsic(struct tu_device *dev,
|
||||
nir_builder *b, nir_intrinsic_instr *intrin)
|
||||
{
|
||||
const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];
|
||||
|
||||
|
@ -278,6 +279,16 @@ lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
|
|||
nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0);
|
||||
nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);
|
||||
|
||||
/* For isam, we need to use the appropriate descriptor if 16-bit storage is
|
||||
* enabled. Descriptor 0 is the 16-bit one, descriptor 1 is the 32-bit one.
|
||||
*/
|
||||
if (dev->physical_device->info->a6xx.storage_16bit &&
|
||||
intrin->intrinsic == nir_intrinsic_load_ssbo &&
|
||||
(nir_intrinsic_access(intrin) & ACCESS_CAN_REORDER) &&
|
||||
intrin->dest.ssa.bit_size > 16) {
|
||||
descriptor_idx = nir_iadd(b, descriptor_idx, nir_imm_int(b, 1));
|
||||
}
|
||||
|
||||
nir_ssa_def *results[MAX_SETS + 1] = { NULL };
|
||||
|
||||
if (nir_ssa_scalar_is_const(scalar_idx)) {
|
||||
|
@ -409,6 +420,7 @@ lower_image_deref(nir_builder *b,
|
|||
|
||||
static bool
|
||||
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
||||
struct tu_device *dev,
|
||||
struct tu_shader *shader,
|
||||
const struct tu_pipeline_layout *layout)
|
||||
{
|
||||
|
@ -446,7 +458,7 @@ lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
|
|||
case nir_intrinsic_ssbo_atomic_fmax:
|
||||
case nir_intrinsic_ssbo_atomic_fcomp_swap:
|
||||
case nir_intrinsic_get_ssbo_size:
|
||||
lower_ssbo_ubo_intrinsic(b, instr);
|
||||
lower_ssbo_ubo_intrinsic(dev, b, instr);
|
||||
return true;
|
||||
|
||||
case nir_intrinsic_image_deref_load:
|
||||
|
@ -560,6 +572,7 @@ lower_tex(nir_builder *b, nir_tex_instr *tex,
|
|||
}
|
||||
|
||||
struct lower_instr_params {
|
||||
struct tu_device *dev;
|
||||
struct tu_shader *shader;
|
||||
const struct tu_pipeline_layout *layout;
|
||||
};
|
||||
|
@ -573,7 +586,7 @@ lower_instr(nir_builder *b, nir_instr *instr, void *cb_data)
|
|||
case nir_instr_type_tex:
|
||||
return lower_tex(b, nir_instr_as_tex(instr), params->shader, params->layout);
|
||||
case nir_instr_type_intrinsic:
|
||||
return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->shader, params->layout);
|
||||
return lower_intrinsic(b, nir_instr_as_intrinsic(instr), params->dev, params->shader, params->layout);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
@ -626,12 +639,14 @@ gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
|
|||
}
|
||||
|
||||
static bool
|
||||
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
|
||||
tu_lower_io(nir_shader *shader, struct tu_device *dev,
|
||||
struct tu_shader *tu_shader,
|
||||
const struct tu_pipeline_layout *layout)
|
||||
{
|
||||
gather_push_constants(shader, tu_shader);
|
||||
|
||||
struct lower_instr_params params = {
|
||||
.dev = dev,
|
||||
.shader = tu_shader,
|
||||
.layout = layout,
|
||||
};
|
||||
|
@ -808,7 +823,7 @@ tu_shader_create(struct tu_device *dev,
|
|||
nir->info.stage == MESA_SHADER_GEOMETRY)
|
||||
tu_gather_xfb_info(nir, &so_info);
|
||||
|
||||
NIR_PASS_V(nir, tu_lower_io, shader, layout);
|
||||
NIR_PASS_V(nir, tu_lower_io, dev, shader, layout);
|
||||
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
|
||||
|
|
Loading…
Reference in New Issue