turnip: enable VK_KHR_16bit_storage on A650
A650 can use the same SSBO descriptor for both 32-bit and 16-bit access, which makes it easy to enable this extension.

Passes the tests that run under:
dEQP-VK.spirv_assembly.instruction.*.16bit_storage.*

Rebased and modified commit from Jonathan Marek.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9840>
This commit is contained in:
parent
14acc64c3b
commit
ce1a381e57
|
@ -170,11 +170,21 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
|
|||
case OPC_STL:
|
||||
case OPC_STP:
|
||||
case OPC_STLW:
|
||||
case OPC_STIB:
|
||||
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
|
||||
validate_reg_size(ctx, instr->regs[2], instr->cat6.type);
|
||||
validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF));
|
||||
break;
|
||||
case OPC_STIB:
|
||||
if (instr->flags & IR3_INSTR_B) {
|
||||
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
|
||||
validate_assert(ctx, !(instr->regs[2]->flags & IR3_REG_HALF));
|
||||
validate_reg_size(ctx, instr->regs[3], instr->cat6.type);
|
||||
} else {
|
||||
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
|
||||
validate_reg_size(ctx, instr->regs[2], instr->cat6.type);
|
||||
validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF));
|
||||
}
|
||||
break;
|
||||
default:
|
||||
validate_reg_size(ctx, instr->regs[0], instr->cat6.type);
|
||||
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
|
||||
|
|
|
@ -143,6 +143,7 @@ static const struct test {
|
|||
|
||||
INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"),
|
||||
INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"),
|
||||
INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"),
|
||||
|
||||
// TODO is this a real instruction? Or float -6.0 ?
|
||||
// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
|
||||
|
|
|
@ -1812,7 +1812,7 @@ void tu_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer,
|
|||
set->mapped_ptr = set_mem.map;
|
||||
set->va = set_mem.iova;
|
||||
|
||||
tu_update_descriptor_sets(tu_descriptor_set_to_handle(set),
|
||||
tu_update_descriptor_sets(cmd->device, tu_descriptor_set_to_handle(set),
|
||||
descriptorWriteCount, pDescriptorWrites, 0, NULL);
|
||||
|
||||
tu_CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, _layout, _set,
|
||||
|
@ -1851,7 +1851,7 @@ void tu_CmdPushDescriptorSetWithTemplateKHR(
|
|||
set->mapped_ptr = set_mem.map;
|
||||
set->va = set_mem.iova;
|
||||
|
||||
tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData);
|
||||
tu_update_descriptor_set_with_template(cmd->device, set, descriptorUpdateTemplate, pData);
|
||||
|
||||
tu_CmdBindDescriptorSets(commandBuffer, templ->bind_point, _layout, _set,
|
||||
1, (VkDescriptorSet[]) { tu_descriptor_set_to_handle(set) },
|
||||
|
|
|
@ -706,17 +706,23 @@ static uint32_t get_range(struct tu_buffer *buf, VkDeviceSize offset,
|
|||
}
|
||||
|
||||
static void
|
||||
write_buffer_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
|
||||
write_buffer_descriptor(const struct tu_device *device,
|
||||
uint32_t *dst,
|
||||
const VkDescriptorBufferInfo *buffer_info)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
|
||||
|
||||
assert((buffer_info->offset & 63) == 0); /* minStorageBufferOffsetAlignment */
|
||||
uint64_t va = tu_buffer_iova(buffer) + buffer_info->offset;
|
||||
uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range);
|
||||
range = ALIGN_POT(range, 4) / 4;
|
||||
dst[0] =
|
||||
A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT);
|
||||
dst[1] = range;
|
||||
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */
|
||||
if (device->physical_device->gpu_id >= 650) {
|
||||
dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_16_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 2);
|
||||
} else {
|
||||
dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT);
|
||||
dst[1] = DIV_ROUND_UP(range, 4);
|
||||
}
|
||||
dst[2] =
|
||||
A6XX_IBO_2_UNK4 | A6XX_IBO_2_TYPE(A6XX_TEX_1D) | A6XX_IBO_2_UNK31;
|
||||
dst[3] = 0;
|
||||
|
@ -784,7 +790,8 @@ write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
|
|||
}
|
||||
|
||||
void
|
||||
tu_update_descriptor_sets(VkDescriptorSet dstSetOverride,
|
||||
tu_update_descriptor_sets(const struct tu_device *device,
|
||||
VkDescriptorSet dstSetOverride,
|
||||
uint32_t descriptorWriteCount,
|
||||
const VkWriteDescriptorSet *pDescriptorWrites,
|
||||
uint32_t descriptorCopyCount,
|
||||
|
@ -823,12 +830,12 @@ tu_update_descriptor_sets(VkDescriptorSet dstSetOverride,
|
|||
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
|
||||
unsigned idx = writeset->dstArrayElement + j;
|
||||
idx += binding_layout->dynamic_offset_offset;
|
||||
write_buffer_descriptor(set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
|
||||
write_buffer_descriptor(device, set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
|
||||
writeset->pBufferInfo + j);
|
||||
break;
|
||||
}
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
write_buffer_descriptor(ptr, writeset->pBufferInfo + j);
|
||||
write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
|
@ -915,7 +922,8 @@ tu_UpdateDescriptorSets(VkDevice _device,
|
|||
uint32_t descriptorCopyCount,
|
||||
const VkCopyDescriptorSet *pDescriptorCopies)
|
||||
{
|
||||
tu_update_descriptor_sets(VK_NULL_HANDLE,
|
||||
TU_FROM_HANDLE(tu_device, device, _device);
|
||||
tu_update_descriptor_sets(device, VK_NULL_HANDLE,
|
||||
descriptorWriteCount, pDescriptorWrites,
|
||||
descriptorCopyCount, pDescriptorCopies);
|
||||
}
|
||||
|
@ -1023,6 +1031,7 @@ tu_DestroyDescriptorUpdateTemplate(
|
|||
|
||||
void
|
||||
tu_update_descriptor_set_with_template(
|
||||
const struct tu_device *device,
|
||||
struct tu_descriptor_set *set,
|
||||
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
|
||||
const void *pData)
|
||||
|
@ -1049,11 +1058,11 @@ tu_update_descriptor_set_with_template(
|
|||
break;
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
|
||||
write_buffer_descriptor(set->dynamic_descriptors + dst_offset, src);
|
||||
write_buffer_descriptor(device, set->dynamic_descriptors + dst_offset, src);
|
||||
break;
|
||||
}
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
|
||||
write_buffer_descriptor(ptr, src);
|
||||
write_buffer_descriptor(device, ptr, src);
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
|
||||
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
|
||||
|
@ -1099,9 +1108,10 @@ tu_UpdateDescriptorSetWithTemplate(
|
|||
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
|
||||
const void *pData)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_device, device, _device);
|
||||
TU_FROM_HANDLE(tu_descriptor_set, set, descriptorSet);
|
||||
|
||||
tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData);
|
||||
tu_update_descriptor_set_with_template(device, set, descriptorUpdateTemplate, pData);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
|
@ -340,6 +340,8 @@ void
|
|||
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceFeatures2 *pFeatures)
|
||||
{
|
||||
TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
|
||||
|
||||
pFeatures->features = (VkPhysicalDeviceFeatures) {
|
||||
.robustBufferAccess = true,
|
||||
.fullDrawIndexUint32 = true,
|
||||
|
@ -393,7 +395,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
|
||||
VkPhysicalDeviceVulkan11Features *features = (void *) ext;
|
||||
features->storageBuffer16BitAccess = false;
|
||||
features->storageBuffer16BitAccess = pdevice->gpu_id >= 650;
|
||||
features->uniformAndStorageBuffer16BitAccess = false;
|
||||
features->storagePushConstant16 = false;
|
||||
features->storageInputOutput16 = false;
|
||||
|
@ -489,7 +491,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
|
||||
VkPhysicalDevice16BitStorageFeatures *features =
|
||||
(VkPhysicalDevice16BitStorageFeatures *) ext;
|
||||
features->storageBuffer16BitAccess = false;
|
||||
features->storageBuffer16BitAccess = pdevice->gpu_id >= 650;
|
||||
features->uniformAndStorageBuffer16BitAccess = false;
|
||||
features->storagePushConstant16 = false;
|
||||
features->storageInputOutput16 = false;
|
||||
|
|
|
@ -112,6 +112,7 @@ EXTENSIONS = [
|
|||
Extension('VK_KHR_pipeline_executable_properties', 1, True),
|
||||
Extension('VK_KHR_shader_float_controls', 1, True),
|
||||
Extension('VK_KHR_shader_float16_int8', 1, True),
|
||||
Extension('VK_KHR_16bit_storage', 1, 'device->gpu_id >= 650'),
|
||||
]
|
||||
|
||||
MAX_API_VERSION = VkVersion(MAX_API_VERSION)
|
||||
|
|
|
@ -1552,7 +1552,8 @@ uint32_t
|
|||
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);
|
||||
|
||||
void
|
||||
tu_update_descriptor_sets(VkDescriptorSet overrideSet,
|
||||
tu_update_descriptor_sets(const struct tu_device *device,
|
||||
VkDescriptorSet overrideSet,
|
||||
uint32_t descriptorWriteCount,
|
||||
const VkWriteDescriptorSet *pDescriptorWrites,
|
||||
uint32_t descriptorCopyCount,
|
||||
|
@ -1560,6 +1561,7 @@ tu_update_descriptor_sets(VkDescriptorSet overrideSet,
|
|||
|
||||
void
|
||||
tu_update_descriptor_set_with_template(
|
||||
const struct tu_device *device,
|
||||
struct tu_descriptor_set *set,
|
||||
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
|
||||
const void *pData);
|
||||
|
|
|
@ -75,6 +75,7 @@ tu_spirv_to_nir(struct tu_device *dev,
|
|||
.runtime_descriptor_array = true,
|
||||
.float_controls = true,
|
||||
.float16 = true,
|
||||
.storage_16bit = dev->physical_device->gpu_id >= 650,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue