turnip: enable VK_KHR_16bit_storage on A650

A650 can use the same SSBO descriptor for both 32-bit and 16-bit access,
which makes it easy to enable this extension.

Passes tests that run under:

dEQP-VK.spirv_assembly.instruction.*.16bit_storage.*

Based on a commit by Jonathan Marek, rebased and modified.

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9840>
This commit is contained in:
Danylo Piliaiev 2021-03-23 18:39:32 +02:00 committed by Marge Bot
parent 14acc64c3b
commit ce1a381e57
8 changed files with 45 additions and 18 deletions

View File

@ -170,11 +170,21 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
case OPC_STL:
case OPC_STP:
case OPC_STLW:
case OPC_STIB:
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->regs[2], instr->cat6.type);
validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF));
break;
case OPC_STIB:
if (instr->flags & IR3_INSTR_B) {
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
validate_assert(ctx, !(instr->regs[2]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->regs[3], instr->cat6.type);
} else {
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));
validate_reg_size(ctx, instr->regs[2], instr->cat6.type);
validate_assert(ctx, !(instr->regs[3]->flags & IR3_REG_HALF));
}
break;
default:
validate_reg_size(ctx, instr->regs[0], instr->cat6.type);
validate_assert(ctx, !(instr->regs[1]->flags & IR3_REG_HALF));

View File

@ -143,6 +143,7 @@ static const struct test {
INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"),
INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"),
INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 r0.z, r0.x, 2"),
// TODO is this a real instruction? Or float -6.0 ?
// INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),

View File

@ -1812,7 +1812,7 @@ void tu_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer,
set->mapped_ptr = set_mem.map;
set->va = set_mem.iova;
tu_update_descriptor_sets(tu_descriptor_set_to_handle(set),
tu_update_descriptor_sets(cmd->device, tu_descriptor_set_to_handle(set),
descriptorWriteCount, pDescriptorWrites, 0, NULL);
tu_CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, _layout, _set,
@ -1851,7 +1851,7 @@ void tu_CmdPushDescriptorSetWithTemplateKHR(
set->mapped_ptr = set_mem.map;
set->va = set_mem.iova;
tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData);
tu_update_descriptor_set_with_template(cmd->device, set, descriptorUpdateTemplate, pData);
tu_CmdBindDescriptorSets(commandBuffer, templ->bind_point, _layout, _set,
1, (VkDescriptorSet[]) { tu_descriptor_set_to_handle(set) },

View File

@ -706,17 +706,23 @@ static uint32_t get_range(struct tu_buffer *buf, VkDeviceSize offset,
}
static void
write_buffer_descriptor(uint32_t *dst, const VkDescriptorBufferInfo *buffer_info)
write_buffer_descriptor(const struct tu_device *device,
uint32_t *dst,
const VkDescriptorBufferInfo *buffer_info)
{
TU_FROM_HANDLE(tu_buffer, buffer, buffer_info->buffer);
assert((buffer_info->offset & 63) == 0); /* minStorageBufferOffsetAlignment */
uint64_t va = tu_buffer_iova(buffer) + buffer_info->offset;
uint32_t range = get_range(buffer, buffer_info->offset, buffer_info->range);
range = ALIGN_POT(range, 4) / 4;
dst[0] =
A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT);
dst[1] = range;
/* newer a6xx allows using 16-bit descriptor for both 16-bit and 32-bit access */
if (device->physical_device->gpu_id >= 650) {
dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_16_UINT);
dst[1] = DIV_ROUND_UP(range, 2);
} else {
dst[0] = A6XX_IBO_0_TILE_MODE(TILE6_LINEAR) | A6XX_IBO_0_FMT(FMT6_32_UINT);
dst[1] = DIV_ROUND_UP(range, 4);
}
dst[2] =
A6XX_IBO_2_UNK4 | A6XX_IBO_2_TYPE(A6XX_TEX_1D) | A6XX_IBO_2_UNK31;
dst[3] = 0;
@ -784,7 +790,8 @@ write_sampler_push(uint32_t *dst, const struct tu_sampler *sampler)
}
void
tu_update_descriptor_sets(VkDescriptorSet dstSetOverride,
tu_update_descriptor_sets(const struct tu_device *device,
VkDescriptorSet dstSetOverride,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
uint32_t descriptorCopyCount,
@ -823,12 +830,12 @@ tu_update_descriptor_sets(VkDescriptorSet dstSetOverride,
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
unsigned idx = writeset->dstArrayElement + j;
idx += binding_layout->dynamic_offset_offset;
write_buffer_descriptor(set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
write_buffer_descriptor(device, set->dynamic_descriptors + A6XX_TEX_CONST_DWORDS * idx,
writeset->pBufferInfo + j);
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
write_buffer_descriptor(ptr, writeset->pBufferInfo + j);
write_buffer_descriptor(device, ptr, writeset->pBufferInfo + j);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
@ -915,7 +922,8 @@ tu_UpdateDescriptorSets(VkDevice _device,
uint32_t descriptorCopyCount,
const VkCopyDescriptorSet *pDescriptorCopies)
{
tu_update_descriptor_sets(VK_NULL_HANDLE,
TU_FROM_HANDLE(tu_device, device, _device);
tu_update_descriptor_sets(device, VK_NULL_HANDLE,
descriptorWriteCount, pDescriptorWrites,
descriptorCopyCount, pDescriptorCopies);
}
@ -1023,6 +1031,7 @@ tu_DestroyDescriptorUpdateTemplate(
void
tu_update_descriptor_set_with_template(
const struct tu_device *device,
struct tu_descriptor_set *set,
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const void *pData)
@ -1049,11 +1058,11 @@ tu_update_descriptor_set_with_template(
break;
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
write_buffer_descriptor(set->dynamic_descriptors + dst_offset, src);
write_buffer_descriptor(device, set->dynamic_descriptors + dst_offset, src);
break;
}
case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
write_buffer_descriptor(ptr, src);
write_buffer_descriptor(device, ptr, src);
break;
case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
@ -1099,9 +1108,10 @@ tu_UpdateDescriptorSetWithTemplate(
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const void *pData)
{
TU_FROM_HANDLE(tu_device, device, _device);
TU_FROM_HANDLE(tu_descriptor_set, set, descriptorSet);
tu_update_descriptor_set_with_template(set, descriptorUpdateTemplate, pData);
tu_update_descriptor_set_with_template(device, set, descriptorUpdateTemplate, pData);
}
VkResult

View File

@ -340,6 +340,8 @@ void
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2 *pFeatures)
{
TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
pFeatures->features = (VkPhysicalDeviceFeatures) {
.robustBufferAccess = true,
.fullDrawIndexUint32 = true,
@ -393,7 +395,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
VkPhysicalDeviceVulkan11Features *features = (void *) ext;
features->storageBuffer16BitAccess = false;
features->storageBuffer16BitAccess = pdevice->gpu_id >= 650;
features->uniformAndStorageBuffer16BitAccess = false;
features->storagePushConstant16 = false;
features->storageInputOutput16 = false;
@ -489,7 +491,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
VkPhysicalDevice16BitStorageFeatures *features =
(VkPhysicalDevice16BitStorageFeatures *) ext;
features->storageBuffer16BitAccess = false;
features->storageBuffer16BitAccess = pdevice->gpu_id >= 650;
features->uniformAndStorageBuffer16BitAccess = false;
features->storagePushConstant16 = false;
features->storageInputOutput16 = false;

View File

@ -112,6 +112,7 @@ EXTENSIONS = [
Extension('VK_KHR_pipeline_executable_properties', 1, True),
Extension('VK_KHR_shader_float_controls', 1, True),
Extension('VK_KHR_shader_float16_int8', 1, True),
Extension('VK_KHR_16bit_storage', 1, 'device->gpu_id >= 650'),
]
MAX_API_VERSION = VkVersion(MAX_API_VERSION)

View File

@ -1552,7 +1552,8 @@ uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);
void
tu_update_descriptor_sets(VkDescriptorSet overrideSet,
tu_update_descriptor_sets(const struct tu_device *device,
VkDescriptorSet overrideSet,
uint32_t descriptorWriteCount,
const VkWriteDescriptorSet *pDescriptorWrites,
uint32_t descriptorCopyCount,
@ -1560,6 +1561,7 @@ tu_update_descriptor_sets(VkDescriptorSet overrideSet,
void
tu_update_descriptor_set_with_template(
const struct tu_device *device,
struct tu_descriptor_set *set,
VkDescriptorUpdateTemplate descriptorUpdateTemplate,
const void *pData);

View File

@ -75,6 +75,7 @@ tu_spirv_to_nir(struct tu_device *dev,
.runtime_descriptor_array = true,
.float_controls = true,
.float16 = true,
.storage_16bit = dev->physical_device->gpu_id >= 650,
},
};