panvk: Move to vk_properties

This remove GetPhysicalDeviceProperties2 in favor of vk_properties.

This report the same values as previous on vulkaninfo.

In case some missing values, the same values as the ARM proprietary
driver were used.

Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com>
Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28715>
This commit is contained in:
Mary Guillemard 2024-04-12 10:38:34 +02:00 committed by Marge Bot
parent a9f170f613
commit f7f9b3d170
1 changed files with 389 additions and 347 deletions

View File

@ -12,6 +12,7 @@
#include <sys/sysinfo.h>
#include "util/disk_cache.h"
#include "git_sha1.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
@ -30,8 +31,14 @@
#include "genxml/gen_macros.h"
#define ARM_VENDOR_ID 0x13b5
#define MAX_VIEWPORTS 1
#define MAX_PUSH_DESCRIPTORS 32
/* We reserve one ubo for push constant, one for sysvals and one per-set for the
* descriptor metadata */
#define RESERVED_UBO_COUNT 6
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32 - RESERVED_UBO_COUNT
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16)
static int
get_cache_uuid(uint16_t family, void *uuid)
@ -215,6 +222,364 @@ get_features(const struct panvk_physical_device *device,
};
}
static void
get_device_properties(const struct panvk_physical_device *device,
struct vk_properties *properties)
{
/* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
VkSampleCountFlags sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
uint64_t os_page_size = 4096;
os_get_page_size(&os_page_size);
*properties = (struct vk_properties){
.apiVersion = panvk_get_vk_version(),
.driverVersion = vk_get_driver_version(),
.vendorID = ARM_VENDOR_ID,
/* Collect arch_major, arch_minor, arch_rev and product_major,
* as done by the Arm driver.
*/
.deviceID = device->kmod.props.gpu_prod_id << 16,
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
/* Vulkan 1.0 limits */
/* Maximum texture dimension is 2^16. */
.maxImageDimension1D = (1 << 16),
.maxImageDimension2D = (1 << 16),
.maxImageDimension3D = (1 << 16),
.maxImageDimensionCube = (1 << 16),
.maxImageArrayLayers = (1 << 16),
/* Currently limited by the 1D texture size, which is 2^16.
* TODO: If we expose buffer views as 2D textures, we can increase the
* limit.
*/
.maxTexelBufferElements = (1 << 16),
/* Each uniform entry is 16-byte and the number of entries is encoded in a
* 12-bit field, with the minus(1) modifier, which gives 2^20.
*/
.maxUniformBufferRange = 1 << 20,
/* Storage buffer access is lowered to globals, so there's no limit here,
* except for the SW-descriptor we use to encode storage buffer
* descriptors, where the size is a 32-bit field.
*/
.maxStorageBufferRange = UINT32_MAX,
/* 128 bytes of push constants, so we're aligned with the minimum Vulkan
* requirements.
*/
.maxPushConstantsSize = 128,
/* There's no HW limit here. Should we advertize something smaller? */
.maxMemoryAllocationCount = UINT32_MAX,
/* Again, no hardware limit, but most drivers seem to advertive 64k. */
.maxSamplerAllocationCount = 64 * 1024,
/* A cache line. */
.bufferImageGranularity = 64,
/* Sparse binding not supported yet. */
.sparseAddressSpaceSize = 0,
/* Software limit. Pick the minimum required by Vulkan, because Bifrost
* GPUs don't have unified descriptor tables, which forces us to
* agregatte all descriptors from all sets and dispatch them to per-type
* descriptor tables emitted at draw/dispatch time.
* The more sets we support the more copies we are likely to have to do
* at draw time.
*/
.maxBoundDescriptorSets = 4,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSamplers = UINT16_MAX,
.maxDescriptorSetSamplers = UINT16_MAX,
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
* for our internal UBOs.
*/
.maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
.maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
/* SSBOs are limited by the size of a uniform buffer which contains our
* panvk_ssbo_desc objects.
* panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
* 16-byte too. The number of entries is encoded in a 12-bit field, with
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
* descriptors.
*/
.maxPerStageDescriptorStorageBuffers = 1 << 12,
.maxDescriptorSetStorageBuffers = 1 << 12,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSampledImages = UINT16_MAX,
.maxDescriptorSetSampledImages = UINT16_MAX,
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
*/
.maxPerStageDescriptorStorageImages = 1 << 8,
.maxDescriptorSetStorageImages = 1 << 8,
/* A maximum of 8 color render targets, and one depth-stencil render
* target.
*/
.maxPerStageDescriptorInputAttachments = 9,
.maxDescriptorSetInputAttachments = 9,
/* Could be the sum of all maxPerStageXxx values, but we limit ourselves
* to 2^16 to make things simpler.
*/
.maxPerStageResources = 1 << 16,
/* Software limits to keep VkCommandBuffer tracking sane. */
.maxDescriptorSetUniformBuffersDynamic = 16,
.maxDescriptorSetStorageBuffersDynamic = 8,
/* Software limit to keep VkCommandBuffer tracking sane. The HW supports
* up to 2^9 vertex attributes.
*/
.maxVertexInputAttributes = 16,
.maxVertexInputBindings = 16,
/* MALI_ATTRIBUTE::offset is 32-bit. */
.maxVertexInputAttributeOffset = UINT32_MAX,
/* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
.maxVertexInputBindingStride = UINT32_MAX,
/* 32 vec4 varyings. */
.maxVertexOutputComponents = 128,
/* Tesselation shaders not supported. */
.maxTessellationGenerationLevel = 0,
.maxTessellationPatchSize = 0,
.maxTessellationControlPerVertexInputComponents = 0,
.maxTessellationControlPerVertexOutputComponents = 0,
.maxTessellationControlPerPatchOutputComponents = 0,
.maxTessellationControlTotalOutputComponents = 0,
.maxTessellationEvaluationInputComponents = 0,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry shaders not supported. */
.maxGeometryShaderInvocations = 0,
.maxGeometryInputComponents = 0,
.maxGeometryOutputComponents = 0,
.maxGeometryOutputVertices = 0,
.maxGeometryTotalOutputComponents = 0,
/* 32 vec4 varyings. */
.maxFragmentInputComponents = 128,
/* 8 render targets. */
.maxFragmentOutputAttachments = 8,
/* We don't support dual source blending yet. */
.maxFragmentDualSrcAttachments = 0,
/* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
* above).
*/
.maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
/* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
* (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
* really make sense to expose this amount of memory, especially since
* it's backed by global memory anyway.
*/
.maxComputeSharedMemorySize = 32768,
/* Software limit to meet Vulkan 1.0 requirements. We split the
* dispatch in several jobs if it's too big.
*/
.maxComputeWorkGroupCount = {65535, 65535, 65535},
/* We have 10 bits to encode the local-size, and there's a minus(1)
* modifier, so, a size of 1 takes no bit.
*/
.maxComputeWorkGroupInvocations = 1 << 10,
.maxComputeWorkGroupSize = {1 << 10, 1 << 10, 1 << 10},
/* 8-bit subpixel precision. */
.subPixelPrecisionBits = 8,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
/* Software limit. */
.maxDrawIndexedIndexValue = UINT32_MAX,
/* Make it one for now. */
.maxDrawIndirectCount = 1,
.maxSamplerLodBias = 255,
.maxSamplerAnisotropy = 16,
.maxViewports = 1,
/* Same as the framebuffer limit. */
.maxViewportDimensions = {(1 << 14), (1 << 14)},
/* Encoded in a 16-bit signed integer. */
.viewportBoundsRange = {INT16_MIN, INT16_MAX},
.viewportSubPixelBits = 0,
/* Align on a page. */
.minMemoryMapAlignment = os_page_size,
/* Some compressed texture formats require 128-byte alignment. */
.minTexelBufferOffsetAlignment = 64,
/* Always aligned on a uniform slot (vec4). */
.minUniformBufferOffsetAlignment = 16,
/* Lowered to global accesses, which happen at the 32-bit granularity. */
.minStorageBufferOffsetAlignment = 4,
/* Signed 4-bit value. */
.minTexelOffset = -8,
.maxTexelOffset = 7,
.minTexelGatherOffset = -8,
.maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = 8,
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
.maxFramebufferLayers = 256,
.framebufferColorSampleCounts = sample_counts,
.framebufferDepthSampleCounts = sample_counts,
.framebufferStencilSampleCounts = sample_counts,
.framebufferNoAttachmentsSampleCounts = sample_counts,
.maxColorAttachments = 8,
.sampledImageColorSampleCounts = sample_counts,
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.sampledImageDepthSampleCounts = sample_counts,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 0,
.maxClipDistances = 0,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 0,
.discreteQueuePriorities = 1,
.pointSizeRange = {0.125, 4095.9375},
.lineWidthRange = {0.0, 7.9921875},
.pointSizeGranularity = (1.0 / 16.0),
.lineWidthGranularity = (1.0 / 128.0),
.strictLines = false,
.standardSampleLocations = true,
.optimalBufferCopyOffsetAlignment = 64,
.optimalBufferCopyRowPitchAlignment = 64,
.nonCoherentAtomSize = 64,
/* Vulkan 1.0 sparse properties */
.sparseResidencyNonResidentStrict = false,
.sparseResidencyAlignedMipSize = false,
.sparseResidencyStandard2DBlockShape = false,
.sparseResidencyStandard2DMultisampleBlockShape = false,
.sparseResidencyStandard3DBlockShape = false,
/* Vulkan 1.1 properties */
/* XXX: 1.1 support */
.subgroupSize = 8,
.subgroupSupportedStages = VK_SHADER_STAGE_ALL,
.subgroupSupportedOperations =
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
VK_SUBGROUP_FEATURE_VOTE_BIT,
.subgroupQuadOperationsInAllStages = false,
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
.maxMultiviewViewCount = 0,
.maxMultiviewInstanceIndex = 0,
.protectedNoFault = false,
/* Make sure everything is addressable by a signed 32-bit int, and
* our largest descriptors are 96 bytes. */
.maxPerSetDescriptors = (1ull << 31) / 96,
/* Our buffer size fields allow only this much */
.maxMemoryAllocationSize = UINT32_MAX,
/* Vulkan 1.2 properties */
/* XXX: 1.2 support */
/* XXX: VK_KHR_depth_stencil_resolve */
.supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.independentResolveNone = true,
.independentResolve = true,
/* XXX: VK_KHR_driver_properties */
.driverID = VK_DRIVER_ID_MESA_PANVK,
.conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
/* XXX: VK_KHR_shader_float_controls */
.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.shaderSignedZeroInfNanPreserveFloat16 = true,
.shaderSignedZeroInfNanPreserveFloat32 = true,
.shaderSignedZeroInfNanPreserveFloat64 = false,
.shaderDenormPreserveFloat16 = true,
.shaderDenormPreserveFloat32 = true,
.shaderDenormPreserveFloat64 = false,
.shaderDenormFlushToZeroFloat16 = true,
.shaderDenormFlushToZeroFloat32 = true,
.shaderDenormFlushToZeroFloat64 = false,
.shaderRoundingModeRTEFloat16 = true,
.shaderRoundingModeRTEFloat32 = true,
.shaderRoundingModeRTEFloat64 = false,
.shaderRoundingModeRTZFloat16 = true,
.shaderRoundingModeRTZFloat32 = true,
.shaderRoundingModeRTZFloat64 = false,
/* XXX: VK_EXT_descriptor_indexing */
.maxUpdateAfterBindDescriptorsInAllPools = 0,
.shaderUniformBufferArrayNonUniformIndexingNative = false,
.shaderSampledImageArrayNonUniformIndexingNative = false,
.shaderStorageBufferArrayNonUniformIndexingNative = false,
.shaderStorageImageArrayNonUniformIndexingNative = false,
.shaderInputAttachmentArrayNonUniformIndexingNative = false,
.robustBufferAccessUpdateAfterBind = false,
.quadDivergentImplicitLod = false,
.maxPerStageDescriptorUpdateAfterBindSamplers = 0,
.maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
.maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
.maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
.maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
.maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
.maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
.maxPerStageUpdateAfterBindResources = 0,
.maxDescriptorSetUpdateAfterBindSamplers = 0,
.maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
.maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
.maxDescriptorSetUpdateAfterBindSampledImages = 0,
.maxDescriptorSetUpdateAfterBindStorageImages = 0,
.maxDescriptorSetUpdateAfterBindInputAttachments = 0,
/* XXX: VK_EXT_sampler_filter_minmax */
.filterMinmaxSingleComponentFormats = false,
.filterMinmaxImageComponentMapping = false,
/* XXX: VK_KHR_timeline_semaphore */
.maxTimelineSemaphoreValueDifference = INT64_MAX,
.framebufferIntegerColorSampleCounts = sample_counts,
/* Vulkan 1.3 properties */
/* XXX: 1.3 support */
/* XXX: VK_EXT_subgroup_size_control */
.minSubgroupSize = 8,
.maxSubgroupSize = 8,
.maxComputeWorkgroupSubgroups = 48,
.requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL,
/* XXX: VK_EXT_inline_uniform_block */
.maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
.maxPerStageDescriptorInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxDescriptorSetInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
.maxInlineUniformTotalSize =
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
/* XXX: VK_KHR_shader_integer_dot_product */
.integerDotProduct8BitUnsignedAccelerated = true,
.integerDotProduct8BitSignedAccelerated = true,
.integerDotProduct4x8BitPackedUnsignedAccelerated = true,
.integerDotProduct4x8BitPackedSignedAccelerated = true,
/* XXX: VK_EXT_texel_buffer_alignment */
.storageTexelBufferOffsetAlignmentBytes = 64,
.storageTexelBufferOffsetSingleTexelAlignment = false,
.uniformTexelBufferOffsetAlignmentBytes = 4,
.uniformTexelBufferOffsetSingleTexelAlignment = true,
/* XXX: VK_KHR_maintenance4 */
.maxBufferSize = 1 << 30,
/* VK_EXT_custom_border_color */
.maxCustomBorderColorSamplers = 32768,
/* VK_KHR_vertex_attribute_divisor */
/* We will have to restrict this a bit for multiview */
.maxVertexAttribDivisor = UINT32_MAX,
.supportsNonZeroFirstInstance = false,
/* VK_KHR_push_descriptor */
.maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
};
snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
device->name);
memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
memcpy(properties->driverUUID, device->driver_uuid, VK_UUID_SIZE);
memcpy(properties->deviceUUID, device->device_uuid, VK_UUID_SIZE);
snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
}
void
panvk_physical_device_finish(struct panvk_physical_device *device)
{
@ -284,27 +649,6 @@ panvk_physical_device_init(struct panvk_physical_device *device,
goto fail;
}
struct vk_device_extension_table supported_extensions;
get_device_extensions(device, &supported_extensions);
struct vk_features supported_features;
get_features(device, &supported_features);
struct vk_physical_device_dispatch_table dispatch_table;
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &panvk_physical_device_entrypoints, true);
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_physical_device_entrypoints, false);
result =
vk_physical_device_init(&device->vk, &instance->vk, &supported_extensions,
&supported_features, NULL, &dispatch_table);
if (result != VK_SUCCESS) {
vk_error(instance, result);
goto fail;
}
if (instance->vk.enabled_extensions.KHR_display) {
master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
if (master_fd >= 0) {
@ -340,6 +684,30 @@ panvk_physical_device_init(struct panvk_physical_device *device,
*/
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
struct vk_device_extension_table supported_extensions;
get_device_extensions(device, &supported_extensions);
struct vk_features supported_features;
get_features(device, &supported_features);
struct vk_properties properties;
get_device_properties(device, &properties);
struct vk_physical_device_dispatch_table dispatch_table;
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &panvk_physical_device_entrypoints, true);
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_physical_device_entrypoints, false);
result = vk_physical_device_init(&device->vk, &instance->vk,
&supported_extensions, &supported_features,
&properties, &dispatch_table);
if (result != VK_SUCCESS) {
vk_error(instance, result);
goto fail;
}
device->sync_types[0] = &device->drm_syncobj_type;
device->sync_types[1] = NULL;
device->vk.supported_sync_types = device->sync_types;
@ -366,332 +734,6 @@ fail:
return result;
}
VKAPI_ATTR void VKAPI_CALL
panvk_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2 *pProperties)
{
VK_FROM_HANDLE(panvk_physical_device, pdevice, physicalDevice);
uint64_t os_page_size = 4096;
os_get_page_size(&os_page_size);
/* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
VkSampleCountFlags sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
const VkPhysicalDeviceLimits limits = {
/* Maximum texture dimension is 2^16. */
.maxImageDimension1D = (1 << 16),
.maxImageDimension2D = (1 << 16),
.maxImageDimension3D = (1 << 16),
.maxImageDimensionCube = (1 << 16),
.maxImageArrayLayers = (1 << 16),
/* Currently limited by the 1D texture size, which is 2^16.
* TODO: If we expose buffer views as 2D textures, we can increase the
* limit.
*/
.maxTexelBufferElements = (1 << 16),
/* Each uniform entry is 16-byte and the number of entries is encoded in a
* 12-bit field, with the minus(1) modifier, which gives 2^20.
*/
.maxUniformBufferRange = 1 << 20,
/* Storage buffer access is lowered to globals, so there's no limit here,
* except for the SW-descriptor we use to encode storage buffer
* descriptors, where the size is a 32-bit field.
*/
.maxStorageBufferRange = UINT32_MAX,
/* 128 bytes of push constants, so we're aligned with the minimum Vulkan
* requirements.
*/
.maxPushConstantsSize = 128,
/* There's no HW limit here. Should we advertize something smaller? */
.maxMemoryAllocationCount = UINT32_MAX,
/* Again, no hardware limit, but most drivers seem to advertive 64k. */
.maxSamplerAllocationCount = 64 * 1024,
/* A cache line. */
.bufferImageGranularity = 64,
/* Sparse binding not supported yet. */
.sparseAddressSpaceSize = 0,
/* Software limit. Pick the minimum required by Vulkan, because Bifrost
* GPUs don't have unified descriptor tables, which forces us to
* agregatte all descriptors from all sets and dispatch them to per-type
* descriptor tables emitted at draw/dispatch time.
* The more sets we support the more copies we are likely to have to do
* at draw time.
*/
.maxBoundDescriptorSets = 4,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSamplers = UINT16_MAX,
.maxDescriptorSetSamplers = UINT16_MAX,
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
* for our internal UBOs.
*/
.maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
.maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
/* SSBOs are limited by the size of a uniform buffer which contains our
* panvk_ssbo_desc objects.
* panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
* 16-byte too. The number of entries is encoded in a 12-bit field, with
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
* descriptors.
*/
.maxPerStageDescriptorStorageBuffers = 1 << 12,
.maxDescriptorSetStorageBuffers = 1 << 12,
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
.maxPerStageDescriptorSampledImages = UINT16_MAX,
.maxDescriptorSetSampledImages = UINT16_MAX,
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
*/
.maxPerStageDescriptorStorageImages = 1 << 8,
.maxDescriptorSetStorageImages = 1 << 8,
/* A maximum of 8 color render targets, and one depth-stencil render
* target.
*/
.maxPerStageDescriptorInputAttachments = 9,
.maxDescriptorSetInputAttachments = 9,
/* Could be the sum of all maxPerStageXxx values, but we limit ourselves
* to 2^16 to make things simpler.
*/
.maxPerStageResources = 1 << 16,
/* Software limits to keep VkCommandBuffer tracking sane. */
.maxDescriptorSetUniformBuffersDynamic = 16,
.maxDescriptorSetStorageBuffersDynamic = 8,
/* Software limit to keep VkCommandBuffer tracking sane. The HW supports
* up to 2^9 vertex attributes.
*/
.maxVertexInputAttributes = 16,
.maxVertexInputBindings = 16,
/* MALI_ATTRIBUTE::offset is 32-bit. */
.maxVertexInputAttributeOffset = UINT32_MAX,
/* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
.maxVertexInputBindingStride = UINT32_MAX,
/* 32 vec4 varyings. */
.maxVertexOutputComponents = 128,
/* Tesselation shaders not supported. */
.maxTessellationGenerationLevel = 0,
.maxTessellationPatchSize = 0,
.maxTessellationControlPerVertexInputComponents = 0,
.maxTessellationControlPerVertexOutputComponents = 0,
.maxTessellationControlPerPatchOutputComponents = 0,
.maxTessellationControlTotalOutputComponents = 0,
.maxTessellationEvaluationInputComponents = 0,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry shaders not supported. */
.maxGeometryShaderInvocations = 0,
.maxGeometryInputComponents = 0,
.maxGeometryOutputComponents = 0,
.maxGeometryOutputVertices = 0,
.maxGeometryTotalOutputComponents = 0,
/* 32 vec4 varyings. */
.maxFragmentInputComponents = 128,
/* 8 render targets. */
.maxFragmentOutputAttachments = 8,
/* We don't support dual source blending yet. */
.maxFragmentDualSrcAttachments = 0,
/* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
* above).
*/
.maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
/* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
* (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
* really make sense to expose this amount of memory, especially since
* it's backed by global memory anyway.
*/
.maxComputeSharedMemorySize = 32768,
/* Software limit to meet Vulkan 1.0 requirements. We split the
* dispatch in several jobs if it's too big.
*/
.maxComputeWorkGroupCount = {65535, 65535, 65535},
/* We have 10 bits to encode the local-size, and there's a minus(1)
* modifier, so, a size of 1 takes no bit.
*/
.maxComputeWorkGroupInvocations = 1 << 10,
.maxComputeWorkGroupSize = {1 << 10, 1 << 10, 1 << 10},
/* 8-bit subpixel precision. */
.subPixelPrecisionBits = 8,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
/* Software limit. */
.maxDrawIndexedIndexValue = UINT32_MAX,
/* Make it one for now. */
.maxDrawIndirectCount = 1,
.maxSamplerLodBias = 255,
.maxSamplerAnisotropy = 16,
.maxViewports = 1,
/* Same as the framebuffer limit. */
.maxViewportDimensions = {(1 << 14), (1 << 14)},
/* Encoded in a 16-bit signed integer. */
.viewportBoundsRange = {INT16_MIN, INT16_MAX},
.viewportSubPixelBits = 0,
/* Align on a page. */
.minMemoryMapAlignment = os_page_size,
/* Some compressed texture formats require 128-byte alignment. */
.minTexelBufferOffsetAlignment = 64,
/* Always aligned on a uniform slot (vec4). */
.minUniformBufferOffsetAlignment = 16,
/* Lowered to global accesses, which happen at the 32-bit granularity. */
.minStorageBufferOffsetAlignment = 4,
/* Signed 4-bit value. */
.minTexelOffset = -8,
.maxTexelOffset = 7,
.minTexelGatherOffset = -8,
.maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = 8,
.maxFramebufferWidth = (1 << 14),
.maxFramebufferHeight = (1 << 14),
.maxFramebufferLayers = 256,
.framebufferColorSampleCounts = sample_counts,
.framebufferDepthSampleCounts = sample_counts,
.framebufferStencilSampleCounts = sample_counts,
.framebufferNoAttachmentsSampleCounts = sample_counts,
.maxColorAttachments = 8,
.sampledImageColorSampleCounts = sample_counts,
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.sampledImageDepthSampleCounts = sample_counts,
.sampledImageStencilSampleCounts = sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = false,
.timestampPeriod = 0,
.maxClipDistances = 0,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 0,
.discreteQueuePriorities = 1,
.pointSizeRange = {0.125, 4095.9375},
.lineWidthRange = {0.0, 7.9921875},
.pointSizeGranularity = (1.0 / 16.0),
.lineWidthGranularity = (1.0 / 128.0),
.strictLines = false,
.standardSampleLocations = true,
.optimalBufferCopyOffsetAlignment = 64,
.optimalBufferCopyRowPitchAlignment = 64,
.nonCoherentAtomSize = 64,
};
pProperties->properties = (VkPhysicalDeviceProperties){
.apiVersion = panvk_get_vk_version(),
.driverVersion = vk_get_driver_version(),
/* Arm vendor ID. */
.vendorID = 0x13b5,
/* Collect arch_major, arch_minor, arch_rev and product_major,
* as done by the Arm driver.
*/
.deviceID = pdevice->kmod.props.gpu_prod_id << 16,
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
.limits = limits,
.sparseProperties = {0},
};
strcpy(pProperties->properties.deviceName, pdevice->name);
memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid,
VK_UUID_SIZE);
VkPhysicalDeviceVulkan11Properties core_1_1 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
.deviceLUIDValid = false,
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
.maxMultiviewViewCount = 0,
.maxMultiviewInstanceIndex = 0,
.protectedNoFault = false,
/* Make sure everything is addressable by a signed 32-bit int, and
* our largest descriptors are 96 bytes. */
.maxPerSetDescriptors = (1ull << 31) / 96,
/* Our buffer size fields allow only this much */
.maxMemoryAllocationSize = 0xFFFFFFFFull,
};
memcpy(core_1_1.driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
memcpy(core_1_1.deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
const VkPhysicalDeviceVulkan12Properties core_1_2 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
};
const VkPhysicalDeviceVulkan13Properties core_1_3 = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
};
vk_foreach_struct(ext, pProperties->pNext) {
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
continue;
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
continue;
if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
continue;
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
(VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
/* Software limit. */
properties->maxPushDescriptors = 32;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
/* We will have to restrict this a bit for multiview */
properties->maxVertexAttribDivisor = UINT32_MAX;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
properties->maxCustomBorderColorSamplers = 32768;
break;
}
default:
break;
}
}
}
static const VkQueueFamilyProperties panvk_queue_family_properties = {
.queueFlags =
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,