mirror of https://gitlab.freedesktop.org/mesa/mesa
panvk: Move to vk_properties
This removes GetPhysicalDeviceProperties2 in favor of vk_properties. It reports the same values as before in vulkaninfo. Where values were missing, the same values as the ARM proprietary driver were used. Signed-off-by: Mary Guillemard <mary.guillemard@collabora.com> Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/28715>
This commit is contained in:
parent
a9f170f613
commit
f7f9b3d170
|
@ -12,6 +12,7 @@
|
|||
#include <sys/sysinfo.h>
|
||||
|
||||
#include "util/disk_cache.h"
|
||||
#include "git_sha1.h"
|
||||
|
||||
#include "vk_device.h"
|
||||
#include "vk_drm_syncobj.h"
|
||||
|
@ -30,8 +31,14 @@
|
|||
|
||||
#include "genxml/gen_macros.h"
|
||||
|
||||
#define ARM_VENDOR_ID 0x13b5
#define MAX_VIEWPORTS 1
#define MAX_PUSH_DESCRIPTORS 32
/* We reserve one ubo for push constant, one for sysvals and one per-set for the
 * descriptor metadata */
#define RESERVED_UBO_COUNT 6
/* The replacement list is parenthesized so the subtraction is evaluated first
 * when the macro is used inside a larger expression (for instance when it is
 * multiplied by MAX_INLINE_UNIFORM_BLOCK_SIZE).
 */
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS (32 - RESERVED_UBO_COUNT)
#define MAX_INLINE_UNIFORM_BLOCK_SIZE (1 << 16)
|
||||
|
||||
static int
|
||||
get_cache_uuid(uint16_t family, void *uuid)
|
||||
|
@ -215,6 +222,364 @@ get_features(const struct panvk_physical_device *device,
|
|||
};
|
||||
}
|
||||
|
||||
static void
|
||||
get_device_properties(const struct panvk_physical_device *device,
|
||||
struct vk_properties *properties)
|
||||
{
|
||||
/* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
|
||||
VkSampleCountFlags sample_counts =
|
||||
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
|
||||
|
||||
uint64_t os_page_size = 4096;
|
||||
os_get_page_size(&os_page_size);
|
||||
|
||||
*properties = (struct vk_properties){
|
||||
.apiVersion = panvk_get_vk_version(),
|
||||
.driverVersion = vk_get_driver_version(),
|
||||
.vendorID = ARM_VENDOR_ID,
|
||||
|
||||
/* Collect arch_major, arch_minor, arch_rev and product_major,
|
||||
* as done by the Arm driver.
|
||||
*/
|
||||
.deviceID = device->kmod.props.gpu_prod_id << 16,
|
||||
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
|
||||
|
||||
/* Vulkan 1.0 limits */
|
||||
/* Maximum texture dimension is 2^16. */
|
||||
.maxImageDimension1D = (1 << 16),
|
||||
.maxImageDimension2D = (1 << 16),
|
||||
.maxImageDimension3D = (1 << 16),
|
||||
.maxImageDimensionCube = (1 << 16),
|
||||
.maxImageArrayLayers = (1 << 16),
|
||||
/* Currently limited by the 1D texture size, which is 2^16.
|
||||
* TODO: If we expose buffer views as 2D textures, we can increase the
|
||||
* limit.
|
||||
*/
|
||||
.maxTexelBufferElements = (1 << 16),
|
||||
/* Each uniform entry is 16-byte and the number of entries is encoded in a
|
||||
* 12-bit field, with the minus(1) modifier, which gives 2^20.
|
||||
*/
|
||||
.maxUniformBufferRange = 1 << 20,
|
||||
/* Storage buffer access is lowered to globals, so there's no limit here,
|
||||
* except for the SW-descriptor we use to encode storage buffer
|
||||
* descriptors, where the size is a 32-bit field.
|
||||
*/
|
||||
.maxStorageBufferRange = UINT32_MAX,
|
||||
/* 128 bytes of push constants, so we're aligned with the minimum Vulkan
|
||||
* requirements.
|
||||
*/
|
||||
.maxPushConstantsSize = 128,
|
||||
/* There's no HW limit here. Should we advertize something smaller? */
|
||||
.maxMemoryAllocationCount = UINT32_MAX,
|
||||
/* Again, no hardware limit, but most drivers seem to advertive 64k. */
|
||||
.maxSamplerAllocationCount = 64 * 1024,
|
||||
/* A cache line. */
|
||||
.bufferImageGranularity = 64,
|
||||
/* Sparse binding not supported yet. */
|
||||
.sparseAddressSpaceSize = 0,
|
||||
/* Software limit. Pick the minimum required by Vulkan, because Bifrost
|
||||
* GPUs don't have unified descriptor tables, which forces us to
|
||||
* agregatte all descriptors from all sets and dispatch them to per-type
|
||||
* descriptor tables emitted at draw/dispatch time.
|
||||
* The more sets we support the more copies we are likely to have to do
|
||||
* at draw time.
|
||||
*/
|
||||
.maxBoundDescriptorSets = 4,
|
||||
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
|
||||
.maxPerStageDescriptorSamplers = UINT16_MAX,
|
||||
.maxDescriptorSetSamplers = UINT16_MAX,
|
||||
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
|
||||
* for our internal UBOs.
|
||||
*/
|
||||
.maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
|
||||
.maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
|
||||
/* SSBOs are limited by the size of a uniform buffer which contains our
|
||||
* panvk_ssbo_desc objects.
|
||||
* panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
|
||||
* 16-byte too. The number of entries is encoded in a 12-bit field, with
|
||||
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
|
||||
* descriptors.
|
||||
*/
|
||||
.maxPerStageDescriptorStorageBuffers = 1 << 12,
|
||||
.maxDescriptorSetStorageBuffers = 1 << 12,
|
||||
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
|
||||
.maxPerStageDescriptorSampledImages = UINT16_MAX,
|
||||
.maxDescriptorSetSampledImages = UINT16_MAX,
|
||||
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
|
||||
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
|
||||
*/
|
||||
.maxPerStageDescriptorStorageImages = 1 << 8,
|
||||
.maxDescriptorSetStorageImages = 1 << 8,
|
||||
/* A maximum of 8 color render targets, and one depth-stencil render
|
||||
* target.
|
||||
*/
|
||||
.maxPerStageDescriptorInputAttachments = 9,
|
||||
.maxDescriptorSetInputAttachments = 9,
|
||||
/* Could be the sum of all maxPerStageXxx values, but we limit ourselves
|
||||
* to 2^16 to make things simpler.
|
||||
*/
|
||||
.maxPerStageResources = 1 << 16,
|
||||
/* Software limits to keep VkCommandBuffer tracking sane. */
|
||||
.maxDescriptorSetUniformBuffersDynamic = 16,
|
||||
.maxDescriptorSetStorageBuffersDynamic = 8,
|
||||
/* Software limit to keep VkCommandBuffer tracking sane. The HW supports
|
||||
* up to 2^9 vertex attributes.
|
||||
*/
|
||||
.maxVertexInputAttributes = 16,
|
||||
.maxVertexInputBindings = 16,
|
||||
/* MALI_ATTRIBUTE::offset is 32-bit. */
|
||||
.maxVertexInputAttributeOffset = UINT32_MAX,
|
||||
/* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
|
||||
.maxVertexInputBindingStride = UINT32_MAX,
|
||||
/* 32 vec4 varyings. */
|
||||
.maxVertexOutputComponents = 128,
|
||||
/* Tesselation shaders not supported. */
|
||||
.maxTessellationGenerationLevel = 0,
|
||||
.maxTessellationPatchSize = 0,
|
||||
.maxTessellationControlPerVertexInputComponents = 0,
|
||||
.maxTessellationControlPerVertexOutputComponents = 0,
|
||||
.maxTessellationControlPerPatchOutputComponents = 0,
|
||||
.maxTessellationControlTotalOutputComponents = 0,
|
||||
.maxTessellationEvaluationInputComponents = 0,
|
||||
.maxTessellationEvaluationOutputComponents = 0,
|
||||
/* Geometry shaders not supported. */
|
||||
.maxGeometryShaderInvocations = 0,
|
||||
.maxGeometryInputComponents = 0,
|
||||
.maxGeometryOutputComponents = 0,
|
||||
.maxGeometryOutputVertices = 0,
|
||||
.maxGeometryTotalOutputComponents = 0,
|
||||
/* 32 vec4 varyings. */
|
||||
.maxFragmentInputComponents = 128,
|
||||
/* 8 render targets. */
|
||||
.maxFragmentOutputAttachments = 8,
|
||||
/* We don't support dual source blending yet. */
|
||||
.maxFragmentDualSrcAttachments = 0,
|
||||
/* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
|
||||
* above).
|
||||
*/
|
||||
.maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
|
||||
/* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
|
||||
* (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
|
||||
* really make sense to expose this amount of memory, especially since
|
||||
* it's backed by global memory anyway.
|
||||
*/
|
||||
.maxComputeSharedMemorySize = 32768,
|
||||
/* Software limit to meet Vulkan 1.0 requirements. We split the
|
||||
* dispatch in several jobs if it's too big.
|
||||
*/
|
||||
.maxComputeWorkGroupCount = {65535, 65535, 65535},
|
||||
/* We have 10 bits to encode the local-size, and there's a minus(1)
|
||||
* modifier, so, a size of 1 takes no bit.
|
||||
*/
|
||||
.maxComputeWorkGroupInvocations = 1 << 10,
|
||||
.maxComputeWorkGroupSize = {1 << 10, 1 << 10, 1 << 10},
|
||||
/* 8-bit subpixel precision. */
|
||||
.subPixelPrecisionBits = 8,
|
||||
.subTexelPrecisionBits = 8,
|
||||
.mipmapPrecisionBits = 8,
|
||||
/* Software limit. */
|
||||
.maxDrawIndexedIndexValue = UINT32_MAX,
|
||||
/* Make it one for now. */
|
||||
.maxDrawIndirectCount = 1,
|
||||
.maxSamplerLodBias = 255,
|
||||
.maxSamplerAnisotropy = 16,
|
||||
.maxViewports = 1,
|
||||
/* Same as the framebuffer limit. */
|
||||
.maxViewportDimensions = {(1 << 14), (1 << 14)},
|
||||
/* Encoded in a 16-bit signed integer. */
|
||||
.viewportBoundsRange = {INT16_MIN, INT16_MAX},
|
||||
.viewportSubPixelBits = 0,
|
||||
/* Align on a page. */
|
||||
.minMemoryMapAlignment = os_page_size,
|
||||
/* Some compressed texture formats require 128-byte alignment. */
|
||||
.minTexelBufferOffsetAlignment = 64,
|
||||
/* Always aligned on a uniform slot (vec4). */
|
||||
.minUniformBufferOffsetAlignment = 16,
|
||||
/* Lowered to global accesses, which happen at the 32-bit granularity. */
|
||||
.minStorageBufferOffsetAlignment = 4,
|
||||
/* Signed 4-bit value. */
|
||||
.minTexelOffset = -8,
|
||||
.maxTexelOffset = 7,
|
||||
.minTexelGatherOffset = -8,
|
||||
.maxTexelGatherOffset = 7,
|
||||
.minInterpolationOffset = -0.5,
|
||||
.maxInterpolationOffset = 0.5,
|
||||
.subPixelInterpolationOffsetBits = 8,
|
||||
.maxFramebufferWidth = (1 << 14),
|
||||
.maxFramebufferHeight = (1 << 14),
|
||||
.maxFramebufferLayers = 256,
|
||||
.framebufferColorSampleCounts = sample_counts,
|
||||
.framebufferDepthSampleCounts = sample_counts,
|
||||
.framebufferStencilSampleCounts = sample_counts,
|
||||
.framebufferNoAttachmentsSampleCounts = sample_counts,
|
||||
.maxColorAttachments = 8,
|
||||
.sampledImageColorSampleCounts = sample_counts,
|
||||
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
|
||||
.sampledImageDepthSampleCounts = sample_counts,
|
||||
.sampledImageStencilSampleCounts = sample_counts,
|
||||
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
|
||||
.maxSampleMaskWords = 1,
|
||||
.timestampComputeAndGraphics = false,
|
||||
.timestampPeriod = 0,
|
||||
.maxClipDistances = 0,
|
||||
.maxCullDistances = 0,
|
||||
.maxCombinedClipAndCullDistances = 0,
|
||||
.discreteQueuePriorities = 1,
|
||||
.pointSizeRange = {0.125, 4095.9375},
|
||||
.lineWidthRange = {0.0, 7.9921875},
|
||||
.pointSizeGranularity = (1.0 / 16.0),
|
||||
.lineWidthGranularity = (1.0 / 128.0),
|
||||
.strictLines = false,
|
||||
.standardSampleLocations = true,
|
||||
.optimalBufferCopyOffsetAlignment = 64,
|
||||
.optimalBufferCopyRowPitchAlignment = 64,
|
||||
.nonCoherentAtomSize = 64,
|
||||
|
||||
/* Vulkan 1.0 sparse properties */
|
||||
.sparseResidencyNonResidentStrict = false,
|
||||
.sparseResidencyAlignedMipSize = false,
|
||||
.sparseResidencyStandard2DBlockShape = false,
|
||||
.sparseResidencyStandard2DMultisampleBlockShape = false,
|
||||
.sparseResidencyStandard3DBlockShape = false,
|
||||
|
||||
/* Vulkan 1.1 properties */
|
||||
/* XXX: 1.1 support */
|
||||
.subgroupSize = 8,
|
||||
.subgroupSupportedStages = VK_SHADER_STAGE_ALL,
|
||||
.subgroupSupportedOperations =
|
||||
VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
|
||||
VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
|
||||
VK_SUBGROUP_FEATURE_QUAD_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
|
||||
VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
|
||||
VK_SUBGROUP_FEATURE_VOTE_BIT,
|
||||
.subgroupQuadOperationsInAllStages = false,
|
||||
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
|
||||
.maxMultiviewViewCount = 0,
|
||||
.maxMultiviewInstanceIndex = 0,
|
||||
.protectedNoFault = false,
|
||||
/* Make sure everything is addressable by a signed 32-bit int, and
|
||||
* our largest descriptors are 96 bytes. */
|
||||
.maxPerSetDescriptors = (1ull << 31) / 96,
|
||||
/* Our buffer size fields allow only this much */
|
||||
.maxMemoryAllocationSize = UINT32_MAX,
|
||||
|
||||
/* Vulkan 1.2 properties */
|
||||
/* XXX: 1.2 support */
|
||||
/* XXX: VK_KHR_depth_stencil_resolve */
|
||||
.supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
|
||||
.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
|
||||
.independentResolveNone = true,
|
||||
.independentResolve = true,
|
||||
/* XXX: VK_KHR_driver_properties */
|
||||
.driverID = VK_DRIVER_ID_MESA_PANVK,
|
||||
.conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
|
||||
/* XXX: VK_KHR_shader_float_controls */
|
||||
.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
|
||||
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
|
||||
.shaderSignedZeroInfNanPreserveFloat16 = true,
|
||||
.shaderSignedZeroInfNanPreserveFloat32 = true,
|
||||
.shaderSignedZeroInfNanPreserveFloat64 = false,
|
||||
.shaderDenormPreserveFloat16 = true,
|
||||
.shaderDenormPreserveFloat32 = true,
|
||||
.shaderDenormPreserveFloat64 = false,
|
||||
.shaderDenormFlushToZeroFloat16 = true,
|
||||
.shaderDenormFlushToZeroFloat32 = true,
|
||||
.shaderDenormFlushToZeroFloat64 = false,
|
||||
.shaderRoundingModeRTEFloat16 = true,
|
||||
.shaderRoundingModeRTEFloat32 = true,
|
||||
.shaderRoundingModeRTEFloat64 = false,
|
||||
.shaderRoundingModeRTZFloat16 = true,
|
||||
.shaderRoundingModeRTZFloat32 = true,
|
||||
.shaderRoundingModeRTZFloat64 = false,
|
||||
/* XXX: VK_EXT_descriptor_indexing */
|
||||
.maxUpdateAfterBindDescriptorsInAllPools = 0,
|
||||
.shaderUniformBufferArrayNonUniformIndexingNative = false,
|
||||
.shaderSampledImageArrayNonUniformIndexingNative = false,
|
||||
.shaderStorageBufferArrayNonUniformIndexingNative = false,
|
||||
.shaderStorageImageArrayNonUniformIndexingNative = false,
|
||||
.shaderInputAttachmentArrayNonUniformIndexingNative = false,
|
||||
.robustBufferAccessUpdateAfterBind = false,
|
||||
.quadDivergentImplicitLod = false,
|
||||
.maxPerStageDescriptorUpdateAfterBindSamplers = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindStorageBuffers = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindSampledImages = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindStorageImages = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
|
||||
.maxPerStageDescriptorUpdateAfterBindInputAttachments = 0,
|
||||
.maxPerStageUpdateAfterBindResources = 0,
|
||||
.maxDescriptorSetUpdateAfterBindSamplers = 0,
|
||||
.maxDescriptorSetUpdateAfterBindUniformBuffers = 0,
|
||||
.maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0,
|
||||
.maxDescriptorSetUpdateAfterBindStorageBuffers = 0,
|
||||
.maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = 0,
|
||||
.maxDescriptorSetUpdateAfterBindSampledImages = 0,
|
||||
.maxDescriptorSetUpdateAfterBindStorageImages = 0,
|
||||
.maxDescriptorSetUpdateAfterBindInputAttachments = 0,
|
||||
/* XXX: VK_EXT_sampler_filter_minmax */
|
||||
.filterMinmaxSingleComponentFormats = false,
|
||||
.filterMinmaxImageComponentMapping = false,
|
||||
/* XXX: VK_KHR_timeline_semaphore */
|
||||
.maxTimelineSemaphoreValueDifference = INT64_MAX,
|
||||
.framebufferIntegerColorSampleCounts = sample_counts,
|
||||
|
||||
/* Vulkan 1.3 properties */
|
||||
/* XXX: 1.3 support */
|
||||
/* XXX: VK_EXT_subgroup_size_control */
|
||||
.minSubgroupSize = 8,
|
||||
.maxSubgroupSize = 8,
|
||||
.maxComputeWorkgroupSubgroups = 48,
|
||||
.requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL,
|
||||
/* XXX: VK_EXT_inline_uniform_block */
|
||||
.maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
|
||||
.maxPerStageDescriptorInlineUniformBlocks =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
|
||||
.maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
|
||||
.maxDescriptorSetInlineUniformBlocks =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
|
||||
.maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS,
|
||||
.maxInlineUniformTotalSize =
|
||||
MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
|
||||
/* XXX: VK_KHR_shader_integer_dot_product */
|
||||
.integerDotProduct8BitUnsignedAccelerated = true,
|
||||
.integerDotProduct8BitSignedAccelerated = true,
|
||||
.integerDotProduct4x8BitPackedUnsignedAccelerated = true,
|
||||
.integerDotProduct4x8BitPackedSignedAccelerated = true,
|
||||
/* XXX: VK_EXT_texel_buffer_alignment */
|
||||
.storageTexelBufferOffsetAlignmentBytes = 64,
|
||||
.storageTexelBufferOffsetSingleTexelAlignment = false,
|
||||
.uniformTexelBufferOffsetAlignmentBytes = 4,
|
||||
.uniformTexelBufferOffsetSingleTexelAlignment = true,
|
||||
/* XXX: VK_KHR_maintenance4 */
|
||||
.maxBufferSize = 1 << 30,
|
||||
|
||||
/* VK_EXT_custom_border_color */
|
||||
.maxCustomBorderColorSamplers = 32768,
|
||||
|
||||
/* VK_KHR_vertex_attribute_divisor */
|
||||
/* We will have to restrict this a bit for multiview */
|
||||
.maxVertexAttribDivisor = UINT32_MAX,
|
||||
.supportsNonZeroFirstInstance = false,
|
||||
|
||||
/* VK_KHR_push_descriptor */
|
||||
.maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
|
||||
};
|
||||
|
||||
snprintf(properties->deviceName, sizeof(properties->deviceName), "%s",
|
||||
device->name);
|
||||
|
||||
memcpy(properties->pipelineCacheUUID, device->cache_uuid, VK_UUID_SIZE);
|
||||
|
||||
memcpy(properties->driverUUID, device->driver_uuid, VK_UUID_SIZE);
|
||||
memcpy(properties->deviceUUID, device->device_uuid, VK_UUID_SIZE);
|
||||
|
||||
snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "panvk");
|
||||
snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
|
||||
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
|
||||
}
|
||||
|
||||
void
|
||||
panvk_physical_device_finish(struct panvk_physical_device *device)
|
||||
{
|
||||
|
@ -284,27 +649,6 @@ panvk_physical_device_init(struct panvk_physical_device *device,
|
|||
goto fail;
|
||||
}
|
||||
|
||||
struct vk_device_extension_table supported_extensions;
|
||||
get_device_extensions(device, &supported_extensions);
|
||||
|
||||
struct vk_features supported_features;
|
||||
get_features(device, &supported_features);
|
||||
|
||||
struct vk_physical_device_dispatch_table dispatch_table;
|
||||
vk_physical_device_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &panvk_physical_device_entrypoints, true);
|
||||
vk_physical_device_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &wsi_physical_device_entrypoints, false);
|
||||
|
||||
result =
|
||||
vk_physical_device_init(&device->vk, &instance->vk, &supported_extensions,
|
||||
&supported_features, NULL, &dispatch_table);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_error(instance, result);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (instance->vk.enabled_extensions.KHR_display) {
|
||||
master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
|
||||
if (master_fd >= 0) {
|
||||
|
@ -340,6 +684,30 @@ panvk_physical_device_init(struct panvk_physical_device *device,
|
|||
*/
|
||||
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
|
||||
|
||||
struct vk_device_extension_table supported_extensions;
|
||||
get_device_extensions(device, &supported_extensions);
|
||||
|
||||
struct vk_features supported_features;
|
||||
get_features(device, &supported_features);
|
||||
|
||||
struct vk_properties properties;
|
||||
get_device_properties(device, &properties);
|
||||
|
||||
struct vk_physical_device_dispatch_table dispatch_table;
|
||||
vk_physical_device_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &panvk_physical_device_entrypoints, true);
|
||||
vk_physical_device_dispatch_table_from_entrypoints(
|
||||
&dispatch_table, &wsi_physical_device_entrypoints, false);
|
||||
|
||||
result = vk_physical_device_init(&device->vk, &instance->vk,
|
||||
&supported_extensions, &supported_features,
|
||||
&properties, &dispatch_table);
|
||||
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_error(instance, result);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
device->sync_types[0] = &device->drm_syncobj_type;
|
||||
device->sync_types[1] = NULL;
|
||||
device->vk.supported_sync_types = device->sync_types;
|
||||
|
@ -366,332 +734,6 @@ fail:
|
|||
return result;
|
||||
}
|
||||
|
||||
VKAPI_ATTR void VKAPI_CALL
|
||||
panvk_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties2 *pProperties)
|
||||
{
|
||||
VK_FROM_HANDLE(panvk_physical_device, pdevice, physicalDevice);
|
||||
|
||||
uint64_t os_page_size = 4096;
|
||||
os_get_page_size(&os_page_size);
|
||||
|
||||
/* HW supports MSAA 4, 8 and 16, but we limit ourselves to MSAA 4 for now. */
|
||||
VkSampleCountFlags sample_counts =
|
||||
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
|
||||
|
||||
const VkPhysicalDeviceLimits limits = {
|
||||
/* Maximum texture dimension is 2^16. */
|
||||
.maxImageDimension1D = (1 << 16),
|
||||
.maxImageDimension2D = (1 << 16),
|
||||
.maxImageDimension3D = (1 << 16),
|
||||
.maxImageDimensionCube = (1 << 16),
|
||||
.maxImageArrayLayers = (1 << 16),
|
||||
|
||||
/* Currently limited by the 1D texture size, which is 2^16.
|
||||
* TODO: If we expose buffer views as 2D textures, we can increase the
|
||||
* limit.
|
||||
*/
|
||||
.maxTexelBufferElements = (1 << 16),
|
||||
|
||||
/* Each uniform entry is 16-byte and the number of entries is encoded in a
|
||||
* 12-bit field, with the minus(1) modifier, which gives 2^20.
|
||||
*/
|
||||
.maxUniformBufferRange = 1 << 20,
|
||||
|
||||
/* Storage buffer access is lowered to globals, so there's no limit here,
|
||||
* except for the SW-descriptor we use to encode storage buffer
|
||||
* descriptors, where the size is a 32-bit field.
|
||||
*/
|
||||
.maxStorageBufferRange = UINT32_MAX,
|
||||
|
||||
/* 128 bytes of push constants, so we're aligned with the minimum Vulkan
|
||||
* requirements.
|
||||
*/
|
||||
.maxPushConstantsSize = 128,
|
||||
|
||||
/* There's no HW limit here. Should we advertize something smaller? */
|
||||
.maxMemoryAllocationCount = UINT32_MAX,
|
||||
|
||||
/* Again, no hardware limit, but most drivers seem to advertive 64k. */
|
||||
.maxSamplerAllocationCount = 64 * 1024,
|
||||
|
||||
/* A cache line. */
|
||||
.bufferImageGranularity = 64,
|
||||
|
||||
/* Sparse binding not supported yet. */
|
||||
.sparseAddressSpaceSize = 0,
|
||||
|
||||
/* Software limit. Pick the minimum required by Vulkan, because Bifrost
|
||||
* GPUs don't have unified descriptor tables, which forces us to
|
||||
* agregatte all descriptors from all sets and dispatch them to per-type
|
||||
* descriptor tables emitted at draw/dispatch time.
|
||||
* The more sets we support the more copies we are likely to have to do
|
||||
* at draw time.
|
||||
*/
|
||||
.maxBoundDescriptorSets = 4,
|
||||
|
||||
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
|
||||
.maxPerStageDescriptorSamplers = UINT16_MAX,
|
||||
.maxDescriptorSetSamplers = UINT16_MAX,
|
||||
|
||||
/* MALI_RENDERER_STATE::uniform_buffer_count is 8-bit. We reserve 32 slots
|
||||
* for our internal UBOs.
|
||||
*/
|
||||
.maxPerStageDescriptorUniformBuffers = UINT8_MAX - 32,
|
||||
.maxDescriptorSetUniformBuffers = UINT8_MAX - 32,
|
||||
|
||||
/* SSBOs are limited by the size of a uniform buffer which contains our
|
||||
* panvk_ssbo_desc objects.
|
||||
* panvk_ssbo_desc is 16-byte, and each uniform entry in the Mali UBO is
|
||||
* 16-byte too. The number of entries is encoded in a 12-bit field, with
|
||||
* a minus(1) modifier, which gives a maximum of 2^12 SSBO
|
||||
* descriptors.
|
||||
*/
|
||||
.maxPerStageDescriptorStorageBuffers = 1 << 12,
|
||||
.maxDescriptorSetStorageBuffers = 1 << 12,
|
||||
|
||||
/* MALI_RENDERER_STATE::sampler_count is 16-bit. */
|
||||
.maxPerStageDescriptorSampledImages = UINT16_MAX,
|
||||
.maxDescriptorSetSampledImages = UINT16_MAX,
|
||||
|
||||
/* MALI_ATTRIBUTE::buffer_index is 9-bit, and each image takes two
|
||||
* MALI_ATTRIBUTE_BUFFER slots, which gives a maximum of (1 << 8) images.
|
||||
*/
|
||||
.maxPerStageDescriptorStorageImages = 1 << 8,
|
||||
.maxDescriptorSetStorageImages = 1 << 8,
|
||||
|
||||
/* A maximum of 8 color render targets, and one depth-stencil render
|
||||
* target.
|
||||
*/
|
||||
.maxPerStageDescriptorInputAttachments = 9,
|
||||
.maxDescriptorSetInputAttachments = 9,
|
||||
|
||||
/* Could be the sum of all maxPerStageXxx values, but we limit ourselves
|
||||
* to 2^16 to make things simpler.
|
||||
*/
|
||||
.maxPerStageResources = 1 << 16,
|
||||
|
||||
/* Software limits to keep VkCommandBuffer tracking sane. */
|
||||
.maxDescriptorSetUniformBuffersDynamic = 16,
|
||||
.maxDescriptorSetStorageBuffersDynamic = 8,
|
||||
|
||||
/* Software limit to keep VkCommandBuffer tracking sane. The HW supports
|
||||
* up to 2^9 vertex attributes.
|
||||
*/
|
||||
.maxVertexInputAttributes = 16,
|
||||
.maxVertexInputBindings = 16,
|
||||
|
||||
/* MALI_ATTRIBUTE::offset is 32-bit. */
|
||||
.maxVertexInputAttributeOffset = UINT32_MAX,
|
||||
|
||||
/* MALI_ATTRIBUTE_BUFFER::stride is 32-bit. */
|
||||
.maxVertexInputBindingStride = UINT32_MAX,
|
||||
|
||||
/* 32 vec4 varyings. */
|
||||
.maxVertexOutputComponents = 128,
|
||||
|
||||
/* Tesselation shaders not supported. */
|
||||
.maxTessellationGenerationLevel = 0,
|
||||
.maxTessellationPatchSize = 0,
|
||||
.maxTessellationControlPerVertexInputComponents = 0,
|
||||
.maxTessellationControlPerVertexOutputComponents = 0,
|
||||
.maxTessellationControlPerPatchOutputComponents = 0,
|
||||
.maxTessellationControlTotalOutputComponents = 0,
|
||||
.maxTessellationEvaluationInputComponents = 0,
|
||||
.maxTessellationEvaluationOutputComponents = 0,
|
||||
|
||||
/* Geometry shaders not supported. */
|
||||
.maxGeometryShaderInvocations = 0,
|
||||
.maxGeometryInputComponents = 0,
|
||||
.maxGeometryOutputComponents = 0,
|
||||
.maxGeometryOutputVertices = 0,
|
||||
.maxGeometryTotalOutputComponents = 0,
|
||||
|
||||
/* 32 vec4 varyings. */
|
||||
.maxFragmentInputComponents = 128,
|
||||
|
||||
/* 8 render targets. */
|
||||
.maxFragmentOutputAttachments = 8,
|
||||
|
||||
/* We don't support dual source blending yet. */
|
||||
.maxFragmentDualSrcAttachments = 0,
|
||||
|
||||
/* 8 render targets, 2^12 storage buffers and 2^8 storage images (see
|
||||
* above).
|
||||
*/
|
||||
.maxFragmentCombinedOutputResources = 8 + (1 << 12) + (1 << 8),
|
||||
|
||||
/* MALI_LOCAL_STORAGE::wls_size_{base,scale} allows us to have up to
|
||||
* (7 << 30) bytes of shared memory, but we cap it to 32K as it doesn't
|
||||
* really make sense to expose this amount of memory, especially since
|
||||
* it's backed by global memory anyway.
|
||||
*/
|
||||
.maxComputeSharedMemorySize = 32768,
|
||||
|
||||
/* Software limit to meet Vulkan 1.0 requirements. We split the
|
||||
* dispatch in several jobs if it's too big.
|
||||
*/
|
||||
.maxComputeWorkGroupCount = {65535, 65535, 65535},
|
||||
|
||||
/* We have 10 bits to encode the local-size, and there's a minus(1)
|
||||
* modifier, so, a size of 1 takes no bit.
|
||||
*/
|
||||
.maxComputeWorkGroupInvocations = 1 << 10,
|
||||
.maxComputeWorkGroupSize = {1 << 10, 1 << 10, 1 << 10},
|
||||
|
||||
/* 8-bit subpixel precision. */
|
||||
.subPixelPrecisionBits = 8,
|
||||
.subTexelPrecisionBits = 8,
|
||||
.mipmapPrecisionBits = 8,
|
||||
|
||||
/* Software limit. */
|
||||
.maxDrawIndexedIndexValue = UINT32_MAX,
|
||||
|
||||
/* Make it one for now. */
|
||||
.maxDrawIndirectCount = 1,
|
||||
|
||||
.maxSamplerLodBias = 255,
|
||||
.maxSamplerAnisotropy = 16,
|
||||
.maxViewports = 1,
|
||||
|
||||
/* Same as the framebuffer limit. */
|
||||
.maxViewportDimensions = {(1 << 14), (1 << 14)},
|
||||
|
||||
/* Encoded in a 16-bit signed integer. */
|
||||
.viewportBoundsRange = {INT16_MIN, INT16_MAX},
|
||||
.viewportSubPixelBits = 0,
|
||||
|
||||
/* Align on a page. */
|
||||
.minMemoryMapAlignment = os_page_size,
|
||||
|
||||
/* Some compressed texture formats require 128-byte alignment. */
|
||||
.minTexelBufferOffsetAlignment = 64,
|
||||
|
||||
/* Always aligned on a uniform slot (vec4). */
|
||||
.minUniformBufferOffsetAlignment = 16,
|
||||
|
||||
/* Lowered to global accesses, which happen at the 32-bit granularity. */
|
||||
.minStorageBufferOffsetAlignment = 4,
|
||||
|
||||
/* Signed 4-bit value. */
|
||||
.minTexelOffset = -8,
|
||||
.maxTexelOffset = 7,
|
||||
.minTexelGatherOffset = -8,
|
||||
.maxTexelGatherOffset = 7,
|
||||
.minInterpolationOffset = -0.5,
|
||||
.maxInterpolationOffset = 0.5,
|
||||
.subPixelInterpolationOffsetBits = 8,
|
||||
|
||||
.maxFramebufferWidth = (1 << 14),
|
||||
.maxFramebufferHeight = (1 << 14),
|
||||
.maxFramebufferLayers = 256,
|
||||
.framebufferColorSampleCounts = sample_counts,
|
||||
.framebufferDepthSampleCounts = sample_counts,
|
||||
.framebufferStencilSampleCounts = sample_counts,
|
||||
.framebufferNoAttachmentsSampleCounts = sample_counts,
|
||||
.maxColorAttachments = 8,
|
||||
.sampledImageColorSampleCounts = sample_counts,
|
||||
.sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
|
||||
.sampledImageDepthSampleCounts = sample_counts,
|
||||
.sampledImageStencilSampleCounts = sample_counts,
|
||||
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
|
||||
.maxSampleMaskWords = 1,
|
||||
.timestampComputeAndGraphics = false,
|
||||
.timestampPeriod = 0,
|
||||
.maxClipDistances = 0,
|
||||
.maxCullDistances = 0,
|
||||
.maxCombinedClipAndCullDistances = 0,
|
||||
.discreteQueuePriorities = 1,
|
||||
.pointSizeRange = {0.125, 4095.9375},
|
||||
.lineWidthRange = {0.0, 7.9921875},
|
||||
.pointSizeGranularity = (1.0 / 16.0),
|
||||
.lineWidthGranularity = (1.0 / 128.0),
|
||||
.strictLines = false,
|
||||
.standardSampleLocations = true,
|
||||
.optimalBufferCopyOffsetAlignment = 64,
|
||||
.optimalBufferCopyRowPitchAlignment = 64,
|
||||
.nonCoherentAtomSize = 64,
|
||||
};
|
||||
|
||||
pProperties->properties = (VkPhysicalDeviceProperties){
|
||||
.apiVersion = panvk_get_vk_version(),
|
||||
.driverVersion = vk_get_driver_version(),
|
||||
|
||||
/* Arm vendor ID. */
|
||||
.vendorID = 0x13b5,
|
||||
|
||||
/* Collect arch_major, arch_minor, arch_rev and product_major,
|
||||
* as done by the Arm driver.
|
||||
*/
|
||||
.deviceID = pdevice->kmod.props.gpu_prod_id << 16,
|
||||
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
|
||||
.limits = limits,
|
||||
.sparseProperties = {0},
|
||||
};
|
||||
|
||||
strcpy(pProperties->properties.deviceName, pdevice->name);
|
||||
memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid,
|
||||
VK_UUID_SIZE);
|
||||
|
||||
VkPhysicalDeviceVulkan11Properties core_1_1 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
|
||||
.deviceLUIDValid = false,
|
||||
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
|
||||
.maxMultiviewViewCount = 0,
|
||||
.maxMultiviewInstanceIndex = 0,
|
||||
.protectedNoFault = false,
|
||||
/* Make sure everything is addressable by a signed 32-bit int, and
|
||||
* our largest descriptors are 96 bytes. */
|
||||
.maxPerSetDescriptors = (1ull << 31) / 96,
|
||||
/* Our buffer size fields allow only this much */
|
||||
.maxMemoryAllocationSize = 0xFFFFFFFFull,
|
||||
};
|
||||
memcpy(core_1_1.driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
|
||||
memcpy(core_1_1.deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
|
||||
|
||||
const VkPhysicalDeviceVulkan12Properties core_1_2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
|
||||
};
|
||||
|
||||
const VkPhysicalDeviceVulkan13Properties core_1_3 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
|
||||
};
|
||||
|
||||
vk_foreach_struct(ext, pProperties->pNext) {
|
||||
if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
|
||||
continue;
|
||||
if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
|
||||
continue;
|
||||
if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
|
||||
continue;
|
||||
|
||||
switch (ext->sType) {
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
|
||||
VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
|
||||
(VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
|
||||
/* Software limit. */
|
||||
properties->maxPushDescriptors = 32;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
|
||||
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
|
||||
(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
|
||||
/* We will have to restrict this a bit for multiview */
|
||||
properties->maxVertexAttribDivisor = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
|
||||
VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
|
||||
(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
|
||||
properties->maxCustomBorderColorSamplers = 32768;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const VkQueueFamilyProperties panvk_queue_family_properties = {
|
||||
.queueFlags =
|
||||
VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
|
||||
|
|
Loading…
Reference in New Issue