/* mesa/src/nouveau/vulkan/nvk_physical_device.c */

/*
* Copyright © 2022 Collabora Ltd. and Red Hat Inc.
* SPDX-License-Identifier: MIT
*/
#include "nvk_physical_device.h"
#include "nak.h"
#include "nvk_buffer.h"
#include "nvk_entrypoints.h"
#include "nvk_format.h"
#include "nvk_image.h"
#include "nvk_instance.h"
#include "nvk_shader.h"
#include "nvk_wsi.h"
#include "git_sha1.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "vk_device.h"
#include "vk_drm_syncobj.h"
#include "vk_shader_module.h"
#include "vulkan/wsi/wsi_common.h"
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <xf86drm.h>
#include "cl90c0.h"
#include "cl91c0.h"
#include "cla097.h"
#include "cla0c0.h"
#include "cla1c0.h"
#include "clb097.h"
#include "clb0c0.h"
#include "clb197.h"
#include "clb1c0.h"
#include "clc097.h"
#include "clc0c0.h"
#include "clc1c0.h"
#include "clc397.h"
#include "clc3c0.h"
#include "clc597.h"
#include "clc5c0.h"
#include "clc997.h"
/* Returns true when the NAK compiler backend covers every core Vulkan 1.0
 * shader stage on this device.  When it returns false, at least one stage
 * still goes through the legacy codegen path.
 */
static bool
nvk_use_nak(const struct nv_device_info *info)
{
   const VkShaderStageFlags nak_stages = nvk_nak_stages(info);
   const VkShaderStageFlags required_stages =
      VK_SHADER_STAGE_VERTEX_BIT |
      VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
      VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
      VK_SHADER_STAGE_GEOMETRY_BIT |
      VK_SHADER_STAGE_FRAGMENT_BIT |
      VK_SHADER_STAGE_COMPUTE_BIT;

   return (nak_stages & required_stages) == required_stages;
}
/* Compute the Vulkan API version this physical device advertises.
 * An explicit environment override always wins; otherwise we expose 1.3
 * when NAK handles all core stages and cap at 1.0 while codegen is still
 * involved anywhere.
 */
static uint32_t
nvk_get_vk_version(const struct nv_device_info *info)
{
   /* Version override takes priority */
   const uint32_t version_override = vk_get_version_override();
   if (version_override != 0)
      return version_override;

   /* If we're using codegen for anything, lock to version 1.0 */
   const uint32_t minor = nvk_use_nak(info) ? 3 : 0;
   return VK_MAKE_VERSION(1, minor, VK_HEADER_VERSION);
}
/* Fills *ext with the device extensions NVK advertises for this device.
 *
 * Most entries are unconditionally true.  Hardware-dependent entries are
 * gated on the 3D engine class (info->cls_eng3d) and/or on which shader
 * stages the NAK backend covers (nvk_nak_stages / nvk_use_nak).  WSI-related
 * extensions are compiled out entirely when no WSI platform is enabled.
 */
static void
nvk_get_device_extensions(const struct nvk_instance *instance,
                          const struct nv_device_info *info,
                          struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      /* Indirect draw count requires a Turing+ 3D class. */
      .KHR_draw_indirect_count = info->cls_eng3d >= TURING_A,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      /* Needs Turing+ hardware and NAK compiling fragment shaders. */
      .KHR_fragment_shader_barycentric = info->cls_eng3d >= TURING_A &&
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef NVK_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
#ifdef NVK_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now since we cannot implement it
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
                        wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_present_wait = driQueryOptionb(&instance->dri_options, "vk_khr_present_wait") ||
                          wsi_common_vk_instance_supports_present_wait(&instance->vk),
#endif
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      /* 64-bit buffer atomics need Maxwell+ and the NAK backend. */
      .KHR_shader_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
                                 nvk_use_nak(info),
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = nvk_use_nak(info),
      .KHR_shader_subgroup_uniform_control_flow = nvk_use_nak(info),
      .KHR_shader_terminate_invocation =
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_timeline_semaphore = true,
#ifdef NVK_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = nvk_use_nak(info),
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = true,
      .EXT_conditional_rendering = true,
      .EXT_color_write_enable = true,
      .EXT_custom_border_color = true,
      .EXT_depth_bias_control = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      /* PCI bus info only makes sense for discrete GPUs. */
      .EXT_pci_bus_info = info->type == NV_DEVICE_TYPE_DIS,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_physical_device_drm = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_primitives_generated_query = true,
      .EXT_provoking_vertex = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = info->cls_eng3d >= MAXWELL_B,
      .EXT_sampler_filter_minmax = info->cls_eng3d >= MAXWELL_B,
      .EXT_scalar_block_layout = nvk_use_nak(info),
      .EXT_separate_stencil_usage = true,
      .EXT_shader_image_atomic_int64 = info->cls_eng3d >= MAXWELL_A &&
                                      nvk_use_nak(info),
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = info->cls_eng3d >= MAXWELL_B,
      .EXT_subgroup_size_control = true,
#ifdef NVK_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,
      .EXT_ycbcr_2plane_444_formats = true,
      .EXT_ycbcr_image_arrays = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .NV_shader_sm_builtins = true,
      .VALVE_mutable_descriptor_type = true,
   };
}
/* Fills *features with the Vulkan features NVK supports on this device.
 *
 * Entries are grouped by the core version or extension that introduced them.
 * Hardware-dependent features are gated on the 3D engine class
 * (info->cls_eng3d) or SM version, and compiler-dependent ones on
 * nvk_use_nak()/nvk_nak_stages().  A few entries mirror the extension table
 * via supported_extensions so feature and extension advertisement stay in
 * sync.
 */
static void
nvk_get_device_features(const struct nv_device_info *info,
                        const struct vk_device_extension_table *supported_extensions,
                        struct vk_features *features)
{
   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      /* No ETC2/ASTC hardware support; BC formats only. */
      .textureCompressionETC2 = false,
      .textureCompressionBC = true,
      .textureCompressionASTC_LDR = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderStorageImageReadWithoutFormat = info->cls_eng3d >= MAXWELL_A,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = info->cls_eng3d >= VOLTA_A,
      .shaderResourceMinLod = info->cls_eng3d >= VOLTA_A,
      .sparseBinding = true,
      /* Sparse residency requires Maxwell+ across the board. */
      .sparseResidency2Samples = info->cls_eng3d >= MAXWELL_A,
      .sparseResidency4Samples = info->cls_eng3d >= MAXWELL_A,
      .sparseResidency8Samples = info->cls_eng3d >= MAXWELL_A,
      .sparseResidencyAliased = info->cls_eng3d >= MAXWELL_A,
      .sparseResidencyBuffer = info->cls_eng3d >= MAXWELL_A,
      .sparseResidencyImage2D = info->cls_eng3d >= MAXWELL_A,
      .sparseResidencyImage3D = info->cls_eng3d >= MAXWELL_A,
      .variableMultisampleRate = true,
      .inheritedQueries = true,
      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .shaderDrawParameters = true,
      .samplerYcbcrConversion = true,
      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = info->cls_eng3d >= TURING_A,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                 nvk_use_nak(info),
      .shaderSharedInt64Atomics = false, /* TODO */
      /* TODO: Fp16 is currently busted on Turing and Volta due to instruction
       * scheduling issues. Re-enable it once those are sorted.
       */
      .shaderFloat16 = info->sm >= 80 && nvk_use_nak(info),
      .shaderInt8 = true,
      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = true,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = true,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,
      .samplerFilterMinmax = info->cls_eng3d >= MAXWELL_B,
      .scalarBlockLayout = nvk_use_nak(info),
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = nvk_use_nak(info),
      .vulkanMemoryModelDeviceScope = nvk_use_nak(info),
      .vulkanMemoryModelAvailabilityVisibilityChains = nvk_use_nak(info),
      .shaderOutputViewportIndex = info->cls_eng3d >= MAXWELL_B,
      .shaderOutputLayer = info->cls_eng3d >= MAXWELL_B,
      .subgroupBroadcastDynamicId = nvk_use_nak(info),
      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,
      /* VK_KHR_fragment_shader_barycentric */
      .fragmentShaderBarycentric = info->cls_eng3d >= TURING_A &&
         (nvk_nak_stages(info) & VK_SHADER_STAGE_FRAGMENT_BIT) != 0,
      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,
      /* VK_KHR_line_rasterization */
      .rectangularLines = true,
      .bresenhamLines = true,
      .smoothLines = true,
      .stippledRectangularLines = true,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = true,
      /* VK_KHR_maintenance5 */
      .maintenance5 = true,
      /* VK_KHR_maintenance6 */
      .maintenance6 = true,
      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,
      /* VK_KHR_present_id: mirror the extension table so the feature is only
       * advertised when the extension is.
       */
      .presentId = supported_extensions->KHR_present_id,
      /* VK_KHR_present_wait */
      .presentWait = supported_extensions->KHR_present_wait,
      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = true,
      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
      /* VK_KHR_shader_maximal_reconvergence */
      .shaderMaximalReconvergence = true,
      /* VK_KHR_shader_subgroup_rotate */
      .shaderSubgroupRotate = nvk_use_nak(info),
      .shaderSubgroupRotateClustered = nvk_use_nak(info),
      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,
      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = false,
      .workgroupMemoryExplicitLayout16BitAccess = false,
      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,
      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,
      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = false,
      /* VK_EXT_buffer_device_address */
      .bufferDeviceAddressCaptureReplayEXT = true,
      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,
      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = true,
      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = true,
      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .leastRepresentableValueForceUnormRepresentation = true,
      .floatRepresentation = false,
      .depthBiasExact = true,
      /* VK_EXT_depth_clip_control */
      .depthClipControl = info->cls_eng3d >= VOLTA_A,
      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,
      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,
      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,
      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,
      /* VK_EXT_extended_dynamic_state3 */
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3RasterizationSamples = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3AlphaToCoverageEnable = true,
      .extendedDynamicState3AlphaToOneEnable = true,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3ColorBlendEnable = true,
      .extendedDynamicState3ColorBlendEquation = true,
      .extendedDynamicState3ColorWriteMask = true,
      .extendedDynamicState3RasterizationStream = true,
      .extendedDynamicState3ConservativeRasterizationMode = false,
      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3SampleLocationsEnable = info->cls_eng3d >= MAXWELL_B,
      .extendedDynamicState3ColorBlendAdvanced = false,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ShadingRateImageEnable = false,
      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary = true,
      /* VK_EXT_image_2d_view_of_3d */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = true,
      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,
#ifdef NVK_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif
      /* VK_EXT_image_view_min_lod */
      .minLod = true,
      /* VK_EXT_map_memory_placed */
      .memoryMapPlaced = true,
      .memoryMapRangePlaced = false,
      .memoryUnmapReserve = true,
      /* VK_EXT_multi_draw */
      .multiDraw = true,
      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,
      /* VK_EXT_nested_command_buffer */
      .nestedCommandBuffer = true,
      .nestedCommandBufferRendering = true,
      .nestedCommandBufferSimultaneousUse = true,
      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,
      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = true,
      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithNonZeroStreams = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = true,
      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,
      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,
      /* VK_EXT_shader_image_atomic_int64 */
      .shaderImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                nvk_use_nak(info),
      .sparseImageInt64Atomics = info->cls_eng3d >= MAXWELL_A &&
                                nvk_use_nak(info),
      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,
      /* VK_EXT_shader_object */
      .shaderObject = true,
      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = nvk_use_nak(info),
      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,
      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,
      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,
      /* VK_EXT_ycbcr_2plane_444_formats */
      .ycbcr2plane444Formats = true,
      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,
      /* VK_NV_shader_sm_builtins */
      .shaderSMBuiltins = true,
   };
}
/* Minimum alignment for constant-buffer (UBO) bindings.  Turing and newer
 * 3D classes allow 64-byte alignment; older hardware requires 256 bytes.
 */
uint32_t
nvk_min_cbuf_alignment(const struct nv_device_info *info)
{
   if (info->cls_eng3d >= TURING_A)
      return 64;

   return 256;
}
/* Fills *properties with the physical-device properties and limits NVK
 * reports for this device.
 *
 * Hardware-dependent limits key off info->cls_eng3d (3D engine class);
 * the conformance version is only reported when `conformant` is set.
 * Beyond the big initializer, this also builds the device name, the
 * device/driver UUIDs, and the driver name/info strings.  The DRM
 * properties (VK_EXT_physical_device_drm) are filled in elsewhere.
 */
static void
nvk_get_device_properties(const struct nvk_instance *instance,
                          const struct nv_device_info *info,
                          bool conformant,
                          struct vk_properties *properties)
{
   /* All sample-count-valued properties below share this 1/2/4/8 mask. */
   const VkSampleCountFlagBits sample_counts = VK_SAMPLE_COUNT_1_BIT |
                                               VK_SAMPLE_COUNT_2_BIT |
                                               VK_SAMPLE_COUNT_4_BIT |
                                               VK_SAMPLE_COUNT_8_BIT;

   /* Default to 4K pages if the OS query fails to overwrite it. */
   uint64_t os_page_size = 4096;
   os_get_page_size(&os_page_size);

   *properties = (struct vk_properties) {
      .apiVersion = nvk_get_vk_version(info),
      .driverVersion = vk_get_driver_version(),
      /* Allow a dri-config vendor-ID override; otherwise report NVIDIA. */
      .vendorID = instance->force_vk_vendor != 0 ?
                  instance->force_vk_vendor : NVIDIA_VENDOR_ID,
      .deviceID = info->device_id,
      .deviceType = info->type == NV_DEVICE_TYPE_DIS ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Vulkan 1.0 limits */
      .maxImageDimension1D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_1D),
      .maxImageDimension2D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_2D),
      .maxImageDimension3D = nvk_image_max_dimension(info, VK_IMAGE_TYPE_3D),
      .maxImageDimensionCube = 0x8000,
      .maxImageArrayLayers = 2048,
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = 65536,
      .maxStorageBufferRange = UINT32_MAX,
      .maxPushConstantsSize = NVK_MAX_PUSH_SIZE,
      .maxMemoryAllocationCount = 4096,
      .maxSamplerAllocationCount = 4000,
      .bufferImageGranularity = info->cls_eng3d >= MAXWELL_B ? 0x400 : 0x10000,
      .sparseAddressSpaceSize = NVK_SPARSE_ADDR_SPACE_SIZE,
      .maxBoundDescriptorSets = NVK_MAX_SETS,
      .maxPerStageDescriptorSamplers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUniformBuffers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorStorageBuffers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorSampledImages = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorStorageImages = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorInputAttachments = NVK_MAX_DESCRIPTORS,
      .maxPerStageResources = UINT32_MAX,
      .maxDescriptorSetSamplers = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUniformBuffers = NVK_MAX_DESCRIPTORS,
      /* Dynamic buffer slots are split evenly between UBOs and SSBOs. */
      .maxDescriptorSetUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetStorageImages = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetInputAttachments = NVK_MAX_DESCRIPTORS,
      .maxVertexInputAttributes = 32,
      .maxVertexInputBindings = 32,
      .maxVertexInputAttributeOffset = 2047,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4216,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 128,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 1024,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = NVK_MAX_RTS,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = 16,
      .maxComputeSharedMemorySize = NVK_MAX_SHARED_SIZE,
      .maxComputeWorkGroupCount = {0x7fffffff, 65535, 65535},
      .maxComputeWorkGroupInvocations = 1024,
      .maxComputeWorkGroupSize = {1024, 1024, 64},
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 15,
      .maxSamplerAnisotropy = 16,
      .maxViewports = NVK_MAX_VIEWPORTS,
      .maxViewportDimensions = { 32768, 32768 },
      .viewportBoundsRange = { -65536, 65536 },
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = os_page_size,
      .minTexelBufferOffsetAlignment = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
      .minUniformBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
      .minStorageBufferOffsetAlignment = NVK_MIN_SSBO_ALIGNMENT,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      /* Pascal+ raised the maximum framebuffer dimension to 32K. */
      .maxFramebufferHeight = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
      .maxFramebufferWidth = info->cls_eng3d >= PASCAL_A ? 0x8000 : 0x4000,
      .maxFramebufferLayers = 2048,
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .maxColorAttachments = NVK_MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = sample_counts,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 1.0, 2047.94 },
      .lineWidthRange = { 1, 64 },
      .pointSizeGranularity = 0.0625,
      .lineWidthGranularity = 0.0625,
      .strictLines = true,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 1,
      .optimalBufferCopyRowPitchAlignment = 1,
      .nonCoherentAtomSize = 64,

      /* Vulkan 1.0 sparse properties */
      .sparseResidencyNonResidentStrict = true,
      .sparseResidencyAlignedMipSize = true,
      .sparseResidencyStandard2DBlockShape = true,
      .sparseResidencyStandard2DMultisampleBlockShape = true,
      .sparseResidencyStandard3DBlockShape = true,

      /* Vulkan 1.1 properties */
      .subgroupSize = 32,
      .subgroupSupportedStages = nvk_nak_stages(info),
      .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                     VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                     VK_SUBGROUP_FEATURE_BASIC_BIT |
                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                     VK_SUBGROUP_FEATURE_QUAD_BIT |
                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                     VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                     VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                     VK_SUBGROUP_FEATURE_VOTE_BIT,
      .subgroupQuadOperationsInAllStages = false,
      .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY,
      .maxMultiviewViewCount = NVK_MAX_MULTIVIEW_VIEW_COUNT,
      .maxMultiviewInstanceIndex = UINT32_MAX,
      .maxPerSetDescriptors = UINT32_MAX,
      .maxMemoryAllocationSize = (1u << 31),

      /* Vulkan 1.2 properties */
      .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                    VK_RESOLVE_MODE_AVERAGE_BIT |
                                    VK_RESOLVE_MODE_MIN_BIT |
                                    VK_RESOLVE_MODE_MAX_BIT,
      .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT,
      .independentResolveNone = true,
      .independentResolve = true,
      .driverID = VK_DRIVER_ID_MESA_NVK,
      /* Only claim a CTS conformance version on conformant devices. */
      .conformanceVersion =
         conformant ? (VkConformanceVersion) { 1, 3, 7, 3 }
                    : (VkConformanceVersion) { 0, 0, 0, 0 },
      .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .shaderSignedZeroInfNanPreserveFloat16 = true,
      .shaderSignedZeroInfNanPreserveFloat32 = true,
      .shaderSignedZeroInfNanPreserveFloat64 = true,
      .shaderDenormPreserveFloat16 = true,
      .shaderDenormPreserveFloat32 = true,
      .shaderDenormPreserveFloat64 = true,
      .shaderDenormFlushToZeroFloat16 = false,
      .shaderDenormFlushToZeroFloat32 = true,
      .shaderDenormFlushToZeroFloat64 = false,
      .shaderRoundingModeRTEFloat16 = true,
      .shaderRoundingModeRTEFloat32 = true,
      .shaderRoundingModeRTEFloat64 = true,
      .shaderRoundingModeRTZFloat16 = false,
      .shaderRoundingModeRTZFloat32 = true,
      .shaderRoundingModeRTZFloat64 = true,
      .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX,
      .shaderUniformBufferArrayNonUniformIndexingNative = false,
      .shaderSampledImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
      .shaderStorageBufferArrayNonUniformIndexingNative = true,
      .shaderStorageImageArrayNonUniformIndexingNative = info->cls_eng3d >= TURING_A,
      .shaderInputAttachmentArrayNonUniformIndexingNative = false,
      .robustBufferAccessUpdateAfterBind = true,
      .quadDivergentImplicitLod = info->cls_eng3d >= TURING_A,
      .maxPerStageDescriptorUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
      .maxPerStageDescriptorUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
      .maxPerStageUpdateAfterBindResources = UINT32_MAX,
      .maxDescriptorSetUpdateAfterBindSamplers = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUpdateAfterBindUniformBuffers = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetUpdateAfterBindStorageBuffers = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = NVK_MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetUpdateAfterBindSampledImages = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUpdateAfterBindStorageImages = NVK_MAX_DESCRIPTORS,
      .maxDescriptorSetUpdateAfterBindInputAttachments = NVK_MAX_DESCRIPTORS,
      .filterMinmaxSingleComponentFormats = true,
      .filterMinmaxImageComponentMapping = true,
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,
      .framebufferIntegerColorSampleCounts = sample_counts,

      /* Vulkan 1.3 properties */
      .minSubgroupSize = 32,
      .maxSubgroupSize = 32,
      .maxComputeWorkgroupSubgroups = 1024 / 32,
      .requiredSubgroupSizeStages = 0,
      .maxInlineUniformBlockSize = 1 << 16,
      .maxPerStageDescriptorInlineUniformBlocks = 32,
      .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 32,
      .maxDescriptorSetInlineUniformBlocks = 6 * 32,
      .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 6 * 32,
      .maxInlineUniformTotalSize = 1 << 16,
      /* Packed 8-bit dot products are only accelerated on Volta+. */
      .integerDotProduct4x8BitPackedUnsignedAccelerated
         = info->cls_eng3d >= VOLTA_A,
      .integerDotProduct4x8BitPackedSignedAccelerated
         = info->cls_eng3d >= VOLTA_A,
      .integerDotProduct4x8BitPackedMixedSignednessAccelerated
         = info->cls_eng3d >= VOLTA_A,
      .storageTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = NVK_MIN_TEXEL_BUFFER_ALIGNMENT,
      .uniformTexelBufferOffsetSingleTexelAlignment = true,
      .maxBufferSize = NVK_MAX_BUFFER_SIZE,

      /* VK_KHR_push_descriptor */
      .maxPushDescriptors = NVK_MAX_PUSH_DESCRIPTORS,

      /* VK_EXT_custom_border_color */
      .maxCustomBorderColorSamplers = 4000,

      /* VK_EXT_extended_dynamic_state3 */
      .dynamicPrimitiveTopologyUnrestricted = true,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibraryFastLinking = true,
      .graphicsPipelineLibraryIndependentInterpolationDecoration = true,

      /* VK_KHR_line_rasterization */
      .lineSubPixelPrecisionBits = 8,

      /* VK_KHR_maintenance5 */
      .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
      .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
      .depthStencilSwizzleOneSupport = true,
      .polygonModePointSize = true,
      .nonStrictSinglePixelWideLinesUseParallelogram = false,
      .nonStrictWideLinesUseParallelogram = false,

      /* VK_KHR_maintenance6 */
      .blockTexelViewCompatibleMultipleLayers = true,
      .maxCombinedImageSamplerDescriptorCount = 3,
      .fragmentShadingRateClampCombinerInputs = false, /* TODO */

      /* VK_EXT_map_memory_placed */
      .minPlacedMemoryMapAlignment = os_page_size,

      /* VK_EXT_multi_draw */
      .maxMultiDrawCount = UINT32_MAX,

      /* VK_EXT_nested_command_buffer */
      .maxCommandBufferNestingLevel = UINT32_MAX,

      /* VK_EXT_pci_bus_info */
      .pciDomain = info->pci.domain,
      .pciBus = info->pci.bus,
      .pciDevice = info->pci.dev,
      .pciFunction = info->pci.func,

      /* VK_EXT_physical_device_drm gets populated later */

      /* VK_EXT_provoking_vertex */
      .provokingVertexModePerPipeline = true,
      .transformFeedbackPreservesTriangleFanProvokingVertex = true,

      /* VK_EXT_robustness2 */
      .robustStorageBufferAccessSizeAlignment = NVK_SSBO_BOUNDS_CHECK_ALIGNMENT,
      .robustUniformBufferAccessSizeAlignment = nvk_min_cbuf_alignment(info),

      /* VK_EXT_sample_locations */
      .sampleLocationSampleCounts = sample_counts,
      .maxSampleLocationGridSize = (VkExtent2D){ 1, 1 },
      .sampleLocationCoordinateRange[0] = 0.0f,
      .sampleLocationCoordinateRange[1] = 0.9375f,
      .sampleLocationSubPixelBits = 4,
      .variableSampleLocations = true,

      /* VK_EXT_shader_object */
      .shaderBinaryVersion = 0,

      /* VK_EXT_transform_feedback */
      .maxTransformFeedbackStreams = 4,
      .maxTransformFeedbackBuffers = 4,
      .maxTransformFeedbackBufferSize = UINT32_MAX,
      .maxTransformFeedbackStreamDataSize = 2048,
      .maxTransformFeedbackBufferDataSize = 512,
      .maxTransformFeedbackBufferDataStride = 2048,
      .transformFeedbackQueries = true,
      .transformFeedbackStreamsLinesTriangles = false,
      .transformFeedbackRasterizationStreamSelect = true,
      .transformFeedbackDraw = true,

      /* VK_KHR_vertex_attribute_divisor */
      .maxVertexAttribDivisor = UINT32_MAX,
      .supportsNonZeroFirstInstance = true,

      /* VK_KHR_fragment_shader_barycentric */
      .triStripVertexOrderIndependentOfProvokingVertex = false,

      /* VK_NV_shader_sm_builtins */
      .shaderSMCount = (uint32_t)info->tpc_count * info->mp_per_tpc,
      .shaderWarpsPerSM = info->max_warps_per_mp,
   };

   /* Add the driver to the device name (like other Mesa drivers do) */
   if (!strcmp(info->device_name, info->chipset_name)) {
      snprintf(properties->deviceName, sizeof(properties->deviceName),
               "NVK %s", info->device_name);
   } else {
      snprintf(properties->deviceName, sizeof(properties->deviceName),
               "%s (NVK %s)", info->device_name, info->chipset_name);
   }

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
   memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(properties->shaderModuleIdentifierAlgorithmUUID));

   /* Device UUID: vendor + device ID, zero-padded to VK_UUID_SIZE. */
   const struct {
      uint16_t vendor_id;
      uint16_t device_id;
      uint8_t pad[12];
   } dev_uuid = {
      .vendor_id = NVIDIA_VENDOR_ID,
      .device_id = info->device_id,
   };
   STATIC_ASSERT(sizeof(dev_uuid) == VK_UUID_SIZE);
   memcpy(properties->deviceUUID, &dev_uuid, VK_UUID_SIZE);

   /* Driver UUID: truncated driver build SHA. */
   STATIC_ASSERT(sizeof(instance->driver_build_sha) >= VK_UUID_SIZE);
   memcpy(properties->driverUUID, instance->driver_build_sha, VK_UUID_SIZE);

   snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "NVK");
   snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
}
static void
nvk_physical_device_init_pipeline_cache(struct nvk_physical_device *pdev)
{
struct nvk_instance *instance = nvk_physical_device_instance(pdev);
struct mesa_sha1 sha_ctx;
_mesa_sha1_init(&sha_ctx);
_mesa_sha1_update(&sha_ctx, instance->driver_build_sha,
sizeof(instance->driver_build_sha));
const uint64_t compiler_flags = nvk_physical_device_compiler_flags(pdev);
_mesa_sha1_update(&sha_ctx, &compiler_flags, sizeof(compiler_flags));
unsigned char sha[SHA1_DIGEST_LENGTH];
_mesa_sha1_final(&sha_ctx, sha);
STATIC_ASSERT(SHA1_DIGEST_LENGTH >= VK_UUID_SIZE);
memcpy(pdev->vk.properties.pipelineCacheUUID, sha, VK_UUID_SIZE);
memcpy(pdev->vk.properties.shaderBinaryUUID, sha, VK_UUID_SIZE);
#ifdef ENABLE_SHADER_CACHE
char renderer[10];
ASSERTED int len = snprintf(renderer, sizeof(renderer), "nvk_%04x",
pdev->info.chipset);
assert(len == sizeof(renderer) - 2);
char timestamp[41];
_mesa_sha1_format(timestamp, instance->driver_build_sha);
const uint64_t driver_flags = nvk_physical_device_compiler_flags(pdev);
pdev->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}
static void
nvk_physical_device_free_disk_cache(struct nvk_physical_device *pdev)
{
#ifdef ENABLE_SHADER_CACHE
   /* Guard so a second call (or a never-created cache) is a no-op */
   if (pdev->vk.disk_cache == NULL)
      return;

   disk_cache_destroy(pdev->vk.disk_cache);
   pdev->vk.disk_cache = NULL;
#else
   /* Without shader-cache support this must never have been set */
   assert(pdev->vk.disk_cache == NULL);
#endif
}
static uint64_t
nvk_get_sysmem_heap_size(void)
{
   /* Advertise 3/4 of total RAM, rounded down to a whole MiB, so that
    * filling the heap doesn't immediately push the system into swap.
    * Returns 0 if the OS query fails.
    */
   uint64_t total_B = 0;
   if (!os_get_total_physical_memory(&total_B))
      return 0;

   return ROUND_DOWN_TO(total_B * 3 / 4, 1 << 20);
}
static uint64_t
nvk_get_sysmem_heap_available(struct nvk_physical_device *pdev)
{
   uint64_t avail_B = 0;
   if (!os_get_available_system_memory(&avail_B)) {
      vk_loge(VK_LOG_OBJS(pdev), "Failed to query available system memory");
      return 0;
   }

   /* Report 3/4 of what's currently free (MiB-aligned) to leave
    * headroom before the system starts swapping.
    */
   return ROUND_DOWN_TO(avail_B * 3 / 4, 1 << 20);
}
static uint64_t
nvk_get_vram_heap_available(struct nvk_physical_device *pdev)
{
   /* Free VRAM, clamped at zero in case the kernel ever reports more
    * used than the total size.
    */
   const uint64_t used_B = nouveau_ws_device_vram_used(pdev->ws_dev);
   return used_B > pdev->info.vram_size_B
          ? 0 : pdev->info.vram_size_B - used_B;
}
/* Create an NVK physical device for a single DRM device.
 *
 * Filters out non-NVIDIA and unsupported hardware, opens the nouveau
 * winsys device, builds the memory heap/type tables and queue families,
 * and registers WSI.  On success *pdev_out owns everything; on failure
 * all partially-acquired resources are released via the goto chain.
 */
VkResult
nvk_create_drm_physical_device(struct vk_instance *_instance,
                               drmDevicePtr drm_device,
                               struct vk_physical_device **pdev_out)
{
   struct nvk_instance *instance = (struct nvk_instance *)_instance;
   VkResult result;
   int master_fd = -1;

   /* A render node is required; all submissions go through it. */
   if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)))
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   switch (drm_device->bustype) {
   case DRM_BUS_PCI:
      if (drm_device->deviceinfo.pci->vendor_id != NVIDIA_VENDOR_ID)
         return VK_ERROR_INCOMPATIBLE_DRIVER;
      break;

   case DRM_BUS_PLATFORM: {
      /* Platform (Tegra-style) devices are matched by device-tree
       * compatible strings with an "nvidia," vendor prefix.
       */
      const char *compat_prefix = "nvidia,";
      bool found = false;
      for (int i = 0; drm_device->deviceinfo.platform->compatible[i] != NULL; i++) {
         /* BUG FIX: test entry i, not compatible[0] on every iteration,
          * which made the loop only ever consider the first string.
          */
         if (strncmp(drm_device->deviceinfo.platform->compatible[i],
                     compat_prefix, strlen(compat_prefix)) == 0) {
            found = true;
            break;
         }
      }
      if (!found)
         return VK_ERROR_INCOMPATIBLE_DRIVER;
      break;
   }

   default:
      return VK_ERROR_INCOMPATIBLE_DRIVER;
   }

   struct nouveau_ws_device *ws_dev = nouveau_ws_device_new(drm_device);
   if (!ws_dev)
      return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);

   const struct nv_device_info info = ws_dev->info;
   const struct vk_sync_type syncobj_sync_type =
      vk_drm_syncobj_get_type(ws_dev->fd);

   /* We don't support anything pre-Kepler */
   if (info.cls_eng3d < KEPLER_A) {
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_ws_dev;
   }

   /* Conformance runs cover discrete Turing through Ada only */
   bool conformant =
      info.type == NV_DEVICE_TYPE_DIS &&
      info.cls_eng3d >= TURING_A && info.cls_eng3d <= ADA_A;

   if (!conformant &&
       !debug_get_bool_option("NVK_I_WANT_A_BROKEN_VULKAN_DRIVER", false)) {
#ifdef NDEBUG
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
#else
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "WARNING: NVK is not well-tested on %s, pass "
                         "NVK_I_WANT_A_BROKEN_VULKAN_DRIVER=1 "
                         "if you know what you're doing.",
                         info.device_name);
#endif
      goto fail_ws_dev;
   }

   /* All NVK memory management is built on VM_BIND */
   if (!ws_dev->has_vm_bind) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "NVK Requires a Linux kernel version 6.6 or later");
      goto fail_ws_dev;
   }

   struct stat st;
   if (stat(drm_device->nodes[DRM_NODE_RENDER], &st)) {
      /* Fixed message: we call stat(), not fstat() */
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "stat() failed on %s: %m",
                         drm_device->nodes[DRM_NODE_RENDER]);
      goto fail_ws_dev;
   }
   const dev_t render_dev = st.st_rdev;

   if (!conformant)
      vk_warn_non_conformant_implementation("NVK");

   struct nvk_physical_device *pdev =
      vk_zalloc(&instance->vk.alloc, sizeof(*pdev),
                8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (pdev == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_ws_dev;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &nvk_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   struct vk_device_extension_table supported_extensions;
   nvk_get_device_extensions(instance, &info, &supported_extensions);

   struct vk_features supported_features;
   nvk_get_device_features(&info, &supported_extensions, &supported_features);

   struct vk_properties properties;
   nvk_get_device_properties(instance, &info, conformant, &properties);

   properties.drmHasRender = true;
   properties.drmRenderMajor = major(render_dev);
   properties.drmRenderMinor = minor(render_dev);

   /* DRM primary is optional */
   if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
       !stat(drm_device->nodes[DRM_NODE_PRIMARY], &st)) {
      assert(st.st_rdev != 0);
      properties.drmHasPrimary = true;
      properties.drmPrimaryMajor = major(st.st_rdev);
      properties.drmPrimaryMinor = minor(st.st_rdev);

      /* TODO: Test if the FD is usable? */
      if (instance->vk.enabled_extensions.KHR_display)
         master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
   }

   result = vk_physical_device_init(&pdev->vk, &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_master_fd;

   pdev->info = info;
   pdev->debug_flags = ws_dev->debug_flags;
   pdev->render_dev = render_dev;
   pdev->master_fd = master_fd;
   pdev->ws_dev = ws_dev;

   pdev->nak = nak_compiler_create(&pdev->info);
   if (pdev->nak == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_init;
   }

   nvk_physical_device_init_pipeline_cache(pdev);

   uint64_t sysmem_size_B = nvk_get_sysmem_heap_size();
   if (sysmem_size_B == 0) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to query total system memory");
      goto fail_disk_cache;
   }

   if (pdev->info.vram_size_B > 0) {
      uint32_t vram_heap_idx = pdev->mem_heap_count++;
      uint32_t bar_heap_idx = vram_heap_idx;
      pdev->mem_heaps[vram_heap_idx] = (struct nvk_memory_heap) {
         .size = pdev->info.vram_size_B,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };

      /* When the BAR is smaller than VRAM, host-visible VRAM gets a
       * separate heap sized to the BAR.
       */
      if (pdev->info.bar_size_B > 0 &&
          pdev->info.bar_size_B < pdev->info.vram_size_B) {
         bar_heap_idx = pdev->mem_heap_count++;
         pdev->mem_heaps[bar_heap_idx] = (struct nvk_memory_heap) {
            .size = pdev->info.bar_size_B,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         };
      }

      /* Only set available if we have the ioctl. */
      if (nouveau_ws_device_vram_used(ws_dev) > 0)
         pdev->mem_heaps[vram_heap_idx].available = nvk_get_vram_heap_available;

      pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_heap_idx,
      };

      if (pdev->info.cls_eng3d >= MAXWELL_A) {
         pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
            .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                             VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
            .heapIndex = bar_heap_idx,
         };
      }
   }

   uint32_t sysmem_heap_idx = pdev->mem_heap_count++;
   pdev->mem_heaps[sysmem_heap_idx] = (struct nvk_memory_heap) {
      .size = sysmem_size_B,
      /* If we don't have any VRAM (iGPU), claim sysmem as DEVICE_LOCAL */
      .flags = pdev->info.vram_size_B == 0
               ? VK_MEMORY_HEAP_DEVICE_LOCAL_BIT
               : 0,
      .available = nvk_get_sysmem_heap_available,
   };

   pdev->mem_types[pdev->mem_type_count++] = (VkMemoryType) {
      /* TODO: What's the right thing to do here on Tegra? */
      .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                       VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
      .heapIndex = sysmem_heap_idx,
   };

   assert(pdev->mem_heap_count <= ARRAY_SIZE(pdev->mem_heaps));
   assert(pdev->mem_type_count <= ARRAY_SIZE(pdev->mem_types));

   /* One general-purpose queue family with a single queue */
   pdev->queue_families[pdev->queue_family_count++] = (struct nvk_queue_family) {
      .queue_flags = VK_QUEUE_GRAPHICS_BIT |
                     VK_QUEUE_COMPUTE_BIT |
                     VK_QUEUE_TRANSFER_BIT |
                     VK_QUEUE_SPARSE_BINDING_BIT,
      .queue_count = 1,
   };
   assert(pdev->queue_family_count <= ARRAY_SIZE(pdev->queue_families));

   unsigned st_idx = 0;
   pdev->syncobj_sync_type = syncobj_sync_type;
   pdev->sync_types[st_idx++] = &pdev->syncobj_sync_type;
   pdev->sync_types[st_idx++] = NULL;
   assert(st_idx <= ARRAY_SIZE(pdev->sync_types));
   pdev->vk.supported_sync_types = pdev->sync_types;

   result = nvk_init_wsi(pdev);
   if (result != VK_SUCCESS)
      goto fail_disk_cache;

   *pdev_out = &pdev->vk;

   return VK_SUCCESS;

fail_disk_cache:
   nvk_physical_device_free_disk_cache(pdev);
   nak_compiler_destroy(pdev->nak);
fail_init:
   vk_physical_device_finish(&pdev->vk);
fail_master_fd:
   if (master_fd >= 0)
      close(master_fd);
   vk_free(&instance->vk.alloc, pdev);
fail_ws_dev:
   nouveau_ws_device_destroy(ws_dev);
   return result;
}
void
nvk_physical_device_destroy(struct vk_physical_device *vk_pdev)
{
   struct nvk_physical_device *phys_dev =
      container_of(vk_pdev, struct nvk_physical_device, vk);

   /* Tear down in reverse order of creation: WSI first, then the disk
    * cache and compiler, then the DRM master FD and winsys device, and
    * finally the base object and the allocation itself.
    */
   nvk_finish_wsi(phys_dev);
   nvk_physical_device_free_disk_cache(phys_dev);
   nak_compiler_destroy(phys_dev->nak);
   if (phys_dev->master_fd >= 0)
      close(phys_dev->master_fd);
   nouveau_ws_device_destroy(phys_dev->ws_dev);
   vk_physical_device_finish(&phys_dev->vk);
   vk_free(&phys_dev->vk.instance->alloc, phys_dev);
}
VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);

   /* Copy the heap/type tables built at physical-device creation.
    * Unsigned loop counters avoid signed/unsigned comparison against
    * the counts and match the budget loop below (previously "int i").
    */
   pMemoryProperties->memoryProperties.memoryHeapCount = pdev->mem_heap_count;
   for (uint32_t i = 0; i < pdev->mem_heap_count; i++) {
      pMemoryProperties->memoryProperties.memoryHeaps[i] = (VkMemoryHeap) {
         .size = pdev->mem_heaps[i].size,
         .flags = pdev->mem_heaps[i].flags,
      };
   }

   pMemoryProperties->memoryProperties.memoryTypeCount = pdev->mem_type_count;
   for (uint32_t i = 0; i < pdev->mem_type_count; i++) {
      pMemoryProperties->memoryProperties.memoryTypes[i] = pdev->mem_types[i];
   }

   vk_foreach_struct(ext, pMemoryProperties->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *p = (void *)ext;

         for (unsigned i = 0; i < pdev->mem_heap_count; i++) {
            const struct nvk_memory_heap *heap = &pdev->mem_heaps[i];
            uint64_t used = p_atomic_read(&heap->used);

            /* From the Vulkan 1.3.278 spec:
             *
             *    "heapUsage is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
             *    values in which memory usages are returned, with one element
             *    for each memory heap. A heap's usage is an estimate of how
             *    much memory the process is currently using in that heap."
             *
             * TODO: Include internal allocations?
             */
            p->heapUsage[i] = used;

            uint64_t available = heap->size;
            if (heap->available)
               available = heap->available(pdev);

            /* From the Vulkan 1.3.278 spec:
             *
             *    "heapBudget is an array of VK_MAX_MEMORY_HEAPS VkDeviceSize
             *    values in which memory budgets are returned, with one
             *    element for each memory heap. A heap's budget is a rough
             *    estimate of how much memory the process can allocate from
             *    that heap before allocations may fail or cause performance
             *    degradation. The budget includes any currently allocated
             *    device memory."
             *
             * and
             *
             *    "The heapBudget value must be less than or equal to
             *    VkMemoryHeap::size for each heap."
             *
             * available (queried above) is the total amount free memory
             * system-wide and does not include our allocations so we need
             * to add that in.
             */
            uint64_t budget = MIN2(available + used, heap->size);

            /* Set the budget at 90% of available to avoid thrashing */
            p->heapBudget[i] = ROUND_DOWN_TO(budget * 9 / 10, 1 << 20);
         }

         /* From the Vulkan 1.3.278 spec:
          *
          *    "The heapBudget and heapUsage values must be zero for array
          *    elements greater than or equal to
          *    VkPhysicalDeviceMemoryProperties::memoryHeapCount. The
          *    heapBudget value must be non-zero for array elements less than
          *    VkPhysicalDeviceMemoryProperties::memoryHeapCount."
          */
         for (unsigned i = pdev->mem_heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
            p->heapBudget[i] = 0u;
            p->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         nvk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}
VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out, pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   /* Report each family recorded at device-create time.  Timestamps are
    * 64-bit valid and image transfers have no granularity restriction.
    */
   for (uint8_t qf = 0; qf < pdev->queue_family_count; qf++) {
      const struct nvk_queue_family *family = &pdev->queue_families[qf];

      vk_outarray_append_typed(VkQueueFamilyProperties2, &out, props) {
         props->queueFamilyProperties = (VkQueueFamilyProperties) {
            .queueFlags = family->queue_flags,
            .queueCount = family->queue_count,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
      }
   }
}
/* Time domains advertised for VK_KHR_calibrated_timestamps.  The raw
 * monotonic clock is only listed on platforms that define it.
 */
static const VkTimeDomainKHR nvk_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
VKAPI_ATTR VkResult VKAPI_CALL
nvk_GetPhysicalDeviceCalibrateableTimeDomainsKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pTimeDomainCount,
   VkTimeDomainKHR *pTimeDomains)
{
   /* Standard Vulkan two-call idiom via vk_outarray: fills at most
    * *pTimeDomainCount entries and returns VK_INCOMPLETE when the
    * caller's array is too small.  ARRAY_SIZE() yields size_t, so use
    * an unsigned counter (the old "int d" tripped -Wsign-compare).
    */
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);

   for (unsigned d = 0; d < ARRAY_SIZE(nvk_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainKHR, &out, i) {
         *i = nvk_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}
VKAPI_ATTR void VKAPI_CALL
nvk_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice physicalDevice,
   VkSampleCountFlagBits samples,
   VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   VK_FROM_HANDLE(nvk_physical_device, pdev, physicalDevice);

   /* A 1x1 sample-location grid for supported sample counts; 0x0 for
    * counts we don't support custom sample locations on.
    */
   const bool supported =
      (samples & pdev->vk.properties.sampleLocationSampleCounts) != 0;
   pMultisampleProperties->maxSampleLocationGridSize =
      supported ? (VkExtent2D){1, 1} : (VkExtent2D){0, 0};
}