mesa/src/broadcom/vulkan/v3dv_device.c

/*
* Copyright © 2019 Raspberry Pi Ltd
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <xf86drm.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include "v3dv_private.h"
#include "common/v3d_debug.h"
#include "compiler/v3d_compiler.h"
#include "drm-uapi/v3d_drm.h"
#include "vk_drm_syncobj.h"
#include "vk_util.h"
#include "git_sha1.h"
#include "util/build_id.h"
#include "util/u_debug.h"
#include "util/format/u_format.h"
#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <X11/Xlib-xcb.h>
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#include <wayland-client.h>
#include "wayland-drm-client-protocol.h"
#endif
#ifndef ANDROID
# define V3DV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#else
/* The Android CDD requires additional extensions for API v1.1+ */
# define V3DV_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
#endif
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
*pApiVersion = V3DV_API_VERSION;
return VK_SUCCESS;
}
#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
defined(VK_USE_PLATFORM_XCB_KHR) || \
defined(VK_USE_PLATFORM_XLIB_KHR) || \
defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define V3DV_USE_WSI_PLATFORM
#endif
static const struct vk_instance_extension_table instance_extensions = {
.KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
.KHR_display = true,
.KHR_get_display_properties2 = true,
.EXT_direct_mode_display = true,
.EXT_acquire_drm_display = true,
#endif
.KHR_external_fence_capabilities = true,
.KHR_external_memory_capabilities = true,
.KHR_external_semaphore_capabilities = true,
.KHR_get_physical_device_properties2 = true,
#ifdef V3DV_USE_WSI_PLATFORM
.KHR_get_surface_capabilities2 = true,
.KHR_surface = true,
.KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
.KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
.KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
.KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
.EXT_acquire_xlib_display = true,
#endif
.EXT_debug_report = true,
.EXT_debug_utils = true,
};
static void
get_device_extensions(const struct v3dv_physical_device *device,
struct vk_device_extension_table *ext)
{
*ext = (struct vk_device_extension_table) {
.KHR_8bit_storage = true,
.KHR_16bit_storage = true,
.KHR_bind_memory2 = true,
.KHR_buffer_device_address = true,
.KHR_copy_commands2 = true,
.KHR_create_renderpass2 = true,
.KHR_dedicated_allocation = true,
.KHR_device_group = true,
.KHR_driver_properties = true,
.KHR_descriptor_update_template = true,
.KHR_depth_stencil_resolve = true,
.KHR_external_fence = true,
.KHR_external_fence_fd = true,
.KHR_external_memory = true,
.KHR_external_memory_fd = true,
.KHR_external_semaphore = true,
.KHR_external_semaphore_fd = true,
.KHR_format_feature_flags2 = true,
.KHR_get_memory_requirements2 = true,
.KHR_image_format_list = true,
.KHR_imageless_framebuffer = true,
.KHR_performance_query = device->caps.perfmon,
.KHR_relaxed_block_layout = true,
.KHR_maintenance1 = true,
.KHR_maintenance2 = true,
.KHR_maintenance3 = true,
.KHR_maintenance4 = true,
.KHR_multiview = true,
.KHR_pipeline_executable_properties = true,
.KHR_separate_depth_stencil_layouts = true,
.KHR_shader_float_controls = true,
.KHR_shader_non_semantic_info = true,
.KHR_sampler_mirror_clamp_to_edge = true,
#ifndef ANDROID
.KHR_sampler_ycbcr_conversion = true,
#endif
.KHR_spirv_1_4 = true,
.KHR_storage_buffer_storage_class = true,
.KHR_timeline_semaphore = true,
.KHR_uniform_buffer_standard_layout = true,
.KHR_shader_integer_dot_product = true,
.KHR_synchronization2 = true,
.KHR_workgroup_memory_explicit_layout = true,
#ifdef V3DV_USE_WSI_PLATFORM
.KHR_swapchain = true,
.KHR_swapchain_mutable_format = true,
.KHR_incremental_present = true,
#endif
.KHR_variable_pointers = true,
.KHR_vulkan_memory_model = true,
.KHR_zero_initialize_workgroup_memory = true,
.EXT_4444_formats = true,
.EXT_attachment_feedback_loop_layout = true,
.EXT_border_color_swizzle = true,
.EXT_color_write_enable = true,
.EXT_custom_border_color = true,
.EXT_depth_clip_control = true,
.EXT_load_store_op_none = true,
.EXT_inline_uniform_block = true,
.EXT_external_memory_dma_buf = true,
.EXT_host_query_reset = true,
.EXT_image_drm_format_modifier = true,
.EXT_image_robustness = true,
.EXT_index_type_uint8 = true,
.EXT_line_rasterization = true,
.EXT_memory_budget = true,
.EXT_physical_device_drm = true,
.EXT_pipeline_creation_cache_control = true,
.EXT_pipeline_creation_feedback = true,
.EXT_pipeline_robustness = true,
.EXT_primitive_topology_list_restart = true,
.EXT_private_data = true,
.EXT_provoking_vertex = true,
.EXT_separate_stencil_usage = true,
.EXT_shader_module_identifier = true,
.EXT_texel_buffer_alignment = true,
.EXT_tooling_info = true,
.EXT_vertex_attribute_divisor = true,
#ifdef ANDROID
.ANDROID_native_buffer = true,
#endif
};
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
uint32_t *pPropertyCount,
VkExtensionProperties *pProperties)
{
/* We don't support any layers */
if (pLayerName)
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
return vk_enumerate_instance_extension_properties(
&instance_extensions, pPropertyCount, pProperties);
}
static VkResult enumerate_devices(struct vk_instance *vk_instance);
static void destroy_physical_device(struct vk_physical_device *device);
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkInstance *pInstance)
{
struct v3dv_instance *instance;
VkResult result;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
if (pAllocator == NULL)
pAllocator = vk_default_allocator();
instance = vk_alloc(pAllocator, sizeof(*instance), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!instance)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_instance_dispatch_table dispatch_table;
vk_instance_dispatch_table_from_entrypoints(
&dispatch_table, &v3dv_instance_entrypoints, true);
vk_instance_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_instance_entrypoints, false);
result = vk_instance_init(&instance->vk,
&instance_extensions,
&dispatch_table,
pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free(pAllocator, instance);
return vk_error(NULL, result);
}
v3d_process_debug_variable();
instance->vk.physical_devices.enumerate = enumerate_devices;
instance->vk.physical_devices.destroy = destroy_physical_device;
/* We start with the default values for the pipeline_cache envvars */
instance->pipeline_cache_enabled = true;
instance->default_pipeline_cache_enabled = true;
const char *pipeline_cache_str = getenv("V3DV_ENABLE_PIPELINE_CACHE");
if (pipeline_cache_str != NULL) {
if (strncmp(pipeline_cache_str, "full", 4) == 0) {
/* nothing to do, just to filter correct values */
} else if (strncmp(pipeline_cache_str, "no-default-cache", 16) == 0) {
instance->default_pipeline_cache_enabled = false;
} else if (strncmp(pipeline_cache_str, "off", 3) == 0) {
instance->pipeline_cache_enabled = false;
instance->default_pipeline_cache_enabled = false;
} else {
fprintf(stderr, "Wrong value for envvar V3DV_ENABLE_PIPELINE_CACHE. "
"Allowed values are: full, no-default-cache, off\n");
}
}
if (instance->pipeline_cache_enabled == false) {
fprintf(stderr, "WARNING: v3dv pipeline cache is disabled. Performance "
"can be affected negatively\n");
} else {
if (instance->default_pipeline_cache_enabled == false) {
fprintf(stderr, "WARNING: default v3dv pipeline cache is disabled. "
"Performance can be affected negatively\n");
}
}
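/* Register the instance as a Valgrind memory pool so instance
 * allocations can be tracked by memcheck (compiles to a no-op in
 * non-Valgrind builds).
 */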
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
*pInstance = v3dv_instance_to_handle(instance);
return VK_SUCCESS;
}
static void
v3dv_physical_device_free_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
if (device->disk_cache)
disk_cache_destroy(device->disk_cache);
#else
assert(device->disk_cache == NULL);
#endif
}
static void
physical_device_finish(struct v3dv_physical_device *device)
{
v3dv_wsi_finish(device);
v3dv_physical_device_free_disk_cache(device);
v3d_compiler_free(device->compiler);
util_sparse_array_finish(&device->bo_map);
close(device->render_fd);
if (device->display_fd >= 0)
close(device->display_fd);
if (device->master_fd >= 0)
close(device->master_fd);
free(device->name);
#if using_v3d_simulator
v3d_simulator_destroy(device->sim_file);
#endif
vk_physical_device_finish(&device->vk);
mtx_destroy(&device->mutex);
}
static void
destroy_physical_device(struct vk_physical_device *device)
{
physical_device_finish((struct v3dv_physical_device *)device);
vk_free(&device->instance->alloc, device);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyInstance(VkInstance _instance,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
if (!instance)
return;
VG(VALGRIND_DESTROY_MEMPOOL(instance));
vk_instance_finish(&instance->vk);
vk_free(&instance->vk.alloc, instance);
}
static uint64_t
compute_heap_size()
{
#if !using_v3d_simulator
/* Query the total RAM from the system */
struct sysinfo info;
sysinfo(&info);
uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
#else
uint64_t total_ram = (uint64_t) v3d_simulator_get_mem_size();
#endif
/* We don't want to burn too much RAM with the GPU. If the user has 4GB
* or less, we use at most half. If they have more than 4GB we limit it
* to 3/4, with a maximum of 4GB since the GPU cannot address more than that.
*/
const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull;
uint64_t available;
if (total_ram <= MAX_HEAP_SIZE)
available = total_ram / 2;
else
available = MIN2(MAX_HEAP_SIZE, total_ram * 3 / 4);
return available;
}
static uint64_t
compute_memory_budget(struct v3dv_physical_device *device)
{
uint64_t heap_size = device->memory.memoryHeaps[0].size;
uint64_t heap_used = device->heap_used;
uint64_t sys_available;
#if !using_v3d_simulator
ASSERTED bool has_available_memory =
os_get_available_system_memory(&sys_available);
assert(has_available_memory);
#else
sys_available = (uint64_t) v3d_simulator_get_mem_free();
#endif
/* Let's not incite the app to starve the system: report at most 90% of
* available system memory.
*/
uint64_t heap_available = sys_available * 9 / 10;
return MIN2(heap_size, heap_used + heap_available);
}
#if !using_v3d_simulator
#ifdef VK_USE_PLATFORM_XCB_KHR
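/* Opens an authenticated DRM fd through the X server using the DRI3Open
 * request. If a surface is provided we reuse its X connection (XCB or
 * Xlib), otherwise we open a temporary connection to the default display.
 */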
static int
create_display_fd_xcb(VkIcdSurfaceBase *surface)
{
int fd = -1;
xcb_connection_t *conn;
xcb_dri3_open_reply_t *reply = NULL;
if (surface) {
if (surface->platform == VK_ICD_WSI_PLATFORM_XLIB)
conn = XGetXCBConnection(((VkIcdSurfaceXlib *)surface)->dpy);
else
conn = ((VkIcdSurfaceXcb *)surface)->connection;
} else {
conn = xcb_connect(NULL, NULL);
}
if (xcb_connection_has_error(conn))
goto finish;
const xcb_setup_t *setup = xcb_get_setup(conn);
xcb_screen_iterator_t iter = xcb_setup_roots_iterator(setup);
xcb_screen_t *screen = iter.data;
xcb_dri3_open_cookie_t cookie;
cookie = xcb_dri3_open(conn, screen->root, None);
reply = xcb_dri3_open_reply(conn, cookie, NULL);
if (!reply)
goto finish;
if (reply->nfd != 1)
goto finish;
fd = xcb_dri3_open_reply_fds(conn, reply)[0];
fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
finish:
if (!surface)
xcb_disconnect(conn);
if (reply)
free(reply);
return fd;
}
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
struct v3dv_wayland_info {
struct wl_drm *wl_drm;
int fd;
bool is_set;
bool authenticated;
};
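/* wl_drm 'device' event handler: opens the DRM node advertised by the
 * compositor and sends our DRM magic token back for authentication.
 */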
static void
v3dv_drm_handle_device(void *data, struct wl_drm *drm, const char *device)
{
struct v3dv_wayland_info *info = data;
info->fd = open(device, O_RDWR | O_CLOEXEC);
info->is_set = info->fd != -1;
if (!info->is_set) {
fprintf(stderr, "v3dv_drm_handle_device: could not open %s (%s)\n",
device, strerror(errno));
return;
}
drm_magic_t magic;
if (drmGetMagic(info->fd, &magic)) {
fprintf(stderr, "v3dv_drm_handle_device: drmGetMagic failed\n");
close(info->fd);
info->fd = -1;
info->is_set = false;
return;
}
wl_drm_authenticate(info->wl_drm, magic);
}
static void
v3dv_drm_handle_format(void *data, struct wl_drm *drm, uint32_t format)
{
}
static void
v3dv_drm_handle_authenticated(void *data, struct wl_drm *drm)
{
struct v3dv_wayland_info *info = data;
info->authenticated = true;
}
static void
v3dv_drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t value)
{
}
struct wl_drm_listener v3dv_drm_listener = {
.device = v3dv_drm_handle_device,
.format = v3dv_drm_handle_format,
.authenticated = v3dv_drm_handle_authenticated,
.capabilities = v3dv_drm_handle_capabilities
};
static void
v3dv_registry_global(void *data,
struct wl_registry *registry,
uint32_t name,
const char *interface,
uint32_t version)
{
struct v3dv_wayland_info *info = data;
if (strcmp(interface, wl_drm_interface.name) == 0) {
info->wl_drm = wl_registry_bind(registry, name, &wl_drm_interface,
MIN2(version, 2));
wl_drm_add_listener(info->wl_drm, &v3dv_drm_listener, data);
}
}
static void
v3dv_registry_global_remove_cb(void *data,
struct wl_registry *registry,
uint32_t name)
{
}
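/* Binds the compositor's wl_drm global and runs the authentication
 * handshake. Three roundtrips are required: one to receive the registry
 * globals, one for the wl_drm 'device' event and one for the
 * 'authenticated' event.
 */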
static int
create_display_fd_wayland(VkIcdSurfaceBase *surface)
{
struct wl_display *display;
struct wl_registry *registry = NULL;
struct v3dv_wayland_info info = {
.wl_drm = NULL,
.fd = -1,
.is_set = false,
.authenticated = false
};
if (surface)
display = ((VkIcdSurfaceWayland *) surface)->display;
else
display = wl_display_connect(NULL);
if (!display)
return -1;
registry = wl_display_get_registry(display);
if (!registry) {
if (!surface)
wl_display_disconnect(display);
return -1;
}
static const struct wl_registry_listener registry_listener = {
v3dv_registry_global,
v3dv_registry_global_remove_cb
};
wl_registry_add_listener(registry, &registry_listener, &info);
wl_display_roundtrip(display); /* For the registry advertisement */
wl_display_roundtrip(display); /* For the DRM device event */
wl_display_roundtrip(display); /* For the authentication event */
wl_drm_destroy(info.wl_drm);
wl_registry_destroy(registry);
if (!surface)
wl_display_disconnect(display);
if (!info.is_set)
return -1;
if (!info.authenticated)
return -1;
return info.fd;
}
#endif
/* Acquire an authenticated display fd without a surface reference. This is the
* case where the application is making WSI allocations outside the Vulkan
* swapchain context (only Zink, for now). Since we lack information about the
* underlying surface we just try our best to figure out the correct display
* and platform to use. It should work in most cases.
*/
static void
acquire_display_device_no_surface(struct v3dv_physical_device *pdevice)
{
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
pdevice->display_fd = create_display_fd_wayland(NULL);
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
if (pdevice->display_fd == -1)
pdevice->display_fd = create_display_fd_xcb(NULL);
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
if (pdevice->display_fd == -1 && pdevice->master_fd >= 0)
pdevice->display_fd = dup(pdevice->master_fd);
#endif
}
/* Acquire an authenticated display fd from the surface. This is the regular
* case where the application is using swapchains to create WSI allocations.
* In this case we use the surface information to figure out the correct
* display and platform combination.
*/
static void
acquire_display_device_surface(struct v3dv_physical_device *pdevice,
VkIcdSurfaceBase *surface)
{
/* Mesa will set both of VK_USE_PLATFORM_{XCB,XLIB} when building with
* platform X11, so only check for XCB and rely on XCB to get an
* authenticated device also for Xlib.
*/
#ifdef VK_USE_PLATFORM_XCB_KHR
if (surface->platform == VK_ICD_WSI_PLATFORM_XCB ||
surface->platform == VK_ICD_WSI_PLATFORM_XLIB) {
pdevice->display_fd = create_display_fd_xcb(surface);
}
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
if (surface->platform == VK_ICD_WSI_PLATFORM_WAYLAND)
pdevice->display_fd = create_display_fd_wayland(surface);
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
if (surface->platform == VK_ICD_WSI_PLATFORM_DISPLAY &&
pdevice->master_fd >= 0) {
pdevice->display_fd = dup(pdevice->master_fd);
}
#endif
}
#endif /* !using_v3d_simulator */
/* Attempts to get an authenticated display fd from the display server that
* we can use to allocate BOs for presentable images.
*/
VkResult
v3dv_physical_device_acquire_display(struct v3dv_physical_device *pdevice,
VkIcdSurfaceBase *surface)
{
VkResult result = VK_SUCCESS;
mtx_lock(&pdevice->mutex);
if (pdevice->display_fd != -1)
goto done;
/* When running on the simulator we do everything on a single render node so
* we don't need to get an authenticated display fd from the display server.
*/
#if !using_v3d_simulator
if (surface)
acquire_display_device_surface(pdevice, surface);
else
acquire_display_device_no_surface(pdevice);
if (pdevice->display_fd == -1)
result = VK_ERROR_INITIALIZATION_FAILED;
#endif
done:
mtx_unlock(&pdevice->mutex);
return result;
}
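/* Checks whether the kernel driver supports a given feature by querying
 * DRM_IOCTL_V3D_GET_PARAM on the render node.
 */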
static bool
v3d_has_feature(struct v3dv_physical_device *device, enum drm_v3d_param feature)
{
struct drm_v3d_get_param p = {
.param = feature,
};
if (v3dv_ioctl(device->render_fd, DRM_IOCTL_V3D_GET_PARAM, &p) != 0)
return false;
return p.value;
}
static bool
device_has_expected_features(struct v3dv_physical_device *device)
{
return v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_TFU) &&
v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CSD) &&
v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH);
}
static VkResult
init_uuids(struct v3dv_physical_device *device)
{
const struct build_id_note *note =
build_id_find_nhdr_for_addr(init_uuids);
if (!note) {
return vk_errorf(device->vk.instance,
VK_ERROR_INITIALIZATION_FAILED,
"Failed to find build-id");
}
unsigned build_id_len = build_id_length(note);
if (build_id_len < 20) {
return vk_errorf(device->vk.instance,
VK_ERROR_INITIALIZATION_FAILED,
"build-id too short. It needs to be a SHA");
}
memcpy(device->driver_build_sha1, build_id_data(note), 20);
uint32_t vendor_id = v3dv_physical_device_vendor_id(device);
uint32_t device_id = v3dv_physical_device_device_id(device);
struct mesa_sha1 sha1_ctx;
uint8_t sha1[20];
STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
/* The pipeline cache UUID is used for determining when a pipeline cache is
* invalid. It needs both a driver build and the PCI ID of the device.
*/
_mesa_sha1_init(&sha1_ctx);
_mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
_mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
_mesa_sha1_final(&sha1_ctx, sha1);
memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
/* The driver UUID is used for determining sharability of images and memory
* between two Vulkan instances in separate processes. People who want to
* share memory need to also check the device UUID (below) so all this
* needs to be is the build-id.
*/
memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);
/* The device UUID uniquely identifies the given device within the machine.
* Since we never have more than one device, this doesn't need to be a real
* UUID.
*/
_mesa_sha1_init(&sha1_ctx);
_mesa_sha1_update(&sha1_ctx, &vendor_id, sizeof(vendor_id));
_mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
_mesa_sha1_final(&sha1_ctx, sha1);
memcpy(device->device_uuid, sha1, VK_UUID_SIZE);
return VK_SUCCESS;
}
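/* The disk cache is keyed on the driver build SHA-1, so cached shaders
 * are invalidated whenever the driver binary changes.
 */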
static void
v3dv_physical_device_init_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
char timestamp[41];
_mesa_sha1_format(timestamp, device->driver_build_sha1);
assert(device->name);
device->disk_cache = disk_cache_create(device->name, timestamp, v3d_mesa_debug);
#else
device->disk_cache = NULL;
#endif
}
static VkResult
create_physical_device(struct v3dv_instance *instance,
drmDevicePtr drm_render_device,
drmDevicePtr drm_primary_device)
{
VkResult result = VK_SUCCESS;
int32_t master_fd = -1;
int32_t render_fd = -1;
struct v3dv_physical_device *device =
vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
if (!device)
return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_physical_device_dispatch_table dispatch_table;
vk_physical_device_dispatch_table_from_entrypoints
(&dispatch_table, &v3dv_physical_device_entrypoints, true);
vk_physical_device_dispatch_table_from_entrypoints(
&dispatch_table, &wsi_physical_device_entrypoints, false);
result = vk_physical_device_init(&device->vk, &instance->vk, NULL,
&dispatch_table);
if (result != VK_SUCCESS)
goto fail;
assert(drm_render_device);
const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
render_fd = open(path, O_RDWR | O_CLOEXEC);
if (render_fd < 0) {
fprintf(stderr, "Opening %s failed: %s\n", path, strerror(errno));
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
/* If we are running on VK_KHR_display we need to acquire the master
* display device now for the v3dv_wsi_init() call below. For anything else
* we postpone that until a swapchain is created.
*/
const char *primary_path;
#if !using_v3d_simulator
if (drm_primary_device)
primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
else
primary_path = NULL;
#else
primary_path = drm_render_device->nodes[DRM_NODE_PRIMARY];
#endif
struct stat primary_stat = {0}, render_stat = {0};
device->has_primary = primary_path;
if (device->has_primary) {
if (stat(primary_path, &primary_stat) != 0) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to stat DRM primary node %s",
primary_path);
goto fail;
}
device->primary_devid = primary_stat.st_rdev;
}
if (fstat(render_fd, &render_stat) != 0) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"failed to stat DRM render node %s",
path);
goto fail;
}
device->has_render = true;
device->render_devid = render_stat.st_rdev;
#if using_v3d_simulator
device->device_id = drm_render_device->deviceinfo.pci->device_id;
#endif
if (instance->vk.enabled_extensions.KHR_display ||
instance->vk.enabled_extensions.EXT_acquire_drm_display) {
#if !using_v3d_simulator
/* Open the primary node on the vc4 display device */
assert(drm_primary_device);
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#else
/* There is only one device with primary and render nodes.
* Open its primary node.
*/
master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
#endif
}
#if using_v3d_simulator
device->sim_file = v3d_simulator_init(render_fd);
#endif
device->render_fd = render_fd; /* The v3d render node */
device->display_fd = -1; /* Authenticated vc4 primary node */
device->master_fd = master_fd; /* Master vc4 primary node */
if (!v3d_get_device_info(device->render_fd, &device->devinfo, &v3dv_ioctl)) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"Failed to get info from device.");
goto fail;
}
if (device->devinfo.ver < 42) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"Device version < 42.");
goto fail;
}
if (!device_has_expected_features(device)) {
result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
"Kernel driver doesn't have required features.");
goto fail;
}
device->caps.multisync =
v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT);
device->caps.perfmon =
v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_PERFMON);
result = init_uuids(device);
if (result != VK_SUCCESS)
goto fail;
device->compiler = v3d_compiler_init(&device->devinfo,
MAX_INLINE_UNIFORM_BUFFERS);
device->next_program_id = 0;
ASSERTED int len =
asprintf(&device->name, "V3D %d.%d",
device->devinfo.ver / 10, device->devinfo.ver % 10);
assert(len != -1);
v3dv_physical_device_init_disk_cache(device);
/* Setup available memory heaps and types */
VkPhysicalDeviceMemoryProperties *mem = &device->memory;
mem->memoryHeapCount = 1;
mem->memoryHeaps[0].size = compute_heap_size();
mem->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
/* This is the only combination required by the spec */
mem->memoryTypeCount = 1;
mem->memoryTypes[0].propertyFlags =
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
mem->memoryTypes[0].heapIndex = 0;
/* Initialize sparse array for refcounting imported BOs */
util_sparse_array_init(&device->bo_map, sizeof(struct v3dv_bo), 512);
device->options.merge_jobs = !V3D_DBG(NO_MERGE_JOBS);
device->drm_syncobj_type = vk_drm_syncobj_get_type(device->render_fd);
/* We don't support timelines in the uAPI yet and we don't want it getting
* suddenly turned on by vk_drm_syncobj_get_type() without us adding v3dv
* code for it first.
*/
device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
#if using_v3d_simulator
/* There are CTS tests which do the following:
*
* 1. Create a command buffer with a vkCmdWaitEvents()
* 2. Submit the command buffer
* 3. vkGetSemaphoreFdKHR() to try to get a sync_file
* 4. vkSetEvent()
*
* This deadlocks in the simulator because we have to wait for the syncobj
* to get a real fence in vkGetSemaphoreFdKHR(). This will never happen
* though because the simulator, unlike real hardware, executes ioctls
* synchronously in the same thread, which means that it will try to
* execute the wait for event immediately and never get to emit the
* signaling job that comes after the compute job that implements the wait
* in the command buffer, which would be responsible for creating the fence
* for the signaling semaphore.
*
* This behavior was seemingly allowed in previous Vulkan versions; however,
* it was fixed in the Vulkan 1.3.228 spec. From commit 355367640f2e:
*
* "Clarify that vkCmdWaitEvents must not execute before a vkSetEvent it
* waits on (internal issue 2971)"
*
* Either way, we disable sync file support in the simulator for now, until
* the CTS is fixed.
*/
device->drm_syncobj_type.import_sync_file = NULL;
device->drm_syncobj_type.export_sync_file = NULL;
#endif
/* Multiwait is required for emulated timeline semaphores and is supported
* by the v3d kernel interface.
*/
device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;
device->sync_timeline_type =
vk_sync_timeline_get_type(&device->drm_syncobj_type);
device->sync_types[0] = &device->drm_syncobj_type;
device->sync_types[1] = &device->sync_timeline_type.sync;
device->sync_types[2] = NULL;
device->vk.supported_sync_types = device->sync_types;
result = v3dv_wsi_init(device);
if (result != VK_SUCCESS) {
vk_error(instance, result);
goto fail;
}
get_device_extensions(device, &device->vk.supported_extensions);
mtx_init(&device->mutex, mtx_plain);
list_addtail(&device->vk.link, &instance->vk.physical_devices.list);
return VK_SUCCESS;
fail:
vk_physical_device_finish(&device->vk);
vk_free(&instance->vk.alloc, device);
if (render_fd >= 0)
close(render_fd);
if (master_fd >= 0)
close(master_fd);
return result;
}
/* This driver hook is expected to return VK_SUCCESS (unless a memory
* allocation error happened) if no compatible device is found. If a
* compatible device is found, it may return an error code if device
* initialization failed.
*/
static VkResult
enumerate_devices(struct vk_instance *vk_instance)
{
struct v3dv_instance *instance =
container_of(vk_instance, struct v3dv_instance, vk);
/* FIXME: Check for more devices? */
drmDevicePtr devices[8];
int max_devices;
max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
if (max_devices < 1)
return VK_SUCCESS;
VkResult result = VK_SUCCESS;
#if !using_v3d_simulator
int32_t v3d_idx = -1;
int32_t vc4_idx = -1;
#endif
for (unsigned i = 0; i < (unsigned)max_devices; i++) {
#if using_v3d_simulator
/* In the simulator, we look for an Intel/AMD render node */
const int required_nodes = (1 << DRM_NODE_RENDER) | (1 << DRM_NODE_PRIMARY);
if ((devices[i]->available_nodes & required_nodes) == required_nodes &&
devices[i]->bustype == DRM_BUS_PCI &&
(devices[i]->deviceinfo.pci->vendor_id == 0x8086 ||
devices[i]->deviceinfo.pci->vendor_id == 0x1002)) {
result = create_physical_device(instance, devices[i], NULL);
if (result == VK_SUCCESS)
break;
}
#else
/* On actual hardware, we should have a render node (v3d)
* and a primary node (vc4). We will need to use the primary
* to allocate WSI buffers and share them with the render node
* via prime, but that is a privileged operation so we need the
* primary node to be authenticated, and for that we need the
* display server to provide the device fd (with DRI3), so here
* we only check that the device is present but we don't
* try to open it.
*/
if (devices[i]->bustype != DRM_BUS_PLATFORM)
continue;
if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER) {
char **compat = devices[i]->deviceinfo.platform->compatible;
while (*compat) {
if (strncmp(*compat, "brcm,2711-v3d", 13) == 0) {
v3d_idx = i;
break;
}
compat++;
}
} else if (devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
char **compat = devices[i]->deviceinfo.platform->compatible;
while (*compat) {
if (strncmp(*compat, "brcm,bcm2711-vc5", 16) == 0 ||
strncmp(*compat, "brcm,bcm2835-vc4", 16) == 0 ) {
vc4_idx = i;
break;
}
compat++;
}
}
#endif
}
#if !using_v3d_simulator
if (v3d_idx != -1 && vc4_idx != -1) {
result =
create_physical_device(instance, devices[v3d_idx], devices[vc4_idx]);
}
#endif
drmFreeDevices(devices, max_devices);
return result;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures2 *pFeatures)
{
V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
struct vk_features features = {
/* Vulkan 1.0 */
.robustBufferAccess = true, /* This feature is mandatory */
.fullDrawIndexUint32 = false, /* Only available since V3D 4.4.9.1 */
.imageCubeArray = true,
.independentBlend = true,
.geometryShader = true,
.tessellationShader = false,
.sampleRateShading = true,
.dualSrcBlend = false,
.logicOp = true,
.multiDrawIndirect = false,
.drawIndirectFirstInstance = true,
.depthClamp = false, /* Only available since V3D 4.5.1.1 */
.depthBiasClamp = true,
.fillModeNonSolid = true,
.depthBounds = false, /* Only available since V3D 4.3.16.2 */
.wideLines = true,
.largePoints = true,
.alphaToOne = true,
.multiViewport = false,
.samplerAnisotropy = true,
.textureCompressionETC2 = true,
.textureCompressionASTC_LDR = true,
/* Note that textureCompressionBC requires that the driver support all
* the BC formats. V3D 4.2 only supports BC1-3, so we can't claim
* that we support it.
*/
.textureCompressionBC = false,
.occlusionQueryPrecise = true,
.pipelineStatisticsQuery = false,
.vertexPipelineStoresAndAtomics = true,
.fragmentStoresAndAtomics = true,
.shaderTessellationAndGeometryPointSize = true,
.shaderImageGatherExtended = false,
.shaderStorageImageExtendedFormats = true,
.shaderStorageImageMultisample = false,
.shaderStorageImageReadWithoutFormat = true,
.shaderStorageImageWriteWithoutFormat = false,
.shaderUniformBufferArrayDynamicIndexing = false,
.shaderSampledImageArrayDynamicIndexing = false,
.shaderStorageBufferArrayDynamicIndexing = false,
.shaderStorageImageArrayDynamicIndexing = false,
.shaderClipDistance = true,
.shaderCullDistance = false,
.shaderFloat64 = false,
.shaderInt64 = false,
.shaderInt16 = false,
.shaderResourceResidency = false,
.shaderResourceMinLod = false,
.sparseBinding = false,
.sparseResidencyBuffer = false,
.sparseResidencyImage2D = false,
.sparseResidencyImage3D = false,
.sparseResidency2Samples = false,
.sparseResidency4Samples = false,
.sparseResidency8Samples = false,
.sparseResidency16Samples = false,
.sparseResidencyAliased = false,
.variableMultisampleRate = false,
.inheritedQueries = true,
/* Vulkan 1.1 */
.storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = true,
.storageInputOutput16 = false,
.multiview = true,
.multiviewGeometryShader = false,
.multiviewTessellationShader = false,
.variablePointersStorageBuffer = true,
/* FIXME: this needs support for non-constant index on UBO/SSBO */
.variablePointers = false,
.protectedMemory = false,
#ifdef ANDROID
.samplerYcbcrConversion = false,
#else
.samplerYcbcrConversion = true,
#endif
.shaderDrawParameters = false,
/* Vulkan 1.2 */
.hostQueryReset = true,
.uniformAndStorageBuffer8BitAccess = true,
.uniformBufferStandardLayout = true,
/* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and
* stores of vectors that cross these boundaries would not work correctly
* with scalarBlockLayout and would need to be split into smaller vectors
* (and/or scalars) that don't cross these boundaries. For load/stores
* with dynamic offsets where we can't identify if the offset is
* problematic, we would always have to scalarize. Overall, this would
* not lead to best performance so let's just not support it.
*/
.scalarBlockLayout = false,
/* This tells applications two things:
*
* 1. If they can select just one aspect for barriers. For us barriers
* decide if we need to split a job and we don't care if it is only
* for one of the aspects of the image or both, so we don't really
* benefit from seeing barriers that select just one aspect.
*
* 2. If they can program different layouts for each aspect. We
* generally don't care about layouts, so again, we don't get any
* benefits from this to limit the scope of image layout transitions.
*
* Still, Vulkan 1.2 requires this feature to be supported so we
* advertise it even though we don't really take advantage of it.
*/
.separateDepthStencilLayouts = true,
.storageBuffer8BitAccess = true,
.storagePushConstant8 = true,
.imagelessFramebuffer = true,
.timelineSemaphore = true,
.samplerMirrorClampToEdge = true,
/* These are mandatory by Vulkan 1.2, however, we don't support any of
* the optional features affected by them (non 32-bit types for
* shaderSubgroupExtendedTypes and additional subgroup ballot for
* subgroupBroadcastDynamicId), so in practice setting them to true
* doesn't have any implications for us until we implement any of these
* optional features.
*/
.shaderSubgroupExtendedTypes = true,
.subgroupBroadcastDynamicId = true,
.vulkanMemoryModel = true,
.vulkanMemoryModelDeviceScope = true,
.vulkanMemoryModelAvailabilityVisibilityChains = true,
.bufferDeviceAddress = true,
.bufferDeviceAddressCaptureReplay = false,
.bufferDeviceAddressMultiDevice = false,
/* Vulkan 1.3 */
.inlineUniformBlock = true,
/* Inline buffers work like push constants, so after they are bound
* some of their contents may be copied into the uniform stream as soon
* as the next draw/dispatch is recorded in the command buffer. This means
* that if the client updates the buffer contents after binding it to
* a command buffer, the next queue submit of that command buffer may
* not use the latest update to the buffer contents, but the data that
* was present in the buffer at the time it was bound to the command
* buffer.
*/
.descriptorBindingInlineUniformBlockUpdateAfterBind = false,
.pipelineCreationCacheControl = true,
.privateData = true,
.maintenance4 = true,
.shaderZeroInitializeWorkgroupMemory = true,
.synchronization2 = true,
.robustImageAccess = true,
.shaderIntegerDotProduct = true,
/* VK_EXT_4444_formats */
.formatA4R4G4B4 = true,
.formatA4B4G4R4 = true,
/* VK_EXT_custom_border_color */
.customBorderColors = true,
.customBorderColorWithoutFormat = false,
/* VK_EXT_index_type_uint8 */
.indexTypeUint8 = true,
/* VK_EXT_line_rasterization */
.rectangularLines = true,
.bresenhamLines = true,
.smoothLines = false,
.stippledRectangularLines = false,
.stippledBresenhamLines = false,
.stippledSmoothLines = false,
/* VK_EXT_color_write_enable */
.colorWriteEnable = true,
/* VK_KHR_pipeline_executable_properties */
.pipelineExecutableInfo = true,
/* VK_EXT_provoking_vertex */
.provokingVertexLast = true,
/* FIXME: update when supporting EXT_transform_feedback */
.transformFeedbackPreservesProvokingVertex = false,
/* VK_EXT_vertex_attribute_divisor */
.vertexAttributeInstanceRateDivisor = true,
.vertexAttributeInstanceRateZeroDivisor = false,
/* VK_KHR_performance_query */
.performanceCounterQueryPools = physical_device->caps.perfmon,
.performanceCounterMultipleQueryPools = false,
/* VK_EXT_texel_buffer_alignment */
.texelBufferAlignment = true,
/* VK_KHR_workgroup_memory_explicit_layout */
.workgroupMemoryExplicitLayout = true,
.workgroupMemoryExplicitLayoutScalarBlockLayout = false,
.workgroupMemoryExplicitLayout8BitAccess = true,
.workgroupMemoryExplicitLayout16BitAccess = true,
/* VK_EXT_border_color_swizzle */
.borderColorSwizzle = true,
.borderColorSwizzleFromImage = true,
/* VK_EXT_shader_module_identifier */
.shaderModuleIdentifier = true,
/* VK_EXT_depth_clip_control */
.depthClipControl = true,
/* VK_EXT_attachment_feedback_loop_layout */
.attachmentFeedbackLoopLayout = true,
/* VK_EXT_primitive_topology_list_restart */
.primitiveTopologyListRestart = true,
/* FIXME: we don't support tessellation shaders yet */
.primitiveTopologyPatchListRestart = false,
/* VK_EXT_pipeline_robustness */
.pipelineRobustness = true,
};
vk_get_physical_device_features(pFeatures, &features);
}
uint32_t
v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev)
{
return 0x14E4; /* Broadcom */
}
uint32_t
v3dv_physical_device_device_id(struct v3dv_physical_device *dev)
{
#if using_v3d_simulator
return dev->device_id;
#else
switch (dev->devinfo.ver) {
case 42:
return 0xBE485FD3; /* Broadcom deviceID for 2711 */
default:
unreachable("Unsupported V3D version");
}
#endif
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties *pProperties)
{
V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice);
STATIC_ASSERT(MAX_SAMPLED_IMAGES + MAX_STORAGE_IMAGES + MAX_INPUT_ATTACHMENTS
<= V3D_MAX_TEXTURE_SAMPLERS);
STATIC_ASSERT(MAX_UNIFORM_BUFFERS >= MAX_DYNAMIC_UNIFORM_BUFFERS);
STATIC_ASSERT(MAX_STORAGE_BUFFERS >= MAX_DYNAMIC_STORAGE_BUFFERS);
const uint32_t page_size = 4096;
const uint64_t mem_size = compute_heap_size();
const uint32_t max_varying_components = 16 * 4;
const float v3d_point_line_granularity = 2.0f / (1 << V3D_COORD_SHIFT);
const uint32_t max_fb_size = V3D_MAX_IMAGE_DIMENSION;
const VkSampleCountFlags supported_sample_counts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;
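/* Timestamps are taken with CLOCK_MONOTONIC, so we derive timestampPeriod
 * (nanoseconds per timestamp tick) from the resolution of that clock.
 */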
struct timespec clock_res;
clock_getres(CLOCK_MONOTONIC, &clock_res);
const float timestamp_period =
clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;
/* FIXME: this will probably require an in-depth review */
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = V3D_MAX_IMAGE_DIMENSION,
.maxImageDimension2D = V3D_MAX_IMAGE_DIMENSION,
.maxImageDimension3D = V3D_MAX_IMAGE_DIMENSION,
.maxImageDimensionCube = V3D_MAX_IMAGE_DIMENSION,
.maxImageArrayLayers = V3D_MAX_ARRAY_LAYERS,
.maxTexelBufferElements = (1ul << 28),
.maxUniformBufferRange = V3D_MAX_BUFFER_RANGE,
.maxStorageBufferRange = V3D_MAX_BUFFER_RANGE,
.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
.maxMemoryAllocationCount = mem_size / page_size,
.maxSamplerAllocationCount = 64 * 1024,
.bufferImageGranularity = V3D_NON_COHERENT_ATOM_SIZE,
.sparseAddressSpaceSize = 0,
.maxBoundDescriptorSets = MAX_SETS,
.maxPerStageDescriptorSamplers = V3D_MAX_TEXTURE_SAMPLERS,
.maxPerStageDescriptorUniformBuffers = MAX_UNIFORM_BUFFERS,
.maxPerStageDescriptorStorageBuffers = MAX_STORAGE_BUFFERS,
.maxPerStageDescriptorSampledImages = MAX_SAMPLED_IMAGES,
.maxPerStageDescriptorStorageImages = MAX_STORAGE_IMAGES,
.maxPerStageDescriptorInputAttachments = MAX_INPUT_ATTACHMENTS,
.maxPerStageResources = 128,
/* Some of these limits are multiplied by 6 because they need to
* include all possible shader stages (even if not supported). See
* 'Required Limits' table in the Vulkan spec.
*/
.maxDescriptorSetSamplers = 6 * V3D_MAX_TEXTURE_SAMPLERS,
.maxDescriptorSetUniformBuffers = 6 * MAX_UNIFORM_BUFFERS,
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
.maxDescriptorSetStorageBuffers = 6 * MAX_STORAGE_BUFFERS,
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
.maxDescriptorSetSampledImages = 6 * MAX_SAMPLED_IMAGES,
.maxDescriptorSetStorageImages = 6 * MAX_STORAGE_IMAGES,
.maxDescriptorSetInputAttachments = MAX_INPUT_ATTACHMENTS,
/* Vertex limits */
.maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
.maxVertexInputBindings = MAX_VBS,
.maxVertexInputAttributeOffset = 0xffffffff,
.maxVertexInputBindingStride = 0xffffffff,
.maxVertexOutputComponents = max_varying_components,
/* Tessellation limits */
.maxTessellationGenerationLevel = 0,
.maxTessellationPatchSize = 0,
.maxTessellationControlPerVertexInputComponents = 0,
.maxTessellationControlPerVertexOutputComponents = 0,
.maxTessellationControlPerPatchOutputComponents = 0,
.maxTessellationControlTotalOutputComponents = 0,
.maxTessellationEvaluationInputComponents = 0,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry limits */
.maxGeometryShaderInvocations = 32,
.maxGeometryInputComponents = 64,
.maxGeometryOutputComponents = 64,
.maxGeometryOutputVertices = 256,
.maxGeometryTotalOutputComponents = 1024,
/* Fragment limits */
.maxFragmentInputComponents = max_varying_components,
.maxFragmentOutputAttachments = 4,
.maxFragmentDualSrcAttachments = 0,
.maxFragmentCombinedOutputResources = MAX_RENDER_TARGETS +
MAX_STORAGE_BUFFERS +
MAX_STORAGE_IMAGES,
/* Compute limits */
.maxComputeSharedMemorySize = 16384,
.maxComputeWorkGroupCount = { 65535, 65535, 65535 },
.maxComputeWorkGroupInvocations = 256,
.maxComputeWorkGroupSize = { 256, 256, 256 },
.subPixelPrecisionBits = V3D_COORD_SHIFT,
.subTexelPrecisionBits = 8,
.mipmapPrecisionBits = 8,
.maxDrawIndexedIndexValue = 0x00ffffff,
.maxDrawIndirectCount = 0x7fffffff,
.maxSamplerLodBias = 14.0f,
.maxSamplerAnisotropy = 16.0f,
.maxViewports = MAX_VIEWPORTS,
.maxViewportDimensions = { max_fb_size, max_fb_size },
.viewportBoundsRange = { -2.0 * max_fb_size,
2.0 * max_fb_size - 1 },
.viewportSubPixelBits = 0,
.minMemoryMapAlignment = page_size,
.minTexelBufferOffsetAlignment = V3D_TMU_TEXEL_ALIGN,
.minUniformBufferOffsetAlignment = 32,
.minStorageBufferOffsetAlignment = 32,
.minTexelOffset = -8,
.maxTexelOffset = 7,
.minTexelGatherOffset = -8,
.maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = V3D_COORD_SHIFT,
.maxFramebufferWidth = max_fb_size,
.maxFramebufferHeight = max_fb_size,
.maxFramebufferLayers = 256,
.framebufferColorSampleCounts = supported_sample_counts,
.framebufferDepthSampleCounts = supported_sample_counts,
.framebufferStencilSampleCounts = supported_sample_counts,
.framebufferNoAttachmentsSampleCounts = supported_sample_counts,
.maxColorAttachments = MAX_RENDER_TARGETS,
.sampledImageColorSampleCounts = supported_sample_counts,
.sampledImageIntegerSampleCounts = supported_sample_counts,
.sampledImageDepthSampleCounts = supported_sample_counts,
.sampledImageStencilSampleCounts = supported_sample_counts,
.storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
.maxSampleMaskWords = 1,
.timestampComputeAndGraphics = true,
.timestampPeriod = timestamp_period,
.maxClipDistances = 8,
.maxCullDistances = 0,
.maxCombinedClipAndCullDistances = 8,
.discreteQueuePriorities = 2,
.pointSizeRange = { v3d_point_line_granularity,
V3D_MAX_POINT_SIZE },
.lineWidthRange = { 1.0f, V3D_MAX_LINE_WIDTH },
.pointSizeGranularity = v3d_point_line_granularity,
.lineWidthGranularity = v3d_point_line_granularity,
.strictLines = true,
.standardSampleLocations = false,
.optimalBufferCopyOffsetAlignment = 32,
.optimalBufferCopyRowPitchAlignment = 32,
.nonCoherentAtomSize = V3D_NON_COHERENT_ATOM_SIZE,
};
*pProperties = (VkPhysicalDeviceProperties) {
.apiVersion = V3DV_API_VERSION,
.driverVersion = vk_get_driver_version(),
.vendorID = v3dv_physical_device_vendor_id(pdevice),
.deviceID = v3dv_physical_device_device_id(pdevice),
.deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
.limits = limits,
.sparseProperties = { 0 },
};
snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
"%s", pdevice->name);
memcpy(pProperties->pipelineCacheUUID,
pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2 *pProperties)
{
V3DV_FROM_HANDLE(v3dv_physical_device, pdevice, physicalDevice);
v3dv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
/* We don't really have special restrictions for the maximum
* descriptors per set, other than maybe not exceeding the limits
* of addressable memory in a single allocation on either the host
* or the GPU. This will be a much larger limit than any of the
* per-stage limits already available in Vulkan though, so in practice,
* it is not expected to limit anything beyond what is already
* constrained through per-stage limits.
*/
const uint32_t max_host_descriptors =
(UINT32_MAX - sizeof(struct v3dv_descriptor_set)) /
sizeof(struct v3dv_descriptor);
const uint32_t max_gpu_descriptors =
(UINT32_MAX / v3dv_X(pdevice, max_descriptor_bo_size)());
VkPhysicalDeviceVulkan13Properties vk13 = {
.maxInlineUniformBlockSize = 4096,
.maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
.maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
.maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BUFFERS,
.maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
MAX_INLINE_UNIFORM_BUFFERS,
.maxBufferSize = V3D_MAX_BUFFER_RANGE,
.storageTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
.storageTexelBufferOffsetSingleTexelAlignment = false,
.uniformTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
.uniformTexelBufferOffsetSingleTexelAlignment = false,
/* No native acceleration for integer dot product. We use NIR lowering. */
.integerDotProduct8BitUnsignedAccelerated = false,
.integerDotProduct8BitMixedSignednessAccelerated = false,
.integerDotProduct4x8BitPackedUnsignedAccelerated = false,
.integerDotProduct4x8BitPackedSignedAccelerated = false,
.integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
.integerDotProduct16BitUnsignedAccelerated = false,
.integerDotProduct16BitSignedAccelerated = false,
.integerDotProduct16BitMixedSignednessAccelerated = false,
.integerDotProduct32BitUnsignedAccelerated = false,
.integerDotProduct32BitSignedAccelerated = false,
.integerDotProduct32BitMixedSignednessAccelerated = false,
.integerDotProduct64BitUnsignedAccelerated = false,
.integerDotProduct64BitSignedAccelerated = false,
.integerDotProduct64BitMixedSignednessAccelerated = false,
.integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false,
.integerDotProductAccumulatingSaturating8BitSignedAccelerated = false,
.integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false,
.integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false,
.integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false,
.integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false,
.integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false,
.integerDotProductAccumulatingSaturating16BitSignedAccelerated = false,
.integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false,
.integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false,
.integerDotProductAccumulatingSaturating32BitSignedAccelerated = false,
.integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false,
.integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false,
.integerDotProductAccumulatingSaturating64BitSignedAccelerated = false,
.integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false,
};
VkPhysicalDeviceVulkan12Properties vk12 = {
.driverID = VK_DRIVER_ID_MESA_V3DV,
.conformanceVersion = {
.major = 1,
.minor = 2,
.subminor = 7,
.patch = 1,
},
.supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
.supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
/* FIXME: if we want to support independentResolveNone then we would
* need to honor attachment load operations on resolve attachments,
* which we currently ignore because the resolve makes them irrelevant,
* as it unconditionally writes all pixels in the render area. However,
* with independentResolveNone, it is possible to have one aspect of a
* D/S resolve attachment stay unresolved, in which case the attachment
* load operation is relevant.
*
* NOTE: implementing attachment load for resolve attachments isn't
* immediately trivial because these attachments are not part of the
* framebuffer and therefore we can't use the same mechanism we use
* for framebuffer attachments. Instead, we should probably have to
* emit a meta operation for that right at the start of the render
* pass (or subpass).
*/
.independentResolveNone = false,
.independentResolve = false,
.maxTimelineSemaphoreValueDifference = UINT64_MAX,
.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.shaderSignedZeroInfNanPreserveFloat16 = true,
.shaderSignedZeroInfNanPreserveFloat32 = true,
.shaderSignedZeroInfNanPreserveFloat64 = false,
.shaderDenormPreserveFloat16 = true,
.shaderDenormPreserveFloat32 = true,
.shaderDenormPreserveFloat64 = false,
.shaderDenormFlushToZeroFloat16 = false,
.shaderDenormFlushToZeroFloat32 = false,
.shaderDenormFlushToZeroFloat64 = false,
.shaderRoundingModeRTEFloat16 = true,
.shaderRoundingModeRTEFloat32 = true,
.shaderRoundingModeRTEFloat64 = false,
.shaderRoundingModeRTZFloat16 = false,
.shaderRoundingModeRTZFloat32 = false,
.shaderRoundingModeRTZFloat64 = false,
/* V3D doesn't support min/max filtering */
.filterMinmaxSingleComponentFormats = false,
.filterMinmaxImageComponentMapping = false,
.framebufferIntegerColorSampleCounts =
VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT,
};
memset(vk12.driverName, 0, VK_MAX_DRIVER_NAME_SIZE);
snprintf(vk12.driverName, VK_MAX_DRIVER_NAME_SIZE, "V3DV Mesa");
memset(vk12.driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE);
snprintf(vk12.driverInfo, VK_MAX_DRIVER_INFO_SIZE,
"Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
VkPhysicalDeviceVulkan11Properties vk11 = {
.deviceLUIDValid = false,
.subgroupSize = V3D_CHANNELS,
.subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
.subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT,
.subgroupQuadOperationsInAllStages = false,
.pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
.maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT,
.maxMultiviewInstanceIndex = UINT32_MAX - 1,
.protectedNoFault = false,
.maxPerSetDescriptors = MIN2(max_host_descriptors, max_gpu_descriptors),
/* Minimum required by the spec */
.maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE,
};
memcpy(vk11.deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
memcpy(vk11.driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
vk_foreach_struct(ext, pProperties->pNext) {
if (vk_get_physical_device_core_1_1_property_ext(ext, &vk11))
continue;
if (vk_get_physical_device_core_1_2_property_ext(ext, &vk12))
continue;
if (vk_get_physical_device_core_1_3_property_ext(ext, &vk13))
continue;
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
props->maxCustomBorderColorSamplers = V3D_MAX_TEXTURE_SAMPLERS;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
(VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
props->provokingVertexModePerPipeline = true;
/* FIXME: update when supporting EXT_transform_feedback */
props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
props->maxVertexAttribDivisor = 0xffff;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
VkPhysicalDevicePerformanceQueryPropertiesKHR *props =
(VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
props->allowCommandBufferQueryCopies = true;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
VkPhysicalDeviceDrmPropertiesEXT *props =
(VkPhysicalDeviceDrmPropertiesEXT *)ext;
props->hasPrimary = pdevice->has_primary;
if (props->hasPrimary) {
props->primaryMajor = (int64_t) major(pdevice->primary_devid);
props->primaryMinor = (int64_t) minor(pdevice->primary_devid);
}
props->hasRender = pdevice->has_render;
if (props->hasRender) {
props->renderMajor = (int64_t) major(pdevice->render_devid);
props->renderMinor = (int64_t) minor(pdevice->render_devid);
}
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
props->lineSubPixelPrecisionBits = V3D_COORD_SHIFT;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT:
/* Do nothing, not even logging. This is a non-PCI device, so we will
* never provide this extension.
*/
break;
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
(VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
sizeof(props->shaderModuleIdentifierAlgorithmUUID));
memcpy(props->shaderModuleIdentifierAlgorithmUUID,
vk_shaderModuleIdentifierAlgorithmUUID,
sizeof(props->shaderModuleIdentifierAlgorithmUUID));
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_PROPERTIES_EXT: {
VkPhysicalDevicePipelineRobustnessPropertiesEXT *props =
(VkPhysicalDevicePipelineRobustnessPropertiesEXT *)ext;
props->defaultRobustnessStorageBuffers =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
props->defaultRobustnessUniformBuffers =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
props->defaultRobustnessVertexInputs =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
props->defaultRobustnessImages =
VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT;
break;
}
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
}
/* We support exactly one queue family. */
static const VkQueueFamilyProperties
v3dv_queue_family_properties = {
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT,
.queueCount = 1,
.timestampValidBits = 64,
.minImageTransferGranularity = { 1, 1, 1 },
};
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
uint32_t *pQueueFamilyPropertyCount,
VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
pQueueFamilyProperties, pQueueFamilyPropertyCount);
vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
p->queueFamilyProperties = v3dv_queue_family_properties;
vk_foreach_struct(s, p->pNext) {
v3dv_debug_ignored_stype(s->sType);
}
}
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
*pMemoryProperties = device->memory;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
v3dv_GetPhysicalDeviceMemoryProperties(physicalDevice,
&pMemoryProperties->memoryProperties);
vk_foreach_struct(ext, pMemoryProperties->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
VkPhysicalDeviceMemoryBudgetPropertiesEXT *p =
(VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
p->heapUsage[0] = device->heap_used;
p->heapBudget[0] = compute_memory_budget(device);
/* The heapBudget and heapUsage values must be zero for array elements
* greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
*/
for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
p->heapBudget[i] = 0u;
p->heapUsage[i] = 0u;
}
break;
}
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
}
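/* Illustrative sketch (application side, not driver code; pdev is a
 * placeholder VkPhysicalDevice): the budget is queried by chaining
 * VkPhysicalDeviceMemoryBudgetPropertiesEXT into the properties query;
 * only heapBudget[0]/heapUsage[0] are meaningful since we expose a single
 * memory heap:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(pdev, &props);
 */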
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
v3dv_GetInstanceProcAddr(VkInstance _instance,
const char *pName)
{
V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
return vk_instance_get_proc_addr(&instance->vk,
&v3dv_instance_entrypoints,
pName);
}
/* With version 1+ of the loader interface the ICD should expose
* vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
*/
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction
VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance,
const char *pName);
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance,
const char* pName)
{
return v3dv_GetInstanceProcAddr(instance, pName);
}
/* With version 4+ of the loader interface the ICD should expose
* vk_icdGetPhysicalDeviceProcAddr()
*/
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
const char* pName);
PFN_vkVoidFunction
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
const char* pName)
{
V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
{
if (pProperties == NULL) {
*pPropertyCount = 0;
return VK_SUCCESS;
}
return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
uint32_t *pPropertyCount,
VkLayerProperties *pProperties)
{
V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
if (pProperties == NULL) {
*pPropertyCount = 0;
return VK_SUCCESS;
}
return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT);
}
static void
destroy_queue_syncs(struct v3dv_queue *queue)
{
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
if (queue->last_job_syncs.syncs[i]) {
drmSyncobjDestroy(queue->device->pdevice->render_fd,
queue->last_job_syncs.syncs[i]);
}
}
}
static VkResult
queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
const VkDeviceQueueCreateInfo *create_info,
uint32_t index_in_family)
{
VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info,
index_in_family);
if (result != VK_SUCCESS)
return result;
result = vk_queue_enable_submit_thread(&queue->vk);
if (result != VK_SUCCESS)
goto fail_submit_thread;
queue->device = device;
queue->vk.driver_submit = v3dv_queue_driver_submit;
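   /* We track the last job submitted to each queue type with one DRM
    * syncobj per type. They are created already signaled so that waiting
    * on them before any job has been submitted succeeds immediately.
    */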
for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
queue->last_job_syncs.first[i] = true;
int ret = drmSyncobjCreate(device->pdevice->render_fd,
DRM_SYNCOBJ_CREATE_SIGNALED,
&queue->last_job_syncs.syncs[i]);
if (ret) {
result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
"syncobj create failed: %m");
goto fail_last_job_syncs;
}
}
queue->noop_job = NULL;
return VK_SUCCESS;
fail_last_job_syncs:
destroy_queue_syncs(queue);
fail_submit_thread:
vk_queue_finish(&queue->vk);
return result;
}
static void
queue_finish(struct v3dv_queue *queue)
{
if (queue->noop_job)
v3dv_job_destroy(queue->noop_job);
destroy_queue_syncs(queue);
vk_queue_finish(&queue->vk);
}
static void
init_device_meta(struct v3dv_device *device)
{
mtx_init(&device->meta.mtx, mtx_plain);
v3dv_meta_clear_init(device);
v3dv_meta_blit_init(device);
v3dv_meta_texel_buffer_copy_init(device);
}
static void
destroy_device_meta(struct v3dv_device *device)
{
mtx_destroy(&device->meta.mtx);
v3dv_meta_clear_finish(device);
v3dv_meta_blit_finish(device);
v3dv_meta_texel_buffer_copy_finish(device);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
const VkDeviceCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkDevice *pDevice)
{
V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
struct v3dv_instance *instance = (struct v3dv_instance*) physical_device->vk.instance;
VkResult result;
struct v3dv_device *device;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
   /* Check requested queues (we only expose one queue) */
assert(pCreateInfo->queueCreateInfoCount == 1);
for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
}
device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
sizeof(*device), 8,
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
if (!device)
return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
struct vk_device_dispatch_table dispatch_table;
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&v3dv_device_entrypoints, true);
vk_device_dispatch_table_from_entrypoints(&dispatch_table,
&wsi_device_entrypoints, false);
result = vk_device_init(&device->vk, &physical_device->vk,
&dispatch_table, pCreateInfo, pAllocator);
if (result != VK_SUCCESS) {
vk_free(&device->vk.alloc, device);
return vk_error(NULL, result);
}
device->instance = instance;
device->pdevice = physical_device;
mtx_init(&device->query_mutex, mtx_plain);
cnd_init(&device->query_ended);
device->vk.command_buffer_ops = &v3dv_cmd_buffer_ops;
vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
vk_device_enable_threaded_submit(&device->vk);
result = queue_init(device, &device->queue,
pCreateInfo->pQueueCreateInfos, 0);
if (result != VK_SUCCESS)
goto fail;
device->devinfo = physical_device->devinfo;
if (device->vk.enabled_features.robustBufferAccess)
perf_debug("Device created with Robust Buffer Access enabled.\n");
if (device->vk.enabled_features.robustImageAccess)
perf_debug("Device created with Robust Image Access enabled.\n");
#ifdef DEBUG
v3dv_X(device, device_check_prepacked_sizes)();
#endif
init_device_meta(device);
v3dv_bo_cache_init(device);
v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
device->instance->default_pipeline_cache_enabled);
device->default_attribute_float =
v3dv_pipeline_create_default_attribute_values(device, NULL);
device->device_address_mem_ctx = ralloc_context(NULL);
util_dynarray_init(&device->device_address_bo_list,
device->device_address_mem_ctx);
mtx_init(&device->events.lock, mtx_plain);
result = v3dv_event_allocate_resources(device);
if (result != VK_SUCCESS)
goto fail;
if (list_is_empty(&device->events.free_list)) {
result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
goto fail;
}
result = v3dv_query_allocate_resources(device);
if (result != VK_SUCCESS)
goto fail;
*pDevice = v3dv_device_to_handle(device);
return VK_SUCCESS;
fail:
cnd_destroy(&device->query_ended);
mtx_destroy(&device->query_mutex);
queue_finish(&device->queue);
destroy_device_meta(device);
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
v3dv_event_free_resources(device);
v3dv_query_free_resources(device);
vk_device_finish(&device->vk);
vk_free(&device->vk.alloc, device);
return result;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyDevice(VkDevice _device,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
device->vk.dispatch_table.DeviceWaitIdle(_device);
queue_finish(&device->queue);
v3dv_event_free_resources(device);
mtx_destroy(&device->events.lock);
v3dv_query_free_resources(device);
destroy_device_meta(device);
v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
if (device->default_attribute_float) {
v3dv_bo_free(device, device->default_attribute_float);
device->default_attribute_float = NULL;
}
ralloc_free(device->device_address_mem_ctx);
   /* The BO cache should be destroyed last, as any of the other objects
    * above could be freeing their private BOs.
    */
v3dv_bo_cache_destroy(device);
cnd_destroy(&device->query_ended);
mtx_destroy(&device->query_mutex);
vk_device_finish(&device->vk);
vk_free2(&device->vk.alloc, pAllocator, device);
}
static VkResult
device_alloc(struct v3dv_device *device,
struct v3dv_device_memory *mem,
VkDeviceSize size)
{
/* Our kernel interface is 32-bit */
assert(size <= UINT32_MAX);
mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
if (!mem->bo)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
return VK_SUCCESS;
}
static void
device_free_wsi_dumb(int32_t display_fd, int32_t dumb_handle)
{
assert(display_fd != -1);
if (dumb_handle < 0)
return;
struct drm_mode_destroy_dumb destroy_dumb = {
.handle = dumb_handle,
};
if (v3dv_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb)) {
fprintf(stderr, "destroy dumb object %d: %s\n", dumb_handle, strerror(errno));
}
}
static void
device_free(struct v3dv_device *device, struct v3dv_device_memory *mem)
{
/* If this memory allocation was for WSI, then we need to use the
* display device to free the allocated dumb BO.
*/
if (mem->is_for_wsi) {
device_free_wsi_dumb(device->pdevice->display_fd, mem->bo->dumb_handle);
}
p_atomic_add(&device->pdevice->heap_used, -((int64_t)mem->bo->size));
v3dv_bo_free(device, mem->bo);
}
static void
device_unmap(struct v3dv_device *device, struct v3dv_device_memory *mem)
{
assert(mem && mem->bo->map && mem->bo->map_size > 0);
v3dv_bo_unmap(device, mem->bo);
}
static VkResult
device_map(struct v3dv_device *device, struct v3dv_device_memory *mem)
{
assert(mem && mem->bo);
/* From the spec:
*
* "After a successful call to vkMapMemory the memory object memory is
* considered to be currently host mapped. It is an application error to
* call vkMapMemory on a memory object that is already host mapped."
*
* We are not concerned with this ourselves (validation layers should
* catch these errors and warn users), however, the driver may internally
* map things (for example for debug CLIF dumps or some CPU-side operations)
    * so by the time the user calls here the buffer might already be mapped
* internally by the driver.
*/
if (mem->bo->map) {
assert(mem->bo->map_size == mem->bo->size);
return VK_SUCCESS;
}
bool ok = v3dv_bo_map(device, mem->bo, mem->bo->size);
if (!ok)
return VK_ERROR_MEMORY_MAP_FAILED;
return VK_SUCCESS;
}
static VkResult
device_import_bo(struct v3dv_device *device,
const VkAllocationCallbacks *pAllocator,
int fd, uint64_t size,
struct v3dv_bo **bo)
{
*bo = NULL;
off_t real_size = lseek(fd, 0, SEEK_END);
lseek(fd, 0, SEEK_SET);
if (real_size < 0 || (uint64_t) real_size < size)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
int render_fd = device->pdevice->render_fd;
assert(render_fd >= 0);
int ret;
uint32_t handle;
ret = drmPrimeFDToHandle(render_fd, fd, &handle);
if (ret)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
struct drm_v3d_get_bo_offset get_offset = {
.handle = handle,
};
ret = v3dv_ioctl(render_fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get_offset);
if (ret)
return VK_ERROR_INVALID_EXTERNAL_HANDLE;
assert(get_offset.offset != 0);
*bo = v3dv_device_lookup_bo(device->pdevice, handle);
assert(*bo);
if ((*bo)->refcnt == 0)
v3dv_bo_init_import(*bo, handle, size, get_offset.offset, false);
else
p_atomic_inc(&(*bo)->refcnt);
return VK_SUCCESS;
}
static VkResult
device_alloc_for_wsi(struct v3dv_device *device,
const VkAllocationCallbacks *pAllocator,
struct v3dv_device_memory *mem,
VkDeviceSize size)
{
/* In the simulator we can get away with a regular allocation since both
* allocation and rendering happen in the same DRM render node. On actual
* hardware we need to allocate our winsys BOs on the vc4 display device
* and import them into v3d.
*/
#if using_v3d_simulator
return device_alloc(device, mem, size);
#else
/* If we are allocating for WSI we should have a swapchain and thus,
* we should've initialized the display device. However, Zink doesn't
* use swapchains, so in that case we can get here without acquiring the
* display device and we need to do it now.
*/
VkResult result;
struct v3dv_physical_device *pdevice = device->pdevice;
if (unlikely(pdevice->display_fd < 0)) {
result = v3dv_physical_device_acquire_display(pdevice, NULL);
if (result != VK_SUCCESS)
return result;
}
assert(pdevice->display_fd != -1);
mem->is_for_wsi = true;
int display_fd = pdevice->display_fd;
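   /* Dumb BO sizing: at 32 bpp a 1024-pixel row is exactly 4096 bytes
    * (one page), so the height below is just the allocation size in pages.
    */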
struct drm_mode_create_dumb create_dumb = {
.width = 1024, /* one page */
.height = align(size, 4096) / 4096,
.bpp = util_format_get_blocksizebits(PIPE_FORMAT_RGBA8888_UNORM),
};
int err;
err = v3dv_ioctl(display_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
if (err < 0)
goto fail_create;
int fd;
err =
drmPrimeHandleToFD(display_fd, create_dumb.handle, O_CLOEXEC, &fd);
if (err < 0)
goto fail_export;
result = device_import_bo(device, pAllocator, fd, size, &mem->bo);
close(fd);
if (result != VK_SUCCESS)
goto fail_import;
mem->bo->dumb_handle = create_dumb.handle;
return VK_SUCCESS;
fail_import:
fail_export:
device_free_wsi_dumb(display_fd, create_dumb.handle);
fail_create:
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
#endif
}
static void
device_add_device_address_bo(struct v3dv_device *device,
struct v3dv_bo *bo)
{
util_dynarray_append(&device->device_address_bo_list,
struct v3dv_bo *,
bo);
}
static void
device_remove_device_address_bo(struct v3dv_device *device,
struct v3dv_bo *bo)
{
util_dynarray_delete_unordered(&device->device_address_bo_list,
struct v3dv_bo *,
bo);
}
static void
free_memory(struct v3dv_device *device,
struct v3dv_device_memory *mem,
const VkAllocationCallbacks *pAllocator)
{
if (mem == NULL)
return;
if (mem->bo->map)
device_unmap(device, mem);
if (mem->is_for_device_address)
device_remove_device_address_bo(device, mem->bo);
device_free(device, mem);
vk_object_free(&device->vk, pAllocator, mem);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_FreeMemory(VkDevice _device,
VkDeviceMemory _mem,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, _mem);
free_memory(device, mem, pAllocator);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_AllocateMemory(VkDevice _device,
const VkMemoryAllocateInfo *pAllocateInfo,
const VkAllocationCallbacks *pAllocator,
VkDeviceMemory *pMem)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_device_memory *mem;
struct v3dv_physical_device *pdevice = device->pdevice;
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
/* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
assert(pAllocateInfo->allocationSize > 0);
   /* We always allocate device memory in multiples of a page, so round up
    * the requested size to that.
    */
const VkDeviceSize alloc_size = ALIGN(pAllocateInfo->allocationSize, 4096);
if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE))
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
uint64_t heap_used = p_atomic_read(&pdevice->heap_used);
if (unlikely(heap_used + alloc_size > pdevice->memory.memoryHeaps[0].size))
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
mem = vk_object_zalloc(&device->vk, pAllocator, sizeof(*mem),
VK_OBJECT_TYPE_DEVICE_MEMORY);
if (mem == NULL)
return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.memoryTypeCount);
mem->type = &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
mem->is_for_wsi = false;
const struct wsi_memory_allocate_info *wsi_info = NULL;
const VkImportMemoryFdInfoKHR *fd_info = NULL;
const VkMemoryAllocateFlagsInfo *flags_info = NULL;
vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
switch ((unsigned)ext->sType) {
case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
wsi_info = (void *)ext;
break;
case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
fd_info = (void *)ext;
break;
case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
flags_info = (void *)ext;
break;
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
/* We don't have particular optimizations associated with memory
* allocations that won't be suballocated to multiple resources.
*/
break;
case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
/* The mask of handle types specified here must be supported
* according to VkExternalImageFormatProperties, so it must be
* fd or dmabuf, which don't have special requirements for us.
*/
break;
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
VkResult result;
if (wsi_info) {
result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size);
} else if (fd_info && fd_info->handleType) {
assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
result = device_import_bo(device, pAllocator,
fd_info->fd, alloc_size, &mem->bo);
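      /* From the spec, a successful import transfers ownership of the fd
       * to the implementation. drmPrimeFDToHandle() does not consume the
       * fd, so we close it ourselves.
       */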
if (result == VK_SUCCESS)
close(fd_info->fd);
} else {
result = device_alloc(device, mem, alloc_size);
}
if (result != VK_SUCCESS) {
vk_object_free(&device->vk, pAllocator, mem);
return vk_error(device, result);
}
heap_used = p_atomic_add_return(&pdevice->heap_used, mem->bo->size);
if (heap_used > pdevice->memory.memoryHeaps[0].size) {
free_memory(device, mem, pAllocator);
return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
/* If this memory can be used via VK_KHR_buffer_device_address then we
* will need to manually add the BO to any job submit that makes use of
    * VK_KHR_buffer_device_address, since such jobs may produce buffer
* load/store operations that may access any buffer memory allocated with
* this flag and we don't have any means to tell which buffers will be
* accessed through this mechanism since they don't even have to be bound
* through descriptor state.
*/
if (flags_info &&
(flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR)) {
mem->is_for_device_address = true;
device_add_device_address_bo(device, mem->bo);
}
*pMem = v3dv_device_memory_to_handle(mem);
return result;
}
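/* Illustrative sketch (application side, not driver code; dev, size and
 * mem are placeholders): memory intended for VK_KHR_buffer_device_address
 * must be allocated with the device-address flag chained in, which is what
 * sets is_for_device_address above:
 *
 *    const VkMemoryAllocateFlagsInfo flags_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
 *       .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR,
 *    };
 *    const VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &flags_info,
 *       .allocationSize = size,
 *       .memoryTypeIndex = 0,
 *    };
 *    vkAllocateMemory(dev, &alloc_info, NULL, &mem);
 */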
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_MapMemory(VkDevice _device,
VkDeviceMemory _memory,
VkDeviceSize offset,
VkDeviceSize size,
VkMemoryMapFlags flags,
void **ppData)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
if (mem == NULL) {
*ppData = NULL;
return VK_SUCCESS;
}
assert(offset < mem->bo->size);
/* Since the driver can map BOs internally as well and the mapped range
* required by the user or the driver might not be the same, we always map
* the entire BO and then add the requested offset to the start address
* of the mapped region.
*/
VkResult result = device_map(device, mem);
if (result != VK_SUCCESS)
return vk_error(device, result);
*ppData = ((uint8_t *) mem->bo->map) + offset;
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_UnmapMemory(VkDevice _device,
VkDeviceMemory _memory)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
if (mem == NULL)
return;
device_unmap(device, mem);
}
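/* The memory type we expose is host-coherent, so mapped ranges never need
 * explicit flushes or invalidations and these entry points can be no-ops.
 */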
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_FlushMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_InvalidateMappedMemoryRanges(VkDevice _device,
uint32_t memoryRangeCount,
const VkMappedMemoryRange *pMemoryRanges)
{
return VK_SUCCESS;
}
static void
get_image_memory_requirements(struct v3dv_image *image,
VkImageAspectFlagBits planeAspect,
VkMemoryRequirements2 *pMemoryRequirements)
{
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
.memoryTypeBits = 0x1,
.alignment = image->planes[0].alignment,
.size = image->non_disjoint_size
};
if (planeAspect != VK_IMAGE_ASPECT_NONE) {
assert(image->format->plane_count > 1);
/* Disjoint images should have a 0 non_disjoint_size */
assert(!pMemoryRequirements->memoryRequirements.size);
uint8_t plane = v3dv_image_aspect_to_plane(image, planeAspect);
VkMemoryRequirements *mem_reqs =
&pMemoryRequirements->memoryRequirements;
mem_reqs->alignment = image->planes[plane].alignment;
mem_reqs->size = image->planes[plane].size;
}
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
VkMemoryDedicatedRequirements *req =
(VkMemoryDedicatedRequirements *) ext;
req->requiresDedicatedAllocation = image->vk.external_handle_types != 0;
req->prefersDedicatedAllocation = image->vk.external_handle_types != 0;
break;
}
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageMemoryRequirements2(VkDevice device,
const VkImageMemoryRequirementsInfo2 *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image);
VkImageAspectFlagBits planeAspect = VK_IMAGE_ASPECT_NONE;
vk_foreach_struct_const(ext, pInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: {
VkImagePlaneMemoryRequirementsInfo *req =
(VkImagePlaneMemoryRequirementsInfo *) ext;
planeAspect = req->planeAspect;
break;
}
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
get_image_memory_requirements(image, planeAspect, pMemoryRequirements);
}
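/* Illustrative sketch (application side, not driver code; dev and image
 * are placeholders): for disjoint multi-planar images the requirements of
 * each plane are queried separately by chaining
 * VkImagePlaneMemoryRequirementsInfo, which is what selects a plane above:
 *
 *    const VkImagePlaneMemoryRequirementsInfo plane_info = {
 *       .sType = VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO,
 *       .planeAspect = VK_IMAGE_ASPECT_PLANE_0_BIT,
 *    };
 *    const VkImageMemoryRequirementsInfo2 info = {
 *       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
 *       .pNext = &plane_info,
 *       .image = image,
 *    };
 *    VkMemoryRequirements2 reqs = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
 *    };
 *    vkGetImageMemoryRequirements2(dev, &info, &reqs);
 */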
VKAPI_ATTR void VKAPI_CALL
v3dv_GetDeviceImageMemoryRequirementsKHR(
VkDevice _device,
const VkDeviceImageMemoryRequirements *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_image image = { 0 };
vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo);
ASSERTED VkResult result =
v3dv_image_init(device, pInfo->pCreateInfo, NULL, &image);
assert(result == VK_SUCCESS);
/* From VkDeviceImageMemoryRequirements spec:
*
* " planeAspect is a VkImageAspectFlagBits value specifying the aspect
* corresponding to the image plane to query. This parameter is ignored
* unless pCreateInfo::tiling is
* VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, or pCreateInfo::flags has
* VK_IMAGE_CREATE_DISJOINT_BIT set"
*
    * We need to explicitly ignore planeAspect in any other case, or the
    * asserts below could be triggered.
*/
VkImageAspectFlagBits planeAspect =
pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT ||
pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT ?
pInfo->planeAspect : 0;
get_image_memory_requirements(&image, planeAspect, pMemoryRequirements);
}
static void
bind_image_memory(const VkBindImageMemoryInfo *info)
{
V3DV_FROM_HANDLE(v3dv_image, image, info->image);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
/* Valid usage:
*
* "memoryOffset must be an integer multiple of the alignment member of
* the VkMemoryRequirements structure returned from a call to
* vkGetImageMemoryRequirements with image"
*/
assert(info->memoryOffset < mem->bo->size);
uint64_t offset = info->memoryOffset;
if (image->non_disjoint_size) {
/* We only check for plane 0 as it is the only one that actually starts
* at that offset
*/
assert(offset % image->planes[0].alignment == 0);
for (uint8_t plane = 0; plane < image->plane_count; plane++) {
image->planes[plane].mem = mem;
image->planes[plane].mem_offset = offset;
}
} else {
const VkBindImagePlaneMemoryInfo *plane_mem_info =
vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
assert(plane_mem_info);
/*
* From VkBindImagePlaneMemoryInfo spec:
*
* "If the images tiling is VK_IMAGE_TILING_LINEAR or
* VK_IMAGE_TILING_OPTIMAL, then planeAspect must be a single valid
* format plane for the image"
*
* <skip>
*
* "If the images tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
* then planeAspect must be a single valid memory plane for the
* image"
*
* So planeAspect should only refer to one plane.
*/
uint8_t plane = v3dv_plane_from_aspect(plane_mem_info->planeAspect);
assert(offset % image->planes[plane].alignment == 0);
image->planes[plane].mem = mem;
image->planes[plane].mem_offset = offset;
}
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BindImageMemory2(VkDevice _device,
uint32_t bindInfoCount,
const VkBindImageMemoryInfo *pBindInfos)
{
for (uint32_t i = 0; i < bindInfoCount; i++) {
#ifndef ANDROID
const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
         vk_find_struct_const(pBindInfos[i].pNext,
BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
if (swapchain_info && swapchain_info->swapchain) {
struct v3dv_image *swapchain_image =
v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain,
swapchain_info->imageIndex);
         /* We assume that swapchain images are single-plane */
assert(swapchain_image->plane_count == 1);
VkBindImageMemoryInfo swapchain_bind = {
.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
.image = pBindInfos[i].image,
.memory = v3dv_device_memory_to_handle(swapchain_image->planes[0].mem),
.memoryOffset = swapchain_image->planes[0].mem_offset,
};
bind_image_memory(&swapchain_bind);
} else
#endif
{
bind_image_memory(&pBindInfos[i]);
}
}
return VK_SUCCESS;
}
void
v3dv_buffer_init(struct v3dv_device *device,
const VkBufferCreateInfo *pCreateInfo,
struct v3dv_buffer *buffer,
uint32_t alignment)
{
buffer->size = pCreateInfo->size;
buffer->usage = pCreateInfo->usage;
buffer->alignment = alignment;
}
static void
get_buffer_memory_requirements(struct v3dv_buffer *buffer,
VkMemoryRequirements2 *pMemoryRequirements)
{
pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
.memoryTypeBits = 0x1,
.alignment = buffer->alignment,
.size = align64(buffer->size, buffer->alignment),
};
vk_foreach_struct(ext, pMemoryRequirements->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
VkMemoryDedicatedRequirements *req =
(VkMemoryDedicatedRequirements *) ext;
req->requiresDedicatedAllocation = false;
req->prefersDedicatedAllocation = false;
break;
}
default:
v3dv_debug_ignored_stype(ext->sType);
break;
}
}
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetBufferMemoryRequirements2(VkDevice device,
const VkBufferMemoryRequirementsInfo2 *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
get_buffer_memory_requirements(buffer, pMemoryRequirements);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetDeviceBufferMemoryRequirementsKHR(
VkDevice _device,
const VkDeviceBufferMemoryRequirements *pInfo,
VkMemoryRequirements2 *pMemoryRequirements)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_buffer buffer = { 0 };
v3dv_buffer_init(device, pInfo->pCreateInfo, &buffer, V3D_NON_COHERENT_ATOM_SIZE);
get_buffer_memory_requirements(&buffer, pMemoryRequirements);
}
void
v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info)
{
V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
/* Valid usage:
*
* "memoryOffset must be an integer multiple of the alignment member of
* the VkMemoryRequirements structure returned from a call to
* vkGetBufferMemoryRequirements with buffer"
*/
assert(info->memoryOffset % buffer->alignment == 0);
assert(info->memoryOffset < mem->bo->size);
buffer->mem = mem;
buffer->mem_offset = info->memoryOffset;
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_BindBufferMemory2(VkDevice device,
uint32_t bindInfoCount,
const VkBindBufferMemoryInfo *pBindInfos)
{
for (uint32_t i = 0; i < bindInfoCount; i++)
v3dv_buffer_bind_memory(&pBindInfos[i]);
return VK_SUCCESS;
}
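/* Illustrative sketch (application side, not driver code; dev, buffer and
 * mem are placeholders): the usual query/allocate/bind sequence that ends
 * up in v3dv_buffer_bind_memory() above:
 *
 *    VkMemoryRequirements2 reqs = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
 *    };
 *    const VkBufferMemoryRequirementsInfo2 info = {
 *       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
 *       .buffer = buffer,
 *    };
 *    vkGetBufferMemoryRequirements2(dev, &info, &reqs);
 *    ...allocate at least reqs.memoryRequirements.size bytes as mem...
 *    const VkBindBufferMemoryInfo bind = {
 *       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
 *       .buffer = buffer,
 *       .memory = mem,
 *       .memoryOffset = 0,
 *    };
 *    vkBindBufferMemory2(dev, 1, &bind);
 */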
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateBuffer(VkDevice _device,
const VkBufferCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkBuffer *pBuffer)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_buffer *buffer;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
assert(pCreateInfo->usage != 0);
/* We don't support any flags for now */
assert(pCreateInfo->flags == 0);
buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer),
VK_OBJECT_TYPE_BUFFER);
if (buffer == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
v3dv_buffer_init(device, pCreateInfo, buffer, V3D_NON_COHERENT_ATOM_SIZE);
/* Limit allocations to 32-bit */
const VkDeviceSize aligned_size = align64(buffer->size, buffer->alignment);
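   /* The second check catches overflow in align64() above. */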
   if (aligned_size > UINT32_MAX || aligned_size < buffer->size) {
      vk_object_free(&device->vk, pAllocator, buffer);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
*pBuffer = v3dv_buffer_to_handle(buffer);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyBuffer(VkDevice _device,
VkBuffer _buffer,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
if (!buffer)
return;
vk_object_free(&device->vk, pAllocator, buffer);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateFramebuffer(VkDevice _device,
const VkFramebufferCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkFramebuffer *pFramebuffer)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_framebuffer *framebuffer;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
size_t size = sizeof(*framebuffer) +
sizeof(struct v3dv_image_view *) * pCreateInfo->attachmentCount;
framebuffer = vk_object_zalloc(&device->vk, pAllocator, size,
VK_OBJECT_TYPE_FRAMEBUFFER);
if (framebuffer == NULL)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
framebuffer->width = pCreateInfo->width;
framebuffer->height = pCreateInfo->height;
framebuffer->layers = pCreateInfo->layers;
framebuffer->has_edge_padding = true;
const VkFramebufferAttachmentsCreateInfo *imageless =
vk_find_struct_const(pCreateInfo->pNext,
FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
framebuffer->attachment_count = pCreateInfo->attachmentCount;
framebuffer->color_attachment_count = 0;
for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
if (!imageless) {
framebuffer->attachments[i] =
v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]);
if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
framebuffer->color_attachment_count++;
} else {
assert(i < imageless->attachmentImageInfoCount);
if (imageless->pAttachmentImageInfos[i].usage &
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
framebuffer->color_attachment_count++;
}
}
}
*pFramebuffer = v3dv_framebuffer_to_handle(framebuffer);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyFramebuffer(VkDevice _device,
VkFramebuffer _fb,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_framebuffer, fb, _fb);
if (!fb)
return;
vk_object_free(&device->vk, pAllocator, fb);
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetMemoryFdPropertiesKHR(VkDevice _device,
VkExternalMemoryHandleTypeFlagBits handleType,
int fd,
VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_physical_device *pdevice = device->pdevice;
switch (handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
pMemoryFdProperties->memoryTypeBits =
(1 << pdevice->memory.memoryTypeCount) - 1;
return VK_SUCCESS;
default:
return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
}
}
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_GetMemoryFdKHR(VkDevice _device,
const VkMemoryGetFdInfoKHR *pGetFdInfo,
int *pFd)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_device_memory, mem, pGetFdInfo->memory);
assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
int fd, ret;
ret = drmPrimeHandleToFD(device->pdevice->render_fd,
mem->bo->handle,
DRM_CLOEXEC, &fd);
if (ret)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
*pFd = fd;
return VK_SUCCESS;
}
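/* Illustrative sketch (application side, not driver code; dev and mem are
 * placeholders): exporting an allocation as a dma-buf fd, which lands in
 * the drmPrimeHandleToFD() call above. The caller owns the returned fd and
 * must close() it when done:
 *
 *    const VkMemoryGetFdInfoKHR get_fd = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = mem,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd = -1;
 *    vkGetMemoryFdKHR(dev, &get_fd, &fd);
 */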
VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateSampler(VkDevice _device,
const VkSamplerCreateInfo *pCreateInfo,
const VkAllocationCallbacks *pAllocator,
VkSampler *pSampler)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
struct v3dv_sampler *sampler;
assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler),
VK_OBJECT_TYPE_SAMPLER);
if (!sampler)
return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
sampler->plane_count = 1;
sampler->compare_enable = pCreateInfo->compareEnable;
sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates;
const VkSamplerCustomBorderColorCreateInfoEXT *bc_info =
vk_find_struct_const(pCreateInfo->pNext,
SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
const VkSamplerYcbcrConversionInfo *ycbcr_conv_info =
vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
const struct vk_format_ycbcr_info *ycbcr_info = NULL;
if (ycbcr_conv_info) {
VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, ycbcr_conv_info->conversion);
ycbcr_info = vk_format_get_ycbcr_info(conversion->state.format);
if (ycbcr_info) {
sampler->plane_count = ycbcr_info->n_planes;
sampler->conversion = conversion;
}
}
v3dv_X(device, pack_sampler_state)(sampler, pCreateInfo, bc_info);
*pSampler = v3dv_sampler_to_handle(sampler);
return VK_SUCCESS;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_DestroySampler(VkDevice _device,
VkSampler _sampler,
const VkAllocationCallbacks *pAllocator)
{
V3DV_FROM_HANDLE(v3dv_device, device, _device);
V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
if (!sampler)
return;
vk_object_free(&device->vk, pAllocator, sampler);
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetDeviceMemoryCommitment(VkDevice device,
VkDeviceMemory memory,
VkDeviceSize *pCommittedMemoryInBytes)
{
*pCommittedMemoryInBytes = 0;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageSparseMemoryRequirements(
VkDevice device,
VkImage image,
uint32_t *pSparseMemoryRequirementCount,
VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
{
*pSparseMemoryRequirementCount = 0;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetImageSparseMemoryRequirements2(
VkDevice device,
const VkImageSparseMemoryRequirementsInfo2 *pInfo,
uint32_t *pSparseMemoryRequirementCount,
VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
*pSparseMemoryRequirementCount = 0;
}
VKAPI_ATTR void VKAPI_CALL
v3dv_GetDeviceImageSparseMemoryRequirementsKHR(
VkDevice device,
const VkDeviceImageMemoryRequirements *pInfo,
uint32_t *pSparseMemoryRequirementCount,
VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
*pSparseMemoryRequirementCount = 0;
}
/* vk_icd.h does not declare this function, so we declare it here to
 * suppress -Wmissing-prototypes.
*/
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
{
/* For the full details on loader interface versioning, see
* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
* What follows is a condensed summary, to help you navigate the large and
* confusing official doc.
*
* - Loader interface v0 is incompatible with later versions. We don't
* support it.
*
* - In loader interface v1:
* - The first ICD entrypoint called by the loader is
* vk_icdGetInstanceProcAddr(). The ICD must statically expose this
* entrypoint.
* - The ICD must statically expose no other Vulkan symbol unless it is
* linked with -Bsymbolic.
* - Each dispatchable Vulkan handle created by the ICD must be
* a pointer to a struct whose first member is VK_LOADER_DATA. The
* ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
* - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
* vkDestroySurfaceKHR(). The ICD must be capable of working with
* such loader-managed surfaces.
*
* - Loader interface v2 differs from v1 in:
* - The first ICD entrypoint called by the loader is
* vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
* statically expose this entrypoint.
*
* - Loader interface v3 differs from v2 in:
* - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *       vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
* because the loader no longer does so.
*
* - Loader interface v4 differs from v3 in:
* - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
*
* - Loader interface v5 differs from v4 in:
* - The ICD must support Vulkan API version 1.1 and must not return
* VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
* Vulkan Loader with interface v4 or smaller is being used and the
* application provides an API version that is greater than 1.0.
*/
*pSupportedVersion = MIN2(*pSupportedVersion, 5u);
return VK_SUCCESS;
}
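/* The kernel assigns each BO a fixed offset in the GPU's address space at
 * creation time, so a buffer's device address is simply its BO's offset
 * plus the offset at which the buffer was bound within the allocation.
 */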
VkDeviceAddress
v3dv_GetBufferDeviceAddress(VkDevice device,
const VkBufferDeviceAddressInfoKHR *pInfo)
{
V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
return buffer->mem_offset + buffer->mem->bo->offset;
}
uint64_t
v3dv_GetBufferOpaqueCaptureAddress(VkDevice device,
const VkBufferDeviceAddressInfoKHR *pInfo)
{
/* Not implemented */
return 0;
}
uint64_t
v3dv_GetDeviceMemoryOpaqueCaptureAddress(
VkDevice device,
const VkDeviceMemoryOpaqueCaptureAddressInfoKHR *pInfo)
{
/* Not implemented */
return 0;
}
VkResult
v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
nir_shader *nir,
VkPipelineLayout pipeline_layout,
VkPipeline *pipeline)
{
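   /* Note: cs_m lives on the stack, so the module handle is only valid for
    * the duration of this call. That is fine because pipeline creation
    * below consumes it synchronously.
    */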
struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
VkPipelineShaderStageCreateInfo set_event_cs_stage = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = vk_shader_module_to_handle(&cs_m),
.pName = "main",
};
VkComputePipelineCreateInfo info = {
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.stage = set_event_cs_stage,
.layout = pipeline_layout,
};
VkResult result =
v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
1, &info, &device->vk.alloc, pipeline);
return result;
}