turnip: Porting to common implementation for timeline semaphore
Define struct tu_timeline_sync for emulated timeline support in the common implementation; it is built on top of a DRM syncobj used as a binary sync. Also implement the init/finish/reset/wait_many methods for the struct. v1: Do not set MSM_SUBMIT_SYNCOBJ_RESET for waiting syncobjs, since resets are already managed by the common implementation. Signed-off-by: Hyunjun Ko <zzoon@igalia.com> Reviewed-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14105>
This commit is contained in:
parent
479a1c405e
commit
0a82a26a18
|
@ -159,7 +159,7 @@ get_device_extensions(const struct tu_physical_device *device,
|
|||
.KHR_buffer_device_address = true,
|
||||
.KHR_shader_integer_dot_product = true,
|
||||
#ifndef TU_USE_KGSL
|
||||
.KHR_timeline_semaphore = false,
|
||||
.KHR_timeline_semaphore = true,
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
|
||||
/* This extension is supported by common code across drivers, but it is
|
||||
|
@ -562,7 +562,7 @@ tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
|
|||
features->shaderSubgroupExtendedTypes = true;
|
||||
features->separateDepthStencilLayouts = true;
|
||||
features->hostQueryReset = true;
|
||||
features->timelineSemaphore = false;
|
||||
features->timelineSemaphore = true;
|
||||
features->bufferDeviceAddress = true;
|
||||
features->bufferDeviceAddressCaptureReplay = false;
|
||||
features->bufferDeviceAddressMultiDevice = false;
|
||||
|
@ -757,7 +757,7 @@ tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
|
|||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
|
||||
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *features =
|
||||
(VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *) ext;
|
||||
features->timelineSemaphore = false;
|
||||
features->timelineSemaphore = true;
|
||||
break;
|
||||
}
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
|
||||
|
|
|
@ -331,6 +331,221 @@ tu_bo_finish(struct tu_device *dev, struct tu_bo *bo)
|
|||
tu_gem_close(dev, bo->gem_handle);
|
||||
}
|
||||
|
||||
extern const struct vk_sync_type tu_timeline_sync_type;
|
||||
|
||||
/* Returns true if @sync is one of turnip's emulated timeline points
 * (its vk_sync_type is tu_timeline_sync_type) as opposed to a plain
 * DRM-syncobj-backed vk_sync.
 */
static inline bool
vk_sync_is_tu_timeline_sync(const struct vk_sync *sync)
{
   return sync->type == &tu_timeline_sync_type;
}
|
||||
|
||||
/* Downcasts a vk_sync to the containing tu_timeline_sync.
 * Caller must only pass syncs of tu_timeline_sync_type (asserted).
 */
static struct tu_timeline_sync *
to_tu_timeline_sync(struct vk_sync *sync)
{
   assert(sync->type == &tu_timeline_sync_type);
   return container_of(sync, struct tu_timeline_sync, base);
}
|
||||
|
||||
static uint32_t
|
||||
tu_syncobj_from_vk_sync(struct vk_sync *sync)
|
||||
{
|
||||
uint32_t syncobj = -1;
|
||||
if (vk_sync_is_tu_timeline_sync(sync)) {
|
||||
syncobj = to_tu_timeline_sync(sync)->syncobj;
|
||||
} else if (vk_sync_type_is_drm_syncobj(sync->type)) {
|
||||
syncobj = vk_sync_as_drm_syncobj(sync)->syncobj;
|
||||
}
|
||||
|
||||
assert(syncobj != -1);
|
||||
|
||||
return syncobj;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu_timeline_sync_init(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync,
|
||||
uint64_t initial_value)
|
||||
{
|
||||
struct tu_device *device = container_of(vk_device, struct tu_device, vk);
|
||||
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
|
||||
uint32_t flags = 0;
|
||||
|
||||
assert(device->fd >= 0);
|
||||
|
||||
int err = drmSyncobjCreate(device->fd, flags, &sync->syncobj);
|
||||
|
||||
if (err < 0) {
|
||||
return vk_error(device, VK_ERROR_DEVICE_LOST);
|
||||
}
|
||||
|
||||
sync->state = initial_value ? TU_TIMELINE_SYNC_STATE_SIGNALED :
|
||||
TU_TIMELINE_SYNC_STATE_RESET;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
tu_timeline_sync_finish(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync)
|
||||
{
|
||||
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
|
||||
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
|
||||
|
||||
assert(dev->fd >= 0);
|
||||
ASSERTED int err = drmSyncobjDestroy(dev->fd, sync->syncobj);
|
||||
assert(err == 0);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
tu_timeline_sync_reset(struct vk_device *vk_device,
|
||||
struct vk_sync *vk_sync)
|
||||
{
|
||||
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
|
||||
struct tu_timeline_sync *sync = to_tu_timeline_sync(vk_sync);
|
||||
|
||||
int err = drmSyncobjReset(dev->fd, &sync->syncobj, 1);
|
||||
if (err) {
|
||||
return vk_errorf(dev, VK_ERROR_UNKNOWN,
|
||||
"DRM_IOCTL_SYNCOBJ_RESET failed: %m");
|
||||
} else {
|
||||
sync->state = TU_TIMELINE_SYNC_STATE_RESET;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
drm_syncobj_wait(struct tu_device *device,
|
||||
uint32_t *handles, uint32_t count_handles,
|
||||
int64_t timeout_nsec, bool wait_all)
|
||||
{
|
||||
uint32_t syncobj_wait_flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT;
|
||||
if (wait_all) syncobj_wait_flags |= DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL;
|
||||
|
||||
int err = drmSyncobjWait(device->fd, handles,
|
||||
count_handles, timeout_nsec,
|
||||
syncobj_wait_flags,
|
||||
NULL /* first_signaled */);
|
||||
if (err && errno == ETIME) {
|
||||
return VK_TIMEOUT;
|
||||
} else if (err) {
|
||||
return vk_errorf(device, VK_ERROR_UNKNOWN,
|
||||
"DRM_IOCTL_SYNCOBJ_WAIT failed: %m");
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
/* Based on anv_bo_sync_wait */
|
||||
static VkResult
|
||||
tu_timeline_sync_wait(struct vk_device *vk_device,
|
||||
uint32_t wait_count,
|
||||
const struct vk_sync_wait *waits,
|
||||
enum vk_sync_wait_flags wait_flags,
|
||||
uint64_t abs_timeout_ns)
|
||||
{
|
||||
struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
|
||||
bool wait_all = !(wait_flags & VK_SYNC_WAIT_ANY);
|
||||
|
||||
uint32_t handles[wait_count];
|
||||
uint32_t submit_count;
|
||||
VkResult ret = VK_SUCCESS;
|
||||
uint32_t pending = wait_count;
|
||||
struct tu_timeline_sync *submitted_syncs[wait_count];
|
||||
|
||||
while (pending) {
|
||||
pending = 0;
|
||||
submit_count = 0;
|
||||
|
||||
for (unsigned i = 0; i < wait_count; ++i) {
|
||||
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
|
||||
|
||||
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET) {
|
||||
assert(!(wait_flags & VK_SYNC_WAIT_PENDING));
|
||||
pending++;
|
||||
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SIGNALED) {
|
||||
if (wait_flags & VK_SYNC_WAIT_ANY)
|
||||
return VK_SUCCESS;
|
||||
} else if (sync->state == TU_TIMELINE_SYNC_STATE_SUBMITTED) {
|
||||
if (!(wait_flags & VK_SYNC_WAIT_PENDING)) {
|
||||
handles[submit_count] = sync->syncobj;
|
||||
submitted_syncs[submit_count++] = sync;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (submit_count > 0) {
|
||||
do {
|
||||
ret = drm_syncobj_wait(dev, handles, submit_count, abs_timeout_ns, wait_all);
|
||||
} while (ret == VK_TIMEOUT && os_time_get_nano() < abs_timeout_ns);
|
||||
|
||||
if (ret == VK_SUCCESS) {
|
||||
for (unsigned i = 0; i < submit_count; ++i) {
|
||||
struct tu_timeline_sync *sync = submitted_syncs[i];
|
||||
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
|
||||
}
|
||||
} else {
|
||||
/* return error covering timeout */
|
||||
return ret;
|
||||
}
|
||||
} else if (pending > 0) {
|
||||
/* If we've hit this then someone decided to vkWaitForFences before
|
||||
* they've actually submitted any of them to a queue. This is a
|
||||
* fairly pessimal case, so it's ok to lock here and use a standard
|
||||
* pthreads condition variable.
|
||||
*/
|
||||
pthread_mutex_lock(&dev->submit_mutex);
|
||||
|
||||
/* It's possible that some of the fences have changed state since the
|
||||
* last time we checked. Now that we have the lock, check for
|
||||
* pending fences again and don't wait if it's changed.
|
||||
*/
|
||||
uint32_t now_pending = 0;
|
||||
for (uint32_t i = 0; i < wait_count; i++) {
|
||||
struct tu_timeline_sync *sync = to_tu_timeline_sync(waits[i].sync);
|
||||
if (sync->state == TU_TIMELINE_SYNC_STATE_RESET)
|
||||
now_pending++;
|
||||
}
|
||||
assert(now_pending <= pending);
|
||||
|
||||
if (now_pending == pending) {
|
||||
struct timespec abstime = {
|
||||
.tv_sec = abs_timeout_ns / NSEC_PER_SEC,
|
||||
.tv_nsec = abs_timeout_ns % NSEC_PER_SEC,
|
||||
};
|
||||
|
||||
ASSERTED int ret;
|
||||
ret = pthread_cond_timedwait(&dev->timeline_cond,
|
||||
&dev->submit_mutex, &abstime);
|
||||
assert(ret != EINVAL);
|
||||
if (os_time_get_nano() >= abs_timeout_ns) {
|
||||
pthread_mutex_unlock(&dev->submit_mutex);
|
||||
return VK_TIMEOUT;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&dev->submit_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Binary vk_sync type used as the per-point primitive for turnip's emulated
 * timeline semaphores (wrapped by vk_sync_timeline in tu_drm_device_init).
 * Advertises CPU wait/reset and WAIT_PENDING so the common timeline code
 * can poll and recycle points; GPU wait is handled via the syncobj at
 * submit time.
 */
const struct vk_sync_type tu_timeline_sync_type = {
   .size = sizeof(struct tu_timeline_sync),
   .features = VK_SYNC_FEATURE_BINARY |
               VK_SYNC_FEATURE_GPU_WAIT |
               VK_SYNC_FEATURE_GPU_MULTI_WAIT |
               VK_SYNC_FEATURE_CPU_WAIT |
               VK_SYNC_FEATURE_CPU_RESET |
               VK_SYNC_FEATURE_WAIT_ANY |
               VK_SYNC_FEATURE_WAIT_PENDING,
   .init = tu_timeline_sync_init,
   .finish = tu_timeline_sync_finish,
   .reset = tu_timeline_sync_reset,
   .wait_many = tu_timeline_sync_wait,
};
|
||||
|
||||
static VkResult
|
||||
tu_drm_device_init(struct tu_physical_device *device,
|
||||
struct tu_instance *instance,
|
||||
|
@ -427,9 +642,11 @@ tu_drm_device_init(struct tu_physical_device *device,
|
|||
}
|
||||
|
||||
device->syncobj_type = vk_drm_syncobj_get_type(fd);
|
||||
device->timeline_type = vk_sync_timeline_get_type(&tu_timeline_sync_type);
|
||||
|
||||
device->sync_types[0] = &device->syncobj_type;
|
||||
device->sync_types[1] = NULL;
|
||||
device->sync_types[1] = &device->timeline_type.sync;
|
||||
device->sync_types[2] = NULL;
|
||||
|
||||
device->heap.size = tu_get_system_heap_size();
|
||||
device->heap.used = 0u;
|
||||
|
@ -745,6 +962,37 @@ tu_queue_submit_locked(struct tu_queue *queue, struct tu_queue_submit *submit)
|
|||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submit->vk_submit->wait_count; i++) {
|
||||
if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->waits[i].sync))
|
||||
continue;
|
||||
|
||||
struct tu_timeline_sync *sync =
|
||||
container_of(submit->vk_submit->waits[i].sync, struct tu_timeline_sync, base);
|
||||
|
||||
assert(sync->state != TU_TIMELINE_SYNC_STATE_RESET);
|
||||
|
||||
/* Set SIGNALED to the state of the wait timeline sync since this means the syncobj
|
||||
* is done and ready again so this can be garbage-collectioned later.
|
||||
*/
|
||||
sync->state = TU_TIMELINE_SYNC_STATE_SIGNALED;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submit->vk_submit->signal_count; i++) {
|
||||
if (!vk_sync_is_tu_timeline_sync(submit->vk_submit->signals[i].sync))
|
||||
continue;
|
||||
|
||||
struct tu_timeline_sync *sync =
|
||||
container_of(submit->vk_submit->signals[i].sync, struct tu_timeline_sync, base);
|
||||
|
||||
assert(sync->state == TU_TIMELINE_SYNC_STATE_RESET);
|
||||
/* Set SUBMITTED to the state of the signal timeline sync so we could wait for
|
||||
* this timeline sync until completed if necessary.
|
||||
*/
|
||||
sync->state = TU_TIMELINE_SYNC_STATE_SUBMITTED;
|
||||
}
|
||||
|
||||
pthread_cond_broadcast(&queue->device->timeline_cond);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -756,6 +1004,7 @@ get_abs_timeout(struct drm_msm_timespec *tv, uint64_t ns)
|
|||
tv->tv_sec = t.tv_sec + ns / 1000000000;
|
||||
tv->tv_nsec = t.tv_nsec + ns % 1000000000;
|
||||
}
|
||||
|
||||
VkResult
|
||||
tu_device_wait_u_trace(struct tu_device *dev, struct tu_u_trace_syncobj *syncobj)
|
||||
{
|
||||
|
@ -804,27 +1053,19 @@ tu_queue_submit(struct vk_queue *vk_queue, struct vk_queue_submit *submit)
|
|||
for (uint32_t i = 0; i < submit->wait_count; i++) {
|
||||
struct vk_sync *sync = submit->waits[i].sync;
|
||||
|
||||
if (vk_sync_type_is_drm_syncobj(sync->type)) {
|
||||
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
|
||||
|
||||
in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
|
||||
.handle = syncobj->syncobj,
|
||||
.flags = 0,
|
||||
};
|
||||
}
|
||||
in_syncobjs[nr_in_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
|
||||
.handle = tu_syncobj_from_vk_sync(sync),
|
||||
.flags = 0,
|
||||
};
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < submit->signal_count; i++) {
|
||||
struct vk_sync *sync = submit->signals[i].sync;
|
||||
|
||||
if (vk_sync_type_is_drm_syncobj(sync->type)) {
|
||||
struct vk_drm_syncobj *syncobj = vk_sync_as_drm_syncobj(sync);
|
||||
|
||||
out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
|
||||
.handle = syncobj->syncobj,
|
||||
.flags = 0,
|
||||
};
|
||||
}
|
||||
out_syncobjs[nr_out_syncobjs++] = (struct drm_msm_gem_submit_syncobj) {
|
||||
.handle = tu_syncobj_from_vk_sync(sync),
|
||||
.flags = 0,
|
||||
};
|
||||
}
|
||||
|
||||
ret = tu_queue_submit_locked(queue, submit_req);
|
||||
|
|
|
@ -102,6 +102,7 @@ typedef uint32_t xcb_window_t;
|
|||
#include "vk_fence.h"
|
||||
#include "vk_semaphore.h"
|
||||
#include "vk_drm_syncobj.h"
|
||||
#include "vk_sync_timeline.h"
|
||||
|
||||
#define MAX_VBS 32
|
||||
#define MAX_VERTEX_ATTRIBS 32
|
||||
|
@ -232,6 +233,7 @@ struct tu_physical_device
|
|||
struct tu_memory_heap heap;
|
||||
|
||||
struct vk_sync_type syncobj_type;
|
||||
struct vk_sync_timeline_type timeline_type;
|
||||
const struct vk_sync_type *sync_types[3];
|
||||
};
|
||||
|
||||
|
@ -312,6 +314,29 @@ struct tu_syncobj;
|
|||
#endif
|
||||
struct tu_u_trace_syncobj;
|
||||
|
||||
/* Define tu_timeline_sync type based on drm syncobj for a point type
 * for vk_sync_timeline, and the logic to handle is mostly copied from
 * anv_bo_sync since it seems it can be used by similar way to anv.
 */
enum tu_timeline_sync_state {
   /** Indicates that this is a new (or newly reset fence) */
   TU_TIMELINE_SYNC_STATE_RESET,

   /** Indicates that this fence has been submitted to the GPU but is still
    * (as far as we know) in use by the GPU.
    */
   TU_TIMELINE_SYNC_STATE_SUBMITTED,

   /** Indicates that the fence is known to be signaled — set after a
    * successful CPU wait, or when a submit consumes it as a wait.
    */
   TU_TIMELINE_SYNC_STATE_SIGNALED,
};
|
||||
|
||||
/* One point of an emulated timeline semaphore: a binary DRM syncobj plus a
 * CPU-side state machine. Implemented by tu_timeline_sync_type.
 */
struct tu_timeline_sync {
   struct vk_sync base;

   /* CPU-visible lifecycle (RESET -> SUBMITTED -> SIGNALED); transitions
    * are made under the device submit_mutex / timeline_cond on submit.
    */
   enum tu_timeline_sync_state state;
   /* Handle of the backing DRM syncobj. */
   uint32_t syncobj;
};
|
||||
|
||||
struct tu_queue
|
||||
{
|
||||
struct vk_queue vk;
|
||||
|
|
Loading…
Reference in New Issue