From cd5d01d25c8a55cb535edc2c64d6b22daf74d2fb Mon Sep 17 00:00:00 2001
From: Joshua Ashton
Date: Mon, 15 Jun 2020 05:13:04 +0100
Subject: [PATCH] vkd3d: Refactor atomics

There is no stdatomic available on MSVC, so let's clean things up.

This moves all the atomic helpers to vkd3d_atomic.h and implements all
platforms' spinlocks in entirely the same way.

Signed-off-by: Joshua Ashton
---
 include/private/vkd3d_atomic.h   | 80 ++++++++++++++++++++++++++++++++
 include/private/vkd3d_common.h   | 36 --------------
 include/private/vkd3d_spinlock.h | 53 +++------------------
 libs/vkd3d/command.c             |  8 ++--
 4 files changed, 91 insertions(+), 86 deletions(-)
 create mode 100644 include/private/vkd3d_atomic.h

diff --git a/include/private/vkd3d_atomic.h b/include/private/vkd3d_atomic.h
new file mode 100644
index 00000000..51b9fb13
--- /dev/null
+++ b/include/private/vkd3d_atomic.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_ATOMIC_H
+#define __VKD3D_ATOMIC_H
+
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+
+# include <intrin.h>
+
+typedef enum
+{
+    memory_order_relaxed,
+    memory_order_acquire,
+    memory_order_release,
+    memory_order_acq_rel,
+} memory_order;
+
+# define vkd3d_atomic_rw_barrier() _ReadWriteBarrier()
+# define vkd3d_atomic_rw_barrier_acquire(order) { if (order == memory_order_acquire || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
+# define vkd3d_atomic_rw_barrier_release(order) { if (order == memory_order_release || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
+
+FORCEINLINE uint32_t vkd3d_uint32_atomic_load_explicit(uint32_t *target, memory_order order)
+{
+    uint32_t value = *((volatile uint32_t*)target);
+    vkd3d_atomic_rw_barrier_acquire(order);
+    return value;
+}
+
+FORCEINLINE void vkd3d_uint32_atomic_store_explicit(uint32_t *target, uint32_t value, memory_order order)
+{
+    vkd3d_atomic_rw_barrier_release(order);
+    *((volatile uint32_t*)target) = value;
+}
+
+FORCEINLINE uint32_t vkd3d_uint32_atomic_exchange_explicit(uint32_t *target, uint32_t value, memory_order order)
+{
+    vkd3d_atomic_rw_barrier_release(order);
+    uint32_t oldValue = InterlockedExchange((LONG*)target, value);
+    vkd3d_atomic_rw_barrier_acquire(order);
+    return oldValue;
+}
+
+#elif defined(__GNUC__) || defined(__clang__)
+
+# include <stdatomic.h>
+
+# define vkd3d_uint32_atomic_load_explicit(target, order)            atomic_load_explicit(target, order)
+# define vkd3d_uint32_atomic_store_explicit(target, value, order)    atomic_store_explicit(target, value, order)
+# define vkd3d_uint32_atomic_exchange_explicit(target, value, order) atomic_exchange_explicit(target, value, order)
+
+/* Unfortunately only fetch_add is in stdatomic
+ * so use the common GCC extensions for these.
+ */
+# define InterlockedIncrement(target) __atomic_add_fetch(target, 1, memory_order_seq_cst)
+# define InterlockedDecrement(target) __atomic_sub_fetch(target, 1, memory_order_seq_cst)
+
+#else
+
+# error "No atomics for this platform"
+
+#endif
+
+#endif
\ No newline at end of file
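
For context when reviewing: the helpers above pair an acquire load with a release store in the usual message-passing pattern. A minimal sketch, assuming only the vkd3d_atomic.h added above; the flag and both functions are hypothetical, for illustration only, and not part of this patch:

    #include <stdbool.h>
    #include <stdint.h>
    #include "vkd3d_atomic.h"

    static uint32_t work_done; /* hypothetical flag, zero-initialised */

    /* Writer: the release store publishes every write made before it. */
    static void publish_result(void)
    {
        vkd3d_uint32_atomic_store_explicit(&work_done, 1u, memory_order_release);
    }

    /* Reader: the acquire load pairs with the release store above, so a
     * caller that observes 1 also observes the writer's earlier writes. */
    static bool result_ready(void)
    {
        return vkd3d_uint32_atomic_load_explicit(&work_done, memory_order_acquire) == 1u;
    }

On the MSVC path this compiles to a volatile access plus _ReadWriteBarrier(), a compiler-only fence that is sufficient for acquire/release ordering on x86's strongly ordered memory model; on GCC/Clang it maps directly to the stdatomic equivalents.
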
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h
index 57465d9c..ad5df093 100644
--- a/include/private/vkd3d_common.h
+++ b/include/private/vkd3d_common.h
@@ -174,42 +174,6 @@ static inline int ascii_strcasecmp(const char *a, const char *b)
     return c_a - c_b;
 }
 
-#ifndef _WIN32
-# if HAVE_SYNC_ADD_AND_FETCH
-static inline LONG InterlockedIncrement(LONG volatile *x)
-{
-    return __sync_add_and_fetch(x, 1);
-}
-# else
-# error "InterlockedIncrement() not implemented for this platform"
-# endif  /* HAVE_SYNC_ADD_AND_FETCH */
-
-# if HAVE_SYNC_SUB_AND_FETCH
-static inline LONG InterlockedDecrement(LONG volatile *x)
-{
-    return __sync_sub_and_fetch(x, 1);
-}
-# else
-# error "InterlockedDecrement() not implemented for this platform"
-# endif
-#endif  /* _WIN32 */
-
-#if HAVE_SYNC_ADD_AND_FETCH
-# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val)
-#elif defined(_MSC_VER)
-/* InterlockedAdd returns value after increment, like add_and_fetch. */
-# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val)
-#else
-# error "atomic_add_fetch() not implemented for this platform"
-#endif  /* HAVE_SYNC_ADD_AND_FETCH */
-
-#ifdef HAVE_EXPLICIT_ATOMIC_LOADS
-/* Piggyback on stdatomic from spinlock.h */
-# define atomic_load_acquire(ptr) atomic_load_explicit(ptr, memory_order_acquire)
-#else
-# define atomic_load_acquire(ptr) atomic_add_fetch(ptr, 0)
-#endif
-
 static inline bool is_power_of_two(unsigned int x)
 {
     return x && !(x & (x -1));
diff --git a/include/private/vkd3d_spinlock.h b/include/private/vkd3d_spinlock.h
index 6b41880a..a69a4237 100644
--- a/include/private/vkd3d_spinlock.h
+++ b/include/private/vkd3d_spinlock.h
@@ -22,54 +22,15 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include "vkd3d_atomic.h"
 
-#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
-#include <stdatomic.h>
-#define HAVE_EXPLICIT_ATOMIC_LOADS
+#define vkd3d_spinlock_try_lock(lock) \
+    (!vkd3d_uint32_atomic_load_explicit(lock, memory_order_relaxed) && \
+     !vkd3d_uint32_atomic_exchange_explicit(lock, 1u, memory_order_acquire))
 
-#elif defined(__GNUC__) && ((__GNUC__ * 1000 + __GNUC_MINOR__) >= 4007)
+#define vkd3d_spinlock_unlock(lock) vkd3d_uint32_atomic_store_explicit(lock, 0u, memory_order_release)
 
-/* Can use GCC's intrinsic versions of C11 atomics. */
-#define atomic_load_explicit(lock, order) __atomic_load_n(lock, order)
-#define atomic_store_explicit(lock, value, order) __atomic_store_n(lock, value, order)
-#define atomic_exchange_explicit(lock, value, order) __atomic_exchange_n(lock, value, order)
-#define memory_order_relaxed __ATOMIC_RELAXED
-#define memory_order_acquire __ATOMIC_ACQUIRE
-#define memory_order_release __ATOMIC_RELEASE
-#define HAVE_EXPLICIT_ATOMIC_LOADS
-
-#elif defined(__GNUC__)
-/* Legacy GCC intrinsics */
-#define atomic_try_lock(lock) !__sync_lock_test_and_set(lock, 1)
-#define atomic_unlock(lock) __sync_lock_release(lock)
-
-#elif defined(_MSC_VER)
-#include <windows.h>
-#define atomic_try_lock(lock) !InterlockedExchange(lock, 1)
-/* There is no "unlock" equivalent on MSVC, but exchange without consuming the result
- * is effectively an unlock with correct memory semantics.
- * Compiler is be free to optimize this.
- */
-#define atomic_unlock(lock) InterlockedExchange(lock, 0)
-#else
-#error "No possible spinlock implementation for this platform."
-#endif
-
-/* Generic C11 implementations of try_lock and unlock. */
-#ifndef atomic_try_lock
-#define atomic_try_lock(lock) \
-    (!atomic_load_explicit(lock, memory_order_relaxed) && \
-     !atomic_exchange_explicit(lock, 1u, memory_order_acquire))
-#endif
-
-#ifndef atomic_unlock
-#define atomic_unlock(lock) atomic_store_explicit(lock, 0u, memory_order_release)
-#endif
-
-#ifdef _MSC_VER
-typedef LONG spinlock_t;
-#else
 typedef uint32_t spinlock_t;
-#endif
 
 static inline void spinlock_init(spinlock_t *lock)
 {
@@ -78,7 +39,7 @@ static inline void spinlock_init(spinlock_t *lock)
 
 static inline bool spinlock_try_acquire(spinlock_t *lock)
 {
-    return atomic_try_lock(lock);
+    return vkd3d_spinlock_try_lock(lock);
 }
 
 static inline void spinlock_acquire(spinlock_t *lock)
@@ -89,7 +50,7 @@ static inline void spinlock_acquire(spinlock_t *lock)
 
 static inline void spinlock_release(spinlock_t *lock)
 {
-    atomic_unlock(lock);
+    vkd3d_spinlock_unlock(lock);
 }
 
 #endif
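
Worth noting for review: vkd3d_spinlock_try_lock is a test-and-test-and-set. The relaxed load lets contended waiters spin on a locally cached value, and the acquire exchange is only attempted once the lock appears free, so the cache line is not bounced between cores on every iteration. A minimal usage sketch of the resulting API, with hypothetical names that are not part of this patch:

    #include "vkd3d_spinlock.h"

    struct item { int value; };    /* hypothetical payload */

    static spinlock_t queue_lock;  /* zero-initialised, i.e. unlocked */
    static struct item queue_head; /* hypothetical shared state */

    static void queue_store(const struct item *item)
    {
        spinlock_acquire(&queue_lock); /* spins on vkd3d_spinlock_try_lock() */
        queue_head = *item;            /* critical section */
        spinlock_release(&queue_lock); /* release store publishes the write */
    }

Because spinlock_t is now plain uint32_t everywhere, the same definitions serve both the MSVC and the stdatomic paths without the old per-platform typedef.
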
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index cc1c3f7b..b93b62b8 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -177,7 +177,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
     LONG count;
     int rc;
 
-    if (!(count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    if (!(count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
         return;
 
     WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence);
@@ -188,7 +188,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
         return;
     }
 
-    while ((count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    while ((count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
     {
         TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence);
 
@@ -313,7 +313,7 @@ static void *vkd3d_fence_worker_main(void *arg)
     {
         vkd3d_wait_for_gpu_timeline_semaphores(worker);
 
-        if (!worker->fence_count || atomic_load_acquire(&worker->enqueued_fence_count))
+        if (!worker->fence_count || vkd3d_uint32_atomic_load_explicit(&worker->enqueued_fence_count, memory_order_acquire))
         {
             if ((rc = pthread_mutex_lock(&worker->mutex)))
             {
@@ -1497,7 +1497,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
         TRACE("Resetting command list %p.\n", list);
     }
 
-    if ((pending = atomic_load_acquire(&allocator->outstanding_submissions_count)) != 0)
+    if ((pending = vkd3d_uint32_atomic_load_explicit(&allocator->outstanding_submissions_count, memory_order_acquire)) != 0)
     {
         /* HACK: There are currently command lists waiting to be submitted to the queue in the submission threads.
          * Buggy application, but work around this by not resetting the command pool this time.