vkd3d: Refactor atomics

There is no stdatomic available on MSVC, so let's clean things up.

This moves all the atomic helpers to vkd3d_atomic.h and implements spinlocks for all platforms in the same way.

Signed-off-by: Joshua Ashton <joshua@froggi.es>
Joshua Ashton 2020-06-15 05:13:04 +01:00 committed by Hans-Kristian Arntzen
parent b9909854fe
commit cd5d01d25c
4 changed files with 91 additions and 86 deletions


@@ -0,0 +1,80 @@
/*
* Copyright 2020 Joshua Ashton for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __VKD3D_ATOMIC_H
#define __VKD3D_ATOMIC_H
#include <stdint.h>
#if defined(_MSC_VER)
# include <intrin.h>
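/* Mirrors the C11 memory_order constants, so callers can use the same
 * names on both this path and the stdatomic path below. */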
typedef enum
{
memory_order_relaxed,
memory_order_acquire,
memory_order_release,
memory_order_acq_rel,
} memory_order;
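/* _ReadWriteBarrier() is a compiler-only fence: it stops MSVC from reordering
 * memory accesses across it but emits no instruction. On x86/x64, where
 * ordinary loads already have acquire and ordinary stores release semantics,
 * that is sufficient; a weakly ordered target would also need a hardware
 * barrier here. */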
# define vkd3d_atomic_rw_barrier() _ReadWriteBarrier()
# define vkd3d_atomic_rw_barrier_acquire(order) { if (order == memory_order_acquire || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
# define vkd3d_atomic_rw_barrier_release(order) { if (order == memory_order_release || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
FORCEINLINE uint32_t vkd3d_uint32_atomic_load_explicit(uint32_t *target, memory_order order)
{
uint32_t value = *((volatile uint32_t*)target);
vkd3d_atomic_rw_barrier_acquire(order);
return value;
}
FORCEINLINE void vkd3d_uint32_atomic_store_explicit(uint32_t *target, uint32_t value, memory_order order)
{
vkd3d_atomic_rw_barrier_release(order);
*((volatile uint32_t*)target) = value;
}
FORCEINLINE uint32_t vkd3d_uint32_atomic_exchange_explicit(uint32_t *target, uint32_t value, memory_order order)
{
vkd3d_atomic_rw_barrier_release(order);
uint32_t oldValue = InterlockedExchange((LONG*)target, value);
vkd3d_atomic_rw_barrier_acquire(order);
return oldValue;
}
#elif defined(__GNUC__) || defined(__clang__)
# include <stdatomic.h>
# define vkd3d_uint32_atomic_load_explicit(target, order) atomic_load_explicit(target, order)
# define vkd3d_uint32_atomic_store_explicit(target, value, order) atomic_store_explicit(target, value, order)
# define vkd3d_uint32_atomic_exchange_explicit(target, value, order) atomic_exchange_explicit(target, value, order)
/* stdatomic only provides fetch_add/fetch_sub, which return the value before
 * the operation, while Interlocked* return the value after it, so use the
 * common GCC __atomic extensions, which offer add_fetch/sub_fetch. */
# define InterlockedIncrement(target) __atomic_add_fetch(target, 1, memory_order_seq_cst)
# define InterlockedDecrement(target) __atomic_sub_fetch(target, 1, memory_order_seq_cst)
#else
# error "No atomics for this platform"
#endif
#endif
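To illustrate the intended use of the unified helpers, here is a minimal sketch of a release/acquire publication pattern; the names producer, consumer, payload and ready are hypothetical, and the plain uint32_t objects mirror how the spinlock header below uses the helpers:

#include "vkd3d_atomic.h"

static uint32_t payload;
static uint32_t ready;

/* Writer: publish the data, then signal readiness with a release store. */
static void producer(void)
{
    payload = 42;
    vkd3d_uint32_atomic_store_explicit(&ready, 1u, memory_order_release);
}

/* Reader: the acquire load pairs with the release store above, so once
 * ready is observed as 1, the write to payload is guaranteed visible. */
static uint32_t consumer(void)
{
    while (!vkd3d_uint32_atomic_load_explicit(&ready, memory_order_acquire))
        ;
    return payload;
}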


@@ -174,42 +174,6 @@ static inline int ascii_strcasecmp(const char *a, const char *b)
return c_a - c_b;
}
#ifndef _WIN32
# if HAVE_SYNC_ADD_AND_FETCH
static inline LONG InterlockedIncrement(LONG volatile *x)
{
return __sync_add_and_fetch(x, 1);
}
# else
# error "InterlockedIncrement() not implemented for this platform"
# endif /* HAVE_SYNC_ADD_AND_FETCH */
# if HAVE_SYNC_SUB_AND_FETCH
static inline LONG InterlockedDecrement(LONG volatile *x)
{
return __sync_sub_and_fetch(x, 1);
}
# else
# error "InterlockedDecrement() not implemented for this platform"
# endif
#endif /* _WIN32 */
#if HAVE_SYNC_ADD_AND_FETCH
# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val)
#elif defined(_MSC_VER)
/* InterlockedAdd returns value after increment, like add_and_fetch. */
# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val)
#else
# error "atomic_add_fetch() not implemented for this platform"
#endif /* HAVE_SYNC_ADD_AND_FETCH */
#ifdef HAVE_EXPLICIT_ATOMIC_LOADS
/* Piggyback on stdatomic from spinlock.h */
# define atomic_load_acquire(ptr) atomic_load_explicit(ptr, memory_order_acquire)
#else
# define atomic_load_acquire(ptr) atomic_add_fetch(ptr, 0)
#endif
static inline bool is_power_of_two(unsigned int x)
{
return x && !(x & (x - 1));


@@ -22,54 +22,15 @@
#include <stdint.h>
#include <stdbool.h>
+#include "vkd3d_atomic.h"
-#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
-#include <stdatomic.h>
-#define HAVE_EXPLICIT_ATOMIC_LOADS
+#define vkd3d_spinlock_try_lock(lock) \
+    (!vkd3d_uint32_atomic_load_explicit(lock, memory_order_relaxed) && \
+     !vkd3d_uint32_atomic_exchange_explicit(lock, 1u, memory_order_acquire))
-#elif defined(__GNUC__) && ((__GNUC__ * 1000 + __GNUC_MINOR__) >= 4007)
+#define vkd3d_spinlock_unlock(lock) vkd3d_uint32_atomic_store_explicit(lock, 0u, memory_order_release)
-/* Can use GCC's intrinsic versions of C11 atomics. */
-#define atomic_load_explicit(lock, order) __atomic_load_n(lock, order)
-#define atomic_store_explicit(lock, value, order) __atomic_store_n(lock, value, order)
-#define atomic_exchange_explicit(lock, value, order) __atomic_exchange_n(lock, value, order)
-#define memory_order_relaxed __ATOMIC_RELAXED
-#define memory_order_acquire __ATOMIC_ACQUIRE
-#define memory_order_release __ATOMIC_RELEASE
-#define HAVE_EXPLICIT_ATOMIC_LOADS
-#elif defined(__GNUC__)
-/* Legacy GCC intrinsics */
-#define atomic_try_lock(lock) !__sync_lock_test_and_set(lock, 1)
-#define atomic_unlock(lock) __sync_lock_release(lock)
-#elif defined(_MSC_VER)
-#include <intrin.h>
-#define atomic_try_lock(lock) !InterlockedExchange(lock, 1)
-/* There is no "unlock" equivalent on MSVC, but exchange without consuming the result
- * is effectively an unlock with correct memory semantics.
- * The compiler is free to optimize this. */
-#define atomic_unlock(lock) InterlockedExchange(lock, 0)
-#else
-#error "No possible spinlock implementation for this platform."
-#endif
-/* Generic C11 implementations of try_lock and unlock. */
-#ifndef atomic_try_lock
-#define atomic_try_lock(lock) \
-    (!atomic_load_explicit(lock, memory_order_relaxed) && \
-     !atomic_exchange_explicit(lock, 1u, memory_order_acquire))
-#endif
-#ifndef atomic_unlock
-#define atomic_unlock(lock) atomic_store_explicit(lock, 0u, memory_order_release)
-#endif
-#ifdef _MSC_VER
-typedef LONG spinlock_t;
-#else
typedef uint32_t spinlock_t;
-#endif
static inline void spinlock_init(spinlock_t *lock)
{
@@ -78,7 +39,7 @@ static inline void spinlock_init(spinlock_t *lock)
static inline bool spinlock_try_acquire(spinlock_t *lock)
{
-    return atomic_try_lock(lock);
+    return vkd3d_spinlock_try_lock(lock);
}
static inline void spinlock_acquire(spinlock_t *lock)
@@ -89,7 +50,7 @@ static inline void spinlock_acquire(spinlock_t *lock)
static inline void spinlock_release(spinlock_t *lock)
{
-    atomic_unlock(lock);
+    vkd3d_spinlock_unlock(lock);
}
#endif
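The result is a test-and-test-and-set lock: vkd3d_spinlock_try_lock() does a cheap relaxed load first and only attempts the atomic exchange when the lock looks free, while vkd3d_spinlock_unlock() is a plain release store. A hypothetical caller (spinlock_acquire(), whose body is elided in the hunk above, presumably loops on spinlock_try_acquire()):

static spinlock_t lock;      /* 0 == free, 1 == held; zero-init is unlocked. */
static unsigned int counter;

static void increment_counter(void)
{
    spinlock_acquire(&lock);
    ++counter;               /* Ordered by the acquire/release pair. */
    spinlock_release(&lock);
}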


@@ -177,7 +177,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
LONG count;
int rc;
-    if (!(count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    if (!(count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
return;
WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence);
@@ -188,7 +188,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
return;
}
-    while ((count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    while ((count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
{
TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence);
@@ -313,7 +313,7 @@ static void *vkd3d_fence_worker_main(void *arg)
{
vkd3d_wait_for_gpu_timeline_semaphores(worker);
-        if (!worker->fence_count || atomic_load_acquire(&worker->enqueued_fence_count))
+        if (!worker->fence_count || vkd3d_uint32_atomic_load_explicit(&worker->enqueued_fence_count, memory_order_acquire))
{
if ((rc = pthread_mutex_lock(&worker->mutex)))
{
@@ -1497,7 +1497,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
TRACE("Resetting command list %p.\n", list);
}
-    if ((pending = atomic_load_acquire(&allocator->outstanding_submissions_count)) != 0)
+    if ((pending = vkd3d_uint32_atomic_load_explicit(&allocator->outstanding_submissions_count, memory_order_acquire)) != 0)
{
/* HACK: There are currently command lists waiting to be submitted to the queue in the submission threads.
* Buggy application, but work around this by not resetting the command pool this time.
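All four call sites follow the same pattern: a counter decremented by a worker thread is read with acquire ordering, so everything the worker wrote before dropping the count is visible to the waiter. A minimal sketch of that pairing, with hypothetical names (the real counters live in the vkd3d fence and allocator structs and are decremented via the Interlocked* helpers, which imply release semantics):

static uint32_t pending_operations;

/* Worker: finish writing results, then publish completion with release
 * semantics so the waiter cannot observe 0 before the results. */
static void worker_finish(void)
{
    /* ... write results ... */
    vkd3d_uint32_atomic_store_explicit(&pending_operations, 0u, memory_order_release);
}

/* Waiter: the acquire load pairs with the release-ordered decrement/store. */
static void wait_idle(void)
{
    while (vkd3d_uint32_atomic_load_explicit(&pending_operations, memory_order_acquire))
        ;
}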