From cd5d01d25c8a55cb535edc2c64d6b22daf74d2fb Mon Sep 17 00:00:00 2001
From: Joshua Ashton
Date: Mon, 15 Jun 2020 05:13:04 +0100
Subject: [PATCH] vkd3d: Refactor atomics

There is no stdatomic available on MSVC, so let's clean things up.

This moves all the atomic helpers to vkd3d_atomic.h and implements all
platforms' spinlocks in entirely the same way.

Signed-off-by: Joshua Ashton
---
 include/private/vkd3d_atomic.h   | 80 ++++++++++++++++++++++++++++++++
 include/private/vkd3d_common.h   | 36 --------------
 include/private/vkd3d_spinlock.h | 53 +++------------------
 libs/vkd3d/command.c             |  8 ++--
 4 files changed, 91 insertions(+), 86 deletions(-)
 create mode 100644 include/private/vkd3d_atomic.h

diff --git a/include/private/vkd3d_atomic.h b/include/private/vkd3d_atomic.h
new file mode 100644
index 00000000..51b9fb13
--- /dev/null
+++ b/include/private/vkd3d_atomic.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_ATOMIC_H
+#define __VKD3D_ATOMIC_H
+
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+
+# include <intrin.h>
+
+typedef enum
+{
+    memory_order_relaxed,
+    memory_order_acquire,
+    memory_order_release,
+    memory_order_acq_rel,
+} memory_order;
+
+# define vkd3d_atomic_rw_barrier() _ReadWriteBarrier()
+# define vkd3d_atomic_rw_barrier_acquire(order) { if (order == memory_order_acquire || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
+# define vkd3d_atomic_rw_barrier_release(order) { if (order == memory_order_release || order == memory_order_acq_rel) { vkd3d_atomic_rw_barrier(); } }
+
+FORCEINLINE uint32_t vkd3d_uint32_atomic_load_explicit(uint32_t *target, memory_order order)
+{
+    uint32_t value = *((volatile uint32_t*)target);
+    vkd3d_atomic_rw_barrier_acquire(order);
+    return value;
+}
+
+FORCEINLINE void vkd3d_uint32_atomic_store_explicit(uint32_t *target, uint32_t value, memory_order order)
+{
+    vkd3d_atomic_rw_barrier_release(order);
+    *((volatile uint32_t*)target) = value;
+}
+
+FORCEINLINE uint32_t vkd3d_uint32_atomic_exchange_explicit(uint32_t *target, uint32_t value, memory_order order)
+{
+    vkd3d_atomic_rw_barrier_release(order);
+    uint32_t oldValue = InterlockedExchange((LONG*)target, value);
+    vkd3d_atomic_rw_barrier_acquire(order);
+    return oldValue;
+}
+
+#elif defined(__GNUC__) || defined(__clang__)
+
+# include <stdatomic.h>
+
+# define vkd3d_uint32_atomic_load_explicit(target, order)            atomic_load_explicit(target, order)
+# define vkd3d_uint32_atomic_store_explicit(target, value, order)    atomic_store_explicit(target, value, order)
+# define vkd3d_uint32_atomic_exchange_explicit(target, value, order) atomic_exchange_explicit(target, value, order)
+
+/* Unfortunately only fetch_add is in stdatomic
+ * so use the common GCC extensions for these.
+ */
+# define InterlockedIncrement(target) __atomic_add_fetch(target, 1, memory_order_seq_cst)
+# define InterlockedDecrement(target) __atomic_sub_fetch(target, 1, memory_order_seq_cst)
+
+#else
+
+# error "No atomics for this platform"
+
+#endif
+
+#endif
\ No newline at end of file
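
For context when reviewing: the helpers above pair an acquire load with a release store in the usual message-passing pattern. A minimal sketch, assuming only the vkd3d_atomic.h added above; the flag and both functions are hypothetical, for illustration only, and not part of this patch:

    #include <stdbool.h>
    #include <stdint.h>
    #include "vkd3d_atomic.h"

    static uint32_t work_done; /* hypothetical flag, zero-initialised */

    /* Writer: the release store publishes every write made before it. */
    static void publish_result(void)
    {
        vkd3d_uint32_atomic_store_explicit(&work_done, 1u, memory_order_release);
    }

    /* Reader: the acquire load pairs with the release store above, so a
     * caller that observes 1 also observes the writer's earlier writes. */
    static bool result_ready(void)
    {
        return vkd3d_uint32_atomic_load_explicit(&work_done, memory_order_acquire) == 1u;
    }

On the MSVC path this compiles to a volatile access plus _ReadWriteBarrier(), a compiler-only fence that is sufficient for acquire/release ordering on x86's strongly ordered memory model; on GCC/Clang it maps directly to the stdatomic equivalents.
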
diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h
index 57465d9c..ad5df093 100644
--- a/include/private/vkd3d_common.h
+++ b/include/private/vkd3d_common.h
@@ -174,42 +174,6 @@ static inline int ascii_strcasecmp(const char *a, const char *b)
     return c_a - c_b;
 }
 
-#ifndef _WIN32
-# if HAVE_SYNC_ADD_AND_FETCH
-static inline LONG InterlockedIncrement(LONG volatile *x)
-{
-    return __sync_add_and_fetch(x, 1);
-}
-# else
-# error "InterlockedIncrement() not implemented for this platform"
-# endif  /* HAVE_SYNC_ADD_AND_FETCH */
-
-# if HAVE_SYNC_SUB_AND_FETCH
-static inline LONG InterlockedDecrement(LONG volatile *x)
-{
-    return __sync_sub_and_fetch(x, 1);
-}
-# else
-# error "InterlockedDecrement() not implemented for this platform"
-# endif
-#endif  /* _WIN32 */
-
-#if HAVE_SYNC_ADD_AND_FETCH
-# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val)
-#elif defined(_MSC_VER)
-/* InterlockedAdd returns value after increment, like add_and_fetch. */
-# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val)
-#else
-# error "atomic_add_fetch() not implemented for this platform"
-#endif  /* HAVE_SYNC_ADD_AND_FETCH */
-
-#ifdef HAVE_EXPLICIT_ATOMIC_LOADS
-/* Piggyback on stdatomic from spinlock.h */
-# define atomic_load_acquire(ptr) atomic_load_explicit(ptr, memory_order_acquire)
-#else
-# define atomic_load_acquire(ptr) atomic_add_fetch(ptr, 0)
-#endif
-
 static inline bool is_power_of_two(unsigned int x)
 {
     return x && !(x & (x -1));
diff --git a/include/private/vkd3d_spinlock.h b/include/private/vkd3d_spinlock.h
index 6b41880a..a69a4237 100644
--- a/include/private/vkd3d_spinlock.h
+++ b/include/private/vkd3d_spinlock.h
@@ -22,54 +22,15 @@
 
 #include <stdbool.h>
 #include <stdint.h>
+#include "vkd3d_atomic.h"
 
-#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
-#include <stdatomic.h>
-#define HAVE_EXPLICIT_ATOMIC_LOADS
+#define vkd3d_spinlock_try_lock(lock) \
+    (!vkd3d_uint32_atomic_load_explicit(lock, memory_order_relaxed) && \
+     !vkd3d_uint32_atomic_exchange_explicit(lock, 1u, memory_order_acquire))
 
-#elif defined(__GNUC__) && ((__GNUC__ * 1000 + __GNUC_MINOR__) >= 4007)
+#define vkd3d_spinlock_unlock(lock) vkd3d_uint32_atomic_store_explicit(lock, 0u, memory_order_release)
 
-/* Can use GCC's intrinsic versions of C11 atomics. */
-#define atomic_load_explicit(lock, order) __atomic_load_n(lock, order)
-#define atomic_store_explicit(lock, value, order) __atomic_store_n(lock, value, order)
-#define atomic_exchange_explicit(lock, value, order) __atomic_exchange_n(lock, value, order)
-#define memory_order_relaxed __ATOMIC_RELAXED
-#define memory_order_acquire __ATOMIC_ACQUIRE
-#define memory_order_release __ATOMIC_RELEASE
-#define HAVE_EXPLICIT_ATOMIC_LOADS
-
-#elif defined(__GNUC__)
-/* Legacy GCC intrinsics */
-#define atomic_try_lock(lock) !__sync_lock_test_and_set(lock, 1)
-#define atomic_unlock(lock) __sync_lock_release(lock)
-
-#elif defined(_MSC_VER)
-#include <windows.h>
-#define atomic_try_lock(lock) !InterlockedExchange(lock, 1)
-/* There is no "unlock" equivalent on MSVC, but exchange without consuming the result
- * is effectively an unlock with correct memory semantics.
- * Compiler is be free to optimize this.
- */
-#define atomic_unlock(lock) InterlockedExchange(lock, 0)
-#else
-#error "No possible spinlock implementation for this platform."
-#endif
-
-/* Generic C11 implementations of try_lock and unlock. */
-#ifndef atomic_try_lock
-#define atomic_try_lock(lock) \
-    (!atomic_load_explicit(lock, memory_order_relaxed) && \
-     !atomic_exchange_explicit(lock, 1u, memory_order_acquire))
-#endif
-
-#ifndef atomic_unlock
-#define atomic_unlock(lock) atomic_store_explicit(lock, 0u, memory_order_release)
-#endif
-
-#ifdef _MSC_VER
-typedef LONG spinlock_t;
-#else
 typedef uint32_t spinlock_t;
-#endif
 
 static inline void spinlock_init(spinlock_t *lock)
 {
@@ -78,7 +39,7 @@ static inline void spinlock_init(spinlock_t *lock)
 
 static inline bool spinlock_try_acquire(spinlock_t *lock)
 {
-    return atomic_try_lock(lock);
+    return vkd3d_spinlock_try_lock(lock);
 }
 
 static inline void spinlock_acquire(spinlock_t *lock)
@@ -89,7 +50,7 @@ static inline void spinlock_acquire(spinlock_t *lock)
 
 static inline void spinlock_release(spinlock_t *lock)
 {
-    atomic_unlock(lock);
+    vkd3d_spinlock_unlock(lock);
 }
 
 #endif
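
Worth noting for review: vkd3d_spinlock_try_lock is a test-and-test-and-set. The relaxed load lets contended waiters spin on a locally cached value, and the acquire exchange is only attempted once the lock appears free, so the cache line is not bounced between cores on every iteration. A minimal usage sketch of the resulting API, with hypothetical names that are not part of this patch:

    #include "vkd3d_spinlock.h"

    struct item { int value; };    /* hypothetical payload */

    static spinlock_t queue_lock;  /* zero-initialised, i.e. unlocked */
    static struct item queue_head; /* hypothetical shared state */

    static void queue_store(const struct item *item)
    {
        spinlock_acquire(&queue_lock); /* spins on vkd3d_spinlock_try_lock() */
        queue_head = *item;            /* critical section */
        spinlock_release(&queue_lock); /* release store publishes the write */
    }

Because spinlock_t is now plain uint32_t everywhere, the same definitions serve both the MSVC and the stdatomic paths without the old per-platform typedef.
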
diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index cc1c3f7b..b93b62b8 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -177,7 +177,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
     LONG count;
     int rc;
 
-    if (!(count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    if (!(count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
         return;
 
     WARN("Waiting for %u pending fence operations (fence %p).\n", count, fence);
@@ -188,7 +188,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
         return;
     }
 
-    while ((count = atomic_load_acquire(&fence->pending_worker_operation_count)))
+    while ((count = vkd3d_uint32_atomic_load_explicit(&fence->pending_worker_operation_count, memory_order_acquire)))
     {
         TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence);
 
@@ -313,7 +313,7 @@ static void *vkd3d_fence_worker_main(void *arg)
     {
         vkd3d_wait_for_gpu_timeline_semaphores(worker);
 
-        if (!worker->fence_count || atomic_load_acquire(&worker->enqueued_fence_count))
+        if (!worker->fence_count || vkd3d_uint32_atomic_load_explicit(&worker->enqueued_fence_count, memory_order_acquire))
         {
             if ((rc = pthread_mutex_lock(&worker->mutex)))
             {
@@ -1497,7 +1497,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_allocator_Reset(ID3D12CommandAllo
         TRACE("Resetting command list %p.\n", list);
     }
 
-    if ((pending = atomic_load_acquire(&allocator->outstanding_submissions_count)) != 0)
+    if ((pending = vkd3d_uint32_atomic_load_explicit(&allocator->outstanding_submissions_count, memory_order_acquire)) != 0)
     {
         /* HACK: There are currently command lists waiting to be submitted to the queue in the submission threads.
          * Buggy application, but work around this by not resetting the command pool this time.