ci: Use arch-mingw-github-action v8

Fixes safe directory stuff giving invalid version info.
Revert "ci: Workaround safe directory errors in vkd3d_build generation."
2022-07-26 18:37:26 +00:00 · 2022-07-26 18:37:26 +00:00 · 2022-07-25 23:55:40 +02:00 · 2022-07-25 23:11:37 +02:00 · 2022-07-25 21:55:45 +02:00 · 2022-07-25 18:39:06 +02:00
208 changed files with 132381 additions and 46506 deletions
--- a/.github/workflows/artifacts.yml
+++ b/.github/workflows/artifacts.yml
@ -0,0 +1,31 @@
+name: Artifacts (Package)
+
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  build-artifacts:
+    runs-on: ubuntu-20.04
+
+    steps:
+    - name: Checkout code
+      id: checkout-code
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    - name: Build release
+      id: build-release
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          export VERSION_NAME="${GITHUB_REF##*/}-${GITHUB_SHA##*/}"
+          ./package-release.sh ${VERSION_NAME} build --no-package
+          echo "VERSION_NAME=${VERSION_NAME}" >> $GITHUB_ENV
+
+    - name: Upload artifacts
+      id: upload-artifacts
+      uses: actions/upload-artifact@v2
+      with:
+        name: vkd3d-proton-${{ env.VERSION_NAME }}
+        path: build/vkd3d-proton-${{ env.VERSION_NAME }}
+        if-no-files-found: error
--- a/.github/workflows/test-build-linux.yml
+++ b/.github/workflows/test-build-linux.yml
@ -0,0 +1,75 @@
+name: Test Builds on Linux
+
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  build-set-linux:
+    runs-on: ubuntu-20.04
+
+    steps:
+    - name: Checkout code
+      id: checkout-code
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    - name: Setup problem matcher
+      uses: Joshua-Ashton/gcc-problem-matcher@v1
+
+    - name: Build MinGW x86
+      id: build-mingw-x86
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          meson -Denable_tests=True -Denable_extras=True --cross-file=build-win32.txt --buildtype release build-mingw-x86
+          ninja -C build-mingw-x86
+
+    - name: Build MinGW x64
+      id: build-mingw-x64
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          meson -Denable_tests=True -Denable_extras=True --cross-file=build-win64.txt --buildtype release build-mingw-x64
+          ninja -C build-mingw-x64
+
+    - name: Build Native GCC x86
+      id: build-native-gcc-x86
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          export CC="gcc -m32"
+          export CXX="g++ -m32"
+          export PKG_CONFIG_PATH="/usr/lib32/pkgconfig:/usr/lib/i386-linux-gnu/pkgconfig:/usr/lib/pkgconfig"
+          meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-gcc-x86
+          ninja -C build-native-gcc-x86
+
+    - name: Build Native GCC x64
+      id: build-native-gcc-x64
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          export CC="gcc"
+          export CXX="g++"
+          meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-gcc-x64
+          ninja -C build-native-gcc-x64
+
+    - name: Build Native Clang x86
+      id: build-native-clang-x86
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          export CC="clang -m32"
+          export CXX="clang++ -m32"
+          export PKG_CONFIG_PATH="/usr/lib32/pkgconfig:/usr/lib/i386-linux-gnu/pkgconfig:/usr/lib/pkgconfig"
+          meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-clang-x86
+          ninja -C build-native-clang-x86
+
+    - name: Build Native Clang x64
+      id: build-native-clang-x64
+      uses: Joshua-Ashton/arch-mingw-github-action@v8
+      with:
+        command: |
+          export CC="clang"
+          export CXX="clang++"
+          meson -Denable_tests=True -Denable_extras=True --buildtype release build-native-clang-x64
+          ninja -C build-native-clang-x64
--- a/.github/workflows/test-build-windows.yml
+++ b/.github/workflows/test-build-windows.yml
@ -0,0 +1,53 @@
+name: Test Builds on Windows
+
+on: [push, pull_request, workflow_dispatch]
+
+jobs:
+  build-set-windows:
+    runs-on: windows-2022
+
+    steps:
+    - name: Checkout code
+      id: checkout-code
+      uses: actions/checkout@v2
+      with:
+        submodules: recursive
+
+    - name: Setup widl and glslangValidator
+      shell: pwsh
+      run: |
+        choco install strawberryperl vulkan-sdk -y
+        Write-Output "C:\Strawberry\c\bin" | Out-File -FilePath "${Env:GITHUB_PATH}" -Append
+        Write-Output "$([System.Environment]::GetEnvironmentVariable('VULKAN_SDK', 'Machine'))\Bin" `
+          | Out-File -FilePath "${Env:GITHUB_PATH}" -Append
+
+    - name: Setup Meson
+      shell: pwsh
+      run: pip install meson
+
+    - name: Find Visual Studio
+      shell: pwsh
+      run: |
+        $installationPath = Get-VSSetupInstance `
+          | Select-VSSetupInstance -Require Microsoft.VisualStudio.Workload.NativeDesktop -Latest `
+          | Select-Object -ExpandProperty InstallationPath
+        Write-Output "VSDEVCMD=${installationPath}\Common7\Tools\VsDevCmd.bat" `
+          | Out-File -FilePath "${Env:GITHUB_ENV}" -Append
+
+    - name: Build MSVC x86
+      shell: pwsh
+      run: |
+        & "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x86 -host_arch=x64 -no_logo && set" `
+          | % { , ($_ -Split '=', 2) } `
+          | % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
+        meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x86
+        msbuild -m build-msvc-x86/vkd3d-proton.sln
+
+    - name: Build MSVC x64
+      shell: pwsh
+      run: |
+        & "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x64 -host_arch=x64 -no_logo && set" `
+          | % { , ($_ -Split '=', 2) } `
+          | % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
+        meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x64
+        msbuild -m build-msvc-x64/vkd3d-proton.sln
--- a/.gitignore
+++ b/.gitignore
@ -1,25 +1,5 @@
-aclocal.m4
-autom4te.cache
-config.log
-config.status
-configure
-libtool
-Makefile
-Makefile.in
-test-suite.log
-vkd3d-compiler
+build
+build.*
+vkd3d-proton-*.tar.zst
+vkd3d-proton-*/

-vkd3d-*.tar.xz
-
-*.exe
-*.la
-*.lo
-*.log
-*.o
-*.pc
-*.trs
-*~
-
-.deps
-.dirstamp
-.libs
--- a/.gitmodules
+++ b/.gitmodules
@ -0,0 +1,9 @@
+[submodule "subprojects/dxil-spirv"]
+	path = subprojects/dxil-spirv
+	url = https://github.com/HansKristian-Work/dxil-spirv
+[submodule "subprojects/Vulkan-Headers"]
+	path = subprojects/Vulkan-Headers
+	url = https://github.com/KhronosGroup/Vulkan-Headers
+[submodule "subprojects/SPIRV-Headers"]
+	path = subprojects/SPIRV-Headers
+	url = https://github.com/KhronosGroup/SPIRV-Headers
--- a/.mailmap
+++ b/.mailmap
@ -0,0 +1,4 @@
+Conor McCarthy <cmccarthy@codeweavers.com>
+Ivan Fedorov <ifedorov@nvidia.com>
+James Beddek <telans@protonmail.com>
+Roshan Chaudhari <rochaudhari@nvidia.com>
--- a/85
+++ b/85
@ -1,85 +0,0 @@
-The Wine team is proud to announce that release 1.1 of vkd3d, the
-Direct3D 12 to Vulkan translation library, is now available.
-
-This release contains improvements that are listed in the release notes below.
-The main highlights are:
-
-  - Initial support for placed resources.
-  - MoltenVK support.
-  - Support for geometry shaders.
-
-The source is available from the following location:
-
-  https://dl.winehq.org/vkd3d/source/vkd3d-1.1.tar.xz
-
-The current source can also be pulled directly from the git repository:
-
-  https://source.winehq.org/git/vkd3d.git/
-
-Vkd3d is available thanks to the work of multiple people. See the file AUTHORS
-for the complete list.
-
----------------------------------------------------------------
-
-What's new in vkd3d 1.1
-=======================
-
-
-*** libvkd3d
-
- Initial support for memory heaps and placed resources.
-
- Improved support for resource views.
-
- ClearUnorderedAccessViewUint() is implemented for textures.
-
- Blend factor is implemented.
-
- Performance improvements.
-
- A new interface is available for enabling additional Vulkan instance
-  extensions.
-
- A new public function is available for mapping VkFormats to DXGI_FORMATs.
-
- Support for more DXGI formats.
-
- Various bug fixes.
-
-
-*** libvkd3d-shader
-
- Support for geometry shaders.
-
- Pretty printing is implemented for shader code extracted from DXBC.
-
- Clip and cull distances are supported.
-
- Support for more shader instructions:
-  - round_ne,
-  - sincos,
-  - ineg,
-  - continue,
-  - continuec,
-  - gather4_po,
-  - gather4_po_c,
-  - gather4_c.
-
- Texel offsets are supported.
-
- Various shader translation fixes.
-
-
-*** libvkd3d-utils
-
- Vulkan WSI extensions are detected at runtime.
-
-
-*** build
-
- Demos are not built by default.
-
- libxcb is now an optional dependency required only for demos.
-
- MoltenVK is supported.
-
--- a/30
+++ b/30
@ -1,4 +1,34 @@
+Alexander Gabello
+Alexandre Julliard
+Andrew Eikum
+Arkadiusz Hiler
+Biswapriyo Nath
 Chip Davis
+Conor McCarthy
+Danylo Piliaiev
+David Gow
+David McCloskey
+Derek Lesho
+Fabian Bornschein
+Georg Lehmann
+Hans-Kristian Arntzen
 Henri Verbeet
+Ivan Fedorov
+Jactry Zeng
+James Beddek
+Jens Peters
+Joshua Ashton
 Józef Kucia
+Juuso Alasuutari
+Krzysztof Bogacki
+Paul Gofman
+Philip Rebohle
+Rémi Bernon
+Robin Kertels
+Rodrigo Locatti
+Roshan Chaudhari
+Samuel Pitoiset
+Sveinar Søpler
 Sven Hesse
+Thomas Crider
+Zhiyi Zhang
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -0,0 +1,388 @@
+# Change Log
+
+## 2.6
+
+It has been a long while since 2.5, and this release rolls up a lot of fixes, features and optimizations.
+
+### Fixes
+
+- Fix black screen rendering bug in Horizon Zero Dawn after latest game updates.
+- Fix crashes on startup in Final Fantasy VII: Remake and Warframe.
+- Fix crashes in Guardians of the Galaxy when interacting with certain game objects.
+- Fix hang on game shutdown in Elden Ring.
+- Fix broken geometry rendering in Age of Empires: IV.
+
+### Optimization
+
+- Improve generated shader code for vectorized load-store operations in DXIL.
+- Greatly reduce CPU overhead for descriptor copy operations,
+  which is a key contributor to CPU overhead in D3D12.
+
+### Features
+
+#### Pipeline library rewrite
+
+D3D12 pipeline libraries are now better supported, as we can now also cache
+generated SPIR-V from DXBC/DXIL.
+Massively reduces subsequent load times in Monster Hunter: Rise,
+and helps other titles like Guardians of the Galaxy and Elden Ring.
+Also lays the groundwork for internal driver caches down the line for games which do not use this API.
+Also, deduplicates binary blobs for reduced disk size requirements.
+
+#### Shader models
+
+Shader model 6.6 is now fully implemented. This includes support for:
+- ResourceDescriptorHeap[] direct access
+- 64-bit atomics
+- IsHelperLane()
+- Compute shader derivatives
+- WaveSize attribute
+- Packed math intrinsics
+
+#### Minor features
+
+- Handle API feature MinResourceLODClamp correctly if `VK_EXT_image_view_min_lod` is supported.
+- Expose CastFullyTypedFormat feature.
+- Expose some advanced shader features on Intel related to UAV formats (`VK_KHR_format_feature_flags2`).
+- Support COLOR -> STENCIL copies.
+
+### Workarounds
+
+- Workaround DEATHLOOP not emitting synchronization commands correctly. Fixes menu flicker on RADV.
+- Workaround quirky API usage in Elden Ring. Removes many kinds of stutter and chug when traversing the scenery.
+- Workaround certain environments failing to create Vulkan device if some `VK_NVX_*` extensions are enabled.
+- Workaround glitched foliage rendering in Horizon Zero Dawn after latest game updates.
+- Workaround some questionable UE4 shaders causing glitched rendering on RADV.
+
+### Note on future Vulkan driver requirements
+
+2.6 is expected to be the last vkd3d-proton release before we require some newer Vulkan extensions.
+`VK_KHR_dynamic_rendering` and `VK_EXT_extended_dynamic_state`
+(and likely `dynamic_state_2` as well) will be required.
+
+`VK_KHR_dynamic_rendering` in particular requires up-to-date drivers and the legacy render pass path
+will be abandoned in favor of it. Supporting both paths at the same time is not practical.
+Moving to `VK_KHR_dynamic_rendering` allows us to fix some critical flaws with the legacy API
+which caused potential shader compilation stutters and extra CPU overhead.
+
+## 2.5
+
+This is a release with a little bit of everything!
+
+### Features
+
+#### DXR progress
+
+DXR has seen significant work in the background.
+
+- DXR 1.1 is now experimentally exposed. It can be enabled with `VKD3D_CONFIG=dxr11`.
+  Note that DXR 1.1 cannot be fully implemented in `VK_KHR_ray_tracing`'s current form, in particular
+  DispatchRays() indirect is not compatible yet,
+  although we have not observed a game which requires this API feature.
+- DXR 1.1 inline raytracing support is fully implemented.
+- DXR 1.0 support is more or less feature complete.
+  Some weird edge cases remain, but will likely not be implemented unless required by a game.
+  `VKD3D_CONFIG=dxr` will eventually be dropped when it matures.
+
+Some new DXR games are starting to come alive, especially with DXR 1.1 enabled,
+but there are significant bugs as well that we currently cannot easily debug.
+Some experimental results on NVIDIA:
+
+- **Control** - already worked
+- **DEATHLOOP** - appears to work correctly
+- **Cyberpunk 2077** - DXR can be enabled, but GPU timeouts
+- **World of Warcraft** - according to a user, it works, but we have not confirmed ourselves
+- **Metro Exodus: Enhanced Edition** -
+    gets in-game and appears to work? Not sure if it looks correct.
+    Heavy CPU stutter for some reason ...
+- **Metro Exodus** (original release) - GPU timeouts when enabling DXR
+- **Resident Evil: Village** - Appears to work, but the visual difference is subtle.
+
+It's worth experimenting with these and others.
+DXR is incredibly complicated, so expect bugs.
+From here, DXR support is mostly a case of stamping out issues one by one.
+
+#### NVIDIA DLSS
+
+NVIDIA contributed integration APIs in vkd3d-proton which enables DLSS support in D3D12 titles in Proton.
+See Proton documentation for how to enable NvAPI support.
+
+#### Shader models
+
+A fair bit of work went into DXIL translation support to catch up with native drivers.
+
+- Shader model 6.5 is exposed.
+  Shader model 6.6 should be straightforward once that becomes relevant.
+- Shader model 6.4 implementation takes advantage of `VK_KHR_shader_integer_dot_product` when supported.
+- Proper fallback for FP16 math on GPUs which do not expose native FP16 support (Polaris, Pascal).
+  Notably fixes AMD FSR shaders in Resident Evil: Village (and others).
+- Shader model 6.1 SV_Barycentric support implemented (NVIDIA only for now).
+- Support shader model 6.2 FP32 denorm control.
+
+### Performance
+
+Resizable BAR can improve GPU performance by about 10-15% in the best case, but this depends a lot on the game.
+Horizon Zero Dawn and Death Stranding in particular improve massively with this change.
+
+By default, vkd3d-proton will now take advantage of PCI-e BAR memory types through heuristics
+as D3D12 does not expose direct support for resizable BAR, and native D3D12 drivers are known to use heuristics as well.
+Without resizable BAR enabled in BIOS/vBIOS, we only get 256 MiB which can help performance,
+but many games will improve performance even more
+when we are allowed to use more than that.
+There is an upper limit for how much VRAM is dedicated to this purpose.
+We also added `VKD3D_CONFIG=no_upload_hvv` to disable all uses of PCI-e BAR memory.
+
+Other performance improvements:
+
+- Avoid redundant descriptor update work in certain scenarios (NVIDIA contribution).
+- Minor tweaks here and there to reduce CPU overhead.
+
+### Fixes and workarounds
+
+- Fix behavior for swap chain presentation latency HANDLE. Fixes spurious deadlocks in some cases.
+- Fix many issues related to depth-stencil handling, which fixed various issues in DEATHLOOP, F1 2021, WRC 10.
+- Fix DIRT 5 rendering issues and crashes. Should be fully playable now.
+- Fix some Diablo II Resurrected rendering issues.
+- Workaround shader bugs in Psychonauts 2.
+- Workaround some Unreal Engine 4 shader bugs which multiple titles trigger.
+- Fix some stability issues when VRAM is exhausted on NVIDIA.
+- Fix CPU crash in boot-up sequence of Far Cry 6 (game is still kinda buggy though, but gets in-game).
+- Fix various bugs with host visible images. Fixes DEATHLOOP.
+- Fix various DXIL conversion bugs.
+- Add Invariant geometry workarounds for specific games which require it.
+- Fix how d3d12.dll exports symbols to be more in line with MSVC.
+- Fix some edge cases in bitfield instructions.
+- Work around extreme CPU memory bloat on the specific NVIDIA driver versions which had this bug.
+- Fix regression in Evil Genius 2: World Domination.
+- Fix crashes in Hitman 3.
+- Fix terrain rendering in Anno 1800.
+- Various correctness and crash fixes.
+
+## 2.4
+
+This is a release which focuses on performance and bug-fixes.
+
+### Performance
+
+- Improve swapchain latency and frame pacing by up to one frame.
+- Optimize lookup of format info.
+- Avoid potential pipeline compilation stutter in certain scenarios.
+- Rewrite how we handle image layouts for color and depth-stencil targets.
+  Allows us to remove a lot of dumb
+  barriers giving significant GPU-bound performance improvements.
+  ~15%-20% GPU bound uplift in Horizon Zero Dawn,
+  ~10% in Death Stranding,
+  and 5%-10% improvements in many other titles.
+
+### Features
+
+- Enable support for sparse 3D textures (tiled resources tier 3).
+
+### Bug fixes and workarounds
+
+- Various bug fixes in DXIL.
+- Fix weird bug where sun would pop through walls in RE: Village.
+- Workaround game bug in Cyberpunk 2077 where certain locales would render a black screen.
+- Fix various bugs (in benchmark and in vkd3d-proton) allowing GravityMark to run.
+- Improve robustness against certain app bugs related to NULL descriptors.
+- Fix bug with constant FP64 vector handling in DXBC.
+- Fix bug where Cyberpunk 2077 inventory screen could spuriously hang GPU on RADV.
+- Add workaround for Necromunda: Hired Gun where character models would render random garbage on RADV.
+- Fix bug in Necromunda: Hired Gun causing random screen flicker.
+- Fix windowed mode tracking when leaving fullscreen. Fix Alt-Tab handling in Horizon Zero Dawn.
+- Temporary workaround for SRV ResourceMinLODClamp. Fix black ground rendering in DIRT 5.
+  The overbright HDR rendering in DIRT 5 sadly persists however :(
+- Implement fallback maximum swapchain latency correctly.
+
+### Development features
+
+Various features which are useful for developers were added to aid debugging.
+
+- Descriptor QA can instrument shaders in runtime for GPU-assisted validation.
+  Performance is good enough (> 40 FPS) that games are actually playable in this mode.
+  See README for details.
+- Allow forcing off CONCURRENT queue, and using EXCLUSIVE queue.
+  Not valid, but can be useful as a speed hack on Polaris when `single_queue` is not an option
+  and for testing driver behavior differences.
+
+## 2.3.1
+
+This is a minor bugfix release to address some issues solved shortly after the last release.
+
+### Fixes
+
+- Improved support for older Wine and Vulkan Loader versions.
+- Fix blocky shadows in Horizon Zero Dawn.
+- Fix the install script failing on Wine installs not built with upstream vkd3d.
+- Fix minor dxil translation issues.
+
+## 2.3
+
+This release adds support for more D3D12 features and greatly improves GPU bound performance
+in many scenarios.
+
+### Features
+
+#### Early DXR 1.0 support
+
+`VK_KHR_raytracing` is used to enable cross-vendor ray-tracing support.
+The implementation is WIP, but it is good enough to run some real content.
+
+As of writing, only the NVIDIA driver works correctly.
+It is expected AMD RDNA2 GPUs will work when working drivers are available
+(amdgpu-pro 21.10 is known to not work).
+
+Games which are expected to work include:
+- Control (appears to be fully working)
+- Ghostrunner (seems to work, not exhaustively tested)
+
+To enable DXR support, `VKD3D_CONFIG=dxr %command%` should be used when launching the game.
+Certain games may be unstable if DXR is enabled by default.
+
+#### Conservative rasterization
+
+Full support (tier 3) for conservative rasterization was added.
+
+#### Variable rate shading
+
+Full support (tier 2) for variable rate shading was added.
+
+#### Command list bundles
+
+Allows Kingdom Hearts remaster to get past the errors, unsure if game fully works yet.
+
+#### Write Watch and APITrace
+
+Support for `D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH` has been added.
+This means [APITraces](https://github.com/Joshua-Ashton/apitrace/releases) of titles can now be captured.
+
+### Performance
+
+- Improve GPU bound performance in RE2 by up to 20% on NVIDIA.
+- Enable async compute queues. Greatly improves GPU performance and frame pacing in many titles.
+  Horizon Zero Dawn and Death Stranding see exceptional gains with this fix,
+  due to how the engines work. GPU utilization should now reach ~100%.
+  For best results, AMD Navi+ GPUs are recommended, but Polaris and earlier still
+  see great results. It is possible to disable this path, if for whatever reason
+  multiple queues are causing issues. See README.
+- Optimize bindless constant buffer GPU-bound performance on NVIDIA if certain API code paths are used.
+- Optimize sparse binding CPU overhead.
+- `TRACE` logging calls are disabled by default on release builds.
+
+### Fixes and workarounds
+
+- Fix various DXIL bugs.
+- Be more robust against broken pipeline creation API calls.
+  Avoids driver crashes in Forza Horizon 4.
+- Workaround some buggy shaders in F1 2020.
+- Fix bugs if depth bounds test is used in certain ways.
+- Fix a read out-of-bounds in `UpdateTileMappings`.
+- Fix `SV_ClipDistance` and `SV_CullDistance` in Hull Shaders.
+
+## 2.2
+
+This release is mostly a maintenance release which fixes bugs and regressions.
+It also unblocks significant future feature development.
+
+### Workaround removals
+
+- Replace old `force_bindless_texel_buffer` workaround with
+  a more correct and performant implementation.
+  Death Stranding and Cyberpunk 2077 (and probably other games as well) do the right thing by default without the hack now.
+- Remove old workaround `disable_query_optimization` for occlusion queries which was enabled for AC: Valhalla,
+  and is now replaced by a correct and efficient implementation.
+
+#### Cyberpunk 2077 status
+From recent testing on our end, it is unknown at this time if `VK_VALVE_mutable_descriptor_type` is still required for
+Cyberpunk 2077. Manual testing hasn't been able to trigger a GPU hang.
+The memory allocation rewrite in 2.2 can plausibly work around some of the bugs that `VK_VALVE_mutable_descriptor_type` fixed by accident.
+The bugs in question could also have been fixed since release day, but we cannot prove this since the bug is completely random in nature.
+
+### Regression fixes
+
+- Fix regression in Horizon Zero Dawn for screen space reflections on water surfaces.
+
+### Stability fixes
+
+- Greatly improve stability on Polaris or older cards for certain titles.
+  Crashes which used to happen in Horizon Zero Dawn and Death Stranding seem to have disappeared
+  after the memory allocation rewrite.
+  GPU memory usage should decrease on these cards as well.
+- DIRT 5 can get in-game now due to DXIL fixes, but is not yet playable.
+
+### New features
+
+- Add support for Variable Rate Shading tier 1.
+
+### Future development
+
+DXR is not yet supported, but has seen a fair bit of background work.
+
+- Basic DXR pipelines can be created successfully.
+- Memory allocation rewrite in 2.2 unblocks further DXR development.
+
+## 2.1
+
+This release fixes various bugs (mostly workarounds) and improves GPU-bound performance.
+
+New games added to "expected to work" list:
+ - The Division (was working already in 2.0, but missing from list)
+ - AC: Valhalla (*)
+
+(*): Game requires full D3D12 sparse texture support to work.
+Currently only works on NVIDIA drivers.
+RADV status remains unknown until support for this feature lands in Mesa.
+
+New games added to "kinda works, but expect a lot of jank" list:
+ - Cyberpunk 2077 (**)
+
+(**): Currently only runs correctly on AMD hardware with RADV and `VK_VALVE_mutable_descriptor_type`.
+As of game version 1.03, this requires the latest Mesa Git build.
+The game has some fatal bugs where it relies on undefined behavior with descriptor management
+which this extension works around by accident.
+The game will start and run on NVIDIA, but just like what happens without the extension on AMD,
+the GPU will randomly hang, making the game effectively unplayable.
+A game update to fix this bug would likely make the game playable on NVIDIA as well.
+Game version 1.04 changed some behavior, and support for this game will likely fluctuate over time as future patches come in.
+
+Bug fixes and workarounds:
+ - Fix various implementation bugs which caused AC: Valhalla to not work.
+ - Work around game bug in Death Stranding where accessing map could cause corrupt rendering.
+   (Several games appear to have the same kind of application bug.)
+ - Fix corrupt textures in Horizon Zero Dawn benchmark.
+ - Fix SM 6.0 wave-op detection for Horizon Zero Dawn and DIRT 5.
+ - Work around GPU hangs in certain situations where games do not use D3D12 correctly,
+   but native D3D12 drivers just render wrong results rather than hang the system.
+ - Fix invalid SPIR-V generated by FP64 code.
+ - Fix crash with minimized windows in certain cases.
+
+Performance:
+ - ~15% GPU-bound uplift in Ghostrunner. Might help UE4 titles in general.
+ - Slightly improve GPU bound performance when fully GPU bound on both AMD and NVIDIA.
+ - Slightly improve GPU bound performance on RADV in various titles.
+ - Reduce multi-threaded CPU overhead for certain D3D12 API usage patterns.
+ - Add support for `VK_VALVE_mutable_descriptor_type` which
+   improves CPU overhead, memory bloat, and avoids potential memory management thrashing on RADV.
+   Also avoids GPU hangs in certain situations where games misuse the D3D12 API.
+
+Misc:
+ - Implement `DXGI_PRESENT_TEST`.
+ - Fix log spam when `DXGI_PRESENT_ALLOW_TEARING` is used.
+
+## 2.0
+
+This initial release supports D3D12 Feature Level 12.0 and Shader Model 6.0 (DXIL).
+
+Games expected to work include:
+
+ - Control
+ - Death Stranding
+ - Devil May Cry 5
+ - Ghostrunner
+ - Horizon Zero Dawn
+ - Metro Exodus
+ - Monster Hunter World
+ - Resident Evil 2 / 3
+
+Please refer to the README for supported driver versions.
+
--- a/4
+++ b/4
@ -1,7 +1,7 @@
-Copyright 2016-2019 the Vkd3d project authors (see the file AUTHORS for a
+Copyright 2016-2022 the vkd3d-proton project authors (see the file AUTHORS for a
 complete list)

-Vkd3d is free software; you can redistribute it and/or modify it under
+vkd3d-proton is free software; you can redistribute it and/or modify it under
 the terms of the GNU Lesser General Public License as published by the
 Free Software Foundation; either version 2.1 of the License, or (at
 your option) any later version.
--- a/370
+++ b/370
@ -1,370 +0,0 @@
-Installation Instructions
-*************************
-
-Copyright (C) 1994-1996, 1999-2002, 2004-2013 Free Software Foundation,
-Inc.
-
-   Copying and distribution of this file, with or without modification,
-are permitted in any medium without royalty provided the copyright
-notice and this notice are preserved.  This file is offered as-is,
-without warranty of any kind.
-
-Basic Installation
-==================
-
-   Briefly, the shell command `./configure && make && make install'
-should configure, build, and install this package.  The following
-more-detailed instructions are generic; see the `README' file for
-instructions specific to this package.  Some packages provide this
-`INSTALL' file but do not implement all of the features documented
-below.  The lack of an optional feature in a given package is not
-necessarily a bug.  More recommendations for GNU packages can be found
-in *note Makefile Conventions: (standards)Makefile Conventions.
-
-   The `configure' shell script attempts to guess correct values for
-various system-dependent variables used during compilation.  It uses
-those values to create a `Makefile' in each directory of the package.
-It may also create one or more `.h' files containing system-dependent
-definitions.  Finally, it creates a shell script `config.status' that
-you can run in the future to recreate the current configuration, and a
-file `config.log' containing compiler output (useful mainly for
-debugging `configure').
-
-   It can also use an optional file (typically called `config.cache'
-and enabled with `--cache-file=config.cache' or simply `-C') that saves
-the results of its tests to speed up reconfiguring.  Caching is
-disabled by default to prevent problems with accidental use of stale
-cache files.
-
-   If you need to do unusual things to compile the package, please try
-to figure out how `configure' could check whether to do them, and mail
-diffs or instructions to the address given in the `README' so they can
-be considered for the next release.  If you are using the cache, and at
-some point `config.cache' contains results you don't want to keep, you
-may remove or edit it.
-
-   The file `configure.ac' (or `configure.in') is used to create
-`configure' by a program called `autoconf'.  You need `configure.ac' if
-you want to change it or regenerate `configure' using a newer version
-of `autoconf'.
-
-   The simplest way to compile this package is:
-
-  1. `cd' to the directory containing the package's source code and type
-     `./configure' to configure the package for your system.
-
-     Running `configure' might take a while.  While running, it prints
-     some messages telling which features it is checking for.
-
-  2. Type `make' to compile the package.
-
-  3. Optionally, type `make check' to run any self-tests that come with
-     the package, generally using the just-built uninstalled binaries.
-
-  4. Type `make install' to install the programs and any data files and
-     documentation.  When installing into a prefix owned by root, it is
-     recommended that the package be configured and built as a regular
-     user, and only the `make install' phase executed with root
-     privileges.
-
-  5. Optionally, type `make installcheck' to repeat any self-tests, but
-     this time using the binaries in their final installed location.
-     This target does not install anything.  Running this target as a
-     regular user, particularly if the prior `make install' required
-     root privileges, verifies that the installation completed
-     correctly.
-
-  6. You can remove the program binaries and object files from the
-     source code directory by typing `make clean'.  To also remove the
-     files that `configure' created (so you can compile the package for
-     a different kind of computer), type `make distclean'.  There is
-     also a `make maintainer-clean' target, but that is intended mainly
-     for the package's developers.  If you use it, you may have to get
-     all sorts of other programs in order to regenerate files that came
-     with the distribution.
-
-  7. Often, you can also type `make uninstall' to remove the installed
-     files again.  In practice, not all packages have tested that
-     uninstallation works correctly, even though it is required by the
-     GNU Coding Standards.
-
-  8. Some packages, particularly those that use Automake, provide `make
-     distcheck', which can by used by developers to test that all other
-     targets like `make install' and `make uninstall' work correctly.
-     This target is generally not run by end users.
-
-Compilers and Options
-=====================
-
-   Some systems require unusual options for compilation or linking that
-the `configure' script does not know about.  Run `./configure --help'
-for details on some of the pertinent environment variables.
-
-   You can give `configure' initial values for configuration parameters
-by setting variables in the command line or in the environment.  Here
-is an example:
-
-     ./configure CC=c99 CFLAGS=-g LIBS=-lposix
-
-   *Note Defining Variables::, for more details.
-
-Compiling For Multiple Architectures
-====================================
-
-   You can compile the package for more than one kind of computer at the
-same time, by placing the object files for each architecture in their
-own directory.  To do this, you can use GNU `make'.  `cd' to the
-directory where you want the object files and executables to go and run
-the `configure' script.  `configure' automatically checks for the
-source code in the directory that `configure' is in and in `..'.  This
-is known as a "VPATH" build.
-
-   With a non-GNU `make', it is safer to compile the package for one
-architecture at a time in the source code directory.  After you have
-installed the package for one architecture, use `make distclean' before
-reconfiguring for another architecture.
-
-   On MacOS X 10.5 and later systems, you can create libraries and
-executables that work on multiple system types--known as "fat" or
-"universal" binaries--by specifying multiple `-arch' options to the
-compiler but only a single `-arch' option to the preprocessor.  Like
-this:
-
-     ./configure CC="gcc -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CXX="g++ -arch i386 -arch x86_64 -arch ppc -arch ppc64" \
-                 CPP="gcc -E" CXXCPP="g++ -E"
-
-   This is not guaranteed to produce working output in all cases, you
-may have to build one architecture at a time and combine the results
-using the `lipo' tool if you have problems.
-
-Installation Names
-==================
-
-   By default, `make install' installs the package's commands under
-`/usr/local/bin', include files under `/usr/local/include', etc.  You
-can specify an installation prefix other than `/usr/local' by giving
-`configure' the option `--prefix=PREFIX', where PREFIX must be an
-absolute file name.
-
-   You can specify separate installation prefixes for
-architecture-specific files and architecture-independent files.  If you
-pass the option `--exec-prefix=PREFIX' to `configure', the package uses
-PREFIX as the prefix for installing programs and libraries.
-Documentation and other data files still use the regular prefix.
-
-   In addition, if you use an unusual directory layout you can give
-options like `--bindir=DIR' to specify different values for particular
-kinds of files.  Run `configure --help' for a list of the directories
-you can set and what kinds of files go in them.  In general, the
-default for these options is expressed in terms of `${prefix}', so that
-specifying just `--prefix' will affect all of the other directory
-specifications that were not explicitly provided.
-
-   The most portable way to affect installation locations is to pass the
-correct locations to `configure'; however, many packages provide one or
-both of the following shortcuts of passing variable assignments to the
-`make install' command line to change installation locations without
-having to reconfigure or recompile.
-
-   The first method involves providing an override variable for each
-affected directory.  For example, `make install
-prefix=/alternate/directory' will choose an alternate location for all
-directory configuration variables that were expressed in terms of
-`${prefix}'.  Any directories that were specified during `configure',
-but not in terms of `${prefix}', must each be overridden at install
-time for the entire installation to be relocated.  The approach of
-makefile variable overrides for each directory variable is required by
-the GNU Coding Standards, and ideally causes no recompilation.
-However, some platforms have known limitations with the semantics of
-shared libraries that end up requiring recompilation when using this
-method, particularly noticeable in packages that use GNU Libtool.
-
-   The second method involves providing the `DESTDIR' variable.  For
-example, `make install DESTDIR=/alternate/directory' will prepend
-`/alternate/directory' before all installation names.  The approach of
-`DESTDIR' overrides is not required by the GNU Coding Standards, and
-does not work on platforms that have drive letters.  On the other hand,
-it does better at avoiding recompilation issues, and works well even
-when some directory options were not specified in terms of `${prefix}'
-at `configure' time.
-
-Optional Features
-=================
-
-   If the package supports it, you can cause programs to be installed
-with an extra prefix or suffix on their names by giving `configure' the
-option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
-
-   Some packages pay attention to `--enable-FEATURE' options to
-`configure', where FEATURE indicates an optional part of the package.
-They may also pay attention to `--with-PACKAGE' options, where PACKAGE
-is something like `gnu-as' or `x' (for the X Window System).  The
-`README' should mention any `--enable-' and `--with-' options that the
-package recognizes.
-
-   For packages that use the X Window System, `configure' can usually
-find the X include and library files automatically, but if it doesn't,
-you can use the `configure' options `--x-includes=DIR' and
-`--x-libraries=DIR' to specify their locations.
-
-   Some packages offer the ability to configure how verbose the
-execution of `make' will be.  For these packages, running `./configure
--enable-silent-rules' sets the default to minimal output, which can be
-overridden with `make V=1'; while running `./configure
--disable-silent-rules' sets the default to verbose, which can be
-overridden with `make V=0'.
-
-Particular systems
-==================
-
-   On HP-UX, the default C compiler is not ANSI C compatible.  If GNU
-CC is not installed, it is recommended to use the following options in
-order to use an ANSI C compiler:
-
-     ./configure CC="cc -Ae -D_XOPEN_SOURCE=500"
-
-and if that doesn't work, install pre-built binaries of GCC for HP-UX.
-
-   HP-UX `make' updates targets which have the same time stamps as
-their prerequisites, which makes it generally unusable when shipped
-generated files such as `configure' are involved.  Use GNU `make'
-instead.
-
-   On OSF/1 a.k.a. Tru64, some versions of the default C compiler cannot
-parse its `<wchar.h>' header file.  The option `-nodtk' can be used as
-a workaround.  If GNU CC is not installed, it is therefore recommended
-to try
-
-     ./configure CC="cc"
-
-and if that doesn't work, try
-
-     ./configure CC="cc -nodtk"
-
-   On Solaris, don't put `/usr/ucb' early in your `PATH'.  This
-directory contains several dysfunctional programs; working variants of
-these programs are available in `/usr/bin'.  So, if you need `/usr/ucb'
-in your `PATH', put it _after_ `/usr/bin'.
-
-   On Haiku, software installed for all users goes in `/boot/common',
-not `/usr/local'.  It is recommended to use the following options:
-
-     ./configure --prefix=/boot/common
-
-Specifying the System Type
-==========================
-
-   There may be some features `configure' cannot figure out
-automatically, but needs to determine by the type of machine the package
-will run on.  Usually, assuming the package is built to be run on the
-_same_ architectures, `configure' can figure that out, but if it prints
-a message saying it cannot guess the machine type, give it the
-`--build=TYPE' option.  TYPE can either be a short name for the system
-type, such as `sun4', or a canonical name which has the form:
-
-     CPU-COMPANY-SYSTEM
-
-where SYSTEM can have one of these forms:
-
-     OS
-     KERNEL-OS
-
-   See the file `config.sub' for the possible values of each field.  If
-`config.sub' isn't included in this package, then this package doesn't
-need to know the machine type.
-
-   If you are _building_ compiler tools for cross-compiling, you should
-use the option `--target=TYPE' to select the type of system they will
-produce code for.
-
-   If you want to _use_ a cross compiler, that generates code for a
-platform different from the build platform, you should specify the
-"host" platform (i.e., that on which the generated programs will
-eventually be run) with `--host=TYPE'.
-
-Sharing Defaults
-================
-
-   If you want to set default values for `configure' scripts to share,
-you can create a site shell script called `config.site' that gives
-default values for variables like `CC', `cache_file', and `prefix'.
-`configure' looks for `PREFIX/share/config.site' if it exists, then
-`PREFIX/etc/config.site' if it exists.  Or, you can set the
-`CONFIG_SITE' environment variable to the location of the site script.
-A warning: not all `configure' scripts look for a site script.
-
-Defining Variables
-==================
-
-   Variables not defined in a site shell script can be set in the
-environment passed to `configure'.  However, some packages may run
-configure again during the build, and the customized values of these
-variables may be lost.  In order to avoid this problem, you should set
-them in the `configure' command line, using `VAR=value'.  For example:
-
-     ./configure CC=/usr/local2/bin/gcc
-
-causes the specified `gcc' to be used as the C compiler (unless it is
-overridden in the site shell script).
-
-Unfortunately, this technique does not work for `CONFIG_SHELL' due to
-an Autoconf limitation.  Until the limitation is lifted, you can use
-this workaround:
-
-     CONFIG_SHELL=/bin/bash ./configure CONFIG_SHELL=/bin/bash
-
-`configure' Invocation
-======================
-
-   `configure' recognizes the following options to control how it
-operates.
-
-`--help'
-`-h'
-     Print a summary of all of the options to `configure', and exit.
-
-`--help=short'
-`--help=recursive'
-     Print a summary of the options unique to this package's
-     `configure', and exit.  The `short' variant lists options used
-     only in the top level, while the `recursive' variant lists options
-     also present in any nested packages.
-
-`--version'
-`-V'
-     Print the version of Autoconf used to generate the `configure'
-     script, and exit.
-
-`--cache-file=FILE'
-     Enable the cache: use and save the results of the tests in FILE,
-     traditionally `config.cache'.  FILE defaults to `/dev/null' to
-     disable caching.
-
-`--config-cache'
-`-C'
-     Alias for `--cache-file=config.cache'.
-
-`--quiet'
-`--silent'
-`-q'
-     Do not print messages saying which checks are being made.  To
-     suppress all normal output, redirect it to `/dev/null' (any error
-     messages will still be shown).
-
-`--srcdir=DIR'
-     Look for the package's source code in directory DIR.  Usually
-     `configure' can determine that directory automatically.
-
-`--prefix=DIR'
-     Use DIR as the installation prefix.  *note Installation Names::
-     for more details, including other options available for fine-tuning
-     the installation locations.
-
-`--no-create'
-`-n'
-     Run the configure checks, but stop before creating any output
-     files.
-
-`configure' also accepts some other, not widely useful, options.  Run
-`configure --help' for more details.
--- a/Makefile.am
+++ b/Makefile.am
@ -1,276 +0,0 @@
-ACLOCAL_AMFLAGS = -I m4
-AM_CFLAGS = @VKD3D_CFLAGS@
-AM_CPPFLAGS = -I$(srcdir)/include -I$(srcdir)/include/dummy -I$(srcdir)/include/private
-AM_LDFLAGS = -no-undefined
-
-widl_headers = \
-	include/vkd3d_d3d12.h \
-	include/vkd3d_d3d12sdklayers.h \
-	include/vkd3d_d3dcommon.h \
-	include/vkd3d_dxgi.h \
-	include/vkd3d_dxgi1_2.h \
-	include/vkd3d_dxgi1_3.h \
-	include/vkd3d_dxgi1_4.h \
-	include/vkd3d_dxgibase.h \
-	include/vkd3d_dxgiformat.h \
-	include/vkd3d_dxgitype.h
-
-vkd3d_public_headers = \
-	include/vkd3d.h \
-	include/vkd3d_d3d12.h \
-	include/vkd3d_d3d12sdklayers.h \
-	include/vkd3d_d3dcommon.h \
-	include/vkd3d_dxgibase.h \
-	include/vkd3d_dxgiformat.h \
-	include/vkd3d_shader.h \
-	include/vkd3d_types.h \
-	include/vkd3d_utils.h \
-	include/vkd3d_windows.h
-
-vkd3d_demos_shaders = \
-	demos/gears.hlsl \
-	demos/gears_ps_flat.h \
-	demos/gears_ps_smooth.h \
-	demos/gears_vs.h \
-	demos/triangle.hlsl \
-	demos/triangle_ps.h \
-	demos/triangle_vs.h
-
-vkd3d_tests = \
-	tests/vkd3d_api \
-	tests/vkd3d_common \
-	tests/vkd3d_shader_api
-
-vkd3d_cross_tests = \
-	tests/d3d12 \
-	tests/d3d12_invalid_usage
-
-vkd3d_test_headers = \
-	tests/d3d12_crosstest.h \
-	tests/d3d12_test_utils.h
-
-vkd3d_demos = \
-	demos/gears \
-	demos/triangle
-
-vkd3d_demos_headers = \
-	demos/demo.h \
-	demos/demo_win32.h \
-	demos/demo_xcb.h
-
-BUILT_SOURCES = $(widl_headers)
-
-noinst_LTLIBRARIES = libvkd3d-common.la
-libvkd3d_common_la_SOURCES = \
-	include/private/vkd3d_debug.h \
-	libs/vkd3d-common/debug.c \
-	libs/vkd3d-common/memory.c \
-	libs/vkd3d-common/utf8.c
-
-lib_LTLIBRARIES = libvkd3d-shader.la libvkd3d.la libvkd3d-utils.la
-
-libvkd3d_shader_la_SOURCES = \
-	include/private/list.h \
-	include/private/rbtree.h \
-	include/private/vkd3d_common.h \
-	include/private/vkd3d_debug.h \
-	include/private/vkd3d_memory.h \
-	include/vkd3d_shader.h \
-	libs/vkd3d-shader/checksum.c \
-	libs/vkd3d-shader/dxbc.c \
-	libs/vkd3d-shader/spirv.c \
-	libs/vkd3d-shader/trace.c \
-	libs/vkd3d-shader/vkd3d_shader.map \
-	libs/vkd3d-shader/vkd3d_shader_main.c \
-	libs/vkd3d-shader/vkd3d_shader_private.h
-libvkd3d_shader_la_CFLAGS = $(AM_CFLAGS) @SPIRV_TOOLS_CFLAGS@
-libvkd3d_shader_la_LDFLAGS = $(AM_LDFLAGS) -version-info 1:0:0
-libvkd3d_shader_la_LIBADD = libvkd3d-common.la @SPIRV_TOOLS_LIBS@
-if HAVE_LD_VERSION_SCRIPT
-libvkd3d_shader_la_LDFLAGS += -Wl,--version-script=$(srcdir)/libs/vkd3d-shader/vkd3d_shader.map
-EXTRA_libvkd3d_shader_la_DEPENDENCIES = $(srcdir)/libs/vkd3d-shader/vkd3d_shader.map
-endif
-
-libvkd3d_la_SOURCES = \
-	include/private/vkd3d_common.h \
-	include/private/vkd3d_debug.h \
-	include/private/vkd3d_memory.h \
-	include/private/vkd3d_utf8.h \
-	include/private/vkd3d_test.h \
-	include/vkd3d_d3d12.idl \
-	include/vkd3d_d3dcommon.idl \
-	include/vkd3d_unknown.idl \
-	libs/vkd3d/command.c \
-	libs/vkd3d/device.c \
-	libs/vkd3d/resource.c \
-	libs/vkd3d/state.c \
-	libs/vkd3d/utils.c \
-	libs/vkd3d/vkd3d.map \
-	libs/vkd3d/vkd3d_main.c \
-	libs/vkd3d/vkd3d_private.h \
-	libs/vkd3d/vkd3d_shaders.h \
-	libs/vkd3d/vulkan_procs.h \
-	libs/vkd3d_version.c
-libvkd3d_la_LDFLAGS = $(AM_LDFLAGS) -version-info 2:0:1
-libvkd3d_la_LIBADD = libvkd3d-common.la libvkd3d-shader.la @DL_LIBS@ @PTHREAD_LIBS@
-if HAVE_LD_VERSION_SCRIPT
-libvkd3d_la_LDFLAGS += -Wl,--version-script=$(srcdir)/libs/vkd3d/vkd3d.map
-EXTRA_libvkd3d_la_DEPENDENCIES = $(srcdir)/libs/vkd3d/vkd3d.map
-endif
-
-libvkd3d_utils_la_SOURCES = \
-	libs/vkd3d-utils/vkd3d_utils.map \
-	libs/vkd3d-utils/vkd3d_utils_main.c \
-	libs/vkd3d-utils/vkd3d_utils_private.h
-libvkd3d_utils_la_LDFLAGS = $(AM_LDFLAGS) -version-info 1:1:0
-libvkd3d_utils_la_LIBADD = libvkd3d-common.la libvkd3d.la
-if HAVE_LD_VERSION_SCRIPT
-libvkd3d_utils_la_LDFLAGS += -Wl,--version-script=$(srcdir)/libs/vkd3d-utils/vkd3d_utils.map
-EXTRA_libvkd3d_utils_la_DEPENDENCIES = $(srcdir)/libs/vkd3d-utils/vkd3d_utils.map
-endif
-
-EXTRA_DIST = LICENSE
-
-pkgconfigdir = $(libdir)/pkgconfig
-pkginclude_HEADERS = $(vkd3d_public_headers)
-nodist_pkgconfig_DATA = libvkd3d.pc libvkd3d-shader.pc libvkd3d-utils.pc
-CLEANFILES = libvkd3d.pc libvkd3d-shader.pc libvkd3d-utils.pc
-EXTRA_DIST += \
-	libs/vkd3d/libvkd3d.pc.in \
-	libs/vkd3d-shader/libvkd3d-shader.pc.in \
-	libs/vkd3d-utils/libvkd3d-utils.pc.in
-
-noinst_PROGRAMS = vkd3d-compiler
-vkd3d_compiler_SOURCES = programs/vkd3d-compiler/main.c
-vkd3d_compiler_LDADD = libvkd3d-shader.la
-
-LDADD = libvkd3d.la libvkd3d-utils.la
-AM_DEFAULT_SOURCE_EXT = .c
-if BUILD_TESTS
-check_PROGRAMS = $(vkd3d_tests) $(vkd3d_cross_tests)
-TESTS = $(vkd3d_tests) $(vkd3d_cross_tests)
-tests_d3d12_LDADD = $(LDADD) @PTHREAD_LIBS@ @VULKAN_LIBS@
-tests_d3d12_invalid_usage_LDADD = $(LDADD) @VULKAN_LIBS@
-tests_vkd3d_api_LDADD = libvkd3d.la @VULKAN_LIBS@
-tests_vkd3d_shader_api_LDADD = libvkd3d-shader.la
-endif
-
-if BUILD_DEMOS
-DEMOS_LDADD = $(LDADD) libvkd3d-shader.la @XCB_LIBS@ @VULKAN_LIBS@
-DEMOS_CFLAGS = $(AM_CFLAGS) @XCB_CFLAGS@
-noinst_PROGRAMS += $(vkd3d_demos)
-
-demos_gears_CFLAGS = $(DEMOS_CFLAGS)
-demos_gears_LDADD = $(DEMOS_LDADD) -lm
-
-demos_triangle_CFLAGS = $(DEMOS_CFLAGS)
-demos_triangle_LDADD = $(DEMOS_LDADD)
-endif
-
-EXTRA_DIST += $(vkd3d_test_headers) $(vkd3d_demos_headers)
-
-VKD3D_V_WIDL = $(vkd3d_v_widl_@AM_V@)
-vkd3d_v_widl_ = $(vkd3d_v_widl_@AM_DEFAULT_V@)
-vkd3d_v_widl_0 = @echo "  WIDL    " $@;
-vkd3d_v_widl_1 =
-
-EXTRA_DIST += $(widl_headers) $(widl_headers:.h=.idl)
-$(widl_headers): %.h: %.idl
-if HAVE_WIDL
-	$(VKD3D_V_WIDL)$(WIDL) -h -o $@ $<
-else
-	@echo "widl is required to generate $@"
-endif
-
-EXTRA_DIST += $(vkd3d_demos_shaders)
-
-libvkd3d-utils.pc: $(srcdir)/libs/vkd3d-utils/libvkd3d-utils.pc.in Makefile
-	$(AM_V_GEN)$(SED) -e 's![@]prefix[@]!$(prefix)!g' \
-		-e 's![@]exec_prefix[@]!$(exec_prefix)!g' \
-		-e 's![@]includedir[@]!$(includedir)!g' \
-		-e 's![@]libdir[@]!$(libdir)!g' \
-		-e 's![@]PACKAGE_VERSION[@]!$(PACKAGE_VERSION)!g' \
-		$< > $@
-
-libvkd3d-shader.pc: $(srcdir)/libs/vkd3d-shader/libvkd3d-shader.pc.in Makefile
-	$(AM_V_GEN)$(SED) -e 's![@]prefix[@]!$(prefix)!g' \
-		-e 's![@]exec_prefix[@]!$(exec_prefix)!g' \
-		-e 's![@]includedir[@]!$(includedir)!g' \
-		-e 's![@]libdir[@]!$(libdir)!g' \
-		-e 's![@]PACKAGE_VERSION[@]!$(PACKAGE_VERSION)!g' \
-		$< > $@
-
-libvkd3d.pc: $(srcdir)/libs/vkd3d/libvkd3d.pc.in Makefile
-	$(AM_V_GEN)$(SED) -e 's![@]prefix[@]!$(prefix)!g' \
-		-e 's![@]exec_prefix[@]!$(exec_prefix)!g' \
-		-e 's![@]includedir[@]!$(includedir)!g' \
-		-e 's![@]libdir[@]!$(libdir)!g' \
-		-e 's![@]PACKAGE_VERSION[@]!$(PACKAGE_VERSION)!g' \
-		$< > $@
-
-libs/vkd3d_version.c: dummy-vkd3d-version
-	$(AM_V_GEN)version=`(GIT_DIR=$(top_srcdir)/.git git describe HEAD 2>/dev/null || echo "vkd3d-$(PACKAGE_VERSION)") | $(SED) -n -e '$$s/\(.*\)/const char vkd3d_build[] = "\1";/p'` && (echo $$version | cmp -s - $@) || echo $$version >$@ || ($(RM) $@ && exit 1)
-.SILENT: libs/vkd3d_version.c
-CLEANFILES += libs/vkd3d_version.c
-
-.PHONY: dummy-vkd3d-version
-dummy-vkd3d-version:
-
-## Cross-compile tests
-cross_implibs = crosslibs/d3d12
-CROSS_CPPFLAGS = -I$(srcdir)/include -I$(srcdir)/include/private -I$(builddir)/include
-CROSS_CFLAGS = -g -O2 -Wall -municode ${CROSS_CPPFLAGS}
-EXTRA_DIST += $(cross_implibs:=.cross32.def) $(cross_implibs:=.cross64.def)
-
-if HAVE_CROSSTARGET32
-CROSS32_CC = @CROSSCC32@
-CROSS32_DLLTOOL = @CROSSTARGET32@-dlltool
-CROSS32_IMPLIBS = $(cross_implibs:=.cross32.a)
-CROSS32_EXEFILES = $(vkd3d_cross_tests:=.cross32.exe) $(vkd3d_demos:=.cross32.exe)
-CROSS32_FILES = $(CROSS32_IMPLIBS) $(CROSS32_EXEFILES)
-
-CLEANFILES += $(CROSS32_FILES)
-crosstest32: $(CROSS32_FILES)
-
-include tests/$(DEPDIR)/*.cross32.Po
-include demos/$(DEPDIR)/*.cross32.Po
-
-$(CROSS32_IMPLIBS): %.cross32.a: %.cross32.def
-	@${MKDIR_P} crosslibs
-	$(AM_V_GEN)$(CROSS32_DLLTOOL) -k -m i386 --as-flags=-32 -d $< -l $@
-
-$(CROSS32_EXEFILES): %.cross32.exe: %.c $(CROSS32_IMPLIBS) $(widl_headers)
-	$(AM_V_CCLD)depbase=`echo $@ | $(SED) 's![^/]*$$!$(DEPDIR)/&!;s!\.exe$$!!'`; \
-	$(CROSS32_CC) $(CROSS_CFLAGS) -MT $@ -MD -MP -MF $$depbase.Tpo -o $@ $< $(CROSS32_IMPLIBS) -ldxgi -lgdi32 && \
-	$(am__mv) $$depbase.Tpo $$depbase.Po
-else
-crosstest32:
-endif
-
-if HAVE_CROSSTARGET64
-CROSS64_CC = @CROSSCC64@
-CROSS64_DLLTOOL = @CROSSTARGET64@-dlltool
-CROSS64_IMPLIBS = $(cross_implibs:=.cross64.a)
-CROSS64_EXEFILES = $(vkd3d_cross_tests:=.cross64.exe) $(vkd3d_demos:=.cross64.exe)
-CROSS64_FILES = $(CROSS64_IMPLIBS) $(CROSS64_EXEFILES)
-
-CLEANFILES += $(CROSS64_FILES)
-crosstest64: $(CROSS64_FILES)
-
-include tests/$(DEPDIR)/*.cross64.Po
-include demos/$(DEPDIR)/*.cross64.Po
-
-$(CROSS64_IMPLIBS): %.cross64.a: %.cross64.def
-	@${MKDIR_P} crosslibs
-	$(AM_V_GEN)$(CROSS64_DLLTOOL) -k -m i386:x86-64 --as-flags=-64 -d $< -l $@
-
-$(CROSS64_EXEFILES): %.cross64.exe: %.c $(CROSS64_IMPLIBS) $(widl_headers)
-	$(AM_V_CCLD)depbase=`echo $@ | sed 's![^/]*$$!$(DEPDIR)/&!;s!\.exe$$!!'`; \
-	$(CROSS64_CC) $(CROSS_CFLAGS) -MT $@ -MD -MP -MF $$depbase.Tpo -o $@ $< $(CROSS64_IMPLIBS) -ldxgi -lgdi32 && \
-	$(am__mv) $$depbase.Tpo $$depbase.Po
-else
-crosstest64:
-endif
-
-.PHONY: crosstest crosstest32 crosstest64
-crosstest: crosstest32 crosstest64
--- a/76
+++ b/76
@ -1,76 +0,0 @@
-=============================
-The vkd3d 3D Graphics Library
-=============================
-
-Vkd3d is a 3D graphics library built on top of Vulkan. It has an API very
-similar, but not identical, to Direct3D 12.
-
-==============
-Building vkd3d
-==============
-
-Vkd3d depends on SPIRV-Headers and Vulkan-Headers (>= 1.1.113).
-
-Vkd3d generates some of its headers from IDL files. If you are using the
-release tarballs, then these headers are pre-generated and are included. If
-you are building from git, then they will be generated at build-time using
-widl. By default, vkd3d will use the widl found in `PATH'. If widl is not
-available or is not recent (>= 3.20), then you can build Wine with `make
-tools/widl' to avoid building all of Wine. You can then point vkd3d's
-configure at that widl binary with `WIDL="/path/to/widl"'.
-
-For release builds, you may want to define NDEBUG. If you do not need debug log
-messages, you may also consider VKD3D_NO_TRACE_MESSAGES and
-VKD3D_NO_DEBUG_MESSAGES. For example, you can pass `CPPFLAGS="-DNDEBUG
-DVKD3D_NO_TRACE_MESSAGES"' to configure.
-
-===========
-Using vkd3d
-===========
-
-Vkd3d can be used by projects that target Direct3D 12 as a drop-in replacement
-at build-time with some modest source modifications.
-
-If vkd3d is available when building Wine, then Wine will use it to support
-Direct3D 12 applications.
-
-=====================
-Environment variables
-=====================
-
-Most of the environment variables used by vkd3d are for debugging purposes. The
-environment variables are not considered a part of API and might be changed or
-removed in the future versions of vkd3d.
-
-Some of debug variables are lists of elements. Elements must be separated by
-commas or semicolons.
-
- * VKD3D_CONFIG - a list of options that change the behavior of libvkd3d.
-    * vk_debug - enables Vulkan debug extensions.
-
- * VKD3D_DEBUG - controls the debug level for log messages produced by
-   libvkd3d. Accepts the following values: none, err, fixme, warn, trace.
-
- * VKD3D_VULKAN_DEVICE - a zero-based device index. Use to force the selected
-   Vulkan device.
-
- * VKD3D_DISABLE_EXTENSIONS - a list of Vulkan extensions that libvkd3d should
-   not use even if available.
-
- * VKD3D_SHADER_DEBUG - controls the debug level for log messages produced by
-   libvkd3d-shader. See VKD3D_DEBUG for accepted values.
-
- * VKD3D_SHADER_DUMP_PATH - path where shader bytecode is dumped.
-
- * VKD3D_TEST_DEBUG - enables additional debug messages in tests. Set to 0, 1
-   or 2.
-
- * VKD3D_TEST_FILTER - a filter string. Only the tests whose names matches the
-   filter string will be run, e.g. VKD3D_TEST_FILTER=clear_render_target.
-   Useful for debugging or developing new tests.
-
- * VKD3D_TEST_PLATFORM - can be set to "wine", "windows" or "other". The test
-   platform controls the behavior of todo(), todo_if(), bug_if() and broken()
-   conditions in tests.
-
- * VKD3D_TEST_BUG - set to 0 to disable bug_if() conditions in tests.
--- a/README.md
+++ b/README.md
@ -0,0 +1,447 @@
+# VKD3D-Proton
+
+VKD3D-Proton is a fork of VKD3D, which aims to implement the full Direct3D 12 API on top of Vulkan.
+The project serves as the development effort for Direct3D 12 support in [Proton](https://github.com/ValveSoftware/Proton).
+
+## Upstream
+
+The original project is available at [WineHQ](https://source.winehq.org/git/vkd3d.git/).
+
+## Priorities
+
+Performance and game compatibility are important targets, at the expense of compatibility with older drivers and systems.
+Modern Vulkan extensions and features are aggressively made use of to improve game performance and compatibility.
+It is recommended to use the very latest drivers you can get your hands on for the best experience.
+Backwards compatibility with the vkd3d standalone API is not a goal of this project.
+
+## Drivers
+
+There are some hard requirements on drivers to be able to implement D3D12 in a reasonably performant way.
+
+- Vulkan 1.1
+- `VK_EXT_descriptor_indexing` with at least 1000000 UpdateAfterBind descriptors for all types except UniformBuffer.
+  Essentially all features in `VkPhysicalDeviceDescriptorIndexingFeatures` must be supported.
+- `VK_KHR_timeline_semaphore`
+- `VK_KHR_sampler_mirror_clamp_to_edge`
+- `VK_EXT_robustness2`
+- `VK_KHR_separate_depth_stencil_layouts`
+- `VK_KHR_bind_memory2`
+- `VK_KHR_copy_commands2`
+- `VK_KHR_dynamic_rendering`
+- `VK_EXT_extended_dynamic_state`
+- `VK_EXT_extended_dynamic_state2`
+
+Some notable extensions that **should** be supported for optimal or correct behavior.
+These extensions will likely become mandatory later.
+
+- `VK_KHR_buffer_device_address`
+- `VK_EXT_image_view_min_lod`
+
+`VK_VALVE_mutable_descriptor_type` is also highly recommended, but not mandatory.
+
+### AMD (RADV)
+
+For AMD, RADV is the recommended driver and the one that sees most testing on AMD GPUs.
+The minimum requirement at the moment is Mesa 22.0 since it supports `VK_KHR_dynamic_rendering`.
+
+NOTE: For older Mesa versions, use the v2.6 release.
+
+### NVIDIA
+
+The [Vulkan beta drivers](https://developer.nvidia.com/vulkan-driver) generally contain the latest
+driver fixes that we identify while getting games to work.
+The latest drivers (stable, beta or Vulkan beta tracks) are always preferred.
+If you're having problems, always try the latest drivers.
+
+### Intel
+
+We have not done any testing against Intel iGPUs yet.
+
+------
+
+## Cloning the repo
+
+To clone the repo you should run:
+```
+git clone --recursive https://github.com/HansKristian-Work/vkd3d-proton
+```
+in order to pull in all the submodules which are needed for building.
+
+## Building VKD3D-Proton
+
+### Requirements:
+- [wine](https://www.winehq.org/) (for `widl`) [for native builds]
+  - On Windows this may be replaced with [Strawberry Perl](http://strawberryperl.com/), as it ships `widl` and is easy to find and install -- although this dependency may be eliminated in the future.
+- [Meson](http://mesonbuild.com/) build system (at least version 0.49)
+- [glslang](https://github.com/KhronosGroup/glslang) compiler
+- [Mingw-w64](http://mingw-w64.org/) compiler, headers and tools (at least version 7.0) [for cross-builds for d3d12.dll which are default]
+
+### Building:
+#### The simple way
+Inside the VKD3D-Proton directory, run:
+```
+./package-release.sh master /your/target/directory --no-package
+```
+
+This will create a folder `vkd3d-proton-master` in `/your/target/directory`, which contains both 32-bit and 64-bit versions of VKD3D-Proton, which can be set up in the same way as the release versions as noted above.
+
+If you want to build natively (i.e. for `libvkd3d-proton.so`), pass `--native` to the build script. This option will make it build using your system's compilers.
+
+In order to preserve the build directories for development, pass `--dev-build` to the script. This option implies `--no-package`. After making changes to the source code, you can then do the following to rebuild VKD3D-Proton:
+```
+# change to build.86 for 32-bit
+ninja -C /your/target/directory/build.64 install
+```
+
+#### Compiling manually (cross for d3d12.dll, default)
+```
+# 64-bit build.
+meson --cross-file build-win64.txt --buildtype release --prefix /your/vkd3d-proton/directory build.64
+ninja -C build.64 install
+
+# 32-bit build
+meson --cross-file build-win32.txt --buildtype release --prefix /your/vkd3d-proton/directory build.86
+ninja -C build.86 install
+```
+
+#### Compiling manually (native)
+```
+# 64-bit build.
+meson --buildtype release --prefix /your/vkd3d-proton/directory build.64
+ninja -C build.64 install
+
+# 32-bit build
+CC="gcc -m32" CXX="g++ -m32" \
+PKG_CONFIG_PATH="/usr/lib32/pkgconfig:/usr/lib/i386-linux-gnu/pkgconfig:/usr/lib/pkgconfig" \
+meson --buildtype release --prefix /your/vkd3d-proton/directory build.86
+ninja -C build.86 install
+```
+
+## Using VKD3D-Proton
+
+The intended way to use VKD3D-Proton is as a native Win32 d3d12.dll.
+This serves as a drop-in replacement for D3D12, and can be used in Wine (Proton or vanilla flavors), or on Windows.
+
+VKD3D-Proton does not supply the necessary DXGI component.
+VKD3D-Proton can be used with either DXVK's DXGI implementation, or
+Wine's DXGI implementation.
+VKD3D-Proton implements its own IDXGISwapChain when built as a native d3d12.dll.
+
+### A note on using VKD3D-Proton on Windows
+
+Native Windows use is mostly relevant for developer testing purposes.
+Do not expect games running on Windows 7 or 8.1 to magically make use of VKD3D-Proton,
+as many games will only even attempt to load d3d12.dll if they are running on Windows 10.
+
+### Native Linux build
+
+A native Linux binary can be built, but it is not intended to be compatible with upstream Wine.
+A native option is mostly relevant for development purposes.
+
+## Environment variables
+
+Most of the environment variables used by VKD3D-Proton are for debugging purposes. The
+environment variables are not considered part of the API and might be changed or
+removed in future versions of VKD3D-Proton.
+
+Some of the debug variables are lists of elements. Elements must be separated by
+commas or semicolons.
+
+ - `VKD3D_CONFIG` - a list of options that change the behavior of vkd3d-proton.
+    - `vk_debug` - enables Vulkan debug extensions and loads validation layer.
+    - `skip_application_workarounds` - Skips all application workarounds.
+      For debugging purposes.
+    - `dxr` - Enables DXR support if supported by device.
+    - `dxr11` - Enables DXR tier 1.1 support if supported by device.
+    - `force_static_cbv` - Unsafe speed hack on NVIDIA. May or may not give a significant performance uplift.
+    - `single_queue` - Do not use asynchronous compute or transfer queues.
+    - `no_upload_hvv` - Blocks any attempt to use host-visible VRAM (large/resizable BAR) for the UPLOAD heap.
+      May free up vital VRAM in certain critical situations, at cost of lower GPU performance.
+      A fraction of VRAM is reserved for resizable BAR allocations either way,
+      so it should not be a real issue even on lower VRAM cards.
+    - `force_host_cached` - Forces all host visible allocations to be CACHED, which greatly accelerates captures.
+    - `no_invariant_position` - Avoids workarounds for invariant position. The workaround is enabled by default.
+ - `VKD3D_DEBUG` - controls the debug level for log messages produced by
+   vkd3d-proton. Accepts the following values: none, err, info, fixme, warn, trace.
+ - `VKD3D_SHADER_DEBUG` - controls the debug level for log messages produced by
+   the shader compilers. See `VKD3D_DEBUG` for accepted values.
+ - `VKD3D_LOG_FILE` - If set, redirects `VKD3D_DEBUG` logging output to a file instead.
+ - `VKD3D_VULKAN_DEVICE` - a zero-based device index. Use to force the selected
+   Vulkan device.
+ - `VKD3D_FILTER_DEVICE_NAME` - skips devices that don't include this substring.
+ - `VKD3D_DISABLE_EXTENSIONS` - a list of Vulkan extensions that vkd3d-proton should
+   not use even if available.
+ - `VKD3D_TEST_DEBUG` - enables additional debug messages in tests. Set to 0, 1
+   or 2.
+ - `VKD3D_TEST_FILTER` - a filter string. Only the tests whose names match the
+   filter string will be run, e.g. `VKD3D_TEST_FILTER=clear_render_target`.
+   Useful for debugging or developing new tests.
+ - `VKD3D_TEST_EXCLUDE` - excludes tests whose names are included in the string,
+   e.g. `VKD3D_TEST_EXCLUDE=test_root_signature_priority,test_conservative_rasterization_dxil`.
+ - `VKD3D_TEST_PLATFORM` - can be set to "wine", "windows" or "other". The test
+   platform controls the behavior of todo(), todo_if(), bug_if() and broken()
+   conditions in tests.
+ - `VKD3D_TEST_BUG` - set to 0 to disable bug_if() conditions in tests.
+ - `VKD3D_PROFILE_PATH` - If profiling is enabled in the build, a profiling block is
+   emitted to `${VKD3D_PROFILE_PATH}.${pid}`.
+
+## Shader cache
+
+By default, vkd3d-proton manages its own driver cache.
+This cache is intended to cache DXBC/DXIL -> SPIR-V conversion.
+This reduces stutter (when pipelines are created last minute and app relies on hot driver cache)
+and load times (when applications do the right thing of loading PSOs up front).
+
+Behavior is designed to be close to DXVK state cache.
+
+#### Default behavior
+
+`vkd3d-proton.cache` (and `vkd3d-proton.cache.write`) are placed in the current working directory.
+Generally, this is the game install folder when running in Steam.
+
+#### Custom directory
+
+`VKD3D_SHADER_CACHE_PATH=/path/to/directory` overrides the directory where `vkd3d-proton.cache` is placed.
+
+#### Disable cache
+
+`VKD3D_SHADER_CACHE_PATH=0` disables the internal cache, and any caching would have to be explicitly managed
+by application.
+
+### Behavior of ID3D12PipelineLibrary
+
+When explicit shader cache is used, the need for application managed pipeline libraries is greatly diminished,
+and the cache applications interact with is a dummy cache.
+If the vkd3d-proton shader cache is disabled, ID3D12PipelineLibrary stores everything relevant for a full cache,
+i.e. SPIR-V and PSO driver cache blob.
+`VKD3D_CONFIG=pipeline_library_app_cache` is an alternative to `VKD3D_SHADER_CACHE_PATH=0` and can be
+automatically enabled based on app-profiles if relevant in the future if applications manage the caches better
+than vkd3d-proton can do automagically.
+
+## CPU profiling (development)
+
+Pass `-Denable_profiling=true` to Meson to enable a profiled build. With a profiled build, use `VKD3D_PROFILE_PATH` environment variable.
+The profiling dumps out a binary blob which can be analyzed with `programs/vkd3d-profile.py`.
+The profile is a trivial system which records number of iterations and total ticks (ns) spent.
+It is easy to instrument parts of code you are working on optimizing.
+
+## Advanced shader debugging
+
+These features are only meant to be used by vkd3d-proton developers. For any builtin RenderDoc related functionality
+pass `-Denable_renderdoc=true` to Meson.
+
+ - `VKD3D_SHADER_DUMP_PATH` - path where shader bytecode is dumped.
+   Bytecode is dumped in format of `$hash.{spv,dxbc,dxil}`.
+ - `VKD3D_SHADER_OVERRIDE` - path to where overridden shaders can be found.
+   If application is creating a pipeline with `$hash` and `$VKD3D_SHADER_OVERRIDE/$hash.spv` exists,
+   that SPIR-V file will be used instead.
+ - `VKD3D_AUTO_CAPTURE_SHADER` - If this is set to a shader hash, and the RenderDoc layer is enabled,
+ vkd3d-proton will automatically make a capture when a specific shader is encountered.
+ - `VKD3D_AUTO_CAPTURE_COUNTS` - A comma-separated list of indices. This can be used to control which queue submissions to capture.
+ E.g., use `VKD3D_AUTO_CAPTURE_COUNTS=0,4,10` to capture the 0th (first submission), 4th and 10th submissions which are candidates for capturing.
+ If `VKD3D_AUTO_CAPTURE_COUNTS` is `-1`, the entire app runtime can be turned into one big capture.
+ This is only intended to be used when capturing something like the test suite,
+ or tiny applications with a finite runtime to make it easier to debug cross submission work.
+
+ If only `VKD3D_AUTO_CAPTURE_COUNTS` is set, any queue submission is considered for capturing.
+ If only `VKD3D_AUTO_CAPTURE_SHADER` is set, `VKD3D_AUTO_CAPTURE_COUNTS` is considered to be equal to `"0"`, i.e. a capture is only
+ made on first encounter with the target shader.
+ If both are set, the capture counter is only incremented and considered when a submission contains the use of the target shader.
+
+### Breadcrumbs debugging
+
+For debugging GPU hangs, it's useful to know where crashes happen.
+If the build has trace enabled (non-release builds), breadcrumbs support is also enabled.
+
+`VKD3D_CONFIG=breadcrumbs` will instrument command lists with `VK_AMD_buffer_marker` or `VK_NV_device_checkpoints`.
+On GPU device lost or timeout, crash dumps are written to the log.
+For best results on RADV, use `RADV_DEBUG=syncshaders`. The logs will print a digested form of the command lists
+which were executing at the time, and attempt to narrow down the possible range of commands which could
+have caused a crash.
+
+### Shader logging
+
+It is possible to log the output of replaced shaders, essentially a custom shader printf. To enable this feature, `VK_KHR_buffer_device_address` must be supported.
+First, use `VKD3D_SHADER_DEBUG_RING_SIZE_LOG2=28` for example to set up a 256 MiB ring buffer in host memory.
+Since this buffer is allocated in host memory, feel free to make it as large as you want, as it does not consume VRAM.
+A worker thread will read the data as it comes in and log it. There is potential here to emit more structured information later.
+The main reasons this is implemented instead of using the validation layer printf system are run-time performance
+and avoiding any accidental hiding of bugs caused by introducing validation layers, which add locking, etc.
+Using `debugPrintEXT` is also possible if that fits better with your debugging scenario.
+With this shader replacement scheme, we're able to add shader logging as unintrusively as possible.
+
+```
+# Inside folder full of override shaders, build everything with:
+make -C /path/to/include/shader-debug M=$PWD
+```
+The shader can then include `#include "debug_channel.h"` and use various functions below.
+
+```
+void DEBUG_CHANNEL_INIT(uvec3 ID);
+```
+
+is used somewhere in your replaced shader. This should be initialized with `gl_GlobalInvocationID` or similar.
+This ID will show up in the log. For each subgroup which calls `DEBUG_CHANNEL_INIT`, an instance counter is generated.
+This allows you to correlate several messages which all originate from the same instance counter, which is logged alongside the ID.
+An invocation can be uniquely identified with the instance + `DEBUG_CHANNEL_INIT` id.
+`DEBUG_CHANNEL_INIT` can be called from non-uniform control flow, as it does not use `barrier()` or similar constructs.
+It can also be used in vertex and fragment shaders for this reason.
+
+```
+void DEBUG_CHANNEL_MSG();
+void DEBUG_CHANNEL_MSG(uint v0);
+void DEBUG_CHANNEL_MSG(uint v0, uint v1, ...); // Up to 4 components, can be expanded as needed up to 16.
+void DEBUG_CHANNEL_MSG(int v0);
+void DEBUG_CHANNEL_MSG(int v0, int v1, ...); // Up to 4 components, ...
+void DEBUG_CHANNEL_MSG(float v0);
+void DEBUG_CHANNEL_MSG(float v0, float v1, ...); // Up to 4 components, ...
+```
+
+These functions log their arguments; the formatting is `#%x` for uint, `%d` for int and `%f` for float type.
+
+## Descriptor debugging
+
+If `-Denable_descriptor_qa=true` is enabled in build, you can set the `VKD3D_DESCRIPTOR_QA_LOG` env-var to a file.
+All descriptor updates and copies are logged so that it's possible to correlate descriptors with
+GPU crash dumps. `enable_descriptor_qa` is not enabled by default,
+since it adds some flat overhead in an extremely hot code path.
+
+### GPU-assisted debugging
+
+If `VKD3D_CONFIG=descriptor_qa_checks` is set with a build which enables `-Denable_descriptor_qa=true`,
+all shaders will be instrumented to check for invalid access. The following line in the log
+confirms that the feature is enabled.
+
+```
+932:info:vkd3d_descriptor_debug_init_once: Enabling descriptor QA checks!
+```
+
+The main motivation is the tight integration and high performance.
+GPU-assisted debugging can be run at well over playable speeds.
+
+#### Descriptor heap index out of bounds
+
+```
+============
+Fault type: HEAP_OUT_OF_RANGE
+Fault type: MISMATCH_DESCRIPTOR_TYPE
+CBV_SRV_UAV heap cookie: 1800
+Shader hash and instruction: edbaf1b5ed344467 (1)
+Accessed resource/view cookie: 0
+Shader desired descriptor type: 8 (STORAGE_BUFFER)
+Found descriptor type in heap: 0 (NONE)
+Failed heap index: 1024000
+==========
+```
+
+The instruction, `(1)`, is reported as well,
+and a disassembly of the shader in question can be used to pinpoint exactly where
+things are going wrong.
+Dump all shaders with `VKD3D_SHADER_DUMP_PATH=/my/folder`,
+and run `spirv-cross -V /my/folder/edbaf1b5ed344467.spv`.
+(NOTE: clear out the folder before dumping, existing files are not overwritten).
+The faulting instruction can be identified by looking at the last argument, e.g.:
+
+```
+uint fixup_index = descriptor_qa_check(heap_index, descriptor_type, 1u /* instruction ID */);
+```
+
+#### Mismatch descriptor type
+
+```
+============
+Fault type: MISMATCH_DESCRIPTOR_TYPE
+CBV_SRV_UAV heap cookie: 1800 // Refer to VKD3D_DESCRIPTOR_QA_LOG
+Shader hash and instruction: edbaf1b5ed344467 (1)
+Accessed resource/view cookie: 1802 // Refer to VKD3D_DESCRIPTOR_QA_LOG
+Shader desired descriptor type: 8 (STORAGE_BUFFER)
+Found descriptor type in heap: 1 (SAMPLED_IMAGE)
+Failed heap index: 1025
+==========
+```
+
+#### Accessing destroyed resource
+
+```
+============
+Fault type: DESTROYED_RESOURCE
+CBV_SRV_UAV heap cookie: 1800
+Shader hash and instruction: edbaf1b5ed344467 (2)
+Accessed resource/view cookie: 1806
+Shader desired descriptor type: 1 (SAMPLED_IMAGE)
+Found descriptor type in heap: 1 (SAMPLED_IMAGE)
+Failed heap index: 1029
+==========
+```
+
+### Debugging descriptor crashes with RADV dumps (hardcore ultra nightmare mode)
+
+For when you're absolutely desperate, there is a way to debug GPU hangs.
+First, install [umr](https://gitlab.freedesktop.org/tomstdenis/umr) and make the binary setsuid.
+
+`ACO_DEBUG=force-waitcnt RADV_DEBUG=hang VKD3D_DESCRIPTOR_QA_LOG=/somewhere/desc.txt %command%`
+
+It is possible to use `RADV_DEBUG=hang,umr` as well, but from within Wine, there are weird things
+happening where UMR dumps do not always succeed.
+Instead, it is possible to invoke umr manually from an SSH shell when the GPU hangs.
+
+```
+#!/bin/bash
+
+mkdir -p "$HOME/umr-dump"
+
+# For Navi, older GPUs might have different rings. See RADV source.
+umr -R gfx_0.0.0 > "$HOME/umr-dump/ring.txt" 2>&1
+umr -O halt_waves -wa gfx_0.0.0 > "$HOME/umr-dump/halt-waves-1.txt" 2>&1
+umr -O bits,halt_waves -wa gfx_0.0.0 > "$HOME/umr-dump/halt-waves-2.txt" 2>&1
+```
+
+A folder is placed in `~/radv_dumps*` by RADV, and the UMR script will place wave dumps in `~/umr-dump`.
+
+First, we can study the wave dumps to see where things crash, e.g.:
+
+```
+    pgm[6@0x800120e26c00 + 0x584 ] = 0xf0001108		image_load v47, v[4:5], s[48:55] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm
+    pgm[6@0x800120e26c00 + 0x588 ] = 0x000c2f04	;;
+    pgm[6@0x800120e26c00 + 0x58c ] = 0xbf8c3f70		s_waitcnt vmcnt(0)
+ *  pgm[6@0x800120e26c00 + 0x590 ] = 0x930118c0		s_mul_i32 s1, 64, s24
+    pgm[6@0x800120e26c00 + 0x594 ] = 0xf40c0c09		s_load_dwordx8 s[48:55], s[18:19], s1
+    pgm[6@0x800120e26c00 + 0x598 ] = 0x02000000	;;
+```
+
+excp: 256 is a memory error (at least on 5700xt).
+```
+TRAPSTS[50000100]:
+	                excp:      256 |         illegal_inst:        0 |           buffer_oob:        0 |           excp_cycle:        0 |
+	       excp_wave64hi:        0 |          xnack_error:        1 |              dp_rate:        2 |      excp_group_mask:        0 |
+```
+
+We can inspect all VGPRs and all SGPRs, here for the image descriptor.
+
+```
+    [  48..  51] = { 0130a000, c0500080, 810dc1df, 93b00204 }
+    [  52..  55] = { 00000000, 00400000, 002b0000, 800130c8 }
+```
+
+Decode the VA and study `bo_history.log`. There is a script in RADV which lets you query history for a VA.
+This lets us verify that the VA in question was freed at some point.
+At the time of writing, there is no easy way to decode raw descriptor blobs, but when you're desperate enough you can do it by hand :|
+
+In `pipeline.log` we have the full SPIR-V (with OpSource reference to the source DXIL/DXBC)
+and disassembly of the crashed pipeline. Here we can study the code to figure out which descriptor was read.
+
+```
+    // s7 is the descriptor heap index, s1 is the offset (64 bytes per image descriptor),
+    // s[18:19] is the descriptor heap.
+    s_mul_i32 s1, 64, s7                                        ; 930107c0
+    s_load_dwordx8 s[48:55], s[18:19], s1                       ; f40c0c09 02000000
+    s_waitcnt lgkmcnt(0)                                        ; bf8cc07f
+    image_load v47, v[4:5], s[48:55] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm ; f0001108 000c2f04
+```
+
+```
+    [   4..   7] = { 03200020, ffff8000, 0000002b, 00000103 }
+```
+
+Which is descriptor index #259. Based on this, we can inspect the descriptor QA log and verify that the application
+did indeed do something invalid, which caused the GPU hang.
--- a/autogen.sh
+++ b/autogen.sh
@ -1,4 +0,0 @@
-#!/bin/sh
-set -e
-autoreconf -ifv
-rm -rf autom4te.cache
--- a/bin/.gitignore
+++ b/bin/.gitignore
@ -1,9 +0,0 @@
-ar-lib
-compile
-config.guess
-config.sub
-depcomp
-install-sh
-ltmain.sh
-missing
-test-driver
--- a/build-win32.txt
+++ b/build-win32.txt
@ -0,0 +1,19 @@
+[binaries]
+c = 'i686-w64-mingw32-gcc'
+cpp = 'i686-w64-mingw32-g++'
+ar = 'i686-w64-mingw32-ar'
+strip = 'i686-w64-mingw32-strip'
+widl = 'i686-w64-mingw32-widl'
+
+[properties]
+c_args=['-msse', '-msse2']
+cpp_args=['-msse', '-msse2']
+c_link_args = ['-static', '-static-libgcc']
+cpp_link_args = ['-static', '-static-libgcc', '-static-libstdc++']
+needs_exe_wrapper = true
+
+[host_machine]
+system = 'windows'
+cpu_family = 'x86'
+cpu = 'x86'
+endian = 'little'
--- a/build-win64.txt
+++ b/build-win64.txt
@ -0,0 +1,17 @@
+[binaries]
+c = 'x86_64-w64-mingw32-gcc'
+cpp = 'x86_64-w64-mingw32-g++'
+ar = 'x86_64-w64-mingw32-ar'
+strip = 'x86_64-w64-mingw32-strip'
+widl = 'x86_64-w64-mingw32-widl'
+
+[properties]
+c_link_args = ['-static', '-static-libgcc']
+cpp_link_args = ['-static', '-static-libgcc', '-static-libstdc++']
+needs_exe_wrapper = true
+
+[host_machine]
+system = 'windows'
+cpu_family = 'x86_64'
+cpu = 'x86_64'
+endian = 'little'
--- a/configure.ac
+++ b/configure.ac
@ -1,154 +0,0 @@
-AC_PREREQ([2.69])
-AC_INIT([vkd3d],[1.1])
-
-AC_CONFIG_AUX_DIR([bin])
-AC_CONFIG_MACRO_DIR([m4])
-AC_CONFIG_HEADERS(include/config.h)
-
-AC_ARG_VAR([WIDL], [widl IDL compiler])
-AC_ARG_VAR([CROSSCC32], [32-bit Windows cross compiler])
-AC_ARG_VAR([CROSSCC64], [64-bit Windows cross compiler])
-AC_ARG_WITH([xcb], AS_HELP_STRING([--with-xcb], [Build with XCB library (default: test)]))
-AC_ARG_WITH([spirv-tools], AS_HELP_STRING([--with-spirv-tools],
-                                          [Build with SPIRV-Tools library (default: disabled)]))
-AC_ARG_ENABLE([demos],
-              AS_HELP_STRING([--enable-demos], [Build demo programs (default: disabled)]),,
-              [enable_demos=no])
-AC_ARG_ENABLE([tests],
-              AS_HELP_STRING([--enable-tests], [Build tests (default: enabled)]),,
-              [enable_tests=yes])
-
-AC_USE_SYSTEM_EXTENSIONS
-
-dnl Check for progs
-AM_PROG_AR
-AC_PROG_CC
-AM_PROG_CC_C_O
-AC_PROG_SED
-AC_PROG_MKDIR_P
-VKD3D_PROG_WIDL(3, 20)
-AS_IF([test "x$WIDL" = "xno"], [AC_MSG_WARN([widl is required to build header files.])])
-
-AM_INIT_AUTOMAKE([1.11 foreign silent-rules subdir-objects no-dist-gzip dist-xz -Wall -Werror])
-AM_MAINTAINER_MODE([enable])
-AM_SILENT_RULES([yes])
-
-LT_PREREQ([2.4.2])
-LT_INIT([win32-dll])
-
-gl_LD_VERSION_SCRIPT
-
-dnl Check compiler specific flags
-AC_SUBST([VKD3D_CFLAGS])
-AS_IF([test "x${GCC}" = "xyes"],
-      [VKD3D_CFLAGS="-Wall -pipe"
-      VKD3D_CHECK_CFLAGS([-std=c99])
-      VKD3D_CHECK_CFLAGS([-Wdeclaration-after-statement])
-      VKD3D_CHECK_CFLAGS([-Wimplicit-fallthrough])
-      VKD3D_CHECK_CFLAGS([-Wmissing-prototypes])
-      VKD3D_CHECK_CFLAGS([-Wunused-but-set-parameter])
-      VKD3D_CHECK_CFLAGS([-Wvla])
-      VKD3D_CHECK_CFLAGS([-Wpointer-arith])
-      VKD3D_CHECK_CFLAGS([-Wl,--no-undefined])])
-
-dnl Check for cross compilers
-VKD3D_CHECK_MINGW32_PROG([CROSSCC32], [CROSSTARGET32], [no])
-VKD3D_CHECK_MINGW64_PROG([CROSSCC64], [CROSSTARGET64], [no])
-
-dnl Check for headers
-AC_CHECK_HEADERS([dlfcn.h pthread.h \
-                  vulkan/vulkan.h \
-                  vulkan/spirv.h vulkan/GLSL.std.450.h \
-                  spirv/unified1/spirv.h spirv/unified1/GLSL.std.450.h])
-AS_IF([test "x$ac_cv_header_pthread_h" != "xyes"], [AC_MSG_ERROR([pthread.h not found.])])
-AS_IF([test "x$ac_cv_header_vulkan_vulkan_h" != "xyes"], [AC_MSG_ERROR([vulkan.h not found.])])
-AS_IF([test "x$ac_cv_header_spirv_unified1_spirv_h" != "xyes" \
-       -a "x$ac_cv_header_vulkan_spirv_h" != "xyes"],
-      [AC_MSG_ERROR([spirv.h not found.])])
-AS_IF([test "x$ac_cv_header_spirv_unified1_GLSL_std_450_h" != "xyes" \
-       -a "x$ac_cv_header_vulkan_GLSL_std_450_h" != "xyes"],
-      [AC_MSG_ERROR([GLSL.std.450.h not found.])])
-
-VKD3D_CHECK_VULKAN_HEADER_VERSION([113], [AC_MSG_ERROR([Vulkan headers are too old, 1.1.113 is required.])])
-
-AC_CHECK_DECL([SpvCapabilityDemoteToHelperInvocationEXT],, [AC_MSG_ERROR([SPIR-V headers are too old.])], [
-#ifdef HAVE_SPIRV_UNIFIED1_SPIRV_H
-# include "spirv/unified1/spirv.h"
-#else
-# include "vulkan/spirv.h"
-#endif])
-
-AC_CHECK_DECLS([program_invocation_name],,,[#include <errno.h>])
-
-dnl Check for libraries
-m4_ifdef([PKG_PROG_PKG_CONFIG], [PKG_PROG_PKG_CONFIG], [m4_fatal([pkg-config autoconf macros not found.])])
-
-AC_CHECK_LIB([m], [ceilf])
-
-AC_SUBST([DL_LIBS])
-AC_CHECK_LIB([dl], [dlopen],
-             [DL_LIBS="-ldl"],
-             [AS_IF([test "$ac_cv_header_dlfnc_h" = "xyes"], [AC_MSG_ERROR([libdl not found.])])])
-
-AC_ARG_VAR([PTHREAD_LIBS], [linker flags for pthreads])
-VKD3D_CHECK_PTHREAD
-
-AC_SUBST([VULKAN_LIBS])
-VKD3D_CHECK_SONAME([vulkan], [vkGetInstanceProcAddr],
-                   [VULKAN_LIBS="-lvulkan"],
-                   [VKD3D_CHECK_SONAME([MoltenVK], [vkGetInstanceProcAddr],
-                                       [VULKAN_LIBS="-lMoltenVK"
-                                       AC_DEFINE_UNQUOTED([SONAME_LIBVULKAN],["$ac_cv_lib_soname_MoltenVK"])],
-                                       [AC_MSG_ERROR([libvulkan and libMoltenVK not found.])])])
-
-AS_IF([test "x$with_spirv_tools" = "xyes"],
-      [PKG_CHECK_MODULES([SPIRV_TOOLS], [SPIRV-Tools-shared],
-      [AC_DEFINE([HAVE_SPIRV_TOOLS], [1], [Define to 1 if you have SPIRV-Tools.])])],
-      [with_spirv_tools=no])
-
-HAVE_XCB=no
-AS_IF([test "x$with_xcb" != "xno"],
-      [PKG_CHECK_MODULES([XCB], [xcb xcb-keysyms],
-                                [AC_DEFINE([HAVE_XCB], [1], [Define to 1 if you have libxcb.])
-                                HAVE_XCB=yes],
-                                [HAVE_XCB=no])])
-
-dnl Check for functions
-VKD3D_CHECK_FUNC([HAVE_BUILTIN_CLZ], [__builtin_clz], [__builtin_clz(0)])
-VKD3D_CHECK_FUNC([HAVE_BUILTIN_POPCOUNT], [__builtin_popcount], [__builtin_popcount(0)])
-VKD3D_CHECK_FUNC([HAVE_SYNC_ADD_AND_FETCH], [__sync_add_and_fetch], [__sync_add_and_fetch((int *)0, 0)])
-VKD3D_CHECK_FUNC([HAVE_SYNC_SUB_AND_FETCH], [__sync_sub_and_fetch], [__sync_sub_and_fetch((int *)0, 0)])
-
-VKD3D_CHECK_PTHREAD_SETNAME_NP
-
-dnl Makefiles
-AS_IF([test "x$enable_demos" = "xyes" -a "x$HAVE_XCB" != "xyes"],
-      [AC_MSG_ERROR([libxcb is required for demos.])])
-AM_CONDITIONAL([BUILD_DEMOS], [test "x$enable_demos" = "xyes"])
-AM_CONDITIONAL([BUILD_TESTS], [test "x$enable_tests" != "xno"])
-AM_CONDITIONAL([HAVE_WIDL], [test "x$WIDL" != "xno"])
-AM_CONDITIONAL([HAVE_CROSSTARGET32], [test "x$CROSSTARGET32" != "xno"])
-AM_CONDITIONAL([HAVE_CROSSTARGET64], [test "x$CROSSTARGET64" != "xno"])
-
-AC_CONFIG_FILES([Makefile])
-AC_OUTPUT
-
-dnl Output configuration summary
-AS_IF([test "x$CROSSTARGET32" != "xno" -o "x$CROSSTARGET64" != "xno"],
-      [HAVE_CROSSTEST=yes], [HAVE_CROSSTEST=no])
-
-AS_ECHO(["
-  Configuration summary for $PACKAGE $VERSION
-
-  widl: ${WIDL}
-
-  Have XCB: ${HAVE_XCB}
-  Have SPIRV-Tools: ${with_spirv_tools}
-
-  Building demos: ${enable_demos}
-  Building tests: ${enable_tests}
-
-  Building crosstests: ${HAVE_CROSSTEST}"])
-AS_IF([test "x$CROSSTARGET32" != "xno"], [AS_ECHO(["    Using 32-bit cross compiler: $CROSSCC32"])])
-AS_IF([test "x$CROSSTARGET64" != "xno"], [AS_ECHO(["    Using 64-bit cross compiler: $CROSSCC64"])])
-AS_ECHO([])
--- a/crosslibs/.gitignore
+++ b/crosslibs/.gitignore
@ -1 +0,0 @@
-*.a
--- a/crosslibs/d3d12.cross32.def
+++ b/crosslibs/d3d12.cross32.def
@ -1,7 +0,0 @@
-LIBRARY D3D12.dll
-
-EXPORTS
-    D3D12CreateDevice@16 @101
-    D3D12GetDebugInterface@8 @102
-    D3D12CreateRootSignatureDeserializer@16 @106
-    D3D12SerializeRootSignature@16 @107
--- a/crosslibs/d3d12.cross64.def
+++ b/crosslibs/d3d12.cross64.def
@ -1,7 +0,0 @@
-LIBRARY D3D12.dll
-
-EXPORTS
-    D3D12CreateDevice @101
-    D3D12GetDebugInterface @102
-    D3D12CreateRootSignatureDeserializer @106
-    D3D12SerializeRootSignature @107
--- a/demos/demo.h
+++ b/demos/demo.h
@ -16,21 +16,8 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

-/* Hack for MinGW-w64 headers.
- *
- * We want to use WIDL C inline wrappers because some methods
- * in D3D12 interfaces return aggregate objects. Unfortunately,
- * WIDL C inline wrappers are broken when used with MinGW-w64
- * headers because FORCEINLINE expands to extern inline
- * which leads to the "multiple storage classes in declaration
- * specifiers" compiler error.
- */
-#ifdef __MINGW32__
-#include <_mingw.h>
-# ifdef __MINGW64_VERSION_MAJOR
-#  undef __forceinline
-#  define __forceinline __inline__ __attribute__((__always_inline__,__gnu_inline__))
-# endif
+#ifdef _WIN32
+# include <vkd3d_win32.h>
 #endif

 #include <vkd3d_windows.h>
--- a/demos/demo_win32.h
+++ b/demos/demo_win32.h
@ -17,11 +17,10 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

-#include <vkd3d_dxgi1_4.h>
 #include <stdbool.h>
 #include <stdio.h>

-#define DEMO_WINDOW_CLASS_NAME L"demo_wc"
+#define DEMO_WINDOW_CLASS_NAME u"demo_wc"

 struct demo
 {
@ -200,12 +199,12 @@ static inline bool demo_init(struct demo *demo, void *user_data)
    wc.cbClsExtra = 0;
    wc.cbWndExtra = 0;
    wc.hInstance = GetModuleHandle(NULL);
-    wc.hIcon = LoadIconW(NULL, IDI_APPLICATION);
-    wc.hCursor = LoadCursorW(NULL, IDC_ARROW);
+    wc.hIcon = LoadIconA(NULL, IDI_APPLICATION);
+    wc.hCursor = LoadCursorA(NULL, IDC_ARROW);
    wc.hbrBackground = (HBRUSH)GetStockObject(WHITE_BRUSH);
    wc.lpszMenuName = NULL;
    wc.lpszClassName = DEMO_WINDOW_CLASS_NAME;
-    wc.hIconSm = LoadIconW(NULL, IDI_WINLOGO);
+    wc.hIconSm = LoadIconA(NULL, IDI_WINLOGO);
    if (!RegisterClassExW(&wc))
        return false;

--- a/demos/demo_xcb.h
+++ b/demos/demo_xcb.h
@ -20,6 +20,7 @@
 #define VK_USE_PLATFORM_XCB_KHR
 #include <vkd3d.h>
 #include <vkd3d_utils.h>
+#include <vkd3d_sonames.h>
 #include <xcb/xcb_event.h>
 #include <xcb/xcb_icccm.h>
 #include <xcb/xcb_keysyms.h>
@ -28,6 +29,24 @@
 #include <unistd.h>
 #include <fcntl.h>
 #include <stdbool.h>
+#include <dlfcn.h>
+
+#define SYMBOL(x) static PFN_vk##x x
+SYMBOL(CreateXcbSurfaceKHR);
+SYMBOL(GetPhysicalDeviceSurfaceSupportKHR);
+SYMBOL(GetPhysicalDeviceSurfaceCapabilitiesKHR);
+SYMBOL(GetPhysicalDeviceSurfaceFormatsKHR);
+SYMBOL(CreateSwapchainKHR);
+SYMBOL(CreateFence);
+SYMBOL(GetSwapchainImagesKHR);
+SYMBOL(AcquireNextImageKHR);
+SYMBOL(WaitForFences);
+SYMBOL(ResetFences);
+SYMBOL(DestroyFence);
+SYMBOL(DestroySurfaceKHR);
+SYMBOL(QueuePresentKHR);
+SYMBOL(DestroySwapchainKHR);
+#undef SYMBOL

 struct demo
 {
@ -43,6 +62,7 @@ struct demo

    void *user_data;
    void (*idle_func)(struct demo *demo, void *user_data);
+
 };

 struct demo_window
@ -70,6 +90,29 @@ struct demo_swapchain
    ID3D12Resource *buffers[1];
 };

+/*
+ * Resolves the Vulkan entry points used by the demo swapchain code.
+ *
+ * The Vulkan loader named by SONAME_LIBVULKAN is opened with dlopen(),
+ * vkGetInstanceProcAddr is looked up in it, and every file-local function
+ * pointer is then queried through it for the given instance.
+ *
+ * NOTE(review): neither the dlopen() nor the dlsym() result is checked, and
+ * the library handle is never closed; a missing loader ends in a call
+ * through a NULL pointer. The void signature leaves no way to report this.
+ */
+static inline void init_symbols(VkInstance instance)
+{
+    PFN_vkGetInstanceProcAddr get_instance_proc_addr;
+    void *libvulkan;
+
+    libvulkan = dlopen(SONAME_LIBVULKAN, RTLD_LAZY);
+    get_instance_proc_addr = (PFN_vkGetInstanceProcAddr)dlsym(libvulkan, "vkGetInstanceProcAddr");
+
+    /* SYMBOL(Foo) expands to: Foo = (PFN_vkFoo)get_instance_proc_addr(instance, "vkFoo") */
+#define SYMBOL(x) x = (PFN_vk##x)get_instance_proc_addr(instance, "vk" #x)
+    /* Surface */
+    SYMBOL(CreateXcbSurfaceKHR);
+    SYMBOL(DestroySurfaceKHR);
+    SYMBOL(GetPhysicalDeviceSurfaceSupportKHR);
+    SYMBOL(GetPhysicalDeviceSurfaceCapabilitiesKHR);
+    SYMBOL(GetPhysicalDeviceSurfaceFormatsKHR);
+    /* Swapchain */
+    SYMBOL(CreateSwapchainKHR);
+    SYMBOL(DestroySwapchainKHR);
+    SYMBOL(GetSwapchainImagesKHR);
+    SYMBOL(AcquireNextImageKHR);
+    SYMBOL(QueuePresentKHR);
+    /* Fences */
+    SYMBOL(CreateFence);
+    SYMBOL(DestroyFence);
+    SYMBOL(WaitForFences);
+    SYMBOL(ResetFences);
+#undef SYMBOL
+}
+
 static inline xcb_atom_t demo_get_atom(xcb_connection_t *c, const char *name)
 {
    xcb_intern_atom_cookie_t cookie;
@ -339,23 +382,25 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
    vk_physical_device = vkd3d_get_vk_physical_device(d3d12_device);
    vk_device = vkd3d_get_vk_device(d3d12_device);

+    init_symbols(vk_instance);
+
    surface_desc.sType = VK_STRUCTURE_TYPE_XCB_SURFACE_CREATE_INFO_KHR;
    surface_desc.pNext = NULL;
    surface_desc.flags = 0;
    surface_desc.connection = window->demo->connection;
    surface_desc.window = window->window;
-    if (vkCreateXcbSurfaceKHR(vk_instance, &surface_desc, NULL, &vk_surface) < 0)
+    if (CreateXcbSurfaceKHR(vk_instance, &surface_desc, NULL, &vk_surface) < 0)
    {
        ID3D12Device_Release(d3d12_device);
        return NULL;
    }

    queue_family_index = vkd3d_get_vk_queue_family_index(command_queue);
-    if (vkGetPhysicalDeviceSurfaceSupportKHR(vk_physical_device,
+    if (GetPhysicalDeviceSurfaceSupportKHR(vk_physical_device,
            queue_family_index, vk_surface, &supported) < 0 || !supported)
        goto fail;

-    if (vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device, vk_surface, &surface_caps) < 0)
+    if (GetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device, vk_surface, &surface_caps) < 0)
        goto fail;

    if ((surface_caps.maxImageCount && desc->buffer_count > surface_caps.maxImageCount)
@ -365,11 +410,11 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
            || !(surface_caps.supportedCompositeAlpha & VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR))
        goto fail;

-    if (vkGetPhysicalDeviceSurfaceFormatsKHR(vk_physical_device, vk_surface, &format_count, NULL) < 0
+    if (GetPhysicalDeviceSurfaceFormatsKHR(vk_physical_device, vk_surface, &format_count, NULL) < 0
            || !format_count || !(formats = calloc(format_count, sizeof(*formats))))
        goto fail;

-    if (vkGetPhysicalDeviceSurfaceFormatsKHR(vk_physical_device, vk_surface, &format_count, formats) < 0)
+    if (GetPhysicalDeviceSurfaceFormatsKHR(vk_physical_device, vk_surface, &format_count, formats) < 0)
    {
        free(formats);
        goto fail;
@ -413,20 +458,20 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
    vk_swapchain_desc.presentMode = VK_PRESENT_MODE_FIFO_KHR;
    vk_swapchain_desc.clipped = VK_TRUE;
    vk_swapchain_desc.oldSwapchain = VK_NULL_HANDLE;
-    if (vkCreateSwapchainKHR(vk_device, &vk_swapchain_desc, NULL, &vk_swapchain) < 0)
+    if (CreateSwapchainKHR(vk_device, &vk_swapchain_desc, NULL, &vk_swapchain) < 0)
        goto fail;

    fence_desc.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
    fence_desc.pNext = NULL;
    fence_desc.flags = 0;
-    if (vkCreateFence(vk_device, &fence_desc, NULL, &vk_fence) < 0)
+    if (CreateFence(vk_device, &fence_desc, NULL, &vk_fence) < 0)
        goto fail;

-    if (vkGetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, NULL) < 0
+    if (GetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, NULL) < 0
            || !(vk_images = calloc(image_count, sizeof(*vk_images))))
        goto fail;

-    if (vkGetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, vk_images) < 0)
+    if (GetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, vk_images) < 0)
    {
        free(vk_images);
        goto fail;
@ -443,13 +488,11 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
    swapchain->vk_instance = vk_instance;
    swapchain->vk_device = vk_device;

-    vkAcquireNextImageKHR(vk_device, vk_swapchain, UINT64_MAX,
+    AcquireNextImageKHR(vk_device, vk_swapchain, UINT64_MAX,
            VK_NULL_HANDLE, vk_fence, &swapchain->current_buffer);
-    vkWaitForFences(vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX);
-    vkResetFences(vk_device, 1, &vk_fence);
+    WaitForFences(vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX);
+    ResetFences(vk_device, 1, &vk_fence);

-    resource_create_info.type = VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO;
-    resource_create_info.next = NULL;
    resource_create_info.desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
    resource_create_info.desc.Alignment = 0;
    resource_create_info.desc.Width = desc->width;
@ -461,7 +504,7 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
    resource_create_info.desc.SampleDesc.Quality = 0;
    resource_create_info.desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
    resource_create_info.desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
-    resource_create_info.flags = VKD3D_RESOURCE_INITIAL_STATE_TRANSITION | VKD3D_RESOURCE_PRESENT_STATE_TRANSITION;
+    resource_create_info.flags = 0;
    resource_create_info.present_state = D3D12_RESOURCE_STATE_PRESENT;
    for (i = 0; i < image_count; ++i)
    {
@ -476,6 +519,8 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c
            free(vk_images);
            goto fail;
        }
+
+        vkd3d_enqueue_initial_transition(command_queue, swapchain->buffers[i]);
    }
    swapchain->buffer_count = image_count;
    free(vk_images);
@ -487,10 +532,10 @@ static inline struct demo_swapchain *demo_swapchain_create(ID3D12CommandQueue *c

 fail:
    if (vk_fence != VK_NULL_HANDLE)
-        vkDestroyFence(vk_device, vk_fence, NULL);
+        DestroyFence(vk_device, vk_fence, NULL);
    if (vk_swapchain != VK_NULL_HANDLE)
-        vkDestroySwapchainKHR(vk_device, vk_swapchain, NULL);
-    vkDestroySurfaceKHR(vk_instance, vk_surface, NULL);
+        DestroySwapchainKHR(vk_device, vk_swapchain, NULL);
+    DestroySurfaceKHR(vk_instance, vk_surface, NULL);
    ID3D12Device_Release(d3d12_device);
    return NULL;
 }
@ -525,13 +570,13 @@ static inline void demo_swapchain_present(struct demo_swapchain *swapchain)
    present_desc.pResults = NULL;

    vk_queue = vkd3d_acquire_vk_queue(swapchain->command_queue);
-    vkQueuePresentKHR(vk_queue, &present_desc);
+    QueuePresentKHR(vk_queue, &present_desc);
    vkd3d_release_vk_queue(swapchain->command_queue);

-    vkAcquireNextImageKHR(swapchain->vk_device, swapchain->vk_swapchain, UINT64_MAX,
+    AcquireNextImageKHR(swapchain->vk_device, swapchain->vk_swapchain, UINT64_MAX,
            VK_NULL_HANDLE, swapchain->vk_fence, &swapchain->current_buffer);
-    vkWaitForFences(swapchain->vk_device, 1, &swapchain->vk_fence, VK_TRUE, UINT64_MAX);
-    vkResetFences(swapchain->vk_device, 1, &swapchain->vk_fence);
+    WaitForFences(swapchain->vk_device, 1, &swapchain->vk_fence, VK_TRUE, UINT64_MAX);
+    ResetFences(swapchain->vk_device, 1, &swapchain->vk_fence);
 }

 static inline void demo_swapchain_destroy(struct demo_swapchain *swapchain)
@ -543,9 +588,9 @@ static inline void demo_swapchain_destroy(struct demo_swapchain *swapchain)
    {
        ID3D12Resource_Release(swapchain->buffers[i]);
    }
-    vkDestroyFence(swapchain->vk_device, swapchain->vk_fence, NULL);
-    vkDestroySwapchainKHR(swapchain->vk_device, swapchain->vk_swapchain, NULL);
-    vkDestroySurfaceKHR(swapchain->vk_instance, swapchain->vk_surface, NULL);
+    DestroyFence(swapchain->vk_device, swapchain->vk_fence, NULL);
+    DestroySwapchainKHR(swapchain->vk_device, swapchain->vk_swapchain, NULL);
+    DestroySurfaceKHR(swapchain->vk_instance, swapchain->vk_surface, NULL);
    free(swapchain);
 }

--- a/demos/gears.c
+++ b/demos/gears.c
@ -41,10 +41,12 @@
 */

 #define INITGUID
-#define _GNU_SOURCE
+#ifndef _WIN32
 #include <sys/time.h>
+#endif
 #include <assert.h>
 #include <stdio.h>
+#define _USE_MATH_DEFINES
 #include <math.h>
 #include "demo.h"

@ -268,15 +270,32 @@ static void cxg_update_mvp(struct cx_gears *cxg)
    memcpy(cxg->cb_data->normal_matrix, world, sizeof(cxg->cb_data->normal_matrix));
 }

+/*
+ * Returns the current time in seconds as a double.
+ *
+ * On Windows the value is the performance counter divided by its frequency;
+ * elsewhere it is built from gettimeofday() seconds plus microseconds.
+ */
+static double cxg_get_time(void)
+{
+#ifdef _WIN32
+    LARGE_INTEGER ticks_per_second;
+    LARGE_INTEGER ticks;
+
+    QueryPerformanceFrequency(&ticks_per_second);
+    QueryPerformanceCounter(&ticks);
+
+    return (double)ticks.QuadPart / (double)ticks_per_second.QuadPart;
+#else
+    struct timeval now;
+    double seconds;
+
+    gettimeofday(&now, NULL);
+    seconds = now.tv_sec + now.tv_usec / 1000000.0;
+
+    return seconds;
+#endif
+}
+
 static void cxg_render_frame(struct cx_gears *cxg)
 {
    static double t_prev = -1.0;
-    struct timeval tv;
    double dt, t;
    float a;

-    gettimeofday(&tv, NULL);
-    t = tv.tv_sec + tv.tv_usec / 1000000.0;
+    t = cxg_get_time();
    if (t_prev < 0.0)
        t_prev = t;
    dt = t - t_prev;
@ -437,13 +456,8 @@ static void cxg_mesh_create(ID3D12Device *device, float inner_radius, float oute
    float r0, r1, r2;
    float angle, da;

-    if (!(vertices = calloc(tooth_count, 12 * sizeof(*vertices))))
-        return;
-    if (!(faces = calloc(tooth_count, 20 * sizeof(*faces))))
-    {
-        free(vertices);
-        return;
-    }
+    vertices = calloc(tooth_count, 12 * sizeof(*vertices));
+    faces = calloc(tooth_count, 20 * sizeof(*faces));

    r0 = inner_radius;
    r1 = outer_radius - tooth_depth / 2.0f;
@ -878,7 +892,11 @@ static int cxg_main(void)
 }

 #ifdef _WIN32
-int wmain(void)
+int WINAPI WinMain(
+    HINSTANCE hInstance,
+    HINSTANCE hPrevInstance,
+    LPSTR lpCmdLine,
+    int nCmdShow)
 #else
 int main(void)
 #endif
--- a/demos/meson.build
+++ b/demos/meson.build
@ -0,0 +1,33 @@
+# Dependencies shared by every demo on every platform.
+demo_vkd3d_deps = [
+  threads_dep, vkd3d_common_dep
+]
+
+# Windows builds link against the dxgi / d3d12 libraries; every other
+# platform uses XCB for windowing and links vkd3d directly.
+if vkd3d_platform == 'windows'
+  demo_vkd3d_deps += [
+    lib_dxgi,
+    lib_d3d12
+  ]
+else
+  demo_vkd3d_deps += [
+    lib_m,
+    lib_xcb,
+    lib_xcbkeysyms,
+
+    vkd3d_dep,
+    vkd3d_utils_dep,
+  ]
+endif
+
+# Each demo is a single-source GUI executable built with identical settings.
+foreach demo_name : [ 'gears', 'triangle' ]
+  executable(demo_name, demo_name + '.c',
+    dependencies        : demo_vkd3d_deps,
+    include_directories : vkd3d_public_includes,
+    install             : true,
+    gui_app             : true,
+    override_options    : [ 'c_std='+vkd3d_c_std ])
+endforeach
--- a/demos/triangle.c
+++ b/demos/triangle.c
@ -393,7 +393,11 @@ static int cxt_main(void)
 }

 #ifdef _WIN32
-int wmain(void)
+int WINAPI WinMain(
+    HINSTANCE hInstance,
+    HINSTANCE hPrevInstance,
+    LPSTR lpCmdLine,
+    int nCmdShow)
 #else
 int main(void)
 #endif
--- a/include/meson.build
+++ b/include/meson.build
@ -0,0 +1,17 @@
+# IDL sources in this directory that are fed to idl_generator.
+vkd3d_idl = [
+  'vkd3d_d3d12.idl',
+  'vkd3d_d3d12sdklayers.idl',
+  'vkd3d_d3dcommon.idl',
+  'vkd3d_dxgi.idl',
+  'vkd3d_dxgi1_2.idl',
+  'vkd3d_dxgi1_3.idl',
+  'vkd3d_dxgi1_4.idl',
+  'vkd3d_dxgibase.idl',
+  'vkd3d_dxgiformat.idl',
+  'vkd3d_dxgitype.idl',
+  'vkd3d_swapchain_factory.idl',
+  'vkd3d_command_list_vkd3d_ext.idl',
+  'vkd3d_device_vkd3d_ext.idl'
+]
+
+# Result of running idl_generator over the list above
+# (presumably the generated C headers - confirm against the generator definition).
+vkd3d_header_files = idl_generator.process(vkd3d_idl)
--- a/include/private/hashmap.h
+++ b/include/private/hashmap.h
@ -0,0 +1,255 @@
+/*
+ * Hash map support
+ *
+ * Copyright 2020 Philip Rebohle for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_HASHMAP_H
+#define __VKD3D_HASHMAP_H
+
+#include <stddef.h>
+
+#include "vkd3d_memory.h"
+
+/* Bits stored in hash_map_entry.flags. */
+enum hash_map_entry_flag
+{
+    /* The slot holds a live entry; a slot without this bit is free. */
+    HASH_MAP_ENTRY_OCCUPIED = (1 << 0),
+};
+
+/*
+ * Header found at the start of every slot in a hash_map. User entry types
+ * place this struct first; the bytes after it (up to hash_map.entry_size)
+ * are the payload copied by hash_map_insert().
+ */
+struct hash_map_entry
+{
+    /* Hash of the entry's key as returned by hash_map.hash_func. */
+    uint32_t hash_value;
+    /* Combination of enum hash_map_entry_flag bits. */
+    uint32_t flags;
+};
+
+/* Computes the 32-bit hash of a lookup key. */
+typedef uint32_t (*pfn_hash_func)(const void* key);
+/* Returns true when key identifies the given stored entry. It is only called
+ * for entries whose stored hash_value already equals the hash of key. */
+typedef bool (*pfn_hash_compare_func)(const void *key, const struct hash_map_entry *entry);
+
+/* Open-addressing hash table with linear probing. */
+struct hash_map
+{
+    /* Hashes a lookup key. */
+    pfn_hash_func hash_func;
+    /* Tests a lookup key against a stored entry. */
+    pfn_hash_compare_func compare_func;
+    /* Slot storage: entry_count slots of entry_size bytes each, or NULL while empty. */
+    void *entries;
+    /* Size in bytes of one slot, including the struct hash_map_entry header. */
+    size_t entry_size;
+    /* Number of slots in entries. */
+    uint32_t entry_count;
+    /* Number of slots currently marked HASH_MAP_ENTRY_OCCUPIED. */
+    uint32_t used_count;
+};
+
+/* Returns the slot at index entry_idx inside the map's entry storage. */
+static inline struct hash_map_entry *hash_map_get_entry(const struct hash_map *hash_map, uint32_t entry_idx)
+{
+    size_t byte_offset = hash_map->entry_size * entry_idx;
+
+    return void_ptr_offset(hash_map->entries, byte_offset);
+}
+
+/* Maps a hash value to the index of the slot where probing starts.
+ * The map must already have storage: entry_count is used as a divisor. */
+static inline uint32_t hash_map_get_entry_idx(const struct hash_map *hash_map, uint32_t hash_value)
+{
+    const uint32_t slot_count = hash_map->entry_count;
+
+    return hash_value % slot_count;
+}
+
+/* Returns the slot index that follows entry_idx in probe order,
+ * wrapping around to 0 past the last slot. */
+static inline uint32_t hash_map_next_entry_idx(const struct hash_map *hash_map, uint32_t entry_idx)
+{
+    uint32_t candidate = entry_idx + 1;
+
+    if (candidate >= hash_map->entry_count)
+        return 0;
+
+    return candidate;
+}
+
+/* Returns the slot count to use when growing from old_size slots;
+ * an empty table (old_size == 0) starts at 37 slots. */
+static inline uint32_t hash_map_next_size(uint32_t old_size)
+{
+    if (!old_size)
+        return 37;
+
+    /* This yields a sequence of primes and numbers with two
+     * relatively large prime factors for any reasonable hash
+     * table size */
+    return old_size * 2 + 5;
+}
+
+/*
+ * Replaces the map's storage with a larger array and re-inserts every
+ * occupied slot of the old array into it.
+ *
+ * Returns false if the new array cannot be allocated, in which case the map
+ * is left unchanged; returns true otherwise. Pointers to entries obtained
+ * before a successful call refer to freed storage afterwards.
+ */
+static inline bool hash_map_grow(struct hash_map *hash_map)
+{
+    uint32_t i, old_count, new_count;
+    void *new_entries, *old_entries;
+
+    old_count = hash_map->entry_count;
+    old_entries = hash_map->entries;
+
+    new_count = hash_map_next_size(hash_map->entry_count);
+
+    /* NOTE(review): the probing below relies on fresh slots having flags == 0,
+     * i.e. on vkd3d_calloc() zero-filling the allocation - confirm. */
+    if (!(new_entries = vkd3d_calloc(new_count, hash_map->entry_size)))
+        return false;
+
+    /* Switch to the new storage first: the index helpers used below
+     * read entry_count and entries from the map itself. */
+    hash_map->entry_count = new_count;
+    hash_map->entries = new_entries;
+
+    for (i = 0; i < old_count; i++)
+    {
+        /* Relocate existing entries one by one */
+        struct hash_map_entry *old_entry = void_ptr_offset(old_entries, i * hash_map->entry_size);
+
+        if (old_entry->flags & HASH_MAP_ENTRY_OCCUPIED)
+        {
+            /* The stored hash is reused, so hash_func is not called again. */
+            uint32_t entry_idx = hash_map_get_entry_idx(hash_map, old_entry->hash_value);
+            struct hash_map_entry *new_entry = hash_map_get_entry(hash_map, entry_idx);
+
+            /* Linear probing: advance to the first free slot. */
+            while (new_entry->flags & HASH_MAP_ENTRY_OCCUPIED)
+            {
+                entry_idx = hash_map_next_entry_idx(hash_map, entry_idx);
+                new_entry = hash_map_get_entry(hash_map, entry_idx);
+            }
+
+            /* Copies header and payload; used_count therefore stays valid. */
+            memcpy(new_entry, old_entry, hash_map->entry_size);
+        }
+    }
+
+    vkd3d_free(old_entries);
+    return true;
+}
+
+/*
+ * Reports whether the table has to be enlarged before one more entry is
+ * inserted. An empty map (no slots, no entries) also reports true, which is
+ * how the first insertion allocates the initial storage.
+ */
+static inline bool hash_map_should_grow_before_insert(struct hash_map *hash_map)
+{
+    /* Allow a load factor of 0.7 for performance reasons.
+     * The scaled counts are compared in 64 bits: in 32-bit arithmetic
+     * 10 * used_count wraps around for very large tables, which would
+     * suppress growth and let the table fill up completely. */
+    return 10 * (uint64_t)hash_map->used_count >= 7 * (uint64_t)hash_map->entry_count;
+}
+
+/*
+ * Looks up the entry matching key.
+ *
+ * Returns the stored entry, or NULL if the map has no storage yet or no
+ * entry matches.
+ */
+static inline struct hash_map_entry *hash_map_find(const struct hash_map *hash_map, const void *key)
+{
+    struct hash_map_entry *slot;
+    uint32_t wanted_hash, slot_idx;
+
+    if (!hash_map->entries)
+        return NULL;
+
+    wanted_hash = hash_map->hash_func(key);
+    slot_idx = hash_map_get_entry_idx(hash_map, wanted_hash);
+    slot = hash_map_get_entry(hash_map, slot_idx);
+
+    /* We never allow the hash table to be completely
+     * populated, so a free slot always ends the probe sequence */
+    while (slot->flags & HASH_MAP_ENTRY_OCCUPIED)
+    {
+        /* Compare the cheap stored hash before calling the comparator */
+        if (slot->hash_value == wanted_hash && hash_map->compare_func(key, slot))
+            return slot;
+
+        slot_idx = hash_map_next_entry_idx(hash_map, slot_idx);
+        slot = hash_map_get_entry(hash_map, slot_idx);
+    }
+
+    return NULL;
+}
+
+/*
+ * Inserts entry under key unless an entry matching key already exists.
+ *
+ * Only the payload following the struct hash_map_entry header is copied from
+ * entry; the header of the stored slot is filled in here.
+ *
+ * Returns the stored entry: the newly added one, or the one that was already
+ * present, in which case the caller is responsible for cleaning up the node
+ * it attempted to add. Returns NULL if the table had to grow and growing
+ * failed.
+ */
+static inline struct hash_map_entry *hash_map_insert(struct hash_map *hash_map, const void *key, const struct hash_map_entry *entry)
+{
+    struct hash_map_entry *slot;
+    uint32_t new_hash, slot_idx;
+
+    if (hash_map_should_grow_before_insert(hash_map) && !hash_map_grow(hash_map))
+        return NULL;
+
+    new_hash = hash_map->hash_func(key);
+    slot_idx = hash_map_get_entry_idx(hash_map, new_hash);
+
+    for (;;)
+    {
+        slot = hash_map_get_entry(hash_map, slot_idx);
+
+        if (!(slot->flags & HASH_MAP_ENTRY_OCCUPIED))
+        {
+            /* Free slot: claim it and copy the payload behind the header */
+            hash_map->used_count += 1;
+            slot->flags = HASH_MAP_ENTRY_OCCUPIED;
+            slot->hash_value = new_hash;
+            memcpy(slot + 1, entry + 1, hash_map->entry_size - sizeof(*entry));
+            return slot;
+        }
+
+        /* Occupied by the same key: hand back the existing entry untouched */
+        if (slot->hash_value == new_hash && hash_map->compare_func(key, slot))
+            return slot;
+
+        slot_idx = hash_map_next_entry_idx(hash_map, slot_idx);
+    }
+}
+
+/*
+ * Initialises hash_map as an empty table using the given callbacks.
+ *
+ * entry_size is the size in bytes of one stored entry and must be larger
+ * than struct hash_map_entry. No storage is allocated here.
+ */
+static inline void hash_map_init(struct hash_map *hash_map, pfn_hash_func hash_func, pfn_hash_compare_func compare_func, size_t entry_size)
+{
+    /* Empty table */
+    hash_map->entries = NULL;
+    hash_map->entry_count = 0;
+    hash_map->used_count = 0;
+
+    /* Caller-supplied configuration */
+    hash_map->entry_size = entry_size;
+    hash_map->hash_func = hash_func;
+    hash_map->compare_func = compare_func;
+
+    assert(entry_size > sizeof(struct hash_map_entry));
+}
+
+/* Frees the slot storage and returns the map to its empty state.
+ * The callbacks and entry_size are kept, so the map can be reused. */
+static inline void hash_map_clear(struct hash_map *hash_map)
+{
+    void *storage = hash_map->entries;
+
+    hash_map->used_count = 0;
+    hash_map->entry_count = 0;
+    hash_map->entries = NULL;
+
+    vkd3d_free(storage);
+}
+
+/* Mixes new_hash into old_hash and returns the combined 32-bit hash. */
+static inline uint32_t hash_combine(uint32_t old_hash, uint32_t new_hash)
+{
+    uint32_t mixed = new_hash + 0x9e3779b9 + (old_hash << 6) + (old_hash >> 2);
+
+    return old_hash ^ mixed;
+}
+
+/* Hashes a 64-bit value by combining its low and high 32-bit halves. */
+static inline uint32_t hash_uint64(uint64_t n)
+{
+    uint32_t low_half = (uint32_t)n;
+    uint32_t high_half = (uint32_t)(n >> 32);
+
+    return hash_combine(low_half, high_half);
+}
+
+/* A somewhat stronger hash when we're meant to store the hash (pipeline caches, etc). Based on FNV-1 (multiply, then XOR). */
+/* Returns the initial state for the hash_fnv1_iterate_*() functions
+ * (the 64-bit FNV offset basis). */
+static inline uint64_t hash_fnv1_init(void)
+{
+    return 0xcbf29ce484222325ull;
+}
+
+/* Folds one byte into the running hash h: multiply by the 64-bit FNV prime,
+ * then XOR in the value. */
+static inline uint64_t hash_fnv1_iterate_u8(uint64_t h, uint8_t value)
+{
+    const uint64_t fnv_prime = 0x100000001b3ull;
+
+    h *= fnv_prime;
+    return h ^ value;
+}
+
+/* Folds a 32-bit value into the running hash h in a single step
+ * (one multiply and one XOR, not byte by byte). */
+static inline uint64_t hash_fnv1_iterate_u32(uint64_t h, uint32_t value)
+{
+    const uint64_t fnv_prime = 0x100000001b3ull;
+
+    h *= fnv_prime;
+    return h ^ value;
+}
+
+/* Folds a float into the running hash h by hashing its 32-bit object
+ * representation. */
+static inline uint64_t hash_fnv1_iterate_f32(uint64_t h, float value)
+{
+    union { float as_float; uint32_t as_bits; } pun;
+
+    pun.as_float = value;
+
+    return hash_fnv1_iterate_u32(h, pun.as_bits);
+}
+
+/* Folds a 64-bit value into the running hash h as two 32-bit steps,
+ * low half first. */
+static inline uint64_t hash_fnv1_iterate_u64(uint64_t h, uint64_t value)
+{
+    uint32_t low_half = (uint32_t)(value & UINT32_MAX);
+    uint32_t high_half = (uint32_t)(value >> 32);
+
+    return hash_fnv1_iterate_u32(hash_fnv1_iterate_u32(h, low_half), high_half);
+}
+
+/* Folds a NUL-terminated string into the running hash h, one byte at a time.
+ * A terminating 0 byte is always hashed, so a NULL str hashes like "". */
+static inline uint64_t hash_fnv1_iterate_string(uint64_t h, const char *str)
+{
+    const char *cursor;
+
+    if (str)
+    {
+        for (cursor = str; *cursor; cursor++)
+            h = hash_fnv1_iterate_u8(h, *cursor);
+    }
+
+    return hash_fnv1_iterate_u8(h, 0);
+}
+
+#endif  /* __VKD3D_HASHMAP_H */
--- a/include/private/renderdoc_app.h
+++ b/include/private/renderdoc_app.h
@ -0,0 +1,688 @@
+/******************************************************************************
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2019-2020 Baldur Karlsson
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ ******************************************************************************/
+
+#pragma once
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+//
+// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
+//
+
+#if !defined(RENDERDOC_NO_STDINT)
+#include <stdint.h>
+#endif
+
+// RENDERDOC_CC is the calling convention used by the API function pointer types in this
+// header: __cdecl on Windows/MSVC builds, empty on Linux and macOS. Any other platform is
+// rejected at compile time.
+#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
+#define RENDERDOC_CC __cdecl
+#elif defined(__linux__)
+#define RENDERDOC_CC
+#elif defined(__APPLE__)
+#define RENDERDOC_CC
+#else
+#error "Unknown platform"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// Constants not used directly in below API
+
+// This is a GUID/magic value that applications use when they pass a path where shader debug
+// information can be found, so it can be matched up with a stripped shader.
+// The define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
+// RENDERDOC_ShaderDebugMagicValue_struct
+#define RENDERDOC_ShaderDebugMagicValue_struct                                \
+  {                                                                           \
+    0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
+  }
+
+// as an alternative when you want a byte array (assuming x86 endianness):
+#define RENDERDOC_ShaderDebugMagicValue_bytearray                                                 \
+  {                                                                                               \
+    0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
+  }
+
+// truncated version when only a uint64_t is available (e.g. Vulkan tags):
+#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// RenderDoc capture options
+//
+
+// Identifies a capture option for the Set/GetCaptureOption function pointer types.
+// Values are assigned explicitly; eRENDERDOC_Option_DebugDeviceMode and
+// eRENDERDOC_Option_VerifyMapWrites are older names that alias other enumerators.
+typedef enum RENDERDOC_CaptureOption {
+  // Allow the application to enable vsync
+  //
+  // Default - enabled
+  //
+  // 1 - The application can enable or disable vsync at will
+  // 0 - vsync is force disabled
+  eRENDERDOC_Option_AllowVSync = 0,
+
+  // Allow the application to enable fullscreen
+  //
+  // Default - enabled
+  //
+  // 1 - The application can enable or disable fullscreen at will
+  // 0 - fullscreen is force disabled
+  eRENDERDOC_Option_AllowFullscreen = 1,
+
+  // Record API debugging events and messages
+  //
+  // Default - disabled
+  //
+  // 1 - Enable built-in API debugging features and records the results into
+  //     the capture, which is matched up with events on replay
+  // 0 - no API debugging is forcibly enabled
+  eRENDERDOC_Option_APIValidation = 2,
+  eRENDERDOC_Option_DebugDeviceMode = 2,    // deprecated name of this enum
+
+  // Capture CPU callstacks for API events
+  //
+  // Default - disabled
+  //
+  // 1 - Enables capturing of callstacks
+  // 0 - no callstacks are captured
+  eRENDERDOC_Option_CaptureCallstacks = 3,
+
+  // When capturing CPU callstacks, only capture them from drawcalls.
+  // This option does nothing without the above option being enabled
+  //
+  // Default - disabled
+  //
+  // 1 - Only captures callstacks for drawcall type API events.
+  //     Ignored if CaptureCallstacks is disabled
+  // 0 - Callstacks, if enabled, are captured for every event.
+  eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
+
+  // Specify a delay in seconds to wait for a debugger to attach, after
+  // creating or injecting into a process, before continuing to allow it to run.
+  //
+  // 0 indicates no delay, and the process will run immediately after injection
+  //
+  // Default - 0 seconds
+  //
+  eRENDERDOC_Option_DelayForDebugger = 5,
+
+  // Verify buffer access. This includes checking the memory returned by a Map() call to
+  // detect any out-of-bounds modification, as well as initialising buffers with undefined contents
+  // to a marker value to catch use of uninitialised memory.
+  //
+  // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
+  // not do the same kind of interception & checking and undefined contents are really undefined.
+  //
+  // Default - disabled
+  //
+  // 1 - Verify buffer access
+  // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
+  //     RenderDoc.
+  eRENDERDOC_Option_VerifyBufferAccess = 6,
+
+  // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
+  // This option now controls the filling of uninitialised buffers with 0xdddddddd which was
+  // previously always enabled
+  eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
+
+  // Hooks any system API calls that create child processes, and injects
+  // RenderDoc into them recursively with the same options.
+  //
+  // Default - disabled
+  //
+  // 1 - Hooks into spawned child processes
+  // 0 - Child processes are not hooked by RenderDoc
+  eRENDERDOC_Option_HookIntoChildren = 7,
+
+  // By default RenderDoc only includes resources in the final capture necessary
+  // for that frame, this allows you to override that behaviour.
+  //
+  // Default - disabled
+  //
+  // 1 - all live resources at the time of capture are included in the capture
+  //     and available for inspection
+  // 0 - only the resources referenced by the captured frame are included
+  eRENDERDOC_Option_RefAllResources = 8,
+
+  // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
+  // getting it will be ignored, to allow compatibility with older versions.
+  // In v1.1 the option acts as if it's always enabled.
+  //
+  // By default RenderDoc skips saving initial states for resources where the
+  // previous contents don't appear to be used, assuming that writes before
+  // reads indicate previous contents aren't used.
+  //
+  // Default - disabled
+  //
+  // 1 - initial contents at the start of each captured frame are saved, even if
+  //     they are later overwritten or cleared before being used.
+  // 0 - unless a read is detected, initial contents will not be saved and will
+  //     appear as black or empty data.
+  eRENDERDOC_Option_SaveAllInitials = 9,
+
+  // In APIs that allow for the recording of command lists to be replayed later,
+  // RenderDoc may choose to not capture command lists before a frame capture is
+  // triggered, to reduce overheads. This means any command lists recorded once
+  // and replayed many times will not be available and may cause a failure to
+  // capture.
+  //
+  // NOTE: This is only true for APIs where multithreading is difficult or
+  // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
+  // and always capture all command lists since the API is heavily oriented
+  // around it and the overheads have been reduced by API design.
+  //
+  // 1 - All command lists are captured from the start of the application
+  // 0 - Command lists are only captured if their recording begins during
+  //     the period when a frame capture is in progress.
+  eRENDERDOC_Option_CaptureAllCmdLists = 10,
+
+  // Mute API debugging output when the API validation mode option is enabled
+  //
+  // Default - enabled
+  //
+  // 1 - Mute any API debug messages from being displayed or passed through
+  // 0 - API debugging is displayed as normal
+  eRENDERDOC_Option_DebugOutputMute = 11,
+
+  // Option to allow vendor extensions to be used even when they may be
+  // incompatible with RenderDoc and cause corrupted replays or crashes.
+  //
+  // Default - inactive
+  //
+  // No values are documented, this option should only be used when absolutely
+  // necessary as directed by a RenderDoc developer.
+  eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
+
+} RENDERDOC_CaptureOption;
+
+// Sets an option that controls how RenderDoc behaves on capture.
+// The U32 variant takes the new value as a uint32_t, the F32 variant as a float.
+//
+// Returns 1 if the option and value are valid
+// Returns 0 if either is invalid and the option is unchanged
+typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
+typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
+
+// Gets the current value of an option as a uint32_t
+//
+// If the option is invalid, 0xffffffff is returned
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
+
+// Gets the current value of an option as a float
+//
+// If the option is invalid, -FLT_MAX is returned
+typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
+
+// Key identifiers accepted by the SetFocusToggleKeys / SetCaptureKeys function pointer types.
+// Digits and upper-case letters use their ASCII codes; all other keys are numbered
+// sequentially after eRENDERDOC_Key_NonPrintable (0x100). eRENDERDOC_Key_Max is the last
+// enumerator.
+typedef enum RENDERDOC_InputButton {
+  // '0' - '9' matches ASCII values
+  eRENDERDOC_Key_0 = 0x30,
+  eRENDERDOC_Key_1 = 0x31,
+  eRENDERDOC_Key_2 = 0x32,
+  eRENDERDOC_Key_3 = 0x33,
+  eRENDERDOC_Key_4 = 0x34,
+  eRENDERDOC_Key_5 = 0x35,
+  eRENDERDOC_Key_6 = 0x36,
+  eRENDERDOC_Key_7 = 0x37,
+  eRENDERDOC_Key_8 = 0x38,
+  eRENDERDOC_Key_9 = 0x39,
+
+  // 'A' - 'Z' matches ASCII values
+  eRENDERDOC_Key_A = 0x41,
+  eRENDERDOC_Key_B = 0x42,
+  eRENDERDOC_Key_C = 0x43,
+  eRENDERDOC_Key_D = 0x44,
+  eRENDERDOC_Key_E = 0x45,
+  eRENDERDOC_Key_F = 0x46,
+  eRENDERDOC_Key_G = 0x47,
+  eRENDERDOC_Key_H = 0x48,
+  eRENDERDOC_Key_I = 0x49,
+  eRENDERDOC_Key_J = 0x4A,
+  eRENDERDOC_Key_K = 0x4B,
+  eRENDERDOC_Key_L = 0x4C,
+  eRENDERDOC_Key_M = 0x4D,
+  eRENDERDOC_Key_N = 0x4E,
+  eRENDERDOC_Key_O = 0x4F,
+  eRENDERDOC_Key_P = 0x50,
+  eRENDERDOC_Key_Q = 0x51,
+  eRENDERDOC_Key_R = 0x52,
+  eRENDERDOC_Key_S = 0x53,
+  eRENDERDOC_Key_T = 0x54,
+  eRENDERDOC_Key_U = 0x55,
+  eRENDERDOC_Key_V = 0x56,
+  eRENDERDOC_Key_W = 0x57,
+  eRENDERDOC_Key_X = 0x58,
+  eRENDERDOC_Key_Y = 0x59,
+  eRENDERDOC_Key_Z = 0x5A,
+
+  // leave the rest of the ASCII range free
+  // in case we want to use it later
+  eRENDERDOC_Key_NonPrintable = 0x100,
+
+  eRENDERDOC_Key_Divide,
+  eRENDERDOC_Key_Multiply,
+  eRENDERDOC_Key_Subtract,
+  eRENDERDOC_Key_Plus,
+
+  eRENDERDOC_Key_F1,
+  eRENDERDOC_Key_F2,
+  eRENDERDOC_Key_F3,
+  eRENDERDOC_Key_F4,
+  eRENDERDOC_Key_F5,
+  eRENDERDOC_Key_F6,
+  eRENDERDOC_Key_F7,
+  eRENDERDOC_Key_F8,
+  eRENDERDOC_Key_F9,
+  eRENDERDOC_Key_F10,
+  eRENDERDOC_Key_F11,
+  eRENDERDOC_Key_F12,
+
+  eRENDERDOC_Key_Home,
+  eRENDERDOC_Key_End,
+  eRENDERDOC_Key_Insert,
+  eRENDERDOC_Key_Delete,
+  eRENDERDOC_Key_PageUp,
+  eRENDERDOC_Key_PageDn,
+
+  eRENDERDOC_Key_Backspace,
+  eRENDERDOC_Key_Tab,
+  eRENDERDOC_Key_PrtScrn,
+  eRENDERDOC_Key_Pause,
+
+  eRENDERDOC_Key_Max,
+} RENDERDOC_InputButton;
+
+// Sets which key or keys can be used to toggle focus between multiple windows
+//
+// keys points to an array of num key identifiers.
+// If keys is NULL or num is 0, toggle keys will be disabled
+typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
+
+// Sets which key or keys can be used to capture the next frame
+//
+// keys points to an array of num key identifiers.
+// If keys is NULL or num is 0, capture keys will be disabled
+typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
+
+// Bit flags for the in-application overlay. They are combined into a single mask.
+//
+// NOTE(review): eRENDERDOC_Overlay_All is ~0U, which does not fit in an int; strict ISO C
+// before C23 requires enumerators to be representable as int, so this presumably relies on
+// a compiler extension - confirm if building with pedantic warnings.
+typedef enum RENDERDOC_OverlayBits {
+  // This single bit controls whether the overlay is enabled or disabled globally
+  eRENDERDOC_Overlay_Enabled = 0x1,
+
+  // Show the average framerate over several seconds as well as min/max
+  eRENDERDOC_Overlay_FrameRate = 0x2,
+
+  // Show the current frame number
+  eRENDERDOC_Overlay_FrameNumber = 0x4,
+
+  // Show a list of recent captures, and how many captures have been made
+  eRENDERDOC_Overlay_CaptureList = 0x8,
+
+  // Default values for the overlay mask
+  eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
+                                eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
+
+  // Enable all bits
+  eRENDERDOC_Overlay_All = ~0U,
+
+  // Disable all bits
+  eRENDERDOC_Overlay_None = 0,
+} RENDERDOC_OverlayBits;
+
+// returns the overlay bits that have been set, as a mask of RENDERDOC_OverlayBits values
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)();
+// modifies the overlay bits using an AND mask and an OR mask
+// (presumably the AND mask is applied first, then the OR mask - TODO confirm)
+typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
+
+// This function will attempt to remove RenderDoc's hooks in the application.
+//
+// Note that this can only work correctly if done immediately after
+// the module is loaded, before any API work happens. RenderDoc will remove its
+// injected hooks and shut down. Behaviour is undefined if this is called
+// after any API functions have been called, and there is still no guarantee of
+// success.
+typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)();
+
+// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
+// Shutdown is the old name of RemoveHooks; both names refer to the same function pointer type.
+typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
+
+// This function will unload RenderDoc's crash handler.
+//
+// If you use your own crash handler and don't want RenderDoc's handler to
+// intercede, you can call this function to unload it and any unhandled
+// exceptions will pass to the next handler.
+typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)();
+
+// Sets the capture file path template
+//
+// pathtemplate is a UTF-8 string that gives a template for how captures will be named
+// and where they will be saved.
+//
+// Any extension is stripped off the path, and captures are saved in the directory
+// specified, and named with the filename and the frame number appended. If the
+// directory does not exist it will be created, including any parent directories.
+//
+// If pathtemplate is NULL, the template will remain unchanged
+//
+// Example:
+//
+// SetCaptureFilePathTemplate("my_captures/example");
+//
+// Capture #1 -> my_captures/example_frame123.rdc
+// Capture #2 -> my_captures/example_frame456.rdc
+typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
+
+// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8
+// string
+typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)();
+
+// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
+// The "LogFile" names are the old names of the "CaptureFile" typedefs above.
+typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
+typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
+
+// returns the number of captures that have been made
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)();
+
+// This function returns the details of a capture, by index. New captures are added
+// to the end of the list.
+//
+// filename will be filled with the absolute path to the capture file, as a UTF-8 string
+// pathlength will be written with the length in bytes of the filename string
+// timestamp will be written with the time of the capture, in seconds since the Unix epoch
+//
+// Any of the parameters can be NULL and they'll be skipped.
+//
+// The function will return 1 if the capture index is valid, or 0 if the index is invalid
+// If the index is invalid, the values will be unchanged
+//
+// Note: when captures are deleted in the UI they will remain in this list, so the
+// capture path may not exist anymore.
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
+                                                      uint32_t *pathlength, uint64_t *timestamp);
+
+// Sets the comments associated with a capture file. These comments are displayed in the
+// UI program when opening.
+//
+// filePath should be a path to the capture file to add comments to. If set to NULL or ""
+// the most recently created capture file will be used instead.
+// comments should be a NULL-terminated UTF-8 string to add as comments.
+//
+// Any existing comments will be overwritten.
+typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
+                                                              const char *comments);
+
+// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)();
+
+// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
+// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
+// backwards compatibility with old code, it is castable either way since it's ABI compatible
+// as the same function pointer type.
+typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
+
+// This function will launch the Replay UI associated with the RenderDoc library injected
+// into the running application.
+//
+// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
+// to connect to this application
+// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open
+// if cmdline is NULL, the command line will be empty.
+//
+// returns the PID of the replay UI if successful, 0 if not successful.
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
+                                                          const char *cmdline);
+
+// RenderDoc can return a higher version than requested if it's backwards compatible,
+// this function returns the actual version returned. If a parameter is NULL, it will be
+// ignored and the others will be filled out.
+typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
+
+//////////////////////////////////////////////////////////////////////////
+// Capturing functions
+//
+
+// A device pointer is a pointer to the API's root handle.
+//
+// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
+typedef void *RENDERDOC_DevicePointer;
+
+// A window handle is the OS's native window handle
+//
+// This would be an HWND, GLXDrawable, etc
+typedef void *RENDERDOC_WindowHandle;
+
+// A helper macro for Vulkan, where the device handle cannot be used directly.
+//
+// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
+//
+// Specifically, the value needed is the dispatch table pointer, which sits as the first
+// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
+// indirect once.
+//
+// The argument is parenthesized and evaluated exactly once, and it is dereferenced, so it
+// must not be NULL.
+#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
+
+// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
+// respond to keypresses. Neither parameter can be NULL
+typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
+                                                       RENDERDOC_WindowHandle wndHandle);
+
+// capture the next frame on whichever window and API is currently considered active
+typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)();
+
+// capture the next N frames on whichever window and API is currently considered active
+// (N is the numFrames argument)
+typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
+
+// When choosing either a device pointer or a window handle to capture, you can pass NULL.
+// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
+// any API rendering to a specific window, or a specific API instance rendering to any window,
+// or in the simplest case of one window and one API, you can just pass NULL for both.
+//
+// In either case, if there are two or more possible matching (device,window) pairs it
+// is undefined which one will be captured.
+//
+// Note: for headless rendering you can pass NULL for the window handle and either specify
+// a device pointer or leave it NULL as above.
+
+// Immediately starts capturing API calls on the specified device pointer and window handle.
+//
+// If there is no matching thing to capture (e.g. no supported API has been initialised),
+// this will do nothing.
+//
+// The results are undefined (including crashes) if two captures are started overlapping,
+// even on separate devices and/or windows.
+typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
+                                                         RENDERDOC_WindowHandle wndHandle);
+
+// Returns whether or not a frame capture is currently ongoing anywhere.
+//
+// This will return 1 if a capture is ongoing, and 0 if there is no capture running
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)();
+
+// Ends capturing immediately.
+//
+// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
+                                                           RENDERDOC_WindowHandle wndHandle);
+
+// Ends capturing immediately and discards any data stored without saving to disk.
+//
+// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
+// was in progress
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
+                                                               RENDERDOC_WindowHandle wndHandle);
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// RenderDoc API versions
+//
+
+// RenderDoc uses semantic versioning (http://semver.org/).
+//
+// MAJOR version is incremented when incompatible API changes happen.
+// MINOR version is incremented when functionality is added in a backwards-compatible manner.
+// PATCH version is incremented when backwards-compatible bug fixes happen.
+//
+// Note that this means the API returned can be higher than the one you might have requested.
+// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
+// instead of 1.0.0. You can check this with the GetAPIVersion entry point
+//
+// Each enumerator encodes its version as MAJOR * 10000 + MINOR * 100 + PATCH.
+typedef enum RENDERDOC_Version {
+  eRENDERDOC_API_Version_1_0_0 = 10000,    // RENDERDOC_API_1_0_0 = 1 00 00
+  eRENDERDOC_API_Version_1_0_1 = 10001,    // RENDERDOC_API_1_0_1 = 1 00 01
+  eRENDERDOC_API_Version_1_0_2 = 10002,    // RENDERDOC_API_1_0_2 = 1 00 02
+  eRENDERDOC_API_Version_1_1_0 = 10100,    // RENDERDOC_API_1_1_0 = 1 01 00
+  eRENDERDOC_API_Version_1_1_1 = 10101,    // RENDERDOC_API_1_1_1 = 1 01 01
+  eRENDERDOC_API_Version_1_1_2 = 10102,    // RENDERDOC_API_1_1_2 = 1 01 02
+  eRENDERDOC_API_Version_1_2_0 = 10200,    // RENDERDOC_API_1_2_0 = 1 02 00
+  eRENDERDOC_API_Version_1_3_0 = 10300,    // RENDERDOC_API_1_3_0 = 1 03 00
+  eRENDERDOC_API_Version_1_4_0 = 10400,    // RENDERDOC_API_1_4_0 = 1 04 00
+  eRENDERDOC_API_Version_1_4_1 = 10401,    // RENDERDOC_API_1_4_1 = 1 04 01
+} RENDERDOC_Version;
+
+// API version changelog:
+//
+// 1.0.0 - initial release
+// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
+//         by keypress or TriggerCapture, instead of Start/EndFrameCapture.
+// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
+// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
+//         function pointer is added to the end of the struct, the original layout is identical
+// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
+//         replay/remote server concept in replay UI)
+// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
+//         are captures and not debug logging files. This is the first API version in the v1.0
+//         branch.
+// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
+//         displayed in the UI program on load.
+// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
+//         which allows users to opt-in to allowing unsupported vendor extensions to function.
+//         Should be used at the user's own risk.
+//         Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
+//         eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
+//         0xdddddddd of uninitialised buffer contents.
+// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
+//         capturing without saving anything to disk.
+// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
+
+// Table of function pointers making up the RenderDoc in-application API.
+//
+// Functions introduced by later versions are appended at the end of the struct, and renamed
+// functions are exposed under both names through anonymous unions, so the member layout
+// stays the same for every 1.x version.
+typedef struct RENDERDOC_API_1_4_1
+{
+  pRENDERDOC_GetAPIVersion GetAPIVersion;
+
+  pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
+  pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
+
+  pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
+  pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
+
+  pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
+  pRENDERDOC_SetCaptureKeys SetCaptureKeys;
+
+  pRENDERDOC_GetOverlayBits GetOverlayBits;
+  pRENDERDOC_MaskOverlayBits MaskOverlayBits;
+
+  // Shutdown was renamed to RemoveHooks in 1.4.1.
+  // This union allows old code to continue compiling without changes
+  union
+  {
+    // deprecated name
+    pRENDERDOC_Shutdown Shutdown;
+    // current name
+    pRENDERDOC_RemoveHooks RemoveHooks;
+  };
+  pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
+
+  // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
+  // These unions allow old code to continue compiling without changes
+  union
+  {
+    // deprecated name
+    pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
+    // current name
+    pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
+  };
+  union
+  {
+    // deprecated name
+    pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
+    // current name
+    pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
+  };
+
+  pRENDERDOC_GetNumCaptures GetNumCaptures;
+  pRENDERDOC_GetCapture GetCapture;
+
+  pRENDERDOC_TriggerCapture TriggerCapture;
+
+  // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
+  // This union allows old code to continue compiling without changes
+  union
+  {
+    // deprecated name
+    pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
+    // current name
+    pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
+  };
+  pRENDERDOC_LaunchReplayUI LaunchReplayUI;
+
+  pRENDERDOC_SetActiveWindow SetActiveWindow;
+
+  pRENDERDOC_StartFrameCapture StartFrameCapture;
+  pRENDERDOC_IsFrameCapturing IsFrameCapturing;
+  pRENDERDOC_EndFrameCapture EndFrameCapture;
+
+  // new function in 1.1.0
+  pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
+
+  // new function in 1.2.0
+  pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
+
+  // new function in 1.4.0
+  pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
+} RENDERDOC_API_1_4_1;
+
+// Every earlier 1.x version name is an alias of the newest struct, so code written against
+// an older version name keeps compiling against this header.
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_0_0;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_0_1;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_0_2;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_1_0;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_1_1;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_1_2;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_2_0;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_3_0;
+typedef RENDERDOC_API_1_4_1 RENDERDOC_API_1_4_0;
+
+//////////////////////////////////////////////////////////////////////////////////////////////////
+// RenderDoc API entry point
+//
+// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
+//
+// The exported symbol name is the typedef name without the leading 'p' - "RENDERDOC_GetAPI"
+//
+// This function is not thread safe, and should not be called on multiple threads at once.
+// Ideally, call this once as early as possible in your application's startup, before doing
+// any API work, since some configuration functionality etc has to be done also before
+// initialising any APIs.
+//
+// Parameters:
+//   version is a single value from the RENDERDOC_Version above.
+//
+//   outAPIPointers will be filled out with a pointer to the corresponding struct of function
+//   pointers.
+//
+// Returns:
+//   1 - if the outAPIPointers has been filled with a pointer to the API struct requested
+//   0 - if the requested version is not supported or the arguments are invalid.
+//
+typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
+
+#ifdef __cplusplus
+}    // extern "C"
+#endif
--- a/include/private/vkd3d_atomic.h
+++ b/include/private/vkd3d_atomic.h
@ -0,0 +1,283 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_ATOMIC_H
+#define __VKD3D_ATOMIC_H
+
+#include <stdint.h>
+
+#if defined(_MSC_VER)
+
+# include <intrin.h>
+
+/* Memory orderings accepted by the vkd3d_atomic_* helpers (MSVC build).
+ * The enumerators carry no special values here; the helpers below switch on them. */
+typedef enum
+{
+    vkd3d_memory_order_relaxed,
+    vkd3d_memory_order_consume,
+    vkd3d_memory_order_acquire,
+    vkd3d_memory_order_release,
+    vkd3d_memory_order_acq_rel,
+    vkd3d_memory_order_seq_cst,
+} vkd3d_memory_order;
+
+/* Barrier primitive used by the load/store helpers; maps to MSVC's _ReadWriteBarrier(). */
+# define vkd3d_atomic_rw_barrier() _ReadWriteBarrier()
+
+/* Issues the barrier that follows an atomic load.
+ * consume, acquire and seq_cst loads get vkd3d_atomic_rw_barrier(); any other order
+ * (relaxed included) gets no barrier at all. */
+FORCEINLINE void vkd3d_atomic_load_barrier(vkd3d_memory_order order)
+{
+    const int needs_barrier = order == vkd3d_memory_order_consume
+            || order == vkd3d_memory_order_acquire
+            || order == vkd3d_memory_order_seq_cst;
+
+    if (needs_barrier)
+        vkd3d_atomic_rw_barrier();
+}
+
+/* Redefinitions for invalid memory orders */
+#define InterlockedExchangeRelease     InterlockedExchange
+#define InterlockedExchangeRelease64   InterlockedExchange64
+
+/* Expands to a switch statement that calls the variant of `intrinsic` matching `order`
+ * (NoFence, Acquire, Release, or the unsuffixed full-barrier form) and stores its return
+ * value in `result`. `suffix` is pasted after the variant name (empty for the 32-bit
+ * intrinsics, 64 for the 64-bit ones); the remaining arguments are forwarded unchanged.
+ *
+ * An order outside the enum takes the full-barrier path, so `result` is always assigned
+ * and never read uninitialized by the caller. */
+#define vkd3d_atomic_choose_intrinsic(order, result, intrinsic, suffix, ...)                        \
+    switch (order)                                                                                  \
+    {                                                                                               \
+        case vkd3d_memory_order_relaxed: result = intrinsic##NoFence##suffix  (__VA_ARGS__); break; \
+        case vkd3d_memory_order_consume:                                                            \
+        case vkd3d_memory_order_acquire: result = intrinsic##Acquire##suffix (__VA_ARGS__); break;  \
+        case vkd3d_memory_order_release: result = intrinsic##Release##suffix (__VA_ARGS__); break;  \
+        case vkd3d_memory_order_acq_rel:                                                            \
+        case vkd3d_memory_order_seq_cst:                                                            \
+        default:                         result = intrinsic##suffix          (__VA_ARGS__); break;  \
+    }
+
+/* Loads *target through a volatile access and then issues the barrier required by
+ * `order` (see vkd3d_atomic_load_barrier). Returns the value that was read. */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_load_explicit(uint32_t *target, vkd3d_memory_order order)
+{
+    uint32_t value = *((volatile uint32_t*)target);
+    /* The barrier must come after the read so later accesses are not hoisted above it. */
+    vkd3d_atomic_load_barrier(order);
+    return value;
+}
+
+/* Stores `value` into *target with the requested ordering.
+ * - release: barrier first, then a volatile store.
+ * - relaxed: plain volatile store.
+ * - seq_cst and every other order: InterlockedExchange, discarding the returned value. */
+FORCEINLINE void vkd3d_atomic_uint32_store_explicit(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    switch (order)
+    {
+        case vkd3d_memory_order_release: vkd3d_atomic_rw_barrier(); /* fallthrough */
+        case vkd3d_memory_order_relaxed: *((volatile uint32_t*)target) = value; break;
+        default:
+        case vkd3d_memory_order_seq_cst:
+            (void) InterlockedExchange((LONG*) target, value);
+    }
+}
+
+/* Atomically replaces *target with `value` using the InterlockedExchange variant selected
+ * by `order`, and returns what the intrinsic returned (presumably the previous value of
+ * *target - confirm against the Win32 documentation). */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_exchange_explicit(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedExchange, /* no suffix */,(LONG*)target, value);
+    return result;
+}
+
+/* Atomically increments *target using the InterlockedIncrement variant selected by `order`
+ * and returns the intrinsic's result (presumably the incremented value - confirm against
+ * the Win32 documentation). */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_increment(uint32_t *target, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedIncrement, /* no suffix */,(LONG*)target);
+    return result;
+}
+
+/* Atomically decrements *target using the InterlockedDecrement variant selected by `order`
+ * and returns the intrinsic's result (presumably the decremented value - confirm against
+ * the Win32 documentation). */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_decrement(uint32_t *target, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedDecrement, /* no suffix */,(LONG*)target);
+    return result;
+}
+
+/* Atomically adds `value` to *target using the InterlockedAdd variant selected by `order`
+ * and returns the intrinsic's result (presumably the sum, i.e. the new value of *target -
+ * confirm against the Win32 documentation). */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_add(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedAdd, /* no suffix */,(LONG*)target, value);
+    return result;
+}
+
+/* Atomically subtracts `value` from *target by adding its two's-complement negation with
+ * the InterlockedAdd variant selected by `order`. Returns the intrinsic's result.
+ *
+ * The negation is done in unsigned arithmetic (0u - value): negating (int32_t)value would
+ * be signed overflow, and therefore undefined behaviour, for value == 0x80000000. */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_sub(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedAdd, /* no suffix */,(LONG*)target, (LONG)(0u - value));
+    return result;
+}
+
+/* Atomically ANDs `value` into *target using the InterlockedAnd variant selected by `order`.
+ * Returns the NEW value of *target, which is what the GCC/Clang implementation
+ * (__atomic_and_fetch) returns. */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_and(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedAnd, /* no suffix */,(LONG*)target, value);
+    /* InterlockedAnd hands back the value *target held BEFORE the operation;
+     * re-apply the mask to get the post-operation value. */
+    return result & value;
+}
+
+/* Atomically ORs `value` into *target using the InterlockedOr variant selected by `order`.
+ * Returns the NEW value of *target, which is what the GCC/Clang implementation
+ * (__atomic_or_fetch) returns. */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_or(uint32_t *target, uint32_t value, vkd3d_memory_order order)
+{
+    uint32_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedOr, /* no suffix */,(LONG*)target, value);
+    /* InterlockedOr hands back the value *target held BEFORE the operation;
+     * re-apply the mask to get the post-operation value. */
+    return result | value;
+}
+
+/* Compare-and-swap on a 32-bit value: asks InterlockedCompareExchange to store `desired`
+ * into *target if *target equals `expected`. Returns the intrinsic's result (presumably the
+ * value *target held before the call - confirm against the Win32 documentation).
+ *
+ * Only success_order selects the intrinsic variant; fail_order is accepted but not used. */
+FORCEINLINE uint32_t vkd3d_atomic_uint32_compare_exchange(uint32_t* target, uint32_t expected, uint32_t desired,
+        vkd3d_memory_order success_order, vkd3d_memory_order fail_order)
+{
+    uint32_t result;
+    /* InterlockedCompareExchange has desired (ExChange) first, then expected (Comperand) */
+    vkd3d_atomic_choose_intrinsic(success_order, result, InterlockedCompareExchange, /* no suffix */, (LONG*)target, desired, expected);
+    return result;
+}
+
+/* Loads *target through a volatile access and then issues the barrier required by
+ * `order` (see vkd3d_atomic_load_barrier). Returns the value that was read.
+ *
+ * NOTE(review): on 32-bit targets a plain volatile 64-bit read may not be a single
+ * indivisible access - confirm whether 32-bit MSVC builds rely on this helper. */
+FORCEINLINE uint64_t vkd3d_atomic_uint64_load_explicit(uint64_t *target, vkd3d_memory_order order)
+{
+    uint64_t value = *((volatile uint64_t*)target);
+    /* The barrier must come after the read so later accesses are not hoisted above it. */
+    vkd3d_atomic_load_barrier(order);
+    return value;
+}
+
+/* Stores `value` into *target with the requested ordering.
+ * - release: barrier first, then a volatile store.
+ * - relaxed: plain volatile store.
+ * - seq_cst and every other order: InterlockedExchange64, discarding the returned value.
+ *
+ * NOTE(review): on 32-bit targets the plain volatile 64-bit store may not be a single
+ * indivisible access - confirm whether 32-bit MSVC builds rely on this helper. */
+FORCEINLINE void vkd3d_atomic_uint64_store_explicit(uint64_t *target, uint64_t value, vkd3d_memory_order order)
+{
+    switch (order)
+    {
+        case vkd3d_memory_order_release: vkd3d_atomic_rw_barrier(); /* fallthrough */
+        case vkd3d_memory_order_relaxed: *((volatile uint64_t*)target) = value; break;
+        default:
+        case vkd3d_memory_order_seq_cst:
+            (void) InterlockedExchange64((LONG64*) target, value);
+    }
+}
+
+/* 64-bit counterpart of vkd3d_atomic_uint32_exchange_explicit: atomically replaces *target
+ * with `value` using the InterlockedExchange...64 variant selected by `order` and returns
+ * the intrinsic's result. */
+FORCEINLINE uint64_t vkd3d_atomic_uint64_exchange_explicit(uint64_t *target, uint64_t value, vkd3d_memory_order order)
+{
+    uint64_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedExchange, 64, (LONG64*)target, value);
+    return result;
+}
+
+/* Atomically increments the 64-bit *target using the InterlockedIncrement...64 variant
+ * selected by `order` and returns the intrinsic's result. */
+FORCEINLINE uint64_t vkd3d_atomic_uint64_increment(uint64_t *target, vkd3d_memory_order order)
+{
+    uint64_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedIncrement, 64, (LONG64*)target);
+    return result;
+}
+
+/* Atomically decrements the 64-bit *target using the InterlockedDecrement...64 variant
+ * selected by `order` and returns the intrinsic's result. */
+FORCEINLINE uint64_t vkd3d_atomic_uint64_decrement(uint64_t *target, vkd3d_memory_order order)
+{
+    uint64_t result;
+    vkd3d_atomic_choose_intrinsic(order, result, InterlockedDecrement, 64, (LONG64*)target);
+    return result;
+}
+
+/* Compare-and-swap on a 64-bit value: asks InterlockedCompareExchange...64 to store
+ * `desired` into *target if *target equals `expected`, and returns the intrinsic's result.
+ *
+ * Only success_order selects the intrinsic variant; fail_order is accepted but not used. */
+FORCEINLINE uint64_t vkd3d_atomic_uint64_compare_exchange(UINT64* target, uint64_t expected, uint64_t desired,
+        vkd3d_memory_order success_order, vkd3d_memory_order fail_order)
+{
+    uint64_t result;
+    /* InterlockedCompareExchange has desired (ExChange) first, then expected (Comperand). Use UINT64 to mark 8-byte alignment. */
+    vkd3d_atomic_choose_intrinsic(success_order, result, InterlockedCompareExchange, 64, (LONG64*)target, desired, expected);
+    return result;
+}
+
+#elif defined(__GNUC__) || defined(__clang__)
+
+/* Memory orderings accepted by the vkd3d_atomic_* helpers (GCC/Clang build).
+ * Each enumerator equals the matching __ATOMIC_* constant, so values can be handed
+ * straight to the __atomic_* builtins. */
+typedef enum
+{
+    vkd3d_memory_order_relaxed = __ATOMIC_RELAXED,
+    vkd3d_memory_order_consume = __ATOMIC_CONSUME,
+    vkd3d_memory_order_acquire = __ATOMIC_ACQUIRE,
+    vkd3d_memory_order_release = __ATOMIC_RELEASE,
+    vkd3d_memory_order_acq_rel = __ATOMIC_ACQ_REL,
+    vkd3d_memory_order_seq_cst = __ATOMIC_SEQ_CST,
+} vkd3d_memory_order;
+
+/* Width-agnostic wrappers over the __atomic_* builtins. The read-modify-write wrappers use
+ * the *_fetch forms of the builtins, so they evaluate to the value AFTER the operation. */
+# define vkd3d_atomic_generic_load_explicit(target, order)            __atomic_load_n(target, order)
+# define vkd3d_atomic_generic_store_explicit(target, value, order)    __atomic_store_n(target, value, order)
+# define vkd3d_atomic_generic_exchange_explicit(target, value, order) __atomic_exchange_n(target, value, order)
+# define vkd3d_atomic_generic_increment(target, order)                __atomic_add_fetch(target, 1, order)
+# define vkd3d_atomic_generic_decrement(target, order)                __atomic_sub_fetch(target, 1, order)
+# define vkd3d_atomic_generic_add(target, value, order)               __atomic_add_fetch(target, value, order)
+# define vkd3d_atomic_generic_sub(target, value, order)               __atomic_sub_fetch(target, value, order)
+# define vkd3d_atomic_generic_and(target, value, order)               __atomic_and_fetch(target, value, order)
+# define vkd3d_atomic_generic_or(target, value, order)                __atomic_or_fetch(target, value, order)
+
+/* 32-bit entry points: straight forwards to the generic wrappers above. */
+# define vkd3d_atomic_uint32_load_explicit(target, order)            vkd3d_atomic_generic_load_explicit(target, order)
+# define vkd3d_atomic_uint32_store_explicit(target, value, order)    vkd3d_atomic_generic_store_explicit(target, value, order)
+# define vkd3d_atomic_uint32_exchange_explicit(target, value, order) vkd3d_atomic_generic_exchange_explicit(target, value, order)
+# define vkd3d_atomic_uint32_increment(target, order)                vkd3d_atomic_generic_increment(target, order)
+# define vkd3d_atomic_uint32_decrement(target, order)                vkd3d_atomic_generic_decrement(target, order)
+# define vkd3d_atomic_uint32_add(target, value, order)               vkd3d_atomic_generic_add(target, value, order)
+# define vkd3d_atomic_uint32_sub(target, value, order)               vkd3d_atomic_generic_sub(target, value, order)
+# define vkd3d_atomic_uint32_and(target, value, order)               vkd3d_atomic_generic_and(target, value, order)
+# define vkd3d_atomic_uint32_or(target, value, order)                vkd3d_atomic_generic_or(target, value, order)
+/* Strong compare-and-swap on a 32-bit value: stores `desired` into *target when *target
+ * equals `expected`. Returns the value observed in *target: `expected` when the swap
+ * happened, the conflicting value otherwise. */
+static inline uint32_t vkd3d_atomic_uint32_compare_exchange(uint32_t* target, uint32_t expected, uint32_t desired,
+        vkd3d_memory_order success_order, vkd3d_memory_order fail_order)
+{
+    uint32_t observed = expected;
+
+    /* weak == 0 selects the strong variant. On a mismatch the builtin overwrites
+     * `observed` with the current contents of *target; on success it is left untouched. */
+    __atomic_compare_exchange_n(target, &observed, desired, 0, success_order, fail_order);
+    return observed;
+}
+
+/* 64-bit entry points: straight forwards to the vkd3d_atomic_generic_* wrappers. */
+# define vkd3d_atomic_uint64_load_explicit(target, order)            vkd3d_atomic_generic_load_explicit(target, order)
+# define vkd3d_atomic_uint64_store_explicit(target, value, order)    vkd3d_atomic_generic_store_explicit(target, value, order)
+# define vkd3d_atomic_uint64_exchange_explicit(target, value, order) vkd3d_atomic_generic_exchange_explicit(target, value, order)
+# define vkd3d_atomic_uint64_increment(target, order)                vkd3d_atomic_generic_increment(target, order)
+# define vkd3d_atomic_uint64_decrement(target, order)                vkd3d_atomic_generic_decrement(target, order)
+/* Strong compare-and-swap on a 64-bit value: stores `desired` into *target when *target
+ * equals `expected`. Returns the value observed in *target: `expected` when the swap
+ * happened, the conflicting value otherwise. The UINT64 parameter type is used to mark
+ * 8-byte alignment. */
+static inline uint64_t vkd3d_atomic_uint64_compare_exchange(UINT64* target, uint64_t expected, uint64_t desired,
+        vkd3d_memory_order success_order, vkd3d_memory_order fail_order)
+{
+    uint64_t observed = expected;
+
+    /* weak == 0 selects the strong variant. On a mismatch the builtin overwrites
+     * `observed` with the current contents of *target; on success it is left untouched. */
+    __atomic_compare_exchange_n(target, &observed, desired, 0, success_order, fail_order);
+    return observed;
+}
+
+/* When not building with MinGW, provide the Win32 Interlocked* names used elsewhere in
+ * terms of the helpers above, always with seq_cst ordering (acquire on CAS failure).
+ * Note the argument order: the Interlocked form takes (target, desired, expected) while
+ * the vkd3d helpers take (target, expected, desired). */
+# ifndef __MINGW32__
+#  define InterlockedIncrement(target)                            vkd3d_atomic_uint32_increment(target, vkd3d_memory_order_seq_cst)
+#  define InterlockedDecrement(target)                            vkd3d_atomic_uint32_decrement(target, vkd3d_memory_order_seq_cst)
+#  define InterlockedCompareExchange(target, desired, expected)   vkd3d_atomic_uint32_compare_exchange(target, expected, desired, vkd3d_memory_order_seq_cst, vkd3d_memory_order_acquire)
+
+#  define InterlockedIncrement64(target)                          vkd3d_atomic_uint64_increment(target, vkd3d_memory_order_seq_cst)
+#  define InterlockedDecrement64(target)                          vkd3d_atomic_uint64_decrement(target, vkd3d_memory_order_seq_cst)
+#  define InterlockedCompareExchange64(target, desired, expected) vkd3d_atomic_uint64_compare_exchange(target, expected, desired, vkd3d_memory_order_seq_cst, vkd3d_memory_order_acquire)
+# endif
+
+#else
+
+# error "No atomics for this platform"
+
+#endif
+
+/* Pointer-sized atomics, routed to the 64-bit or 32-bit helpers depending on the width of
+ * intptr_t. Results of value-returning helpers are cast back to void *.
+ *
+ * Every macro parameter is parenthesized before it is cast, so compound argument
+ * expressions (e.g. `base + i` or `cond ? a : b`) are cast as a whole rather than having
+ * the cast bind to their first operand only. */
+#if INTPTR_MAX == INT64_MAX
+# define vkd3d_atomic_ptr_load_explicit(target, order)                       ((void *)vkd3d_atomic_uint64_load_explicit((uint64_t *)(target), order))
+# define vkd3d_atomic_ptr_store_explicit(target, value, order)               (vkd3d_atomic_uint64_store_explicit((uint64_t *)(target), (uint64_t)(value), order))
+# define vkd3d_atomic_ptr_exchange_explicit(target, value, order)            ((void *)vkd3d_atomic_uint64_exchange_explicit((uint64_t *)(target), (uint64_t)(value), order))
+# define vkd3d_atomic_ptr_increment(target, order)                           ((void *)vkd3d_atomic_uint64_increment((uint64_t *)(target), order))
+# define vkd3d_atomic_ptr_decrement(target, order)                           ((void *)vkd3d_atomic_uint64_decrement((uint64_t *)(target), order))
+# define vkd3d_atomic_ptr_compare_exchange(target, expected, desired, success_order, fail_order) \
+        ((void *)vkd3d_atomic_uint64_compare_exchange((UINT64 *)(target), (uint64_t)(expected), (uint64_t)(desired), success_order, fail_order))
+#else
+# define vkd3d_atomic_ptr_load_explicit(target, order)                       ((void *)vkd3d_atomic_uint32_load_explicit((uint32_t *)(target), order))
+# define vkd3d_atomic_ptr_store_explicit(target, value, order)               (vkd3d_atomic_uint32_store_explicit((uint32_t *)(target), (uint32_t)(value), order))
+# define vkd3d_atomic_ptr_exchange_explicit(target, value, order)            ((void *)vkd3d_atomic_uint32_exchange_explicit((uint32_t *)(target), (uint32_t)(value), order))
+# define vkd3d_atomic_ptr_increment(target, order)                           ((void *)vkd3d_atomic_uint32_increment((uint32_t *)(target), order))
+# define vkd3d_atomic_ptr_decrement(target, order)                           ((void *)vkd3d_atomic_uint32_decrement((uint32_t *)(target), order))
+# define vkd3d_atomic_ptr_compare_exchange(target, expected, desired, success_order, fail_order) \
+        ((void *)vkd3d_atomic_uint32_compare_exchange((uint32_t *)(target), (uint32_t)(expected), (uint32_t)(desired), success_order, fail_order))
+#endif
+
+#endif
--- a/include/private/vkd3d_common.h
+++ b/include/private/vkd3d_common.h
@ -19,15 +19,20 @@
 #ifndef __VKD3D_COMMON_H
 #define __VKD3D_COMMON_H

-#include "config.h"
 #include "vkd3d_windows.h"
+#include "vkd3d_spinlock.h"
+#include "vkd3d_profiling.h"

 #include <ctype.h>
+#include <stdint.h>
 #include <limits.h>
 #include <stdbool.h>
+#include <assert.h>

 #ifdef _MSC_VER
 #include <intrin.h>
+#else
+#include <time.h>
 #endif

 #ifndef ARRAY_SIZE
@ -40,8 +45,15 @@

 #define MEMBER_SIZE(t, m) sizeof(((t *)0)->m)

+/* Rounds addr up to the next multiple of alignment.
+ * alignment must be a non-zero power of two (asserted). */
+static inline uint64_t align64(uint64_t addr, uint64_t alignment)
+{
+    const uint64_t low_bits = alignment - 1;
+
+    assert(alignment > 0 && (alignment & low_bits) == 0);
+    return (addr + low_bits) & ~low_bits;
+}
+
 static inline size_t align(size_t addr, size_t alignment)
 {
+    assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
    return (addr + (alignment - 1)) & ~(alignment - 1);
 }

@ -57,7 +69,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v)
 {
 #ifdef _MSC_VER
    return __popcnt(v);
-#elif defined(HAVE_BUILTIN_POPCOUNT)
+#elif defined(__GNUC__) || defined(__clang__)
    return __builtin_popcount(v);
 #else
    v -= (v >> 1) & 0x55555555;
@ -81,6 +93,87 @@ static inline bool vkd3d_bitmask_is_contiguous(unsigned int mask)
    return vkd3d_popcount(mask) == j;
 }

+/* Counts trailing zero bits in a 64-bit mask (index of the lowest set bit).
+ * Returns 64 for mask == 0 */
+static inline unsigned int vkd3d_bitmask_tzcnt64(uint64_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long result;
+#ifdef _WIN64
+    /* Falls back to 64 when the intrinsic reports that no bit was found. */
+    return _BitScanForward64(&result, mask) ? result : 64;
+#else
+    /* Without _WIN64, scan the low 32 bits first, then the high 32 bits. */
+    uint32_t lower, upper;
+    lower = (uint32_t)mask;
+    upper = (uint32_t)(mask >> 32);
+    if (_BitScanForward(&result, lower))
+        return result;
+    else if (_BitScanForward(&result, upper))
+        return result + 32;
+    else
+        return 64;
+#endif
+#elif defined(__GNUC__) || defined(__clang__)
+    /* Zero is mapped to 64 explicitly; the builtin is only reached for non-zero masks. */
+    return mask ? __builtin_ctzll(mask) : 64;
+#else
+    #error "No implementation for ctzll."
+#endif
+}
+
+/* Counts trailing zero bits in a 32-bit mask (index of the lowest set bit).
+ * Returns 32 for mask == 0 */
+static inline unsigned int vkd3d_bitmask_tzcnt32(uint32_t mask)
+{
+#ifdef _MSC_VER
+    unsigned long result;
+    /* Falls back to 32 when the intrinsic reports that no bit was found. */
+    return _BitScanForward(&result, mask) ? result : 32;
+#elif defined(__GNUC__) || defined(__clang__)
+    /* Zero is mapped to 32 explicitly; the builtin is only reached for non-zero masks. */
+    return mask ? __builtin_ctz(mask) : 32;
+#else
+    #error "No implementation for ctz."
+#endif
+}
+
+/* Returns the index of the least significant set bit of *mask and clears that bit.
+ * A zero mask stays zero; the return value is then whatever
+ * vkd3d_bitmask_tzcnt64() yields for 0. */
+static inline unsigned int vkd3d_bitmask_iter64(uint64_t* mask)
+{
+    const uint64_t snapshot = *mask;
+    const unsigned int bit_index = vkd3d_bitmask_tzcnt64(snapshot);
+
+    /* x & (x - 1) drops the lowest set bit. */
+    *mask = snapshot & (snapshot - 1);
+    return bit_index;
+}
+
+/* Returns the index of the least significant set bit of *mask and clears that bit.
+ * A zero mask stays zero; the return value is then whatever
+ * vkd3d_bitmask_tzcnt32() yields for 0. */
+static inline unsigned int vkd3d_bitmask_iter32(uint32_t *mask)
+{
+    const uint32_t snapshot = *mask;
+    const unsigned int bit_index = vkd3d_bitmask_tzcnt32(snapshot);
+
+    /* x & (x - 1) drops the lowest set bit. */
+    *mask = snapshot & (snapshot - 1);
+    return bit_index;
+}
+
+/* A run of consecutive set bits within a mask. */
+struct vkd3d_bitmask_range
+{
+    unsigned int offset; /* Bit index at which the run starts. */
+    unsigned int count;  /* Number of consecutive set bits in the run. */
+};
+
+/* Extracts the lowest run of consecutive set bits from *mask and clears it.
+ * Returns the run as (offset, count). An empty mask yields offset 32, count 0
+ * and is left untouched. */
+static inline struct vkd3d_bitmask_range vkd3d_bitmask_iter32_range(uint32_t *mask)
+{
+    struct vkd3d_bitmask_range range;
+    uint32_t tmp;
+
+    if (*mask == 0u)
+    {
+        /* tzcnt32() reports 32 for an empty mask, and shifting a 32-bit value
+         * by 32 is undefined, so the general path must not be taken. */
+        range.offset = 32;
+        range.count = 0;
+    }
+    else if (*mask == ~0u)
+    {
+        /* All bits set: handled separately because (1u << 32) is undefined. */
+        range.offset = 0;
+        range.count = 32;
+        *mask = 0u;
+    }
+    else
+    {
+        range.offset = vkd3d_bitmask_tzcnt32(*mask);
+        /* Shift the run down to bit 0; its length is the number of trailing ones. */
+        tmp = *mask >> range.offset;
+        range.count = vkd3d_bitmask_tzcnt32(~tmp);
+        *mask &= ~(((1u << range.count) - 1u) << range.offset);
+    }
+
+    return range;
+}
+
 /* Undefined for x == 0. */
 static inline unsigned int vkd3d_log2i(unsigned int x)
 {
@ -90,7 +183,7 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
    unsigned long result;
    _BitScanReverse(&result, x);
    return (unsigned int)result;
-#elif defined(HAVE_BUILTIN_CLZ)
+#elif defined(__GNUC__) || defined(__clang__)
    return __builtin_clz(x) ^ 0x1f;
 #else
    static const unsigned int l[] =
@ -119,6 +212,14 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
 #endif
 }

+/* Base-2 logarithm of x, rounded up. x == 1 is special-cased because it would
+ * otherwise call vkd3d_log2i(0), which is undefined. */
+static inline unsigned int vkd3d_log2i_ceil(unsigned int x)
+{
+    return x == 1 ? 0 : vkd3d_log2i(x - 1) + 1;
+}
+
 static inline int ascii_isupper(int c)
 {
    return 'A' <= c && c <= 'Z';
@ -142,45 +243,87 @@ static inline int ascii_strcasecmp(const char *a, const char *b)
    return c_a - c_b;
 }

-#ifndef _WIN32
-# if HAVE_SYNC_ADD_AND_FETCH
-static inline LONG InterlockedIncrement(LONG volatile *x)
+static inline bool is_power_of_two(unsigned int x)
 {
-    return __sync_add_and_fetch(x, 1);
+    return x && !(x & (x -1));
 }
-# else
-#  error "InterlockedIncrement() not implemented for this platform"
-# endif  /* HAVE_SYNC_ADD_AND_FETCH */

-# if HAVE_SYNC_SUB_AND_FETCH
-static inline LONG InterlockedDecrement(LONG volatile *x)
+static inline void vkd3d_parse_version(const char *version, int *major, int *minor, int *patch)
 {
-    return __sync_sub_and_fetch(x, 1);
-}
-# else
-#  error "InterlockedDecrement() not implemented for this platform"
-# endif
-#endif  /* _WIN32 */
+    char *end;

-#if HAVE_SYNC_ADD_AND_FETCH
-# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val)
-#elif defined(_MSC_VER)
-/* InterlockedAdd returns value after increment, like add_and_fetch. */
-# define atomic_add_fetch(ptr, val) InterlockedAdd(ptr, val)
-#else
-# error "atomic_add_fetch() not implemented for this platform"
-#endif  /* HAVE_SYNC_ADD_AND_FETCH */
-
-static inline void vkd3d_parse_version(const char *version, int *major, int *minor)
-{
-    *major = atoi(version);
-
-    while (isdigit(*version))
-        ++version;
+    *major = strtol(version, &end, 10);
+    version = end;
    if (*version == '.')
        ++version;
+    *minor = strtol(version, &end, 10);
+    version = end;
+    if (*version == '.')
+        ++version;
+    *patch = strtol(version, NULL, 10);
+}

-    *minor = atoi(version);
+/* Returns the object representation of f as a 32-bit unsigned integer. */
+static inline uint32_t float_bits_to_uint32(float f)
+{
+    uint32_t bits;
+
+    /* Copy the bytes instead of casting pointers between unrelated types. */
+    memcpy(&bits, &f, sizeof(bits));
+    return bits;
+}
+
+/* Returns the number of WCHAR elements that precede the terminating zero in wstr. */
+static inline size_t vkd3d_wcslen(const WCHAR *wstr)
+{
+    const WCHAR *cursor = wstr;
+
+    while (*cursor)
+        ++cursor;
+
+    return (size_t)(cursor - wstr);
+}
+
+/* Returns ptr advanced by offset bytes. */
+static inline void *void_ptr_offset(void *ptr, size_t offset)
+{
+    char *base = (char *)ptr;
+
+    return &base[offset];
+}
+
+#ifdef _MSC_VER
+#define VKD3D_THREAD_LOCAL __declspec(thread)
+#else
+#define VKD3D_THREAD_LOCAL __thread
+#endif
+
+/* Returns a timestamp in nanoseconds: derived from the performance counter on
+ * _WIN32, and from clock_gettime(CLOCK_MONOTONIC_RAW) otherwise.
+ * Return values of the underlying OS calls are not checked. */
+static inline uint64_t vkd3d_get_current_time_ns(void)
+{
+#ifdef _WIN32
+    LARGE_INTEGER li, lf;
+    uint64_t whole, part;
+    QueryPerformanceCounter(&li);
+    QueryPerformanceFrequency(&lf);
+    /* Whole seconds and the sub-second remainder are converted separately
+     * (presumably so the multiply by 1e9 cannot overflow for large counter values). */
+    whole = (li.QuadPart / lf.QuadPart) * 1000000000;
+    part = ((li.QuadPart % lf.QuadPart) * 1000000000) / lf.QuadPart;
+    return whole + part;
+#else
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+    return ts.tv_sec * 1000000000ll + ts.tv_nsec;
+#endif
+}
+
+#ifdef _MSC_VER
+#pragma intrinsic(__rdtsc)
+#endif
+
+/* Returns a raw tick count: __rdtsc() under _MSC_VER, __builtin_ia32_rdtsc() when
+ * __i386__ or __x86_64__ is defined, otherwise the nanosecond clock.
+ * The unit of the result therefore differs between builds. */
+static inline uint64_t vkd3d_get_current_time_ticks(void)
+{
+#ifdef _MSC_VER
+    return __rdtsc();
+#elif defined(__i386__) || defined(__x86_64__)
+    return __builtin_ia32_rdtsc();
+#else
+    return vkd3d_get_current_time_ns();
+#endif
 }

 #endif  /* __VKD3D_COMMON_H */
--- a/include/private/vkd3d_debug.h
+++ b/include/private/vkd3d_debug.h
@ -26,49 +26,64 @@
 #include <stdint.h>

 #ifdef VKD3D_NO_TRACE_MESSAGES
-#define TRACE(args...) do { } while (0)
+#define TRACE(...) do { } while (0)
 #define TRACE_ON() (false)
 #endif

 #ifdef VKD3D_NO_DEBUG_MESSAGES
-#define WARN(args...) do { } while (0)
-#define FIXME(args...) do { } while (0)
+#define WARN(...) do { } while (0)
+#define FIXME(...) do { } while (0)
 #endif

 enum vkd3d_dbg_level
 {
+    VKD3D_DBG_LEVEL_UNKNOWN,
    VKD3D_DBG_LEVEL_NONE,
    VKD3D_DBG_LEVEL_ERR,
+    VKD3D_DBG_LEVEL_INFO,
    VKD3D_DBG_LEVEL_FIXME,
    VKD3D_DBG_LEVEL_WARN,
    VKD3D_DBG_LEVEL_TRACE,
 };

-enum vkd3d_dbg_level vkd3d_dbg_get_level(void) DECLSPEC_HIDDEN;
+#ifndef VKD3D_DBG_CHANNEL
+#error Must define VKD3D_DBG_CHANNEL to either VKD3D_DBG_CHANNEL_API or SHADER to use vkd3d_debug.h
+#endif

-void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function,
-        const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4) DECLSPEC_HIDDEN;
+/* Logging channels. VKD3D_DBG_CHANNEL must be defined to one of these before
+ * this header is included; the logging macros pass it to vkd3d_dbg_printf(). */
+enum vkd3d_dbg_channel
+{
+    VKD3D_DBG_CHANNEL_API,
+    VKD3D_DBG_CHANNEL_SHADER,
+    VKD3D_DBG_CHANNEL_COUNT /* Number of channels; not a channel itself. */
+};

-const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2) DECLSPEC_HIDDEN;
-const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args) DECLSPEC_HIDDEN;
-const char *debugstr_a(const char *str) DECLSPEC_HIDDEN;
-const char *debugstr_w(const WCHAR *wstr, size_t wchar_size) DECLSPEC_HIDDEN;
+enum vkd3d_dbg_level vkd3d_dbg_get_level(enum vkd3d_dbg_channel channel);
+
+void vkd3d_dbg_printf(enum vkd3d_dbg_channel channel, enum vkd3d_dbg_level level, const char *function,
+        const char *fmt, ...) VKD3D_PRINTF_FUNC(4, 5);
+
+const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2);
+const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args);
+const char *debugstr_a(const char *str);
+const char *debugstr_w(const WCHAR *wstr);

 #define VKD3D_DBG_LOG(level) \
        do { \
+        const enum vkd3d_dbg_channel vkd3d_dbg_channel = VKD3D_DBG_CHANNEL; \
        const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \
        VKD3D_DBG_PRINTF

 #define VKD3D_DBG_LOG_ONCE(first_time_level, level) \
        do { \
        static bool vkd3d_dbg_next_time; \
+        const enum vkd3d_dbg_channel vkd3d_dbg_channel = VKD3D_DBG_CHANNEL; \
        const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \
        ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \
        vkd3d_dbg_next_time = true; \
        VKD3D_DBG_PRINTF

 #define VKD3D_DBG_PRINTF(...) \
-        vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0)
+        vkd3d_dbg_printf(vkd3d_dbg_channel, vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0)

 #ifndef TRACE
 #define TRACE VKD3D_DBG_LOG(TRACE)
@ -83,15 +98,14 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size) DECLSPEC_HIDDEN;
 #endif

 #define ERR   VKD3D_DBG_LOG(ERR)
+#define INFO  VKD3D_DBG_LOG(INFO)

 #ifndef TRACE_ON
-#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE)
+#define TRACE_ON() (vkd3d_dbg_get_level(VKD3D_DBG_CHANNEL) == VKD3D_DBG_LEVEL_TRACE)
 #endif

 #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN)

-#define VKD3D_DEBUG_ENV_NAME(name) const char *vkd3d_dbg_env_name = name
-
 static inline const char *debugstr_guid(const GUID *guid)
 {
    if (!guid)
@ -103,7 +117,7 @@ static inline const char *debugstr_guid(const GUID *guid)
            guid->Data4[5], guid->Data4[6], guid->Data4[7]);
 }

-unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value) DECLSPEC_HIDDEN;
+unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value);

 struct vkd3d_debug_option
 {
@ -111,8 +125,8 @@ struct vkd3d_debug_option
    uint64_t flag;
 };

-bool vkd3d_debug_list_has_member(const char *string, const char *member) DECLSPEC_HIDDEN;
+bool vkd3d_debug_list_has_member(const char *string, const char *member);
 uint64_t vkd3d_parse_debug_options(const char *string,
-        const struct vkd3d_debug_option *options, unsigned int option_count) DECLSPEC_HIDDEN;
+        const struct vkd3d_debug_option *options, unsigned int option_count);

 #endif  /* __VKD3D_DEBUG_H */
--- a/include/private/vkd3d_descriptor_qa_data.h
+++ b/include/private/vkd3d_descriptor_qa_data.h
@ -0,0 +1,119 @@
+/*
+ * Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_DESCRIPTOR_QA_DATA_H
+#define __VKD3D_DESCRIPTOR_QA_DATA_H
+
+#include <stdint.h>
+
+/* Data types which are used by shader backends when emitting code. */
+
+/* Single-bit flags, one per descriptor type known to descriptor QA.
+ * NONE is the empty mask. */
+enum vkd3d_descriptor_qa_flag_bits
+{
+    VKD3D_DESCRIPTOR_QA_TYPE_NONE_BIT = 0,
+    VKD3D_DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT = 1 << 0,
+    VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT = 1 << 1,
+    VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT = 1 << 2,
+    VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT = 1 << 3,
+    VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT = 1 << 4,
+    VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT = 1 << 5,
+    VKD3D_DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT = 1 << 6,
+    VKD3D_DESCRIPTOR_QA_TYPE_SAMPLER_BIT = 1 << 7,
+    VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT = 1 << 8
+};
+/* Presumably holds a combination of vkd3d_descriptor_qa_flag_bits - TODO confirm. */
+typedef uint32_t vkd3d_descriptor_qa_flags;
+
+/* Element type of desc[] in struct vkd3d_descriptor_qa_heap_buffer_data. */
+struct vkd3d_descriptor_qa_cookie_descriptor
+{
+    uint32_t cookie;
+    uint32_t descriptor_type; /* Presumably vkd3d_descriptor_qa_flag_bits - TODO confirm. */
+};
+
+/* Single-bit fault categories.
+ * NOTE(review): HEAP_OF_OF_RANGE looks like a typo for HEAP_OUT_OF_RANGE; the
+ * spelling is kept here because the identifier may be used elsewhere. */
+enum vkd3d_descriptor_debug_fault_type
+{
+    VKD3D_DESCRIPTOR_FAULT_TYPE_HEAP_OF_OF_RANGE = 1 << 0,
+    VKD3D_DESCRIPTOR_FAULT_TYPE_MISMATCH_DESCRIPTOR_TYPE = 1 << 1,
+    VKD3D_DESCRIPTOR_FAULT_TYPE_DESTROYED_RESOURCE = 1 << 2
+};
+
+/* Physical layout of QA buffer.
+ * Field order matches enum vkd3d_descriptor_qa_global_buffer_data_member;
+ * keep the struct, the enum and the names table in sync. */
+struct vkd3d_descriptor_qa_global_buffer_data
+{
+    uint64_t failed_hash;
+    uint32_t failed_offset;
+    uint32_t failed_heap;
+    uint32_t failed_cookie;
+    uint32_t fault_atomic;
+    uint32_t failed_instruction;
+    uint32_t failed_descriptor_type_mask;
+    uint32_t actual_descriptor_type_mask;
+    uint32_t fault_type;
+    uint32_t live_status_table[]; /* Flexible array member; length is not stored in this struct. */
+};
+
+/* Physical layout of QA heap buffer.
+ * Field order matches enum vkd3d_descriptor_qa_heap_buffer_data_member. */
+struct vkd3d_descriptor_qa_heap_buffer_data
+{
+    uint32_t num_descriptors;
+    uint32_t heap_index;
+    /* Flexible array member; presumably num_descriptors entries - TODO confirm. */
+    struct vkd3d_descriptor_qa_cookie_descriptor desc[];
+};
+
+/* Member indices of struct vkd3d_descriptor_qa_heap_buffer_data, in declaration order. */
+enum vkd3d_descriptor_qa_heap_buffer_data_member
+{
+    VKD3D_DESCRIPTOR_QA_HEAP_MEMBER_NUM_DESCRIPTORS = 0,
+    VKD3D_DESCRIPTOR_QA_HEAP_MEMBER_HEAP_INDEX,
+    VKD3D_DESCRIPTOR_QA_HEAP_MEMBER_DESC,
+    VKD3D_DESCRIPTOR_QA_HEAP_MEMBER_COUNT /* Number of members; also sizes the names table. */
+};
+
+/* Member names, indexed by enum vkd3d_descriptor_qa_heap_buffer_data_member. */
+VKD3D_UNUSED static const char *vkd3d_descriptor_qa_heap_data_names[VKD3D_DESCRIPTOR_QA_HEAP_MEMBER_COUNT] = {
+    "num_descriptors",
+    "heap_index",
+    "desc",
+};
+
+/* Member indices of struct vkd3d_descriptor_qa_global_buffer_data, in declaration order. */
+enum vkd3d_descriptor_qa_global_buffer_data_member
+{
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_HASH = 0,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_OFFSET,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_HEAP,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_COOKIE,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAULT_ATOMIC,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_INSTRUCTION,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAILED_DESCRIPTOR_TYPE_MASK,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_ACTUAL_DESCRIPTOR_TYPE_MASK,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_FAULT_TYPE,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_LIVE_STATUS_TABLE,
+    VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_COUNT /* Number of members; also sizes the names table. */
+};
+
+/* Member names, indexed by enum vkd3d_descriptor_qa_global_buffer_data_member. */
+VKD3D_UNUSED static const char *vkd3d_descriptor_qa_global_buffer_data_names[VKD3D_DESCRIPTOR_QA_GLOBAL_BUFFER_DATA_MEMBER_COUNT] = {
+    "failed_hash",
+    "failed_offset",
+    "failed_heap",
+    "failed_cookie",
+    "fault_atomic",
+    "failed_instruction",
+    "failed_descriptor_type_mask",
+    "actual_descriptor_type_mask",
+    "fault_type",
+    "live_status_table",
+};
+
+#endif
--- a/include/private/vkd3d_file_utils.h
+++ b/include/private/vkd3d_file_utils.h
@ -0,0 +1,42 @@
+/*
+ * Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_FILE_UTILS_H
+#define __VKD3D_FILE_UTILS_H
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+/* Handle for a file mapping; filled in by vkd3d_file_map_read_only() and
+ * cleared by vkd3d_file_unmap(). */
+struct vkd3d_memory_mapped_file
+{
+    void *mapped;       /* Presumably the base address of the mapping - TODO confirm. */
+    size_t mapped_size; /* Presumably the mapping size in bytes - TODO confirm. */
+};
+
+/* On failure, ensures the struct is cleared to zero.
+ * A reference to the file is kept through the memory mapping. */
+bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file);
+/* Clears out file on unmap. */
+void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file);
+bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path);
+bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path);
+bool vkd3d_file_delete(const char *path);
+FILE *vkd3d_file_open_exclusive_write(const char *path);
+
+#endif
--- a/include/private/vkd3d_memory.h
+++ b/include/private/vkd3d_memory.h
@ -23,6 +23,7 @@
 #include <stdbool.h>
 #include <stdlib.h>

+#include "vkd3d_common.h"
 #include "vkd3d_debug.h"

 static inline void *vkd3d_malloc(size_t size)
@ -55,6 +56,24 @@ static inline void vkd3d_free(void *ptr)
 }

 bool vkd3d_array_reserve(void **elements, size_t *capacity,
-        size_t element_count, size_t element_size) DECLSPEC_HIDDEN;
+        size_t element_count, size_t element_size);
+
+/* Allocates size bytes aligned to alignment. Uses _aligned_malloc() on _WIN32
+ * and aligned_alloc() elsewhere. Release the result with vkd3d_free_aligned(). */
+static inline void *vkd3d_malloc_aligned(size_t size, size_t alignment)
+{
+#ifdef _WIN32
+    return _aligned_malloc(size, alignment);
+#else
+    /* The requested size is rounded up to a multiple of alignment before the call. */
+    return aligned_alloc(alignment, align(size, alignment));
+#endif
+}
+
+/* Releases memory obtained from vkd3d_malloc_aligned(), using the deallocator
+ * that matches the platform's allocator. */
+static inline void vkd3d_free_aligned(void *ptr)
+{
+#ifdef _WIN32
+    _aligned_free(ptr);
+#else
+    free(ptr);
+#endif
+}

 #endif  /* __VKD3D_MEMORY_H */
--- a/include/private/vkd3d_platform.h
+++ b/include/private/vkd3d_platform.h
@ -0,0 +1,44 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_PLATFORM_H
+#define __VKD3D_PLATFORM_H
+
+#include "vkd3d_common.h"
+
+#if defined(_WIN32)
+#define VKD3D_PATH_MAX _MAX_PATH
+#else
+#define VKD3D_PATH_MAX PATH_MAX
+#endif
+
+typedef void* vkd3d_module_t;
+
+vkd3d_module_t vkd3d_dlopen(const char *name);
+
+void *vkd3d_dlsym(vkd3d_module_t handle, const char *symbol);
+
+int vkd3d_dlclose(vkd3d_module_t handle);
+
+const char *vkd3d_dlerror(void);
+
+bool vkd3d_get_env_var(const char *name, char *value, size_t value_size);
+
+bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]);
+
+#endif
--- a/include/private/vkd3d_profiling.h
+++ b/include/private/vkd3d_profiling.h
@ -0,0 +1,64 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_PROFILING_H
+#define __VKD3D_PROFILING_H
+
+#include "vkd3d_windows.h"
+#include "vkd3d_spinlock.h"
+#include "vkd3d_common.h"
+
+#ifdef VKD3D_ENABLE_PROFILING
+
+void vkd3d_init_profiling(void);
+bool vkd3d_uses_profiling(void);
+unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
+void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint64_t end_ticks, unsigned int iteration_count);
+
+/* Declares the state of a profiling region: a static latch and lock, plus
+ * non-static begin/end tick and index variables. The last declaration has no
+ * trailing semicolon; the caller supplies it. */
+#define VKD3D_REGION_DECL(name) \
+    static uint32_t _vkd3d_region_latch_##name; \
+    static spinlock_t _vkd3d_region_lock_##name; \
+    uint64_t _vkd3d_region_begin_tick_##name; \
+    uint64_t _vkd3d_region_end_tick_##name; \
+    unsigned int _vkd3d_region_index_##name
+
+/* Starts timing a region declared with VKD3D_REGION_DECL(name). The region index
+ * is read from the latch with acquire ordering; while it is still zero the region
+ * is registered through vkd3d_profiling_register_region(). The start tick is
+ * sampled last. */
+#define VKD3D_REGION_BEGIN(name) \
+    do { \
+        if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
+            _vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
+        _vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
+    } while(0)
+
+/* Ends timing of a region: samples the end tick and reports the begin/end ticks
+ * together with the iteration count iter to vkd3d_profiling_notify_work(). */
+#define VKD3D_REGION_END_ITERATIONS(name, iter) \
+    do { \
+        _vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
+        vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
+    } while(0)
+
+#else
+/* VKD3D_ENABLE_PROFILING is not defined: initialization does nothing and the
+ * region macros expand to no-op expressions. */
+static inline void vkd3d_init_profiling(void)
+{
+}
+#define VKD3D_REGION_DECL(name) ((void)0)
+#define VKD3D_REGION_BEGIN(name) ((void)0)
+#define VKD3D_REGION_END_ITERATIONS(name, iter) ((void)0)
+#endif /* VKD3D_ENABLE_PROFILING */
+
+#define VKD3D_REGION_END(name) VKD3D_REGION_END_ITERATIONS(name, 1)
+
+#endif /* __VKD3D_PROFILING_H */
--- a/include/private/vkd3d_rw_spinlock.h
+++ b/include/private/vkd3d_rw_spinlock.h
@ -0,0 +1,59 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_RW_SPINLOCK_H
+#define __VKD3D_RW_SPINLOCK_H
+
+#include "vkd3d_spinlock.h"
+
+/* Lock word layout: bit 0 is the writer flag, and every reader adds
+ * VKD3D_RW_SPINLOCK_READ to the word. Zero means nobody holds the lock. */
+#define VKD3D_RW_SPINLOCK_WRITE 1u
+#define VKD3D_RW_SPINLOCK_READ 2u
+#define VKD3D_RW_SPINLOCK_IDLE 0u
+
+/* Takes the lock for reading. The reader is counted first, then the function
+ * spins until the writer bit is no longer observed in the lock word. */
+static inline void rw_spinlock_acquire_read(spinlock_t *spinlock)
+{
+    uint32_t count = vkd3d_atomic_uint32_add(spinlock, VKD3D_RW_SPINLOCK_READ, vkd3d_memory_order_acquire);
+    while (count & VKD3D_RW_SPINLOCK_WRITE)
+    {
+        vkd3d_pause();
+        /* Re-read the word; the reader count added above stays in place while waiting. */
+        count = vkd3d_atomic_uint32_load_explicit(spinlock, vkd3d_memory_order_acquire);
+    }
+}
+
+/* Drops a read hold by subtracting this reader's contribution with release ordering. */
+static inline void rw_spinlock_release_read(spinlock_t *spinlock)
+{
+    vkd3d_atomic_uint32_sub(spinlock, VKD3D_RW_SPINLOCK_READ, vkd3d_memory_order_release);
+}
+
+/* Takes the lock for writing. Spins until the word is observed idle with a
+ * relaxed load and the following compare-exchange from IDLE to WRITE succeeds.
+ * The compare-exchange result is compared against IDLE, so the helper
+ * presumably returns the previous value - TODO confirm. */
+static inline void rw_spinlock_acquire_write(spinlock_t *spinlock)
+{
+    while (vkd3d_atomic_uint32_load_explicit(spinlock, vkd3d_memory_order_relaxed) != VKD3D_RW_SPINLOCK_IDLE ||
+            vkd3d_atomic_uint32_compare_exchange(spinlock,
+                    VKD3D_RW_SPINLOCK_IDLE, VKD3D_RW_SPINLOCK_WRITE,
+                    vkd3d_memory_order_acquire, vkd3d_memory_order_relaxed) != VKD3D_RW_SPINLOCK_IDLE)
+    {
+        vkd3d_pause();
+    }
+}
+
+/* Drops the write hold by clearing only the writer bit, so reader counts added
+ * by readers that are already waiting are preserved. */
+static inline void rw_spinlock_release_write(spinlock_t *spinlock)
+{
+    vkd3d_atomic_uint32_and(spinlock, ~VKD3D_RW_SPINLOCK_WRITE, vkd3d_memory_order_release);
+}
+
+#endif
--- a/include/private/vkd3d_spinlock.h
+++ b/include/private/vkd3d_spinlock.h
@ -0,0 +1,67 @@
+/*
+ * Copyright 2020 Philip Rebohle for Valve Corporation
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_SPINLOCK_H
+#define __VKD3D_SPINLOCK_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "vkd3d_atomic.h"
+
+/* _mm_pause() needs SSE2. GCC and Clang advertise SSE2 through __SSE2__; MSVC
+ * never defines that macro, so its own x64 and /arch:SSE2 indicators are
+ * checked as well. */
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+#include <emmintrin.h>
+#endif
+
+/* Spin-wait hint for busy loops. Calls _mm_pause() when SSE2 is available and
+ * compiles to nothing otherwise. */
+static inline void vkd3d_pause(void)
+{
+#if defined(__SSE2__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2)
+    _mm_pause();
+#endif
+}
+
+/* Single lock attempt: a relaxed load checks whether the lock looks free, and
+ * only then is an acquire exchange with 1 performed. Evaluates to non-zero when
+ * the lock was taken. Note that lock is evaluated twice. */
+#define vkd3d_spinlock_try_lock(lock) \
+    (!vkd3d_atomic_uint32_load_explicit(lock, vkd3d_memory_order_relaxed) && \
+     !vkd3d_atomic_uint32_exchange_explicit(lock, 1u, vkd3d_memory_order_acquire))
+
+/* Releases the lock by storing 0 with release ordering. */
+#define vkd3d_spinlock_unlock(lock) vkd3d_atomic_uint32_store_explicit(lock, 0u, vkd3d_memory_order_release)
+
+typedef uint32_t spinlock_t;
+
+/* Puts the lock into the unlocked state (0) with a plain, non-atomic store. */
+static inline void spinlock_init(spinlock_t *lock)
+{
+    *lock = 0;
+}
+
+/* Makes one attempt to take the lock; returns true on success, without spinning. */
+static inline bool spinlock_try_acquire(spinlock_t *lock)
+{
+    return vkd3d_spinlock_try_lock(lock);
+}
+
+/* Takes the lock, busy-waiting with vkd3d_pause() between attempts. */
+static inline void spinlock_acquire(spinlock_t *lock)
+{
+    while (!spinlock_try_acquire(lock))
+        vkd3d_pause();
+}
+
+/* Releases a lock taken with spinlock_acquire() or spinlock_try_acquire(). */
+static inline void spinlock_release(spinlock_t *lock)
+{
+    vkd3d_spinlock_unlock(lock);
+}
+
+#endif
--- a/include/private/vkd3d_string.h
+++ b/include/private/vkd3d_string.h
@ -0,0 +1,82 @@
+/*
+ * Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_STRING_H
+#define __VKD3D_STRING_H
+
+#include "vkd3d_common.h"
+#include <stddef.h>
+
+/* Various string utilities. */
+
+WCHAR *vkd3d_dup_entry_point(const char *str);
+WCHAR *vkd3d_dup_entry_point_n(const char *str, size_t len);
+WCHAR *vkd3d_dup_demangled_entry_point(const char *str);
+char *vkd3d_dup_demangled_entry_point_ascii(const char *str);
+
+bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b);
+bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b);
+bool vkd3d_export_strequal_substr(const WCHAR *a, size_t n, const WCHAR *b);
+
+char *vkd3d_strdup(const char *str);
+char *vkd3d_strdup_n(const char *str, size_t n);
+WCHAR *vkd3d_wstrdup(const WCHAR *str);
+WCHAR *vkd3d_wstrdup_n(const WCHAR *str, size_t n);
+
+/* Returns true when the final ending_len characters of str (whose length is
+ * str_len) compare equal to ending under strncmp(). */
+static inline bool vkd3d_string_ends_with_n(const char *str, size_t str_len, const char *ending, size_t ending_len)
+{
+    /* A suffix longer than the string can never match. */
+    if (str_len < ending_len)
+        return false;
+
+    return strncmp(str + (str_len - ending_len), ending, ending_len) == 0;
+}
+
+/* Returns true when the NUL-terminated string str ends with ending. */
+static inline bool vkd3d_string_ends_with(const char *str, const char *ending)
+{
+    const size_t str_len = strlen(str);
+    const size_t ending_len = strlen(ending);
+
+    return vkd3d_string_ends_with_n(str, str_len, ending, ending_len);
+}
+
+/* Matching modes accepted by vkd3d_string_compare(). */
+enum vkd3d_string_compare_mode
+{
+    VKD3D_STRING_COMPARE_NEVER,       /* Never matches. */
+    VKD3D_STRING_COMPARE_ALWAYS,      /* Always matches. */
+    VKD3D_STRING_COMPARE_EXACT,       /* strcmp() equality. */
+    VKD3D_STRING_COMPARE_STARTS_WITH, /* String begins with the comparator. */
+    VKD3D_STRING_COMPARE_ENDS_WITH,   /* String ends with the comparator. */
+    VKD3D_STRING_COMPARE_CONTAINS,    /* Comparator occurs anywhere in the string. */
+};
+
+/* Tests string against comparator according to mode.
+ * Unrecognized modes behave like VKD3D_STRING_COMPARE_NEVER. */
+static inline bool vkd3d_string_compare(enum vkd3d_string_compare_mode mode, const char *string, const char *comparator)
+{
+    switch (mode)
+    {
+        case VKD3D_STRING_COMPARE_ALWAYS:
+            return true;
+        case VKD3D_STRING_COMPARE_EXACT:
+            return strcmp(string, comparator) == 0;
+        case VKD3D_STRING_COMPARE_STARTS_WITH:
+            return strncmp(string, comparator, strlen(comparator)) == 0;
+        case VKD3D_STRING_COMPARE_ENDS_WITH:
+            return vkd3d_string_ends_with(string, comparator);
+        case VKD3D_STRING_COMPARE_CONTAINS:
+            return strstr(string, comparator) != NULL;
+        case VKD3D_STRING_COMPARE_NEVER:
+        default:
+            return false;
+    }
+}
+
+
+#endif /* __VKD3D_STRING_H */
--- a/include/private/vkd3d_test.h
+++ b/include/private/vkd3d_test.h
@ -20,6 +20,7 @@
 #define __VKD3D_TEST_H

 #include "vkd3d_common.h"
+#include "vkd3d_debug.h"
 #include <assert.h>
 #include <inttypes.h>
 #include <stdarg.h>
@ -28,16 +29,19 @@
 #include <stdlib.h>
 #include <string.h>

+#ifdef VKD3D_TEST_DECLARE_MAIN
 static void vkd3d_test_main(int argc, char **argv);
-static const char *vkd3d_test_name;
-static const char *vkd3d_test_platform = "other";
+#endif
+
+extern const char *vkd3d_test_name;
+extern const char *vkd3d_test_platform;

 static void vkd3d_test_start_todo(bool is_todo);
 static int vkd3d_test_loop_todo(void);
 static void vkd3d_test_end_todo(void);

 #define START_TEST(name) \
-        static const char *vkd3d_test_name = #name; \
+        const char *vkd3d_test_name = #name; \
        static void vkd3d_test_main(int argc, char **argv)

 /*
@ -100,7 +104,7 @@ static void vkd3d_test_end_todo(void);

 #define todo todo_if(true)

-static struct
+struct vkd3d_test_state_context
 {
    LONG success_count;
    LONG failure_count;
@ -119,8 +123,10 @@ static struct
    bool bug_enabled;

    const char *test_name_filter;
+    const char *test_exclude_list;
    char context[1024];
-} vkd3d_test_state;
+};
+extern struct vkd3d_test_state_context vkd3d_test_state;

 static bool
 vkd3d_test_platform_is_windows(void)
@ -141,13 +147,17 @@ vkd3d_test_check_assert_that(unsigned int line, bool result, const char *fmt, va
    {
        InterlockedIncrement(&vkd3d_test_state.success_count);
        if (vkd3d_test_state.debug_level > 1)
+        {
            printf("%s:%d%s: Test succeeded.\n", vkd3d_test_name, line, vkd3d_test_state.context);
+            fflush(stdout);
+        }
    }
    else
    {
        InterlockedIncrement(&vkd3d_test_state.failure_count);
        printf("%s:%d%s: Test failed: ", vkd3d_test_name, line, vkd3d_test_state.context);
        vprintf(fmt, args);
+        fflush(stdout);
    }
 }

@ -177,6 +187,7 @@ vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list arg
        else
            printf("%s:%d%s: Bug: ", vkd3d_test_name, line, vkd3d_test_state.context);
        vprintf(fmt, args);
+        fflush(stdout);
    }
    else if (is_todo)
    {
@ -191,6 +202,7 @@ vkd3d_test_check_ok(unsigned int line, bool result, const char *fmt, va_list arg
            printf("%s:%d%s: Todo: ", vkd3d_test_name, line, vkd3d_test_state.context);
        }
        vprintf(fmt, args);
+        fflush(stdout);
    }
    else
    {
@ -217,6 +229,7 @@ vkd3d_test_skip(unsigned int line, const char *fmt, ...)
    vprintf(fmt, args);
    va_end(args);
    InterlockedIncrement(&vkd3d_test_state.skip_count);
+    fflush(stdout);
 }

 static void VKD3D_PRINTF_FUNC(2, 3) VKD3D_UNUSED
@ -227,6 +240,7 @@ vkd3d_test_trace(unsigned int line, const char *fmt, ...)
    printf("%s:%d%s: ", vkd3d_test_name, line, vkd3d_test_state.context);
    vprintf(fmt, args);
    va_end(args);
+    fflush(stdout);
 }

 static void VKD3D_PRINTF_FUNC(1, 2) VKD3D_UNUSED
@ -237,7 +251,7 @@ vkd3d_test_debug(const char *fmt, ...)
    int size;

    size = snprintf(buffer, sizeof(buffer), "%s: ", vkd3d_test_name);
-    if (0 < size && size < sizeof(buffer))
+    if (0 < size && size < (int)sizeof(buffer))
    {
        va_start(args, fmt);
        vsnprintf(buffer + size, sizeof(buffer) - size, fmt, args);
@ -250,20 +264,26 @@ vkd3d_test_debug(const char *fmt, ...)
 #endif

    if (vkd3d_test_state.debug_level > 0)
+    {
        printf("%s\n", buffer);
+        fflush(stdout);
+    }
 }

+#ifdef VKD3D_TEST_DECLARE_MAIN
 int main(int argc, char **argv)
 {
+    const char *exclude_list = getenv("VKD3D_TEST_EXCLUDE");
    const char *test_filter = getenv("VKD3D_TEST_FILTER");
    const char *debug_level = getenv("VKD3D_TEST_DEBUG");
    char *test_platform = getenv("VKD3D_TEST_PLATFORM");
    const char *bug = getenv("VKD3D_TEST_BUG");

    memset(&vkd3d_test_state, 0, sizeof(vkd3d_test_state));
-    vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 0;
+    vkd3d_test_state.debug_level = debug_level ? atoi(debug_level) : 1;
    vkd3d_test_state.bug_enabled = bug ? atoi(bug) : true;
    vkd3d_test_state.test_name_filter = test_filter;
+    vkd3d_test_state.test_exclude_list = exclude_list;

    if (test_platform)
    {
@ -276,13 +296,13 @@ int main(int argc, char **argv)

    vkd3d_test_main(argc, argv);

-    printf("%s: %lu tests executed (%lu failures, %lu skipped, %lu todo, %lu bugs).\n",
+    printf("%s: %lu tests executed (%lu failures, %lu successful todo, %lu skipped, %lu todo, %lu bugs).\n",
            vkd3d_test_name,
            (unsigned long)(vkd3d_test_state.success_count
            + vkd3d_test_state.failure_count + vkd3d_test_state.todo_count
            + vkd3d_test_state.todo_success_count),
-            (unsigned long)(vkd3d_test_state.failure_count
-            + vkd3d_test_state.todo_success_count),
+            (unsigned long)vkd3d_test_state.failure_count,
+            (unsigned long)vkd3d_test_state.todo_success_count,
            (unsigned long)vkd3d_test_state.skip_count,
            (unsigned long)vkd3d_test_state.todo_count,
            (unsigned long)vkd3d_test_state.bug_count);
@ -290,7 +310,8 @@ int main(int argc, char **argv)
    if (test_platform)
        free(test_platform);

-    return vkd3d_test_state.failure_count || vkd3d_test_state.todo_success_count;
+    fflush(stdout);
+    return vkd3d_test_state.failure_count != 0;
 }

 #ifdef _WIN32
@ -339,16 +360,27 @@ int wmain(int argc, WCHAR **wargv)
    return ret;
 }
 #endif  /* _WIN32 */
+#endif /* VKD3D_TEST_DECLARE_MAIN */

 typedef void (*vkd3d_test_pfn)(void);

+/* Runs one named test unless it is filtered out.
+ * The test is skipped when test_name_filter is set and is not a substring of name,
+ * or when test_exclude_list is set and vkd3d_debug_list_has_member() reports name in it.
+ * While test_pfn runs, vkd3d_test_name points at name; the previous value is restored afterwards. */
 static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn)
 {
+    const char *old_test_name;
+
     if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter))
         return;

-    vkd3d_test_debug("%s", name);
+    if (vkd3d_test_state.test_exclude_list
+            && vkd3d_debug_list_has_member(vkd3d_test_state.test_exclude_list, name))
+        return;
+
+    old_test_name = vkd3d_test_name;
+    vkd3d_test_debug("======== %s begin ========", name);
+    vkd3d_test_name = name;
     test_pfn();
+    vkd3d_test_name = old_test_name;
+    vkd3d_test_debug("======== %s end ==========", name);
 }

 static inline void vkd3d_test_start_todo(bool is_todo)
--- a/include/private/vkd3d_threads.h
+++ b/include/private/vkd3d_threads.h
@ -0,0 +1,351 @@
+/*
+ * Copyright 2019 Hans-Kristian Arntzen for Valve
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_THREADS_H
+#define __VKD3D_THREADS_H
+
+#include "vkd3d_memory.h"
+
+#if defined(_WIN32)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/* pthread_t is passed by value in some functions,
+ * which implies we need pthread_t to be a pointer type here. */
+struct pthread
+{
+    HANDLE thread;
+    DWORD id;
+    void * (*routine)(void *);
+    void *arg;
+};
+typedef struct pthread *pthread_t;
+
+/* pthread_mutex_t is not copyable, so embed the SRWLOCK inline. */
+typedef struct pthread_mutex
+{
+    SRWLOCK lock;
+} pthread_mutex_t;
+
+#define PTHREAD_MUTEX_INITIALIZER {SRWLOCK_INIT}
+
+/* pthread_cond_t is not copyable, so embed CV inline. */
+typedef struct pthread_cond
+{
+    CONDITION_VARIABLE cond;
+} pthread_cond_t;
+
+typedef pthread_cond_t condvar_reltime_t;
+
+/* Thread entry point handed to CreateThread() by pthread_create().
+ * arg is the struct pthread describing the thread; the stored routine is called
+ * with its stored argument. The routine's return value is discarded and the
+ * thread always exits with code 0. */
+static DWORD WINAPI win32_thread_wrapper_routine(void *arg)
+{
+    pthread_t thread = arg;
+    thread->routine(thread->arg);
+    return 0;
+}
+
+/* Minimal pthread_create() replacement built on CreateThread().
+ * Allocates the bookkeeping structure, records the routine and its argument and
+ * starts the thread through win32_thread_wrapper_routine(). attr is ignored.
+ * Returns 0 and stores the new handle in *out_thread on success; returns -1 if
+ * the allocation or CreateThread() fails, leaving *out_thread untouched. */
+static inline int pthread_create(pthread_t *out_thread, void *attr, void * (*thread_fun)(void *), void *arg)
+{
+    struct pthread *state;
+
+    (void)attr;
+
+    if (!(state = vkd3d_calloc(1, sizeof(*state))))
+        return -1;
+
+    state->routine = thread_fun;
+    state->arg = arg;
+
+    if ((state->thread = CreateThread(NULL, 0, win32_thread_wrapper_routine, state, 0, &state->id)))
+    {
+        *out_thread = state;
+        return 0;
+    }
+
+    /* The thread never started, so nothing else references the allocation. */
+    vkd3d_free(state);
+    return -1;
+}
+
+/* Minimal pthread_join() replacement: waits without timeout for the thread to exit.
+ * On a successful wait the thread handle is closed, the bookkeeping structure is
+ * freed and 0 is returned. If the wait fails, nothing is released and -1 is returned.
+ * ret is ignored; the thread routine's result is never reported. */
+static inline int pthread_join(pthread_t thread, void **ret)
+{
+    (void)ret;
+
+    if (WaitForSingleObject(thread->thread, INFINITE) != WAIT_OBJECT_0)
+        return -1;
+
+    CloseHandle(thread->thread);
+    vkd3d_free(thread);
+    return 0;
+}
+
+/* Initializes the SRW lock backing the mutex. attr is ignored. Always returns 0. */
+static inline int pthread_mutex_init(pthread_mutex_t *lock, void *attr)
+{
+    (void)attr;
+    InitializeSRWLock(&lock->lock);
+    return 0;
+}
+
+/* Acquires the SRW lock in exclusive mode. Always returns 0. */
+static inline int pthread_mutex_lock(pthread_mutex_t *lock)
+{
+    AcquireSRWLockExclusive(&lock->lock);
+    return 0;
+}
+
+/* Releases an exclusive acquisition of the SRW lock. Always returns 0. */
+static inline int pthread_mutex_unlock(pthread_mutex_t *lock)
+{
+    ReleaseSRWLockExclusive(&lock->lock);
+    return 0;
+}
+
+/* Nothing to release: the mutex is a plain SRWLOCK embedded in the structure.
+ * Always returns 0. */
+static inline int pthread_mutex_destroy(pthread_mutex_t *lock)
+{
+    (void)lock; /* Mark as intentionally unused, matching pthread_cond_destroy(). */
+    return 0;
+}
+
+/* SRWLocks distinguish between write and read unlocks, but pthread interface does not,
+ * so make a trivial wrapper type instead to avoid any possible API conflicts. */
+/* Reader/writer lock wrapper around a single SRWLOCK. */
+typedef struct rwlock
+{
+    SRWLOCK rwlock;
+} rwlock_t;
+
+/* Initializes the underlying SRW lock. Always returns 0. */
+static inline int rwlock_init(rwlock_t *lock)
+{
+    InitializeSRWLock(&lock->rwlock);
+    return 0;
+}
+
+/* Acquires the lock in exclusive (writer) mode. Always returns 0. */
+static inline int rwlock_lock_write(rwlock_t *lock)
+{
+    AcquireSRWLockExclusive(&lock->rwlock);
+    return 0;
+}
+
+/* Acquires the lock in shared (reader) mode. Always returns 0. */
+static inline int rwlock_lock_read(rwlock_t *lock)
+{
+    AcquireSRWLockShared(&lock->rwlock);
+    return 0;
+}
+
+/* Releases an exclusive acquisition; pairs with rwlock_lock_write(). Always returns 0. */
+static inline int rwlock_unlock_write(rwlock_t *lock)
+{
+    ReleaseSRWLockExclusive(&lock->rwlock);
+    return 0;
+}
+
+/* Releases a shared acquisition; pairs with rwlock_lock_read(). Always returns 0. */
+static inline int rwlock_unlock_read(rwlock_t *lock)
+{
+    ReleaseSRWLockShared(&lock->rwlock);
+    return 0;
+}
+
+/* Nothing to release: the lock is a plain SRWLOCK embedded in the structure.
+ * Always returns 0. */
+static inline int rwlock_destroy(rwlock_t *lock)
+{
+    (void)lock; /* Mark as intentionally unused, matching pthread_cond_destroy(). */
+    return 0;
+}
+
+/* Initializes the embedded CONDITION_VARIABLE. attr is ignored. Always returns 0. */
+static inline int pthread_cond_init(pthread_cond_t *cond, void *attr)
+{
+    (void)attr;
+    InitializeConditionVariable(&cond->cond);
+    return 0;
+}
+
+/* No-op: nothing is released for the embedded condition variable. Always returns 0. */
+static inline int pthread_cond_destroy(pthread_cond_t *cond)
+{
+    (void)cond;
+    return 0;
+}
+
+/* Wakes a single waiter via WakeConditionVariable(). Always returns 0. */
+static inline int pthread_cond_signal(pthread_cond_t *cond)
+{
+    WakeConditionVariable(&cond->cond);
+    return 0;
+}
+
+/* Wakes all waiters via WakeAllConditionVariable(). Always returns 0. */
+static inline int pthread_cond_broadcast(pthread_cond_t *cond)
+{
+    WakeAllConditionVariable(&cond->cond);
+    return 0;
+}
+
+/* Sleeps on cond without a timeout, passing the mutex's SRW lock to
+ * SleepConditionVariableSRW(). Returns 0 if that call succeeds, -1 otherwise. */
+static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock)
+{
+    BOOL ret = SleepConditionVariableSRW(&cond->cond, &lock->lock, INFINITE, 0);
+    return ret ? 0 : -1;
+}
+
+/* In this branch condvar_reltime_t is a typedef of pthread_cond_t, so the
+ * condvar_reltime_* helpers below forward straight to the pthread_cond_* wrappers. */
+
+/* Initializes cond with default attributes. Returns the result of pthread_cond_init(). */
+static inline int condvar_reltime_init(condvar_reltime_t *cond)
+{
+    return pthread_cond_init(cond, NULL);
+}
+
+/* Returns the result of pthread_cond_destroy(). */
+static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
+{
+    return pthread_cond_destroy(cond);
+}
+
+/* Wakes a single waiter. Returns the result of pthread_cond_signal(). */
+static inline int condvar_reltime_signal(condvar_reltime_t *cond)
+{
+    return pthread_cond_signal(cond);
+}
+
+/* Waits on cond for at most the given number of seconds.
+ * Returns 0 if the wait succeeded, 1 if it timed out (GetLastError() reports
+ * ERROR_TIMEOUT) and -1 on any other failure. */
+static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
+{
+    /* Largest finite timeout; INFINITE itself would disable the timeout entirely. */
+    const DWORD max_timeout_ms = INFINITE - 1;
+    /* Clamp rather than let seconds * 1000 wrap around in 32 bits and shorten the wait. */
+    const DWORD timeout_ms = seconds > max_timeout_ms / 1000 ? max_timeout_ms : seconds * 1000;
+
+    if (SleepConditionVariableSRW(&cond->cond, &lock->lock, timeout_ms, 0))
+        return 0;
+
+    return GetLastError() == ERROR_TIMEOUT ? 1 : -1;
+}
+
+/* Thread naming is not implemented in this branch; the name is ignored. */
+static inline void vkd3d_set_thread_name(const char *name)
+{
+    (void)name;
+}
+
+/* pthread_once() emulation on top of the INIT_ONCE API. */
+typedef INIT_ONCE pthread_once_t;
+#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
+
+/* Callback given to InitOnceExecuteOnce(): parameter carries the user's
+ * void (*)(void) function, which is called here. Always reports success. */
+static inline BOOL CALLBACK pthread_once_wrapper(PINIT_ONCE once, PVOID parameter, PVOID *context)
+{
+    void (*func)(void) = parameter;
+    (void)once;
+    (void)context;
+    func();
+    return TRUE;
+}
+
+/* Passes func to InitOnceExecuteOnce() through pthread_once_wrapper().
+ * The result of InitOnceExecuteOnce() is not checked. */
+static inline void pthread_once(pthread_once_t *once, void (*func)(void))
+{
+    InitOnceExecuteOnce(once, pthread_once_wrapper, func, NULL);
+}
+#else
+#include <pthread.h>
+#include <errno.h>
+#include <time.h>
+
+/* Names the calling thread via pthread_setname_np(); the return value is ignored.
+ * NOTE(review): Linux is documented to reject names longer than 16 bytes including
+ * the terminator - confirm callers stay within that limit. */
+static inline void vkd3d_set_thread_name(const char *name)
+{
+    pthread_setname_np(pthread_self(), name);
+}
+
+/* Reader/writer lock wrapper around pthread_rwlock_t. Each helper below returns
+ * the result of the pthread_rwlock_* call it forwards to. */
+typedef struct rwlock
+{
+    pthread_rwlock_t rwlock;
+} rwlock_t;
+
+/* Initializes the lock with default attributes. */
+static inline int rwlock_init(rwlock_t *lock)
+{
+    return pthread_rwlock_init(&lock->rwlock, NULL);
+}
+
+/* Acquires the lock for writing. */
+static inline int rwlock_lock_write(rwlock_t *lock)
+{
+    return pthread_rwlock_wrlock(&lock->rwlock);
+}
+
+/* Acquires the lock for reading. */
+static inline int rwlock_lock_read(rwlock_t *lock)
+{
+    return pthread_rwlock_rdlock(&lock->rwlock);
+}
+
+/* Releases a write acquisition. pthreads uses the same unlock call for readers and
+ * writers; the separate name exists to match the Win32 branch of this header. */
+static inline int rwlock_unlock_write(rwlock_t *lock)
+{
+    return pthread_rwlock_unlock(&lock->rwlock);
+}
+
+/* Releases a read acquisition; see rwlock_unlock_write(). */
+static inline int rwlock_unlock_read(rwlock_t *lock)
+{
+    return pthread_rwlock_unlock(&lock->rwlock);
+}
+
+/* Destroys the underlying pthread rwlock. */
+static inline int rwlock_destroy(rwlock_t *lock)
+{
+    return pthread_rwlock_destroy(&lock->rwlock);
+}
+
+typedef struct condvar_reltime
+{
+    pthread_cond_t cond;
+} condvar_reltime_t;
+
+/* Initializes a condition variable whose timed waits are measured against
+ * CLOCK_MONOTONIC, matching the deadline computed in
+ * condvar_reltime_wait_timeout_seconds().
+ * Returns 0 on success, otherwise the error number of the first pthread call that failed. */
+static inline int condvar_reltime_init(condvar_reltime_t *cond)
+{
+    pthread_condattr_t attr;
+    int rc;
+
+    if ((rc = pthread_condattr_init(&attr)))
+        return rc;
+
+    /* Do not create the condvar if the clock could not be selected: it would
+     * silently wait against a different clock than the one used for deadlines. */
+    if (!(rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC)))
+        rc = pthread_cond_init(&cond->cond, &attr);
+
+    pthread_condattr_destroy(&attr);
+
+    return rc;
+}
+
+/* Destroys the condition variable and returns the result of pthread_cond_destroy(),
+ * so the signature matches the int-returning Win32 variant in this header. */
+static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
+{
+    return pthread_cond_destroy(&cond->cond);
+}
+
+/* Wakes a single waiter. Returns the result of pthread_cond_signal(). */
+static inline int condvar_reltime_signal(condvar_reltime_t *cond)
+{
+    return pthread_cond_signal(&cond->cond);
+}
+
+/* Waits on cond for at most the given number of seconds.
+ * Returns 0 if the wait succeeded, 1 if it timed out (ETIMEDOUT) and -1 on any other error. */
+static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
+{
+    struct timespec deadline;
+
+    /* pthread_cond_timedwait() takes an absolute time, so turn the relative
+     * timeout into a deadline on CLOCK_MONOTONIC. */
+    clock_gettime(CLOCK_MONOTONIC, &deadline);
+    deadline.tv_sec += seconds;
+
+    switch (pthread_cond_timedwait(&cond->cond, lock, &deadline))
+    {
+        case 0:
+            return 0;
+        case ETIMEDOUT:
+            return 1;
+        default:
+            return -1;
+    }
+}
+
+#define PTHREAD_ONCE_CALLBACK
+#endif
+
+#ifdef __linux__
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#endif
+
+/* Returns an identifier for the calling thread: GetCurrentThreadId() on Windows,
+ * the result of the gettid system call on Linux, and 0 on any other platform. */
+static inline unsigned int vkd3d_get_current_thread_id(void)
+{
+#ifdef _WIN32
+    return GetCurrentThreadId();
+#elif defined(__linux__)
+    return syscall(__NR_gettid);
+#else
+    return 0;
+#endif
+}
+
+#endif /* __VKD3D_THREADS_H */
--- a/include/private/vkd3d_utf8.h
+++ b/include/private/vkd3d_utf8.h
@ -21,6 +21,8 @@

 #include "vkd3d_common.h"

-char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t wchar_size) DECLSPEC_HIDDEN;
+/* Pass max_elements as 0 to rely solely on the nul terminator.
+ * Otherwise the string ends at the first nul character or after max_elements elements, whichever comes first. */
+char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t max_elements);

 #endif /* __VKD3D_UTF8_H */
--- a/include/private/vulkan_private_extensions.h
+++ b/include/private/vulkan_private_extensions.h
@ -0,0 +1,6 @@
+#ifndef __VULKAN_PRIVATE_EXTENSIONS_H__
+#define __VULKAN_PRIVATE_EXTENSIONS_H__
+
+/* Nothing here at the moment. Add hacks here! */
+
+#endif
--- a/include/shader-debug/GNUmakefile
+++ b/include/shader-debug/GNUmakefile
@ -0,0 +1,71 @@
+# Include search path handed to glslc (-I): make's current working directory.
+INCLUDE_DIR := $(CURDIR)
+
+# Shader sources, collected per stage from the directory named by M.
+# M is not defined in this file, so it must be supplied by the caller
+# (command line or environment); with M unset the patterns expand to /*.<stage>.
+VERT_SOURCES := $(wildcard $(M)/*.vert)
+FRAG_SOURCES := $(wildcard $(M)/*.frag)
+COMP_SOURCES := $(wildcard $(M)/*.comp)
+TESC_SOURCES := $(wildcard $(M)/*.tesc)
+TESE_SOURCES := $(wildcard $(M)/*.tese)
+GEOM_SOURCES := $(wildcard $(M)/*.geom)
+RGEN_SOURCES := $(wildcard $(M)/*.rgen)
+RINT_SOURCES := $(wildcard $(M)/*.rint)
+RAHIT_SOURCES := $(wildcard $(M)/*.rahit)
+RCHIT_SOURCES := $(wildcard $(M)/*.rchit)
+RMISS_SOURCES := $(wildcard $(M)/*.rmiss)
+RCALL_SOURCES := $(wildcard $(M)/*.rcall)
+
+SPV_OBJECTS := \
+			   $(VERT_SOURCES:.vert=.spv) \
+			   $(FRAG_SOURCES:.frag=.spv) \
+			   $(COMP_SOURCES:.comp=.spv) \
+			   $(TESC_SOURCES:.tesc=.spv) \
+			   $(TESE_SOURCES:.tese=.spv) \
+			   $(GEOM_SOURCES:.geom=.spv) \
+			   $(RGEN_SOURCES:.rgen=.spv) \
+			   $(RINT_SOURCES:.rint=.spv) \
+			   $(RAHIT_SOURCES:.rahit=.spv) \
+			   $(RCHIT_SOURCES:.rchit=.spv) \
+			   $(RMISS_SOURCES:.rmiss=.spv) \
+			   $(RCALL_SOURCES:.rcall=.spv)
+
+# One pattern rule per shader stage; each compiles a source file into a .spv with
+# the same stem. GLSLC_FLAGS is not set in this file and is appended to every
+# invocation as provided by the caller.
+%.spv: %.vert
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
+
+# Fragment shaders are additionally built with DEBUG_CHANNEL_HELPER_LANES defined.
+%.spv: %.frag
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES $(GLSLC_FLAGS)
+
+%.spv: %.comp
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
+
+%.spv: %.geom
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
+
+%.spv: %.tesc
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
+
+%.spv: %.tese
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
+
+# The .rgen/.rint/.rahit/.rchit/.rmiss/.rcall rules additionally pass --target-spv=spv1.4.
+%.spv: %.rgen
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+%.spv: %.rint
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+%.spv: %.rahit
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+%.spv: %.rchit
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+%.spv: %.rmiss
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+%.spv: %.rcall
+	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
+
+# Default goal: it is the first explicit target in the file (pattern rules are
+# never chosen as the default goal). Builds every .spv derived from the sources.
+all: $(SPV_OBJECTS)
+
+# Removes the generated .spv files.
+clean:
+	rm -f $(SPV_OBJECTS)
+
+# Neither target names a real file; declare both phony so a stray file called
+# "all" or "clean" cannot make them appear up to date.
+.PHONY: all clean
--- a/include/shader-debug/debug_channel.h
+++ b/include/shader-debug/debug_channel.h
@ -0,0 +1,338 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef DEBUG_CHANNEL_H_
+#define DEBUG_CHANNEL_H_
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_ARB_gpu_shader_int64 : require
+#extension GL_KHR_shader_subgroup_basic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#ifdef DEBUG_CHANNEL_HELPER_LANES
+#extension GL_EXT_demote_to_helper_invocation : require
+#endif
+
+layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer ControlBlock
+{
+	uint message_counter;
+	uint instance_counter;
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer RingBuffer
+{
+	uint data[];
+};
+
+layout(constant_id = 0) const uint64_t DEBUG_SHADER_HASH = 0;
+layout(constant_id = 1) const uint64_t DEBUG_SHADER_ATOMIC_BDA = 0;
+layout(constant_id = 2) const uint64_t DEBUG_SHADER_RING_BDA = 0;
+layout(constant_id = 3) const uint DEBUG_SHADER_RING_SIZE = 0;
+const uint DEBUG_SHADER_RING_MASK = DEBUG_SHADER_RING_SIZE - 1;
+const bool DEBUG_SHADER_RING_ACTIVE = DEBUG_SHADER_ATOMIC_BDA != 0;
+
+const uint DEBUG_CHANNEL_FMT_HEX = 0;
+const uint DEBUG_CHANNEL_FMT_I32 = 1;
+const uint DEBUG_CHANNEL_FMT_F32 = 2;
+const uint DEBUG_CHANNEL_FMT_HEX_ALL = DEBUG_CHANNEL_FMT_HEX * 0x55555555u;
+const uint DEBUG_CHANNEL_FMT_I32_ALL = DEBUG_CHANNEL_FMT_I32 * 0x55555555u;
+const uint DEBUG_CHANNEL_FMT_F32_ALL = DEBUG_CHANNEL_FMT_F32 * 0x55555555u;
+const uint DEBUG_CHANNEL_WORD_COOKIE = 0xdeadca70u; /* Let host fish for this cookie in device lost scenarios. */
+
+uint DEBUG_CHANNEL_INSTANCE_COUNTER;
+uvec3 DEBUG_CHANNEL_ID;
+
+/* Need to make sure the elected subgroup can have side effects. */
+/* Returns true for the invocation picked by subgroupElect(). With
+ * DEBUG_CHANNEL_HELPER_LANES defined, helper invocations never call
+ * subgroupElect() and always get false. */
+#ifdef DEBUG_CHANNEL_HELPER_LANES
+bool DEBUG_CHANNEL_ELECT()
+{
+	bool elected = false;
+	if (!helperInvocationEXT())
+		elected = subgroupElect();
+	return elected;
+}
+#else
+bool DEBUG_CHANNEL_ELECT()
+{
+	return subgroupElect();
+}
+#endif
+
+/* Stores id in DEBUG_CHANNEL_ID and assigns DEBUG_CHANNEL_INSTANCE_COUNTER for the
+ * messages written afterwards. Does nothing when the ring is inactive
+ * (DEBUG_SHADER_ATOMIC_BDA == 0).
+ * One elected invocation atomically increments ControlBlock.instance_counter and
+ * the pre-increment value is broadcast with subgroupBroadcastFirst(); inst is
+ * only written by that elected invocation. */
+void DEBUG_CHANNEL_INIT(uvec3 id)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	DEBUG_CHANNEL_ID = id;
+	uint inst;
+#ifdef DEBUG_CHANNEL_HELPER_LANES
+	if (!helperInvocationEXT())
+	{
+		/* Elect and broadcast must happen without helper lanes here.
+		 * We must perform the instance increment with side effects,
+		 * and broadcast first must pick the elected lane. */
+		if (subgroupElect())
+			inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
+		DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
+	}
+	/* Helper lanes cannot write debug messages, since they cannot have side effects.
+	 * Leave it undefined, and we should ensure SGPR propagation either way ... */
+#else
+	if (DEBUG_CHANNEL_ELECT())
+		inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
+	DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
+#endif
+}
+
+/* Variant of DEBUG_CHANNEL_INIT() that takes the instance number from the caller
+ * instead of allocating one from ControlBlock.instance_counter.
+ * Does nothing when the ring is inactive. */
+void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	DEBUG_CHANNEL_ID = id;
+	DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
+}
+
+/* Publishes a message by writing its first word: num_words OR'ed with
+ * DEBUG_CHANNEL_WORD_COOKIE. The write is bracketed by memoryBarrierBuffer()
+ * calls; it must stay after the payload writes issued by the caller. */
+void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
+{
+	memoryBarrierBuffer();
+	/* Make sure this word is made visible last. This way the ring thread can avoid reading bogus messages.
+	 * If the host thread observed a num_word of 0, we know a message was allocated, but we don't necessarily
+	 * have a complete write yet.
+	 * In a device lost scenario, we can try to fish for valid messages. */
+	buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words | DEBUG_CHANNEL_WORD_COOKIE;
+	memoryBarrierBuffer();
+}
+
+/* Writes header words 1..7 of a message starting at offset:
+ *   1-2: DEBUG_SHADER_HASH (low 32 bits, then high 32 bits)
+ *   3:   DEBUG_CHANNEL_INSTANCE_COUNTER
+ *   4-6: DEBUG_CHANNEL_ID.xyz
+ *   7:   fmt
+ * Word 0 is left for DEBUG_CHANNEL_UNLOCK_MESSAGE(). Every index is wrapped with
+ * DEBUG_SHADER_RING_MASK. */
+void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint fmt)
+{
+	buf.data[(offset + 1) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH);
+	buf.data[(offset + 2) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH >> 32);
+	buf.data[(offset + 3) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_INSTANCE_COUNTER;
+	buf.data[(offset + 4) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_ID.x;
+	buf.data[(offset + 5) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_ID.y;
+	buf.data[(offset + 6) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_ID.z;
+	buf.data[(offset + 7) & DEBUG_SHADER_RING_MASK] = fmt;
+}
+
+/* Reserves `words` words by atomically advancing ControlBlock.message_counter.
+ * Returns the counter value from before the add, i.e. the unwrapped offset of
+ * the first reserved word. */
+uint DEBUG_CHANNEL_ALLOCATE(uint words)
+{
+	return atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).message_counter, words);
+}
+
+/* DEBUG_CHANNEL_MSG_ overloads: low-level message writers taking 0 to 4 payload words.
+ * Each one returns immediately when the ring is inactive, reserves 8 header words
+ * plus one word per payload value, writes the header and payload, and finally
+ * publishes the message with DEBUG_CHANNEL_UNLOCK_MESSAGE().
+ * This overload writes a header-only message with fmt 0.
+ * NOTE(review): wrapping with DEBUG_SHADER_RING_MASK (size - 1) presumes
+ * DEBUG_SHADER_RING_SIZE is a power of two - confirm on the host side. */
+void DEBUG_CHANNEL_MSG_()
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	uint words = 8;
+	uint offset = DEBUG_CHANNEL_ALLOCATE(words);
+	RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
+	DEBUG_CHANNEL_WRITE_HEADER(buf, offset, 0);
+	DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
+}
+
+void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
+	uint words = 9;
+	uint offset = DEBUG_CHANNEL_ALLOCATE(words);
+	DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
+	buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
+	DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
+}
+
+void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
+	uint words = 10;
+	uint offset = DEBUG_CHANNEL_ALLOCATE(words);
+	DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
+	buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
+	buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
+	DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
+}
+
+void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
+	uint words = 11;
+	uint offset = DEBUG_CHANNEL_ALLOCATE(words);
+	DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
+	buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
+	buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
+	buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
+	DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
+}
+
+void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
+{
+	if (!DEBUG_SHADER_RING_ACTIVE)
+		return;
+	RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
+	uint words = 12;
+	uint offset = DEBUG_CHANNEL_ALLOCATE(words);
+	DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
+	buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
+	buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
+	buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
+	buf.data[(offset + 11) & DEBUG_SHADER_RING_MASK] = v3;
+	DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
+}
+
+/* DEBUG_CHANNEL_MSG overloads: typed front ends for DEBUG_CHANNEL_MSG_.
+ * uint arguments are tagged DEBUG_CHANNEL_FMT_HEX_ALL, int arguments
+ * DEBUG_CHANNEL_FMT_I32_ALL, and float arguments DEBUG_CHANNEL_FMT_F32_ALL with
+ * their bits passed through floatBitsToUint(). This overload sends no payload. */
+void DEBUG_CHANNEL_MSG()
+{
+	DEBUG_CHANNEL_MSG_();
+}
+
+void DEBUG_CHANNEL_MSG(uint v0)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_HEX_ALL, v0);
+}
+
+void DEBUG_CHANNEL_MSG(uint v0, uint v1)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_HEX_ALL, v0, v1);
+}
+
+void DEBUG_CHANNEL_MSG(uint v0, uint v1, uint v2)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_HEX_ALL, v0, v1, v2);
+}
+
+void DEBUG_CHANNEL_MSG(uint v0, uint v1, uint v2, uint v3)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_HEX_ALL, v0, v1, v2, v3);
+}
+
+void DEBUG_CHANNEL_MSG(int v0)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_I32_ALL, v0);
+}
+
+void DEBUG_CHANNEL_MSG(int v0, int v1)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_I32_ALL, v0, v1);
+}
+
+void DEBUG_CHANNEL_MSG(int v0, int v1, int v2)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_I32_ALL, v0, v1, v2);
+}
+
+void DEBUG_CHANNEL_MSG(int v0, int v1, int v2, int v3)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_I32_ALL, v0, v1, v2, v3);
+}
+
+void DEBUG_CHANNEL_MSG(float v0)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0));
+}
+
+void DEBUG_CHANNEL_MSG(float v0, float v1)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1));
+}
+
+void DEBUG_CHANNEL_MSG(float v0, float v1, float v2)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1), floatBitsToUint(v2));
+}
+
+void DEBUG_CHANNEL_MSG(float v0, float v1, float v2, float v3)
+{
+	DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1), floatBitsToUint(v2), floatBitsToUint(v3));
+}
+
+/* DEBUG_CHANNEL_MSG_UNIFORM overloads: same as DEBUG_CHANNEL_MSG, but only the
+ * invocation for which DEBUG_CHANNEL_ELECT() returns true writes the message. */
+void DEBUG_CHANNEL_MSG_UNIFORM(uint v0)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2, uint v3)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(int v0)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2, int v3)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(float v0)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2);
+}
+
+void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2, float v3)
+{
+    if (DEBUG_CHANNEL_ELECT())
+        DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
+}
+
+#endif
--- a/include/vkd3d.h
+++ b/include/vkd3d.h
@ -27,29 +27,71 @@
 #endif  /* VKD3D_NO_WIN32_TYPES */

 #ifndef VKD3D_NO_VULKAN_H
+# ifdef _WIN32
+#  define VK_USE_PLATFORM_WIN32_KHR
+# endif
 # include <vulkan/vulkan.h>
+# include "private/vulkan_private_extensions.h"
 #endif  /* VKD3D_NO_VULKAN_H */

+#define VKD3D_MIN_API_VERSION VK_API_VERSION_1_1
+#define VKD3D_MAX_API_VERSION VK_API_VERSION_1_1
+
+#if defined(__GNUC__)
+# define DECLSPEC_VISIBLE __attribute__((visibility("default")))
+#else
+# define DECLSPEC_VISIBLE
+#endif
+
+#if defined(_WIN32) && !defined(VKD3D_BUILD_STANDALONE_D3D12)
+# ifdef VKD3D_EXPORTS
+#  define VKD3D_EXPORT __declspec(dllexport)
+# else
+#  define VKD3D_EXPORT __declspec(dllimport)
+# endif
+#elif defined(__GNUC__)
+# define VKD3D_EXPORT DECLSPEC_VISIBLE
+#else
+# define VKD3D_EXPORT
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif  /* __cplusplus */

-enum vkd3d_structure_type
-{
-    /* 1.0 */
-    VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
-    VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
-    VKD3D_STRUCTURE_TYPE_IMAGE_RESOURCE_CREATE_INFO,
-
-    /* 1.1 */
-    VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO,
-
-    /* 1.2 */
-    VKD3D_STRUCTURE_TYPE_OPTIONAL_DEVICE_EXTENSIONS_INFO,
-    VKD3D_STRUCTURE_TYPE_APPLICATION_INFO,
-
-    VKD3D_FORCE_32_BIT_ENUM(VKD3D_STRUCTURE_TYPE),
-};
+#define VKD3D_CONFIG_FLAG_VULKAN_DEBUG (1ull << 0)
+#define VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS (1ull << 1)
+#define VKD3D_CONFIG_FLAG_DEBUG_UTILS (1ull << 2)
+#define VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV (1ull << 3)
+#define VKD3D_CONFIG_FLAG_DXR (1ull << 4)
+#define VKD3D_CONFIG_FLAG_SINGLE_QUEUE (1ull << 5)
+#define VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS (1ull << 6)
+#define VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE (1ull << 7)
+#define VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE (1ull << 8)
+#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
+#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
+#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
+#define VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE (1ull << 12)
+#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
+#define VKD3D_CONFIG_FLAG_DXR11 (1ull << 14)
+#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
+#define VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE (1ull << 16)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV (1ull << 17)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV (1ull << 18)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG (1ull << 19)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV (1ull << 20)
+#define VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET (1ull << 21)
+#define VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR (1ull << 22)
+#define VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS (1ull << 23)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER (1ull << 24)
+#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
+#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
+#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
+#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
+#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
+#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
+#define VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16 (1ull << 31)
+#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)

 typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);

@ -62,49 +104,22 @@ struct vkd3d_instance;

 struct vkd3d_instance_create_info
 {
-    enum vkd3d_structure_type type;
-    const void *next;
-
    PFN_vkd3d_signal_event pfn_signal_event;
    PFN_vkd3d_create_thread pfn_create_thread;
    PFN_vkd3d_join_thread pfn_join_thread;
-    size_t wchar_size;

    /* If set to NULL, libvkd3d loads libvulkan. */
    PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr;

    const char * const *instance_extensions;
    uint32_t instance_extension_count;
-};

-/* Extends vkd3d_instance_create_info. Available since 1.1. */
-struct vkd3d_optional_instance_extensions_info
-{
-    enum vkd3d_structure_type type;
-    const void *next;
-
-    const char * const *extensions;
-    uint32_t extension_count;
-};
-
-/* Extends vkd3d_instance_create_info. Available since 1.2. */
-struct vkd3d_application_info
-{
-    enum vkd3d_structure_type type;
-    const void *next;
-
-    const char *application_name;
-    uint32_t application_version;
-
-    const char *engine_name; /* "vkd3d" if NULL */
-    uint32_t engine_version; /* vkd3d version if engine_name is NULL */
+    const char * const *optional_instance_extensions;
+    uint32_t optional_instance_extension_count;
 };

 struct vkd3d_device_create_info
 {
-    enum vkd3d_structure_type type;
-    const void *next;
-
    D3D_FEATURE_LEVEL minimum_feature_level;

    struct vkd3d_instance *instance;
@ -115,29 +130,15 @@ struct vkd3d_device_create_info
    const char * const *device_extensions;
    uint32_t device_extension_count;

+    const char * const *optional_device_extensions;
+    uint32_t optional_device_extension_count;
+
    IUnknown *parent;
    LUID adapter_luid;
 };

-/* Extends vkd3d_device_create_info. Available since 1.2. */
-struct vkd3d_optional_device_extensions_info
-{
-    enum vkd3d_structure_type type;
-    const void *next;
-
-    const char * const *extensions;
-    uint32_t extension_count;
-};
-
-/* vkd3d_image_resource_create_info flags */
-#define VKD3D_RESOURCE_INITIAL_STATE_TRANSITION 0x00000001
-#define VKD3D_RESOURCE_PRESENT_STATE_TRANSITION 0x00000002
-
 struct vkd3d_image_resource_create_info
 {
-    enum vkd3d_structure_type type;
-    const void *next;
-
    VkImage vk_image;
    D3D12_RESOURCE_DESC desc;
    unsigned int flags;
@ -146,42 +147,43 @@ struct vkd3d_image_resource_create_info

 #ifndef VKD3D_NO_PROTOTYPES

-HRESULT vkd3d_create_instance(const struct vkd3d_instance_create_info *create_info,
+VKD3D_EXPORT HRESULT vkd3d_create_instance(const struct vkd3d_instance_create_info *create_info,
        struct vkd3d_instance **instance);
-ULONG vkd3d_instance_decref(struct vkd3d_instance *instance);
-VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance);
-ULONG vkd3d_instance_incref(struct vkd3d_instance *instance);
+VKD3D_EXPORT ULONG vkd3d_instance_decref(struct vkd3d_instance *instance);
+VKD3D_EXPORT VkInstance vkd3d_instance_get_vk_instance(struct vkd3d_instance *instance);
+VKD3D_EXPORT ULONG vkd3d_instance_incref(struct vkd3d_instance *instance);

-HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info,
+VKD3D_EXPORT HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info,
        REFIID iid, void **device);
-IUnknown *vkd3d_get_device_parent(ID3D12Device *device);
-VkDevice vkd3d_get_vk_device(ID3D12Device *device);
-VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device);
-struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device);
+VKD3D_EXPORT IUnknown *vkd3d_get_device_parent(ID3D12Device *device);
+VKD3D_EXPORT VkDevice vkd3d_get_vk_device(ID3D12Device *device);
+VKD3D_EXPORT VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device);
+VKD3D_EXPORT struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device);

-uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
-VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue);
-void vkd3d_release_vk_queue(ID3D12CommandQueue *queue);
+VKD3D_EXPORT uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
+VKD3D_EXPORT VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue);
+VKD3D_EXPORT void vkd3d_release_vk_queue(ID3D12CommandQueue *queue);
+VKD3D_EXPORT void vkd3d_enqueue_initial_transition(ID3D12CommandQueue *queue, ID3D12Resource *resource);

-HRESULT vkd3d_create_image_resource(ID3D12Device *device,
+VKD3D_EXPORT HRESULT vkd3d_create_image_resource(ID3D12Device *device,
        const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource);
-ULONG vkd3d_resource_decref(ID3D12Resource *resource);
-ULONG vkd3d_resource_incref(ID3D12Resource *resource);
+VKD3D_EXPORT ULONG vkd3d_resource_decref(ID3D12Resource *resource);
+VKD3D_EXPORT ULONG vkd3d_resource_incref(ID3D12Resource *resource);

-HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc,
+VKD3D_EXPORT HRESULT vkd3d_serialize_root_signature(const D3D12_ROOT_SIGNATURE_DESC *desc,
        D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob);
-HRESULT vkd3d_create_root_signature_deserializer(const void *data, SIZE_T data_size,
+VKD3D_EXPORT HRESULT vkd3d_create_root_signature_deserializer(const void *data, SIZE_T data_size,
        REFIID iid, void **deserializer);

-VkFormat vkd3d_get_vk_format(DXGI_FORMAT format);
+VKD3D_EXPORT VkFormat vkd3d_get_vk_format(DXGI_FORMAT format);

 /* 1.1 */
-DXGI_FORMAT vkd3d_get_dxgi_format(VkFormat format);
+VKD3D_EXPORT DXGI_FORMAT vkd3d_get_dxgi_format(VkFormat format);

 /* 1.2 */
-HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
+VKD3D_EXPORT HRESULT vkd3d_serialize_versioned_root_signature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
        ID3DBlob **blob, ID3DBlob **error_blob);
-HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZE_T data_size,
+VKD3D_EXPORT HRESULT vkd3d_create_versioned_root_signature_deserializer(const void *data, SIZE_T data_size,
        REFIID iid, void **deserializer);

 #endif  /* VKD3D_NO_PROTOTYPES */
--- a/include/vkd3d_command_list_vkd3d_ext.idl
+++ b/include/vkd3d_command_list_vkd3d_ext.idl
@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+import "vkd3d_d3d12.idl";
+import "vkd3d_vk_includes.h";
+
+[
+    uuid(77a86b09-2bea-4801-b89a-37648e104af1),
+    object,
+    local,
+    pointer_default(unique)
+]
+interface ID3D12GraphicsCommandListExt : IUnknown
+{
+   HRESULT GetVulkanHandle(VkCommandBuffer *pVkCommandBuffer);
+   HRESULT LaunchCubinShader(D3D12_CUBIN_DATA_HANDLE *handle, UINT32 block_x, UINT32 block_y, UINT32 block_z, const void *params, UINT32 param_size);
+}
+
--- a/include/vkd3d_d3d12.idl
+++ b/include/vkd3d_d3d12.idl
--- a/include/vkd3d_d3dcommon.idl
+++ b/include/vkd3d_d3dcommon.idl
@ -77,6 +77,7 @@ typedef enum D3D_FEATURE_LEVEL
    D3D_FEATURE_LEVEL_11_1 = 0xb100,
    D3D_FEATURE_LEVEL_12_0 = 0xc000,
    D3D_FEATURE_LEVEL_12_1 = 0xc100,
+    D3D_FEATURE_LEVEL_12_2 = 0xc200,
 } D3D_FEATURE_LEVEL;

 [
@ -93,3 +94,7 @@ interface ID3D10Blob : IUnknown

 typedef ID3D10Blob ID3DBlob;
 cpp_quote("#define IID_ID3DBlob IID_ID3D10Blob")
+
+cpp_quote("DEFINE_GUID(WKPDID_D3DDebugObjectName,0x429b8c22,0x9188,0x4b0c,0x87,0x42,0xac,0xb0,0xbf,0x85,0xc2,0x00);")
+cpp_quote("DEFINE_GUID(WKPDID_D3DDebugObjectNameW,0x4cca5fd8,0x921f,0x42c8,0x85,0x66,0x70,0xca,0xf2,0xa9,0xb7,0x41);")
+cpp_quote("DEFINE_GUID(WKPDID_CommentStringW,0xd0149dc0,0x90e8,0x4ec8,0x81,0x44,0xe9,0x00,0xad,0x26,0x6b,0xb2);")
--- a/include/vkd3d_device_vkd3d_ext.idl
+++ b/include/vkd3d_device_vkd3d_ext.idl
@ -0,0 +1,37 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+import "vkd3d_d3d12.idl";
+import "vkd3d_vk_includes.h";
+
+[
+    uuid(11ea7a1a-0f6a-49bf-b612-3e30f8e201dd),
+    object,
+    local,
+    pointer_default(unique)
+]
+interface ID3D12DeviceExt : IUnknown
+{
+    HRESULT GetVulkanHandles(VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device);
+    BOOL GetExtensionSupport(D3D12_VK_EXTENSION extension);
+    HRESULT CreateCubinComputeShaderWithName(const void *cubin_data, UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **handle);
+    HRESULT DestroyCubinComputeShader(D3D12_CUBIN_DATA_HANDLE *handle);
+    HRESULT GetCudaTextureObject(D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle);
+    HRESULT GetCudaSurfaceObject(D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, UINT32 *cuda_surface_handle);
+    HRESULT CaptureUAVInfo(D3D12_UAV_INFO *uav_info);
+}
+
--- a/include/vkd3d_dxgiformat.idl
+++ b/include/vkd3d_dxgiformat.idl
@ -135,5 +135,12 @@ typedef enum DXGI_FORMAT
    DXGI_FORMAT_A8P8                        = 0x72,
    DXGI_FORMAT_B4G4R4A4_UNORM              = 0x73,

+    DXGI_FORMAT_P208                        = 0x82,
+    DXGI_FORMAT_V208                        = 0x83,
+    DXGI_FORMAT_V408                        = 0x84,
+
+    DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE         = 0xbd,
+    DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE = 0xbe,
+
    DXGI_FORMAT_FORCE_UINT                  = 0xffffffff,
 } DXGI_FORMAT;
--- a/include/vkd3d_shader.h
+++ b/include/vkd3d_shader.h
@ -21,24 +21,16 @@

 #include <stdbool.h>
 #include <stdint.h>
+#include <stddef.h>
+#include <hashmap.h>
 #include <vkd3d_types.h>
+#include <vkd3d_d3d12.h>
+#include <vkd3d.h>

 #ifdef __cplusplus
 extern "C" {
 #endif  /* __cplusplus */

-enum vkd3d_shader_structure_type
-{
-    /* 1.2 */
-    VKD3D_SHADER_STRUCTURE_TYPE_SHADER_INTERFACE_INFO,
-    VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_ARGUMENTS,
-    VKD3D_SHADER_STRUCTURE_TYPE_SCAN_INFO,
-    VKD3D_SHADER_STRUCTURE_TYPE_TRANSFORM_FEEDBACK_INFO,
-    VKD3D_SHADER_STRUCTURE_TYPE_DOMAIN_SHADER_COMPILE_ARGUMENTS,
-
-    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE),
-};
-
 enum vkd3d_shader_compiler_option
 {
    VKD3D_SHADER_STRIP_DEBUG = 0x00000001,
@ -60,12 +52,34 @@ enum vkd3d_shader_visibility
    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_VISIBILITY),
 };

+typedef uint64_t vkd3d_shader_hash_t;
+
+enum vkd3d_shader_meta_flags
+{
+    VKD3D_SHADER_META_FLAG_REPLACED = 1 << 0,
+    VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE = 1 << 1,
+    VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS = 1 << 2,
+};
+
+struct vkd3d_shader_meta
+{
+    vkd3d_shader_hash_t hash;
+    unsigned int cs_workgroup_size[3]; /* Only contains valid data if VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE is set in flags. */
+    unsigned int patch_vertex_count; /* Relevant for HS. May be 0, in which case the patch vertex count is not known. */
+    unsigned int cs_required_wave_size; /* If non-zero, force a specific CS subgroup size. */
+    uint32_t flags; /* Bitmask of enum vkd3d_shader_meta_flags values. */
+};
+STATIC_ASSERT(sizeof(struct vkd3d_shader_meta) == 32);
+
 struct vkd3d_shader_code
 {
    const void *code;
    size_t size;
+    struct vkd3d_shader_meta meta;
 };

+vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader);
+
 enum vkd3d_shader_descriptor_type
 {
    VKD3D_SHADER_DESCRIPTOR_TYPE_UNKNOWN,
@ -85,8 +99,12 @@ struct vkd3d_shader_descriptor_binding

 enum vkd3d_shader_binding_flag
 {
-    VKD3D_SHADER_BINDING_FLAG_BUFFER = 0x00000001,
-    VKD3D_SHADER_BINDING_FLAG_IMAGE  = 0x00000002,
+    VKD3D_SHADER_BINDING_FLAG_BUFFER     = 0x00000001,
+    VKD3D_SHADER_BINDING_FLAG_IMAGE      = 0x00000002,
+    VKD3D_SHADER_BINDING_FLAG_AUX_BUFFER = 0x00000004,
+    VKD3D_SHADER_BINDING_FLAG_BINDLESS   = 0x00000008,
+    VKD3D_SHADER_BINDING_FLAG_RAW_VA     = 0x00000010,
+    VKD3D_SHADER_BINDING_FLAG_RAW_SSBO   = 0x00000020,

    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG),
 };
@ -115,7 +133,7 @@ struct vkd3d_shader_parameter_immediate_constant
    union
    {
        uint32_t u32;
-    } u;
+    };
 };

 struct vkd3d_shader_parameter_specialization_constant
@ -132,13 +150,19 @@ struct vkd3d_shader_parameter
    {
        struct vkd3d_shader_parameter_immediate_constant immediate_constant;
        struct vkd3d_shader_parameter_specialization_constant specialization_constant;
-    } u;
+    };
 };

+#define VKD3D_SHADER_DESCRIPTOR_RANGE_UNBOUNDED (~0u)
+
 struct vkd3d_shader_resource_binding
 {
    enum vkd3d_shader_descriptor_type type;
+    unsigned int register_space;
    unsigned int register_index;
+    unsigned int register_count;
+    unsigned int descriptor_table;
+    unsigned int descriptor_offset;
    enum vkd3d_shader_visibility shader_visibility;
    unsigned int flags; /* vkd3d_shader_binding_flags */

@ -147,27 +171,9 @@ struct vkd3d_shader_resource_binding

 #define VKD3D_DUMMY_SAMPLER_INDEX ~0u

-struct vkd3d_shader_combined_resource_sampler
-{
-    unsigned int resource_index;
-    unsigned int sampler_index;
-    enum vkd3d_shader_visibility shader_visibility;
-    unsigned int flags; /* vkd3d_shader_binding_flags */
-
-    struct vkd3d_shader_descriptor_binding binding;
-};
-
-struct vkd3d_shader_uav_counter_binding
-{
-    unsigned int register_index; /* u# */
-    enum vkd3d_shader_visibility shader_visibility;
-
-    struct vkd3d_shader_descriptor_binding binding;
-    unsigned int offset;
-};
-
 struct vkd3d_shader_push_constant_buffer
 {
+    unsigned int register_space;
    unsigned int register_index;
    enum vkd3d_shader_visibility shader_visibility;

@ -175,22 +181,89 @@ struct vkd3d_shader_push_constant_buffer
    unsigned int size;   /* in bytes */
 };

+struct vkd3d_shader_descriptor_table_buffer
+{
+    unsigned int offset; /* in bytes */
+    unsigned int count;  /* number of tables */
+};
+
+enum vkd3d_shader_interface_flag
+{
+    VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER = 0x00000001u,
+    VKD3D_SHADER_INTERFACE_BINDLESS_CBV_AS_STORAGE_BUFFER   = 0x00000002u,
+    VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER               = 0x00000004u,
+    VKD3D_SHADER_INTERFACE_TYPED_OFFSET_BUFFER              = 0x00000008u,
+    VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER             = 0x00000010u
+};
+
 struct vkd3d_shader_interface_info
 {
-    enum vkd3d_shader_structure_type type;
-    const void *next;
+    unsigned int flags; /* vkd3d_shader_interface_flags */
+    unsigned int min_ssbo_alignment;

+    struct vkd3d_shader_descriptor_table_buffer descriptor_tables;
    const struct vkd3d_shader_resource_binding *bindings;
    unsigned int binding_count;

    const struct vkd3d_shader_push_constant_buffer *push_constant_buffers;
    unsigned int push_constant_buffer_count;

-    const struct vkd3d_shader_combined_resource_sampler *combined_samplers;
-    unsigned int combined_sampler_count;
+    /* Ignored unless VKD3D_SHADER_INTERFACE_PUSH_CONSTANTS_AS_UNIFORM_BUFFER is set */
+    const struct vkd3d_shader_descriptor_binding *push_constant_ubo_binding;
+    /* Ignored unless VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER or TYPED_OFFSET_BUFFER is set */
+    const struct vkd3d_shader_descriptor_binding *offset_buffer_binding;

-    const struct vkd3d_shader_uav_counter_binding *uav_counters;
-    unsigned int uav_counter_count;
+#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
+    /* Ignored unless VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER is set. */
+    const struct vkd3d_shader_descriptor_binding *descriptor_qa_global_binding;
+    /* Ignored unless VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER is set. */
+    const struct vkd3d_shader_descriptor_binding *descriptor_qa_heap_binding;
+#endif
+
+    VkShaderStageFlagBits stage;
+
+    const struct vkd3d_shader_transform_feedback_info *xfb_info;
+};
+
+struct vkd3d_shader_descriptor_table
+{
+    uint32_t table_index;
+    uint32_t binding_count;
+    struct vkd3d_shader_resource_binding *first_binding;
+};
+
+struct vkd3d_shader_root_constant
+{
+    uint32_t constant_index;
+    uint32_t constant_count;
+};
+
+struct vkd3d_shader_root_descriptor
+{
+    struct vkd3d_shader_resource_binding *binding;
+    uint32_t raw_va_root_descriptor_index;
+};
+
+struct vkd3d_shader_root_parameter
+{
+    D3D12_ROOT_PARAMETER_TYPE parameter_type;
+    union
+    {
+        struct vkd3d_shader_root_constant constant;
+        struct vkd3d_shader_root_descriptor descriptor;
+        struct vkd3d_shader_descriptor_table descriptor_table;
+    };
+};
+
+struct vkd3d_shader_interface_local_info
+{
+    const struct vkd3d_shader_root_parameter *local_root_parameters;
+    unsigned int local_root_parameter_count;
+    const struct vkd3d_shader_push_constant_buffer *shader_record_constant_buffers;
+    unsigned int shader_record_buffer_count;
+    const struct vkd3d_shader_resource_binding *bindings;
+    unsigned int binding_count;
+    uint32_t descriptor_size;
 };

 struct vkd3d_shader_transform_feedback_element
@ -203,12 +276,8 @@ struct vkd3d_shader_transform_feedback_element
    uint8_t output_slot;
 };

-/* Extends vkd3d_shader_interface_info. */
 struct vkd3d_shader_transform_feedback_info
 {
-    enum vkd3d_shader_structure_type type;
-    const void *next;
-
    const struct vkd3d_shader_transform_feedback_element *elements;
    unsigned int element_count;
    const unsigned int *buffer_strides;
@ -218,7 +287,6 @@ struct vkd3d_shader_transform_feedback_info
 enum vkd3d_shader_target
 {
    VKD3D_SHADER_TARGET_NONE,
-    VKD3D_SHADER_TARGET_SPIRV_OPENGL_4_5,
    VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0, /* default target */

    VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET),
@ -229,13 +297,63 @@ enum vkd3d_shader_target_extension
    VKD3D_SHADER_TARGET_EXTENSION_NONE,

    VKD3D_SHADER_TARGET_EXTENSION_SPV_EXT_DEMOTE_TO_HELPER_INVOCATION,
+    VKD3D_SHADER_TARGET_EXTENSION_READ_STORAGE_IMAGE_WITHOUT_FORMAT,
+    VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT,
+    VKD3D_SHADER_TARGET_EXTENSION_RAY_TRACING_PRIMITIVE_CULLING,
+    VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT,
+
+    /* When using scalar block layout with a vec3 array on a byte address buffer,
+     * there is diverging behavior across hardware.
+     * On AMD, robustness is checked per component, which means we can implement ByteAddressBuffer
+     * without further hackery. On NVIDIA, robustness does not seem to work this way, so it's either
+     * all in range, or all out of range. We can implement structured buffer vectorization of vec3,
+     * but not byte address buffer. */
+    VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS,
+    VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR,
+    VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT,
+    VKD3D_SHADER_TARGET_EXTENSION_COUNT,
+};
+
+enum vkd3d_shader_quirk
+{
+    /* If sample or sample_b is used in control flow, force LOD 0.0 (which game should expect anyway).
+     * Works around specific, questionable shaders which rely on this to give sensible results,
+     * since LOD can become garbage on certain implementations, and even on native drivers
+     * the result is implementation defined.
+     * Outside of making this edge case well-defined in Vulkan or hacking driver compilers,
+     * this is the pragmatic solution.
+     * Hoisting gradients is not possible in all cases,
+     * and would not be worth it until it's a widespread problem. */
+    VKD3D_SHADER_QUIRK_FORCE_EXPLICIT_LOD_IN_CONTROL_FLOW = (1 << 0),
+
+    /* After every write to group shared memory, force a memory barrier.
+     * This works around buggy games which forget to use barrier(). */
+    VKD3D_SHADER_QUIRK_FORCE_TGSM_BARRIERS = (1 << 1),
+
+    /* For Position builtins in Output storage class, emit Invariant decoration.
+     * Normally, games have to emit Precise math for position, but if they forget ... */
+    VKD3D_SHADER_QUIRK_INVARIANT_POSITION = (1 << 2),
+};
+
+struct vkd3d_shader_quirk_hash
+{
+    vkd3d_shader_hash_t shader_hash;
+    uint32_t quirks;
+};
+
+struct vkd3d_shader_quirk_info
+{
+    const struct vkd3d_shader_quirk_hash *hashes;
+    unsigned int num_hashes;
+    uint32_t default_quirks;
+
+    /* Quirks which are ORed in with the other masks (including default_quirks).
+     * Used mostly for additional overrides from VKD3D_CONFIG. */
+    uint32_t global_quirks;
 };

 struct vkd3d_shader_compile_arguments
 {
-    enum vkd3d_shader_structure_type type;
-    const void *next;
-
    enum vkd3d_shader_target target;

    unsigned int target_extension_count;
@ -247,6 +365,8 @@ struct vkd3d_shader_compile_arguments
    bool dual_source_blending;
    const unsigned int *output_swizzles;
    unsigned int output_swizzle_count;
+
+    const struct vkd3d_shader_quirk_info *quirks;
 };

 enum vkd3d_tessellator_output_primitive
@ -265,16 +385,6 @@ enum vkd3d_tessellator_partitioning
    VKD3D_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4,
 };

-/* Extends vkd3d_shader_compile_arguments. */
-struct vkd3d_shader_domain_shader_compile_arguments
-{
-    enum vkd3d_shader_structure_type type;
-    const void *next;
-
-    enum vkd3d_tessellator_output_primitive output_primitive;
-    enum vkd3d_tessellator_output_primitive partitioning;
-};
-
 /* root signature 1.0 */
 enum vkd3d_filter
 {
@ -426,7 +536,7 @@ struct vkd3d_root_parameter
        struct vkd3d_root_descriptor_table descriptor_table;
        struct vkd3d_root_constants constants;
        struct vkd3d_root_descriptor descriptor;
-    } u;
+    };
    enum vkd3d_shader_visibility shader_visibility;
 };

@ -469,6 +579,7 @@ enum vkd3d_descriptor_range_flags
    VKD3D_DESCRIPTOR_RANGE_FLAG_DATA_VOLATILE = 0x2,
    VKD3D_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE = 0x4,
    VKD3D_DESCRIPTOR_RANGE_FLAG_DATA_STATIC = 0x8,
+    VKD3D_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS = 0x10000
 };

 struct vkd3d_descriptor_range1
@ -502,7 +613,7 @@ struct vkd3d_root_parameter1
        struct vkd3d_root_descriptor_table1 descriptor_table;
        struct vkd3d_root_constants constants;
        struct vkd3d_root_descriptor1 descriptor;
-    } u;
+    };
    enum vkd3d_shader_visibility shader_visibility;
 };

@ -530,21 +641,27 @@ struct vkd3d_versioned_root_signature_desc
    {
        struct vkd3d_root_signature_desc v_1_0;
        struct vkd3d_root_signature_desc1 v_1_1;
-    } u;
+    };
 };

-/* FIXME: Add support for 64 UAV bind slots. */
-#define VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS 8
+enum vkd3d_shader_uav_flag
+{
+    VKD3D_SHADER_UAV_FLAG_READ_ACCESS     = 0x00000001,
+    VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER  = 0x00000002,
+    VKD3D_SHADER_UAV_FLAG_ATOMIC_ACCESS   = 0x00000004,
+};

 struct vkd3d_shader_scan_info
 {
-    enum vkd3d_shader_structure_type type;
-    void *next;
-
-    unsigned int uav_read_mask;    /* VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS */
-    unsigned int uav_counter_mask; /* VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS */
-    unsigned int sampler_comparison_mode_mask; /* 16 */
+    struct hash_map register_map;
    bool use_vocp;
+
+    bool early_fragment_tests;
+    bool has_side_effects;
+    bool needs_late_zs;
+    bool discards;
+    bool has_uav_counter;
+    unsigned int patch_vertex_count;
 };

 enum vkd3d_component_type
@ -554,6 +671,7 @@ enum vkd3d_component_type
    VKD3D_TYPE_INT     = 2,
    VKD3D_TYPE_FLOAT   = 3,
    VKD3D_TYPE_BOOL,
+    VKD3D_TYPE_DOUBLE,
    VKD3D_TYPE_COUNT,

    VKD3D_FORCE_32_BIT_ENUM(VKD3D_COMPONENT_TYPE),
@ -636,7 +754,11 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
 void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code);

 int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
-        struct vkd3d_versioned_root_signature_desc *root_signature);
+        struct vkd3d_versioned_root_signature_desc *root_signature,
+        vkd3d_shader_hash_t *compatibility_hash);
+int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
+        struct vkd3d_versioned_root_signature_desc *desc,
+        vkd3d_shader_hash_t *compatibility_hash);
 void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc *root_signature);

 /* FIXME: Add support for returning error messages (ID3DBlob). */
@ -651,11 +773,89 @@ int vkd3d_shader_scan_dxbc(const struct vkd3d_shader_code *dxbc,

 int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
        struct vkd3d_shader_signature *signature);
+int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
+        struct vkd3d_shader_signature *signature);
 struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
        const struct vkd3d_shader_signature *signature, const char *semantic_name,
        unsigned int semantic_index, unsigned int stream_index);
 void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature);

+/* For DXR, use special purpose entry points since there's a lot of special purpose reflection required. */
+struct vkd3d_shader_library_entry_point
+{
+    unsigned int identifier;
+    VkShaderStageFlagBits stage;
+    WCHAR *mangled_entry_point;
+    WCHAR *plain_entry_point;
+    char *real_entry_point;
+};
+
+enum vkd3d_shader_subobject_kind
+{
+    /* Matches DXIL for simplicity. */
+    VKD3D_SHADER_SUBOBJECT_KIND_STATE_OBJECT_CONFIG = 0,
+    VKD3D_SHADER_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE = 1,
+    VKD3D_SHADER_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE = 2,
+    VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION = 8,
+    VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG = 9,
+    VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG = 10,
+    VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP = 11,
+    VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1 = 12,
+};
+
+struct vkd3d_shader_library_subobject
+{
+    enum vkd3d_shader_subobject_kind kind;
+    unsigned int dxil_identifier;
+
+    /* All const pointers here point directly to the DXBC blob,
+     * so they do not need to be freed.
+     * Fortunately for us, the C strings are zero-terminated in the blob itself. */
+
+    /* In the blob, ASCII is used as identifier, where API uses wide strings, sigh ... */
+    const char *name;
+
+    union
+    {
+        D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config;
+        D3D12_RAYTRACING_SHADER_CONFIG shader_config;
+        D3D12_STATE_OBJECT_CONFIG object_config;
+
+        /* Duped strings because API wants wide strings for no good reason. */
+        D3D12_HIT_GROUP_DESC hit_group;
+        D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION association;
+
+        struct
+        {
+            const void *data;
+            size_t size;
+        } payload;
+    } data;
+};
+
+int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
+        const D3D12_DXIL_LIBRARY_DESC *library_desc,
+        unsigned int identifier,
+        struct vkd3d_shader_library_entry_point **entry_points,
+        size_t *entry_point_size, size_t *entry_point_count,
+        struct vkd3d_shader_library_subobject **subobjects,
+        size_t *subobjects_size, size_t *subobjects_count);
+
+void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_entry_point *entry_points, size_t count);
+void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count);
+
+int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
+        const char *export_name, /* Not spelled "export": that is a reserved word in C++. */
+        struct vkd3d_shader_code *spirv,
+        const struct vkd3d_shader_interface_info *shader_interface_info,
+        const struct vkd3d_shader_interface_local_info *shader_interface_local_info,
+        const struct vkd3d_shader_compile_arguments *compiler_args);
+
+uint32_t vkd3d_shader_compile_arguments_select_quirks(
+        const struct vkd3d_shader_compile_arguments *args, vkd3d_shader_hash_t hash);
+
+uint64_t vkd3d_shader_get_revision(void);
+
 #endif  /* VKD3D_SHADER_NO_PROTOTYPES */

 /*
@ -668,7 +868,8 @@ typedef int (*PFN_vkd3d_shader_compile_dxbc)(const struct vkd3d_shader_code *dxb
 typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code);

 typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc,
-        struct vkd3d_versioned_root_signature_desc *root_signature);
+        struct vkd3d_versioned_root_signature_desc *root_signature,
+        vkd3d_shader_hash_t *compatibility_hash);
 typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_versioned_root_signature_desc *root_signature);

 typedef int (*PFN_vkd3d_shader_serialize_root_signature)(
--- a/include/vkd3d_sonames.h
+++ b/include/vkd3d_sonames.h
@ -0,0 +1,35 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_SONAMES_H
+#define __VKD3D_SONAMES_H
+
+/* These sonames are defined by the loader ABI. */
+
+#if defined(_WIN32)
+#define SONAME_LIBVULKAN "vulkan-1.dll"
+#elif defined(__linux__)
+#define SONAME_LIBVULKAN "libvulkan.so.1"
+#elif defined(__APPLE__)
+#define SONAME_LIBVULKAN "libvulkan.1.dylib"
+#else
+#error "Unrecognized platform."
+#endif
+
+#endif
+
--- a/include/vkd3d_swapchain_factory.idl
+++ b/include/vkd3d_swapchain_factory.idl
@ -0,0 +1,39 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ */
+
+import "vkd3d_windows.h";
+import "vkd3d_dxgibase.idl";
+import "vkd3d_dxgi1_2.idl";
+
+[
+    object,
+    local,
+    uuid(53cb4ff0-c25a-4164-a891-0e83db0a7aac)
+]
+interface IWineDXGISwapChainFactory : IUnknown
+{
+    HRESULT CreateSwapChainForHwnd(
+        [in] IDXGIFactory *pFactory,
+        [in] HWND hWnd,
+        [in] const DXGI_SWAP_CHAIN_DESC1 *pDesc,
+        [in] const DXGI_SWAP_CHAIN_FULLSCREEN_DESC *pFullscreenDesc,
+        [in] IDXGIOutput *pRestrictToOutput,
+        [out] IDXGISwapChain1 **ppSwapChain
+    );
+}
--- a/include/vkd3d_utils.h
+++ b/include/vkd3d_utils.h
@ -30,22 +30,42 @@ extern "C" {
 #define VKD3D_WAIT_FAILED (~0u)
 #define VKD3D_INFINITE (~0u)

-/* 1.0 */
-HANDLE vkd3d_create_event(void);
-HRESULT vkd3d_signal_event(HANDLE event);
-unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds);
-void vkd3d_destroy_event(HANDLE event);
+#ifdef _WIN32
+# ifdef _MSC_VER
+#  define VKD3D_UTILS_EXPORT
+# else
+  /* We need to specify the __declspec(dllexport) attribute
+   * on MinGW because otherwise the stdcall aliases/fixups
+   * don't get exported.
+   */
+#  ifdef VKD3D_UTILS_EXPORTS
+#   define VKD3D_UTILS_EXPORT __declspec(dllexport)
+#  else
+#   define VKD3D_UTILS_EXPORT __declspec(dllimport)
+#  endif
+# endif
+#elif defined(__GNUC__)
+# define VKD3D_UTILS_EXPORT DECLSPEC_VISIBLE
+#else
+# define VKD3D_UTILS_EXPORT
+#endif

-HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level, REFIID iid, void **device);
-HRESULT WINAPI D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T data_size, REFIID iid, void **deserializer);
-HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug);
-HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc,
+/* 1.0 */
+VKD3D_UTILS_EXPORT HANDLE vkd3d_create_event(void);
+VKD3D_UTILS_EXPORT HRESULT vkd3d_signal_event(HANDLE event);
+VKD3D_UTILS_EXPORT unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds);
+VKD3D_UTILS_EXPORT void vkd3d_destroy_event(HANDLE event);
+
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter, D3D_FEATURE_LEVEL feature_level, REFIID iid, void **device);
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T data_size, REFIID iid, void **deserializer);
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug);
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc,
        D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob);

 /* 1.2 */
-HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data,
        SIZE_T data_size, REFIID iid, void **deserializer);
-HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
        ID3DBlob **blob, ID3DBlob **error_blob);

 #ifdef __cplusplus
--- a/include/vkd3d_vk_includes.h
+++ b/include/vkd3d_vk_includes.h
@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+#ifndef __VKD3D_VK_INCLUDES_H
+#define __VKD3D_VK_INCLUDES_H
+
+/* NVX handle types: a pointer to an opaque struct when one of the 64-bit
+ * target macros below is defined, a plain 64-bit integer otherwise.
+ * NOTE(review): this looks like it mirrors how the Vulkan headers declare
+ * non-dispatchable handles - confirm against vulkan_core.h. */
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+    typedef struct VkCuFunctionNVX_T *VkCuFunctionNVX;
+    typedef struct VkCuModuleNVX_T *VkCuModuleNVX;
+#else
+    typedef UINT64 VkCuFunctionNVX;
+    typedef UINT64 VkCuModuleNVX;
+#endif 
+
+typedef struct VkPhysicalDevice_T *VkPhysicalDevice;
+typedef struct VkCommandBuffer_T *VkCommandBuffer;
+typedef struct VkInstance_T *VkInstance;
+typedef struct VkDevice_T *VkDevice;
+
+/* Identifiers for Vulkan extensions; each enumerator is a distinct bit.
+ * NOTE(review): presumably these name VK_NVX_binary_import and
+ * VK_NVX_image_view_handle and are used to query extension support -
+ * confirm against the users of this enum. */
+typedef enum D3D12_VK_EXTENSION
+{
+    D3D12_VK_NVX_BINARY_IMPORT      = 0x1,
+    D3D12_VK_NVX_IMAGE_VIEW_HANDLE  = 0x2
+} D3D12_VK_EXTENSION;
+
+/* Bundles a VkCuFunctionNVX / VkCuModuleNVX pair with three block sizes.
+ * NOTE(review): blockX/Y/Z are presumably the launch block dimensions of
+ * the function - confirm against the code that fills in this struct. */
+typedef struct D3D12_CUBIN_DATA_HANDLE
+{
+    VkCuFunctionNVX vkCuFunction;
+    VkCuModuleNVX vkCuModule;
+    UINT32 blockX;
+    UINT32 blockY;
+    UINT32 blockZ;
+} D3D12_CUBIN_DATA_HANDLE;
+
+/* Information record about a UAV.
+ * NOTE(review): field meanings are inferred from their names only -
+ * gpuVAStart/gpuVASize look like a GPU virtual address range and
+ * surfaceHandle like a driver handle for the view; confirm with the
+ * producer of this struct. */
+typedef struct D3D12_UAV_INFO
+{
+    UINT32 version;
+    UINT32 surfaceHandle;
+    UINT64 gpuVAStart;
+    UINT64 gpuVASize;  
+} D3D12_UAV_INFO;
+
+#endif  // __VKD3D_VK_INCLUDES_H
+
--- a/include/vkd3d_win32.h
+++ b/include/vkd3d_win32.h
@ -0,0 +1,86 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ */
+
+#ifndef __VKD3D_WIN32_H
+#define __VKD3D_WIN32_H
+
+/* Hack for MinGW-w64 headers.
+ *
+ * We want to use WIDL C inline wrappers because some methods
+ * in D3D12 interfaces return aggregate objects. Unfortunately,
+ * WIDL C inline wrappers are broken when used with MinGW-w64
+ * headers because FORCEINLINE expands to extern inline
+ * which leads to the "multiple storage classes in declaration
+ * specifiers" compiler error.
+ *
+ * This hack will define static to be meaningless when these
+ * headers are included, which are the only things declared
+ * static.
+ */
+#ifdef __MINGW32__
+# define static
+#endif
+
+#define INITGUID
+#define COBJMACROS
+#define WIDL_C_INLINE_WRAPPERS
+#include <vkd3d_windows.h>
+
+/* Vulkan headers include static const declarations. Enable static keyword for
+ * them.
+ */
+#ifdef __MINGW32__
+# undef static
+#endif
+
+#define VK_USE_PLATFORM_WIN32_KHR
+#include <vulkan/vulkan.h>
+#include "private/vulkan_private_extensions.h"
+
+#ifdef __MINGW32__
+# define static
+#endif
+
+#include <dxgi1_6.h>
+
+/* We already included regular DXGI...
+ * let's not redefine everything under a new header
+ */
+#define __vkd3d_dxgibase_h__
+#define __vkd3d_dxgi_h__
+#define __vkd3d_dxgi1_2_h__
+#define __vkd3d_dxgi1_3_h__
+#define __vkd3d_dxgi1_4_h__
+
+#include <vkd3d_swapchain_factory.h>
+#include <vkd3d_command_list_vkd3d_ext.h>
+#include <vkd3d_device_vkd3d_ext.h>
+#include <vkd3d_d3d12.h>
+#include <vkd3d_d3d12sdklayers.h>
+
+/* End of MinGW hack. All Windows headers have been included */
+#ifdef __MINGW32__
+# undef static
+#endif
+
+#define VKD3D_NO_WIN32_TYPES
+#define VKD3D_NO_VULKAN_H
+#include <vkd3d.h>
+
+#endif
--- a/include/vkd3d_windows.h
+++ b/include/vkd3d_windows.h
@ -22,21 +22,12 @@

 /* Nameless unions */
 #ifndef __C89_NAMELESS
-# ifdef NONAMELESSUNION
-#  define __C89_NAMELESS
-#  define __C89_NAMELESSUNIONNAME u
-# else
 #  define __C89_NAMELESS
 #  define __C89_NAMELESSUNIONNAME
-# endif /* NONAMELESSUNION */
 #endif  /* __C89_NAMELESS */

 #if !defined(_WIN32) || defined(__WIDL__)

-# if !defined(__WIDL__) && !defined(VKD3D_WIN32_WCHAR)
-#  include <wchar.h>
-# endif
-
 # ifdef __GNUC__
 #  define DECLSPEC_ALIGN(x) __attribute__((aligned(x)))
 # endif
@ -86,18 +77,20 @@ typedef unsigned long UINT64;
 typedef long long DECLSPEC_ALIGN(8) INT64;
 typedef unsigned long long DECLSPEC_ALIGN(8) UINT64;
 # endif
+typedef INT64 LONG64;
 typedef long LONG_PTR;
 typedef unsigned long ULONG_PTR;

 typedef ULONG_PTR SIZE_T;

-# ifdef VKD3D_WIN32_WCHAR
 typedef unsigned short WCHAR;
-# else
-typedef wchar_t WCHAR;
-# endif /* VKD3D_WIN32_WCHAR */
 typedef void *HANDLE;

+typedef const WCHAR* LPCWSTR;
+
+#define _fseeki64(a, b, c) fseeko64(a, b, c)
+#define _ftelli64(a) ftello64(a)
+
 /* GUID */
 # ifdef __WIDL__
 typedef struct
@ -122,18 +115,18 @@ typedef GUID IID;
 # ifdef INITGUID
 #  ifndef __cplusplus
 #   define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
-        const GUID name DECLSPEC_HIDDEN; \
+        const GUID name; \
        const GUID name = \
    { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }}
 #  else
 #   define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
-        EXTERN_C const GUID name DECLSPEC_HIDDEN; \
+        EXTERN_C const GUID name; \
        EXTERN_C const GUID name = \
    { l, w1, w2, { b1, b2, b3, b4, b5, b6, b7, b8 }}
 #  endif
 # else
 #  define DEFINE_GUID(name, l, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
-        EXTERN_C const GUID name DECLSPEC_HIDDEN;
+        EXTERN_C const GUID name;
 # endif /* INITGUID */

 /* __uuidof emulation */
@ -166,7 +159,6 @@ extern "C++"
 typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES;
 #endif  /* !defined(_WIN32) || defined(__WIDL__) */

-
 #ifndef _WIN32
 # include <stddef.h>
 # include <stdlib.h>
@ -233,18 +225,6 @@ typedef struct SECURITY_ATTRIBUTES SECURITY_ATTRIBUTES;

 #endif  /* _WIN32 */

-
-/* Define DECLSPEC_HIDDEN */
-#ifndef DECLSPEC_HIDDEN
-# if defined(__MINGW32__)
-#  define DECLSPEC_HIDDEN
-# elif defined(__GNUC__)
-#  define DECLSPEC_HIDDEN __attribute__((visibility("hidden")))
-# else
-#  define DECLSPEC_HIDDEN
-# endif
-#endif  /* DECLSPEC_HIDDEN */
-
 /* Define min() & max() macros */
 #ifndef NOMINMAX
 # ifndef min
--- a/libs/d3d12/d3d12.def
+++ b/libs/d3d12/d3d12.def
@ -0,0 +1,11 @@
+LIBRARY d3d12.dll
+
+EXPORTS
+    D3D12CreateDevice @101
+    D3D12GetDebugInterface @102
+    D3D12CreateRootSignatureDeserializer
+    D3D12CreateVersionedRootSignatureDeserializer
+
+    D3D12EnableExperimentalFeatures
+    D3D12SerializeRootSignature
+    D3D12SerializeVersionedRootSignature
--- a/libs/d3d12/main.c
+++ b/libs/d3d12/main.c
@ -0,0 +1,364 @@
+/*
+ * Copyright 2018 Józef Kucia for CodeWeavers
+ * Copyright 2020 Joshua Ashton for Valve Software
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ *
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#define VK_NO_PROTOTYPES
+#include "vkd3d_win32.h"
+#include "vkd3d_atomic.h"
+#include "vkd3d_debug.h"
+#include "vkd3d_threads.h"
+
+/* We need to specify the __declspec(dllexport) attribute
+ * on MinGW because otherwise the stdcall aliases/fixups
+ * don't get exported.
+ */
+#if defined(_MSC_VER)
+  #define DLLEXPORT
+#else
+  #define DLLEXPORT __declspec(dllexport)
+#endif
+
+static pthread_once_t library_once = PTHREAD_ONCE_INIT;
+static HMODULE vulkan_module = NULL;
+
+/* pthread_once callback: loads vulkan-1.dll into vulkan_module unless a
+ * module handle is already present. */
+static void load_vulkan_once(void)
+{
+    if (vulkan_module)
+        return;
+
+    vulkan_module = LoadLibraryA("vulkan-1.dll");
+}
+
+/* Ensures the Vulkan loader DLL has been loaded (once per process) and
+ * returns its vkGetInstanceProcAddr entry point, or NULL if the DLL could
+ * not be loaded. */
+static PFN_vkGetInstanceProcAddr load_vulkan(void)
+{
+    pthread_once(&library_once, load_vulkan_once);
+
+    if (!vulkan_module)
+        return NULL;
+
+    return (void *)GetProcAddress(vulkan_module, "vkGetInstanceProcAddr");
+}
+
+/* Exported D3D12GetDebugInterface entry point.
+ *
+ * No debug interface is provided: the request is traced and
+ * DXGI_ERROR_SDK_COMPONENT_MISSING is always returned. *debug is never
+ * written. */
+HRESULT WINAPI DLLEXPORT D3D12GetDebugInterface(REFIID iid, void **debug)
+{
+    TRACE("iid %s, debug %p.\n", debugstr_guid(iid), debug);
+
+    WARN("Returning DXGI_ERROR_SDK_COMPONENT_MISSING.\n");
+    return DXGI_ERROR_SDK_COMPONENT_MISSING;
+}
+
+/* Exported D3D12EnableExperimentalFeatures entry point.
+ *
+ * Stub: logs its arguments through FIXME and always returns E_NOINTERFACE
+ * without inspecting any of the requested features. */
+HRESULT WINAPI DLLEXPORT D3D12EnableExperimentalFeatures(UINT feature_count,
+        const IID *iids, void *configurations, UINT *configurations_sizes)
+{
+    FIXME("feature_count %u, iids %p, configurations %p, configurations_sizes %p stub!\n",
+            feature_count, iids, configurations, configurations_sizes);
+
+    return E_NOINTERFACE;
+}
+
+/* Event-signalling callback installed in the vkd3d instance create info:
+ * sets the Win32 event and maps the result to S_OK / E_FAIL. */
+static HRESULT d3d12_signal_event(HANDLE event)
+{
+    if (!SetEvent(event))
+        return E_FAIL;
+
+    return S_OK;
+}
+
+/* Heap-allocated payload passed from d3d12_create_thread() to
+ * d3d12_thread_main(), which frees it after running main_pfn. */
+struct d3d12_thread_data
+{
+    /* Thread entry point supplied by the caller of d3d12_create_thread(). */
+    PFN_vkd3d_thread main_pfn;
+    /* Opaque argument forwarded to main_pfn. */
+    void *data;
+};
+
+/* Win32 thread trampoline: runs the entry point stored in the
+ * d3d12_thread_data payload, then releases the payload. Always exits
+ * with code 0. */
+static DWORD WINAPI d3d12_thread_main(void *data)
+{
+    struct d3d12_thread_data *ctx = data;
+    PFN_vkd3d_thread entry = ctx->main_pfn;
+    void *entry_arg = ctx->data;
+
+    entry(entry_arg);
+    free(ctx);
+    return 0;
+}
+
+/* Thread-creation callback installed in the vkd3d instance create info.
+ *
+ * Wraps main_pfn/data in a heap-allocated d3d12_thread_data and starts a
+ * Win32 thread on d3d12_thread_main. Returns the thread HANDLE, or NULL
+ * if the allocation or CreateThread fails (the payload is freed in the
+ * latter case since the thread will never run to free it). */
+static void *d3d12_create_thread(PFN_vkd3d_thread main_pfn, void *data)
+{
+    struct d3d12_thread_data *ctx = malloc(sizeof(*ctx));
+    HANDLE handle;
+
+    if (!ctx)
+    {
+        ERR("Failed to allocate thread data.\n");
+        return NULL;
+    }
+
+    ctx->main_pfn = main_pfn;
+    ctx->data = data;
+
+    handle = CreateThread(NULL, 0, d3d12_thread_main, ctx, 0, NULL);
+    if (!handle)
+        free(ctx);
+
+    return handle;
+}
+
+/* Thread-join callback installed in the vkd3d instance create info.
+ *
+ * Blocks until the thread exits, closes its handle in every case, and
+ * returns S_OK if the wait completed with WAIT_OBJECT_0, E_FAIL otherwise. */
+static HRESULT d3d12_join_thread(void *handle)
+{
+    HANDLE thread = handle;
+    DWORD wait_result = WaitForSingleObject(thread, INFINITE);
+    HRESULT hr = S_OK;
+
+    if (wait_result != WAIT_OBJECT_0)
+    {
+        ERR("Failed to wait for thread, ret %#x.\n", wait_result);
+        hr = E_FAIL;
+    }
+
+    CloseHandle(thread);
+    return hr;
+}
+
+/* Resolves the IDXGIAdapter to create a device on.
+ *
+ * If the caller supplied an adapter, it is queried for IDXGIAdapter.
+ * Otherwise a temporary DXGI factory is created and adapter 0 is
+ * enumerated from it. On success *dxgi_adapter holds a reference the
+ * caller must release. Returns the HRESULT of the failing call, if any. */
+static HRESULT d3d12_get_adapter(IDXGIAdapter **dxgi_adapter, IUnknown *adapter)
+{
+    IDXGIFactory4 *dxgi_factory = NULL;
+    HRESULT hr;
+
+    if (adapter)
+    {
+        hr = IUnknown_QueryInterface(adapter, &IID_IDXGIAdapter, (void **)dxgi_adapter);
+        if (FAILED(hr))
+            WARN("Invalid adapter %p, hr %#x.\n", adapter, hr);
+        return hr;
+    }
+
+    /* No adapter given: fall back to the primary adapter of a new factory. */
+    hr = CreateDXGIFactory1(&IID_IDXGIFactory4, (void **)&dxgi_factory);
+    if (FAILED(hr))
+        WARN("Failed to create DXGI factory, hr %#x.\n", hr);
+    else if (FAILED(hr = IDXGIFactory4_EnumAdapters(dxgi_factory, 0, dxgi_adapter)))
+        WARN("Failed to enumerate primary adapter, hr %#x.\n", hr);
+
+    if (dxgi_factory)
+        IDXGIFactory4_Release(dxgi_factory);
+
+    return hr;
+}
+
+/* Picks the Vulkan physical device that corresponds to a DXGI adapter.
+ *
+ * Devices are matched first by LUID (skipping devices whose apiVersion is
+ * below VKD3D_MIN_API_VERSION), then by PCI vendor/device ID. If neither
+ * pass finds a match, the first enumerated device is used as a last resort.
+ *
+ * Returns VK_NULL_HANDLE if enumeration fails, no device is available, or
+ * the device array cannot be allocated. */
+static VkPhysicalDevice d3d12_find_physical_device(struct vkd3d_instance *instance,
+        PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr, struct DXGI_ADAPTER_DESC *adapter_desc)
+{
+    PFN_vkGetPhysicalDeviceProperties2 pfn_vkGetPhysicalDeviceProperties2;
+    PFN_vkGetPhysicalDeviceProperties pfn_vkGetPhysicalDeviceProperties;
+    PFN_vkEnumeratePhysicalDevices pfn_vkEnumeratePhysicalDevices;
+    VkPhysicalDevice vk_physical_device = VK_NULL_HANDLE;
+    VkPhysicalDeviceIDProperties id_properties;
+    VkPhysicalDeviceProperties2 properties2;
+    VkPhysicalDevice *vk_physical_devices;
+    VkInstance vk_instance;
+    unsigned int i;
+    uint32_t count;
+    VkResult vr;
+
+    vk_instance = vkd3d_instance_get_vk_instance(instance);
+
+    pfn_vkEnumeratePhysicalDevices = (void *)pfn_vkGetInstanceProcAddr(vk_instance, "vkEnumeratePhysicalDevices");
+    pfn_vkGetPhysicalDeviceProperties = (void *)pfn_vkGetInstanceProcAddr(vk_instance, "vkGetPhysicalDeviceProperties");
+    pfn_vkGetPhysicalDeviceProperties2 = (void *)pfn_vkGetInstanceProcAddr(vk_instance, "vkGetPhysicalDeviceProperties2");
+
+    if ((vr = pfn_vkEnumeratePhysicalDevices(vk_instance, &count, NULL)) < 0)
+    {
+        WARN("Failed to get device count, vr %d.\n", vr);
+        return VK_NULL_HANDLE;
+    }
+    if (!count)
+    {
+        WARN("No physical device available.\n");
+        return VK_NULL_HANDLE;
+    }
+
+    if (!(vk_physical_devices = calloc(count, sizeof(*vk_physical_devices))))
+        return VK_NULL_HANDLE;
+
+    if ((vr = pfn_vkEnumeratePhysicalDevices(vk_instance, &count, vk_physical_devices)) < 0)
+    {
+        WARN("Failed to enumerate physical devices, vr %d.\n", vr);
+        goto done;
+    }
+
+    TRACE("Matching adapters by LUIDs.\n");
+
+    for (i = 0; i < count; ++i)
+    {
+        /* The core properties are fetched first so the API version can be
+         * checked before the properties2 query is issued. */
+        pfn_vkGetPhysicalDeviceProperties(vk_physical_devices[i], &properties2.properties);
+
+        /* Skip over physical devices below our minimum API version */
+        if (properties2.properties.apiVersion < VKD3D_MIN_API_VERSION)
+        {
+            WARN("Skipped adapter %s as it is below our minimum API version.\n", properties2.properties.deviceName);
+            continue;
+        }
+
+        id_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES;
+        id_properties.pNext = NULL;
+
+        properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+        properties2.pNext = &id_properties;
+
+        pfn_vkGetPhysicalDeviceProperties2(vk_physical_devices[i], &properties2);
+
+        if (id_properties.deviceLUIDValid && !memcmp(id_properties.deviceLUID, &adapter_desc->AdapterLuid, VK_LUID_SIZE))
+        {
+            vk_physical_device = vk_physical_devices[i];
+            break;
+        }
+    }
+
+    if (!vk_physical_device)
+    {
+        TRACE("Matching adapters by PCI IDs.\n");
+
+        for (i = 0; i < count; ++i)
+        {
+            pfn_vkGetPhysicalDeviceProperties(vk_physical_devices[i], &properties2.properties);
+
+            if (properties2.properties.deviceID == adapter_desc->DeviceId &&
+                properties2.properties.vendorID == adapter_desc->VendorId)
+            {
+                vk_physical_device = vk_physical_devices[i];
+                break;
+            }
+        }
+    }
+
+    if (!vk_physical_device)
+    {
+        FIXME("Could not find Vulkan physical device for DXGI adapter.\n");
+        WARN("Using first available physical device...\n");
+        vk_physical_device = vk_physical_devices[0];
+    }
+
+done:
+    free(vk_physical_devices);
+    return vk_physical_device;
+}
+
+/* Exported D3D12CreateDevice entry point.
+ *
+ * Loads the Vulkan loader, resolves the DXGI adapter (the primary adapter
+ * when `adapter` is NULL), creates a vkd3d instance with the Win32 surface
+ * extensions, picks the matching Vulkan physical device and finally creates
+ * the device through vkd3d_create_device().
+ *
+ * Returns E_FAIL if the Vulkan library cannot be loaded, otherwise the
+ * HRESULT of the first failing step or of vkd3d_create_device(). */
+HRESULT WINAPI DLLEXPORT D3D12CreateDevice(IUnknown *adapter, D3D_FEATURE_LEVEL minimum_feature_level,
+        REFIID iid, void **device)
+{
+    struct vkd3d_instance_create_info instance_create_info;
+    PFN_vkGetInstanceProcAddr pfn_vkGetInstanceProcAddr;
+    struct vkd3d_device_create_info device_create_info;
+    struct DXGI_ADAPTER_DESC adapter_desc;
+    struct vkd3d_instance *instance;
+    IDXGIAdapter *dxgi_adapter;
+    HRESULT hr;
+
+    static const char * const instance_extensions[] =
+    {
+        VK_KHR_SURFACE_EXTENSION_NAME,
+        VK_KHR_WIN32_SURFACE_EXTENSION_NAME,
+    };
+    static const char * const device_extensions[] =
+    {
+        VK_KHR_SWAPCHAIN_EXTENSION_NAME,
+    };
+
+    TRACE("adapter %p, minimum_feature_level %#x, iid %s, device %p.\n",
+            adapter, minimum_feature_level, debugstr_guid(iid), device);
+
+    if (!(pfn_vkGetInstanceProcAddr = load_vulkan()))
+    {
+        ERR("Failed to load Vulkan library.\n");
+        return E_FAIL;
+    }
+
+    /* From here on dxgi_adapter holds a reference that is released at done. */
+    if (FAILED(hr = d3d12_get_adapter(&dxgi_adapter, adapter)))
+        return hr;
+
+    if (FAILED(hr = IDXGIAdapter_GetDesc(dxgi_adapter, &adapter_desc)))
+    {
+        WARN("Failed to get adapter desc, hr %#x.\n", hr);
+        goto done;
+    }
+
+    /* NOTE(review): the create-info structs are filled field by field and
+     * never zero-initialised; any member not assigned below is left
+     * indeterminate - confirm the struct definitions have no other fields. */
+    instance_create_info.pfn_signal_event = d3d12_signal_event;
+    instance_create_info.pfn_create_thread = d3d12_create_thread;
+    instance_create_info.pfn_join_thread = d3d12_join_thread;
+    instance_create_info.pfn_vkGetInstanceProcAddr = pfn_vkGetInstanceProcAddr;
+    instance_create_info.instance_extensions = instance_extensions;
+    instance_create_info.instance_extension_count = ARRAYSIZE(instance_extensions);
+    instance_create_info.optional_instance_extensions = NULL;
+    instance_create_info.optional_instance_extension_count = 0;
+
+    if (FAILED(hr = vkd3d_create_instance(&instance_create_info, &instance)))
+    {
+        WARN("Failed to create vkd3d instance, hr %#x.\n", hr);
+        goto done;
+    }
+
+    device_create_info.minimum_feature_level = minimum_feature_level;
+    device_create_info.instance = instance;
+    device_create_info.instance_create_info = NULL;
+    /* May be VK_NULL_HANDLE if no physical device could be enumerated; the
+     * value is passed on to vkd3d_create_device() unchecked. */
+    device_create_info.vk_physical_device = d3d12_find_physical_device(instance, pfn_vkGetInstanceProcAddr, &adapter_desc);
+    device_create_info.device_extensions = device_extensions;
+    device_create_info.device_extension_count = ARRAYSIZE(device_extensions);
+    device_create_info.optional_device_extensions = NULL;
+    device_create_info.optional_device_extension_count = 0;
+    device_create_info.parent = (IUnknown *)dxgi_adapter;
+    memcpy(&device_create_info.adapter_luid, &adapter_desc.AdapterLuid, VK_LUID_SIZE);
+
+    hr = vkd3d_create_device(&device_create_info, iid, device);
+
+    /* Drop this function's instance reference whether or not device
+     * creation succeeded. */
+    vkd3d_instance_decref(instance);
+
+done:
+    IDXGIAdapter_Release(dxgi_adapter);
+    return hr;
+}
+
+/* Exported D3D12CreateRootSignatureDeserializer entry point: traces the
+ * arguments and forwards them unchanged to
+ * vkd3d_create_root_signature_deserializer(). */
+HRESULT WINAPI DLLEXPORT D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T data_size,
+        REFIID iid, void **deserializer)
+{
+    /* NOTE(review): data_size is a SIZE_T but is printed with %lu; on 64-bit
+     * Windows unsigned long is 32 bits wide - confirm this is acceptable
+     * for trace output. */
+    TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n",
+            data, data_size, debugstr_guid(iid), deserializer);
+
+    return vkd3d_create_root_signature_deserializer(data, data_size, iid, deserializer);
+}
+
+/* Exported D3D12SerializeRootSignature entry point: traces the arguments
+ * and forwards them unchanged to vkd3d_serialize_root_signature(). */
+HRESULT WINAPI DLLEXPORT D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *root_signature_desc,
+        D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob)
+{
+    TRACE("root_signature_desc %p, version %#x, blob %p, error_blob %p.\n",
+            root_signature_desc, version, blob, error_blob);
+
+    return vkd3d_serialize_root_signature(root_signature_desc, version, blob, error_blob);
+}
+
+/* Exported D3D12CreateVersionedRootSignatureDeserializer entry point:
+ * traces the arguments and forwards them unchanged to
+ * vkd3d_create_versioned_root_signature_deserializer(). */
+HRESULT WINAPI DLLEXPORT D3D12CreateVersionedRootSignatureDeserializer(const void *data, SIZE_T data_size,
+        REFIID iid, void **deserializer)
+{
+    /* NOTE(review): data_size is a SIZE_T but is printed with %lu; on 64-bit
+     * Windows unsigned long is 32 bits wide - confirm this is acceptable
+     * for trace output. */
+    TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n",
+            data, data_size, debugstr_guid(iid), deserializer);
+
+    return vkd3d_create_versioned_root_signature_deserializer(data, data_size, iid, deserializer);
+}
+
+/* Exported D3D12SerializeVersionedRootSignature entry point: traces the
+ * arguments and forwards them unchanged to
+ * vkd3d_serialize_versioned_root_signature(). */
+HRESULT WINAPI DLLEXPORT D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
+        ID3DBlob **blob, ID3DBlob **error_blob)
+{
+    TRACE("desc %p, blob %p, error_blob %p.\n", desc, blob, error_blob);
+
+    return vkd3d_serialize_versioned_root_signature(desc, blob, error_blob);
+}
--- a/libs/d3d12/meson.build
+++ b/libs/d3d12/meson.build
@ -0,0 +1,16 @@
+d3d12_src = [
+  'main.c'
+]
+
+d3d12_lib = shared_library('d3d12', d3d12_src,
+  name_prefix         : '', # libd3d12.dll -> d3d12.dll
+  dependencies        : [ vkd3d_dep, lib_dxgi ],
+  include_directories : vkd3d_private_includes,
+  install             : true,
+  objects             : not vkd3d_is_msvc ? 'd3d12.def' : [],
+  vs_module_defs      : 'd3d12.def',
+  override_options    : [ 'c_std='+vkd3d_c_std ])
+
+d3d12_dep = declare_dependency(
+  link_with           : d3d12_lib,
+  include_directories : vkd3d_public_includes)
--- a/libs/meson.build
+++ b/libs/meson.build
@ -0,0 +1,7 @@
+subdir('vkd3d-common')
+subdir('vkd3d-shader')
+subdir('vkd3d')
+subdir('vkd3d-utils')
+if enable_d3d12
+  subdir('d3d12')
+endif
--- a/libs/vkd3d-common/debug.c
+++ b/libs/vkd3d-common/debug.c
@ -16,7 +16,11 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_COUNT
 #include "vkd3d_debug.h"
+#include "vkd3d_threads.h"
+
+#include "vkd3d_platform.h"

 #include <assert.h>
 #include <ctype.h>
@ -27,60 +31,103 @@
 #include <stdlib.h>
 #include <stdbool.h>
 #include <string.h>
+#include <stdio.h>

 #define VKD3D_DEBUG_BUFFER_COUNT 64
 #define VKD3D_DEBUG_BUFFER_SIZE 512

-extern const char *vkd3d_dbg_env_name DECLSPEC_HIDDEN;
-
 static const char *debug_level_names[] =
 {
-    /* VKD3D_DBG_LEVEL_NONE  */ "none",
-    /* VKD3D_DBG_LEVEL_ERR   */ "err",
-    /* VKD3D_DBG_LEVEL_FIXME */ "fixme",
-    /* VKD3D_DBG_LEVEL_WARN  */ "warn",
-    /* VKD3D_DBG_LEVEL_TRACE */ "trace",
+    /* VKD3D_DBG_LEVEL_UNKNOWN */ NULL,
+    /* VKD3D_DBG_LEVEL_NONE    */ "none",
+    /* VKD3D_DBG_LEVEL_ERR     */ "err",
+    /* VKD3D_DBG_LEVEL_INFO    */ "info",
+    /* VKD3D_DBG_LEVEL_FIXME   */ "fixme",
+    /* VKD3D_DBG_LEVEL_WARN    */ "warn",
+    /* VKD3D_DBG_LEVEL_TRACE   */ "trace",
 };

-enum vkd3d_dbg_level vkd3d_dbg_get_level(void)
+static const char *env_for_channel[] =
 {
-    static unsigned int level = ~0u;
-    const char *vkd3d_debug;
-    unsigned int i;
+    /* VKD3D_DBG_CHANNEL_API    */ "VKD3D_DEBUG",
+    /* VKD3D_DBG_CHANNEL_SHADER */ "VKD3D_SHADER_DEBUG",
+};

-    if (level != ~0u)
-        return level;
+static unsigned int vkd3d_dbg_level[VKD3D_DBG_CHANNEL_COUNT];
+static spinlock_t vkd3d_dbg_initialized;
+static pthread_once_t vkd3d_dbg_once = PTHREAD_ONCE_INIT;
+static FILE *vkd3d_log_file;

-    if (!(vkd3d_debug = getenv(vkd3d_dbg_env_name)))
-        vkd3d_debug = "";
+static void vkd3d_dbg_init_once(void)
+{
+    char vkd3d_debug[VKD3D_PATH_MAX];
+    unsigned int channel, i;

-    for (i = 0; i < ARRAY_SIZE(debug_level_names); ++i)
+    for (channel = 0; channel < VKD3D_DBG_CHANNEL_COUNT; channel++)
    {
-        if (!strcmp(debug_level_names[i], vkd3d_debug))
+        if (!vkd3d_get_env_var(env_for_channel[channel], vkd3d_debug, sizeof(vkd3d_debug)))
+            strncpy(vkd3d_debug, "", VKD3D_PATH_MAX);
+
+        for (i = 1; i < ARRAY_SIZE(debug_level_names); ++i)
+            if (!strcmp(debug_level_names[i], vkd3d_debug))
+                vkd3d_dbg_level[channel] = i;
+
+        /* Default debug level. */
+        if (vkd3d_dbg_level[channel] == VKD3D_DBG_LEVEL_UNKNOWN)
+            vkd3d_dbg_level[channel] = VKD3D_DBG_LEVEL_FIXME;
+    }
+
+    if (vkd3d_get_env_var("VKD3D_LOG_FILE", vkd3d_debug, sizeof(vkd3d_debug)))
+    {
+        vkd3d_log_file = fopen(vkd3d_debug, "w");
+        if (!vkd3d_log_file)
        {
-            level = i;
-            return level;
+            fprintf(stderr, "Failed to open log file: %s!\n", vkd3d_debug);
+            fflush(stderr);
        }
    }

-    /* Default debug level. */
-    level = VKD3D_DBG_LEVEL_FIXME;
-    return level;
+    vkd3d_atomic_uint32_store_explicit(&vkd3d_dbg_initialized, 1, vkd3d_memory_order_release);
 }

-void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...)
+static inline void vkd3d_dbg_init(void)
 {
+    /* Early out since we're going to be spamming calls to vkd3d_dbg_init() for every trace call. */
+    if (!vkd3d_atomic_uint32_load_explicit(&vkd3d_dbg_initialized, vkd3d_memory_order_acquire))
+        pthread_once(&vkd3d_dbg_once, vkd3d_dbg_init_once);
+}
+
+enum vkd3d_dbg_level vkd3d_dbg_get_level(enum vkd3d_dbg_channel channel)
+{
+    vkd3d_dbg_init();
+    if (channel >= VKD3D_DBG_CHANNEL_COUNT)
+        return VKD3D_DBG_LEVEL_FIXME;
+    assert(vkd3d_dbg_level[channel] != VKD3D_DBG_LEVEL_UNKNOWN);
+    return vkd3d_dbg_level[channel];
+}
+
+void vkd3d_dbg_printf(enum vkd3d_dbg_channel channel, enum vkd3d_dbg_level level, const char *function, const char *fmt, ...)
+{
+    static spinlock_t spin;
+    unsigned int tid;
+    FILE *log_file;
    va_list args;

-    if (vkd3d_dbg_get_level() < level)
+    if (vkd3d_dbg_get_level(channel) < level)
        return;

+    log_file = vkd3d_log_file ? vkd3d_log_file : stderr;
    assert(level < ARRAY_SIZE(debug_level_names));

-    fprintf(stderr, "%s:%s: ", debug_level_names[level], function);
+    tid = vkd3d_get_current_thread_id();
+
    va_start(args, fmt);
-    vfprintf(stderr, fmt, args);
+    spinlock_acquire(&spin);
+    fprintf(log_file, "%04x:%s:%s: ", tid, debug_level_names[level], function);
+    vfprintf(log_file, fmt, args);
+    spinlock_release(&spin);
    va_end(args);
+    fflush(log_file);
 }

 static char *get_buffer(void)
@ -174,10 +221,10 @@ const char *debugstr_a(const char *str)
    return buffer;
 }

-static const char *debugstr_w16(const uint16_t *wstr)
+const char *debugstr_w(const WCHAR *wstr)
 {
    char *buffer, *ptr;
-    uint16_t c;
+    WCHAR c;

    if (!wstr)
        return "(null)";
@ -234,80 +281,13 @@ static const char *debugstr_w16(const uint16_t *wstr)
    return buffer;
 }

-static const char *debugstr_w32(const uint32_t *wstr)
-{
-    char *buffer, *ptr;
-    uint32_t c;
-
-    if (!wstr)
-        return "(null)";
-
-    ptr = buffer = get_buffer();
-
-    *ptr++ = '"';
-    while ((c = *wstr++) && ptr <= buffer + VKD3D_DEBUG_BUFFER_SIZE - 10)
-    {
-        int escape_char;
-
-        switch (c)
-        {
-            case '"':
-            case '\\':
-            case '\n':
-            case '\r':
-            case '\t':
-                escape_char = c;
-                break;
-            default:
-                escape_char = 0;
-                break;
-        }
-
-        if (escape_char)
-        {
-            *ptr++ = '\\';
-            *ptr++ = escape_char;
-            continue;
-        }
-
-        if (isprint(c))
-        {
-            *ptr++ = c;
-        }
-        else
-        {
-            *ptr++ = '\\';
-            sprintf(ptr, "%04x", c);
-            ptr += 4;
-        }
-    }
-    *ptr++ = '"';
-
-    if (c)
-    {
-        *ptr++ = '.';
-        *ptr++ = '.';
-        *ptr++ = '.';
-    }
-    *ptr = '\0';
-
-    return buffer;
-}
-
-const char *debugstr_w(const WCHAR *wstr, size_t wchar_size)
-{
-    if (wchar_size == 2)
-        return debugstr_w16((const uint16_t *)wstr);
-    return debugstr_w32((const uint32_t *)wstr);
-}
-
 unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value)
 {
-    const char *value = getenv(name);
+    char value[VKD3D_PATH_MAX];
    unsigned long r;
    char *end_ptr;

-    if (value)
+    if (vkd3d_get_env_var(name, value, sizeof(value)) && strlen(value) > 0)
    {
        errno = 0;
        r = strtoul(value, &end_ptr, 0);
--- a/libs/vkd3d-common/file_utils.c
+++ b/libs/vkd3d-common/file_utils.c
@ -0,0 +1,188 @@
+/*
+ * Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_file_utils.h"
+#include "vkd3d_debug.h"
+
+/* For disk cache. */
+#ifdef _WIN32
+#include <windows.h>
+#include <io.h>
+#else
+#include <unistd.h>
+#include <sys/mman.h>
+#include <errno.h>
+#endif
+
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <stdio.h>
+
+/* Renames from_path to to_path, replacing to_path if it already exists.
+ *
+ * Win32: tries MoveFileA first and falls back to ReplaceFileA only when
+ * the move failed with ERROR_ALREADY_EXISTS. Elsewhere: rename().
+ * Returns true on success. */
+bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path)
+{
+#ifdef _WIN32
+    DWORD err;
+
+    if (MoveFileA(from_path, to_path))
+        return true;
+
+    err = GetLastError();
+    if (err == ERROR_ALREADY_EXISTS)
+    {
+        /* Destination exists: swap it for the source file instead. */
+        err = ReplaceFileA(to_path, from_path, NULL, 0, NULL, NULL) ? ERROR_SUCCESS : GetLastError();
+    }
+
+    return err == ERROR_SUCCESS;
+#else
+    return rename(from_path, to_path) == 0;
+#endif
+}
+
+/* Renames from_path to to_path without replacing an existing to_path.
+ *
+ * Win32: MoveFileA. Elsewhere: renameat2() with RENAME_NOREPLACE.
+ * Returns true on success. */
+bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path)
+{
+#ifdef _WIN32
+    DWORD err = MoveFileA(from_path, to_path) ? ERROR_SUCCESS : GetLastError();
+
+    return err == ERROR_SUCCESS;
+#else
+    return renameat2(AT_FDCWD, from_path, AT_FDCWD, to_path, RENAME_NOREPLACE) == 0;
+#endif
+}
+
+/* Deletes the file at path (DeleteFileA on Win32, unlink() elsewhere).
+ * Returns true on success. */
+bool vkd3d_file_delete(const char *path)
+{
+#ifdef _WIN32
+    DWORD err = DeleteFileA(path) ? ERROR_SUCCESS : GetLastError();
+
+    return err == ERROR_SUCCESS;
+#else
+    return unlink(path) == 0;
+#endif
+}
+
+/* Creates path for binary writing, failing if the file already exists.
+ * Returns the opened FILE, or NULL on failure. */
+FILE *vkd3d_file_open_exclusive_write(const char *path)
+{
+#ifdef _WIN32
+    /* From Fossilize. AFAIK, there is no direct way to make this work with FILE interface, so have to roundtrip
+     * through jank POSIX layer.
+     * wbx kinda works, but Wine warns about it, despite it working anyways.
+     * Older MSVC runtimes do not support wbx. */
+    FILE *stream;
+    int fd = _open(path, _O_BINARY | _O_WRONLY | _O_CREAT | _O_EXCL | _O_TRUNC | _O_SEQUENTIAL,
+            _S_IWRITE | _S_IREAD);
+
+    if (fd < 0)
+        return NULL;
+
+    /* On success _fdopen takes ownership of fd; close it ourselves only if
+     * the stream could not be created. */
+    stream = _fdopen(fd, "wb");
+    if (!stream)
+        _close(fd);
+
+    return stream;
+#else
+    return fopen(path, "wbx");
+#endif
+}
+
+/* Releases the mapping held by file, if any, and zeroes the whole
+ * structure so it can be reused or unmapped again safely. */
+void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file)
+{
+#ifdef _WIN32
+    if (file->mapped)
+        UnmapViewOfFile(file->mapped);
+#else
+    if (file->mapped)
+        munmap(file->mapped, file->mapped_size);
+#endif
+
+    memset(file, 0, sizeof(*file));
+}
+
+/* Maps the file at path read-only into memory.
+ *
+ * On success file->mapped / file->mapped_size describe the mapping and
+ * true is returned; release it with vkd3d_file_unmap(). On any failure
+ * (including an empty file, which cannot be mapped) file->mapped is NULL,
+ * file->mapped_size is 0 and false is returned. The underlying file
+ * handle/descriptor is always closed before returning. */
+bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file)
+{
+#ifdef _WIN32
+    DWORD size_hi, size_lo;
+    HANDLE file_mapping;
+    HANDLE handle;
+#else
+    struct stat stat_buf;
+    int fd;
+#endif
+
+    file->mapped = NULL;
+    file->mapped_size = 0;
+
+#ifdef _WIN32
+    /* No template file is used, so the template handle is NULL. */
+    handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
+            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
+            NULL);
+    if (handle == INVALID_HANDLE_VALUE)
+        goto out;
+
+    size_lo = GetFileSize(handle, &size_hi);
+    file->mapped_size = size_lo | (((uint64_t)size_hi) << 32);
+
+    /* CreateFileMappingA signals failure with NULL, not INVALID_HANDLE_VALUE. */
+    file_mapping = CreateFileMappingA(handle, NULL, PAGE_READONLY, 0, 0, NULL);
+    if (!file_mapping)
+        goto out;
+
+    /* The view keeps the mapping alive; the mapping handle itself can go. */
+    file->mapped = MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, file->mapped_size);
+    CloseHandle(file_mapping);
+    if (!file->mapped)
+    {
+        ERR("Failed to MapViewOfFile for %s.\n", path);
+        goto out;
+    }
+
+out:
+    if (handle != INVALID_HANDLE_VALUE)
+        CloseHandle(handle);
+#else
+    fd = open(path, O_RDONLY);
+    if (fd < 0)
+        goto out;
+
+    if (fstat(fd, &stat_buf) < 0)
+    {
+        ERR("Failed to fstat pipeline cache.\n");
+        goto out;
+    }
+
+    /* Map private to make sure we get CoW behavior in case someone clobbers
+     * the cache while in flight. We need to read data directly out of the cache. */
+    file->mapped = mmap(NULL, stat_buf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (file->mapped != MAP_FAILED)
+    {
+        file->mapped_size = stat_buf.st_size;
+    }
+    else
+    {
+        /* mmap reports failure with MAP_FAILED, which is not NULL; normalise
+         * it so callers and vkd3d_file_unmap() see "no mapping". */
+        file->mapped = NULL;
+        goto out;
+    }
+
+out:
+    if (fd >= 0)
+        close(fd);
+#endif
+
+    if (!file->mapped)
+        file->mapped_size = 0;
+    return file->mapped != NULL;
+}
--- a/libs/vkd3d-common/memory.c
+++ b/libs/vkd3d-common/memory.c
@ -17,6 +17,8 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
 #include "vkd3d_memory.h"

 bool vkd3d_array_reserve(void **elements, size_t *capacity, size_t element_count, size_t element_size)
--- a/libs/vkd3d-common/meson.build
+++ b/libs/vkd3d-common/meson.build
@ -0,0 +1,17 @@
+vkd3d_common_src = [
+  'debug.c',
+  'memory.c',
+  'utf8.c',
+  'profiling.c',
+  'string.c',
+  'file_utils.c',
+  'platform.c',
+]
+
+vkd3d_common_lib = static_library('vkd3d_common', vkd3d_common_src, vkd3d_header_files,
+  include_directories : vkd3d_private_includes,
+  override_options    : [ 'c_std='+vkd3d_c_std ])
+
+vkd3d_common_dep = declare_dependency(
+  link_with           : vkd3d_common_lib,
+  include_directories : [ vkd3d_public_includes, vkd3d_common_lib.private_dir_include() ])
--- a/libs/vkd3d-common/platform.c
+++ b/libs/vkd3d-common/platform.c
@ -0,0 +1,198 @@
+/*
+ * Copyright 2020 Joshua Ashton for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#include "vkd3d_platform.h"
+
+#include <assert.h>
+#include <stdio.h>
+
+#if defined(__linux__)
+
+# include <dlfcn.h>
+# include <errno.h>
+
+/* Linux: loads the shared object called name with dlopen() using RTLD_NOW
+ * and returns the dlopen() result unchanged. */
+vkd3d_module_t vkd3d_dlopen(const char *name)
+{
+    return dlopen(name, RTLD_NOW);
+}
+
+/* Linux: looks up symbol in the module handle via dlsym() and returns the
+ * dlsym() result unchanged. */
+void *vkd3d_dlsym(vkd3d_module_t handle, const char *symbol)
+{
+    return dlsym(handle, symbol);
+}
+
+/* Linux: releases the module handle via dlclose() and returns its result. */
+int vkd3d_dlclose(vkd3d_module_t handle)
+{
+    return dlclose(handle);
+}
+
+/* Linux: returns the string reported by dlerror(). */
+const char *vkd3d_dlerror(void)
+{
+    return dlerror();
+}
+
+/* Linux: writes the executable's base name into program_name (always
+ * NUL-terminated, truncated to VKD3D_PATH_MAX - 1 characters) and returns true.
+ *
+ * The name is derived from program_invocation_name. If that string starts
+ * with the resolved /proc/self/exe path, the base name of the resolved path is
+ * used instead, which drops any command line arguments appended to the
+ * invocation name. A name without '/' falls back to splitting on '\\'. */
+bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
+{
+    char *resolved_exe = NULL;
+    char *base, *exe_slash;
+
+    base = strrchr(program_invocation_name, '/');
+    if (base)
+    {
+        resolved_exe = realpath("/proc/self/exe", NULL);
+
+        /* Try to strip command line arguments. */
+        if (resolved_exe)
+        {
+            exe_slash = strrchr(resolved_exe, '/');
+            if (exe_slash && !strncmp(resolved_exe, program_invocation_name, strlen(resolved_exe)))
+                base = exe_slash;
+        }
+
+        /* Step past the separator. */
+        base++;
+    }
+    else
+    {
+        base = strrchr(program_invocation_name, '\\');
+        base = base ? base + 1 : program_invocation_name;
+    }
+
+    strncpy(program_name, base, VKD3D_PATH_MAX);
+    program_name[VKD3D_PATH_MAX - 1] = '\0';
+
+    /* base may point into resolved_exe, so only release it after the copy. */
+    free(resolved_exe);
+    return true;
+}
+
+#elif defined(_WIN32)
+
+# include <windows.h>
+
+/* Win32: loads the module called name with LoadLibraryA() and returns the
+ * resulting handle unchanged. */
+vkd3d_module_t vkd3d_dlopen(const char *name)
+{
+    return LoadLibraryA(name);
+}
+
+/* Win32: looks up symbol in the module handle via GetProcAddress() and
+ * returns the result as a void pointer. */
+void *vkd3d_dlsym(vkd3d_module_t handle, const char *symbol)
+{
+    return GetProcAddress(handle, symbol);
+}
+
+/* Win32: releases the module handle via FreeLibrary().
+ *
+ * Returns 0 on success and -1 if FreeLibrary() reports failure, mirroring the
+ * zero / non-zero contract of dlclose() used by the Linux implementation. */
+int vkd3d_dlclose(vkd3d_module_t handle)
+{
+    return FreeLibrary(handle) ? 0 : -1;
+}
+
+/* Win32: no error lookup is performed; a fixed placeholder string is returned. */
+const char *vkd3d_dlerror(void)
+{
+    return "Not implemented for this platform.";
+}
+
+/* Win32: writes the base name of the current executable into program_name.
+ *
+ * The path is queried with GetModuleFileNameA() and everything up to and
+ * including the last '/' or '\\' is dropped. program_name is always
+ * NUL-terminated. Returns true on success; if the path cannot be queried,
+ * program_name is set to the empty string and false is returned. */
+bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
+{
+    char exe_path[VKD3D_PATH_MAX];
+    char *slash, *backslash;
+    char *name;
+    DWORD len;
+
+    len = GetModuleFileNameA(NULL, exe_path, VKD3D_PATH_MAX);
+    if (!len)
+    {
+        /* exe_path is not valid on failure; do not parse it. */
+        *program_name = '\0';
+        return false;
+    }
+
+    /* A truncated result is not guaranteed to be terminated on every
+     * Windows version, so terminate it explicitly. */
+    exe_path[VKD3D_PATH_MAX - 1] = '\0';
+
+    /* Use whichever separator occurs last so mixed-separator paths still
+     * yield just the file name. */
+    slash = strrchr(exe_path, '/');
+    backslash = strrchr(exe_path, '\\');
+    if (slash && backslash)
+        name = (slash > backslash ? slash : backslash) + 1;
+    else if (slash)
+        name = slash + 1;
+    else if (backslash)
+        name = backslash + 1;
+    else
+        name = exe_path;
+
+    strncpy(program_name, name, VKD3D_PATH_MAX);
+    program_name[VKD3D_PATH_MAX - 1] = '\0';
+    return true;
+}
+
+#else
+
+/* Fallback for platforms without a loader backend: logs a FIXME and
+ * returns NULL without using name. */
+vkd3d_module_t vkd3d_dlopen(const char *name)
+{
+    FIXME("Not implemented for this platform.\n");
+    return NULL;
+}
+
+/* Fallback stub: no lookup is performed, NULL is always returned. */
+void *vkd3d_dlsym(vkd3d_module_t handle, const char *symbol)
+{
+    return NULL;
+}
+
+/* Fallback stub: nothing to release, 0 is always returned. */
+int vkd3d_dlclose(vkd3d_module_t handle)
+{
+    return 0;
+}
+
+/* Fallback stub: returns a fixed placeholder string. */
+const char *vkd3d_dlerror(void)
+{
+    return "Not implemented for this platform.";
+}
+
+/* Fallback stub: stores an empty string in program_name and returns false. */
+bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
+{
+    *program_name = '\0';
+    return false;
+}
+
+#endif
+
+#if defined(_WIN32)
+
+/* Win32: copies the value of environment variable name into value, a buffer
+ * of value_size bytes, using GetEnvironmentVariableA().
+ *
+ * Returns true when the call reports a non-zero length no larger than
+ * value_size. Otherwise value is set to the empty string and false is
+ * returned. value must be non-NULL and value_size non-zero. */
+bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
+{
+    DWORD reported;
+
+    assert(value);
+    assert(value_size > 0);
+
+    reported = GetEnvironmentVariableA(name, value, value_size);
+    if (reported == 0 || reported > value_size)
+    {
+        value[0] = '\0';
+        return false;
+    }
+
+    return true;
+}
+
+#else
+
+/* Non-Win32: copies the value of environment variable name into value, a
+ * buffer of value_size bytes, truncating via snprintf() if needed.
+ *
+ * Returns true if getenv() finds the variable. Otherwise value is set to the
+ * empty string and false is returned. value must be non-NULL and value_size
+ * non-zero. */
+bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
+{
+    const char *found;
+
+    assert(value);
+    assert(value_size > 0);
+
+    found = getenv(name);
+    if (!found)
+    {
+        value[0] = '\0';
+        return false;
+    }
+
+    snprintf(value, value_size, "%s", found);
+    return true;
+}
+
+#endif
--- a/libs/vkd3d-common/profiling.c
+++ b/libs/vkd3d-common/profiling.c
@ -0,0 +1,198 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifdef VKD3D_ENABLE_PROFILING
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_profiling.h"
+#include "vkd3d_platform.h"
+#include "vkd3d_threads.h"
+#include "vkd3d_debug.h"
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+#ifdef _WIN32
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#else
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+
+static pthread_once_t profiling_block_once = PTHREAD_ONCE_INIT;
+static unsigned int profiling_region_count;
+static spinlock_t profiling_lock;
+
+/* One profiling record as stored in the memory-mapped profiling file.
+ * The name array is sized so that the two counters plus the name add up
+ * to 64 bytes. */
+struct vkd3d_profiling_block
+{
+    /* Sum of (end_ticks - start_ticks) over all notifications for the region. */
+    uint64_t ticks_total;
+    /* Sum of the iteration counts reported for the region. */
+    uint64_t iteration_total;
+    /* Region name, filled in when the region is registered. */
+    char name[64 - 2 * sizeof(uint64_t)];
+};
+
+static struct vkd3d_profiling_block *mapped_blocks;
+
+#define VKD3D_MAX_PROFILING_REGIONS 256
+static spinlock_t region_locks[VKD3D_MAX_PROFILING_REGIONS];
+
+#ifdef _WIN32
+/* Win32: creates "<path>.<pid>" and maps room for VKD3D_MAX_PROFILING_REGIONS
+ * profiling blocks into mapped_blocks.
+ *
+ * On any failure an error is logged and mapped_blocks is left NULL, which
+ * keeps profiling disabled. */
+static void vkd3d_init_profiling_path(const char *path)
+{
+    HANDLE profiling_fd;
+    HANDLE file_view;
+    char path_pid[_MAX_PATH];
+
+    /* GetCurrentProcessId() returns a DWORD; cast to match the %u conversion. */
+    snprintf(path_pid, sizeof(path_pid), "%s.%u", path, (unsigned int)GetCurrentProcessId());
+    /* No template file is wanted, which is expressed with NULL. */
+    profiling_fd = CreateFileA(path_pid, GENERIC_READ | GENERIC_WRITE,
+            FILE_SHARE_READ, NULL, CREATE_ALWAYS,
+            FILE_ATTRIBUTE_NORMAL, NULL);
+
+    if (profiling_fd == INVALID_HANDLE_VALUE)
+    {
+        ERR("Failed to open profiling FD.\n");
+        return;
+    }
+
+    /* CreateFileMappingA() reports failure with NULL, not INVALID_HANDLE_VALUE. */
+    file_view = CreateFileMappingA(profiling_fd, NULL, PAGE_READWRITE, 0,
+            VKD3D_MAX_PROFILING_REGIONS * sizeof(*mapped_blocks), NULL);
+    if (!file_view)
+    {
+        ERR("Failed to create profiling file view.\n");
+        CloseHandle(profiling_fd);
+        return;
+    }
+
+    mapped_blocks = MapViewOfFile(file_view, FILE_MAP_ALL_ACCESS, 0, 0,
+            VKD3D_MAX_PROFILING_REGIONS * sizeof(*mapped_blocks));
+    if (!mapped_blocks)
+        ERR("Failed to map view of file.\n");
+
+    /* The mapped view remains usable after both handles are closed. */
+    CloseHandle(file_view);
+    CloseHandle(profiling_fd);
+}
+#else
+/* Non-Win32: creates "<path>.<pid>", sizes it for VKD3D_MAX_PROFILING_REGIONS
+ * profiling blocks, maps it shared into mapped_blocks and zero-fills it.
+ *
+ * On any failure an error is logged and mapped_blocks is left NULL, which
+ * keeps profiling disabled. */
+static void vkd3d_init_profiling_path(const char *path)
+{
+    const size_t mapping_size = VKD3D_MAX_PROFILING_REGIONS * sizeof(*mapped_blocks);
+    int profiling_fd;
+    char path_pid[PATH_MAX];
+    void *mapping;
+
+    /* getpid() returns a pid_t; cast to match the %u conversion. */
+    snprintf(path_pid, sizeof(path_pid), "%s.%u", path, (unsigned int)getpid());
+    profiling_fd = open(path_pid, O_RDWR | O_CREAT, 0644);
+
+    if (profiling_fd < 0)
+    {
+        ERR("Failed to open profiling FD.\n");
+        return;
+    }
+
+    if (ftruncate(profiling_fd, mapping_size) < 0)
+    {
+        ERR("Failed to resize profiling FD.\n");
+        close(profiling_fd);
+        return;
+    }
+
+    /* mmap() signals failure with MAP_FAILED rather than NULL. Only publish the
+     * pointer in mapped_blocks once the mapping is known to be valid. */
+    mapping = mmap(NULL, mapping_size, PROT_READ | PROT_WRITE, MAP_SHARED, profiling_fd, 0);
+    if (mapping == MAP_FAILED)
+    {
+        ERR("Failed to map block.\n");
+        close(profiling_fd);
+        return;
+    }
+
+    memset(mapping, 0, mapping_size);
+    mapped_blocks = mapping;
+
+    /* The mapping stays valid after the descriptor is closed. */
+    close(profiling_fd);
+}
+#endif
+
+/* One-time initializer: reads VKD3D_PROFILE_PATH and, when it is set to a
+ * non-empty value, sets up the profiling file at that path. */
+static void vkd3d_init_profiling_once(void)
+{
+    char profile_path[VKD3D_PATH_MAX];
+
+    vkd3d_get_env_var("VKD3D_PROFILE_PATH", profile_path, sizeof(profile_path));
+
+    /* Empty string: the variable is unset or empty, so profiling stays off. */
+    if (profile_path[0] == '\0')
+        return;
+
+    vkd3d_init_profiling_path(profile_path);
+}
+
+/* Runs vkd3d_init_profiling_once() through pthread_once(), so repeated
+ * calls perform the setup a single time. */
+void vkd3d_init_profiling(void)
+{
+    pthread_once(&profiling_block_once, vkd3d_init_profiling_once);
+}
+
+/* Returns true when the profiling block mapping has been set up. */
+bool vkd3d_uses_profiling(void)
+{
+    return mapped_blocks != NULL;
+}
+
+/* Returns the 1-based profiling index for a region, allocating one on first use.
+ *
+ * name:  region name copied into the newly allocated block.
+ * lock:  caller-provided spinlock held for the duration of the lookup.
+ * latch: caller-provided slot caching the index; 0 means "not registered yet".
+ *
+ * Returns 0 when profiling is not active or when more than
+ * VKD3D_MAX_PROFILING_REGIONS regions have been requested. */
+unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch)
+{
+    unsigned int index;
+    if (!mapped_blocks)
+        return 0;
+
+    spinlock_acquire(lock);
+
+    if (*latch == 0)
+    {
+        spinlock_acquire(&profiling_lock);
+        /* Begin at 1, 0 is reserved as a sentinel. */
+        index = ++profiling_region_count;
+        if (index <= VKD3D_MAX_PROFILING_REGIONS)
+        {
+            /* Copies at most size - 1 bytes; NOTE(review): termination relies on
+             * the block's name bytes already being zero - confirm for every
+             * platform's mapping setup. */
+            strncpy(mapped_blocks[index - 1].name, name, sizeof(mapped_blocks[index - 1].name) - 1);
+            /* Important to store with release semantics after we've initialized the block. */
+            vkd3d_atomic_uint32_store_explicit(latch, index, vkd3d_memory_order_release);
+        }
+        else
+        {
+            /* latch is left at 0, so later calls for this region take this path again. */
+            ERR("Too many profiling regions!\n");
+            index = 0;
+        }
+        spinlock_release(&profiling_lock);
+    }
+    else
+        index = *latch;
+
+    spinlock_release(lock);
+    return index;
+}
+
+/* Accumulates one measurement into the profiling block of a region.
+ *
+ * index is the 1-based value handed out by region registration; 0, an
+ * out-of-range index or inactive profiling make this a no-op. The elapsed
+ * ticks (end_ticks - start_ticks) and iteration_count are added to the block's
+ * totals under the region's spinlock. */
+void vkd3d_profiling_notify_work(unsigned int index,
+        uint64_t start_ticks, uint64_t end_ticks,
+        unsigned int iteration_count)
+{
+    struct vkd3d_profiling_block *target;
+    unsigned int slot;
+
+    if (!mapped_blocks)
+        return;
+    if (index == 0 || index > VKD3D_MAX_PROFILING_REGIONS)
+        return;
+
+    /* Indices are 1-based; slot 0 of the arrays belongs to index 1. */
+    slot = index - 1;
+    target = &mapped_blocks[slot];
+
+    spinlock_acquire(&region_locks[slot]);
+    target->iteration_total += iteration_count;
+    target->ticks_total += end_ticks - start_ticks;
+    spinlock_release(&region_locks[slot]);
+}
+
+#endif /* VKD3D_ENABLE_PROFILING */
--- a/libs/vkd3d-common/string.c
+++ b/libs/vkd3d-common/string.c
@ -0,0 +1,176 @@
+/*
+ * Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_string.h"
+#include "vkd3d_memory.h"
+
+STATIC_ASSERT(sizeof(WCHAR) == sizeof(uint16_t));
+
+/* Returns a vkd3d_malloc()-allocated copy of the NUL-terminated string str,
+ * or NULL if the allocation fails. */
+char *vkd3d_strdup(const char *str)
+{
+    /* strdup() is actually not standard. */
+    size_t size = strlen(str) + 1;
+    char *copy = vkd3d_malloc(size);
+
+    if (!copy)
+        return NULL;
+
+    /* size includes the terminator, so it is copied along. */
+    memcpy(copy, str, size);
+    return copy;
+}
+
+/* Returns a vkd3d_malloc()-allocated, NUL-terminated copy of at most n
+ * characters of str, or NULL if the allocation fails. */
+char *vkd3d_strdup_n(const char *str, size_t n)
+{
+    size_t count = strnlen(str, n);
+    char *copy = vkd3d_malloc(count + 1);
+
+    if (!copy)
+        return NULL;
+
+    memcpy(copy, str, count);
+    copy[count] = '\0';
+    return copy;
+}
+
+/* Returns a vkd3d_malloc()-allocated copy of the NUL-terminated WCHAR string
+ * str, or NULL if the allocation fails. */
+WCHAR *vkd3d_wstrdup(const WCHAR *str)
+{
+    /* Element count including the terminator. */
+    size_t count = vkd3d_wcslen(str) + 1;
+    WCHAR *copy = vkd3d_malloc(count * sizeof(WCHAR));
+
+    if (!copy)
+        return NULL;
+
+    memcpy(copy, str, count * sizeof(WCHAR));
+    return copy;
+}
+
+/* Returns true if the NUL-terminated WCHAR strings a and b are identical.
+ * A NULL argument never compares equal, not even to another NULL. */
+bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b)
+{
+    size_t i;
+
+    if (!a || !b)
+        return false;
+
+    for (i = 0; a[i] != '\0' && b[i] != '\0'; i++)
+    {
+        if (a[i] != b[i])
+            return false;
+    }
+
+    /* Equal only if both strings ended at the same position. */
+    return a[i] == b[i];
+}
+
+/* Returns true if the WCHAR string a equals the narrow string b, comparing
+ * element by element. A NULL argument never compares equal.
+ *
+ * Each byte of b is read as unsigned char, the same widening that
+ * vkd3d_dup_entry_point_n() applies, so bytes >= 0x80 compare correctly on
+ * platforms where plain char is signed. */
+bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b)
+{
+    if (!a || !b)
+        return false;
+
+    while (*a != '\0' && *b != '\0')
+    {
+        if (*a != (unsigned char)*b)
+            return false;
+        a++;
+        b++;
+    }
+    return *a == (unsigned char)*b;
+}
+
+/* Returns true if b is exactly expected_n elements long and matches the first
+ * expected_n elements of a. A NULL argument never compares equal. */
+bool vkd3d_export_strequal_substr(const WCHAR *a, size_t expected_n, const WCHAR *b)
+{
+    size_t matched;
+
+    if (!a || !b)
+        return false;
+
+    for (matched = 0; matched < expected_n; matched++)
+    {
+        /* Either string ending early means the prefix is too short. */
+        if (a[matched] == '\0' || b[matched] == '\0')
+            break;
+        if (a[matched] != b[matched])
+            return false;
+    }
+
+    /* b must end right where the compared prefix ends. */
+    return matched == expected_n && b[matched] == '\0';
+}
+
+/* Widens the whole NUL-terminated string str into a newly allocated WCHAR
+ * string by forwarding to vkd3d_dup_entry_point_n() with strlen(str). */
+WCHAR *vkd3d_dup_entry_point(const char *str)
+{
+    return vkd3d_dup_entry_point_n(str, strlen(str));
+}
+
+/* Widens the first len bytes of str into a newly vkd3d_malloc()-allocated,
+ * NUL-terminated WCHAR string. Each byte is read as unsigned char.
+ * Returns NULL if the allocation fails. */
+WCHAR *vkd3d_dup_entry_point_n(const char *str, size_t len)
+{
+    WCHAR *wide, *out;
+    const char *in;
+
+    wide = vkd3d_malloc((len + 1) * sizeof(WCHAR));
+    if (!wide)
+        return NULL;
+
+    out = wide;
+    for (in = str; in != str + len; in++)
+        *out++ = (unsigned char)*in;
+    *out = 0;
+
+    return wide;
+}
+
+/* Returns true for ASCII letters, ASCII digits and the underscore. */
+static bool is_valid_identifier_character(char v)
+{
+    if (v == '_')
+        return true;
+    if (v >= '0' && v <= '9')
+        return true;
+    if (v >= 'A' && v <= 'Z')
+        return true;
+    return v >= 'a' && v <= 'z';
+}
+
+/* Finds the first run of identifier characters in entry.
+ *
+ * Returns a pointer to the start of the run and stores the position one past
+ * its end in *out_end_entry. Returns NULL, leaving *out_end_entry untouched,
+ * when entry contains no identifier character. */
+static const char *vkd3d_manged_entry_point_scan(const char *entry, const char **out_end_entry)
+{
+    const char *begin = entry;
+    const char *end;
+
+    /* Skip leading decoration. */
+    for (; *begin != '\0'; begin++)
+    {
+        if (is_valid_identifier_character(*begin))
+            break;
+    }
+
+    /* Consume the identifier itself. */
+    for (end = begin; *end != '\0'; end++)
+    {
+        if (!is_valid_identifier_character(*end))
+            break;
+    }
+
+    if (end == begin)
+        return NULL;
+
+    *out_end_entry = end;
+    return begin;
+}
+
+/* Extracts the first identifier found in entry and returns it as a newly
+ * allocated WCHAR string. Returns NULL if entry contains no identifier or
+ * the allocation fails. */
+WCHAR *vkd3d_dup_demangled_entry_point(const char *entry)
+{
+    const char *ident_begin, *ident_end;
+
+    ident_begin = vkd3d_manged_entry_point_scan(entry, &ident_end);
+    if (!ident_begin)
+        return NULL;
+
+    return vkd3d_dup_entry_point_n(ident_begin, ident_end - ident_begin);
+}
+
+/* Extracts the first identifier found in entry and returns it as a newly
+ * allocated narrow string. Returns NULL if entry contains no identifier or
+ * the allocation fails. */
+char *vkd3d_dup_demangled_entry_point_ascii(const char *entry)
+{
+    const char *ident_begin, *ident_end;
+
+    ident_begin = vkd3d_manged_entry_point_scan(entry, &ident_end);
+    if (!ident_begin)
+        return NULL;
+
+    return vkd3d_strdup_n(ident_begin, ident_end - ident_begin);
+}
--- a/libs/vkd3d-common/utf8.c
+++ b/libs/vkd3d-common/utf8.c
@ -17,6 +17,8 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
 #include "vkd3d_memory.h"
 #include "vkd3d_utf8.h"

@ -82,9 +84,9 @@ static void vkd3d_utf8_append(char **dst, uint32_t c)
    *dst += 4;
 }

-static uint32_t vkd3d_utf16_read(const uint16_t **src)
+static uint32_t vkd3d_utf16_read(const WCHAR **src)
 {
-    const uint16_t *s = *src;
+    const WCHAR *s = *src;

    if (s[0] < 0xd800 || s[0] > 0xdfff) /* Not a surrogate pair. */
    {
@ -103,14 +105,20 @@ static uint32_t vkd3d_utf16_read(const uint16_t **src)
    return 0x10000 + ((s[0] & 0x3ff) << 10) + (s[1] & 0x3ff);
 }

-static char *vkd3d_strdup_w16_utf8(const uint16_t *wstr)
+static inline bool vkd3d_string_should_loop_u16(ptrdiff_t max_elements, const WCHAR* src, const WCHAR* wstr)
 {
-    const uint16_t *src = wstr;
+    ptrdiff_t cursor_pos = src - wstr;
+    return (!max_elements || cursor_pos < max_elements) && *src;
+}
+
+char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t max_elements)
+{
+    const WCHAR *src = wstr;
    size_t dst_size = 0;
    char *dst, *utf8;
    uint32_t c;

-    while (*src)
+    while (vkd3d_string_should_loop_u16(max_elements, src, wstr))
    {
        if (!(c = vkd3d_utf16_read(&src)))
            continue;
@ -123,42 +131,13 @@ static char *vkd3d_strdup_w16_utf8(const uint16_t *wstr)

    utf8 = dst;
    src = wstr;
-    while (*src)
+    while (vkd3d_string_should_loop_u16(max_elements, src, wstr))
    {
        if (!(c = vkd3d_utf16_read(&src)))
            continue;
        vkd3d_utf8_append(&utf8, c);
    }
-    *utf8 = 0;
+    *utf8 = '\0';

    return dst;
 }
-
-static char *vkd3d_strdup_w32_utf8(const uint32_t *wstr)
-{
-    const uint32_t *src = wstr;
-    size_t dst_size = 0;
-    char *dst, *utf8;
-
-    while (*src)
-        dst_size += vkd3d_utf8_len(*src++);
-    ++dst_size;
-
-    if (!(dst = vkd3d_malloc(dst_size)))
-        return NULL;
-
-    utf8 = dst;
-    src = wstr;
-    while (*src)
-        vkd3d_utf8_append(&utf8, *src++);
-    *utf8 = 0;
-
-    return dst;
-}
-
-char *vkd3d_strdup_w_utf8(const WCHAR *wstr, size_t wchar_size)
-{
-    if (wchar_size == 2)
-        return vkd3d_strdup_w16_utf8((const uint16_t *)wstr);
-    return vkd3d_strdup_w32_utf8((const uint32_t *)wstr);
-}
--- a/libs/vkd3d-shader/checksum.c
+++ b/libs/vkd3d-shader/checksum.c
@ -33,6 +33,8 @@
 * will fill a supplied 16-byte array with the digest.
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_SHADER
+
 #include "vkd3d_shader_private.h"

 #define DXBC_CHECKSUM_BLOCK_SIZE 64
--- a/libs/vkd3d-shader/dxbc.c
+++ b/libs/vkd3d-shader/dxbc.c
--- a/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d-shader/dxil.c
--- a/libs/vkd3d-shader/libvkd3d-shader.pc.in
+++ b/libs/vkd3d-shader/libvkd3d-shader.pc.in
@ -1,10 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: vkd3d-shader
-Description: The vkd3d Shader Translation Library
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/vkd3d
-Libs: -L${libdir} -lvkd3d-shader
--- a/libs/vkd3d-shader/meson.build
+++ b/libs/vkd3d-shader/meson.build
@ -0,0 +1,18 @@
+vkd3d_shader_src = [
+  'checksum.c',
+  'dxil.c',
+  'dxbc.c',
+  'spirv.c',
+  'trace.c',
+  'vkd3d_shader_main.c',
+]
+
+vkd3d_shader_lib = static_library('vkd3d-shader', vkd3d_shader_src,
+  dependencies        : [ vkd3d_common_dep, dxil_spirv_dep ],
+  include_directories : vkd3d_private_includes,
+  override_options    : [ 'c_std='+vkd3d_c_std ])
+
+vkd3d_shader_dep = declare_dependency(
+  link_with           : vkd3d_shader_lib,
+  dependencies        : vkd3d_common_dep,
+  include_directories : vkd3d_public_includes)
--- a/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d-shader/spirv.c
--- a/libs/vkd3d-shader/trace.c
+++ b/libs/vkd3d-shader/trace.c
@ -22,6 +22,8 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_SHADER
+
 #include "vkd3d_shader_private.h"

 #include <stdio.h>
@ -36,7 +38,6 @@ struct vkd3d_string_buffer

 static const char * const shader_opcode_names[] =
 {
-    /* VKD3DSIH_ABS                              */ "abs",
    /* VKD3DSIH_ADD                              */ "add",
    /* VKD3DSIH_AND                              */ "and",
    /* VKD3DSIH_ATOMIC_AND                       */ "atomic_and",
@ -58,12 +59,9 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_CALL                             */ "call",
    /* VKD3DSIH_CALLNZ                           */ "callnz",
    /* VKD3DSIH_CASE                             */ "case",
-    /* VKD3DSIH_CMP                              */ "cmp",
-    /* VKD3DSIH_CND                              */ "cnd",
    /* VKD3DSIH_CONTINUE                         */ "continue",
    /* VKD3DSIH_CONTINUEP                        */ "continuec",
    /* VKD3DSIH_COUNTBITS                        */ "countbits",
-    /* VKD3DSIH_CRS                              */ "crs",
    /* VKD3DSIH_CUT                              */ "cut",
    /* VKD3DSIH_CUT_STREAM                       */ "cut_stream",
    /* VKD3DSIH_DCL                              */ "dcl",
@ -106,16 +104,11 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_DCL_UAV_STRUCTURED               */ "dcl_uav_structured",
    /* VKD3DSIH_DCL_UAV_TYPED                    */ "dcl_uav_typed",
    /* VKD3DSIH_DCL_VERTICES_OUT                 */ "dcl_maxOutputVertexCount",
-    /* VKD3DSIH_DEF                              */ "def",
    /* VKD3DSIH_DEFAULT                          */ "default",
-    /* VKD3DSIH_DEFB                             */ "defb",
-    /* VKD3DSIH_DEFI                             */ "defi",
    /* VKD3DSIH_DIV                              */ "div",
    /* VKD3DSIH_DP2                              */ "dp2",
-    /* VKD3DSIH_DP2ADD                           */ "dp2add",
    /* VKD3DSIH_DP3                              */ "dp3",
    /* VKD3DSIH_DP4                              */ "dp4",
-    /* VKD3DSIH_DST                              */ "dst",
    /* VKD3DSIH_DSX                              */ "dsx",
    /* VKD3DSIH_DSX_COARSE                       */ "deriv_rtx_coarse",
    /* VKD3DSIH_DSX_FINE                         */ "deriv_rtx_fine",
@ -124,16 +117,17 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_DSY_FINE                         */ "deriv_rty_fine",
    /* VKD3DSIH_ELSE                             */ "else",
    /* VKD3DSIH_EMIT                             */ "emit",
+    /* VKD3DSIH_EMIT_THEN_CUT                    */ "emit_then_cut",
    /* VKD3DSIH_EMIT_STREAM                      */ "emit_stream",
+    /* VKD3DSIH_EMIT_THEN_CUT_STREAM             */ "emit_then_cut_stream",
    /* VKD3DSIH_ENDIF                            */ "endif",
    /* VKD3DSIH_ENDLOOP                          */ "endloop",
-    /* VKD3DSIH_ENDREP                           */ "endrep",
    /* VKD3DSIH_ENDSWITCH                        */ "endswitch",
    /* VKD3DSIH_EQ                               */ "eq",
    /* VKD3DSIH_EVAL_CENTROID                    */ "eval_centroid",
    /* VKD3DSIH_EVAL_SAMPLE_INDEX                */ "eval_sample_index",
+    /* VKD3DSIH_EVAL_SNAPPED                     */ "eval_snapped",
    /* VKD3DSIH_EXP                              */ "exp",
-    /* VKD3DSIH_EXPP                             */ "expp",
    /* VKD3DSIH_F16TOF32                         */ "f16tof32",
    /* VKD3DSIH_F32TOF16                         */ "f32tof16",
    /* VKD3DSIH_FCALL                            */ "fcall",
@ -186,34 +180,23 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_LD_RAW                           */ "ld_raw",
    /* VKD3DSIH_LD_STRUCTURED                    */ "ld_structured",
    /* VKD3DSIH_LD_UAV_TYPED                     */ "ld_uav_typed",
-    /* VKD3DSIH_LIT                              */ "lit",
    /* VKD3DSIH_LOD                              */ "lod",
    /* VKD3DSIH_LOG                              */ "log",
-    /* VKD3DSIH_LOGP                             */ "logp",
    /* VKD3DSIH_LOOP                             */ "loop",
    /* VKD3DSIH_LRP                              */ "lrp",
    /* VKD3DSIH_LT                               */ "lt",
-    /* VKD3DSIH_M3x2                             */ "m3x2",
-    /* VKD3DSIH_M3x3                             */ "m3x3",
-    /* VKD3DSIH_M3x4                             */ "m3x4",
-    /* VKD3DSIH_M4x3                             */ "m4x3",
-    /* VKD3DSIH_M4x4                             */ "m4x4",
    /* VKD3DSIH_MAD                              */ "mad",
    /* VKD3DSIH_MAX                              */ "max",
    /* VKD3DSIH_MIN                              */ "min",
    /* VKD3DSIH_MOV                              */ "mov",
-    /* VKD3DSIH_MOVA                             */ "mova",
    /* VKD3DSIH_MOVC                             */ "movc",
    /* VKD3DSIH_MUL                              */ "mul",
    /* VKD3DSIH_NE                               */ "ne",
    /* VKD3DSIH_NOP                              */ "nop",
    /* VKD3DSIH_NOT                              */ "not",
-    /* VKD3DSIH_NRM                              */ "nrm",
    /* VKD3DSIH_OR                               */ "or",
-    /* VKD3DSIH_PHASE                            */ "phase",
    /* VKD3DSIH_POW                              */ "pow",
    /* VKD3DSIH_RCP                              */ "rcp",
-    /* VKD3DSIH_REP                              */ "rep",
    /* VKD3DSIH_RESINFO                          */ "resinfo",
    /* VKD3DSIH_RET                              */ "ret",
    /* VKD3DSIH_RETP                             */ "retp",
@ -230,7 +213,6 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_SAMPLE_INFO                      */ "sample_info",
    /* VKD3DSIH_SAMPLE_LOD                       */ "sample_l",
    /* VKD3DSIH_SAMPLE_POS                       */ "sample_pos",
-    /* VKD3DSIH_SETP                             */ "setp",
    /* VKD3DSIH_SGE                              */ "sge",
    /* VKD3DSIH_SGN                              */ "sgn",
    /* VKD3DSIH_SINCOS                           */ "sincos",
@ -243,28 +225,7 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_SWAPC                            */ "swapc",
    /* VKD3DSIH_SWITCH                           */ "switch",
    /* VKD3DSIH_SYNC                             */ "sync",
-    /* VKD3DSIH_TEX                              */ "texld",
-    /* VKD3DSIH_TEXBEM                           */ "texbem",
-    /* VKD3DSIH_TEXBEML                          */ "texbeml",
-    /* VKD3DSIH_TEXCOORD                         */ "texcrd",
-    /* VKD3DSIH_TEXDEPTH                         */ "texdepth",
-    /* VKD3DSIH_TEXDP3                           */ "texdp3",
-    /* VKD3DSIH_TEXDP3TEX                        */ "texdp3tex",
-    /* VKD3DSIH_TEXKILL                          */ "texkill",
-    /* VKD3DSIH_TEXLDD                           */ "texldd",
-    /* VKD3DSIH_TEXLDL                           */ "texldl",
-    /* VKD3DSIH_TEXM3x2DEPTH                     */ "texm3x2depth",
-    /* VKD3DSIH_TEXM3x2PAD                       */ "texm3x2pad",
-    /* VKD3DSIH_TEXM3x2TEX                       */ "texm3x2tex",
-    /* VKD3DSIH_TEXM3x3                          */ "texm3x3",
-    /* VKD3DSIH_TEXM3x3DIFF                      */ "texm3x3diff",
-    /* VKD3DSIH_TEXM3x3PAD                       */ "texm3x3pad",
-    /* VKD3DSIH_TEXM3x3SPEC                      */ "texm3x3spec",
-    /* VKD3DSIH_TEXM3x3TEX                       */ "texm3x3tex",
-    /* VKD3DSIH_TEXM3x3VSPEC                     */ "texm3x3vspec",
-    /* VKD3DSIH_TEXREG2AR                        */ "texreg2ar",
-    /* VKD3DSIH_TEXREG2GB                        */ "texreg2gb",
-    /* VKD3DSIH_TEXREG2RGB                       */ "texreg2rgb",
+    /* VKD3DSIH_DISCARD                          */ "discard",
    /* VKD3DSIH_UBFE                             */ "ubfe",
    /* VKD3DSIH_UDIV                             */ "udiv",
    /* VKD3DSIH_UGE                              */ "uge",
@ -272,6 +233,7 @@ static const char * const shader_opcode_names[] =
    /* VKD3DSIH_UMAX                             */ "umax",
    /* VKD3DSIH_UMIN                             */ "umin",
    /* VKD3DSIH_UMUL                             */ "umul",
+    /* VKD3DSIH_UMAD                             */ "umad",
    /* VKD3DSIH_USHR                             */ "ushr",
    /* VKD3DSIH_UTOF                             */ "utof",
    /* VKD3DSIH_XOR                              */ "xor",
@ -381,22 +343,6 @@ static int shader_addline(struct vkd3d_string_buffer *buffer, const char *format
    }
 }

-/* Convert floating point offset relative to a register file to an absolute
- * offset for float constants. */
-static unsigned int shader_get_float_offset(enum vkd3d_shader_register_type register_type, UINT register_idx)
-{
-    switch (register_type)
-    {
-        case VKD3DSPR_CONST: return register_idx;
-        case VKD3DSPR_CONST2: return 2048 + register_idx;
-        case VKD3DSPR_CONST3: return 4096 + register_idx;
-        case VKD3DSPR_CONST4: return 6144 + register_idx;
-        default:
-            FIXME("Unsupported register type: %u.\n", register_type);
-            return register_idx;
-    }
-}
-
 static void shader_dump_global_flags(struct vkd3d_string_buffer *buffer, DWORD global_flags)
 {
    unsigned int i;
@ -412,6 +358,8 @@ static void shader_dump_global_flags(struct vkd3d_string_buffer *buffer, DWORD g
        {VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL,         "forceEarlyDepthStencil"},
        {VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS, "enableRawAndStructuredBuffers"},
        {VKD3DSGF_ENABLE_MINIMUM_PRECISION,          "enableMinimumPrecision"},
+        {VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS, "enableDoublePrecisionFloatOps"},
+        {VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS,     "enable11_1DoubleExtensions"},
    };

    for (i = 0; i < ARRAY_SIZE(global_flag_info); ++i)
@ -590,7 +538,7 @@ static void shader_dump_decl_usage(struct vkd3d_string_buffer *buffer,
                break;
        }
    }
-    else if (semantic->reg.reg.type == VKD3DSPR_RESOURCE || semantic->reg.reg.type == VKD3DSPR_UAV)
+    else // if (semantic->reg.reg.type == VKD3DSPR_RESOURCE || semantic->reg.reg.type == VKD3DSPR_UAV)
    {
        if (semantic->reg.reg.type == VKD3DSPR_RESOURCE)
            shader_addline(buffer, "_resource_");
@ -671,80 +619,6 @@ static void shader_dump_decl_usage(struct vkd3d_string_buffer *buffer,
                break;
        }
    }
-    else
-    {
-        /* Pixel shaders 3.0 don't have usage semantics. */
-        if (shader_version->major < 3 && shader_version->type == VKD3D_SHADER_TYPE_PIXEL)
-            return;
-        else
-            shader_addline(buffer, "_");
-
-        switch (semantic->usage)
-        {
-            case VKD3D_DECL_USAGE_POSITION:
-                shader_addline(buffer, "position%u", semantic->usage_idx);
-                break;
-
-            case VKD3D_DECL_USAGE_BLEND_INDICES:
-                shader_addline(buffer, "blend");
-                break;
-
-            case VKD3D_DECL_USAGE_BLEND_WEIGHT:
-                shader_addline(buffer, "weight");
-                break;
-
-            case VKD3D_DECL_USAGE_NORMAL:
-                shader_addline(buffer, "normal%u", semantic->usage_idx);
-                break;
-
-            case VKD3D_DECL_USAGE_PSIZE:
-                shader_addline(buffer, "psize");
-                break;
-
-            case VKD3D_DECL_USAGE_COLOR:
-                if (!semantic->usage_idx)
-                    shader_addline(buffer, "color");
-                else
-                    shader_addline(buffer, "specular%u", (semantic->usage_idx - 1));
-                break;
-
-            case VKD3D_DECL_USAGE_TEXCOORD:
-                shader_addline(buffer, "texture%u", semantic->usage_idx);
-                break;
-
-            case VKD3D_DECL_USAGE_TANGENT:
-                shader_addline(buffer, "tangent");
-                break;
-
-            case VKD3D_DECL_USAGE_BINORMAL:
-                shader_addline(buffer, "binormal");
-                break;
-
-            case VKD3D_DECL_USAGE_TESS_FACTOR:
-                shader_addline(buffer, "tessfactor");
-                break;
-
-            case VKD3D_DECL_USAGE_POSITIONT:
-                shader_addline(buffer, "positionT%u", semantic->usage_idx);
-                break;
-
-            case VKD3D_DECL_USAGE_FOG:
-                shader_addline(buffer, "fog");
-                break;
-
-            case VKD3D_DECL_USAGE_DEPTH:
-                shader_addline(buffer, "depth");
-                break;
-
-            case VKD3D_DECL_USAGE_SAMPLE:
-                shader_addline(buffer, "sample");
-                break;
-
-            default:
-                shader_addline(buffer, "<unknown_semantic(%#x)>", semantic->usage);
-                FIXME("Unrecognised semantic usage %#x.\n", semantic->usage);
-        }
-    }
 }

 static void shader_dump_src_param(struct vkd3d_string_buffer *buffer,
@ -753,8 +627,6 @@ static void shader_dump_src_param(struct vkd3d_string_buffer *buffer,
 static void shader_dump_register(struct vkd3d_string_buffer *buffer,
        const struct vkd3d_shader_register *reg, const struct vkd3d_shader_version *shader_version)
 {
-    static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
-    static const char * const misctype_reg_names[] = {"vPos", "vFace"};
    unsigned int offset = reg->idx[0].offset;

    switch (reg->type)
@ -767,22 +639,6 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
            shader_addline(buffer, "v");
            break;

-        case VKD3DSPR_CONST:
-        case VKD3DSPR_CONST2:
-        case VKD3DSPR_CONST3:
-        case VKD3DSPR_CONST4:
-            shader_addline(buffer, "c");
-            offset = shader_get_float_offset(reg->type, offset);
-            break;
-
-        case VKD3DSPR_TEXTURE: /* vs: case VKD3DSPR_ADDR */
-            shader_addline(buffer, "%c", shader_version->type == VKD3D_SHADER_TYPE_PIXEL ? 't' : 'a');
-            break;
-
-        case VKD3DSPR_RASTOUT:
-            shader_addline(buffer, "%s", rastout_reg_names[offset]);
-            break;
-
        case VKD3DSPR_COLOROUT:
            shader_addline(buffer, "oC");
            break;
@ -799,59 +655,22 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
            shader_addline(buffer, "oDepthLE");
            break;

-        case VKD3DSPR_ATTROUT:
-            shader_addline(buffer, "oD");
-            break;
-
-        case VKD3DSPR_TEXCRDOUT:
-            /* Vertex shaders >= 3.0 use general purpose output registers
-             * (VKD3DSPR_OUTPUT), which can include an address token. */
-            if (shader_version->major >= 3)
-                shader_addline(buffer, "o");
-            else
-                shader_addline(buffer, "oT");
-            break;
-
-        case VKD3DSPR_CONSTINT:
-            shader_addline(buffer, "i");
-            break;
-
-        case VKD3DSPR_CONSTBOOL:
-            shader_addline(buffer, "b");
-            break;
-
-        case VKD3DSPR_LABEL:
-            shader_addline(buffer, "l");
-            break;
-
-        case VKD3DSPR_LOOP:
-            shader_addline(buffer, "aL");
+        case VKD3DSPR_OUTPUT:
+            shader_addline(buffer, "o");
            break;

        case VKD3DSPR_SAMPLER:
            shader_addline(buffer, "s");
            break;

-        case VKD3DSPR_MISCTYPE:
-            if (offset > 1)
-            {
-                FIXME("Unhandled misctype register %u.\n", offset);
-                shader_addline(buffer, "<unhandled misctype %#x>", offset);
-            }
-            else
-            {
-                shader_addline(buffer, "%s", misctype_reg_names[offset]);
-            }
-            break;
-
-        case VKD3DSPR_PREDICATE:
-            shader_addline(buffer, "p");
-            break;
-
        case VKD3DSPR_IMMCONST:
            shader_addline(buffer, "l");
            break;

+        case VKD3DSPR_IMMCONST64:
+            shader_addline(buffer, "d");
+            break;
+
        case VKD3DSPR_CONSTBUFFER:
            shader_addline(buffer, "cb");
            break;
@ -970,15 +789,15 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
                switch (reg->data_type)
                {
                    case VKD3D_DATA_FLOAT:
-                        shader_addline(buffer, "%.8e", reg->u.immconst_float[0]);
+                        shader_addline(buffer, "%.8e", reg->immconst_float[0]);
                        break;
                    case VKD3D_DATA_INT:
-                        shader_addline(buffer, "%d", reg->u.immconst_uint[0]);
+                        shader_addline(buffer, "%d", reg->immconst_uint[0]);
                        break;
                    case VKD3D_DATA_RESOURCE:
                    case VKD3D_DATA_SAMPLER:
                    case VKD3D_DATA_UINT:
-                        shader_addline(buffer, "%u", reg->u.immconst_uint[0]);
+                        shader_addline(buffer, "%u", reg->immconst_uint[0]);
                        break;
                    default:
                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
@ -991,20 +810,20 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
                {
                    case VKD3D_DATA_FLOAT:
                        shader_addline(buffer, "%.8e, %.8e, %.8e, %.8e",
-                                reg->u.immconst_float[0], reg->u.immconst_float[1],
-                                reg->u.immconst_float[2], reg->u.immconst_float[3]);
+                                reg->immconst_float[0], reg->immconst_float[1],
+                                reg->immconst_float[2], reg->immconst_float[3]);
                        break;
                    case VKD3D_DATA_INT:
                        shader_addline(buffer, "%d, %d, %d, %d",
-                                reg->u.immconst_uint[0], reg->u.immconst_uint[1],
-                                reg->u.immconst_uint[2], reg->u.immconst_uint[3]);
+                                reg->immconst_uint[0], reg->immconst_uint[1],
+                                reg->immconst_uint[2], reg->immconst_uint[3]);
                        break;
                    case VKD3D_DATA_RESOURCE:
                    case VKD3D_DATA_SAMPLER:
                    case VKD3D_DATA_UINT:
                        shader_addline(buffer, "%u, %u, %u, %u",
-                                reg->u.immconst_uint[0], reg->u.immconst_uint[1],
-                                reg->u.immconst_uint[2], reg->u.immconst_uint[3]);
+                                reg->immconst_uint[0], reg->immconst_uint[1],
+                                reg->immconst_uint[2], reg->immconst_uint[3]);
                        break;
                    default:
                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
@ -1018,9 +837,43 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
        }
        shader_addline(buffer, ")");
    }
-    else if (reg->type != VKD3DSPR_RASTOUT
-            && reg->type != VKD3DSPR_MISCTYPE
-            && reg->type != VKD3DSPR_NULL)
+    else if (reg->type == VKD3DSPR_IMMCONST64)
+    {
+        shader_addline(buffer, "(");
+        switch (reg->immconst_type)
+        {
+            case VKD3D_IMMCONST_SCALAR:
+                switch (reg->data_type)
+                {
+                    case VKD3D_DATA_DOUBLE:
+                        shader_addline(buffer, "%f", reg->immconst_double[0]);
+                        break;
+                    default:
+                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
+                        break;
+                }
+                break;
+
+            case VKD3D_IMMCONST_DVEC2:
+                switch (reg->data_type)
+                {
+                    case VKD3D_DATA_DOUBLE:
+                        shader_addline(buffer, "%f, %f",
+                                reg->immconst_double[0], reg->immconst_double[1]);
+                        break;
+                    default:
+                        shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
+                        break;
+                }
+                break;
+
+            default:
+                shader_addline(buffer, "<unhandled immconst_type %#x>", reg->immconst_type);
+                break;
+        }
+        shader_addline(buffer, ")");
+    }
+    else if (reg->type != VKD3DSPR_NULL)
    {
        if (offset != ~0u)
        {
@ -1056,7 +909,7 @@ static void shader_dump_register(struct vkd3d_string_buffer *buffer,
        }

        if (reg->type == VKD3DSPR_FUNCTIONPOINTER)
-            shader_addline(buffer, "[%u]", reg->u.fp_body_idx);
+            shader_addline(buffer, "[%u]", reg->fp_body_idx);
    }
 }

@ -1089,16 +942,8 @@ static void shader_dump_src_param(struct vkd3d_string_buffer *buffer,
    enum vkd3d_shader_src_modifier src_modifier = param->modifiers;
    DWORD swizzle = param->swizzle;

-    if (src_modifier == VKD3DSPSM_NEG
-            || src_modifier == VKD3DSPSM_BIASNEG
-            || src_modifier == VKD3DSPSM_SIGNNEG
-            || src_modifier == VKD3DSPSM_X2NEG
-            || src_modifier == VKD3DSPSM_ABSNEG)
+    if (src_modifier == VKD3DSPSM_NEG || src_modifier == VKD3DSPSM_ABSNEG)
        shader_addline(buffer, "-");
-    else if (src_modifier == VKD3DSPSM_COMP)
-        shader_addline(buffer, "1-");
-    else if (src_modifier == VKD3DSPSM_NOT)
-        shader_addline(buffer, "!");

    if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG)
        shader_addline(buffer, "abs(");
@ -1109,16 +954,6 @@ static void shader_dump_src_param(struct vkd3d_string_buffer *buffer,
    {
        case VKD3DSPSM_NONE:    break;
        case VKD3DSPSM_NEG:     break;
-        case VKD3DSPSM_NOT:     break;
-        case VKD3DSPSM_BIAS:    shader_addline(buffer, "_bias"); break;
-        case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break;
-        case VKD3DSPSM_SIGN:    shader_addline(buffer, "_bx2"); break;
-        case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break;
-        case VKD3DSPSM_COMP:    break;
-        case VKD3DSPSM_X2:      shader_addline(buffer, "_x2"); break;
-        case VKD3DSPSM_X2NEG:   shader_addline(buffer, "_x2"); break;
-        case VKD3DSPSM_DZ:      shader_addline(buffer, "_dz"); break;
-        case VKD3DSPSM_DW:      shader_addline(buffer, "_dw"); break;
        case VKD3DSPSM_ABSNEG:  shader_addline(buffer, ")"); break;
        case VKD3DSPSM_ABS:     shader_addline(buffer, ")"); break;
        default:                  shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier);
@ -1151,23 +986,9 @@ static void shader_dump_ins_modifiers(struct vkd3d_string_buffer *buffer,
 {
    DWORD mmask = dst->modifiers;

-    switch (dst->shift)
-    {
-        case 0: break;
-        case 13: shader_addline(buffer, "_d8"); break;
-        case 14: shader_addline(buffer, "_d4"); break;
-        case 15: shader_addline(buffer, "_d2"); break;
-        case 1: shader_addline(buffer, "_x2"); break;
-        case 2: shader_addline(buffer, "_x4"); break;
-        case 3: shader_addline(buffer, "_x8"); break;
-        default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break;
-    }
-
    if (mmask & VKD3DSPDM_SATURATE)         shader_addline(buffer, "_sat");
-    if (mmask & VKD3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp");
-    if (mmask & VKD3DSPDM_MSAMPCENTROID)    shader_addline(buffer, "_centroid");

-    mmask &= ~(VKD3DSPDM_SATURATE | VKD3DSPDM_PARTIALPRECISION | VKD3DSPDM_MSAMPCENTROID);
+    mmask &= ~(VKD3DSPDM_SATURATE);
    if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask);
 }

@ -1287,7 +1108,7 @@ static void shader_dump_instruction_flags(struct vkd3d_string_buffer *buffer,
        case VKD3DSIH_CONTINUEP:
        case VKD3DSIH_IF:
        case VKD3DSIH_RETP:
-        case VKD3DSIH_TEXKILL:
+        case VKD3DSIH_DISCARD:
            switch (ins->flags)
            {
                case VKD3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(buffer, "_nz"); break;
@ -1333,11 +1154,6 @@ static void shader_dump_instruction_flags(struct vkd3d_string_buffer *buffer,
            shader_dump_sync_flags(buffer, ins->flags);
            break;

-        case VKD3DSIH_TEX:
-            if (shader_version->major >= 2 && (ins->flags & VKD3DSI_TEXLD_PROJECT))
-                shader_addline(buffer, "p");
-            break;
-
        default:
            shader_dump_precise_flags(buffer, ins->flags);
            break;
@ -1550,24 +1366,6 @@ static void shader_dump_instruction(struct vkd3d_string_buffer *buffer,
            shader_dump_register_space(buffer, ins->declaration.structured_resource.register_space, shader_version);
            break;

-        case VKD3DSIH_DEF:
-            shader_addline(buffer, "def c%u = %.8e, %.8e, %.8e, %.8e",
-                    shader_get_float_offset(ins->dst[0].reg.type, ins->dst[0].reg.idx[0].offset),
-                    ins->src[0].reg.u.immconst_float[0], ins->src[0].reg.u.immconst_float[1],
-                    ins->src[0].reg.u.immconst_float[2], ins->src[0].reg.u.immconst_float[3]);
-            break;
-
-        case VKD3DSIH_DEFI:
-            shader_addline(buffer, "defi i%u = %d, %d, %d, %d", ins->dst[0].reg.idx[0].offset,
-                    ins->src[0].reg.u.immconst_uint[0], ins->src[0].reg.u.immconst_uint[1],
-                    ins->src[0].reg.u.immconst_uint[2], ins->src[0].reg.u.immconst_uint[3]);
-            break;
-
-        case VKD3DSIH_DEFB:
-            shader_addline(buffer, "defb b%u = %s",
-                    ins->dst[0].reg.idx[0].offset, ins->src[0].reg.u.immconst_uint[0] ? "true" : "false");
-            break;
-
        default:
            if (ins->predicate)
            {
--- a/libs/vkd3d-shader/vkd3d_shader.map
+++ b/libs/vkd3d-shader/vkd3d_shader.map
@ -1,16 +0,0 @@
-VKD3D_1_0
-{
-global:
-    vkd3d_shader_compile_dxbc;
-    vkd3d_shader_convert_root_signature;
-    vkd3d_shader_find_signature_element;
-    vkd3d_shader_free_root_signature;
-    vkd3d_shader_free_shader_code;
-    vkd3d_shader_free_shader_signature;
-    vkd3d_shader_parse_input_signature;
-    vkd3d_shader_parse_root_signature;
-    vkd3d_shader_scan_dxbc;
-    vkd3d_shader_serialize_root_signature;
-
-local: *;
-};
--- a/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d-shader/vkd3d_shader_main.c
@ -16,53 +16,160 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_SHADER
+
 #include "vkd3d_shader_private.h"

+#include "vkd3d_platform.h"
+
 #include <stdio.h>
+#include <inttypes.h>

-VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG");
-
-STATIC_ASSERT(MEMBER_SIZE(struct vkd3d_shader_scan_info, uav_counter_mask) * CHAR_BIT >= VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS);
-STATIC_ASSERT(MEMBER_SIZE(struct vkd3d_shader_scan_info, uav_read_mask) * CHAR_BIT >= VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS);
-
-static void vkd3d_shader_dump_blob(const char *path, const char *prefix, const void *data, size_t size)
+/* Writes `size` bytes from `data` to "<path>/<hash as 16 hex digits>.<ext>".
+ * The file is opened with the exclusive "x" flag; when the open fails nothing
+ * is written and, unlike write/close failures, no error is logged. */
+static void vkd3d_shader_dump_blob(const char *path, vkd3d_shader_hash_t hash, const void *data, size_t size, const char *ext)
 {
-    static int shader_id = 0;
    char filename[1024];
-    unsigned int id;
    FILE *f;

-    id = InterlockedIncrement(&shader_id) - 1;
+    snprintf(filename, ARRAY_SIZE(filename), "%s/%016"PRIx64".%s", path, hash, ext);

-    snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s-%u.dxbc", path, prefix, id);
-    if ((f = fopen(filename, "wb")))
+    INFO("Dumping blob to %s.\n", filename);
+
+    /* Exclusive open to avoid multiple threads spamming out the same shader module, and avoids race condition. */
+    if ((f = fopen(filename, "wbx")))
    {
        if (fwrite(data, 1, size, f) != size)
            ERR("Failed to write shader to %s.\n", filename);
        if (fclose(f))
            ERR("Failed to close stream %s.\n", filename);
    }
-    else
-    {
-        ERR("Failed to open %s for dumping shader.\n", filename);
-    }
 }

-static void vkd3d_shader_dump_shader(enum vkd3d_shader_type type, const struct vkd3d_shader_code *shader)
+/* Reads the whole file `filename` into a newly allocated buffer that replaces
+ * the shader identified by `hash` (the hash is only used for the log message).
+ *
+ * On success returns true and stores the buffer and its length in *data and
+ * *size; the buffer is not freed here. On any failure (file cannot be opened,
+ * sized or read, is shorter than 16 bytes, or allocation fails) returns false
+ * and leaves *data and *size untouched. */
+static bool vkd3d_shader_replace_path(const char *filename, vkd3d_shader_hash_t hash, const void **data, size_t *size)
+{
+    void *buffer = NULL;
+    FILE *f = NULL;
+    size_t len;
+    long end;
+
+    if (!(f = fopen(filename, "rb")))
+        goto err;
+
+    if (fseek(f, 0, SEEK_END) < 0)
+        goto err;
+    /* ftell() reports failure as -1. Check it while it is still signed:
+     * converting it to size_t first would turn the error into a huge length
+     * that passes the minimum-size test below. */
+    end = ftell(f);
+    if (end < 16)
+        goto err;
+    len = (size_t)end;
+    rewind(f);
+    buffer = vkd3d_malloc(len);
+    if (!buffer)
+        goto err;
+    if (fread(buffer, 1, len, f) != len)
+        goto err;
+
+    *data = buffer;
+    *size = len;
+    INFO("Overriding shader hash %016"PRIx64" with alternative SPIR-V module from %s!\n", hash, filename);
+    fclose(f);
+    return true;
+
+err:
+    if (f)
+        fclose(f);
+    /* buffer is still NULL when the failure happened before the allocation. */
+    vkd3d_free(buffer);
+    return false;
+}
+
+/* Looks for a replacement module named "<dir>/<hash as 16 hex digits>.spv",
+ * where <dir> comes from the VKD3D_SHADER_OVERRIDE environment variable, and
+ * loads it through vkd3d_shader_replace_path().
+ * Returns true when a replacement was loaded into *data / *size.
+ * Once the environment lookup fails, `enabled` is cleared and every later call
+ * returns false without querying the environment again. */
+bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *size)
 {
    static bool enabled = true;
-    const char *path;
+    char path[VKD3D_PATH_MAX];
+    char filename[1024];
+
+    if (!enabled)
+        return false;
+
+    /* Presumably copies the variable's value into path - confirm against vkd3d_get_env_var(). */
+    if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
+    {
+        enabled = false;
+        return false;
+    }
+
+    snprintf(filename, ARRAY_SIZE(filename), "%s/%016"PRIx64".spv", path, hash);
+    return vkd3d_shader_replace_path(filename, hash, data, size);
+}
+
+/* Per-export variant of the shader override lookup: tries to load
+ * "<dir>/<hash as 16 hex digits>.lib.<export>.spv", with <dir> taken from the
+ * VKD3D_SHADER_OVERRIDE environment variable.
+ * Returns true when a replacement was loaded into *data / *size. A failed
+ * environment lookup is remembered, so later calls return false right away. */
+bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, size_t *size, const char *export)
+{
+    static bool enabled = true;
+    char override_dir[VKD3D_PATH_MAX];
+    char candidate[1024];
+
+    /* Only query the environment while the override is still considered active. */
+    if (enabled && !vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", override_dir, sizeof(override_dir)))
+        enabled = false;
+
+    if (!enabled)
+        return false;
+
+    snprintf(candidate, ARRAY_SIZE(candidate), "%s/%016"PRIx64".lib.%s.spv", override_dir, hash, export);
+    return vkd3d_shader_replace_path(candidate, hash, data, size);
+}
+
+/* Dumps shader->code (shader->size bytes) via vkd3d_shader_dump_blob() into the
+ * directory given by the VKD3D_SHADER_DUMP_PATH environment variable, using
+ * `hash` for the file name and `ext` as its extension.
+ * Does nothing when the environment lookup fails; that failure is remembered
+ * in `enabled` so later calls return immediately. */
+void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader, const char *ext)
+{
+    static bool enabled = true;
+    char path[VKD3D_PATH_MAX];

    if (!enabled)
        return;

-    if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
+    if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
    {
        enabled = false;
        return;
    }

-    vkd3d_shader_dump_blob(path, shader_get_type_prefix(type), shader->code, shader->size);
+    vkd3d_shader_dump_blob(path, hash, shader->code, shader->size, ext);
+}
+
+/* Dumps a compiled module as "<hash as 16 hex digits>.spv" into the directory
+ * named by the VKD3D_SHADER_DUMP_PATH environment variable. A failed
+ * environment lookup switches dumping off for all later calls. */
+void vkd3d_shader_dump_spirv_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader)
+{
+    static bool enabled = true;
+    char dump_dir[VKD3D_PATH_MAX];
+
+    /* Only query the environment while dumping is still considered active. */
+    if (enabled && !vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", dump_dir, sizeof(dump_dir)))
+        enabled = false;
+
+    if (enabled)
+        vkd3d_shader_dump_blob(dump_dir, hash, shader->code, shader->size, "spv");
+}
+
+/* Dumps the module compiled for one named export. The extension handed to
+ * vkd3d_shader_dump_blob() is "lib.<export>.spv", so the file ends up as
+ * "<hash as 16 hex digits>.lib.<export>.spv" inside the directory named by
+ * VKD3D_SHADER_DUMP_PATH. A failed environment lookup disables later calls. */
+void vkd3d_shader_dump_spirv_shader_export(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader,
+        const char *export)
+{
+    static bool enabled = true;
+    char path[VKD3D_PATH_MAX];
+    char tag[1024];
+
+    if (!enabled)
+        return;
+
+    if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
+    {
+        enabled = false;
+        return;
+    }
+
+    /* snprintf() truncates an export name that does not fit in tag. */
+    snprintf(tag, sizeof(tag), "lib.%s.spv", export);
+    vkd3d_shader_dump_blob(path, hash, shader->code, shader->size, tag);
 }

 struct vkd3d_shader_parser
@ -108,15 +215,8 @@ static int vkd3d_shader_validate_compile_args(const struct vkd3d_shader_compile_
    if (!compile_args)
        return VKD3D_OK;

-    if (compile_args->type != VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_ARGUMENTS)
-    {
-        WARN("Invalid structure type %#x.\n", compile_args->type);
-        return VKD3D_ERROR_INVALID_ARGUMENT;
-    }
-
    switch (compile_args->target)
    {
-        case VKD3D_SHADER_TARGET_SPIRV_OPENGL_4_5:
        case VKD3D_SHADER_TARGET_SPIRV_VULKAN_1_0:
            break;
        default:
@ -127,6 +227,100 @@ static int vkd3d_shader_validate_compile_args(const struct vkd3d_shader_compile_
    return VKD3D_OK;
 }

+/* Lookup key of the scan register map: one register, named by type and id. */
+struct vkd3d_shader_scan_key
+{
+    enum vkd3d_shader_register_type register_type;
+    unsigned int register_id;
+};
+
+/* Element type of vkd3d_shader_scan_info::register_map. */
+struct vkd3d_shader_scan_entry
+{
+    /* Embedded map header. vkd3d_shader_scan_entry_compare() casts a
+     * hash_map_entry pointer back to this struct, so it has to stay first. */
+    struct hash_map_entry entry;
+    struct vkd3d_shader_scan_key key;
+    /* Bitwise OR of every flag recorded for the register named by key. */
+    unsigned int flags;
+};
+
+/* Hash callback of the register map: combines the register type and the
+ * register id of the vkd3d_shader_scan_key that `key` points to. */
+static uint32_t vkd3d_shader_scan_entry_hash(const void *key)
+{
+    const struct vkd3d_shader_scan_key *scan_key = key;
+
+    return hash_combine(scan_key->register_type,
+            scan_key->register_id);
+}
+
+/* Equality callback of the register map: true when the entry's stored key
+ * names the same register (type and id) as the looked-up key. */
+static bool vkd3d_shader_scan_entry_compare(const void *key, const struct hash_map_entry *entry)
+{
+    const struct vkd3d_shader_scan_key *wanted = key;
+    const struct vkd3d_shader_scan_key *stored = &((const struct vkd3d_shader_scan_entry*) entry)->key;
+
+    if (stored->register_type != wanted->register_type)
+        return false;
+
+    return stored->register_id == wanted->register_id;
+}
+
+/* Returns the flags recorded during scanning for the register (type, id),
+ * or 0 when nothing was recorded for it. */
+unsigned int vkd3d_shader_scan_get_register_flags(const struct vkd3d_shader_scan_info *scan_info,
+        enum vkd3d_shader_register_type type, unsigned int id)
+{
+    const struct vkd3d_shader_scan_entry *e;
+    struct vkd3d_shader_scan_key key;
+
+    key.register_type = type;
+    key.register_id = id;
+
+    /* The map is initialised with vkd3d_shader_scan_entry elements whose first
+     * member is the hash_map_entry header, so the result is converted back to
+     * the full entry; the scan flags are a field of that struct, not of the
+     * map header. */
+    e = (const struct vkd3d_shader_scan_entry *)hash_map_find(&scan_info->register_map, &key);
+    return e ? e->flags : 0u;
+}
+
+/* ORs `flags` into the flags recorded for the register (type, id), inserting
+ * a new map entry holding just `flags` when the register is not present yet. */
+static void vkd3d_shader_scan_set_register_flags(struct vkd3d_shader_scan_info *scan_info,
+        enum vkd3d_shader_register_type type, unsigned int id, unsigned int flags)
+{
+    struct vkd3d_shader_scan_entry entry;
+    struct vkd3d_shader_scan_entry *e;
+    struct vkd3d_shader_scan_key key;
+
+    key.register_type = type;
+    key.register_id = id;
+
+    /* Map elements are vkd3d_shader_scan_entry structs that begin with the
+     * hash_map_entry header; convert back so the update lands in the entry's
+     * own flags field and not in the map header. */
+    if ((e = (struct vkd3d_shader_scan_entry *)hash_map_find(&scan_info->register_map, &key)))
+        e->flags |= flags;
+    else
+    {
+        /* NOTE(review): entry.entry is left unset here; presumably
+         * hash_map_insert() fills in the header of the stored copy - confirm. */
+        entry.key = key;
+        entry.flags = flags;
+        hash_map_insert(&scan_info->register_map, &key, &entry.entry);
+    }
+}
+
+/* Zeroes *scan_info and sets up its register map with the scan-entry hash and
+ * compare callbacks and an element size of struct vkd3d_shader_scan_entry. */
+static void vkd3d_shader_scan_init(struct vkd3d_shader_scan_info *scan_info)
+{
+    memset(scan_info, 0, sizeof(*scan_info));
+    hash_map_init(&scan_info->register_map, &vkd3d_shader_scan_entry_hash,
+            &vkd3d_shader_scan_entry_compare, sizeof(struct vkd3d_shader_scan_entry));
+}
+
+/* Clears the register map of *scan_info; the other fields are left as they are. */
+static void vkd3d_shader_scan_destroy(struct vkd3d_shader_scan_info *scan_info)
+{
+    hash_map_clear(&scan_info->register_map);
+}
+
+/* Checks that the shader type matches the Vulkan stage `stages`.
+ * Returns 0 when they match and VKD3D_ERROR_INVALID_ARGUMENT when `type` is
+ * out of range or maps to a different stage. */
+static int vkd3d_shader_validate_shader_type(enum vkd3d_shader_type type, VkShaderStageFlagBits stages)
+{
+    /* Indexed by enum vkd3d_shader_type; the order presumably mirrors that
+     * enum (pixel, vertex, geometry, hull, domain, compute) - TODO confirm. */
+    static const VkShaderStageFlagBits table[VKD3D_SHADER_TYPE_COUNT] = {
+        VK_SHADER_STAGE_FRAGMENT_BIT,
+        VK_SHADER_STAGE_VERTEX_BIT,
+        VK_SHADER_STAGE_GEOMETRY_BIT,
+        VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+        VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
+        VK_SHADER_STAGE_COMPUTE_BIT,
+    };
+
+    if (type >= VKD3D_SHADER_TYPE_COUNT)
+        return VKD3D_ERROR_INVALID_ARGUMENT;
+
+    if (table[type] != stages)
+    {
+        /* NOTE(review): the message labels the caller-supplied `stages` as
+         * "expected" and the stage derived from `type` as "got" - confirm
+         * that this is the intended wording. */
+        ERR("Expected VkShaderStage #%x, but got VkShaderStage #%x.\n", stages, table[type]);
+        return VKD3D_ERROR_INVALID_ARGUMENT;
+    }
+
+    return 0;
+}
+
 int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
        struct vkd3d_shader_code *spirv, unsigned int compiler_options,
        const struct vkd3d_shader_interface_info *shader_interface_info,
@ -136,37 +330,67 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
    struct vkd3d_dxbc_compiler *spirv_compiler;
    struct vkd3d_shader_scan_info scan_info;
    struct vkd3d_shader_parser parser;
+    vkd3d_shader_hash_t hash;
    int ret;

    TRACE("dxbc {%p, %zu}, spirv %p, compiler_options %#x, shader_interface_info %p, compile_args %p.\n",
            dxbc->code, dxbc->size, spirv, compiler_options, shader_interface_info, compile_args);

-    if (shader_interface_info && shader_interface_info->type != VKD3D_SHADER_STRUCTURE_TYPE_SHADER_INTERFACE_INFO)
-    {
-        WARN("Invalid structure type %#x.\n", shader_interface_info->type);
-        return VKD3D_ERROR_INVALID_ARGUMENT;
-    }
-
    if ((ret = vkd3d_shader_validate_compile_args(compile_args)) < 0)
        return ret;

-    scan_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_INFO;
-    scan_info.next = NULL;
+    /* DXIL is handled externally through dxil-spirv. */
+    if (shader_is_dxil(dxbc->code, dxbc->size))
+    {
+        return vkd3d_shader_compile_dxil(dxbc, spirv, shader_interface_info, compile_args);
+    }
+
+    memset(&spirv->meta, 0, sizeof(spirv->meta));
+
+    hash = vkd3d_shader_hash(dxbc);
+    spirv->meta.hash = hash;
+    if (vkd3d_shader_replace(hash, &spirv->code, &spirv->size))
+    {
+        spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
+        return VKD3D_OK;
+    }
+
+    vkd3d_shader_scan_init(&scan_info);
+
    if ((ret = vkd3d_shader_scan_dxbc(dxbc, &scan_info)) < 0)
+    {
+        vkd3d_shader_scan_destroy(&scan_info);
        return ret;
+    }
+
+    spirv->meta.patch_vertex_count = scan_info.patch_vertex_count;

    if ((ret = vkd3d_shader_parser_init(&parser, dxbc)) < 0)
+    {
+        vkd3d_shader_scan_destroy(&scan_info);
        return ret;
+    }

-    vkd3d_shader_dump_shader(parser.shader_version.type, dxbc);
+    if (shader_interface_info)
+    {
+        if ((ret = vkd3d_shader_validate_shader_type(parser.shader_version.type, shader_interface_info->stage)) < 0)
+        {
+            vkd3d_shader_scan_destroy(&scan_info);
+            return ret;
+        }
+    }
+
+    vkd3d_shader_dump_shader(hash, dxbc, "dxbc");

    if (TRACE_ON())
        vkd3d_shader_trace(parser.data);

    if (!(spirv_compiler = vkd3d_dxbc_compiler_create(&parser.shader_version,
-            &parser.shader_desc, compiler_options, shader_interface_info, compile_args, &scan_info)))
+            &parser.shader_desc, compiler_options, shader_interface_info, compile_args, &scan_info,
+            spirv->meta.hash)))
    {
        ERR("Failed to create DXBC compiler.\n");
+        vkd3d_shader_scan_destroy(&scan_info);
        vkd3d_shader_parser_destroy(&parser);
        return VKD3D_ERROR;
    }
@ -179,6 +403,7 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
        {
            WARN("Encountered unrecognized or invalid instruction.\n");
            vkd3d_dxbc_compiler_destroy(spirv_compiler);
+            vkd3d_shader_scan_destroy(&scan_info);
            vkd3d_shader_parser_destroy(&parser);
            return VKD3D_ERROR_INVALID_ARGUMENT;
        }
@ -190,7 +415,11 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
    if (ret >= 0)
        ret = vkd3d_dxbc_compiler_generate_spirv(spirv_compiler, spirv);

+    if (ret == 0)
+        vkd3d_shader_dump_spirv_shader(hash, spirv);
+
    vkd3d_dxbc_compiler_destroy(spirv_compiler);
+    vkd3d_shader_scan_destroy(&scan_info);
    vkd3d_shader_parser_destroy(&parser);
    return ret;
 }
@ -200,16 +429,41 @@ static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instr
    enum VKD3D_SHADER_INSTRUCTION_HANDLER handler_idx = instruction->handler_idx;
    return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR)
            || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR)
-            || handler_idx == VKD3DSIH_LD_UAV_TYPED
-            || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV)
-            || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV);
+            || handler_idx == VKD3DSIH_LD_UAV_TYPED || handler_idx == VKD3DSIH_LD_UAV_TYPED_FEEDBACK
+            || ((handler_idx == VKD3DSIH_LD_RAW || handler_idx == VKD3DSIH_LD_RAW_FEEDBACK) && instruction->src[1].reg.type == VKD3DSPR_UAV)
+            || ((handler_idx == VKD3DSIH_LD_STRUCTURED || handler_idx == VKD3DSIH_LD_STRUCTURED_FEEDBACK) && instruction->src[2].reg.type == VKD3DSPR_UAV);
+}
+
+/* True for instructions treated as UAV writes: everything in the
+ * ATOMIC_AND..ATOMIC_XOR and IMM_ATOMIC_ALLOC..IMM_ATOMIC_XOR handler ranges,
+ * plus the typed, raw and structured store instructions. */
+static bool vkd3d_shader_instruction_is_uav_write(const struct vkd3d_shader_instruction *instruction)
+{
+    const enum VKD3D_SHADER_INSTRUCTION_HANDLER op = instruction->handler_idx;
+
+    if (op >= VKD3DSIH_ATOMIC_AND && op <= VKD3DSIH_ATOMIC_XOR)
+        return true;
+    if (op >= VKD3DSIH_IMM_ATOMIC_ALLOC && op <= VKD3DSIH_IMM_ATOMIC_XOR)
+        return true;
+
+    switch (op)
+    {
+        case VKD3DSIH_STORE_UAV_TYPED:
+        case VKD3DSIH_STORE_RAW:
+        case VKD3DSIH_STORE_STRUCTURED:
+            return true;
+
+        default:
+            return false;
+    }
+}
+
+/* True for handlers in the ATOMIC_AND..ATOMIC_XOR range or the
+ * IMM_ATOMIC_AND..IMM_ATOMIC_XOR range, except IMM_ATOMIC_CONSUME.
+ * Unlike vkd3d_shader_instruction_is_uav_write(), the immediate range starts
+ * at IMM_ATOMIC_AND rather than IMM_ATOMIC_ALLOC. */
+static bool vkd3d_shader_instruction_is_uav_atomic(const struct vkd3d_shader_instruction *instruction)
+{
+    enum VKD3D_SHADER_INSTRUCTION_HANDLER handler_idx = instruction->handler_idx;
+    return ((VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) ||
+            (VKD3DSIH_IMM_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR)) &&
+            handler_idx != VKD3DSIH_IMM_ATOMIC_CONSUME;
 }

+/* Marks the UAV register reg->idx[0].offset as read by setting
+ * VKD3D_SHADER_UAV_FLAG_READ_ACCESS in the scan register map. */
 static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_info *scan_info,
        const struct vkd3d_shader_register *reg)
 {
-    assert(reg->idx[0].offset < VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS);
-    scan_info->uav_read_mask |= 1u << reg->idx[0].offset;
+    vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
+            reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_READ_ACCESS);
+}
+
+/* Marks the UAV register reg->idx[0].offset as accessed atomically by setting
+ * VKD3D_SHADER_UAV_FLAG_ATOMIC_ACCESS in the scan register map. */
+static void vkd3d_shader_scan_record_uav_atomic(struct vkd3d_shader_scan_info *scan_info,
+        const struct vkd3d_shader_register *reg)
+{
+    vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
+            reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_ACCESS);
 }

 static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction)
@ -222,8 +476,10 @@ static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_in
+/* Records a UAV counter use: sets has_side_effects and has_uav_counter on the
+ * scan info and flags the UAV register reg->idx[0].offset with
+ * VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER. */
 static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_info *scan_info,
        const struct vkd3d_shader_register *reg)
 {
-    assert(reg->idx[0].offset < VKD3D_SHADER_MAX_UNORDERED_ACCESS_VIEWS);
-    scan_info->uav_counter_mask |= 1u << reg->idx[0].offset;
+    scan_info->has_side_effects = true;
+    scan_info->has_uav_counter = true;
+    vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
+            reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER);
 }

 static void vkd3d_shader_scan_input_declaration(struct vkd3d_shader_scan_info *scan_info,
@ -235,14 +491,21 @@ static void vkd3d_shader_scan_input_declaration(struct vkd3d_shader_scan_info *s
        scan_info->use_vocp = true;
 }

-static void vkd3d_shader_scan_sampler_declaration(struct vkd3d_shader_scan_info *scan_info,
+/* Sets scan_info->needs_late_zs when the declared output register is one of
+ * the depth outputs, the stencil reference output or the sample mask; any
+ * other register type leaves scan_info unchanged. */
+static void vkd3d_shader_scan_output_declaration(struct vkd3d_shader_scan_info *scan_info,
        const struct vkd3d_shader_instruction *instruction)
 {
-    unsigned int sampler_index = instruction->declaration.dst.reg.idx[0].offset;
-    if (instruction->flags & VKD3DSI_SAMPLER_COMPARISON_MODE)
+    switch (instruction->declaration.dst.reg.type)
    {
-        assert(sampler_index < CHAR_BIT * sizeof(scan_info->sampler_comparison_mode_mask));
-        scan_info->sampler_comparison_mode_mask |= 1u << sampler_index;
+        case VKD3DSPR_DEPTHOUT:
+        case VKD3DSPR_DEPTHOUTLE:
+        case VKD3DSPR_DEPTHOUTGE:
+        case VKD3DSPR_STENCILREFOUT:
+        case VKD3DSPR_SAMPLEMASK:
+            scan_info->needs_late_zs = true;
+            break;
+
+        default:
+            break;
    }
 }

@ -250,14 +513,25 @@ static void vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_info *scan_in
        const struct vkd3d_shader_instruction *instruction)
 {
    unsigned int i;
+    bool is_atomic;

    switch (instruction->handler_idx)
    {
        case VKD3DSIH_DCL_INPUT:
            vkd3d_shader_scan_input_declaration(scan_info, instruction);
            break;
-        case VKD3DSIH_DCL_SAMPLER:
-            vkd3d_shader_scan_sampler_declaration(scan_info, instruction);
+        case VKD3DSIH_DCL_OUTPUT:
+            vkd3d_shader_scan_output_declaration(scan_info, instruction);
+            break;
+        case VKD3DSIH_DISCARD:
+            scan_info->discards = true;
+            break;
+        case VKD3DSIH_DCL_GLOBAL_FLAGS:
+            if (instruction->flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)
+                scan_info->early_fragment_tests = true;
+            break;
+        case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
+            scan_info->patch_vertex_count = instruction->declaration.count;
            break;
        default:
            break;
@ -265,18 +539,31 @@ static void vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_info *scan_in

    if (vkd3d_shader_instruction_is_uav_read(instruction))
    {
+        is_atomic = vkd3d_shader_instruction_is_uav_atomic(instruction);
+
        for (i = 0; i < instruction->dst_count; ++i)
        {
            if (instruction->dst[i].reg.type == VKD3DSPR_UAV)
+            {
                vkd3d_shader_scan_record_uav_read(scan_info, &instruction->dst[i].reg);
+                if (is_atomic)
+                    vkd3d_shader_scan_record_uav_atomic(scan_info, &instruction->dst[i].reg);
+            }
        }
        for (i = 0; i < instruction->src_count; ++i)
        {
            if (instruction->src[i].reg.type == VKD3DSPR_UAV)
+            {
                vkd3d_shader_scan_record_uav_read(scan_info, &instruction->src[i].reg);
+                if (is_atomic)
+                    vkd3d_shader_scan_record_uav_atomic(scan_info, &instruction->src[i].reg);
+            }
        }
    }

+    if (vkd3d_shader_instruction_is_uav_write(instruction))
+        scan_info->has_side_effects = true;
+
    if (vkd3d_shader_instruction_is_uav_counter(instruction))
        vkd3d_shader_scan_record_uav_counter(scan_info, &instruction->src[0].reg);
 }
@ -290,33 +577,33 @@ int vkd3d_shader_scan_dxbc(const struct vkd3d_shader_code *dxbc,

    TRACE("dxbc {%p, %zu}, scan_info %p.\n", dxbc->code, dxbc->size, scan_info);

-    if (scan_info->type != VKD3D_SHADER_STRUCTURE_TYPE_SCAN_INFO)
+    if (shader_is_dxil(dxbc->code, dxbc->size))
    {
-        WARN("Invalid structure type %#x.\n", scan_info->type);
-        return VKD3D_ERROR_INVALID_ARGUMENT;
+        /* There is nothing interesting to scan. DXIL does this internally. */
+        return VKD3D_OK;
    }
-
-    if ((ret = vkd3d_shader_parser_init(&parser, dxbc)) < 0)
-        return ret;
-
-    memset(scan_info, 0, sizeof(*scan_info));
-
-    while (!shader_sm4_is_end(parser.data, &parser.ptr))
+    else
    {
-        shader_sm4_read_instruction(parser.data, &parser.ptr, &instruction);
+        if ((ret = vkd3d_shader_parser_init(&parser, dxbc)) < 0)
+            return ret;

-        if (instruction.handler_idx == VKD3DSIH_INVALID)
+        while (!shader_sm4_is_end(parser.data, &parser.ptr))
        {
-            WARN("Encountered unrecognized or invalid instruction.\n");
-            vkd3d_shader_parser_destroy(&parser);
-            return VKD3D_ERROR_INVALID_ARGUMENT;
+            shader_sm4_read_instruction(parser.data, &parser.ptr, &instruction);
+
+            if (instruction.handler_idx == VKD3DSIH_INVALID)
+            {
+                WARN("Encountered unrecognized or invalid instruction.\n");
+                vkd3d_shader_parser_destroy(&parser);
+                return VKD3D_ERROR_INVALID_ARGUMENT;
+            }
+
+            vkd3d_shader_scan_instruction(scan_info, &instruction);
        }

-        vkd3d_shader_scan_instruction(scan_info, &instruction);
+        vkd3d_shader_parser_destroy(&parser);
+        return VKD3D_OK;
    }
-
-    vkd3d_shader_parser_destroy(&parser);
-    return VKD3D_OK;
 }

 void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code)
@ -336,7 +623,7 @@ static void vkd3d_shader_free_root_signature_v_1_0(struct vkd3d_root_signature_d
        const struct vkd3d_root_parameter *parameter = &root_signature->parameters[i];

        if (parameter->parameter_type == VKD3D_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE)
-            vkd3d_free((void *)parameter->u.descriptor_table.descriptor_ranges);
+            vkd3d_free((void *)parameter->descriptor_table.descriptor_ranges);
    }
    vkd3d_free((void *)root_signature->parameters);
    vkd3d_free((void *)root_signature->static_samplers);
@ -353,7 +640,7 @@ static void vkd3d_shader_free_root_signature_v_1_1(struct vkd3d_root_signature_d
        const struct vkd3d_root_parameter1 *parameter = &root_signature->parameters[i];

        if (parameter->parameter_type == VKD3D_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE)
-            vkd3d_free((void *)parameter->u.descriptor_table.descriptor_ranges);
+            vkd3d_free((void *)parameter->descriptor_table.descriptor_ranges);
    }
    vkd3d_free((void *)root_signature->parameters);
    vkd3d_free((void *)root_signature->static_samplers);
@ -365,11 +652,11 @@ void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc
 {
    if (desc->version == VKD3D_ROOT_SIGNATURE_VERSION_1_0)
    {
-        vkd3d_shader_free_root_signature_v_1_0(&desc->u.v_1_0);
+        vkd3d_shader_free_root_signature_v_1_0(&desc->v_1_0);
    }
    else if (desc->version == VKD3D_ROOT_SIGNATURE_VERSION_1_1)
    {
-        vkd3d_shader_free_root_signature_v_1_1(&desc->u.v_1_1);
+        vkd3d_shader_free_root_signature_v_1_1(&desc->v_1_1);
    }
    else if (desc->version)
    {
@ -388,6 +675,14 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
    return shader_parse_input_signature(dxbc->code, dxbc->size, signature);
 }

+/* Public entry point for parsing the output signature of a shader blob.
+ * Traces its arguments, then forwards dxbc->code / dxbc->size and the
+ * caller-provided signature to shader_parse_output_signature() and returns
+ * that function's result unchanged. */
+int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
+        struct vkd3d_shader_signature *signature)
+{
+    TRACE("dxbc {%p, %zu}, signature %p.\n", dxbc->code, dxbc->size, signature);
+
+    return shader_parse_output_signature(dxbc->code, dxbc->size, signature);
+}
+
 struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
        const struct vkd3d_shader_signature *signature, const char *semantic_name,
        unsigned int semantic_index, unsigned int stream_index)
@ -417,3 +712,39 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
    vkd3d_free(signature->elements);
    signature->elements = NULL;
 }
+
+/* Hash the raw bytes of a shader blob.
+ * Starts from hash_fnv1_init() and folds every byte of shader->code, in
+ * order, through hash_fnv1_iterate_u8(). A blob of size 0 yields the
+ * initial hash value. */
+vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader)
+{
+    vkd3d_shader_hash_t hash = hash_fnv1_init();
+    const uint8_t *byte = shader->code;
+    size_t remaining = shader->size;
+
+    /* Count down instead of indexing; the cursor is only advanced while
+     * there are bytes left to consume. */
+    while (remaining--)
+        hash = hash_fnv1_iterate_u8(hash, *byte++);
+
+    return hash;
+}
+
+/* Select the quirk bits that apply to the shader identified by shader_hash.
+ * Returns 0 when compile_args or compile_args->quirks is NULL. Otherwise the
+ * result is global_quirks combined with the quirks of the first entry in
+ * hashes[] whose shader_hash matches, or with default_quirks when no entry
+ * matches. */
+uint32_t vkd3d_shader_compile_arguments_select_quirks(
+        const struct vkd3d_shader_compile_arguments *compile_args, vkd3d_shader_hash_t shader_hash)
+{
+    unsigned int entry;
+
+    /* No quirk table supplied: nothing to apply. */
+    if (!compile_args || !compile_args->quirks)
+        return 0;
+
+    for (entry = 0; entry < compile_args->quirks->num_hashes; entry++)
+    {
+        if (compile_args->quirks->hashes[entry].shader_hash == shader_hash)
+            return compile_args->quirks->global_quirks | compile_args->quirks->hashes[entry].quirks;
+    }
+
+    /* No per-shader entry matched; fall back to the defaults. */
+    return compile_args->quirks->global_quirks | compile_args->quirks->default_quirks;
+}
+
+/* Return the shader compiler revision number (currently the constant 1). */
+uint64_t vkd3d_shader_get_revision(void)
+{
+    /* This is meant to be bumped every time a change is made to the shader compiler.
+     * It may be removed later.
+     * It is not immediately useful for invalidating pipeline caches, since that would
+     * mostly be covered by the vkd3d-proton Git hash. */
+    return 1;
+}
--- a/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d-shader/vkd3d_shader_private.h
@ -45,7 +45,6 @@
 #ifndef __VKD3D_SHADER_PRIVATE_H
 #define __VKD3D_SHADER_PRIVATE_H

-#define NONAMELESSUNION
 #include "vkd3d_common.h"
 #include "vkd3d_memory.h"
 #include "vkd3d_shader.h"
@ -59,9 +58,12 @@

 #define VKD3D_VEC4_SIZE 4

+#define VKD3D_DVEC2_SIZE 2
+#define VKD3D_DOUBLE_DWORD_SIZE 2
+#define VKD3D_DVEC2_DWORD_SIZE (VKD3D_DOUBLE_DWORD_SIZE * VKD3D_DVEC2_SIZE)
+
 enum VKD3D_SHADER_INSTRUCTION_HANDLER
 {
-    VKD3DSIH_ABS,
    VKD3DSIH_ADD,
    VKD3DSIH_AND,
    VKD3DSIH_ATOMIC_AND,
@ -83,12 +85,10 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_CALL,
    VKD3DSIH_CALLNZ,
    VKD3DSIH_CASE,
-    VKD3DSIH_CMP,
-    VKD3DSIH_CND,
+    VKD3DSIH_CHECK_ACCESS_FULLY_MAPPED,
    VKD3DSIH_CONTINUE,
    VKD3DSIH_CONTINUEP,
    VKD3DSIH_COUNTBITS,
-    VKD3DSIH_CRS,
    VKD3DSIH_CUT,
    VKD3DSIH_CUT_STREAM,
    VKD3DSIH_DCL,
@ -131,16 +131,11 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_DCL_UAV_STRUCTURED,
    VKD3DSIH_DCL_UAV_TYPED,
    VKD3DSIH_DCL_VERTICES_OUT,
-    VKD3DSIH_DEF,
    VKD3DSIH_DEFAULT,
-    VKD3DSIH_DEFB,
-    VKD3DSIH_DEFI,
    VKD3DSIH_DIV,
    VKD3DSIH_DP2,
-    VKD3DSIH_DP2ADD,
    VKD3DSIH_DP3,
    VKD3DSIH_DP4,
-    VKD3DSIH_DST,
    VKD3DSIH_DSX,
    VKD3DSIH_DSX_COARSE,
    VKD3DSIH_DSX_FINE,
@ -149,16 +144,17 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_DSY_FINE,
    VKD3DSIH_ELSE,
    VKD3DSIH_EMIT,
+    VKD3DSIH_EMIT_THEN_CUT,
    VKD3DSIH_EMIT_STREAM,
+    VKD3DSIH_EMIT_THEN_CUT_STREAM,
    VKD3DSIH_ENDIF,
    VKD3DSIH_ENDLOOP,
-    VKD3DSIH_ENDREP,
    VKD3DSIH_ENDSWITCH,
    VKD3DSIH_EQ,
    VKD3DSIH_EVAL_CENTROID,
    VKD3DSIH_EVAL_SAMPLE_INDEX,
+    VKD3DSIH_EVAL_SNAPPED,
    VKD3DSIH_EXP,
-    VKD3DSIH_EXPP,
    VKD3DSIH_F16TOF32,
    VKD3DSIH_F32TOF16,
    VKD3DSIH_FCALL,
@ -170,8 +166,12 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_FTOU,
    VKD3DSIH_GATHER4,
    VKD3DSIH_GATHER4_C,
+    VKD3DSIH_GATHER4_C_FEEDBACK,
+    VKD3DSIH_GATHER4_FEEDBACK,
    VKD3DSIH_GATHER4_PO,
    VKD3DSIH_GATHER4_PO_C,
+    VKD3DSIH_GATHER4_PO_C_FEEDBACK,
+    VKD3DSIH_GATHER4_PO_FEEDBACK,
    VKD3DSIH_GE,
    VKD3DSIH_HS_CONTROL_POINT_PHASE,
    VKD3DSIH_HS_DECLS,
@ -207,38 +207,32 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_ITOF,
    VKD3DSIH_LABEL,
    VKD3DSIH_LD,
+    VKD3DSIH_LD_FEEDBACK,
    VKD3DSIH_LD2DMS,
+    VKD3DSIH_LD2DMS_FEEDBACK,
    VKD3DSIH_LD_RAW,
+    VKD3DSIH_LD_RAW_FEEDBACK,
    VKD3DSIH_LD_STRUCTURED,
+    VKD3DSIH_LD_STRUCTURED_FEEDBACK,
    VKD3DSIH_LD_UAV_TYPED,
-    VKD3DSIH_LIT,
+    VKD3DSIH_LD_UAV_TYPED_FEEDBACK,
    VKD3DSIH_LOD,
    VKD3DSIH_LOG,
-    VKD3DSIH_LOGP,
    VKD3DSIH_LOOP,
    VKD3DSIH_LRP,
    VKD3DSIH_LT,
-    VKD3DSIH_M3x2,
-    VKD3DSIH_M3x3,
-    VKD3DSIH_M3x4,
-    VKD3DSIH_M4x3,
-    VKD3DSIH_M4x4,
    VKD3DSIH_MAD,
    VKD3DSIH_MAX,
    VKD3DSIH_MIN,
    VKD3DSIH_MOV,
-    VKD3DSIH_MOVA,
    VKD3DSIH_MOVC,
    VKD3DSIH_MUL,
    VKD3DSIH_NE,
    VKD3DSIH_NOP,
    VKD3DSIH_NOT,
-    VKD3DSIH_NRM,
    VKD3DSIH_OR,
-    VKD3DSIH_PHASE,
    VKD3DSIH_POW,
    VKD3DSIH_RCP,
-    VKD3DSIH_REP,
    VKD3DSIH_RESINFO,
    VKD3DSIH_RET,
    VKD3DSIH_RETP,
@ -249,13 +243,18 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_RSQ,
    VKD3DSIH_SAMPLE,
    VKD3DSIH_SAMPLE_B,
+    VKD3DSIH_SAMPLE_B_FEEDBACK,
    VKD3DSIH_SAMPLE_C,
+    VKD3DSIH_SAMPLE_C_FEEDBACK,
    VKD3DSIH_SAMPLE_C_LZ,
+    VKD3DSIH_SAMPLE_C_LZ_FEEDBACK,
+    VKD3DSIH_SAMPLE_FEEDBACK,
    VKD3DSIH_SAMPLE_GRAD,
+    VKD3DSIH_SAMPLE_GRAD_FEEDBACK,
    VKD3DSIH_SAMPLE_INFO,
    VKD3DSIH_SAMPLE_LOD,
+    VKD3DSIH_SAMPLE_LOD_FEEDBACK,
    VKD3DSIH_SAMPLE_POS,
-    VKD3DSIH_SETP,
    VKD3DSIH_SGE,
    VKD3DSIH_SGN,
    VKD3DSIH_SINCOS,
@ -268,28 +267,7 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_SWAPC,
    VKD3DSIH_SWITCH,
    VKD3DSIH_SYNC,
-    VKD3DSIH_TEX,
-    VKD3DSIH_TEXBEM,
-    VKD3DSIH_TEXBEML,
-    VKD3DSIH_TEXCOORD,
-    VKD3DSIH_TEXDEPTH,
-    VKD3DSIH_TEXDP3,
-    VKD3DSIH_TEXDP3TEX,
-    VKD3DSIH_TEXKILL,
-    VKD3DSIH_TEXLDD,
-    VKD3DSIH_TEXLDL,
-    VKD3DSIH_TEXM3x2DEPTH,
-    VKD3DSIH_TEXM3x2PAD,
-    VKD3DSIH_TEXM3x2TEX,
-    VKD3DSIH_TEXM3x3,
-    VKD3DSIH_TEXM3x3DIFF,
-    VKD3DSIH_TEXM3x3PAD,
-    VKD3DSIH_TEXM3x3SPEC,
-    VKD3DSIH_TEXM3x3TEX,
-    VKD3DSIH_TEXM3x3VSPEC,
-    VKD3DSIH_TEXREG2AR,
-    VKD3DSIH_TEXREG2GB,
-    VKD3DSIH_TEXREG2RGB,
+    VKD3DSIH_DISCARD,
    VKD3DSIH_UBFE,
    VKD3DSIH_UDIV,
    VKD3DSIH_UGE,
@ -297,38 +275,44 @@ enum VKD3D_SHADER_INSTRUCTION_HANDLER
    VKD3DSIH_UMAX,
    VKD3DSIH_UMIN,
    VKD3DSIH_UMUL,
+    VKD3DSIH_UMAD,
    VKD3DSIH_USHR,
    VKD3DSIH_UTOF,
    VKD3DSIH_XOR,

+    VKD3DSIH_DADD,
+    VKD3DSIH_DMAX,
+    VKD3DSIH_DMIN,
+    VKD3DSIH_DMUL,
+    VKD3DSIH_DEQ,
+    VKD3DSIH_DGE,
+    VKD3DSIH_DLT,
+    VKD3DSIH_DNE,
+    VKD3DSIH_DMOV,
+    VKD3DSIH_DMOVC,
+    VKD3DSIH_DTOF,
+    VKD3DSIH_FTOD,
+    VKD3DSIH_DDIV,
+    VKD3DSIH_DFMA,
+    VKD3DSIH_DRCP,
+    VKD3DSIH_DTOI,
+    VKD3DSIH_DTOU,
+    VKD3DSIH_ITOD,
+    VKD3DSIH_UTOD,
+
    VKD3DSIH_INVALID,
 };

 enum vkd3d_shader_register_type
 {
-    VKD3DSPR_TEMP = 0,
-    VKD3DSPR_INPUT = 1,
-    VKD3DSPR_CONST = 2,
-    VKD3DSPR_ADDR = 3,
-    VKD3DSPR_TEXTURE = 3,
-    VKD3DSPR_RASTOUT = 4,
-    VKD3DSPR_ATTROUT = 5,
-    VKD3DSPR_TEXCRDOUT = 6,
-    VKD3DSPR_OUTPUT = 6,
-    VKD3DSPR_CONSTINT = 7,
-    VKD3DSPR_COLOROUT = 8,
-    VKD3DSPR_DEPTHOUT = 9,
-    VKD3DSPR_SAMPLER = 10,
-    VKD3DSPR_CONST2 = 11,
-    VKD3DSPR_CONST3 = 12,
-    VKD3DSPR_CONST4 = 13,
-    VKD3DSPR_CONSTBOOL = 14,
-    VKD3DSPR_LOOP = 15,
-    VKD3DSPR_TEMPFLOAT16 = 16,
-    VKD3DSPR_MISCTYPE = 17,
-    VKD3DSPR_LABEL = 18,
-    VKD3DSPR_PREDICATE = 19,
+    VKD3DSPR_TEMP,
+    VKD3DSPR_INPUT,
+    VKD3DSPR_OUTPUT,
+    VKD3DSPR_COLOROUT,
+    VKD3DSPR_DEPTHOUT,
+    VKD3DSPR_SAMPLER,
    VKD3DSPR_IMMCONST,
+    VKD3DSPR_IMMCONST64,
    VKD3DSPR_CONSTBUFFER,
    VKD3DSPR_IMMCONSTBUFFER,
    VKD3DSPR_PRIMID,
@ -357,6 +341,8 @@ enum vkd3d_shader_register_type
    VKD3DSPR_DEPTHOUTGE,
    VKD3DSPR_DEPTHOUTLE,
    VKD3DSPR_RASTERIZER,
+    VKD3DSPR_STENCILREFOUT,
+    VKD3DSPR_INNERCOVERAGE,

    VKD3DSPR_INVALID = ~0u,
 };
@ -387,30 +373,28 @@ enum vkd3d_data_type
    VKD3D_DATA_UNORM,
    VKD3D_DATA_SNORM,
    VKD3D_DATA_OPAQUE,
+    VKD3D_DATA_DOUBLE,
 };

 enum vkd3d_immconst_type
 {
    VKD3D_IMMCONST_SCALAR,
    VKD3D_IMMCONST_VEC4,
+    VKD3D_IMMCONST_DVEC2 = VKD3D_IMMCONST_VEC4,
+};
+
+enum vkd3d_shader_register_modifier
+{
+    VKD3DSPRM_NONE = 0,
+    VKD3DSPRM_NONUNIFORM = 1,
 };

 enum vkd3d_shader_src_modifier
 {
    VKD3DSPSM_NONE = 0,
    VKD3DSPSM_NEG = 1,
-    VKD3DSPSM_BIAS = 2,
-    VKD3DSPSM_BIASNEG = 3,
-    VKD3DSPSM_SIGN = 4,
-    VKD3DSPSM_SIGNNEG = 5,
-    VKD3DSPSM_COMP = 6,
-    VKD3DSPSM_X2 = 7,
-    VKD3DSPSM_X2NEG = 8,
-    VKD3DSPSM_DZ = 9,
-    VKD3DSPSM_DW = 10,
-    VKD3DSPSM_ABS = 11,
-    VKD3DSPSM_ABSNEG = 12,
-    VKD3DSPSM_NOT = 13,
+    VKD3DSPSM_ABS = 2,
+    VKD3DSPSM_ABSNEG = 3
 };

 #define VKD3DSP_WRITEMASK_0   0x1u /* .x r */
@ -422,9 +406,7 @@ enum vkd3d_shader_src_modifier
 enum vkd3d_shader_dst_modifier
 {
    VKD3DSPDM_NONE = 0,
-    VKD3DSPDM_SATURATE = 1,
-    VKD3DSPDM_PARTIALPRECISION = 2,
-    VKD3DSPDM_MSAMPCENTROID = 4,
+    VKD3DSPDM_SATURATE = 1
 };

 enum vkd3d_shader_interpolation_mode
@ -442,9 +424,11 @@ enum vkd3d_shader_interpolation_mode
 enum vkd3d_shader_global_flags
 {
    VKD3DSGF_REFACTORING_ALLOWED               = 0x01,
+    VKD3DSGF_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = 0x02,
    VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL         = 0x04,
    VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS = 0x08,
-    VKD3DSGF_ENABLE_MINIMUM_PRECISION          = 0x20
+    VKD3DSGF_ENABLE_MINIMUM_PRECISION          = 0x20,
+    VKD3DSGF_ENABLE_11_1_DOUBLE_EXTENSIONS     = 0x40,
 };

 enum vkd3d_shader_sync_flags
@ -467,7 +451,6 @@ enum vkd3d_tessellator_domain
 };

 #define VKD3DSI_NONE                    0x0
-#define VKD3DSI_TEXLD_PROJECT           0x1
 #define VKD3DSI_INDEXED_DYNAMIC         0x4
 #define VKD3DSI_RESINFO_RCP_FLOAT       0x1
 #define VKD3DSI_RESINFO_UINT            0x2
@ -498,8 +481,6 @@ enum vkd3d_shader_conditional_op
    VKD3D_SHADER_CONDITIONAL_OP_Z  = 1
 };

-#define VKD3D_SM1_VS  0xfffeu
-#define VKD3D_SM1_PS  0xffffu
 #define VKD3D_SM4_PS  0x0000u
 #define VKD3D_SM4_VS  0x0001u
 #define VKD3D_SM4_GS  0x0002u
@ -507,10 +488,6 @@ enum vkd3d_shader_conditional_op
 #define VKD3D_SM5_DS  0x0004u
 #define VKD3D_SM5_CS  0x0005u

-/* Shader version tokens, and shader end tokens */
-#define VKD3DPS_VERSION(major, minor) ((VKD3D_SM1_PS << 16) | ((major) << 8) | (minor))
-#define VKD3DVS_VERSION(major, minor) ((VKD3D_SM1_VS << 16) | ((major) << 8) | (minor))
-
 #define MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE 4096
 #define MAX_REG_OUTPUT 32

@ -537,7 +514,7 @@ struct vkd3d_shader_version
 struct vkd3d_shader_immediate_constant_buffer
 {
    unsigned int vec4_count;
-    DWORD data[MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE];
+    uint32_t data[MAX_IMMEDIATE_CONSTANT_BUFFER_SIZE];
 };

 struct vkd3d_shader_indexable_temp
@ -557,15 +534,18 @@ struct vkd3d_shader_register_index
 struct vkd3d_shader_register
 {
    enum vkd3d_shader_register_type type;
+    enum vkd3d_shader_register_modifier modifier;
    enum vkd3d_data_type data_type;
    struct vkd3d_shader_register_index idx[3];
    enum vkd3d_immconst_type immconst_type;
    union
    {
-        DWORD immconst_uint[VKD3D_VEC4_SIZE];
+        uint32_t immconst_uint[VKD3D_VEC4_SIZE];
        float immconst_float[VKD3D_VEC4_SIZE];
+        double immconst_double[VKD3D_DVEC2_SIZE];
+        uint64_t immconst_uint64[VKD3D_DVEC2_SIZE];
        unsigned fp_body_idx;
-    } u;
+    };
 };

 struct vkd3d_shader_dst_param
@ -573,7 +553,6 @@ struct vkd3d_shader_dst_param
    struct vkd3d_shader_register reg;
    DWORD write_mask;
    DWORD modifiers;
-    DWORD shift;
 };

 struct vkd3d_shader_src_param
@ -589,32 +568,13 @@ struct vkd3d_shader_index_range
    unsigned int register_count;
 };

-enum vkd3d_decl_usage
-{
-    VKD3D_DECL_USAGE_POSITION             = 0,
-    VKD3D_DECL_USAGE_BLEND_WEIGHT         = 1,
-    VKD3D_DECL_USAGE_BLEND_INDICES        = 2,
-    VKD3D_DECL_USAGE_NORMAL               = 3,
-    VKD3D_DECL_USAGE_PSIZE                = 4,
-    VKD3D_DECL_USAGE_TEXCOORD             = 5,
-    VKD3D_DECL_USAGE_TANGENT              = 6,
-    VKD3D_DECL_USAGE_BINORMAL             = 7,
-    VKD3D_DECL_USAGE_TESS_FACTOR          = 8,
-    VKD3D_DECL_USAGE_POSITIONT            = 9,
-    VKD3D_DECL_USAGE_COLOR                = 10,
-    VKD3D_DECL_USAGE_FOG                  = 11,
-    VKD3D_DECL_USAGE_DEPTH                = 12,
-    VKD3D_DECL_USAGE_SAMPLE               = 13
-};
-
 struct vkd3d_shader_semantic
 {
-    enum vkd3d_decl_usage usage;
-    unsigned int usage_idx;
    enum vkd3d_shader_resource_type resource_type;
    enum vkd3d_data_type resource_data_type;
    struct vkd3d_shader_dst_param reg;
    unsigned int register_space;
+    unsigned int register_index;
 };

 enum vkd3d_shader_input_sysval_semantic
@ -662,6 +622,7 @@ struct vkd3d_shader_register_semantic
 struct vkd3d_shader_sampler
 {
    struct vkd3d_shader_src_param src;
+    unsigned int register_index;
    unsigned int register_space;
 };

@ -669,6 +630,7 @@ struct vkd3d_shader_constant_buffer
 {
    struct vkd3d_shader_src_param src;
    unsigned int size;
+    unsigned int register_index;
    unsigned int register_space;
 };

@ -676,12 +638,14 @@ struct vkd3d_shader_structured_resource
 {
    struct vkd3d_shader_dst_param reg;
    unsigned int byte_stride;
+    unsigned int register_index;
    unsigned int register_space;
 };

 struct vkd3d_shader_raw_resource
 {
    struct vkd3d_shader_dst_param dst;
+    unsigned int register_index;
    unsigned int register_space;
 };

@ -797,25 +761,28 @@ static inline bool vkd3d_shader_register_is_output(const struct vkd3d_shader_reg
    return reg->type == VKD3DSPR_OUTPUT || reg->type == VKD3DSPR_COLOROUT;
 }

-void vkd3d_shader_trace(void *data) DECLSPEC_HIDDEN;
+void vkd3d_shader_trace(void *data);

-const char *shader_get_type_prefix(enum vkd3d_shader_type type) DECLSPEC_HIDDEN;
+const char *shader_get_type_prefix(enum vkd3d_shader_type type);

 void *shader_sm4_init(const DWORD *byte_code, size_t byte_code_size,
-        const struct vkd3d_shader_signature *output_signature) DECLSPEC_HIDDEN;
-void shader_sm4_free(void *data) DECLSPEC_HIDDEN;
+        const struct vkd3d_shader_signature *output_signature);
+void shader_sm4_free(void *data);
 void shader_sm4_read_header(void *data, const DWORD **ptr,
-        struct vkd3d_shader_version *shader_version) DECLSPEC_HIDDEN;
+        struct vkd3d_shader_version *shader_version);
 void shader_sm4_read_instruction(void *data, const DWORD **ptr,
-        struct vkd3d_shader_instruction *ins) DECLSPEC_HIDDEN;
-bool shader_sm4_is_end(void *data, const DWORD **ptr) DECLSPEC_HIDDEN;
+        struct vkd3d_shader_instruction *ins);
+bool shader_sm4_is_end(void *data, const DWORD **ptr);

 int shader_extract_from_dxbc(const void *dxbc, size_t dxbc_length,
-        struct vkd3d_shader_desc *desc) DECLSPEC_HIDDEN;
-void free_shader_desc(struct vkd3d_shader_desc *desc) DECLSPEC_HIDDEN;
+        struct vkd3d_shader_desc *desc);
+bool shader_is_dxil(const void *dxbc, size_t dxbc_length);
+void free_shader_desc(struct vkd3d_shader_desc *desc);

 int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
-        struct vkd3d_shader_signature *signature) DECLSPEC_HIDDEN;
+        struct vkd3d_shader_signature *signature);
+int shader_parse_output_signature(const void *dxbc, size_t dxbc_length,
+        struct vkd3d_shader_signature *signature);

 struct vkd3d_dxbc_compiler;

@ -823,14 +790,22 @@ struct vkd3d_dxbc_compiler *vkd3d_dxbc_compiler_create(const struct vkd3d_shader
        const struct vkd3d_shader_desc *shader_desc, uint32_t compiler_options,
        const struct vkd3d_shader_interface_info *shader_interface_info,
        const struct vkd3d_shader_compile_arguments *compile_args,
-        const struct vkd3d_shader_scan_info *scan_info) DECLSPEC_HIDDEN;
+        const struct vkd3d_shader_scan_info *scan_info,
+        vkd3d_shader_hash_t shader_hash);
 int vkd3d_dxbc_compiler_handle_instruction(struct vkd3d_dxbc_compiler *compiler,
-        const struct vkd3d_shader_instruction *instruction) DECLSPEC_HIDDEN;
+        const struct vkd3d_shader_instruction *instruction);
 int vkd3d_dxbc_compiler_generate_spirv(struct vkd3d_dxbc_compiler *compiler,
-        struct vkd3d_shader_code *spirv) DECLSPEC_HIDDEN;
-void vkd3d_dxbc_compiler_destroy(struct vkd3d_dxbc_compiler *compiler) DECLSPEC_HIDDEN;
+        struct vkd3d_shader_code *spirv);
+void vkd3d_dxbc_compiler_destroy(struct vkd3d_dxbc_compiler *compiler);

-void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) DECLSPEC_HIDDEN;
+void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]);
+
+void vkd3d_shader_dump_spirv_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader);
+void vkd3d_shader_dump_spirv_shader_export(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader,
+        const char *export);
+void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader, const char *ext);
+bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *size);
+bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, size_t *size, const char *export);

 static inline enum vkd3d_component_type vkd3d_component_type_from_data_type(
        enum vkd3d_data_type data_type)
@ -845,6 +820,8 @@ static inline enum vkd3d_component_type vkd3d_component_type_from_data_type(
            return VKD3D_TYPE_UINT;
        case VKD3D_DATA_INT:
            return VKD3D_TYPE_INT;
+        case VKD3D_DATA_DOUBLE:
+            return VKD3D_TYPE_DOUBLE;
        default:
            FIXME("Unhandled data type %#x.\n", data_type);
            return VKD3D_TYPE_UINT;
@ -862,6 +839,8 @@ static inline enum vkd3d_data_type vkd3d_data_type_from_component_type(
            return VKD3D_DATA_UINT;
        case VKD3D_TYPE_INT:
            return VKD3D_DATA_INT;
+        case VKD3D_TYPE_DOUBLE:
+            return VKD3D_DATA_DOUBLE;
        default:
            FIXME("Unhandled component type %#x.\n", component_type);
            return VKD3D_DATA_FLOAT;
@ -890,6 +869,16 @@ static inline unsigned int vkd3d_write_mask_component_count(DWORD write_mask)
    return count;
 }

+/* Component count of write_mask expressed in units of the given component type.
+ * Takes the count from vkd3d_write_mask_component_count() and halves it for
+ * VKD3D_TYPE_DOUBLE (presumably because a double spans two 32-bit components,
+ * cf. VKD3D_DOUBLE_DWORD_SIZE - confirm). The result is asserted to be
+ * non-zero, so a double mask covering fewer than two components trips the
+ * assertion. */
+static inline unsigned int vkd3d_write_mask_component_count_typed(DWORD write_mask,
+        enum vkd3d_component_type type)
+{
+    unsigned int component_count = vkd3d_write_mask_component_count(write_mask);
+    if (type == VKD3D_TYPE_DOUBLE)
+        component_count /= 2;
+    assert(component_count != 0);
+    return component_count;
+}
+
 static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count)
 {
    assert(component_count <= VKD3D_VEC4_SIZE);
@ -921,4 +910,13 @@ static inline unsigned int vkd3d_compact_swizzle(unsigned int swizzle, unsigned
 #define VKD3D_DXBC_MAX_SOURCE_COUNT 6
 #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t))

+unsigned int vkd3d_shader_scan_get_register_flags(const struct vkd3d_shader_scan_info *scan_info,
+        enum vkd3d_shader_register_type type, unsigned int id);
+
+/* DXIL support */
+int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
+        struct vkd3d_shader_code *spirv,
+        const struct vkd3d_shader_interface_info *shader_interface_info,
+        const struct vkd3d_shader_compile_arguments *compiler_args);
+
 #endif  /* __VKD3D_SHADER_PRIVATE_H */
--- a/libs/vkd3d-utils/libvkd3d-utils.pc.in
+++ b/libs/vkd3d-utils/libvkd3d-utils.pc.in
@ -1,10 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-
-Name: vkd3d-utils
-Description: The vkd3d 3D Graphics Utility Library
-Version: @PACKAGE_VERSION@
-Cflags: -I${includedir}/vkd3d
-Libs: -L${libdir} -lvkd3d-utils
--- a/libs/vkd3d-utils/meson.build
+++ b/libs/vkd3d-utils/meson.build
@ -0,0 +1,19 @@
+# Source files compiled into the utils library.
+vkd3d_utils_src = [
+  'vkd3d_utils_main.c',
+]
+
+# Shared library 'vkd3d-proton-utils', built with VKD3D_UTILS_EXPORTS defined.
+# The .def file is always supplied as vs_module_defs; on non-MSVC Windows
+# builds it is additionally passed through 'objects' (presumably so the MinGW
+# linker consumes the export list directly - TODO confirm).
+vkd3d_utils_lib = shared_library('vkd3d-proton-utils', vkd3d_utils_src,
+  dependencies        : vkd3d_dep,
+  include_directories : vkd3d_private_includes,
+  install             : true,
+  objects             : not vkd3d_is_msvc and vkd3d_platform == 'windows'
+                        ? 'vkd3d-proton-utils.def'
+                        : [],
+  vs_module_defs      : 'vkd3d-proton-utils.def',
+  version             : '3.0.0',
+  c_args              : '-DVKD3D_UTILS_EXPORTS',
+  override_options    : [ 'c_std='+vkd3d_c_std ])
+
+# Dependency object for consumers: links against the library above and
+# exposes the public include directories.
+vkd3d_utils_dep = declare_dependency(
+  link_with           : vkd3d_utils_lib,
+  include_directories : vkd3d_public_includes)
--- a/libs/vkd3d-utils/vkd3d-proton-utils.def
+++ b/libs/vkd3d-utils/vkd3d-proton-utils.def
@ -0,0 +1,16 @@
+LIBRARY vkd3d-proton-utils-3.dll
+
+EXPORTS
+    D3D12CreateDevice @101
+    D3D12GetDebugInterface @102
+    D3D12CreateRootSignatureDeserializer
+    D3D12CreateVersionedRootSignatureDeserializer
+
+    D3D12EnableExperimentalFeatures
+    D3D12SerializeRootSignature
+    D3D12SerializeVersionedRootSignature
+
+    vkd3d_create_event
+    vkd3d_wait_event
+    vkd3d_signal_event
+    vkd3d_destroy_event
--- a/libs/vkd3d-utils/vkd3d_utils.map
+++ b/libs/vkd3d-utils/vkd3d_utils.map
@ -1,16 +0,0 @@
-VKD3D_1_0
-{
-global:
-    D3D12CreateDevice;
-    D3D12CreateRootSignatureDeserializer;
-    D3D12CreateVersionedRootSignatureDeserializer;
-    D3D12GetDebugInterface;
-    D3D12SerializeRootSignature;
-    D3D12SerializeVersionedRootSignature;
-    vkd3d_create_event;
-    vkd3d_destroy_event;
-    vkd3d_signal_event;
-    vkd3d_wait_event;
-
-local: *;
-};
--- a/libs/vkd3d-utils/vkd3d_utils_main.c
+++ b/libs/vkd3d-utils/vkd3d_utils_main.c
@ -16,21 +16,21 @@
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_common.h"
 #include "vkd3d_utils_private.h"

-VKD3D_DEBUG_ENV_NAME("VKD3D_DEBUG");
-
-HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug)
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12GetDebugInterface(REFIID iid, void **debug)
 {
    FIXME("iid %s, debug %p stub!\n", debugstr_guid(iid), debug);

    return E_NOTIMPL;
 }

-HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter,
        D3D_FEATURE_LEVEL minimum_feature_level, REFIID iid, void **device)
 {
-    struct vkd3d_optional_instance_extensions_info optional_extensions_info;
    struct vkd3d_instance_create_info instance_create_info;
    struct vkd3d_device_create_info device_create_info;

@ -54,22 +54,14 @@ HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter,
    if (adapter)
        FIXME("Ignoring adapter %p.\n", adapter);

-    memset(&optional_extensions_info, 0, sizeof(optional_extensions_info));
-    optional_extensions_info.type = VKD3D_STRUCTURE_TYPE_OPTIONAL_INSTANCE_EXTENSIONS_INFO;
-    optional_extensions_info.extensions = optional_instance_extensions;
-    optional_extensions_info.extension_count = ARRAY_SIZE(optional_instance_extensions);
-
    memset(&instance_create_info, 0, sizeof(instance_create_info));
-    instance_create_info.type = VKD3D_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-    instance_create_info.next = &optional_extensions_info;
    instance_create_info.pfn_signal_event = vkd3d_signal_event;
-    instance_create_info.wchar_size = sizeof(WCHAR);
    instance_create_info.instance_extensions = instance_extensions;
    instance_create_info.instance_extension_count = ARRAY_SIZE(instance_extensions);
+    instance_create_info.optional_instance_extensions = optional_instance_extensions;
+    instance_create_info.optional_instance_extension_count = ARRAY_SIZE(optional_instance_extensions);

    memset(&device_create_info, 0, sizeof(device_create_info));
-    device_create_info.type = VKD3D_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-    device_create_info.next = NULL;
    device_create_info.minimum_feature_level = minimum_feature_level;
    device_create_info.instance_create_info = &instance_create_info;
    device_create_info.device_extensions = device_extensions;
@ -78,7 +70,7 @@ HRESULT WINAPI D3D12CreateDevice(IUnknown *adapter,
    return vkd3d_create_device(&device_create_info, iid, device);
 }

-HRESULT WINAPI D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T data_size,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T data_size,
        REFIID iid, void **deserializer)
 {
    TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n",
@ -87,7 +79,7 @@ HRESULT WINAPI D3D12CreateRootSignatureDeserializer(const void *data, SIZE_T dat
    return vkd3d_create_root_signature_deserializer(data, data_size, iid, deserializer);
 }

-HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, SIZE_T data_size,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, SIZE_T data_size,
        REFIID iid,void **deserializer)
 {
    TRACE("data %p, data_size %lu, iid %s, deserializer %p.\n",
@ -96,7 +88,16 @@ HRESULT WINAPI D3D12CreateVersionedRootSignatureDeserializer(const void *data, S
    return vkd3d_create_versioned_root_signature_deserializer(data, data_size, iid, deserializer);
 }

-HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc,
+/* Stub for D3D12EnableExperimentalFeatures.
+ * Logs a FIXME with the received arguments and returns E_NOINTERFACE; none of
+ * the arguments are otherwise read or modified. */
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12EnableExperimentalFeatures(UINT feature_count,
+        const IID *iids, void *configurations, UINT *configurations_sizes)
+{
+    FIXME("feature_count %u, iids %p, configurations %p, configurations_sizes %p stub!\n",
+            feature_count, iids, configurations, configurations_sizes);
+
+    return E_NOINTERFACE;
+}
+
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc,
        D3D_ROOT_SIGNATURE_VERSION version, ID3DBlob **blob, ID3DBlob **error_blob)
 {
    TRACE("desc %p, version %#x, blob %p, error_blob %p.\n", desc, version, blob, error_blob);
@ -104,7 +105,7 @@ HRESULT WINAPI D3D12SerializeRootSignature(const D3D12_ROOT_SIGNATURE_DESC *desc
    return vkd3d_serialize_root_signature(desc, version, blob, error_blob);
 }

-HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
+VKD3D_UTILS_EXPORT HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_SIGNATURE_DESC *desc,
        ID3DBlob **blob, ID3DBlob **error_blob)
 {
    TRACE("desc %p, blob %p, error_blob %p.\n", desc, blob, error_blob);
@ -113,7 +114,7 @@ HRESULT WINAPI D3D12SerializeVersionedRootSignature(const D3D12_VERSIONED_ROOT_S
 }

 /* Events */
-HANDLE vkd3d_create_event(void)
+VKD3D_UTILS_EXPORT HANDLE vkd3d_create_event(void)
 {
    struct vkd3d_event *event;
    int rc;
@ -144,7 +145,7 @@ HANDLE vkd3d_create_event(void)
    return event;
 }

-unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds)
+VKD3D_UTILS_EXPORT unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds)
 {
    struct vkd3d_event *impl = event;
    int rc;
@ -187,7 +188,7 @@ unsigned int vkd3d_wait_event(HANDLE event, unsigned int milliseconds)
    return VKD3D_WAIT_FAILED;
 }

-HRESULT vkd3d_signal_event(HANDLE event)
+VKD3D_UTILS_EXPORT HRESULT vkd3d_signal_event(HANDLE event)
 {
    struct vkd3d_event *impl = event;
    int rc;
@ -206,7 +207,7 @@ HRESULT vkd3d_signal_event(HANDLE event)
    return S_OK;
 }

-void vkd3d_destroy_event(HANDLE event)
+VKD3D_UTILS_EXPORT void vkd3d_destroy_event(HANDLE event)
 {
    struct vkd3d_event *impl = event;
    int rc;
--- a/libs/vkd3d-utils/vkd3d_utils_private.h
+++ b/libs/vkd3d-utils/vkd3d_utils_private.h
@ -20,10 +20,9 @@
 #define __VKD3D_UTILS_PRIVATE_H

 #define COBJMACROS
-#define NONAMELESSUNION
 #define VK_NO_PROTOTYPES

-#include <pthread.h>
+#include "vkd3d_threads.h"
 #include <vkd3d.h>

 #include "vkd3d_memory.h"
--- a/libs/vkd3d/acceleration_structure.c
+++ b/libs/vkd3d/acceleration_structure.c
@ -0,0 +1,498 @@
+/*
+ * Copyright 2021 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+#include "vkd3d_private.h"
+
+#define RT_TRACE TRACE
+
+/* Frees every array in *info that no longer points at its embedded stack
+ * storage. Arrays that still alias the stack storage are left alone. */
+void vkd3d_acceleration_structure_build_info_cleanup(
+        struct vkd3d_acceleration_structure_build_info *info)
+{
+    const bool heap_ranges = info->build_ranges != info->build_range_stack;
+    const bool heap_range_ptrs = info->build_range_ptrs != info->build_range_ptr_stack;
+    const bool heap_geometries = info->geometries != info->geometries_stack;
+    const bool heap_counts = info->primitive_counts != info->primitive_counts_stack;
+
+    if (heap_counts)
+    {
+        vkd3d_free(info->primitive_counts);
+    }
+
+    if (heap_geometries)
+    {
+        vkd3d_free(info->geometries);
+    }
+
+    if (heap_range_ptrs)
+    {
+        /* The cast drops the qualifier on the pointer array before freeing it. */
+        vkd3d_free((void *)info->build_range_ptrs);
+    }
+
+    if (heap_ranges)
+    {
+        vkd3d_free(info->build_ranges);
+    }
+}
+
+/* Maps the D3D12 acceleration structure build flags that have a direct Vulkan
+ * counterpart to VkBuildAccelerationStructureFlagsKHR. Bits without an entry
+ * in the table below are ignored. */
+static VkBuildAccelerationStructureFlagsKHR d3d12_build_flags_to_vk(
+        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS flags)
+{
+    static const struct
+    {
+        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAGS d3d12_bit;
+        VkBuildAccelerationStructureFlagsKHR vk_bit;
+    }
+    flag_table[] =
+    {
+        { D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_COMPACTION,
+                VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_COMPACTION_BIT_KHR },
+        { D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_ALLOW_UPDATE,
+                VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_KHR },
+        { D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_MINIMIZE_MEMORY,
+                VK_BUILD_ACCELERATION_STRUCTURE_LOW_MEMORY_BIT_KHR },
+        { D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_BUILD,
+                VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR },
+        { D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PREFER_FAST_TRACE,
+                VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR },
+    };
+    VkBuildAccelerationStructureFlagsKHR result = 0;
+    unsigned int entry;
+
+    for (entry = 0; entry < sizeof(flag_table) / sizeof(flag_table[0]); entry++)
+    {
+        if (flags & flag_table[entry].d3d12_bit)
+            result |= flag_table[entry].vk_bit;
+    }
+
+    return result;
+}
+
+/* Maps the OPAQUE and NO_DUPLICATE_ANYHIT_INVOCATION geometry flags to their
+ * VkGeometryFlagsKHR bits. Any other bit in flags is ignored. */
+static VkGeometryFlagsKHR d3d12_geometry_flags_to_vk(D3D12_RAYTRACING_GEOMETRY_FLAGS flags)
+{
+    const VkGeometryFlagsKHR opaque_bit =
+            (flags & D3D12_RAYTRACING_GEOMETRY_FLAG_OPAQUE) ? VK_GEOMETRY_OPAQUE_BIT_KHR : 0;
+    const VkGeometryFlagsKHR no_duplicate_bit =
+            (flags & D3D12_RAYTRACING_GEOMETRY_FLAG_NO_DUPLICATE_ANYHIT_INVOCATION) ?
+                    VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR : 0;
+
+    return opaque_bit | no_duplicate_bit;
+}
+
+/* Translates D3D12 acceleration structure build inputs into the Vulkan build
+ * description stored in *info.
+ *
+ * device: used to look up the Vulkan vertex format for triangle geometry.
+ * info:   receives build_info, the geometry array, the per-geometry primitive
+ *         counts and the build ranges. The embedded stack arrays are used when
+ *         NumDescs <= VKD3D_BUILD_INFO_STACK_COUNT, otherwise the arrays are
+ *         heap allocated and are released by
+ *         vkd3d_acceleration_structure_build_info_cleanup().
+ * desc:   the D3D12 inputs to convert.
+ *
+ * Returns true on success. Returns false when an allocation fails, when
+ * triangle and AABB geometry are mixed in one bottom level build, or when a
+ * geometry type is not supported. On failure any heap arrays are freed here
+ * and the pointers in *info are reset to the stack arrays, so nothing leaks
+ * and a subsequent cleanup call is a harmless no-op. */
+bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *device,
+        struct vkd3d_acceleration_structure_build_info *info,
+        const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc)
+{
+    VkAccelerationStructureGeometryTrianglesDataKHR *triangles;
+    VkAccelerationStructureBuildGeometryInfoKHR *build_info;
+    VkAccelerationStructureGeometryAabbsDataKHR *aabbs;
+    const D3D12_RAYTRACING_GEOMETRY_DESC *geom_desc;
+    bool have_triangles, have_aabbs;
+    unsigned int i;
+
+    RT_TRACE("Converting inputs.\n");
+    RT_TRACE("=====================\n");
+
+    build_info = &info->build_info;
+    memset(build_info, 0, sizeof(*build_info));
+    build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
+
+    if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+    {
+        build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
+        RT_TRACE("Top level build.\n");
+    }
+    else
+    {
+        build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
+        RT_TRACE("Bottom level build.\n");
+    }
+
+    build_info->flags = d3d12_build_flags_to_vk(desc->Flags);
+
+    if (desc->Flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
+    {
+        RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
+        build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
+    }
+    else
+        build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
+
+    /* Start out on the embedded stack storage; only large bottom level builds
+     * switch to heap arrays below. */
+    info->geometries = info->geometries_stack;
+    info->primitive_counts = info->primitive_counts_stack;
+    info->build_ranges = info->build_range_stack;
+    info->build_range_ptrs = info->build_range_ptr_stack;
+
+    if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
+    {
+        /* A top level build is a single INSTANCES geometry whose primitive
+         * count is the number of instance descriptions. */
+        memset(info->geometries, 0, sizeof(*info->geometries));
+        info->geometries[0].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+        info->geometries[0].geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
+        info->geometries[0].geometry.instances.sType =
+                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
+        info->geometries[0].geometry.instances.arrayOfPointers =
+                desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? VK_TRUE : VK_FALSE;
+        info->geometries[0].geometry.instances.data.deviceAddress = desc->InstanceDescs;
+
+        info->primitive_counts[0] = desc->NumDescs;
+        build_info->geometryCount = 1;
+        RT_TRACE("  ArrayOfPointers: %u.\n",
+                desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
+        RT_TRACE("  NumDescs: %u.\n", info->primitive_counts[0]);
+    }
+    else
+    {
+        have_triangles = false;
+        have_aabbs = false;
+
+        if (desc->NumDescs <= VKD3D_BUILD_INFO_STACK_COUNT)
+        {
+            memset(info->geometries, 0, sizeof(*info->geometries) * desc->NumDescs);
+            memset(info->primitive_counts, 0, sizeof(*info->primitive_counts) * desc->NumDescs);
+        }
+        else
+        {
+            info->geometries = vkd3d_calloc(desc->NumDescs, sizeof(*info->geometries));
+            info->primitive_counts = vkd3d_calloc(desc->NumDescs, sizeof(*info->primitive_counts));
+            info->build_ranges = vkd3d_malloc(desc->NumDescs * sizeof(*info->build_ranges));
+            info->build_range_ptrs = vkd3d_malloc(desc->NumDescs * sizeof(*info->build_range_ptrs));
+
+            /* Every array is written below, so bail out before touching any
+             * of them if one allocation failed. */
+            if (!info->geometries || !info->primitive_counts
+                    || !info->build_ranges || !info->build_range_ptrs)
+            {
+                ERR("Failed to allocate memory for %u geometry descriptions.\n", desc->NumDescs);
+                goto fail;
+            }
+        }
+        build_info->geometryCount = desc->NumDescs;
+
+        for (i = 0; i < desc->NumDescs; i++)
+        {
+            info->geometries[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
+            RT_TRACE(" Geom %u:\n", i);
+
+            if (desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
+            {
+                geom_desc = desc->ppGeometryDescs[i];
+                RT_TRACE("  ArrayOfPointers\n");
+            }
+            else
+            {
+                geom_desc = &desc->pGeometryDescs[i];
+                RT_TRACE("  PointerToArray\n");
+            }
+
+            info->geometries[i].flags = d3d12_geometry_flags_to_vk(geom_desc->Flags);
+            RT_TRACE("  Flags = #%x\n", geom_desc->Flags);
+
+            switch (geom_desc->Type)
+            {
+                case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
+                    /* Runtime validates this. */
+                    if (have_aabbs)
+                    {
+                        ERR("Cannot mix and match geometry types in a BLAS.\n");
+                        goto fail;
+                    }
+                    have_triangles = true;
+
+                    info->geometries[i].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
+                    triangles = &info->geometries[i].geometry.triangles;
+                    triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
+                    triangles->indexData.deviceAddress = geom_desc->Triangles.IndexBuffer;
+                    if (geom_desc->Triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
+                    {
+                        if (!geom_desc->Triangles.IndexBuffer)
+                            WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
+
+                        triangles->indexType =
+                                geom_desc->Triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
+                                        VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
+                        info->primitive_counts[i] = geom_desc->Triangles.IndexCount / 3;
+                        RT_TRACE("  Indexed : Index count = %u (%u bits)\n",
+                                geom_desc->Triangles.IndexCount,
+                                triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
+                        RT_TRACE("  Vertex count: %u\n", geom_desc->Triangles.VertexCount);
+                        RT_TRACE("  IBO VA: %"PRIx64".\n", geom_desc->Triangles.IndexBuffer);
+                    }
+                    else
+                    {
+                        info->primitive_counts[i] = geom_desc->Triangles.VertexCount / 3;
+                        triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
+                        RT_TRACE("  Triangle list : Vertex count: %u\n", geom_desc->Triangles.VertexCount);
+                    }
+
+                    /* maxVertex is the highest vertex index; clamp the count to 1 so that
+                     * a vertex count of 0 does not wrap around. */
+                    triangles->maxVertex = max(1, geom_desc->Triangles.VertexCount) - 1;
+                    triangles->vertexStride = geom_desc->Triangles.VertexBuffer.StrideInBytes;
+                    triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->Triangles.VertexFormat);
+                    triangles->vertexData.deviceAddress = geom_desc->Triangles.VertexBuffer.StartAddress;
+                    triangles->transformData.deviceAddress = geom_desc->Triangles.Transform3x4;
+
+                    RT_TRACE("  Transform3x4: %s\n", geom_desc->Triangles.Transform3x4 ? "on" : "off");
+                    RT_TRACE("  Vertex format: %s\n", debug_dxgi_format(geom_desc->Triangles.VertexFormat));
+                    RT_TRACE("  VBO VA: %"PRIx64"\n", geom_desc->Triangles.VertexBuffer.StartAddress);
+                    RT_TRACE("  Vertex stride: %"PRIu64" bytes\n", geom_desc->Triangles.VertexBuffer.StrideInBytes);
+                    break;
+
+                case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
+                    /* Runtime validates this. */
+                    if (have_triangles)
+                    {
+                        ERR("Cannot mix and match geometry types in a BLAS.\n");
+                        goto fail;
+                    }
+                    have_aabbs = true;
+
+                    info->geometries[i].geometryType = VK_GEOMETRY_TYPE_AABBS_KHR;
+                    aabbs = &info->geometries[i].geometry.aabbs;
+                    aabbs->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR;
+                    aabbs->stride = geom_desc->AABBs.AABBs.StrideInBytes;
+                    aabbs->data.deviceAddress = geom_desc->AABBs.AABBs.StartAddress;
+                    info->primitive_counts[i] = geom_desc->AABBs.AABBCount;
+                    RT_TRACE("  AABB stride: %"PRIu64" bytes\n", geom_desc->AABBs.AABBs.StrideInBytes);
+                    break;
+
+                default:
+                    FIXME("Unsupported geometry type %u.\n", geom_desc->Type);
+                    goto fail;
+            }
+
+            RT_TRACE("  Primitive count %u.\n", info->primitive_counts[i]);
+        }
+    }
+
+    /* One build range per geometry, covering all of its primitives from offset 0. */
+    for (i = 0; i < build_info->geometryCount; i++)
+    {
+        info->build_range_ptrs[i] = &info->build_ranges[i];
+        info->build_ranges[i].primitiveCount = info->primitive_counts[i];
+        info->build_ranges[i].firstVertex = 0;
+        info->build_ranges[i].primitiveOffset = 0;
+        info->build_ranges[i].transformOffset = 0;
+    }
+
+    build_info->pGeometries = info->geometries;
+
+    RT_TRACE("=====================\n");
+    return true;
+
+fail:
+    /* Release whatever was heap allocated, then point everything back at the
+     * stack storage so a later cleanup call by the caller cannot double free. */
+    vkd3d_acceleration_structure_build_info_cleanup(info);
+    info->geometries = info->geometries_stack;
+    info->primitive_counts = info->primitive_counts_stack;
+    info->build_ranges = info->build_range_stack;
+    info->build_range_ptrs = info->build_range_ptr_stack;
+    return false;
+}
+
+/* Records a global memory barrier from the TRANSFER stage (transfer writes)
+ * to ALL_COMMANDS with an empty destination access mask. */
+static void vkd3d_acceleration_structure_end_barrier(struct d3d12_command_list *list)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    VkMemoryBarrier vk_barrier;
+
+    /* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS.
+     * Zero-filling leaves pNext NULL and dstAccessMask 0. */
+    memset(&vk_barrier, 0, sizeof(vk_barrier));
+    vk_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    vk_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+
+    VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
+            VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            0,
+            1, &vk_barrier,
+            0, NULL,
+            0, NULL));
+}
+
+/* Records commands that write one postbuild info entry for
+ * vk_acceleration_structure into the buffer containing desc->DestBuffer.
+ *
+ * list:        command list to record into.
+ * desc:        selects the info type and the destination GPU address.
+ * desc_offset: byte offset added to the destination address.
+ * vk_acceleration_structure: the structure to query.
+ *
+ * COMPACTED_SIZE, SERIALIZATION and (with rayTracingMaintenance1) CURRENT_SIZE
+ * are resolved through a query copied into the buffer as a 64-bit value.
+ * SERIALIZATION additionally writes a second 64-bit value directly after the
+ * first one. Unsupported info types are logged and zero-filled. */
+static void vkd3d_acceleration_structure_write_postbuild_info(
+        struct d3d12_command_list *list,
+        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
+        VkDeviceSize desc_offset,
+        VkAccelerationStructureKHR vk_acceleration_structure)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    const struct vkd3d_unique_resource *resource;
+    VkQueryPool vk_query_pool;
+    VkQueryType vk_query_type;
+    uint32_t vk_query_index;
+    VkDeviceSize stride;
+    VkDeviceSize offset;
+    uint32_t type_index;
+    VkBuffer vk_buffer;
+
+    resource = vkd3d_va_map_deref(&list->device->memory_allocator.va_map, desc->DestBuffer);
+    if (!resource)
+    {
+        ERR("Invalid resource.\n");
+        return;
+    }
+
+    vk_buffer = resource->vk_buffer;
+    /* Keep the offset 64-bit: both the VA delta and desc_offset are 64-bit
+     * quantities, and a 32-bit offset would silently wrap at 4 GiB. */
+    offset = desc->DestBuffer - resource->va;
+    offset += desc_offset;
+
+    if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_COMPACTED_SIZE)
+    {
+        vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_COMPACTED_SIZE_KHR;
+        type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
+        stride = sizeof(uint64_t);
+    }
+    else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE &&
+            list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
+    {
+        vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
+        type_index = VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE;
+        stride = sizeof(uint64_t);
+    }
+    else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
+    {
+        vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
+        type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
+        stride = sizeof(uint64_t);
+    }
+    else
+    {
+        FIXME("Unsupported InfoType %u.\n", desc->InfoType);
+        /* TODO: CURRENT_SIZE cannot be queried in Vulkan when
+         * rayTracingMaintenance1 is unavailable, so we'll need to keep
+         * around a buffer to handle this.
+         * For now, just clear to 0. */
+        VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset,
+                sizeof(uint64_t), 0));
+        return;
+    }
+
+    if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
+            type_index, &vk_query_pool, &vk_query_index))
+    {
+        ERR("Failed to allocate query.\n");
+        return;
+    }
+
+    d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
+
+    VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
+            1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
+    VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
+            vk_query_pool, vk_query_index, 1,
+            vk_buffer, offset, stride,
+            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
+
+    if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
+    {
+        /* Second 64-bit field of the serialization info: NumBottomLevelPointers. */
+        if (list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
+        {
+            /* The property written must match the type of the pool the query
+             * comes from, so switch the query type along with the type index
+             * instead of reusing SERIALIZATION_SIZE. */
+            vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_BOTTOM_LEVEL_POINTERS_KHR;
+            type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS;
+            if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
+                    type_index, &vk_query_pool, &vk_query_index))
+            {
+                ERR("Failed to allocate query.\n");
+                return;
+            }
+
+            d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
+
+            VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
+                    1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
+            VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
+                    vk_query_pool, vk_query_index, 1,
+                    vk_buffer, offset + sizeof(uint64_t), stride,
+                    VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
+        }
+        else
+        {
+            FIXME("NumBottomLevelPointers will always return 0.\n");
+            VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
+                    sizeof(uint64_t), 0));
+        }
+    }
+}
+
+/* Writes postbuild info for count acceleration structures, identified by the
+ * GPU addresses in addresses, into consecutive entries starting at
+ * desc->DestBuffer. The writes are bracketed by a barrier into the TRANSFER
+ * stage and the matching end barrier. Addresses that cannot be resolved to an
+ * acceleration structure are logged and skipped. */
+void vkd3d_acceleration_structure_emit_postbuild_info(
+        struct d3d12_command_list *list,
+        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
+        uint32_t count,
+        const D3D12_GPU_VIRTUAL_ADDRESS *addresses)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    VkDeviceSize entry_stride = sizeof(uint64_t);
+    VkAccelerationStructureKHR rtas;
+    VkMemoryBarrier begin_barrier;
+    uint32_t entry;
+
+    /* SERIALIZATION entries are strided by two 64-bit values, all others by one. */
+    if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
+        entry_stride = 2 * sizeof(uint64_t);
+
+    /* Zero-filling leaves pNext NULL and srcAccessMask 0. */
+    memset(&begin_barrier, 0, sizeof(begin_barrier));
+    begin_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    begin_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+
+    /* We resolve the query in TRANSFER, but DXR expects UNORDERED_ACCESS. */
+    VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
+            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0,
+            1, &begin_barrier,
+            0, NULL,
+            0, NULL));
+
+    for (entry = 0; entry < count; entry++)
+    {
+        rtas = vkd3d_va_map_place_acceleration_structure(
+                &list->device->memory_allocator.va_map, list->device, addresses[entry]);
+        if (!rtas)
+        {
+            ERR("Failed to query acceleration structure for VA 0x%"PRIx64".\n", addresses[entry]);
+            continue;
+        }
+
+        vkd3d_acceleration_structure_write_postbuild_info(list, desc, entry * entry_stride, rtas);
+    }
+
+    vkd3d_acceleration_structure_end_barrier(list);
+}
+
+/* Writes count postbuild info entries, one per element of desc, for a single
+ * acceleration structure. A barrier from acceleration structure writes to the
+ * query reads and transfer writes is recorded first, and the end barrier is
+ * recorded after the last entry. */
+void vkd3d_acceleration_structure_emit_immediate_postbuild_info(
+        struct d3d12_command_list *list, uint32_t count,
+        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc,
+        VkAccelerationStructureKHR vk_acceleration_structure)
+{
+    /* In D3D12 we are supposed to be able to emit without an explicit barrier,
+     * but we need to emit them for Vulkan. */
+
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *current;
+    VkMemoryBarrier build_barrier;
+
+    memset(&build_barrier, 0, sizeof(build_barrier));
+    build_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
+    build_barrier.srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR;
+    /* The query accesses STRUCTURE_READ_BIT in BUILD_BIT stage. */
+    build_barrier.dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR
+            | VK_ACCESS_TRANSFER_WRITE_BIT;
+
+    /* Writing to the result buffer is supposed to happen in UNORDERED_ACCESS on DXR for
+     * some bizarre reason, so we have to satisfy a transfer barrier.
+     * Have to basically do a full stall to make this work ... */
+    VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
+            VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+            VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0,
+            1, &build_barrier,
+            0, NULL,
+            0, NULL));
+
+    /* Could optimize a bit by batching more aggressively, but no idea if it's going to help in practice. */
+    for (current = desc; current != desc + count; current++)
+    {
+        vkd3d_acceleration_structure_write_postbuild_info(list, current, 0, vk_acceleration_structure);
+    }
+
+    vkd3d_acceleration_structure_end_barrier(list);
+}
+
+/* Converts a D3D12 acceleration structure copy mode to its Vulkan equivalent.
+ * Only CLONE and COMPACT are handled. Returns true and writes *vk_mode on
+ * success; returns false, leaving *vk_mode untouched, for any other mode. */
+static bool convert_copy_mode(
+        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode,
+        VkCopyAccelerationStructureModeKHR *vk_mode)
+{
+    if (mode == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_CLONE)
+    {
+        *vk_mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_CLONE_KHR;
+        return true;
+    }
+
+    if (mode == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE_COMPACT)
+    {
+        *vk_mode = VK_COPY_ACCELERATION_STRUCTURE_MODE_COMPACT_KHR;
+        return true;
+    }
+
+    FIXME("Unsupported RTAS copy mode #%x.\n", mode);
+    return false;
+}
+
+/* Records a copy between the acceleration structures located at the GPU
+ * addresses src and dst. Nothing is recorded when either address cannot be
+ * resolved or when the copy mode is not supported. */
+void vkd3d_acceleration_structure_copy(
+        struct d3d12_command_list *list,
+        D3D12_GPU_VIRTUAL_ADDRESS dst, D3D12_GPU_VIRTUAL_ADDRESS src,
+        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    VkCopyAccelerationStructureInfoKHR copy_info;
+    VkAccelerationStructureKHR source, target;
+
+    /* The destination is resolved before the source. */
+    target = vkd3d_va_map_place_acceleration_structure(
+            &list->device->memory_allocator.va_map, list->device, dst);
+    if (target == VK_NULL_HANDLE)
+    {
+        ERR("Invalid dst address #%"PRIx64" for RTAS copy.\n", dst);
+        return;
+    }
+
+    source = vkd3d_va_map_place_acceleration_structure(
+            &list->device->memory_allocator.va_map, list->device, src);
+    if (source == VK_NULL_HANDLE)
+    {
+        ERR("Invalid src address #%"PRIx64" for RTAS copy.\n", src);
+        return;
+    }
+
+    if (!convert_copy_mode(mode, &copy_info.mode))
+        return;
+
+    copy_info.sType = VK_STRUCTURE_TYPE_COPY_ACCELERATION_STRUCTURE_INFO_KHR;
+    copy_info.pNext = NULL;
+    copy_info.src = source;
+    copy_info.dst = target;
+
+    VK_CALL(vkCmdCopyAccelerationStructureKHR(list->vk_command_buffer, &copy_info));
+}
--- a/libs/vkd3d/breadcrumbs.c
+++ b/libs/vkd3d/breadcrumbs.c
@ -0,0 +1,655 @@
+/*
+ * Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_private.h"
+#include "vkd3d_debug.h"
+#include "vkd3d_common.h"
+#include <assert.h>
+#include <stdio.h>
+
+/* Just allocate everything up front. This only consumes host memory anyways. */
+#define MAX_COMMAND_LISTS (32 * 1024)
+
+/* Checkpoint markers pack a context index and a counter into one pointer-sized
+ * value: context + MAX_COMMAND_LISTS * counter. The context is recovered with
+ * a modulo and the counter with a division, so the round trip is only exact
+ * while context < MAX_COMMAND_LISTS.
+ * Questionable on 32-bit, but we don't really care. */
+#define NV_ENCODE_CHECKPOINT(context, counter) ((void*) ((uintptr_t)(context) + (uintptr_t)MAX_COMMAND_LISTS * (counter)))
+#define NV_CHECKPOINT_CONTEXT(ptr) ((uint32_t)((uintptr_t)(ptr) % MAX_COMMAND_LISTS))
+#define NV_CHECKPOINT_COUNTER(ptr) ((uint32_t)((uintptr_t)(ptr) / MAX_COMMAND_LISTS))
+
+/* Returns a short printable name for a breadcrumb command type, or "?" for
+ * any value that has no entry in the table. */
+static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_command_type type)
+{
+    /* Indexed by command type; slots without an initializer stay NULL. */
+    static const char * const type_names[] =
+    {
+        [VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER] = "top_marker",
+        [VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER] = "bottom_marker",
+        [VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH] = "set_shader_hash",
+        [VKD3D_BREADCRUMB_COMMAND_DRAW] = "draw",
+        [VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED] = "draw_indexed",
+        [VKD3D_BREADCRUMB_COMMAND_DISPATCH] = "dispatch",
+        [VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT] = "execute_indirect",
+        [VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE] = "execute_indirect_template",
+        [VKD3D_BREADCRUMB_COMMAND_COPY] = "copy",
+        [VKD3D_BREADCRUMB_COMMAND_RESOLVE] = "resolve",
+        [VKD3D_BREADCRUMB_COMMAND_WBI] = "wbi",
+        [VKD3D_BREADCRUMB_COMMAND_RESOLVE_QUERY] = "resolve_query",
+        [VKD3D_BREADCRUMB_COMMAND_GATHER_VIRTUAL_QUERY] = "gather_virtual_query",
+        [VKD3D_BREADCRUMB_COMMAND_BUILD_RTAS] = "build_rtas",
+        [VKD3D_BREADCRUMB_COMMAND_COPY_RTAS] = "copy_rtas",
+        [VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD] = "emit_rtas_postbuild",
+        [VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS] = "trace_rays",
+        [VKD3D_BREADCRUMB_COMMAND_BARRIER] = "barrier",
+        [VKD3D_BREADCRUMB_COMMAND_AUX32] = "aux32",
+        [VKD3D_BREADCRUMB_COMMAND_AUX64] = "aux64",
+        [VKD3D_BREADCRUMB_COMMAND_VBO] = "vbo",
+        [VKD3D_BREADCRUMB_COMMAND_IBO] = "ibo",
+        [VKD3D_BREADCRUMB_COMMAND_ROOT_DESC] = "root_desc",
+        [VKD3D_BREADCRUMB_COMMAND_ROOT_CONST] = "root_const",
+        [VKD3D_BREADCRUMB_COMMAND_TAG] = "tag",
+    };
+    const unsigned int slot = (unsigned int)type;
+
+    /* The unsigned compare also rejects negative enum values. */
+    if (slot < sizeof(type_names) / sizeof(type_names[0]) && type_names[slot])
+        return type_names[slot];
+
+    return "?";
+}
+
+/* Initializes the breadcrumb tracer for device.
+ *
+ * With AMD_buffer_marker a host-visible buffer holding MAX_COMMAND_LISTS
+ * counters is created, mapped and zeroed. With NV_device_diagnostic_checkpoints
+ * no buffer is needed. If neither extension is available the call fails with
+ * E_FAIL.
+ *
+ * Returns S_OK on success. On failure returns the mutex initialization error,
+ * the error from buffer creation or memory allocation, or E_OUTOFMEMORY when
+ * mapping or the context array allocation fails. Every failure after the
+ * mutex was created tears the tracer down through
+ * vkd3d_breadcrumb_tracer_cleanup(). */
+HRESULT vkd3d_breadcrumb_tracer_init(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    D3D12_HEAP_PROPERTIES heap_properties;
+    D3D12_RESOURCE_DESC1 resource_desc;
+    VkMemoryPropertyFlags memory_props;
+    HRESULT hr;
+    int rc;
+
+    memset(tracer, 0, sizeof(*tracer));
+
+    if ((rc = pthread_mutex_init(&tracer->lock, NULL)))
+        return hresult_from_errno(rc);
+
+    if (device->vk_info.AMD_buffer_marker)
+    {
+        INFO("Enabling AMD_buffer_marker breadcrumbs.\n");
+
+        /* The heap properties must not be passed uninitialized. The buffer is
+         * written by the GPU and read back on the host, so describe it as a
+         * READBACK heap. */
+        memset(&heap_properties, 0, sizeof(heap_properties));
+        heap_properties.Type = D3D12_HEAP_TYPE_READBACK;
+
+        memset(&resource_desc, 0, sizeof(resource_desc));
+        resource_desc.Width = MAX_COMMAND_LISTS * sizeof(struct vkd3d_breadcrumb_counter);
+        resource_desc.Height = 1;
+        resource_desc.DepthOrArraySize = 1;
+        resource_desc.MipLevels = 1;
+        resource_desc.Format = DXGI_FORMAT_UNKNOWN;
+        resource_desc.SampleDesc.Count = 1;
+        resource_desc.SampleDesc.Quality = 0;
+        resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+        resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
+
+        if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
+                &resource_desc, &tracer->host_buffer)))
+        {
+            goto err;
+        }
+
+        memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
+                VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
+
+        /* If device faults in the middle of execution we will never get the chance to flush device caches.
+         * Make sure that breadcrumbs are always written directly out.
+         * This is the primary usecase for the device coherent/uncached extension after all ...
+         * Don't make this a hard requirement since buffer markers might be implicitly coherent on some
+         * implementations (Turnip?). */
+        if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
+        {
+            memory_props |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
+                    VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
+        }
+
+        if (FAILED(hr = vkd3d_allocate_buffer_memory(device, tracer->host_buffer,
+                memory_props, &tracer->host_buffer_memory)))
+        {
+            goto err;
+        }
+
+        if (VK_CALL(vkMapMemory(device->vk_device, tracer->host_buffer_memory.vk_memory,
+                0, VK_WHOLE_SIZE,
+                0, (void**)&tracer->mapped)) != VK_SUCCESS)
+        {
+            hr = E_OUTOFMEMORY;
+            goto err;
+        }
+
+        memset(tracer->mapped, 0, sizeof(*tracer->mapped) * MAX_COMMAND_LISTS);
+    }
+    else if (device->vk_info.NV_device_diagnostic_checkpoints)
+    {
+        INFO("Enabling NV_device_diagnostics_checkpoints breadcrumbs.\n");
+    }
+    else
+    {
+        ERR("Breadcrumbs require support for either AMD_buffer_marker or NV_device_diagnostics_checkpoints.\n");
+        hr = E_FAIL;
+        goto err;
+    }
+
+    /* The context ring is indexed unconditionally by the allocation code, so
+     * a failed allocation has to fail initialization. */
+    if (!(tracer->trace_contexts = vkd3d_calloc(MAX_COMMAND_LISTS, sizeof(*tracer->trace_contexts))))
+    {
+        ERR("Failed to allocate breadcrumb trace contexts.\n");
+        hr = E_OUTOFMEMORY;
+        goto err;
+    }
+    tracer->trace_context_index = 0;
+
+    return S_OK;
+
+err:
+    vkd3d_breadcrumb_tracer_cleanup(tracer, device);
+    return hr;
+}
+
+/* Releases everything owned by the tracer: the context array, the marker
+ * buffer and its memory (AMD_buffer_marker only), and finally the mutex. */
+void vkd3d_breadcrumb_tracer_cleanup(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    const bool owns_marker_buffer = !!device->vk_info.AMD_buffer_marker;
+
+    if (owns_marker_buffer)
+    {
+        /* The host buffer only exists on the AMD_buffer_marker path. */
+        VK_CALL(vkDestroyBuffer(device->vk_device, tracer->host_buffer, NULL));
+        vkd3d_free_device_memory(device, &tracer->host_buffer_memory);
+    }
+
+    vkd3d_free(tracer->trace_contexts);
+
+    pthread_mutex_destroy(&tracer->lock);
+}
+
+/* Claims a free breadcrumb context from the tracer's ring for list and records
+ * its index in allocator's list of context indices.
+ *
+ * Returns the claimed index, which is also stored in
+ * list->breadcrumb_context_index. Returns UINT32_MAX, without claiming a
+ * context, when the allocator's index array cannot grow, when the mutex cannot
+ * be locked, or when every context in the ring is in use. */
+unsigned int vkd3d_breadcrumb_tracer_allocate_command_list(struct vkd3d_breadcrumb_tracer *tracer,
+        struct d3d12_command_list *list, struct d3d12_command_allocator *allocator)
+{
+    unsigned int index = UINT32_MAX;
+    unsigned int iteration_count;
+    int rc;
+
+    /* Grow the allocator's bookkeeping before claiming a context. Doing it
+     * first means a failed reservation can neither strand a locked context
+     * nor lead to a write through an unallocated array further down. */
+    if (!vkd3d_array_reserve((void**)&allocator->breadcrumb_context_indices, &allocator->breadcrumb_context_index_size,
+            allocator->breadcrumb_context_index_count + 1,
+            sizeof(*allocator->breadcrumb_context_indices)))
+    {
+        ERR("Failed to reserve memory for breadcrumb context indices.\n");
+        return UINT32_MAX;
+    }
+
+    if ((rc = pthread_mutex_lock(&tracer->lock)))
+    {
+        ERR("Failed to lock mutex, rc %d.\n", rc);
+        return UINT32_MAX;
+    }
+
+    /* Since this is a ring, this is extremely likely to succeed on first attempt. */
+    for (iteration_count = 0; iteration_count < MAX_COMMAND_LISTS; iteration_count++)
+    {
+        tracer->trace_context_index = (tracer->trace_context_index + 1) % MAX_COMMAND_LISTS;
+        if (!tracer->trace_contexts[tracer->trace_context_index].locked)
+        {
+            tracer->trace_contexts[tracer->trace_context_index].locked = 1;
+            index = tracer->trace_context_index;
+            break;
+        }
+    }
+
+    pthread_mutex_unlock(&tracer->lock);
+
+    if (index == UINT32_MAX)
+    {
+        ERR("Failed to allocate new index for command list.\n");
+        return index;
+    }
+
+    TRACE("Allocating breadcrumb context %u for list %p.\n", index, list);
+    list->breadcrumb_context_index = index;
+
+    /* Need to clear this on a fresh allocation rather than release, since we can end up releasing a command list
+     * before we observe the device lost. */
+    tracer->trace_contexts[index].command_count = 0;
+    tracer->trace_contexts[index].counter = 0;
+
+    if (list->device->vk_info.AMD_buffer_marker)
+        memset(&tracer->mapped[index], 0, sizeof(tracer->mapped[index]));
+
+    /* Capacity for this slot was reserved at the top of the function. */
+    allocator->breadcrumb_context_indices[allocator->breadcrumb_context_index_count++] = index;
+    return index;
+}
+
+/* Hands a batch of breadcrumb contexts back to the ring.
+ * The command allocator records every context index it was given and passes the
+ * whole list in here; entries equal to UINT32_MAX are not unlocked. */
+void vkd3d_breadcrumb_tracer_release_command_lists(struct vkd3d_breadcrumb_tracer *tracer,
+        const unsigned int *indices, size_t indices_count)
+{
+    size_t slot;
+    int rc;
+
+    /* Nothing to release, do not bother taking the lock. */
+    if (indices_count == 0)
+        return;
+
+    rc = pthread_mutex_lock(&tracer->lock);
+    if (rc)
+    {
+        ERR("Failed to lock mutex, rc %d.\n", rc);
+        return;
+    }
+
+    for (slot = 0; slot < indices_count; slot++)
+    {
+        const unsigned int context_index = indices[slot];
+
+        if (context_index != UINT32_MAX)
+            tracer->trace_contexts[context_index].locked = 0;
+        TRACE("Releasing breadcrumb context %u.\n", context_index);
+    }
+
+    pthread_mutex_unlock(&tracer->lock);
+}
+
+/* Dumps the recorded command log of one context through ERR() and brackets the
+ * suspicious part with "Potential crash region" BEGIN / END lines.
+ *
+ * begin_marker is the last TOP_OF_PIPE marker value that was observed, end_marker the
+ * last BOTTOM_OF_PIPE one. The region opens after the BOTTOM marker command whose
+ * count equals end_marker (or right away if end_marker is 0) and closes before the
+ * first TOP marker command whose count is greater than begin_marker. */
+static void vkd3d_breadcrumb_tracer_report_command_list(
+        const struct vkd3d_breadcrumb_command_list_trace_context *context,
+        uint32_t begin_marker,
+        uint32_t end_marker)
+{
+    const struct vkd3d_breadcrumb_command *entry;
+    bool region_closed = false;
+    bool region_open = false;
+    unsigned int n;
+
+    /* No BOTTOM_OF_PIPE marker was observed at all, everything from the start is suspect. */
+    if (end_marker == 0)
+    {
+        region_open = true;
+        ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
+    }
+
+    for (n = 0; n < context->command_count; n++)
+    {
+        entry = &context->commands[n];
+
+        /* A TOP_OF_PIPE marker newer than the one we observed was (most likely) never reached
+         * by the command processor, so the region ends right before it. */
+        if (!region_closed &&
+                entry->type == VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER &&
+                entry->count > begin_marker)
+        {
+            region_closed = true;
+            ERR(" ===== Potential crash region END =====\n");
+        }
+
+        switch (entry->type)
+        {
+            case VKD3D_BREADCRUMB_COMMAND_AUX32:
+                ERR(" Set arg: %u (#%x)\n", entry->word_32bit, entry->word_32bit);
+                break;
+
+            case VKD3D_BREADCRUMB_COMMAND_AUX64:
+                ERR(" Set arg: %"PRIu64" (#%"PRIx64")\n", entry->word_64bit, entry->word_64bit);
+                break;
+
+            case VKD3D_BREADCRUMB_COMMAND_TAG:
+                ERR("     Tag: %s\n", entry->tag);
+                break;
+
+            case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
+            case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
+                ERR("  Command: %s\n", vkd3d_breadcrumb_command_type_to_str(entry->type));
+                ERR("    marker: %u\n", entry->count);
+                break;
+
+            case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
+                ERR("  Command: %s\n", vkd3d_breadcrumb_command_type_to_str(entry->type));
+                ERR("    hash: %016"PRIx64", stage: %x\n", entry->shader.hash, entry->shader.stage);
+                break;
+
+            default:
+                /* Every other command type only has a name worth printing. */
+                ERR("  Command: %s\n", vkd3d_breadcrumb_command_type_to_str(entry->type));
+                break;
+        }
+
+        /* This BOTTOM_OF_PIPE marker was observed, so everything up to here completed.
+         * Whatever is at fault comes after it. */
+        if (!region_open &&
+                entry->type == VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER &&
+                entry->count == end_marker)
+        {
+            region_open = true;
+            ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
+        }
+    }
+}
+
+/* AMD_buffer_marker path: inspects the begin / end markers stored for one context in
+ * the mapped marker buffer and, if the context looks like it was in flight, dumps its
+ * command log. */
+static void vkd3d_breadcrumb_tracer_report_command_list_amd(struct vkd3d_breadcrumb_tracer *tracer,
+        unsigned int context_index)
+{
+    const struct vkd3d_breadcrumb_command_list_trace_context *trace = &tracer->trace_contexts[context_index];
+    uint32_t top, bottom;
+
+    /* A context which never recorded a marker cannot be the culprit. */
+    if (!trace->counter)
+        return;
+
+    top = tracer->mapped[context_index].begin_marker;
+    bottom = tracer->mapped[context_index].end_marker;
+
+    /* Both markers 0: never executed. Both markers UINT32_MAX: successfully retired.
+     * Neither can be the cause. */
+    if (top == bottom && (top == 0 || top == UINT32_MAX))
+        return;
+
+    /* A re-submitted command list which dies before reaching any BOTTOM_OF_PIPE marker still
+     * carries the UINT32_MAX end marker of its previous run. Treat that as "nothing completed"
+     * so that the end marker never exceeds the begin marker. */
+    if (top > 0 && bottom == UINT32_MAX)
+        bottom = 0;
+
+    ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
+            context_index, top, bottom);
+    vkd3d_breadcrumb_tracer_report_command_list(trace, top, bottom);
+    ERR("Done analyzing command list.\n");
+}
+
+/* NV_device_diagnostic_checkpoints path: queries the checkpoints the given queue had reached
+ * when the device was lost and, if they all belong to one breadcrumb context, dumps that
+ * context's command log.
+ *
+ * The latest TOP_OF_PIPE checkpoint becomes the begin marker and the latest BOTTOM_OF_PIPE
+ * checkpoint the end marker. Nothing is reported when the queue has no checkpoints, when the
+ * checkpoints disagree on the context, use an unexpected stage, or refer to a context outside
+ * the ring. */
+static void vkd3d_breadcrumb_tracer_report_queue_nv(struct vkd3d_breadcrumb_tracer *tracer,
+        struct d3d12_device *device,
+        VkQueue vk_queue)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    uint32_t begin_marker, end_marker;
+    uint32_t checkpoint_context_index;
+    VkCheckpointDataNV *checkpoints;
+    uint32_t checkpoint_marker;
+    uint32_t checkpoint_count;
+    uint32_t context_index;
+    uint32_t i;
+
+    /* Start from a defined value; the query returns nothing we could check for failure. */
+    checkpoint_count = 0;
+    VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, NULL));
+    if (checkpoint_count == 0)
+        return;
+
+    if (!(checkpoints = vkd3d_calloc(checkpoint_count, sizeof(VkCheckpointDataNV))))
+    {
+        ERR("Failed to allocate memory for %u checkpoints.\n", checkpoint_count);
+        return;
+    }
+
+    for (i = 0; i < checkpoint_count; i++)
+        checkpoints[i].sType = VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV;
+    VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, checkpoints));
+
+    context_index = UINT32_MAX;
+    begin_marker = 0;
+    end_marker = 0;
+
+    for (i = 0; i < checkpoint_count; i++)
+    {
+        checkpoint_context_index = NV_CHECKPOINT_CONTEXT(checkpoints[i].pCheckpointMarker);
+        checkpoint_marker = NV_CHECKPOINT_COUNTER(checkpoints[i].pCheckpointMarker);
+
+        if (context_index != checkpoint_context_index && context_index != UINT32_MAX)
+        {
+            FIXME("Markers have different contexts. Execution is likely split across multiple command buffers?\n");
+            context_index = UINT32_MAX;
+            break;
+        }
+
+        context_index = checkpoint_context_index;
+
+        if (checkpoints[i].stage == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT && checkpoint_marker > begin_marker)
+        {
+            /* We want to find the latest TOP_OF_PIPE_BIT. Then we prove that command processor got to that point. */
+            begin_marker = checkpoint_marker;
+        }
+        else if (checkpoints[i].stage == VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT && checkpoint_marker > end_marker)
+        {
+            /* We want to find the latest BOTTOM_OF_PIPE_BIT. Then we prove that we got that far. */
+            end_marker = checkpoint_marker;
+        }
+        else if (checkpoints[i].stage != VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT &&
+                checkpoints[i].stage != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+        {
+            FIXME("Unexpected checkpoint pipeline stage. #%x\n", checkpoints[i].stage);
+            context_index = UINT32_MAX;
+            break;
+        }
+    }
+
+    /* The context index is decoded from data handed back by the driver of a lost device.
+     * Never use it to index the ring unless it is in range. */
+    if (context_index != UINT32_MAX && context_index >= MAX_COMMAND_LISTS)
+    {
+        FIXME("Checkpoint refers to out-of-range context %u, ignoring.\n", context_index);
+        context_index = UINT32_MAX;
+    }
+
+    if (context_index != UINT32_MAX && begin_marker != 0 && end_marker != 0 && end_marker != UINT32_MAX)
+    {
+        ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
+                context_index, begin_marker, end_marker);
+        vkd3d_breadcrumb_tracer_report_command_list(&tracer->trace_contexts[context_index], begin_marker, end_marker);
+        ERR("Done analyzing command list.\n");
+    }
+
+    vkd3d_free(checkpoints);
+}
+
+/* Entry point for breadcrumb analysis once a device loss has been observed.
+ * With AMD_buffer_marker every context in the ring is inspected; with
+ * NV_device_diagnostic_checkpoints every queue of the direct, compute and copy
+ * queue families is inspected, in that order. */
+void vkd3d_breadcrumb_tracer_report_device_lost(struct vkd3d_breadcrumb_tracer *tracer,
+        struct d3d12_device *device)
+{
+    static const D3D12_COMMAND_LIST_TYPE nv_queue_types[] =
+    {
+        D3D12_COMMAND_LIST_TYPE_DIRECT,
+        D3D12_COMMAND_LIST_TYPE_COMPUTE,
+        D3D12_COMMAND_LIST_TYPE_COPY,
+    };
+    struct vkd3d_queue_family_info *family;
+    unsigned int type_index;
+    unsigned int i;
+
+    ERR("Device lost observed, analyzing breadcrumbs ...\n");
+
+    if (device->vk_info.AMD_buffer_marker)
+    {
+        /* AMD path, buffer marker. */
+        for (i = 0; i < MAX_COMMAND_LISTS; i++)
+            vkd3d_breadcrumb_tracer_report_command_list_amd(tracer, i);
+    }
+    else if (device->vk_info.NV_device_diagnostic_checkpoints)
+    {
+        /* vkGetQueueCheckpointDataNV does not require us to synchronize access to the queue. */
+        for (type_index = 0; type_index < sizeof(nv_queue_types) / sizeof(nv_queue_types[0]); type_index++)
+        {
+            family = d3d12_device_get_vkd3d_queue_family(device, nv_queue_types[type_index]);
+            for (i = 0; i < family->queue_count; i++)
+                vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, family->queues[i]->vk_queue);
+        }
+    }
+
+    ERR("Done analyzing breadcrumbs ...\n");
+}
+
+/* Records the first breadcrumb of a command list.
+ *
+ * Does nothing if the list has no breadcrumb context (index UINT32_MAX). Otherwise the
+ * context's counter is incremented, a SET_TOP_MARKER entry carrying the new counter value is
+ * appended to the context's command log, and the matching GPU-side marker is recorded into
+ * list->vk_command_buffer. */
+void vkd3d_breadcrumb_tracer_begin_command_list(struct d3d12_command_list *list)
+{
+    struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    struct vkd3d_breadcrumb_command_list_trace_context *trace;
+    unsigned int context = list->breadcrumb_context_index;
+    struct vkd3d_breadcrumb_command cmd;
+
+    if (context == UINT32_MAX)
+        return;
+
+    trace = &breadcrumb_tracer->trace_contexts[context];
+    trace->counter++;
+
+    cmd.count = trace->counter;
+    cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
+    vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+
+    if (list->device->vk_info.AMD_buffer_marker)
+    {
+        /* Write the counter into this context's begin_marker slot of the marker buffer
+         * at TOP_OF_PIPE. */
+        VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
+                VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                breadcrumb_tracer->host_buffer,
+                context * sizeof(struct vkd3d_breadcrumb_counter) +
+                        offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
+                trace->counter));
+    }
+    else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
+    {
+        /* A checkpoint is implicitly a top and bottom marker. */
+        cmd.count = trace->counter;
+        cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
+        vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+
+        /* The checkpoint payload carries both the context index and the counter. */
+        VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
+    }
+}
+
+/* Appends a copy of *command to the command log of the list's breadcrumb context.
+ *
+ * Does nothing if the list has no breadcrumb context (index UINT32_MAX). If the log cannot be
+ * grown the command is dropped and an error is logged; breadcrumbs are a debugging aid and
+ * must not take the application down themselves. */
+void vkd3d_breadcrumb_tracer_add_command(struct d3d12_command_list *list,
+        const struct vkd3d_breadcrumb_command *command)
+{
+    struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
+    struct vkd3d_breadcrumb_command_list_trace_context *trace;
+    unsigned int context = list->breadcrumb_context_index;
+
+    if (context == UINT32_MAX)
+        return;
+
+    trace = &breadcrumb_tracer->trace_contexts[context];
+
+    TRACE("Adding command (%s) to context %u.\n",
+            vkd3d_breadcrumb_command_type_to_str(command->type), context);
+
+    /* Without this check an allocation failure would make the store below write through a
+     * NULL or too small array. */
+    if (!vkd3d_array_reserve((void**)&trace->commands, &trace->command_size,
+            trace->command_count + 1, sizeof(*trace->commands)))
+    {
+        ERR("Failed to grow breadcrumb command log for context %u, dropping command.\n", context);
+        return;
+    }
+
+    trace->commands[trace->command_count++] = *command;
+}
+
+/* Records a breadcrumb between two pieces of work in a command list.
+ *
+ * Does nothing if the list has no breadcrumb context (index UINT32_MAX). Each call closes the
+ * current counter value with a bottom marker and opens the next one with a top marker, both in
+ * the context's command log and on the GPU timeline. */
+void vkd3d_breadcrumb_tracer_signal(struct d3d12_command_list *list)
+{
+    struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    struct vkd3d_breadcrumb_command_list_trace_context *trace;
+    unsigned int context = list->breadcrumb_context_index;
+    struct vkd3d_breadcrumb_command cmd;
+
+    if (context == UINT32_MAX)
+        return;
+
+    trace = &breadcrumb_tracer->trace_contexts[context];
+
+    if (list->device->vk_info.AMD_buffer_marker)
+    {
+        /* Bottom marker for the current counter value: written to end_marker at BOTTOM_OF_PIPE. */
+        cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
+        cmd.count = trace->counter;
+        vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+        TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
+
+        VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
+                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                breadcrumb_tracer->host_buffer,
+                context * sizeof(struct vkd3d_breadcrumb_counter) +
+                        offsetof(struct vkd3d_breadcrumb_counter, end_marker),
+                trace->counter));
+
+        trace->counter++;
+
+        /* Top marker for the next counter value: written to begin_marker at TOP_OF_PIPE. */
+        cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
+        cmd.count = trace->counter;
+        vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+        TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
+
+        VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
+                VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                breadcrumb_tracer->host_buffer,
+                context * sizeof(struct vkd3d_breadcrumb_counter) +
+                        offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
+                trace->counter));
+    }
+    else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
+    {
+        /* One checkpoint stands for both markers here, so the top and the bottom entry are
+         * logged with the same, freshly incremented counter value. */
+        trace->counter++;
+
+        cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
+        cmd.count = trace->counter;
+        vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+        TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
+
+        cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
+        cmd.count = trace->counter;
+        vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+        TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
+
+        VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
+    }
+}
+
+/* Records the final breadcrumb of a command list.
+ *
+ * Does nothing if the list has no breadcrumb context (index UINT32_MAX). The context's counter
+ * is set to UINT32_MAX, that value is recorded as GPU-side marker, and a top and a bottom
+ * marker entry carrying it are appended to the context's command log. */
+void vkd3d_breadcrumb_tracer_end_command_list(struct d3d12_command_list *list)
+{
+    struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
+    const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
+    struct vkd3d_breadcrumb_command_list_trace_context *trace;
+    unsigned int context = list->breadcrumb_context_index;
+    struct vkd3d_breadcrumb_command cmd;
+
+    if (context == UINT32_MAX)
+        return;
+
+    trace = &breadcrumb_tracer->trace_contexts[context];
+    trace->counter = UINT32_MAX;
+
+    if (list->device->vk_info.AMD_buffer_marker)
+    {
+        /* Both slots are written at BOTTOM_OF_PIPE. The AMD report path treats
+         * begin_marker == end_marker == UINT32_MAX as "successfully retired". */
+        VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
+                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                breadcrumb_tracer->host_buffer,
+                context * sizeof(struct vkd3d_breadcrumb_counter) +
+                        offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
+                trace->counter));
+
+        VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
+                VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
+                breadcrumb_tracer->host_buffer,
+                context * sizeof(struct vkd3d_breadcrumb_counter) +
+                        offsetof(struct vkd3d_breadcrumb_counter, end_marker),
+                trace->counter));
+    }
+    else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
+    {
+        VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
+    }
+
+    /* Log the terminating markers; cmd.count stays UINT32_MAX for both entries. */
+    cmd.count = trace->counter;
+    cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
+    vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+    cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
+    vkd3d_breadcrumb_tracer_add_command(list, &cmd);
+}
--- a/libs/vkd3d/bundle.c
+++ b/libs/vkd3d/bundle.c
--- a/libs/vkd3d/cache.c
+++ b/libs/vkd3d/cache.c
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
--- a/libs/vkd3d/command_list_profiled.h
+++ b/libs/vkd3d/command_list_profiled.h
@ -0,0 +1,562 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_COMMAND_LIST_PROFILED
+#define __VKD3D_COMMAND_LIST_PROFILED
+
+/* Wraps a call to d3d12_command_list_<name>(...) in a VKD3D region called <name>:
+ * VKD3D_REGION_DECL, VKD3D_REGION_BEGIN, the call itself, then VKD3D_REGION_END.
+ * The expansion consists of several statements and has no trailing semicolon, so it has to be
+ * used as a full statement inside a braced block, never as the unbraced body of an if or a loop. */
+#define COMMAND_LIST_PROFILED_CALL(name, ...) \
+    VKD3D_REGION_DECL(name); \
+    VKD3D_REGION_BEGIN(name); \
+    d3d12_command_list_##name(__VA_ARGS__); \
+    VKD3D_REGION_END(name)
+
+/* Profiled variant of DrawInstanced: forwards all arguments unchanged to
+ * d3d12_command_list_DrawInstanced() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced_profiled(d3d12_command_list_iface *iface,
+        UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location,
+        UINT start_instance_location)
+{
+    COMMAND_LIST_PROFILED_CALL(DrawInstanced, iface, vertex_count_per_instance,
+            instance_count, start_vertex_location, start_instance_location);
+}
+
+/* Profiled variant of DrawIndexedInstanced: forwards all arguments unchanged to
+ * d3d12_command_list_DrawIndexedInstanced() through COMMAND_LIST_PROFILED_CALL.
+ *
+ * NOTE(review): the third parameter is named start_vertex_location, but the third argument of
+ * ID3D12GraphicsCommandList::DrawIndexedInstanced is StartIndexLocation. The value is passed
+ * through positionally, so behavior is unaffected; the name looks like a copy-paste from
+ * DrawInstanced - confirm and rename. */
+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced_profiled(d3d12_command_list_iface *iface,
+        UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location,
+        INT base_vertex_location, UINT start_instance_location)
+{
+    COMMAND_LIST_PROFILED_CALL(DrawIndexedInstanced, iface, index_count_per_instance, instance_count,
+            start_vertex_location, base_vertex_location, start_instance_location);
+}
+
+/* Profiled variant of Dispatch: forwards all arguments unchanged to
+ * d3d12_command_list_Dispatch() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_Dispatch_profiled(d3d12_command_list_iface *iface,
+        UINT x, UINT y, UINT z)
+{
+    COMMAND_LIST_PROFILED_CALL(Dispatch, iface, x, y, z);
+}
+
+/* Profiled variant of CopyBufferRegion: forwards all arguments unchanged to
+ * d3d12_command_list_CopyBufferRegion() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count)
+{
+    COMMAND_LIST_PROFILED_CALL(CopyBufferRegion, iface, dst, dst_offset, src, src_offset, byte_count);
+}
+
+/* Profiled variant of CopyTextureRegion: forwards all arguments unchanged to
+ * d3d12_command_list_CopyTextureRegion() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion_profiled(d3d12_command_list_iface *iface,
+        const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z,
+        const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box)
+{
+    COMMAND_LIST_PROFILED_CALL(CopyTextureRegion, iface, dst, dst_x, dst_y, dst_z, src, src_box);
+}
+
+/* Profiled variant of CopyResource: forwards all arguments unchanged to
+ * d3d12_command_list_CopyResource() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst, ID3D12Resource *src)
+{
+    COMMAND_LIST_PROFILED_CALL(CopyResource, iface, dst, src);
+}
+
+/* Profiled variant of CopyTiles: forwards all arguments unchanged to
+ * d3d12_command_list_CopyTiles() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *region_coord,
+        const D3D12_TILE_REGION_SIZE *region_size, ID3D12Resource *buffer, UINT64 buffer_offset,
+        D3D12_TILE_COPY_FLAGS flags)
+{
+    COMMAND_LIST_PROFILED_CALL(CopyTiles, iface, tiled_resource, region_coord, region_size, buffer, buffer_offset, flags);
+}
+
+/* Profiled variant of ResolveSubresource: forwards all arguments unchanged to
+ * d3d12_command_list_ResolveSubresource() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst, UINT dst_sub_resource_idx,
+        ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format)
+{
+    COMMAND_LIST_PROFILED_CALL(ResolveSubresource, iface, dst, dst_sub_resource_idx, src, src_sub_resource_idx, format);
+}
+
+/* Profiled variant of IASetPrimitiveTopology: forwards all arguments unchanged to
+ * d3d12_command_list_IASetPrimitiveTopology() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology_profiled(d3d12_command_list_iface *iface,
+        D3D12_PRIMITIVE_TOPOLOGY topology)
+{
+    COMMAND_LIST_PROFILED_CALL(IASetPrimitiveTopology, iface, topology);
+}
+
+/* Profiled variant of RSSetViewports: forwards all arguments unchanged to
+ * d3d12_command_list_RSSetViewports() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports_profiled(d3d12_command_list_iface *iface,
+        UINT viewport_count, const D3D12_VIEWPORT *viewports)
+{
+    COMMAND_LIST_PROFILED_CALL(RSSetViewports, iface, viewport_count, viewports);
+}
+
+/* Profiled variant of RSSetScissorRects: forwards all arguments unchanged to
+ * d3d12_command_list_RSSetScissorRects() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects_profiled(d3d12_command_list_iface *iface,
+        UINT rect_count, const D3D12_RECT *rects)
+{
+    COMMAND_LIST_PROFILED_CALL(RSSetScissorRects, iface, rect_count, rects);
+}
+
+/* Profiled variant of OMSetBlendFactor: forwards all arguments unchanged to
+ * d3d12_command_list_OMSetBlendFactor() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor_profiled(d3d12_command_list_iface *iface,
+        const FLOAT blend_factor[4])
+{
+    COMMAND_LIST_PROFILED_CALL(OMSetBlendFactor, iface, blend_factor);
+}
+
+/* Profiled variant of OMSetStencilRef: forwards all arguments unchanged to
+ * d3d12_command_list_OMSetStencilRef() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef_profiled(d3d12_command_list_iface *iface,
+        UINT stencil_ref)
+{
+    COMMAND_LIST_PROFILED_CALL(OMSetStencilRef, iface, stencil_ref);
+}
+
+/* Profiled variant of SetPipelineState: forwards all arguments unchanged to
+ * d3d12_command_list_SetPipelineState() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState_profiled(d3d12_command_list_iface *iface,
+        ID3D12PipelineState *pipeline_state)
+{
+    COMMAND_LIST_PROFILED_CALL(SetPipelineState, iface, pipeline_state);
+}
+
+/* Profiled variant of ResourceBarrier: forwards all arguments unchanged to
+ * d3d12_command_list_ResourceBarrier() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier_profiled(d3d12_command_list_iface *iface,
+        UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers)
+{
+    COMMAND_LIST_PROFILED_CALL(ResourceBarrier, iface, barrier_count, barriers);
+}
+
+/* Profiled variant of ExecuteBundle: forwards all arguments unchanged to
+ * d3d12_command_list_ExecuteBundle() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle_profiled(d3d12_command_list_iface *iface,
+        ID3D12GraphicsCommandList *command_list)
+{
+    COMMAND_LIST_PROFILED_CALL(ExecuteBundle, iface, command_list);
+}
+
+/* Profiled variant of SetDescriptorHeaps: forwards all arguments unchanged to
+ * d3d12_command_list_SetDescriptorHeaps() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps_profiled(d3d12_command_list_iface *iface,
+        UINT heap_count, ID3D12DescriptorHeap *const *heaps)
+{
+    COMMAND_LIST_PROFILED_CALL(SetDescriptorHeaps, iface, heap_count, heaps);
+}
+
+/* Profiled variant of SetComputeRootSignature: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRootSignature() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature_profiled(d3d12_command_list_iface *iface,
+        ID3D12RootSignature *root_signature)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRootSignature, iface, root_signature);
+}
+
+/* Profiled variant of SetGraphicsRootSignature: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRootSignature() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature_profiled(d3d12_command_list_iface *iface,
+        ID3D12RootSignature *root_signature)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRootSignature, iface, root_signature);
+}
+
+/* Profiled variant of SetComputeRootDescriptorTable: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRootDescriptorTable() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRootDescriptorTable, iface, root_parameter_index, base_descriptor);
+}
+
+/* Profiled variant of SetGraphicsRootDescriptorTable: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRootDescriptorTable() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRootDescriptorTable, iface, root_parameter_index, base_descriptor);
+}
+
+/* Profiled variant of SetComputeRoot32BitConstant: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRoot32BitConstant() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, UINT data, UINT dst_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRoot32BitConstant, iface, root_parameter_index, data, dst_offset);
+}
+
+/* Profiled variant of SetGraphicsRoot32BitConstant: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRoot32BitConstant() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, UINT data, UINT dst_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRoot32BitConstant, iface, root_parameter_index, data, dst_offset);
+}
+
+/* Profiled variant of SetComputeRoot32BitConstants: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRoot32BitConstants() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRoot32BitConstants, iface, root_parameter_index, constant_count, data, dst_offset);
+}
+
+/* Profiled variant of SetGraphicsRoot32BitConstants: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRoot32BitConstants() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants_profiled(d3d12_command_list_iface *iface,
+        UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRoot32BitConstants, iface, root_parameter_index, constant_count, data, dst_offset);
+}
+
+/* Profiled variant of SetComputeRootConstantBufferView: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRootConstantBufferView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRootConstantBufferView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of SetGraphicsRootConstantBufferView: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRootConstantBufferView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRootConstantBufferView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of SetComputeRootShaderResourceView: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRootShaderResourceView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRootShaderResourceView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of SetGraphicsRootShaderResourceView: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRootShaderResourceView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRootShaderResourceView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of SetComputeRootUnorderedAccessView: forwards all arguments unchanged to
+ * d3d12_command_list_SetComputeRootUnorderedAccessView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetComputeRootUnorderedAccessView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of SetGraphicsRootUnorderedAccessView: forwards all arguments unchanged to
+ * d3d12_command_list_SetGraphicsRootUnorderedAccessView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView_profiled(
+        d3d12_command_list_iface *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address)
+{
+    COMMAND_LIST_PROFILED_CALL(SetGraphicsRootUnorderedAccessView, iface, root_parameter_index, address);
+}
+
+/* Profiled variant of IASetIndexBuffer: forwards all arguments unchanged to
+ * d3d12_command_list_IASetIndexBuffer() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer_profiled(d3d12_command_list_iface *iface,
+        const D3D12_INDEX_BUFFER_VIEW *view)
+{
+    COMMAND_LIST_PROFILED_CALL(IASetIndexBuffer, iface, view);
+}
+
+/* Profiled variant of IASetVertexBuffers: forwards all arguments unchanged to
+ * d3d12_command_list_IASetVertexBuffers() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers_profiled(d3d12_command_list_iface *iface,
+        UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views)
+{
+    COMMAND_LIST_PROFILED_CALL(IASetVertexBuffers, iface, start_slot, view_count, views);
+}
+
+/* Profiled variant of SOSetTargets: forwards all arguments unchanged to
+ * d3d12_command_list_SOSetTargets() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets_profiled(d3d12_command_list_iface *iface,
+        UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views)
+{
+    COMMAND_LIST_PROFILED_CALL(SOSetTargets, iface, start_slot, view_count, views);
+}
+
+/* Profiled variant of OMSetRenderTargets: forwards all arguments unchanged to
+ * d3d12_command_list_OMSetRenderTargets() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets_profiled(d3d12_command_list_iface *iface,
+        UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors,
+        BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor)
+{
+    COMMAND_LIST_PROFILED_CALL(OMSetRenderTargets, iface, render_target_descriptor_count, render_target_descriptors,
+            single_descriptor_handle, depth_stencil_descriptor);
+}
+
+/* Profiled variant of ClearDepthStencilView: forwards all arguments unchanged to
+ * d3d12_command_list_ClearDepthStencilView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView_profiled(d3d12_command_list_iface *iface,
+        D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil,
+        UINT rect_count, const D3D12_RECT *rects)
+{
+    COMMAND_LIST_PROFILED_CALL(ClearDepthStencilView, iface, dsv, flags, depth, stencil, rect_count, rects);
+}
+
+/* Profiled variant of ClearRenderTargetView: forwards all arguments unchanged to
+ * d3d12_command_list_ClearRenderTargetView() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView_profiled(d3d12_command_list_iface *iface,
+        D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects)
+{
+    COMMAND_LIST_PROFILED_CALL(ClearRenderTargetView, iface, rtv, color, rect_count, rects);
+}
+
+/* Profiled variant of ClearUnorderedAccessViewUint: forwards all arguments unchanged to
+ * d3d12_command_list_ClearUnorderedAccessViewUint() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint_profiled(d3d12_command_list_iface *iface,
+        D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource,
+        const UINT values[4], UINT rect_count, const D3D12_RECT *rects)
+{
+    COMMAND_LIST_PROFILED_CALL(ClearUnorderedAccessViewUint, iface, gpu_handle, cpu_handle, resource, values, rect_count, rects);
+}
+
+/* Profiled variant of ClearUnorderedAccessViewFloat: forwards all arguments unchanged to
+ * d3d12_command_list_ClearUnorderedAccessViewFloat() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat_profiled(d3d12_command_list_iface *iface,
+        D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource,
+        const float values[4], UINT rect_count, const D3D12_RECT *rects)
+{
+    COMMAND_LIST_PROFILED_CALL(ClearUnorderedAccessViewFloat, iface, gpu_handle, cpu_handle, resource, values, rect_count, rects);
+}
+
+/* Profiled variant of DiscardResource: forwards all arguments unchanged to
+ * d3d12_command_list_DiscardResource() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *resource, const D3D12_DISCARD_REGION *region)
+{
+    COMMAND_LIST_PROFILED_CALL(DiscardResource, iface, resource, region);
+}
+
+/* Profiled variant of BeginQuery: forwards all arguments unchanged to
+ * d3d12_command_list_BeginQuery() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery_profiled(d3d12_command_list_iface *iface,
+        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index)
+{
+    COMMAND_LIST_PROFILED_CALL(BeginQuery, iface, heap, type, index);
+}
+
+/* Profiled variant of EndQuery: forwards all arguments unchanged to
+ * d3d12_command_list_EndQuery() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery_profiled(d3d12_command_list_iface *iface,
+        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index)
+{
+    COMMAND_LIST_PROFILED_CALL(EndQuery, iface, heap, type, index);
+}
+
+/* Profiled variant of ResolveQueryData: forwards all arguments unchanged to
+ * d3d12_command_list_ResolveQueryData() through COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData_profiled(d3d12_command_list_iface *iface,
+        ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count,
+        ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(ResolveQueryData, iface, heap, type, start_index, query_count, dst_buffer, aligned_dst_buffer_offset);
+}
+
+/* ID3D12GraphicsCommandList::SetPredication slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation)
+{
+    COMMAND_LIST_PROFILED_CALL(SetPredication, iface, buffer, aligned_buffer_offset, operation);
+}
+
+/* ID3D12GraphicsCommandList::SetMarker slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker_profiled(d3d12_command_list_iface *iface,
+        UINT metadata, const void *data, UINT size)
+{
+    COMMAND_LIST_PROFILED_CALL(SetMarker, iface, metadata, data, size);
+}
+
+/* ID3D12GraphicsCommandList::BeginEvent slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent_profiled(d3d12_command_list_iface *iface,
+        UINT metadata, const void *data, UINT size)
+{
+    COMMAND_LIST_PROFILED_CALL(BeginEvent, iface, metadata, data, size);
+}
+
+/* ID3D12GraphicsCommandList::EndEvent slot of d3d12_command_list_vtbl_profiled.
+ * Hands the interface pointer to COMMAND_LIST_PROFILED_CALL; the method takes no other arguments. */
+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent_profiled(d3d12_command_list_iface *iface)
+{
+    COMMAND_LIST_PROFILED_CALL(EndEvent, iface);
+}
+
+/* ID3D12GraphicsCommandList::ExecuteIndirect slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect_profiled(d3d12_command_list_iface *iface,
+        ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer,
+        UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset)
+{
+    COMMAND_LIST_PROFILED_CALL(ExecuteIndirect, iface, command_signature, max_command_count, arg_buffer, arg_buffer_offset, count_buffer, count_buffer_offset);
+}
+
+/* ID3D12GraphicsCommandList1::AtomicCopyBufferUINT slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst_buffer, UINT64 dst_offset,
+        ID3D12Resource *src_buffer, UINT64 src_offset,
+        UINT dependent_resource_count, ID3D12Resource * const *dependent_resources,
+        const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges)
+{
+    COMMAND_LIST_PROFILED_CALL(AtomicCopyBufferUINT, iface, dst_buffer, dst_offset, src_buffer, src_offset,
+            dependent_resource_count, dependent_resources, dependent_sub_resource_ranges);
+}
+
+/* ID3D12GraphicsCommandList1::AtomicCopyBufferUINT64 slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst_buffer, UINT64 dst_offset,
+        ID3D12Resource *src_buffer, UINT64 src_offset,
+        UINT dependent_resource_count, ID3D12Resource * const *dependent_resources,
+        const D3D12_SUBRESOURCE_RANGE_UINT64 *dependent_sub_resource_ranges)
+{
+    COMMAND_LIST_PROFILED_CALL(AtomicCopyBufferUINT64, iface, dst_buffer, dst_offset,
+            src_buffer, src_offset,
+            dependent_resource_count, dependent_resources,
+            dependent_sub_resource_ranges);
+}
+
+/* ID3D12GraphicsCommandList1::OMSetDepthBounds slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds_profiled(d3d12_command_list_iface *iface,
+        FLOAT min, FLOAT max)
+{
+    COMMAND_LIST_PROFILED_CALL(OMSetDepthBounds, iface, min, max);
+}
+
+/* ID3D12GraphicsCommandList1::SetSamplePositions slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions_profiled(d3d12_command_list_iface *iface,
+        UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions)
+{
+    COMMAND_LIST_PROFILED_CALL(SetSamplePositions, iface, sample_count, pixel_count, sample_positions);
+}
+
+/* ID3D12GraphicsCommandList1::ResolveSubresourceRegion slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y,
+        ID3D12Resource *src_resource, UINT src_sub_resource_idx,
+        D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode)
+{
+    COMMAND_LIST_PROFILED_CALL(ResolveSubresourceRegion, iface, dst_resource, dst_sub_resource_idx,
+            dst_x, dst_y, src_resource, src_sub_resource_idx,
+            src_rect, format, mode);
+}
+
+/* ID3D12GraphicsCommandList1::SetViewInstanceMask slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask_profiled(d3d12_command_list_iface *iface, UINT mask)
+{
+    COMMAND_LIST_PROFILED_CALL(SetViewInstanceMask, iface, mask);
+}
+
+/* ID3D12GraphicsCommandList2::WriteBufferImmediate slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate_profiled(d3d12_command_list_iface *iface,
+        UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters,
+        const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes)
+{
+    COMMAND_LIST_PROFILED_CALL(WriteBufferImmediate, iface, count, parameters, modes);
+}
+
+/* ID3D12GraphicsCommandList3::SetProtectedResourceSession slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession_profiled(d3d12_command_list_iface *iface,
+        ID3D12ProtectedResourceSession *protected_session)
+{
+    COMMAND_LIST_PROFILED_CALL(SetProtectedResourceSession, iface, protected_session);
+}
+
+/* ID3D12GraphicsCommandList4::BeginRenderPass slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass_profiled(d3d12_command_list_iface *iface,
+        UINT rt_count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets,
+        const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags)
+{
+    COMMAND_LIST_PROFILED_CALL(BeginRenderPass, iface, rt_count, render_targets, depth_stencil, flags);
+}
+
+/* ID3D12GraphicsCommandList4::EndRenderPass slot of d3d12_command_list_vtbl_profiled.
+ * Hands the interface pointer to COMMAND_LIST_PROFILED_CALL; the method takes no other arguments. */
+static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass_profiled(d3d12_command_list_iface *iface)
+{
+    COMMAND_LIST_PROFILED_CALL(EndRenderPass, iface);
+}
+
+/* ID3D12GraphicsCommandList4::InitializeMetaCommand slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand_profiled(d3d12_command_list_iface *iface,
+        ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size)
+{
+    COMMAND_LIST_PROFILED_CALL(InitializeMetaCommand, iface, meta_command, parameter_data, parameter_size);
+}
+
+/* ID3D12GraphicsCommandList4::ExecuteMetaCommand slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand_profiled(d3d12_command_list_iface *iface,
+        ID3D12MetaCommand *meta_command, const void *parameter_data, SIZE_T parameter_size)
+{
+    COMMAND_LIST_PROFILED_CALL(ExecuteMetaCommand, iface, meta_command, parameter_data, parameter_size);
+}
+
+/* ID3D12GraphicsCommandList4::BuildRaytracingAccelerationStructure slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure_profiled(d3d12_command_list_iface *iface,
+        const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT num_postbuild_info_descs,
+        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs)
+{
+    COMMAND_LIST_PROFILED_CALL(BuildRaytracingAccelerationStructure, iface, desc, num_postbuild_info_descs,
+            postbuild_info_descs);
+}
+
+/* ID3D12GraphicsCommandList4::EmitRaytracingAccelerationStructurePostbuildInfo slot of
+ * d3d12_command_list_vtbl_profiled. Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo_profiled(d3d12_command_list_iface *iface,
+        const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, UINT num_acceleration_structures,
+        const D3D12_GPU_VIRTUAL_ADDRESS *src_data)
+{
+    COMMAND_LIST_PROFILED_CALL(EmitRaytracingAccelerationStructurePostbuildInfo, iface, desc, num_acceleration_structures, src_data);
+}
+
+/* ID3D12GraphicsCommandList4::CopyRaytracingAccelerationStructure slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure_profiled(d3d12_command_list_iface *iface,
+        D3D12_GPU_VIRTUAL_ADDRESS dst_data, D3D12_GPU_VIRTUAL_ADDRESS src_data,
+        D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode)
+{
+    COMMAND_LIST_PROFILED_CALL(CopyRaytracingAccelerationStructure, iface, dst_data, src_data, mode);
+}
+
+/* ID3D12GraphicsCommandList4::SetPipelineState1 slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1_profiled(d3d12_command_list_iface *iface,
+        ID3D12StateObject *state_object)
+{
+    COMMAND_LIST_PROFILED_CALL(SetPipelineState1, iface, state_object);
+}
+
+/* ID3D12GraphicsCommandList4::DispatchRays slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays_profiled(d3d12_command_list_iface *iface,
+        const D3D12_DISPATCH_RAYS_DESC *desc)
+{
+    COMMAND_LIST_PROFILED_CALL(DispatchRays, iface, desc);
+}
+
+/* ID3D12GraphicsCommandList5::RSSetShadingRate slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate_profiled(d3d12_command_list_iface *iface,
+        D3D12_SHADING_RATE base, const D3D12_SHADING_RATE_COMBINER *combiners)
+{
+    COMMAND_LIST_PROFILED_CALL(RSSetShadingRate, iface, base, combiners);
+}
+
+/* ID3D12GraphicsCommandList5::RSSetShadingRateImage slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage_profiled(d3d12_command_list_iface *iface,
+        ID3D12Resource *image)
+{
+    COMMAND_LIST_PROFILED_CALL(RSSetShadingRateImage, iface, image);
+}
+
+/* ID3D12GraphicsCommandList6::DispatchMesh slot of d3d12_command_list_vtbl_profiled.
+ * Hands every argument, unchanged, to COMMAND_LIST_PROFILED_CALL. */
+static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh_profiled(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z)
+{
+    COMMAND_LIST_PROFILED_CALL(DispatchMesh, iface, x, y, z);
+}
+
+/* Profiling variant of the command list vtable.
+ *
+ * Initializers are positional, so the entry order must match the slot order of
+ * ID3D12GraphicsCommandList6Vtbl exactly; the section comments follow the interface hierarchy.
+ * The IUnknown, ID3D12Object, ID3D12DeviceChild and ID3D12CommandList entries, as well as
+ * Close, Reset and ClearState, point at the regular implementations. Every other slot points
+ * at the corresponding *_profiled wrapper. */
+static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl_profiled =
+{
+    /* IUnknown methods */
+    d3d12_command_list_QueryInterface,
+    d3d12_command_list_AddRef,
+    d3d12_command_list_Release,
+    /* ID3D12Object methods */
+    d3d12_command_list_GetPrivateData,
+    d3d12_command_list_SetPrivateData,
+    d3d12_command_list_SetPrivateDataInterface,
+    (void *)d3d12_object_SetName,
+    /* ID3D12DeviceChild methods */
+    d3d12_command_list_GetDevice,
+    /* ID3D12CommandList methods */
+    d3d12_command_list_GetType,
+    /* ID3D12GraphicsCommandList methods */
+    d3d12_command_list_Close,
+    d3d12_command_list_Reset,
+    d3d12_command_list_ClearState,
+    d3d12_command_list_DrawInstanced_profiled,
+    d3d12_command_list_DrawIndexedInstanced_profiled,
+    d3d12_command_list_Dispatch_profiled,
+    d3d12_command_list_CopyBufferRegion_profiled,
+    d3d12_command_list_CopyTextureRegion_profiled,
+    d3d12_command_list_CopyResource_profiled,
+    d3d12_command_list_CopyTiles_profiled,
+    d3d12_command_list_ResolveSubresource_profiled,
+    d3d12_command_list_IASetPrimitiveTopology_profiled,
+    d3d12_command_list_RSSetViewports_profiled,
+    d3d12_command_list_RSSetScissorRects_profiled,
+    d3d12_command_list_OMSetBlendFactor_profiled,
+    d3d12_command_list_OMSetStencilRef_profiled,
+    d3d12_command_list_SetPipelineState_profiled,
+    d3d12_command_list_ResourceBarrier_profiled,
+    d3d12_command_list_ExecuteBundle_profiled,
+    d3d12_command_list_SetDescriptorHeaps_profiled,
+    d3d12_command_list_SetComputeRootSignature_profiled,
+    d3d12_command_list_SetGraphicsRootSignature_profiled,
+    d3d12_command_list_SetComputeRootDescriptorTable_profiled,
+    d3d12_command_list_SetGraphicsRootDescriptorTable_profiled,
+    d3d12_command_list_SetComputeRoot32BitConstant_profiled,
+    d3d12_command_list_SetGraphicsRoot32BitConstant_profiled,
+    d3d12_command_list_SetComputeRoot32BitConstants_profiled,
+    d3d12_command_list_SetGraphicsRoot32BitConstants_profiled,
+    d3d12_command_list_SetComputeRootConstantBufferView_profiled,
+    d3d12_command_list_SetGraphicsRootConstantBufferView_profiled,
+    d3d12_command_list_SetComputeRootShaderResourceView_profiled,
+    d3d12_command_list_SetGraphicsRootShaderResourceView_profiled,
+    d3d12_command_list_SetComputeRootUnorderedAccessView_profiled,
+    d3d12_command_list_SetGraphicsRootUnorderedAccessView_profiled,
+    d3d12_command_list_IASetIndexBuffer_profiled,
+    d3d12_command_list_IASetVertexBuffers_profiled,
+    d3d12_command_list_SOSetTargets_profiled,
+    d3d12_command_list_OMSetRenderTargets_profiled,
+    d3d12_command_list_ClearDepthStencilView_profiled,
+    d3d12_command_list_ClearRenderTargetView_profiled,
+    d3d12_command_list_ClearUnorderedAccessViewUint_profiled,
+    d3d12_command_list_ClearUnorderedAccessViewFloat_profiled,
+    d3d12_command_list_DiscardResource_profiled,
+    d3d12_command_list_BeginQuery_profiled,
+    d3d12_command_list_EndQuery_profiled,
+    d3d12_command_list_ResolveQueryData_profiled,
+    d3d12_command_list_SetPredication_profiled,
+    d3d12_command_list_SetMarker_profiled,
+    d3d12_command_list_BeginEvent_profiled,
+    d3d12_command_list_EndEvent_profiled,
+    d3d12_command_list_ExecuteIndirect_profiled,
+    /* ID3D12GraphicsCommandList1 methods */
+    d3d12_command_list_AtomicCopyBufferUINT_profiled,
+    d3d12_command_list_AtomicCopyBufferUINT64_profiled,
+    d3d12_command_list_OMSetDepthBounds_profiled,
+    d3d12_command_list_SetSamplePositions_profiled,
+    d3d12_command_list_ResolveSubresourceRegion_profiled,
+    d3d12_command_list_SetViewInstanceMask_profiled,
+    /* ID3D12GraphicsCommandList2 methods */
+    d3d12_command_list_WriteBufferImmediate_profiled,
+    /* ID3D12GraphicsCommandList3 methods */
+    d3d12_command_list_SetProtectedResourceSession_profiled,
+    /* ID3D12GraphicsCommandList4 methods */
+    d3d12_command_list_BeginRenderPass_profiled,
+    d3d12_command_list_EndRenderPass_profiled,
+    d3d12_command_list_InitializeMetaCommand_profiled,
+    d3d12_command_list_ExecuteMetaCommand_profiled,
+    d3d12_command_list_BuildRaytracingAccelerationStructure_profiled,
+    d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo_profiled,
+    d3d12_command_list_CopyRaytracingAccelerationStructure_profiled,
+    d3d12_command_list_SetPipelineState1_profiled,
+    d3d12_command_list_DispatchRays_profiled,
+    /* ID3D12GraphicsCommandList5 methods */
+    d3d12_command_list_RSSetShadingRate_profiled,
+    d3d12_command_list_RSSetShadingRateImage_profiled,
+    /* ID3D12GraphicsCommandList6 methods */
+    d3d12_command_list_DispatchMesh_profiled,
+};
+
+#endif
--- a/libs/vkd3d/command_list_vkd3d_ext.c
+++ b/libs/vkd3d/command_list_vkd3d_ext.c
@ -0,0 +1,116 @@
+/*
+ * Copyright 2021 NVIDIA Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_private.h"
+
+/* Recovers the owning struct d3d12_command_list from a pointer to its embedded
+ * ID3D12GraphicsCommandListExt_iface member. No NULL check is performed on iface. */
+static inline struct d3d12_command_list *d3d12_command_list_from_ID3D12GraphicsCommandListExt(ID3D12GraphicsCommandListExt *iface)
+{
+    return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandListExt_iface);
+}
+
+extern ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(d3d12_command_list_iface *iface);
+
+/* IUnknown::AddRef for the extension interface.
+ * Forwards to d3d12_command_list_AddRef() on the owning command list's
+ * ID3D12GraphicsCommandList_iface and returns its result. */
+ULONG STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_AddRef(ID3D12GraphicsCommandListExt *iface)
+{
+    struct d3d12_command_list *list;
+
+    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+    return d3d12_command_list_AddRef(&list->ID3D12GraphicsCommandList_iface);
+}
+
+extern ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *iface);
+
+/* IUnknown::Release for the extension interface.
+ * Forwards to d3d12_command_list_Release() on the owning command list's
+ * ID3D12GraphicsCommandList_iface and returns its result. */
+static ULONG STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_Release(ID3D12GraphicsCommandListExt *iface)
+{
+    struct d3d12_command_list *list;
+
+    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+    return d3d12_command_list_Release(&list->ID3D12GraphicsCommandList_iface);
+}
+
+extern HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(d3d12_command_list_iface *iface,
+        REFIID iid, void **object);
+
+/* IUnknown::QueryInterface for the extension interface.
+ * Traces the request, then lets d3d12_command_list_QueryInterface() on the owning
+ * command list's ID3D12GraphicsCommandList_iface resolve iid into *out. */
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_QueryInterface(ID3D12GraphicsCommandListExt *iface,
+        REFIID iid, void **out)
+{
+    struct d3d12_command_list *list;
+
+    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);
+
+    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+    return d3d12_command_list_QueryInterface(&list->ID3D12GraphicsCommandList_iface, iid, out);
+}
+
+/* ID3D12GraphicsCommandListExt::GetVulkanHandle.
+ * Writes the command list's vk_command_buffer to *pVkCommandBuffer.
+ * Returns E_INVALIDARG when pVkCommandBuffer is NULL, S_OK otherwise. */
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_GetVulkanHandle(ID3D12GraphicsCommandListExt *iface,
+        VkCommandBuffer *pVkCommandBuffer)
+{
+    struct d3d12_command_list *list;
+
+    TRACE("iface %p, pVkCommandBuffer %p.\n", iface, pVkCommandBuffer);
+
+    if (pVkCommandBuffer == NULL)
+        return E_INVALIDARG;
+
+    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+    *pVkCommandBuffer = list->vk_command_buffer;
+
+    return S_OK;
+}
+
+/* Tag values for the key/value array handed to VkCuLaunchInfoNVX::pExtras:
+ * BUFFER_POINTER is followed by the parameter buffer, BUFFER_SIZE by a pointer to its size,
+ * and END terminates the list.
+ * NOTE(review): the values look like the CUDA driver API's CU_LAUNCH_PARAM_* constants - confirm. */
+#define CU_LAUNCH_PARAM_BUFFER_POINTER (const void*)0x01
+#define CU_LAUNCH_PARAM_BUFFER_SIZE    (const void*)0x02
+#define CU_LAUNCH_PARAM_END            (const void*)0x00
+
+/* ID3D12GraphicsCommandListExt::LaunchCubinShader.
+ *
+ * Records a vkCmdCuLaunchKernelNVX() call into the command list's Vulkan command buffer.
+ * block_x/block_y/block_z are used as the grid dimensions, while the per-block dimensions
+ * come from the handle. Kernel arguments are passed as one opaque buffer (params, param_size)
+ * through the pExtras key/value array.
+ *
+ * Returns E_INVALIDARG if handle or params is NULL, or if any of block_x, block_y, block_z
+ * or param_size is zero; S_OK otherwise. */
+static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_LaunchCubinShader(ID3D12GraphicsCommandListExt *iface, D3D12_CUBIN_DATA_HANDLE *handle, UINT32 block_x, UINT32 block_y, UINT32 block_z, const void *params, UINT32 param_size)
+{
+    VkCuLaunchInfoNVX launchInfo = { VK_STRUCTURE_TYPE_CU_LAUNCH_INFO_NVX };
+    const struct vkd3d_vk_device_procs *vk_procs;
+    /* The CU_LAUNCH_PARAM_BUFFER_SIZE entry follows the CUDA driver convention of pointing at a size_t.
+     * Pointing it at the 32-bit param_size argument would let a 64-bit reader pick up
+     * four bytes of whatever happens to sit next to it, so widen into a local first. */
+    size_t param_buffer_size = param_size;
+
+    const void *config[] = {
+        CU_LAUNCH_PARAM_BUFFER_POINTER, params,
+        CU_LAUNCH_PARAM_BUFFER_SIZE,    &param_buffer_size,
+        CU_LAUNCH_PARAM_END
+    };
+
+    struct d3d12_command_list *command_list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
+    TRACE("iface %p, handle %p, block_x %u,  block_y %u, block_z %u, params %p, param_size %u \n", iface, handle, block_x, block_y, block_z, params, param_size);
+    if (!handle || !block_x || !block_y || !block_z || !params || !param_size)
+        return E_INVALIDARG;
+
+    launchInfo.function = handle->vkCuFunction;
+    launchInfo.gridDimX = block_x;
+    launchInfo.gridDimY = block_y;
+    launchInfo.gridDimZ = block_z;
+    launchInfo.blockDimX = handle->blockX;
+    launchInfo.blockDimY = handle->blockY;
+    launchInfo.blockDimZ = handle->blockZ;
+    launchInfo.sharedMemBytes = 0;
+    /* Arguments travel in the packed buffer described by config, not as individual pParams. */
+    launchInfo.paramCount = 0;
+    launchInfo.pParams = NULL;
+    launchInfo.extraCount = 1;
+    launchInfo.pExtras = config;
+
+    /* VK_CALL picks up the local named vk_procs. */
+    vk_procs = &command_list->device->vk_procs;
+    VK_CALL(vkCmdCuLaunchKernelNVX(command_list->vk_command_buffer, &launchInfo));
+    return S_OK;
+}
+
+/* Vtable for the ID3D12GraphicsCommandListExt interface of a command list.
+ * Initializers are positional, so the order must match ID3D12GraphicsCommandListExtVtbl.
+ * Defined without 'static', i.e. with external linkage. */
+CONST_VTBL struct ID3D12GraphicsCommandListExtVtbl d3d12_command_list_vkd3d_ext_vtbl =
+{
+    /* IUnknown methods */
+    d3d12_command_list_vkd3d_ext_QueryInterface,
+    d3d12_command_list_vkd3d_ext_AddRef,
+    d3d12_command_list_vkd3d_ext_Release,
+
+    /* ID3D12GraphicsCommandListExt methods */
+    d3d12_command_list_vkd3d_ext_GetVulkanHandle,
+    d3d12_command_list_vkd3d_ext_LaunchCubinShader
+};
+
--- a/libs/vkd3d/debug_ring.c
+++ b/libs/vkd3d/debug_ring.c
@ -0,0 +1,531 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_private.h"
+#include "vkd3d_debug.h"
+#include "vkd3d_common.h"
+#include "vkd3d_platform.h"
+#include <stdio.h>
+
+/* Populates *info with the specialization constants used by a debug-ring enabled shader.
+ *
+ * The constant payload (shader hash, the two device addresses of the debug ring and the
+ * ring size in 32-bit words) is stored in info->constants, and info->spec_info is set up to
+ * reference it through four map entries:
+ *   constant 0 -> hash, constant 1 -> atomic_bda, constant 2 -> host_bda, constant 3 -> ring_words. */
+void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
+        struct vkd3d_shader_debug_ring_spec_info *info, vkd3d_shader_hash_t hash)
+{
+    const struct vkd3d_shader_debug_ring *debug_ring = &device->debug_ring;
+
+    /* Constant payload. */
+    info->constants.hash = hash;
+    info->constants.atomic_bda = debug_ring->atomic_device_address;
+    info->constants.host_bda = debug_ring->ring_device_address;
+    info->constants.ring_words = debug_ring->ring_size / sizeof(uint32_t);
+
+    /* One map entry per constant ID. */
+    info->map_entries[0].constantID = 0;
+    info->map_entries[0].size = sizeof(uint64_t);
+    info->map_entries[0].offset = offsetof(struct vkd3d_shader_debug_ring_spec_constants, hash);
+
+    info->map_entries[1].constantID = 1;
+    info->map_entries[1].size = sizeof(uint64_t);
+    info->map_entries[1].offset = offsetof(struct vkd3d_shader_debug_ring_spec_constants, atomic_bda);
+
+    info->map_entries[2].constantID = 2;
+    info->map_entries[2].size = sizeof(uint64_t);
+    info->map_entries[2].offset = offsetof(struct vkd3d_shader_debug_ring_spec_constants, host_bda);
+
+    info->map_entries[3].constantID = 3;
+    info->map_entries[3].size = sizeof(uint32_t);
+    info->map_entries[3].offset = offsetof(struct vkd3d_shader_debug_ring_spec_constants, ring_words);
+
+    /* Tie payload and map entries together. */
+    info->spec_info.mapEntryCount = 4;
+    info->spec_info.pMapEntries = info->map_entries;
+    info->spec_info.dataSize = sizeof(info->constants);
+    info->spec_info.pData = &info->constants;
+}
+
+/* Both READ_RING_WORD variants expect a variable named 'ring' in the calling scope and wrap
+ * 'off' by masking with (number of 32-bit ring words - 1), which only wraps correctly when
+ * that number is a power of two. The _ACQUIRE variant performs the load with acquire ordering. */
+#define READ_RING_WORD(off) ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)]
+#define READ_RING_WORD_ACQUIRE(off) \
+    vkd3d_atomic_uint32_load_explicit(&ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)], \
+    vkd3d_memory_order_acquire)
+/* The first word of a message carries the cookie in the bits selected by DEBUG_CHANNEL_WORD_MASK
+ * and the message's word count in the remaining low 4 bits. */
+#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
+#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
+
+/* Maps a patch command token to the short label used in debug ring log output.
+ * Tokens without a label yield "???". The returned string is a literal and must not be freed. */
+static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
+{
+    const char *label = "???";
+
+    switch (token)
+    {
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32:
+            label = "RootConst";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
+            label = "IBO VA LO";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
+            label = "IBO VA HI";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE:
+            label = "IBO Size";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT:
+            label = "IBO Type";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
+            label = "VBO VA LO";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
+            label = "VBO VA HI";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE:
+            label = "VBO Size";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE:
+            label = "VBO Stride";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
+            label = "ROOT VA LO";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
+            label = "ROOT VA HI";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT:
+            label = "Vertex Count";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT:
+            label = "Index Count";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT:
+            label = "Instance Count";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX:
+            label = "First Index";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX:
+            label = "First Vertex";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE:
+            label = "First Instance";
+            break;
+        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET:
+            label = "Vertex Offset";
+            break;
+        default:
+            break;
+    }
+
+    return label;
+}
+
+/* Returns true for the tokens that copy one half (LO or HI) of an IBO, VBO or ROOT
+ * virtual address; the caller prints those values in hexadecimal. False for everything else. */
+static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
+{
+    return token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO ||
+            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI ||
+            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO ||
+            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI ||
+            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO ||
+            token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI;
+}
+
+/* Formats one debug ring message and emits it through INFO().
+ *
+ * word_offset is the ring word index of the message's first word (the cookie / word count word,
+ * which the caller has already decoded); message_word_count is the total size of the message
+ * in words, header included. The 8-word header is laid out as:
+ *   word 0     cookie and word count (not re-read here)
+ *   words 1-2  shader hash, low then high 32 bits
+ *   word 3     debug instance
+ *   words 4-6  debug thread ID (3 components)
+ *   word 7     packed format bits, 2 bits per payload word
+ * Everything after the header is payload.
+ *
+ * Returns false (after logging an error) when message_word_count is smaller than the header,
+ * true otherwise. */
+static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
+        uint32_t word_offset, uint32_t message_word_count)
+{
+    uint32_t i, debug_instance, debug_thread_id[3], fmt;
+    char message_buffer[4096];
+    uint64_t shader_hash;
+    size_t len, avail;
+
+    if (message_word_count < 8)
+    {
+        ERR("Message word count %u is invalid.\n", message_word_count);
+        return false;
+    }
+
+    shader_hash = (uint64_t)READ_RING_WORD(word_offset + 1) | ((uint64_t)READ_RING_WORD(word_offset + 2) << 32);
+    debug_instance = READ_RING_WORD(word_offset + 3);
+    for (i = 0; i < 3; i++)
+        debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
+    fmt = READ_RING_WORD(word_offset + 7);
+
+    /* Skip the header; from here on only the payload is addressed. */
+    word_offset += 8;
+    message_word_count -= 8;
+
+    if (shader_hash == 0)
+    {
+        /* We got this from our internal debug shaders. Pretty-print.
+         * Make sure the log is sortable for easier debug.
+         * TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
+         * but that is overengineering at this time ... */
+        snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
+                debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
+
+        /* The payload size selects the message flavor; any other size prints the prefix only. */
+        if (message_word_count == 2)
+        {
+            len = strlen(message_buffer);
+            avail = sizeof(message_buffer) - len;
+            snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
+                    READ_RING_WORD(word_offset + 0),
+                    READ_RING_WORD(word_offset + 1));
+        }
+        else if (message_word_count == 4)
+        {
+            /* Payload: token, destination offset, source offset, copied value. */
+            union { uint32_t u32; float f32; int32_t s32; } value;
+            enum vkd3d_patch_command_token token;
+            uint32_t dst_offset;
+            uint32_t src_offset;
+
+            len = strlen(message_buffer);
+            avail = sizeof(message_buffer) - len;
+
+            token = READ_RING_WORD(word_offset + 0);
+            dst_offset = READ_RING_WORD(word_offset + 1);
+            src_offset = READ_RING_WORD(word_offset + 2);
+            value.u32 = READ_RING_WORD(word_offset + 3);
+
+            if (vkd3d_patch_command_token_is_hex(token))
+            {
+                snprintf(message_buffer + len, avail, "%s <- #%08x",
+                        vkd3d_patch_command_token_str(token), value.u32);
+            }
+            else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
+            {
+                /* The value's type is not known here, so print all three interpretations. */
+                snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
+                        vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
+            }
+            else
+            {
+                snprintf(message_buffer + len, avail, "%s <- %d",
+                        vkd3d_patch_command_token_str(token), value.s32);
+            }
+
+            len = strlen(message_buffer);
+            avail = sizeof(message_buffer) - len;
+            snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
+        }
+    }
+    else
+    {
+        snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
+                shader_hash, debug_instance,
+                debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
+
+        /* Append each payload word, formatted according to its 2-bit field in fmt. */
+        for (i = 0; i < message_word_count; i++)
+        {
+            union
+            {
+                float f32;
+                uint32_t u32;
+                int32_t i32;
+            } u;
+            const char *delim;
+            u.u32 = READ_RING_WORD(word_offset + i);
+
+            len = strlen(message_buffer);
+            /* Stop once the buffer has no room left for anything but the terminator. */
+            if (len + 1 >= sizeof(message_buffer))
+                break;
+            avail = sizeof(message_buffer) - len;
+
+            delim = i == 0 ? " " : ", ";
+
+#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
+#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
+#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
+            switch ((fmt >> (2u * i)) & 3u)
+            {
+                case VKD3D_DEBUG_CHANNEL_FMT_HEX:
+                    snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
+                    break;
+
+                case VKD3D_DEBUG_CHANNEL_FMT_I32:
+                    snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
+                    break;
+
+                case VKD3D_DEBUG_CHANNEL_FMT_F32:
+                    snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
+                    break;
+
+                default:
+                    snprintf(message_buffer + len, avail, "%s????", delim);
+                    break;
+            }
+        }
+    }
+
+    INFO("%s\n", message_buffer);
+    return true;
+}
+
+/* Thread entry point that drains the shader debug ring.
+ *
+ * arg is the struct d3d12_device whose debug_ring is serviced. On every pass the thread waits
+ * on ring_cond while the ring is still active, then reads the counter in the mapped control
+ * block, prints all messages that appeared since the previous pass and zeroes the words it
+ * consumed. The loop ends after the pass in which ring->active was observed to be false.
+ * If ring->device_lost is set at that point, one more best-effort scan is made over whatever
+ * is left in the ring.
+ *
+ * Always returns NULL. */
+void *vkd3d_shader_debug_ring_thread_main(void *arg)
+{
+    uint32_t last_counter, new_counter, count, i, cookie_word_count;
+    volatile const uint32_t *ring_counter; /* Atomic updated by the GPU. */
+    struct vkd3d_shader_debug_ring *ring;
+    struct d3d12_device *device = arg;
+    bool is_active = true;
+    uint32_t *ring_base;
+    uint32_t word_count;
+    size_t ring_mask;
+
+    ring = &device->debug_ring;
+    ring_mask = (ring->ring_size / sizeof(uint32_t)) - 1;
+    ring_counter = ring->mapped_control_block;
+    ring_base = ring->mapped_ring;
+    last_counter = 0;
+
+    vkd3d_set_thread_name("debug-ring");
+
+    while (is_active)
+    {
+        /* Sleep until signalled, unless the ring was already deactivated.
+         * The work below still runs once after deactivation so the last messages get printed. */
+        pthread_mutex_lock(&ring->ring_lock);
+        if (ring->active)
+            pthread_cond_wait(&ring->ring_cond, &ring->ring_lock);
+        is_active = ring->active;
+        pthread_mutex_unlock(&ring->ring_lock);
+
+        new_counter = *ring_counter;
+
+        if (last_counter != new_counter)
+        {
+            count = (new_counter - last_counter) & ring_mask;
+
+            /* Assume that each iteration can safely use 1/4th of the buffer to avoid WAR hazards. */
+            if (count > (ring->ring_size / 16))
+            {
+                ERR("Debug ring is probably too small (%u new words this iteration), increase size to avoid risk of dropping messages.\n",
+                    count);
+            }
+
+            for (i = 0; i < count; )
+            {
+                /* The debug ring shader has "release" semantics for the word count write,
+                 * so just make sure the reads don't get reordered here. */
+                cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
+                word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
+
+                if (cookie_word_count == 0)
+                {
+                    ERR("Message was allocated, but write did not complete. last_counter = %u, rewrite new_counter = %u -> %u\n",
+                            last_counter, new_counter, last_counter + i);
+                    /* Rewind the counter, and try again later. */
+                    new_counter = last_counter + i;
+                    break;
+                }
+
+                /* If something is written here, it must be a cookie. */
+                if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) != DEBUG_CHANNEL_WORD_COOKIE)
+                {
+                    ERR("Invalid message word cookie detected, 0x%x.\n", cookie_word_count);
+                    break;
+                }
+
+                if (i + word_count > count)
+                {
+                    ERR("Message word count %u is out of bounds (i = %u, count = %u).\n",
+                            word_count, i, count);
+                    break;
+                }
+
+                if (!vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
+                    break;
+
+                i += word_count;
+            }
+        }
+
+        /* Make sure to clear out any messages we read so that when the ring gets around to
+         * this point again, we can detect unwritten memory.
+         * This relies on having a ring that is large enough, but in practice, if we just make the ring
+         * large enough, there is nothing to worry about. */
+        while (last_counter != new_counter)
+        {
+            ring_base[last_counter & ring_mask] = 0;
+            last_counter++;
+        }
+    }
+
+    if (ring->device_lost)
+    {
+        INFO("Device lost detected, attempting to fish for clues.\n");
+        new_counter = *ring_counter;
+        if (last_counter != new_counter)
+        {
+            count = (new_counter - last_counter) & ring_mask;
+            /* Unlike the regular path, nothing here aborts the scan: words that do not look
+             * like the start of a valid message are skipped one at a time. */
+            for (i = 0; i < count; )
+            {
+                cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
+                word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
+
+                /* This is considered a message if it has the marker and a word count that is in-range. */
+                if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) == DEBUG_CHANNEL_WORD_COOKIE &&
+                        i + word_count <= count &&
+                        vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
+                {
+                    i += word_count;
+                }
+                else
+                {
+                    /* Keep going. */
+                    i++;
+                }
+            }
+        }
+        INFO("Done fishing for clues ...\n");
+    }
+
+    return NULL;
+}
+
+/* Sets up the shader debug ring for a device.
+ *
+ * The ring is opt-in: if VKD3D_SHADER_DEBUG_RING_SIZE_LOG2 is not set, *ring is left zeroed
+ * (inactive) and S_OK is returned. Otherwise this creates and maps a host buffer holding the
+ * ring payload and a small control block buffer, zeroes both mappings, and starts the ring thread.
+ *
+ * Returns S_OK on success (or when the feature is disabled), E_INVALIDARG if the requested size
+ * or the device cannot support the ring, and E_OUTOFMEMORY if any resource could not be created.
+ * On every failure path *ring is zeroed again, so vkd3d_shader_debug_ring_cleanup() sees it
+ * as inactive and does not touch uninitialized synchronization objects. */
+HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
+                                     struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    D3D12_HEAP_PROPERTIES heap_properties;
+    D3D12_RESOURCE_DESC1 resource_desc;
+    VkMemoryPropertyFlags memory_props;
+    unsigned long ring_size_log2;
+    char env[VKD3D_PATH_MAX];
+
+    memset(ring, 0, sizeof(*ring));
+
+    if (!vkd3d_get_env_var("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2", env, sizeof(env)))
+        return S_OK;
+
+    /* Shifting by the full width of the type (or more) is undefined behavior, so reject it
+     * up front while the ring is still zeroed. */
+    ring_size_log2 = strtoul(env, NULL, 0);
+    if (ring_size_log2 >= 8 * sizeof(ring->ring_size))
+    {
+        ERR("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2 value %lu is out of range.\n", ring_size_log2);
+        return E_INVALIDARG;
+    }
+
+    ring->active = true;
+
+    ring->ring_size = (size_t)1 << ring_size_log2;
+    ring->control_block_size = 4096;
+
+    INFO("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
+
+    if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
+    {
+        ERR("Buffer device address must be supported to use VKD3D_SHADER_DEBUG_RING feature.\n");
+        /* Nothing was created yet; do not leave the ring flagged as active. */
+        memset(ring, 0, sizeof(*ring));
+        return E_INVALIDARG;
+    }
+
+    memset(&heap_properties, 0, sizeof(heap_properties));
+    heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_WRITE_BACK;
+    heap_properties.Type = D3D12_HEAP_TYPE_CUSTOM;
+    heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
+
+    memset(&resource_desc, 0, sizeof(resource_desc));
+    resource_desc.Width = ring->ring_size;
+    resource_desc.Height = 1;
+    resource_desc.DepthOrArraySize = 1;
+    resource_desc.MipLevels = 1;
+    resource_desc.Format = DXGI_FORMAT_UNKNOWN;
+    resource_desc.SampleDesc.Count = 1;
+    resource_desc.SampleDesc.Quality = 0;
+    resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+    resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+    if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
+            &resource_desc, &ring->host_buffer)))
+        goto err_free_buffers;
+
+    memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+            VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
+            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    /* If we're doing breadcrumb debugging, we also need to be able to read debug ring messages
+     * from a crash, so we cannot rely on being able to copy the device payload back to host.
+     * Use PCI-e BAR + UNCACHED + DEVICE_COHERENT if we must. */
+    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
+    {
+        INFO("Using debug ring with breadcrumbs, opting in to device uncached payload buffer.\n");
+        /* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
+         * host reads, so make extra sure. */
+        if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
+        {
+            memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
+            INFO("Enabling uncached device memory for debug ring.\n");
+        }
+    }
+
+    if (FAILED(vkd3d_allocate_buffer_memory(device, ring->host_buffer,
+            memory_props, &ring->host_buffer_memory)))
+        goto err_free_buffers;
+
+    /* The control block reuses the same description, only with a different size and heap type. */
+    resource_desc.Width = ring->control_block_size;
+    memset(&heap_properties, 0, sizeof(heap_properties));
+    heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
+
+    if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
+            &resource_desc, &ring->device_atomic_buffer)))
+        goto err_free_buffers;
+
+    memory_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+            VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
+    {
+        /* Expect crashes since we won't have time to flush caches.
+         * We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
+         * host reads, so make extra sure. */
+        if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
+            memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
+    }
+
+    if (FAILED(vkd3d_allocate_buffer_memory(device, ring->device_atomic_buffer,
+            memory_props, &ring->device_atomic_buffer_memory)))
+        goto err_free_buffers;
+
+    if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory.vk_memory,
+            0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_ring)) != VK_SUCCESS)
+        goto err_free_buffers;
+
+    if (VK_CALL(vkMapMemory(device->vk_device, ring->device_atomic_buffer_memory.vk_memory,
+            0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_control_block)) != VK_SUCCESS)
+        goto err_free_buffers;
+
+    ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer);
+    ring->atomic_device_address = vkd3d_get_buffer_device_address(device, ring->device_atomic_buffer);
+
+    memset(ring->mapped_control_block, 0, ring->control_block_size);
+    memset(ring->mapped_ring, 0, ring->ring_size);
+
+    if (pthread_mutex_init(&ring->ring_lock, NULL) != 0)
+        goto err_free_buffers;
+    if (pthread_cond_init(&ring->ring_cond, NULL) != 0)
+        goto err_destroy_mutex;
+
+    if (pthread_create(&ring->ring_thread, NULL, vkd3d_shader_debug_ring_thread_main, device) != 0)
+    {
+        ERR("Failed to create ring thread.\n");
+        goto err_destroy_cond;
+    }
+
+    return S_OK;
+
+    /* Unwind in reverse order of construction: each label only destroys objects which were
+     * successfully initialized before the jump, then falls through to the earlier stages. */
+err_destroy_cond:
+    pthread_cond_destroy(&ring->ring_cond);
+err_destroy_mutex:
+    pthread_mutex_destroy(&ring->ring_lock);
+err_free_buffers:
+    VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
+    VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
+    vkd3d_free_device_memory(device, &ring->host_buffer_memory);
+    vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
+    memset(ring, 0, sizeof(*ring));
+    return E_OUTOFMEMORY;
+}
+
+/* Stops the debug ring thread and releases the ring's buffers and memory.
+ * Does nothing if the ring is not active, which also makes a repeated call harmless
+ * since the first call clears the active flag. */
+void vkd3d_shader_debug_ring_cleanup(struct vkd3d_shader_debug_ring *ring,
+                                     struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    if (!ring->active)
+        return;
+
+    /* Clear the flag and signal under the lock, then wait for the thread to exit
+     * before the lock and condition variable are destroyed. */
+    pthread_mutex_lock(&ring->ring_lock);
+    ring->active = false;
+    pthread_cond_signal(&ring->ring_cond);
+    pthread_mutex_unlock(&ring->ring_lock);
+    pthread_join(ring->ring_thread, NULL);
+    pthread_mutex_destroy(&ring->ring_lock);
+    pthread_cond_destroy(&ring->ring_cond);
+
+    /* The thread has been joined, so nothing reads the mappings anymore. */
+    VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
+    VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
+    vkd3d_free_device_memory(device, &ring->host_buffer_memory);
+    vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
+}
+
+/* Serializes device-lost teardown of the debug ring across threads. */
+static pthread_mutex_t debug_ring_teardown_lock = PTHREAD_MUTEX_INITIALIZER;
+
+/* Wakes the debug ring thread so that it looks at the ring.
+ *
+ * With device_lost set, the ring is flagged as lost and torn down right away, which joins the
+ * ring thread. Otherwise the ring thread is merely signalled. */
+void vkd3d_shader_debug_ring_kick(struct vkd3d_shader_debug_ring *ring, struct d3d12_device *device, bool device_lost)
+{
+    if (device_lost)
+    {
+        /* Need a global lock here since multiple threads can observe device lost at the same time. */
+        pthread_mutex_lock(&debug_ring_teardown_lock);
+        {
+            ring->device_lost = true;
+            /* We're going to die or hang after this most likely, so make sure we get to see all messages the
+             * GPU had to write. Just cleanup now. */
+            vkd3d_shader_debug_ring_cleanup(ring, device);
+        }
+        pthread_mutex_unlock(&debug_ring_teardown_lock);
+    }
+    else if (ring->active)
+    {
+        /* Only signal while the ring is active: an inactive ring either never initialized
+         * ring_cond or has already destroyed it during cleanup. */
+        pthread_cond_signal(&ring->ring_cond);
+    }
+}
--- a/libs/vkd3d/descriptor_debug.c
+++ b/libs/vkd3d/descriptor_debug.c
@ -0,0 +1,530 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
+
+#include "vkd3d_descriptor_debug.h"
+#include "vkd3d_threads.h"
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+/* Guards the one-time setup performed by vkd3d_descriptor_debug_init(). */
+static pthread_once_t debug_once = PTHREAD_ONCE_INIT;
+/* Held while a log line is written to descriptor_debug_file. */
+static pthread_mutex_t debug_lock = PTHREAD_MUTEX_INITIALIZER;
+/* Set when VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS is present in vkd3d_config_flags. */
+static bool descriptor_debug_active_qa_checks;
+/* Set once the file named by VKD3D_DESCRIPTOR_QA_LOG was opened successfully. */
+static bool descriptor_debug_active_log;
+/* Log file named by VKD3D_DESCRIPTOR_QA_LOG; stays NULL if unset or if fopen() failed. */
+static FILE *descriptor_debug_file;
+
+/* State for descriptor QA checking: a host-mapped buffer plus the thread which reports
+ * faults recorded in that buffer. */
+struct vkd3d_descriptor_qa_global_info
+{
+    /* Host mapping of device_allocation. */
+    struct vkd3d_descriptor_qa_global_buffer_data *data;
+    /* Describes the whole of vk_buffer (offset 0, full width). */
+    VkDescriptorBufferInfo descriptor;
+    VkBuffer vk_buffer;
+    struct vkd3d_device_memory_allocation device_allocation;
+    /* Number of cookies the live status table can track; larger cookies are not tracked. */
+    unsigned int num_cookies;
+
+    /* Fault reporting thread and the lock/condition variable used to wake it. */
+    pthread_t ring_thread;
+    pthread_mutex_t ring_lock;
+    pthread_cond_t ring_cond;
+    /* True while ring_thread should keep running; cleared under ring_lock to stop it. */
+    bool active;
+};
+
+/* Maps a descriptor QA type mask to a human readable name.
+ * The RAW_VA bit is stripped before matching; it only affects how the combined
+ * texel buffer / storage buffer masks are named. Unrecognized masks yield "?". */
+static const char *debug_descriptor_type(vkd3d_descriptor_qa_flags type_flags)
+{
+    const vkd3d_descriptor_qa_flags base_type = type_flags & ~VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT;
+    const bool with_counter = (type_flags & VKD3D_DESCRIPTOR_QA_TYPE_RAW_VA_BIT) != 0;
+    const char *name;
+
+    switch (base_type)
+    {
+        case 0:
+            name = "NONE";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_SAMPLER_BIT:
+            name = "SAMPLER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_SAMPLED_IMAGE_BIT:
+            name = "SAMPLED_IMAGE";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_IMAGE_BIT:
+            name = "STORAGE_IMAGE";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_BUFFER_BIT:
+            name = "UNIFORM_BUFFER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT:
+            name = "STORAGE_BUFFER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT:
+            name = "UNIFORM_TEXEL_BUFFER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT:
+            name = "STORAGE_TEXEL_BUFFER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_RT_ACCELERATION_STRUCTURE_BIT:
+            name = "RTAS";
+            break;
+
+        /* Combined masks: the raw VA bit selects the "(w/ counter)" spelling. */
+        case VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_TEXEL_BUFFER_BIT | VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT:
+            if (with_counter)
+                name = "STORAGE_TEXEL_BUFFER / STORAGE_BUFFER (w/ counter)";
+            else
+                name = "STORAGE_TEXEL_BUFFER / STORAGE_BUFFER";
+            break;
+
+        case VKD3D_DESCRIPTOR_QA_TYPE_UNIFORM_TEXEL_BUFFER_BIT | VKD3D_DESCRIPTOR_QA_TYPE_STORAGE_BUFFER_BIT:
+            if (with_counter)
+                name = "UNIFORM_TEXEL_BUFFER / STORAGE_BUFFER (w/ counter)";
+            else
+                name = "UNIFORM_TEXEL_BUFFER / STORAGE_BUFFER";
+            break;
+
+        default:
+            name = "?";
+            break;
+    }
+
+    return name;
+}
+
+/* One-time setup for descriptor debugging, run through pthread_once().
+ * Opens the log file named by VKD3D_DESCRIPTOR_QA_LOG (if set and non-empty) and enables
+ * QA checks when VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS is set. */
+static void vkd3d_descriptor_debug_init_once(void)
+{
+    char env[VKD3D_PATH_MAX];
+
+    /* Only inspect env when the lookup reports success, so we never read a buffer
+     * the helper may have left untouched. */
+    if (vkd3d_get_env_var("VKD3D_DESCRIPTOR_QA_LOG", env, sizeof(env)) && env[0] != '\0')
+    {
+        INFO("Enabling VKD3D_DESCRIPTOR_QA_LOG\n");
+        descriptor_debug_file = fopen(env, "w");
+        if (!descriptor_debug_file)
+            ERR("Failed to open file: %s.\n", env);
+        else
+            descriptor_debug_active_log = true;
+    }
+
+    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS)
+    {
+        INFO("Enabling descriptor QA checks!\n");
+        descriptor_debug_active_qa_checks = true;
+    }
+}
+
+/* Runs the descriptor debug setup exactly once per process; safe to call repeatedly. */
+void vkd3d_descriptor_debug_init(void)
+{
+    pthread_once(&debug_once, vkd3d_descriptor_debug_init_once);
+}
+
+/* Returns true if the descriptor QA log file was opened during one-time setup. */
+bool vkd3d_descriptor_debug_active_log(void)
+{
+    return descriptor_debug_active_log;
+}
+
+/* Returns true if descriptor QA checks were enabled during one-time setup. */
+bool vkd3d_descriptor_debug_active_qa_checks(void)
+{
+    return descriptor_debug_active_qa_checks;
+}
+
+/* Computes the byte size of a heap QA buffer holding num_descriptors entries:
+ * the fixed header up to the desc array, followed by one cookie descriptor per entry. */
+VkDeviceSize vkd3d_descriptor_debug_heap_info_size(unsigned int num_descriptors)
+{
+    const size_t header_size = offsetof(struct vkd3d_descriptor_qa_heap_buffer_data, desc);
+    const size_t entry_size = sizeof(struct vkd3d_descriptor_qa_cookie_descriptor);
+
+    return header_size + num_descriptors * entry_size;
+}
+
+/* Marks a cookie as live in the global live status table (one bit per cookie).
+ * Silently ignored when QA tracking is unavailable; cookies beyond the table size
+ * are logged and not tracked. */
+static void vkd3d_descriptor_debug_set_live_status_bit(
+        struct vkd3d_descriptor_qa_global_info *global_info, uint64_t cookie)
+{
+    if (!(global_info && global_info->active && global_info->data))
+        return;
+
+    if (cookie >= global_info->num_cookies)
+    {
+        INFO("Cookie index %"PRIu64" is out of range, cannot be tracked.\n", cookie);
+        return;
+    }
+
+    /* 32 cookies share each table word. */
+    vkd3d_atomic_uint32_or(&global_info->data->live_status_table[cookie / 32],
+            1u << (cookie & 31), vkd3d_memory_order_relaxed);
+}
+
+/* Clears a cookie's bit in the global live status table.
+ * Does nothing when QA tracking is unavailable or the cookie lies beyond the table. */
+static void vkd3d_descriptor_debug_unset_live_status_bit(
+        struct vkd3d_descriptor_qa_global_info *global_info, uint64_t cookie)
+{
+    if (!(global_info && global_info->active && global_info->data))
+        return;
+
+    if (cookie >= global_info->num_cookies)
+        return;
+
+    /* 32 cookies share each table word. */
+    vkd3d_atomic_uint32_and(&global_info->data->live_status_table[cookie / 32],
+            ~(1u << (cookie & 31)), vkd3d_memory_order_relaxed);
+}
+
+/* Forward declaration: the definition follows the string buffer helper macros it relies on,
+ * but the QA thread entry point below already needs to call it. */
+static void vkd3d_descriptor_debug_qa_check_report_fault(
+        struct vkd3d_descriptor_qa_global_info *global_info);
+
+/* Entry point of the descriptor QA thread.
+ * userdata is the struct vkd3d_descriptor_qa_global_info this thread serves.
+ * Each time it is woken it reports a recorded fault (if any) and re-arms the fault latch;
+ * it exits after the wakeup at which global_info->active is observed to be false.
+ * Always returns NULL. */
+static void *vkd3d_descriptor_debug_qa_check_entry(void *userdata)
+{
+    struct vkd3d_descriptor_qa_global_info *global_info = userdata;
+    bool active = true;
+
+    while (active)
+    {
+        /* Don't spin endlessly, this thread is kicked after a successful fence wait. */
+        pthread_mutex_lock(&global_info->ring_lock);
+        /* Skip the wait if teardown already cleared the flag, otherwise its signal could be missed. */
+        if (global_info->active)
+            pthread_cond_wait(&global_info->ring_cond, &global_info->ring_lock);
+        active = global_info->active;
+        pthread_mutex_unlock(&global_info->ring_lock);
+
+        /* The fault state is checked once more even on the final iteration. */
+        if (global_info->data->fault_type != 0)
+        {
+            vkd3d_descriptor_debug_qa_check_report_fault(global_info);
+            ERR("Num failed checks: %u\n", global_info->data->fault_atomic);
+
+            /* Reset the latch so we can get more reports. */
+            vkd3d_atomic_uint32_store_explicit(&global_info->data->fault_type, 0, vkd3d_memory_order_relaxed);
+            vkd3d_atomic_uint32_store_explicit(&global_info->data->fault_atomic, 0, vkd3d_memory_order_release);
+        }
+    }
+
+    return NULL;
+}
+
+/* Wakes the descriptor QA thread so it checks for recorded faults.
+ * A NULL or inactive global_info is ignored. */
+void vkd3d_descriptor_debug_kick_qa_check(struct vkd3d_descriptor_qa_global_info *global_info)
+{
+    if (global_info && global_info->active)
+        pthread_cond_signal(&global_info->ring_cond);
+}
+
+/* Returns the buffer descriptor covering the global QA buffer,
+ * or NULL when no global info exists. */
+const VkDescriptorBufferInfo *vkd3d_descriptor_debug_get_global_info_descriptor(
+        struct vkd3d_descriptor_qa_global_info *global_info)
+{
+    return global_info ? &global_info->descriptor : NULL;
+}
+
+/* Allocates the global descriptor QA state for a device.
+ *
+ * Creates a host-visible buffer containing the fault record and a live status table with one
+ * bit per cookie (num_cookies bits, rounded up to whole 32-bit words), maps it, and starts the
+ * thread which reports faults.
+ *
+ * On success, stores the new object in *out_global_info and returns S_OK.
+ * On failure, everything created so far is released, *out_global_info is left untouched
+ * and an error HRESULT is returned. */
+HRESULT vkd3d_descriptor_debug_alloc_global_info(
+        struct vkd3d_descriptor_qa_global_info **out_global_info, unsigned int num_cookies,
+        struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    struct vkd3d_descriptor_qa_global_info *global_info;
+    D3D12_RESOURCE_DESC1 buffer_desc;
+    D3D12_HEAP_PROPERTIES heap_info;
+    D3D12_HEAP_FLAGS heap_flags;
+    VkResult vr;
+    HRESULT hr;
+
+    global_info = vkd3d_calloc(1, sizeof(*global_info));
+    if (!global_info)
+        return E_OUTOFMEMORY;
+
+    memset(&buffer_desc, 0, sizeof(buffer_desc));
+    buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+    buffer_desc.Width = sizeof(uint32_t) * ((num_cookies + 31) / 32) +
+            offsetof(struct vkd3d_descriptor_qa_global_buffer_data, live_status_table);
+    buffer_desc.Height = 1;
+    buffer_desc.DepthOrArraySize = 1;
+    buffer_desc.MipLevels = 1;
+    buffer_desc.SampleDesc.Count = 1;
+    buffer_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
+    buffer_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
+
+    /* host-visible device memory */
+    memset(&heap_info, 0, sizeof(heap_info));
+    heap_info.Type = D3D12_HEAP_TYPE_UPLOAD;
+
+    heap_flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
+
+    /* global_info->active is still false on the failure paths below, so
+     * vkd3d_descriptor_debug_free_global_info() skips the thread teardown. */
+    if (FAILED(hr = vkd3d_create_buffer(device, &heap_info, heap_flags, &buffer_desc, &global_info->vk_buffer)))
+    {
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return hr;
+    }
+
+    if (FAILED(hr = vkd3d_allocate_buffer_memory(device, global_info->vk_buffer,
+            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
+            &global_info->device_allocation)))
+    {
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return hr;
+    }
+
+    if ((vr = VK_CALL(vkMapMemory(device->vk_device, global_info->device_allocation.vk_memory,
+            0, VK_WHOLE_SIZE, 0, (void**)&global_info->data))))
+    {
+        ERR("Failed to map buffer, vr %d.\n", vr);
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return hresult_from_vk_result(vr);
+    }
+
+    memset(global_info->data, 0, buffer_desc.Width);
+
+    /* The NULL descriptor has cookie 0, and is always considered live. */
+    global_info->data->live_status_table[0] = 1u << 0;
+
+    global_info->descriptor.buffer = global_info->vk_buffer;
+    global_info->descriptor.offset = 0;
+    global_info->descriptor.range = buffer_desc.Width;
+    global_info->num_cookies = num_cookies;
+
+    if (pthread_mutex_init(&global_info->ring_lock, NULL) != 0)
+    {
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return E_OUTOFMEMORY;
+    }
+
+    if (pthread_cond_init(&global_info->ring_cond, NULL) != 0)
+    {
+        pthread_mutex_destroy(&global_info->ring_lock);
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return E_OUTOFMEMORY;
+    }
+
+    /* The flag must be set before the thread starts, since the thread exits as soon as
+     * it observes active == false. */
+    global_info->active = true;
+    if (pthread_create(&global_info->ring_thread, NULL, vkd3d_descriptor_debug_qa_check_entry, global_info) != 0)
+    {
+        /* No thread exists, so drop the flag again and tear down the synchronization objects
+         * here; otherwise the free path would try to join an invalid thread handle. */
+        global_info->active = false;
+        pthread_cond_destroy(&global_info->ring_cond);
+        pthread_mutex_destroy(&global_info->ring_lock);
+        vkd3d_descriptor_debug_free_global_info(global_info, device);
+        return E_OUTOFMEMORY;
+    }
+
+    *out_global_info = global_info;
+    return S_OK;
+}
+
+/* Releases the global descriptor QA state: stops the QA thread if it is running, then frees
+ * the device memory, the buffer and the structure itself. A NULL global_info is ignored.
+ * Also used to unwind a partially constructed object, in which case active is still false
+ * and the thread teardown is skipped. */
+void vkd3d_descriptor_debug_free_global_info(
+        struct vkd3d_descriptor_qa_global_info *global_info,
+        struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+    if (!global_info)
+        return;
+
+    if (global_info->active)
+    {
+        /* Clear the flag and signal under the lock, then wait for the thread to exit
+         * before destroying the lock and condition variable it uses. */
+        pthread_mutex_lock(&global_info->ring_lock);
+        global_info->active = false;
+        pthread_cond_signal(&global_info->ring_cond);
+        pthread_mutex_unlock(&global_info->ring_lock);
+        pthread_join(global_info->ring_thread, NULL);
+        pthread_mutex_destroy(&global_info->ring_lock);
+        pthread_cond_destroy(&global_info->ring_cond);
+    }
+
+    vkd3d_free_device_memory(device, &global_info->device_allocation);
+    VK_CALL(vkDestroyBuffer(device->vk_device, global_info->vk_buffer, NULL));
+    vkd3d_free(global_info);
+}
+
+/* Declares a 4096-byte scratch string `buffer` and a cursor `ptr` in the calling function,
+ * and resets the string to empty. Expands to declarations followed by statements. */
+#define DECL_BUFFER() \
+    char buffer[4096]; \
+    char *ptr; \
+    ptr = buffer; \
+    *ptr = '\0'
+
+/* Writes `buffer` plus a newline to descriptor_debug_file while holding debug_lock, then
+ * flushes the file. The file pointer is not checked here; callers return early when
+ * logging is inactive. */
+#define FLUSH_BUFFER() do { \
+    pthread_mutex_lock(&debug_lock); \
+    fprintf(descriptor_debug_file, "%s\n", buffer); \
+    pthread_mutex_unlock(&debug_lock); \
+    fflush(descriptor_debug_file); \
+} while (0)
+
+/* Appends printf-style formatted text to `buffer`: moves `ptr` to the current end of the
+ * string and formats into the remaining space, so overlong output is truncated by snprintf. */
+#define APPEND_SNPRINTF(...) do { ptr += strlen(ptr); snprintf(ptr, (buffer + ARRAY_SIZE(buffer)) - ptr, __VA_ARGS__); } while(0)
+
+/* Formats the fault currently recorded in the global QA buffer into a readable report.
+ * The report is always emitted through ERR(), and additionally written to the descriptor
+ * log file when logging is active. */
+static void vkd3d_descriptor_debug_qa_check_report_fault(
+        struct vkd3d_descriptor_qa_global_info *global_info)
+{
+    struct vkd3d_descriptor_qa_global_buffer_data *fault = global_info->data;
+    DECL_BUFFER();
+
+    /* Several fault bits may be set at once; list each of them. */
+    if (fault->fault_type & VKD3D_DESCRIPTOR_FAULT_TYPE_HEAP_OF_OF_RANGE)
+        APPEND_SNPRINTF("Fault type: HEAP_OUT_OF_RANGE\n");
+    if (fault->fault_type & VKD3D_DESCRIPTOR_FAULT_TYPE_MISMATCH_DESCRIPTOR_TYPE)
+        APPEND_SNPRINTF("Fault type: MISMATCH_DESCRIPTOR_TYPE\n");
+    if (fault->fault_type & VKD3D_DESCRIPTOR_FAULT_TYPE_DESTROYED_RESOURCE)
+        APPEND_SNPRINTF("Fault type: DESTROYED_RESOURCE\n");
+
+    APPEND_SNPRINTF("CBV_SRV_UAV heap cookie: %u\n", fault->failed_heap);
+    APPEND_SNPRINTF("Shader hash and instruction: %"PRIx64" (%u)\n",
+            fault->failed_hash, fault->failed_instruction);
+    APPEND_SNPRINTF("Accessed resource/view cookie: %u\n", fault->failed_cookie);
+    APPEND_SNPRINTF("Shader desired descriptor type: %u (%s)\n",
+            fault->failed_descriptor_type_mask,
+            debug_descriptor_type(fault->failed_descriptor_type_mask));
+    APPEND_SNPRINTF("Found descriptor type in heap: %u (%s)\n",
+            fault->actual_descriptor_type_mask,
+            debug_descriptor_type(fault->actual_descriptor_type_mask));
+    APPEND_SNPRINTF("Failed heap index: %u\n", fault->failed_offset);
+
+    ERR("\n============\n%s==========\n", buffer);
+
+    if (vkd3d_descriptor_debug_active_log())
+        FLUSH_BUFFER();
+}
+
+/* Registers a newly created descriptor heap.
+ * If a QA heap buffer is supplied, its header is initialized and all descriptor entries are
+ * cleared. When logging is active, a line describing the heap is written to the log. */
+void vkd3d_descriptor_debug_register_heap(
+        struct vkd3d_descriptor_qa_heap_buffer_data *heap, uint64_t cookie,
+        const D3D12_DESCRIPTOR_HEAP_DESC *desc)
+{
+    const char *type_suffix;
+    DECL_BUFFER();
+
+    if (heap)
+    {
+        heap->num_descriptors = desc->NumDescriptors;
+        /* The heap index is 32-bit; cookies which do not fit are stored as 0. */
+        if (cookie <= UINT32_MAX)
+            heap->heap_index = (uint32_t)cookie;
+        else
+            heap->heap_index = 0u;
+        memset(heap->desc, 0, desc->NumDescriptors * sizeof(*heap->desc));
+    }
+
+    if (!vkd3d_descriptor_debug_active_log())
+        return;
+
+    switch (desc->Type)
+    {
+        case D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV:
+            type_suffix = " || CBV_SRV_UAV";
+            break;
+
+        case D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER:
+            type_suffix = " || SAMPLER";
+            break;
+
+        case D3D12_DESCRIPTOR_HEAP_TYPE_RTV:
+            type_suffix = " || RTV";
+            break;
+
+        case D3D12_DESCRIPTOR_HEAP_TYPE_DSV:
+            type_suffix = " || DSV";
+            break;
+
+        default:
+            type_suffix = " || ?";
+            break;
+    }
+
+    APPEND_SNPRINTF("REGISTER HEAP %"PRIu64" || COUNT = %u", cookie, desc->NumDescriptors);
+    if (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
+        APPEND_SNPRINTF(" || SHADER");
+    APPEND_SNPRINTF("%s", type_suffix);
+
+    FLUSH_BUFFER();
+}
+
+/* Logs the destruction of a descriptor heap; a no-op unless logging is active. */
+void vkd3d_descriptor_debug_unregister_heap(uint64_t cookie)
+{
+    DECL_BUFFER();
+
+    if (vkd3d_descriptor_debug_active_log())
+    {
+        APPEND_SNPRINTF("DESTROY HEAP %"PRIu64, cookie);
+        FLUSH_BUFFER();
+    }
+}
+
+/* Registers a resource cookie: marks it live in the global status table and, when logging
+ * is active, writes a line describing the resource (dimension, size/format, usage flags). */
+void vkd3d_descriptor_debug_register_resource_cookie(struct vkd3d_descriptor_qa_global_info *global_info,
+        uint64_t cookie, const D3D12_RESOURCE_DESC1 *desc)
+{
+    const char *format_name;
+    DECL_BUFFER();
+
+    vkd3d_descriptor_debug_set_live_status_bit(global_info, cookie);
+
+    if (!vkd3d_descriptor_debug_active_log())
+        return;
+
+    APPEND_SNPRINTF("RESOURCE CREATE #%"PRIu64" || ", cookie);
+
+    format_name = debug_dxgi_format(desc->Format);
+
+    /* Each dimension prints its name followed by the properties relevant to it. */
+    switch (desc->Dimension)
+    {
+        case D3D12_RESOURCE_DIMENSION_BUFFER:
+            APPEND_SNPRINTF("Buffer"
+                    " || Size = 0x%"PRIx64" bytes", desc->Width);
+            break;
+
+        case D3D12_RESOURCE_DIMENSION_TEXTURE1D:
+            APPEND_SNPRINTF("Tex1D"
+                    " || Format = %s || Levels = %u || Layers = %u || Width = %"PRIu64,
+                    format_name, desc->MipLevels, desc->DepthOrArraySize, desc->Width);
+            break;
+
+        case D3D12_RESOURCE_DIMENSION_TEXTURE2D:
+            APPEND_SNPRINTF("Tex2D"
+                    " || Format = %s || Levels = %u || Layers = %u || Width = %"PRIu64" || Height = %u",
+                    format_name, desc->MipLevels, desc->DepthOrArraySize, desc->Width, desc->Height);
+            break;
+
+        case D3D12_RESOURCE_DIMENSION_TEXTURE3D:
+            APPEND_SNPRINTF("Tex3D"
+                    " || Format = %s || Levels = %u || Width = %"PRIu64" || Height = %u || Depth = %u",
+                    format_name, desc->MipLevels, desc->Width, desc->Height, desc->DepthOrArraySize);
+            break;
+
+        default:
+            APPEND_SNPRINTF("Unknown dimension");
+            break;
+    }
+
+    if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
+        APPEND_SNPRINTF(" || UAV");
+    if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)
+        APPEND_SNPRINTF(" || RTV");
+    if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
+        APPEND_SNPRINTF(" || DSV");
+
+    FLUSH_BUFFER();
+}
+
+/* Registers a memory allocation cookie by describing the allocation as a buffer resource
+ * whose width is the allocation's required size. */
+void vkd3d_descriptor_debug_register_allocation_cookie(
+        struct vkd3d_descriptor_qa_global_info *global_info,
+        uint64_t cookie, const struct vkd3d_allocate_memory_info *info)
+{
+    D3D12_RESOURCE_DESC1 buffer_desc;
+
+    memset(&buffer_desc, 0, sizeof(buffer_desc));
+    buffer_desc.Width = info->memory_requirements.size;
+    buffer_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
+
+    vkd3d_descriptor_debug_register_resource_cookie(global_info, cookie, &buffer_desc);
+}
+
+/* Registers a view cookie: marks it live and, when logging is active, records which
+ * resource cookie the view was created from. */
+void vkd3d_descriptor_debug_register_view_cookie(
+        struct vkd3d_descriptor_qa_global_info *global_info,
+        uint64_t cookie, uint64_t resource_cookie)
+{
+    DECL_BUFFER();
+
+    vkd3d_descriptor_debug_set_live_status_bit(global_info, cookie);
+
+    if (vkd3d_descriptor_debug_active_log())
+    {
+        APPEND_SNPRINTF("VIEW CREATE #%"PRIu64" <- RESOURCE #%"PRIu64, cookie, resource_cookie);
+        FLUSH_BUFFER();
+    }
+}
+
+/* Unregisters a resource or view cookie: clears its live bit and, when logging is active,
+ * records the destruction. Cookie 0 belongs to the NULL descriptor and always stays live. */
+void vkd3d_descriptor_debug_unregister_cookie(
+        struct vkd3d_descriptor_qa_global_info *global_info,
+        uint64_t cookie)
+{
+    DECL_BUFFER();
+
+    /* Don't unset the null descriptor by mistake. */
+    if (cookie != 0)
+        vkd3d_descriptor_debug_unset_live_status_bit(global_info, cookie);
+
+    if (vkd3d_descriptor_debug_active_log())
+    {
+        APPEND_SNPRINTF("COOKIE DESTROY #%"PRIu64, cookie);
+        FLUSH_BUFFER();
+    }
+}
+
+/* Records a descriptor write: stores the cookie and type for the given heap slot (when a QA
+ * heap buffer is supplied and the offset is in range) and logs the write if logging is active. */
+void vkd3d_descriptor_debug_write_descriptor(struct vkd3d_descriptor_qa_heap_buffer_data *heap, uint64_t heap_cookie,
+        uint32_t offset, vkd3d_descriptor_qa_flags type_flags, uint64_t cookie)
+{
+    DECL_BUFFER();
+
+    if (heap && offset < heap->num_descriptors)
+    {
+        /* Should never overflow here except if game is literally spamming allocations every frame and we
+         * wait around for hours/days.
+         * This case will trigger warnings either way. */
+        uint32_t stored_cookie = 0u;
+
+        if (cookie <= UINT32_MAX)
+            stored_cookie = (uint32_t)cookie;
+
+        heap->desc[offset].cookie = stored_cookie;
+        heap->desc[offset].descriptor_type = type_flags;
+    }
+
+    if (vkd3d_descriptor_debug_active_log())
+    {
+        APPEND_SNPRINTF("WRITE HEAP %"PRIu64" || OFFSET = %u || TYPE = %s || COOKIE = #%"PRIu64,
+                heap_cookie, offset, debug_descriptor_type(type_flags), cookie);
+        FLUSH_BUFFER();
+    }
+}
+
+/* Records a descriptor copy: duplicates the QA entry from the source slot into the
+ * destination slot (when both heaps are supplied and both offsets are in range) and logs
+ * the copy if logging is active. */
+void vkd3d_descriptor_debug_copy_descriptor(
+        struct vkd3d_descriptor_qa_heap_buffer_data *dst_heap, uint64_t dst_heap_cookie, uint32_t dst_offset,
+        struct vkd3d_descriptor_qa_heap_buffer_data *src_heap, uint64_t src_heap_cookie, uint32_t src_offset,
+        uint64_t cookie)
+{
+    const bool dst_valid = dst_heap && dst_offset < dst_heap->num_descriptors;
+    const bool src_valid = src_heap && src_offset < src_heap->num_descriptors;
+    DECL_BUFFER();
+
+    if (dst_valid && src_valid)
+        dst_heap->desc[dst_offset] = src_heap->desc[src_offset];
+
+    if (vkd3d_descriptor_debug_active_log())
+    {
+        APPEND_SNPRINTF("COPY DST HEAP %"PRIu64" || DST OFFSET = %u || COOKIE = #%"PRIu64" || SRC HEAP %"PRIu64" || SRC OFFSET = %u",
+                dst_heap_cookie, dst_offset, cookie, src_heap_cookie, src_offset);
+        FLUSH_BUFFER();
+    }
+}
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
--- a/libs/vkd3d/device_profiled.h
+++ b/libs/vkd3d/device_profiled.h
@ -0,0 +1,334 @@
+/*
+ * Copyright 2020 Hans-Kristian Arntzen for Valve Corporation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
+ */
+
+#ifndef __VKD3D_DEVICE_PROFILED_H
+#define __VKD3D_DEVICE_PROFILED_H
+
+/* Only profile device commands which we know are somewhat performance sensitive. */
+
+/* Expands to a complete wrapper body for a device method returning HRESULT:
+ * declares hr, brackets the call to d3d12_device_<name>(...) with
+ * VKD3D_REGION_BEGIN/VKD3D_REGION_END for region <name>, and returns the call's result.
+ * It declares locals and ends in a return (the trailing semicolon comes from the use site),
+ * so the wrappers below use it as the only statement in the function. */
+#define DEVICE_PROFILED_CALL_HRESULT(name, ...) \
+    HRESULT hr; \
+    VKD3D_REGION_DECL(name); \
+    VKD3D_REGION_BEGIN(name); \
+    hr = d3d12_device_##name(__VA_ARGS__); \
+    VKD3D_REGION_END(name); \
+    return hr
+
+/* Expands to a complete wrapper body for a device method returning void:
+ * brackets the call to d3d12_device_<name>(...) with
+ * VKD3D_REGION_BEGIN/VKD3D_REGION_END for region <name>.
+ * The trailing semicolon comes from the use site. */
+#define DEVICE_PROFILED_CALL(name, ...) \
+    VKD3D_REGION_DECL(name); \
+    VKD3D_REGION_BEGIN(name); \
+    d3d12_device_##name(__VA_ARGS__); \
+    VKD3D_REGION_END(name)
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateGraphicsPipelineState inside
+ * the CreateGraphicsPipelineState region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState_profiled(d3d12_device_iface *iface,
+        const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateGraphicsPipelineState, iface, desc, riid, pipeline_state);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateComputePipelineState inside
+ * the CreateComputePipelineState region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState_profiled(d3d12_device_iface *iface,
+        const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateComputePipelineState, iface, desc, riid, pipeline_state);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateDescriptorHeap inside
+ * the CreateDescriptorHeap region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap_profiled(d3d12_device_iface *iface,
+        const D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateDescriptorHeap, iface, desc, riid, descriptor_heap);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateRootSignature inside
+ * the CreateRootSignature region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature_profiled(d3d12_device_iface *iface,
+        UINT node_mask, const void *bytecode, SIZE_T bytecode_length,
+        REFIID riid, void **root_signature)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateRootSignature, iface, node_mask, bytecode, bytecode_length,
+            riid, root_signature);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateConstantBufferView inside
+ * the CreateConstantBufferView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView_profiled(d3d12_device_iface *iface,
+        const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateConstantBufferView, iface, desc, descriptor);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateShaderResourceView inside
+ * the CreateShaderResourceView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView_profiled(d3d12_device_iface *iface,
+        ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc,
+        D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateShaderResourceView, iface, resource, desc, descriptor);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateUnorderedAccessView inside
+ * the CreateUnorderedAccessView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView_profiled(d3d12_device_iface *iface,
+        ID3D12Resource *resource, ID3D12Resource *counter_resource,
+        const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateUnorderedAccessView, iface, resource, counter_resource, desc, descriptor);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateRenderTargetView inside
+ * the CreateRenderTargetView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView_profiled(d3d12_device_iface *iface,
+        ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc,
+        D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateRenderTargetView, iface, resource, desc, descriptor);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateDepthStencilView inside
+ * the CreateDepthStencilView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView_profiled(d3d12_device_iface *iface,
+        ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc,
+        D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateDepthStencilView, iface, resource, desc, descriptor);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateSampler inside
+ * the CreateSampler region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateSampler_profiled(d3d12_device_iface *iface,
+        const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateSampler, iface, desc, descriptor);
+}
+
+/* Profiled wrapper for d3d12_device_CopyDescriptors.
+ *
+ * Sums the source and destination range sizes (a NULL size array counts one descriptor
+ * per range), forwards the call inside the CopyDescriptors region, and closes the region
+ * with the smaller of the two totals as its iteration count. */
+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors_profiled(d3d12_device_iface *iface,
+        UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets,
+        const UINT *dst_descriptor_range_sizes,
+        UINT src_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets,
+        const UINT *src_descriptor_range_sizes,
+        D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type)
+{
+    VKD3D_REGION_DECL(CopyDescriptors);
+    unsigned int iteration_count, src_total, dst_total, range;
+
+    /* Default to one descriptor per range; replaced by the explicit sum when sizes are given. */
+    src_total = src_descriptor_range_count;
+    if (src_descriptor_range_sizes)
+    {
+        src_total = 0;
+        for (range = 0; range < src_descriptor_range_count; range++)
+            src_total += src_descriptor_range_sizes[range];
+    }
+
+    dst_total = dst_descriptor_range_count;
+    if (dst_descriptor_range_sizes)
+    {
+        dst_total = 0;
+        for (range = 0; range < dst_descriptor_range_count; range++)
+            dst_total += dst_descriptor_range_sizes[range];
+    }
+
+    VKD3D_REGION_BEGIN(CopyDescriptors);
+    d3d12_device_CopyDescriptors(iface,
+            dst_descriptor_range_count, dst_descriptor_range_offsets,
+            dst_descriptor_range_sizes,
+            src_descriptor_range_count, src_descriptor_range_offsets,
+            src_descriptor_range_sizes,
+            descriptor_heap_type);
+
+    /* Report min(src_total, dst_total) as the iteration count. */
+    iteration_count = dst_total;
+    if (src_total < dst_total)
+        iteration_count = src_total;
+    VKD3D_REGION_END_ITERATIONS(CopyDescriptors, iteration_count);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CopyDescriptorsSimple inside the
+ * CopyDescriptorsSimple region, closing the region with descriptor_count as its iteration count. */
+static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple_profiled(d3d12_device_iface *iface,
+        UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset,
+        const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset,
+        D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type)
+{
+    VKD3D_REGION_DECL(CopyDescriptorsSimple);
+    VKD3D_REGION_BEGIN(CopyDescriptorsSimple);
+    d3d12_device_CopyDescriptorsSimple(iface, descriptor_count, dst_descriptor_range_offset,
+            src_descriptor_range_offset, descriptor_heap_type);
+    VKD3D_REGION_END_ITERATIONS(CopyDescriptorsSimple, descriptor_count);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateCommittedResource inside
+ * the CreateCommittedResource region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource_profiled(d3d12_device_iface *iface,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
+        const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
+        const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateCommittedResource, iface, heap_properties, heap_flags,
+            desc, initial_state,
+            optimized_clear_value, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateCommittedResource1 inside
+ * the CreateCommittedResource1 region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1_profiled(d3d12_device_iface *iface,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
+        const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
+        const D3D12_CLEAR_VALUE *optimized_clear_value,
+        ID3D12ProtectedResourceSession *protected_session,
+        REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateCommittedResource1, iface, heap_properties, heap_flags,
+            desc, initial_state,
+            optimized_clear_value, protected_session, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateHeap1 inside
+ * the CreateHeap1 region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1_profiled(d3d12_device_iface *iface,
+        const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session,
+        REFIID iid, void **heap)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateHeap1, iface, desc, protected_session, iid, heap);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateHeap inside
+ * the CreateHeap region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap_profiled(d3d12_device_iface *iface,
+        const D3D12_HEAP_DESC *desc, REFIID iid, void **heap)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateHeap, iface, desc, iid, heap);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreatePlacedResource inside
+ * the CreatePlacedResource region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource_profiled(d3d12_device_iface *iface,
+        ID3D12Heap *heap, UINT64 heap_offset,
+        const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
+        const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreatePlacedResource, iface, heap, heap_offset,
+            desc, initial_state, optimized_clear_value, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateReservedResource1 inside
+ * the CreateReservedResource1 region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1_profiled(d3d12_device_iface *iface,
+        const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
+        ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateReservedResource1, iface, desc, initial_state, optimized_clear_value, protected_session, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateReservedResource inside
+ * the CreateReservedResource region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource_profiled(d3d12_device_iface *iface,
+        const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state,
+        const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateReservedResource, iface, desc, initial_state, optimized_clear_value, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreatePipelineState inside
+ * the CreatePipelineState region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState_profiled(d3d12_device_iface *iface,
+        const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID riid, void **pipeline_state)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreatePipelineState, iface, desc, riid, pipeline_state);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateCommittedResource2 inside
+ * the CreateCommittedResource2 region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2_profiled(d3d12_device_iface *iface,
+        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc,
+        D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
+        ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreateCommittedResource2, iface, heap_properties, heap_flags,
+            desc, initial_state, optimized_clear_value, protected_session, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreatePlacedResource1 inside
+ * the CreatePlacedResource1 region and returns its HRESULT. */
+static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1_profiled(d3d12_device_iface *iface,
+        ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *desc,
+        D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
+        REFIID iid, void **resource)
+{
+    DEVICE_PROFILED_CALL_HRESULT(CreatePlacedResource1, iface, heap, heap_offset,
+            desc, initial_state, optimized_clear_value, iid, resource);
+}
+
+/* Profiled wrapper: forwards all arguments to d3d12_device_CreateSamplerFeedbackUnorderedAccessView
+ * inside the CreateSamplerFeedbackUnorderedAccessView region. */
+static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled(d3d12_device_iface *iface,
+        ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
+{
+    DEVICE_PROFILED_CALL(CreateSamplerFeedbackUnorderedAccessView, iface, target_resource, feedback_resource, descriptor);
+}
+
+/* ID3D12Device9 vtable variant used for profiling.
+ * Entries with a _profiled suffix point at the wrappers in this header; every other entry
+ * points at the regular d3d12_device_* implementation.
+ * This is a positional initializer, so the entry order must match the member order of
+ * struct ID3D12Device9Vtbl. */
+CONST_VTBL struct ID3D12Device9Vtbl d3d12_device_vtbl_profiled =
+{
+    /* IUnknown methods */
+    d3d12_device_QueryInterface,
+    d3d12_device_AddRef,
+    d3d12_device_Release,
+    /* ID3D12Object methods */
+    d3d12_device_GetPrivateData,
+    d3d12_device_SetPrivateData,
+    d3d12_device_SetPrivateDataInterface,
+    (void *)d3d12_object_SetName,
+    /* ID3D12Device methods */
+    d3d12_device_GetNodeCount,
+    d3d12_device_CreateCommandQueue,
+    d3d12_device_CreateCommandAllocator,
+    d3d12_device_CreateGraphicsPipelineState_profiled,
+    d3d12_device_CreateComputePipelineState_profiled,
+    d3d12_device_CreateCommandList,
+    d3d12_device_CheckFeatureSupport,
+    d3d12_device_CreateDescriptorHeap_profiled,
+    d3d12_device_GetDescriptorHandleIncrementSize,
+    d3d12_device_CreateRootSignature_profiled,
+    d3d12_device_CreateConstantBufferView_profiled,
+    d3d12_device_CreateShaderResourceView_profiled,
+    d3d12_device_CreateUnorderedAccessView_profiled,
+    d3d12_device_CreateRenderTargetView_profiled,
+    d3d12_device_CreateDepthStencilView_profiled,
+    d3d12_device_CreateSampler_profiled,
+    d3d12_device_CopyDescriptors_profiled,
+    d3d12_device_CopyDescriptorsSimple_profiled,
+    d3d12_device_GetResourceAllocationInfo,
+    d3d12_device_GetCustomHeapProperties,
+    d3d12_device_CreateCommittedResource_profiled,
+    d3d12_device_CreateHeap_profiled,
+    d3d12_device_CreatePlacedResource_profiled,
+    d3d12_device_CreateReservedResource_profiled,
+    d3d12_device_CreateSharedHandle,
+    d3d12_device_OpenSharedHandle,
+    d3d12_device_OpenSharedHandleByName,
+    d3d12_device_MakeResident,
+    d3d12_device_Evict,
+    d3d12_device_CreateFence,
+    d3d12_device_GetDeviceRemovedReason,
+    d3d12_device_GetCopyableFootprints,
+    d3d12_device_CreateQueryHeap,
+    d3d12_device_SetStablePowerState,
+    d3d12_device_CreateCommandSignature,
+    d3d12_device_GetResourceTiling,
+    d3d12_device_GetAdapterLuid,
+    /* ID3D12Device1 methods */
+    d3d12_device_CreatePipelineLibrary,
+    d3d12_device_SetEventOnMultipleFenceCompletion,
+    d3d12_device_SetResidencyPriority,
+    /* ID3D12Device2 methods */
+    d3d12_device_CreatePipelineState_profiled,
+    /* ID3D12Device3 methods */
+    d3d12_device_OpenExistingHeapFromAddress,
+    d3d12_device_OpenExistingHeapFromFileMapping,
+    d3d12_device_EnqueueMakeResident,
+    /* ID3D12Device4 methods */
+    d3d12_device_CreateCommandList1,
+    d3d12_device_CreateProtectedResourceSession,
+    d3d12_device_CreateCommittedResource1_profiled,
+    d3d12_device_CreateHeap1_profiled,
+    d3d12_device_CreateReservedResource1_profiled,
+    d3d12_device_GetResourceAllocationInfo1,
+    /* ID3D12Device5 methods */
+    d3d12_device_CreateLifetimeTracker,
+    d3d12_device_RemoveDevice,
+    d3d12_device_EnumerateMetaCommands,
+    d3d12_device_EnumerateMetaCommandParameters,
+    d3d12_device_CreateMetaCommand,
+    d3d12_device_CreateStateObject,
+    d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo,
+    d3d12_device_CheckDriverMatchingIdentifier,
+    /* ID3D12Device6 methods */
+    d3d12_device_SetBackgroundProcessingMode,
+    /* ID3D12Device7 methods */
+    d3d12_device_AddToStateObject,
+    d3d12_device_CreateProtectedResourceSession1,
+    /* ID3D12Device8 methods */
+    d3d12_device_GetResourceAllocationInfo2,
+    d3d12_device_CreateCommittedResource2_profiled,
+    d3d12_device_CreatePlacedResource1_profiled,
+    d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled,
+    d3d12_device_GetCopyableFootprints1,
+    /* ID3D12Device9 methods */
+    d3d12_device_CreateShaderCacheSession,
+    d3d12_device_ShaderCacheControl,
+    d3d12_device_CreateCommandQueue1,
+};
+
+#endif
--- a/Show More
+++ b/Show More