Compare commits

..

1 Commits

Author SHA1 Message Date
Hans-Kristian Arntzen ede4d280ae tests: Test behavior of interpolating in non-uniform control flow.
D3D11 functional spec defines that implicit LOD is well-defined in
varying control flow as long as the input is either a constant or shader
input.

Signed-off-by: Hans-Kristian Arntzen <post@arntzen-software.no>
2021-06-04 18:44:52 +02:00
117 changed files with 57130 additions and 91384 deletions

View File

@ -15,7 +15,7 @@ jobs:
- name: Build release
id: build-release
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
export VERSION_NAME="${GITHUB_REF##*/}-${GITHUB_SHA##*/}"

View File

@ -18,7 +18,7 @@ jobs:
- name: Build MinGW x86
id: build-mingw-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win32.txt --buildtype release build-mingw-x86
@ -26,7 +26,7 @@ jobs:
- name: Build MinGW x64
id: build-mingw-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win64.txt --buildtype release build-mingw-x64
@ -34,7 +34,7 @@ jobs:
- name: Build Native GCC x86
id: build-native-gcc-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
export CC="gcc -m32"
@ -45,7 +45,7 @@ jobs:
- name: Build Native GCC x64
id: build-native-gcc-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
export CC="gcc"
@ -55,7 +55,7 @@ jobs:
- name: Build Native Clang x86
id: build-native-clang-x86
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
export CC="clang -m32"
@ -66,7 +66,7 @@ jobs:
- name: Build Native Clang x64
id: build-native-clang-x64
uses: Joshua-Ashton/arch-mingw-github-action@v8
uses: Joshua-Ashton/arch-mingw-github-action@v5
with:
command: |
export CC="clang"

View File

@ -1,53 +0,0 @@
name: Test Builds on Windows
on: [push, pull_request, workflow_dispatch]
jobs:
build-set-windows:
runs-on: windows-2022
steps:
- name: Checkout code
id: checkout-code
uses: actions/checkout@v2
with:
submodules: recursive
- name: Setup widl and glslangValidator
shell: pwsh
run: |
choco install strawberryperl vulkan-sdk -y
Write-Output "C:\Strawberry\c\bin" | Out-File -FilePath "${Env:GITHUB_PATH}" -Append
Write-Output "$([System.Environment]::GetEnvironmentVariable('VULKAN_SDK', 'Machine'))\Bin" `
| Out-File -FilePath "${Env:GITHUB_PATH}" -Append
- name: Setup Meson
shell: pwsh
run: pip install meson
- name: Find Visual Studio
shell: pwsh
run: |
$installationPath = Get-VSSetupInstance `
| Select-VSSetupInstance -Require Microsoft.VisualStudio.Workload.NativeDesktop -Latest `
| Select-Object -ExpandProperty InstallationPath
Write-Output "VSDEVCMD=${installationPath}\Common7\Tools\VsDevCmd.bat" `
| Out-File -FilePath "${Env:GITHUB_ENV}" -Append
- name: Build MSVC x86
shell: pwsh
run: |
& "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x86 -host_arch=x64 -no_logo && set" `
| % { , ($_ -Split '=', 2) } `
| % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x86
msbuild -m build-msvc-x86/vkd3d-proton.sln
- name: Build MSVC x64
shell: pwsh
run: |
& "${Env:COMSPEC}" /s /c "`"${Env:VSDEVCMD}`" -arch=x64 -host_arch=x64 -no_logo && set" `
| % { , ($_ -Split '=', 2) } `
| % { [System.Environment]::SetEnvironmentVariable($_[0], $_[1]) }
meson -Denable_tests=True -Denable_extras=True --buildtype release --backend vs2022 build-msvc-x64
msbuild -m build-msvc-x64/vkd3d-proton.sln

View File

@ -1,4 +0,0 @@
Conor McCarthy <cmccarthy@codeweavers.com>
Ivan Fedorov <ifedorov@nvidia.com>
James Beddek <telans@protonmail.com>
Roshan Chaudhari <rochaudhari@nvidia.com>

33
AUTHORS
View File

@ -1,34 +1,7 @@
Alexander Gabello
Alexandre Julliard
Andrew Eikum
Arkadiusz Hiler
Biswapriyo Nath
Chip Davis
Conor McCarthy
Danylo Piliaiev
David Gow
David McCloskey
Derek Lesho
Fabian Bornschein
Georg Lehmann
Hans-Kristian Arntzen
Henri Verbeet
Ivan Fedorov
Jactry Zeng
James Beddek
Jens Peters
Joshua Ashton
Józef Kucia
Juuso Alasuutari
Krzysztof Bogacki
Paul Gofman
Philip Rebohle
Rémi Bernon
Robin Kertels
Rodrigo Locatti
Roshan Chaudhari
Samuel Pitoiset
Sveinar Søpler
Sven Hesse
Thomas Crider
Zhiyi Zhang
Hans-Kristian Arntzen
Philip Rebohle
Joshua Ashton

View File

@ -1,210 +1,5 @@
# Change Log
## 2.6
It has been a long while since 2.5, and this release rolls up a lot of fixes, features and optimizations.
### Fixes
- Fix black screen rendering bug in Horizon Zero Dawn after latest game updates.
- Fix crashes on startup in Final Fantasy VII: Remake and Warframe.
- Fix crashes in Guardians of the Galaxy when interacting with certain game objects.
- Fix hang on game shutdown in Elden Ring.
- Fix broken geometry rendering in Age of Empires: IV.
### Optimization
- Improve generated shader code for vectorized load-store operations in DXIL.
- Greatly reduce CPU overhead for descriptor copy operations,
which is a key contributor to CPU overhead in D3D12.
### Features
#### Pipeline library rewrite
D3D12 pipeline libraries are now better supported, and we can also cache
the SPIR-V generated from DXBC/DXIL.
Massively reduces subsequent load times in Monster Hunter: Rise,
and helps other titles like Guardians of the Galaxy and Elden Ring.
Also lays the groundwork for internal driver caches down the line for games which do not use this API.
Also, deduplicates binary blobs for reduced disk size requirements.
#### Shader models
Shader model 6.6 is now fully implemented. This includes support for:
- ResourceDescriptorHeap[] direct access
- 64-bit atomics
- IsHelperLane()
- Compute shader derivatives
- WaveSize attribute
- Packed math intrinsics
#### Minor features
- Handle API feature MinResourceLODClamp correctly if `VK_EXT_image_view_min_lod` is supported.
- Expose CastFullyTypedFormat feature.
- Expose some advanced shader features on Intel related to UAV formats (`VK_KHR_format_feature_flags2`).
- Support COLOR -> STENCIL copies.
### Workarounds
- Workaround DEATHLOOP not emitting synchronization commands correctly. Fixes menu flicker on RADV.
- Workaround quirky API usage in Elden Ring. Removes many kinds of stutter and chug when traversing the scenery.
- Workaround certain environments failing to create Vulkan device if some `VK_NVX_*` extensions are enabled.
- Workaround glitched foliage rendering in Horizon Zero Dawn after latest game updates.
- Workaround some questionable UE4 shaders causing glitched rendering on RADV.
### Note on future Vulkan driver requirements
2.6 is expected to be the last vkd3d-proton release before we require some newer Vulkan extensions.
`VK_KHR_dynamic_rendering` and `VK_EXT_extended_dynamic_state`
(and likely `dynamic_state_2` as well) will be required.
`VK_KHR_dynamic_rendering` in particular requires up-to-date drivers and the legacy render pass path
will be abandoned in favor of it. Supporting both paths at the same time is not practical.
Moving to `VK_KHR_dynamic_rendering` allows us to fix some critical flaws with the legacy API
which caused potential shader compilation stutters and extra CPU overhead.
## 2.5
This is a release with a little bit of everything!
### Features
#### DXR progress
DXR has seen significant work in the background.
- DXR 1.1 is now experimentally exposed. It can be enabled with `VKD3D_CONFIG=dxr11`.
Note that DXR 1.1 cannot be fully implemented in `VK_KHR_ray_tracing`'s current form, in particular
DispatchRays() indirect is not compatible yet,
although we have not observed a game which requires this API feature.
- DXR 1.1 inline raytracing support is fully implemented.
- DXR 1.0 support is more or less feature complete.
Some weird edge cases remain, but will likely not be implemented unless required by a game.
`VKD3D_CONFIG=dxr` will eventually be dropped when it matures.
Some new DXR games are starting to come alive, especially with DXR 1.1 enabled,
but there are significant bugs as well that we currently cannot easily debug.
Some experimental results on NVIDIA:
- **Control** - already worked
- **DEATHLOOP** - appears to work correctly
- **Cyberpunk 2077** - DXR can be enabled, but GPU timeouts
- **World of Warcraft** - according to a user, it works, but we have not confirmed ourselves
- **Metro Exodus: Enhanced Edition** -
gets ingame and appears to work? Not sure if it looks correct.
Heavy CPU stutter for some reason ...
- **Metro Exodus** (original release) - GPU timeouts when enabling DXR
- **Resident Evil: Village** - Appears to work, but the visual difference is subtle.
It's worth experimenting with these and others.
DXR is incredibly complicated, so expect bugs.
From here, DXR support is mostly a case of stamping out issues one by one.
#### NVIDIA DLSS
NVIDIA contributed integration APIs in vkd3d-proton which enables DLSS support in D3D12 titles in Proton.
See Proton documentation for how to enable NvAPI support.
#### Shader models
A fair bit of work went into DXIL translation support to catch up with native drivers.
- Shader model 6.5 is exposed.
Shader model 6.6 should be straightforward once that becomes relevant.
- Shader model 6.4 implementation takes advantage of `VK_KHR_shader_integer_dot_product` when supported.
- Proper fallback for FP16 math on GPUs which do not expose native FP16 support (Polaris, Pascal).
Notably fixes AMD FSR shaders in Resident Evil: Village (and others).
- Shader model 6.1 SV_Barycentric support implemented (NVIDIA only for now).
- Support shader model 6.2 FP32 denorm control.
### Performance
Resizable BAR can improve GPU performance by about 10-15% in the best case, although this depends a lot on the game.
Horizon Zero Dawn and Death Stranding in particular improve massively with this change.
By default, vkd3d-proton will now take advantage of PCI-e BAR memory types through heuristics
as D3D12 does not expose direct support for resizable BAR, and native D3D12 drivers are known to use heuristics as well.
Without resizable BAR enabled in BIOS/vBIOS, we only get 256 MiB which can help performance,
but many games will improve performance even more
when we are allowed to use more than that.
There is an upper limit for how much VRAM is dedicated to this purpose.
We also added `VKD3D_CONFIG=no_upload_hvv` to disable all uses of PCI-e BAR memory.
Other performance improvements:
- Avoid redundant descriptor update work in certain scenarios (NVIDIA contribution).
- Minor tweaks here and there to reduce CPU overhead.
### Fixes and workarounds
- Fix behavior for swap chain presentation latency HANDLE. Fixes spurious deadlocks in some cases.
- Fix many issues related to depth-stencil handling, which fixed various issues in DEATHLOOP, F1 2021, WRC 10.
- Fix DIRT 5 rendering issues and crashes. Should be fully playable now.
- Fix some Diablo II Resurrected rendering issues.
- Workaround shader bugs in Psychonauts 2.
- Workaround some Unreal Engine 4 shader bugs which multiple titles trigger.
- Fix some stability issues when VRAM is exhausted on NVIDIA.
- Fix CPU crash in boot-up sequence of Far Cry 6 (game is still kinda buggy though, but gets in-game).
- Fix various bugs with host visible images. Fixes DEATHLOOP.
- Fix various DXIL conversion bugs.
- Add Invariant geometry workarounds for specific games which require it.
- Fix how d3d12.dll exports symbols to be more in line with MSVC.
- Fix some edge cases in bitfield instructions.
- Work around extreme CPU memory bloat on the specific NVIDIA driver versions which had this bug.
- Fix regression in Evil Genius 2: World Domination.
- Fix crashes in Hitman 3.
- Fix terrain rendering in Anno 1800.
- Various correctness and crash fixes.
## 2.4
This is a release which focuses on performance and bug-fixes.
### Performance
- Improve swapchain latency and frame pacing by up to one frame.
- Optimize lookup of format info.
- Avoid potential pipeline compilation stutter in certain scenarios.
- Rewrite how we handle image layouts for color and depth-stencil targets.
Allows us to remove a lot of dumb
barriers giving significant GPU-bound performance improvements.
~15%-20% GPU bound uplift in Horizon Zero Dawn,
~10% in Death Stranding,
and 5%-10% improvements in many other titles.
### Features
- Enable support for sparse 3D textures (tiled resources tier 3).
### Bug fixes and workarounds
- Various bug fixes in DXIL.
- Fix weird bug where sun would pop through walls in RE: Village.
- Workaround game bug in Cyberpunk 2077 where certain locales would render a black screen.
- Fix various bugs (in benchmark and in vkd3d-proton) allowing GravityMark to run.
- Improve robustness against certain app bugs related to NULL descriptors.
- Fix bug with constant FP64 vector handling in DXBC.
- Fix bug where Cyberpunk 2077 inventory screen could spuriously hang GPU on RADV.
- Add workaround for Necromunda: Hired Gun where character models would render random garbage on RADV.
- Fix bug in Necromunda: Hired Gun causing random screen flicker.
- Fix windowed mode tracking when leaving fullscreen. Fix Alt-Tab handling in Horizon Zero Dawn.
- Temporary workaround for SRV ResourceMinLODClamp. Fix black ground rendering in DIRT 5.
The overbright HDR rendering in DIRT 5 sadly persists however :(
- Implement fallback maximum swapchain latency correctly.
### Development features
Various features which are useful for developers were added to aid debugging.
- Descriptor QA can instrument shaders in runtime for GPU-assisted validation.
Performance is good enough (> 40 FPS) that games are actually playable in this mode.
See README for details.
- Allow forcing off CONCURRENT queue, and using EXCLUSIVE queue.
Not valid, but can be useful as a speed hack on Polaris when `single_queue` is not an option
and for testing driver behavior differences.
## 2.3.1
This is a minor bugfix release to address some issues solved shortly after the last release.

View File

@ -1,4 +1,4 @@
Copyright 2016-2022 the vkd3d-proton project authors (see the file AUTHORS for a
Copyright 2016-2020 the vkd3d-proton project authors (see the file AUTHORS for a
complete list)
vkd3d-proton is free software; you can redistribute it and/or modify it under

View File

@ -22,36 +22,35 @@ There are some hard requirements on drivers to be able to implement D3D12 in a r
- `VK_EXT_descriptor_indexing` with at least 1000000 UpdateAfterBind descriptors for all types except UniformBuffer.
Essentially all features in `VkPhysicalDeviceDescriptorIndexingFeatures` must be supported.
- `VK_KHR_timeline_semaphore`
- `VK_KHR_create_renderpass2`
- `VK_KHR_sampler_mirror_clamp_to_edge`
- `VK_EXT_robustness2`
- `VK_KHR_separate_depth_stencil_layouts`
- `VK_KHR_bind_memory2`
- `VK_KHR_copy_commands2`
- `VK_KHR_dynamic_rendering`
- `VK_EXT_extended_dynamic_state`
- `VK_EXT_extended_dynamic_state2`
Some notable extensions that **should** be supported for optimal or correct behavior.
These extensions will likely become mandatory later.
- `VK_KHR_buffer_device_address`
- `VK_EXT_image_view_min_lod`
- `VK_EXT_extended_dynamic_state`
`VK_VALVE_mutable_descriptor_type` is also highly recommended, but not mandatory.
### AMD (RADV)
### AMD (RADV / ACO)
For AMD, RADV is the recommended driver and the one that sees most testing on AMD GPUs.
The minimum requirement at the moment is Mesa 22.0 since it supports `VK_KHR_dynamic_rendering`.
NOTE: For older Mesa versions, use the v2.6 release.
The recommendation here is to use a driver built from Git.
### NVIDIA
The [Vulkan beta drivers](https://developer.nvidia.com/vulkan-driver) generally contain the latest
driver fixes that we identify while getting games to work.
The latest drivers (stable, beta or Vulkan beta tracks) are always preferred.
If you're having problems, always try the latest drivers.
At least Linux 455.26.01 (2020-10-20) is recommended as it contains fixes for:
> Reduce host memory consumption for descriptor memory when VkDescriptorSetVariableDescriptorCountAllocateInfo is used.
> Fixed a bug in a barrier optimization that allowed some back-to-back copies to run unordered
These fixes should find their way into stable drivers eventually, but if you're having issues, test the latest development drivers,
as that is what we test against.
### Intel
@ -152,15 +151,8 @@ commas or semicolons.
- `skip_application_workarounds` - Skips all application workarounds.
For debugging purposes.
- `dxr` - Enables DXR support if supported by device.
- `dxr11` - Enables DXR tier 1.1 support if supported by device.
- `force_static_cbv` - Unsafe speed hack on NVIDIA. May or may not give a significant performance uplift.
- `single_queue` - Do not use asynchronous compute or transfer queues.
- `no_upload_hvv` - Blocks any attempt to use host-visible VRAM (large/resizable BAR) for the UPLOAD heap.
May free up vital VRAM in certain critical situations, at cost of lower GPU performance.
A fraction of VRAM is reserved for resizable BAR allocations either way,
so it should not be a real issue even on lower VRAM cards.
- `force_host_cached` - Forces all host visible allocations to be CACHED, which greatly accelerates captures.
- `no_invariant_position` - Avoids workarounds for invariant position. The workaround is enabled by default.
- `VKD3D_DEBUG` - controls the debug level for log messages produced by
vkd3d-proton. Accepts the following values: none, err, info, fixme, warn, trace.
- `VKD3D_SHADER_DEBUG` - controls the debug level for log messages produced by
@ -185,39 +177,6 @@ commas or semicolons.
- `VKD3D_PROFILE_PATH` - If profiling is enabled in the build, a profiling block is
emitted to `${VKD3D_PROFILE_PATH}.${pid}`.
## Shader cache
By default, vkd3d-proton manages its own driver cache.
This cache is intended to cache DXBC/DXIL -> SPIR-V conversion.
This reduces stutter (when pipelines are created last minute and app relies on hot driver cache)
and load times (when applications do the right thing of loading PSOs up front).
Behavior is designed to be close to DXVK state cache.
#### Default behavior
`vkd3d-proton.cache` (and `vkd3d-proton.cache.write`) are placed in the current working directory.
Generally, this is the game install folder when running in Steam.
#### Custom directory
`VKD3D_SHADER_CACHE_PATH=/path/to/directory` overrides the directory where `vkd3d-proton.cache` is placed.
#### Disable cache
`VKD3D_SHADER_CACHE_PATH=0` disables the internal cache, and any caching would have to be explicitly managed
by application.
### Behavior of ID3D12PipelineLibrary
When explicit shader cache is used, the need for application managed pipeline libraries is greatly diminished,
and the cache applications interact with is a dummy cache.
If the vkd3d-proton shader cache is disabled, ID3D12PipelineLibrary stores everything relevant for a full cache,
i.e. SPIR-V and PSO driver cache blob.
`VKD3D_CONFIG=pipeline_library_app_cache` is an alternative to `VKD3D_SHADER_CACHE_PATH=0`.
In the future, it may be enabled automatically through app-profiles for applications which
manage their caches better than vkd3d-proton can do automagically.
## CPU profiling (development)
Pass `-Denable_profiling=true` to Meson to enable a profiled build. With a profiled build, use `VKD3D_PROFILE_PATH` environment variable.
@ -239,26 +198,12 @@ pass `-Denable_renderdoc=true` to Meson.
vkd3d-proton will automatically make a capture when a specific shader is encountered.
- `VKD3D_AUTO_CAPTURE_COUNTS` - A comma-separated list of indices. This can be used to control which queue submissions to capture.
E.g., use `VKD3D_AUTO_CAPTURE_COUNTS=0,4,10` to capture the 0th (first submission), 4th and 10th submissions which are candidates for capturing.
If `VKD3D_AUTO_CAPTURE_COUNTS` is `-1`, the entire app runtime can be turned into one big capture.
This is only intended to be used when capturing something like the test suite,
or tiny applications with a finite runtime to make it easier to debug cross submission work.
If only `VKD3D_AUTO_CAPTURE_COUNTS` is set, any queue submission is considered for capturing.
If only `VKD3D_AUTO_CAPTURE_SHADER` is set, `VKD3D_AUTO_CAPTURE_COUNTS` is considered to be equal to `"0"`, i.e. a capture is only
made on first encounter with the target shader.
If both are set, the capture counter is only incremented and considered when a submission contains the use of the target shader.
### Breadcrumbs debugging
For debugging GPU hangs, it's useful to know where crashes happen.
If the build has trace enabled (non-release builds), breadcrumbs support is also enabled.
`VKD3D_CONFIG=breadcrumbs` will instrument command lists with `VK_AMD_buffer_marker` or `VK_NV_device_checkpoints`.
On GPU device lost or timeout, crash dumps are written to the log.
For best results on RADV, use `RADV_DEBUG=syncshaders`. The logs will print a digested form of the command lists
which were executing at the time, and attempt to narrow down the possible range of commands which could
have caused a crash.
### Shader logging
It is possible to log the output of replaced shaders, essentially a custom shader printf. To enable this feature, `VK_KHR_buffer_device_address` must be supported.
@ -270,11 +215,8 @@ and avoids any possible accidental hiding of bugs by introducing validation laye
Using `debugPrintEXT` is also possible if that fits better with your debugging scenario.
With this shader replacement scheme, we're able to add shader logging as unobtrusively as possible.
```
# Inside folder full of override shaders, build everything with:
make -C /path/to/include/shader-debug M=$PWD
```
The shader can then include `#include "debug_channel.h"` and use various functions below.
Replaced shaders will need to include `debug_channel.h` from `include/shader-debug`.
Use `glslc -I/path/to/vkd3d-proton/include/shader-debug --target-env=vulkan1.1` when compiling replaced shaders.
```
void DEBUG_CHANNEL_INIT(uvec3 ID);
@ -374,7 +316,7 @@ Failed heap index: 1029
==========
```
### Debugging descriptor crashes with RADV dumps (hardcore ultra nightmare mode)
### Debugging descriptor crashes with RADV/ACO dumps (hardcore ultra nightmare mode)
For when you're absolutely desperate, there is a way to debug GPU hangs.
First, install [umr](https://gitlab.freedesktop.org/tomstdenis/umr) and make the binary setsuid.

View File

@ -456,8 +456,13 @@ static void cxg_mesh_create(ID3D12Device *device, float inner_radius, float oute
float r0, r1, r2;
float angle, da;
vertices = calloc(tooth_count, 12 * sizeof(*vertices));
faces = calloc(tooth_count, 20 * sizeof(*faces));
if (!(vertices = calloc(tooth_count, 12 * sizeof(*vertices))))
return;
if (!(faces = calloc(tooth_count, 20 * sizeof(*faces))))
{
free(vertices);
return;
}
r0 = inner_radius;
r1 = outer_radius - tooth_depth / 2.0f;

View File

@ -10,8 +10,6 @@ vkd3d_idl = [
'vkd3d_dxgiformat.idl',
'vkd3d_dxgitype.idl',
'vkd3d_swapchain_factory.idl',
'vkd3d_command_list_vkd3d_ext.idl',
'vkd3d_device_vkd3d_ext.idl'
]
vkd3d_header_files = idl_generator.process(vkd3d_idl)

View File

@ -174,9 +174,9 @@ static inline struct hash_map_entry *hash_map_insert(struct hash_map *hash_map,
if (!(target->flags & HASH_MAP_ENTRY_OCCUPIED))
{
hash_map->used_count += 1;
memcpy(target, entry, hash_map->entry_size);
target->flags = HASH_MAP_ENTRY_OCCUPIED;
target->hash_value = hash_value;
memcpy(target + 1, entry + 1, hash_map->entry_size - sizeof(*entry));
}
/* If target is occupied, we already have an entry in the hashmap.
@ -193,7 +193,6 @@ static inline void hash_map_init(struct hash_map *hash_map, pfn_hash_func hash_f
hash_map->entry_size = entry_size;
hash_map->entry_count = 0;
hash_map->used_count = 0;
assert(entry_size > sizeof(struct hash_map_entry));
}
static inline void hash_map_clear(struct hash_map *hash_map)
@ -213,43 +212,4 @@ static inline uint32_t hash_uint64(uint64_t n)
return hash_combine((uint32_t)n, (uint32_t)(n >> 32));
}
/* A somewhat stronger hash for when the hash value itself is meant to be stored
 * (pipeline caches, etc). Built on the 64-bit FNV constants. Each iterate step
 * multiplies by the prime and then XORs in the new value, which is the FNV-1
 * ordering (FNV-1a would XOR first and multiply afterwards). */
static inline uint64_t hash_fnv1_init(void)
{
    /* 64-bit FNV offset basis: the seed every hash chain starts from. */
    return 0xcbf29ce484222325ull;
}
/* Folds a single byte into the running hash h:
 * multiply by the 64-bit FNV prime, then XOR in the byte. */
static inline uint64_t hash_fnv1_iterate_u8(uint64_t h, uint8_t value)
{
    const uint64_t fnv_prime = 0x100000001b3ull;
    const uint64_t scaled = h * fnv_prime;

    return scaled ^ value;
}
/* Folds a whole 32-bit word into the running hash h in one step:
 * multiply by the 64-bit FNV prime, then XOR in the word (not byte by byte). */
static inline uint64_t hash_fnv1_iterate_u32(uint64_t h, uint32_t value)
{
    const uint64_t fnv_prime = 0x100000001b3ull;
    const uint64_t scaled = h * fnv_prime;

    return scaled ^ value;
}
/* Hashes a float by its bit pattern: the value is reinterpreted as a
 * 32-bit word through a union and fed to hash_fnv1_iterate_u32(). */
static inline uint64_t hash_fnv1_iterate_f32(uint64_t h, float value)
{
    union
    {
        float f32;
        uint32_t u32;
    } bits;

    bits.f32 = value;
    return hash_fnv1_iterate_u32(h, bits.u32);
}
/* Folds a 64-bit value into the running hash as two 32-bit steps:
 * the low word first, then the high word. */
static inline uint64_t hash_fnv1_iterate_u64(uint64_t h, uint64_t value)
{
    const uint32_t low_word = (uint32_t)(value & UINT32_MAX);
    const uint32_t high_word = (uint32_t)(value >> 32);

    return hash_fnv1_iterate_u32(hash_fnv1_iterate_u32(h, low_word), high_word);
}
/* Folds a NUL-terminated string into the running hash, one byte at a time.
 * A terminating zero byte is always hashed as well, so a NULL str hashes
 * the same as an empty string. */
static inline uint64_t hash_fnv1_iterate_string(uint64_t h, const char *str)
{
    const char *cursor;

    if (str)
    {
        for (cursor = str; *cursor; cursor++)
            h = hash_fnv1_iterate_u8(h, *cursor);
    }

    return hash_fnv1_iterate_u8(h, 0);
}
#endif /* __VKD3D_HASHMAP_H */

View File

@ -262,7 +262,7 @@ static inline uint64_t vkd3d_atomic_uint64_compare_exchange(UINT64* target, uint
#endif
#if INTPTR_MAX == INT64_MAX
#if defined(__x86_64__) || defined(_WIN64)
# define vkd3d_atomic_ptr_load_explicit(target, order) ((void *)vkd3d_atomic_uint64_load_explicit((uint64_t *)target, order))
# define vkd3d_atomic_ptr_store_explicit(target, value, order) (vkd3d_atomic_uint64_store_explicit((uint64_t *)target, (uint64_t)value, order))
# define vkd3d_atomic_ptr_exchange_explicit(target, value, order) ((void *)vkd3d_atomic_uint64_exchange_explicit((uint64_t *)target, (uint64_t)value, order))

View File

@ -27,12 +27,9 @@
#include <stdint.h>
#include <limits.h>
#include <stdbool.h>
#include <assert.h>
#ifdef _MSC_VER
#include <intrin.h>
#else
#include <time.h>
#endif
#ifndef ARRAY_SIZE
@ -45,15 +42,8 @@
#define MEMBER_SIZE(t, m) sizeof(((t *)0)->m)
/* Rounds addr up to the next multiple of alignment (64-bit variant).
 * alignment must be a non-zero power of two; this is asserted in debug builds. */
static inline uint64_t align64(uint64_t addr, uint64_t alignment)
{
    const uint64_t mask = alignment - 1;

    assert(alignment > 0 && (alignment & mask) == 0);
    return (addr + mask) & ~mask;
}
/* Rounds addr up to the next multiple of alignment (size_t variant).
 * alignment must be a non-zero power of two; this is asserted in debug builds. */
static inline size_t align(size_t addr, size_t alignment)
{
    const size_t mask = alignment - 1;

    assert(alignment > 0 && (alignment & mask) == 0);
    return (addr + mask) & ~mask;
}
@ -123,7 +113,8 @@ static inline unsigned int vkd3d_bitmask_tzcnt32(uint32_t mask)
{
#ifdef _MSC_VER
unsigned long result;
return _BitScanForward(&result, mask) ? result : 32;
_BitScanForward(&result, mask) ? result : 32;
return result;
#elif defined(__GNUC__) || defined(__clang__)
return mask ? __builtin_ctz(mask) : 32;
#else
@ -212,14 +203,6 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
#endif
}
/* Ceiling counterpart of vkd3d_log2i(): 1 maps to 0, and any other x maps
 * to vkd3d_log2i(x - 1) + 1.
 * NOTE(review): for x == 0, x - 1 wraps to UINT_MAX; callers presumably
 * never pass 0 - confirm. */
static inline unsigned int vkd3d_log2i_ceil(unsigned int x)
{
    return x == 1 ? 0 : vkd3d_log2i(x - 1) + 1;
}
static inline int ascii_isupper(int c)
{
return 'A' <= c && c <= 'Z';
@ -288,42 +271,4 @@ static inline void *void_ptr_offset(void *ptr, size_t offset)
return ((char*)ptr) + offset;
}
#ifdef _MSC_VER
#define VKD3D_THREAD_LOCAL __declspec(thread)
#else
#define VKD3D_THREAD_LOCAL __thread
#endif
/* Returns the current time in nanoseconds.
 * Windows: derived from the performance counter and its frequency.
 * Elsewhere: read from CLOCK_MONOTONIC_RAW. */
static inline uint64_t vkd3d_get_current_time_ns(void)
{
#ifdef _WIN32
    LARGE_INTEGER counter, frequency;
    uint64_t seconds_ns, remainder_ns;

    QueryPerformanceCounter(&counter);
    QueryPerformanceFrequency(&frequency);
    /* Whole seconds and the sub-second remainder are scaled separately,
     * presumably to keep the multiplication by 1e9 from overflowing. */
    seconds_ns = (counter.QuadPart / frequency.QuadPart) * 1000000000;
    remainder_ns = ((counter.QuadPart % frequency.QuadPart) * 1000000000) / frequency.QuadPart;
    return seconds_ns + remainder_ns;
#else
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC_RAW, &now);
    return now.tv_sec * 1000000000ll + now.tv_nsec;
#endif
}
#ifdef _MSC_VER
#pragma intrinsic(__rdtsc)
#endif
/* Returns a cheap, raw timestamp for measuring elapsed intervals.
 * The unit depends on the build: the TSC value under MSVC or on x86 with
 * GCC/Clang, and nanoseconds from vkd3d_get_current_time_ns() otherwise.
 * Values are therefore only comparable with other values from this function. */
static inline uint64_t vkd3d_get_current_time_ticks(void)
{
#ifdef _MSC_VER
/* MSVC intrinsic. */
return __rdtsc();
#elif defined(__i386__) || defined(__x86_64__)
/* GCC/Clang builtin on x86 targets. */
return __builtin_ia32_rdtsc();
#else
/* No TSC builtin selected for this target: fall back to the nanosecond clock. */
return vkd3d_get_current_time_ns();
#endif
}
#endif /* __VKD3D_COMMON_H */

View File

@ -1,42 +0,0 @@
/*
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __VKD3D_FILE_UTILS_H
#define __VKD3D_FILE_UTILS_H
#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>
/* Describes a file mapping filled in by vkd3d_file_map_read_only().
 * Per the notes on the functions declared below, the struct is zeroed when
 * mapping fails and cleared again by vkd3d_file_unmap(). */
struct vkd3d_memory_mapped_file
{
/* Start of the mapping. */
void *mapped;
/* Size of the mapping; presumably in bytes - TODO confirm against the implementation. */
size_t mapped_size;
};
/* On failure, ensures the struct is cleared to zero.
* A reference to the file is kept through the memory mapping. */
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file);
/* Clears out file on unmap. */
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file);
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path);
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path);
bool vkd3d_file_delete(const char *path);
FILE *vkd3d_file_open_exclusive_write(const char *path);
#endif

View File

@ -23,7 +23,6 @@
#include <stdbool.h>
#include <stdlib.h>
#include "vkd3d_common.h"
#include "vkd3d_debug.h"
static inline void *vkd3d_malloc(size_t size)
@ -58,12 +57,12 @@ static inline void vkd3d_free(void *ptr)
bool vkd3d_array_reserve(void **elements, size_t *capacity,
size_t element_count, size_t element_size);
static inline void *vkd3d_malloc_aligned(size_t size, size_t alignment)
static inline void *vkd3d_malloc_aligned(size_t size, size_t align)
{
#ifdef _WIN32
return _aligned_malloc(size, alignment);
return _aligned_malloc(size, align);
#else
return aligned_alloc(alignment, align(size, alignment));
return aligned_alloc(align, size);
#endif
}

View File

@ -37,8 +37,6 @@ int vkd3d_dlclose(vkd3d_module_t handle);
const char *vkd3d_dlerror(void);
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size);
bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]);
#endif

View File

@ -21,15 +21,39 @@
#include "vkd3d_windows.h"
#include "vkd3d_spinlock.h"
#include "vkd3d_common.h"
#include <stdint.h>
#ifdef VKD3D_ENABLE_PROFILING
#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#else
#include <time.h>
#endif
void vkd3d_init_profiling(void);
bool vkd3d_uses_profiling(void);
unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint64_t end_ticks, unsigned int iteration_count);
/* Returns the timestamp used to bracket profiled regions.
 * Despite the name, the value is in nanoseconds: it is derived from the
 * performance counter on Windows and from CLOCK_MONOTONIC_RAW elsewhere. */
static inline uint64_t vkd3d_profiling_get_tick_count(void)
{
#ifdef _WIN32
    LARGE_INTEGER counter, frequency;
    uint64_t seconds_ns, remainder_ns;

    QueryPerformanceCounter(&counter);
    QueryPerformanceFrequency(&frequency);
    /* Whole seconds and the sub-second remainder are scaled separately,
     * presumably to keep the multiplication by 1e9 from overflowing. */
    seconds_ns = (counter.QuadPart / frequency.QuadPart) * 1000000000;
    remainder_ns = ((counter.QuadPart % frequency.QuadPart) * 1000000000) / frequency.QuadPart;
    return seconds_ns + remainder_ns;
#else
    struct timespec now;

    clock_gettime(CLOCK_MONOTONIC_RAW, &now);
    return now.tv_sec * 1000000000ll + now.tv_nsec;
#endif
}
#define VKD3D_REGION_DECL(name) \
static uint32_t _vkd3d_region_latch_##name; \
static spinlock_t _vkd3d_region_lock_##name; \
@ -41,12 +65,12 @@ void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint6
do { \
if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
_vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
_vkd3d_region_begin_tick_##name = vkd3d_profiling_get_tick_count(); \
} while(0)
#define VKD3D_REGION_END_ITERATIONS(name, iter) \
do { \
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
_vkd3d_region_end_tick_##name = vkd3d_profiling_get_tick_count(); \
vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
} while(0)

View File

@ -30,7 +30,6 @@ WCHAR *vkd3d_dup_demangled_entry_point(const char *str);
char *vkd3d_dup_demangled_entry_point_ascii(const char *str);
bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b);
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b);
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t n, const WCHAR *b);
char *vkd3d_strdup(const char *str);
@ -38,45 +37,4 @@ char *vkd3d_strdup_n(const char *str, size_t n);
WCHAR *vkd3d_wstrdup(const WCHAR *str);
WCHAR *vkd3d_wstrdup_n(const WCHAR *str, size_t n);
/* Returns true if the last ending_len characters of str (of length str_len)
 * compare equal to the first ending_len characters of ending. */
static inline bool vkd3d_string_ends_with_n(const char *str, size_t str_len, const char *ending, size_t ending_len)
{
    const char *tail;

    /* A suffix longer than the string itself can never match. */
    if (ending_len > str_len)
        return false;

    tail = str + (str_len - ending_len);
    return strncmp(tail, ending, ending_len) == 0;
}
/* NUL-terminated convenience wrapper around vkd3d_string_ends_with_n(). */
static inline bool vkd3d_string_ends_with(const char *str, const char *ending)
{
    const size_t str_len = strlen(str);
    const size_t ending_len = strlen(ending);

    return vkd3d_string_ends_with_n(str, str_len, ending, ending_len);
}
/* Selects how vkd3d_string_compare() matches a string against a comparator. */
enum vkd3d_string_compare_mode
{
/* Never matches. */
VKD3D_STRING_COMPARE_NEVER,
/* Always matches, regardless of the inputs. */
VKD3D_STRING_COMPARE_ALWAYS,
/* Matches when both strings are equal (strcmp). */
VKD3D_STRING_COMPARE_EXACT,
/* Matches when the string begins with the comparator. */
VKD3D_STRING_COMPARE_STARTS_WITH,
/* Matches when the string ends with the comparator. */
VKD3D_STRING_COMPARE_ENDS_WITH,
/* Matches when the comparator occurs anywhere in the string (strstr). */
VKD3D_STRING_COMPARE_CONTAINS,
};
/* Tests string against comparator using the matching rule selected by mode.
 * Unknown modes behave like VKD3D_STRING_COMPARE_NEVER. */
static inline bool vkd3d_string_compare(enum vkd3d_string_compare_mode mode, const char *string, const char *comparator)
{
    if (mode == VKD3D_STRING_COMPARE_ALWAYS)
        return true;

    if (mode == VKD3D_STRING_COMPARE_EXACT)
        return strcmp(string, comparator) == 0;

    if (mode == VKD3D_STRING_COMPARE_STARTS_WITH)
        return strncmp(string, comparator, strlen(comparator)) == 0;

    if (mode == VKD3D_STRING_COMPARE_ENDS_WITH)
        return vkd3d_string_ends_with(string, comparator);

    if (mode == VKD3D_STRING_COMPARE_CONTAINS)
        return strstr(string, comparator) != NULL;

    /* VKD3D_STRING_COMPARE_NEVER and any unrecognized mode. */
    return false;
}
#endif /* __VKD3D_STRING_H */

View File

@ -29,19 +29,16 @@
#include <stdlib.h>
#include <string.h>
#ifdef VKD3D_TEST_DECLARE_MAIN
static void vkd3d_test_main(int argc, char **argv);
#endif
extern const char *vkd3d_test_name;
extern const char *vkd3d_test_platform;
static const char *vkd3d_test_name;
static const char *vkd3d_test_platform = "other";
static void vkd3d_test_start_todo(bool is_todo);
static int vkd3d_test_loop_todo(void);
static void vkd3d_test_end_todo(void);
#define START_TEST(name) \
const char *vkd3d_test_name = #name; \
static const char *vkd3d_test_name = #name; \
static void vkd3d_test_main(int argc, char **argv)
/*
@ -104,7 +101,7 @@ static void vkd3d_test_end_todo(void);
#define todo todo_if(true)
struct vkd3d_test_state_context
static struct
{
LONG success_count;
LONG failure_count;
@ -125,8 +122,7 @@ struct vkd3d_test_state_context
const char *test_name_filter;
const char *test_exclude_list;
char context[1024];
};
extern struct vkd3d_test_state_context vkd3d_test_state;
} vkd3d_test_state;
static bool
vkd3d_test_platform_is_windows(void)
@ -270,7 +266,6 @@ vkd3d_test_debug(const char *fmt, ...)
}
}
#ifdef VKD3D_TEST_DECLARE_MAIN
int main(int argc, char **argv)
{
const char *exclude_list = getenv("VKD3D_TEST_EXCLUDE");
@ -360,14 +355,11 @@ int wmain(int argc, WCHAR **wargv)
return ret;
}
#endif /* _WIN32 */
#endif /* VKD3D_TEST_DECLARE_MAIN */
typedef void (*vkd3d_test_pfn)(void);
static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn)
{
const char *old_test_name;
if (vkd3d_test_state.test_name_filter && !strstr(name, vkd3d_test_state.test_name_filter))
return;
@ -375,12 +367,8 @@ static inline void vkd3d_run_test(const char *name, vkd3d_test_pfn test_pfn)
&& vkd3d_debug_list_has_member(vkd3d_test_state.test_exclude_list, name))
return;
old_test_name = vkd3d_test_name;
vkd3d_test_debug("======== %s begin ========", name);
vkd3d_test_name = name;
vkd3d_test_debug("%s", name);
test_pfn();
vkd3d_test_name = old_test_name;
vkd3d_test_debug("======== %s end ==========", name);
}
static inline void vkd3d_test_start_todo(bool is_todo)

View File

@ -51,8 +51,6 @@ typedef struct pthread_cond
CONDITION_VARIABLE cond;
} pthread_cond_t;
typedef pthread_cond_t condvar_reltime_t;
static DWORD WINAPI win32_thread_wrapper_routine(void *arg)
{
pthread_t thread = arg;
@ -116,48 +114,6 @@ static inline int pthread_mutex_destroy(pthread_mutex_t *lock)
return 0;
}
/* SRWLocks distinguish between write and read unlocks, but pthread interface does not,
* so make a trivial wrapper type instead to avoid any possible API conflicts. */
typedef struct rwlock
{
SRWLOCK rwlock;
} rwlock_t;
/* Initializes the wrapped SRW lock. Always returns 0. */
static inline int rwlock_init(rwlock_t *lock)
{
InitializeSRWLock(&lock->rwlock);
return 0;
}
/* Acquires the lock in exclusive (writer) mode. Always returns 0. */
static inline int rwlock_lock_write(rwlock_t *lock)
{
AcquireSRWLockExclusive(&lock->rwlock);
return 0;
}
/* Acquires the lock in shared (reader) mode. Always returns 0. */
static inline int rwlock_lock_read(rwlock_t *lock)
{
AcquireSRWLockShared(&lock->rwlock);
return 0;
}
/* Releases a lock taken with rwlock_lock_write(). Always returns 0. */
static inline int rwlock_unlock_write(rwlock_t *lock)
{
ReleaseSRWLockExclusive(&lock->rwlock);
return 0;
}
/* Releases a lock taken with rwlock_lock_read(). Always returns 0. */
static inline int rwlock_unlock_read(rwlock_t *lock)
{
ReleaseSRWLockShared(&lock->rwlock);
return 0;
}
/* No-op on this platform: nothing is released here. Always returns 0. */
static inline int rwlock_destroy(rwlock_t *lock)
{
return 0;
}
static inline int pthread_cond_init(pthread_cond_t *cond, void *attr)
{
(void)attr;
@ -189,32 +145,6 @@ static inline int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *lock)
return ret ? 0 : -1;
}
/* Initializes the condition variable by forwarding to pthread_cond_init()
 * with a NULL attribute pointer; returns its result. */
static inline int condvar_reltime_init(condvar_reltime_t *cond)
{
return pthread_cond_init(cond, NULL);
}
/* Destroys the condition variable; returns the pthread_cond_destroy() result. */
static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
{
return pthread_cond_destroy(cond);
}
/* Signals the condition variable; returns the pthread_cond_signal() result. */
static inline int condvar_reltime_signal(condvar_reltime_t *cond)
{
return pthread_cond_signal(cond);
}
/* Waits on cond with lock held, for at most the given number of seconds.
 * Returns 0 when woken, 1 on timeout and -1 on any other failure. */
static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
{
    /* The timeout is handed to the Win32 call in milliseconds. */
    if (SleepConditionVariableSRW(&cond->cond, &lock->lock, seconds * 1000, 0))
        return 0;

    /* Tell a plain timeout apart from every other error. */
    return GetLastError() == ERROR_TIMEOUT ? 1 : -1;
}
static inline void vkd3d_set_thread_name(const char *name)
{
(void)name;
@ -238,96 +168,10 @@ static inline void pthread_once(pthread_once_t *once, void (*func)(void))
}
#else
#include <pthread.h>
#include <errno.h>
#include <time.h>
/* Names the calling thread via pthread_setname_np(). The result of that call
 * is not checked, so a rejected name is silently ignored. */
static inline void vkd3d_set_thread_name(const char *name)
{
pthread_setname_np(pthread_self(), name);
}
/* Thin wrapper type around pthread_rwlock_t. Every function below returns the
 * result of the pthread call it forwards to. */
typedef struct rwlock
{
pthread_rwlock_t rwlock;
} rwlock_t;
/* Initializes the lock with default attributes (NULL attribute pointer). */
static inline int rwlock_init(rwlock_t *lock)
{
return pthread_rwlock_init(&lock->rwlock, NULL);
}
/* Acquires the lock for writing. */
static inline int rwlock_lock_write(rwlock_t *lock)
{
return pthread_rwlock_wrlock(&lock->rwlock);
}
/* Acquires the lock for reading. */
static inline int rwlock_lock_read(rwlock_t *lock)
{
return pthread_rwlock_rdlock(&lock->rwlock);
}
/* Releases a write lock; pthread uses the same unlock call for both modes. */
static inline int rwlock_unlock_write(rwlock_t *lock)
{
return pthread_rwlock_unlock(&lock->rwlock);
}
/* Releases a read lock; pthread uses the same unlock call for both modes. */
static inline int rwlock_unlock_read(rwlock_t *lock)
{
return pthread_rwlock_unlock(&lock->rwlock);
}
/* Destroys the lock. */
static inline int rwlock_destroy(rwlock_t *lock)
{
return pthread_rwlock_destroy(&lock->rwlock);
}
/* Condition variable whose timed waits are measured against CLOCK_MONOTONIC. */
typedef struct condvar_reltime
{
    pthread_cond_t cond;
} condvar_reltime_t;

/* Initializes cond so that pthread_cond_timedwait() deadlines are interpreted
 * against CLOCK_MONOTONIC.
 *
 * Returns 0 on success, or the error code of the first pthread call that failed.
 * Attribute setup failures are reported instead of being ignored: a condition
 * variable left on a different clock would misinterpret the CLOCK_MONOTONIC
 * deadlines that condvar_reltime_wait_timeout_seconds() computes. */
static inline int condvar_reltime_init(condvar_reltime_t *cond)
{
    pthread_condattr_t attr;
    int rc;

    if ((rc = pthread_condattr_init(&attr)))
        return rc;

    rc = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
    if (!rc)
        rc = pthread_cond_init(&cond->cond, &attr);

    /* The attribute object is only needed during initialization. */
    pthread_condattr_destroy(&attr);
    return rc;
}
/* Destroys the condition variable and returns the pthread_cond_destroy() result.
 * Returns int, like the Win32 implementation of the same function, so callers can
 * check the result on every platform; callers that ignore it are unaffected. */
static inline int condvar_reltime_destroy(condvar_reltime_t *cond)
{
    return pthread_cond_destroy(&cond->cond);
}
/* Signals the wrapped condition variable; returns the pthread_cond_signal() result. */
static inline int condvar_reltime_signal(condvar_reltime_t *cond)
{
return pthread_cond_signal(&cond->cond);
}
/* Waits on cond with lock held, for at most the given number of seconds.
 * Returns 0 when woken, 1 on timeout and -1 on any other failure. */
static inline int condvar_reltime_wait_timeout_seconds(condvar_reltime_t *cond, pthread_mutex_t *lock, unsigned int seconds)
{
    struct timespec deadline;

    /* pthread_cond_timedwait() takes an absolute deadline, so offset the current time. */
    clock_gettime(CLOCK_MONOTONIC, &deadline);
    deadline.tv_sec += seconds;

    switch (pthread_cond_timedwait(&cond->cond, lock, &deadline))
    {
        case 0:
            return 0;
        case ETIMEDOUT:
            return 1;
        default:
            return -1;
    }
}
#define PTHREAD_ONCE_CALLBACK
#endif

View File

@ -1,6 +0,0 @@
#ifndef __VULKAN_PRIVATE_EXTENSIONS_H__
#define __VULKAN_PRIVATE_EXTENSIONS_H__
/* Nothing here at the moment. Add hacks here! */
#endif

View File

@ -1,71 +0,0 @@
# Compiles every GLSL shader found in directory $(M) to SPIR-V with glslc.
# M is not set in this file; it has to be supplied by the caller
# (for example on the make command line).
# GLSLC_FLAGS may be used to pass extra options to every glslc invocation.

# Include search path handed to glslc via -I.
INCLUDE_DIR := $(CURDIR)

# One source list per shader stage, keyed by file extension.
VERT_SOURCES := $(wildcard $(M)/*.vert)
FRAG_SOURCES := $(wildcard $(M)/*.frag)
COMP_SOURCES := $(wildcard $(M)/*.comp)
TESC_SOURCES := $(wildcard $(M)/*.tesc)
TESE_SOURCES := $(wildcard $(M)/*.tese)
GEOM_SOURCES := $(wildcard $(M)/*.geom)
RGEN_SOURCES := $(wildcard $(M)/*.rgen)
RINT_SOURCES := $(wildcard $(M)/*.rint)
RAHIT_SOURCES := $(wildcard $(M)/*.rahit)
RCHIT_SOURCES := $(wildcard $(M)/*.rchit)
RMISS_SOURCES := $(wildcard $(M)/*.rmiss)
RCALL_SOURCES := $(wildcard $(M)/*.rcall)

# Every source maps to a .spv file with the same base name.
SPV_OBJECTS := \
	$(VERT_SOURCES:.vert=.spv) \
	$(FRAG_SOURCES:.frag=.spv) \
	$(COMP_SOURCES:.comp=.spv) \
	$(TESC_SOURCES:.tesc=.spv) \
	$(TESE_SOURCES:.tese=.spv) \
	$(GEOM_SOURCES:.geom=.spv) \
	$(RGEN_SOURCES:.rgen=.spv) \
	$(RINT_SOURCES:.rint=.spv) \
	$(RAHIT_SOURCES:.rahit=.spv) \
	$(RCHIT_SOURCES:.rchit=.spv) \
	$(RMISS_SOURCES:.rmiss=.spv) \
	$(RCALL_SOURCES:.rcall=.spv)

# Rasterization and compute stages target Vulkan 1.1.
%.spv: %.vert
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

# Fragment shaders are additionally built with DEBUG_CHANNEL_HELPER_LANES defined.
%.spv: %.frag
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES $(GLSLC_FLAGS)

%.spv: %.comp
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.geom
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.tesc
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

%.spv: %.tese
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)

# Ray tracing stages are compiled with --target-spv=spv1.4 on top of the Vulkan 1.1 environment.
%.spv: %.rgen
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rint
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rahit
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rchit
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rmiss
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

%.spv: %.rcall
	glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)

# Default goal: build every SPIR-V object.
all: $(SPV_OBJECTS)

# Remove all generated SPIR-V objects.
clean:
	rm -f $(SPV_OBJECTS)

# Neither target names a real file; declaring both phony keeps a stray file
# called "all" or "clean" from suppressing the rule.
.PHONY: all clean

View File

@ -23,17 +23,14 @@
#extension GL_ARB_gpu_shader_int64 : require
#extension GL_KHR_shader_subgroup_basic : require
#extension GL_KHR_shader_subgroup_ballot : require
#ifdef DEBUG_CHANNEL_HELPER_LANES
#extension GL_EXT_demote_to_helper_invocation : require
#endif
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer ControlBlock
layout(buffer_reference, std430, buffer_reference_align = 4) buffer ControlBlock
{
uint message_counter;
uint instance_counter;
};
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer RingBuffer
layout(buffer_reference, std430, buffer_reference_align = 4) buffer RingBuffer
{
uint data[];
};
@ -51,73 +48,24 @@ const uint DEBUG_CHANNEL_FMT_F32 = 2;
const uint DEBUG_CHANNEL_FMT_HEX_ALL = DEBUG_CHANNEL_FMT_HEX * 0x55555555u;
const uint DEBUG_CHANNEL_FMT_I32_ALL = DEBUG_CHANNEL_FMT_I32 * 0x55555555u;
const uint DEBUG_CHANNEL_FMT_F32_ALL = DEBUG_CHANNEL_FMT_F32 * 0x55555555u;
const uint DEBUG_CHANNEL_WORD_COOKIE = 0xdeadca70u; /* Let host fish for this cookie in device lost scenarios. */
uint DEBUG_CHANNEL_INSTANCE_COUNTER;
uvec3 DEBUG_CHANNEL_ID;
/* Need to make sure the elected subgroup can have side effects. */
#ifdef DEBUG_CHANNEL_HELPER_LANES
/* Helper invocations never take part in the election and always get false;
 * among the remaining invocations the result is that of subgroupElect(). */
bool DEBUG_CHANNEL_ELECT()
{
bool elected = false;
if (!helperInvocationEXT())
elected = subgroupElect();
return elected;
}
#else
/* Without helper-lane handling this is a plain subgroupElect(). */
bool DEBUG_CHANNEL_ELECT()
{
return subgroupElect();
}
#endif
void DEBUG_CHANNEL_INIT(uvec3 id)
{
if (!DEBUG_SHADER_RING_ACTIVE)
return;
DEBUG_CHANNEL_ID = id;
uint inst;
#ifdef DEBUG_CHANNEL_HELPER_LANES
if (!helperInvocationEXT())
{
/* Elect and broadcast must happen without helper lanes here.
* We must perform the instance increment with side effects,
* and broadcast first must pick the elected lane. */
if (subgroupElect())
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
}
/* Helper lanes cannot write debug messages, since they cannot have side effects.
* Leave it undefined, and we should ensure SGPR propagation either way ... */
#else
if (DEBUG_CHANNEL_ELECT())
if (subgroupElect())
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
#endif
}
/* Initializes the debug channel with a caller-provided instance number
 * instead of allocating one from the control block. */
void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
{
    /* Leave the channel state untouched while the debug ring is inactive. */
    if (DEBUG_SHADER_RING_ACTIVE)
    {
        DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
        DEBUG_CHANNEL_ID = id;
    }
}
/* Publishes a message that starts at ring offset `offset`: stores
 * num_words | DEBUG_CHANNEL_WORD_COOKIE into the message's first word
 * (index wrapped with DEBUG_SHADER_RING_MASK), with a buffer memory barrier
 * before and after the store. */
void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
{
memoryBarrierBuffer();
/* Make sure this word is made visible last. This way the ring thread can avoid reading bogus messages.
* If the host thread observed a num_word of 0, we know a message was allocated, but we don't necessarily
* have a complete write yet.
* In a device lost scenario, we can try to fish for valid messages. */
buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words | DEBUG_CHANNEL_WORD_COOKIE;
memoryBarrierBuffer();
}
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint fmt)
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint num_words, uint fmt)
{
buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words;
buf.data[(offset + 1) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH);
buf.data[(offset + 2) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH >> 32);
buf.data[(offset + 3) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_INSTANCE_COUNTER;
@ -139,9 +87,7 @@ void DEBUG_CHANNEL_MSG_()
return;
uint words = 8;
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, 0);
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
DEBUG_CHANNEL_WRITE_HEADER(RingBuffer(DEBUG_SHADER_RING_BDA), offset, words, 0);
}
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
@ -151,9 +97,8 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
uint words = 9;
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
}
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
@ -163,10 +108,9 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
uint words = 10;
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
}
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
@ -176,11 +120,10 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
uint words = 11;
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
}
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
@ -190,12 +133,11 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
uint words = 12;
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
buf.data[(offset + 11) & DEBUG_SHADER_RING_MASK] = v3;
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
}
void DEBUG_CHANNEL_MSG()
@ -263,76 +205,4 @@ void DEBUG_CHANNEL_MSG(float v0, float v1, float v2, float v3)
DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1), floatBitsToUint(v2), floatBitsToUint(v3));
}
/* DEBUG_CHANNEL_MSG_UNIFORM overloads: each forwards its arguments to the
 * DEBUG_CHANNEL_MSG overload of the same signature, but only on the invocation
 * for which DEBUG_CHANNEL_ELECT() returns true. Overloads exist for one to four
 * uint, int or float values. */
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0);
}
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1);
}
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2);
}
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2, uint v3)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}
/* Signed integer variants. */
void DEBUG_CHANNEL_MSG_UNIFORM(int v0)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0);
}
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1);
}
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2);
}
void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2, int v3)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}
/* Floating-point variants. */
void DEBUG_CHANNEL_MSG_UNIFORM(float v0)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0);
}
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1);
}
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2);
}
void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2, float v3)
{
if (DEBUG_CHANNEL_ELECT())
DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}
#endif

View File

@ -31,7 +31,6 @@
# define VK_USE_PLATFORM_WIN32_KHR
# endif
# include <vulkan/vulkan.h>
# include "private/vulkan_private_extensions.h"
#endif /* VKD3D_NO_VULKAN_H */
#define VKD3D_MIN_API_VERSION VK_API_VERSION_1_1
@ -59,39 +58,17 @@
extern "C" {
#endif /* __cplusplus */
#define VKD3D_CONFIG_FLAG_VULKAN_DEBUG (1ull << 0)
#define VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS (1ull << 1)
#define VKD3D_CONFIG_FLAG_DEBUG_UTILS (1ull << 2)
#define VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV (1ull << 3)
#define VKD3D_CONFIG_FLAG_DXR (1ull << 4)
#define VKD3D_CONFIG_FLAG_SINGLE_QUEUE (1ull << 5)
#define VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS (1ull << 6)
#define VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE (1ull << 7)
#define VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE (1ull << 8)
#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
#define VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE (1ull << 12)
#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
#define VKD3D_CONFIG_FLAG_DXR11 (1ull << 14)
#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
#define VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE (1ull << 16)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV (1ull << 17)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV (1ull << 18)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG (1ull << 19)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV (1ull << 20)
#define VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET (1ull << 21)
#define VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR (1ull << 22)
#define VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS (1ull << 23)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER (1ull << 24)
#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
#define VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16 (1ull << 31)
#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)
/* Configuration bit flags. Every enumerator occupies its own bit (0x01 .. 0x80),
 * so values can be combined with bitwise OR and tested with bitwise AND. */
enum vkd3d_config_flags
{
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
VKD3D_CONFIG_FLAG_DXR = 0x00000010,
VKD3D_CONFIG_FLAG_SINGLE_QUEUE = 0x00000020,
VKD3D_CONFIG_FLAG_FORCE_TGSM_BARRIERS = 0x00000040,
VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS = 0x00000080
};
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);

View File

@ -1,32 +0,0 @@
/*
* * Copyright 2021 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
import "vkd3d_d3d12.idl";
import "vkd3d_vk_includes.h";
/* Extension interface derived directly from IUnknown. It is declared `local`,
 * i.e. a non-remotable COM interface.
 * NOTE(review): judging by its name it is presumably obtained via QueryInterface
 * on a D3D12 graphics command list; confirm against the implementation. */
[
uuid(77a86b09-2bea-4801-b89a-37648e104af1),
object,
local,
pointer_default(unique)
]
interface ID3D12GraphicsCommandListExt : IUnknown
{
/* Writes a VkCommandBuffer handle to *pVkCommandBuffer. */
HRESULT GetVulkanHandle(VkCommandBuffer *pVkCommandBuffer);
/* Takes a cubin handle, three block counts and a parameter blob of param_size bytes.
 * NOTE(review): exact launch semantics are not visible here; confirm against the implementation. */
HRESULT LaunchCubinShader(D3D12_CUBIN_DATA_HANDLE *handle, UINT32 block_x, UINT32 block_y, UINT32 block_z, const void *params, UINT32 param_size);
}

View File

@ -26,11 +26,11 @@ cpp_quote("#ifndef _D3D12_CONSTANTS")
cpp_quote("#define _D3D12_CONSTANTS")
cpp_quote("#ifndef D3D12_ERROR_ADAPTER_NOT_FOUND")
cpp_quote("#define D3D12_ERROR_ADAPTER_NOT_FOUND ((HRESULT)0x887e0001)")
cpp_quote("#define D3D12_ERROR_ADAPTER_NOT_FOUND 0x887e0001")
cpp_quote("#endif")
cpp_quote("#ifndef D3D12_ERROR_DRIVER_VERSION_MISMATCH")
cpp_quote("#define D3D12_ERROR_DRIVER_VERSION_MISMATCH ((HRESULT)0x887e0002)")
cpp_quote("#define D3D12_ERROR_DRIVER_VERSION_MISMATCH 0x887e0002")
cpp_quote("#endif")
const UINT D3D12_CS_TGSM_REGISTER_COUNT = 8192;
@ -285,12 +285,6 @@ typedef enum D3D12_WRITEBUFFERIMMEDIATE_MODE
D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT = 0x2,
} D3D12_WRITEBUFFERIMMEDIATE_MODE;
typedef enum D3D12_WAVE_MMA_TIER
{
D3D12_WAVE_MMA_TIER_NOT_SUPPORTED = 0,
D3D12_WAVE_MMA_TIER_1_0 = 10,
} D3D12_WAVE_MMA_TIER;
interface ID3D12Fence;
interface ID3D12RootSignature;
interface ID3D12Heap;
@ -459,32 +453,6 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS7
D3D12_SAMPLER_FEEDBACK_TIER SamplerFeedbackTier;
} D3D12_FEATURE_DATA_D3D12_OPTIONS7;
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS8
{
BOOL UnalignedBlockTexturesSupported;
} D3D12_FEATURE_DATA_D3D12_OPTIONS8;
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS9
{
BOOL MeshShaderPipelineStatsSupported;
BOOL MeshShaderSupportsFullRangeRenderTargetArrayIndex;
BOOL AtomicInt64OnTypedResourceSupported;
BOOL AtomicInt64OnGroupSharedSupported;
BOOL DerivativesInMeshAndAmplificationShadersSupported;
D3D12_WAVE_MMA_TIER WaveMMATier;
} D3D12_FEATURE_DATA_D3D12_OPTIONS9;
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS10
{
BOOL VariableRateShadingSumCombinerSupported;
BOOL MeshShaderPerPrimitiveShadingRateSupported;
} D3D12_FEATURE_DATA_D3D12_OPTIONS10;
typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS11
{
BOOL AtomicInt64OnDescriptorHeapResourceSupported;
} D3D12_FEATURE_DATA_D3D12_OPTIONS11;
typedef struct D3D12_FEATURE_DATA_FORMAT_SUPPORT
{
DXGI_FORMAT Format;
@ -1136,8 +1104,6 @@ typedef enum D3D12_ROOT_SIGNATURE_FLAGS
D3D12_ROOT_SIGNATURE_FLAG_LOCAL_ROOT_SIGNATURE = 0x80,
D3D12_ROOT_SIGNATURE_FLAG_DENY_AMPLIFICATION_SHADER_ROOT_ACCESS = 0x100,
D3D12_ROOT_SIGNATURE_FLAG_DENY_MESH_SHADER_ROOT_ACCESS = 0x200,
D3D12_ROOT_SIGNATURE_FLAG_CBV_SRV_UAV_HEAP_DIRECTLY_INDEXED = 0x400,
D3D12_ROOT_SIGNATURE_FLAG_SAMPLER_HEAP_DIRECTLY_INDEXED = 0x800,
} D3D12_ROOT_SIGNATURE_FLAGS;
cpp_quote("DEFINE_ENUM_FLAG_OPERATORS(D3D12_ROOT_SIGNATURE_FLAGS);")
@ -2025,10 +1991,6 @@ typedef enum D3D12_FEATURE
D3D12_FEATURE_D3D12_OPTIONS7 = 32,
D3D12_FEATURE_PROTECTED_RESOURCE_SESSION_TYPE_COUNT = 33,
D3D12_FEATURE_PROTECTED_RESOURCE_SESSION_TYPES = 34,
D3D12_FEATURE_D3D12_OPTIONS8 = 36,
D3D12_FEATURE_D3D12_OPTIONS9 = 37,
D3D12_FEATURE_D3D12_OPTIONS10 = 39,
D3D12_FEATURE_D3D12_OPTIONS11 = 40,
} D3D12_FEATURE;
typedef struct D3D12_MEMCPY_DEST
@ -2158,15 +2120,6 @@ interface ID3D12Resource1 : ID3D12Resource
{
HRESULT GetProtectedResourceSession(REFIID riid, void **protected_session);
}
[
uuid(be36ec3b-ea85-4aeb-a45a-e9d76404a495),
object,
local,
pointer_default(unique)
]
interface ID3D12Resource2 : ID3D12Resource1 {
D3D12_RESOURCE_DESC1 GetDesc1();
}
[
uuid(7116d91c-e7e4-47ce-b8c6-ec8168f437e5),
@ -3322,45 +3275,6 @@ typedef struct D3D12_DISPATCH_MESH_ARGUMENTS
UINT ThreadGroupCountZ;
} D3D12_DISPATCH_MESH_ARGUMENTS;
typedef enum D3D12_SHADER_CACHE_MODE
{
D3D12_SHADER_CACHE_MODE_MEMORY = 0,
D3D12_SHADER_CACHE_MODE_DISK = 1,
} D3D12_SHADER_CACHE_MODE;
typedef enum D3D12_SHADER_CACHE_FLAGS
{
D3D12_SHADER_CACHE_FLAG_NONE = 0,
D3D12_SHADER_CACHE_FLAG_DRIVER_VERSIONED = 0x1,
D3D12_SHADER_CACHE_FLAG_USE_WORKING_DIR = 0x2,
} D3D12_SHADER_CACHE_FLAGS;
typedef struct D3D12_SHADER_CACHE_SESSION_DESC
{
GUID Identifier;
D3D12_SHADER_CACHE_MODE Mode;
D3D12_SHADER_CACHE_FLAGS Flags;
UINT MaximumInMemoryCacheSizeBytes;
UINT MaximumInMemoryCacheEntries;
UINT MaximumValueFileSizeBytes;
UINT64 Version;
} D3D12_SHADER_CACHE_SESSION_DESC;
typedef enum D3D12_SHADER_CACHE_KIND_FLAGS
{
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_D3D_CACHE_FOR_DRIVER = 0x1,
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_D3D_CONVERSIONS = 0x2,
D3D12_SHADER_CACHE_KIND_FLAG_IMPLICIT_DRIVER_MANAGED = 0x4,
D3D12_SHADER_CACHE_KIND_FLAG_APPLICATION_MANAGED = 0x8,
} D3D12_SHADER_CACHE_KIND_FLAGS;
typedef enum D3D12_SHADER_CACHE_CONTROL_FLAGS
{
D3D12_SHADER_CACHE_CONTROL_FLAG_DISABLE = 0x1,
D3D12_SHADER_CACHE_CONTROL_FLAG_ENABLE = 0x2,
D3D12_SHADER_CACHE_CONTROL_FLAG_CLEAR = 0x4,
} D3D12_SHADER_CACHE_CONTROL_FLAGS;
[
uuid(dbb84c27-36ce-4fc9-b801-f048c46ac570),
object,
@ -3604,17 +3518,6 @@ interface ID3D12GraphicsCommandList5 : ID3D12GraphicsCommandList4
void RSSetShadingRateImage(ID3D12Resource *image);
}
[
uuid(c3827890-e548-4cfa-96cf-5689a9370f80),
object,
local,
pointer_default(unique)
]
interface ID3D12GraphicsCommandList6 : ID3D12GraphicsCommandList5
{
void DispatchMesh(UINT x, UINT y, UINT z);
}
typedef enum D3D12_TILE_RANGE_FLAGS
{
D3D12_TILE_RANGE_FLAG_NONE = 0x0,
@ -3644,8 +3547,8 @@ interface ID3D12CommandQueue : ID3D12Pageable
ID3D12Heap *heap,
UINT range_count,
const D3D12_TILE_RANGE_FLAGS *range_flags,
const UINT *heap_range_offsets,
const UINT *range_tile_counts,
UINT *heap_range_offsets,
UINT *range_tile_counts,
D3D12_TILE_MAPPING_FLAGS flags);
void CopyTileMappings(ID3D12Resource *dst_resource,
@ -4102,67 +4005,6 @@ interface ID3D12Device6 : ID3D12Device5
D3D12_MEASUREMENTS_ACTION action, HANDLE event, BOOL further_measurements);
}
[
uuid(5c014b53-68a1-4b9b-8bd1-dd6046b9358b),
object,
local,
pointer_default(unique)
]
interface ID3D12Device7 : ID3D12Device6
{
HRESULT AddToStateObject(const D3D12_STATE_OBJECT_DESC *addition,
ID3D12StateObject *state_object, REFIID riid, void **new_state_object);
HRESULT CreateProtectedResourceSession1(
const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc,
REFIID riid, void **session);
}
[
uuid(9218e6bb-f944-4f7e-a75c-b1b2c7b701f3),
object,
local,
pointer_default(unique)
]
interface ID3D12Device8 : ID3D12Device7
{
D3D12_RESOURCE_ALLOCATION_INFO GetResourceAllocationInfo2(UINT visible_mask,
UINT resource_desc_count, const D3D12_RESOURCE_DESC1 *resource_descs,
D3D12_RESOURCE_ALLOCATION_INFO1 *resource_allocation_infos);
HRESULT CreateCommittedResource2(const D3D12_HEAP_PROPERTIES *heap_properties,
D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *resource_desc,
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
ID3D12ProtectedResourceSession *protected_session, REFIID riid, void **resource);
HRESULT CreatePlacedResource1(ID3D12Heap *heap, UINT64 heap_offset,
const D3D12_RESOURCE_DESC1 *resource_desc, D3D12_RESOURCE_STATES initial_state,
const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID riid, void **resource);
void CreateSamplerFeedbackUnorderedAccessView(ID3D12Resource *target_resource,
ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor);
void GetCopyableFootprints1(const D3D12_RESOURCE_DESC1 *resource_desc,
UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset,
D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_count,
UINT64 *row_size, UINT64 *total_bytes);
}
[
uuid(4c80e962-f032-4f60-bc9e-ebc2cfa1d83c),
object,
local,
pointer_default(unique)
]
interface ID3D12Device9 : ID3D12Device8
{
HRESULT CreateShaderCacheSession(const D3D12_SHADER_CACHE_SESSION_DESC *desc,
REFIID riid, void **session);
HRESULT ShaderCacheControl(D3D12_SHADER_CACHE_KIND_FLAGS kinds,
D3D12_SHADER_CACHE_CONTROL_FLAGS control);
HRESULT CreateCommandQueue1(const D3D12_COMMAND_QUEUE_DESC *desc,
REFIID creator_id, REFIID riid, void **command_queue);
}
[
uuid(34ab647b-3cc8-46ac-841b-c0965645c046),
object,

View File

@ -1,37 +0,0 @@
/*
* * Copyright 2021 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
import "vkd3d_d3d12.idl";
import "vkd3d_vk_includes.h";
/* Extension interface derived directly from IUnknown. It is declared `local`,
 * i.e. a non-remotable COM interface.
 * NOTE(review): judging by its name it is presumably obtained via QueryInterface
 * on a D3D12 device; confirm against the implementation. */
[
uuid(11ea7a1a-0f6a-49bf-b612-3e30f8e201dd),
object,
local,
pointer_default(unique)
]
interface ID3D12DeviceExt : IUnknown
{
/* Writes the Vulkan instance, physical device and device handles to the three output pointers. */
HRESULT GetVulkanHandles(VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device);
/* Returns a BOOL for the queried D3D12_VK_EXTENSION value. */
BOOL GetExtensionSupport(D3D12_VK_EXTENSION extension);
/* Takes a cubin blob of cubin_size bytes, three block dimensions and a shader name;
 * returns a handle through *handle. Handles are passed back to DestroyCubinComputeShader(). */
HRESULT CreateCubinComputeShaderWithName(const void *cubin_data, UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **handle);
HRESULT DestroyCubinComputeShader(D3D12_CUBIN_DATA_HANDLE *handle);
/* The two queries below take CPU descriptor handles and write a 32-bit handle to the output pointer. */
HRESULT GetCudaTextureObject(D3D12_CPU_DESCRIPTOR_HANDLE srv_handle, D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle);
HRESULT GetCudaSurfaceObject(D3D12_CPU_DESCRIPTOR_HANDLE uav_handle, UINT32 *cuda_surface_handle);
/* NOTE(review): whether uav_info is input, output or both is not visible here; confirm. */
HRESULT CaptureUAVInfo(D3D12_UAV_INFO *uav_info);
}

View File

@ -135,12 +135,5 @@ typedef enum DXGI_FORMAT
DXGI_FORMAT_A8P8 = 0x72,
DXGI_FORMAT_B4G4R4A4_UNORM = 0x73,
DXGI_FORMAT_P208 = 0x82,
DXGI_FORMAT_V208 = 0x83,
DXGI_FORMAT_V408 = 0x84,
DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE = 0xbd,
DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE = 0xbe,
DXGI_FORMAT_FORCE_UINT = 0xffffffff,
} DXGI_FORMAT;

View File

@ -54,22 +54,11 @@ enum vkd3d_shader_visibility
typedef uint64_t vkd3d_shader_hash_t;
/* Bit flags describing a compiled shader; each enumerator is a distinct bit,
 * so values can be combined with bitwise OR. */
enum vkd3d_shader_meta_flags
{
VKD3D_SHADER_META_FLAG_REPLACED = 1 << 0,
VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE = 1 << 1,
VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS = 1 << 2,
};
struct vkd3d_shader_meta
{
vkd3d_shader_hash_t hash;
unsigned int cs_workgroup_size[3]; /* Only contains valid data if uses_subgroup_size is true. */
unsigned int patch_vertex_count; /* Relevant for HS. May be 0, in which case the patch vertex count is not known. */
unsigned int cs_required_wave_size; /* If non-zero, force a specific CS subgroup size. */
uint32_t flags; /* vkd3d_shader_meta_flags */
bool replaced;
};
STATIC_ASSERT(sizeof(struct vkd3d_shader_meta) == 32);
struct vkd3d_shader_code
{
@ -78,8 +67,6 @@ struct vkd3d_shader_code
struct vkd3d_shader_meta meta;
};
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader);
enum vkd3d_shader_descriptor_type
{
VKD3D_SHADER_DESCRIPTOR_TYPE_UNKNOWN,
@ -213,12 +200,10 @@ struct vkd3d_shader_interface_info
/* Ignored unless VKD3D_SHADER_INTERFACE_SSBO_OFFSET_BUFFER or TYPED_OFFSET_BUFFER is set */
const struct vkd3d_shader_descriptor_binding *offset_buffer_binding;
#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
/* Ignored unless VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER is set. */
const struct vkd3d_shader_descriptor_binding *descriptor_qa_global_binding;
/* Ignored unless VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER is set. */
const struct vkd3d_shader_descriptor_binding *descriptor_qa_heap_binding;
#endif
VkShaderStageFlagBits stage;
@ -241,7 +226,6 @@ struct vkd3d_shader_root_constant
struct vkd3d_shader_root_descriptor
{
struct vkd3d_shader_resource_binding *binding;
uint32_t raw_va_root_descriptor_index;
};
struct vkd3d_shader_root_parameter
@ -297,59 +281,7 @@ enum vkd3d_shader_target_extension
VKD3D_SHADER_TARGET_EXTENSION_NONE,
VKD3D_SHADER_TARGET_EXTENSION_SPV_EXT_DEMOTE_TO_HELPER_INVOCATION,
VKD3D_SHADER_TARGET_EXTENSION_READ_STORAGE_IMAGE_WITHOUT_FORMAT,
VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT,
VKD3D_SHADER_TARGET_EXTENSION_RAY_TRACING_PRIMITIVE_CULLING,
VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT,
/* When using scalar block layout with a vec3 array on a byte address buffer,
* there is diverging behavior across hardware.
* On AMD, robustness is checked per component, which means we can implement ByteAddressBuffer
* without further hackery. On NVIDIA, robustness does not seem to work this way, so it's either
* all in range, or all out of range. We can implement structured buffer vectorization of vec3,
* but not byte address buffer. */
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS,
VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR,
VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT,
VKD3D_SHADER_TARGET_EXTENSION_COUNT,
};
enum vkd3d_shader_quirk
{
/* If sample or sample_b is used in control flow, force LOD 0.0 (which game should expect anyway).
* Works around specific, questionable shaders which rely on this to give sensible results,
* since LOD can become garbage on certain implementations, and even on native drivers
* the result is implementation defined.
* Outside of making this edge case well-defined in Vulkan or hacking driver compilers,
* this is the pragmatic solution.
* Hoisting gradients is not possible in all cases,
* and would not be worth it until it's a widespread problem. */
VKD3D_SHADER_QUIRK_FORCE_EXPLICIT_LOD_IN_CONTROL_FLOW = (1 << 0),
/* After every write to group shared memory, force a memory barrier.
* This works around buggy games which forget to use barrier(). */
VKD3D_SHADER_QUIRK_FORCE_TGSM_BARRIERS = (1 << 1),
/* For Position builtins in Output storage class, emit Invariant decoration.
* Normally, games have to emit Precise math for position, but if they forget ... */
VKD3D_SHADER_QUIRK_INVARIANT_POSITION = (1 << 2),
};
struct vkd3d_shader_quirk_hash
{
vkd3d_shader_hash_t shader_hash;
uint32_t quirks;
};
struct vkd3d_shader_quirk_info
{
const struct vkd3d_shader_quirk_hash *hashes;
unsigned int num_hashes;
uint32_t default_quirks;
/* Quirks which are ORed in with the other masks (including default_quirks).
* Used mostly for additional overrides from VKD3D_CONFIG. */
uint32_t global_quirks;
VKD3D_SHADER_TARGET_EXTENSION_READ_STORAGE_IMAGE_WITHOUT_FORMAT
};
struct vkd3d_shader_compile_arguments
@ -366,7 +298,7 @@ struct vkd3d_shader_compile_arguments
const unsigned int *output_swizzles;
unsigned int output_swizzle_count;
const struct vkd3d_shader_quirk_info *quirks;
uint64_t config_flags;
};
enum vkd3d_tessellator_output_primitive
@ -660,8 +592,6 @@ struct vkd3d_shader_scan_info
bool has_side_effects;
bool needs_late_zs;
bool discards;
bool has_uav_counter;
unsigned int patch_vertex_count;
};
enum vkd3d_component_type
@ -754,11 +684,7 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code);
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash);
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *root_signature);
void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc *root_signature);
/* FIXME: Add support for returning error messages (ID3DBlob). */
@ -771,10 +697,12 @@ int vkd3d_shader_convert_root_signature(struct vkd3d_versioned_root_signature_de
int vkd3d_shader_scan_dxbc(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_scan_info *scan_info);
/* If value cannot be determined, *patch_vertex_count returns 0. */
int vkd3d_shader_scan_patch_vertex_count(const struct vkd3d_shader_code *dxbc,
unsigned int *patch_vertex_count);
int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_signature *signature);
int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_signature *signature);
struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
const struct vkd3d_shader_signature *signature, const char *semantic_name,
unsigned int semantic_index, unsigned int stream_index);
@ -784,65 +712,19 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
struct vkd3d_shader_library_entry_point
{
unsigned int identifier;
VkShaderStageFlagBits stage;
WCHAR *mangled_entry_point;
WCHAR *plain_entry_point;
char *real_entry_point;
VkShaderStageFlagBits stage;
};
enum vkd3d_shader_subobject_kind
{
/* Matches DXIL for simplicity. */
VKD3D_SHADER_SUBOBJECT_KIND_STATE_OBJECT_CONFIG = 0,
VKD3D_SHADER_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE = 1,
VKD3D_SHADER_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE = 2,
VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION = 8,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG = 9,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG = 10,
VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP = 11,
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1 = 12,
};
struct vkd3d_shader_library_subobject
{
enum vkd3d_shader_subobject_kind kind;
unsigned int dxil_identifier;
/* All const pointers here point directly to the DXBC blob,
* so they do not need to be freed.
* Fortunately for us, the C strings are zero-terminated in the blob itself. */
/* In the blob, ASCII is used as identifier, where API uses wide strings, sigh ... */
const char *name;
union
{
D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config;
D3D12_RAYTRACING_SHADER_CONFIG shader_config;
D3D12_STATE_OBJECT_CONFIG object_config;
/* Duped strings because API wants wide strings for no good reason. */
D3D12_HIT_GROUP_DESC hit_group;
D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION association;
struct
{
const void *data;
size_t size;
} payload;
} data;
};
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
int vkd3d_shader_dxil_append_library_entry_points(
const D3D12_DXIL_LIBRARY_DESC *library_desc,
unsigned int identifier,
struct vkd3d_shader_library_entry_point **entry_points,
size_t *entry_point_size, size_t *entry_point_count,
struct vkd3d_shader_library_subobject **subobjects,
size_t *subobjects_size, size_t *subobjects_count);
size_t *entry_point_size, size_t *entry_point_count);
void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_entry_point *entry_points, size_t count);
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count);
int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
const char *export,
@ -851,11 +733,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
const struct vkd3d_shader_interface_local_info *shader_interface_local_info,
const struct vkd3d_shader_compile_arguments *compiler_args);
uint32_t vkd3d_shader_compile_arguments_select_quirks(
const struct vkd3d_shader_compile_arguments *args, vkd3d_shader_hash_t hash);
uint64_t vkd3d_shader_get_revision(void);
#endif /* VKD3D_SHADER_NO_PROTOTYPES */
/*
@ -868,8 +745,7 @@ typedef int (*PFN_vkd3d_shader_compile_dxbc)(const struct vkd3d_shader_code *dxb
typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code);
typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash);
struct vkd3d_versioned_root_signature_desc *root_signature);
typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_versioned_root_signature_desc *root_signature);
typedef int (*PFN_vkd3d_shader_serialize_root_signature)(
@ -880,6 +756,8 @@ typedef int (*PFN_vkd3d_shader_convert_root_signature)(struct vkd3d_versioned_ro
typedef int (*PFN_vkd3d_shader_scan_dxbc)(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_scan_info *scan_info);
typedef int (*PFN_vkd3d_shader_scan_patch_vertex_count)(const struct vkd3d_shader_code *dxbc,
unsigned int *patch_vertex_count);
typedef int (*PFN_vkd3d_shader_parse_input_signature)(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_signature *signature);

View File

@ -1,58 +0,0 @@
/*
* Copyright 2021 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#ifndef __VKD3D_VK_INCLUDES_H
#define __VKD3D_VK_INCLUDES_H
#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
typedef struct VkCuFunctionNVX_T *VkCuFunctionNVX;
typedef struct VkCuModuleNVX_T *VkCuModuleNVX;
#else
typedef UINT64 VkCuFunctionNVX;
typedef UINT64 VkCuModuleNVX;
#endif
typedef struct VkPhysicalDevice_T *VkPhysicalDevice;
typedef struct VkCommandBuffer_T *VkCommandBuffer;
typedef struct VkInstance_T *VkInstance;
typedef struct VkDevice_T *VkDevice;
/* Identifiers for the Vulkan extensions that can be queried through
 * ID3D12DeviceExt::GetExtensionSupport.
 * NOTE(review): the values are distinct bits (0x1, 0x2); whether callers may OR them together is not visible here - confirm. */
typedef enum D3D12_VK_EXTENSION
{
D3D12_VK_NVX_BINARY_IMPORT = 0x1,
D3D12_VK_NVX_IMAGE_VIEW_HANDLE = 0x2
} D3D12_VK_EXTENSION;
/* Handle describing a cubin compute shader. ID3D12DeviceExt::CreateCubinComputeShaderWithName
 * hands one out and DestroyCubinComputeShader takes it back. */
typedef struct D3D12_CUBIN_DATA_HANDLE
{
/* NVX function / module objects; pointer-sized handles on 64-bit targets, UINT64 otherwise (see typedefs above). */
VkCuFunctionNVX vkCuFunction;
VkCuModuleNVX vkCuModule;
/* NOTE(review): presumably the block_x / block_y / block_z values given at creation - confirm. */
UINT32 blockX;
UINT32 blockY;
UINT32 blockZ;
} D3D12_CUBIN_DATA_HANDLE;
/* Structure exchanged with ID3D12DeviceExt::CaptureUAVInfo.
 * NOTE(review): field semantics below are inferred from the names only - confirm against the implementation. */
typedef struct D3D12_UAV_INFO
{
UINT32 version; /* presumably a structure version tag */
UINT32 surfaceHandle; /* presumably a surface handle like the one from GetCudaSurfaceObject */
UINT64 gpuVAStart; /* presumably the start GPU virtual address of the UAV */
UINT64 gpuVASize; /* presumably the size in bytes of that address range */
} D3D12_UAV_INFO;
#endif // __VKD3D_VK_INCLUDES_H

View File

@ -42,20 +42,8 @@
#define WIDL_C_INLINE_WRAPPERS
#include <vkd3d_windows.h>
/* Vulkan headers include static const declarations. Enable static keyword for
* them.
*/
#ifdef __MINGW32__
# undef static
#endif
#define VK_USE_PLATFORM_WIN32_KHR
#include <vulkan/vulkan.h>
#include "private/vulkan_private_extensions.h"
#ifdef __MINGW32__
# define static
#endif
#include <dxgi1_6.h>
@ -69,8 +57,6 @@
#define __vkd3d_dxgi1_4_h__
#include <vkd3d_swapchain_factory.h>
#include <vkd3d_command_list_vkd3d_ext.h>
#include <vkd3d_device_vkd3d_ext.h>
#include <vkd3d_d3d12.h>
#include <vkd3d_d3d12sdklayers.h>

View File

@ -88,9 +88,6 @@ typedef void *HANDLE;
typedef const WCHAR* LPCWSTR;
#define _fseeki64(a, b, c) fseeko64(a, b, c)
#define _ftelli64(a) ftello64(a)
/* GUID */
# ifdef __WIDL__
typedef struct

View File

@ -3,9 +3,9 @@ LIBRARY d3d12.dll
EXPORTS
D3D12CreateDevice @101
D3D12GetDebugInterface @102
D3D12CreateRootSignatureDeserializer
D3D12CreateVersionedRootSignatureDeserializer
D3D12CreateRootSignatureDeserializer @107
D3D12CreateVersionedRootSignatureDeserializer @108
D3D12EnableExperimentalFeatures
D3D12SerializeRootSignature
D3D12SerializeVersionedRootSignature
D3D12EnableExperimentalFeatures @110
D3D12SerializeRootSignature @115
D3D12SerializeVersionedRootSignature @116

View File

@ -20,8 +20,6 @@
#include "vkd3d_debug.h"
#include "vkd3d_threads.h"
#include "vkd3d_platform.h"
#include <assert.h>
#include <ctype.h>
#include <errno.h>
@ -60,13 +58,13 @@ static FILE *vkd3d_log_file;
static void vkd3d_dbg_init_once(void)
{
char vkd3d_debug[VKD3D_PATH_MAX];
const char *vkd3d_debug;
unsigned int channel, i;
for (channel = 0; channel < VKD3D_DBG_CHANNEL_COUNT; channel++)
{
if (!vkd3d_get_env_var(env_for_channel[channel], vkd3d_debug, sizeof(vkd3d_debug)))
strncpy(vkd3d_debug, "", VKD3D_PATH_MAX);
if (!(vkd3d_debug = getenv(env_for_channel[channel])))
vkd3d_debug = "";
for (i = 1; i < ARRAY_SIZE(debug_level_names); ++i)
if (!strcmp(debug_level_names[i], vkd3d_debug))
@ -77,7 +75,7 @@ static void vkd3d_dbg_init_once(void)
vkd3d_dbg_level[channel] = VKD3D_DBG_LEVEL_FIXME;
}
if (vkd3d_get_env_var("VKD3D_LOG_FILE", vkd3d_debug, sizeof(vkd3d_debug)))
if ((vkd3d_debug = getenv("VKD3D_LOG_FILE")))
{
vkd3d_log_file = fopen(vkd3d_debug, "w");
if (!vkd3d_log_file)
@ -123,7 +121,7 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_channel channel, enum vkd3d_dbg_level level
va_start(args, fmt);
spinlock_acquire(&spin);
fprintf(log_file, "%04x:%s:%s: ", tid, debug_level_names[level], function);
fprintf(log_file, "%u:%s:%s: ", tid, debug_level_names[level], function);
vfprintf(log_file, fmt, args);
spinlock_release(&spin);
va_end(args);
@ -283,11 +281,11 @@ const char *debugstr_w(const WCHAR *wstr)
unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value)
{
char value[VKD3D_PATH_MAX];
const char *value = getenv(name);
unsigned long r;
char *end_ptr;
if (vkd3d_get_env_var(name, value, sizeof(value)) && strlen(value) > 0)
if (value)
{
errno = 0;
r = strtoul(value, &end_ptr, 0);

View File

@ -1,188 +0,0 @@
/*
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_file_utils.h"
#include "vkd3d_debug.h"
/* For disk cache. */
#ifdef _WIN32
#include <windows.h>
#include <io.h>
#else
#include <unistd.h>
#include <sys/mman.h>
#include <errno.h>
#endif
#include <fcntl.h>
#include <sys/stat.h>
#include <stdio.h>
/* Renames from_path to to_path, replacing to_path if it already exists.
 * Returns true on success. */
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path)
{
#ifdef _WIN32
    DWORD error;

    /* Fast path: the destination does not exist yet. */
    if (MoveFileA(from_path, to_path))
        return true;

    error = GetLastError();
    if (error != ERROR_ALREADY_EXISTS)
        return error == ERROR_SUCCESS;

    /* Destination exists: swap it for the source file instead. */
    if (ReplaceFileA(to_path, from_path, NULL, 0, NULL, NULL))
        return true;

    return GetLastError() == ERROR_SUCCESS;
#else
    /* rename() already replaces an existing destination. */
    int rc = rename(from_path, to_path);
    return rc == 0;
#endif
}
/* Renames from_path to to_path without replacing an existing to_path.
 * Returns true on success. */
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path)
{
#ifdef _WIN32
    if (MoveFileA(from_path, to_path))
        return true;

    return GetLastError() == ERROR_SUCCESS;
#else
    /* RENAME_NOREPLACE makes the call fail rather than clobber to_path. */
    int rc = renameat2(AT_FDCWD, from_path, AT_FDCWD, to_path, RENAME_NOREPLACE);
    return rc == 0;
#endif
}
/* Deletes the file at path. Returns true on success. */
bool vkd3d_file_delete(const char *path)
{
#ifdef _WIN32
    if (DeleteFileA(path))
        return true;

    return GetLastError() == ERROR_SUCCESS;
#else
    int rc = unlink(path);
    return rc == 0;
#endif
}
/* Opens path for binary writing, creating it exclusively.
 * Returns the stream, or NULL if the file could not be opened. */
FILE *vkd3d_file_open_exclusive_write(const char *path)
{
#ifdef _WIN32
    /* Approach borrowed from Fossilize. There is no direct way to get exclusive
     * creation through the FILE interface, so go through the POSIX-style layer.
     * "wbx" mostly works, but Wine warns about it and older MSVC runtimes
     * do not support it. */
    FILE *stream;
    int fd;

    fd = _open(path, _O_BINARY | _O_WRONLY | _O_CREAT | _O_EXCL | _O_TRUNC | _O_SEQUENTIAL,
            _S_IWRITE | _S_IREAD);
    if (fd < 0)
        return NULL;

    /* On success the stream owns the descriptor; on failure we still do. */
    stream = _fdopen(fd, "wb");
    if (!stream)
        _close(fd);

    return stream;
#else
    return fopen(path, "wbx");
#endif
}
/* Releases the mapping held by file (if any) and zeroes the whole structure. */
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file)
{
#ifdef _WIN32
    if (file->mapped)
        UnmapViewOfFile(file->mapped);
#else
    if (file->mapped)
        munmap(file->mapped, file->mapped_size);
#endif

    /* Always reset, so the structure can be reused or unmapped again safely. */
    memset(file, 0, sizeof(*file));
}
/* Maps the file at path read-only into memory.
 *
 * On success, file->mapped points at the mapping, file->mapped_size holds
 * its size and true is returned. On any failure file->mapped is NULL,
 * file->mapped_size is 0 and false is returned. The file handle / descriptor
 * is closed before returning in either case. */
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file)
{
#ifdef _WIN32
    DWORD size_hi, size_lo;
    HANDLE file_mapping;
    HANDLE handle;
#else
    struct stat stat_buf;
    int fd;
#endif

    file->mapped = NULL;
    file->mapped_size = 0;

#ifdef _WIN32
    handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
            OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
            INVALID_HANDLE_VALUE);
    if (handle == INVALID_HANDLE_VALUE)
        goto out;

    size_lo = GetFileSize(handle, &size_hi);
    file->mapped_size = size_lo | (((uint64_t)size_hi) << 32);

    /* CreateFileMappingA reports failure with NULL, not INVALID_HANDLE_VALUE. */
    file_mapping = CreateFileMappingA(handle, NULL, PAGE_READONLY, 0, 0, NULL);
    if (!file_mapping)
        goto out;

    /* The view keeps the mapping object alive, so the handle can be closed right away. */
    file->mapped = MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, file->mapped_size);
    CloseHandle(file_mapping);
    file_mapping = INVALID_HANDLE_VALUE;
    if (!file->mapped)
    {
        ERR("Failed to MapViewOfFile for %s.\n", path);
        goto out;
    }

out:
    if (handle != INVALID_HANDLE_VALUE)
        CloseHandle(handle);
#else
    fd = open(path, O_RDONLY);
    if (fd < 0)
        goto out;

    if (fstat(fd, &stat_buf) < 0)
    {
        ERR("Failed to fstat pipeline cache.\n");
        goto out;
    }

    /* Map private to make sure we get CoW behavior in case someone clobbers
     * the cache while in flight. We need to read data directly out of the cache. */
    file->mapped = mmap(NULL, stat_buf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
    if (file->mapped == MAP_FAILED)
    {
        /* mmap signals failure with MAP_FAILED, which is not NULL. Normalize it
         * so the checks below (and vkd3d_file_unmap) see an unmapped file.
         * This path is also taken for zero-length files. */
        file->mapped = NULL;
        goto out;
    }
    file->mapped_size = stat_buf.st_size;

out:
    if (fd >= 0)
        close(fd);
#endif

    if (!file->mapped)
        file->mapped_size = 0;
    return file->mapped != NULL;
}

View File

@ -4,8 +4,6 @@ vkd3d_common_src = [
'utf8.c',
'profiling.c',
'string.c',
'file_utils.c',
'platform.c',
]
vkd3d_common_lib = static_library('vkd3d_common', vkd3d_common_src, vkd3d_header_files,

View File

@ -21,7 +21,6 @@
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_profiling.h"
#include "vkd3d_platform.h"
#include "vkd3d_threads.h"
#include "vkd3d_debug.h"
#include <stdlib.h>
@ -125,10 +124,8 @@ static void vkd3d_init_profiling_path(const char *path)
static void vkd3d_init_profiling_once(void)
{
char path[VKD3D_PATH_MAX];
vkd3d_get_env_var("VKD3D_PROFILE_PATH", path, sizeof(path));
if (strlen(path) > 0)
const char *path = getenv("VKD3D_PROFILE_PATH");
if (path)
vkd3d_init_profiling_path(path);
}

View File

@ -82,21 +82,6 @@ bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b)
return *a == *b;
}
/* Compares a wide string against a narrow string, code unit by code unit.
 * Returns false if either pointer is NULL; otherwise true only when both
 * strings have the same length and identical contents. */
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b)
{
    if (!a || !b)
        return false;

    for (; *a != '\0' && *b != '\0'; a++, b++)
    {
        if (*a != *b)
            return false;
    }

    /* Equal only if both strings ended at the same position. */
    return *a == *b;
}
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t expected_n, const WCHAR *b)
{
size_t n = 0;
@ -138,7 +123,7 @@ WCHAR *vkd3d_dup_entry_point_n(const char *str, size_t len)
static bool is_valid_identifier_character(char v)
{
return (v >= 'a' && v <= 'z') || (v >= 'A' && v <= 'Z') || v == '_' || (v >= '0' && v <= '9');
return (v >= 'a' && v <= 'z') || (v >= 'A' && v <= 'Z') || v == '_';
}
static const char *vkd3d_manged_entry_point_scan(const char *entry, const char **out_end_entry)

View File

@ -2249,21 +2249,6 @@ static int isgn_handler(const char *data, DWORD data_size, DWORD tag, void *ctx)
return shader_parse_signature(tag, data, data_size, is);
}
/* parse_dxbc() chunk callback that extracts the output signature.
 *
 * ctx points to the struct vkd3d_shader_signature to fill in. Chunks other
 * than OSGN / OSG1 are ignored and reported as success. If a signature has
 * already been parsed, it is freed and replaced by the new one.
 * Returns the result of shader_parse_signature() for handled chunks. */
static int osgn_handler(const char *data, DWORD data_size, DWORD tag, void *ctx)
{
    struct vkd3d_shader_signature *os = ctx;

    if (tag != TAG_OSGN && tag != TAG_OSG1)
        return VKD3D_OK;

    if (os->elements)
    {
        /* More than one output signature chunk: the last one wins. */
        FIXME("Multiple output signatures.\n");
        vkd3d_shader_free_shader_signature(os);
    }
    return shader_parse_signature(tag, data, data_size, os);
}
int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
struct vkd3d_shader_signature *signature)
{
@ -2275,17 +2260,6 @@ int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
return ret;
}
/* Parses the output signature out of a DXBC blob.
 * signature is zeroed first and then filled in by osgn_handler.
 * Returns the parse_dxbc() result; negative values are logged as errors. */
int shader_parse_output_signature(const void *dxbc, size_t dxbc_length,
        struct vkd3d_shader_signature *signature)
{
    int result;

    memset(signature, 0, sizeof(*signature));

    result = parse_dxbc(dxbc, dxbc_length, osgn_handler, signature);
    if (result < 0)
        ERR("Failed to parse output signature.\n");

    return result;
}
static int dxil_handler(const char *data, DWORD data_size, DWORD tag, void *context)
{
switch (tag)
@ -2755,9 +2729,8 @@ static int shader_parse_static_samplers(struct root_signature_parser_context *co
return VKD3D_OK;
}
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc,
vkd3d_shader_hash_t *compatibility_hash)
static int shader_parse_root_signature(const char *data, unsigned int data_size,
struct vkd3d_versioned_root_signature_desc *desc)
{
struct vkd3d_root_signature_desc *v_1_0 = &desc->v_1_0;
struct root_signature_parser_context context;
@ -2765,8 +2738,6 @@ int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_si
const char *ptr = data;
int ret;
memset(desc, 0, sizeof(*desc));
context.data = data;
context.data_size = data_size;
@ -2838,46 +2809,28 @@ int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_si
read_uint32(&ptr, &v_1_0->flags);
TRACE("Flags %#x.\n", v_1_0->flags);
if (compatibility_hash)
{
struct vkd3d_shader_code code = { data, data_size };
*compatibility_hash = vkd3d_shader_hash(&code);
}
return VKD3D_OK;
}
static int rts0_handler(const char *data, DWORD data_size, DWORD tag, void *context)
{
struct vkd3d_shader_code *payload = context;
struct vkd3d_versioned_root_signature_desc *desc = context;
if (tag != TAG_RTS0)
return VKD3D_OK;
payload->code = data;
payload->size = data_size;
return VKD3D_OK;
return shader_parse_root_signature(data, data_size, desc);
}
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *root_signature,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *root_signature)
{
struct vkd3d_shader_code raw_payload;
int ret;
TRACE("dxbc {%p, %zu}, root_signature %p.\n", dxbc->code, dxbc->size, root_signature);
memset(&raw_payload, 0, sizeof(raw_payload));
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, &raw_payload)) < 0)
return ret;
if (!raw_payload.code)
return VKD3D_ERROR;
if ((ret = vkd3d_shader_parse_root_signature_raw(raw_payload.code, raw_payload.size,
root_signature, compatibility_hash)) < 0)
memset(root_signature, 0, sizeof(*root_signature));
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, root_signature)) < 0)
{
vkd3d_shader_free_root_signature(root_signature);
return ret;

View File

@ -77,26 +77,9 @@ static unsigned dxil_resource_flags_from_kind(dxil_spv_resource_kind kind, bool
}
}
/* Returns true when the D3D binding has register index, register space and
 * range size all set to UINT32_MAX, the pattern this file treats as the
 * global heap. */
static bool dxil_resource_is_global_heap(const dxil_spv_d3d_binding *d3d_binding)
{
    if (d3d_binding->register_index != UINT32_MAX)
        return false;
    if (d3d_binding->register_space != UINT32_MAX)
        return false;

    return d3d_binding->range_size == UINT32_MAX;
}
/* Returns true when the shader resource binding has register index, register
 * space and register count all set to UINT32_MAX, the pattern this file
 * treats as the global heap. */
static bool vkd3d_shader_resource_binding_is_global_heap(const struct vkd3d_shader_resource_binding *binding)
{
    if (binding->register_index != UINT32_MAX)
        return false;
    if (binding->register_space != UINT32_MAX)
        return false;

    return binding->register_count == UINT32_MAX;
}
static bool dxil_resource_is_in_range(const struct vkd3d_shader_resource_binding *binding,
const dxil_spv_d3d_binding *d3d_binding)
{
if (vkd3d_shader_resource_binding_is_global_heap(binding) && dxil_resource_is_global_heap(d3d_binding))
return true;
if (binding->register_space != d3d_binding->register_space)
return false;
if (d3d_binding->register_index < binding->register_index)
@ -160,28 +143,19 @@ static dxil_spv_bool dxil_remap_inner(
else if (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS)
{
vk_binding->bindless.use_heap = DXIL_SPV_TRUE;
vk_binding->bindless.heap_root_offset = binding->descriptor_offset +
d3d_binding->register_index - binding->register_index;
vk_binding->root_constant_index = binding->descriptor_table + remap->descriptor_table_offset_words;
vk_binding->set = binding->binding.set;
vk_binding->binding = binding->binding.binding;
if (dxil_resource_is_global_heap(d3d_binding))
if (vk_binding->root_constant_index < 2 * remap->num_root_descriptors)
{
vk_binding->bindless.heap_root_offset = 0; /* No constant offset. */
vk_binding->root_constant_index = UINT32_MAX; /* No push offset. */
}
else
{
vk_binding->bindless.heap_root_offset = binding->descriptor_offset +
d3d_binding->register_index - binding->register_index;
vk_binding->root_constant_index = binding->descriptor_table + remap->descriptor_table_offset_words;
if (vk_binding->root_constant_index < 2 * remap->num_root_descriptors)
{
ERR("Bindless push constant table offset is impossible. %u < 2 * %u\n",
vk_binding->root_constant_index, remap->num_root_descriptors);
return DXIL_SPV_FALSE;
}
vk_binding->root_constant_index -= 2 * remap->num_root_descriptors;
ERR("Bindless push constant table offset is impossible. %u < 2 * %u\n",
vk_binding->root_constant_index, remap->num_root_descriptors);
return DXIL_SPV_FALSE;
}
vk_binding->root_constant_index -= 2 * remap->num_root_descriptors;
/* Acceleration structures are mapped to SSBO uvec2[] array instead of normal heap. */
if (d3d_binding->kind == DXIL_SPV_RESOURCE_KIND_RT_ACCELERATION_STRUCTURE)
@ -513,23 +487,21 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
dxil_spv_parsed_blob blob = NULL;
dxil_spv_compiled_spirv compiled;
dxil_spv_shader_stage stage;
unsigned int i, j, max_size;
unsigned int i, max_size;
vkd3d_shader_hash_t hash;
int ret = VKD3D_OK;
uint32_t quirks;
void *code;
dxil_spv_set_thread_log_callback(vkd3d_dxil_log_callback, NULL);
memset(&spirv->meta, 0, sizeof(spirv->meta));
hash = vkd3d_shader_hash(dxbc);
spirv->meta.replaced = false;
spirv->meta.hash = hash;
if (vkd3d_shader_replace(hash, &spirv->code, &spirv->size))
{
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
spirv->meta.replaced = true;
return ret;
}
quirks = vkd3d_shader_compile_arguments_select_quirks(compiler_args, hash);
dxil_spv_begin_thread_allocator_context();
@ -663,7 +635,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
}
}
#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER)
{
struct dxil_spv_option_descriptor_qa helper;
@ -683,7 +654,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
goto end;
}
}
#endif
{
const struct dxil_spv_option_bindless_offset_buffer_layout helper =
@ -731,63 +701,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT)
{
static const dxil_spv_option_shader_i8_dot helper =
{ { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support SHADER_I8_DOT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT)
{
dxil_spv_option_scalar_block_layout helper =
{ { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT }, DXIL_SPV_TRUE };
for (j = 0; j < compiler_args->target_extension_count; j++)
{
if (compiler_args->target_extensions[j] ==
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS)
{
helper.supports_per_component_robustness = DXIL_SPV_TRUE;
break;
}
}
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support SCALAR_BLOCK_LAYOUT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR)
{
static const dxil_spv_option_barycentric_khr helper =
{ { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support BARYCENTRIC_KHR.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
{
static const dxil_spv_option_min_precision_native_16bit helper =
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
}
if (compiler_args->dual_source_blending)
@ -834,18 +747,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
}
}
if (quirks & VKD3D_SHADER_QUIRK_INVARIANT_POSITION)
{
const dxil_spv_option_invariant_position helper =
{ { DXIL_SPV_OPTION_INVARIANT_POSITION }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support INVARIANT_POSITION.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
remap_userdata.shader_interface_info = shader_interface_info;
remap_userdata.shader_interface_local_info = NULL;
remap_userdata.num_root_descriptors = num_root_descriptors;
@ -883,16 +784,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
memcpy(code, compiled.data, compiled.size);
spirv->code = code;
spirv->size = compiled.size;
if (dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE)
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE;
dxil_spv_converter_get_compute_workgroup_dimensions(converter,
&spirv->meta.cs_workgroup_size[0],
&spirv->meta.cs_workgroup_size[1],
&spirv->meta.cs_workgroup_size[2]);
dxil_spv_converter_get_patch_vertex_count(converter, &spirv->meta.patch_vertex_count);
dxil_spv_converter_get_compute_required_wave_size(converter, &spirv->meta.cs_required_wave_size);
if (dxil_spv_converter_uses_shader_feature(converter, DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE)
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS;
vkd3d_shader_dump_spirv_shader(hash, spirv);
@ -929,15 +820,15 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
dxil_spv_set_thread_log_callback(vkd3d_dxil_log_callback, NULL);
memset(&spirv->meta, 0, sizeof(spirv->meta));
hash = vkd3d_shader_hash(dxil);
spirv->meta.replaced = false;
spirv->meta.hash = hash;
demangled_export = vkd3d_dup_demangled_entry_point_ascii(export);
if (demangled_export)
{
if (vkd3d_shader_replace_export(hash, &spirv->code, &spirv->size, demangled_export))
{
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
spirv->meta.replaced = true;
vkd3d_free(demangled_export);
return ret;
}
@ -1156,7 +1047,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
}
}
#ifdef VKD3D_ENABLE_DESCRIPTOR_QA
if (shader_interface_info->flags & VKD3D_SHADER_INTERFACE_DESCRIPTOR_QA_BUFFER)
{
struct dxil_spv_option_descriptor_qa helper;
@ -1176,7 +1066,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
goto end;
}
}
#endif
{
const struct dxil_spv_option_sbt_descriptor_size_log2 helper =
@ -1228,64 +1117,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SPV_KHR_INTEGER_DOT_PRODUCT)
{
static const dxil_spv_option_shader_i8_dot helper =
{ { DXIL_SPV_OPTION_SHADER_I8_DOT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support SHADER_I8_DOT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_SCALAR_BLOCK_LAYOUT)
{
dxil_spv_option_scalar_block_layout helper =
{ { DXIL_SPV_OPTION_SCALAR_BLOCK_LAYOUT }, DXIL_SPV_TRUE };
for (j = 0; j < compiler_args->target_extension_count; j++)
{
if (compiler_args->target_extensions[j] ==
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS)
{
helper.supports_per_component_robustness = DXIL_SPV_TRUE;
break;
}
}
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support SCALAR_BLOCK_LAYOUT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_RAY_TRACING_PRIMITIVE_CULLING)
{
/* Only relevant for ray tracing pipelines. Ray query requires support for PrimitiveCulling feature,
* and the SPIR-V capability is implicitly enabled. */
static const dxil_spv_option_shader_ray_tracing_primitive_culling helper =
{ { DXIL_SPV_OPTION_SHADER_RAY_TRACING_PRIMITIVE_CULLING }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support RAY_TRACING_PRIMITIVE_CULLING.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
{
static const dxil_spv_option_min_precision_native_16bit helper =
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
{
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
goto end;
}
}
}
}
@ -1323,10 +1154,6 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
memcpy(code, compiled.data, compiled.size);
spirv->code = code;
spirv->size = compiled.size;
if (dxil_spv_converter_uses_subgroup_size(converter) == DXIL_SPV_TRUE)
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_SUBGROUP_SIZE;
if (dxil_spv_converter_uses_shader_feature(converter, DXIL_SPV_SHADER_FEATURE_NATIVE_16BIT_OPERATIONS) == DXIL_SPV_TRUE)
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_USES_NATIVE_16BIT_OPERATIONS;
if (demangled_export)
vkd3d_shader_dump_spirv_shader_export(hash, spirv, demangled_export);
@ -1352,31 +1179,6 @@ void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_ent
vkd3d_free(entry_points);
}
/* Frees an array of library subobjects.
 *
 * For association subobjects this frees every export string, the export
 * array itself and the associated subobject name. For hit groups it frees
 * the hit group export name and the three shader import names. Other kinds
 * have nothing freed per element. Finally the array itself is freed.
 *
 * subobjects: array to release.
 * count: number of elements in the array. */
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count)
{
    struct vkd3d_shader_library_subobject *cur;
    size_t index, export_index;

    for (index = 0; index < count; index++)
    {
        cur = &subobjects[index];

        switch (cur->kind)
        {
            case VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION:
                /* Release each export string before the array that holds them. */
                for (export_index = 0; export_index < cur->data.association.NumExports; export_index++)
                    vkd3d_free((void*)cur->data.association.pExports[export_index]);
                vkd3d_free((void*)cur->data.association.pExports);
                vkd3d_free((void*)cur->data.association.SubobjectToAssociate);
                break;

            case VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP:
                vkd3d_free((void*)cur->data.hit_group.HitGroupExport);
                vkd3d_free((void*)cur->data.hit_group.AnyHitShaderImport);
                vkd3d_free((void*)cur->data.hit_group.ClosestHitShaderImport);
                vkd3d_free((void*)cur->data.hit_group.IntersectionShaderImport);
                break;

            default:
                /* No per-element allocations are released for other kinds. */
                break;
        }
    }

    vkd3d_free(subobjects);
}
static VkShaderStageFlagBits convert_stage(dxil_spv_shader_stage stage)
{
/* Only interested in RT entry_points. There is no way yet to use lib_6_3+ for non-RT. */
@ -1412,7 +1214,6 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
if (!entry->plain_entry_point)
{
vkd3d_free(entry->mangled_entry_point);
entry->mangled_entry_point = NULL;
return false;
}
@ -1421,95 +1222,20 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
return true;
}
/* Converts one RDAT subobject reported by dxil-spirv into a
 * vkd3d_shader_library_subobject.
 *
 * identifier: stored verbatim in subobject->dxil_identifier.
 * subobject: destination; kind, name, dxil_identifier and the data member
 *            matching the kind are written.
 * dxil_subobject: source description.
 *
 * Hit group and association subobjects receive freshly duplicated strings
 * (and, for associations, a freshly allocated export array).
 *
 * The export counts are still checked with assert(), but since assert() is
 * compiled out under NDEBUG the code below also guards every exports[]
 * access and the allocation at runtime instead of reading out of bounds. */
static void vkd3d_shader_dxil_copy_subobject(unsigned int identifier,
        struct vkd3d_shader_library_subobject *subobject,
        const dxil_spv_rdat_subobject *dxil_subobject)
{
    const char *hit_group_imports[3] = { NULL, NULL, NULL };
    unsigned int export_count;
    unsigned int i;

    /* Reuse same enums as DXIL. */
    subobject->kind = (enum vkd3d_shader_subobject_kind)dxil_subobject->kind;
    subobject->name = dxil_subobject->subobject_name;
    subobject->dxil_identifier = identifier;

    switch (dxil_subobject->kind)
    {
        case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
        case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
            subobject->data.payload.data = dxil_subobject->payload;
            subobject->data.payload.size = dxil_subobject->payload_size;
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
            /* Normalize the kind. */
            subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
            subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
            subobject->data.pipeline_config.Flags = 0;
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
            subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
            subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
            subobject->data.pipeline_config.Flags = dxil_subobject->args[1];
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
            subobject->data.shader_config.MaxPayloadSizeInBytes = dxil_subobject->args[0];
            subobject->data.shader_config.MaxAttributeSizeInBytes = dxil_subobject->args[1];
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_HIT_GROUP:
            /* Enum aliases. */
            subobject->data.hit_group.Type = (D3D12_HIT_GROUP_TYPE)dxil_subobject->hit_group_type;
            assert(dxil_subobject->num_exports == 3);

            /* Only look at export slots that actually exist. A missing, NULL or
             * empty export name all translate to a NULL import. */
            for (i = 0; i < 3 && i < dxil_subobject->num_exports; i++)
            {
                if (dxil_subobject->exports[i] && *dxil_subobject->exports[i] != '\0')
                    hit_group_imports[i] = dxil_subobject->exports[i];
            }

            /* Implementation simplifies a lot if we can reuse the D3D12 type here. */
            subobject->data.hit_group.HitGroupExport = vkd3d_dup_entry_point(dxil_subobject->subobject_name);
            subobject->data.hit_group.AnyHitShaderImport = hit_group_imports[0] ?
                    vkd3d_dup_entry_point(hit_group_imports[0]) : NULL;
            subobject->data.hit_group.ClosestHitShaderImport = hit_group_imports[1] ?
                    vkd3d_dup_entry_point(hit_group_imports[1]) : NULL;
            subobject->data.hit_group.IntersectionShaderImport = hit_group_imports[2] ?
                    vkd3d_dup_entry_point(hit_group_imports[2]) : NULL;
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
            subobject->data.object_config.Flags = dxil_subobject->args[0];
            break;

        case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION:
            assert(dxil_subobject->num_exports >= 1);

            if (!dxil_subobject->num_exports)
            {
                /* exports[0] does not exist and num_exports - 1 would wrap around. */
                ERR("RDAT export association does not name a subobject.\n");
                subobject->data.association.SubobjectToAssociate = NULL;
                subobject->data.association.pExports = NULL;
                subobject->data.association.NumExports = 0;
                break;
            }

            /* exports[0] is the subobject being associated, the rest are the export names. */
            export_count = dxil_subobject->num_exports - 1;
            subobject->data.association.SubobjectToAssociate = vkd3d_dup_entry_point(dxil_subobject->exports[0]);
            subobject->data.association.pExports = vkd3d_malloc(export_count * sizeof(LPCWSTR));

            /* A NULL result is only a failure when a non-empty array was requested. */
            if (!subobject->data.association.pExports && export_count)
            {
                ERR("Failed to allocate export association array.\n");
                export_count = 0;
            }

            subobject->data.association.NumExports = export_count;
            for (i = 0; i < export_count; i++)
                subobject->data.association.pExports[i] = vkd3d_dup_entry_point(dxil_subobject->exports[i + 1]);
            break;

        default:
            FIXME("Unrecognized RDAT subobject type: %u.\n", dxil_subobject->kind);
            break;
    }
}
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
int vkd3d_shader_dxil_append_library_entry_points(
const D3D12_DXIL_LIBRARY_DESC *library_desc,
unsigned int identifier,
struct vkd3d_shader_library_entry_point **entry_points,
size_t *entry_point_size, size_t *entry_point_count,
struct vkd3d_shader_library_subobject **subobjects,
size_t *subobjects_size, size_t *subobjects_count)
size_t *entry_point_size, size_t *entry_point_count)
{
struct vkd3d_shader_library_entry_point new_entry;
struct vkd3d_shader_library_subobject *subobject;
dxil_spv_parsed_blob blob = NULL;
struct vkd3d_shader_code code;
dxil_spv_rdat_subobject sub;
dxil_spv_shader_stage stage;
const char *mangled_entry;
char *ascii_entry = NULL;
vkd3d_shader_hash_t hash;
unsigned int count, i, j;
unsigned int rdat_count;
unsigned int count, i;
int ret = VKD3D_OK;
memset(&new_entry, 0, sizeof(new_entry));
@ -1530,8 +1256,6 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
goto end;
}
rdat_count = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
if (library_desc->NumExports)
{
for (i = 0; i < library_desc->NumExports; i++)
@ -1541,44 +1265,24 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
else
ascii_entry = vkd3d_strdup_w_utf8(library_desc->pExports[i].Name, 0);
/* An export can point to a subobject or an entry point. */
for (j = 0; j < rdat_count; j++)
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
if (stage == DXIL_SPV_STAGE_UNKNOWN)
{
dxil_spv_parsed_blob_get_rdat_subobject(blob, j, &sub);
/* Subobject names are not mangled. */
if (strcmp(sub.subobject_name, ascii_entry) == 0)
break;
ret = VKD3D_ERROR_INVALID_ARGUMENT;
goto end;
}
if (j < rdat_count)
{
vkd3d_array_reserve((void**)subobjects, subobjects_size,
*subobjects_count + 1, sizeof(**subobjects));
subobject = &(*subobjects)[*subobjects_count];
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
*subobjects_count += 1;
}
else
{
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
if (stage == DXIL_SPV_STAGE_UNKNOWN)
{
ret = VKD3D_ERROR_INVALID_ARGUMENT;
goto end;
}
new_entry.real_entry_point = ascii_entry;
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
new_entry.mangled_entry_point = NULL;
new_entry.identifier = identifier;
new_entry.stage = convert_stage(stage);
ascii_entry = NULL;
new_entry.real_entry_point = ascii_entry;
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
new_entry.mangled_entry_point = NULL;
new_entry.identifier = identifier;
new_entry.stage = convert_stage(stage);
ascii_entry = NULL;
vkd3d_array_reserve((void**)entry_points, entry_point_size,
*entry_point_count + 1, sizeof(new_entry));
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
vkd3d_array_reserve((void**)entry_points, entry_point_size,
*entry_point_count + 1, sizeof(new_entry));
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
}
else
@ -1611,21 +1315,6 @@ int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
(*entry_points)[(*entry_point_count)++] = new_entry;
memset(&new_entry, 0, sizeof(new_entry));
}
if (rdat_count)
{
/* All subobjects are also exported. */
vkd3d_array_reserve((void**)subobjects, subobjects_size,
*subobjects_count + rdat_count, sizeof(**subobjects));
for (i = 0; i < rdat_count; i++)
{
dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &sub);
subobject = &(*subobjects)[*subobjects_count];
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
*subobjects_count += 1;
}
}
}
end:

File diff suppressed because it is too large Load Diff

View File

@ -20,8 +20,6 @@
#include "vkd3d_shader_private.h"
#include "vkd3d_platform.h"
#include <stdio.h>
#include <inttypes.h>
@ -83,13 +81,13 @@ err:
bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *size)
{
static bool enabled = true;
char path[VKD3D_PATH_MAX];
char filename[1024];
const char *path;
if (!enabled)
return false;
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
{
enabled = false;
return false;
@ -102,13 +100,13 @@ bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *s
bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, size_t *size, const char *export)
{
static bool enabled = true;
char path[VKD3D_PATH_MAX];
char filename[1024];
const char *path;
if (!enabled)
return false;
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
{
enabled = false;
return false;
@ -121,12 +119,12 @@ bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, si
void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader, const char *ext)
{
static bool enabled = true;
char path[VKD3D_PATH_MAX];
const char *path;
if (!enabled)
return;
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
{
enabled = false;
return;
@ -138,12 +136,12 @@ void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shade
void vkd3d_shader_dump_spirv_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader)
{
static bool enabled = true;
char path[VKD3D_PATH_MAX];
const char *path;
if (!enabled)
return;
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
{
enabled = false;
return;
@ -156,13 +154,13 @@ void vkd3d_shader_dump_spirv_shader_export(vkd3d_shader_hash_t hash, const struc
const char *export)
{
static bool enabled = true;
char path[VKD3D_PATH_MAX];
const char *path;
char tag[1024];
if (!enabled)
return;
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
{
enabled = false;
return;
@ -345,13 +343,12 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
return vkd3d_shader_compile_dxil(dxbc, spirv, shader_interface_info, compile_args);
}
memset(&spirv->meta, 0, sizeof(spirv->meta));
hash = vkd3d_shader_hash(dxbc);
spirv->meta.replaced = false;
spirv->meta.hash = hash;
if (vkd3d_shader_replace(hash, &spirv->code, &spirv->size))
{
spirv->meta.flags |= VKD3D_SHADER_META_FLAG_REPLACED;
spirv->meta.replaced = true;
return VKD3D_OK;
}
@ -363,21 +360,16 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
return ret;
}
spirv->meta.patch_vertex_count = scan_info.patch_vertex_count;
if ((ret = vkd3d_shader_parser_init(&parser, dxbc)) < 0)
{
vkd3d_shader_scan_destroy(&scan_info);
return ret;
}
if (shader_interface_info)
if ((ret = vkd3d_shader_validate_shader_type(parser.shader_version.type, shader_interface_info->stage)) < 0)
{
if ((ret = vkd3d_shader_validate_shader_type(parser.shader_version.type, shader_interface_info->stage)) < 0)
{
vkd3d_shader_scan_destroy(&scan_info);
return ret;
}
vkd3d_shader_scan_destroy(&scan_info);
return ret;
}
vkd3d_shader_dump_shader(hash, dxbc, "dxbc");
@ -477,7 +469,6 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_info *
const struct vkd3d_shader_register *reg)
{
scan_info->has_side_effects = true;
scan_info->has_uav_counter = true;
vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER);
}
@ -530,9 +521,6 @@ static void vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_info *scan_in
if (instruction->flags & VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL)
scan_info->early_fragment_tests = true;
break;
case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
scan_info->patch_vertex_count = instruction->declaration.count;
break;
default:
break;
}
@ -568,6 +556,49 @@ static void vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_info *scan_in
vkd3d_shader_scan_record_uav_counter(scan_info, &instruction->src[0].reg);
}
int vkd3d_shader_scan_patch_vertex_count(const struct vkd3d_shader_code *dxbc,
unsigned int *patch_vertex_count)
{
struct vkd3d_shader_instruction instruction;
struct vkd3d_shader_parser parser;
int ret;
if (shader_is_dxil(dxbc->code, dxbc->size))
{
/* TODO */
*patch_vertex_count = 0;
return VKD3D_OK;
}
else
{
if ((ret = vkd3d_shader_parser_init(&parser, dxbc)) < 0)
return ret;
*patch_vertex_count = 0;
while (!shader_sm4_is_end(parser.data, &parser.ptr))
{
shader_sm4_read_instruction(parser.data, &parser.ptr, &instruction);
if (instruction.handler_idx == VKD3DSIH_INVALID)
{
WARN("Encountered unrecognized or invalid instruction.\n");
vkd3d_shader_parser_destroy(&parser);
return VKD3D_ERROR_INVALID_ARGUMENT;
}
if (instruction.handler_idx == VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT)
{
*patch_vertex_count = instruction.declaration.count;
break;
}
}
vkd3d_shader_parser_destroy(&parser);
return VKD3D_OK;
}
}
int vkd3d_shader_scan_dxbc(const struct vkd3d_shader_code *dxbc,
struct vkd3d_shader_scan_info *scan_info)
{
@ -675,14 +706,6 @@ int vkd3d_shader_parse_input_signature(const struct vkd3d_shader_code *dxbc,
return shader_parse_input_signature(dxbc->code, dxbc->size, signature);
}
/* Traces the call and forwards the blob's data pointer and size, together
 * with the destination signature, to shader_parse_output_signature().
 * Returns whatever that function returns. */
int vkd3d_shader_parse_output_signature(const struct vkd3d_shader_code *dxbc,
        struct vkd3d_shader_signature *signature)
{
    const void *blob = dxbc->code;
    size_t blob_size = dxbc->size;

    TRACE("dxbc {%p, %zu}, signature %p.\n", blob, blob_size, signature);

    return shader_parse_output_signature(blob, blob_size, signature);
}
struct vkd3d_shader_signature_element *vkd3d_shader_find_signature_element(
const struct vkd3d_shader_signature *signature, const char *semantic_name,
unsigned int semantic_index, unsigned int stream_index)
@ -715,36 +738,12 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader)
{
vkd3d_shader_hash_t h = hash_fnv1_init();
vkd3d_shader_hash_t h = 0xcbf29ce484222325ull;
const uint8_t *code = shader->code;
size_t i, n;
for (i = 0, n = shader->size; i < n; i++)
h = hash_fnv1_iterate_u8(h, code[i]);
h = (h * 0x100000001b3ull) ^ code[i];
return h;
}
/* Picks the quirk mask that applies to a shader.
 *
 * Returns 0 when compile_args or its quirks table is NULL. Otherwise the
 * result is the quirks of the first table entry whose hash equals
 * shader_hash, or default_quirks when no entry matches, OR'ed with
 * global_quirks in both cases. */
uint32_t vkd3d_shader_compile_arguments_select_quirks(
        const struct vkd3d_shader_compile_arguments *compile_args, vkd3d_shader_hash_t shader_hash)
{
    uint32_t selected;
    unsigned int idx;

    if (!compile_args || !compile_args->quirks)
        return 0;

    /* Start from the fallback and override it with the first per-hash match. */
    selected = compile_args->quirks->default_quirks;
    for (idx = 0; idx < compile_args->quirks->num_hashes; idx++)
    {
        if (compile_args->quirks->hashes[idx].shader_hash == shader_hash)
        {
            selected = compile_args->quirks->hashes[idx].quirks;
            break;
        }
    }

    return selected | compile_args->quirks->global_quirks;
}
/* Returns the shader compiler revision number (currently 1). */
uint64_t vkd3d_shader_get_revision(void)
{
    /* Bump this value whenever the shader compiler changes.
     * It may be removed later: it is not immediately useful for invalidating
     * pipeline caches, since the vkd3d-proton Git hash mostly covers that. */
    const uint64_t shader_compiler_revision = 1;

    return shader_compiler_revision;
}

View File

@ -781,8 +781,6 @@ void free_shader_desc(struct vkd3d_shader_desc *desc);
int shader_parse_input_signature(const void *dxbc, size_t dxbc_length,
struct vkd3d_shader_signature *signature);
int shader_parse_output_signature(const void *dxbc, size_t dxbc_length,
struct vkd3d_shader_signature *signature);
struct vkd3d_dxbc_compiler;
@ -919,4 +917,6 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
const struct vkd3d_shader_interface_info *shader_interface_info,
const struct vkd3d_shader_compile_arguments *compiler_args);
vkd3d_shader_hash_t vkd3d_shader_hash(const struct vkd3d_shader_code *shader);
#endif /* __VKD3D_SHADER_PRIVATE_H */

View File

@ -3,12 +3,12 @@ LIBRARY vkd3d-proton-utils-3.dll
EXPORTS
D3D12CreateDevice @101
D3D12GetDebugInterface @102
D3D12CreateRootSignatureDeserializer
D3D12CreateVersionedRootSignatureDeserializer
D3D12CreateRootSignatureDeserializer @107
D3D12CreateVersionedRootSignatureDeserializer @108
D3D12EnableExperimentalFeatures
D3D12SerializeRootSignature
D3D12SerializeVersionedRootSignature
D3D12EnableExperimentalFeatures @110
D3D12SerializeRootSignature @115
D3D12SerializeVersionedRootSignature @116
vkd3d_create_event
vkd3d_wait_event

View File

@ -19,8 +19,6 @@
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
#define RT_TRACE TRACE
void vkd3d_acceleration_structure_build_info_cleanup(
struct vkd3d_acceleration_structure_build_info *info)
{
@ -65,7 +63,7 @@ static VkGeometryFlagsKHR d3d12_geometry_flags_to_vk(D3D12_RAYTRACING_GEOMETRY_F
return vk_flags;
}
bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *device,
bool vkd3d_acceleration_structure_convert_inputs(
struct vkd3d_acceleration_structure_build_info *info,
const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc)
{
@ -73,34 +71,21 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
VkAccelerationStructureBuildGeometryInfoKHR *build_info;
VkAccelerationStructureGeometryAabbsDataKHR *aabbs;
const D3D12_RAYTRACING_GEOMETRY_DESC *geom_desc;
bool have_triangles, have_aabbs;
unsigned int i;
RT_TRACE("Converting inputs.\n");
RT_TRACE("=====================\n");
build_info = &info->build_info;
memset(build_info, 0, sizeof(*build_info));
build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
{
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
RT_TRACE("Top level build.\n");
}
else
{
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
RT_TRACE("Bottom level build.\n");
}
build_info->flags = d3d12_build_flags_to_vk(desc->Flags);
if (desc->Flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
{
RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
}
else
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
@ -123,15 +108,9 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
info->primitive_counts = info->primitive_counts_stack;
info->primitive_counts[0] = desc->NumDescs;
build_info->geometryCount = 1;
RT_TRACE(" ArrayOfPointers: %u.\n",
desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
RT_TRACE(" NumDescs: %u.\n", info->primitive_counts[0]);
}
else
{
have_triangles = false;
have_aabbs = false;
if (desc->NumDescs <= VKD3D_BUILD_INFO_STACK_COUNT)
{
memset(info->geometries, 0, sizeof(*info->geometries) * desc->NumDescs);
@ -149,95 +128,54 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
for (i = 0; i < desc->NumDescs; i++)
{
info->geometries[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
RT_TRACE(" Geom %u:\n", i);
if (desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
{
geom_desc = desc->ppGeometryDescs[i];
RT_TRACE(" ArrayOfPointers\n");
}
else
{
geom_desc = &desc->pGeometryDescs[i];
RT_TRACE(" PointerToArray\n");
}
info->geometries[i].flags = d3d12_geometry_flags_to_vk(geom_desc->Flags);
RT_TRACE(" Flags = #%x\n", geom_desc->Flags);
switch (geom_desc->Type)
{
case D3D12_RAYTRACING_GEOMETRY_TYPE_TRIANGLES:
/* Runtime validates this. */
if (have_aabbs)
{
ERR("Cannot mix and match geometry types in a BLAS.\n");
return false;
}
have_triangles = true;
info->geometries[i].geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
triangles = &info->geometries[i].geometry.triangles;
triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
triangles->indexData.deviceAddress = geom_desc->Triangles.IndexBuffer;
if (geom_desc->Triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
if (geom_desc->Triangles.IndexBuffer)
{
if (!geom_desc->Triangles.IndexBuffer)
WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
triangles->indexType =
geom_desc->Triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
info->primitive_counts[i] = geom_desc->Triangles.IndexCount / 3;
RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
geom_desc->Triangles.IndexCount,
triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
RT_TRACE(" Vertex count: %u\n", geom_desc->Triangles.VertexCount);
RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->Triangles.IndexBuffer);
}
else
{
info->primitive_counts[i] = geom_desc->Triangles.VertexCount / 3;
triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->Triangles.VertexCount);
}
triangles->maxVertex = max(1, geom_desc->Triangles.VertexCount) - 1;
triangles->vertexStride = geom_desc->Triangles.VertexBuffer.StrideInBytes;
triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->Triangles.VertexFormat);
triangles->vertexFormat = vkd3d_get_vk_format(geom_desc->Triangles.VertexFormat);
triangles->vertexData.deviceAddress = geom_desc->Triangles.VertexBuffer.StartAddress;
triangles->transformData.deviceAddress = geom_desc->Triangles.Transform3x4;
RT_TRACE(" Transform3x4: %s\n", geom_desc->Triangles.Transform3x4 ? "on" : "off");
RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->Triangles.VertexFormat));
RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->Triangles.VertexBuffer.StartAddress);
RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->Triangles.VertexBuffer.StrideInBytes);
break;
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
/* Runtime validates this. */
if (have_triangles)
{
ERR("Cannot mix and match geometry types in a BLAS.\n");
return false;
}
have_aabbs = true;
info->geometries[i].geometryType = VK_GEOMETRY_TYPE_AABBS_KHR;
aabbs = &info->geometries[i].geometry.aabbs;
aabbs->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_AABBS_DATA_KHR;
aabbs->stride = geom_desc->AABBs.AABBs.StrideInBytes;
aabbs->data.deviceAddress = geom_desc->AABBs.AABBs.StartAddress;
info->primitive_counts[i] = geom_desc->AABBs.AABBCount;
RT_TRACE(" AABB stride: %"PRIu64" bytes\n", geom_desc->AABBs.AABBs.StrideInBytes);
break;
default:
FIXME("Unsupported geometry type %u.\n", geom_desc->Type);
return false;
}
RT_TRACE(" Primitive count %u.\n", info->primitive_counts[i]);
}
}
@ -251,8 +189,6 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
}
build_info->pGeometries = info->geometries;
RT_TRACE("=====================\n");
return true;
}
@ -306,18 +242,12 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
stride = sizeof(uint64_t);
}
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE &&
list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE;
stride = sizeof(uint64_t);
}
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
stride = sizeof(uint64_t);
FIXME("NumBottomLevelPointers will always return 0.\n");
}
else
{
@ -348,31 +278,9 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
{
if (list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
{
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS;
if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
type_index, &vk_query_pool, &vk_query_index))
{
ERR("Failed to allocate query.\n");
return;
}
d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
vk_query_pool, vk_query_index, 1,
vk_buffer, offset + sizeof(uint64_t), stride,
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
}
else
{
FIXME("NumBottomLevelPointers will always return 0.\n");
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
sizeof(uint64_t), 0));
}
/* TODO: We'll need some way to store these values for later use and copy them here instead. */
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
sizeof(uint64_t), 0));
}
}

View File

@ -1,655 +0,0 @@
/*
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
#include "vkd3d_debug.h"
#include "vkd3d_common.h"
#include <assert.h>
#include <stdio.h>
/* Just allocate everything up front. This only consumes host memory anyways. */
#define MAX_COMMAND_LISTS (32 * 1024)
/* Questionable on 32-bit, but we don't really care. */
#define NV_ENCODE_CHECKPOINT(context, counter) ((void*) ((uintptr_t)(context) + (uintptr_t)MAX_COMMAND_LISTS * (counter)))
#define NV_CHECKPOINT_CONTEXT(ptr) ((uint32_t)((uintptr_t)(ptr) % MAX_COMMAND_LISTS))
#define NV_CHECKPOINT_COUNTER(ptr) ((uint32_t)((uintptr_t)(ptr) / MAX_COMMAND_LISTS))
/* Maps a breadcrumb command type to a short name string.
 * Types without an entry below map to "?". */
static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_command_type type)
{
    /* Fallback for any type not listed in the switch. */
    const char *name = "?";

    switch (type)
    {
        case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
            name = "top_marker";
            break;
        case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
            name = "bottom_marker";
            break;
        case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
            name = "set_shader_hash";
            break;
        case VKD3D_BREADCRUMB_COMMAND_DRAW:
            name = "draw";
            break;
        case VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED:
            name = "draw_indexed";
            break;
        case VKD3D_BREADCRUMB_COMMAND_DISPATCH:
            name = "dispatch";
            break;
        case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT:
            name = "execute_indirect";
            break;
        case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE:
            name = "execute_indirect_template";
            break;
        case VKD3D_BREADCRUMB_COMMAND_COPY:
            name = "copy";
            break;
        case VKD3D_BREADCRUMB_COMMAND_RESOLVE:
            name = "resolve";
            break;
        case VKD3D_BREADCRUMB_COMMAND_WBI:
            name = "wbi";
            break;
        case VKD3D_BREADCRUMB_COMMAND_RESOLVE_QUERY:
            name = "resolve_query";
            break;
        case VKD3D_BREADCRUMB_COMMAND_GATHER_VIRTUAL_QUERY:
            name = "gather_virtual_query";
            break;
        case VKD3D_BREADCRUMB_COMMAND_BUILD_RTAS:
            name = "build_rtas";
            break;
        case VKD3D_BREADCRUMB_COMMAND_COPY_RTAS:
            name = "copy_rtas";
            break;
        case VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD:
            name = "emit_rtas_postbuild";
            break;
        case VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS:
            name = "trace_rays";
            break;
        case VKD3D_BREADCRUMB_COMMAND_BARRIER:
            name = "barrier";
            break;
        case VKD3D_BREADCRUMB_COMMAND_AUX32:
            name = "aux32";
            break;
        case VKD3D_BREADCRUMB_COMMAND_AUX64:
            name = "aux64";
            break;
        case VKD3D_BREADCRUMB_COMMAND_VBO:
            name = "vbo";
            break;
        case VKD3D_BREADCRUMB_COMMAND_IBO:
            name = "ibo";
            break;
        case VKD3D_BREADCRUMB_COMMAND_ROOT_DESC:
            name = "root_desc";
            break;
        case VKD3D_BREADCRUMB_COMMAND_ROOT_CONST:
            name = "root_const";
            break;
        case VKD3D_BREADCRUMB_COMMAND_TAG:
            name = "tag";
            break;
        default:
            break;
    }

    return name;
}
HRESULT vkd3d_breadcrumb_tracer_init(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
D3D12_HEAP_PROPERTIES heap_properties;
D3D12_RESOURCE_DESC1 resource_desc;
VkMemoryPropertyFlags memory_props;
HRESULT hr;
int rc;
memset(tracer, 0, sizeof(*tracer));
if ((rc = pthread_mutex_init(&tracer->lock, NULL)))
return hresult_from_errno(rc);
if (device->vk_info.AMD_buffer_marker)
{
INFO("Enabling AMD_buffer_marker breadcrumbs.\n");
memset(&resource_desc, 0, sizeof(resource_desc));
resource_desc.Width = MAX_COMMAND_LISTS * sizeof(struct vkd3d_breadcrumb_counter);
resource_desc.Height = 1;
resource_desc.DepthOrArraySize = 1;
resource_desc.MipLevels = 1;
resource_desc.Format = DXGI_FORMAT_UNKNOWN;
resource_desc.SampleDesc.Count = 1;
resource_desc.SampleDesc.Quality = 0;
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
&resource_desc, &tracer->host_buffer)))
{
goto err;
}
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
/* If device faults in the middle of execution we will never get the chance to flush device caches.
* Make sure that breadcrumbs are always written directly out.
* This is the primary usecase for the device coherent/uncached extension after all ...
* Don't make this a hard requirement since buffer markers might be implicitly coherent on some
* implementations (Turnip?). */
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
{
memory_props |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
}
if (FAILED(hr = vkd3d_allocate_buffer_memory(device, tracer->host_buffer,
memory_props, &tracer->host_buffer_memory)))
{
goto err;
}
if (VK_CALL(vkMapMemory(device->vk_device, tracer->host_buffer_memory.vk_memory,
0, VK_WHOLE_SIZE,
0, (void**)&tracer->mapped)) != VK_SUCCESS)
{
hr = E_OUTOFMEMORY;
goto err;
}
memset(tracer->mapped, 0, sizeof(*tracer->mapped) * MAX_COMMAND_LISTS);
}
else if (device->vk_info.NV_device_diagnostic_checkpoints)
{
INFO("Enabling NV_device_diagnostics_checkpoints breadcrumbs.\n");
}
else
{
ERR("Breadcrumbs require support for either AMD_buffer_marker or NV_device_diagnostics_checkpoints.\n");
hr = E_FAIL;
goto err;
}
tracer->trace_contexts = vkd3d_calloc(MAX_COMMAND_LISTS, sizeof(*tracer->trace_contexts));
tracer->trace_context_index = 0;
return S_OK;
err:
vkd3d_breadcrumb_tracer_cleanup(tracer, device);
return hr;
}
void vkd3d_breadcrumb_tracer_cleanup(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
if (device->vk_info.AMD_buffer_marker)
{
VK_CALL(vkDestroyBuffer(device->vk_device, tracer->host_buffer, NULL));
vkd3d_free_device_memory(device, &tracer->host_buffer_memory);
}
vkd3d_free(tracer->trace_contexts);
pthread_mutex_destroy(&tracer->lock);
}
/* Reserves a free breadcrumb trace context for a command list.
 *
 * The contexts form a ring of MAX_COMMAND_LISTS entries; the search starts
 * after the most recently handed out entry and takes the first one that is not
 * locked. On success the context is reset, its index is stored in
 * list->breadcrumb_context_index and appended to the allocator's list of
 * context indices, and the index is returned.
 *
 * Returns UINT32_MAX if the mutex cannot be taken, if every context is in use,
 * or if the allocator's index list cannot be grown. In those cases neither the
 * list nor the allocator is modified. */
unsigned int vkd3d_breadcrumb_tracer_allocate_command_list(struct vkd3d_breadcrumb_tracer *tracer,
        struct d3d12_command_list *list, struct d3d12_command_allocator *allocator)
{
    unsigned int index = UINT32_MAX;
    unsigned int iteration_count;
    int rc;

    if ((rc = pthread_mutex_lock(&tracer->lock)))
    {
        ERR("Failed to lock mutex, rc %d.\n", rc);
        return UINT32_MAX;
    }

    /* Since this is a ring, this is extremely likely to succeed on first attempt. */
    for (iteration_count = 0; iteration_count < MAX_COMMAND_LISTS; iteration_count++)
    {
        tracer->trace_context_index = (tracer->trace_context_index + 1) % MAX_COMMAND_LISTS;
        if (!tracer->trace_contexts[tracer->trace_context_index].locked)
        {
            tracer->trace_contexts[tracer->trace_context_index].locked = 1;
            index = tracer->trace_context_index;
            break;
        }
    }

    pthread_mutex_unlock(&tracer->lock);

    if (index == UINT32_MAX)
    {
        ERR("Failed to allocate new index for command list.\n");
        return index;
    }

    /* Make room in the allocator before publishing the index anywhere. If the
     * allocator cannot remember the index, the context would never be released,
     * and writing through the unreserved array would be out of bounds. */
    if (!vkd3d_array_reserve((void**)&allocator->breadcrumb_context_indices, &allocator->breadcrumb_context_index_size,
            allocator->breadcrumb_context_index_count + 1,
            sizeof(*allocator->breadcrumb_context_indices)))
    {
        ERR("Failed to track breadcrumb context %u in command allocator.\n", index);
        vkd3d_breadcrumb_tracer_release_command_lists(tracer, &index, 1);
        return UINT32_MAX;
    }

    TRACE("Allocating breadcrumb context %u for list %p.\n", index, list);
    list->breadcrumb_context_index = index;

    /* Need to clear this on a fresh allocation rather than release, since we can end up releasing a command list
     * before we observe the device lost. */
    tracer->trace_contexts[index].command_count = 0;
    tracer->trace_contexts[index].counter = 0;

    if (list->device->vk_info.AMD_buffer_marker)
        memset(&tracer->mapped[index], 0, sizeof(tracer->mapped[index]));

    allocator->breadcrumb_context_indices[allocator->breadcrumb_context_index_count++] = index;
    return index;
}
/* Command allocator keeps a list of allocated breadcrumb command lists.
 * This hands a batch of those context indices back to the tracer by clearing
 * their "locked" flag under the tracer mutex. Entries equal to UINT32_MAX are
 * not dereferenced. Nothing is released if the mutex cannot be taken. */
void vkd3d_breadcrumb_tracer_release_command_lists(struct vkd3d_breadcrumb_tracer *tracer,
        const unsigned int *indices, size_t indices_count)
{
    const unsigned int *cursor;
    const unsigned int *end;
    int rc;

    if (indices_count == 0)
        return;

    rc = pthread_mutex_lock(&tracer->lock);
    if (rc)
    {
        ERR("Failed to lock mutex, rc %d.\n", rc);
        return;
    }

    end = indices + indices_count;
    for (cursor = indices; cursor != end; cursor++)
    {
        if (*cursor != UINT32_MAX)
            tracer->trace_contexts[*cursor].locked = 0;
        TRACE("Releasing breadcrumb context %u.\n", *cursor);
    }

    pthread_mutex_unlock(&tracer->lock);
}
/* Dumps every command recorded in a trace context to the error log and brackets
 * the suspected crash region:
 *  - the region begins right after the BOTTOM marker command whose count equals
 *    end_marker (or before the first command when end_marker is 0);
 *  - the region ends right before the first TOP marker command whose count is
 *    greater than begin_marker. */
static void vkd3d_breadcrumb_tracer_report_command_list(
        const struct vkd3d_breadcrumb_command_list_trace_context *context,
        uint32_t begin_marker,
        uint32_t end_marker)
{
    const struct vkd3d_breadcrumb_command *entry;
    bool region_opened = false;
    bool region_closed = false;
    unsigned int idx;

    /* No BOTTOM_OF_PIPE marker was observed at all: everything from the start is suspect. */
    if (end_marker == 0)
    {
        ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
        region_opened = true;
    }

    /* We can assume that possible culprit commands lie between the end_marker
     * and top_marker. */
    for (idx = 0; idx < context->command_count; idx++)
    {
        entry = &context->commands[idx];

        /* If there is a command which sets TOP_OF_PIPE, but we haven't observed the marker yet,
         * the command processor hasn't gotten there yet (most likely ...), so that should be the
         * natural end-point. */
        if (entry->type == VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER &&
                !region_closed && entry->count > begin_marker)
        {
            region_closed = true;
            ERR(" ===== Potential crash region END =====\n");
        }

        switch (entry->type)
        {
            case VKD3D_BREADCRUMB_COMMAND_AUX32:
                ERR(" Set arg: %u (#%x)\n", entry->word_32bit, entry->word_32bit);
                break;

            case VKD3D_BREADCRUMB_COMMAND_AUX64:
                ERR(" Set arg: %"PRIu64" (#%"PRIx64")\n", entry->word_64bit, entry->word_64bit);
                break;

            case VKD3D_BREADCRUMB_COMMAND_TAG:
                ERR(" Tag: %s\n", entry->tag);
                break;

            default:
                /* Regular command: print its name, plus its payload where it has one. */
                ERR(" Command: %s\n", vkd3d_breadcrumb_command_type_to_str(entry->type));

                if (entry->type == VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER ||
                        entry->type == VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER)
                {
                    ERR(" marker: %u\n", entry->count);
                }
                else if (entry->type == VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH)
                {
                    ERR(" hash: %016"PRIx64", stage: %x\n", entry->shader.hash, entry->shader.stage);
                }
                break;
        }

        /* We have proved we observed this command is complete.
         * Some command after this signal is at fault. */
        if (entry->type == VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER &&
                !region_opened && entry->count == end_marker)
        {
            region_opened = true;
            ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
        }
    }
}
/* AMD_buffer_marker path: inspects the mapped marker pair of one trace context
 * and, if the context looks like it was still executing, dumps its commands. */
static void vkd3d_breadcrumb_tracer_report_command_list_amd(struct vkd3d_breadcrumb_tracer *tracer,
        unsigned int context_index)
{
    const struct vkd3d_breadcrumb_command_list_trace_context *trace_context;
    uint32_t top_value, bottom_value;
    bool never_executed, retired;

    trace_context = &tracer->trace_contexts[context_index];

    /* Unused, cannot be the cause. */
    if (!trace_context->counter)
        return;

    top_value = tracer->mapped[context_index].begin_marker;
    bottom_value = tracer->mapped[context_index].end_marker;

    /* Both markers still zero: never executed. Both at UINT32_MAX: successfully retired.
     * Neither can be the cause. */
    never_executed = top_value == 0 && bottom_value == 0;
    retired = top_value == UINT32_MAX && bottom_value == UINT32_MAX;
    if (never_executed || retired)
        return;

    /* Edge case if we re-submitted a command list,
     * but it ends up crashing before we hit any BOTTOM_OF_PIPE
     * marker. Normalize the inputs such that end_marker <= begin_marker. */
    if (bottom_value == UINT32_MAX && top_value > 0)
        bottom_value = 0;

    ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
            context_index, top_value, bottom_value);
    vkd3d_breadcrumb_tracer_report_command_list(trace_context, top_value, bottom_value);
    ERR("Done analyzing command list.\n");
}
/* NV_device_diagnostic_checkpoints path: queries the checkpoints the given
 * queue reports, derives the latest TOP_OF_PIPE and BOTTOM_OF_PIPE markers from
 * them and, if they all belong to one trace context, dumps that context.
 *
 * Nothing is reported when the queue has no checkpoints, when the checkpoints
 * refer to different contexts, when an unexpected pipeline stage is seen, when
 * the decoded context index is out of range, or when the temporary checkpoint
 * array cannot be allocated. */
static void vkd3d_breadcrumb_tracer_report_queue_nv(struct vkd3d_breadcrumb_tracer *tracer,
        struct d3d12_device *device,
        VkQueue vk_queue)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    uint32_t begin_marker, end_marker;
    uint32_t checkpoint_context_index;
    VkCheckpointDataNV *checkpoints;
    uint32_t checkpoint_marker;
    uint32_t checkpoint_count;
    uint32_t context_index;
    uint32_t i;

    /* First call only retrieves the number of checkpoints. */
    checkpoint_count = 0;
    VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, NULL));
    if (checkpoint_count == 0)
        return;

    /* This runs while handling device loss; do not turn an allocation failure into a crash. */
    if (!(checkpoints = vkd3d_calloc(checkpoint_count, sizeof(VkCheckpointDataNV))))
    {
        ERR("Failed to allocate memory for %u checkpoints.\n", checkpoint_count);
        return;
    }

    for (i = 0; i < checkpoint_count; i++)
        checkpoints[i].sType = VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV;
    VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, checkpoints));

    context_index = UINT32_MAX;
    begin_marker = 0;
    end_marker = 0;

    for (i = 0; i < checkpoint_count; i++)
    {
        checkpoint_context_index = NV_CHECKPOINT_CONTEXT(checkpoints[i].pCheckpointMarker);
        checkpoint_marker = NV_CHECKPOINT_COUNTER(checkpoints[i].pCheckpointMarker);

        if (context_index != checkpoint_context_index && context_index != UINT32_MAX)
        {
            FIXME("Markers have different contexts. Execution is likely split across multiple command buffers?\n");
            context_index = UINT32_MAX;
            break;
        }

        context_index = checkpoint_context_index;

        if (checkpoints[i].stage == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT && checkpoint_marker > begin_marker)
        {
            /* We want to find the latest TOP_OF_PIPE_BIT. Then we prove that command processor got to that point. */
            begin_marker = checkpoint_marker;
        }
        else if (checkpoints[i].stage == VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT && checkpoint_marker > end_marker)
        {
            /* We want to find the latest BOTTOM_OF_PIPE_BIT. Then we prove that we got that far. */
            end_marker = checkpoint_marker;
        }
        else if (checkpoints[i].stage != VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT &&
                checkpoints[i].stage != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
        {
            FIXME("Unexpected checkpoint pipeline stage. #%x\n", checkpoints[i].stage);
            context_index = UINT32_MAX;
            break;
        }
    }

    /* The index is decoded from an opaque marker pointer; never use it to index
     * the context array unless it is actually inside the array. */
    if (context_index != UINT32_MAX && context_index >= MAX_COMMAND_LISTS)
    {
        FIXME("Checkpoint context index %u is out of range, ignoring.\n", context_index);
        context_index = UINT32_MAX;
    }

    if (context_index != UINT32_MAX && begin_marker != 0 && end_marker != 0 && end_marker != UINT32_MAX)
    {
        ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
                context_index, begin_marker, end_marker);
        vkd3d_breadcrumb_tracer_report_command_list(&tracer->trace_contexts[context_index], begin_marker, end_marker);
        ERR("Done analyzing command list.\n");
    }

    vkd3d_free(checkpoints);
}
void vkd3d_breadcrumb_tracer_report_device_lost(struct vkd3d_breadcrumb_tracer *tracer,
struct d3d12_device *device)
{
struct vkd3d_queue_family_info *queue_family_info;
VkQueue vk_queue;
unsigned int i;
ERR("Device lost observed, analyzing breadcrumbs ...\n");
if (device->vk_info.AMD_buffer_marker)
{
/* AMD path, buffer marker. */
for (i = 0; i < MAX_COMMAND_LISTS; i++)
vkd3d_breadcrumb_tracer_report_command_list_amd(tracer, i);
}
else if (device->vk_info.NV_device_diagnostic_checkpoints)
{
/* vkGetQueueCheckpointDataNV does not require us to synchronize access to the queue. */
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_DIRECT);
for (i = 0; i < queue_family_info->queue_count; i++)
{
vk_queue = queue_family_info->queues[i]->vk_queue;
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
}
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COMPUTE);
for (i = 0; i < queue_family_info->queue_count; i++)
{
vk_queue = queue_family_info->queues[i]->vk_queue;
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
}
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COPY);
for (i = 0; i < queue_family_info->queue_count; i++)
{
vk_queue = queue_family_info->queues[i]->vk_queue;
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
}
}
ERR("Done analyzing breadcrumbs ...\n");
}
void vkd3d_breadcrumb_tracer_begin_command_list(struct d3d12_command_list *list)
{
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct vkd3d_breadcrumb_command_list_trace_context *trace;
unsigned int context = list->breadcrumb_context_index;
struct vkd3d_breadcrumb_command cmd;
if (context == UINT32_MAX)
return;
trace = &breadcrumb_tracer->trace_contexts[context];
trace->counter++;
cmd.count = trace->counter;
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
if (list->device->vk_info.AMD_buffer_marker)
{
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
breadcrumb_tracer->host_buffer,
context * sizeof(struct vkd3d_breadcrumb_counter) +
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
trace->counter));
}
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
{
/* A checkpoint is implicitly a top and bottom marker. */
cmd.count = trace->counter;
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
}
}
/* Appends a copy of *command to the command log of the list's breadcrumb
 * context. Does nothing if the list has no breadcrumb context (index
 * UINT32_MAX). If the log cannot be grown, the command is dropped and an error
 * is logged instead of writing past the end of the array. */
void vkd3d_breadcrumb_tracer_add_command(struct d3d12_command_list *list,
        const struct vkd3d_breadcrumb_command *command)
{
    struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
    struct vkd3d_breadcrumb_command_list_trace_context *trace;
    unsigned int context = list->breadcrumb_context_index;

    if (context == UINT32_MAX)
        return;

    trace = &breadcrumb_tracer->trace_contexts[context];

    TRACE("Adding command (%s) to context %u.\n",
            vkd3d_breadcrumb_command_type_to_str(command->type), context);

    if (!vkd3d_array_reserve((void**)&trace->commands, &trace->command_size,
            trace->command_count + 1, sizeof(*trace->commands)))
    {
        ERR("Failed to grow breadcrumb command log for context %u, dropping command (%s).\n",
                context, vkd3d_breadcrumb_command_type_to_str(command->type));
        return;
    }

    trace->commands[trace->command_count++] = *command;
}
void vkd3d_breadcrumb_tracer_signal(struct d3d12_command_list *list)
{
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct vkd3d_breadcrumb_command_list_trace_context *trace;
unsigned int context = list->breadcrumb_context_index;
struct vkd3d_breadcrumb_command cmd;
if (context == UINT32_MAX)
return;
trace = &breadcrumb_tracer->trace_contexts[context];
if (list->device->vk_info.AMD_buffer_marker)
{
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
cmd.count = trace->counter;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
breadcrumb_tracer->host_buffer,
context * sizeof(struct vkd3d_breadcrumb_counter) +
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
trace->counter));
trace->counter++;
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
cmd.count = trace->counter;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
breadcrumb_tracer->host_buffer,
context * sizeof(struct vkd3d_breadcrumb_counter) +
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
trace->counter));
}
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
{
trace->counter++;
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
cmd.count = trace->counter;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
cmd.count = trace->counter;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
}
}
void vkd3d_breadcrumb_tracer_end_command_list(struct d3d12_command_list *list)
{
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
struct vkd3d_breadcrumb_command_list_trace_context *trace;
unsigned int context = list->breadcrumb_context_index;
struct vkd3d_breadcrumb_command cmd;
if (context == UINT32_MAX)
return;
trace = &breadcrumb_tracer->trace_contexts[context];
trace->counter = UINT32_MAX;
if (list->device->vk_info.AMD_buffer_marker)
{
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
breadcrumb_tracer->host_buffer,
context * sizeof(struct vkd3d_breadcrumb_counter) +
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
trace->counter));
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
breadcrumb_tracer->host_buffer,
context * sizeof(struct vkd3d_breadcrumb_counter) +
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
trace->counter));
}
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
{
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
}
cmd.count = trace->counter;
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
}

View File

@ -261,13 +261,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_bundle_QueryInterface(d3d12_command_list_
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3)
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4)
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5)
|| IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6)
|| IsEqualGUID(iid, &IID_ID3D12CommandList)
|| IsEqualGUID(iid, &IID_ID3D12DeviceChild)
|| IsEqualGUID(iid, &IID_ID3D12Object)
|| IsEqualGUID(iid, &IID_IUnknown))
{
ID3D12GraphicsCommandList6_AddRef(iface);
ID3D12GraphicsCommandList5_AddRef(iface);
*object = iface;
return S_OK;
}
@ -430,7 +429,7 @@ static void d3d12_bundle_exec_draw_instanced(d3d12_command_list_iface *list, con
{
const struct d3d12_draw_instanced_command *args = args_v;
ID3D12GraphicsCommandList6_DrawInstanced(list, args->vertex_count,
ID3D12GraphicsCommandList5_DrawInstanced(list, args->vertex_count,
args->instance_count, args->first_vertex, args->first_instance);
}
@ -467,7 +466,7 @@ static void d3d12_bundle_exec_draw_indexed_instanced(d3d12_command_list_iface *l
{
const struct d3d12_draw_indexed_instanced_command *args = args_v;
ID3D12GraphicsCommandList6_DrawIndexedInstanced(list, args->index_count,
ID3D12GraphicsCommandList5_DrawIndexedInstanced(list, args->index_count,
args->instance_count, args->first_index, args->vertex_offset,
args->first_instance);
}
@ -502,7 +501,7 @@ static void d3d12_bundle_exec_dispatch(d3d12_command_list_iface *list, const voi
{
const struct d3d12_dispatch_command *args = args_v;
ID3D12GraphicsCommandList6_Dispatch(list, args->x, args->y, args->z);
ID3D12GraphicsCommandList5_Dispatch(list, args->x, args->y, args->z);
}
static void STDMETHODCALLTYPE d3d12_bundle_Dispatch(d3d12_command_list_iface *iface,
@ -570,7 +569,7 @@ static void d3d12_bundle_exec_ia_set_primitive_topology(d3d12_command_list_iface
{
const struct d3d12_ia_set_primitive_topology_command *args = args_v;
ID3D12GraphicsCommandList6_IASetPrimitiveTopology(list, args->topology);
ID3D12GraphicsCommandList5_IASetPrimitiveTopology(list, args->topology);
}
static void STDMETHODCALLTYPE d3d12_bundle_IASetPrimitiveTopology(d3d12_command_list_iface *iface,
@ -607,7 +606,7 @@ static void d3d12_bundle_exec_om_set_blend_factor(d3d12_command_list_iface *list
{
const struct d3d12_om_set_blend_factor_command *args = args_v;
ID3D12GraphicsCommandList6_OMSetBlendFactor(list, args->blend_factor);
ID3D12GraphicsCommandList5_OMSetBlendFactor(list, args->blend_factor);
}
static void STDMETHODCALLTYPE d3d12_bundle_OMSetBlendFactor(d3d12_command_list_iface *iface,
@ -635,7 +634,7 @@ static void d3d12_bundle_exec_om_set_stencil_ref(d3d12_command_list_iface *list,
{
const struct d3d12_om_set_stencil_ref_command *args = args_v;
ID3D12GraphicsCommandList6_OMSetStencilRef(list, args->stencil_ref);
ID3D12GraphicsCommandList5_OMSetStencilRef(list, args->stencil_ref);
}
static void STDMETHODCALLTYPE d3d12_bundle_OMSetStencilRef(d3d12_command_list_iface *iface,
@ -660,7 +659,7 @@ static void d3d12_bundle_exec_set_pipeline_state(d3d12_command_list_iface *list,
{
const struct d3d12_set_pipeline_state_command *args = args_v;
ID3D12GraphicsCommandList6_SetPipelineState(list, args->pipeline_state);
ID3D12GraphicsCommandList5_SetPipelineState(list, args->pipeline_state);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetPipelineState(d3d12_command_list_iface *iface,
@ -704,7 +703,7 @@ static void d3d12_bundle_exec_set_compute_root_signature(d3d12_command_list_ifac
{
const struct d3d12_set_root_signature_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRootSignature(list, args->root_signature);
ID3D12GraphicsCommandList5_SetComputeRootSignature(list, args->root_signature);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootSignature(d3d12_command_list_iface *iface,
@ -723,7 +722,7 @@ static void d3d12_bundle_exec_set_graphics_root_signature(d3d12_command_list_ifa
{
const struct d3d12_set_root_signature_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRootSignature(list, args->root_signature);
ID3D12GraphicsCommandList5_SetGraphicsRootSignature(list, args->root_signature);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootSignature(d3d12_command_list_iface *iface,
@ -749,7 +748,7 @@ static void d3d12_bundle_exec_set_compute_root_descriptor_table(d3d12_command_li
{
const struct d3d12_set_root_descriptor_table_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
ID3D12GraphicsCommandList5_SetComputeRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootDescriptorTable(d3d12_command_list_iface *iface,
@ -770,7 +769,7 @@ static void d3d12_bundle_exec_set_graphics_root_descriptor_table(d3d12_command_l
{
const struct d3d12_set_root_descriptor_table_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
ID3D12GraphicsCommandList5_SetGraphicsRootDescriptorTable(list, args->parameter_index, args->base_descriptor);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootDescriptorTable(d3d12_command_list_iface *iface,
@ -799,7 +798,7 @@ static void d3d12_bundle_exec_set_compute_root_32bit_constant(d3d12_command_list
{
const struct d3d12_set_root_32bit_constant_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
ID3D12GraphicsCommandList5_SetComputeRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRoot32BitConstant(d3d12_command_list_iface *iface,
@ -821,7 +820,7 @@ static void d3d12_bundle_exec_set_graphics_root_32bit_constant(d3d12_command_lis
{
const struct d3d12_set_root_32bit_constant_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
ID3D12GraphicsCommandList5_SetGraphicsRoot32BitConstant(list, args->parameter_index, args->data, args->offset);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRoot32BitConstant(d3d12_command_list_iface *iface,
@ -852,7 +851,7 @@ static void d3d12_bundle_exec_set_compute_root_32bit_constants(d3d12_command_lis
{
const struct d3d12_set_root_32bit_constants_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRoot32BitConstants(list, args->parameter_index,
ID3D12GraphicsCommandList5_SetComputeRoot32BitConstants(list, args->parameter_index,
args->constant_count, args->data, args->offset);
}
@ -880,7 +879,7 @@ static void d3d12_bundle_exec_set_graphics_root_32bit_constants(d3d12_command_li
{
const struct d3d12_set_root_32bit_constants_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRoot32BitConstants(list, args->parameter_index,
ID3D12GraphicsCommandList5_SetGraphicsRoot32BitConstants(list, args->parameter_index,
args->constant_count, args->data, args->offset);
}
@ -915,7 +914,7 @@ static void d3d12_bundle_exec_set_compute_root_cbv(d3d12_command_list_iface *lis
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRootConstantBufferView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetComputeRootConstantBufferView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootConstantBufferView(
@ -936,7 +935,7 @@ static void d3d12_bundle_exec_set_graphics_root_cbv(d3d12_command_list_iface *li
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRootConstantBufferView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetGraphicsRootConstantBufferView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootConstantBufferView(
@ -957,7 +956,7 @@ static void d3d12_bundle_exec_set_compute_root_srv(d3d12_command_list_iface *lis
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRootShaderResourceView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetComputeRootShaderResourceView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootShaderResourceView(
@ -978,7 +977,7 @@ static void d3d12_bundle_exec_set_graphics_root_srv(d3d12_command_list_iface *li
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRootShaderResourceView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetGraphicsRootShaderResourceView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootShaderResourceView(
@ -999,7 +998,7 @@ static void d3d12_bundle_exec_set_compute_root_uav(d3d12_command_list_iface *lis
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetComputeRootUnorderedAccessView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetComputeRootUnorderedAccessView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetComputeRootUnorderedAccessView(
@ -1020,7 +1019,7 @@ static void d3d12_bundle_exec_set_graphics_root_uav(d3d12_command_list_iface *li
{
const struct d3d12_set_root_descriptor_command *args = args_v;
ID3D12GraphicsCommandList6_SetGraphicsRootUnorderedAccessView(list, args->parameter_index, args->address);
ID3D12GraphicsCommandList5_SetGraphicsRootUnorderedAccessView(list, args->parameter_index, args->address);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetGraphicsRootUnorderedAccessView(
@ -1045,14 +1044,14 @@ struct d3d12_ia_set_index_buffer_command
static void d3d12_bundle_exec_ia_set_index_buffer_null(d3d12_command_list_iface *list, const void *args_v)
{
ID3D12GraphicsCommandList6_IASetIndexBuffer(list, NULL);
ID3D12GraphicsCommandList5_IASetIndexBuffer(list, NULL);
}
static void d3d12_bundle_exec_ia_set_index_buffer(d3d12_command_list_iface *list, const void *args_v)
{
const struct d3d12_ia_set_index_buffer_command *args = args_v;
ID3D12GraphicsCommandList6_IASetIndexBuffer(list, &args->view);
ID3D12GraphicsCommandList5_IASetIndexBuffer(list, &args->view);
}
static void STDMETHODCALLTYPE d3d12_bundle_IASetIndexBuffer(d3d12_command_list_iface *iface,
@ -1202,7 +1201,7 @@ static void d3d12_bundle_exec_set_marker(d3d12_command_list_iface *list, const v
{
const struct d3d12_debug_marker_command *args = args_v;
ID3D12GraphicsCommandList6_SetMarker(list, args->metadata, args->data, args->data_size);
ID3D12GraphicsCommandList5_SetMarker(list, args->metadata, args->data, args->data_size);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetMarker(d3d12_command_list_iface *iface,
@ -1223,7 +1222,7 @@ static void d3d12_bundle_exec_begin_event(d3d12_command_list_iface *list, const
{
const struct d3d12_debug_marker_command *args = args_v;
ID3D12GraphicsCommandList6_BeginEvent(list, args->metadata, args->data, args->data_size);
ID3D12GraphicsCommandList5_BeginEvent(list, args->metadata, args->data, args->data_size);
}
static void STDMETHODCALLTYPE d3d12_bundle_BeginEvent(d3d12_command_list_iface *iface,
@ -1242,7 +1241,7 @@ static void STDMETHODCALLTYPE d3d12_bundle_BeginEvent(d3d12_command_list_iface *
static void d3d12_bundle_exec_end_event(d3d12_command_list_iface *list, const void *args_v)
{
ID3D12GraphicsCommandList6_EndEvent(list);
ID3D12GraphicsCommandList5_EndEvent(list);
}
static void STDMETHODCALLTYPE d3d12_bundle_EndEvent(d3d12_command_list_iface *iface)
@ -1269,7 +1268,7 @@ static void d3d12_bundle_exec_execute_indirect(d3d12_command_list_iface *list, c
{
const struct d3d12_execute_indirect_command *args = args_v;
ID3D12GraphicsCommandList6_ExecuteIndirect(list, args->signature, args->max_count,
ID3D12GraphicsCommandList5_ExecuteIndirect(list, args->signature, args->max_count,
args->arg_buffer, args->arg_offset, args->count_buffer, args->count_offset);
}
@ -1331,7 +1330,7 @@ static void d3d12_bundle_exec_om_set_depth_bounds(d3d12_command_list_iface *list
{
const struct d3d12_om_set_depth_bounds_command *args = args_v;
ID3D12GraphicsCommandList6_OMSetDepthBounds(list, args->min, args->max);
ID3D12GraphicsCommandList5_OMSetDepthBounds(list, args->min, args->max);
}
static void STDMETHODCALLTYPE d3d12_bundle_OMSetDepthBounds(d3d12_command_list_iface *iface,
@ -1360,7 +1359,7 @@ static void d3d12_bundle_exec_set_sample_positions(d3d12_command_list_iface *lis
const struct d3d12_set_sample_positions_command *args = args_v;
/* The sample position array is non-const but does not get written to */
ID3D12GraphicsCommandList6_SetSamplePositions(list, args->sample_count,
ID3D12GraphicsCommandList5_SetSamplePositions(list, args->sample_count,
args->pixel_count, (D3D12_SAMPLE_POSITION*)args->positions);
}
@ -1403,7 +1402,7 @@ static void d3d12_bundle_exec_set_view_instance_mask(d3d12_command_list_iface *l
{
const struct d3d12_set_view_instance_mask_command *args = args_v;
ID3D12GraphicsCommandList6_SetViewInstanceMask(list, args->mask);
ID3D12GraphicsCommandList5_SetViewInstanceMask(list, args->mask);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetViewInstanceMask(d3d12_command_list_iface *iface, UINT mask)
@ -1429,7 +1428,7 @@ static void d3d12_bundle_exec_write_buffer_immediate(d3d12_command_list_iface *l
{
const struct d3d12_write_buffer_immediate_command *args = args_v;
ID3D12GraphicsCommandList6_WriteBufferImmediate(list, args->count, args->parameters, args->modes);
ID3D12GraphicsCommandList5_WriteBufferImmediate(list, args->count, args->parameters, args->modes);
}
static void STDMETHODCALLTYPE d3d12_bundle_WriteBufferImmediate(d3d12_command_list_iface *iface,
@ -1525,7 +1524,7 @@ static void d3d12_bundle_exec_set_pipeline_state1(d3d12_command_list_iface *list
{
const struct d3d12_set_pipeline_state1_command *args = args_v;
ID3D12GraphicsCommandList6_SetPipelineState1(list, args->state_object);
ID3D12GraphicsCommandList5_SetPipelineState1(list, args->state_object);
}
static void STDMETHODCALLTYPE d3d12_bundle_SetPipelineState1(d3d12_command_list_iface *iface,
@ -1550,7 +1549,7 @@ static void d3d12_bundle_exec_dispatch_rays(d3d12_command_list_iface *list, cons
{
const struct d3d12_dispatch_rays_command *args = args_v;
ID3D12GraphicsCommandList6_DispatchRays(list, &args->desc);
ID3D12GraphicsCommandList5_DispatchRays(list, &args->desc);
}
static void STDMETHODCALLTYPE d3d12_bundle_DispatchRays(d3d12_command_list_iface *iface,
@ -1576,14 +1575,14 @@ static void d3d12_bundle_exec_rs_set_shading_rate(d3d12_command_list_iface *list
{
const struct d3d12_rs_set_shading_rate_command *args = args_v;
ID3D12GraphicsCommandList6_RSSetShadingRate(list, args->base, args->combiners);
ID3D12GraphicsCommandList5_RSSetShadingRate(list, args->base, args->combiners);
}
static void d3d12_bundle_exec_rs_set_shading_rate_base(d3d12_command_list_iface *list, const void *args_v)
{
const struct d3d12_rs_set_shading_rate_command *args = args_v;
ID3D12GraphicsCommandList6_RSSetShadingRate(list, args->base, NULL);
ID3D12GraphicsCommandList5_RSSetShadingRate(list, args->base, NULL);
}
static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRate(d3d12_command_list_iface *iface,
@ -1613,7 +1612,7 @@ static void d3d12_bundle_exec_rs_set_shading_rate_image(d3d12_command_list_iface
{
const struct d3d12_rs_set_shading_rate_image_command *args = args_v;
ID3D12GraphicsCommandList6_RSSetShadingRateImage(list, args->image);
ID3D12GraphicsCommandList5_RSSetShadingRateImage(list, args->image);
}
static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRateImage(d3d12_command_list_iface *iface,
@ -1628,27 +1627,7 @@ static void STDMETHODCALLTYPE d3d12_bundle_RSSetShadingRateImage(d3d12_command_l
args->image = image;
}
/* Bundle command callback for DispatchMesh: reads the x/y/z values stored in the
 * recorded d3d12_dispatch_command and issues DispatchMesh on the given command list. */
static void d3d12_bundle_exec_dispatch_mesh(d3d12_command_list_iface *list, const void *args_v)
{
    const struct d3d12_dispatch_command *dispatch = (const struct d3d12_dispatch_command *)args_v;

    ID3D12GraphicsCommandList6_DispatchMesh(list, dispatch->x, dispatch->y, dispatch->z);
}
/* DispatchMesh entry point for bundles. The call is not forwarded anywhere here:
 * a d3d12_dispatch_command is appended to the bundle through d3d12_bundle_add_command
 * (with d3d12_bundle_exec_dispatch_mesh as its callback) and filled with x, y and z. */
static void STDMETHODCALLTYPE d3d12_bundle_DispatchMesh(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z)
{
    struct d3d12_bundle *self = impl_from_ID3D12GraphicsCommandList(iface);
    struct d3d12_dispatch_command *cmd;

    TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z);

    cmd = d3d12_bundle_add_command(self, &d3d12_bundle_exec_dispatch_mesh, sizeof(*cmd));

    /* The three stores are independent of each other. */
    cmd->z = z;
    cmd->y = y;
    cmd->x = x;
}
static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_bundle_vtbl =
static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_bundle_vtbl =
{
/* IUnknown methods */
d3d12_bundle_QueryInterface,
@ -1739,8 +1718,6 @@ static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_bundle_vtbl =
/* ID3D12GraphicsCommandList5 methods */
d3d12_bundle_RSSetShadingRate,
d3d12_bundle_RSSetShadingRateImage,
/* ID3D12GraphicsCommandList6 methods */
d3d12_bundle_DispatchMesh,
};
HRESULT d3d12_bundle_create(struct d3d12_device *device,

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -459,12 +459,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage_profiled(
COMMAND_LIST_PROFILED_CALL(RSSetShadingRateImage, iface, image);
}
/* Entry used in d3d12_command_list_vtbl_profiled for DispatchMesh: passes the method
 * name and all arguments to COMMAND_LIST_PROFILED_CALL.
 * NOTE(review): the macro is defined outside this view; presumably it times the call and
 * forwards to d3d12_command_list_DispatchMesh - confirm. */
static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh_profiled(d3d12_command_list_iface *iface, UINT x, UINT y, UINT z)
{
COMMAND_LIST_PROFILED_CALL(DispatchMesh, iface, x, y, z);
}
static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl_profiled =
static CONST_VTBL struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl_profiled =
{
/* IUnknown methods */
d3d12_command_list_QueryInterface,
@ -555,8 +550,6 @@ static CONST_VTBL struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl_
/* ID3D12GraphicsCommandList5 methods */
d3d12_command_list_RSSetShadingRate_profiled,
d3d12_command_list_RSSetShadingRateImage_profiled,
/* ID3D12GraphicsCommandList6 methods */
d3d12_command_list_DispatchMesh_profiled,
};
#endif

View File

@ -1,116 +0,0 @@
/*
* * Copyright 2021 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
/* Maps a pointer to the embedded ID3D12GraphicsCommandListExt_iface member back to the
 * struct d3d12_command_list that contains it. */
static inline struct d3d12_command_list *d3d12_command_list_from_ID3D12GraphicsCommandListExt(ID3D12GraphicsCommandListExt *iface)
{
    struct d3d12_command_list *owner;

    owner = CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandListExt_iface);
    return owner;
}
extern ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(d3d12_command_list_iface *iface);
/* AddRef for the Ext interface: resolves the owning command list and returns whatever
 * d3d12_command_list_AddRef returns for its ID3D12GraphicsCommandList_iface.
 * NOTE(review): this function is not static, unlike the Release and QueryInterface
 * counterparts in this file - confirm whether external linkage is intended. */
ULONG STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_AddRef(ID3D12GraphicsCommandListExt *iface)
{
    struct d3d12_command_list *list;

    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
    return d3d12_command_list_AddRef(&list->ID3D12GraphicsCommandList_iface);
}
extern ULONG STDMETHODCALLTYPE d3d12_command_list_Release(d3d12_command_list_iface *iface);
/* Release for the Ext interface: resolves the owning command list and returns whatever
 * d3d12_command_list_Release returns for its ID3D12GraphicsCommandList_iface. */
static ULONG STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_Release(ID3D12GraphicsCommandListExt *iface)
{
    struct d3d12_command_list *list;

    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
    return d3d12_command_list_Release(&list->ID3D12GraphicsCommandList_iface);
}
extern HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(d3d12_command_list_iface *iface,
REFIID iid, void **object);
/* QueryInterface for the Ext interface: traces the request, then hands iid/out to
 * d3d12_command_list_QueryInterface on the owning command list's
 * ID3D12GraphicsCommandList_iface and returns its HRESULT unchanged. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_QueryInterface(ID3D12GraphicsCommandListExt *iface,
        REFIID iid, void **out)
{
    struct d3d12_command_list *list;

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    /* The lookup is plain pointer arithmetic, so doing it after the trace changes nothing. */
    list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
    return d3d12_command_list_QueryInterface(&list->ID3D12GraphicsCommandList_iface, iid, out);
}
/* Writes the command list's vk_command_buffer to *pVkCommandBuffer.
 *
 * Returns E_INVALIDARG when pVkCommandBuffer is NULL (nothing is written),
 * S_OK otherwise. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_GetVulkanHandle(ID3D12GraphicsCommandListExt *iface,
        VkCommandBuffer *pVkCommandBuffer)
{
    TRACE("iface %p, pVkCommandBuffer %p.\n", iface, pVkCommandBuffer);

    if (pVkCommandBuffer)
    {
        *pVkCommandBuffer = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface)->vk_command_buffer;
        return S_OK;
    }

    return E_INVALIDARG;
}
#define CU_LAUNCH_PARAM_BUFFER_POINTER (const void*)0x01
#define CU_LAUNCH_PARAM_BUFFER_SIZE (const void*)0x02
#define CU_LAUNCH_PARAM_END (const void*)0x00
/* Records a vkCmdCuLaunchKernelNVX call into the command list's Vulkan command buffer.
 *
 * handle               supplies the VkCuFunctionNVX (vkCuFunction) and the block dimensions
 *                      (blockX/blockY/blockZ).
 * block_x/y/z          are written to gridDimX/Y/Z of the launch info, despite their names.
 * params, param_size   are passed through the "extras" list as a buffer pointer and a
 *                      pointer to its size.
 *
 * Returns E_INVALIDARG if handle or params is NULL, or if any of block_x, block_y,
 * block_z or param_size is zero; S_OK otherwise. */
static HRESULT STDMETHODCALLTYPE d3d12_command_list_vkd3d_ext_LaunchCubinShader(ID3D12GraphicsCommandListExt *iface, D3D12_CUBIN_DATA_HANDLE *handle, UINT32 block_x, UINT32 block_y, UINT32 block_z, const void *params, UINT32 param_size)
{
    struct d3d12_command_list *list = d3d12_command_list_from_ID3D12GraphicsCommandListExt(iface);
    VkCuLaunchInfoNVX launch = { VK_STRUCTURE_TYPE_CU_LAUNCH_INFO_NVX };
    /* Must keep this name: VK_CALL is used below with a local called vk_procs in scope. */
    const struct vkd3d_vk_device_procs *vk_procs;
    /* Tag/value list terminated by CU_LAUNCH_PARAM_END. Only addresses are stored here,
     * nothing is dereferenced, so building it before validation is harmless. */
    const void *extras[] =
    {
        CU_LAUNCH_PARAM_BUFFER_POINTER, params,
        CU_LAUNCH_PARAM_BUFFER_SIZE, &param_size,
        CU_LAUNCH_PARAM_END
    };

    TRACE("iface %p, handle %p, block_x %u, block_y %u, block_z %u, params %p, param_size %u \n", iface, handle, block_x, block_y, block_z, params, param_size);

    if (!(handle && block_x && block_y && block_z && params && param_size))
        return E_INVALIDARG;

    launch.function = handle->vkCuFunction;

    /* Caller-provided counts select the grid size ... */
    launch.gridDimX = block_x;
    launch.gridDimY = block_y;
    launch.gridDimZ = block_z;

    /* ... while the block size comes from the cubin handle. */
    launch.blockDimX = handle->blockX;
    launch.blockDimY = handle->blockY;
    launch.blockDimZ = handle->blockZ;

    launch.sharedMemBytes = 0;

    /* No individual kernel parameters; everything goes through the extras list. */
    launch.pParams = NULL;
    launch.paramCount = 0;

    /* NOTE(review): extraCount is 1 although the array holds five entries; presumably the
     * consumer walks the list up to CU_LAUNCH_PARAM_END - confirm against the extension. */
    launch.pExtras = extras;
    launch.extraCount = 1;

    vk_procs = &list->device->vk_procs;
    VK_CALL(vkCmdCuLaunchKernelNVX(list->vk_command_buffer, &launch));

    return S_OK;
}
/* Method table for the ID3D12GraphicsCommandListExt interface of a command list.
 * The initializers are positional, so their order has to follow the member order of
 * struct ID3D12GraphicsCommandListExtVtbl. The table is not static; it has external
 * linkage. */
CONST_VTBL struct ID3D12GraphicsCommandListExtVtbl d3d12_command_list_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_command_list_vkd3d_ext_QueryInterface,
d3d12_command_list_vkd3d_ext_AddRef,
d3d12_command_list_vkd3d_ext_Release,
/* ID3D12GraphicsCommandListExt methods */
d3d12_command_list_vkd3d_ext_GetVulkanHandle,
d3d12_command_list_vkd3d_ext_LaunchCubinShader
};

View File

@ -21,7 +21,6 @@
#include "vkd3d_private.h"
#include "vkd3d_debug.h"
#include "vkd3d_common.h"
#include "vkd3d_platform.h"
#include <stdio.h>
void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
@ -54,199 +53,22 @@ void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
info->map_entries[3].size = sizeof(uint32_t);
}
#define READ_RING_WORD(off) ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)]
#define READ_RING_WORD_ACQUIRE(off) \
vkd3d_atomic_uint32_load_explicit(&ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)], \
vkd3d_memory_order_acquire)
#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
/* Returns a short human-readable label for a patch command token, for use in debug ring
 * log output. Tokens without a dedicated label map to "???". The returned string is a
 * literal and must not be freed. */
static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
{
    const char *label = "???";

    switch (token)
    {
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32:
            label = "RootConst";
            break;

        /* Index buffer state. */
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
            label = "IBO VA LO";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
            label = "IBO VA HI";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE:
            label = "IBO Size";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT:
            label = "IBO Type";
            break;

        /* Vertex buffer state. */
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
            label = "VBO VA LO";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
            label = "VBO VA HI";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE:
            label = "VBO Size";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE:
            label = "VBO Stride";
            break;

        /* Root address. */
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
            label = "ROOT VA LO";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
            label = "ROOT VA HI";
            break;

        /* Draw arguments. */
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT:
            label = "Vertex Count";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT:
            label = "Index Count";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT:
            label = "Instance Count";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX:
            label = "First Index";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX:
            label = "First Vertex";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE:
            label = "First Instance";
            break;
        case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET:
            label = "Vertex Offset";
            break;

        default:
            break;
    }

    return label;
}
/* Tells whether the value attached to a patch command token should be printed as hex.
 * True only for the six *_VA_LO / *_VA_HI tokens (IBO, VBO and ROOT); false for every
 * other token. */
static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
{
    return token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO
            || token == VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI
            || token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO
            || token == VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI
            || token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO
            || token == VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI;
}
/* Decodes one message from the debug ring and logs it with INFO.
 *
 * ring                 the debug ring; READ_RING_WORD reads ring->mapped_ring and wraps the
 *                      word offset to the ring size, so the local must be named "ring".
 * word_offset          word index of the first word of the message.
 * message_word_count   total number of words in the message, including the 8-word header.
 *
 * Header layout as read below (word 0 is not read by this function):
 *   words 1-2: shader hash (low, high), word 3: instance, words 4-6: thread ID,
 *   word 7: packed per-payload-word format codes.
 *
 * Returns false (after logging an error) when message_word_count is smaller than the
 * header, true otherwise. */
static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
uint32_t word_offset, uint32_t message_word_count)
{
uint32_t i, debug_instance, debug_thread_id[3], fmt;
char message_buffer[4096];
uint64_t shader_hash;
size_t len, avail;
/* A message shorter than the 8-word header cannot be decoded. */
if (message_word_count < 8)
{
ERR("Message word count %u is invalid.\n", message_word_count);
return false;
}
shader_hash = (uint64_t)READ_RING_WORD(word_offset + 1) | ((uint64_t)READ_RING_WORD(word_offset + 2) << 32);
debug_instance = READ_RING_WORD(word_offset + 3);
for (i = 0; i < 3; i++)
debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
fmt = READ_RING_WORD(word_offset + 7);
/* From here on word_offset / message_word_count describe the payload only. */
word_offset += 8;
message_word_count -= 8;
if (shader_hash == 0)
{
/* We got this from our internal debug shaders. Pretty-print.
* Make sure the log is sortable for easier debug.
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
* but that is overengineering at this time ... */
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
/* Two payload words: draw count and max draw count. */
if (message_word_count == 2)
{
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
READ_RING_WORD(word_offset + 0),
READ_RING_WORD(word_offset + 1));
}
/* Four payload words: patch token, destination offset, source offset, value. */
else if (message_word_count == 4)
{
union { uint32_t u32; float f32; int32_t s32; } value;
enum vkd3d_patch_command_token token;
uint32_t dst_offset;
uint32_t src_offset;
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
token = READ_RING_WORD(word_offset + 0);
dst_offset = READ_RING_WORD(word_offset + 1);
src_offset = READ_RING_WORD(word_offset + 2);
value.u32 = READ_RING_WORD(word_offset + 3);
/* The value's presentation depends on the token: hex for address halves, all three
* interpretations for root constants, signed decimal for everything else. */
if (vkd3d_patch_command_token_is_hex(token))
{
snprintf(message_buffer + len, avail, "%s <- #%08x",
vkd3d_patch_command_token_str(token), value.u32);
}
else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
{
snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
}
else
{
snprintf(message_buffer + len, avail, "%s <- %d",
vkd3d_patch_command_token_str(token), value.s32);
}
len = strlen(message_buffer);
avail = sizeof(message_buffer) - len;
snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
}
/* Any other payload size is logged with the prefix only. */
}
else
{
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
shader_hash, debug_instance,
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
/* Append each payload word, formatted according to its 2-bit code in fmt. */
for (i = 0; i < message_word_count; i++)
{
union
{
float f32;
uint32_t u32;
int32_t i32;
} u;
const char *delim;
u.u32 = READ_RING_WORD(word_offset + i);
len = strlen(message_buffer);
/* Stop appending once the buffer has no room left for more text. */
if (len + 1 >= sizeof(message_buffer))
break;
avail = sizeof(message_buffer) - len;
delim = i == 0 ? " " : ", ";
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
/* NOTE(review): the shift amount reaches 32 once i >= 16, so this relies on the
* payload being at most 16 words - confirm the callers guarantee that. */
switch ((fmt >> (2u * i)) & 3u)
{
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_I32:
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_F32:
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
break;
default:
snprintf(message_buffer + len, avail, "%s????", delim);
break;
}
}
}
INFO("%s\n", message_buffer);
return true;
}
void *vkd3d_shader_debug_ring_thread_main(void *arg)
{
uint32_t last_counter, new_counter, count, i, cookie_word_count;
volatile const uint32_t *ring_counter; /* Atomic updated by the GPU. */
uint32_t last_counter, new_counter, count, i, j, message_word_count, debug_instance, debug_thread_id[3], fmt;
struct vkd3d_shader_debug_ring *ring;
struct d3d12_device *device = arg;
const uint32_t *ring_counter;
const uint32_t *ring_base;
char message_buffer[4096];
bool is_active = true;
uint32_t *ring_base;
uint32_t word_count;
uint64_t shader_hash;
size_t ring_mask;
ring = &device->debug_ring;
ring_mask = (ring->ring_size / sizeof(uint32_t)) - 1;
ring_counter = ring->mapped_control_block;
ring_base = ring->mapped_ring;
ring_mask = ring->ring_size - 1;
ring_counter = ring->mapped;
ring_base = ring_counter + (ring->ring_offset / sizeof(uint32_t));
last_counter = 0;
vkd3d_set_thread_name("debug-ring");
@ -254,99 +76,93 @@ void *vkd3d_shader_debug_ring_thread_main(void *arg)
while (is_active)
{
pthread_mutex_lock(&ring->ring_lock);
if (ring->active)
pthread_cond_wait(&ring->ring_cond, &ring->ring_lock);
pthread_cond_wait(&ring->ring_cond, &ring->ring_lock);
is_active = ring->active;
pthread_mutex_unlock(&ring->ring_lock);
new_counter = *ring_counter;
if (last_counter != new_counter)
{
count = (new_counter - last_counter) & ring_mask;
/* Assume that each iteration can safely use 1/4th of the buffer to avoid WAR hazards. */
if (count > (ring->ring_size / 16))
if ((new_counter - last_counter) > (ring->ring_size / 16))
{
ERR("Debug ring is probably too small (%u new words this iteration), increase size to avoid risk of dropping messages.\n",
count);
new_counter - last_counter);
}
for (i = 0; i < count; )
{
/* The debug ring shader has "release" semantics for the word count write,
* so just make sure the reads don't get reordered here. */
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
if (cookie_word_count == 0)
{
ERR("Message was allocated, but write did not complete. last_counter = %u, rewrite new_counter = %u -> %u\n",
last_counter, new_counter, last_counter + i);
/* Rewind the counter, and try again later. */
new_counter = last_counter + i;
#define READ_RING_WORD(off) ring_base[((off) + i + last_counter) & ring_mask]
message_word_count = READ_RING_WORD(0);
if (i + message_word_count > count)
break;
if (message_word_count < 8 || message_word_count > 16 + 8)
break;
shader_hash = (uint64_t)READ_RING_WORD(1) | ((uint64_t)READ_RING_WORD(2) << 32);
debug_instance = READ_RING_WORD(3);
for (j = 0; j < 3; j++)
debug_thread_id[j] = READ_RING_WORD(4 + j);
fmt = READ_RING_WORD(7);
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %u, ID (%u, %u, %u):",
shader_hash, debug_instance,
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
i += 8;
message_word_count -= 8;
for (j = 0; j < message_word_count; j++)
{
union
{
float f32;
uint32_t u32;
int32_t i32;
} u;
const char *delim;
size_t len, avail;
u.u32 = READ_RING_WORD(j);
len = strlen(message_buffer);
if (len + 1 >= sizeof(message_buffer))
break;
avail = sizeof(message_buffer) - len;
delim = j == 0 ? " " : ", ";
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
switch ((fmt >> (2u * j)) & 3u)
{
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_I32:
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
break;
case VKD3D_DEBUG_CHANNEL_FMT_F32:
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
break;
default:
snprintf(message_buffer + len, avail, "%s????", delim);
break;
}
}
/* If something is written here, it must be a cookie. */
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) != DEBUG_CHANNEL_WORD_COOKIE)
{
ERR("Invalid message work cookie detected, 0x%x.\n", cookie_word_count);
break;
}
INFO("%s\n", message_buffer);
if (i + word_count > count)
{
ERR("Message word count %u is out of bounds (i = %u, count = %u).\n",
word_count, i, count);
break;
}
if (!vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
break;
i += word_count;
#undef READ_RING_WORD
i += message_word_count;
}
}
/* Make sure to clear out any messages we read so that when the ring gets around to
* this point again, we can detect unwritten memory.
* This relies on having a ring that is large enough, but in practice, if we just make the ring
* large enough, there is nothing to worry about. */
while (last_counter != new_counter)
{
ring_base[last_counter & ring_mask] = 0;
last_counter++;
}
}
if (ring->device_lost)
{
INFO("Device lost detected, attempting to fish for clues.\n");
new_counter = *ring_counter;
if (last_counter != new_counter)
{
count = (new_counter - last_counter) & ring_mask;
for (i = 0; i < count; )
{
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
/* This is considered a message if it has the marker and a word count that is in-range. */
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) == DEBUG_CHANNEL_WORD_COOKIE &&
i + word_count <= count &&
vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
{
i += word_count;
}
else
{
/* Keep going. */
i++;
}
}
}
INFO("Done fishing for clues ...\n");
last_counter = new_counter;
}
return NULL;
@ -357,21 +173,20 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
D3D12_HEAP_PROPERTIES heap_properties;
D3D12_RESOURCE_DESC1 resource_desc;
VkMemoryPropertyFlags memory_props;
char env[VKD3D_PATH_MAX];
D3D12_RESOURCE_DESC resource_desc;
const char *env;
memset(ring, 0, sizeof(*ring));
if (!vkd3d_get_env_var("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2", env, sizeof(env)))
if (!(env = getenv("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2")))
return S_OK;
ring->active = true;
ring->ring_size = (size_t)1 << strtoul(env, NULL, 0);
ring->control_block_size = 4096;
// Reserve 4k to be used as a control block of some sort.
ring->ring_offset = 4096;
INFO("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
WARN("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
{
@ -385,7 +200,7 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
memset(&resource_desc, 0, sizeof(resource_desc));
resource_desc.Width = ring->ring_size;
resource_desc.Width = ring->ring_offset + ring->ring_size;
resource_desc.Height = 1;
resource_desc.DepthOrArraySize = 1;
resource_desc.MipLevels = 1;
@ -396,71 +211,33 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
&resource_desc, &ring->host_buffer)))
&resource_desc, &ring->host_buffer)))
goto err_free_buffers;
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
/* If we're doing breadcrumb debugging, we also need to be able to read debug ring messages
* from a crash, so we cannot rely on being able to copy the device payload back to host.
* Use PCI-e BAR + UNCACHED + DEVICE_COHERENT if we must. */
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
{
INFO("Using debug ring with breadcrumbs, opting in to device uncached payload buffer.\n");
/* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
* host reads, so make extra sure. */
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
{
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
INFO("Enabling uncached device memory for debug ring.\n");
}
}
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->host_buffer,
memory_props, &ring->host_buffer_memory)))
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
&ring->host_buffer_memory)))
goto err_free_buffers;
resource_desc.Width = ring->control_block_size;
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer) + ring->ring_offset;
resource_desc.Width = ring->ring_offset;
memset(&heap_properties, 0, sizeof(heap_properties));
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
&resource_desc, &ring->device_atomic_buffer)))
&resource_desc, &ring->device_atomic_buffer)))
goto err_free_buffers;
memory_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
{
/* Expect crashes since we won't have time to flush caches.
* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
* host reads, so make extra sure. */
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
}
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->device_atomic_buffer,
memory_props, &ring->device_atomic_buffer_memory)))
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &ring->device_atomic_buffer_memory)))
goto err_free_buffers;
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory.vk_memory,
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_ring)) != VK_SUCCESS)
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory, 0, VK_WHOLE_SIZE, 0, &ring->mapped)) != VK_SUCCESS)
goto err_free_buffers;
if (VK_CALL(vkMapMemory(device->vk_device, ring->device_atomic_buffer_memory.vk_memory,
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_control_block)) != VK_SUCCESS)
goto err_free_buffers;
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer);
ring->atomic_device_address = vkd3d_get_buffer_device_address(device, ring->device_atomic_buffer);
memset(ring->mapped_control_block, 0, ring->control_block_size);
memset(ring->mapped_ring, 0, ring->ring_size);
if (pthread_mutex_init(&ring->ring_lock, NULL) != 0)
goto err_free_buffers;
if (pthread_cond_init(&ring->ring_cond, NULL) != 0)
@ -481,8 +258,8 @@ err_destroy_cond:
err_free_buffers:
VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
vkd3d_free_device_memory(device, &ring->host_buffer_memory);
vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
VK_CALL(vkFreeMemory(device->vk_device, ring->host_buffer_memory, NULL));
VK_CALL(vkFreeMemory(device->vk_device, ring->device_atomic_buffer_memory, NULL));
memset(ring, 0, sizeof(*ring));
return E_OUTOFMEMORY;
}
@ -504,28 +281,38 @@ void vkd3d_shader_debug_ring_cleanup(struct vkd3d_shader_debug_ring *ring,
VK_CALL(vkDestroyBuffer(device->vk_device, ring->host_buffer, NULL));
VK_CALL(vkDestroyBuffer(device->vk_device, ring->device_atomic_buffer, NULL));
vkd3d_free_device_memory(device, &ring->host_buffer_memory);
vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
VK_CALL(vkFreeMemory(device->vk_device, ring->host_buffer_memory, NULL));
VK_CALL(vkFreeMemory(device->vk_device, ring->device_atomic_buffer_memory, NULL));
}
static pthread_mutex_t debug_ring_teardown_lock = PTHREAD_MUTEX_INITIALIZER;
void vkd3d_shader_debug_ring_kick(struct vkd3d_shader_debug_ring *ring, struct d3d12_device *device, bool device_lost)
void vkd3d_shader_debug_ring_end_command_buffer(struct d3d12_command_list *list)
{
if (device_lost)
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
VkBufferCopy buffer_copy;
VkMemoryBarrier barrier;
if (list->device->debug_ring.active &&
list->has_replaced_shaders &&
(list->type == D3D12_COMMAND_LIST_TYPE_DIRECT || list->type == D3D12_COMMAND_LIST_TYPE_COMPUTE))
{
/* Need a global lock here since multiple threads can observe device lost at the same time. */
pthread_mutex_lock(&debug_ring_teardown_lock);
{
ring->device_lost = true;
/* We're going to die or hang after this most likely, so make sure we get to see all messages the
* GPU had to write. Just cleanup now. */
vkd3d_shader_debug_ring_cleanup(ring, device);
}
pthread_mutex_unlock(&debug_ring_teardown_lock);
}
else
{
pthread_cond_signal(&ring->ring_cond);
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
barrier.pNext = NULL;
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
1, &barrier, 0, NULL, 0, NULL));
buffer_copy.size = list->device->debug_ring.ring_offset;
buffer_copy.dstOffset = 0;
buffer_copy.srcOffset = 0;
VK_CALL(vkCmdCopyBuffer(list->vk_command_buffer,
list->device->debug_ring.device_atomic_buffer,
list->device->debug_ring.host_buffer,
1, &buffer_copy));
/* Host barrier is taken care of automatically. */
}
}

View File

@ -35,7 +35,7 @@ struct vkd3d_descriptor_qa_global_info
struct vkd3d_descriptor_qa_global_buffer_data *data;
VkDescriptorBufferInfo descriptor;
VkBuffer vk_buffer;
struct vkd3d_device_memory_allocation device_allocation;
VkDeviceMemory vk_memory;
unsigned int num_cookies;
pthread_t ring_thread;
@ -76,10 +76,10 @@ static const char *debug_descriptor_type(vkd3d_descriptor_qa_flags type_flags)
static void vkd3d_descriptor_debug_init_once(void)
{
char env[VKD3D_PATH_MAX];
vkd3d_get_env_var("VKD3D_DESCRIPTOR_QA_LOG", env, sizeof(env));
const char *env;
if (strlen(env) > 0)
env = getenv("VKD3D_DESCRIPTOR_QA_LOG");
if (env)
{
INFO("Enabling VKD3D_DESCRIPTOR_QA_LOG\n");
descriptor_debug_file = fopen(env, "w");
@ -157,8 +157,7 @@ static void *vkd3d_descriptor_debug_qa_check_entry(void *userdata)
{
/* Don't spin endlessly, this thread is kicked after a successful fence wait. */
pthread_mutex_lock(&global_info->ring_lock);
if (global_info->active)
pthread_cond_wait(&global_info->ring_cond, &global_info->ring_lock);
pthread_cond_wait(&global_info->ring_cond, &global_info->ring_lock);
active = global_info->active;
pthread_mutex_unlock(&global_info->ring_lock);
@ -197,7 +196,7 @@ HRESULT vkd3d_descriptor_debug_alloc_global_info(
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct vkd3d_descriptor_qa_global_info *global_info;
D3D12_RESOURCE_DESC1 buffer_desc;
D3D12_RESOURCE_DESC buffer_desc;
D3D12_HEAP_PROPERTIES heap_info;
D3D12_HEAP_FLAGS heap_flags;
VkResult vr;
@ -232,13 +231,13 @@ HRESULT vkd3d_descriptor_debug_alloc_global_info(
if (FAILED(hr = vkd3d_allocate_buffer_memory(device, global_info->vk_buffer,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&global_info->device_allocation)))
&global_info->vk_memory)))
{
vkd3d_descriptor_debug_free_global_info(global_info, device);
return hr;
}
if ((vr = VK_CALL(vkMapMemory(device->vk_device, global_info->device_allocation.vk_memory,
if ((vr = VK_CALL(vkMapMemory(device->vk_device, global_info->vk_memory,
0, VK_WHOLE_SIZE, 0, (void**)&global_info->data))))
{
ERR("Failed to map buffer, vr %d.\n", vr);
@ -289,7 +288,7 @@ void vkd3d_descriptor_debug_free_global_info(
pthread_cond_destroy(&global_info->ring_cond);
}
vkd3d_free_device_memory(device, &global_info->device_allocation);
VK_CALL(vkFreeMemory(device->vk_device, global_info->vk_memory, NULL));
VK_CALL(vkDestroyBuffer(device->vk_device, global_info->vk_buffer, NULL));
vkd3d_free(global_info);
}
@ -395,7 +394,7 @@ void vkd3d_descriptor_debug_unregister_heap(uint64_t cookie)
}
void vkd3d_descriptor_debug_register_resource_cookie(struct vkd3d_descriptor_qa_global_info *global_info,
uint64_t cookie, const D3D12_RESOURCE_DESC1 *desc)
uint64_t cookie, const D3D12_RESOURCE_DESC *desc)
{
const char *fmt;
DECL_BUFFER();
@ -453,7 +452,7 @@ void vkd3d_descriptor_debug_register_allocation_cookie(
struct vkd3d_descriptor_qa_global_info *global_info,
uint64_t cookie, const struct vkd3d_allocate_memory_info *info)
{
D3D12_RESOURCE_DESC1 desc;
D3D12_RESOURCE_DESC desc;
memset(&desc, 0, sizeof(desc));
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;

File diff suppressed because it is too large Load Diff

View File

@ -215,31 +215,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState_profiled(d3d12
DEVICE_PROFILED_CALL_HRESULT(CreatePipelineState, iface, desc, riid, pipeline_state);
}
/* Profiled variant of CreateCommittedResource2: passes the method name and every argument,
 * unchanged and in order, to DEVICE_PROFILED_CALL_HRESULT.
 * NOTE(review): the macro is defined outside this view; presumably it times the call to
 * d3d12_device_CreateCommittedResource2 and returns its HRESULT - confirm. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2_profiled(d3d12_device_iface *iface,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc,
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource)
{
DEVICE_PROFILED_CALL_HRESULT(CreateCommittedResource2, iface, heap_properties, heap_flags,
desc, initial_state, optimized_clear_value, protected_session, iid, resource);
}
/* Profiled vtable entry for ID3D12Device8::CreatePlacedResource1.
 * Passes every argument through unchanged to DEVICE_PROFILED_CALL_HRESULT.
 * NOTE(review): the macro is defined outside this view; presumably it wraps the
 * regular CreatePlacedResource1 implementation with profiling and returns its
 * HRESULT - confirm against the macro definition. */
static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1_profiled(d3d12_device_iface *iface,
ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *desc,
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value,
REFIID iid, void **resource)
{
DEVICE_PROFILED_CALL_HRESULT(CreatePlacedResource1, iface, heap, heap_offset,
desc, initial_state, optimized_clear_value, iid, resource);
}
/* Profiled vtable entry for ID3D12Device8::CreateSamplerFeedbackUnorderedAccessView.
 * Uses the void-returning DEVICE_PROFILED_CALL variant, since this method has no result.
 * NOTE(review): the macro is defined outside this view; presumably it wraps the
 * regular implementation with profiling - confirm against the macro definition. */
static void STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled(d3d12_device_iface *iface,
ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor)
{
DEVICE_PROFILED_CALL(CreateSamplerFeedbackUnorderedAccessView, iface, target_resource, feedback_resource, descriptor);
}
CONST_VTBL struct ID3D12Device9Vtbl d3d12_device_vtbl_profiled =
static CONST_VTBL struct ID3D12Device6Vtbl d3d12_device_vtbl_profiled =
{
/* IUnknown methods */
d3d12_device_QueryInterface,
@ -316,19 +292,6 @@ CONST_VTBL struct ID3D12Device9Vtbl d3d12_device_vtbl_profiled =
d3d12_device_CheckDriverMatchingIdentifier,
/* ID3D12Device6 methods */
d3d12_device_SetBackgroundProcessingMode,
/* ID3D12Device7 methods */
d3d12_device_AddToStateObject,
d3d12_device_CreateProtectedResourceSession1,
/* ID3D12Device8 methods */
d3d12_device_GetResourceAllocationInfo2,
d3d12_device_CreateCommittedResource2_profiled,
d3d12_device_CreatePlacedResource1_profiled,
d3d12_device_CreateSamplerFeedbackUnorderedAccessView_profiled,
d3d12_device_GetCopyableFootprints1,
/* ID3D12Device9 methods */
d3d12_device_CreateShaderCacheSession,
d3d12_device_ShaderCacheControl,
d3d12_device_CreateCommandQueue1,
};
#endif

View File

@ -1,234 +0,0 @@
/*
* * Copyright 2021 NVIDIA Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
/* Maps an ID3D12DeviceExt interface pointer back to the d3d12_device that
 * embeds it as its ID3D12DeviceExt_iface member. */
static inline struct d3d12_device *d3d12_device_from_ID3D12DeviceExt(ID3D12DeviceExt *iface)
{
    struct d3d12_device *owner;

    owner = CONTAINING_RECORD(iface, struct d3d12_device, ID3D12DeviceExt_iface);
    return owner;
}
/* IUnknown::AddRef for the extension interface: the reference count is shared
 * with the owning device, so this simply forwards to d3d12_device_add_ref(). */
ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_AddRef(ID3D12DeviceExt *iface)
{
    return d3d12_device_add_ref(d3d12_device_from_ID3D12DeviceExt(iface));
}
/* IUnknown::Release for the extension interface: forwards to
 * d3d12_device_release() on the owning device and returns its result. */
static ULONG STDMETHODCALLTYPE d3d12_device_vkd3d_ext_Release(ID3D12DeviceExt *iface)
{
    return d3d12_device_release(d3d12_device_from_ID3D12DeviceExt(iface));
}
extern HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(d3d12_device_iface *iface,
REFIID riid, void **object);
/* IUnknown::QueryInterface for the extension interface.
 * Interface lookup is delegated to the device's main QueryInterface
 * implementation, using the owning device's ID3D12Device_iface. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_QueryInterface(ID3D12DeviceExt *iface,
        REFIID iid, void **out)
{
    struct d3d12_device *owner;

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    owner = d3d12_device_from_ID3D12DeviceExt(iface);
    return d3d12_device_QueryInterface(&owner->ID3D12Device_iface, iid, out);
}
/* Returns the Vulkan handles backing this D3D12 device.
 *
 * vk_instance, vk_physical_device and vk_device are all required output
 * pointers; E_INVALIDARG is returned (and nothing is written) if any of them
 * is NULL. On success the three handles are stored and S_OK is returned. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetVulkanHandles(ID3D12DeviceExt *iface, VkInstance *vk_instance, VkPhysicalDevice *vk_physical_device, VkDevice *vk_device)
{
    struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);

    /* All traced arguments are pointers, so each one is printed with %p.
     * (vk_physical_device was previously printed with %u, which mismatches
     * the argument type.) */
    TRACE("iface %p, vk_instance %p, vk_physical_device %p, vk_device %p \n", iface, vk_instance, vk_physical_device, vk_device);

    if (!vk_device || !vk_instance || !vk_physical_device)
        return E_INVALIDARG;

    *vk_instance = device->vkd3d_instance->vk_instance;
    *vk_physical_device = device->vk_physical_device;
    *vk_device = device->vk_device;
    return S_OK;
}
/* Reports whether the device exposes the Vulkan extension identified by
 * `extension`. The answer is read from the corresponding device->vk_info
 * flag; unrecognised values are logged with WARN and reported as unsupported. */
static BOOL STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetExtensionSupport(ID3D12DeviceExt *iface, D3D12_VK_EXTENSION extension)
{
    const struct d3d12_device *device = d3d12_device_from_ID3D12DeviceExt(iface);
    bool supported = false;

    TRACE("iface %p, extension %u \n", iface, extension);

    if (extension == D3D12_VK_NVX_BINARY_IMPORT)
        supported = device->vk_info.NVX_binary_import;
    else if (extension == D3D12_VK_NVX_IMAGE_VIEW_HANDLE)
        supported = device->vk_info.NVX_image_view_handle;
    else
        WARN("Invalid extension %x\n", extension);

    return supported;
}
/* Creates a CUDA module and function from a cubin blob through VK_NVX_binary_import.
 *
 * cubin_data / cubin_size: the cubin binary; both must be non-zero.
 * block_x / block_y / block_z: block dimensions, stored verbatim in the handle.
 * shader_name: name of the function to look up inside the module; must not be NULL.
 * out_handle: receives a newly allocated D3D12_CUBIN_DATA_HANDLE on success;
 *             must not be NULL.
 *
 * Returns S_OK on success, E_INVALIDARG for missing arguments, E_OUTOFMEMORY
 * if the handle cannot be allocated, or the HRESULT translated from the
 * failing Vulkan call. On failure nothing is written to *out_handle and every
 * partially created object is released. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName(ID3D12DeviceExt *iface, const void *cubin_data,
        UINT32 cubin_size, UINT32 block_x, UINT32 block_y, UINT32 block_z, const char *shader_name, D3D12_CUBIN_DATA_HANDLE **out_handle)
{
    VkCuFunctionCreateInfoNVX functionCreateInfo = { VK_STRUCTURE_TYPE_CU_FUNCTION_CREATE_INFO_NVX };
    VkCuModuleCreateInfoNVX moduleCreateInfo = { VK_STRUCTURE_TYPE_CU_MODULE_CREATE_INFO_NVX };
    const struct vkd3d_vk_device_procs *vk_procs;
    D3D12_CUBIN_DATA_HANDLE *handle;
    struct d3d12_device *device;
    VkDevice vk_device;
    VkResult vr;

    TRACE("iface %p, cubin_data %p, cubin_size %u, shader_name %s \n", iface, cubin_data, cubin_size, shader_name);

    /* out_handle is dereferenced on the success path, so reject NULL up front
     * instead of crashing after the Vulkan objects have been created. */
    if (!cubin_data || !cubin_size || !shader_name || !out_handle)
        return E_INVALIDARG;

    device = d3d12_device_from_ID3D12DeviceExt(iface);
    vk_device = device->vk_device;
    vk_procs = &device->vk_procs;

    /* The allocation may fail; do not write through a NULL handle. */
    if (!(handle = vkd3d_calloc(1, sizeof(D3D12_CUBIN_DATA_HANDLE))))
        return E_OUTOFMEMORY;

    handle->blockX = block_x;
    handle->blockY = block_y;
    handle->blockZ = block_z;

    moduleCreateInfo.pData = cubin_data;
    moduleCreateInfo.dataSize = cubin_size;

    if ((vr = VK_CALL(vkCreateCuModuleNVX(vk_device, &moduleCreateInfo, NULL, &handle->vkCuModule))) < 0)
    {
        ERR("Failed to create cubin shader, vr %d.\n", vr);
        vkd3d_free(handle);
        return hresult_from_vk_result(vr);
    }

    functionCreateInfo.module = handle->vkCuModule;
    functionCreateInfo.pName = shader_name;

    if ((vr = VK_CALL(vkCreateCuFunctionNVX(vk_device, &functionCreateInfo, NULL, &handle->vkCuFunction))) < 0)
    {
        ERR("Failed to create cubin function module, vr %d.\n", vr);
        /* The module was already created; destroy it before freeing the handle. */
        VK_CALL(vkDestroyCuModuleNVX(vk_device, handle->vkCuModule, NULL));
        vkd3d_free(handle);
        return hresult_from_vk_result(vr);
    }

    *out_handle = handle;
    return S_OK;
}
/* Destroys the CUDA function and module referenced by `handle`, then frees
 * the handle itself. Returns E_INVALIDARG if either argument is NULL,
 * otherwise S_OK. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_DestroyCubinComputeShader(ID3D12DeviceExt *iface, D3D12_CUBIN_DATA_HANDLE *handle)
{
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_device *owner;

    TRACE("iface %p, handle %p \n", iface, handle);

    if (iface == NULL || handle == NULL)
        return E_INVALIDARG;

    owner = d3d12_device_from_ID3D12DeviceExt(iface);
    vk_procs = &owner->vk_procs;

    /* Destroy the function before the module it was created from. */
    VK_CALL(vkDestroyCuFunctionNVX(owner->vk_device, handle->vkCuFunction, NULL));
    VK_CALL(vkDestroyCuModuleNVX(owner->vk_device, handle->vkCuModule, NULL));
    vkd3d_free(handle);

    return S_OK;
}
/* Queries a combined image/sampler handle (via vkGetImageViewHandleNVX) for
 * the image view behind `srv_handle` and the sampler behind `sampler_handle`,
 * and stores it in *cuda_texture_handle.
 * Returns E_INVALIDARG if cuda_texture_handle is NULL, otherwise S_OK. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaTextureObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE srv_handle,
        D3D12_CPU_DESCRIPTOR_HANDLE sampler_handle, UINT32 *cuda_texture_handle)
{
    VkImageViewHandleInfoNVX view_handle_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_desc_split sampler_split;
    struct d3d12_desc_split srv_split;
    struct d3d12_device *owner;

    TRACE("iface %p, srv_handle %zu, sampler_handle %zu, cuda_texture_handle %p.\n",
            iface, srv_handle.ptr, sampler_handle.ptr, cuda_texture_handle);

    if (cuda_texture_handle == NULL)
        return E_INVALIDARG;

    owner = d3d12_device_from_ID3D12DeviceExt(iface);
    vk_procs = &owner->vk_procs;

    /* Decode both CPU descriptor handles into their descriptor views. */
    srv_split = d3d12_desc_decode_va(srv_handle.ptr);
    sampler_split = d3d12_desc_decode_va(sampler_handle.ptr);

    view_handle_info.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
    view_handle_info.imageView = srv_split.view->info.view->vk_image_view;
    view_handle_info.sampler = sampler_split.view->info.view->vk_sampler;

    *cuda_texture_handle = VK_CALL(vkGetImageViewHandleNVX(owner->vk_device, &view_handle_info));
    return S_OK;
}
/* Queries a storage-image handle (via vkGetImageViewHandleNVX) for the image
 * view behind `uav_handle` and stores it in *cuda_surface_handle.
 * Returns E_INVALIDARG if cuda_surface_handle is NULL, otherwise S_OK. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_GetCudaSurfaceObject(ID3D12DeviceExt *iface, D3D12_CPU_DESCRIPTOR_HANDLE uav_handle,
        UINT32 *cuda_surface_handle)
{
    VkImageViewHandleInfoNVX view_handle_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_HANDLE_INFO_NVX };
    const struct vkd3d_vk_device_procs *vk_procs;
    struct d3d12_desc_split uav_split;
    struct d3d12_device *owner;

    TRACE("iface %p, uav_handle %zu, cuda_surface_handle %p.\n", iface, uav_handle.ptr, cuda_surface_handle);

    if (cuda_surface_handle == NULL)
        return E_INVALIDARG;

    owner = d3d12_device_from_ID3D12DeviceExt(iface);
    vk_procs = &owner->vk_procs;

    /* Decode the CPU descriptor handle into its descriptor view. */
    uav_split = d3d12_desc_decode_va(uav_handle.ptr);

    view_handle_info.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    view_handle_info.imageView = uav_split.view->info.view->vk_image_view;

    *cuda_surface_handle = VK_CALL(vkGetImageViewHandleNVX(owner->vk_device, &view_handle_info));
    return S_OK;
}
extern VKD3D_THREAD_LOCAL struct D3D12_UAV_INFO *d3d12_uav_info;
/* Registers `uav_info` in the thread-local d3d12_uav_info pointer.
 * Returns E_INVALIDARG if uav_info is NULL, otherwise S_OK. */
static HRESULT STDMETHODCALLTYPE d3d12_device_vkd3d_ext_CaptureUAVInfo(ID3D12DeviceExt *iface, D3D12_UAV_INFO *uav_info)
{
    if (uav_info == NULL)
        return E_INVALIDARG;

    TRACE("iface %p, uav_info %p.\n", iface, uav_info);

    /* CaptureUAVInfo() is supposed to capture the information from the next
     * CreateUnorderedAccess() call on the same thread. That call fills in the
     * structure through the d3d12_uav_info pointer stored here. */
    d3d12_uav_info = uav_info;
    return S_OK;
}
/* Virtual function table for the ID3D12DeviceExt interface of d3d12_device.
 * Entries are positional initializers, so their order must match the member
 * order of struct ID3D12DeviceExtVtbl. */
CONST_VTBL struct ID3D12DeviceExtVtbl d3d12_device_vkd3d_ext_vtbl =
{
/* IUnknown methods */
d3d12_device_vkd3d_ext_QueryInterface,
d3d12_device_vkd3d_ext_AddRef,
d3d12_device_vkd3d_ext_Release,
/* ID3D12DeviceExt methods */
d3d12_device_vkd3d_ext_GetVulkanHandles,
d3d12_device_vkd3d_ext_GetExtensionSupport,
d3d12_device_vkd3d_ext_CreateCubinComputeShaderWithName,
d3d12_device_vkd3d_ext_DestroyCubinComputeShader,
d3d12_device_vkd3d_ext_GetCudaTextureObject,
d3d12_device_vkd3d_ext_GetCudaSurfaceObject,
d3d12_device_vkd3d_ext_CaptureUAVInfo
};

View File

@ -23,6 +23,11 @@
#include "vkd3d_private.h"
/* ID3D12Heap */
/* Maps a heap interface pointer back to the d3d12_heap that embeds it as its
 * ID3D12Heap_iface member. */
static inline struct d3d12_heap *impl_from_ID3D12Heap(d3d12_heap_iface *iface)
{
    struct d3d12_heap *owner;

    owner = CONTAINING_RECORD(iface, struct d3d12_heap, ID3D12Heap_iface);
    return owner;
}
static HRESULT STDMETHODCALLTYPE d3d12_heap_QueryInterface(d3d12_heap_iface *iface,
REFIID iid, void **object)
{
@ -48,7 +53,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_QueryInterface(d3d12_heap_iface *ifa
static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(d3d12_heap_iface *iface)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
ULONG refcount = InterlockedIncrement(&heap->refcount);
TRACE("%p increasing refcount to %u.\n", heap, refcount);
@ -68,13 +73,13 @@ static void d3d12_heap_destroy(struct d3d12_heap *heap)
static void d3d12_heap_set_name(struct d3d12_heap *heap, const char *name)
{
if (!heap->allocation.chunk)
vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->allocation.device_allocation.vk_memory,
vkd3d_set_vk_object_name(heap->device, (uint64_t)heap->allocation.vk_memory,
VK_OBJECT_TYPE_DEVICE_MEMORY, name);
}
static ULONG STDMETHODCALLTYPE d3d12_heap_Release(d3d12_heap_iface *iface)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
ULONG refcount = InterlockedDecrement(&heap->refcount);
TRACE("%p decreasing refcount to %u.\n", heap, refcount);
@ -88,7 +93,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_Release(d3d12_heap_iface *iface)
static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(d3d12_heap_iface *iface,
REFGUID guid, UINT *data_size, void *data)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data);
@ -98,7 +103,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetPrivateData(d3d12_heap_iface *ifa
static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateData(d3d12_heap_iface *iface,
REFGUID guid, UINT data_size, const void *data)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data);
@ -109,7 +114,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateData(d3d12_heap_iface *ifa
static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateDataInterface(d3d12_heap_iface *iface,
REFGUID guid, const IUnknown *data)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data);
@ -119,7 +124,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_SetPrivateDataInterface(d3d12_heap_i
static HRESULT STDMETHODCALLTYPE d3d12_heap_GetDevice(d3d12_heap_iface *iface, REFIID iid, void **device)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device);
@ -129,7 +134,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetDevice(d3d12_heap_iface *iface, R
static D3D12_HEAP_DESC * STDMETHODCALLTYPE d3d12_heap_GetDesc(d3d12_heap_iface *iface,
D3D12_HEAP_DESC *desc)
{
struct d3d12_heap *heap = impl_from_ID3D12Heap1(iface);
struct d3d12_heap *heap = impl_from_ID3D12Heap(iface);
TRACE("iface %p, desc %p.\n", iface, desc);
@ -145,7 +150,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_heap_GetProtectedResourceSession(d3d12_he
return E_NOTIMPL;
}
CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
static CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
{
/* IUnknown methods */
d3d12_heap_QueryInterface,
@ -164,33 +169,21 @@ CONST_VTBL struct ID3D12Heap1Vtbl d3d12_heap_vtbl =
d3d12_heap_GetProtectedResourceSession,
};
HRESULT d3d12_device_validate_custom_heap_type(struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties)
static struct d3d12_heap *unsafe_impl_from_ID3D12Heap1(ID3D12Heap1 *iface)
{
if (heap_properties->Type != D3D12_HEAP_TYPE_CUSTOM)
return S_OK;
if (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN
|| (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_L1
&& (is_cpu_accessible_heap(heap_properties) || d3d12_device_is_uma(device, NULL))))
{
WARN("Invalid memory pool preference.\n");
return E_INVALIDARG;
}
if (heap_properties->CPUPageProperty == D3D12_CPU_PAGE_PROPERTY_UNKNOWN)
{
WARN("Must have explicit CPU page property for CUSTOM heap type.\n");
return E_INVALIDARG;
}
return S_OK;
if (!iface)
return NULL;
assert(iface->lpVtbl == &d3d12_heap_vtbl);
return impl_from_ID3D12Heap(iface);
}
static HRESULT validate_heap_desc(struct d3d12_device *device, const D3D12_HEAP_DESC *desc)
struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface)
{
HRESULT hr;
return unsafe_impl_from_ID3D12Heap1((ID3D12Heap1 *)iface);
}
static HRESULT validate_heap_desc(const D3D12_HEAP_DESC *desc)
{
if (!desc->SizeInBytes)
{
WARN("Invalid size %"PRIu64".\n", desc->SizeInBytes);
@ -210,9 +203,6 @@ static HRESULT validate_heap_desc(struct d3d12_device *device, const D3D12_HEAP_
return E_INVALIDARG;
}
if (FAILED(hr = d3d12_device_validate_custom_heap_type(device, &desc->Properties)))
return hr;
return S_OK;
}
@ -235,12 +225,11 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev
if (!heap->desc.Alignment)
heap->desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
if (FAILED(hr = validate_heap_desc(device, &heap->desc)))
if (FAILED(hr = validate_heap_desc(&heap->desc)))
return hr;
alloc_info.heap_desc = heap->desc;
alloc_info.host_ptr = host_address;
alloc_info.extra_allocation_flags = 0;
if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
return hr;

View File

@ -24,25 +24,34 @@
static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator *allocator,
struct d3d12_device *device, const struct vkd3d_memory_allocation *allocation);
/* Tells whether a heap with the given properties is CPU accessible.
 * DEFAULT heaps are not; CUSTOM heaps are only when their CPU page property
 * is WRITE_COMBINE or WRITE_BACK; every other heap type is. */
static inline bool is_cpu_accessible_heap(const D3D12_HEAP_PROPERTIES *properties)
{
    switch (properties->Type)
    {
        case D3D12_HEAP_TYPE_DEFAULT:
            return false;

        case D3D12_HEAP_TYPE_CUSTOM:
            switch (properties->CPUPageProperty)
            {
                case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE:
                case D3D12_CPU_PAGE_PROPERTY_WRITE_BACK:
                    return true;

                default:
                    return false;
            }

        default:
            return true;
    }
}
static uint32_t vkd3d_select_memory_types(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags)
{
const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties;
uint32_t type_mask = (1 << memory_info->memoryTypeCount) - 1;
const struct vkd3d_memory_info_domain *domain_info;
domain_info = d3d12_device_get_memory_info_domain(device, heap_properties);
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
type_mask &= domain_info->buffer_type_mask;
type_mask &= device->memory_info.buffer_type_mask;
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES))
type_mask &= domain_info->sampled_type_mask;
type_mask &= device->memory_info.sampled_type_mask;
/* Render targets are not allowed on UPLOAD and READBACK heaps */
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES) &&
heap_properties->Type != D3D12_HEAP_TYPE_UPLOAD &&
heap_properties->Type != D3D12_HEAP_TYPE_READBACK)
type_mask &= domain_info->rt_ds_type_mask;
type_mask &= device->memory_info.rt_ds_type_mask;
if (!type_mask)
ERR("No memory type found for heap flags %#x.\n", heap_flags);
@ -66,7 +75,6 @@ static uint32_t vkd3d_find_memory_types_with_flags(struct d3d12_device *device,
static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D12_HEAP_PROPERTIES *heap_properties, VkMemoryPropertyFlags *type_flags)
{
HRESULT hr;
switch (heap_properties->Type)
{
case D3D12_HEAP_TYPE_DEFAULT:
@ -75,10 +83,6 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
case D3D12_HEAP_TYPE_UPLOAD:
*type_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED)
*type_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
else if (!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV))
*type_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case D3D12_HEAP_TYPE_READBACK:
@ -86,8 +90,13 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
break;
case D3D12_HEAP_TYPE_CUSTOM:
if (FAILED(hr = d3d12_device_validate_custom_heap_type(device, heap_properties)))
return hr;
if (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_UNKNOWN
|| (heap_properties->MemoryPoolPreference == D3D12_MEMORY_POOL_L1
&& (is_cpu_accessible_heap(heap_properties) || d3d12_device_is_uma(device, NULL))))
{
WARN("Invalid memory pool preference.\n");
return E_INVALIDARG;
}
switch (heap_properties->CPUPageProperty)
{
@ -96,13 +105,13 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
break;
case D3D12_CPU_PAGE_PROPERTY_WRITE_COMBINE:
*type_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED)
*type_flags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
break;
case D3D12_CPU_PAGE_PROPERTY_NOT_AVAILABLE:
*type_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
break;
case D3D12_CPU_PAGE_PROPERTY_UNKNOWN:
default:
WARN("Invalid CPU page property.\n");
return E_INVALIDARG;
}
break;
@ -117,7 +126,7 @@ static HRESULT vkd3d_select_memory_flags(struct d3d12_device *device, const D3D1
static HRESULT vkd3d_create_global_buffer(struct d3d12_device *device, VkDeviceSize size, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, VkBuffer *vk_buffer)
{
D3D12_RESOURCE_DESC1 resource_desc;
D3D12_RESOURCE_DESC resource_desc;
memset(&resource_desc, 0, sizeof(resource_desc));
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
@ -138,52 +147,13 @@ static HRESULT vkd3d_create_global_buffer(struct d3d12_device *device, VkDeviceS
return vkd3d_create_buffer(device, heap_properties, heap_flags, &resource_desc, vk_buffer);
}
/* Frees the Vulkan device memory held by `allocation` and, for memory types
 * flagged in memory_info.budget_sensitive_mask, subtracts the allocation size
 * from that type's running total under budget_lock.
 * Allocations with a null vk_memory handle are ignored. */
void vkd3d_free_device_memory(struct d3d12_device *device, const struct vkd3d_device_memory_allocation *allocation)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkDeviceSize *allocated_total;

    /* Deferred heap. Return early to skip confusing log messages. */
    if (allocation->vk_memory == VK_NULL_HANDLE)
        return;

    VK_CALL(vkFreeMemory(device->vk_device, allocation->vk_memory, NULL));

    if (!(device->memory_info.budget_sensitive_mask & (1u << allocation->vk_memory_type)))
    {
        /* Not budget tracked: there is no bookkeeping to update, only optional logging. */
        if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
        {
            INFO("Freeing memory of type %u, %"PRIu64" KiB.\n",
                    allocation->vk_memory_type, allocation->size / 1024);
        }
        return;
    }

    allocated_total = &device->memory_info.type_current[allocation->vk_memory_type];

    pthread_mutex_lock(&device->memory_info.budget_lock);

    assert(*allocated_total >= allocation->size);
    *allocated_total -= allocation->size;

    if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
    {
        INFO("Freeing memory of type %u, new total allocated size %"PRIu64" MiB.\n",
                allocation->vk_memory_type, *allocated_total / (1024 * 1024));
    }

    pthread_mutex_unlock(&device->memory_info.budget_lock);
}
static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
void *pNext, struct vkd3d_device_memory_allocation *allocation)
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
{
const VkPhysicalDeviceMemoryProperties *memory_props = &device->memory_properties;
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
const VkPhysicalDeviceMemoryProperties *memory_info = &device->memory_properties;
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct vkd3d_memory_info *memory_info = &device->memory_info;
VkMemoryAllocateInfo allocate_info;
VkDeviceSize *type_current;
VkDeviceSize *type_budget;
bool budget_sensitive;
VkResult vr;
/* buffer_mask / sampled_mask etc will generally take care of this,
@ -199,118 +169,40 @@ static HRESULT vkd3d_try_allocate_device_memory(struct d3d12_device *device,
{
uint32_t type_index = vkd3d_bitmask_iter32(&type_mask);
if ((memory_props->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
if ((memory_info->memoryTypes[type_index].propertyFlags & type_flags) != type_flags)
continue;
allocate_info.memoryTypeIndex = type_index;
budget_sensitive = !!(device->memory_info.budget_sensitive_mask & (1u << type_index));
if (budget_sensitive)
if ((vr = VK_CALL(vkAllocateMemory(device->vk_device,
&allocate_info, NULL, vk_memory))) == VK_SUCCESS)
{
type_budget = &memory_info->type_budget[type_index];
type_current = &memory_info->type_current[type_index];
pthread_mutex_lock(&memory_info->budget_lock);
if (*type_current + size > *type_budget)
{
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Attempting to allocate from memory type %u, but exceeding fixed budget: %"PRIu64" + %"PRIu64" > %"PRIu64".\n",
type_index, *type_current, size, *type_budget);
}
pthread_mutex_unlock(&memory_info->budget_lock);
if (vk_memory_type)
*vk_memory_type = type_index;
/* If we're out of DEVICE budget, don't try other types. */
if (type_flags & optional_flags)
return E_OUTOFMEMORY;
else
continue;
}
}
vr = VK_CALL(vkAllocateMemory(device->vk_device, &allocate_info, NULL, &allocation->vk_memory));
if (budget_sensitive)
{
if (vr == VK_SUCCESS)
{
*type_current += size;
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Allocated memory of type %u, new total allocated size %"PRIu64" MiB.\n",
type_index, *type_current / (1024 * 1024));
}
}
pthread_mutex_unlock(&memory_info->budget_lock);
}
else if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("%s memory of type #%u, size %"PRIu64" KiB.\n",
(vr == VK_SUCCESS ? "Allocated" : "Failed to allocate"),
type_index, allocate_info.allocationSize / 1024);
}
if (vr == VK_SUCCESS)
{
allocation->vk_memory_type = type_index;
allocation->size = size;
return S_OK;
}
else if (type_flags & optional_flags)
{
/* If we fail to allocate DEVICE_LOCAL memory, immediately fail the call.
* This way we avoid any attempt to fall back to PCI-e BAR memory types
* which are also DEVICE_LOCAL.
* After failure, the calling code removes the DEVICE_LOCAL_BIT flag and tries again,
* where we will fall back to system memory instead. */
return E_OUTOFMEMORY;
}
}
return E_OUTOFMEMORY;
}
/* Returns true when the memory types selected by `type_mask` refer to more
 * than one distinct heapIndex in `props`, and false otherwise (including for
 * an empty mask). */
static bool vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(
        const struct VkPhysicalDeviceMemoryProperties *props, uint32_t type_mask)
{
    uint32_t seen_heaps = 0;
    uint32_t type_index;

    if (type_mask == 0)
        return false;

    /* Collect one bit per heap referenced by the selected memory types. */
    while (type_mask != 0)
    {
        type_index = vkd3d_bitmask_iter32(&type_mask);
        seen_heaps |= 1u << props->memoryTypes[type_index].heapIndex;
    }

    /* x & (x - 1) clears the lowest set bit; a non-zero result means at least two bits were set. */
    return (seen_heaps & (seen_heaps - 1u)) != 0;
}
HRESULT vkd3d_allocate_device_memory(struct d3d12_device *device,
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
void *pNext, struct vkd3d_device_memory_allocation *allocation)
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
{
const VkMemoryPropertyFlags optional_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
HRESULT hr;
hr = vkd3d_try_allocate_device_memory(device, size, type_flags,
type_mask, pNext, allocation);
type_mask, pNext, vk_memory, vk_memory_type);
if (FAILED(hr) && (type_flags & optional_flags))
{
if (vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(&device->memory_properties, type_mask))
{
WARN("Memory allocation failed, falling back to system memory.\n");
hr = vkd3d_try_allocate_device_memory(device, size,
type_flags & ~optional_flags, type_mask, pNext, allocation);
}
else if (device->memory_properties.memoryHeapCount > 1)
{
/* It might be the case (NV with RT/DS heap) that we just cannot fall back in any meaningful way.
* E.g. there exists no memory type that is not DEVICE_LOCAL and covers both RT and DS.
* For this case, we have no choice but to not allocate,
* and defer actual memory allocation to CreatePlacedResource() time.
* NVIDIA bug reference for fixing this case: 2175829. */
WARN("Memory allocation failed, but it is not possible to fallback to system memory here. Deferring allocation.\n");
return hr;
}
/* If we fail to allocate, and only have one heap to work with (iGPU),
* falling back is meaningless, just fail. */
WARN("Memory allocation failed, falling back to system memory.\n");
hr = vkd3d_try_allocate_device_memory(device, size,
type_flags & ~optional_flags, type_mask, pNext,
vk_memory, vk_memory_type);
}
if (FAILED(hr))
@ -324,42 +216,37 @@ HRESULT vkd3d_allocate_device_memory(struct d3d12_device *device,
static HRESULT vkd3d_import_host_memory(struct d3d12_device *device, void *host_address,
VkDeviceSize size, VkMemoryPropertyFlags type_flags, uint32_t type_mask,
void *pNext, struct vkd3d_device_memory_allocation *allocation)
void *pNext, VkDeviceMemory *vk_memory, uint32_t *vk_memory_type)
{
VkImportMemoryHostPointerInfoEXT import_info;
HRESULT hr = S_OK;
HRESULT hr;
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
import_info.pNext = pNext;
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
import_info.pHostPointer = host_address;
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK) ||
FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
type_flags, type_mask, &import_info, allocation)))
if (FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
type_flags, type_mask, &import_info, vk_memory, vk_memory_type)))
{
if (FAILED(hr))
WARN("Failed to import host memory, hr %#x.\n", hr);
WARN("Failed to import host memory, hr %#x.\n", hr);
/* If we failed, fall back to a host-visible allocation. Generally
* the app will access the memory through the main host pointer,
* so it's fine. */
hr = vkd3d_try_allocate_device_memory(device, size,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
type_mask, pNext, allocation);
type_mask, &import_info, vk_memory, vk_memory_type);
}
return hr;
}
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation,
struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
{
if (device->device_info.buffer_device_address_features.bufferDeviceAddress)
allocation->resource.va = vkd3d_get_buffer_device_address(device, allocation->resource.vk_buffer);
else if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
else
allocation->resource.va = 0xdeadbeef;
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
if (!allocation->resource.va)
{
@ -367,9 +254,7 @@ static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocatio
return E_OUTOFMEMORY;
}
/* Internal scratch buffers are not visible to application so we never have to map it back to VkBuffer. */
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
return S_OK;
}
@ -453,12 +338,10 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
if ((allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) && allocation->resource.va)
{
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
{
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
}
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
}
if (allocation->resource.view_map)
@ -470,7 +353,7 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
vkd3d_free_device_memory(device, &allocation->device_allocation);
VK_CALL(vkFreeMemory(device->vk_device, allocation->vk_memory, NULL));
}
static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device,
@ -480,7 +363,6 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
VkMemoryRequirements memory_requirements;
VkMemoryAllocateFlagsInfo flags_info;
VkMemoryPropertyFlags type_flags;
VkBindBufferMemoryInfo bind_info;
void *host_ptr = info->host_ptr;
uint32_t type_mask;
VkResult vr;
@ -498,12 +380,6 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
if (FAILED(hr = vkd3d_select_memory_flags(device, &info->heap_properties, &type_flags)))
return hr;
/* Mask out optional memory properties as needed.
* This is relevant for chunk allocator fallbacks
* since the info->memory_requirements already encodes
* only HOST_VISIBLE types and we use NO_FALLBACK allocation mode. */
type_flags &= ~info->optional_memory_properties;
if (allocation->flags & VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER)
{
/* If requested, create a buffer covering the entire allocation
@ -525,14 +401,8 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
memory_requirements = info->memory_requirements;
}
/* If an allocation is a dedicated fallback allocation,
* we must not look at heap_flags, since we might end up noping out
* the memory types we want to allocate with. */
type_mask = memory_requirements.memoryTypeBits;
if (info->flags & VKD3D_ALLOCATION_FLAG_DEDICATED)
type_mask &= device->memory_info.global_mask;
else
type_mask &= vkd3d_select_memory_types(device, &info->heap_properties, info->heap_flags);
type_mask = vkd3d_select_memory_types(device, &info->heap_properties,
info->heap_flags) & memory_requirements.memoryTypeBits;
/* Allocate actual backing storage */
flags_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO;
@ -547,7 +417,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
flags_info.flags |= VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT_KHR;
}
allocation->resource.size = info->memory_requirements.size;
allocation->resource.size = memory_requirements.size;
if (info->heap_flags & D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)
{
@ -555,33 +425,22 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
allocation->flags |= VKD3D_ALLOCATION_FLAG_ALLOW_WRITE_WATCH;
if (!(host_ptr = vkd3d_allocate_write_watch_pointer(&info->heap_properties, memory_requirements.size)))
{
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
return E_INVALIDARG;
}
}
if (host_ptr)
{
hr = vkd3d_import_host_memory(device, host_ptr, memory_requirements.size,
type_flags, type_mask, &flags_info, &allocation->device_allocation);
}
else if (info->flags & VKD3D_ALLOCATION_FLAG_NO_FALLBACK)
{
hr = vkd3d_try_allocate_device_memory(device, memory_requirements.size, type_flags,
type_mask, &flags_info, &allocation->device_allocation);
type_flags, type_mask, &flags_info, &allocation->vk_memory, &allocation->vk_memory_type);
}
else
{
hr = vkd3d_allocate_device_memory(device, memory_requirements.size, type_flags,
type_mask, &flags_info, &allocation->device_allocation);
type_mask, &flags_info, &allocation->vk_memory, &allocation->vk_memory_type);
}
if (FAILED(hr))
{
VK_CALL(vkDestroyBuffer(device->vk_device, allocation->resource.vk_buffer, NULL));
return hr;
}
/* Map memory if the allocation was requested to be host-visible,
* but do not map if the allocation was meant to be device-local
@ -597,7 +456,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
{
allocation->flags |= VKD3D_ALLOCATION_FLAG_CPU_ACCESS;
if ((vr = VK_CALL(vkMapMemory(device->vk_device, allocation->device_allocation.vk_memory,
if ((vr = VK_CALL(vkMapMemory(device->vk_device, allocation->vk_memory,
0, VK_WHOLE_SIZE, 0, &allocation->cpu_address))))
{
ERR("Failed to map memory, vr %d.\n", vr);
@ -609,13 +468,8 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
/* Bind memory to global or dedicated buffer as needed */
if (allocation->resource.vk_buffer)
{
bind_info.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO;
bind_info.pNext = NULL;
bind_info.buffer = allocation->resource.vk_buffer;
bind_info.memory = allocation->device_allocation.vk_memory;
bind_info.memoryOffset = 0;
if ((vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))) < 0)
if ((vr = VK_CALL(vkBindBufferMemory(device->vk_device,
allocation->resource.vk_buffer, allocation->vk_memory, 0))) < 0)
{
ERR("Failed to bind buffer memory, vr %d.\n", vr);
vkd3d_memory_allocation_free(allocation, device, allocator);
@ -638,7 +492,7 @@ static HRESULT vkd3d_memory_allocation_init(struct vkd3d_memory_allocation *allo
allocation->resource.cookie, info);
TRACE("Created allocation %p on memory type %u (%"PRIu64" bytes).\n",
allocation, allocation->device_allocation.vk_memory_type, allocation->resource.size);
allocation, allocation->vk_memory_type, allocation->resource.size);
return S_OK;
}
@ -1065,13 +919,6 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
* rather than rewriting the command buffer or dispatching the clear */
vk_cmd_buffer = clear_queue->vk_command_buffers[clear_queue->command_buffer_index];
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET)
{
INFO("Submitting clear command list.\n");
for (i = 0; i < clear_queue->allocations_count; i++)
INFO("Clearing allocation %zu: %"PRIu64".\n", i, clear_queue->allocations[i]->resource.size);
}
vkd3d_memory_allocator_wait_clear_semaphore(allocator, device,
clear_queue->next_signal_value - VKD3D_MEMORY_CLEAR_COMMAND_BUFFER_COUNT, UINT64_MAX);
@ -1126,8 +973,6 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
vkd3d_queue_release(allocator->vkd3d_queue);
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST);
if (vr < 0)
{
ERR("Failed to submit command buffer, vr %d.\n", vr);
@ -1157,7 +1002,6 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
for (i = 0; i < queue_family->queue_count; i++)
{
vkd3d_queue_add_wait(queue_family->queues[i],
NULL,
clear_queue->vk_semaphore,
clear_queue->next_signal_value);
}
@ -1271,9 +1115,8 @@ static void vkd3d_memory_allocator_wait_allocation(struct vkd3d_memory_allocator
vkd3d_memory_allocator_wait_clear_semaphore(allocator, device, wait_value, UINT64_MAX);
}
static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask,
VkMemoryPropertyFlags optional_properties, struct vkd3d_memory_chunk **chunk)
static HRESULT vkd3d_memory_allocator_add_chunk(struct vkd3d_memory_allocator *allocator, struct d3d12_device *device,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, uint32_t type_mask, struct vkd3d_memory_chunk **chunk)
{
struct vkd3d_allocate_memory_info alloc_info;
struct vkd3d_memory_chunk *object;
@ -1285,8 +1128,6 @@ static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocato
alloc_info.memory_requirements.memoryTypeBits = type_mask;
alloc_info.heap_properties = *heap_properties;
alloc_info.heap_flags = heap_flags;
alloc_info.flags = VKD3D_ALLOCATION_FLAG_NO_FALLBACK;
alloc_info.optional_memory_properties = optional_properties;
if (!(heap_flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
@ -1307,7 +1148,6 @@ static HRESULT vkd3d_memory_allocator_try_add_chunk(struct vkd3d_memory_allocato
static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory_allocator *allocator,
struct d3d12_device *device, const VkMemoryRequirements *memory_requirements, uint32_t type_mask,
VkMemoryPropertyFlags optional_properties,
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
struct vkd3d_memory_allocation *allocation)
{
@ -1330,7 +1170,7 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
continue;
/* Filter out unsupported memory types */
if (!(type_mask & (1u << chunk->allocation.device_allocation.vk_memory_type)))
if (!(type_mask & (1u << chunk->allocation.vk_memory_type)))
continue;
if (SUCCEEDED(hr = vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation)))
@ -1339,8 +1179,8 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
/* Try allocating a new chunk on one of the supported memory type
* before the caller falls back to potentially slower memory */
if (FAILED(hr = vkd3d_memory_allocator_try_add_chunk(allocator, device, heap_properties,
heap_flags & heap_flag_mask, type_mask, optional_properties, &chunk)))
if (FAILED(hr = vkd3d_memory_allocator_add_chunk(allocator, device, heap_properties,
heap_flags & heap_flag_mask, memory_requirements->memoryTypeBits, &chunk)))
return hr;
return vkd3d_memory_chunk_allocate_range(chunk, memory_requirements, allocation);
@ -1349,9 +1189,6 @@ static HRESULT vkd3d_memory_allocator_try_suballocate_memory(struct vkd3d_memory
void vkd3d_free_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
const struct vkd3d_memory_allocation *allocation)
{
if (allocation->device_allocation.vk_memory == VK_NULL_HANDLE)
return;
if (allocation->clear_semaphore_value)
vkd3d_memory_allocator_wait_allocation(allocator, device, allocation);
@ -1387,14 +1224,13 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
pthread_mutex_lock(&allocator->mutex);
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
&memory_requirements, optional_mask, 0, &info->heap_properties,
&memory_requirements, optional_mask, &info->heap_properties,
info->heap_flags, allocation);
if (FAILED(hr) && (required_mask & ~optional_mask))
{
hr = vkd3d_memory_allocator_try_suballocate_memory(allocator, device,
&memory_requirements, required_mask & ~optional_mask,
optional_flags,
&info->heap_properties, info->heap_flags, allocation);
}
@ -1402,35 +1238,13 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
return hr;
}
/* Returns true when driver_id is one of the drivers on the allow-list below,
 * and false for every other driver ID. */
static inline bool vkd3d_driver_implicitly_clears(VkDriverId driver_id)
{
    /* Known to pass test_stress_suballocation which hits this path. */
    if (driver_id == VK_DRIVER_ID_MESA_RADV)
        return true;
    if (driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
        return true;
    if (driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA)
        return true;

    /* Any driver not listed above is not trusted to clear for us. */
    return false;
}
HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
bool implementation_implicitly_clears;
bool needs_clear;
bool suballocate;
HRESULT hr;
suballocate = !info->pNext && !info->host_ptr &&
info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)) &&
!(info->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH);
if (suballocate)
if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)))
hr = vkd3d_suballocate_memory(device, allocator, info, allocation);
else
hr = vkd3d_memory_allocation_init(allocation, device, allocator, info);
@ -1438,51 +1252,16 @@ HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_a
if (FAILED(hr))
return hr;
/* If we're allocating Vulkan memory directly,
* we can rely on the driver doing this for us.
* This is relying on implementation details.
* RADV definitely does this, and it seems like NV also does it.
* TODO: an extension for this would be nice. */
implementation_implicitly_clears =
vkd3d_driver_implicitly_clears(device->device_info.driver_properties.driverID) &&
!suballocate;
needs_clear = !implementation_implicitly_clears &&
!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR);
if (needs_clear)
if (!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED))
vkd3d_memory_allocator_clear_allocation(allocator, device, allocation);
return hr;
}
/* Tells the caller whether a failed heap allocation may be tolerated and the
 * real allocation postponed.
 *
 * A failed memory allocation is normally an error. The exception is a case
 * such as CreateHeap where no fallback system memory type is available: the
 * allocation can then be deferred until CreatePlacedResource() time, and the
 * failure is accepted.
 *
 * Returns false for CPU-accessible heaps. Otherwise returns true only when the
 * device exposes more than one memory heap and the memory types selected for
 * these heap properties / flags do not cover multiple memory heaps. */
static bool vkd3d_heap_allocation_accept_deferred_resource_placements(struct d3d12_device *device,
        const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags)
{
    uint32_t candidate_types;

    /* Deferral is only accepted for DEFAULT / CPU_NOT_AVAILABLE heaps.
     * For host memory there is nowhere left to fall back to anyway. */
    if (is_cpu_accessible_heap(heap_properties))
        return false;

    candidate_types = vkd3d_select_memory_types(device, heap_properties, heap_flags);

    if (device->memory_properties.memoryHeapCount <= 1)
        return false;

    return !vkd3d_memory_info_type_mask_covers_multiple_memory_heaps(&device->memory_properties,
            candidate_types);
}
HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
const struct vkd3d_allocate_heap_memory_info *info, struct vkd3d_memory_allocation *allocation)
{
struct vkd3d_allocate_heap_memory_info heap_info;
struct vkd3d_allocate_memory_info alloc_info;
HRESULT hr;
memset(&alloc_info, 0, sizeof(alloc_info));
alloc_info.memory_requirements.memoryTypeBits = ~0u;
@ -1492,52 +1271,18 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem
alloc_info.heap_flags = info->heap_desc.Flags;
alloc_info.host_ptr = info->host_ptr;
alloc_info.flags |= info->extra_allocation_flags;
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
if (is_cpu_accessible_heap(&info->heap_desc.Properties))
{
if (info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)
{
/* If the heap was only designed to handle images, the heap is useless,
* and we can force everything to go through committed path. */
memset(allocation, 0, sizeof(*allocation));
return S_OK;
}
else
{
/* CPU visible textures are never placed on a heap directly,
* since LINEAR images have alignment / size requirements
* that are vastly different from OPTIMAL ones.
* We can place buffers however. */
heap_info = *info;
info = &heap_info;
heap_info.heap_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
}
}
hr = vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
if (hr == E_OUTOFMEMORY && vkd3d_heap_allocation_accept_deferred_resource_placements(device,
&info->heap_desc.Properties, info->heap_desc.Flags))
{
/* It's okay and sometimes expected that we fail here.
* Defer allocation until CreatePlacedResource(). */
memset(allocation, 0, sizeof(*allocation));
hr = S_OK;
}
return hr;
return vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
}
HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_buffer,
VkMemoryPropertyFlags type_flags,
struct vkd3d_device_memory_allocation *allocation)
VkMemoryPropertyFlags type_flags, VkDeviceMemory *vk_memory)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkMemoryRequirements memory_requirements;
VkMemoryAllocateFlagsInfo flags_info;
VkBindBufferMemoryInfo bind_info;
VkResult vr;
HRESULT hr;
@ -1551,44 +1296,30 @@ HRESULT vkd3d_allocate_buffer_memory(struct d3d12_device *device, VkBuffer vk_bu
VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, vk_buffer, &memory_requirements));
if (FAILED(hr = vkd3d_allocate_device_memory(device, memory_requirements.size,
type_flags, memory_requirements.memoryTypeBits, &flags_info, allocation)))
type_flags, memory_requirements.memoryTypeBits, &flags_info, vk_memory, NULL)))
return hr;
bind_info.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO;
bind_info.pNext = NULL;
bind_info.buffer = vk_buffer;
bind_info.memory = allocation->vk_memory;
bind_info.memoryOffset = 0;
if (FAILED(vr = VK_CALL(vkBindBufferMemory2KHR(device->vk_device, 1, &bind_info))))
if (FAILED(vr = VK_CALL(vkBindBufferMemory(device->vk_device, vk_buffer, *vk_memory, 0))))
return hresult_from_vk_result(vr);
return hr;
}
HRESULT vkd3d_allocate_image_memory(struct d3d12_device *device, VkImage vk_image,
VkMemoryPropertyFlags type_flags,
struct vkd3d_device_memory_allocation *allocation)
VkMemoryPropertyFlags type_flags, VkDeviceMemory *vk_memory)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkMemoryRequirements memory_requirements;
VkBindImageMemoryInfo bind_info;
VkResult vr;
HRESULT hr;
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &memory_requirements));
if (FAILED(hr = vkd3d_allocate_device_memory(device, memory_requirements.size,
type_flags, memory_requirements.memoryTypeBits, NULL, allocation)))
type_flags, memory_requirements.memoryTypeBits, NULL, vk_memory, NULL)))
return hr;
bind_info.sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO;
bind_info.pNext = NULL;
bind_info.image = vk_image;
bind_info.memory = allocation->vk_memory;
bind_info.memoryOffset = 0;
if (FAILED(vr = VK_CALL(vkBindImageMemory2KHR(device->vk_device, 1, &bind_info))))
if (FAILED(vr = VK_CALL(vkBindImageMemory(device->vk_device, vk_image, *vk_memory, 0))))
return hresult_from_vk_result(vr);
return hr;

View File

@ -19,7 +19,6 @@ vkd3d_shaders =[
'shaders/fs_copy_image_float.frag',
'shaders/fs_copy_image_uint.frag',
'shaders/fs_copy_image_stencil.frag',
'shaders/gs_fullscreen.geom',
'shaders/vs_fullscreen.vert',
@ -27,20 +26,17 @@ vkd3d_shaders =[
'shaders/vs_swapchain_fullscreen.vert',
'shaders/fs_swapchain_fullscreen.frag',
'shaders/cs_execute_indirect_patch.comp',
'shaders/cs_execute_indirect_patch_debug_ring.comp',
]
vkd3d_src = [
'bundle.c',
'cache.c',
'command.c',
'command_list_vkd3d_ext.c',
'device.c',
'device_vkd3d_ext.c',
'heap.c',
'memory.c',
'meta.c',
'platform.c',
'resource.c',
'state.c',
'utils.c',
@ -63,14 +59,6 @@ if enable_descriptor_qa
vkd3d_src += ['descriptor_debug.c']
endif
if enable_breadcrumbs
vkd3d_src += ['breadcrumbs.c']
endif
if vkd3d_platform == 'windows'
vkd3d_src += ['shared_metadata.c']
endif
if not enable_d3d12
vkd3d_lib = shared_library('vkd3d-proton', vkd3d_src, glsl_generator.process(vkd3d_shaders), vkd3d_build, vkd3d_version,
dependencies : [ vkd3d_common_dep, vkd3d_shader_dep ] + vkd3d_extra_libs,

View File

@ -137,8 +137,78 @@ static VkResult vkd3d_meta_create_compute_pipeline(struct d3d12_device *device,
return vr;
}
/* Creates a render pass with one subpass and one attachment.
 *
 * device:         device whose vk_device and procs are used for the call.
 * samples:        sample count of the attachment.
 * format:         attachment format; must not be NULL. If its aspect mask has
 *                 a depth or stencil bit, the attachment is bound as the
 *                 depth-stencil attachment, otherwise as color attachment 0.
 * vk_render_pass: receives the handle written by vkCreateRenderPass2KHR.
 *
 * The attachment is loaded and stored (color/depth and stencil alike) and
 * keeps the same layout before, during and after the pass.
 *
 * Returns the VkResult of vkCreateRenderPass2KHR; a negative result is also
 * logged with ERR(). */
static VkResult vkd3d_meta_create_render_pass(struct d3d12_device *device, VkSampleCountFlagBits samples,
        const struct vkd3d_format *format, VkRenderPass *vk_render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkRenderPassCreateInfo2KHR create_info;
    VkAttachmentDescription2KHR attachment;
    VkAttachmentReference2KHR reference;
    VkSubpassDescription2KHR subpass;
    VkImageLayout attachment_layout;
    bool is_depth_stencil;
    VkResult vr;

    assert(format);

    is_depth_stencil = (format->vk_aspect_mask &
            (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0;

    if (is_depth_stencil)
        attachment_layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    else
        attachment_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    /* Start from all-zero structures, so pNext, flags, counts and unused
     * pointers are 0 / NULL, then fill in only what differs. */
    memset(&attachment, 0, sizeof(attachment));
    memset(&reference, 0, sizeof(reference));
    memset(&subpass, 0, sizeof(subpass));
    memset(&create_info, 0, sizeof(create_info));

    attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    attachment.format = format->vk_format;
    attachment.samples = samples;
    attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
    attachment.initialLayout = attachment_layout;
    attachment.finalLayout = attachment_layout;

    /* reference.aspectMask is the input attachment aspect mask; it stays 0. */
    reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    reference.attachment = 0;
    reference.layout = attachment_layout;

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;

    if (is_depth_stencil)
    {
        /* No color attachments; the single attachment is the depth-stencil target. */
        subpass.pDepthStencilAttachment = &reference;
    }
    else
    {
        subpass.colorAttachmentCount = 1;
        subpass.pColorAttachments = &reference;
    }

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &attachment;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;

    vr = VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, vk_render_pass));
    if (vr < 0)
        ERR("Failed to create render pass, vr %d.\n", vr);

    return vr;
}
static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ops,
VkPipelineLayout layout, VkFormat color_format, VkFormat ds_format, VkImageAspectFlags vk_aspect_mask,
VkPipelineLayout layout, VkRenderPass render_pass,
VkShaderModule vs_module, VkShaderModule fs_module,
VkSampleCountFlagBits samples, const VkPipelineDepthStencilStateCreateInfo *ds_state,
const VkPipelineColorBlendStateCreateInfo *cb_state, const VkSpecializationInfo *spec_info,
@ -148,7 +218,6 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
VkPipelineShaderStageCreateInfo shader_stages[3];
VkPipelineInputAssemblyStateCreateInfo ia_state;
VkPipelineRasterizationStateCreateInfo rs_state;
VkPipelineRenderingCreateInfoKHR rendering_info;
VkPipelineVertexInputStateCreateInfo vi_state;
VkPipelineMultisampleStateCreateInfo ms_state;
VkPipelineViewportStateCreateInfo vp_state;
@ -215,16 +284,8 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
dyn_state.dynamicStateCount = ARRAY_SIZE(dynamic_states);
dyn_state.pDynamicStates = dynamic_states;
rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
rendering_info.pNext = NULL;
rendering_info.viewMask = 0;
rendering_info.colorAttachmentCount = color_format && (vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) ? 1 : 0;
rendering_info.pColorAttachmentFormats = color_format ? &color_format : NULL;
rendering_info.depthAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
rendering_info.stencilAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipeline_info.pNext = &rendering_info;
pipeline_info.pNext = NULL;
pipeline_info.flags = 0;
pipeline_info.stageCount = 0;
pipeline_info.pStages = shader_stages;
@ -238,7 +299,7 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
pipeline_info.pColorBlendState = cb_state;
pipeline_info.pDynamicState = &dyn_state;
pipeline_info.layout = layout;
pipeline_info.renderPass = VK_NULL_HANDLE;
pipeline_info.renderPass = render_pass;
pipeline_info.subpass = 0;
pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
pipeline_info.basePipelineIndex = -1;
@ -550,16 +611,6 @@ HRESULT vkd3d_copy_image_ops_init(struct vkd3d_copy_image_ops *meta_copy_image_o
goto fail;
}
if (device->vk_info.EXT_shader_stencil_export)
{
if ((vr = vkd3d_meta_create_shader_module(device, SPIRV_CODE(fs_copy_image_stencil),
&meta_copy_image_ops->vk_fs_stencil_module)) < 0)
{
ERR("Failed to create shader modules, vr %d.\n", vr);
goto fail;
}
}
return S_OK;
fail:
@ -577,6 +628,7 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
{
struct vkd3d_copy_image_pipeline *pipeline = &meta_copy_image_ops->pipelines[i];
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
}
@ -584,21 +636,95 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_copy_image_ops->vk_pipeline_layout, NULL));
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_float_module, NULL));
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_uint_module, NULL));
VK_CALL(vkDestroyShaderModule(device->vk_device, meta_copy_image_ops->vk_fs_stencil_module, NULL));
pthread_mutex_destroy(&meta_copy_image_ops->mutex);
vkd3d_free(meta_copy_image_ops->pipelines);
}
/* Creates the render pass used by a swapchain pipeline.
 *
 * device:      device whose vk_device and procs are used for the call.
 * key:         supplies the attachment format (key->format) and the color
 *              load op (key->load_op).
 * render_pass: receives the handle written by vkCreateRenderPass2KHR.
 *
 * The pass has one single-sampled color attachment, declared with initial
 * layout VK_IMAGE_LAYOUT_UNDEFINED and final layout
 * VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, one graphics subpass, and one external
 * dependency on the color attachment output stage.
 *
 * Returns the VkResult of vkCreateRenderPass2KHR unchanged; nothing is
 * logged here. */
static VkResult vkd3d_meta_create_swapchain_render_pass(struct d3d12_device *device,
        const struct vkd3d_swapchain_pipeline_key *key, VkRenderPass *render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkAttachmentDescription2KHR color_attachment;
    VkAttachmentReference2KHR color_reference;
    VkSubpassDependency2KHR external_dependency;
    VkRenderPassCreateInfo2KHR create_info;
    VkSubpassDescription2KHR subpass;

    /* Start from all-zero structures, so pNext, flags, counts and unused
     * pointers are 0 / NULL, then fill in only what differs. */
    memset(&color_attachment, 0, sizeof(color_attachment));
    memset(&color_reference, 0, sizeof(color_reference));
    memset(&subpass, 0, sizeof(subpass));
    memset(&external_dependency, 0, sizeof(external_dependency));
    memset(&create_info, 0, sizeof(create_info));

    color_attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    color_attachment.format = key->format;
    color_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
    color_attachment.loadOp = key->load_op;
    color_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    color_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    color_attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    color_attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    color_attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

    /* color_reference.aspectMask is the input attachment aspect mask; it stays 0. */
    color_reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    color_reference.attachment = 0;
    color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.colorAttachmentCount = 1;
    subpass.pColorAttachments = &color_reference;

    /* External -> subpass 0 dependency on color attachment output;
     * source access mask, dependency flags and view offset stay 0. */
    external_dependency.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
    external_dependency.srcSubpass = VK_SUBPASS_EXTERNAL;
    external_dependency.dstSubpass = 0;
    external_dependency.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dependency.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dependency.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &color_attachment;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;
    create_info.dependencyCount = 1;
    create_info.pDependencies = &external_dependency;

    return VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, render_pass));
}
static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_pipeline *pipeline)
{
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
struct vkd3d_swapchain_ops *meta_swapchain_ops = &meta_ops->swapchain;
VkPipelineColorBlendAttachmentState blend_att;
VkPipelineColorBlendStateCreateInfo cb_state;
VkResult vr;
if ((vr = vkd3d_meta_create_swapchain_render_pass(meta_ops->device, key, &pipeline->vk_render_pass)))
{
ERR("Failed to create render pass, vr %d.\n", vr);
return hresult_from_vk_result(vr);
}
memset(&cb_state, 0, sizeof(cb_state));
memset(&blend_att, 0, sizeof(blend_att));
cb_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
@ -611,11 +737,14 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
VK_COLOR_COMPONENT_A_BIT;
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
meta_swapchain_ops->vk_pipeline_layouts[key->filter], key->format, VK_FORMAT_UNDEFINED, VK_IMAGE_ASPECT_COLOR_BIT,
meta_swapchain_ops->vk_pipeline_layouts[key->filter], pipeline->vk_render_pass,
meta_swapchain_ops->vk_vs_module, meta_swapchain_ops->vk_fs_module, 1,
NULL, &cb_state,
NULL, &pipeline->vk_pipeline)) < 0)
{
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
return hresult_from_vk_result(vr);
}
pipeline->key = *key;
return S_OK;
@ -624,6 +753,7 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_pipeline *pipeline)
{
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
struct vkd3d_copy_image_ops *meta_copy_image_ops = &meta_ops->copy_image;
VkPipelineColorBlendAttachmentState blend_attachment;
VkPipelineDepthStencilStateCreateInfo ds_state;
@ -668,30 +798,13 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
ds_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
ds_state.pNext = NULL;
ds_state.flags = 0;
ds_state.depthTestEnable = (key->dst_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_TRUE : VK_FALSE;
ds_state.depthWriteEnable = ds_state.depthTestEnable;
ds_state.depthTestEnable = VK_TRUE;
ds_state.depthWriteEnable = VK_TRUE;
ds_state.depthCompareOp = VK_COMPARE_OP_ALWAYS;
ds_state.depthBoundsTestEnable = VK_FALSE;
if (key->dst_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
{
ds_state.stencilTestEnable = VK_TRUE;
ds_state.front.reference = 0;
ds_state.front.writeMask = 0xff;
ds_state.front.compareMask = 0xff;
ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
ds_state.front.failOp = VK_STENCIL_OP_KEEP;
ds_state.front.depthFailOp = VK_STENCIL_OP_KEEP;
ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
ds_state.back = ds_state.front;
}
else
{
ds_state.stencilTestEnable = VK_FALSE;
memset(&ds_state.front, 0, sizeof(ds_state.front));
memset(&ds_state.back, 0, sizeof(ds_state.back));
}
ds_state.stencilTestEnable = VK_FALSE;
memset(&ds_state.front, 0, sizeof(ds_state.front));
memset(&ds_state.back, 0, sizeof(ds_state.back));
ds_state.minDepthBounds = 0.0f;
ds_state.maxDepthBounds = 1.0f;
@ -710,32 +823,25 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
cb_state.pAttachments = &blend_attachment;
memset(&cb_state.blendConstants, 0, sizeof(cb_state.blendConstants));
if ((vr = vkd3d_meta_create_render_pass(meta_ops->device,
key->sample_count, key->format, &pipeline->vk_render_pass)) < 0)
return hresult_from_vk_result(vr);
/* Special path when copying stencil -> color. */
if (key->format->vk_format == VK_FORMAT_R8_UINT)
{
/* Special path when copying stencil -> color. */
vk_module = meta_copy_image_ops->vk_fs_uint_module;
}
else if (key->dst_aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
{
/* FragStencilRef path. */
vk_module = meta_copy_image_ops->vk_fs_stencil_module;
}
else
{
/* Depth or float color path. */
vk_module = meta_copy_image_ops->vk_fs_float_module;
}
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
meta_copy_image_ops->vk_pipeline_layout,
has_depth_target ? VK_FORMAT_UNDEFINED : key->format->vk_format,
has_depth_target ? key->format->vk_format : VK_FORMAT_UNDEFINED,
key->format->vk_aspect_mask,
meta_copy_image_ops->vk_pipeline_layout, pipeline->vk_render_pass,
VK_NULL_HANDLE, vk_module, key->sample_count,
has_depth_target ? &ds_state : NULL, has_depth_target ? NULL : &cb_state,
&spec_info, &pipeline->vk_pipeline)) < 0)
{
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
return hresult_from_vk_result(vr);
}
pipeline->key = *key;
return S_OK;
@ -765,6 +871,7 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
if (!memcmp(key, &pipeline->key, sizeof(*key)))
{
info->vk_render_pass = pipeline->vk_render_pass;
info->vk_pipeline = pipeline->vk_pipeline;
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
return S_OK;
@ -786,6 +893,7 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
return hr;
}
info->vk_render_pass = pipeline->vk_render_pass;
info->vk_pipeline = pipeline->vk_pipeline;
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
@ -946,6 +1054,7 @@ void vkd3d_swapchain_ops_cleanup(struct vkd3d_swapchain_ops *meta_swapchain_ops,
{
struct vkd3d_swapchain_pipeline *pipeline = &meta_swapchain_ops->pipelines[i];
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
}
@ -986,6 +1095,7 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
if (!memcmp(key, &pipeline->key, sizeof(*key)))
{
info->vk_render_pass = pipeline->vk_render_pass;
info->vk_pipeline = pipeline->vk_pipeline;
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
return S_OK;
@ -1007,6 +1117,7 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
return hr;
}
info->vk_render_pass = pipeline->vk_render_pass;
info->vk_pipeline = pipeline->vk_pipeline;
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
@ -1217,144 +1328,6 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
info->data_size = predicate_ops->data_sizes[command_type];
}
/* Prepares the execute-indirect meta state: initializes the mutex that guards
 * the pipeline cache, creates a pipeline layout whose single push constant
 * range covers struct vkd3d_execute_indirect_args for the compute stage, and
 * starts with an empty pipeline cache.
 *
 * Returns S_OK on success. If the mutex cannot be created, or the pipeline
 * layout cannot be created (in which case the mutex is destroyed again), the
 * corresponding HRESULT is returned. */
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
        struct d3d12_device *device)
{
    VkPushConstantRange args_range;
    VkResult vr;
    int rc;

    rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL);
    if (rc)
        return hresult_from_errno(rc);

    args_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
    args_range.offset = 0;
    args_range.size = sizeof(struct vkd3d_execute_indirect_args);

    vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
            &args_range, &meta_indirect_ops->vk_pipeline_layout);
    if (vr < 0)
    {
        /* Undo the only thing initialized so far. */
        pthread_mutex_destroy(&meta_indirect_ops->mutex);
        return hresult_from_vk_result(vr);
    }

    /* Pipelines are created on demand, so the cache starts out empty. */
    meta_indirect_ops->pipelines = NULL;
    meta_indirect_ops->pipelines_size = 0;
    meta_indirect_ops->pipelines_count = 0;
    return S_OK;
}
/* Specialization constant payload for the execute-indirect patch pipeline when
 * the debug ring is active: the debug ring constants come first, followed by
 * the workgroup size. The pipeline getter below maps workgroup_size_x through
 * offsetof() on this struct, so the field layout must stay in sync with it. */
struct vkd3d_meta_execute_indirect_spec_constant_data
{
struct vkd3d_shader_debug_ring_spec_constants constants;
uint32_t workgroup_size_x;
};
/* Returns the compute pipeline that patches execute-indirect command streams
 * for the given number of patch commands, creating and caching it on first use.
 *
 * meta_ops:            meta operation state; its execute_indirect member holds
 *                      the cache and the mutex protecting it.
 * patch_command_count: used both as the cache key and as the value of the
 *                      workgroup-size specialization constant.
 * info:                receives the pipeline layout and pipeline on success.
 *
 * Returns S_OK on success, E_OUTOFMEMORY if the cache cannot grow, or an
 * HRESULT derived from the failing mutex / Vulkan call. info is left untouched
 * on failure. */
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
        uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
{
    struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants;
    VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1];
    struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
    struct vkd3d_shader_debug_ring_spec_info debug_ring_info;
    VkSpecializationInfo spec;
    HRESULT hr = S_OK;
    VkResult vr;
    bool debug;
    size_t i;
    int rc;

    if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    /* Fast path: a pipeline specialized for this workgroup size already exists. */
    for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
    {
        if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
        {
            info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
            info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
            goto out;
        }
    }

    debug = meta_ops->device->debug_ring.active;

    /* If we have debug ring, we can dump indirect command buffer data to the ring as well.
     * Vital for debugging broken execute indirect data with templates. */
    if (debug)
    {
        vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info,
                0 /* Reserve this hash for internal debug streams. */);

        memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants));
        execute_indirect_spec_constants.constants = debug_ring_info.constants;
        execute_indirect_spec_constants.workgroup_size_x = patch_command_count;

        /* Debug ring entries first, then the workgroup size as one extra entry.
         * Constant ID 4 is expected to match the local_size_x_id used by the
         * debug-ring variant of the shader. */
        memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries));
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4;
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset =
                offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x);
        map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count);

        spec.pMapEntries = map_entry;
        spec.pData = &execute_indirect_spec_constants;
        spec.mapEntryCount = ARRAY_SIZE(map_entry);
        spec.dataSize = sizeof(execute_indirect_spec_constants);
    }
    else
    {
        /* Without the debug ring the workgroup size is the only constant (ID 0). */
        map_entry[0].constantID = 0;
        map_entry[0].offset = 0;
        map_entry[0].size = sizeof(patch_command_count);

        spec.pMapEntries = map_entry;
        spec.pData = &patch_command_count;
        spec.mapEntryCount = 1;
        spec.dataSize = sizeof(patch_command_count);
    }

    /* Make room for the new cache entry. The result must be checked: on
     * allocation failure the array has not grown and writing the new slot
     * would go out of bounds (or through a NULL pointer for the first entry). */
    if (!vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
            meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines)))
    {
        ERR("Failed to grow execute indirect pipeline cache.\n");
        hr = E_OUTOFMEMORY;
        goto out;
    }

    meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;

    vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
            debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch),
            debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch,
            meta_indirect_ops->vk_pipeline_layout, &spec,
            &meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);

    if (vr)
    {
        /* pipelines_count is not bumped, so the half-filled slot is never observed. */
        hr = hresult_from_vk_result(vr);
        goto out;
    }

    info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
    info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
    meta_indirect_ops->pipelines_count++;

out:
    pthread_mutex_unlock(&meta_indirect_ops->mutex);
    return hr;
}
/* Tears down everything owned by the execute-indirect meta state: every cached
 * pipeline, the cache array itself, the shared pipeline layout and the mutex. */
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
        struct d3d12_device *device)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    size_t i;

    for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
        VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));

    /* The array is grown with vkd3d_array_reserve() when pipelines are created
     * and would otherwise be leaked. It is NULL if no pipeline was ever created. */
    vkd3d_free(meta_indirect_ops->pipelines);
    meta_indirect_ops->pipelines = NULL;
    meta_indirect_ops->pipelines_size = 0;
    meta_indirect_ops->pipelines_count = 0;

    VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
    pthread_mutex_destroy(&meta_indirect_ops->mutex);
}
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
HRESULT hr;
@ -1380,13 +1353,8 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
goto fail_predicate_ops;
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
goto fail_execute_indirect_ops;
return S_OK;
fail_execute_indirect_ops:
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
fail_predicate_ops:
vkd3d_query_ops_cleanup(&meta_ops->query, device);
fail_query_ops:
@ -1403,7 +1371,6 @@ fail_common:
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
{
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
vkd3d_query_ops_cleanup(&meta_ops->query, device);
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);

View File

@ -18,9 +18,6 @@
#include "vkd3d_platform.h"
#include <assert.h>
#include <stdio.h>
#if defined(__linux__)
# include <dlfcn.h>
@ -156,43 +153,3 @@ bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
}
#endif
#if defined(_WIN32)
/* Copies the value of environment variable `name` into `value`, a caller
 * provided buffer of `value_size` bytes (must be non-NULL and non-empty).
 *
 * Returns true when GetEnvironmentVariableA() reports a non-zero length that
 * does not exceed value_size. Otherwise `value` is set to the empty string
 * and false is returned. */
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
{
    DWORD length;

    assert(value);
    assert(value_size > 0);

    length = GetEnvironmentVariableA(name, value, value_size);
    if (length == 0 || length > value_size)
    {
        value[0] = '\0';
        return false;
    }

    return true;
}
#else
/* Copies the value of environment variable `name` into `value`, a caller
 * provided buffer of `value_size` bytes (must be non-NULL and non-empty).
 *
 * Returns true when the variable exists; the copy is done with snprintf(), so
 * a value longer than the buffer is truncated and still reported as found.
 * When the variable does not exist, `value` is set to the empty string and
 * false is returned. */
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
{
    const char *found;

    assert(value);
    assert(value_size > 0);

    found = getenv(name);
    if (!found)
    {
        value[0] = '\0';
        return false;
    }

    snprintf(value, value_size, "%s", found);
    return true;
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,6 @@ static vkd3d_shader_hash_t renderdoc_capture_shader_hash;
static uint32_t *renderdoc_capture_counts;
static size_t renderdoc_capture_counts_count;
static bool vkd3d_renderdoc_is_active;
static bool vkd3d_renderdoc_global_capture;
static void vkd3d_renderdoc_init_capture_count_list(const char *env)
{
@ -50,13 +49,6 @@ static void vkd3d_renderdoc_init_capture_count_list(const char *env)
uint32_t count;
char *endp;
if (strcmp(env, "-1") == 0)
{
INFO("Doing one big capture of the entire lifetime of a device.\n");
vkd3d_renderdoc_global_capture = true;
return;
}
while (*env != '\0')
{
errno = 0;
@ -100,9 +92,9 @@ static bool vkd3d_renderdoc_enable_submit_counter(uint32_t counter)
static void vkd3d_renderdoc_init_once(void)
{
char counts[VKD3D_PATH_MAX];
pRENDERDOC_GetAPI get_api;
char env[VKD3D_PATH_MAX];
const char *counts;
const char *env;
#ifdef _WIN32
HMODULE renderdoc;
@ -112,19 +104,19 @@ static void vkd3d_renderdoc_init_once(void)
void *fn_ptr;
#endif
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_SHADER", env, sizeof(env));
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_COUNTS", counts, sizeof(counts));
env = getenv("VKD3D_AUTO_CAPTURE_SHADER");
counts = getenv("VKD3D_AUTO_CAPTURE_COUNTS");
if (strlen(env) == 0 && strlen(counts) == 0)
if (!env && !counts)
{
WARN("VKD3D_AUTO_CAPTURE_SHADER or VKD3D_AUTO_CAPTURE_COUNTS is not set, RenderDoc auto capture will not be enabled.\n");
return;
}
if (strlen(counts) == 0)
if (!counts)
WARN("VKD3D_AUTO_CAPTURE_COUNTS is not set, will assume that only the first submission is captured.\n");
if (strlen(env) > 0)
if (env)
renderdoc_capture_shader_hash = strtoull(env, NULL, 16);
if (renderdoc_capture_shader_hash)
@ -132,7 +124,7 @@ static void vkd3d_renderdoc_init_once(void)
else
INFO("Enabling RenderDoc capture for all shaders.\n");
if (strlen(counts) > 0)
if (counts)
vkd3d_renderdoc_init_capture_count_list(counts);
else
{
@ -188,11 +180,6 @@ bool vkd3d_renderdoc_active(void)
return vkd3d_renderdoc_is_active;
}
bool vkd3d_renderdoc_global_capture_enabled(void)
{
return vkd3d_renderdoc_global_capture;
}
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash)
{
return (renderdoc_capture_shader_hash == hash) || (renderdoc_capture_shader_hash == 0);
@ -203,12 +190,9 @@ bool vkd3d_renderdoc_begin_capture(void *instance)
static uint32_t overall_counter;
uint32_t counter;
if (!vkd3d_renderdoc_global_capture)
{
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
}
counter = vkd3d_atomic_uint32_increment(&overall_counter, vkd3d_memory_order_relaxed) - 1;
if (!vkd3d_renderdoc_enable_submit_counter(counter))
return false;
if (renderdoc_api)
renderdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), NULL);
@ -231,14 +215,11 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
{
unsigned int i;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (vkd3d_renderdoc_active() && state)
{
if (state->vk_bind_point == VK_PIPELINE_BIND_POINT_COMPUTE)
{
if (vkd3d_renderdoc_should_capture_shader_hash(state->compute.code.meta.hash))
if (vkd3d_renderdoc_should_capture_shader_hash(state->compute.meta.hash))
{
WARN("Triggering RenderDoc capture for this command list.\n");
list->debug_capture = true;
@ -248,7 +229,7 @@ void vkd3d_renderdoc_command_list_check_capture(struct d3d12_command_list *list,
{
for (i = 0; i < state->graphics.stage_count; i++)
{
if (vkd3d_renderdoc_should_capture_shader_hash(state->graphics.code[i].meta.hash))
if (vkd3d_renderdoc_should_capture_shader_hash(state->graphics.stage_meta[i].hash))
{
WARN("Triggering RenderDoc capture for this command list.\n");
list->debug_capture = true;
@ -265,9 +246,6 @@ bool vkd3d_renderdoc_command_queue_begin_capture(struct d3d12_command_queue *com
VkDebugUtilsLabelEXT capture_label;
bool debug_capture;
if (vkd3d_renderdoc_global_capture_enabled())
return false;
debug_capture = vkd3d_renderdoc_begin_capture(command_queue->device->vkd3d_instance->vk_instance);
if (debug_capture && !vkd3d_renderdoc_loaded_api())
{
@ -295,9 +273,6 @@ void vkd3d_renderdoc_command_queue_end_capture(struct d3d12_command_queue *comma
const struct vkd3d_vk_device_procs *vk_procs = &command_queue->device->vk_procs;
VkDebugUtilsLabelEXT capture_label;
if (vkd3d_renderdoc_global_capture_enabled())
return;
if (!vkd3d_renderdoc_loaded_api())
{
/* Magic fallback which lets us bridge the Wine barrier over to Linux RenderDoc. */

File diff suppressed because it is too large Load Diff

View File

@ -1,67 +0,0 @@
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require

// Workgroup width is supplied through specialization constant 0.
layout(local_size_x_id = 0) in;

// One patch operation: copy a 32-bit word from src_offset to dst_offset
// (both are indices into uint arrays, relative to the start of a draw).
struct Command
{
    uint type;
    uint src_offset;
    uint dst_offset;
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
    Command commands[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer
{
    uint values[];
};

layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount
{
    uint count;
};

layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite
{
    uint count;
};

layout(push_constant) uniform Registers
{
    Commands commands_va;
    SrcBuffer src_buffer_va;
    DstBuffer dst_buffer_va;
    uvec2 indirect_count_va;        // zero means "no indirect count buffer"
    IndirectCountWrite dst_indirect_count_va;
    uint src_stride;
    uint dst_stride;
};

void main()
{
    // Each workgroup handles one draw, each invocation one patch command.
    uint draw_index = gl_WorkGroupID.x;
    uint draw_limit = gl_NumWorkGroups.x;
    Command patch_cmd = commands_va.commands[gl_LocalInvocationIndex];

    bool has_indirect_count = any(notEqual(indirect_count_va, uvec2(0)));
    if (has_indirect_count)
    {
        // Clamp to the count read from the buffer and publish the clamped
        // value from the first workgroup.
        draw_limit = min(draw_limit, IndirectCount(indirect_count_va).count);
        if (draw_index == 0u)
            dst_indirect_count_va.count = draw_limit;
    }

    if (draw_index >= draw_limit)
        return;

    uint read_index = src_stride * draw_index + patch_cmd.src_offset;
    uint write_index = dst_stride * draw_index + patch_cmd.dst_offset;
    dst_buffer_va.values[write_index] = src_buffer_va.values[read_index];
}

View File

@ -1,83 +0,0 @@
// Debug-ring variant of the execute-indirect patch shader: performs the same
// word copies as the plain variant and additionally emits debug channel
// messages when debug_tag is non-zero.
#version 450
#extension GL_EXT_buffer_reference : require
#extension GL_EXT_buffer_reference_uvec2 : require
#extension GL_GOOGLE_include_directive : require
#include "../../../include/shader-debug/debug_channel.h"
// Workgroup width comes from specialization constant 4.
// NOTE(review): presumably IDs 0-3 are taken by the debug channel header - confirm.
layout(local_size_x_id = 4) in;
// One patch operation; src_offset / dst_offset are used below as indices into
// uint arrays, relative to the start of a draw.
struct Command
{
uint type;
uint src_offset;
uint dst_offset;
};
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
{
Command commands[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
uint values[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
uint values[];
};
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
uint count;
};
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
uint count;
};
layout(push_constant) uniform Registers
{
Commands commands_va;
SrcBuffer src_buffer_va;
DstBuffer dst_buffer_va;
// Compared against zero in main(); a zero address skips the indirect count path.
uvec2 indirect_count_va;
IndirectCountWrite dst_indirect_count_va;
uint src_stride;
uint dst_stride;
// Debug metadata here
uint debug_tag;
uint implicit_instance;
};
void main()
{
// Debug output is only set up and emitted when a non-zero tag was provided.
// NOTE(review): the macro is used as the unbraced body of an if; verify that
// it expands to a single statement.
if (debug_tag != 0u)
DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance);
// One workgroup per draw, one invocation per patch command.
Command cmd = commands_va.commands[gl_LocalInvocationIndex];
uint draw_id = gl_WorkGroupID.x;
uint max_draws = gl_NumWorkGroups.x;
if (any(notEqual(indirect_count_va, uvec2(0))))
{
// Clamp the draw count to the value read from the count buffer and write the
// clamped count back out from the first workgroup.
max_draws = min(max_draws, IndirectCount(indirect_count_va).count);
if (gl_WorkGroupID.x == 0u)
dst_indirect_count_va.count = max_draws;
}
// Report the effective and dispatched draw counts from the first workgroup.
if (debug_tag != 0u && gl_WorkGroupID.x == 0)
DEBUG_CHANNEL_MSG_UNIFORM(int(max_draws), int(gl_NumWorkGroups.x));
if (draw_id < max_draws)
{
uint src_offset = src_stride * draw_id + cmd.src_offset;
uint dst_offset = dst_stride * draw_id + cmd.dst_offset;
uint src_value = src_buffer_va.values[src_offset];
// Log every patched word before it is written.
if (debug_tag != 0u)
DEBUG_CHANNEL_MSG(cmd.type, dst_offset, src_offset, src_value);
dst_buffer_va.values[dst_offset] = src_value;
}
}

View File

@ -1,28 +0,0 @@
#version 450
#extension GL_EXT_samplerless_texture_functions : enable
#extension GL_ARB_shader_stencil_export : enable

#define MODE_1D 0
#define MODE_2D 1
#define MODE_MS 2

// Selects which of the aliased bindings below is actually sampled.
layout(constant_id = 0) const uint c_mode = MODE_2D;

// All three views share binding 0; only the one matching c_mode is read.
layout(binding = 0) uniform utexture1DArray tex_1d;
layout(binding = 0) uniform utexture2DArray tex_2d;
layout(binding = 0) uniform utexture2DMSArray tex_ms;

layout(push_constant)
uniform u_info_t {
    ivec2 offset;
} u_info;

// Fetches the red channel of the source texel that corresponds to this
// fragment (offset by u_info.offset, layer = gl_Layer) and exports it as the
// fragment's stencil reference value.
void main() {
    ivec2 src_xy = u_info.offset + ivec2(gl_FragCoord.xy);
    ivec3 src_texel = ivec3(src_xy, gl_Layer);
    uint stencil;

    if (c_mode == MODE_1D)
        stencil = texelFetch(tex_1d, src_texel.xz, 0).r;
    else if (c_mode == MODE_2D)
        stencil = texelFetch(tex_2d, src_texel, 0).r;
    else if (c_mode == MODE_MS)
        stencil = texelFetch(tex_ms, src_texel, gl_SampleID).r;

    gl_FragStencilRefARB = int(stencil);
}

View File

@ -1,68 +0,0 @@
/*
* Copyright 2021 Derek Lesho for Codeweavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "vkd3d_private.h"
#include "winioctl.h"
#define IOCTL_SHARED_GPU_RESOURCE_SET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 4, METHOD_BUFFERED, FILE_WRITE_ACCESS)
#define IOCTL_SHARED_GPU_RESOURCE_GET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 5, METHOD_BUFFERED, FILE_READ_ACCESS)
#define IOCTL_SHARED_GPU_RESOURCE_OPEN CTL_CODE(FILE_DEVICE_VIDEO, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
/* Sends buf_size bytes from buf to the object behind `handle` using the
 * IOCTL_SHARED_GPU_RESOURCE_SET_METADATA control code.
 * Returns true if the ioctl succeeded. */
bool vkd3d_set_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size)
{
    DWORD bytes_returned;
    BOOL succeeded;

    /* No output buffer is used; bytes_returned only satisfies the API. */
    succeeded = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_SET_METADATA,
            buf, buf_size, NULL, 0, &bytes_returned, NULL);
    return succeeded;
}
/* Reads up to buf_size bytes into buf from the object behind `handle` using
 * the IOCTL_SHARED_GPU_RESOURCE_GET_METADATA control code.
 *
 * metadata_size: optional; receives the byte count reported by the ioctl,
 *                or 0 if the ioctl did not report one.
 *
 * Returns true if the ioctl succeeded. */
bool vkd3d_get_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size, uint32_t *metadata_size)
{
    /* Initialized so that a failed ioctl never leaks an indeterminate value
     * into *metadata_size. */
    DWORD ret_size = 0;

    bool ret = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_GET_METADATA, NULL, 0, buf, buf_size, &ret_size, NULL);

    if (metadata_size)
        *metadata_size = ret_size;

    return ret;
}
/* Opens the \\.\SharedGpuResource device and binds the resulting handle to the
 * resource identified by kmt_handle via IOCTL_SHARED_GPU_RESOURCE_OPEN.
 *
 * Returns the new handle, which the caller must close, or
 * INVALID_HANDLE_VALUE if the device could not be opened or the ioctl failed. */
HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle)
{
    struct
    {
        unsigned int kmt_handle;
        /* the following parameter represents a larger sized string for a dynamically allocated struct for use when opening an object by name */
        WCHAR name[1];
    } shared_resource_open;
    DWORD ret_size;

    HANDLE nt_handle = CreateFileA("\\\\.\\SharedGpuResource", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);

    if (nt_handle == INVALID_HANDLE_VALUE)
        return nt_handle;

    /* The handle value is narrowed to unsigned int to fit the request struct;
     * an empty name means "open by handle". */
    shared_resource_open.kmt_handle = (ULONG_PTR)kmt_handle;
    shared_resource_open.name[0] = 0;

    /* lpBytesReturned must not be NULL when lpOverlapped is NULL, even though
     * this request produces no output data. */
    if (!DeviceIoControl(nt_handle, IOCTL_SHARED_GPU_RESOURCE_OPEN, &shared_resource_open, sizeof(shared_resource_open), NULL, 0, &ret_size, NULL))
    {
        CloseHandle(nt_handle);
        return INVALID_HANDLE_VALUE;
    }

    return nt_handle;
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -22,8 +22,6 @@
#include <errno.h>
#define VKD3D_MAX_DXGI_FORMAT DXGI_FORMAT_B4G4R4A4_UNORM
#define COLOR (VK_IMAGE_ASPECT_COLOR_BIT)
#define DEPTH (VK_IMAGE_ASPECT_DEPTH_BIT)
#define STENCIL (VK_IMAGE_ASPECT_STENCIL_BIT)
@ -123,23 +121,17 @@ static const struct vkd3d_format vkd3d_formats[] =
{DXGI_FORMAT_B4G4R4A4_UNORM, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT,2, 1, 1, 1, COLOR, 1},
};
static const struct vkd3d_format_footprint depth_stencil_copy_footprints[] =
{
{ DXGI_FORMAT_R32_TYPELESS, 1, 1, 4, 0, 0 },
{ DXGI_FORMAT_R8_TYPELESS, 1, 1, 1, 0, 0 },
};
/* Each depth/stencil format is only compatible with itself in Vulkan. */
static const struct vkd3d_format vkd3d_depth_stencil_formats[] =
{
{DXGI_FORMAT_R32G8X24_TYPELESS, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS, false, depth_stencil_copy_footprints},
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2, 0, false, depth_stencil_copy_footprints},
{DXGI_FORMAT_R32G8X24_TYPELESS, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS},
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH_STENCIL, 2},
{DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, DEPTH, 2},
{DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, VK_FORMAT_D32_SFLOAT_S8_UINT, 8, 1, 1, 1, STENCIL, 2},
{DXGI_FORMAT_R32_TYPELESS, VK_FORMAT_D32_SFLOAT, 4, 1, 1, 1, DEPTH, 1, TYPELESS},
{DXGI_FORMAT_R32_FLOAT, VK_FORMAT_D32_SFLOAT, 4, 1, 1, 1, DEPTH, 1},
{DXGI_FORMAT_R24G8_TYPELESS, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS, false, depth_stencil_copy_footprints},
{DXGI_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2, 0, false, depth_stencil_copy_footprints},
{DXGI_FORMAT_R24G8_TYPELESS, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2, TYPELESS},
{DXGI_FORMAT_D24_UNORM_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH_STENCIL, 2},
{DXGI_FORMAT_R24_UNORM_X8_TYPELESS, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, DEPTH, 2},
{DXGI_FORMAT_X24_TYPELESS_G8_UINT, VK_FORMAT_D24_UNORM_S8_UINT, 4, 1, 1, 1, STENCIL, 2},
{DXGI_FORMAT_R16_TYPELESS, VK_FORMAT_D16_UNORM, 2, 1, 1, 1, DEPTH, 1, TYPELESS},
@ -153,258 +145,133 @@ static const struct vkd3d_format vkd3d_depth_stencil_formats[] =
#undef SINT
#undef UINT
static const struct dxgi_format_compatibility_list
static const struct vkd3d_format_compatibility_info
{
DXGI_FORMAT image_format;
DXGI_FORMAT view_formats[VKD3D_MAX_COMPATIBLE_FORMAT_COUNT];
DXGI_FORMAT uint_format; /* for ClearUAVUint */
DXGI_FORMAT format;
DXGI_FORMAT typeless_format;
}
dxgi_format_compatibility_list[] =
vkd3d_format_compatibility_info[] =
{
{DXGI_FORMAT_R32G32B32A32_TYPELESS,
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_SINT},
DXGI_FORMAT_R32G32B32A32_UINT},
{DXGI_FORMAT_R32G32B32A32_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R32G32B32A32_UINT},
{DXGI_FORMAT_R32G32B32A32_UINT,
{DXGI_FORMAT_R32G32B32A32_SINT},
DXGI_FORMAT_R32G32B32A32_UINT},
{DXGI_FORMAT_R32G32B32A32_SINT,
{DXGI_FORMAT_R32G32B32A32_UINT},
DXGI_FORMAT_R32G32B32A32_UINT},
{DXGI_FORMAT_R32G32B32_TYPELESS,
{DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_SINT},
DXGI_FORMAT_R32G32B32_UINT},
{DXGI_FORMAT_R32G32B32_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R32G32B32_UINT},
{DXGI_FORMAT_R32G32B32_UINT,
{DXGI_FORMAT_R32G32B32_SINT},
DXGI_FORMAT_R32G32B32_UINT},
{DXGI_FORMAT_R32G32B32_SINT,
{DXGI_FORMAT_R32G32B32_UINT},
DXGI_FORMAT_R32G32B32_UINT},
{DXGI_FORMAT_R16G16B16A16_TYPELESS,
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R16G16B16A16_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R16G16B16A16_UINT,
{DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R16G16B16A16_SINT,
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_SNORM},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R16G16B16A16_UNORM,
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R16G16B16A16_SNORM,
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_SINT},
DXGI_FORMAT_R16G16B16A16_UINT},
{DXGI_FORMAT_R32G32_TYPELESS,
{DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_SINT},
DXGI_FORMAT_R32G32_UINT},
{DXGI_FORMAT_R32G32_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R32G32_UINT},
{DXGI_FORMAT_R32G32_UINT,
{DXGI_FORMAT_R32G32_SINT},
DXGI_FORMAT_R32G32_UINT},
{DXGI_FORMAT_R32G32_SINT,
{DXGI_FORMAT_R32G32_UINT},
DXGI_FORMAT_R32G32_UINT},
{DXGI_FORMAT_R10G10B10A2_TYPELESS,
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UINT},
DXGI_FORMAT_R10G10B10A2_UINT},
{DXGI_FORMAT_R10G10B10A2_UINT,
{DXGI_FORMAT_R10G10B10A2_UNORM},
DXGI_FORMAT_R10G10B10A2_UINT},
{DXGI_FORMAT_R10G10B10A2_UNORM,
{DXGI_FORMAT_R10G10B10A2_UINT},
DXGI_FORMAT_R10G10B10A2_UINT},
{DXGI_FORMAT_R11G11B10_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R32_UINT},
{DXGI_FORMAT_R8G8_TYPELESS,
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
DXGI_FORMAT_R8G8_UINT},
{DXGI_FORMAT_R8G8_UINT,
{DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
DXGI_FORMAT_R8G8_UINT},
{DXGI_FORMAT_R8G8_SINT,
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_SNORM},
DXGI_FORMAT_R8G8_UINT},
{DXGI_FORMAT_R8G8_UNORM,
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT},
DXGI_FORMAT_R8G8_UINT},
{DXGI_FORMAT_R8G8_SNORM,
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_SINT},
DXGI_FORMAT_R8G8_UINT},
{DXGI_FORMAT_R8G8B8A8_TYPELESS,
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R8G8B8A8_UINT,
{DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R8G8B8A8_SINT,
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_SNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R8G8B8A8_UNORM_SRGB,
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R8G8B8A8_UNORM,
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R8G8B8A8_SNORM,
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_SINT},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_R16G16_TYPELESS,
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R16G16_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R16G16_UINT,
{DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R16G16_SINT,
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_SNORM},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R16G16_UNORM,
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R16G16_SNORM,
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_SINT},
DXGI_FORMAT_R16G16_UINT},
{DXGI_FORMAT_R32_TYPELESS,
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_SINT},
DXGI_FORMAT_R32_UINT},
{DXGI_FORMAT_R32_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R32_UINT},
{DXGI_FORMAT_R32_UINT,
{DXGI_FORMAT_R32_SINT},
DXGI_FORMAT_R32_UINT},
{DXGI_FORMAT_R32_SINT,
{DXGI_FORMAT_R32_UINT},
DXGI_FORMAT_R32_UINT},
{DXGI_FORMAT_R16_TYPELESS,
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R16_FLOAT, {DXGI_FORMAT_UNKNOWN},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R16_UINT,
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R16_SINT,
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_SNORM},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R16_UNORM,
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R16_SNORM,
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_SINT},
DXGI_FORMAT_R16_UINT},
{DXGI_FORMAT_R8_TYPELESS,
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_R8_UINT,
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_R8_SINT,
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_A8_UNORM},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_R8_UNORM,
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_A8_UNORM},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_R8_SNORM,
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_A8_UNORM,
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_UNORM},
DXGI_FORMAT_R8_UINT},
{DXGI_FORMAT_B8G8R8A8_TYPELESS,
{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_UNORM_SRGB},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_B8G8R8A8_UNORM,
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB,
{DXGI_FORMAT_B8G8R8A8_UNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_B8G8R8X8_TYPELESS,
{DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_UNORM_SRGB},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_B8G8R8X8_UNORM,
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB,
{DXGI_FORMAT_B8G8R8X8_UNORM},
DXGI_FORMAT_R8G8B8A8_UINT},
{DXGI_FORMAT_BC1_TYPELESS,
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM_SRGB}},
{DXGI_FORMAT_BC1_UNORM,
{DXGI_FORMAT_BC1_UNORM_SRGB}},
{DXGI_FORMAT_BC1_UNORM_SRGB,
{DXGI_FORMAT_BC1_UNORM}},
{DXGI_FORMAT_BC2_TYPELESS,
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM_SRGB}},
{DXGI_FORMAT_BC2_UNORM,
{DXGI_FORMAT_BC2_UNORM_SRGB}},
{DXGI_FORMAT_BC2_UNORM_SRGB,
{DXGI_FORMAT_BC2_UNORM}},
{DXGI_FORMAT_BC3_TYPELESS,
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM_SRGB}},
{DXGI_FORMAT_BC3_UNORM,
{DXGI_FORMAT_BC3_UNORM_SRGB}},
{DXGI_FORMAT_BC3_UNORM_SRGB,
{DXGI_FORMAT_BC3_UNORM}},
{DXGI_FORMAT_BC4_TYPELESS,
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_SNORM}},
{DXGI_FORMAT_BC5_TYPELESS,
{DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_SNORM}},
{DXGI_FORMAT_BC6H_TYPELESS,
{DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_SF16}},
{DXGI_FORMAT_BC7_TYPELESS,
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_UNORM_SRGB}},
{DXGI_FORMAT_BC7_UNORM,
{DXGI_FORMAT_BC7_UNORM_SRGB}},
{DXGI_FORMAT_BC7_UNORM_SRGB,
{DXGI_FORMAT_BC7_UNORM}},
/* DXGI_FORMAT_R32G32B32A32_TYPELESS */
{DXGI_FORMAT_R32G32B32A32_UINT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
{DXGI_FORMAT_R32G32B32A32_SINT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
{DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_TYPELESS},
/* DXGI_FORMAT_R32G32B32_TYPELESS */
{DXGI_FORMAT_R32G32B32_UINT, DXGI_FORMAT_R32G32B32_TYPELESS},
{DXGI_FORMAT_R32G32B32_SINT, DXGI_FORMAT_R32G32B32_TYPELESS},
{DXGI_FORMAT_R32G32B32_FLOAT, DXGI_FORMAT_R32G32B32_TYPELESS},
/* DXGI_FORMAT_R16G16B16A16_TYPELESS */
{DXGI_FORMAT_R16G16B16A16_UNORM, DXGI_FORMAT_R16G16B16A16_TYPELESS},
{DXGI_FORMAT_R16G16B16A16_SNORM, DXGI_FORMAT_R16G16B16A16_TYPELESS},
{DXGI_FORMAT_R16G16B16A16_UINT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
{DXGI_FORMAT_R16G16B16A16_SINT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
{DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_TYPELESS},
/* DXGI_FORMAT_R32G32_TYPELESS */
{DXGI_FORMAT_R32G32_UINT, DXGI_FORMAT_R32G32_TYPELESS},
{DXGI_FORMAT_R32G32_SINT, DXGI_FORMAT_R32G32_TYPELESS},
{DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_TYPELESS},
/* DXGI_FORMAT_R32G8X24_TYPELESS */
{DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS, DXGI_FORMAT_R32G8X24_TYPELESS},
{DXGI_FORMAT_X32_TYPELESS_G8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS},
{DXGI_FORMAT_D32_FLOAT_S8X24_UINT, DXGI_FORMAT_R32G8X24_TYPELESS},
/* DXGI_FORMAT_R10G10B10A2_TYPELESS */
{DXGI_FORMAT_R10G10B10A2_UINT, DXGI_FORMAT_R10G10B10A2_TYPELESS},
{DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_TYPELESS},
/* DXGI_FORMAT_R8G8B8A8_TYPELESS */
{DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R8G8B8A8_TYPELESS},
{DXGI_FORMAT_R8G8B8A8_SINT, DXGI_FORMAT_R8G8B8A8_TYPELESS},
{DXGI_FORMAT_R8G8B8A8_SNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS},
{DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, DXGI_FORMAT_R8G8B8A8_TYPELESS},
{DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R8G8B8A8_TYPELESS},
/* DXGI_FORMAT_R16G16_TYPELESS */
{DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R16G16_TYPELESS},
{DXGI_FORMAT_R16G16_SNORM, DXGI_FORMAT_R16G16_TYPELESS},
{DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R16G16_TYPELESS},
{DXGI_FORMAT_R16G16_SINT, DXGI_FORMAT_R16G16_TYPELESS},
{DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_TYPELESS},
/* DXGI_FORMAT_R32_TYPELESS */
{DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_R32_TYPELESS},
{DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_TYPELESS},
{DXGI_FORMAT_R32_UINT, DXGI_FORMAT_R32_TYPELESS},
{DXGI_FORMAT_R32_SINT, DXGI_FORMAT_R32_TYPELESS},
/* DXGI_FORMAT_R24G8_TYPELESS */
{DXGI_FORMAT_R24_UNORM_X8_TYPELESS, DXGI_FORMAT_R24G8_TYPELESS},
{DXGI_FORMAT_X24_TYPELESS_G8_UINT, DXGI_FORMAT_R24G8_TYPELESS},
{DXGI_FORMAT_D24_UNORM_S8_UINT, DXGI_FORMAT_R24G8_TYPELESS},
/* DXGI_FORMAT_R8G8_TYPELESS */
{DXGI_FORMAT_R8G8_SNORM, DXGI_FORMAT_R8G8_TYPELESS},
{DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_TYPELESS},
{DXGI_FORMAT_R8G8_UINT, DXGI_FORMAT_R8G8_TYPELESS},
{DXGI_FORMAT_R8G8_SINT, DXGI_FORMAT_R8G8_TYPELESS},
/* DXGI_FORMAT_R16_TYPELESS */
{DXGI_FORMAT_D16_UNORM, DXGI_FORMAT_R16_TYPELESS},
{DXGI_FORMAT_R16_UNORM, DXGI_FORMAT_R16_TYPELESS},
{DXGI_FORMAT_R16_SNORM, DXGI_FORMAT_R16_TYPELESS},
{DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_TYPELESS},
{DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_TYPELESS},
{DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_TYPELESS},
/* DXGI_FORMAT_R8_TYPELESS */
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_TYPELESS},
{DXGI_FORMAT_R8_SNORM, DXGI_FORMAT_R8_TYPELESS},
{DXGI_FORMAT_R8_UINT, DXGI_FORMAT_R8_TYPELESS},
{DXGI_FORMAT_R8_SINT, DXGI_FORMAT_R8_TYPELESS},
/* DXGI_FORMAT_BC1_TYPELESS */
{DXGI_FORMAT_BC1_UNORM_SRGB, DXGI_FORMAT_BC1_TYPELESS},
{DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_TYPELESS},
/* DXGI_FORMAT_BC2_TYPELESS */
{DXGI_FORMAT_BC2_UNORM_SRGB, DXGI_FORMAT_BC2_TYPELESS},
{DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_TYPELESS},
/* DXGI_FORMAT_BC3_TYPELESS */
{DXGI_FORMAT_BC3_UNORM_SRGB, DXGI_FORMAT_BC3_TYPELESS},
{DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_TYPELESS},
/* DXGI_FORMAT_BC4_TYPELESS */
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_TYPELESS},
{DXGI_FORMAT_BC4_SNORM, DXGI_FORMAT_BC4_TYPELESS},
/* DXGI_FORMAT_BC5_TYPELESS */
{DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_TYPELESS},
{DXGI_FORMAT_BC5_SNORM, DXGI_FORMAT_BC5_TYPELESS},
/* DXGI_FORMAT_BC6H_TYPELESS */
{DXGI_FORMAT_BC6H_UF16, DXGI_FORMAT_BC6H_TYPELESS},
{DXGI_FORMAT_BC6H_SF16, DXGI_FORMAT_BC6H_TYPELESS},
/* DXGI_FORMAT_BC7_TYPELESS */
{DXGI_FORMAT_BC7_UNORM_SRGB, DXGI_FORMAT_BC7_TYPELESS},
{DXGI_FORMAT_BC7_UNORM, DXGI_FORMAT_BC7_TYPELESS},
/* DXGI_FORMAT_B8G8R8A8_TYPELESS */
{DXGI_FORMAT_B8G8R8A8_UNORM_SRGB, DXGI_FORMAT_B8G8R8A8_TYPELESS},
{DXGI_FORMAT_B8G8R8A8_UNORM, DXGI_FORMAT_B8G8R8A8_TYPELESS},
/* DXGI_FORMAT_B8G8R8X8_TYPELESS */
{DXGI_FORMAT_B8G8R8X8_UNORM_SRGB, DXGI_FORMAT_B8G8R8X8_TYPELESS},
{DXGI_FORMAT_B8G8R8X8_UNORM, DXGI_FORMAT_B8G8R8X8_TYPELESS},
};
void vkd3d_format_compatibility_list_add_format(struct vkd3d_format_compatibility_list *list, VkFormat vk_format)
static bool dxgi_format_is_depth_stencil(DXGI_FORMAT dxgi_format)
{
unsigned int i;
bool found = false;
for (i = 0; i < list->format_count && !found; i++)
found = list->vk_formats[i] == vk_format;
if (!found)
for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i)
{
assert(list->format_count < ARRAY_SIZE(list->vk_formats));
list->vk_formats[list->format_count++] = vk_format;
const struct vkd3d_format *current = &vkd3d_formats[i];
if (current->dxgi_format == dxgi_format)
return current->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
}
for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i)
{
if (vkd3d_depth_stencil_formats[i].dxgi_format == dxgi_format)
return true;
}
return false;
}
/* FIXME: This table should be generated at compile-time. */
static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device)
{
struct vkd3d_format_compatibility_list *lists, *dst;
const struct dxgi_format_compatibility_list *src;
struct vkd3d_format_compatibility_list *lists, *current_list;
const struct vkd3d_format_compatibility_info *current;
DXGI_FORMAT dxgi_format;
VkFormat vk_format;
unsigned int count;
unsigned int i, j;
@ -414,25 +281,62 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device
if (!device->vk_info.KHR_image_format_list)
return S_OK;
count = 0;
for (i = 0; i < ARRAY_SIZE(dxgi_format_compatibility_list); ++i)
count = max(count, dxgi_format_compatibility_list[i].image_format + 1);
count = 1;
dxgi_format = vkd3d_format_compatibility_info[0].typeless_format;
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
{
DXGI_FORMAT typeless_format = vkd3d_format_compatibility_info[i].typeless_format;
if (dxgi_format != typeless_format)
{
++count;
dxgi_format = typeless_format;
}
}
if (!(lists = vkd3d_calloc(count, sizeof(*lists))))
return E_OUTOFMEMORY;
for (i = 0; i < ARRAY_SIZE(dxgi_format_compatibility_list); ++i)
count = 0;
current_list = lists;
current_list->typeless_format = vkd3d_format_compatibility_info[0].typeless_format;
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
{
src = &dxgi_format_compatibility_list[i];
dst = &lists[src->image_format];
current = &vkd3d_format_compatibility_info[i];
dst->uint_format = src->uint_format;
dst->vk_formats[dst->format_count++] = vkd3d_get_vk_format(src->image_format);
if (current_list->typeless_format != current->typeless_format)
{
/* Avoid empty format lists. */
if (current_list->format_count)
{
++current_list;
++count;
}
for (j = 0; j < ARRAY_SIZE(src->view_formats) && src->view_formats[j]; j++)
vkd3d_format_compatibility_list_add_format(dst, vkd3d_get_vk_format(src->view_formats[j]));
current_list->typeless_format = current->typeless_format;
}
/* In Vulkan, each depth-stencil format is only compatible with itself. */
if (dxgi_format_is_depth_stencil(current->format))
continue;
if (!(vk_format = vkd3d_get_vk_format(current->format)))
continue;
for (j = 0; j < current_list->format_count; ++j)
{
if (current_list->vk_formats[j] == vk_format)
break;
}
if (j >= current_list->format_count)
{
assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT);
current_list->vk_formats[current_list->format_count++] = vk_format;
}
}
if (current_list->format_count)
++count;
device->format_compatibility_list_count = count;
device->format_compatibility_lists = lists;
@ -449,74 +353,51 @@ static void vkd3d_cleanup_format_compatibility_lists(struct d3d12_device *device
static HRESULT vkd3d_init_depth_stencil_formats(struct d3d12_device *device)
{
const unsigned int count = ARRAY_SIZE(vkd3d_depth_stencil_formats);
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
struct vkd3d_format *formats, *format;
VkFormatProperties properties;
struct vkd3d_format *formats;
unsigned int i;
if (!(formats = vkd3d_calloc(VKD3D_MAX_DXGI_FORMAT + 1, sizeof(*formats))))
return E_OUTOFMEMORY;
VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device,
VK_FORMAT_D24_UNORM_S8_UINT, &properties));
if (!(properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))
if (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)
{
device->depth_stencil_formats = vkd3d_depth_stencil_formats;
}
else
{
/* AMD doesn't support VK_FORMAT_D24_UNORM_S8_UINT. */
WARN("Mapping VK_FORMAT_D24_UNORM_S8_UINT to VK_FORMAT_D32_SFLOAT_S8_UINT.\n");
}
for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i)
{
assert(vkd3d_depth_stencil_formats[i].dxgi_format <= VKD3D_MAX_DXGI_FORMAT);
format = &formats[vkd3d_depth_stencil_formats[i].dxgi_format];
*format = vkd3d_depth_stencil_formats[i];
if (format->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
!(properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT))
if (!(formats = vkd3d_calloc(count, sizeof(*formats))))
return E_OUTOFMEMORY;
memcpy(formats, vkd3d_depth_stencil_formats, sizeof(vkd3d_depth_stencil_formats));
for (i = 0; i < count; ++i)
{
format->vk_format = VK_FORMAT_D32_SFLOAT_S8_UINT;
format->is_emulated = true;
if (formats[i].vk_format == VK_FORMAT_D24_UNORM_S8_UINT)
{
formats[i].vk_format = VK_FORMAT_D32_SFLOAT_S8_UINT;
formats[i].is_emulated = true;
}
}
}
device->depth_stencil_formats = formats;
device->depth_stencil_formats = formats;
}
return S_OK;
}
static void vkd3d_cleanup_depth_stencil_formats(struct d3d12_device *device)
{
vkd3d_free((void *)device->depth_stencil_formats);
if (vkd3d_depth_stencil_formats != device->depth_stencil_formats)
vkd3d_free((void *)device->depth_stencil_formats);
device->depth_stencil_formats = NULL;
}
/* Builds the per-device format table, indexed directly by DXGI_FORMAT value.
 *
 * The table has VKD3D_MAX_DXGI_FORMAT + 1 zero-initialised slots; every entry
 * of the static vkd3d_formats[] array is copied into the slot matching its
 * dxgi_format. Slots with no matching entry stay zeroed.
 *
 * Returns S_OK on success, or E_OUTOFMEMORY if the table cannot be allocated.
 * On success the table is stored in device->formats. */
static HRESULT vkd3d_init_formats(struct d3d12_device *device)
{
    struct vkd3d_format *table;
    unsigned int idx;

    table = vkd3d_calloc(VKD3D_MAX_DXGI_FORMAT + 1, sizeof(*table));
    if (!table)
        return E_OUTOFMEMORY;

    for (idx = 0; idx < ARRAY_SIZE(vkd3d_formats); ++idx)
    {
        /* The DXGI format value is used as the index, so it must fit the table. */
        assert(vkd3d_formats[idx].dxgi_format <= VKD3D_MAX_DXGI_FORMAT);
        table[vkd3d_formats[idx].dxgi_format] = vkd3d_formats[idx];
    }

    device->formats = table;
    return S_OK;
}
/* Frees the format table stored in device->formats and resets the pointer
 * to NULL. */
static void vkd3d_cleanup_formats(struct d3d12_device *device)
{
    void *table = (void *)device->formats;

    device->formats = NULL;
    vkd3d_free(table);
}
HRESULT vkd3d_init_format_info(struct d3d12_device *device)
{
HRESULT hr;
@ -524,17 +405,8 @@ HRESULT vkd3d_init_format_info(struct d3d12_device *device)
if (FAILED(hr = vkd3d_init_depth_stencil_formats(device)))
return hr;
if (FAILED(hr = vkd3d_init_format_compatibility_lists(device)))
{
if FAILED(hr = vkd3d_init_format_compatibility_lists(device))
vkd3d_cleanup_depth_stencil_formats(device);
return hr;
}
if (FAILED(hr = vkd3d_init_formats(device)))
{
vkd3d_cleanup_depth_stencil_formats(device);
vkd3d_cleanup_format_compatibility_lists(device);
}
return hr;
}
@ -543,7 +415,6 @@ void vkd3d_cleanup_format_info(struct d3d12_device *device)
{
vkd3d_cleanup_depth_stencil_formats(device);
vkd3d_cleanup_format_compatibility_lists(device);
vkd3d_cleanup_formats(device);
}
/* We use overrides for depth/stencil formats. This is required in order to
@ -553,64 +424,79 @@ void vkd3d_cleanup_format_info(struct d3d12_device *device)
static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3d12_device *device,
DXGI_FORMAT dxgi_format)
{
const struct vkd3d_format *format;
const struct vkd3d_format *formats;
unsigned int i;
assert(device);
format = &device->depth_stencil_formats[dxgi_format];
formats = device->depth_stencil_formats;
return format->dxgi_format ? format : NULL;
for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i)
{
if (formats[i].dxgi_format == dxgi_format)
return &formats[i];
}
return NULL;
}
const struct vkd3d_format *vkd3d_get_format(const struct d3d12_device *device,
DXGI_FORMAT dxgi_format, bool depth_stencil)
{
const struct vkd3d_format *format;
unsigned int i;
if (dxgi_format > VKD3D_MAX_DXGI_FORMAT)
return NULL;
/* If we request a depth-stencil format (or typeless variant) that is planar,
* there cannot be any ambiguity which format to select, we must choose a depth-stencil format.
* For single aspect formats,
* there are cases where we need to choose either COLOR or DEPTH aspect variants based on depth_stencil argument,
* but there cannot be any such issue for DEPTH_STENCIL types.
* This fixes issues where e.g. R24_UNORM_X8_TYPELESS format is used without ALLOW_DEPTH_STENCIL. */
format = vkd3d_get_depth_stencil_format(device, dxgi_format);
if (format && (depth_stencil || format->plane_count > 1))
if (depth_stencil && (format = vkd3d_get_depth_stencil_format(device, dxgi_format)))
return format;
format = &device->formats[dxgi_format];
for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i)
{
if (vkd3d_formats[i].dxgi_format == dxgi_format)
return &vkd3d_formats[i];
}
return format->dxgi_format ? format : NULL;
return NULL;
}
struct vkd3d_format_footprint vkd3d_format_footprint_for_plane(const struct vkd3d_format *format, unsigned int plane_idx)
DXGI_FORMAT vkd3d_get_typeless_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format)
{
if (format->plane_footprints)
const struct vkd3d_format *format = vkd3d_get_format(device, dxgi_format, true);
unsigned int i;
if (!format)
return DXGI_FORMAT_UNKNOWN;
if (format->type == VKD3D_FORMAT_TYPE_TYPELESS)
return dxgi_format;
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
{
return format->plane_footprints[plane_idx];
}
else
{
struct vkd3d_format_footprint footprint;
footprint.dxgi_format = format->dxgi_format;
footprint.block_width = format->block_width;
footprint.block_height = format->block_height;
footprint.subsample_x_log2 = 0;
footprint.subsample_y_log2 = 0;
footprint.block_byte_count = format->byte_count * format->block_byte_count;
return footprint;
if (vkd3d_format_compatibility_info[i].format == dxgi_format)
return vkd3d_format_compatibility_info[i].typeless_format;
}
return DXGI_FORMAT_UNKNOWN;
}
VkFormat vkd3d_internal_get_vk_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format)
const struct vkd3d_format *vkd3d_find_uint_format(const struct d3d12_device *device, DXGI_FORMAT dxgi_format)
{
const struct vkd3d_format *format;
DXGI_FORMAT typeless_format = DXGI_FORMAT_UNKNOWN;
const struct vkd3d_format *vkd3d_format;
unsigned int i;
if ((format = vkd3d_get_format(device, dxgi_format, false)))
return format->vk_format;
if (!(typeless_format = vkd3d_get_typeless_format(device, dxgi_format)))
return NULL;
return VK_FORMAT_UNDEFINED;
for (i = 0; i < ARRAY_SIZE(vkd3d_format_compatibility_info); ++i)
{
if (vkd3d_format_compatibility_info[i].typeless_format != typeless_format)
continue;
vkd3d_format = vkd3d_get_format(device, vkd3d_format_compatibility_info[i].format, false);
if (vkd3d_format->type == VKD3D_FORMAT_TYPE_UINT)
return vkd3d_format;
}
return NULL;
}
void vkd3d_format_copy_data(const struct vkd3d_format *format, const uint8_t *src,
@ -639,15 +525,12 @@ void vkd3d_format_copy_data(const struct vkd3d_format *format, const uint8_t *sr
VKD3D_EXPORT VkFormat vkd3d_get_vk_format(DXGI_FORMAT format)
{
unsigned int i;
const struct vkd3d_format *vkd3d_format;
for (i = 0; i < ARRAY_SIZE(vkd3d_formats); ++i)
{
if (vkd3d_formats[i].dxgi_format == format)
return vkd3d_formats[i].vk_format;
}
if (!(vkd3d_format = vkd3d_get_format(NULL, format, false)))
return VK_FORMAT_UNDEFINED;
return VK_FORMAT_UNDEFINED;
return vkd3d_format->vk_format;
}
VKD3D_EXPORT DXGI_FORMAT vkd3d_get_dxgi_format(VkFormat format)
@ -672,7 +555,6 @@ bool is_valid_feature_level(D3D_FEATURE_LEVEL feature_level)
{
static const D3D_FEATURE_LEVEL valid_feature_levels[] =
{
D3D_FEATURE_LEVEL_12_2,
D3D_FEATURE_LEVEL_12_1,
D3D_FEATURE_LEVEL_12_0,
D3D_FEATURE_LEVEL_11_1,
@ -884,11 +766,6 @@ const char *debug_dxgi_format(DXGI_FORMAT format)
ENUM_NAME(DXGI_FORMAT_P8)
ENUM_NAME(DXGI_FORMAT_A8P8)
ENUM_NAME(DXGI_FORMAT_B4G4R4A4_UNORM)
ENUM_NAME(DXGI_FORMAT_P208)
ENUM_NAME(DXGI_FORMAT_V208)
ENUM_NAME(DXGI_FORMAT_V408)
ENUM_NAME(DXGI_FORMAT_SAMPLER_FEEDBACK_MIN_MIP_OPAQUE)
ENUM_NAME(DXGI_FORMAT_SAMPLER_FEEDBACK_MIP_REGION_USED_OPAQUE)
ENUM_NAME(DXGI_FORMAT_FORCE_UINT)
}
#undef ENUM_NAME
@ -1018,16 +895,6 @@ HRESULT hresult_from_errno(int rc)
HRESULT hresult_from_vk_result(VkResult vr)
{
/* Wine tends to dispatch Vulkan calls to their own syscall stack.
* Crashes are captured and return this magic VkResult.
* Report it explicitly here so it's easier to debug when it happens. */
if (vr == -1073741819)
{
ERR("Detected segfault in Wine syscall handler.\n");
/* HACK: For ad-hoc debugging can also trigger backtrace printing here. */
return E_POINTER;
}
switch (vr)
{
case VK_SUCCESS:
@ -1037,9 +904,6 @@ HRESULT hresult_from_vk_result(VkResult vr)
/* fall-through */
case VK_ERROR_OUT_OF_HOST_MEMORY:
return E_OUTOFMEMORY;
case VK_ERROR_VALIDATION_FAILED_EXT:
/* NV driver sometimes returns this on invalid API usage. */
return E_INVALIDARG;
default:
FIXME("Unhandled VkResult %d.\n", vr);
/* fall-through */

View File

@ -52,7 +52,7 @@ void vkd3d_descriptor_debug_unregister_heap(uint64_t cookie);
void vkd3d_descriptor_debug_register_resource_cookie(
struct vkd3d_descriptor_qa_global_info *global_info,
uint64_t cookie, const D3D12_RESOURCE_DESC1 *desc);
uint64_t cookie, const D3D12_RESOURCE_DESC *desc);
void vkd3d_descriptor_debug_register_allocation_cookie(
struct vkd3d_descriptor_qa_global_info *global_info,
uint64_t cookie, const struct vkd3d_allocate_memory_info *info);

View File

@ -165,28 +165,15 @@ static CONST_VTBL struct ID3D12RootSignatureDeserializerVtbl d3d12_root_signatur
static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
enum vkd3d_root_signature_version target_version,
bool raw_payload,
vkd3d_shader_hash_t *compatibility_hash)
enum vkd3d_root_signature_version target_version)
{
struct vkd3d_versioned_root_signature_desc desc, converted_desc;
int ret;
if (raw_payload)
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc)) < 0)
{
if ((ret = vkd3d_shader_parse_root_signature_raw(dxbc->code, dxbc->size, &desc, compatibility_hash)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
}
else
{
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc, compatibility_hash)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return ret;
}
if (desc.version == target_version)
@ -210,27 +197,15 @@ static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code
}
int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *out_desc)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0, false,
compatibility_hash);
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0);
}
int vkd3d_parse_root_signature_v_1_1(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
struct vkd3d_versioned_root_signature_desc *out_desc)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, false,
compatibility_hash);
}
int vkd3d_parse_root_signature_v_1_1_from_raw_payload(const struct vkd3d_shader_code *dxbc,
struct vkd3d_versioned_root_signature_desc *out_desc,
vkd3d_shader_hash_t *compatibility_hash)
{
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, true,
compatibility_hash);
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1);
}
static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signature_deserializer *deserializer,
@ -241,7 +216,7 @@ static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signatur
deserializer->ID3D12RootSignatureDeserializer_iface.lpVtbl = &d3d12_root_signature_deserializer_vtbl;
deserializer->refcount = 1;
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d)) < 0)
return hresult_from_vkd3d_result(ret);
return S_OK;
@ -419,7 +394,7 @@ static HRESULT d3d12_versioned_root_signature_deserializer_init(struct d3d12_ver
deserializer->ID3D12VersionedRootSignatureDeserializer_iface.lpVtbl = &d3d12_versioned_root_signature_deserializer_vtbl;
deserializer->refcount = 1;
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d)) < 0)
{
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
return hresult_from_vkd3d_result(ret);

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,6 @@
bool vkd3d_renderdoc_active(void);
bool vkd3d_renderdoc_loaded_api(void);
bool vkd3d_renderdoc_should_capture_shader_hash(vkd3d_shader_hash_t hash);
bool vkd3d_renderdoc_global_capture_enabled(void);
bool vkd3d_renderdoc_begin_capture(void *instance);
void vkd3d_renderdoc_end_capture(void *instance);

View File

@ -45,14 +45,11 @@ enum vkd3d_meta_copy_mode
#include <cs_resolve_binary_queries.h>
#include <cs_resolve_predicate.h>
#include <cs_resolve_query.h>
#include <cs_execute_indirect_patch.h>
#include <cs_execute_indirect_patch_debug_ring.h>
#include <vs_fullscreen_layer.h>
#include <vs_fullscreen.h>
#include <gs_fullscreen.h>
#include <fs_copy_image_float.h>
#include <fs_copy_image_uint.h>
#include <fs_copy_image_stencil.h>
#include <vs_swapchain_fullscreen.h>
#include <fs_swapchain_fullscreen.h>

View File

@ -41,7 +41,6 @@ VK_INSTANCE_PFN(vkEnumeratePhysicalDevices)
VK_INSTANCE_PFN(vkGetDeviceProcAddr)
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures)
VK_INSTANCE_PFN(vkGetPhysicalDeviceFormatProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceFormatProperties2)
VK_INSTANCE_PFN(vkGetPhysicalDeviceImageFormatProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceMemoryProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties)
@ -49,7 +48,6 @@ VK_INSTANCE_PFN(vkGetPhysicalDeviceQueueFamilyProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceSparseImageFormatProperties)
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures2)
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties2)
VK_INSTANCE_PFN(vkGetPhysicalDeviceExternalSemaphoreProperties)
/* VK_EXT_debug_utils */
VK_INSTANCE_EXT_PFN(vkCreateDebugUtilsMessengerEXT)
@ -62,14 +60,22 @@ VK_DEVICE_PFN(vkAllocateCommandBuffers)
VK_DEVICE_PFN(vkAllocateDescriptorSets)
VK_DEVICE_PFN(vkAllocateMemory)
VK_DEVICE_PFN(vkBeginCommandBuffer)
VK_DEVICE_PFN(vkBindBufferMemory)
VK_DEVICE_PFN(vkBindImageMemory)
VK_DEVICE_PFN(vkCmdBeginQuery)
VK_DEVICE_PFN(vkCmdBeginRenderPass)
VK_DEVICE_PFN(vkCmdBindDescriptorSets)
VK_DEVICE_PFN(vkCmdBindIndexBuffer)
VK_DEVICE_PFN(vkCmdBindPipeline)
VK_DEVICE_PFN(vkCmdBindVertexBuffers)
VK_DEVICE_PFN(vkCmdBlitImage)
VK_DEVICE_PFN(vkCmdClearAttachments)
VK_DEVICE_PFN(vkCmdClearColorImage)
VK_DEVICE_PFN(vkCmdClearDepthStencilImage)
VK_DEVICE_PFN(vkCmdCopyBuffer)
VK_DEVICE_PFN(vkCmdCopyBufferToImage)
VK_DEVICE_PFN(vkCmdCopyImage)
VK_DEVICE_PFN(vkCmdCopyImageToBuffer)
VK_DEVICE_PFN(vkCmdCopyQueryPoolResults)
VK_DEVICE_PFN(vkCmdDispatch)
VK_DEVICE_PFN(vkCmdDispatchIndirect)
@ -78,6 +84,7 @@ VK_DEVICE_PFN(vkCmdDrawIndexed)
VK_DEVICE_PFN(vkCmdDrawIndexedIndirect)
VK_DEVICE_PFN(vkCmdDrawIndirect)
VK_DEVICE_PFN(vkCmdEndQuery)
VK_DEVICE_PFN(vkCmdEndRenderPass)
VK_DEVICE_PFN(vkCmdExecuteCommands)
VK_DEVICE_PFN(vkCmdFillBuffer)
VK_DEVICE_PFN(vkCmdNextSubpass)
@ -85,6 +92,7 @@ VK_DEVICE_PFN(vkCmdPipelineBarrier)
VK_DEVICE_PFN(vkCmdPushConstants)
VK_DEVICE_PFN(vkCmdResetEvent)
VK_DEVICE_PFN(vkCmdResetQueryPool)
VK_DEVICE_PFN(vkCmdResolveImage)
VK_DEVICE_PFN(vkCmdSetBlendConstants)
VK_DEVICE_PFN(vkCmdSetDepthBias)
VK_DEVICE_PFN(vkCmdSetDepthBounds)
@ -113,6 +121,7 @@ VK_DEVICE_PFN(vkCreateImageView)
VK_DEVICE_PFN(vkCreatePipelineCache)
VK_DEVICE_PFN(vkCreatePipelineLayout)
VK_DEVICE_PFN(vkCreateQueryPool)
VK_DEVICE_PFN(vkCreateRenderPass)
VK_DEVICE_PFN(vkCreateSampler)
VK_DEVICE_PFN(vkCreateSemaphore)
VK_DEVICE_PFN(vkCreateShaderModule)
@ -130,6 +139,7 @@ VK_DEVICE_PFN(vkDestroyPipeline)
VK_DEVICE_PFN(vkDestroyPipelineCache)
VK_DEVICE_PFN(vkDestroyPipelineLayout)
VK_DEVICE_PFN(vkDestroyQueryPool)
VK_DEVICE_PFN(vkDestroyRenderPass)
VK_DEVICE_PFN(vkDestroySampler)
VK_DEVICE_PFN(vkDestroySemaphore)
VK_DEVICE_PFN(vkDestroyShaderModule)
@ -153,6 +163,7 @@ VK_DEVICE_PFN(vkGetImageSparseMemoryRequirements2)
VK_DEVICE_PFN(vkGetImageSubresourceLayout)
VK_DEVICE_PFN(vkGetPipelineCacheData)
VK_DEVICE_PFN(vkGetQueryPoolResults)
VK_DEVICE_PFN(vkGetRenderAreaGranularity)
VK_DEVICE_PFN(vkInvalidateMappedMemoryRanges)
VK_DEVICE_PFN(vkMapMemory)
VK_DEVICE_PFN(vkMergePipelineCaches)
@ -207,34 +218,11 @@ VK_DEVICE_EXT_PFN(vkCmdCopyAccelerationStructureKHR)
VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceFragmentShadingRatesKHR)
VK_DEVICE_EXT_PFN(vkCmdSetFragmentShadingRateKHR)
/* VK_KHR_bind_memory2 */
VK_DEVICE_EXT_PFN(vkBindBufferMemory2KHR)
VK_DEVICE_EXT_PFN(vkBindImageMemory2KHR)
/* VK_KHR_copy_commands2 */
VK_DEVICE_EXT_PFN(vkCmdBlitImage2KHR)
VK_DEVICE_EXT_PFN(vkCmdCopyBuffer2KHR)
VK_DEVICE_EXT_PFN(vkCmdCopyBufferToImage2KHR)
VK_DEVICE_EXT_PFN(vkCmdCopyImage2KHR)
VK_DEVICE_EXT_PFN(vkCmdCopyImageToBuffer2KHR)
VK_DEVICE_EXT_PFN(vkCmdResolveImage2KHR)
/* VK_KHR_maintenance4 */
VK_DEVICE_EXT_PFN(vkGetDeviceBufferMemoryRequirementsKHR)
VK_DEVICE_EXT_PFN(vkGetDeviceImageMemoryRequirementsKHR)
VK_DEVICE_EXT_PFN(vkGetDeviceImageSparseMemoryRequirementsKHR)
#ifdef VK_KHR_external_memory_win32
/* VK_KHR_external_memory_win32 */
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandleKHR)
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandlePropertiesKHR)
#endif
#ifdef VK_KHR_external_semaphore_win32
/* VK_KHR_external_semaphore_win32 */
VK_DEVICE_EXT_PFN(vkGetSemaphoreWin32HandleKHR)
VK_DEVICE_EXT_PFN(vkImportSemaphoreWin32HandleKHR)
#endif
/* VK_KHR_create_renderpass2 */
VK_DEVICE_EXT_PFN(vkCmdBeginRenderPass2KHR)
VK_DEVICE_EXT_PFN(vkCmdEndRenderPass2KHR)
VK_DEVICE_EXT_PFN(vkCmdNextSubpass2KHR)
VK_DEVICE_EXT_PFN(vkCreateRenderPass2KHR)
/* VK_EXT_calibrated_timestamps */
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
@ -263,9 +251,6 @@ VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveTopologyEXT)
VK_DEVICE_EXT_PFN(vkCmdSetScissorWithCountEXT)
VK_DEVICE_EXT_PFN(vkCmdSetViewportWithCountEXT)
/* VK_EXT_extended_dynamic_state2 */
VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveRestartEnableEXT)
/* VK_EXT_external_memory_host */
VK_DEVICE_EXT_PFN(vkGetMemoryHostPointerPropertiesEXT)
@ -289,41 +274,9 @@ VK_DEVICE_EXT_PFN(vkGetSwapchainImagesKHR)
VK_DEVICE_EXT_PFN(vkAcquireNextImageKHR)
VK_DEVICE_EXT_PFN(vkQueuePresentKHR)
/* VK_KHR_dynamic_rendering */
VK_DEVICE_EXT_PFN(vkCmdBeginRenderingKHR)
VK_DEVICE_EXT_PFN(vkCmdEndRenderingKHR)
/* VK_KHR_ray_tracing_maintenance1 */
VK_DEVICE_EXT_PFN(vkCmdTraceRaysIndirect2KHR)
/* VK_AMD_buffer_marker */
VK_DEVICE_EXT_PFN(vkCmdWriteBufferMarkerAMD)
/* VK_NV_device_diagnostic_checkpoints */
VK_DEVICE_EXT_PFN(vkCmdSetCheckpointNV)
VK_DEVICE_EXT_PFN(vkGetQueueCheckpointDataNV)
/* VK_NVX_binary_import */
VK_DEVICE_EXT_PFN(vkCreateCuModuleNVX)
VK_DEVICE_EXT_PFN(vkCreateCuFunctionNVX)
VK_DEVICE_EXT_PFN(vkDestroyCuModuleNVX)
VK_DEVICE_EXT_PFN(vkDestroyCuFunctionNVX)
VK_DEVICE_EXT_PFN(vkCmdCuLaunchKernelNVX)
/* VK_NVX_image_view_handle */
VK_DEVICE_EXT_PFN(vkGetImageViewHandleNVX)
VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
/* VK_VALVE_descriptor_set_host_mapping */
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
/* VK_NV_device_generated_commands */
VK_DEVICE_EXT_PFN(vkCreateIndirectCommandsLayoutNV)
VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV)
VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV)
VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV)
#undef VK_INSTANCE_PFN
#undef VK_INSTANCE_EXT_PFN
#undef VK_DEVICE_PFN

View File

@ -1,4 +1,4 @@
project('vkd3d-proton', ['c'], version : '2.6', meson_version : '>= 0.49', default_options : [
project('vkd3d-proton', ['c'], version : '2.3.1', meson_version : '>= 0.49', default_options : [
'warning_level=2',
])
@ -64,11 +64,6 @@ if not enable_trace
add_project_arguments('-DVKD3D_NO_TRACE_MESSAGES', language : 'c')
endif
enable_breadcrumbs = enable_trace
if enable_breadcrumbs
add_project_arguments('-DVKD3D_ENABLE_BREADCRUMBS', language : 'c')
endif
vkd3d_external_includes = [ './subprojects/Vulkan-Headers/include', './subprojects/SPIRV-Headers/include' ]
vkd3d_public_includes = [ './include' ] + vkd3d_external_includes
vkd3d_private_includes = [ './include/private' ] + vkd3d_public_includes
@ -83,8 +78,8 @@ idl_generator = generator(idl_compiler,
arguments : [ '-h', '-o', '@OUTPUT@', '@INPUT@' ])
glsl_compiler = find_program('glslangValidator')
glsl_args = [ '-V', '--target-env', 'vulkan1.1', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
if run_command(glsl_compiler, [ '--quiet', '--version' ], check : false).returncode() == 0
glsl_args = [ '-V', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
if run_command(glsl_compiler, [ '--quiet', '--version' ]).returncode() == 0
glsl_args += [ '--quiet' ]
endif
glsl_generator = generator(glsl_compiler,

View File

@ -75,35 +75,17 @@ def main():
parser.add_argument('--per-iteration', action = 'store_true', help = 'Represent ticks in terms of ticks / iteration. Cannot be used with --divider.')
parser.add_argument('--name', nargs = '+', type = str, help = 'Only display data for certain counters.')
parser.add_argument('--sort', type = str, default = 'none', help = 'Sorts input data according to "iterations" or "ticks".')
parser.add_argument('--delta', type = str, help = 'Subtract iterations and timing from other profile blob.')
parser.add_argument('profile', help = 'The profile binary blob.')
args = parser.parse_args()
if not args.profile:
raise AssertionError('Need profile folder.')
delta_map = {}
if args.delta is not None:
with open(args.delta, 'rb') as f:
for block in iter(lambda: f.read(64), b''):
if is_valid_block(block):
b = parse_block(block)
delta_map[b.name] = b
blocks = []
with open(args.profile, 'rb') as f:
for block in iter(lambda: f.read(64), b''):
if is_valid_block(block):
b = parse_block(block)
if b.name in delta_map:
d = delta_map[b.name]
b = ProfileCase(ticks = b.ticks - d.ticks,
iterations = b.iterations - d.iterations,
name = b.name)
if b.iterations < 0 or b.ticks < 0:
raise AssertionError('After subtracting, iterations or ticks became negative.')
if b.iterations > 0:
blocks.append(b)
blocks.append(parse_block(block))
if args.divider is not None:
if args.per_iteration:
@ -132,11 +114,11 @@ def main():
print(' Iterations:', block.iterations)
if args.divider is not None:
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "Kcycles")
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "us")
elif args.per_iteration:
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "us")
else:
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "us")
if __name__ == '__main__':
main()

@ -1 +1 @@
Subproject commit ae217c17809fadb232ec94b29304b4afcd417bb4
Subproject commit f9e1ffdcc1c123b79dd9f6002b418d9703d98904

@ -1 +1 @@
Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab
Subproject commit fe9850767d00e46b230da6cfbc15eb86636017bd

@ -1 +1 @@
Subproject commit 9f2fd6356c14376ab5b88518d6dd4e6787084525
Subproject commit 77722451141d3ac63477d3f603176f13f40569ff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,936 +0,0 @@
/*
* Copyright 2016-2017 Józef Kucia for CodeWeavers
* Copyright 2020-2021 Philip Rebohle for Valve Corporation
* Copyright 2020-2021 Joshua Ashton for Valve Corporation
* Copyright 2020-2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "d3d12_crosstest.h"
/* Exercises ID3D12GraphicsCommandList::ClearDepthStencilView in two scenarios:
 *
 * 1. A D32_FLOAT_S8X24_UINT resource is cleared three times (depth + stencil,
 *    depth only, stencil only); subresources 0 and 1 are then copied into
 *    R32_FLOAT and R8_UINT textures and read back.
 * 2. A D32_FLOAT resource with six subresources is cleared through DSVs that
 *    are re-created to cover different array slice ranges, and every
 *    subresource is compared against expected_values[].
 *
 * The test returns early without doing anything if the test context cannot
 * be created. */
void test_clear_depth_stencil_view(void)
{
/* Expected depth per subresource index in the second scenario. */
static const float expected_values[] = {0.5f, 0.1f, 0.1f, 0.6, 1.0f, 0.5f};
ID3D12GraphicsCommandList *command_list;
D3D12_DEPTH_STENCIL_VIEW_DESC dsv_desc;
ID3D12Resource *tmp_float, *tmp_uint;
struct depth_stencil_resource ds;
unsigned int dsv_increment_size;
D3D12_CLEAR_VALUE clear_value;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Device *device;
unsigned int i;
/* Request a test context without a render target. */
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
/* The DSV descriptor increment size is expected to be non-zero. */
dsv_increment_size = ID3D12Device_GetDescriptorHandleIncrementSize(device,
D3D12_DESCRIPTOR_HEAP_TYPE_DSV);
trace("DSV descriptor handle increment size: %u.\n", dsv_increment_size);
ok(dsv_increment_size, "Got unexpected increment size %#x.\n", dsv_increment_size);
/* Scenario 1: 32x32 combined depth/stencil resource. */
clear_value.Format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
clear_value.DepthStencil.Depth = 0.5f;
clear_value.DepthStencil.Stencil = 0x3;
init_depth_stencil(&ds, device, 32, 32, 1, 1, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, 0, &clear_value);
/* Tests that separate layout clear works correctly. */
/* First clear both aspects, then overwrite depth only and stencil only.
 * The readback below expects 0.75 and 0x7, i.e. the values of the two
 * single-aspect clears. */
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL, 0.5f, 0x3, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, 0.75f, 0x7, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_STENCIL, 0.75f, 0x7, 0, NULL);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* Temporary color textures that receive the copied subresources. */
tmp_float = create_default_texture2d(context.device, 32, 32, 1, 1, DXGI_FORMAT_R32_FLOAT,
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
tmp_uint = create_default_texture2d(context.device, 32, 32, 1, 1, DXGI_FORMAT_R8_UINT,
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
{
D3D12_TEXTURE_COPY_LOCATION dst_location, src_location;
D3D12_BOX src_box;
dst_location.SubresourceIndex = 0;
dst_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src_location.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX;
src_location.pResource = ds.texture;
/* Copy the full 32x32 area. */
src_box.left = 0;
src_box.right = 32;
src_box.top = 0;
src_box.bottom = 32;
src_box.front = 0;
src_box.back = 1;
/* Source subresource 0 goes to the R32_FLOAT texture
 * (presumably the depth plane - TODO confirm). */
dst_location.pResource = tmp_float;
src_location.SubresourceIndex = 0;
ID3D12GraphicsCommandList_CopyTextureRegion(context.list, &dst_location, 0, 0, 0, &src_location, &src_box);
/* Source subresource 1 goes to the R8_UINT texture
 * (presumably the stencil plane - TODO confirm). */
dst_location.pResource = tmp_uint;
src_location.SubresourceIndex = 1;
ID3D12GraphicsCommandList_CopyTextureRegion(context.list, &dst_location, 0, 0, 0, &src_location, &src_box);
}
transition_resource_state(command_list, tmp_float,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
transition_resource_state(command_list, tmp_uint,
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* NOTE(review): the command list is reset after each check_sub_resource_*
 * call before it is recorded again; presumably the check helpers submit
 * and close the list - confirm against the helper implementation. */
check_sub_resource_float(tmp_float, 0, queue, command_list, 0.75f, 1);
reset_command_list(command_list, context.allocator);
check_sub_resource_uint8(tmp_uint, 0, queue, command_list, 0x7, 0);
ID3D12Resource_Release(tmp_float);
ID3D12Resource_Release(tmp_uint);
destroy_depth_stencil(&ds);
reset_command_list(command_list, context.allocator);
/* Scenario 2: D32_FLOAT resource created with a count of 6
 * (presumably array layers, matching the six expected_values entries). */
clear_value.Format = DXGI_FORMAT_D32_FLOAT;
init_depth_stencil(&ds, device, 32, 32, 6, 1, DXGI_FORMAT_D32_FLOAT, 0, &clear_value);
/* Clear through the view set up by init_depth_stencil(). Subresources 0 and
 * 5 are never cleared again and are expected to hold this value. */
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, expected_values[0], 0, 0, NULL);
/* Re-create the DSV in place to cover array slices 1-2. */
memset(&dsv_desc, 0, sizeof(dsv_desc));
dsv_desc.Format = DXGI_FORMAT_D32_FLOAT;
dsv_desc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DARRAY;
dsv_desc.Texture2DArray.FirstArraySlice = 1;
dsv_desc.Texture2DArray.ArraySize = 2;
ID3D12Device_CreateDepthStencilView(device, ds.texture, &dsv_desc, ds.dsv_handle);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, expected_values[1], 0, 0, NULL);
/* Array slice 3 only. */
dsv_desc.Texture2DArray.FirstArraySlice = 3;
dsv_desc.Texture2DArray.ArraySize = 1;
ID3D12Device_CreateDepthStencilView(device, ds.texture, &dsv_desc, ds.dsv_handle);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, expected_values[3], 0, 0, NULL);
/* Array slice 4 only (ArraySize is still 1 from above). */
dsv_desc.Texture2DArray.FirstArraySlice = 4;
ID3D12Device_CreateDepthStencilView(device, ds.texture, &dsv_desc, ds.dsv_handle);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, expected_values[4], 0, 0, NULL);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* Read back every subresource and compare it with its expected depth. */
for (i = 0; i < ARRAY_SIZE(expected_values); ++i)
{
check_sub_resource_float(ds.texture, i, queue, command_list, expected_values[i], 1);
reset_command_list(command_list, context.allocator);
}
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}
/* Exercises ID3D12GraphicsCommandList::ClearRenderTargetView() on 32x32
 * textures through several kinds of render target view:
 *   - typed views of an R8G8B8A8_TYPELESS and an R16G16B16A16_TYPELESS texture,
 *   - single-slice views of a 2D array and of a 2D multisample array,
 *   - a default (NULL desc) view and a W-slice-restricted view of a 3D texture.
 * Every clear is verified by reading the affected subresource back. A single
 * RTV descriptor is rewritten for each view. */
void test_clear_render_target_view(void)
{
/* Readback value expected for the slice cleared with the array_colors entry
 * at the same index (the STATIC_ASSERT below keeps the two arrays in sync). */
static const unsigned int array_expected_colors[] = {0xff00ff00, 0xff0000ff, 0xffff0000};
static const float array_colors[][4] =
{
{0.0f, 1.0f, 0.0f, 1.0f},
{1.0f, 0.0f, 0.0f, 1.0f},
{0.0f, 0.0f, 1.0f, 1.0f},
};
/* Clear colour with negative components; only used with the UINT/SINT formats below. */
static const float negative_value[] = {1.0f, -1.0f, -0.5f, -2.0f};
static const float color[] = {0.1f, 0.5f, 0.3f, 0.75f};
static const float green[] = {0.0f, 1.0f, 0.0f, 1.0f};
ID3D12GraphicsCommandList *command_list;
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
D3D12_HEAP_PROPERTIES heap_properties;
D3D12_RESOURCE_DESC resource_desc;
unsigned int rtv_increment_size;
ID3D12DescriptorHeap *rtv_heap;
D3D12_CLEAR_VALUE clear_value;
struct test_context_desc desc;
struct resource_readback rb;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Resource *resource;
ID3D12Device *device;
unsigned int i;
D3D12_BOX box;
HRESULT hr;
/* Cases for the R8G8B8A8_TYPELESS texture: the float clear colour, the view
 * format the texture is cleared through, and the 32-bit texel value expected
 * in the readback. */
static const struct
{
const float *color;
DXGI_FORMAT format;
uint32_t result;
}
r8g8b8a8[] =
{
{color, DXGI_FORMAT_R8G8B8A8_UNORM_SRGB, 0xbf95bc59},
{green, DXGI_FORMAT_R8G8B8A8_UNORM, 0xff00ff00},
{color, DXGI_FORMAT_R8G8B8A8_UNORM, 0xbf4c7f19},
{green, DXGI_FORMAT_R8G8B8A8_UINT, 0x01000100},
{color, DXGI_FORMAT_R8G8B8A8_UINT, 0x00000000},
{negative_value, DXGI_FORMAT_R8G8B8A8_UINT, 0x00000001},
{green, DXGI_FORMAT_R8G8B8A8_SINT, 0x01000100},
{color, DXGI_FORMAT_R8G8B8A8_SINT, 0x00000000},
{negative_value, DXGI_FORMAT_R8G8B8A8_SINT, 0xfe00ff01},
};
/* Same layout as above for the R16G16B16A16_TYPELESS texture, with a 64-bit
 * expected texel value. */
static const struct
{
const float *color;
DXGI_FORMAT format;
uint64_t result;
}
r16g16b16a16[] =
{
{green, DXGI_FORMAT_R16G16B16A16_UNORM, 0xffff0000ffff0000},
{green, DXGI_FORMAT_R16G16B16A16_UINT, 0x0001000000010000},
{color, DXGI_FORMAT_R16G16B16A16_UINT, 0x0000000000000000},
{negative_value, DXGI_FORMAT_R16G16B16A16_UINT, 0x0000000000000001},
{green, DXGI_FORMAT_R16G16B16A16_SINT, 0x0001000000010000},
{color, DXGI_FORMAT_R16G16B16A16_SINT, 0x0000000000000000},
{negative_value, DXGI_FORMAT_R16G16B16A16_SINT, 0xfffe0000ffff0001},
};
STATIC_ASSERT(ARRAY_SIZE(array_colors) == ARRAY_SIZE(array_expected_colors));
/* no_render_target is set; every resource cleared by this test is created
 * explicitly below. */
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
/* One RTV descriptor only: it is overwritten for every view in this test. */
rtv_heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1);
/* The increment size is only traced, never used for addressing. */
rtv_increment_size = ID3D12Device_GetDescriptorHandleIncrementSize(device,
D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
trace("RTV descriptor handle increment size: %u.\n", rtv_increment_size);
rtv_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(rtv_heap);
memset(&heap_properties, 0, sizeof(heap_properties));
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
/* resource_desc is not zero-initialised; every member is assigned here and
 * selectively modified for the later sub-tests. */
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D;
resource_desc.Alignment = 0;
resource_desc.Width = 32;
resource_desc.Height = 32;
resource_desc.DepthOrArraySize = 1;
resource_desc.MipLevels = 1;
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_TYPELESS;
resource_desc.SampleDesc.Count = 1;
resource_desc.SampleDesc.Quality = 0;
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
/* Optimized clear value (opaque red, UNORM) passed at resource creation. */
clear_value.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
clear_value.Color[0] = 1.0f;
clear_value.Color[1] = 0.0f;
clear_value.Color[2] = 0.0f;
clear_value.Color[3] = 1.0f;
hr = ID3D12Device_CreateCommittedResource(device,
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_value,
&IID_ID3D12Resource, (void **)&resource);
ok(hr == S_OK, "Failed to create texture, hr %#x.\n", hr);
memset(&rtv_desc, 0, sizeof(rtv_desc));
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
/* R8G8B8A8: view the typeless texture with each format, clear the whole view
 * (no rects) and compare subresource 0 against the expected texel value. */
for (i = 0; i < ARRAY_SIZE(r8g8b8a8); ++i)
{
vkd3d_test_set_context("Test %u", i);
rtv_desc.Format = r8g8b8a8[i].format;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, r8g8b8a8[i].color, 0, NULL);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* The final argument is presumably the maximum allowed per-channel
 * difference - TODO confirm against the helper. */
check_sub_resource_uint(resource, 0, queue, command_list, r8g8b8a8[i].result, 2);
/* Start a fresh list and put the texture back into the state the next
 * iteration's clear needs. */
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
}
vkd3d_test_set_context(NULL);
/* R16G16B16A16 */
/* At this point the list only holds the last iteration's transition back to
 * RENDER_TARGET; it is closed and reset, and the old texture is released. */
hr = ID3D12GraphicsCommandList_Close(command_list);
ok(hr == S_OK, "Failed to close command list, hr %#x.\n", hr);
reset_command_list(command_list, context.allocator);
ID3D12Resource_Release(resource);
resource_desc.Format = DXGI_FORMAT_R16G16B16A16_TYPELESS;
/* No optimized clear value is supplied for this texture. */
hr = ID3D12Device_CreateCommittedResource(device,
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
D3D12_RESOURCE_STATE_RENDER_TARGET, NULL,
&IID_ID3D12Resource, (void **)&resource);
ok(hr == S_OK, "Failed to create texture, hr %#x.\n", hr);
for (i = 0; i < ARRAY_SIZE(r16g16b16a16); ++i)
{
vkd3d_test_set_context("Test %u", i);
rtv_desc.Format = r16g16b16a16[i].format;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, r16g16b16a16[i].color, 0, NULL);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint64(resource, 0, queue, command_list, r16g16b16a16[i].result, 0);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
}
vkd3d_test_set_context(NULL);
/* 2D array texture */
hr = ID3D12GraphicsCommandList_Close(command_list);
ok(hr == S_OK, "Failed to close command list, hr %#x.\n", hr);
reset_command_list(command_list, context.allocator);
ID3D12Resource_Release(resource);
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_TYPELESS;
resource_desc.DepthOrArraySize = ARRAY_SIZE(array_colors);
hr = ID3D12Device_CreateCommittedResource(device,
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_value,
&IID_ID3D12Resource, (void **)&resource);
ok(hr == S_OK, "Failed to create texture, hr %#x.\n", hr);
/* Clear each array slice with its own colour through a single-slice view.
 * All clears are recorded into the same list before any readback. */
for (i = 0; i < ARRAY_SIZE(array_colors); ++i)
{
memset(&rtv_desc, 0, sizeof(rtv_desc));
rtv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY;
rtv_desc.Texture2DArray.FirstArraySlice = i;
rtv_desc.Texture2DArray.ArraySize = 1;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
}
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* Subresource i is array slice i here because the texture has one mip level. */
for (i = 0; i < ARRAY_SIZE(array_expected_colors); ++i)
{
check_sub_resource_uint(resource, i, queue, command_list, array_expected_colors[i], 2);
reset_command_list(command_list, context.allocator);
}
/* 2D multisample array texture */
/* Same per-slice procedure as above, on a 4-sample texture with a fully
 * typed format and TEXTURE2DMSARRAY views. */
ID3D12Resource_Release(resource);
resource_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
resource_desc.SampleDesc.Count = 4;
hr = ID3D12Device_CreateCommittedResource(device,
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_value,
&IID_ID3D12Resource, (void **)&resource);
ok(hr == S_OK, "Failed to create texture, hr %#x.\n", hr);
for (i = 0; i < ARRAY_SIZE(array_colors); ++i)
{
memset(&rtv_desc, 0, sizeof(rtv_desc));
rtv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY;
rtv_desc.Texture2DMSArray.FirstArraySlice = i;
rtv_desc.Texture2DMSArray.ArraySize = 1;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
}
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
for (i = 0; i < ARRAY_SIZE(array_expected_colors); ++i)
{
check_sub_resource_uint(resource, i, queue, command_list, array_expected_colors[i], 2);
reset_command_list(command_list, context.allocator);
}
/* 3D texture */
ID3D12Resource_Release(resource);
resource_desc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D;
resource_desc.DepthOrArraySize = 32;
resource_desc.MipLevels = 1;
resource_desc.SampleDesc.Count = 1;
hr = ID3D12Device_CreateCommittedResource(device,
&heap_properties, D3D12_HEAP_FLAG_NONE, &resource_desc,
D3D12_RESOURCE_STATE_RENDER_TARGET, &clear_value,
&IID_ID3D12Resource, (void **)&resource);
ok(hr == S_OK, "Failed to create texture, hr %#x.\n", hr);
/* First pass: a view created with a NULL description; the whole subresource
 * is expected to hold the clear colour afterwards. */
ID3D12Device_CreateRenderTargetView(device, resource, NULL, rtv_handle);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, color, 0, NULL);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint(resource, 0, queue, command_list, 0xbf4c7f19, 2);
/* Second pass: a view restricted to two W slices starting at slice 2, cleared
 * to green on top of the previous contents. */
memset(&rtv_desc, 0, sizeof(rtv_desc));
rtv_desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE3D;
rtv_desc.Texture3D.FirstWSlice = 2;
rtv_desc.Texture3D.WSize = 2;
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, green, 0, NULL);
transition_resource_state(command_list, resource,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_texture_readback_with_command_list(resource, 0, &rb, queue, command_list);
/* Check three depth ranges: slices before the view keep the first colour,
 * the viewed slices are green, and the slices after keep the first colour.
 * set_box() arguments are presumably (left, top, front, right, bottom, back)
 * - TODO confirm against the helper. */
set_box(&box, 0, 0, 0, 32, 32, 2);
check_readback_data_uint(&rb, &box, 0xbf4c7f19, 1);
set_box(&box, 0, 0, 2, 32, 32, 4);
check_readback_data_uint(&rb, &box, 0xff00ff00, 1);
set_box(&box, 0, 0, 4, 32, 32, 32);
check_readback_data_uint(&rb, &box, 0xbf4c7f19, 1);
release_resource_readback(&rb);
ID3D12Resource_Release(resource);
ID3D12DescriptorHeap_Release(rtv_heap);
destroy_test_context(&context);
}
/* Exercises ClearUnorderedAccessViewUint() and ClearUnorderedAccessViewFloat()
 * on buffer UAVs with different formats, element ranges and flags.
 *
 * For every table entry a fresh placed buffer is created, the whole buffer is
 * filled with a background value through an R32_UINT view, the view under test
 * is cleared, and the readback checks that the viewed element range holds the
 * expected value while the elements before and after it still hold the
 * background value. */
void test_clear_unordered_access_view_buffer(void)
{
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
ID3D12DescriptorHeap *cpu_heap, *gpu_heap;
ID3D12GraphicsCommandList *command_list;
struct test_context_desc desc;
struct test_context context;
struct resource_readback rb;
ID3D12CommandQueue *queue;
D3D12_HEAP_DESC heap_desc;
ID3D12Resource *buffer;
ID3D12Device *device;
UINT clear_value[4];
unsigned int i, j;
ID3D12Heap *heap;
D3D12_BOX box;
HRESULT hr;
#define BUFFER_SIZE (1024 * 1024)
/* Test table.
 *   format             - format of the UAV under test.
 *   buffer_uav         - view range and flags; the initialisers follow the
 *                        D3D12_BUFFER_UAV member order (FirstElement,
 *                        NumElements, StructureByteStride,
 *                        CounterOffsetInBytes, Flags).
 *   values             - the four clear values; when is_float is set they are
 *                        float bit patterns and are passed to the float clear.
 *   expected           - 32-bit value expected in every cleared element of the
 *                        R32_TYPELESS readback.
 *   is_float           - use ClearUnorderedAccessViewFloat() instead of Uint().
 *   is_null_descriptor - overwrite the descriptor under test with a
 *                        NULL-resource UAV before clearing. */
static const struct
{
DXGI_FORMAT format;
D3D12_BUFFER_UAV buffer_uav;
unsigned int values[4];
unsigned int expected;
bool is_float;
bool is_null_descriptor;
}
tests[] =
{
{DXGI_FORMAT_R32_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R32_UINT, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R32_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{1, 0, 0, 0}, 1},
{DXGI_FORMAT_R32_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{1, 0, 0, 0}, 0, false, true},
{DXGI_FORMAT_R32_UINT, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{2, 0, 0, 0}, 2},
{DXGI_FORMAT_R32_UINT, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{3, 0, 0, 0}, 3},
{DXGI_FORMAT_R32_UINT, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{4, 2, 3, 4}, 4},
{DXGI_FORMAT_R32_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t) - 10, 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{5, 0, 0, 0}, 5},
{DXGI_FORMAT_R32_TYPELESS, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R32_TYPELESS, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R32_TYPELESS, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{6, 0, 0, 0}, 6},
{DXGI_FORMAT_R32_TYPELESS, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{6, 0, 0, 0}, 0, false, true},
{DXGI_FORMAT_R32_TYPELESS, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{7, 0, 0, 0}, 7},
{DXGI_FORMAT_R32_TYPELESS, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{8, 0, 0, 0}, 8},
{DXGI_FORMAT_R32_TYPELESS, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{9, 1, 1, 1}, 9},
{DXGI_FORMAT_R32_TYPELESS, {64, BUFFER_SIZE / sizeof(uint32_t) - 64, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{~0u, 0, 0, 0}, ~0u},
{DXGI_FORMAT_R32_TYPELESS, { 0, BUFFER_SIZE / sizeof(uint32_t) - 10, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{10, 0, 0, 0}, 10},
{DXGI_FORMAT_R32_TYPELESS, { 0, BUFFER_SIZE / sizeof(uint32_t) - 9, 0, 0, D3D12_BUFFER_UAV_FLAG_RAW},
{11, 0, 0, 0}, 11},
{DXGI_FORMAT_R32_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R32_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{1, 0, 0, 0}, 1},
{DXGI_FORMAT_R32_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f800000 /* 1.0f */, 0, 0, 0}, 0x3f800000 /* 1.0f */, true},
{DXGI_FORMAT_R16G16_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x1234, 0xabcd, 0, 0}, 0xabcd1234},
{DXGI_FORMAT_R16G16_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x10000, 0, 0, 0}, 0},
{DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x1234, 0xabcd, 0, 0}, 0xabcd1234},
{DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f000080 /* 0.50000762951f */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true},
{DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x40000000 /* 2.0f */, 0 /* 0.0f */, 0, 0}, 0x0000ffff, true},
{DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0xbf800000 /* -1.0f */, 0 /* 0.0f */, 0x3f000000 /* 0.5f */, 0x3f000000 /* 0.5f */}, 0, true},
{DXGI_FORMAT_R16G16_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x1234, 0xabcd, 0, 0}, 0xabcd1234},
{DXGI_FORMAT_R16G16_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true},
{DXGI_FORMAT_R8G8B8A8_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x11, 0x22, 0x33, 0x44}, 0x44332211},
{DXGI_FORMAT_R8G8B8A8_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x100, 0, 0, 0}, 0},
{DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0, 0, 0, 0}, 0},
{DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x7ff, 0x7ff, 0x3ff, 0}, 0xffffffff},
{DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x7ff, 0, 0x3ff, 0}, 0xffc007ff},
{DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0x40000000 /* 2.0f */, 0}, 0x801e0380, true},
{DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f000000 /* 0.5f */, 0 /* 0.0f */, 0xbf800000 /* -1.0f */, 0x3f000000 /* 0.5f */},
0x00000380, true},
{DXGI_FORMAT_R10G10B10A2_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x1010, 0x1020, 0x1030, 0x41}, (0x30 << 20) | (0x20 << 10) | (0x10 << 0) | (0x1 << 30)},
{DXGI_FORMAT_R10G10B10A2_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x1010, 0x1020, 0x1030, 0x41}, (0x30u << 20) | (0x20u << 10) | (0x10u << 0) | (0x1u << 30)},
{DXGI_FORMAT_R10G10B10A2_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE},
{0x3f002008 /* 0.5004887585532747f */, 0x3f800000 /* 1.0f */, 0, 0x3f800000 /* 1.0f */},
(0x3ffu << 10) | (0x200u << 0) | (0x3u << 30), true},
};
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
/* Two descriptors per heap: slot 0 holds the view under test, slot 1 a
 * whole-buffer R32_UINT view used to write the background value. The clear
 * calls take both a GPU handle and a CPU handle, hence the two heaps. */
cpu_heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
gpu_heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
/* Buffer-only heap of twice the buffer size; every test buffer is placed in
 * it by create_placed_buffer() below. */
heap_desc.SizeInBytes = 2 * BUFFER_SIZE;
memset(&heap_desc.Properties, 0, sizeof(heap_desc.Properties));
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
hr = ID3D12Device_CreateHeap(device, &heap_desc, &IID_ID3D12Heap, (void **)&heap);
ok(hr == S_OK, "Failed to create heap, hr %#x.\n", hr);
for (i = 0; i < ARRAY_SIZE(tests); ++i)
{
vkd3d_test_set_context("Test %u", i);
/* The two BUFFER_SIZE arguments are presumably the heap offset and the
 * buffer size - TODO confirm against the helper. */
buffer = create_placed_buffer(device, heap, BUFFER_SIZE, BUFFER_SIZE,
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
/* Background value: ~0u only when the case expects 0 from a real (non-null)
 * descriptor, so the clear stays distinguishable; 0 in every other case.
 * The condition parses as (expected || is_null_descriptor) ? 0 : ~0u. */
for (j = 0; j < ARRAY_SIZE(clear_value); ++j)
clear_value[j] = tests[i].expected || tests[i].is_null_descriptor ? 0 : ~0u;
/* Slot 1: R32_UINT view covering the entire buffer. */
memset(&uav_desc, 0, sizeof(uav_desc));
uav_desc.Format = DXGI_FORMAT_R32_UINT;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer.NumElements = BUFFER_SIZE / sizeof(uint32_t);
ID3D12Device_CreateUnorderedAccessView(device, buffer, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, cpu_heap, 1));
ID3D12Device_CreateUnorderedAccessView(device, buffer, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, gpu_heap, 1));
/* Slot 0: the view described by the test case. */
uav_desc.Format = tests[i].format;
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
uav_desc.Buffer = tests[i].buffer_uav;
ID3D12Device_CreateUnorderedAccessView(device, buffer, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, cpu_heap, 0));
ID3D12Device_CreateUnorderedAccessView(device, buffer, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, gpu_heap, 0));
if (tests[i].is_null_descriptor)
{
/* Test that we can clear out any knowledge about the existing descriptor. */
ID3D12Device_CreateUnorderedAccessView(device, NULL, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, cpu_heap, 0));
ID3D12Device_CreateUnorderedAccessView(device, NULL, NULL, &uav_desc,
get_cpu_descriptor_handle(&context, gpu_heap, 0));
}
/* Fill the whole buffer with the background value first. */
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(command_list,
get_gpu_descriptor_handle(&context, gpu_heap, 1),
get_cpu_descriptor_handle(&context, cpu_heap, 1),
buffer, clear_value, 0, NULL);
/* UAV barrier between the background fill and the clear under test. */
uav_barrier(command_list, buffer);
/* The clear under test. The buffer is still passed as the resource even
 * when slot 0 has been replaced by a NULL-resource descriptor. */
if (tests[i].is_float)
ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(command_list,
get_gpu_descriptor_handle(&context, gpu_heap, 0),
get_cpu_descriptor_handle(&context, cpu_heap, 0),
buffer, (const float *)tests[i].values, 0, NULL);
else
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(command_list,
get_gpu_descriptor_handle(&context, gpu_heap, 0),
get_cpu_descriptor_handle(&context, cpu_heap, 0),
buffer, tests[i].values, 0, NULL);
/* One-row box; only left/right are varied below. */
set_box(&box, 0, 0, 0, 1, 1, 1);
transition_resource_state(command_list, buffer,
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_TYPELESS, &rb, queue, command_list);
/* Elements in front of the view must still hold the background value
 * (this range is empty when FirstElement is 0). */
box.left = 0;
box.right = uav_desc.Buffer.FirstElement;
check_readback_data_uint(&rb, &box, clear_value[0], 0);
/* Elements covered by the view must hold the expected value; a deviation
 * argument of 1 is passed for float clears, 0 otherwise. */
box.left = uav_desc.Buffer.FirstElement;
box.right = uav_desc.Buffer.FirstElement + uav_desc.Buffer.NumElements;
check_readback_data_uint(&rb, &box, tests[i].expected, tests[i].is_float ? 1 : 0);
/* Elements behind the view must still hold the background value.
 * NOTE(review): the upper bound is derived from the view format while the
 * readback is R32_TYPELESS; this presumably matches because every format
 * in the table is 32 bits wide - confirm format_size() returns bytes. */
box.left = uav_desc.Buffer.FirstElement + uav_desc.Buffer.NumElements;
box.right = BUFFER_SIZE / format_size(uav_desc.Format);
check_readback_data_uint(&rb, &box, clear_value[0], 0);
release_resource_readback(&rb);
reset_command_list(command_list, context.allocator);
ID3D12Resource_Release(buffer);
}
vkd3d_test_set_context(NULL);
ID3D12DescriptorHeap_Release(cpu_heap);
ID3D12DescriptorHeap_Release(gpu_heap);
ID3D12Heap_Release(heap);
destroy_test_context(&context);
#undef BUFFER_SIZE
}
/* Exercises ClearUnorderedAccessViewUint() and ClearUnorderedAccessViewFloat()
 * on texture UAVs.
 *
 * Every entry of the test table is run once per entry of uav_dimensions (2D,
 * 2D array and 3D). For each combination a texture is created, filled with a
 * background value through a view covering all layers of the tested mip level,
 * then cleared through the view under test (optionally limited by clear
 * rects). The readback is compared texel by texel: texels inside the cleared
 * region must hold the expected value, all others the background value.
 *
 * Combinations whose format does not support typed UAVs, or whose texture
 * cannot be created, are skipped. */
void test_clear_unordered_access_view_image(void)
{
    D3D12_FEATURE_DATA_FORMAT_SUPPORT format_support;
    unsigned int expected_colour, actual_colour;
    D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
    ID3D12DescriptorHeap *cpu_heap, *gpu_heap;
    ID3D12GraphicsCommandList *command_list;
    unsigned int i, j, d, p, z, layer;
    D3D12_HEAP_PROPERTIES heap_properties;
    unsigned int image_size, image_depth;
    D3D12_RESOURCE_DESC resource_desc;
    struct test_context_desc desc;
    struct test_context context;
    struct resource_readback rb;
    ID3D12CommandQueue *queue;
    bool is_inside, success;
    ID3D12Resource *texture;
    ID3D12Device *device;
    UINT clear_value[4];
    HRESULT hr;
    int x, y;

#define IMAGE_SIZE 16u
    /* Test table.
     *   format             - texture and view format.
     *   image_mips         - mip level count of the texture.
     *   image_layers       - DepthOrArraySize of the texture.
     *   mip_level          - mip slice selected by the view under test.
     *   first_layer,
     *   layer_count        - array slice (or W slice) range of the view.
     *   rect_count,
     *   clear_rects        - rectangles passed to the clear; 0 rects clears
     *                        the whole view.
     *   values             - the four clear values; float bit patterns when
     *                        is_float is set.
     *   expected           - 32-bit texel value expected inside the cleared
     *                        region.
     *   is_float           - use the float clear instead of the uint clear.
     *   is_null_descriptor - overwrite the descriptor under test with a
     *                        NULL-resource UAV before clearing. */
    static const struct
    {
        DXGI_FORMAT format;
        unsigned int image_mips;
        unsigned int image_layers;
        unsigned int mip_level;
        unsigned int first_layer;
        unsigned int layer_count;
        unsigned int rect_count;
        RECT clear_rects[2];
        unsigned int values[4];
        unsigned int expected;
        bool is_float;
        bool is_null_descriptor;
    }
    tests[] =
    {
        /* Test clearing a specific mip level. */
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 0, 0, 1, 0, {{0}}, {1,          0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 0, 0, 1, 0, {{0}}, {1,          0, 0, 0}, 0, false, true},
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 1, 0, 1, 0, {{0}}, {1,          0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 0, 0, 1, 0, {{0}}, {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 1, 0, 1, 0, {{0}}, {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        {DXGI_FORMAT_R32_FLOAT,       2, 1, 1, 0, 1, 0, {{0}}, {0x3f000000, 0, 0, 0}, 0, true, true},
        /* Test clearing specific array layers. */
        {DXGI_FORMAT_R32_FLOAT,       1, IMAGE_SIZE, 0, 0, IMAGE_SIZE, 0, {{0}}, {1, 0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       1, IMAGE_SIZE, 0, 3, 2,          0, {{0}}, {1, 0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       1, IMAGE_SIZE, 0, 0, IMAGE_SIZE, 0, {{0}},
                {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        {DXGI_FORMAT_R32_FLOAT,       1, IMAGE_SIZE, 0, 3, 2,          0, {{0}},
                {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        /* Test a single clear rect. */
        {DXGI_FORMAT_R32_FLOAT,       1, 1, 0, 0, 1, 1, {{1, 2, IMAGE_SIZE - 4, IMAGE_SIZE - 2}},
                {1,          0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       1, 1, 0, 0, 1, 1, {{1, 2, IMAGE_SIZE - 4, IMAGE_SIZE - 2}},
                {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        /* Test multiple clear rects. */
        {DXGI_FORMAT_R32_FLOAT,       1, 1, 0, 0, 1, 2, {{1, 2, 3, 4}, {5, 6, 7, 8}},
                {1,          0, 0, 0}, 1},
        {DXGI_FORMAT_R32_FLOAT,       1, 1, 0, 0, 1, 2, {{1, 2, 3, 4}, {5, 6, 7, 8}},
                {0x3f000000, 0, 0, 0}, 0x3f000000, true},
        /* Test uint clears with formats. */
        {DXGI_FORMAT_R16G16_UINT,     1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x00020001},
        {DXGI_FORMAT_R16G16_UINT,     1, 1, 0, 0, 1, 0, {{0}}, {0x12345, 0, 0, 0}, 0x00002345},
        {DXGI_FORMAT_R16G16_UNORM,    1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x00020001},
        {DXGI_FORMAT_R16G16_FLOAT,    1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x00020001},
        {DXGI_FORMAT_R8G8B8A8_UINT,   1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x04030201},
        {DXGI_FORMAT_R8G8B8A8_UINT,   1, 1, 0, 0, 1, 0, {{0}}, {0x123,   0, 0, 0}, 0x00000023},
        {DXGI_FORMAT_R8G8B8A8_UNORM,  1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x04030201},
        {DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {0,       0, 0, 0}, 0x00000000},
        {DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {1,       2, 3, 4}, 0x00c01001},
        /* Test float clears with formats. */
        {DXGI_FORMAT_R16G16_UNORM,    1, 1, 0, 0, 1, 0, {{0}},
                {0x3f000080 /* 0.5f + unorm16 epsilon */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true},
        {DXGI_FORMAT_R16G16_FLOAT,    1, 1, 0, 0, 1, 0, {{0}},
                {0x3f000080 /* 0.5f + epsilon */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true},
        {DXGI_FORMAT_R8G8B8A8_UNORM,  1, 1, 0, 0, 1, 0, {{0}},
                {0x3f000080 /* 0.5f + epsilon */, 0x3f800000 /* 1.0f */, 0, 0}, 0x0000ff80, true},
        {DXGI_FORMAT_R8G8B8A8_UNORM,  1, 1, 0, 0, 1, 0, {{0}},
                {0, 0, 0x3f000080 /* 0.5f + epsilon */, 0x3f800000 /* 1.0f */}, 0xff800000, true},
        {DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}},
                {0x3f000000 /* 0.5f */, 0 /* 0.0f */, 0xbf800000 /* -1.0f */, 0x3f000000 /* 0.5f */},
                0x00000380, true},
        {DXGI_FORMAT_B8G8R8A8_UNORM,  1, 1, 0, 0, 1, 0, {{0}},
                {0, 0, 0x3f000080 /* 0.5f + epsilon */, 0x3f800000 /* 1.0f */}, 0xff000080, true},
        {DXGI_FORMAT_B8G8R8A8_UNORM,  1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04010203},
    };

    /* Resource dimension / view dimension pairs every test case is run with.
     * Cases with more than one layer only run on entries marked is_layered. */
    static const struct
    {
        D3D12_RESOURCE_DIMENSION resource_dim;
        D3D12_UAV_DIMENSION view_dim;
        bool is_layered;
    }
    uav_dimensions[] =
    {
        {D3D12_RESOURCE_DIMENSION_TEXTURE2D, D3D12_UAV_DIMENSION_TEXTURE2D,      false},
        {D3D12_RESOURCE_DIMENSION_TEXTURE2D, D3D12_UAV_DIMENSION_TEXTURE2DARRAY, true },
        /* Expected behaviour with partial layer coverage is unclear. */
        {D3D12_RESOURCE_DIMENSION_TEXTURE3D, D3D12_UAV_DIMENSION_TEXTURE3D,      false},
    };

    memset(&desc, 0, sizeof(desc));
    desc.no_render_target = true;
    if (!init_test_context(&context, &desc))
        return;
    device = context.device;
    command_list = context.list;
    queue = context.queue;

    /* Slot 0 holds the view under test, slot 1 a view of all layers used to
     * write the background value. The clear calls need a GPU handle and a CPU
     * handle, hence the two heaps. */
    cpu_heap = create_cpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
    gpu_heap = create_gpu_descriptor_heap(device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);

    memset(&heap_properties, 0, sizeof(heap_properties));
    heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;

    for (d = 0; d < ARRAY_SIZE(uav_dimensions); ++d)
    {
        for (i = 0; i < ARRAY_SIZE(tests); ++i)
        {
            vkd3d_test_set_context("Dim %u, Test %u", d, i);

            if (tests[i].image_layers > 1 && !uav_dimensions[d].is_layered)
                continue;

            /* Skip formats that cannot be used for typed UAVs on this device. */
            memset(&format_support, 0, sizeof(format_support));
            format_support.Format = tests[i].format;

            if (FAILED(hr = ID3D12Device_CheckFeatureSupport(device,
                    D3D12_FEATURE_FORMAT_SUPPORT, &format_support, sizeof(format_support))) ||
                    !(format_support.Support1 & D3D12_FORMAT_SUPPORT1_TYPED_UNORDERED_ACCESS_VIEW))
            {
                skip("Format %u not supported.\n", tests[i].format);
                continue;
            }

            resource_desc.Dimension = uav_dimensions[d].resource_dim;
            resource_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
            resource_desc.Width = IMAGE_SIZE;
            resource_desc.Height = IMAGE_SIZE;
            if (uav_dimensions[d].resource_dim == D3D12_RESOURCE_DIMENSION_TEXTURE1D)
                resource_desc.Height = 1;
            resource_desc.DepthOrArraySize = tests[i].image_layers;
            resource_desc.MipLevels = tests[i].image_mips;
            resource_desc.Format = tests[i].format;
            resource_desc.SampleDesc.Count = 1;
            resource_desc.SampleDesc.Quality = 0;
            resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
            resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;

            if (FAILED(hr = ID3D12Device_CreateCommittedResource(device, &heap_properties,
                    D3D12_HEAP_FLAG_NONE, &resource_desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
                    NULL, &IID_ID3D12Resource, (void **)&texture)))
            {
                skip("Failed to create texture, hr %#x.\n", hr);
                continue;
            }

            uav_desc.Format = tests[i].format;
            uav_desc.ViewDimension = uav_dimensions[d].view_dim;

            /* j == 0: the view under test (layer range from the test case).
             * j == 1: a view of the same mip level covering all layers. */
            for (j = 0; j < 2; ++j)
            {
                unsigned int first_layer = j ? 0 : tests[i].first_layer;
                unsigned int layer_count = j ? tests[i].image_layers : tests[i].layer_count;

                switch (uav_desc.ViewDimension)
                {
                    case D3D12_UAV_DIMENSION_TEXTURE1D:
                        uav_desc.Texture1D.MipSlice = tests[i].mip_level;
                        break;

                    case D3D12_UAV_DIMENSION_TEXTURE1DARRAY:
                        uav_desc.Texture1DArray.MipSlice = tests[i].mip_level;
                        uav_desc.Texture1DArray.FirstArraySlice = first_layer;
                        uav_desc.Texture1DArray.ArraySize = layer_count;
                        break;

                    case D3D12_UAV_DIMENSION_TEXTURE2D:
                        uav_desc.Texture2D.MipSlice = tests[i].mip_level;
                        uav_desc.Texture2D.PlaneSlice = 0;
                        break;

                    case D3D12_UAV_DIMENSION_TEXTURE2DARRAY:
                        uav_desc.Texture2DArray.MipSlice = tests[i].mip_level;
                        uav_desc.Texture2DArray.FirstArraySlice = first_layer;
                        uav_desc.Texture2DArray.ArraySize = layer_count;
                        uav_desc.Texture2DArray.PlaneSlice = 0;
                        break;

                    case D3D12_UAV_DIMENSION_TEXTURE3D:
                        uav_desc.Texture3D.MipSlice = tests[i].mip_level;
                        uav_desc.Texture3D.FirstWSlice = first_layer;
                        uav_desc.Texture3D.WSize = layer_count;
                        break;

                    default:
                        /* Applies to the enclosing for loop: no descriptor is
                         * written for an unhandled view dimension. */
                        continue;
                }

                ID3D12Device_CreateUnorderedAccessView(device, texture, NULL,
                        &uav_desc, get_cpu_descriptor_handle(&context, cpu_heap, j));
                ID3D12Device_CreateUnorderedAccessView(device, texture, NULL,
                        &uav_desc, get_cpu_descriptor_handle(&context, gpu_heap, j));
            }

            if (tests[i].is_null_descriptor)
            {
                /* Test that we can clear out any knowledge about the existing descriptor. */
                ID3D12Device_CreateUnorderedAccessView(device, NULL, NULL,
                        &uav_desc, get_cpu_descriptor_handle(&context, cpu_heap, 0));
                ID3D12Device_CreateUnorderedAccessView(device, NULL, NULL,
                        &uav_desc, get_cpu_descriptor_handle(&context, gpu_heap, 0));
            }

            /* Background value: ~0u only when the case expects 0 from a real
             * (non-null) descriptor, so the clear stays distinguishable. */
            for (j = 0; j < 4; ++j)
            {
                clear_value[j] = tests[i].expected || tests[i].is_null_descriptor ? 0u : ~0u;
            }

            /* Fill every layer of the tested mip level with the background value. */
            ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(command_list,
                    get_gpu_descriptor_handle(&context, gpu_heap, 1),
                    get_cpu_descriptor_handle(&context, cpu_heap, 1),
                    texture, clear_value, 0, NULL);

            uav_barrier(command_list, texture);

            /* The clear under test. */
            if (tests[i].is_float)
                ID3D12GraphicsCommandList_ClearUnorderedAccessViewFloat(command_list,
                        get_gpu_descriptor_handle(&context, gpu_heap, 0),
                        get_cpu_descriptor_handle(&context, cpu_heap, 0),
                        texture, (const float *)tests[i].values, tests[i].rect_count, tests[i].clear_rects);
            else
                ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(command_list,
                        get_gpu_descriptor_handle(&context, gpu_heap, 0),
                        get_cpu_descriptor_handle(&context, cpu_heap, 0),
                        texture, tests[i].values, tests[i].rect_count, tests[i].clear_rects);

            transition_resource_state(command_list, texture,
                    D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);

            /* Dimensions of the tested mip level. For 3D textures the depth
             * slices live inside one subresource; for 2D arrays each layer is
             * its own subresource and image_depth is 1. */
            image_depth = uav_dimensions[d].resource_dim == D3D12_RESOURCE_DIMENSION_TEXTURE3D
                    ? max(tests[i].image_layers >> tests[i].mip_level, 1u) : 1;
            image_size = max(IMAGE_SIZE >> tests[i].mip_level, 1u);

            for (layer = 0; layer < tests[i].image_layers / image_depth; ++layer)
            {
                /* Subresource index = mip + layer * mip count. */
                get_texture_readback_with_command_list(texture,
                        tests[i].mip_level + (layer * tests[i].image_mips),
                        &rb, queue, command_list);

                for (p = 0; p < image_depth * image_size * image_size; ++p)
                {
                    /* Decompose the linear texel index into coordinates. */
                    x = p % image_size;
                    y = (p / image_size) % image_size;
                    z = p / (image_size * image_size);

                    /* Without rects the whole view is cleared; otherwise the
                     * texel must fall inside at least one rect. */
                    is_inside = tests[i].rect_count == 0;

                    for (j = 0; j < tests[i].rect_count; ++j)
                    {
                        if (y >= tests[i].clear_rects[j].top && y < tests[i].clear_rects[j].bottom
                                && x >= tests[i].clear_rects[j].left && x < tests[i].clear_rects[j].right)
                        {
                            is_inside = true;
                            break;
                        }
                    }

                    /* The texel must also lie in the layer (or W slice) range
                     * selected by the view under test. */
                    if (uav_dimensions[d].resource_dim == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
                        is_inside = is_inside && z >= tests[i].first_layer
                                && z < tests[i].first_layer + tests[i].layer_count;
                    else
                        is_inside = is_inside && layer >= tests[i].first_layer
                                && layer < tests[i].first_layer + tests[i].layer_count;

                    expected_colour = is_inside ? tests[i].expected : clear_value[0];
                    actual_colour = get_readback_uint(&rb, x, y, z);
                    success = compare_color(actual_colour, expected_colour, tests[i].is_float ? 1 : 0);

                    ok(success, "At layer %u, (%u,%u,%u), expected %#x, got %#x.\n",
                            layer, x, y, z, expected_colour, actual_colour);

                    /* Report only the first mismatching texel per subresource. */
                    if (!success)
                        break;
                }

                release_resource_readback(&rb);
                reset_command_list(command_list, context.allocator);
            }

            ID3D12Resource_Release(texture);
        }
    }

    /* Drop the "Dim %u, Test %u" prefix so it does not leak into the messages
     * of whatever test runs next, as the sibling clear tests do. */
    vkd3d_test_set_context(NULL);

    ID3D12DescriptorHeap_Release(cpu_heap);
    ID3D12DescriptorHeap_Release(gpu_heap);
    destroy_test_context(&context);
#undef IMAGE_SIZE
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,16 +19,12 @@
#ifndef __VKD3D_D3D12_CROSSTEST_H
#define __VKD3D_D3D12_CROSSTEST_H
#ifdef _MSC_VER
/* Used for M_PI */
#define _USE_MATH_DEFINES
#endif
#ifdef _WIN32
# include <vkd3d_win32.h>
#endif
#define COBJMACROS
#define INITGUID
#include "vkd3d_test.h"
#include "vkd3d_windows.h"
#define WIDL_C_INLINE_WRAPPERS
@ -57,8 +53,6 @@
extern PFN_D3D12_CREATE_DEVICE pfn_D3D12CreateDevice;
extern PFN_D3D12_ENABLE_EXPERIMENTAL_FEATURES pfn_D3D12EnableExperimentalFeatures;
extern PFN_D3D12_GET_DEBUG_INTERFACE pfn_D3D12GetDebugInterface;
extern PFN_D3D12_CREATE_VERSIONED_ROOT_SIGNATURE_DESERIALIZER pfn_D3D12CreateVersionedRootSignatureDeserializer;
extern PFN_D3D12_SERIALIZE_VERSIONED_ROOT_SIGNATURE pfn_D3D12SerializeVersionedRootSignature;
#if defined(_WIN32) && !defined(VKD3D_FORCE_UTILS_WRAPPER)
static inline HANDLE create_event(void)
@ -236,15 +230,6 @@ static HRESULT wait_for_fence(ID3D12Fence *fence, uint64_t value)
return ret == WAIT_OBJECT_0 ? S_OK : E_FAIL;
}
/* Waits for the fence to reach at least the given value without using an event object.
 * Returns S_OK right away when the completed value is already high enough,
 * otherwise returns whatever ID3D12Fence_SetEventOnCompletion() reports. */
static HRESULT wait_for_fence_no_event(ID3D12Fence *fence, uint64_t value)
{
    const bool already_reached = ID3D12Fence_GetCompletedValue(fence) >= value;

    /* With a NULL event handle this is defined to block on the value with infinite timeout. */
    return already_reached ? S_OK : ID3D12Fence_SetEventOnCompletion(fence, value, NULL);
}
static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue)
{
ID3D12Fence *fence;
@ -262,23 +247,6 @@ static void wait_queue_idle_(unsigned int line, ID3D12Device *device, ID3D12Comm
ID3D12Fence_Release(fence);
}
/* Drains the queue using wait_for_fence_no_event(): a temporary fence is created,
 * signalled from the queue, waited on, and released again.
 * Every step is asserted against the caller-supplied line number. */
static inline void wait_queue_idle_no_event_(unsigned int line, ID3D12Device *device, ID3D12CommandQueue *queue)
{
    const uint64_t signal_value = 1;
    ID3D12Fence *idle_fence;
    HRESULT result;

    result = ID3D12Device_CreateFence(device, 0, D3D12_FENCE_FLAG_NONE,
            &IID_ID3D12Fence, (void **)&idle_fence);
    assert_that_(line)(result == S_OK, "Failed to create fence, hr %#x.\n", result);

    /* The fence starts at 0, so it reaches signal_value once the queue processes the signal. */
    result = ID3D12CommandQueue_Signal(queue, idle_fence, signal_value);
    assert_that_(line)(result == S_OK, "Failed to signal fence, hr %#x.\n", result);

    result = wait_for_fence_no_event(idle_fence, signal_value);
    assert_that_(line)(result == S_OK, "Failed to wait for fence, hr %#x.\n", result);

    ID3D12Fence_Release(idle_fence);
}
static bool use_warp_device;
static unsigned int use_adapter_idx;
@ -340,7 +308,7 @@ static ID3D12Device *create_device(void)
return SUCCEEDED(hr) ? device : NULL;
}
static inline void init_adapter_info(void)
static void init_adapter_info(void)
{
char name[MEMBER_SIZE(DXGI_ADAPTER_DESC, Description)];
IDXGIAdapter *dxgi_adapter;
@ -637,7 +605,7 @@ static bool get_driver_properties(ID3D12Device *device, VkPhysicalDeviceDriverPr
return false;
}
static inline void init_adapter_info(void)
static void init_adapter_info(void)
{
VkPhysicalDeviceDriverPropertiesKHR driver_properties;
struct vkd3d_instance *instance;
@ -710,7 +678,7 @@ static inline bool is_depth_clip_enable_supported(ID3D12Device *device)
}
#endif
static inline void parse_args(int argc, char **argv)
static void parse_args(int argc, char **argv)
{
int i;
@ -723,7 +691,7 @@ static inline void parse_args(int argc, char **argv)
}
}
static inline void enable_d3d12_debug_layer(int argc, char **argv)
static void enable_d3d12_debug_layer(int argc, char **argv)
{
bool enable_debug_layer = false, enable_gpu_based_validation = false;
ID3D12Debug1 *debug1;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,377 +0,0 @@
/*
* Copyright 2016-2017 Józef Kucia for CodeWeavers
* Copyright 2020-2021 Philip Rebohle for Valve Corporation
* Copyright 2020-2021 Joshua Ashton for Valve Corporation
* Copyright 2020-2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "d3d12_crosstest.h"
/* Exercises ID3D12PipelineState_GetCachedBlob() on a compute pipeline:
 *  - the cached blob must be non-empty,
 *  - re-creating the pipeline with the blob and an identical description must succeed,
 *  - supplying the blob with a different shader or root signature must fail with E_INVALIDARG. */
void test_get_cached_blob(void)
{
    D3D12_COMPUTE_PIPELINE_STATE_DESC compute_desc;
    D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
    ID3D12RootSignature *root_signature_alt;
    ID3D12RootSignature *root_signature;
    struct test_context context;
    ID3D12PipelineState *state;
    ID3D12Device *device;
    ID3DBlob *blob;
    HRESULT hr;

#if 0
    [numthreads(1,1,1)]
    void main() { }
#endif
    static const DWORD cs_dxbc[] =
    {
        0x43425844, 0x1acc3ad0, 0x71c7b057, 0xc72c4306, 0xf432cb57, 0x00000001, 0x00000074, 0x00000003,
        0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
        0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000020, 0x00050050, 0x00000008, 0x0100086a,
        0x0400009b, 0x00000001, 0x00000001, 0x00000001, 0x0100003e,
    };

#if 0
    [numthreads(2,1,1)]
    void main() { }
#endif
    static const DWORD cs_dxbc_2[] =
    {
        0x43425844, 0xcdd3f1fb, 0x7e892d91, 0xe5a2ea15, 0xab4fc56d, 0x00000001, 0x00000074, 0x00000003,
        0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
        0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000020, 0x00050050, 0x00000008, 0x0100086a,
        0x0400009b, 0x00000002, 0x00000001, 0x00000001, 0x0100003e,
    };

    if (!init_test_context(&context, NULL))
        return;
    device = context.device;

    /* Two root signatures that differ only in their flags. */
    memset(&root_signature_desc, 0, sizeof(root_signature_desc));
    hr = create_root_signature(device, &root_signature_desc, &root_signature);
    ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);

    root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
    hr = create_root_signature(device, &root_signature_desc, &root_signature_alt);
    ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);

    memset(&compute_desc, 0, sizeof(compute_desc));
    compute_desc.pRootSignature = root_signature;
    compute_desc.CS.pShaderBytecode = cs_dxbc;
    compute_desc.CS.BytecodeLength = sizeof(cs_dxbc);

    hr = ID3D12Device_CreateComputePipelineState(device,
            &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
    ok(hr == S_OK, "Failed to create compute pipeline, hr %#x.\n", hr);

    hr = ID3D12PipelineState_GetCachedBlob(state, &blob);
    ok(hr == S_OK, "Failed to get cached blob, hr %#x.\n", hr);
    ok(ID3D10Blob_GetBufferSize(blob) > 0, "Cached blob is empty.\n");
    ID3D12PipelineState_Release(state);

    /* Matching description plus cached blob must be accepted. */
    compute_desc.CachedPSO.pCachedBlob = ID3D10Blob_GetBufferPointer(blob);
    compute_desc.CachedPSO.CachedBlobSizeInBytes = ID3D10Blob_GetBufferSize(blob);

    hr = ID3D12Device_CreateComputePipelineState(device,
            &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
    ok(hr == S_OK, "Failed to create compute pipeline, hr %#x.\n", hr);
    ID3D12PipelineState_Release(state);

    /* Using mismatched shader code must fail. */
    compute_desc.CS.pShaderBytecode = cs_dxbc_2;
    compute_desc.CS.BytecodeLength = sizeof(cs_dxbc_2);

    hr = ID3D12Device_CreateComputePipelineState(device,
            &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
    ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
    /* Do not leak the pipeline if creation unexpectedly succeeds. */
    if (SUCCEEDED(hr))
        ID3D12PipelineState_Release(state);

    /* Using mismatched root signature must fail. */
    compute_desc.CS.pShaderBytecode = cs_dxbc;
    compute_desc.CS.BytecodeLength = sizeof(cs_dxbc);
    compute_desc.pRootSignature = root_signature_alt;

    hr = ID3D12Device_CreateComputePipelineState(device,
            &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
    ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
    /* Do not leak the pipeline if creation unexpectedly succeeds. */
    if (SUCCEEDED(hr))
        ID3D12PipelineState_Release(state);

    ID3D12RootSignature_Release(root_signature);
    ID3D12RootSignature_Release(root_signature_alt);
    ID3D10Blob_Release(blob);
    destroy_test_context(&context);
}
void test_pipeline_library(void)
{
D3D12_GRAPHICS_PIPELINE_STATE_DESC graphics_desc;
D3D12_COMPUTE_PIPELINE_STATE_DESC compute_desc;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
ID3D12PipelineLibrary *pipeline_library;
ID3D12RootSignature *root_signature;
struct test_context context;
ID3D12PipelineState *state3;
ID3D12PipelineState *state2;
ID3D12PipelineState *state;
ULONG reference_refcount;
size_t serialized_size;
ID3D12Device1 *device1;
void *serialized_data;
ID3D12Device *device;
ID3D12Fence *fence;
HRESULT hr;
#if 0
[numthreads(1,1,1)]
void main() { }
#endif
static const DWORD cs_dxbc[] =
{
0x43425844, 0x1acc3ad0, 0x71c7b057, 0xc72c4306, 0xf432cb57, 0x00000001, 0x00000074, 0x00000003,
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000020, 0x00050050, 0x00000008, 0x0100086a,
0x0400009b, 0x00000001, 0x00000001, 0x00000001, 0x0100003e,
};
#if 0
float4 main() : SV_POSITION {
return float4(0.0f, 0.0f, 0.0f, 0.0f);
}
#endif
static const DWORD vs_dxbc[] =
{
0x43425844, 0xae39b246, 0xddd05b5a, 0x5057a6a2, 0x034461ee, 0x00000001, 0x000000b8, 0x00000003,
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000,
0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49, 0x58454853, 0x00000040, 0x00010050, 0x00000010,
0x0100086a, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x08000036, 0x001020f2, 0x00000000,
0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0100003e,
};
#if 0
float4 main() : SV_TARGET {
return float4(1.0f, 1.0f, 1.0f, 1.0f);
}
#endif
static const DWORD ps_dxbc[] =
{
0x43425844, 0x29b14cf3, 0xb991cf90, 0x9e455ffc, 0x4675b046, 0x00000001, 0x000000b4, 0x00000003,
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
0x0000000f, 0x545f5653, 0x45475241, 0xabab0054, 0x58454853, 0x0000003c, 0x00000050, 0x0000000f,
0x0100086a, 0x03000065, 0x001020f2, 0x00000000, 0x08000036, 0x001020f2, 0x00000000, 0x00004002,
0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x0100003e,
};
const WCHAR *graphics_name = u"GRAPHICS";
const WCHAR *compute_name = u"COMPUTE";
if (!init_test_context(&context, NULL))
return;
device = context.device;
if (FAILED(hr = ID3D12Device_QueryInterface(device, &IID_ID3D12Device1, (void**)&device1)))
{
skip("ID3D12Device1 not available.\n");
return;
}
/* Test adding pipelines to an empty pipeline library */
hr = ID3D12Device1_CreatePipelineLibrary(device1, NULL, 0, &IID_ID3D12PipelineLibrary, (void**)&pipeline_library);
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n", hr);
/* ppData == NULL means a query */
hr = ID3D12Device1_CreatePipelineLibrary(device1, NULL, 0, NULL, NULL);
ok(hr == S_FALSE, "Failed to query pipeline library, hr %#x.\n", hr);
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
hr = create_root_signature(device, &root_signature_desc, &root_signature);
ok(hr == S_OK, "Failed to create root signature, hr %#x.\n", hr);
memset(&compute_desc, 0, sizeof(compute_desc));
compute_desc.pRootSignature = root_signature;
compute_desc.CS.pShaderBytecode = cs_dxbc;
compute_desc.CS.BytecodeLength = sizeof(cs_dxbc);
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
hr = ID3D12Device_CreateComputePipelineState(device,
&compute_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to create compute pipeline, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, compute_name, state);
ok(hr == S_OK, "Failed to store compute pipeline, hr %x.\n", hr);
ID3D12PipelineState_Release(state);
memset(&graphics_desc, 0, sizeof(graphics_desc));
graphics_desc.pRootSignature = root_signature;
graphics_desc.VS.pShaderBytecode = vs_dxbc;
graphics_desc.VS.BytecodeLength = sizeof(vs_dxbc);
graphics_desc.PS.pShaderBytecode = ps_dxbc;
graphics_desc.PS.BytecodeLength = sizeof(ps_dxbc);
graphics_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
graphics_desc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK;
graphics_desc.RasterizerState.FrontCounterClockwise = true;
graphics_desc.SampleMask = 0x1;
graphics_desc.SampleDesc.Count = 1;
graphics_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
graphics_desc.NumRenderTargets = 1;
graphics_desc.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM;
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
hr = ID3D12Device_CreateGraphicsPipelineState(device,
&graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to create graphics pipeline, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, graphics_name, state);
ok(hr == S_OK, "Failed to store graphics pipeline, hr %x.\n", hr);
/* Try to load PSO after a Store. Verify that we have a ref-count. */
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library, graphics_name, &graphics_desc,
&IID_ID3D12PipelineState, (void**)&state2);
ok(hr == S_OK, "Failed to load graphics pipeline, hr %x.\n", hr);
ok(state == state2, "Resulting PSOs must point to same object.\n");
ok(get_refcount(state2) == 2, "Refcount %u != 2.\n", get_refcount(state2));
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, compute_name, state);
ok(hr == E_INVALIDARG, "Storing pipeline with already existing name succeeded, hr %x.\n", hr);
ID3D12PipelineState_Release(state);
ID3D12PipelineState_Release(state2);
/* Test looking up pipelines in a new pipeline library */
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
ID3D12PipelineState_Release(state);
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
ID3D12PipelineState_Release(state);
/* Verify that modifying a PSO description must be invalidated by runtime. */
graphics_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE;
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
graphics_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE;
serialized_size = ID3D12PipelineLibrary_GetSerializedSize(pipeline_library);
ok(serialized_size > 0, "Serialized size for pipeline library is 0.\n");
serialized_data = malloc(serialized_size);
hr = ID3D12PipelineLibrary_Serialize(pipeline_library, serialized_data, serialized_size - 1);
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_Serialize(pipeline_library, serialized_data, serialized_size);
ok(hr == S_OK, "Failed to serialize pipeline library, hr %#x.\n", hr);
ID3D12PipelineLibrary_Release(pipeline_library);
/* Test deserializing a pipeline library */
hr = ID3D12Device1_CreatePipelineLibrary(device1, serialized_data,
serialized_size, &IID_ID3D12PipelineLibrary, (void**)&pipeline_library);
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n");
/* Verify that PSO library must internally ref-count a unique PSO. */
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state2);
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state3);
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
ok(get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
ok(get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
ok(get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
ID3D12PipelineState_Release(state);
ID3D12PipelineState_Release(state2);
ID3D12PipelineState_Release(state3);
reference_refcount = get_refcount(context.device);
/* Verify that PSO library must internally ref-count a unique PSO. */
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state3);
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
ID3D12Device_CreateFence(context.device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
ID3D12PipelineState_SetPrivateDataInterface(state, &IID_ID3D12Fence, (const IUnknown *)fence);
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
ok(state && get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
ok(state2 && get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
ok(state3 && get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
ID3D12PipelineState_Release(state);
ID3D12PipelineState_Release(state2);
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
ok(ID3D12PipelineState_Release(state3) == 0, "Refcount did not hit 0.\n");
/* Releasing the last public reference does not release private data. */
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
/* Device ref count does release however ... */
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
/* Device ref count increases here again. */
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
ok(state == state2, "Reloading dead PSO must point to same object.\n");
ID3D12PipelineState_Release(state2);
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
graphics_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
ok(hr == E_INVALIDARG, "Unexpected hr %#x.\n", hr);
if (SUCCEEDED(hr))
ID3D12PipelineState_Release(state);
ID3D12PipelineLibrary_Release(pipeline_library);
/* This should release the fence reference. */
ok(get_refcount(fence) == 1, "Refcount %u != 1.\n", get_refcount(fence));
ID3D12Fence_Release(fence);
free(serialized_data);
ID3D12RootSignature_Release(root_signature);
ID3D12Device1_Release(device1);
destroy_test_context(&context);
}

View File

@ -1,722 +0,0 @@
/*
* Copyright 2016-2017 Józef Kucia for CodeWeavers
* Copyright 2020-2021 Philip Rebohle for Valve Corporation
* Copyright 2020-2021 Joshua Ashton for Valve Corporation
* Copyright 2020-2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "d3d12_crosstest.h"
/* Verifies that query heaps can be created for the occlusion, timestamp and
 * pipeline-statistics heap types, and for stream-output statistics unless the
 * implementation reports E_NOTIMPL for it. Finally checks that releasing the
 * device drops its reference count to zero. */
void test_create_query_heap(void)
{
    ID3D12Device *device;
    D3D12_QUERY_HEAP_DESC heap_desc;
    ID3D12QueryHeap *query_heap;
    ULONG refcount;
    unsigned int i;
    HRESULT hr;

    static const D3D12_QUERY_HEAP_TYPE types[] =
    {
        D3D12_QUERY_HEAP_TYPE_OCCLUSION,
        D3D12_QUERY_HEAP_TYPE_TIMESTAMP,
        D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS,
    };

    if (!(device = create_device()))
    {
        skip("Failed to create device.\n");
        return;
    }

    for (i = 0; i < ARRAY_SIZE(types); ++i)
    {
        heap_desc.Type = types[i];
        heap_desc.Count = 1;
        heap_desc.NodeMask = 0;

        hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
        ok(hr == S_OK, "Failed to create query heap, type %u, hr %#x.\n", types[i], hr);

        /* query_heap is only meaningful when creation succeeded; releasing it
         * after a failure would dereference an uninitialised pointer. */
        if (SUCCEEDED(hr))
            ID3D12QueryHeap_Release(query_heap);
    }

    heap_desc.Type = D3D12_QUERY_HEAP_TYPE_SO_STATISTICS;
    heap_desc.Count = 1;
    heap_desc.NodeMask = 0;

    hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
    if (hr != E_NOTIMPL)
    {
        ok(hr == S_OK, "Failed to create query heap, type %u, hr %#x.\n", heap_desc.Type, hr);
        /* Same guard as above: only release a heap that was actually created. */
        if (SUCCEEDED(hr))
            ID3D12QueryHeap_Release(query_heap);
    }
    else
    {
        skip("Stream output is not supported.\n");
    }

    refcount = ID3D12Device_Release(device);
    ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
}
/* Records four timestamp queries back to back, resolves them into a readback
 * buffer in two batches (one query, then the remaining three) and checks that
 *  - the resolved values never decrease, and
 *  - the span between the first and last timestamp does not exceed the CPU
 *    wall-clock time of the test, converted using the queue's timestamp frequency. */
void test_query_timestamp(void)
{
    UINT64 timestamps[4], timestamp_frequency, timestamp_diff, time_diff;
    const unsigned int query_count = ARRAY_SIZE(timestamps);
    ID3D12GraphicsCommandList *command_list;
    time_t cpu_begin, cpu_end;
    D3D12_QUERY_HEAP_DESC heap_desc;
    struct test_context_desc desc;
    struct test_context context;
    struct resource_readback rb;
    ID3D12QueryHeap *query_heap;
    ID3D12Resource *readback;
    ID3D12CommandQueue *queue;
    uint64_t elapsed_seconds;
    ID3D12Device *device;
    unsigned int idx;
    HRESULT hr;

    /* CPU start time is sampled before any device work is set up. */
    cpu_begin = time(NULL);

    memset(&desc, 0, sizeof(desc));
    desc.no_render_target = true;
    if (!init_test_context(&context, &desc))
        return;

    queue = context.queue;
    command_list = context.list;
    device = context.device;

    hr = ID3D12CommandQueue_GetTimestampFrequency(queue, &timestamp_frequency);
    ok(SUCCEEDED(hr), "Failed to get timestamp frequency, hr %#x.\n", hr);

    heap_desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
    heap_desc.Count = query_count;
    heap_desc.NodeMask = 0;
    hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
    ok(SUCCEEDED(hr), "Failed to create query heap, type %u, hr %#x.\n", heap_desc.Type, hr);

    readback = create_readback_buffer(device, sizeof(timestamps));

    for (idx = 0; idx < query_count; ++idx)
    {
        ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_TIMESTAMP, idx);
    }

    /* Resolve query 0 on its own, then queries 1..3 right behind it in the buffer. */
    ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap,
            D3D12_QUERY_TYPE_TIMESTAMP, 0, 1, readback, 0);
    ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap,
            D3D12_QUERY_TYPE_TIMESTAMP, 1, 3, readback, sizeof(uint64_t));

    get_buffer_readback_with_command_list(readback, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);

    /* One extra second is added to the CPU end time as slack for time()'s granularity. */
    cpu_end = time(NULL) + 1;

    for (idx = 0; idx < query_count; ++idx)
    {
        timestamps[idx] = get_readback_uint64(&rb, idx, 0);
    }

    for (idx = 1; idx < query_count; ++idx)
    {
        ok(timestamps[idx - 1] <= timestamps[idx], "Expected timestamps to monotonically increase, "
                "but got %"PRIu64" > %"PRIu64".\n", timestamps[idx - 1], timestamps[idx]);
    }

    elapsed_seconds = (uint64_t)difftime(cpu_end, cpu_begin);
    time_diff = elapsed_seconds * timestamp_frequency;
    timestamp_diff = timestamps[query_count - 1] - timestamps[0];

    ok(timestamp_diff <= time_diff, "Expected timestamp difference to be bounded by CPU time difference, "
            "but got %"PRIu64" > %"PRIu64".\n", timestamp_diff, time_diff);

    release_resource_readback(&rb);
    ID3D12QueryHeap_Release(query_heap);
    ID3D12Resource_Release(readback);
    destroy_test_context(&context);
}
/* Checks D3D12 pipeline-statistics queries using two queries in one heap:
 *  - query 0 brackets no work at all, so every counter must read back as zero;
 *  - query 1 brackets a single three-vertex draw with the context's default
 *    pipeline, and the individual counters are compared against what that draw
 *    must have produced. */
void test_query_pipeline_statistics(void)
{
    D3D12_QUERY_DATA_PIPELINE_STATISTICS *pipeline_statistics;
    static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
    ID3D12GraphicsCommandList *command_list;
    struct test_context context;
    ID3D12CommandQueue *queue;
    ID3D12Device *device;
    D3D12_QUERY_HEAP_DESC heap_desc;
    ID3D12QueryHeap *query_heap;
    ID3D12Resource *resource;
    struct resource_readback rb;
    unsigned int pixel_count, i;
    HRESULT hr;

    if (!init_test_context(&context, NULL))
        return;
    device = context.device;
    command_list = context.list;
    queue = context.queue;

    heap_desc.Type = D3D12_QUERY_HEAP_TYPE_PIPELINE_STATISTICS;
    heap_desc.Count = 2;
    heap_desc.NodeMask = 0;
    hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
    ok(SUCCEEDED(hr), "Failed to create query heap, type %u, hr %#x.\n", heap_desc.Type, hr);

    /* Room for the results of both queries, stored back to back. */
    resource = create_readback_buffer(device, 2 * sizeof(struct D3D12_QUERY_DATA_PIPELINE_STATISTICS));

    /* First query: do nothing. */
    ID3D12GraphicsCommandList_BeginQuery(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 0);
    ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 0);
    ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 0, 1,
            resource, 0);

    ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);

    /* Second query: draw something simple. */
    ID3D12GraphicsCommandList_BeginQuery(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 1);

    ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
    ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
    ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
    ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
    ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
    ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);

    ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 1);
    ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap, D3D12_QUERY_TYPE_PIPELINE_STATISTICS, 1, 1,
            resource, sizeof(struct D3D12_QUERY_DATA_PIPELINE_STATISTICS));

    get_buffer_readback_with_command_list(resource, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);

    /* The first result is walked as an array of 64-bit counters; all of them must be zero. */
    for (i = 0; i < sizeof(struct D3D12_QUERY_DATA_PIPELINE_STATISTICS) / sizeof(uint64_t); ++i)
    {
        uint64_t value = get_readback_uint64(&rb, i, 0);
        /* i is unsigned, so it is printed with %u. */
        ok(!value, "Element %u: Got %"PRIu64", expected 0.\n", i, value);
    }

    /* The second result is the element at index 1 of the readback buffer. */
    pipeline_statistics = get_readback_data(&rb, 1, 0, 0, sizeof(*pipeline_statistics));

    /* We read 3 vertices that formed one primitive. */
    ok(pipeline_statistics->IAVertices == 3, "IAVertices: Got %"PRIu64", expected 3.\n",
            pipeline_statistics->IAVertices);
    ok(pipeline_statistics->IAPrimitives == 1, "IAPrimitives: Got %"PRIu64", expected 1.\n",
            pipeline_statistics->IAPrimitives);
    ok(pipeline_statistics->VSInvocations == 3, "VSInvocations: Got %"PRIu64", expected 3.\n",
            pipeline_statistics->VSInvocations);

    /* No geometry shader output primitives.
     * Depending on the graphics card, the geometry shader might still have been invoked, so
     * GSInvocations might be whatever. */
    ok(pipeline_statistics->GSPrimitives == 0, "GSPrimitives: Got %"PRIu64", expected 0.\n",
            pipeline_statistics->GSPrimitives);

    /* One primitive sent to the rasterizer, but it might have been broken up into smaller pieces then. */
    ok(pipeline_statistics->CInvocations == 1, "CInvocations: Got %"PRIu64", expected 1.\n",
            pipeline_statistics->CInvocations);
    ok(pipeline_statistics->CPrimitives > 0, "CPrimitives: Got %"PRIu64", expected > 0.\n",
            pipeline_statistics->CPrimitives);

    /* Exact number of pixel shader invocations depends on the graphics card. */
    pixel_count = context.render_target_desc.Width * context.render_target_desc.Height;
    ok(pipeline_statistics->PSInvocations >= pixel_count, "PSInvocations: Got %"PRIu64", expected >= %u.\n",
            pipeline_statistics->PSInvocations, pixel_count);

    /* We used no tessellation or compute shaders at all. */
    ok(pipeline_statistics->HSInvocations == 0, "HSInvocations: Got %"PRIu64", expected 0.\n",
            pipeline_statistics->HSInvocations);
    ok(pipeline_statistics->DSInvocations == 0, "DSInvocations: Got %"PRIu64", expected 0.\n",
            pipeline_statistics->DSInvocations);
    ok(pipeline_statistics->CSInvocations == 0, "CSInvocations: Got %"PRIu64", expected 0.\n",
            pipeline_statistics->CSInvocations);

    release_resource_readback(&rb);
    ID3D12QueryHeap_Release(query_heap);
    ID3D12Resource_Release(resource);
    destroy_test_context(&context);
}
void test_query_occlusion(void)
{
struct test_context_desc desc;
ID3D12GraphicsCommandList *command_list;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Device *device;
struct depth_stencil_resource ds;
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
D3D12_QUERY_HEAP_DESC heap_desc;
ID3D12QueryHeap *query_heap;
ID3D12Resource *resource;
struct resource_readback rb;
unsigned int i;
HRESULT hr;
static const DWORD ps_code[] =
{
#if 0
float depth;
float main() : SV_Depth
{
return depth;
}
#endif
0x43425844, 0x91af6cd0, 0x7e884502, 0xcede4f54, 0x6f2c9326, 0x00000001, 0x000000b0, 0x00000003,
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0xffffffff,
0x00000e01, 0x445f5653, 0x68747065, 0xababab00, 0x52444853, 0x00000038, 0x00000040, 0x0000000e,
0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x02000065, 0x0000c001, 0x05000036, 0x0000c001,
0x0020800a, 0x00000000, 0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
static const struct
{
D3D12_QUERY_TYPE type;
bool draw;
float clear_depth;
float depth;
}
tests[] =
{
{D3D12_QUERY_TYPE_OCCLUSION, false, 1.0f, 0.5f},
{D3D12_QUERY_TYPE_OCCLUSION, true, 1.0f, 0.5f},
{D3D12_QUERY_TYPE_BINARY_OCCLUSION, false, 1.0f, 0.5f},
{D3D12_QUERY_TYPE_BINARY_OCCLUSION, true, 1.0f, 0.5f},
{D3D12_QUERY_TYPE_OCCLUSION, false, 0.0f, 0.5f},
{D3D12_QUERY_TYPE_OCCLUSION, true, 0.0f, 0.5f},
{D3D12_QUERY_TYPE_BINARY_OCCLUSION, false, 0.0f, 0.5f},
{D3D12_QUERY_TYPE_BINARY_OCCLUSION, true, 0.0f, 0.5f},
};
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
init_depth_stencil(&ds, context.device, 640, 480, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, NULL);
set_viewport(&context.viewport, 0.0f, 0.0f, 640.0f, 480.0f, 0.0f, 1.0f);
set_rect(&context.scissor_rect, 0, 0, 640, 480);
context.root_signature = create_32bit_constants_root_signature(context.device,
0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, NULL, &ps, NULL);
pso_desc.NumRenderTargets = 0;
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
pso_desc.DepthStencilState.DepthEnable = true;
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(SUCCEEDED(hr), "Failed to create graphics pipeline state, hr %#x.\n", hr);
heap_desc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
heap_desc.Count = ARRAY_SIZE(tests);
heap_desc.NodeMask = 0;
hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
ok(SUCCEEDED(hr), "Failed to create query heap, type %u, hr %#x.\n", heap_desc.Type, hr);
resource = create_readback_buffer(device, ARRAY_SIZE(tests) * sizeof(uint64_t));
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 0, NULL, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
for (i = 0; i < ARRAY_SIZE(tests); ++i)
{
vkd3d_test_set_context("Test %u", i);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, tests[i].clear_depth, 0, 0, NULL);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heap, tests[i].type, i);
if (tests[i].draw)
{
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 1, &tests[i].depth, 0);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
}
ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, tests[i].type, i);
ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap, tests[i].type, i, 1,
resource, i * sizeof(uint64_t));
}
vkd3d_test_set_context(NULL);
get_buffer_readback_with_command_list(resource, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
for (i = 0; i < ARRAY_SIZE(tests); ++i)
{
const bool samples_passed = tests[i].draw && tests[i].clear_depth > tests[i].depth;
const uint64_t result = get_readback_uint64(&rb, i, 0);
uint64_t expected_result;
if (tests[i].type == D3D12_QUERY_TYPE_BINARY_OCCLUSION)
expected_result = samples_passed ? 1 : 0;
else
expected_result = samples_passed ? 640 * 480 : 0;
ok(result == expected_result, "Test %u: Got unexpected result %"PRIu64".\n", i, result);
}
release_resource_readback(&rb);
ID3D12QueryHeap_Release(query_heap);
ID3D12Resource_Release(resource);
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}
void test_resolve_non_issued_query_data(void)
{
static const uint64_t initial_data[] = {0xdeadbeef, 0xdeadbeef, 0xdeadbabe, 0xdeadbeef};
ID3D12Resource *readback_buffer, *upload_buffer;
ID3D12GraphicsCommandList *command_list;
D3D12_QUERY_HEAP_DESC heap_desc;
struct test_context_desc desc;
ID3D12QueryHeap *query_heap;
struct resource_readback rb;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Device *device;
uint64_t *timestamps;
HRESULT hr;
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
heap_desc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP;
heap_desc.Count = ARRAY_SIZE(initial_data);
heap_desc.NodeMask = 0;
hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
ok(SUCCEEDED(hr), "Failed to create query heap, hr %#x.\n", hr);
readback_buffer = create_readback_buffer(device, sizeof(initial_data));
upload_buffer = create_upload_buffer(context.device, sizeof(initial_data), initial_data);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_TIMESTAMP, 0);
ID3D12GraphicsCommandList_CopyResource(command_list, readback_buffer, upload_buffer);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_TIMESTAMP, 3);
ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heap,
D3D12_QUERY_TYPE_TIMESTAMP, 0, 4, readback_buffer, 0);
get_buffer_readback_with_command_list(readback_buffer, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
timestamps = get_readback_data(&rb, 0, 0, 0, sizeof(*timestamps));
ok(timestamps[0] != initial_data[0] && timestamps[0] > 0,
"Got unexpected timestamp %#"PRIx64".\n", timestamps[0]);
todo ok(!timestamps[1], "Got unexpected timestamp %#"PRIx64".\n", timestamps[1]);
todo ok(!timestamps[2], "Got unexpected timestamp %#"PRIx64".\n", timestamps[2]);
ok(timestamps[3] != initial_data[3] && timestamps[3] > 0,
"Got unexpected timestamp %#"PRIx64".\n", timestamps[3]);
release_resource_readback(&rb);
ID3D12QueryHeap_Release(query_heap);
ID3D12Resource_Release(readback_buffer);
ID3D12Resource_Release(upload_buffer);
destroy_test_context(&context);
}
/*
 * Produces a single occlusion query result in one command list and resolves
 * it from two later recordings of that list: the first half of the readback
 * slots is resolved and executed on its own, the second half is resolved in
 * the recording that also performs the readback. Every slot must report one
 * sample per render target pixel.
 */
void test_resolve_query_data_in_different_command_list(void)
{
    static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
    const unsigned int slot_count = 4;
    const unsigned int first_batch_end = slot_count / 2;
    ID3D12GraphicsCommandList *command_list;
    D3D12_QUERY_HEAP_DESC heap_desc;
    struct resource_readback rb;
    struct test_context context;
    ID3D12QueryHeap *query_heap;
    ID3D12CommandQueue *queue;
    ID3D12Resource *readback;
    uint64_t expected_samples;
    ID3D12Device *device;
    unsigned int slot;
    HRESULT hr;

    if (!init_test_context(&context, NULL))
        return;
    device = context.device;
    command_list = context.list;
    queue = context.queue;

    heap_desc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
    heap_desc.Count = 1;
    heap_desc.NodeMask = 0;
    hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
    ok(SUCCEEDED(hr), "Failed to create query heap, hr %#x.\n", hr);
    readback = create_readback_buffer(device, slot_count * sizeof(uint64_t));

    /* First recording: draw inside query 0. */
    ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
    ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
    ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
    ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
    ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
    ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
    ID3D12GraphicsCommandList_BeginQuery(command_list, query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0);
    ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
    ID3D12GraphicsCommandList_EndQuery(command_list, query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0);

    transition_resource_state(command_list, context.render_target,
            D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
    /* The list is reset right after this check, which suggests the helper
     * submits it; the draw is expected to have filled the target. */
    check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);

    /* Second recording: resolve into the first half of the slots and run it. */
    reset_command_list(command_list, context.allocator);
    for (slot = 0; slot < first_batch_end; ++slot)
    {
        ID3D12GraphicsCommandList_ResolveQueryData(command_list,
                query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0, 1, readback, slot * sizeof(uint64_t));
    }
    hr = ID3D12GraphicsCommandList_Close(command_list);
    ok(SUCCEEDED(hr), "Failed to close command list, hr %#x.\n", hr);
    exec_command_list(queue, command_list);
    wait_queue_idle(context.device, queue);

    /* Third recording: resolve into the remaining slots, then read everything back. */
    reset_command_list(command_list, context.allocator);
    for (slot = first_batch_end; slot < slot_count; ++slot)
    {
        ID3D12GraphicsCommandList_ResolveQueryData(command_list,
                query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0, 1, readback, slot * sizeof(uint64_t));
    }
    get_buffer_readback_with_command_list(readback, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);

    /* Every resolve read the same query, so every slot carries the same count. */
    expected_samples = context.render_target_desc.Width * context.render_target_desc.Height;
    for (slot = 0; slot < slot_count; ++slot)
    {
        uint64_t result = get_readback_uint64(&rb, slot, 0);

        ok(result == expected_samples, "Got unexpected result %"PRIu64" at %u.\n", result, slot);
    }

    release_resource_readback(&rb);
    ID3D12QueryHeap_Release(query_heap);
    ID3D12Resource_Release(readback);
    destroy_test_context(&context);
}
/*
 * Records the ResolveQueryData call before the commands that produce the
 * query result, in a separate command list, and then submits both lists in a
 * single ExecuteCommandLists call with the producing list first. The resolved
 * value must still reflect the draw.
 */
void test_resolve_query_data_in_reordered_command_list(void)
{
    ID3D12GraphicsCommandList *draw_list, *resolve_list;
    ID3D12GraphicsCommandList *submission[2];
    ID3D12CommandAllocator *resolve_allocator;
    D3D12_QUERY_HEAP_DESC heap_desc;
    struct resource_readback rb;
    struct test_context context;
    ID3D12QueryHeap *query_heap;
    ID3D12CommandQueue *queue;
    ID3D12Resource *readback;
    ID3D12Device *device;
    uint64_t result;
    HRESULT hr;

    if (!init_test_context(&context, NULL))
        return;
    device = context.device;
    draw_list = context.list;
    queue = context.queue;

    /* The resolving list gets its own allocator so both lists can be open at once. */
    hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT,
            &IID_ID3D12CommandAllocator, (void **)&resolve_allocator);
    ok(SUCCEEDED(hr), "Failed to create command allocator, hr %#x.\n", hr);
    hr = ID3D12Device_CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_DIRECT,
            resolve_allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&resolve_list);
    ok(SUCCEEDED(hr), "Failed to create command list, hr %#x.\n", hr);

    heap_desc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
    heap_desc.Count = 1;
    heap_desc.NodeMask = 0;
    hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heap);
    ok(SUCCEEDED(hr), "Failed to create query heap, hr %#x.\n", hr);
    readback = create_readback_buffer(device, sizeof(uint64_t));

    /* Recorded first, executed second: read the query result. */
    ID3D12GraphicsCommandList_ResolveQueryData(resolve_list,
            query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0, 1, readback, 0);
    hr = ID3D12GraphicsCommandList_Close(resolve_list);
    ok(SUCCEEDED(hr), "Failed to close command list, hr %#x.\n", hr);

    /* Recorded second, executed first: produce the query result. */
    ID3D12GraphicsCommandList_OMSetRenderTargets(draw_list, 1, &context.rtv, false, NULL);
    ID3D12GraphicsCommandList_SetGraphicsRootSignature(draw_list, context.root_signature);
    ID3D12GraphicsCommandList_SetPipelineState(draw_list, context.pipeline_state);
    ID3D12GraphicsCommandList_IASetPrimitiveTopology(draw_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
    ID3D12GraphicsCommandList_RSSetViewports(draw_list, 1, &context.viewport);
    ID3D12GraphicsCommandList_RSSetScissorRects(draw_list, 1, &context.scissor_rect);
    ID3D12GraphicsCommandList_BeginQuery(draw_list, query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0);
    ID3D12GraphicsCommandList_DrawInstanced(draw_list, 3, 1, 0, 0);
    ID3D12GraphicsCommandList_EndQuery(draw_list, query_heap, D3D12_QUERY_TYPE_OCCLUSION, 0);
    hr = ID3D12GraphicsCommandList_Close(draw_list);
    ok(SUCCEEDED(hr), "Failed to close command list, hr %#x.\n", hr);

    /* Submission order is the reverse of recording order. */
    submission[0] = draw_list;
    submission[1] = resolve_list;
    ID3D12CommandQueue_ExecuteCommandLists(queue,
            ARRAY_SIZE(submission), (ID3D12CommandList **)submission);
    wait_queue_idle(device, queue);

    /* Reuse the context's list for the readback copy. */
    reset_command_list(draw_list, context.allocator);
    get_buffer_readback_with_command_list(readback, DXGI_FORMAT_UNKNOWN, &rb, queue, draw_list);
    result = get_readback_uint64(&rb, 0, 0);
    ok(result == context.render_target_desc.Width * context.render_target_desc.Height,
            "Got unexpected result %"PRIu64".\n", result);

    release_resource_readback(&rb);
    ID3D12GraphicsCommandList_Release(resolve_list);
    ID3D12CommandAllocator_Release(resolve_allocator);
    ID3D12QueryHeap_Release(query_heap);
    ID3D12Resource_Release(readback);
    destroy_test_context(&context);
}
/*
 * Runs several occlusion queries that overlap each other and that stay open
 * across a depth-stencil rebind, then checks the resolved values.
 *
 * Two heaps of four queries each are used: query_heaps[0] is driven with
 * BINARY_OCCLUSION and query_heaps[1] with OCCLUSION; the same begin/end
 * pattern is applied to both. Results are resolved into one buffer, heap 0 in
 * elements 0-3 and heap 1 in elements 4-7, and compared with expected_results.
 *
 * With a LESS depth test, no depth writes and a 640x480 target (307200
 * pixels), the recorded sequence is expected to give per query index:
 *   0: spans the draw on ds[0] and the first draw on ds[1] -> 1 / 614400
 *   1: ended before any draw                               -> 0 / 0
 *   2: spans only the draw on ds[0]                        -> 1 / 307200
 *   3: spans a passing and a failing draw on ds[1]         -> 1 / 307200
 */
void test_virtual_queries(void)
{
struct test_context_desc desc;
ID3D12GraphicsCommandList *command_list;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Device *device;
struct depth_stencil_resource ds[2];
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
D3D12_QUERY_HEAP_DESC heap_desc;
ID3D12QueryHeap *query_heaps[2];
ID3D12Resource *resource;
struct resource_readback rb;
unsigned int i;
HRESULT hr;
static const DWORD ps_code[] =
{
#if 0
float depth;
float main() : SV_Depth
{
return depth;
}
#endif
0x43425844, 0x91af6cd0, 0x7e884502, 0xcede4f54, 0x6f2c9326, 0x00000001, 0x000000b0, 0x00000003,
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0xffffffff,
0x00000e01, 0x445f5653, 0x68747065, 0xababab00, 0x52444853, 0x00000038, 0x00000040, 0x0000000e,
0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x02000065, 0x0000c001, 0x05000036, 0x0000c001,
0x0020800a, 0x00000000, 0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
/* First four values belong to the binary heap, last four to the precise heap. */
static const uint32_t expected_results[] = {1,0,1,1,614400,0,307200,307200};
static const float depth_one = 1.0f;
static const float depth_zero = 0.0f;
/* Depth-only rendering: the context is created without a render target. */
memset(&desc, 0, sizeof(desc));
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
command_list = context.list;
queue = context.queue;
for (i = 0; i < ARRAY_SIZE(ds); i++)
init_depth_stencil(&ds[i], context.device, 640, 480, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, NULL);
set_viewport(&context.viewport, 0.0f, 0.0f, 640.0f, 480.0f, 0.0f, 1.0f);
set_rect(&context.scissor_rect, 0, 0, 640, 480);
/* One 32-bit root constant, visible to the pixel shader, supplies the exported depth. */
context.root_signature = create_32bit_constants_root_signature(context.device,
0, 1, D3D12_SHADER_VISIBILITY_PIXEL);
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, NULL, &ps, NULL);
pso_desc.NumRenderTargets = 0;
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
pso_desc.DepthStencilState.DepthEnable = true;
/* Depth writes are off, so the cleared depth values stay in place for every draw. */
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(SUCCEEDED(hr), "Failed to create graphics pipeline state, hr %#x.\n", hr);
/* Both heaps share one description: half of the expected results each. */
heap_desc.Type = D3D12_QUERY_HEAP_TYPE_OCCLUSION;
heap_desc.Count = ARRAY_SIZE(expected_results) / 2;
heap_desc.NodeMask = 0;
for (i = 0; i < ARRAY_SIZE(query_heaps); i++)
{
hr = ID3D12Device_CreateQueryHeap(device, &heap_desc, &IID_ID3D12QueryHeap, (void **)&query_heaps[i]);
ok(SUCCEEDED(hr), "Failed to create query heap, type %u, hr %#x.\n", heap_desc.Type, hr);
}
resource = create_readback_buffer(device, ARRAY_SIZE(expected_results) * sizeof(uint64_t));
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
/* ds[0] holds 1.0 and ds[1] holds 0.5 for the rest of the test. */
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds[0].dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds[1].dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 0.5f, 0, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 0, NULL, false, &ds[0].dsv_handle);
/* Open queries 0-2 in both heaps before anything is drawn. */
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 0);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 1);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 2);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 0);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 1);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 2);
/* Query 1 is closed immediately, so it covers no draw at all. */
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 1);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 1);
/* Draw at depth 0.0 against ds[0] (1.0); seen by queries 0 and 2. */
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 1, &depth_zero, 0);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 2);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 2);
/* Switch to ds[1] while query 0 is still open, then open query 3. */
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 0, NULL, false, &ds[1].dsv_handle);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 3);
ID3D12GraphicsCommandList_BeginQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 3);
/* The root constant is still depth_zero here: draw at 0.0 against ds[1] (0.5); seen by queries 0 and 3. */
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 0);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 0);
/* Draw at depth 1.0 against ds[1] (0.5); only query 3 is still open for it. */
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 1, &depth_one, 0);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[0], D3D12_QUERY_TYPE_BINARY_OCCLUSION, 3);
ID3D12GraphicsCommandList_EndQuery(command_list, query_heaps[1], D3D12_QUERY_TYPE_OCCLUSION, 3);
/* NOTE(review): ds[1] is already bound at this point; this rebind looks redundant - confirm whether it is intentional. */
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 0, NULL, false, &ds[1].dsv_handle);
/* Resolve all four queries of each heap; heap i lands at element offset i * 4. */
for (i = 0; i < ARRAY_SIZE(query_heaps); i++)
{
ID3D12GraphicsCommandList_ResolveQueryData(command_list, query_heaps[i],
i ? D3D12_QUERY_TYPE_OCCLUSION : D3D12_QUERY_TYPE_BINARY_OCCLUSION,
0, 4, resource, i * 4 * sizeof(uint64_t));
}
get_buffer_readback_with_command_list(resource, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
/* Compare each resolved 64-bit value with the table above. */
for (i = 0; i < ARRAY_SIZE(expected_results); ++i)
{
const uint64_t result = get_readback_uint64(&rb, i, 0);
ok(result == expected_results[i], "Test %u: Got unexpected result %"PRIu64".\n", i, result);
}
release_resource_readback(&rb);
for (i = 0; i < ARRAY_SIZE(query_heaps); i++)
ID3D12QueryHeap_Release(query_heaps[i]);
ID3D12Resource_Release(resource);
for (i = 0; i < ARRAY_SIZE(ds); i++)
destroy_depth_stencil(&ds[i]);
destroy_test_context(&context);
}

File diff suppressed because it is too large Load Diff

View File

@ -1,870 +0,0 @@
/*
* Copyright 2016-2017 Józef Kucia for CodeWeavers
* Copyright 2020-2021 Philip Rebohle for Valve Corporation
* Copyright 2020-2021 Joshua Ashton for Valve Corporation
* Copyright 2020-2021 Hans-Kristian Arntzen for Valve Corporation
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
#include "d3d12_crosstest.h"
void test_unbound_rtv_rendering(void)
{
static const struct vec4 red = { 1.0f, 0.0f, 0.0f, 1.0f };
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
D3D12_CPU_DESCRIPTOR_HANDLE rt_handle;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Resource *fp32_rt;
HRESULT hr;
static const DWORD ps_code[] =
{
#if 0
Outputs main()
{
Outputs o;
o.col0 = float4(1.0, 0.0, 0.0, 1.0);
o.col1 = 0.5;
return o;
}
#endif
0x43425844, 0xbbb26641, 0x99a7dc17, 0xc556a4cd, 0x3aa2843e, 0x00000001, 0x000000ec, 0x00000003,
0x0000002c, 0x0000003c, 0x00000088, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x00000044, 0x00000002, 0x00000008, 0x00000038, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
0x0000000f, 0x00000038, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x00000e01, 0x545f5653,
0x65677261, 0xabab0074, 0x58454853, 0x0000005c, 0x00000050, 0x00000017, 0x0100086a, 0x03000065,
0x001020f2, 0x00000000, 0x03000065, 0x00102012, 0x00000001, 0x08000036, 0x001020f2, 0x00000000,
0x00004002, 0x3f800000, 0x00000000, 0x00000000, 0x3f800000, 0x05000036, 0x00102012, 0x00000001,
0x00004001, 0x3f000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
desc.rt_width = 32;
desc.rt_height = 32;
desc.rt_descriptor_count = 2;
desc.no_pipeline = true;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
fp32_rt = create_default_texture2d(context.device, 32, 32,
1, 1, DXGI_FORMAT_R32_FLOAT, D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
D3D12_RESOURCE_STATE_RENDER_TARGET);
rt_handle = context.rtv;
rt_handle.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV);
ID3D12Device_CreateRenderTargetView(context.device, fp32_rt, NULL, rt_handle);
/* Apparently, rendering to an NULL RTV is fine. D3D12 validation does not complain about this case at all. */
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, NULL, &ps, NULL);
pso_desc.NumRenderTargets = 2;
pso_desc.RTVFormats[0] = DXGI_FORMAT_R32G32B32A32_FLOAT;
pso_desc.RTVFormats[1] = DXGI_FORMAT_R32_FLOAT;
pso_desc.DSVFormat = DXGI_FORMAT_UNKNOWN;
pso_desc.BlendState.RenderTarget[0].RenderTargetWriteMask = 0xf;
pso_desc.BlendState.RenderTarget[1].RenderTargetWriteMask = 0xf;
pso_desc.DepthStencilState.DepthEnable = false;
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, white, 0, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
/* First, render to both RTs, but then only render to 1 RT. */
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 2, &context.rtv, true, NULL);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
transition_resource_state(command_list, fp32_rt,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &red, 0);
reset_command_list(command_list, context.allocator);
check_sub_resource_float(fp32_rt, 0, queue, command_list, 0.5f, 0);
ID3D12Resource_Release(fp32_rt);
destroy_test_context(&context);
}
void test_unknown_rtv_format(void)
{
static const struct vec4 vec4_white = {1.0f, 1.0f, 1.0f, 1.0f};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
struct vec4 expected_vec4 = {0.0f, 0.0f, 0.0f, 1.0f};
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
D3D12_RENDER_TARGET_VIEW_DESC rtv_desc;
D3D12_CPU_DESCRIPTOR_HANDLE rtvs[3];
ID3D12Resource *render_targets[2];
struct depth_stencil_resource ds;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
unsigned int i;
HRESULT hr;
static const DWORD ps_code[] =
{
#if 0
void main(out float4 target1 : SV_Target1, out float4 target2 : SV_Target2)
{
target1 = float4(2.0f, 0.0f, 0.0f, 1.0f);
target2 = float4(3.0f, 0.0f, 0.0f, 1.0f);
}
#endif
0x43425844, 0x980554be, 0xb8743fb0, 0xf5bb8deb, 0x639feaf8, 0x00000001, 0x000000f4, 0x00000003,
0x0000002c, 0x0000003c, 0x00000088, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x00000044, 0x00000002, 0x00000008, 0x00000038, 0x00000001, 0x00000000, 0x00000003, 0x00000001,
0x0000000f, 0x00000038, 0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x0000000f, 0x545f5653,
0x65677261, 0xabab0074, 0x52444853, 0x00000064, 0x00000040, 0x00000019, 0x03000065, 0x001020f2,
0x00000001, 0x03000065, 0x001020f2, 0x00000002, 0x08000036, 0x001020f2, 0x00000001, 0x00004002,
0x40000000, 0x00000000, 0x00000000, 0x3f800000, 0x08000036, 0x001020f2, 0x00000002, 0x00004002,
0x40400000, 0x00000000, 0x00000000, 0x3f800000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
desc.rt_descriptor_count = 16;
desc.no_pipeline = true;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
init_depth_stencil(&ds, context.device, 32, 32, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, NULL);
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, NULL, &ps, NULL);
pso_desc.NumRenderTargets = ARRAY_SIZE(rtvs);
for (i = 0; i < ARRAY_SIZE(rtvs); ++i)
pso_desc.RTVFormats[i] = desc.rt_format;
pso_desc.RTVFormats[0] = DXGI_FORMAT_UNKNOWN;
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
pso_desc.DepthStencilState.DepthEnable = true;
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr);
rtvs[0] = get_cpu_rtv_handle(&context, context.rtv_heap, 0);
rtvs[1] = get_cpu_rtv_handle(&context, context.rtv_heap, 1);
rtvs[2] = get_cpu_rtv_handle(&context, context.rtv_heap, 2);
create_render_target(&context, &desc, &render_targets[0], &rtvs[1]);
create_render_target(&context, &desc, &render_targets[1], &rtvs[2]);
for (i = 0; i < ARRAY_SIZE(rtvs); ++i)
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], white, 0, NULL);
/* NULL RTV */
memset(&rtv_desc, 0, sizeof(rtv_desc));
rtv_desc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D;
rtv_desc.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
rtv_desc.Texture2D.MipSlice = 0;
rtv_desc.Texture2D.PlaneSlice = 0;
ID3D12Device_CreateRenderTargetView(context.device, NULL, &rtv_desc,
get_cpu_rtv_handle(&context, context.rtv_heap, 0));
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, ARRAY_SIZE(rtvs), rtvs, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
transition_resource_state(command_list, render_targets[0],
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
transition_resource_state(command_list, render_targets[1],
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &vec4_white, 0);
reset_command_list(command_list, context.allocator);
expected_vec4.x = 2.0f;
check_sub_resource_vec4(render_targets[0], 0, queue, command_list, &expected_vec4, 0);
reset_command_list(command_list, context.allocator);
expected_vec4.x = 3.0f;
check_sub_resource_vec4(render_targets[1], 0, queue, command_list, &expected_vec4, 0);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_float(ds.texture, 0, queue, command_list, 0.5f, 1);
for (i = 0; i < ARRAY_SIZE(render_targets); ++i)
ID3D12Resource_Release(render_targets[i]);
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}
void test_unknown_dsv_format(void)
{
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
struct depth_stencil_resource ds;
D3D12_CLEAR_VALUE clear_value;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
HRESULT hr;
static const DWORD ps_color_code[] =
{
#if 0
float4 color;
float4 main(float4 position : SV_POSITION) : SV_Target
{
return color;
}
#endif
0x43425844, 0xd18ead43, 0x8b8264c1, 0x9c0a062d, 0xfc843226, 0x00000001, 0x000000e0, 0x00000003,
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49,
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000044, 0x00000050,
0x00000011, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x03000065, 0x001020f2,
0x00000000, 0x06000036, 0x001020f2, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps_color = {ps_color_code, sizeof(ps_color_code)};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const struct vec4 green = {0.0f, 1.0f, 0.0f, 1.0f};
static const struct vec4 red = {1.0f, 0.0f, 0.0f, 1.0f};
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
desc.no_root_signature = true;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
clear_value.Format = DXGI_FORMAT_D32_FLOAT;
clear_value.DepthStencil.Depth = 0.5f;
clear_value.DepthStencil.Stencil = 0;
init_depth_stencil(&ds, context.device, 32, 32, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, &clear_value);
context.root_signature = create_32bit_constants_root_signature(context.device,
0, 4, D3D12_SHADER_VISIBILITY_PIXEL);
/* DSVFormat = DXGI_FORMAT_UNKNOWN and D3D12_DEPTH_WRITE_MASK_ZERO */
init_pipeline_state_desc(&pso_desc, context.root_signature, desc.rt_format, NULL, &ps_color, NULL);
pso_desc.DSVFormat = DXGI_FORMAT_UNKNOWN;
pso_desc.DepthStencilState.DepthEnable = true;
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_EQUAL;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, 0.5f, 0, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &green.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &red.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 1.0f, 1.0f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.0f, 0.0f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.55f, 0.55f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_float(ds.texture, 0, queue, command_list, 0.5f, 1);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &green, 0);
/* DSVFormat = DXGI_FORMAT_UNKNOWN and no DSV */
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &red.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.0f, 0.0f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &green.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &green, 0);
/* DSVFormat = DXGI_FORMAT_UNKNOWN and D3D12_COMPARISON_FUNC_ALWAYS */
ID3D12PipelineState_Release(context.pipeline_state);
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &red.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.0f, 0.0f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &green.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.6f, 0.6f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &green, 0);
/* DSVFormat = DXGI_FORMAT_UNKNOWN and depth write */
ID3D12PipelineState_Release(context.pipeline_state);
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle,
D3D12_CLEAR_FLAG_DEPTH, 0.0f, 0, 0, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &red.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 1.0f, 1.0f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &green.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.6f, 0.6f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &green, 0);
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, ds.texture,
D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_float(ds.texture, 0, queue, command_list, 1.0f, 1);
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}
void test_depth_stencil_test_no_dsv(void)
{
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
struct depth_stencil_resource ds;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
HRESULT hr;
static const DWORD ps_color_code[] =
{
#if 0
float4 color;
float4 main(float4 position : SV_POSITION) : SV_Target
{
return color;
}
#endif
0x43425844, 0xd18ead43, 0x8b8264c1, 0x9c0a062d, 0xfc843226, 0x00000001, 0x000000e0, 0x00000003,
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f, 0x505f5653, 0x5449534f, 0x004e4f49,
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x00000044, 0x00000050,
0x00000011, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x03000065, 0x001020f2,
0x00000000, 0x06000036, 0x001020f2, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps_color = {ps_color_code, sizeof(ps_color_code)};
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
static const struct vec4 red = {1.0f, 0.0f, 0.0f, 1.0f};
static const struct vec4 green = { 0.0f, 1.0f, 0.0f, 1.0f };
static const struct vec4 blue = { 0.0f, 0.0f, 1.0f, 1.0f };
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_R32G32B32A32_FLOAT;
desc.no_root_signature = true;
desc.rt_width = 32;
desc.rt_height = 32;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
init_depth_stencil(&ds, context.device, 32, 32, 1, 1, DXGI_FORMAT_D32_FLOAT, 0, NULL);
context.root_signature = create_32bit_constants_root_signature(context.device,
0, 4, D3D12_SHADER_VISIBILITY_PIXEL);
init_pipeline_state_desc(&pso_desc, context.root_signature, desc.rt_format, NULL, &ps_color, NULL);
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT;
pso_desc.DepthStencilState.DepthEnable = true;
pso_desc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearDepthStencilView(command_list, ds.dsv_handle, D3D12_CLEAR_FLAG_DEPTH,
1.0f, 0, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, &ds.dsv_handle);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &green.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.5f, 0.5f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &red.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.9f, 0.9f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
/* Now, dynamically disable the depth attachment. */
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
/* Native behavior seems to be that depth test is just disabled entirely here.
* This last draw is the color we should get on NV at least.
* D3D12 validation layers report errors here of course,
* but Metro Exodus relies on depth testing on DSV NULL apparently. */
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &blue.x, 0);
set_viewport(&context.viewport, 0.0f, 0.0f, 32.0f, 32.0f, 0.55f, 0.55f);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
/* vkd3d-proton just skips the draw call in this situation.
* At least test that we don't crash. */
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &blue, 0);
destroy_depth_stencil(&ds);
destroy_test_context(&context);
}
void test_render_a8_dxbc(void)
{
static const float black[] = {0.0f, 0.0f, 0.0f, 0.0f};
ID3D12GraphicsCommandList *command_list;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
static const DWORD ps_code[] =
{
#if 0
void main(out float4 target : SV_Target)
{
target = float4(0.0f, 0.25f, 0.5f, 1.0f);
}
#endif
0x43425844, 0x2f09e5ff, 0xaa135d5e, 0x7860f4b5, 0x5c7b8cbc, 0x00000001, 0x000000b4, 0x00000003,
0x0000002c, 0x0000003c, 0x00000070, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x0000003c, 0x00000050, 0x0000000f,
0x0100086a, 0x03000065, 0x001020f2, 0x00000000, 0x08000036, 0x001020f2, 0x00000000, 0x00004002,
0x00000000, 0x3e800000, 0x3f000000, 0x3f800000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_A8_UNORM;
desc.ps = &ps;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, black, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint8(context.render_target, 0, queue, command_list, 0xff, 0);
destroy_test_context(&context);
}
void test_render_a8_dxil(void)
{
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
static const float black[] = {0.0f, 0.0f, 0.0f, 0.0f};
ID3D12GraphicsCommandList *command_list;
struct test_context_desc desc;
struct test_context context;
ID3D12CommandQueue *queue;
ID3D12Device *device;
HRESULT hr;
static const BYTE ps_code_dxil[] =
{
#if 0
void main(out float4 target : SV_Target)
{
target = float4(0.0f, 0.25f, 0.5f, 1.0f);
}
#endif
0x44, 0x58, 0x42, 0x43, 0x21, 0x97, 0x41, 0xc7, 0x9f, 0x1a, 0xed, 0x0b, 0xa5, 0x57, 0x8b, 0x4b, 0xd2, 0x3f, 0xe9, 0x18, 0x01, 0x00, 0x00, 0x00, 0x32, 0x05, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
0x34, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x8e, 0x00, 0x00, 0x00, 0xe6, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x32, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0f, 0xf0, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x53, 0x56, 0x5f, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x00, 0x50, 0x53, 0x56, 0x30, 0x50, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x44, 0x10, 0x03, 0x00, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x44, 0x04, 0x00, 0x00, 0x60, 0x00, 0x00, 0x00, 0x11, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00,
0x00, 0x00, 0x2c, 0x04, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81,
0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x10, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0x84, 0x10,
0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x42, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x11, 0x22, 0xc4, 0x50, 0x41, 0x51,
0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x21, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x1b, 0x88, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0x00, 0x00, 0x49, 0x18,
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x13, 0x82, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x32, 0x22, 0x08, 0x09, 0x20, 0x64, 0x85, 0x04, 0x13, 0x22, 0xa4, 0x84, 0x04, 0x13,
0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x88, 0x8c, 0x0b, 0x84, 0x84, 0x4c, 0x10, 0x28, 0x23, 0x00, 0x25, 0x00, 0x8a, 0x39, 0x02, 0x30, 0x98, 0x23, 0x40, 0x66, 0x00, 0x8a, 0x01, 0x33,
0x43, 0x45, 0x36, 0x10, 0x90, 0x02, 0x03, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d,
0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07,
0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60,
0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76,
0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x86, 0x3c, 0x06, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0x81, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09,
0x26, 0x47, 0xc6, 0x04, 0x43, 0x9a, 0x12, 0x18, 0x01, 0x28, 0x84, 0x62, 0x20, 0x2a, 0x89, 0x02, 0x19, 0x01, 0x28, 0x04, 0xca, 0xb1, 0x04, 0x80, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3e, 0x00,
0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0xc4, 0x88, 0x0c, 0x6f, 0xec, 0xed, 0x4d, 0x0c, 0x44, 0x06, 0x26, 0x26, 0xc7, 0x05, 0xa6, 0xc6, 0x05, 0x06, 0x66, 0x43, 0x10, 0x4c, 0x10,
0x06, 0x61, 0x82, 0x30, 0x0c, 0x1b, 0x84, 0x81, 0x98, 0x20, 0x0c, 0xc4, 0x06, 0x61, 0x30, 0x28, 0xc0, 0xcd, 0x4d, 0x10, 0x86, 0x62, 0xc3, 0x80, 0x24, 0xc4, 0x04, 0x41, 0x00, 0x36, 0x00, 0x1b,
0x06, 0x82, 0x61, 0x36, 0x04, 0xcd, 0x86, 0x61, 0x58, 0x9c, 0x09, 0x42, 0xa2, 0x6c, 0x08, 0x20, 0x12, 0x6d, 0x61, 0x69, 0x6e, 0x4c, 0xa6, 0xac, 0xbe, 0xa8, 0xc2, 0xe4, 0xce, 0xca, 0xe8, 0x26,
0x08, 0x84, 0x31, 0x41, 0x20, 0x8e, 0x0d, 0x01, 0x31, 0x41, 0x20, 0x90, 0x09, 0x02, 0x91, 0x6c, 0x58, 0x88, 0x89, 0xaa, 0xac, 0x6b, 0xc0, 0x88, 0x0b, 0xd8, 0x10, 0x64, 0x1b, 0x06, 0x40, 0x03,
0x36, 0x14, 0x8b, 0xb4, 0x01, 0x40, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97, 0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a,
0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b, 0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x49,
0x25, 0x32, 0x3c, 0x17, 0xba, 0x3c, 0xb8, 0xb2, 0x20, 0x37, 0xb7, 0x37, 0xba, 0x30, 0xba, 0xb4, 0x37, 0xb7, 0xb9, 0x29, 0x81, 0x53, 0x87, 0x0c, 0xcf, 0xc5, 0x2e, 0xad, 0xec, 0x2e, 0x89, 0x6c,
0x8a, 0x2e, 0x8c, 0xae, 0x6c, 0x4a, 0x00, 0xd5, 0x21, 0xc3, 0x73, 0x29, 0x73, 0xa3, 0x93, 0xcb, 0x83, 0x7a, 0x4b, 0x73, 0xa3, 0x9b, 0x9b, 0x12, 0x6c, 0x00, 0x79, 0x18, 0x00, 0x00, 0x42, 0x00,
0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f,
0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d,
0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec,
0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc,
0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87,
0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee,
0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81,
0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c,
0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0x03, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x16, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x34, 0x39, 0x11, 0x81, 0x52, 0xd3, 0x43,
0x4d, 0x7e, 0x71, 0xdb, 0x06, 0x40, 0x30, 0x00, 0xd2, 0x00, 0x61, 0x20, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x34, 0xa5,
0x40, 0x54, 0x02, 0x45, 0x50, 0x06, 0x54, 0x23, 0x00, 0x63, 0x04, 0x20, 0x08, 0x82, 0xe8, 0x37, 0x46, 0x00, 0x82, 0x20, 0x08, 0x7f, 0x63, 0x04, 0x20, 0x08, 0x82, 0xf8, 0x07, 0x00, 0x23, 0x06,
0x09, 0x00, 0x82, 0x60, 0x60, 0x48, 0x08, 0x04, 0x2d, 0xc4, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x18, 0x12, 0x02, 0x41, 0xc7, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x86, 0x84, 0x40, 0x90,
0x21, 0x8c, 0x18, 0x24, 0x00, 0x08, 0x82, 0x81, 0x21, 0x21, 0x10, 0x54, 0x04, 0x08, 0x00, 0x00, 0x00, 0x00,
};
memset(&desc, 0, sizeof(desc));
desc.rt_format = DXGI_FORMAT_A8_UNORM;
desc.no_pipeline = true;
if (!init_test_context(&context, &desc))
return;
if (!context_supports_dxil(&context))
{
destroy_test_context(&context);
return;
}
device = context.device;
command_list = context.list;
queue = context.queue;
init_pipeline_state_desc_dxil(&pso_desc, context.root_signature, 0, NULL, NULL, NULL);
pso_desc.RTVFormats[0] = DXGI_FORMAT_A8_UNORM;
pso_desc.NumRenderTargets = 1;
pso_desc.PS.pShaderBytecode = ps_code_dxil;
pso_desc.PS.BytecodeLength = sizeof(ps_code_dxil);
hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create graphics pipeline state, hr %#x.\n", hr);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, black, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint8(context.render_target, 0, queue, command_list, 0xff, 0);
destroy_test_context(&context);
}
void test_multisample_rendering(void)
{
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
ID3D12GraphicsCommandList *command_list;
ID3D12PipelineState *ms_pipeline_state;
D3D12_CPU_DESCRIPTOR_HANDLE ms_rtv;
ID3D12Resource *ms_render_target;
struct test_context_desc desc;
struct test_context context;
ID3D12DescriptorHeap *heap;
ID3D12CommandQueue *queue;
uint32_t sample;
unsigned int i;
HRESULT hr;
static const DWORD ps_color_code[] =
{
#if 0
float4 main(uint id : SV_SampleIndex) : SV_Target
{
switch (id)
{
case 0: return float4(1.0f, 0.0f, 0.0f, 1.0f);
case 1: return float4(0.0f, 1.0f, 0.0f, 1.0f);
case 2: return float4(0.0f, 0.0f, 1.0f, 1.0f);
default: return float4(0.0f, 0.0f, 0.0f, 1.0f);
}
}
#endif
0x43425844, 0x94c35f48, 0x04c6b0f7, 0x407d8214, 0xc24f01e5, 0x00000001, 0x00000194, 0x00000003,
0x0000002c, 0x00000064, 0x00000098, 0x4e475349, 0x00000030, 0x00000001, 0x00000008, 0x00000020,
0x00000000, 0x0000000a, 0x00000001, 0x00000000, 0x00000101, 0x535f5653, 0x6c706d61, 0x646e4965,
0xab007865, 0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000,
0x00000003, 0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x000000f4,
0x00000050, 0x0000003d, 0x0100086a, 0x04000863, 0x00101012, 0x00000000, 0x0000000a, 0x03000065,
0x001020f2, 0x00000000, 0x0300004c, 0x0010100a, 0x00000000, 0x03000006, 0x00004001, 0x00000000,
0x08000036, 0x001020f2, 0x00000000, 0x00004002, 0x3f800000, 0x00000000, 0x00000000, 0x3f800000,
0x0100003e, 0x03000006, 0x00004001, 0x00000001, 0x08000036, 0x001020f2, 0x00000000, 0x00004002,
0x00000000, 0x3f800000, 0x00000000, 0x3f800000, 0x0100003e, 0x03000006, 0x00004001, 0x00000002,
0x08000036, 0x001020f2, 0x00000000, 0x00004002, 0x00000000, 0x00000000, 0x3f800000, 0x3f800000,
0x0100003e, 0x0100000a, 0x08000036, 0x001020f2, 0x00000000, 0x00004002, 0x00000000, 0x00000000,
0x00000000, 0x3f800000, 0x0100003e, 0x01000017, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps_color = {ps_color_code, sizeof(ps_color_code)};
static const DWORD ps_resolve_code[] =
{
#if 0
Texture2DMS<float4> t;
uint sample;
uint rt_size;
float4 main(float4 position : SV_Position) : SV_Target
{
float3 p;
t.GetDimensions(p.x, p.y, p.z);
p *= float3(position.x / rt_size, position.y / rt_size, 0);
return t.Load((int2)p.xy, sample);
}
#endif
0x43425844, 0x68a4590b, 0xc1ec3070, 0x1b957c43, 0x0c080741, 0x00000001, 0x000001c8, 0x00000003,
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000030f, 0x505f5653, 0x7469736f, 0x006e6f69,
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000000, 0x00000003,
0x00000000, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074, 0x58454853, 0x0000012c, 0x00000050,
0x0000004b, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04002058, 0x00107000,
0x00000000, 0x00005555, 0x04002064, 0x00101032, 0x00000000, 0x00000001, 0x03000065, 0x001020f2,
0x00000000, 0x02000068, 0x00000001, 0x06000056, 0x00100012, 0x00000000, 0x0020801a, 0x00000000,
0x00000000, 0x0700000e, 0x00100032, 0x00000000, 0x00101046, 0x00000000, 0x00100006, 0x00000000,
0x8900003d, 0x80000102, 0x00155543, 0x001000c2, 0x00000000, 0x00004001, 0x00000000, 0x001074e6,
0x00000000, 0x07000038, 0x00100032, 0x00000000, 0x00100046, 0x00000000, 0x00100ae6, 0x00000000,
0x0500001b, 0x00100032, 0x00000000, 0x00100046, 0x00000000, 0x08000036, 0x001000c2, 0x00000000,
0x00004002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8c00002e, 0x80000102, 0x00155543,
0x001020f2, 0x00000000, 0x00100e46, 0x00000000, 0x00107e46, 0x00000000, 0x0020800a, 0x00000000,
0x00000000, 0x0100003e,
};
static const D3D12_SHADER_BYTECODE ps_resolve = {ps_resolve_code, sizeof(ps_resolve_code)};
static const unsigned int expected_colors[] = {0xff0000ff, 0xff00ff00, 0xffff0000, 0xff000000};
if (use_warp_device)
{
skip("Sample shading tests fail on WARP.\n");
return;
}
memset(&desc, 0, sizeof(desc));
desc.rt_width = desc.rt_height = 32;
desc.rt_descriptor_count = 2;
desc.no_root_signature = true;
if (!init_test_context(&context, &desc))
return;
command_list = context.list;
queue = context.queue;
context.root_signature = create_texture_root_signature(context.device,
D3D12_SHADER_VISIBILITY_PIXEL, 2, 0);
init_pipeline_state_desc(&pso_desc, context.root_signature,
context.render_target_desc.Format, NULL, &ps_resolve, NULL);
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
pso_desc.PS = ps_color;
pso_desc.SampleDesc.Count = 4;
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
&IID_ID3D12PipelineState, (void **)&ms_pipeline_state);
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
ms_rtv = get_cpu_rtv_handle(&context, context.rtv_heap, 1);
desc.sample_desc.Count = 4;
create_render_target(&context, &desc, &ms_render_target, &ms_rtv);
heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
ID3D12Device_CreateShaderResourceView(context.device, ms_render_target, NULL,
get_cpu_descriptor_handle(&context, heap, 0));
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, ms_rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &ms_rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, ms_pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &heap);
ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(command_list, 0,
get_gpu_descriptor_handle(&context, heap, 0));
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, ms_render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_SOURCE);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_RESOLVE_DEST);
ID3D12GraphicsCommandList_ResolveSubresource(command_list,
context.render_target, 0, ms_render_target, 0, context.render_target_desc.Format);
transition_resource_state(command_list, ms_render_target,
D3D12_RESOURCE_STATE_RESOLVE_SOURCE, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RESOLVE_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff404040, 2);
for (i = 0; i < ARRAY_SIZE(expected_colors); ++i)
{
reset_command_list(command_list, context.allocator);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_RENDER_TARGET);
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &heap);
ID3D12GraphicsCommandList_SetGraphicsRootDescriptorTable(command_list, 0,
get_gpu_descriptor_handle(&context, heap, 0));
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 1, 1, &desc.rt_width, 1);
sample = i;
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 1, 1, &sample, 0);
ID3D12GraphicsCommandList_DrawInstanced(command_list, 3, 1, 0, 0);
transition_resource_state(command_list, context.render_target,
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
check_sub_resource_uint(context.render_target, 0, queue, command_list, expected_colors[i], 0);
}
ID3D12DescriptorHeap_Release(heap);
ID3D12Resource_Release(ms_render_target);
ID3D12PipelineState_Release(ms_pipeline_state);
destroy_test_context(&context);
}

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More