Compare commits
259 Commits
Author | SHA1 | Date |
---|---|---|
Joshua Ashton | d00d035321 | |
Joshua Ashton | 253dc9027a | |
Derek Lesho | 146f5b8a74 | |
Hans-Kristian Arntzen | db4a8544a1 | |
Hans-Kristian Arntzen | 1d25b29413 | |
Hans-Kristian Arntzen | 34a04a1a7f | |
Hans-Kristian Arntzen | b839fe14bb | |
Hans-Kristian Arntzen | d3a76eee90 | |
Hans-Kristian Arntzen | 481680ecd8 | |
Hans-Kristian Arntzen | 11c82c84d1 | |
Hans-Kristian Arntzen | c0b9682c69 | |
Hans-Kristian Arntzen | 9d8abd2db5 | |
Derek Lesho | df1829e407 | |
Hans-Kristian Arntzen | be2aafff1a | |
Derek Lesho | 849537614a | |
Derek Lesho | f487db4756 | |
Hans-Kristian Arntzen | 6265a7b5ce | |
Hans-Kristian Arntzen | 4f4c96bb11 | |
Derek Lesho | a2439e766f | |
Hans-Kristian Arntzen | 21799b202b | |
Hans-Kristian Arntzen | 4ff504b52d | |
Hans-Kristian Arntzen | 6335e411bb | |
Hans-Kristian Arntzen | 11c943dd7e | |
Hans-Kristian Arntzen | 5b73139f18 | |
Hans-Kristian Arntzen | 73700f4c3a | |
Hans-Kristian Arntzen | a917d60ca5 | |
Hans-Kristian Arntzen | 8d780458f1 | |
Hans-Kristian Arntzen | 8da6ca6772 | |
Hans-Kristian Arntzen | 766da69afb | |
Hans-Kristian Arntzen | b7a960f94f | |
Hans-Kristian Arntzen | ee39209798 | |
Hans-Kristian Arntzen | afb87e013f | |
Hans-Kristian Arntzen | 433262c254 | |
Hans-Kristian Arntzen | 277bbe35e8 | |
Hans-Kristian Arntzen | 9451fdcab9 | |
Hans-Kristian Arntzen | 0640f44560 | |
Hans-Kristian Arntzen | b287864cd1 | |
Hans-Kristian Arntzen | 0a7b13fe7f | |
Hans-Kristian Arntzen | f704cb9776 | |
Hans-Kristian Arntzen | e17a7cb40c | |
Hans-Kristian Arntzen | 9e45c72256 | |
Hans-Kristian Arntzen | 2a8c762025 | |
Hans-Kristian Arntzen | 3b8a13e63d | |
Hans-Kristian Arntzen | 65804bbde5 | |
Hans-Kristian Arntzen | 233ff38175 | |
Hans-Kristian Arntzen | 4a07d9c038 | |
Hans-Kristian Arntzen | bcdac3180a | |
Hans-Kristian Arntzen | df11b5ba5a | |
Hans-Kristian Arntzen | e138a5117a | |
Hans-Kristian Arntzen | 96fdb71ae4 | |
Hans-Kristian Arntzen | fe707989fe | |
Hans-Kristian Arntzen | 6d3c5d53b0 | |
Hans-Kristian Arntzen | f93a581dae | |
Hans-Kristian Arntzen | b7bbdcabd4 | |
Hans-Kristian Arntzen | a28e4b6e11 | |
Hans-Kristian Arntzen | eda0b2fab2 | |
Hans-Kristian Arntzen | 7f5dbcfc40 | |
Hans-Kristian Arntzen | d333159c86 | |
Hans-Kristian Arntzen | 74eb676cfb | |
Hans-Kristian Arntzen | 5033904e10 | |
Hans-Kristian Arntzen | b34931eb17 | |
Hans-Kristian Arntzen | 7410f53912 | |
Hans-Kristian Arntzen | 089d2c6cb7 | |
Hans-Kristian Arntzen | 03fdbac59e | |
Hans-Kristian Arntzen | 7832eeb60d | |
Hans-Kristian Arntzen | 8a94c3ce0e | |
Hans-Kristian Arntzen | ddb425c5cb | |
Hans-Kristian Arntzen | ad7459551d | |
Hans-Kristian Arntzen | e3c36a47dd | |
Hans-Kristian Arntzen | ee8b8374b4 | |
Hans-Kristian Arntzen | ce00c9322d | |
Hans-Kristian Arntzen | b88b04e4f1 | |
Hans-Kristian Arntzen | 4a121b9aaa | |
Hans-Kristian Arntzen | 0ef6a8b798 | |
Hans-Kristian Arntzen | 49b6e67e7d | |
Hans-Kristian Arntzen | 2ef3fd469c | |
Hans-Kristian Arntzen | 22778b99be | |
Hans-Kristian Arntzen | b8b2a93aa6 | |
Hans-Kristian Arntzen | 14470d5456 | |
Hans-Kristian Arntzen | 3aad4edf6e | |
Hans-Kristian Arntzen | 3c92b3a1bc | |
Hans-Kristian Arntzen | 8473355a98 | |
Hans-Kristian Arntzen | 1438ff5637 | |
Hans-Kristian Arntzen | c3ee963d2f | |
Hans-Kristian Arntzen | 684e41fabe | |
Philip Rebohle | 1d869e3e21 | |
Tatsuyuki Ishi | 02c7ec404c | |
Hans-Kristian Arntzen | 9b5f3bfc26 | |
Hans-Kristian Arntzen | b4ab6c3f08 | |
Hans-Kristian Arntzen | 707af8152e | |
Hans-Kristian Arntzen | bc759be2af | |
Hans-Kristian Arntzen | 18f1d1c72e | |
Hans-Kristian Arntzen | 1b704287e5 | |
Hans-Kristian Arntzen | f975f09bb1 | |
Hans-Kristian Arntzen | 619a54810d | |
Hans-Kristian Arntzen | cecb8d6ebc | |
Hans-Kristian Arntzen | 8ae391e675 | |
Hans-Kristian Arntzen | a30205589f | |
Hans-Kristian Arntzen | abdef77695 | |
Hans-Kristian Arntzen | c132073df8 | |
Hans-Kristian Arntzen | 128852200a | |
Hans-Kristian Arntzen | 717026f903 | |
Hans-Kristian Arntzen | b849bd4256 | |
Georg Lehmann | d8905afd5d | |
Hans-Kristian Arntzen | de5b751468 | |
Hans-Kristian Arntzen | 219d9698b3 | |
Hans-Kristian Arntzen | acef5429c5 | |
Hans-Kristian Arntzen | 135aff4685 | |
Hans-Kristian Arntzen | 2f6a9e0d55 | |
Hans-Kristian Arntzen | 3a19dea7c7 | |
Tatsuyuki Ishi | 39d07dea2c | |
Tatsuyuki Ishi | 3577ca3144 | |
Tatsuyuki Ishi | 829ac72e3d | |
Hans-Kristian Arntzen | c64916686d | |
Hans-Kristian Arntzen | c4b00bbe1e | |
Hans-Kristian Arntzen | fd05839eb9 | |
Hans-Kristian Arntzen | 46470017a3 | |
Georg Lehmann | cbca29dd90 | |
Hans-Kristian Arntzen | c3fb6a6c5e | |
Hans-Kristian Arntzen | e8f1936ee2 | |
Hans-Kristian Arntzen | 4166eb042b | |
Hans-Kristian Arntzen | 7a002698f3 | |
Hans-Kristian Arntzen | 896e6fb868 | |
Hans-Kristian Arntzen | 8989360087 | |
Hans-Kristian Arntzen | f804ddc4c7 | |
Hans-Kristian Arntzen | 3b0d7e043d | |
Hans-Kristian Arntzen | 75e0506404 | |
Hans-Kristian Arntzen | 0f9d7dd10d | |
Hans-Kristian Arntzen | 7acc33ae39 | |
Hans-Kristian Arntzen | 7916d2a6d8 | |
Hans-Kristian Arntzen | 48157c29e8 | |
Hans-Kristian Arntzen | 467db76f90 | |
Hans-Kristian Arntzen | 2953ef8688 | |
Hans-Kristian Arntzen | f964532619 | |
Hans-Kristian Arntzen | 5a0c8289d8 | |
Hans-Kristian Arntzen | cca7613bca | |
Philip Rebohle | 910f15dff8 | |
Hans-Kristian Arntzen | a94e9b8b6a | |
Hans-Kristian Arntzen | 4ac0a3b455 | |
Hans-Kristian Arntzen | 300058d9a7 | |
Hans-Kristian Arntzen | 2e16a777ca | |
Hans-Kristian Arntzen | ac211d5f6a | |
Hans-Kristian Arntzen | 1dc4bbe5f2 | |
Tatsuyuki Ishi | 2965b7e379 | |
Tatsuyuki Ishi | 0d9c0a3903 | |
Robin Kertels | 1a773cfb71 | |
Robin Kertels | cdabda7805 | |
Robin Kertels | 8ac7aaca99 | |
Robin Kertels | 7e7c472005 | |
Hans-Kristian Arntzen | 71940797d1 | |
Hans-Kristian Arntzen | 4603c25d69 | |
Hans-Kristian Arntzen | 97201b8e93 | |
Hans-Kristian Arntzen | 51199752dd | |
Hans-Kristian Arntzen | ebe589d622 | |
Hans-Kristian Arntzen | 55a6847c61 | |
Hans-Kristian Arntzen | 04c020525c | |
Dean Beeler | 063ce7e4bd | |
Hans-Kristian Arntzen | 2c54e18245 | |
Philip Rebohle | bb2e35c539 | |
Philip Rebohle | d5ad5bb1de | |
Philip Rebohle | beb58f8472 | |
Hans-Kristian Arntzen | 358f95aff2 | |
Philip Rebohle | 119e00ed45 | |
Philip Rebohle | beaedbd857 | |
Philip Rebohle | 81927c5895 | |
Philip Rebohle | e7a6af4971 | |
Philip Rebohle | a1d5e6f39a | |
Hans-Kristian Arntzen | 4a05360a0a | |
Hans-Kristian Arntzen | 0c4df9b32c | |
Hans-Kristian Arntzen | 25c4bc18e7 | |
Hans-Kristian Arntzen | 30ec6b7f1f | |
Hans-Kristian Arntzen | c47a6a904b | |
Hans-Kristian Arntzen | 5044975152 | |
Hans-Kristian Arntzen | 8dc8b72807 | |
Hans-Kristian Arntzen | ae0dafa3a1 | |
Hans-Kristian Arntzen | 6c8542f7d6 | |
Hans-Kristian Arntzen | 2dcb1e2efc | |
Hans-Kristian Arntzen | 3095ed84d3 | |
Hans-Kristian Arntzen | db9b9a13de | |
Hans-Kristian Arntzen | 637834dc75 | |
Hans-Kristian Arntzen | 93928424a9 | |
Hans-Kristian Arntzen | c8b143c0bd | |
Hans-Kristian Arntzen | ca0a186a4b | |
Philip Rebohle | c9101b8ec3 | |
Philip Rebohle | 829c02bf90 | |
Philip Rebohle | e4184830c5 | |
Philip Rebohle | d1425ee4d1 | |
Denis Barkar | 8dda6df729 | |
Joshua Ashton | 2ed513b99a | |
Hans-Kristian Arntzen | 19e088cdfc | |
Hans-Kristian Arntzen | 241078d7e8 | |
Hans-Kristian Arntzen | e01589a33b | |
Hans-Kristian Arntzen | 2e704c5a5e | |
Hans-Kristian Arntzen | 6f43f450c8 | |
Hans-Kristian Arntzen | cfeaa18b09 | |
Hans-Kristian Arntzen | da63f0beac | |
Hans-Kristian Arntzen | 35e777f8a0 | |
Hans-Kristian Arntzen | 095a36cbaf | |
Philip Rebohle | 6378f1b880 | |
Philip Rebohle | 307190e96b | |
Hans-Kristian Arntzen | 2e8fb27182 | |
Hans-Kristian Arntzen | 1b5f7e8fc3 | |
Hans-Kristian Arntzen | cf65a78570 | |
Philip Rebohle | 1d3957fe6d | |
Philip Rebohle | c9abcfa656 | |
Hans-Kristian Arntzen | 03427c6ee6 | |
Hans-Kristian Arntzen | 09682f8417 | |
Hans-Kristian Arntzen | 6273780e50 | |
Hans-Kristian Arntzen | 6e915dd2c0 | |
Philip Rebohle | 34f5fc6a31 | |
Hans-Kristian Arntzen | 63530501a5 | |
Hans-Kristian Arntzen | dd6534f3f8 | |
Hans-Kristian Arntzen | 09997b4dd8 | |
Hans-Kristian Arntzen | 6d35f98e59 | |
Hans-Kristian Arntzen | e61cc0234a | |
Hans-Kristian Arntzen | c54895b4b7 | |
Hans-Kristian Arntzen | a6700d3d85 | |
Hans-Kristian Arntzen | f0cac9d97c | |
Hans-Kristian Arntzen | 08c0ea209f | |
Hans-Kristian Arntzen | 64d42c08ee | |
Hans-Kristian Arntzen | 3d8ef2b349 | |
Hans-Kristian Arntzen | 33b9166fec | |
Hans-Kristian Arntzen | 972ce74ac6 | |
Robin Kertels | 5f97d1eb70 | |
Robin Kertels | a6ea442819 | |
Hans-Kristian Arntzen | 365dd05557 | |
Hans-Kristian Arntzen | 5017b3723c | |
Hans-Kristian Arntzen | 6a4f2842cb | |
Hans-Kristian Arntzen | 18a5315db4 | |
Hans-Kristian Arntzen | 7c228139c3 | |
Hans-Kristian Arntzen | 30b4abcea1 | |
Hans-Kristian Arntzen | 17b1ffb41a | |
Hans-Kristian Arntzen | f9da3bf564 | |
Hans-Kristian Arntzen | 5c70a24de1 | |
Hans-Kristian Arntzen | c6149b47cd | |
Hans-Kristian Arntzen | cc08339624 | |
Hans-Kristian Arntzen | 422f6804fb | |
Georg Lehmann | 7d4ed66881 | |
Georg Lehmann | 14a06680d9 | |
Hans-Kristian Arntzen | c9bac85dd1 | |
Hans-Kristian Arntzen | 409dc57645 | |
Hans-Kristian Arntzen | b330900659 | |
Hans-Kristian Arntzen | 92a8c0ad78 | |
Hans-Kristian Arntzen | c864f1322f | |
Philip Rebohle | 9a408367dc | |
Philip Rebohle | 51e6b2bbbe | |
Philip Rebohle | 94f82d1085 | |
Philip Rebohle | 1a68267962 | |
Philip Rebohle | c4f88951fc | |
Philip Rebohle | 9673ac173d | |
Philip Rebohle | 3783eaf4f7 | |
Philip Rebohle | 024ef02f9b | |
Philip Rebohle | 549d4ee63f | |
Philip Rebohle | 6186cc1f0e | |
Philip Rebohle | 2c92ab7d1e | |
Philip Rebohle | ba04b02bf6 | |
Hans-Kristian Arntzen | 9fbae668fe | |
Hans-Kristian Arntzen | ce45297695 | |
LemiSt24 | c411d0d0c2 |
|
@ -15,7 +15,7 @@ jobs:
|
|||
|
||||
- name: Build release
|
||||
id: build-release
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export VERSION_NAME="${GITHUB_REF##*/}-${GITHUB_SHA##*/}"
|
||||
|
|
|
@ -18,7 +18,7 @@ jobs:
|
|||
|
||||
- name: Build MinGW x86
|
||||
id: build-mingw-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win32.txt --buildtype release build-mingw-x86
|
||||
|
@ -26,7 +26,7 @@ jobs:
|
|||
|
||||
- name: Build MinGW x64
|
||||
id: build-mingw-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
meson -Denable_tests=True -Denable_extras=True --cross-file=build-win64.txt --buildtype release build-mingw-x64
|
||||
|
@ -34,7 +34,7 @@ jobs:
|
|||
|
||||
- name: Build Native GCC x86
|
||||
id: build-native-gcc-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="gcc -m32"
|
||||
|
@ -45,7 +45,7 @@ jobs:
|
|||
|
||||
- name: Build Native GCC x64
|
||||
id: build-native-gcc-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="gcc"
|
||||
|
@ -55,7 +55,7 @@ jobs:
|
|||
|
||||
- name: Build Native Clang x86
|
||||
id: build-native-clang-x86
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="clang -m32"
|
||||
|
@ -66,7 +66,7 @@ jobs:
|
|||
|
||||
- name: Build Native Clang x64
|
||||
id: build-native-clang-x64
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v7
|
||||
uses: Joshua-Ashton/arch-mingw-github-action@v8
|
||||
with:
|
||||
command: |
|
||||
export CC="clang"
|
||||
|
|
70
README.md
70
README.md
|
@ -22,18 +22,19 @@ There are some hard requirements on drivers to be able to implement D3D12 in a r
|
|||
- `VK_EXT_descriptor_indexing` with at least 1000000 UpdateAfterBind descriptors for all types except UniformBuffer.
|
||||
Essentially all features in `VkPhysicalDeviceDescriptorIndexingFeatures` must be supported.
|
||||
- `VK_KHR_timeline_semaphore`
|
||||
- `VK_KHR_create_renderpass2`
|
||||
- `VK_KHR_sampler_mirror_clamp_to_edge`
|
||||
- `VK_EXT_robustness2`
|
||||
- `VK_KHR_separate_depth_stencil_layouts`
|
||||
- `VK_KHR_bind_memory2`
|
||||
- `VK_KHR_copy_commands2`
|
||||
- `VK_KHR_dynamic_rendering`
|
||||
- `VK_EXT_extended_dynamic_state`
|
||||
- `VK_EXT_extended_dynamic_state2`
|
||||
|
||||
Some notable extensions that **should** be supported for optimal or correct behavior.
|
||||
These extensions will likely become mandatory later.
|
||||
|
||||
- `VK_KHR_buffer_device_address`
|
||||
- `VK_EXT_extended_dynamic_state`
|
||||
- `VK_EXT_image_view_min_lod`
|
||||
|
||||
`VK_VALVE_mutable_descriptor_type` is also highly recommended, but not mandatory.
|
||||
|
@ -41,20 +42,16 @@ These extensions will likely become mandatory later.
|
|||
### AMD (RADV)
|
||||
|
||||
For AMD, RADV is the recommended driver and the one that sees most testing on AMD GPUs.
|
||||
The recommendation here is to use a driver built from Git.
|
||||
The minimum requirement at the moment is Mesa 22.0 since it supports `VK_KHR_dynamic_rendering`.
|
||||
|
||||
NOTE: For older Mesa versions, use the v2.6 release.
|
||||
|
||||
### NVIDIA
|
||||
|
||||
The [Vulkan beta drivers](https://developer.nvidia.com/vulkan-driver) generally contain the latest
|
||||
driver fixes that we identify while getting games to work.
|
||||
At least Linux 455.26.01 (2020-10-20) is recommended as it contains fixes for:
|
||||
|
||||
> Reduce host memory consumption for descriptor memory when VkDescriptorSetVariableDescriptorCountAllocateInfo is used.
|
||||
|
||||
> Fixed a bug in a barrier optimization that allowed some back-to-back copies to run unordered
|
||||
|
||||
These fixes should find their way into stable drivers eventually, but if you're having issues, test the latest development drivers,
|
||||
as that is what we test against.
|
||||
The latest drivers (stable, beta or Vulkan beta tracks) are always preferred.
|
||||
If you're having problems, always try the latest drivers.
|
||||
|
||||
### Intel
|
||||
|
||||
|
@ -188,6 +185,39 @@ commas or semicolons.
|
|||
- `VKD3D_PROFILE_PATH` - If profiling is enabled in the build, a profiling block is
|
||||
emitted to `${VKD3D_PROFILE_PATH}.${pid}`.
|
||||
|
||||
## Shader cache
|
||||
|
||||
By default, vkd3d-proton manages its own driver cache.
|
||||
This cache is intended to cache DXBC/DXIL -> SPIR-V conversion.
|
||||
This reduces stutter (when pipelines are created at the last minute and the application relies on a hot driver cache)
|
||||
and load times (when applications do the right thing and load PSOs up front).
|
||||
|
||||
The behavior is designed to be close to that of the DXVK state cache.
|
||||
|
||||
#### Default behavior
|
||||
|
||||
`vkd3d-proton.cache` (and `vkd3d-proton.cache.write`) are placed in the current working directory.
|
||||
Generally, this is the game install folder when running in Steam.
|
||||
|
||||
#### Custom directory
|
||||
|
||||
`VKD3D_SHADER_CACHE_PATH=/path/to/directory` overrides the directory where `vkd3d-proton.cache` is placed.
|
||||
|
||||
#### Disable cache
|
||||
|
||||
`VKD3D_SHADER_CACHE_PATH=0` disables the internal cache, and any caching would have to be explicitly managed
|
||||
by the application.
|
||||
|
||||
### Behavior of ID3D12PipelineLibrary
|
||||
|
||||
When the explicit shader cache is used, the need for application-managed pipeline libraries is greatly diminished,
|
||||
and the cache applications interact with is a dummy cache.
|
||||
If the vkd3d-proton shader cache is disabled, ID3D12PipelineLibrary stores everything relevant for a full cache,
|
||||
i.e. SPIR-V and PSO driver cache blob.
|
||||
`VKD3D_CONFIG=pipeline_library_app_cache` is an alternative to `VKD3D_SHADER_CACHE_PATH=0` and can be
|
||||
automatically enabled based on app profiles in the future, where relevant, if applications manage the caches better
|
||||
than vkd3d-proton can do automagically.
|
||||
|
||||
## CPU profiling (development)
|
||||
|
||||
Pass `-Denable_profiling=true` to Meson to enable a profiled build. With a profiled build, use the `VKD3D_PROFILE_PATH` environment variable.
|
||||
|
@ -218,6 +248,17 @@ pass `-Denable_renderdoc=true` to Meson.
|
|||
made on first encounter with the target shader.
|
||||
If both are set, the capture counter is only incremented and considered when a submission contains the use of the target shader.
|
||||
|
||||
### Breadcrumbs debugging
|
||||
|
||||
For debugging GPU hangs, it's useful to know where crashes happen.
|
||||
If the build has trace enabled (non-release builds), breadcrumbs support is also enabled.
|
||||
|
||||
`VKD3D_CONFIG=breadcrumbs` will instrument command lists with `VK_AMD_buffer_marker` or `VK_NV_device_checkpoints`.
|
||||
On GPU device lost or timeout, crash dumps are written to the log.
|
||||
For best results on RADV, use `RADV_DEBUG=syncshaders`. The logs will print a digested form of the command lists
|
||||
which were executing at the time, and attempt to narrow down the possible range of commands which could
|
||||
have caused a crash.
|
||||
|
||||
### Shader logging
|
||||
|
||||
It is possible to log the output of replaced shaders, essentially a custom shader printf. To enable this feature, `VK_KHR_buffer_device_address` must be supported.
|
||||
|
@ -229,8 +270,11 @@ and avoids any possible accidental hiding of bugs by introducing validation laye
|
|||
Using `debugPrintEXT` is also possible if that fits better with your debugging scenario.
|
||||
With this shader replacement scheme, we're able to add shader logging as unintrusively as possible.
|
||||
|
||||
Replaced shaders will need to include `debug_channel.h` from `include/shader-debug`.
|
||||
Use `glslc -I/path/to/vkd3d-proton/include/shader-debug --target-env=vulkan1.1` when compiling replaced shaders.
|
||||
```
|
||||
# Inside folder full of override shaders, build everything with:
|
||||
make -C /path/to/include/shader-debug M=$PWD
|
||||
```
|
||||
The shader can then `#include "debug_channel.h"` and use the various functions below.
|
||||
|
||||
```
|
||||
void DEBUG_CHANNEL_INIT(uvec3 ID);
|
||||
|
|
|
@ -456,13 +456,8 @@ static void cxg_mesh_create(ID3D12Device *device, float inner_radius, float oute
|
|||
float r0, r1, r2;
|
||||
float angle, da;
|
||||
|
||||
if (!(vertices = calloc(tooth_count, 12 * sizeof(*vertices))))
|
||||
return;
|
||||
if (!(faces = calloc(tooth_count, 20 * sizeof(*faces))))
|
||||
{
|
||||
free(vertices);
|
||||
return;
|
||||
}
|
||||
vertices = calloc(tooth_count, 12 * sizeof(*vertices));
|
||||
faces = calloc(tooth_count, 20 * sizeof(*faces));
|
||||
|
||||
r0 = inner_radius;
|
||||
r1 = outer_radius - tooth_depth / 2.0f;
|
||||
|
|
|
@ -27,9 +27,12 @@
|
|||
#include <stdint.h>
|
||||
#include <limits.h>
|
||||
#include <stdbool.h>
|
||||
#include <assert.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
|
@ -44,11 +47,13 @@
|
|||
|
||||
static inline uint64_t align64(uint64_t addr, uint64_t alignment)
|
||||
{
|
||||
assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
|
||||
return (addr + (alignment - 1)) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
static inline size_t align(size_t addr, size_t alignment)
|
||||
{
|
||||
assert(alignment > 0 && (alignment & (alignment - 1)) == 0);
|
||||
return (addr + (alignment - 1)) & ~(alignment - 1);
|
||||
}
|
||||
|
||||
|
@ -118,8 +123,7 @@ static inline unsigned int vkd3d_bitmask_tzcnt32(uint32_t mask)
|
|||
{
|
||||
#ifdef _MSC_VER
|
||||
unsigned long result;
|
||||
_BitScanForward(&result, mask) ? result : 32;
|
||||
return result;
|
||||
return _BitScanForward(&result, mask) ? result : 32;
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
return mask ? __builtin_ctz(mask) : 32;
|
||||
#else
|
||||
|
@ -290,4 +294,36 @@ static inline void *void_ptr_offset(void *ptr, size_t offset)
|
|||
#define VKD3D_THREAD_LOCAL __thread
|
||||
#endif
|
||||
|
||||
static inline uint64_t vkd3d_get_current_time_ns(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER li, lf;
|
||||
uint64_t whole, part;
|
||||
QueryPerformanceCounter(&li);
|
||||
QueryPerformanceFrequency(&lf);
|
||||
whole = (li.QuadPart / lf.QuadPart) * 1000000000;
|
||||
part = ((li.QuadPart % lf.QuadPart) * 1000000000) / lf.QuadPart;
|
||||
return whole + part;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma intrinsic(__rdtsc)
|
||||
#endif
|
||||
|
||||
static inline uint64_t vkd3d_get_current_time_ticks(void)
|
||||
{
|
||||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
return __builtin_ia32_rdtsc();
|
||||
#else
|
||||
return vkd3d_get_current_time_ns();
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* __VKD3D_COMMON_H */
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef __VKD3D_FILE_UTILS_H
|
||||
#define __VKD3D_FILE_UTILS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct vkd3d_memory_mapped_file
|
||||
{
|
||||
void *mapped;
|
||||
size_t mapped_size;
|
||||
};
|
||||
|
||||
/* On failure, ensures the struct is cleared to zero.
|
||||
* A reference to the file is kept through the memory mapping. */
|
||||
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file);
|
||||
/* Clears out file on unmap. */
|
||||
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file);
|
||||
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path);
|
||||
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path);
|
||||
bool vkd3d_file_delete(const char *path);
|
||||
FILE *vkd3d_file_open_exclusive_write(const char *path);
|
||||
|
||||
#endif
|
|
@ -23,6 +23,7 @@
|
|||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "vkd3d_common.h"
|
||||
#include "vkd3d_debug.h"
|
||||
|
||||
static inline void *vkd3d_malloc(size_t size)
|
||||
|
@ -57,12 +58,12 @@ static inline void vkd3d_free(void *ptr)
|
|||
bool vkd3d_array_reserve(void **elements, size_t *capacity,
|
||||
size_t element_count, size_t element_size);
|
||||
|
||||
static inline void *vkd3d_malloc_aligned(size_t size, size_t align)
|
||||
static inline void *vkd3d_malloc_aligned(size_t size, size_t alignment)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
return _aligned_malloc(size, align);
|
||||
return _aligned_malloc(size, alignment);
|
||||
#else
|
||||
return aligned_alloc(align, size);
|
||||
return aligned_alloc(alignment, align(size, alignment));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -37,6 +37,8 @@ int vkd3d_dlclose(vkd3d_module_t handle);
|
|||
|
||||
const char *vkd3d_dlerror(void);
|
||||
|
||||
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size);
|
||||
|
||||
bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX]);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -21,39 +21,15 @@
|
|||
|
||||
#include "vkd3d_windows.h"
|
||||
#include "vkd3d_spinlock.h"
|
||||
#include <stdint.h>
|
||||
#include "vkd3d_common.h"
|
||||
|
||||
#ifdef VKD3D_ENABLE_PROFILING
|
||||
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#else
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
void vkd3d_init_profiling(void);
|
||||
bool vkd3d_uses_profiling(void);
|
||||
unsigned int vkd3d_profiling_register_region(const char *name, spinlock_t *lock, uint32_t *latch);
|
||||
void vkd3d_profiling_notify_work(unsigned int index, uint64_t start_ticks, uint64_t end_ticks, unsigned int iteration_count);
|
||||
|
||||
static inline uint64_t vkd3d_profiling_get_tick_count(void)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
LARGE_INTEGER li, lf;
|
||||
uint64_t whole, part;
|
||||
QueryPerformanceCounter(&li);
|
||||
QueryPerformanceFrequency(&lf);
|
||||
whole = (li.QuadPart / lf.QuadPart) * 1000000000;
|
||||
part = ((li.QuadPart % lf.QuadPart) * 1000000000) / lf.QuadPart;
|
||||
return whole + part;
|
||||
#else
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
|
||||
return ts.tv_sec * 1000000000ll + ts.tv_nsec;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define VKD3D_REGION_DECL(name) \
|
||||
static uint32_t _vkd3d_region_latch_##name; \
|
||||
static spinlock_t _vkd3d_region_lock_##name; \
|
||||
|
@ -65,12 +41,12 @@ static inline uint64_t vkd3d_profiling_get_tick_count(void)
|
|||
do { \
|
||||
if (!(_vkd3d_region_index_##name = vkd3d_atomic_uint32_load_explicit(&_vkd3d_region_latch_##name, vkd3d_memory_order_acquire))) \
|
||||
_vkd3d_region_index_##name = vkd3d_profiling_register_region(#name, &_vkd3d_region_lock_##name, &_vkd3d_region_latch_##name); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_profiling_get_tick_count(); \
|
||||
_vkd3d_region_begin_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
} while(0)
|
||||
|
||||
#define VKD3D_REGION_END_ITERATIONS(name, iter) \
|
||||
do { \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_profiling_get_tick_count(); \
|
||||
_vkd3d_region_end_tick_##name = vkd3d_get_current_time_ticks(); \
|
||||
vkd3d_profiling_notify_work(_vkd3d_region_index_##name, _vkd3d_region_begin_tick_##name, _vkd3d_region_end_tick_##name, iter); \
|
||||
} while(0)
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@ WCHAR *vkd3d_dup_demangled_entry_point(const char *str);
|
|||
char *vkd3d_dup_demangled_entry_point_ascii(const char *str);
|
||||
|
||||
bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b);
|
||||
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b);
|
||||
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t n, const WCHAR *b);
|
||||
|
||||
char *vkd3d_strdup(const char *str);
|
||||
|
|
|
@ -1,35 +1,6 @@
|
|||
#ifndef __VULKAN_PRIVATE_EXTENSIONS_H__
|
||||
#define __VULKAN_PRIVATE_EXTENSIONS_H__
|
||||
|
||||
/* Temporary kludge since these types are not public. */
|
||||
|
||||
#define VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE ((VkStructureType)1000420000)
|
||||
#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_BINDING_REFERENCE_VALVE ((VkStructureType)1000420001)
|
||||
#define VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_HOST_MAPPING_INFO_VALVE ((VkStructureType)1000420002)
|
||||
|
||||
#define VK_VALVE_DESCRIPTOR_SET_HOST_MAPPING_SPEC_VERSION 1
|
||||
#define VK_VALVE_DESCRIPTOR_SET_HOST_MAPPING_EXTENSION_NAME "VK_VALVE_descriptor_set_host_mapping"
|
||||
typedef struct VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
VkBool32 descriptorSetHostMapping;
|
||||
} VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE;
|
||||
|
||||
typedef struct VkDescriptorSetBindingReferenceVALVE {
|
||||
VkStructureType sType;
|
||||
const void* pNext;
|
||||
VkDescriptorSetLayout descriptorSetLayout;
|
||||
uint32_t binding;
|
||||
} VkDescriptorSetBindingReferenceVALVE;
|
||||
|
||||
typedef struct VkDescriptorSetLayoutHostMappingInfoVALVE {
|
||||
VkStructureType sType;
|
||||
void* pNext;
|
||||
size_t descriptorOffset;
|
||||
uint32_t descriptorSize;
|
||||
} VkDescriptorSetLayoutHostMappingInfoVALVE;
|
||||
|
||||
typedef void (VKAPI_PTR *PFN_vkGetDescriptorSetLayoutHostMappingInfoVALVE)(VkDevice device, const VkDescriptorSetBindingReferenceVALVE* pBindingReference, VkDescriptorSetLayoutHostMappingInfoVALVE* pHostMapping);
|
||||
typedef void (VKAPI_PTR *PFN_vkGetDescriptorSetHostMappingVALVE)(VkDevice device, VkDescriptorSet descriptorSet, void** ppData);
|
||||
/* Nothing here at the moment. Add hacks here! */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,71 @@
|
|||
INCLUDE_DIR := $(CURDIR)
|
||||
|
||||
VERT_SOURCES := $(wildcard $(M)/*.vert)
|
||||
FRAG_SOURCES := $(wildcard $(M)/*.frag)
|
||||
COMP_SOURCES := $(wildcard $(M)/*.comp)
|
||||
TESC_SOURCES := $(wildcard $(M)/*.tesc)
|
||||
TESE_SOURCES := $(wildcard $(M)/*.tese)
|
||||
GEOM_SOURCES := $(wildcard $(M)/*.geom)
|
||||
RGEN_SOURCES := $(wildcard $(M)/*.rgen)
|
||||
RINT_SOURCES := $(wildcard $(M)/*.rint)
|
||||
RAHIT_SOURCES := $(wildcard $(M)/*.rahit)
|
||||
RCHIT_SOURCES := $(wildcard $(M)/*.rchit)
|
||||
RMISS_SOURCES := $(wildcard $(M)/*.rmiss)
|
||||
RCALL_SOURCES := $(wildcard $(M)/*.rcall)
|
||||
|
||||
SPV_OBJECTS := \
|
||||
$(VERT_SOURCES:.vert=.spv) \
|
||||
$(FRAG_SOURCES:.frag=.spv) \
|
||||
$(COMP_SOURCES:.comp=.spv) \
|
||||
$(TESC_SOURCES:.tesc=.spv) \
|
||||
$(TESE_SOURCES:.tese=.spv) \
|
||||
$(GEOM_SOURCES:.geom=.spv) \
|
||||
$(RGEN_SOURCES:.rgen=.spv) \
|
||||
$(RINT_SOURCES:.rint=.spv) \
|
||||
$(RAHIT_SOURCES:.rahit=.spv) \
|
||||
$(RCHIT_SOURCES:.rchit=.spv) \
|
||||
$(RMISS_SOURCES:.rmiss=.spv) \
|
||||
$(RCALL_SOURCES:.rcall=.spv)
|
||||
|
||||
%.spv: %.vert
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.frag
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 -DDEBUG_CHANNEL_HELPER_LANES $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.comp
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.geom
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.tesc
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.tese
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rgen
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rint
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rahit
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rchit
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rmiss
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
%.spv: %.rcall
|
||||
glslc -o $@ $< -I$(INCLUDE_DIR) --target-env=vulkan1.1 --target-spv=spv1.4 $(GLSLC_FLAGS)
|
||||
|
||||
all: $(SPV_OBJECTS)
|
||||
|
||||
clean:
|
||||
rm -f $(SPV_OBJECTS)
|
||||
|
||||
.PHONY: clean
|
|
@ -23,14 +23,17 @@
|
|||
#extension GL_ARB_gpu_shader_int64 : require
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#ifdef DEBUG_CHANNEL_HELPER_LANES
|
||||
#extension GL_EXT_demote_to_helper_invocation : require
|
||||
#endif
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) buffer ControlBlock
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer ControlBlock
|
||||
{
|
||||
uint message_counter;
|
||||
uint instance_counter;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) buffer RingBuffer
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer RingBuffer
|
||||
{
|
||||
uint data[];
|
||||
};
|
||||
|
@ -48,24 +51,73 @@ const uint DEBUG_CHANNEL_FMT_F32 = 2;
|
|||
const uint DEBUG_CHANNEL_FMT_HEX_ALL = DEBUG_CHANNEL_FMT_HEX * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_FMT_I32_ALL = DEBUG_CHANNEL_FMT_I32 * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_FMT_F32_ALL = DEBUG_CHANNEL_FMT_F32 * 0x55555555u;
|
||||
const uint DEBUG_CHANNEL_WORD_COOKIE = 0xdeadca70u; /* Let host fish for this cookie in device lost scenarios. */
|
||||
|
||||
uint DEBUG_CHANNEL_INSTANCE_COUNTER;
|
||||
uvec3 DEBUG_CHANNEL_ID;
|
||||
|
||||
/* Picks one invocation of the subgroup to act on its behalf.
 * The elected invocation must be able to have side effects, so when
 * DEBUG_CHANNEL_HELPER_LANES is defined, helper invocations never take part
 * in the election and always get false back. */
#ifdef DEBUG_CHANNEL_HELPER_LANES
bool DEBUG_CHANNEL_ELECT()
{
    /* Helper invocations bail out before the election is performed. */
    if (helperInvocationEXT())
        return false;

    return subgroupElect();
}
#else
bool DEBUG_CHANNEL_ELECT()
{
    bool is_elected = subgroupElect();
    return is_elected;
}
#endif
|
||||
|
||||
void DEBUG_CHANNEL_INIT(uvec3 id)
|
||||
{
|
||||
if (!DEBUG_SHADER_RING_ACTIVE)
|
||||
return;
|
||||
DEBUG_CHANNEL_ID = id;
|
||||
uint inst;
|
||||
if (subgroupElect())
|
||||
#ifdef DEBUG_CHANNEL_HELPER_LANES
|
||||
if (!helperInvocationEXT())
|
||||
{
|
||||
/* Elect and broadcast must happen without helper lanes here.
|
||||
* We must perform the instance increment with side effects,
|
||||
* and broadcast first must pick the elected lane. */
|
||||
if (subgroupElect())
|
||||
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
|
||||
}
|
||||
/* Helper lanes cannot write debug messages, since they cannot have side effects.
|
||||
* Leave it undefined, and we should ensure SGPR propagation either way ... */
|
||||
#else
|
||||
if (DEBUG_CHANNEL_ELECT())
|
||||
inst = atomicAdd(ControlBlock(DEBUG_SHADER_ATOMIC_BDA).instance_counter, 1u);
|
||||
DEBUG_CHANNEL_INSTANCE_COUNTER = subgroupBroadcastFirst(inst);
|
||||
#endif
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint num_words, uint fmt)
|
||||
/* Initializes the debug channel globals with a caller-provided instance value
 * instead of allocating one. Does nothing when the debug ring is inactive. */
void DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3 id, uint inst)
{
    if (DEBUG_SHADER_RING_ACTIVE)
    {
        DEBUG_CHANNEL_ID = id;
        DEBUG_CHANNEL_INSTANCE_COUNTER = inst;
    }
}
|
||||
|
||||
/* Publishes a message by writing its first word: the word count combined
 * with DEBUG_CHANNEL_WORD_COOKIE. The store is bracketed by buffer memory barriers. */
void DEBUG_CHANNEL_UNLOCK_MESSAGE(RingBuffer buf, uint offset, uint num_words)
{
    uint first_word_index = (offset + 0) & DEBUG_SHADER_RING_MASK;
    uint first_word = num_words | DEBUG_CHANNEL_WORD_COOKIE;

    memoryBarrierBuffer();

    /* This word has to become visible last, so the ring thread never reads a bogus message.
     * A host thread that observes num_words == 0 knows a message was allocated,
     * but the write is not necessarily complete yet.
     * In a device lost scenario, the host can try to fish for valid messages. */
    buf.data[first_word_index] = first_word;

    memoryBarrierBuffer();
}
|
||||
|
||||
void DEBUG_CHANNEL_WRITE_HEADER(RingBuffer buf, uint offset, uint fmt)
|
||||
{
|
||||
buf.data[(offset + 0) & DEBUG_SHADER_RING_MASK] = num_words;
|
||||
buf.data[(offset + 1) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH);
|
||||
buf.data[(offset + 2) & DEBUG_SHADER_RING_MASK] = uint(DEBUG_SHADER_HASH >> 32);
|
||||
buf.data[(offset + 3) & DEBUG_SHADER_RING_MASK] = DEBUG_CHANNEL_INSTANCE_COUNTER;
|
||||
|
@ -87,7 +139,9 @@ void DEBUG_CHANNEL_MSG_()
|
|||
return;
|
||||
uint words = 8;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(RingBuffer(DEBUG_SHADER_RING_BDA), offset, words, 0);
|
||||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, 0);
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
|
||||
|
@ -97,8 +151,9 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 9;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
|
||||
|
@ -108,9 +163,10 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 10;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
|
||||
|
@ -120,10 +176,11 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 11;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
|
||||
|
@ -133,11 +190,12 @@ void DEBUG_CHANNEL_MSG_(uint fmt, uint v0, uint v1, uint v2, uint v3)
|
|||
RingBuffer buf = RingBuffer(DEBUG_SHADER_RING_BDA);
|
||||
uint words = 12;
|
||||
uint offset = DEBUG_CHANNEL_ALLOCATE(words);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, words, fmt);
|
||||
DEBUG_CHANNEL_WRITE_HEADER(buf, offset, fmt);
|
||||
buf.data[(offset + 8) & DEBUG_SHADER_RING_MASK] = v0;
|
||||
buf.data[(offset + 9) & DEBUG_SHADER_RING_MASK] = v1;
|
||||
buf.data[(offset + 10) & DEBUG_SHADER_RING_MASK] = v2;
|
||||
buf.data[(offset + 11) & DEBUG_SHADER_RING_MASK] = v3;
|
||||
DEBUG_CHANNEL_UNLOCK_MESSAGE(buf, offset, words);
|
||||
}
|
||||
|
||||
void DEBUG_CHANNEL_MSG()
|
||||
|
@ -205,4 +263,76 @@ void DEBUG_CHANNEL_MSG(float v0, float v1, float v2, float v3)
|
|||
DEBUG_CHANNEL_MSG_(DEBUG_CHANNEL_FMT_F32_ALL, floatBitsToUint(v0), floatBitsToUint(v1), floatBitsToUint(v2), floatBitsToUint(v3));
|
||||
}
|
||||
|
||||
/* DEBUG_CHANNEL_MSG_UNIFORM overloads: forward to the matching DEBUG_CHANNEL_MSG
 * overload, but only from the invocation for which DEBUG_CHANNEL_ELECT() is true. */

/* uint payloads. */
void DEBUG_CHANNEL_MSG_UNIFORM(uint v0)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0);
}

void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1);
}

void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2);
}

void DEBUG_CHANNEL_MSG_UNIFORM(uint v0, uint v1, uint v2, uint v3)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}

/* int payloads. */
void DEBUG_CHANNEL_MSG_UNIFORM(int v0)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0);
}

void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1);
}

void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2);
}

void DEBUG_CHANNEL_MSG_UNIFORM(int v0, int v1, int v2, int v3)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}

/* float payloads. */
void DEBUG_CHANNEL_MSG_UNIFORM(float v0)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0);
}

void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1);
}

void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2);
}

void DEBUG_CHANNEL_MSG_UNIFORM(float v0, float v1, float v2, float v3)
{
    if (!DEBUG_CHANNEL_ELECT())
        return;
    DEBUG_CHANNEL_MSG(v0, v1, v2, v3);
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -59,35 +59,39 @@
|
|||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
enum vkd3d_config_flags
|
||||
{
|
||||
VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001,
|
||||
VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS = 0x00000002,
|
||||
VKD3D_CONFIG_FLAG_DEBUG_UTILS = 0x00000004,
|
||||
VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV = 0x00000008,
|
||||
VKD3D_CONFIG_FLAG_DXR = 0x00000010,
|
||||
VKD3D_CONFIG_FLAG_SINGLE_QUEUE = 0x00000020,
|
||||
VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS = 0x00000040,
|
||||
VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE = 0x00000080,
|
||||
VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE = 0x00000100,
|
||||
VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE = 0x00000200,
|
||||
VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV = 0x00000400,
|
||||
VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET = 0x00000800,
|
||||
VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE = 0x00001000,
|
||||
VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED = 0x00002000,
|
||||
VKD3D_CONFIG_FLAG_DXR11 = 0x00004000,
|
||||
VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION = 0x00008000,
|
||||
VKD3D_CONFIG_FLAG_WORKAROUND_MISSING_COLOR_COMPUTE_BARRIERS = 0x00010000,
|
||||
VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE = 0x00020000,
|
||||
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV = 0x00040000,
|
||||
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV = 0x00080000,
|
||||
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG = 0x00100000,
|
||||
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV = 0x00200000,
|
||||
VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET = 0x00400000,
|
||||
VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR = 0x00800000,
|
||||
VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS = 0x01000000,
|
||||
VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER = 0x02000000,
|
||||
};
|
||||
#define VKD3D_CONFIG_FLAG_VULKAN_DEBUG (1ull << 0)
|
||||
#define VKD3D_CONFIG_FLAG_SKIP_APPLICATION_WORKAROUNDS (1ull << 1)
|
||||
#define VKD3D_CONFIG_FLAG_DEBUG_UTILS (1ull << 2)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_STATIC_CBV (1ull << 3)
|
||||
#define VKD3D_CONFIG_FLAG_DXR (1ull << 4)
|
||||
#define VKD3D_CONFIG_FLAG_SINGLE_QUEUE (1ull << 5)
|
||||
#define VKD3D_CONFIG_FLAG_DESCRIPTOR_QA_CHECKS (1ull << 6)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_RTV_EXCLUSIVE_QUEUE (1ull << 7)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_DSV_EXCLUSIVE_QUEUE (1ull << 8)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_MINIMUM_SUBGROUP_SIZE (1ull << 9)
|
||||
#define VKD3D_CONFIG_FLAG_NO_UPLOAD_HVV (1ull << 10)
|
||||
#define VKD3D_CONFIG_FLAG_LOG_MEMORY_BUDGET (1ull << 11)
|
||||
#define VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE (1ull << 12)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_HOST_CACHED (1ull << 13)
|
||||
#define VKD3D_CONFIG_FLAG_DXR11 (1ull << 14)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_NO_INVARIANT_POSITION (1ull << 15)
|
||||
#define VKD3D_CONFIG_FLAG_GLOBAL_PIPELINE_CACHE (1ull << 16)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_NO_SERIALIZE_SPIRV (1ull << 17)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_SANITIZE_SPIRV (1ull << 18)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_LOG (1ull << 19)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_SPIRV (1ull << 20)
|
||||
#define VKD3D_CONFIG_FLAG_MUTABLE_SINGLE_SET (1ull << 21)
|
||||
#define VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR (1ull << 22)
|
||||
#define VKD3D_CONFIG_FLAG_RECYCLE_COMMAND_POOLS (1ull << 23)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_IGNORE_MISMATCH_DRIVER (1ull << 24)
|
||||
#define VKD3D_CONFIG_FLAG_BREADCRUMBS (1ull << 25)
|
||||
#define VKD3D_CONFIG_FLAG_PIPELINE_LIBRARY_APP_CACHE_ONLY (1ull << 26)
|
||||
#define VKD3D_CONFIG_FLAG_SHADER_CACHE_SYNC (1ull << 27)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_RAW_VA_CBV (1ull << 28)
|
||||
#define VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV (1ull << 29)
|
||||
#define VKD3D_CONFIG_FLAG_ALLOW_SBT_COLLECTION (1ull << 30)
|
||||
#define VKD3D_CONFIG_FLAG_FORCE_NATIVE_FP16 (1ull << 31)
|
||||
#define VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK (1ull << 32)
|
||||
|
||||
typedef HRESULT (*PFN_vkd3d_signal_event)(HANDLE event);
|
||||
|
||||
|
|
|
@ -3644,8 +3644,8 @@ interface ID3D12CommandQueue : ID3D12Pageable
|
|||
ID3D12Heap *heap,
|
||||
UINT range_count,
|
||||
const D3D12_TILE_RANGE_FLAGS *range_flags,
|
||||
UINT *heap_range_offsets,
|
||||
UINT *range_tile_counts,
|
||||
const UINT *heap_range_offsets,
|
||||
const UINT *range_tile_counts,
|
||||
D3D12_TILE_MAPPING_FLAGS flags);
|
||||
|
||||
void CopyTileMappings(ID3D12Resource *dst_resource,
|
||||
|
|
|
@ -241,6 +241,7 @@ struct vkd3d_shader_root_constant
|
|||
struct vkd3d_shader_root_descriptor
|
||||
{
|
||||
struct vkd3d_shader_resource_binding *binding;
|
||||
uint32_t raw_va_root_descriptor_index;
|
||||
};
|
||||
|
||||
struct vkd3d_shader_root_parameter
|
||||
|
@ -308,6 +309,9 @@ enum vkd3d_shader_target_extension
|
|||
* all in range, or all out of range. We can implement structured buffer vectorization of vec3,
|
||||
* but not byte address buffer. */
|
||||
VKD3D_SHADER_TARGET_EXTENSION_ASSUME_PER_COMPONENT_SSBO_ROBUSTNESS,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT,
|
||||
VKD3D_SHADER_TARGET_EXTENSION_COUNT,
|
||||
};
|
||||
|
||||
enum vkd3d_shader_quirk
|
||||
|
@ -656,6 +660,7 @@ struct vkd3d_shader_scan_info
|
|||
bool has_side_effects;
|
||||
bool needs_late_zs;
|
||||
bool discards;
|
||||
bool has_uav_counter;
|
||||
unsigned int patch_vertex_count;
|
||||
};
|
||||
|
||||
|
@ -749,7 +754,11 @@ int vkd3d_shader_compile_dxbc(const struct vkd3d_shader_code *dxbc,
|
|||
void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *code);
|
||||
|
||||
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
void vkd3d_shader_free_root_signature(struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
|
||||
/* FIXME: Add support for returning error messages (ID3DBlob). */
|
||||
|
@ -775,19 +784,65 @@ void vkd3d_shader_free_shader_signature(struct vkd3d_shader_signature *signature
|
|||
struct vkd3d_shader_library_entry_point
|
||||
{
|
||||
unsigned int identifier;
|
||||
VkShaderStageFlagBits stage;
|
||||
WCHAR *mangled_entry_point;
|
||||
WCHAR *plain_entry_point;
|
||||
char *real_entry_point;
|
||||
VkShaderStageFlagBits stage;
|
||||
};
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points(
|
||||
enum vkd3d_shader_subobject_kind
|
||||
{
|
||||
/* Matches DXIL for simplicity. */
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_STATE_OBJECT_CONFIG = 0,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE = 1,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE = 2,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION = 8,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG = 9,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG = 10,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP = 11,
|
||||
VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1 = 12,
|
||||
};
|
||||
|
||||
struct vkd3d_shader_library_subobject
|
||||
{
|
||||
enum vkd3d_shader_subobject_kind kind;
|
||||
unsigned int dxil_identifier;
|
||||
|
||||
/* All const pointers here point directly to the DXBC blob,
|
||||
* so they do not need to be freed.
|
||||
* Fortunately for us, the C strings are zero-terminated in the blob itself. */
|
||||
|
||||
/* In the blob, ASCII is used as identifier, where API uses wide strings, sigh ... */
|
||||
const char *name;
|
||||
|
||||
union
|
||||
{
|
||||
D3D12_RAYTRACING_PIPELINE_CONFIG1 pipeline_config;
|
||||
D3D12_RAYTRACING_SHADER_CONFIG shader_config;
|
||||
D3D12_STATE_OBJECT_CONFIG object_config;
|
||||
|
||||
/* Duped strings because API wants wide strings for no good reason. */
|
||||
D3D12_HIT_GROUP_DESC hit_group;
|
||||
D3D12_DXIL_SUBOBJECT_TO_EXPORTS_ASSOCIATION association;
|
||||
|
||||
struct
|
||||
{
|
||||
const void *data;
|
||||
size_t size;
|
||||
} payload;
|
||||
} data;
|
||||
};
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
|
||||
const D3D12_DXIL_LIBRARY_DESC *library_desc,
|
||||
unsigned int identifier,
|
||||
struct vkd3d_shader_library_entry_point **entry_points,
|
||||
size_t *entry_point_size, size_t *entry_point_count);
|
||||
size_t *entry_point_size, size_t *entry_point_count,
|
||||
struct vkd3d_shader_library_subobject **subobjects,
|
||||
size_t *subobjects_size, size_t *subobjects_count);
|
||||
|
||||
void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_entry_point *entry_points, size_t count);
|
||||
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count);
|
||||
|
||||
int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
||||
const char *export,
|
||||
|
@ -813,7 +868,8 @@ typedef int (*PFN_vkd3d_shader_compile_dxbc)(const struct vkd3d_shader_code *dxb
|
|||
typedef void (*PFN_vkd3d_shader_free_shader_code)(struct vkd3d_shader_code *code);
|
||||
|
||||
typedef int (*PFN_vkd3d_shader_parse_root_signature)(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash);
|
||||
typedef void (*PFN_vkd3d_shader_free_root_signature)(struct vkd3d_versioned_root_signature_desc *root_signature);
|
||||
|
||||
typedef int (*PFN_vkd3d_shader_serialize_root_signature)(
|
||||
|
|
|
@ -88,6 +88,9 @@ typedef void *HANDLE;
|
|||
|
||||
typedef const WCHAR* LPCWSTR;
|
||||
|
||||
#define _fseeki64(a, b, c) fseeko64(a, b, c)
|
||||
#define _ftelli64(a) ftello64(a)
|
||||
|
||||
/* GUID */
|
||||
# ifdef __WIDL__
|
||||
typedef struct
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_threads.h"
|
||||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <errno.h>
|
||||
|
@ -58,13 +60,13 @@ static FILE *vkd3d_log_file;
|
|||
|
||||
static void vkd3d_dbg_init_once(void)
|
||||
{
|
||||
const char *vkd3d_debug;
|
||||
char vkd3d_debug[VKD3D_PATH_MAX];
|
||||
unsigned int channel, i;
|
||||
|
||||
for (channel = 0; channel < VKD3D_DBG_CHANNEL_COUNT; channel++)
|
||||
{
|
||||
if (!(vkd3d_debug = getenv(env_for_channel[channel])))
|
||||
vkd3d_debug = "";
|
||||
if (!vkd3d_get_env_var(env_for_channel[channel], vkd3d_debug, sizeof(vkd3d_debug)))
|
||||
strncpy(vkd3d_debug, "", VKD3D_PATH_MAX);
|
||||
|
||||
for (i = 1; i < ARRAY_SIZE(debug_level_names); ++i)
|
||||
if (!strcmp(debug_level_names[i], vkd3d_debug))
|
||||
|
@ -75,7 +77,7 @@ static void vkd3d_dbg_init_once(void)
|
|||
vkd3d_dbg_level[channel] = VKD3D_DBG_LEVEL_FIXME;
|
||||
}
|
||||
|
||||
if ((vkd3d_debug = getenv("VKD3D_LOG_FILE")))
|
||||
if (vkd3d_get_env_var("VKD3D_LOG_FILE", vkd3d_debug, sizeof(vkd3d_debug)))
|
||||
{
|
||||
vkd3d_log_file = fopen(vkd3d_debug, "w");
|
||||
if (!vkd3d_log_file)
|
||||
|
@ -281,11 +283,11 @@ const char *debugstr_w(const WCHAR *wstr)
|
|||
|
||||
unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value)
|
||||
{
|
||||
const char *value = getenv(name);
|
||||
char value[VKD3D_PATH_MAX];
|
||||
unsigned long r;
|
||||
char *end_ptr;
|
||||
|
||||
if (value)
|
||||
if (vkd3d_get_env_var(name, value, sizeof(value)) && strlen(value) > 0)
|
||||
{
|
||||
errno = 0;
|
||||
r = strtoul(value, &end_ptr, 0);
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_file_utils.h"
|
||||
#include "vkd3d_debug.h"
|
||||
|
||||
/* For disk cache. */
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#include <io.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#endif
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Renames from_path to to_path, replacing to_path if it already exists.
 * Returns true on success, false otherwise. */
bool vkd3d_file_rename_overwrite(const char *from_path, const char *to_path)
{
#ifdef _WIN32
    DWORD status;

    if (MoveFileA(from_path, to_path))
        return true;

    status = GetLastError();

    /* MoveFileA does not replace an existing target; retry with ReplaceFileA in that case only. */
    if (status == ERROR_ALREADY_EXISTS)
        status = ReplaceFileA(to_path, from_path, NULL, 0, NULL, NULL) ? ERROR_SUCCESS : GetLastError();

    return status == ERROR_SUCCESS;
#else
    int rc = rename(from_path, to_path);
    return rc == 0;
#endif
}
|
||||
|
||||
bool vkd3d_file_rename_no_replace(const char *from_path, const char *to_path)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD code = ERROR_SUCCESS;
|
||||
if (!MoveFileA(from_path, to_path))
|
||||
code = GetLastError();
|
||||
return code == ERROR_SUCCESS;
|
||||
#else
|
||||
return renameat2(AT_FDCWD, from_path, AT_FDCWD, to_path, RENAME_NOREPLACE) == 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Deletes the file at path. Returns true on success, false otherwise. */
bool vkd3d_file_delete(const char *path)
{
#ifdef _WIN32
    if (DeleteFileA(path))
        return true;

    return GetLastError() == ERROR_SUCCESS;
#else
    int rc = unlink(path);
    return rc == 0;
#endif
}
|
||||
|
||||
/* Creates path for binary writing; the open fails if the file already exists.
 * Returns the opened stream, or NULL on failure. */
FILE *vkd3d_file_open_exclusive_write(const char *path)
{
#ifdef _WIN32
    /* From Fossilize. AFAIK, there is no direct way to do this through the FILE interface,
     * so we have to roundtrip through the jank POSIX layer.
     * wbx kinda works, but Wine warns about it, despite it working anyways.
     * Older MSVC runtimes do not support wbx. */
    FILE *stream;
    int fd;

    fd = _open(path, _O_BINARY | _O_WRONLY | _O_CREAT | _O_EXCL | _O_TRUNC | _O_SEQUENTIAL,
            _S_IWRITE | _S_IREAD);
    if (fd < 0)
        return NULL;

    /* On success _fdopen takes ownership of fd; on failure we still own it and must close it. */
    stream = _fdopen(fd, "wb");
    if (!stream)
        _close(fd);

    return stream;
#else
    FILE *stream = fopen(path, "wbx");
    return stream;
#endif
}
|
||||
|
||||
void vkd3d_file_unmap(struct vkd3d_memory_mapped_file *file)
|
||||
{
|
||||
if (file->mapped)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
UnmapViewOfFile(file->mapped);
|
||||
#else
|
||||
munmap(file->mapped, file->mapped_size);
|
||||
#endif
|
||||
}
|
||||
memset(file, 0, sizeof(*file));
|
||||
}
|
||||
|
||||
bool vkd3d_file_map_read_only(const char *path, struct vkd3d_memory_mapped_file *file)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
DWORD size_hi, size_lo;
|
||||
HANDLE file_mapping;
|
||||
HANDLE handle;
|
||||
#else
|
||||
struct stat stat_buf;
|
||||
int fd;
|
||||
#endif
|
||||
|
||||
file->mapped = NULL;
|
||||
file->mapped_size = 0;
|
||||
|
||||
#ifdef _WIN32
|
||||
handle = CreateFileA(path, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_DELETE, NULL,
|
||||
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN,
|
||||
INVALID_HANDLE_VALUE);
|
||||
if (handle == INVALID_HANDLE_VALUE)
|
||||
goto out;
|
||||
|
||||
size_lo = GetFileSize(handle, &size_hi);
|
||||
file->mapped_size = size_lo | (((uint64_t)size_hi) << 32);
|
||||
|
||||
file_mapping = CreateFileMappingA(handle, NULL, PAGE_READONLY, 0, 0, NULL);
|
||||
if (file_mapping == INVALID_HANDLE_VALUE)
|
||||
goto out;
|
||||
|
||||
file->mapped = MapViewOfFile(file_mapping, FILE_MAP_READ, 0, 0, file->mapped_size);
|
||||
CloseHandle(file_mapping);
|
||||
file_mapping = INVALID_HANDLE_VALUE;
|
||||
if (!file->mapped)
|
||||
{
|
||||
ERR("Failed to MapViewOfFile for %s.\n", path);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (handle != INVALID_HANDLE_VALUE)
|
||||
CloseHandle(handle);
|
||||
#else
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
goto out;
|
||||
|
||||
if (fstat(fd, &stat_buf) < 0)
|
||||
{
|
||||
ERR("Failed to fstat pipeline cache.\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Map private to make sure we get CoW behavior in case someone clobbers
|
||||
* the cache while in flight. We need to read data directly out of the cache. */
|
||||
file->mapped = mmap(NULL, stat_buf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
|
||||
if (file->mapped != MAP_FAILED)
|
||||
file->mapped_size = stat_buf.st_size;
|
||||
else
|
||||
goto out;
|
||||
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
#endif
|
||||
|
||||
if (!file->mapped)
|
||||
file->mapped_size = 0;
|
||||
return file->mapped != NULL;
|
||||
}
|
|
@ -4,6 +4,8 @@ vkd3d_common_src = [
|
|||
'utf8.c',
|
||||
'profiling.c',
|
||||
'string.c',
|
||||
'file_utils.c',
|
||||
'platform.c',
|
||||
]
|
||||
|
||||
vkd3d_common_lib = static_library('vkd3d_common', vkd3d_common_src, vkd3d_header_files,
|
||||
|
|
|
@ -18,6 +18,9 @@
|
|||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#if defined(__linux__)
|
||||
|
||||
# include <dlfcn.h>
|
||||
|
@ -153,3 +156,43 @@ bool vkd3d_get_program_name(char program_name[VKD3D_PATH_MAX])
|
|||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32)
|
||||
|
||||
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
|
||||
{
|
||||
DWORD len;
|
||||
|
||||
assert(value);
|
||||
assert(value_size > 0);
|
||||
|
||||
len = GetEnvironmentVariableA(name, value, value_size);
|
||||
if (len > 0 && len <= value_size)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
value[0] = '\0';
|
||||
return false;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Non-Win32 variant: copies environment variable name into value (value_size bytes).
 *
 * Returns true when the variable exists and fits, including its terminator.
 * When the variable is unset, or too long for the buffer, value is set to the
 * empty string and false is returned. A truncated value is never reported as
 * success, matching the Win32 variant, which also fails on a too-small buffer. */
bool vkd3d_get_env_var(const char *name, char *value, size_t value_size)
{
    const char *env_value;
    int len;

    assert(value);
    assert(value_size > 0);

    if ((env_value = getenv(name)))
    {
        /* snprintf returns the length the full string needs, which reveals truncation. */
        len = snprintf(value, value_size, "%s", env_value);
        if (len >= 0 && (size_t)len < value_size)
            return true;
    }

    value[0] = '\0';
    return false;
}
|
||||
|
||||
#endif
|
|
@ -21,6 +21,7 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_profiling.h"
|
||||
#include "vkd3d_platform.h"
|
||||
#include "vkd3d_threads.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include <stdlib.h>
|
||||
|
@ -124,8 +125,10 @@ static void vkd3d_init_profiling_path(const char *path)
|
|||
|
||||
static void vkd3d_init_profiling_once(void)
|
||||
{
|
||||
const char *path = getenv("VKD3D_PROFILE_PATH");
|
||||
if (path)
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
vkd3d_get_env_var("VKD3D_PROFILE_PATH", path, sizeof(path));
|
||||
if (strlen(path) > 0)
|
||||
vkd3d_init_profiling_path(path);
|
||||
}
|
||||
|
||||
|
|
|
@ -82,6 +82,21 @@ bool vkd3d_export_strequal(const WCHAR *a, const WCHAR *b)
|
|||
return *a == *b;
|
||||
}
|
||||
|
||||
/* Compares a wide string against a narrow string, element by element.
 * Returns false if either pointer is NULL, true only if both strings
 * end at the same position with all preceding elements equal. */
bool vkd3d_export_strequal_mixed(const WCHAR *a, const char *b)
{
    if (!a || !b)
        return false;

    for (; *a != '\0' && *b != '\0'; a++, b++)
    {
        if (*a != *b)
            return false;
    }

    /* At least one string has ended; they are equal only if both have. */
    return *a == *b;
}
|
||||
|
||||
bool vkd3d_export_strequal_substr(const WCHAR *a, size_t expected_n, const WCHAR *b)
|
||||
{
|
||||
size_t n = 0;
|
||||
|
|
|
@ -2755,8 +2755,9 @@ static int shader_parse_static_samplers(struct root_signature_parser_context *co
|
|||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc)
|
||||
int vkd3d_shader_parse_root_signature_raw(const char *data, unsigned int data_size,
|
||||
struct vkd3d_versioned_root_signature_desc *desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_root_signature_desc *v_1_0 = &desc->v_1_0;
|
||||
struct root_signature_parser_context context;
|
||||
|
@ -2764,6 +2765,8 @@ static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
|||
const char *ptr = data;
|
||||
int ret;
|
||||
|
||||
memset(desc, 0, sizeof(*desc));
|
||||
|
||||
context.data = data;
|
||||
context.data_size = data_size;
|
||||
|
||||
|
@ -2835,28 +2838,46 @@ static int shader_parse_root_signature(const char *data, unsigned int data_size,
|
|||
read_uint32(&ptr, &v_1_0->flags);
|
||||
TRACE("Flags %#x.\n", v_1_0->flags);
|
||||
|
||||
if (compatibility_hash)
|
||||
{
|
||||
struct vkd3d_shader_code code = { data, data_size };
|
||||
*compatibility_hash = vkd3d_shader_hash(&code);
|
||||
}
|
||||
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
static int rts0_handler(const char *data, DWORD data_size, DWORD tag, void *context)
|
||||
{
|
||||
struct vkd3d_versioned_root_signature_desc *desc = context;
|
||||
struct vkd3d_shader_code *payload = context;
|
||||
|
||||
if (tag != TAG_RTS0)
|
||||
return VKD3D_OK;
|
||||
|
||||
return shader_parse_root_signature(data, data_size, desc);
|
||||
payload->code = data;
|
||||
payload->size = data_size;
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
int vkd3d_shader_parse_root_signature(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature)
|
||||
struct vkd3d_versioned_root_signature_desc *root_signature,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_shader_code raw_payload;
|
||||
int ret;
|
||||
|
||||
TRACE("dxbc {%p, %zu}, root_signature %p.\n", dxbc->code, dxbc->size, root_signature);
|
||||
|
||||
memset(root_signature, 0, sizeof(*root_signature));
|
||||
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, root_signature)) < 0)
|
||||
memset(&raw_payload, 0, sizeof(raw_payload));
|
||||
|
||||
if ((ret = parse_dxbc(dxbc->code, dxbc->size, rts0_handler, &raw_payload)) < 0)
|
||||
return ret;
|
||||
|
||||
if (!raw_payload.code)
|
||||
return VKD3D_ERROR;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature_raw(raw_payload.code, raw_payload.size,
|
||||
root_signature, compatibility_hash)) < 0)
|
||||
{
|
||||
vkd3d_shader_free_root_signature(root_signature);
|
||||
return ret;
|
||||
|
|
|
@ -764,6 +764,30 @@ int vkd3d_shader_compile_dxil(const struct vkd3d_shader_code *dxbc,
|
|||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_BARYCENTRIC_KHR)
|
||||
{
|
||||
static const dxil_spv_option_barycentric_khr helper =
|
||||
{ { DXIL_SPV_OPTION_BARYCENTRIC_KHR }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support BARYCENTRIC_KHR.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
|
||||
{
|
||||
static const dxil_spv_option_min_precision_native_16bit helper =
|
||||
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (compiler_args->dual_source_blending)
|
||||
|
@ -1250,6 +1274,18 @@ int vkd3d_shader_compile_dxil_export(const struct vkd3d_shader_code *dxil,
|
|||
goto end;
|
||||
}
|
||||
}
|
||||
else if (compiler_args->target_extensions[i] == VKD3D_SHADER_TARGET_EXTENSION_MIN_PRECISION_IS_NATIVE_16BIT)
|
||||
{
|
||||
static const dxil_spv_option_min_precision_native_16bit helper =
|
||||
{ { DXIL_SPV_OPTION_MIN_PRECISION_NATIVE_16BIT }, DXIL_SPV_TRUE };
|
||||
|
||||
if (dxil_spv_converter_add_option(converter, &helper.base) != DXIL_SPV_SUCCESS)
|
||||
{
|
||||
ERR("dxil-spirv does not support MIN_PRECISION_NATIVE_16BIT.\n");
|
||||
ret = VKD3D_ERROR_NOT_IMPLEMENTED;
|
||||
goto end;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1316,6 +1352,31 @@ void vkd3d_shader_dxil_free_library_entry_points(struct vkd3d_shader_library_ent
|
|||
vkd3d_free(entry_points);
|
||||
}
|
||||
|
||||
void vkd3d_shader_dxil_free_library_subobjects(struct vkd3d_shader_library_subobject *subobjects, size_t count)
|
||||
{
|
||||
size_t i, j;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION)
|
||||
{
|
||||
for (j = 0; j < subobjects[i].data.association.NumExports; j++)
|
||||
vkd3d_free((void*)subobjects[i].data.association.pExports[j]);
|
||||
vkd3d_free((void*)subobjects[i].data.association.pExports);
|
||||
vkd3d_free((void*)subobjects[i].data.association.SubobjectToAssociate);
|
||||
}
|
||||
else if (subobjects[i].kind == VKD3D_SHADER_SUBOBJECT_KIND_HIT_GROUP)
|
||||
{
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.HitGroupExport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.AnyHitShaderImport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.ClosestHitShaderImport);
|
||||
vkd3d_free((void*)subobjects[i].data.hit_group.IntersectionShaderImport);
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_free(subobjects);
|
||||
}
|
||||
|
||||
static VkShaderStageFlagBits convert_stage(dxil_spv_shader_stage stage)
|
||||
{
|
||||
/* Only interested in RT entry_points. There is no way yet to use lib_6_3+ for non-RT. */
|
||||
|
@ -1360,20 +1421,95 @@ static bool vkd3d_dxil_build_entry(struct vkd3d_shader_library_entry_point *entr
|
|||
return true;
|
||||
}
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points(
|
||||
static void vkd3d_shader_dxil_copy_subobject(unsigned int identifier,
|
||||
struct vkd3d_shader_library_subobject *subobject,
|
||||
const dxil_spv_rdat_subobject *dxil_subobject)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
/* Reuse same enums as DXIL. */
|
||||
subobject->kind = (enum vkd3d_shader_subobject_kind)dxil_subobject->kind;
|
||||
subobject->name = dxil_subobject->subobject_name;
|
||||
subobject->dxil_identifier = identifier;
|
||||
|
||||
switch (dxil_subobject->kind)
|
||||
{
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_GLOBAL_ROOT_SIGNATURE:
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_LOCAL_ROOT_SIGNATURE:
|
||||
subobject->data.payload.data = dxil_subobject->payload;
|
||||
subobject->data.payload.size = dxil_subobject->payload_size;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG:
|
||||
/* Normalize the kind. */
|
||||
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
|
||||
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
|
||||
subobject->data.pipeline_config.Flags = 0;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1:
|
||||
subobject->kind = VKD3D_SHADER_SUBOBJECT_KIND_RAYTRACING_PIPELINE_CONFIG1;
|
||||
subobject->data.pipeline_config.MaxTraceRecursionDepth = dxil_subobject->args[0];
|
||||
subobject->data.pipeline_config.Flags = dxil_subobject->args[1];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_RAYTRACING_SHADER_CONFIG:
|
||||
subobject->data.shader_config.MaxPayloadSizeInBytes = dxil_subobject->args[0];
|
||||
subobject->data.shader_config.MaxAttributeSizeInBytes = dxil_subobject->args[1];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_HIT_GROUP:
|
||||
/* Enum aliases. */
|
||||
subobject->data.hit_group.Type = (D3D12_HIT_GROUP_TYPE)dxil_subobject->hit_group_type;
|
||||
assert(dxil_subobject->num_exports == 3);
|
||||
/* Implementation simplifies a lot if we can reuse the D3D12 type here. */
|
||||
subobject->data.hit_group.HitGroupExport = vkd3d_dup_entry_point(dxil_subobject->subobject_name);
|
||||
subobject->data.hit_group.AnyHitShaderImport = dxil_subobject->exports[0] && *dxil_subobject->exports[0] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[0]) : NULL;
|
||||
subobject->data.hit_group.ClosestHitShaderImport = dxil_subobject->exports[1] && *dxil_subobject->exports[1] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[1]) : NULL;
|
||||
subobject->data.hit_group.IntersectionShaderImport = dxil_subobject->exports[2] && *dxil_subobject->exports[2] != '\0' ?
|
||||
vkd3d_dup_entry_point(dxil_subobject->exports[2]) : NULL;
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_STATE_OBJECT_CONFIG:
|
||||
subobject->data.object_config.Flags = dxil_subobject->args[0];
|
||||
break;
|
||||
|
||||
case DXIL_SPV_RDAT_SUBOBJECT_KIND_SUBOBJECT_TO_EXPORTS_ASSOCIATION:
|
||||
assert(dxil_subobject->num_exports >= 1);
|
||||
subobject->data.association.SubobjectToAssociate = vkd3d_dup_entry_point(dxil_subobject->exports[0]);
|
||||
subobject->data.association.pExports = vkd3d_malloc((dxil_subobject->num_exports - 1) * sizeof(LPCWSTR));
|
||||
subobject->data.association.NumExports = dxil_subobject->num_exports - 1;
|
||||
for (i = 1; i < dxil_subobject->num_exports; i++)
|
||||
subobject->data.association.pExports[i - 1] = vkd3d_dup_entry_point(dxil_subobject->exports[i]);
|
||||
break;
|
||||
|
||||
default:
|
||||
FIXME("Unrecognized RDAT subobject type: %u.\n", dxil_subobject->kind);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int vkd3d_shader_dxil_append_library_entry_points_and_subobjects(
|
||||
const D3D12_DXIL_LIBRARY_DESC *library_desc,
|
||||
unsigned int identifier,
|
||||
struct vkd3d_shader_library_entry_point **entry_points,
|
||||
size_t *entry_point_size, size_t *entry_point_count)
|
||||
size_t *entry_point_size, size_t *entry_point_count,
|
||||
struct vkd3d_shader_library_subobject **subobjects,
|
||||
size_t *subobjects_size, size_t *subobjects_count)
|
||||
{
|
||||
struct vkd3d_shader_library_entry_point new_entry;
|
||||
struct vkd3d_shader_library_subobject *subobject;
|
||||
dxil_spv_parsed_blob blob = NULL;
|
||||
struct vkd3d_shader_code code;
|
||||
dxil_spv_rdat_subobject sub;
|
||||
dxil_spv_shader_stage stage;
|
||||
const char *mangled_entry;
|
||||
char *ascii_entry = NULL;
|
||||
vkd3d_shader_hash_t hash;
|
||||
unsigned int count, i;
|
||||
unsigned int count, i, j;
|
||||
unsigned int rdat_count;
|
||||
int ret = VKD3D_OK;
|
||||
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
|
@ -1394,6 +1530,8 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
goto end;
|
||||
}
|
||||
|
||||
rdat_count = dxil_spv_parsed_blob_get_num_rdat_subobjects(blob);
|
||||
|
||||
if (library_desc->NumExports)
|
||||
{
|
||||
for (i = 0; i < library_desc->NumExports; i++)
|
||||
|
@ -1403,24 +1541,44 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
else
|
||||
ascii_entry = vkd3d_strdup_w_utf8(library_desc->pExports[i].Name, 0);
|
||||
|
||||
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
|
||||
if (stage == DXIL_SPV_STAGE_UNKNOWN)
|
||||
/* An export can point to a subobject or an entry point. */
|
||||
for (j = 0; j < rdat_count; j++)
|
||||
{
|
||||
ret = VKD3D_ERROR_INVALID_ARGUMENT;
|
||||
goto end;
|
||||
dxil_spv_parsed_blob_get_rdat_subobject(blob, j, &sub);
|
||||
/* Subobject names are not mangled. */
|
||||
if (strcmp(sub.subobject_name, ascii_entry) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
new_entry.real_entry_point = ascii_entry;
|
||||
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
|
||||
new_entry.mangled_entry_point = NULL;
|
||||
new_entry.identifier = identifier;
|
||||
new_entry.stage = convert_stage(stage);
|
||||
ascii_entry = NULL;
|
||||
if (j < rdat_count)
|
||||
{
|
||||
vkd3d_array_reserve((void**)subobjects, subobjects_size,
|
||||
*subobjects_count + 1, sizeof(**subobjects));
|
||||
subobject = &(*subobjects)[*subobjects_count];
|
||||
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
|
||||
*subobjects_count += 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
stage = dxil_spv_parsed_blob_get_shader_stage_for_entry(blob, ascii_entry);
|
||||
if (stage == DXIL_SPV_STAGE_UNKNOWN)
|
||||
{
|
||||
ret = VKD3D_ERROR_INVALID_ARGUMENT;
|
||||
goto end;
|
||||
}
|
||||
|
||||
vkd3d_array_reserve((void**)entry_points, entry_point_size,
|
||||
*entry_point_count + 1, sizeof(new_entry));
|
||||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
new_entry.real_entry_point = ascii_entry;
|
||||
new_entry.plain_entry_point = vkd3d_wstrdup(library_desc->pExports[i].Name);
|
||||
new_entry.mangled_entry_point = NULL;
|
||||
new_entry.identifier = identifier;
|
||||
new_entry.stage = convert_stage(stage);
|
||||
ascii_entry = NULL;
|
||||
|
||||
vkd3d_array_reserve((void**)entry_points, entry_point_size,
|
||||
*entry_point_count + 1, sizeof(new_entry));
|
||||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1453,6 +1611,21 @@ int vkd3d_shader_dxil_append_library_entry_points(
|
|||
(*entry_points)[(*entry_point_count)++] = new_entry;
|
||||
memset(&new_entry, 0, sizeof(new_entry));
|
||||
}
|
||||
|
||||
if (rdat_count)
|
||||
{
|
||||
/* All subobjects are also exported. */
|
||||
vkd3d_array_reserve((void**)subobjects, subobjects_size,
|
||||
*subobjects_count + rdat_count, sizeof(**subobjects));
|
||||
|
||||
for (i = 0; i < rdat_count; i++)
|
||||
{
|
||||
dxil_spv_parsed_blob_get_rdat_subobject(blob, i, &sub);
|
||||
subobject = &(*subobjects)[*subobjects_count];
|
||||
vkd3d_shader_dxil_copy_subobject(identifier, subobject, &sub);
|
||||
*subobjects_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
end:
|
||||
|
|
|
@ -1404,6 +1404,13 @@ static uint32_t vkd3d_spirv_build_op_logical_and(struct vkd3d_spirv_builder *bui
|
|||
SpvOpLogicalAnd, result_type, operand0, operand1);
|
||||
}
|
||||
|
||||
static uint32_t vkd3d_spirv_build_op_any(struct vkd3d_spirv_builder *builder,
|
||||
uint32_t result_type, uint32_t operand0)
|
||||
{
|
||||
return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream,
|
||||
SpvOpAny, result_type, operand0);
|
||||
}
|
||||
|
||||
static uint32_t vkd3d_spirv_build_op_iequal(struct vkd3d_spirv_builder *builder,
|
||||
uint32_t result_type, uint32_t operand0, uint32_t operand1)
|
||||
{
|
||||
|
@ -1931,12 +1938,12 @@ vkd3d_spirv_resource_type_table[] =
|
|||
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMS, SpvDim2D, 0, 1, 2, 2},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_2D, SpvDim2D, 0, 0, 2, 2},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_3D, SpvDim3D, 0, 0, 3, 3},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 0},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBE, SpvDimCube, 0, 0, 3, 3},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY, SpvDim1D, 1, 0, 2, 1,
|
||||
SpvCapabilitySampled1D, SpvCapabilityImage1D},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY, SpvDim2D, 1, 0, 3, 2},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY, SpvDim2D, 1, 1, 3, 2},
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 0,
|
||||
{VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY, SpvDimCube, 1, 0, 4, 3,
|
||||
SpvCapabilitySampledCubeArray, SpvCapabilityImageCubeArray},
|
||||
};
|
||||
|
||||
|
@ -2323,6 +2330,8 @@ struct vkd3d_dxbc_compiler
|
|||
vkd3d_shader_hash_t descriptor_qa_shader_hash;
|
||||
#endif
|
||||
|
||||
uint32_t robust_physical_counter_func_id;
|
||||
|
||||
int compiler_error;
|
||||
};
|
||||
|
||||
|
@ -3512,8 +3521,17 @@ static uint32_t vkd3d_dxbc_compiler_emit_load_constant_buffer(struct vkd3d_dxbc_
|
|||
}
|
||||
}
|
||||
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
|
||||
base_id, indexes, last_index + 1);
|
||||
if (access_mask == SpvMemoryAccessAlignedMask)
|
||||
{
|
||||
/* For physical pointers, prefer InBounds for optimal codegen. */
|
||||
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
|
||||
base_id, indexes, last_index + 1);
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
|
||||
base_id, indexes, last_index + 1);
|
||||
}
|
||||
|
||||
if (reg->modifier == VKD3DSPRM_NONUNIFORM)
|
||||
vkd3d_dxbc_compiler_decorate_nonuniform(compiler, ptr_id);
|
||||
|
@ -5513,31 +5531,22 @@ static const struct vkd3d_shader_global_binding *vkd3d_dxbc_compiler_get_global_
|
|||
{
|
||||
if (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA)
|
||||
{
|
||||
uint32_t counter_struct_id, pointer_struct_id, array_type_id;
|
||||
|
||||
counter_struct_id = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
|
||||
counter_struct_id = vkd3d_spirv_build_op_type_struct(builder, &counter_struct_id, 1);
|
||||
|
||||
vkd3d_spirv_build_op_member_decorate1(builder, counter_struct_id, 0, SpvDecorationOffset, 0);
|
||||
vkd3d_spirv_build_op_decorate(builder, counter_struct_id, SpvDecorationBlock, NULL, 0);
|
||||
vkd3d_spirv_build_op_name(builder, counter_struct_id, "uav_ctr_t");
|
||||
|
||||
type_id = vkd3d_spirv_build_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, counter_struct_id);
|
||||
uint32_t struct_id, array_type_id;
|
||||
|
||||
type_id = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 2);
|
||||
array_type_id = vkd3d_spirv_build_op_type_runtime_array(builder, type_id);
|
||||
vkd3d_spirv_build_op_decorate1(builder, array_type_id, SpvDecorationArrayStride, sizeof(uint64_t));
|
||||
struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1);
|
||||
|
||||
pointer_struct_id = vkd3d_spirv_build_op_type_struct(builder, &array_type_id, 1);
|
||||
|
||||
vkd3d_spirv_build_op_member_decorate1(builder, pointer_struct_id, 0, SpvDecorationOffset, 0);
|
||||
vkd3d_spirv_build_op_decorate(builder, pointer_struct_id, SpvDecorationBufferBlock, NULL, 0);
|
||||
vkd3d_spirv_build_op_name(builder, pointer_struct_id, "uav_ctrs_t");
|
||||
vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, SpvDecorationOffset, 0);
|
||||
vkd3d_spirv_build_op_member_decorate(builder, struct_id, 0, SpvDecorationNonWritable, NULL, 0);
|
||||
vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBufferBlock, NULL, 0);
|
||||
vkd3d_spirv_build_op_name(builder, struct_id, "uav_ctrs_t");
|
||||
|
||||
var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream,
|
||||
vkd3d_spirv_get_op_type_pointer(builder, storage_class, pointer_struct_id),
|
||||
vkd3d_spirv_get_op_type_pointer(builder, storage_class, struct_id),
|
||||
storage_class, 0);
|
||||
|
||||
vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationAliasedPointer, NULL, 0);
|
||||
vkd3d_spirv_enable_capability(builder, SpvCapabilityPhysicalStorageBufferAddresses);
|
||||
}
|
||||
else
|
||||
|
@ -5710,10 +5719,116 @@ static const struct vkd3d_shader_buffer_reference_type *vkd3d_dxbc_compiler_get_
|
|||
static void vkd3d_dxbc_compiler_emit_descriptor_qa_checks(struct vkd3d_dxbc_compiler *compiler);
|
||||
#endif
|
||||
|
||||
static void vkd3d_dxbc_compiler_emit_robust_physical_counter_func(struct vkd3d_dxbc_compiler *compiler)
|
||||
{
|
||||
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
|
||||
uint32_t not_equal_vec_id, not_equal_id;
|
||||
uint32_t merge_label_id, body_label_id;
|
||||
uint32_t ptr_type_id, ptr_id;
|
||||
uint32_t parameter_types[3];
|
||||
uint32_t parameter_ids[3];
|
||||
uint32_t phi_arguments[4];
|
||||
uint32_t atomic_args[4];
|
||||
uint32_t func_type_id;
|
||||
uint32_t phi_result_id;
|
||||
uint32_t uvec2_type;
|
||||
uint32_t bvec2_type;
|
||||
uint32_t result_id;
|
||||
uint32_t bool_type;
|
||||
uint32_t u32_type;
|
||||
uint32_t label_id;
|
||||
uint32_t zero_id;
|
||||
unsigned int i;
|
||||
|
||||
bool_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_BOOL, 1);
|
||||
bvec2_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_BOOL, 2);
|
||||
u32_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
|
||||
uvec2_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 2);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(parameter_types); i++)
|
||||
parameter_types[i] = i == 0 ? uvec2_type : u32_type;
|
||||
|
||||
func_type_id = vkd3d_spirv_get_op_type_function(builder, u32_type,
|
||||
parameter_types, ARRAY_SIZE(parameter_types));
|
||||
compiler->robust_physical_counter_func_id = vkd3d_spirv_alloc_id(builder);
|
||||
vkd3d_spirv_build_op_name(builder, compiler->robust_physical_counter_func_id, "robust_physical_counter_op");
|
||||
vkd3d_spirv_build_op_function(builder, u32_type, compiler->robust_physical_counter_func_id,
|
||||
SpvFunctionControlMaskNone, func_type_id);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(parameter_ids); i++)
|
||||
parameter_ids[i] = vkd3d_spirv_build_op_function_parameter(builder, i == 0 ? uvec2_type : u32_type);
|
||||
|
||||
vkd3d_spirv_build_op_name(builder, parameter_ids[0], "bda");
|
||||
vkd3d_spirv_build_op_name(builder, parameter_ids[1], "direction");
|
||||
vkd3d_spirv_build_op_name(builder, parameter_ids[2], "fixup");
|
||||
|
||||
label_id = vkd3d_spirv_alloc_id(builder);
|
||||
merge_label_id = vkd3d_spirv_alloc_id(builder);
|
||||
body_label_id = vkd3d_spirv_alloc_id(builder);
|
||||
zero_id = vkd3d_dxbc_compiler_get_constant_uint_vector(compiler, 0, 2);
|
||||
|
||||
vkd3d_spirv_build_op_label(builder, label_id);
|
||||
not_equal_vec_id = vkd3d_spirv_build_op_inotequal(builder, bvec2_type,
|
||||
parameter_ids[0], zero_id);
|
||||
not_equal_id = vkd3d_spirv_build_op_any(builder, bool_type, not_equal_vec_id);
|
||||
|
||||
vkd3d_spirv_build_op_selection_merge(builder, merge_label_id, SpvSelectionControlMaskNone);
|
||||
vkd3d_spirv_build_op_branch_conditional(builder, not_equal_id, body_label_id, merge_label_id);
|
||||
|
||||
phi_arguments[1] = body_label_id;
|
||||
phi_arguments[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0);
|
||||
phi_arguments[3] = label_id;
|
||||
|
||||
{
|
||||
vkd3d_spirv_build_op_label(builder, body_label_id);
|
||||
ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, u32_type);
|
||||
ptr_id = vkd3d_spirv_build_op_bitcast(builder, ptr_type_id, parameter_ids[0]);
|
||||
|
||||
atomic_args[0] = ptr_id;
|
||||
atomic_args[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
|
||||
atomic_args[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvMemoryAccessMaskNone);
|
||||
atomic_args[3] = parameter_ids[1];
|
||||
|
||||
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
|
||||
SpvOpAtomicIAdd, u32_type,
|
||||
atomic_args, ARRAY_SIZE(atomic_args));
|
||||
phi_arguments[0] = vkd3d_spirv_build_op_iadd(builder, u32_type,
|
||||
result_id, parameter_ids[2]);
|
||||
|
||||
vkd3d_spirv_build_op_branch(builder, merge_label_id);
|
||||
}
|
||||
|
||||
vkd3d_spirv_build_op_label(builder, merge_label_id);
|
||||
phi_result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
|
||||
SpvOpPhi, u32_type,
|
||||
phi_arguments, ARRAY_SIZE(phi_arguments));
|
||||
vkd3d_spirv_build_op_return_value(builder, phi_result_id);
|
||||
vkd3d_spirv_build_op_function_end(builder);
|
||||
vkd3d_spirv_enable_capability(builder, SpvCapabilityPhysicalStorageBufferAddresses);
|
||||
}
|
||||
|
||||
static uint32_t vkd3d_dxbc_compiler_emit_robust_physical_counter(struct vkd3d_dxbc_compiler *compiler,
|
||||
uint32_t bda_id, bool increment)
|
||||
{
|
||||
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
|
||||
uint32_t u32_type;
|
||||
uint32_t args[3];
|
||||
|
||||
u32_type = vkd3d_spirv_get_type_id(builder, VKD3D_TYPE_UINT, 1);
|
||||
args[0] = bda_id;
|
||||
args[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, increment ? 1u : -1u);
|
||||
args[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, increment ? 0u : -1u);
|
||||
|
||||
return vkd3d_spirv_build_op_function_call(builder, u32_type,
|
||||
compiler->robust_physical_counter_func_id,
|
||||
args, ARRAY_SIZE(args));
|
||||
}
|
||||
|
||||
static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_compiler *compiler)
|
||||
{
|
||||
const struct vkd3d_shader_transform_feedback_info *xfb_info = compiler->shader_interface.xfb_info;
|
||||
struct vkd3d_spirv_builder *builder = &compiler->spirv_builder;
|
||||
unsigned int i;
|
||||
|
||||
switch (compiler->shader_type)
|
||||
{
|
||||
|
@ -5763,6 +5878,19 @@ static void vkd3d_dxbc_compiler_emit_initial_declarations(struct vkd3d_dxbc_comp
|
|||
vkd3d_dxbc_compiler_emit_descriptor_qa_checks(compiler);
|
||||
#endif
|
||||
|
||||
if (compiler->scan_info->has_uav_counter)
|
||||
{
|
||||
/* Check if we're expected to deal with RAW VAs. In this case we will enable BDA. */
|
||||
for (i = 0; i < compiler->shader_interface.binding_count; i++)
|
||||
{
|
||||
if (compiler->shader_interface.bindings[i].flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA)
|
||||
{
|
||||
vkd3d_dxbc_compiler_emit_robust_physical_counter_func(compiler);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL)
|
||||
{
|
||||
vkd3d_spirv_builder_begin_main_function(builder);
|
||||
|
@ -6434,8 +6562,13 @@ static void vkd3d_dxbc_compiler_emit_dcl_constant_buffer(struct vkd3d_dxbc_compi
|
|||
else if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA))
|
||||
{
|
||||
storage_class = SpvStorageClassPhysicalStorageBuffer;
|
||||
/* Could use cb->size here, but we will use InBounds access chains
|
||||
* which could confuse a compiler if we tried
|
||||
* to access an array out of bounds. Robustness on descriptors depends on the descriptor, not the
|
||||
* declaration, and it's possible to declare a CBV with fewer array elements than you access.
|
||||
* In this case, we pretend to have a 64 KiB descriptor. */
|
||||
type_id = vkd3d_dxbc_compiler_get_buffer_reference_type(compiler,
|
||||
VKD3D_DATA_FLOAT, 4, cb->size, 0)->type_id;
|
||||
VKD3D_DATA_FLOAT, 4, 4 * 1024, 0)->type_id;
|
||||
var_id = compiler->root_parameter_var_id;
|
||||
}
|
||||
else
|
||||
|
@ -9377,9 +9510,9 @@ static void vkd3d_dxbc_compiler_emit_gather4(struct vkd3d_dxbc_compiler *compile
|
|||
unsigned int image_flags = VKD3D_IMAGE_FLAG_SAMPLED;
|
||||
SpvImageOperandsMask operands_mask = 0;
|
||||
unsigned int image_operand_count = 0;
|
||||
uint32_t image_operands[1] = { 0 };
|
||||
struct vkd3d_shader_image image;
|
||||
unsigned int component_idx;
|
||||
uint32_t image_operands[1];
|
||||
DWORD coordinate_mask;
|
||||
bool extended_offset;
|
||||
bool is_sparse_op;
|
||||
|
@ -9678,7 +9811,19 @@ static void vkd3d_dxbc_compiler_emit_ld_raw_structured_srv_uav(struct vkd3d_dxbc
|
|||
uint32_t indices[2];
|
||||
indices[0] = vkd3d_dxbc_compiler_get_constant_uint(compiler, 0);
|
||||
indices[1] = coordinate_id;
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, image.id, indices, ARRAY_SIZE(indices));
|
||||
|
||||
if (access_mask == SpvMemoryAccessAlignedMask)
|
||||
{
|
||||
/* For physical pointers, prefer InBounds for optimal codegen. */
|
||||
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
|
||||
image.id, indices, ARRAY_SIZE(indices));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
|
||||
image.id, indices, ARRAY_SIZE(indices));
|
||||
}
|
||||
|
||||
constituents[j++] = vkd3d_spirv_build_op_loadv(builder, type_id, ptr_id, access_mask, &alignment, 1);
|
||||
|
||||
if (resource->reg.modifier == VKD3DSPRM_NONUNIFORM)
|
||||
|
@ -9818,7 +9963,17 @@ static void vkd3d_dxbc_compiler_emit_store_uav_raw_structured(struct vkd3d_dxbc_
|
|||
if (component_count > 1)
|
||||
texel_id = vkd3d_spirv_build_op_composite_extract1(builder, type_id, texel_id, component_idx);
|
||||
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id, image.id, indices, ARRAY_SIZE(indices));
|
||||
if (access_mask == SpvMemoryAccessAlignedMask)
|
||||
{
|
||||
ptr_id = vkd3d_spirv_build_op_in_bounds_access_chain(builder, ptr_type_id,
|
||||
image.id, indices, ARRAY_SIZE(indices));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr_id = vkd3d_spirv_build_op_access_chain(builder, ptr_type_id,
|
||||
image.id, indices, ARRAY_SIZE(indices));
|
||||
}
|
||||
|
||||
vkd3d_spirv_build_op_storev(builder, ptr_id, texel_id, access_mask, &alignment, 1);
|
||||
|
||||
if (dst->reg.modifier == VKD3DSPRM_NONUNIFORM)
|
||||
|
@ -9983,6 +10138,7 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
|
|||
const struct vkd3d_shader_resource_binding *binding;
|
||||
uint32_t type_id, result_id, pointer_id, zero_id;
|
||||
const struct vkd3d_symbol *resource_symbol;
|
||||
bool check_post_decrement;
|
||||
uint32_t operands[3];
|
||||
SpvOp op;
|
||||
|
||||
|
@ -9998,7 +10154,6 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
|
|||
|
||||
if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_RAW_VA))
|
||||
{
|
||||
uint32_t ctr_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassPhysicalStorageBuffer, type_id);
|
||||
uint32_t buf_ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, resource_symbol->info.resource.uav_counter_type_id);
|
||||
uint32_t indices[2];
|
||||
|
||||
|
@ -10013,8 +10168,10 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
|
|||
resource_symbol->info.resource.uav_counter_type_id,
|
||||
pointer_id, SpvMemoryAccessMaskNone);
|
||||
|
||||
pointer_id = vkd3d_spirv_build_op_access_chain1(builder,
|
||||
ctr_ptr_type_id, pointer_id, zero_id);
|
||||
result_id = vkd3d_dxbc_compiler_emit_robust_physical_counter(compiler, pointer_id,
|
||||
instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC);
|
||||
|
||||
check_post_decrement = false;
|
||||
}
|
||||
else if (binding && (binding->flags & VKD3D_SHADER_BINDING_FLAG_BINDLESS))
|
||||
{
|
||||
|
@ -10034,6 +10191,8 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
|
|||
/* Need to mark the pointer argument itself as non-uniform. */
|
||||
if (src->reg.modifier == VKD3DSPRM_NONUNIFORM)
|
||||
vkd3d_dxbc_compiler_decorate_nonuniform(compiler, pointer_id);
|
||||
|
||||
check_post_decrement = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -10041,19 +10200,25 @@ static void vkd3d_dxbc_compiler_emit_uav_counter_instruction(struct vkd3d_dxbc_c
|
|||
|
||||
pointer_id = vkd3d_spirv_build_op_image_texel_pointer(builder, ptr_type_id,
|
||||
resource_symbol->info.resource.uav_counter_id, zero_id, zero_id);
|
||||
|
||||
check_post_decrement = true;
|
||||
}
|
||||
|
||||
operands[0] = pointer_id;
|
||||
operands[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
|
||||
operands[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_semantics);
|
||||
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
|
||||
op, type_id, operands, ARRAY_SIZE(operands));
|
||||
if (op == SpvOpAtomicIDecrement)
|
||||
if (check_post_decrement)
|
||||
{
|
||||
/* SpvOpAtomicIDecrement returns the original value. */
|
||||
result_id = vkd3d_spirv_build_op_isub(builder, type_id, result_id,
|
||||
vkd3d_dxbc_compiler_get_constant_uint(compiler, 1));
|
||||
operands[0] = pointer_id;
|
||||
operands[1] = vkd3d_dxbc_compiler_get_constant_uint(compiler, SpvScopeDevice);
|
||||
operands[2] = vkd3d_dxbc_compiler_get_constant_uint(compiler, memory_semantics);
|
||||
result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream,
|
||||
op, type_id, operands, ARRAY_SIZE(operands));
|
||||
if (op == SpvOpAtomicIDecrement)
|
||||
{
|
||||
/* SpvOpAtomicIDecrement returns the original value. */
|
||||
result_id = vkd3d_spirv_build_op_isub(builder, type_id, result_id,
|
||||
vkd3d_dxbc_compiler_get_constant_uint(compiler, 1));
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_dxbc_compiler_emit_store_dst(compiler, dst, result_id);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,8 @@
|
|||
|
||||
#include "vkd3d_shader_private.h"
|
||||
|
||||
#include "vkd3d_platform.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
|
@ -81,13 +83,13 @@ err:
|
|||
bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *size)
|
||||
{
|
||||
static bool enabled = true;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char filename[1024];
|
||||
const char *path;
|
||||
|
||||
if (!enabled)
|
||||
return false;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return false;
|
||||
|
@ -100,13 +102,13 @@ bool vkd3d_shader_replace(vkd3d_shader_hash_t hash, const void **data, size_t *s
|
|||
bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, size_t *size, const char *export)
|
||||
{
|
||||
static bool enabled = true;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char filename[1024];
|
||||
const char *path;
|
||||
|
||||
if (!enabled)
|
||||
return false;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_OVERRIDE")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_OVERRIDE", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return false;
|
||||
|
@ -119,12 +121,12 @@ bool vkd3d_shader_replace_export(vkd3d_shader_hash_t hash, const void **data, si
|
|||
void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader, const char *ext)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -136,12 +138,12 @@ void vkd3d_shader_dump_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shade
|
|||
void vkd3d_shader_dump_spirv_shader(vkd3d_shader_hash_t hash, const struct vkd3d_shader_code *shader)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -154,13 +156,13 @@ void vkd3d_shader_dump_spirv_shader_export(vkd3d_shader_hash_t hash, const struc
|
|||
const char *export)
|
||||
{
|
||||
static bool enabled = true;
|
||||
const char *path;
|
||||
char path[VKD3D_PATH_MAX];
|
||||
char tag[1024];
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
if (!(path = getenv("VKD3D_SHADER_DUMP_PATH")))
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DUMP_PATH", path, sizeof(path)))
|
||||
{
|
||||
enabled = false;
|
||||
return;
|
||||
|
@ -475,6 +477,7 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_info *
|
|||
const struct vkd3d_shader_register *reg)
|
||||
{
|
||||
scan_info->has_side_effects = true;
|
||||
scan_info->has_uav_counter = true;
|
||||
vkd3d_shader_scan_set_register_flags(scan_info, VKD3DSPR_UAV,
|
||||
reg->idx[0].offset, VKD3D_SHADER_UAV_FLAG_ATOMIC_COUNTER);
|
||||
}
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "vkd3d_private.h"
|
||||
|
||||
#define RT_TRACE TRACE
|
||||
|
||||
void vkd3d_acceleration_structure_build_info_cleanup(
|
||||
struct vkd3d_acceleration_structure_build_info *info)
|
||||
{
|
||||
|
@ -74,19 +76,31 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
bool have_triangles, have_aabbs;
|
||||
unsigned int i;
|
||||
|
||||
RT_TRACE("Converting inputs.\n");
|
||||
RT_TRACE("=====================\n");
|
||||
|
||||
build_info = &info->build_info;
|
||||
memset(build_info, 0, sizeof(*build_info));
|
||||
build_info->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
|
||||
|
||||
if (desc->Type == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL)
|
||||
{
|
||||
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;
|
||||
RT_TRACE("Top level build.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
build_info->type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
|
||||
RT_TRACE("Bottom level build.\n");
|
||||
}
|
||||
|
||||
build_info->flags = d3d12_build_flags_to_vk(desc->Flags);
|
||||
|
||||
if (desc->Flags & D3D12_RAYTRACING_ACCELERATION_STRUCTURE_BUILD_FLAG_PERFORM_UPDATE)
|
||||
{
|
||||
RT_TRACE("BUILD_FLAG_PERFORM_UPDATE.\n");
|
||||
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR;
|
||||
}
|
||||
else
|
||||
build_info->mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
|
||||
|
||||
|
@ -109,6 +123,9 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
info->primitive_counts = info->primitive_counts_stack;
|
||||
info->primitive_counts[0] = desc->NumDescs;
|
||||
build_info->geometryCount = 1;
|
||||
RT_TRACE(" ArrayOfPointers: %u.\n",
|
||||
desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS ? 1 : 0);
|
||||
RT_TRACE(" NumDescs: %u.\n", info->primitive_counts[0]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -132,13 +149,21 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
for (i = 0; i < desc->NumDescs; i++)
|
||||
{
|
||||
info->geometries[i].sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
|
||||
RT_TRACE(" Geom %u:\n", i);
|
||||
|
||||
if (desc->DescsLayout == D3D12_ELEMENTS_LAYOUT_ARRAY_OF_POINTERS)
|
||||
{
|
||||
geom_desc = desc->ppGeometryDescs[i];
|
||||
RT_TRACE(" ArrayOfPointers\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
geom_desc = &desc->pGeometryDescs[i];
|
||||
RT_TRACE(" PointerToArray\n");
|
||||
}
|
||||
|
||||
info->geometries[i].flags = d3d12_geometry_flags_to_vk(geom_desc->Flags);
|
||||
RT_TRACE(" Flags = #%x\n", geom_desc->Flags);
|
||||
|
||||
switch (geom_desc->Type)
|
||||
{
|
||||
|
@ -155,17 +180,26 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
triangles = &info->geometries[i].geometry.triangles;
|
||||
triangles->sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
|
||||
triangles->indexData.deviceAddress = geom_desc->Triangles.IndexBuffer;
|
||||
if (geom_desc->Triangles.IndexBuffer)
|
||||
if (geom_desc->Triangles.IndexFormat != DXGI_FORMAT_UNKNOWN)
|
||||
{
|
||||
if (!geom_desc->Triangles.IndexBuffer)
|
||||
WARN("Application is using IndexBuffer = 0 and IndexFormat != UNKNOWN. Likely application bug.\n");
|
||||
|
||||
triangles->indexType =
|
||||
geom_desc->Triangles.IndexFormat == DXGI_FORMAT_R16_UINT ?
|
||||
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
|
||||
info->primitive_counts[i] = geom_desc->Triangles.IndexCount / 3;
|
||||
RT_TRACE(" Indexed : Index count = %u (%u bits)\n",
|
||||
geom_desc->Triangles.IndexCount,
|
||||
triangles->indexType == VK_INDEX_TYPE_UINT16 ? 16 : 32);
|
||||
RT_TRACE(" Vertex count: %u\n", geom_desc->Triangles.VertexCount);
|
||||
RT_TRACE(" IBO VA: %"PRIx64".\n", geom_desc->Triangles.IndexBuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
info->primitive_counts[i] = geom_desc->Triangles.VertexCount / 3;
|
||||
triangles->indexType = VK_INDEX_TYPE_NONE_KHR;
|
||||
RT_TRACE(" Triangle list : Vertex count: %u\n", geom_desc->Triangles.VertexCount);
|
||||
}
|
||||
|
||||
triangles->maxVertex = max(1, geom_desc->Triangles.VertexCount) - 1;
|
||||
|
@ -173,6 +207,11 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
triangles->vertexFormat = vkd3d_internal_get_vk_format(device, geom_desc->Triangles.VertexFormat);
|
||||
triangles->vertexData.deviceAddress = geom_desc->Triangles.VertexBuffer.StartAddress;
|
||||
triangles->transformData.deviceAddress = geom_desc->Triangles.Transform3x4;
|
||||
|
||||
RT_TRACE(" Transform3x4: %s\n", geom_desc->Triangles.Transform3x4 ? "on" : "off");
|
||||
RT_TRACE(" Vertex format: %s\n", debug_dxgi_format(geom_desc->Triangles.VertexFormat));
|
||||
RT_TRACE(" VBO VA: %"PRIx64"\n", geom_desc->Triangles.VertexBuffer.StartAddress);
|
||||
RT_TRACE(" Vertex stride: %"PRIu64" bytes\n", geom_desc->Triangles.VertexBuffer.StrideInBytes);
|
||||
break;
|
||||
|
||||
case D3D12_RAYTRACING_GEOMETRY_TYPE_PROCEDURAL_PRIMITIVE_AABBS:
|
||||
|
@ -190,12 +229,15 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
aabbs->stride = geom_desc->AABBs.AABBs.StrideInBytes;
|
||||
aabbs->data.deviceAddress = geom_desc->AABBs.AABBs.StartAddress;
|
||||
info->primitive_counts[i] = geom_desc->AABBs.AABBCount;
|
||||
RT_TRACE(" AABB stride: %"PRIu64" bytes\n", geom_desc->AABBs.AABBs.StrideInBytes);
|
||||
break;
|
||||
|
||||
default:
|
||||
FIXME("Unsupported geometry type %u.\n", geom_desc->Type);
|
||||
return false;
|
||||
}
|
||||
|
||||
RT_TRACE(" Primitive count %u.\n", info->primitive_counts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -209,6 +251,8 @@ bool vkd3d_acceleration_structure_convert_inputs(const struct d3d12_device *devi
|
|||
}
|
||||
|
||||
build_info->pGeometries = info->geometries;
|
||||
|
||||
RT_TRACE("=====================\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -262,12 +306,18 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
|
|||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_COMPACTED_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
}
|
||||
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_CURRENT_SIZE &&
|
||||
list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
|
||||
{
|
||||
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SIZE_KHR;
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_CURRENT_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
}
|
||||
else if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
|
||||
{
|
||||
vk_query_type = VK_QUERY_TYPE_ACCELERATION_STRUCTURE_SERIALIZATION_SIZE_KHR;
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE;
|
||||
stride = sizeof(uint64_t);
|
||||
FIXME("NumBottomLevelPointers will always return 0.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -298,9 +348,31 @@ static void vkd3d_acceleration_structure_write_postbuild_info(
|
|||
|
||||
if (desc->InfoType == D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_SERIALIZATION)
|
||||
{
|
||||
/* TODO: We'll need some way to store these values for later use and copy them here instead. */
|
||||
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
|
||||
sizeof(uint64_t), 0));
|
||||
if (list->device->device_info.ray_tracing_maintenance1_features.rayTracingMaintenance1)
|
||||
{
|
||||
type_index = VKD3D_QUERY_TYPE_INDEX_RT_SERIALIZE_SIZE_BOTTOM_LEVEL_POINTERS;
|
||||
if (!d3d12_command_allocator_allocate_query_from_type_index(list->allocator,
|
||||
type_index, &vk_query_pool, &vk_query_index))
|
||||
{
|
||||
ERR("Failed to allocate query.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
d3d12_command_list_reset_query(list, vk_query_pool, vk_query_index);
|
||||
|
||||
VK_CALL(vkCmdWriteAccelerationStructuresPropertiesKHR(list->vk_command_buffer,
|
||||
1, &vk_acceleration_structure, vk_query_type, vk_query_pool, vk_query_index));
|
||||
VK_CALL(vkCmdCopyQueryPoolResults(list->vk_command_buffer,
|
||||
vk_query_pool, vk_query_index, 1,
|
||||
vk_buffer, offset + sizeof(uint64_t), stride,
|
||||
VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT));
|
||||
}
|
||||
else
|
||||
{
|
||||
FIXME("NumBottomLevelPointers will always return 0.\n");
|
||||
VK_CALL(vkCmdFillBuffer(list->vk_command_buffer, vk_buffer, offset + sizeof(uint64_t),
|
||||
sizeof(uint64_t), 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,655 @@
|
|||
/*
|
||||
* Copyright 2022 Hans-Kristian Arntzen for Valve Corporation
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
|
||||
#include "vkd3d_private.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_common.h"
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Just allocate everything up front. This only consumes host memory anyways. */
|
||||
#define MAX_COMMAND_LISTS (32 * 1024)
|
||||
|
||||
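/* A checkpoint pointer packs the command list context index and the marker counter
 * as context + MAX_COMMAND_LISTS * counter; the two decode macros split it again
 * with modulo and division.
 * Questionable on 32-bit, but we don't really care. */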
|
||||
#define NV_ENCODE_CHECKPOINT(context, counter) ((void*) ((uintptr_t)(context) + (uintptr_t)MAX_COMMAND_LISTS * (counter)))
|
||||
#define NV_CHECKPOINT_CONTEXT(ptr) ((uint32_t)((uintptr_t)(ptr) % MAX_COMMAND_LISTS))
|
||||
#define NV_CHECKPOINT_COUNTER(ptr) ((uint32_t)((uintptr_t)(ptr) / MAX_COMMAND_LISTS))
|
||||
|
||||
static const char *vkd3d_breadcrumb_command_type_to_str(enum vkd3d_breadcrumb_command_type type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
|
||||
return "top_marker";
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
|
||||
return "bottom_marker";
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
|
||||
return "set_shader_hash";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DRAW:
|
||||
return "draw";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DRAW_INDEXED:
|
||||
return "draw_indexed";
|
||||
case VKD3D_BREADCRUMB_COMMAND_DISPATCH:
|
||||
return "dispatch";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT:
|
||||
return "execute_indirect";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EXECUTE_INDIRECT_TEMPLATE:
|
||||
return "execute_indirect_template";
|
||||
case VKD3D_BREADCRUMB_COMMAND_COPY:
|
||||
return "copy";
|
||||
case VKD3D_BREADCRUMB_COMMAND_RESOLVE:
|
||||
return "resolve";
|
||||
case VKD3D_BREADCRUMB_COMMAND_WBI:
|
||||
return "wbi";
|
||||
case VKD3D_BREADCRUMB_COMMAND_RESOLVE_QUERY:
|
||||
return "resolve_query";
|
||||
case VKD3D_BREADCRUMB_COMMAND_GATHER_VIRTUAL_QUERY:
|
||||
return "gather_virtual_query";
|
||||
case VKD3D_BREADCRUMB_COMMAND_BUILD_RTAS:
|
||||
return "build_rtas";
|
||||
case VKD3D_BREADCRUMB_COMMAND_COPY_RTAS:
|
||||
return "copy_rtas";
|
||||
case VKD3D_BREADCRUMB_COMMAND_EMIT_RTAS_POSTBUILD:
|
||||
return "emit_rtas_postbuild";
|
||||
case VKD3D_BREADCRUMB_COMMAND_TRACE_RAYS:
|
||||
return "trace_rays";
|
||||
case VKD3D_BREADCRUMB_COMMAND_BARRIER:
|
||||
return "barrier";
|
||||
case VKD3D_BREADCRUMB_COMMAND_AUX32:
|
||||
return "aux32";
|
||||
case VKD3D_BREADCRUMB_COMMAND_AUX64:
|
||||
return "aux64";
|
||||
case VKD3D_BREADCRUMB_COMMAND_VBO:
|
||||
return "vbo";
|
||||
case VKD3D_BREADCRUMB_COMMAND_IBO:
|
||||
return "ibo";
|
||||
case VKD3D_BREADCRUMB_COMMAND_ROOT_DESC:
|
||||
return "root_desc";
|
||||
case VKD3D_BREADCRUMB_COMMAND_ROOT_CONST:
|
||||
return "root_const";
|
||||
case VKD3D_BREADCRUMB_COMMAND_TAG:
|
||||
return "tag";
|
||||
|
||||
default:
|
||||
return "?";
|
||||
}
|
||||
}
|
||||
|
||||
HRESULT vkd3d_breadcrumb_tracer_init(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
VkMemoryPropertyFlags memory_props;
|
||||
HRESULT hr;
|
||||
int rc;
|
||||
|
||||
memset(tracer, 0, sizeof(*tracer));
|
||||
|
||||
if ((rc = pthread_mutex_init(&tracer->lock, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
INFO("Enabling AMD_buffer_marker breadcrumbs.\n");
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Width = MAX_COMMAND_LISTS * sizeof(struct vkd3d_breadcrumb_counter);
|
||||
resource_desc.Height = 1;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
resource_desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
resource_desc.SampleDesc.Count = 1;
|
||||
resource_desc.SampleDesc.Quality = 0;
|
||||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_NONE;
|
||||
|
||||
if (FAILED(hr = vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &tracer->host_buffer)))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
|
||||
/* If device faults in the middle of execution we will never get the chance to flush device caches.
|
||||
* Make sure that breadcrumbs are always written directly out.
|
||||
* This is the primary usecase for the device coherent/uncached extension after all ...
|
||||
* Don't make this a hard requirement since buffer markers might be implicitly coherent on some
|
||||
* implementations (Turnip?). */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
{
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
|
||||
VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;
|
||||
}
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_buffer_memory(device, tracer->host_buffer,
|
||||
memory_props, &tracer->host_buffer_memory)))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, tracer->host_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE,
|
||||
0, (void**)&tracer->mapped)) != VK_SUCCESS)
|
||||
{
|
||||
hr = E_OUTOFMEMORY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
memset(tracer->mapped, 0, sizeof(*tracer->mapped) * MAX_COMMAND_LISTS);
|
||||
}
|
||||
else if (device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
INFO("Enabling NV_device_diagnostics_checkpoints breadcrumbs.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
ERR("Breadcrumbs require support for either AMD_buffer_marker or NV_device_diagnostics_checkpoints.\n");
|
||||
hr = E_FAIL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
tracer->trace_contexts = vkd3d_calloc(MAX_COMMAND_LISTS, sizeof(*tracer->trace_contexts));
|
||||
tracer->trace_context_index = 0;
|
||||
|
||||
return S_OK;
|
||||
|
||||
err:
|
||||
vkd3d_breadcrumb_tracer_cleanup(tracer, device);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_cleanup(struct vkd3d_breadcrumb_tracer *tracer, struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, tracer->host_buffer, NULL));
|
||||
vkd3d_free_device_memory(device, &tracer->host_buffer_memory);
|
||||
}
|
||||
|
||||
vkd3d_free(tracer->trace_contexts);
|
||||
pthread_mutex_destroy(&tracer->lock);
|
||||
}
|
||||
|
||||
unsigned int vkd3d_breadcrumb_tracer_allocate_command_list(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_command_list *list, struct d3d12_command_allocator *allocator)
|
||||
{
|
||||
unsigned int index = UINT32_MAX;
|
||||
unsigned int iteration_count;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&tracer->lock)))
|
||||
{
|
||||
ERR("Failed to lock mutex, rc %d.\n", rc);
|
||||
return UINT32_MAX;
|
||||
}
|
||||
|
||||
/* Since this is a ring, this is extremely likely to succeed on first attempt. */
|
||||
for (iteration_count = 0; iteration_count < MAX_COMMAND_LISTS; iteration_count++)
|
||||
{
|
||||
tracer->trace_context_index = (tracer->trace_context_index + 1) % MAX_COMMAND_LISTS;
|
||||
if (!tracer->trace_contexts[tracer->trace_context_index].locked)
|
||||
{
|
||||
tracer->trace_contexts[tracer->trace_context_index].locked = 1;
|
||||
index = tracer->trace_context_index;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
pthread_mutex_unlock(&tracer->lock);
|
||||
|
||||
if (index == UINT32_MAX)
|
||||
{
|
||||
ERR("Failed to allocate new index for command list.\n");
|
||||
return index;
|
||||
}
|
||||
|
||||
TRACE("Allocating breadcrumb context %u for list %p.\n", index, list);
|
||||
list->breadcrumb_context_index = index;
|
||||
|
||||
/* Need to clear this on a fresh allocation rather than release, since we can end up releasing a command list
|
||||
* before we observe the device lost. */
|
||||
tracer->trace_contexts[index].command_count = 0;
|
||||
tracer->trace_contexts[index].counter = 0;
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
memset(&tracer->mapped[index], 0, sizeof(tracer->mapped[index]));
|
||||
|
||||
vkd3d_array_reserve((void**)&allocator->breadcrumb_context_indices, &allocator->breadcrumb_context_index_size,
|
||||
allocator->breadcrumb_context_index_count + 1,
|
||||
sizeof(*allocator->breadcrumb_context_indices));
|
||||
allocator->breadcrumb_context_indices[allocator->breadcrumb_context_index_count++] = index;
|
||||
return index;
|
||||
}
|
||||
|
||||
/* Command allocator keeps a list of allocated breadcrumb command lists. */
|
||||
void vkd3d_breadcrumb_tracer_release_command_lists(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
const unsigned int *indices, size_t indices_count)
|
||||
{
|
||||
unsigned int index;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if (!indices_count)
|
||||
return;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&tracer->lock)))
|
||||
{
|
||||
ERR("Failed to lock mutex, rc %d.\n", rc);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < indices_count; i++)
|
||||
{
|
||||
index = indices[i];
|
||||
if (index != UINT32_MAX)
|
||||
tracer->trace_contexts[index].locked = 0;
|
||||
TRACE("Releasing breadcrumb context %u.\n", index);
|
||||
}
|
||||
pthread_mutex_unlock(&tracer->lock);
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_command_list(
|
||||
const struct vkd3d_breadcrumb_command_list_trace_context *context,
|
||||
uint32_t begin_marker,
|
||||
uint32_t end_marker)
|
||||
{
|
||||
const struct vkd3d_breadcrumb_command *cmd;
|
||||
bool observed_begin_cmd = false;
|
||||
bool observed_end_cmd = false;
|
||||
unsigned int i;
|
||||
|
||||
if (end_marker == 0)
|
||||
{
|
||||
ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
|
||||
observed_begin_cmd = true;
|
||||
}
|
||||
|
||||
/* We can assume that possible culprit commands lie between the end_marker
|
||||
* and top_marker. */
|
||||
for (i = 0; i < context->command_count; i++)
|
||||
{
|
||||
cmd = &context->commands[i];
|
||||
|
||||
/* If there is a command which sets TOP_OF_PIPE, but we haven't observed the marker yet,
|
||||
* the command processor hasn't gotten there yet (most likely ...), so that should be the
|
||||
* natural end-point. */
|
||||
if (!observed_end_cmd &&
|
||||
cmd->type == VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER &&
|
||||
cmd->count > begin_marker)
|
||||
{
|
||||
observed_end_cmd = true;
|
||||
ERR(" ===== Potential crash region END =====\n");
|
||||
}
|
||||
|
||||
if (cmd->type == VKD3D_BREADCRUMB_COMMAND_AUX32)
|
||||
{
|
||||
ERR(" Set arg: %u (#%x)\n", cmd->word_32bit, cmd->word_32bit);
|
||||
}
|
||||
else if (cmd->type == VKD3D_BREADCRUMB_COMMAND_AUX64)
|
||||
{
|
||||
ERR(" Set arg: %"PRIu64" (#%"PRIx64")\n", cmd->word_64bit, cmd->word_64bit);
|
||||
}
|
||||
else if (cmd->type == VKD3D_BREADCRUMB_COMMAND_TAG)
|
||||
{
|
||||
ERR(" Tag: %s\n", cmd->tag);
|
||||
}
|
||||
else
|
||||
{
|
||||
ERR(" Command: %s\n", vkd3d_breadcrumb_command_type_to_str(cmd->type));
|
||||
|
||||
switch (cmd->type)
|
||||
{
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER:
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER:
|
||||
ERR(" marker: %u\n", cmd->count);
|
||||
break;
|
||||
|
||||
case VKD3D_BREADCRUMB_COMMAND_SET_SHADER_HASH:
|
||||
ERR(" hash: %016"PRIx64", stage: %x\n", cmd->shader.hash, cmd->shader.stage);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We have proved we observed this command is complete.
|
||||
* Some command after this signal is at fault. */
|
||||
if (!observed_begin_cmd &&
|
||||
cmd->type == VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER &&
|
||||
cmd->count == end_marker)
|
||||
{
|
||||
observed_begin_cmd = true;
|
||||
ERR(" ===== Potential crash region BEGIN (make sure RADV_DEBUG=syncshaders is used for maximum accuracy) =====\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_command_list_amd(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
unsigned int context_index)
|
||||
{
|
||||
const struct vkd3d_breadcrumb_command_list_trace_context *context;
|
||||
uint32_t begin_marker;
|
||||
uint32_t end_marker;
|
||||
|
||||
context = &tracer->trace_contexts[context_index];
|
||||
|
||||
/* Unused, cannot be the cause. */
|
||||
if (context->counter == 0)
|
||||
return;
|
||||
|
||||
begin_marker = tracer->mapped[context_index].begin_marker;
|
||||
end_marker = tracer->mapped[context_index].end_marker;
|
||||
|
||||
/* Never executed, cannot be the cause. */
|
||||
if (begin_marker == 0 && end_marker == 0)
|
||||
return;
|
||||
|
||||
/* Successfully retired, cannot be the cause. */
|
||||
if (begin_marker == UINT32_MAX && end_marker == UINT32_MAX)
|
||||
return;
|
||||
|
||||
/* Edge case if we re-submitted a command list,
|
||||
* but it ends up crashing before we hit any BOTTOM_OF_PIPE
|
||||
* marker. Normalize the inputs such that end_marker <= begin_marker. */
|
||||
if (begin_marker > 0 && end_marker == UINT32_MAX)
|
||||
end_marker = 0;
|
||||
|
||||
ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
|
||||
context_index, begin_marker, end_marker);
|
||||
vkd3d_breadcrumb_tracer_report_command_list(context, begin_marker, end_marker);
|
||||
ERR("Done analyzing command list.\n");
|
||||
}
|
||||
|
||||
static void vkd3d_breadcrumb_tracer_report_queue_nv(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_device *device,
|
||||
VkQueue vk_queue)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
uint32_t begin_marker, end_marker;
|
||||
uint32_t checkpoint_context_index;
|
||||
VkCheckpointDataNV *checkpoints;
|
||||
uint32_t checkpoint_marker;
|
||||
uint32_t checkpoint_count;
|
||||
uint32_t context_index;
|
||||
uint32_t i;
|
||||
|
||||
VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, NULL));
|
||||
if (checkpoint_count == 0)
|
||||
return;
|
||||
|
||||
checkpoints = vkd3d_calloc(checkpoint_count, sizeof(VkCheckpointDataNV));
|
||||
for (i = 0; i < checkpoint_count; i++)
|
||||
checkpoints[i].sType = VK_STRUCTURE_TYPE_CHECKPOINT_DATA_NV;
|
||||
VK_CALL(vkGetQueueCheckpointDataNV(vk_queue, &checkpoint_count, checkpoints));
|
||||
|
||||
context_index = UINT32_MAX;
|
||||
begin_marker = 0;
|
||||
end_marker = 0;
|
||||
|
||||
for (i = 0; i < checkpoint_count; i++)
|
||||
{
|
||||
checkpoint_context_index = NV_CHECKPOINT_CONTEXT(checkpoints[i].pCheckpointMarker);
|
||||
checkpoint_marker = NV_CHECKPOINT_COUNTER(checkpoints[i].pCheckpointMarker);
|
||||
|
||||
if (context_index != checkpoint_context_index && context_index != UINT32_MAX)
|
||||
{
|
||||
FIXME("Markers have different contexts. Execution is likely split across multiple command buffers?\n");
|
||||
context_index = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
|
||||
context_index = checkpoint_context_index;
|
||||
|
||||
if (checkpoints[i].stage == VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT && checkpoint_marker > begin_marker)
|
||||
{
|
||||
/* We want to find the latest TOP_OF_PIPE_BIT. Then we prove that command processor got to that point. */
|
||||
begin_marker = checkpoint_marker;
|
||||
}
|
||||
else if (checkpoints[i].stage == VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT && checkpoint_marker > end_marker)
|
||||
{
|
||||
/* We want to find the latest BOTTOM_OF_PIPE_BIT. Then we prove that we got that far. */
|
||||
end_marker = checkpoint_marker;
|
||||
}
|
||||
else if (checkpoints[i].stage != VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT &&
|
||||
checkpoints[i].stage != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
|
||||
{
|
||||
FIXME("Unexpected checkpoint pipeline stage. #%x\n", checkpoints[i].stage);
|
||||
context_index = UINT32_MAX;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (context_index != UINT32_MAX && begin_marker != 0 && end_marker != 0 && end_marker != UINT32_MAX)
|
||||
{
|
||||
ERR("Found pending command list context %u in executable state, TOP_OF_PIPE marker %u, BOTTOM_OF_PIPE marker %u.\n",
|
||||
context_index, begin_marker, end_marker);
|
||||
vkd3d_breadcrumb_tracer_report_command_list(&tracer->trace_contexts[context_index], begin_marker, end_marker);
|
||||
ERR("Done analyzing command list.\n");
|
||||
}
|
||||
|
||||
vkd3d_free(checkpoints);
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_report_device_lost(struct vkd3d_breadcrumb_tracer *tracer,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
struct vkd3d_queue_family_info *queue_family_info;
|
||||
VkQueue vk_queue;
|
||||
unsigned int i;
|
||||
|
||||
ERR("Device lost observed, analyzing breadcrumbs ...\n");
|
||||
|
||||
if (device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
/* AMD path, buffer marker. */
|
||||
for (i = 0; i < MAX_COMMAND_LISTS; i++)
|
||||
vkd3d_breadcrumb_tracer_report_command_list_amd(tracer, i);
|
||||
}
|
||||
else if (device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
/* vkGetQueueCheckpointDataNV does not require us to synchronize access to the queue. */
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_DIRECT);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COMPUTE);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
|
||||
queue_family_info = d3d12_device_get_vkd3d_queue_family(device, D3D12_COMMAND_LIST_TYPE_COPY);
|
||||
for (i = 0; i < queue_family_info->queue_count; i++)
|
||||
{
|
||||
vk_queue = queue_family_info->queues[i]->vk_queue;
|
||||
vkd3d_breadcrumb_tracer_report_queue_nv(tracer, device, vk_queue);
|
||||
}
|
||||
}
|
||||
|
||||
ERR("Done analyzing breadcrumbs ...\n");
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_begin_command_list(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
trace->counter++;
|
||||
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
/* A checkpoint is implicitly a top and bottom marker. */
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_add_command(struct d3d12_command_list *list,
|
||||
const struct vkd3d_breadcrumb_command *command)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
|
||||
TRACE("Adding command (%s) to context %u.\n",
|
||||
vkd3d_breadcrumb_command_type_to_str(command->type), context);
|
||||
|
||||
vkd3d_array_reserve((void**)&trace->commands, &trace->command_size,
|
||||
trace->command_count + 1, sizeof(*trace->commands));
|
||||
trace->commands[trace->command_count++] = *command;
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_signal(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
|
||||
trace->counter));
|
||||
|
||||
trace->counter++;
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
trace->counter++;
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal top-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
cmd.count = trace->counter;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
TRACE("Breadcrumb signal bottom-of-pipe context %u -> %u\n", context, cmd.count);
|
||||
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
}
|
||||
|
||||
void vkd3d_breadcrumb_tracer_end_command_list(struct d3d12_command_list *list)
|
||||
{
|
||||
struct vkd3d_breadcrumb_tracer *breadcrumb_tracer = &list->device->breadcrumb_tracer;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
struct vkd3d_breadcrumb_command_list_trace_context *trace;
|
||||
unsigned int context = list->breadcrumb_context_index;
|
||||
struct vkd3d_breadcrumb_command cmd;
|
||||
|
||||
if (context == UINT32_MAX)
|
||||
return;
|
||||
|
||||
trace = &breadcrumb_tracer->trace_contexts[context];
|
||||
trace->counter = UINT32_MAX;
|
||||
|
||||
if (list->device->vk_info.AMD_buffer_marker)
|
||||
{
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, begin_marker),
|
||||
trace->counter));
|
||||
|
||||
VK_CALL(vkCmdWriteBufferMarkerAMD(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
breadcrumb_tracer->host_buffer,
|
||||
context * sizeof(struct vkd3d_breadcrumb_counter) +
|
||||
offsetof(struct vkd3d_breadcrumb_counter, end_marker),
|
||||
trace->counter));
|
||||
}
|
||||
else if (list->device->vk_info.NV_device_diagnostic_checkpoints)
|
||||
{
|
||||
VK_CALL(vkCmdSetCheckpointNV(list->vk_command_buffer, NV_ENCODE_CHECKPOINT(context, trace->counter)));
|
||||
}
|
||||
|
||||
cmd.count = trace->counter;
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_TOP_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
cmd.type = VKD3D_BREADCRUMB_COMMAND_SET_BOTTOM_MARKER;
|
||||
vkd3d_breadcrumb_tracer_add_command(list, &cmd);
|
||||
}
|
1632
libs/vkd3d/cache.c
1632
libs/vkd3d/cache.c
File diff suppressed because it is too large
Load Diff
4040
libs/vkd3d/command.c
4040
libs/vkd3d/command.c
File diff suppressed because it is too large
Load Diff
|
@ -21,6 +21,7 @@
|
|||
#include "vkd3d_private.h"
|
||||
#include "vkd3d_debug.h"
|
||||
#include "vkd3d_common.h"
|
||||
#include "vkd3d_platform.h"
|
||||
#include <stdio.h>
|
||||
|
||||
void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
|
||||
|
@ -53,22 +54,199 @@ void vkd3d_shader_debug_ring_init_spec_constant(struct d3d12_device *device,
|
|||
info->map_entries[3].size = sizeof(uint32_t);
|
||||
}
|
||||
|
||||
/* Ring accessors. Both expect a variable named `ring` in scope and index
 * ring->mapped_ring in units of 32-bit words. The offset is wrapped by masking
 * with (word count - 1), which relies on ring_size being a power of two.
 * READ_RING_WORD_ACQUIRE performs the same read as an atomic load with
 * acquire ordering. */
#define READ_RING_WORD(off) ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)]
|
||||
#define READ_RING_WORD_ACQUIRE(off) \
|
||||
vkd3d_atomic_uint32_load_explicit(&ring->mapped_ring[(off) & ((ring->ring_size / sizeof(uint32_t)) - 1)], \
|
||||
vkd3d_memory_order_acquire)
|
||||
/* First word of a ring message: the bits selected by DEBUG_CHANNEL_WORD_MASK
 * must equal DEBUG_CHANNEL_WORD_COOKIE, and the remaining low 4 bits hold the
 * message word count. */
#define DEBUG_CHANNEL_WORD_COOKIE 0xdeadca70u
|
||||
#define DEBUG_CHANNEL_WORD_MASK 0xfffffff0u
|
||||
|
||||
static const char *vkd3d_patch_command_token_str(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32: return "RootConst";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO: return "IBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI: return "IBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_SIZE: return "IBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_FORMAT: return "IBO Type";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO: return "VBO VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI: return "VBO VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_SIZE: return "VBO Size";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_STRIDE: return "VBO Stride";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO: return "ROOT VA LO";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI: return "ROOT VA HI";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_COUNT: return "Vertex Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INDEX_COUNT: return "Index Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_INSTANCE_COUNT: return "Instance Count";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INDEX: return "First Index";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_VERTEX: return "First Vertex";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_FIRST_INSTANCE: return "First Instance";
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VERTEX_OFFSET: return "Vertex Offset";
|
||||
default: return "???";
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_patch_command_token_is_hex(enum vkd3d_patch_command_token token)
|
||||
{
|
||||
switch (token)
|
||||
{
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_IBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_VBO_VA_HI:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_LO:
|
||||
case VKD3D_PATCH_COMMAND_TOKEN_COPY_ROOT_VA_HI:
|
||||
return true;
|
||||
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool vkd3d_shader_debug_ring_print_message(struct vkd3d_shader_debug_ring *ring,
|
||||
uint32_t word_offset, uint32_t message_word_count)
|
||||
{
|
||||
uint32_t i, debug_instance, debug_thread_id[3], fmt;
|
||||
char message_buffer[4096];
|
||||
uint64_t shader_hash;
|
||||
size_t len, avail;
|
||||
|
||||
if (message_word_count < 8)
|
||||
{
|
||||
ERR("Message word count %u is invalid.\n", message_word_count);
|
||||
return false;
|
||||
}
|
||||
|
||||
shader_hash = (uint64_t)READ_RING_WORD(word_offset + 1) | ((uint64_t)READ_RING_WORD(word_offset + 2) << 32);
|
||||
debug_instance = READ_RING_WORD(word_offset + 3);
|
||||
for (i = 0; i < 3; i++)
|
||||
debug_thread_id[i] = READ_RING_WORD(word_offset + 4 + i);
|
||||
fmt = READ_RING_WORD(word_offset + 7);
|
||||
|
||||
word_offset += 8;
|
||||
message_word_count -= 8;
|
||||
|
||||
if (shader_hash == 0)
|
||||
{
|
||||
/* We got this from our internal debug shaders. Pretty-print.
|
||||
* Make sure the log is sortable for easier debug.
|
||||
* TODO: Might consider a callback system that listeners from different subsystems can listen to and print their own messages,
|
||||
* but that is overengineering at this time ... */
|
||||
snprintf(message_buffer, sizeof(message_buffer), "ExecuteIndirect: GlobalCommandIndex %010u, Debug tag %010u, DrawID %04u (ThreadID %04u): ",
|
||||
debug_instance, debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
if (message_word_count == 2)
|
||||
{
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, "DrawCount %u, MaxDrawCount %u",
|
||||
READ_RING_WORD(word_offset + 0),
|
||||
READ_RING_WORD(word_offset + 1));
|
||||
}
|
||||
else if (message_word_count == 4)
|
||||
{
|
||||
union { uint32_t u32; float f32; int32_t s32; } value;
|
||||
enum vkd3d_patch_command_token token;
|
||||
uint32_t dst_offset;
|
||||
uint32_t src_offset;
|
||||
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
token = READ_RING_WORD(word_offset + 0);
|
||||
dst_offset = READ_RING_WORD(word_offset + 1);
|
||||
src_offset = READ_RING_WORD(word_offset + 2);
|
||||
value.u32 = READ_RING_WORD(word_offset + 3);
|
||||
|
||||
if (vkd3d_patch_command_token_is_hex(token))
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- #%08x",
|
||||
vkd3d_patch_command_token_str(token), value.u32);
|
||||
}
|
||||
else if (token == VKD3D_PATCH_COMMAND_TOKEN_COPY_CONST_U32)
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- {hex #%08x, s32 %d, f32 %f}",
|
||||
vkd3d_patch_command_token_str(token), value.u32, value.s32, value.f32);
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer + len, avail, "%s <- %d",
|
||||
vkd3d_patch_command_token_str(token), value.s32);
|
||||
}
|
||||
|
||||
len = strlen(message_buffer);
|
||||
avail = sizeof(message_buffer) - len;
|
||||
snprintf(message_buffer + len, avail, " (dst offset %u, src offset %u)", dst_offset, src_offset);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %010u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
for (i = 0; i < message_word_count; i++)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
u.u32 = READ_RING_WORD(word_offset + i);
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = i == 0 ? " " : ", ";
|
||||
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
|
||||
switch ((fmt >> (2u * i)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
INFO("%s\n", message_buffer);
|
||||
return true;
|
||||
}
|
||||
|
||||
void *vkd3d_shader_debug_ring_thread_main(void *arg)
|
||||
{
|
||||
uint32_t last_counter, new_counter, count, i, j, message_word_count, debug_instance, debug_thread_id[3], fmt;
|
||||
uint32_t last_counter, new_counter, count, i, cookie_word_count;
|
||||
volatile const uint32_t *ring_counter; /* Atomic updated by the GPU. */
|
||||
struct vkd3d_shader_debug_ring *ring;
|
||||
struct d3d12_device *device = arg;
|
||||
const uint32_t *ring_counter;
|
||||
const uint32_t *ring_base;
|
||||
char message_buffer[4096];
|
||||
bool is_active = true;
|
||||
uint64_t shader_hash;
|
||||
uint32_t *ring_base;
|
||||
uint32_t word_count;
|
||||
size_t ring_mask;
|
||||
|
||||
ring = &device->debug_ring;
|
||||
ring_mask = ring->ring_size - 1;
|
||||
ring_counter = ring->mapped;
|
||||
ring_base = ring_counter + (ring->ring_offset / sizeof(uint32_t));
|
||||
ring_mask = (ring->ring_size / sizeof(uint32_t)) - 1;
|
||||
ring_counter = ring->mapped_control_block;
|
||||
ring_base = ring->mapped_ring;
|
||||
last_counter = 0;
|
||||
|
||||
vkd3d_set_thread_name("debug-ring");
|
||||
|
@ -82,88 +260,93 @@ void *vkd3d_shader_debug_ring_thread_main(void *arg)
|
|||
pthread_mutex_unlock(&ring->ring_lock);
|
||||
|
||||
new_counter = *ring_counter;
|
||||
|
||||
if (last_counter != new_counter)
|
||||
{
|
||||
count = (new_counter - last_counter) & ring_mask;
|
||||
|
||||
/* Assume that each iteration can safely use 1/4th of the buffer to avoid WAR hazards. */
|
||||
if ((new_counter - last_counter) > (ring->ring_size / 16))
|
||||
if (count > (ring->ring_size / 16))
|
||||
{
|
||||
ERR("Debug ring is probably too small (%u new words this iteration), increase size to avoid risk of dropping messages.\n",
|
||||
new_counter - last_counter);
|
||||
count);
|
||||
}
|
||||
|
||||
for (i = 0; i < count; )
|
||||
{
|
||||
#define READ_RING_WORD(off) ring_base[((off) + i + last_counter) & ring_mask]
|
||||
message_word_count = READ_RING_WORD(0);
|
||||
if (i + message_word_count > count)
|
||||
break;
|
||||
if (message_word_count < 8 || message_word_count > 16 + 8)
|
||||
break;
|
||||
/* The debug ring shader has "release" semantics for the word count write,
|
||||
* so just make sure the reads don't get reordered here. */
|
||||
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
|
||||
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
|
||||
|
||||
shader_hash = (uint64_t)READ_RING_WORD(1) | ((uint64_t)READ_RING_WORD(2) << 32);
|
||||
debug_instance = READ_RING_WORD(3);
|
||||
for (j = 0; j < 3; j++)
|
||||
debug_thread_id[j] = READ_RING_WORD(4 + j);
|
||||
fmt = READ_RING_WORD(7);
|
||||
|
||||
snprintf(message_buffer, sizeof(message_buffer), "Shader: %"PRIx64": Instance %u, ID (%u, %u, %u):",
|
||||
shader_hash, debug_instance,
|
||||
debug_thread_id[0], debug_thread_id[1], debug_thread_id[2]);
|
||||
|
||||
i += 8;
|
||||
message_word_count -= 8;
|
||||
|
||||
for (j = 0; j < message_word_count; j++)
|
||||
if (cookie_word_count == 0)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f32;
|
||||
uint32_t u32;
|
||||
int32_t i32;
|
||||
} u;
|
||||
const char *delim;
|
||||
size_t len, avail;
|
||||
u.u32 = READ_RING_WORD(j);
|
||||
|
||||
len = strlen(message_buffer);
|
||||
if (len + 1 >= sizeof(message_buffer))
|
||||
break;
|
||||
avail = sizeof(message_buffer) - len;
|
||||
|
||||
delim = j == 0 ? " " : ", ";
|
||||
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_HEX 0u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_I32 1u
|
||||
#define VKD3D_DEBUG_CHANNEL_FMT_F32 2u
|
||||
switch ((fmt >> (2u * j)) & 3u)
|
||||
{
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_HEX:
|
||||
snprintf(message_buffer + len, avail, "%s#%x", delim, u.u32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_I32:
|
||||
snprintf(message_buffer + len, avail, "%s%d", delim, u.i32);
|
||||
break;
|
||||
|
||||
case VKD3D_DEBUG_CHANNEL_FMT_F32:
|
||||
snprintf(message_buffer + len, avail, "%s%f", delim, u.f32);
|
||||
break;
|
||||
|
||||
default:
|
||||
snprintf(message_buffer + len, avail, "%s????", delim);
|
||||
break;
|
||||
}
|
||||
ERR("Message was allocated, but write did not complete. last_counter = %u, rewrite new_counter = %u -> %u\n",
|
||||
last_counter, new_counter, last_counter + i);
|
||||
/* Rewind the counter, and try again later. */
|
||||
new_counter = last_counter + i;
|
||||
break;
|
||||
}
|
||||
|
||||
INFO("%s\n", message_buffer);
|
||||
/* If something is written here, it must be a cookie. */
|
||||
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) != DEBUG_CHANNEL_WORD_COOKIE)
|
||||
{
|
||||
ERR("Invalid message work cookie detected, 0x%x.\n", cookie_word_count);
|
||||
break;
|
||||
}
|
||||
|
||||
#undef READ_RING_WORD
|
||||
i += message_word_count;
|
||||
if (i + word_count > count)
|
||||
{
|
||||
ERR("Message word count %u is out of bounds (i = %u, count = %u).\n",
|
||||
word_count, i, count);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
|
||||
break;
|
||||
|
||||
i += word_count;
|
||||
}
|
||||
}
|
||||
last_counter = new_counter;
|
||||
|
||||
/* Make sure to clear out any messages we read so that when the ring gets around to
|
||||
* this point again, we can detect unwritten memory.
|
||||
* This relies on having a ring that is large enough, but in practice, if we just make the ring
|
||||
* large enough, there is nothing to worry about. */
|
||||
while (last_counter != new_counter)
|
||||
{
|
||||
ring_base[last_counter & ring_mask] = 0;
|
||||
last_counter++;
|
||||
}
|
||||
}
|
||||
|
||||
if (ring->device_lost)
|
||||
{
|
||||
INFO("Device lost detected, attempting to fish for clues.\n");
|
||||
new_counter = *ring_counter;
|
||||
if (last_counter != new_counter)
|
||||
{
|
||||
count = (new_counter - last_counter) & ring_mask;
|
||||
for (i = 0; i < count; )
|
||||
{
|
||||
cookie_word_count = READ_RING_WORD_ACQUIRE(last_counter + i);
|
||||
word_count = cookie_word_count & ~DEBUG_CHANNEL_WORD_MASK;
|
||||
|
||||
/* This is considered a message if it has the marker and a word count that is in-range. */
|
||||
if ((cookie_word_count & DEBUG_CHANNEL_WORD_MASK) == DEBUG_CHANNEL_WORD_COOKIE &&
|
||||
i + word_count <= count &&
|
||||
vkd3d_shader_debug_ring_print_message(ring, last_counter + i, word_count))
|
||||
{
|
||||
i += word_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Keep going. */
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
INFO("Done fishing for clues ...\n");
|
||||
}
|
||||
|
||||
return NULL;
|
||||
|
@ -175,19 +358,20 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
D3D12_HEAP_PROPERTIES heap_properties;
|
||||
D3D12_RESOURCE_DESC1 resource_desc;
|
||||
const char *env;
|
||||
VkMemoryPropertyFlags memory_props;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
|
||||
memset(ring, 0, sizeof(*ring));
|
||||
if (!(env = getenv("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2")))
|
||||
|
||||
if (!vkd3d_get_env_var("VKD3D_SHADER_DEBUG_RING_SIZE_LOG2", env, sizeof(env)))
|
||||
return S_OK;
|
||||
|
||||
ring->active = true;
|
||||
|
||||
ring->ring_size = (size_t)1 << strtoul(env, NULL, 0);
|
||||
// Reserve 4k to be used as a control block of some sort.
|
||||
ring->ring_offset = 4096;
|
||||
ring->control_block_size = 4096;
|
||||
|
||||
WARN("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
|
||||
INFO("Enabling shader debug ring of size: %zu.\n", ring->ring_size);
|
||||
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
{
|
||||
|
@ -201,7 +385,7 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_L0;
|
||||
|
||||
memset(&resource_desc, 0, sizeof(resource_desc));
|
||||
resource_desc.Width = ring->ring_offset + ring->ring_size;
|
||||
resource_desc.Width = ring->ring_size;
|
||||
resource_desc.Height = 1;
|
||||
resource_desc.DepthOrArraySize = 1;
|
||||
resource_desc.MipLevels = 1;
|
||||
|
@ -212,33 +396,71 @@ HRESULT vkd3d_shader_debug_ring_init(struct vkd3d_shader_debug_ring *ring,
|
|||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
|
||||
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &ring->host_buffer)))
|
||||
&resource_desc, &ring->host_buffer)))
|
||||
goto err_free_buffers;
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
/* If we're doing breadcrumb debugging, we also need to be able to read debug ring messages
|
||||
* from a crash, so we cannot rely on being able to copy the device payload back to host.
|
||||
* Use PCI-e BAR + UNCACHED + DEVICE_COHERENT if we must. */
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
{
|
||||
INFO("Using debug ring with breadcrumbs, opting in to device uncached payload buffer.\n");
|
||||
/* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
|
||||
* host reads, so make extra sure. */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
{
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
|
||||
INFO("Enabling uncached device memory for debug ring.\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->host_buffer,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||
&ring->host_buffer_memory)))
|
||||
memory_props, &ring->host_buffer_memory)))
|
||||
goto err_free_buffers;
|
||||
|
||||
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer) + ring->ring_offset;
|
||||
|
||||
resource_desc.Width = ring->ring_offset;
|
||||
resource_desc.Width = ring->control_block_size;
|
||||
memset(&heap_properties, 0, sizeof(heap_properties));
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
|
||||
if (FAILED(vkd3d_create_buffer(device, &heap_properties, D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS,
|
||||
&resource_desc, &ring->device_atomic_buffer)))
|
||||
&resource_desc, &ring->device_atomic_buffer)))
|
||||
goto err_free_buffers;
|
||||
|
||||
memory_props = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
|
||||
if (vkd3d_config_flags & VKD3D_CONFIG_FLAG_BREADCRUMBS)
|
||||
{
|
||||
/* Expect crashes since we won't have time to flush caches.
|
||||
* We use coherent in the debug_channel.h header, but not necessarily guaranteed to be coherent with
|
||||
* host reads, so make extra sure. */
|
||||
if (device->device_info.device_coherent_memory_features_amd.deviceCoherentMemory)
|
||||
memory_props |= VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
|
||||
}
|
||||
|
||||
if (FAILED(vkd3d_allocate_buffer_memory(device, ring->device_atomic_buffer,
|
||||
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, &ring->device_atomic_buffer_memory)))
|
||||
memory_props, &ring->device_atomic_buffer_memory)))
|
||||
goto err_free_buffers;
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory.vk_memory, 0, VK_WHOLE_SIZE, 0, &ring->mapped)) != VK_SUCCESS)
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->host_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_ring)) != VK_SUCCESS)
|
||||
goto err_free_buffers;
|
||||
|
||||
if (VK_CALL(vkMapMemory(device->vk_device, ring->device_atomic_buffer_memory.vk_memory,
|
||||
0, VK_WHOLE_SIZE, 0, (void**)&ring->mapped_control_block)) != VK_SUCCESS)
|
||||
goto err_free_buffers;
|
||||
|
||||
ring->ring_device_address = vkd3d_get_buffer_device_address(device, ring->host_buffer);
|
||||
ring->atomic_device_address = vkd3d_get_buffer_device_address(device, ring->device_atomic_buffer);
|
||||
|
||||
memset(ring->mapped_control_block, 0, ring->control_block_size);
|
||||
memset(ring->mapped_ring, 0, ring->ring_size);
|
||||
|
||||
if (pthread_mutex_init(&ring->ring_lock, NULL) != 0)
|
||||
goto err_free_buffers;
|
||||
if (pthread_cond_init(&ring->ring_cond, NULL) != 0)
|
||||
|
@ -286,40 +508,24 @@ void vkd3d_shader_debug_ring_cleanup(struct vkd3d_shader_debug_ring *ring,
|
|||
vkd3d_free_device_memory(device, &ring->device_atomic_buffer_memory);
|
||||
}
|
||||
|
||||
void vkd3d_shader_debug_ring_end_command_buffer(struct d3d12_command_list *list)
|
||||
static pthread_mutex_t debug_ring_teardown_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
void vkd3d_shader_debug_ring_kick(struct vkd3d_shader_debug_ring *ring, struct d3d12_device *device, bool device_lost)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
VkCopyBufferInfo2KHR copy_info;
|
||||
VkBufferCopy2KHR buffer_copy;
|
||||
VkMemoryBarrier barrier;
|
||||
|
||||
if (list->device->debug_ring.active &&
|
||||
list->has_replaced_shaders &&
|
||||
(list->type == D3D12_COMMAND_LIST_TYPE_DIRECT || list->type == D3D12_COMMAND_LIST_TYPE_COMPUTE))
|
||||
if (device_lost)
|
||||
{
|
||||
barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
|
||||
barrier.pNext = NULL;
|
||||
barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
|
||||
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(list->vk_command_buffer,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
|
||||
1, &barrier, 0, NULL, 0, NULL));
|
||||
|
||||
buffer_copy.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR;
|
||||
buffer_copy.pNext = NULL;
|
||||
buffer_copy.size = list->device->debug_ring.ring_offset;
|
||||
buffer_copy.dstOffset = 0;
|
||||
buffer_copy.srcOffset = 0;
|
||||
|
||||
copy_info.sType = VK_STRUCTURE_TYPE_COPY_BUFFER_INFO_2_KHR;
|
||||
copy_info.pNext = NULL;
|
||||
copy_info.srcBuffer = list->device->debug_ring.device_atomic_buffer;
|
||||
copy_info.dstBuffer = list->device->debug_ring.host_buffer;
|
||||
copy_info.regionCount = 1;
|
||||
copy_info.pRegions = &buffer_copy;
|
||||
|
||||
VK_CALL(vkCmdCopyBuffer2KHR(list->vk_command_buffer, ©_info));
|
||||
/* Host barrier is taken care of automatically. */
|
||||
/* Need a global lock here since multiple threads can observe device lost at the same time. */
|
||||
pthread_mutex_lock(&debug_ring_teardown_lock);
|
||||
{
|
||||
ring->device_lost = true;
|
||||
/* We're going to die or hang after this most likely, so make sure we get to see all messages the
|
||||
* GPU had to write. Just cleanup now. */
|
||||
vkd3d_shader_debug_ring_cleanup(ring, device);
|
||||
}
|
||||
pthread_mutex_unlock(&debug_ring_teardown_lock);
|
||||
}
|
||||
else
|
||||
{
|
||||
pthread_cond_signal(&ring->ring_cond);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -76,10 +76,10 @@ static const char *debug_descriptor_type(vkd3d_descriptor_qa_flags type_flags)
|
|||
|
||||
static void vkd3d_descriptor_debug_init_once(void)
|
||||
{
|
||||
const char *env;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
vkd3d_get_env_var("VKD3D_DESCRIPTOR_QA_LOG", env, sizeof(env));
|
||||
|
||||
env = getenv("VKD3D_DESCRIPTOR_QA_LOG");
|
||||
if (env)
|
||||
if (strlen(env) > 0)
|
||||
{
|
||||
INFO("Enabling VKD3D_DESCRIPTOR_QA_LOG\n");
|
||||
descriptor_debug_file = fopen(env, "w");
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -240,6 +240,7 @@ static HRESULT d3d12_heap_init(struct d3d12_heap *heap, struct d3d12_device *dev
|
|||
|
||||
alloc_info.heap_desc = heap->desc;
|
||||
alloc_info.host_ptr = host_address;
|
||||
alloc_info.extra_allocation_flags = 0;
|
||||
|
||||
if (FAILED(hr = vkd3d_private_store_init(&heap->private_store)))
|
||||
return hr;
|
||||
|
|
|
@ -327,34 +327,39 @@ static HRESULT vkd3d_import_host_memory(struct d3d12_device *device, void *host_
|
|||
void *pNext, struct vkd3d_device_memory_allocation *allocation)
|
||||
{
|
||||
VkImportMemoryHostPointerInfoEXT import_info;
|
||||
HRESULT hr;
|
||||
HRESULT hr = S_OK;
|
||||
|
||||
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT;
|
||||
import_info.pNext = pNext;
|
||||
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT;
|
||||
import_info.pHostPointer = host_address;
|
||||
|
||||
if (FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_USE_HOST_IMPORT_FALLBACK) ||
|
||||
FAILED(hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
type_flags, type_mask, &import_info, allocation)))
|
||||
{
|
||||
WARN("Failed to import host memory, hr %#x.\n", hr);
|
||||
if (FAILED(hr))
|
||||
WARN("Failed to import host memory, hr %#x.\n", hr);
|
||||
/* If we failed, fall back to a host-visible allocation. Generally
|
||||
* the app will access the memory through the main host pointer,
|
||||
* so it's fine. */
|
||||
hr = vkd3d_try_allocate_device_memory(device, size,
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
type_mask, &import_info, allocation);
|
||||
type_mask, pNext, allocation);
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation, struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocation *allocation,
|
||||
struct d3d12_device *device, struct vkd3d_memory_allocator *allocator)
|
||||
{
|
||||
if (device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
allocation->resource.va = vkd3d_get_buffer_device_address(device, allocation->resource.vk_buffer);
|
||||
else
|
||||
else if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
allocation->resource.va = vkd3d_va_map_alloc_fake_va(&allocator->va_map, allocation->resource.size);
|
||||
else
|
||||
allocation->resource.va = 0xdeadbeef;
|
||||
|
||||
if (!allocation->resource.va)
|
||||
{
|
||||
|
@ -362,7 +367,9 @@ static HRESULT vkd3d_allocation_assign_gpu_address(struct vkd3d_memory_allocatio
|
|||
return E_OUTOFMEMORY;
|
||||
}
|
||||
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
/* Internal scratch buffers are not visible to application so we never have to map it back to VkBuffer. */
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
vkd3d_va_map_insert(&allocator->va_map, &allocation->resource);
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
@ -446,10 +453,12 @@ static void vkd3d_memory_allocation_free(const struct vkd3d_memory_allocation *a
|
|||
|
||||
if ((allocation->flags & VKD3D_ALLOCATION_FLAG_GPU_ADDRESS) && allocation->resource.va)
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
if (!(allocation->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH))
|
||||
{
|
||||
vkd3d_va_map_remove(&allocator->va_map, &allocation->resource);
|
||||
if (!device->device_info.buffer_device_address_features.bufferDeviceAddress)
|
||||
vkd3d_va_map_free_fake_va(&allocator->va_map, allocation->resource.va, allocation->resource.size);
|
||||
}
|
||||
}
|
||||
|
||||
if (allocation->resource.view_map)
|
||||
|
@ -1117,6 +1126,8 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
|
|||
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
|
||||
vkd3d_queue_release(allocator->vkd3d_queue);
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr < 0)
|
||||
{
|
||||
ERR("Failed to submit command buffer, vr %d.\n", vr);
|
||||
|
@ -1146,6 +1157,7 @@ static HRESULT vkd3d_memory_allocator_flush_clears_locked(struct vkd3d_memory_al
|
|||
for (i = 0; i < queue_family->queue_count; i++)
|
||||
{
|
||||
vkd3d_queue_add_wait(queue_family->queues[i],
|
||||
NULL,
|
||||
clear_queue->vk_semaphore,
|
||||
clear_queue->next_signal_value);
|
||||
}
|
||||
|
@ -1390,13 +1402,35 @@ static HRESULT vkd3d_suballocate_memory(struct d3d12_device *device, struct vkd3
|
|||
return hr;
|
||||
}
|
||||
|
||||
/* Tells whether the driver identified by driver_id is on the list of drivers
 * assumed to clear memory implicitly.
 * Every listed driver is known to pass test_stress_suballocation, which hits this path.
 * Any driver that is not listed yields false. */
static inline bool vkd3d_driver_implicitly_clears(VkDriverId driver_id)
{
    return driver_id == VK_DRIVER_ID_MESA_RADV ||
            driver_id == VK_DRIVER_ID_NVIDIA_PROPRIETARY ||
            driver_id == VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
}
|
||||
|
||||
HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
|
||||
const struct vkd3d_allocate_memory_info *info, struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
bool implementation_implicitly_clears;
|
||||
bool needs_clear;
|
||||
bool suballocate;
|
||||
HRESULT hr;
|
||||
|
||||
if (!info->pNext && !info->host_ptr && info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)))
|
||||
suballocate = !info->pNext && !info->host_ptr &&
|
||||
info->memory_requirements.size < VKD3D_VA_BLOCK_SIZE &&
|
||||
!(info->heap_flags & (D3D12_HEAP_FLAG_DENY_BUFFERS | D3D12_HEAP_FLAG_ALLOW_WRITE_WATCH)) &&
|
||||
!(info->flags & VKD3D_ALLOCATION_FLAG_INTERNAL_SCRATCH);
|
||||
|
||||
if (suballocate)
|
||||
hr = vkd3d_suballocate_memory(device, allocator, info, allocation);
|
||||
else
|
||||
hr = vkd3d_memory_allocation_init(allocation, device, allocator, info);
|
||||
|
@ -1404,8 +1438,20 @@ HRESULT vkd3d_allocate_memory(struct d3d12_device *device, struct vkd3d_memory_a
|
|||
if (FAILED(hr))
|
||||
return hr;
|
||||
|
||||
if (!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
|
||||
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR))
|
||||
/* If we're allocating Vulkan memory directly,
|
||||
* we can rely on the driver doing this for us.
|
||||
* This is relying on implementation details.
|
||||
* RADV definitely does this, and it seems like NV also does it.
|
||||
* TODO: an extension for this would be nice. */
|
||||
implementation_implicitly_clears =
|
||||
vkd3d_driver_implicitly_clears(device->device_info.driver_properties.driverID) &&
|
||||
!suballocate;
|
||||
|
||||
needs_clear = !implementation_implicitly_clears &&
|
||||
!(info->heap_flags & D3D12_HEAP_FLAG_CREATE_NOT_ZEROED) &&
|
||||
!(vkd3d_config_flags & VKD3D_CONFIG_FLAG_MEMORY_ALLOCATOR_SKIP_CLEAR);
|
||||
|
||||
if (needs_clear)
|
||||
vkd3d_memory_allocator_clear_allocation(allocator, device, allocation);
|
||||
|
||||
return hr;
|
||||
|
@ -1434,6 +1480,7 @@ static bool vkd3d_heap_allocation_accept_deferred_resource_placements(struct d3d
|
|||
HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_memory_allocator *allocator,
|
||||
const struct vkd3d_allocate_heap_memory_info *info, struct vkd3d_memory_allocation *allocation)
|
||||
{
|
||||
struct vkd3d_allocate_heap_memory_info heap_info;
|
||||
struct vkd3d_allocate_memory_info alloc_info;
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1445,9 +1492,31 @@ HRESULT vkd3d_allocate_heap_memory(struct d3d12_device *device, struct vkd3d_mem
|
|||
alloc_info.heap_flags = info->heap_desc.Flags;
|
||||
alloc_info.host_ptr = info->host_ptr;
|
||||
|
||||
alloc_info.flags |= info->extra_allocation_flags;
|
||||
if (!(info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS))
|
||||
alloc_info.flags |= VKD3D_ALLOCATION_FLAG_GLOBAL_BUFFER;
|
||||
|
||||
if (is_cpu_accessible_heap(&info->heap_desc.Properties))
|
||||
{
|
||||
if (info->heap_desc.Flags & D3D12_HEAP_FLAG_DENY_BUFFERS)
|
||||
{
|
||||
/* If the heap was only designed to handle images, the heap is useless,
|
||||
* and we can force everything to go through committed path. */
|
||||
memset(allocation, 0, sizeof(*allocation));
|
||||
return S_OK;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* CPU visible textures are never placed on a heap directly,
|
||||
* since LINEAR images have alignment / size requirements
|
||||
* that are vastly different from OPTIMAL ones.
|
||||
* We can place buffers however. */
|
||||
heap_info = *info;
|
||||
info = &heap_info;
|
||||
heap_info.heap_desc.Flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
}
|
||||
}
|
||||
|
||||
hr = vkd3d_allocate_memory(device, allocator, &alloc_info, allocation);
|
||||
if (hr == E_OUTOFMEMORY && vkd3d_heap_allocation_accept_deferred_resource_placements(device,
|
||||
&info->heap_desc.Properties, info->heap_desc.Flags))
|
||||
|
|
|
@ -27,6 +27,8 @@ vkd3d_shaders =[
|
|||
|
||||
'shaders/vs_swapchain_fullscreen.vert',
|
||||
'shaders/fs_swapchain_fullscreen.frag',
|
||||
'shaders/cs_execute_indirect_patch.comp',
|
||||
'shaders/cs_execute_indirect_patch_debug_ring.comp',
|
||||
]
|
||||
|
||||
vkd3d_src = [
|
||||
|
@ -39,7 +41,6 @@ vkd3d_src = [
|
|||
'heap.c',
|
||||
'memory.c',
|
||||
'meta.c',
|
||||
'platform.c',
|
||||
'resource.c',
|
||||
'state.c',
|
||||
'utils.c',
|
||||
|
@ -62,6 +63,14 @@ if enable_descriptor_qa
|
|||
vkd3d_src += ['descriptor_debug.c']
|
||||
endif
|
||||
|
||||
if enable_breadcrumbs
|
||||
vkd3d_src += ['breadcrumbs.c']
|
||||
endif
|
||||
|
||||
if vkd3d_platform == 'windows'
|
||||
vkd3d_src += ['shared_metadata.c']
|
||||
endif
|
||||
|
||||
if not enable_d3d12
|
||||
vkd3d_lib = shared_library('vkd3d-proton', vkd3d_src, glsl_generator.process(vkd3d_shaders), vkd3d_build, vkd3d_version,
|
||||
dependencies : [ vkd3d_common_dep, vkd3d_shader_dep ] + vkd3d_extra_libs,
|
||||
|
|
|
@ -137,73 +137,8 @@ static VkResult vkd3d_meta_create_compute_pipeline(struct d3d12_device *device,
|
|||
return vr;
|
||||
}
|
||||
|
||||
/* Creates a render pass with a single subpass and a single attachment.
 *
 * The attachment uses the given format and sample count, is loaded and stored
 * (including stencil), and stays in the given layout before, during and after the pass.
 * It is bound as the depth-stencil attachment when the format has a depth or stencil
 * aspect, and as the only color attachment otherwise.
 *
 * format must not be NULL. The new handle is written to vk_render_pass.
 * Returns the VkResult of vkCreateRenderPass2KHR; negative results are also logged. */
static VkResult vkd3d_meta_create_render_pass(struct d3d12_device *device, VkSampleCountFlagBits samples,
        const struct vkd3d_format *format, VkImageLayout layout, VkRenderPass *vk_render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkRenderPassCreateInfo2KHR create_info;
    VkAttachmentDescription2KHR target_desc;
    VkAttachmentReference2KHR target_ref;
    VkSubpassDescription2KHR subpass;
    bool is_depth_stencil;
    VkResult vr;

    assert(format);

    is_depth_stencil = (format->vk_aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) != 0;

    /* Start from zeroed structures, so pNext, flags, counts and unused pointers are
     * already NULL / 0, then fill in the remaining members. */
    memset(&target_desc, 0, sizeof(target_desc));
    memset(&target_ref, 0, sizeof(target_ref));
    memset(&subpass, 0, sizeof(subpass));
    memset(&create_info, 0, sizeof(create_info));

    target_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    target_desc.format = format->vk_format;
    target_desc.samples = samples;
    target_desc.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    target_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    target_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    target_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
    target_desc.initialLayout = layout;
    target_desc.finalLayout = layout;

    /* aspectMask is the input attachment aspect mask; it stays 0. */
    target_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    target_ref.attachment = 0;
    target_ref.layout = layout;

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    if (is_depth_stencil)
    {
        subpass.pDepthStencilAttachment = &target_ref;
    }
    else
    {
        subpass.colorAttachmentCount = 1;
        subpass.pColorAttachments = &target_ref;
    }

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &target_desc;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;

    vr = VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, vk_render_pass));
    if (vr < 0)
        ERR("Failed to create render pass, vr %d.\n", vr);

    return vr;
}
|
||||
|
||||
static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
VkPipelineLayout layout, VkRenderPass render_pass,
|
||||
VkPipelineLayout layout, VkFormat color_format, VkFormat ds_format, VkImageAspectFlags vk_aspect_mask,
|
||||
VkShaderModule vs_module, VkShaderModule fs_module,
|
||||
VkSampleCountFlagBits samples, const VkPipelineDepthStencilStateCreateInfo *ds_state,
|
||||
const VkPipelineColorBlendStateCreateInfo *cb_state, const VkSpecializationInfo *spec_info,
|
||||
|
@ -213,6 +148,7 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
VkPipelineShaderStageCreateInfo shader_stages[3];
|
||||
VkPipelineInputAssemblyStateCreateInfo ia_state;
|
||||
VkPipelineRasterizationStateCreateInfo rs_state;
|
||||
VkPipelineRenderingCreateInfoKHR rendering_info;
|
||||
VkPipelineVertexInputStateCreateInfo vi_state;
|
||||
VkPipelineMultisampleStateCreateInfo ms_state;
|
||||
VkPipelineViewportStateCreateInfo vp_state;
|
||||
|
@ -279,8 +215,16 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
dyn_state.dynamicStateCount = ARRAY_SIZE(dynamic_states);
|
||||
dyn_state.pDynamicStates = dynamic_states;
|
||||
|
||||
rendering_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR;
|
||||
rendering_info.pNext = NULL;
|
||||
rendering_info.viewMask = 0;
|
||||
rendering_info.colorAttachmentCount = color_format && (vk_aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) ? 1 : 0;
|
||||
rendering_info.pColorAttachmentFormats = color_format ? &color_format : NULL;
|
||||
rendering_info.depthAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
|
||||
rendering_info.stencilAttachmentFormat = (vk_aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) ? ds_format : VK_FORMAT_UNDEFINED;
|
||||
|
||||
pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
|
||||
pipeline_info.pNext = NULL;
|
||||
pipeline_info.pNext = &rendering_info;
|
||||
pipeline_info.flags = 0;
|
||||
pipeline_info.stageCount = 0;
|
||||
pipeline_info.pStages = shader_stages;
|
||||
|
@ -294,7 +238,7 @@ static VkResult vkd3d_meta_create_graphics_pipeline(struct vkd3d_meta_ops *meta_
|
|||
pipeline_info.pColorBlendState = cb_state;
|
||||
pipeline_info.pDynamicState = &dyn_state;
|
||||
pipeline_info.layout = layout;
|
||||
pipeline_info.renderPass = render_pass;
|
||||
pipeline_info.renderPass = VK_NULL_HANDLE;
|
||||
pipeline_info.subpass = 0;
|
||||
pipeline_info.basePipelineHandle = VK_NULL_HANDLE;
|
||||
pipeline_info.basePipelineIndex = -1;
|
||||
|
@ -633,7 +577,6 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
|
|||
{
|
||||
struct vkd3d_copy_image_pipeline *pipeline = &meta_copy_image_ops->pipelines[i];
|
||||
|
||||
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
|
||||
}
|
||||
|
||||
|
@ -648,89 +591,14 @@ void vkd3d_copy_image_ops_cleanup(struct vkd3d_copy_image_ops *meta_copy_image_o
|
|||
vkd3d_free(meta_copy_image_ops->pipelines);
|
||||
}
|
||||
|
||||
/* Creates the single-subpass render pass used by a swapchain pipeline.
 *
 * The pass has one single-sampled color attachment in key->format. Its load op comes
 * from key->load_op, its contents are stored, and it moves from UNDEFINED to
 * PRESENT_SRC_KHR while being used as COLOR_ATTACHMENT_OPTIMAL inside the subpass.
 * One external dependency is declared from and to the color attachment output stage.
 *
 * The new handle is written to render_pass.
 * Returns the VkResult of vkCreateRenderPass2KHR unchanged. */
static VkResult vkd3d_meta_create_swapchain_render_pass(struct d3d12_device *device,
        const struct vkd3d_swapchain_pipeline_key *key, VkRenderPass *render_pass)
{
    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
    VkRenderPassCreateInfo2KHR create_info;
    VkAttachmentDescription2KHR color_desc;
    VkSubpassDependency2KHR external_dep;
    VkAttachmentReference2KHR color_ref;
    VkSubpassDescription2KHR subpass;
    VkResult vr;

    /* Zero everything first: pNext, flags, unused counts and pointers remain NULL / 0. */
    memset(&color_desc, 0, sizeof(color_desc));
    memset(&color_ref, 0, sizeof(color_ref));
    memset(&subpass, 0, sizeof(subpass));
    memset(&external_dep, 0, sizeof(external_dep));
    memset(&create_info, 0, sizeof(create_info));

    color_desc.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
    color_desc.format = key->format;
    color_desc.samples = VK_SAMPLE_COUNT_1_BIT;
    color_desc.loadOp = key->load_op;
    color_desc.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    color_desc.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
    color_desc.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
    color_desc.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    color_desc.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

    /* aspectMask is the input attachment aspect mask; it stays 0. */
    color_ref.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
    color_ref.attachment = 0;
    color_ref.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

    subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
    subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
    subpass.colorAttachmentCount = 1;
    subpass.pColorAttachments = &color_ref;

    external_dep.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2_KHR;
    external_dep.srcSubpass = VK_SUBPASS_EXTERNAL;
    external_dep.dstSubpass = 0;
    external_dep.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dep.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    external_dep.srcAccessMask = 0;
    external_dep.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

    create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
    create_info.attachmentCount = 1;
    create_info.pAttachments = &color_desc;
    create_info.subpassCount = 1;
    create_info.pSubpasses = &subpass;
    create_info.dependencyCount = 1;
    create_info.pDependencies = &external_dep;

    vr = VK_CALL(vkCreateRenderPass2KHR(device->vk_device, &create_info, NULL, render_pass));
    return vr;
}
|
||||
|
||||
static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
const struct vkd3d_swapchain_pipeline_key *key, struct vkd3d_swapchain_pipeline *pipeline)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
|
||||
struct vkd3d_swapchain_ops *meta_swapchain_ops = &meta_ops->swapchain;
|
||||
VkPipelineColorBlendAttachmentState blend_att;
|
||||
VkPipelineColorBlendStateCreateInfo cb_state;
|
||||
VkResult vr;
|
||||
|
||||
if ((vr = vkd3d_meta_create_swapchain_render_pass(meta_ops->device, key, &pipeline->vk_render_pass)))
|
||||
{
|
||||
ERR("Failed to create render pass, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
memset(&cb_state, 0, sizeof(cb_state));
|
||||
memset(&blend_att, 0, sizeof(blend_att));
|
||||
cb_state.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
|
||||
|
@ -743,14 +611,11 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
|
|||
VK_COLOR_COMPONENT_A_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
|
||||
meta_swapchain_ops->vk_pipeline_layouts[key->filter], pipeline->vk_render_pass,
|
||||
meta_swapchain_ops->vk_pipeline_layouts[key->filter], key->format, VK_FORMAT_UNDEFINED, VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
meta_swapchain_ops->vk_vs_module, meta_swapchain_ops->vk_fs_module, 1,
|
||||
NULL, &cb_state,
|
||||
NULL, &pipeline->vk_pipeline)) < 0)
|
||||
{
|
||||
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
pipeline->key = *key;
|
||||
return S_OK;
|
||||
|
@ -759,7 +624,6 @@ static HRESULT vkd3d_meta_create_swapchain_pipeline(struct vkd3d_meta_ops *meta_
|
|||
static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
const struct vkd3d_copy_image_pipeline_key *key, struct vkd3d_copy_image_pipeline *pipeline)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &meta_ops->device->vk_procs;
|
||||
struct vkd3d_copy_image_ops *meta_copy_image_ops = &meta_ops->copy_image;
|
||||
VkPipelineColorBlendAttachmentState blend_attachment;
|
||||
VkPipelineDepthStencilStateCreateInfo ds_state;
|
||||
|
@ -846,10 +710,7 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
|
|||
cb_state.pAttachments = &blend_attachment;
|
||||
memset(&cb_state.blendConstants, 0, sizeof(cb_state.blendConstants));
|
||||
|
||||
if ((vr = vkd3d_meta_create_render_pass(meta_ops->device,
|
||||
key->sample_count, key->format, key->layout, &pipeline->vk_render_pass)) < 0)
|
||||
return hresult_from_vk_result(vr);
|
||||
|
||||
/* Special path when copying stencil -> color. */
|
||||
if (key->format->vk_format == VK_FORMAT_R8_UINT)
|
||||
{
|
||||
/* Special path when copying stencil -> color. */
|
||||
|
@ -867,14 +728,14 @@ static HRESULT vkd3d_meta_create_copy_image_pipeline(struct vkd3d_meta_ops *meta
|
|||
}
|
||||
|
||||
if ((vr = vkd3d_meta_create_graphics_pipeline(meta_ops,
|
||||
meta_copy_image_ops->vk_pipeline_layout, pipeline->vk_render_pass,
|
||||
meta_copy_image_ops->vk_pipeline_layout,
|
||||
has_depth_target ? VK_FORMAT_UNDEFINED : key->format->vk_format,
|
||||
has_depth_target ? key->format->vk_format : VK_FORMAT_UNDEFINED,
|
||||
key->format->vk_aspect_mask,
|
||||
VK_NULL_HANDLE, vk_module, key->sample_count,
|
||||
has_depth_target ? &ds_state : NULL, has_depth_target ? NULL : &cb_state,
|
||||
&spec_info, &pipeline->vk_pipeline)) < 0)
|
||||
{
|
||||
VK_CALL(vkDestroyRenderPass(meta_ops->device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
pipeline->key = *key;
|
||||
return S_OK;
|
||||
|
@ -904,7 +765,6 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
|
||||
if (!memcmp(key, &pipeline->key, sizeof(*key)))
|
||||
{
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
|
||||
return S_OK;
|
||||
|
@ -926,7 +786,6 @@ HRESULT vkd3d_meta_get_copy_image_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
return hr;
|
||||
}
|
||||
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
|
||||
pthread_mutex_unlock(&meta_copy_image_ops->mutex);
|
||||
|
@ -1087,7 +946,6 @@ void vkd3d_swapchain_ops_cleanup(struct vkd3d_swapchain_ops *meta_swapchain_ops,
|
|||
{
|
||||
struct vkd3d_swapchain_pipeline *pipeline = &meta_swapchain_ops->pipelines[i];
|
||||
|
||||
VK_CALL(vkDestroyRenderPass(device->vk_device, pipeline->vk_render_pass, NULL));
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, pipeline->vk_pipeline, NULL));
|
||||
}
|
||||
|
||||
|
@ -1128,7 +986,6 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
|
||||
if (!memcmp(key, &pipeline->key, sizeof(*key)))
|
||||
{
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
|
||||
return S_OK;
|
||||
|
@ -1150,7 +1007,6 @@ HRESULT vkd3d_meta_get_swapchain_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
return hr;
|
||||
}
|
||||
|
||||
info->vk_render_pass = pipeline->vk_render_pass;
|
||||
info->vk_pipeline = pipeline->vk_pipeline;
|
||||
|
||||
pthread_mutex_unlock(&meta_swapchain_ops->mutex);
|
||||
|
@ -1361,6 +1217,144 @@ void vkd3d_meta_get_predicate_pipeline(struct vkd3d_meta_ops *meta_ops,
|
|||
info->data_size = predicate_ops->data_sizes[command_type];
|
||||
}
|
||||
|
||||
HRESULT vkd3d_execute_indirect_ops_init(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
VkPushConstantRange push_constant_range;
|
||||
VkResult vr;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_init(&meta_indirect_ops->mutex, NULL)))
|
||||
return hresult_from_errno(rc);
|
||||
|
||||
push_constant_range.offset = 0;
|
||||
push_constant_range.size = sizeof(struct vkd3d_execute_indirect_args);
|
||||
push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
|
||||
|
||||
if ((vr = vkd3d_meta_create_pipeline_layout(device, 0, NULL, 1,
|
||||
&push_constant_range, &meta_indirect_ops->vk_pipeline_layout)) < 0)
|
||||
{
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
meta_indirect_ops->pipelines_count = 0;
|
||||
meta_indirect_ops->pipelines_size = 0;
|
||||
meta_indirect_ops->pipelines = NULL;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
/* Specialization constant data: the debug ring constants, followed by workgroup_size_x. */
struct vkd3d_meta_execute_indirect_spec_constant_data
|
||||
{
|
||||
struct vkd3d_shader_debug_ring_spec_constants constants;
|
||||
uint32_t workgroup_size_x;
|
||||
};
|
||||
|
||||
HRESULT vkd3d_meta_get_execute_indirect_pipeline(struct vkd3d_meta_ops *meta_ops,
|
||||
uint32_t patch_command_count, struct vkd3d_execute_indirect_info *info)
|
||||
{
|
||||
struct vkd3d_meta_execute_indirect_spec_constant_data execute_indirect_spec_constants;
|
||||
VkSpecializationMapEntry map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES + 1];
|
||||
struct vkd3d_execute_indirect_ops *meta_indirect_ops = &meta_ops->execute_indirect;
|
||||
struct vkd3d_shader_debug_ring_spec_info debug_ring_info;
|
||||
|
||||
VkSpecializationInfo spec;
|
||||
HRESULT hr = S_OK;
|
||||
VkResult vr;
|
||||
bool debug;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
if ((rc = pthread_mutex_lock(&meta_indirect_ops->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
{
|
||||
if (meta_indirect_ops->pipelines[i].workgroup_size_x == patch_command_count)
|
||||
{
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[i].vk_pipeline;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
debug = meta_ops->device->debug_ring.active;
|
||||
|
||||
/* If we have debug ring, we can dump indirect command buffer data to the ring as well.
|
||||
* Vital for debugging broken execute indirect data with templates. */
|
||||
if (debug)
|
||||
{
|
||||
vkd3d_shader_debug_ring_init_spec_constant(meta_ops->device, &debug_ring_info,
|
||||
0 /* Reserve this hash for internal debug streams. */);
|
||||
|
||||
memset(&execute_indirect_spec_constants, 0, sizeof(execute_indirect_spec_constants));
|
||||
execute_indirect_spec_constants.constants = debug_ring_info.constants;
|
||||
execute_indirect_spec_constants.workgroup_size_x = patch_command_count;
|
||||
|
||||
memcpy(map_entry, debug_ring_info.map_entries, sizeof(debug_ring_info.map_entries));
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].constantID = 4;
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].offset =
|
||||
offsetof(struct vkd3d_meta_execute_indirect_spec_constant_data, workgroup_size_x);
|
||||
map_entry[VKD3D_SHADER_DEBUG_RING_SPEC_INFO_MAP_ENTRIES].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &execute_indirect_spec_constants;
|
||||
spec.mapEntryCount = ARRAY_SIZE(map_entry);
|
||||
spec.dataSize = sizeof(execute_indirect_spec_constants);
|
||||
}
|
||||
else
|
||||
{
|
||||
map_entry[0].constantID = 0;
|
||||
map_entry[0].offset = 0;
|
||||
map_entry[0].size = sizeof(patch_command_count);
|
||||
|
||||
spec.pMapEntries = map_entry;
|
||||
spec.pData = &patch_command_count;
|
||||
spec.mapEntryCount = 1;
|
||||
spec.dataSize = sizeof(patch_command_count);
|
||||
}
|
||||
|
||||
vkd3d_array_reserve((void**)&meta_indirect_ops->pipelines, &meta_indirect_ops->pipelines_size,
|
||||
meta_indirect_ops->pipelines_count + 1, sizeof(*meta_indirect_ops->pipelines));
|
||||
|
||||
meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].workgroup_size_x = patch_command_count;
|
||||
|
||||
vr = vkd3d_meta_create_compute_pipeline(meta_ops->device,
|
||||
debug ? sizeof(cs_execute_indirect_patch_debug_ring) : sizeof(cs_execute_indirect_patch),
|
||||
debug ? cs_execute_indirect_patch_debug_ring : cs_execute_indirect_patch,
|
||||
meta_indirect_ops->vk_pipeline_layout, &spec,
|
||||
&meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline);
|
||||
|
||||
if (vr)
|
||||
{
|
||||
hr = hresult_from_vk_result(vr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
info->vk_pipeline_layout = meta_indirect_ops->vk_pipeline_layout;
|
||||
info->vk_pipeline = meta_indirect_ops->pipelines[meta_indirect_ops->pipelines_count].vk_pipeline;
|
||||
meta_indirect_ops->pipelines_count++;
|
||||
|
||||
out:
|
||||
pthread_mutex_unlock(&meta_indirect_ops->mutex);
|
||||
return hr;
|
||||
}
|
||||
|
||||
void vkd3d_execute_indirect_ops_cleanup(struct vkd3d_execute_indirect_ops *meta_indirect_ops,
|
||||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < meta_indirect_ops->pipelines_count; i++)
|
||||
VK_CALL(vkDestroyPipeline(device->vk_device, meta_indirect_ops->pipelines[i].vk_pipeline, NULL));
|
||||
VK_CALL(vkDestroyPipelineLayout(device->vk_device, meta_indirect_ops->vk_pipeline_layout, NULL));
|
||||
pthread_mutex_destroy(&meta_indirect_ops->mutex);
|
||||
}
|
||||
|
||||
HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
@ -1386,8 +1380,13 @@ HRESULT vkd3d_meta_ops_init(struct vkd3d_meta_ops *meta_ops, struct d3d12_device
|
|||
if (FAILED(hr = vkd3d_predicate_ops_init(&meta_ops->predicate, device)))
|
||||
goto fail_predicate_ops;
|
||||
|
||||
if (FAILED(hr = vkd3d_execute_indirect_ops_init(&meta_ops->execute_indirect, device)))
|
||||
goto fail_execute_indirect_ops;
|
||||
|
||||
return S_OK;
|
||||
|
||||
fail_execute_indirect_ops:
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
fail_predicate_ops:
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
fail_query_ops:
|
||||
|
@ -1404,6 +1403,7 @@ fail_common:
|
|||
|
||||
HRESULT vkd3d_meta_ops_cleanup(struct vkd3d_meta_ops *meta_ops, struct d3d12_device *device)
|
||||
{
|
||||
vkd3d_execute_indirect_ops_cleanup(&meta_ops->execute_indirect, device);
|
||||
vkd3d_predicate_ops_cleanup(&meta_ops->predicate, device);
|
||||
vkd3d_query_ops_cleanup(&meta_ops->query, device);
|
||||
vkd3d_swapchain_ops_cleanup(&meta_ops->swapchain, device);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -100,9 +100,9 @@ static bool vkd3d_renderdoc_enable_submit_counter(uint32_t counter)
|
|||
|
||||
static void vkd3d_renderdoc_init_once(void)
|
||||
{
|
||||
char counts[VKD3D_PATH_MAX];
|
||||
pRENDERDOC_GetAPI get_api;
|
||||
const char *counts;
|
||||
const char *env;
|
||||
char env[VKD3D_PATH_MAX];
|
||||
|
||||
#ifdef _WIN32
|
||||
HMODULE renderdoc;
|
||||
|
@ -112,19 +112,19 @@ static void vkd3d_renderdoc_init_once(void)
|
|||
void *fn_ptr;
|
||||
#endif
|
||||
|
||||
env = getenv("VKD3D_AUTO_CAPTURE_SHADER");
|
||||
counts = getenv("VKD3D_AUTO_CAPTURE_COUNTS");
|
||||
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_SHADER", env, sizeof(env));
|
||||
vkd3d_get_env_var("VKD3D_AUTO_CAPTURE_COUNTS", counts, sizeof(counts));
|
||||
|
||||
if (!env && !counts)
|
||||
if (strlen(env) == 0 && strlen(counts) == 0)
|
||||
{
|
||||
WARN("VKD3D_AUTO_CAPTURE_SHADER or VKD3D_AUTO_CAPTURE_COUNTS is not set, RenderDoc auto capture will not be enabled.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!counts)
|
||||
if (strlen(counts) == 0)
|
||||
WARN("VKD3D_AUTO_CAPTURE_COUNTS is not set, will assume that only the first submission is captured.\n");
|
||||
|
||||
if (env)
|
||||
if (strlen(env) > 0)
|
||||
renderdoc_capture_shader_hash = strtoull(env, NULL, 16);
|
||||
|
||||
if (renderdoc_capture_shader_hash)
|
||||
|
@ -132,7 +132,7 @@ static void vkd3d_renderdoc_init_once(void)
|
|||
else
|
||||
INFO("Enabling RenderDoc capture for all shaders.\n");
|
||||
|
||||
if (counts)
|
||||
if (strlen(counts) > 0)
|
||||
vkd3d_renderdoc_init_capture_count_list(counts);
|
||||
else
|
||||
{
|
||||
|
|
|
@ -224,13 +224,8 @@ static bool vkd3d_get_format_compatibility_list(const struct d3d12_device *devic
|
|||
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
|
||||
{
|
||||
const struct vkd3d_format *uint_format = vkd3d_get_format(device, list.uint_format, false);
|
||||
|
||||
/* Format used for ClearUnorderedAccessViewUint */
|
||||
if (uint_format)
|
||||
vkd3d_format_compatibility_list_add_format(&list, uint_format->vk_format);
|
||||
|
||||
/* Legacy D3D11 compatibility rule that allows typed UAV loads on FL11.0 hardware */
|
||||
/* Legacy D3D11 compatibility rule that allows typed UAV loads on FL11.0 hardware.
|
||||
* 5.3.9.5 from D3D11 functional spec. 32-bit typeless formats can be viewed as R32{U,I,F}.*/
|
||||
if (format->byte_count == 4 && format->type == VKD3D_FORMAT_TYPE_TYPELESS)
|
||||
{
|
||||
for (i = 0; i < ARRAY_SIZE(r32_uav_formats); i++)
|
||||
|
@ -299,7 +294,31 @@ static bool vkd3d_is_linear_tiling_supported(const struct d3d12_device *device,
|
|||
return supported;
|
||||
}
|
||||
|
||||
static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE_DESC1 *desc)
|
||||
static bool d3d12_device_prefers_general_depth_stencil(const struct d3d12_device *device)
|
||||
{
|
||||
if (device->vk_info.KHR_driver_properties)
|
||||
{
|
||||
if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_NVIDIA_PROPRIETARY)
|
||||
{
|
||||
/* NVIDIA doesn't really care about layouts for the most part. */
|
||||
return true;
|
||||
}
|
||||
else if (device->device_info.driver_properties.driverID == VK_DRIVER_ID_MESA_RADV)
|
||||
{
|
||||
/* RADV can use TC-compat HTILE without too much issues on Polaris and later.
|
||||
* Use GENERAL for these GPUs.
|
||||
* Pre-Polaris we run into issues where even read-only depth requires decompress
|
||||
* so using GENERAL shouldn't really make things worse, it's going to run pretty bad
|
||||
* either way. */
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static VkImageLayout vk_common_image_layout_from_d3d12_desc(const struct d3d12_device *device,
|
||||
const D3D12_RESOURCE_DESC1 *desc)
|
||||
{
|
||||
/* We need aggressive decay and promotion into anything. */
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
|
||||
|
@ -307,6 +326,20 @@ static VkImageLayout vk_common_image_layout_from_d3d12_desc(const D3D12_RESOURCE
|
|||
if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR)
|
||||
return VK_IMAGE_LAYOUT_GENERAL;
|
||||
|
||||
/* This is counter-intuitive, but using GENERAL layout for depth-stencils works around
|
||||
* having to perform DSV plane tracking all the time, since we don't necessarily know at recording time
|
||||
* if a DSV image is OPTIMAL or READ_ONLY.
|
||||
* This saves us many redundant barriers while rendering, especially since games tend
|
||||
* to split their rendering across many command lists in parallel.
|
||||
* On several implementations, GENERAL is a perfectly fine layout to use,
|
||||
* on others it is a disaster since compression is disabled :') */
|
||||
if (((desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL | D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)) ==
|
||||
D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) &&
|
||||
d3d12_device_prefers_general_depth_stencil(device))
|
||||
{
|
||||
return VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
|
||||
/* DENY_SHADER_RESOURCE only allowed with ALLOW_DEPTH_STENCIL */
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE)
|
||||
return VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
|
@ -452,19 +485,28 @@ static bool vkd3d_format_check_usage_support(struct d3d12_device *device, VkForm
|
|||
return (supported_flags & required_flags) == required_flags;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
|
||||
const D3D12_RESOURCE_DESC1 *desc, struct d3d12_resource *resource, VkImage *vk_image)
|
||||
struct vkd3d_image_create_info
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct vkd3d_format_compatibility_list compat_list;
|
||||
const bool sparse_resource = !heap_properties;
|
||||
struct vkd3d_format_compatibility_list format_compat_list;
|
||||
VkExternalMemoryImageCreateInfo external_info;
|
||||
VkImageFormatListCreateInfoKHR format_list;
|
||||
const struct vkd3d_format *format;
|
||||
VkImageCreateInfo image_info;
|
||||
};
|
||||
|
||||
static HRESULT vkd3d_get_image_create_info(struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
|
||||
const D3D12_RESOURCE_DESC1 *desc, struct d3d12_resource *resource,
|
||||
struct vkd3d_image_create_info *create_info)
|
||||
{
|
||||
struct vkd3d_format_compatibility_list *compat_list = &create_info->format_compat_list;
|
||||
VkExternalMemoryImageCreateInfo *external_info = &create_info->external_info;
|
||||
VkImageFormatListCreateInfoKHR *format_list = &create_info->format_list;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkImageCreateInfo *image_info = &create_info->image_info;
|
||||
const bool sparse_resource = !heap_properties;
|
||||
const struct vkd3d_format *format;
|
||||
bool use_concurrent;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
if (!resource)
|
||||
{
|
||||
|
@ -479,32 +521,40 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
format = resource->format;
|
||||
}
|
||||
|
||||
image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
image_info.pNext = NULL;
|
||||
image_info.flags = 0;
|
||||
image_info->sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
image_info->pNext = NULL;
|
||||
image_info->flags = 0;
|
||||
|
||||
if (resource && (resource->heap_flags & D3D12_HEAP_FLAG_SHARED))
|
||||
{
|
||||
external_info->sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO;
|
||||
external_info->pNext = NULL;
|
||||
external_info->handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
|
||||
|
||||
image_info->pNext = external_info;
|
||||
}
|
||||
|
||||
if (!(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
|
||||
{
|
||||
if (vkd3d_get_format_compatibility_list(device, desc, &compat_list))
|
||||
if (vkd3d_get_format_compatibility_list(device, desc, compat_list))
|
||||
{
|
||||
format_list.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
|
||||
format_list.pNext = NULL;
|
||||
format_list.viewFormatCount = compat_list.format_count;
|
||||
format_list.pViewFormats = compat_list.vk_formats;
|
||||
format_list->sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
|
||||
format_list->pNext = image_info->pNext;
|
||||
format_list->viewFormatCount = compat_list->format_count;
|
||||
format_list->pViewFormats = compat_list->vk_formats;
|
||||
|
||||
image_info.pNext = &format_list;
|
||||
image_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||
image_info->pNext = format_list;
|
||||
image_info->flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||
}
|
||||
}
|
||||
if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D
|
||||
&& desc->Width == desc->Height && desc->DepthOrArraySize >= 6
|
||||
&& desc->SampleDesc.Count == 1)
|
||||
image_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
||||
if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
|
||||
image_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT_KHR;
|
||||
image_info->flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
||||
|
||||
if (sparse_resource)
|
||||
{
|
||||
image_info.flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
|
||||
image_info->flags |= VK_IMAGE_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT |
|
||||
VK_IMAGE_CREATE_SPARSE_ALIASED_BIT;
|
||||
|
||||
|
@ -528,24 +578,24 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
}
|
||||
}
|
||||
|
||||
image_info.imageType = vk_image_type_from_d3d12_resource_dimension(desc->Dimension);
|
||||
image_info.format = format->vk_format;
|
||||
image_info.extent.width = desc->Width;
|
||||
image_info.extent.height = desc->Height;
|
||||
image_info->imageType = vk_image_type_from_d3d12_resource_dimension(desc->Dimension);
|
||||
image_info->format = format->vk_format;
|
||||
image_info->extent.width = desc->Width;
|
||||
image_info->extent.height = desc->Height;
|
||||
|
||||
if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D)
|
||||
{
|
||||
image_info.extent.depth = desc->DepthOrArraySize;
|
||||
image_info.arrayLayers = 1;
|
||||
image_info->extent.depth = desc->DepthOrArraySize;
|
||||
image_info->arrayLayers = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
image_info.extent.depth = 1;
|
||||
image_info.arrayLayers = desc->DepthOrArraySize;
|
||||
image_info->extent.depth = 1;
|
||||
image_info->arrayLayers = desc->DepthOrArraySize;
|
||||
}
|
||||
|
||||
image_info.mipLevels = min(desc->MipLevels, max_miplevel_count(desc));
|
||||
image_info.samples = vk_samples_from_dxgi_sample_desc(&desc->SampleDesc);
|
||||
image_info->mipLevels = min(desc->MipLevels, max_miplevel_count(desc));
|
||||
image_info->samples = vk_samples_from_dxgi_sample_desc(&desc->SampleDesc);
|
||||
|
||||
if (sparse_resource)
|
||||
{
|
||||
|
@ -555,15 +605,15 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
image_info->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
}
|
||||
else if (desc->Layout == D3D12_TEXTURE_LAYOUT_UNKNOWN || desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE)
|
||||
{
|
||||
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
image_info->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
}
|
||||
else if (desc->Layout == D3D12_TEXTURE_LAYOUT_ROW_MAJOR)
|
||||
{
|
||||
image_info.tiling = VK_IMAGE_TILING_LINEAR;
|
||||
image_info->tiling = VK_IMAGE_TILING_LINEAR;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -571,26 +621,31 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
return E_NOTIMPL;
|
||||
}
|
||||
|
||||
image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||
image_info->usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)
|
||||
image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
image_info->usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL)
|
||||
image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
image_info->usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)
|
||||
image_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
image_info->usage |= VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
if (!(desc->Flags & D3D12_RESOURCE_FLAG_DENY_SHADER_RESOURCE))
|
||||
image_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
image_info->usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
|
||||
/* Additional usage flags for shader-based copies */
|
||||
if (vkd3d_format_allows_shader_copies(format->dxgi_format))
|
||||
{
|
||||
image_info.usage |= (format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
image_info->usage |= (format->vk_aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
? VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
|
||||
: VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
}
|
||||
|
||||
if (vkd3d_resource_can_be_vrs(device, heap_properties, desc))
|
||||
image_info.usage |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||
image_info->usage |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
|
||||
|
||||
/* Additional image flags as necessary */
|
||||
if (image_info->imageType == VK_IMAGE_TYPE_3D &&
|
||||
(image_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
|
||||
image_info->flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||
|
||||
use_concurrent = !!(device->unique_queue_mask & (device->unique_queue_mask - 1));
|
||||
|
||||
|
@ -610,61 +665,61 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
{
|
||||
/* For multi-queue, we have to use CONCURRENT since D3D does
|
||||
* not give us enough information to do ownership transfers. */
|
||||
image_info.sharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
image_info.queueFamilyIndexCount = device->queue_family_count;
|
||||
image_info.pQueueFamilyIndices = device->queue_family_indices;
|
||||
image_info->sharingMode = VK_SHARING_MODE_CONCURRENT;
|
||||
image_info->queueFamilyIndexCount = device->queue_family_count;
|
||||
image_info->pQueueFamilyIndices = device->queue_family_indices;
|
||||
}
|
||||
else
|
||||
{
|
||||
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
image_info.queueFamilyIndexCount = 0;
|
||||
image_info.pQueueFamilyIndices = NULL;
|
||||
image_info->sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
image_info->queueFamilyIndexCount = 0;
|
||||
image_info->pQueueFamilyIndices = NULL;
|
||||
}
|
||||
|
||||
if (heap_properties && is_cpu_accessible_heap(heap_properties))
|
||||
{
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
|
||||
image_info->initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
|
||||
/* Required for ReadFromSubresource(). */
|
||||
image_info.tiling = VK_IMAGE_TILING_LINEAR;
|
||||
image_info->tiling = VK_IMAGE_TILING_LINEAR;
|
||||
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_IGNORE_RTV_HOST_VISIBLE) &&
|
||||
(image_info.usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
|
||||
(image_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
|
||||
{
|
||||
WARN("Workaround applied. Ignoring RTV on linear resources.\n");
|
||||
image_info.usage &= ~VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
image_info->usage &= ~VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
if (resource)
|
||||
resource->desc.Flags &= ~D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
image_info->initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
}
|
||||
|
||||
if ((image_info.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
|
||||
!vkd3d_format_check_usage_support(device, format->vk_format, image_info.usage, image_info.tiling))
|
||||
image_info.flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
if ((image_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
|
||||
!vkd3d_format_check_usage_support(device, format->vk_format, image_info->usage, image_info->tiling))
|
||||
image_info->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
|
||||
if (image_info.tiling == VK_IMAGE_TILING_LINEAR)
|
||||
if (image_info->tiling == VK_IMAGE_TILING_LINEAR)
|
||||
{
|
||||
bool supported = vkd3d_is_linear_tiling_supported(device, &image_info);
|
||||
bool supported = vkd3d_is_linear_tiling_supported(device, image_info);
|
||||
|
||||
/* Apparently NV drivers do not support EXTENDED_USAGE_BIT on linear images? */
|
||||
if (!supported && (image_info.flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
|
||||
if (!supported && (image_info->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT))
|
||||
{
|
||||
WARN("Linear image not supported, attempting without EXTENDED_USAGE as a workaround ...\n");
|
||||
image_info.flags &= ~VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
supported = vkd3d_is_linear_tiling_supported(device, &image_info);
|
||||
image_info->flags &= ~VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
supported = vkd3d_is_linear_tiling_supported(device, image_info);
|
||||
}
|
||||
|
||||
if (!supported)
|
||||
{
|
||||
WARN("Linear image not supported, forcing OPTIMAL tiling ...\n");
|
||||
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
image_info->tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
|
||||
if ((image_info.flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
|
||||
!vkd3d_format_check_usage_support(device, format->vk_format, image_info.usage, image_info.tiling))
|
||||
image_info.flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
if ((image_info->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) &&
|
||||
!vkd3d_format_check_usage_support(device, format->vk_format, image_info->usage, image_info->tiling))
|
||||
image_info->flags |= VK_IMAGE_CREATE_EXTENDED_USAGE_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -676,14 +731,14 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
// D3D12 only allows sparse images with one aspect, so we can only
|
||||
// get one struct for metadata aspect and one for the data aspect
|
||||
VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(
|
||||
device->vk_physical_device, image_info.format,
|
||||
image_info.imageType, image_info.samples, image_info.usage,
|
||||
image_info.tiling, &sparse_info_count, sparse_infos));
|
||||
device->vk_physical_device, image_info->format,
|
||||
image_info->imageType, image_info->samples, image_info->usage,
|
||||
image_info->tiling, &sparse_info_count, sparse_infos));
|
||||
|
||||
if (!sparse_info_count)
|
||||
{
|
||||
ERR("Sparse images not supported with format %u, type %u, samples %u, usage %#x, tiling %u.\n",
|
||||
image_info.format, image_info.imageType, image_info.samples, image_info.usage, image_info.tiling);
|
||||
image_info->format, image_info->imageType, image_info->samples, image_info->usage, image_info->tiling);
|
||||
return E_INVALIDARG;
|
||||
}
|
||||
|
||||
|
@ -702,19 +757,35 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
|||
|
||||
if (resource)
|
||||
{
|
||||
if (image_info.tiling == VK_IMAGE_TILING_LINEAR)
|
||||
if (image_info->tiling == VK_IMAGE_TILING_LINEAR)
|
||||
{
|
||||
resource->flags |= VKD3D_RESOURCE_LINEAR_TILING;
|
||||
resource->common_layout = VK_IMAGE_LAYOUT_GENERAL;
|
||||
}
|
||||
else
|
||||
resource->common_layout = vk_common_image_layout_from_d3d12_desc(desc);
|
||||
resource->common_layout = vk_common_image_layout_from_d3d12_desc(device, desc);
|
||||
|
||||
if (desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_SIMULTANEOUS_ACCESS)
|
||||
resource->flags |= VKD3D_RESOURCE_SIMULTANEOUS_ACCESS;
|
||||
}
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0)
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_create_image(struct d3d12_device *device,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags,
|
||||
const D3D12_RESOURCE_DESC1 *desc, struct d3d12_resource *resource, VkImage *vk_image)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct vkd3d_image_create_info create_info;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
if (FAILED(hr = vkd3d_get_image_create_info(device, heap_properties,
|
||||
heap_flags, desc, resource, &create_info)))
|
||||
return hr;
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &create_info.image_info, NULL, vk_image))) < 0)
|
||||
WARN("Failed to create Vulkan image, vr %d.\n", vr);
|
||||
|
||||
return hresult_from_vk_result(vr);
|
||||
|
@ -725,10 +796,11 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
|
|||
{
|
||||
static const D3D12_HEAP_PROPERTIES heap_properties = {D3D12_HEAP_TYPE_DEFAULT};
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkDeviceImageMemoryRequirementsKHR requirement_info;
|
||||
struct vkd3d_image_create_info create_info;
|
||||
D3D12_RESOURCE_DESC1 validated_desc;
|
||||
VkMemoryRequirements requirements;
|
||||
VkMemoryRequirements2 requirements;
|
||||
VkDeviceSize target_alignment;
|
||||
VkImage vk_image;
|
||||
HRESULT hr;
|
||||
|
||||
assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER);
|
||||
|
@ -741,15 +813,21 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device,
|
|||
desc = &validated_desc;
|
||||
}
|
||||
|
||||
/* XXX: We have to create an image to get its memory requirements. */
|
||||
if (FAILED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image)))
|
||||
if (FAILED(hr = vkd3d_get_image_create_info(device, &heap_properties, 0, desc, NULL, &create_info)))
|
||||
return hr;
|
||||
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL));
|
||||
requirement_info.sType = VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR;
|
||||
requirement_info.pNext = NULL;
|
||||
requirement_info.pCreateInfo = &create_info.image_info;
|
||||
requirement_info.planeAspect = 0; /* irrelevant for us */
|
||||
|
||||
allocation_info->SizeInBytes = requirements.size;
|
||||
allocation_info->Alignment = requirements.alignment;
|
||||
requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
|
||||
requirements.pNext = NULL;
|
||||
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &requirement_info, &requirements));
|
||||
|
||||
allocation_info->SizeInBytes = requirements.memoryRequirements.size;
|
||||
allocation_info->Alignment = requirements.memoryRequirements.alignment;
|
||||
|
||||
/* Do not report alignments greater than DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT
|
||||
* since that might confuse apps. Instead, pad the allocation so that we can
|
||||
|
@ -803,6 +881,7 @@ static uint32_t vkd3d_view_entry_hash(const void *key)
|
|||
hash = hash_combine(hash, k->u.texture.components.g);
|
||||
hash = hash_combine(hash, k->u.texture.components.b);
|
||||
hash = hash_combine(hash, k->u.texture.components.a);
|
||||
hash = hash_combine(hash, k->u.texture.image_usage);
|
||||
hash = hash_combine(hash, k->u.texture.allowed_swizzle);
|
||||
break;
|
||||
|
||||
|
@ -863,6 +942,7 @@ static bool vkd3d_view_entry_compare(const void *key, const struct hash_map_entr
|
|||
k->u.texture.components.g == e->key.u.texture.components.g &&
|
||||
k->u.texture.components.b == e->key.u.texture.components.b &&
|
||||
k->u.texture.components.a == e->key.u.texture.components.a &&
|
||||
k->u.texture.image_usage == e->key.u.texture.image_usage &&
|
||||
k->u.texture.allowed_swizzle == e->key.u.texture.allowed_swizzle;
|
||||
|
||||
case VKD3D_VIEW_TYPE_SAMPLER:
|
||||
|
@ -1322,18 +1402,12 @@ static void d3d12_resource_get_tiling(struct d3d12_device *device, struct d3d12_
|
|||
|
||||
tile_count += packed_tiles;
|
||||
|
||||
if (standard_mips)
|
||||
{
|
||||
tile_shape->WidthInTexels = block_extent.width;
|
||||
tile_shape->HeightInTexels = block_extent.height;
|
||||
tile_shape->DepthInTexels = block_extent.depth;
|
||||
}
|
||||
else
|
||||
{
|
||||
tile_shape->WidthInTexels = 0;
|
||||
tile_shape->HeightInTexels = 0;
|
||||
tile_shape->DepthInTexels = 0;
|
||||
}
|
||||
/* Docs say that we should clear tile_shape to zero if there are no standard mips,
|
||||
* but this conflicts with all native drivers, so the docs are likely lying here.
|
||||
* See test_get_resource_tiling() for info. */
|
||||
tile_shape->WidthInTexels = block_extent.width;
|
||||
tile_shape->HeightInTexels = block_extent.height;
|
||||
tile_shape->DepthInTexels = block_extent.depth;
|
||||
|
||||
*total_tile_count = tile_count;
|
||||
}
|
||||
|
@ -2639,7 +2713,7 @@ static HRESULT d3d12_resource_create(struct d3d12_device *device, uint32_t flags
|
|||
|
||||
HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12_RESOURCE_DESC1 *desc,
|
||||
const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, D3D12_RESOURCE_STATES initial_state,
|
||||
const D3D12_CLEAR_VALUE *optimized_clear_value, struct d3d12_resource **resource)
|
||||
const D3D12_CLEAR_VALUE *optimized_clear_value, HANDLE shared_handle, struct d3d12_resource **resource)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
struct d3d12_resource *object;
|
||||
|
@ -2660,6 +2734,11 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
|
|||
bool use_dedicated_allocation;
|
||||
VkResult vr;
|
||||
|
||||
#ifdef _WIN32
|
||||
VkImportMemoryWin32HandleInfoKHR import_info;
|
||||
VkExportMemoryAllocateInfo export_info;
|
||||
#endif
|
||||
|
||||
if (FAILED(hr = d3d12_resource_create_vk_resource(object, device)))
|
||||
goto fail;
|
||||
|
||||
|
@ -2692,10 +2771,36 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
|
|||
else
|
||||
allocate_info.heap_flags |= D3D12_HEAP_FLAG_ALLOW_ONLY_NON_RT_DS_TEXTURES;
|
||||
|
||||
if (heap_flags & D3D12_HEAP_FLAG_SHARED)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
use_dedicated_allocation = true;
|
||||
|
||||
if (shared_handle && shared_handle != INVALID_HANDLE_VALUE)
|
||||
{
|
||||
import_info.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_KHR;
|
||||
import_info.pNext = allocate_info.pNext;
|
||||
import_info.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
|
||||
import_info.handle = shared_handle;
|
||||
import_info.name = NULL;
|
||||
allocate_info.pNext = &import_info;
|
||||
}
|
||||
else
|
||||
{
|
||||
export_info.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO;
|
||||
export_info.pNext = allocate_info.pNext;
|
||||
export_info.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
|
||||
allocate_info.pNext = &export_info;
|
||||
}
|
||||
#else
|
||||
FIXME("D3D12_HEAP_FLAG_SHARED can only be implemented in native Win32.\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
if (use_dedicated_allocation)
|
||||
{
|
||||
dedicated_info.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO;
|
||||
dedicated_info.pNext = NULL;
|
||||
dedicated_info.pNext = allocate_info.pNext;
|
||||
dedicated_info.image = object->res.vk_image;
|
||||
dedicated_info.buffer = VK_NULL_HANDLE;
|
||||
allocate_info.pNext = &dedicated_info;
|
||||
|
@ -2742,6 +2847,14 @@ HRESULT d3d12_resource_create_committed(struct d3d12_device *device, const D3D12
|
|||
allocate_info.heap_desc.SizeInBytes = align(desc->Width, allocate_info.heap_desc.Alignment);
|
||||
allocate_info.heap_desc.Flags = heap_flags | D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
|
||||
/* Be very careful with suballocated buffers. */
|
||||
if ((vkd3d_config_flags & VKD3D_CONFIG_FLAG_ZERO_MEMORY_WORKAROUNDS_COMMITTED_BUFFER_UAV) &&
|
||||
(desc->Flags & D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS) &&
|
||||
desc->Width < VKD3D_VA_BLOCK_SIZE)
|
||||
{
|
||||
allocate_info.heap_desc.Flags &= ~D3D12_HEAP_FLAG_CREATE_NOT_ZEROED;
|
||||
}
|
||||
|
||||
if (FAILED(hr = vkd3d_allocate_heap_memory(device,
|
||||
&device->memory_allocator, &allocate_info, &object->mem)))
|
||||
goto fail;
|
||||
|
@ -2793,20 +2906,30 @@ HRESULT d3d12_resource_create_placed(struct d3d12_device *device, const D3D12_RE
|
|||
VkMemoryRequirements memory_requirements;
|
||||
VkBindImageMemoryInfo bind_info;
|
||||
struct d3d12_resource *object;
|
||||
bool force_committed;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
if (FAILED(hr = d3d12_resource_validate_heap(desc, heap)))
|
||||
return hr;
|
||||
|
||||
if (heap->allocation.device_allocation.vk_memory == VK_NULL_HANDLE)
|
||||
/* Placed linear textures are ... problematic
|
||||
* since we have no way of signalling that they have different alignment and size requirements
|
||||
* than optimal textures. GetResourceAllocationInfo() does not take heap property information
|
||||
* and assumes that we are not modifying the tiling mode. */
|
||||
force_committed = desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER &&
|
||||
is_cpu_accessible_heap(&heap->desc.Properties);
|
||||
|
||||
if (force_committed || heap->allocation.device_allocation.vk_memory == VK_NULL_HANDLE)
|
||||
{
|
||||
WARN("Placing resource on heap with no memory backing it. Falling back to committed resource.\n");
|
||||
if (!force_committed)
|
||||
WARN("Placing resource on heap with no memory backing it. Falling back to committed resource.\n");
|
||||
|
||||
if (FAILED(hr = d3d12_resource_create_committed(device, desc, &heap->desc.Properties,
|
||||
heap->desc.Flags & ~(D3D12_HEAP_FLAG_DENY_BUFFERS |
|
||||
D3D12_HEAP_FLAG_DENY_NON_RT_DS_TEXTURES |
|
||||
D3D12_HEAP_FLAG_DENY_RT_DS_TEXTURES),
|
||||
initial_state, optimized_clear_value, resource)))
|
||||
initial_state, optimized_clear_value, NULL, resource)))
|
||||
{
|
||||
ERR("Failed to create fallback committed resource.\n");
|
||||
}
|
||||
|
@ -2877,6 +3000,15 @@ HRESULT d3d12_resource_create_placed(struct d3d12_device *device, const D3D12_RE
|
|||
goto fail;
|
||||
}
|
||||
|
||||
/* Placed RTV and DSV *must* be explicitly initialized after alias barriers and first use,
|
||||
* so there is no need to do initial layout transition ourselves.
|
||||
* It is extremely dangerous to do so since the initialization will clobber other
|
||||
* aliased buffers when clearing DCC/HTILE state.
|
||||
* For details, see:
|
||||
* https://docs.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-createplacedresource#notes-on-the-required-resource-initialization. */
|
||||
if (desc->Flags & (D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET | D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL))
|
||||
object->initial_layout_transition = 0;
|
||||
|
||||
*resource = object;
|
||||
return S_OK;
|
||||
|
||||
|
@ -2952,7 +3084,7 @@ VKD3D_EXPORT HRESULT vkd3d_create_image_resource(ID3D12Device *device,
|
|||
object->flags = create_info->flags;
|
||||
object->flags |= VKD3D_RESOURCE_EXTERNAL;
|
||||
object->initial_layout_transition = 1;
|
||||
object->common_layout = vk_common_image_layout_from_d3d12_desc(&object->desc);
|
||||
object->common_layout = vk_common_image_layout_from_d3d12_desc(d3d12_device, &object->desc);
|
||||
|
||||
memset(&object->sparse, 0, sizeof(object->sparse));
|
||||
|
||||
|
@ -3311,7 +3443,7 @@ bool vkd3d_create_raw_r32ui_vk_buffer_view(struct d3d12_device *device,
|
|||
return vr == VK_SUCCESS;
|
||||
}
|
||||
|
||||
static bool vkd3d_create_vk_buffer_view(struct d3d12_device *device,
|
||||
bool vkd3d_create_vk_buffer_view(struct d3d12_device *device,
|
||||
VkBuffer vk_buffer, const struct vkd3d_format *format,
|
||||
VkDeviceSize offset, VkDeviceSize range, VkBufferView *vk_view)
|
||||
{
|
||||
|
@ -3611,6 +3743,7 @@ static bool init_default_texture_view_desc(struct vkd3d_texture_view_desc *desc,
|
|||
desc->miplevel_clamp = 0.0f;
|
||||
desc->layer_idx = 0;
|
||||
desc->layer_count = d3d12_resource_desc_get_layer_count(&resource->desc);
|
||||
desc->image_usage = 0;
|
||||
|
||||
switch (resource->desc.Dimension)
|
||||
{
|
||||
|
@ -3645,6 +3778,7 @@ static bool init_default_texture_view_desc(struct vkd3d_texture_view_desc *desc,
|
|||
bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_texture_view_desc *desc, struct vkd3d_view **view)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkImageViewUsageCreateInfo image_usage_create_info;
|
||||
const struct vkd3d_format *format = desc->format;
|
||||
VkImageViewMinLodCreateInfoEXT min_lod_desc;
|
||||
VkImageView vk_view = VK_NULL_HANDLE;
|
||||
|
@ -3695,6 +3829,11 @@ bool vkd3d_create_texture_view(struct d3d12_device *device, const struct vkd3d_t
|
|||
}
|
||||
}
|
||||
|
||||
image_usage_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
|
||||
image_usage_create_info.pNext = NULL;
|
||||
image_usage_create_info.usage = desc->image_usage;
|
||||
vk_prepend_struct(&view_desc, &image_usage_create_info);
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImageView(device->vk_device, &view_desc, NULL, &vk_view))) < 0)
|
||||
{
|
||||
WARN("Failed to create Vulkan image view, vr %d.\n", vr);
|
||||
|
@ -4192,6 +4331,7 @@ static void vkd3d_create_texture_srv(vkd3d_cpu_descriptor_va_t desc_va,
|
|||
key.view_type = VKD3D_VIEW_TYPE_IMAGE;
|
||||
key.u.texture.miplevel_count = VK_REMAINING_MIP_LEVELS;
|
||||
key.u.texture.allowed_swizzle = true;
|
||||
key.u.texture.image_usage = VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
|
||||
if (desc)
|
||||
{
|
||||
|
@ -4586,6 +4726,8 @@ static void vkd3d_create_texture_uav(vkd3d_cpu_descriptor_va_t desc_va,
|
|||
if (!init_default_texture_view_desc(&key.u.texture, resource, desc ? desc->Format : 0))
|
||||
return;
|
||||
|
||||
key.u.texture.image_usage = VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
|
||||
if (vkd3d_format_is_compressed(key.u.texture.format))
|
||||
{
|
||||
WARN("UAVs cannot be created for compressed formats.\n");
|
||||
|
@ -5008,6 +5150,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev
|
|||
}
|
||||
|
||||
key.view_type = VKD3D_VIEW_TYPE_IMAGE;
|
||||
key.u.texture.image_usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
|
||||
|
||||
if (desc)
|
||||
{
|
||||
|
@ -5113,6 +5256,7 @@ void d3d12_rtv_desc_create_dsv(struct d3d12_rtv_desc *dsv_desc, struct d3d12_dev
|
|||
}
|
||||
|
||||
key.view_type = VKD3D_VIEW_TYPE_IMAGE;
|
||||
key.u.texture.image_usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
|
||||
if (desc)
|
||||
{
|
||||
|
@ -6366,7 +6510,9 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkMemoryRequirements memory_requirements;
|
||||
VkDeviceBufferMemoryRequirementsKHR buffer_requirement_info;
|
||||
VkDeviceImageMemoryRequirementsKHR image_requirement_info;
|
||||
VkMemoryRequirements2 memory_requirements;
|
||||
struct vkd3d_memory_topology topology;
|
||||
VkBufferCreateInfo buffer_info;
|
||||
uint32_t sampled_type_mask_cpu;
|
||||
|
@ -6376,9 +6522,6 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
uint32_t host_visible_mask;
|
||||
uint32_t buffer_type_mask;
|
||||
uint32_t rt_ds_type_mask;
|
||||
VkBuffer buffer;
|
||||
VkImage image;
|
||||
VkResult vr;
|
||||
uint32_t i;
|
||||
|
||||
vkd3d_memory_info_get_topology(&topology, device);
|
||||
|
@ -6388,6 +6531,18 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
if (pthread_mutex_init(&info->budget_lock, NULL) != 0)
|
||||
return E_OUTOFMEMORY;
|
||||
|
||||
buffer_requirement_info.sType = VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS_KHR;
|
||||
buffer_requirement_info.pNext = NULL;
|
||||
buffer_requirement_info.pCreateInfo = &buffer_info;
|
||||
|
||||
image_requirement_info.sType = VK_STRUCTURE_TYPE_DEVICE_IMAGE_MEMORY_REQUIREMENTS_KHR;
|
||||
image_requirement_info.pNext = NULL;
|
||||
image_requirement_info.pCreateInfo = &image_info;
|
||||
image_requirement_info.planeAspect = 0;
|
||||
|
||||
memory_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2;
|
||||
memory_requirements.pNext = NULL;
|
||||
|
||||
memset(&buffer_info, 0, sizeof(buffer_info));
|
||||
buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
|
||||
buffer_info.size = 65536;
|
||||
|
@ -6410,15 +6565,8 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR;
|
||||
}
|
||||
|
||||
if ((vr = VK_CALL(vkCreateBuffer(device->vk_device, &buffer_info, NULL, &buffer))) < 0)
|
||||
{
|
||||
ERR("Failed to create dummy buffer");
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, buffer, &memory_requirements));
|
||||
VK_CALL(vkDestroyBuffer(device->vk_device, buffer, NULL));
|
||||
buffer_type_mask = memory_requirements.memoryTypeBits;
|
||||
VK_CALL(vkGetDeviceBufferMemoryRequirementsKHR(device->vk_device, &buffer_requirement_info, &memory_requirements));
|
||||
buffer_type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
|
||||
memset(&image_info, 0, sizeof(image_info));
|
||||
image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
|
@ -6438,15 +6586,8 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) < 0)
|
||||
{
|
||||
ERR("Failed to create dummy sampled image");
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, image, NULL));
|
||||
sampled_type_mask = memory_requirements.memoryTypeBits;
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &image_requirement_info, &memory_requirements));
|
||||
sampled_type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
|
||||
/* CPU accessible images are always LINEAR.
|
||||
* If we ever get a way to write to OPTIMAL-ly tiled images, we can drop this and just
|
||||
|
@ -6461,12 +6602,8 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
sampled_type_mask_cpu = 0;
|
||||
if (vkd3d_is_linear_tiling_supported(device, &image_info))
|
||||
{
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) == VK_SUCCESS)
|
||||
{
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, image, NULL));
|
||||
sampled_type_mask_cpu = memory_requirements.memoryTypeBits;
|
||||
}
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &image_requirement_info, &memory_requirements));
|
||||
sampled_type_mask_cpu = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
}
|
||||
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
@ -6478,27 +6615,16 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
VK_IMAGE_USAGE_STORAGE_BIT;
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) < 0)
|
||||
{
|
||||
ERR("Failed to create dummy color image");
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, image, NULL));
|
||||
rt_ds_type_mask = memory_requirements.memoryTypeBits;
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &image_requirement_info, &memory_requirements));
|
||||
rt_ds_type_mask = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
|
||||
image_info.tiling = VK_IMAGE_TILING_LINEAR;
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
|
||||
rt_ds_type_mask_cpu = 0;
|
||||
if (vkd3d_is_linear_tiling_supported(device, &image_info))
|
||||
{
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) == VK_SUCCESS)
|
||||
{
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, image, NULL));
|
||||
rt_ds_type_mask_cpu = memory_requirements.memoryTypeBits;
|
||||
}
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &image_requirement_info, &memory_requirements));
|
||||
rt_ds_type_mask_cpu = memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
}
|
||||
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
|
||||
image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
@ -6509,15 +6635,8 @@ HRESULT vkd3d_memory_info_init(struct vkd3d_memory_info *info,
|
|||
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT;
|
||||
|
||||
if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, &image))) < 0)
|
||||
{
|
||||
ERR("Failed to create dummy depth-stencil image");
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
VK_CALL(vkGetImageMemoryRequirements(device->vk_device, image, &memory_requirements));
|
||||
VK_CALL(vkDestroyImage(device->vk_device, image, NULL));
|
||||
rt_ds_type_mask &= memory_requirements.memoryTypeBits;
|
||||
VK_CALL(vkGetDeviceImageMemoryRequirementsKHR(device->vk_device, &image_requirement_info, &memory_requirements));
|
||||
rt_ds_type_mask &= memory_requirements.memoryRequirements.memoryTypeBits;
|
||||
|
||||
/* Unsure if we can have host visible depth-stencil.
|
||||
* On AMD, we can get linear RT, but not linear DS, so for now, just don't check for that.
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
#version 450
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference_uvec2 : require
|
||||
|
||||
layout(local_size_x_id = 0) in;
|
||||
|
||||
struct Command
|
||||
{
|
||||
uint type;
|
||||
uint src_offset;
|
||||
uint dst_offset;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
|
||||
{
|
||||
Command commands[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
Commands commands_va;
|
||||
SrcBuffer src_buffer_va;
|
||||
DstBuffer dst_buffer_va;
|
||||
uvec2 indirect_count_va;
|
||||
IndirectCountWrite dst_indirect_count_va;
|
||||
uint src_stride;
|
||||
uint dst_stride;
|
||||
};
|
||||
|
||||
// One workgroup per draw, one invocation per patch command: each invocation
// copies a single 32-bit word from the source buffer into the destination buffer.
void main()
{
    uint draw_index = gl_WorkGroupID.x;
    uint draw_limit = gl_NumWorkGroups.x;
    Command patch = commands_va.commands[gl_LocalInvocationIndex];

    // A non-zero VA means an indirect count buffer was supplied; clamp the
    // number of draws to it and publish the clamped value from workgroup 0.
    if (indirect_count_va.x != 0u || indirect_count_va.y != 0u)
    {
        draw_limit = min(draw_limit, IndirectCount(indirect_count_va).count);
        if (draw_index == 0u)
            dst_indirect_count_va.count = draw_limit;
    }

    // Draws beyond the limit are not patched.
    if (draw_index >= draw_limit)
        return;

    // Offsets and strides index the uint arrays directly.
    uint src_word = src_stride * draw_index + patch.src_offset;
    uint dst_word = dst_stride * draw_index + patch.dst_offset;
    dst_buffer_va.values[dst_word] = src_buffer_va.values[src_word];
}
|
|
@ -0,0 +1,83 @@
|
|||
#version 450
|
||||
#extension GL_EXT_buffer_reference : require
|
||||
#extension GL_EXT_buffer_reference_uvec2 : require
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#include "../../../include/shader-debug/debug_channel.h"
|
||||
|
||||
layout(local_size_x_id = 4) in;
|
||||
|
||||
struct Command
|
||||
{
|
||||
uint type;
|
||||
uint src_offset;
|
||||
uint dst_offset;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer Commands
|
||||
{
|
||||
Command commands[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SrcBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer DstBuffer {
|
||||
uint values[];
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer IndirectCount {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer IndirectCountWrite {
|
||||
uint count;
|
||||
};
|
||||
|
||||
layout(push_constant) uniform Registers
|
||||
{
|
||||
Commands commands_va;
|
||||
SrcBuffer src_buffer_va;
|
||||
DstBuffer dst_buffer_va;
|
||||
uvec2 indirect_count_va;
|
||||
IndirectCountWrite dst_indirect_count_va;
|
||||
uint src_stride;
|
||||
uint dst_stride;
|
||||
|
||||
// Debug metadata here
|
||||
uint debug_tag;
|
||||
uint implicit_instance;
|
||||
};
|
||||
|
||||
// Debug variant of the patch shader: same copy logic, with debug-channel
// messages emitted whenever debug_tag is non-zero.
void main()
{
    bool debug_enabled = debug_tag != 0u;

    // The debug channel is initialized before any message is emitted.
    if (debug_enabled)
        DEBUG_CHANNEL_INIT_IMPLICIT_INSTANCE(uvec3(debug_tag, gl_WorkGroupID.x, gl_LocalInvocationIndex), implicit_instance);

    Command patch = commands_va.commands[gl_LocalInvocationIndex];
    uint draw_index = gl_WorkGroupID.x;
    uint draw_limit = gl_NumWorkGroups.x;

    // A non-zero VA means an indirect count buffer was supplied; clamp the
    // number of draws to it and publish the clamped value from workgroup 0.
    if (indirect_count_va.x != 0u || indirect_count_va.y != 0u)
    {
        draw_limit = min(draw_limit, IndirectCount(indirect_count_va).count);
        if (draw_index == 0u)
            dst_indirect_count_va.count = draw_limit;
    }

    // Report the effective and dispatched draw counts from workgroup 0 only.
    if (debug_enabled && draw_index == 0u)
        DEBUG_CHANNEL_MSG_UNIFORM(int(draw_limit), int(gl_NumWorkGroups.x));

    // Draws beyond the limit are not patched.
    if (draw_index >= draw_limit)
        return;

    uint src_word = src_stride * draw_index + patch.src_offset;
    uint dst_word = dst_stride * draw_index + patch.dst_offset;
    uint src_value = src_buffer_va.values[src_word];

    if (debug_enabled)
        DEBUG_CHANNEL_MSG(patch.type, dst_word, src_word, src_value);

    dst_buffer_va.values[dst_word] = src_value;
}
|
|
@ -0,0 +1,68 @@
|
|||
/*
|
||||
* Copyright 2021 Derek Lesho for Codeweavers
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "vkd3d_private.h"
|
||||
|
||||
#include "winioctl.h"
|
||||
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_SET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 4, METHOD_BUFFERED, FILE_WRITE_ACCESS)
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_GET_METADATA CTL_CODE(FILE_DEVICE_VIDEO, 5, METHOD_BUFFERED, FILE_READ_ACCESS)
|
||||
#define IOCTL_SHARED_GPU_RESOURCE_OPEN CTL_CODE(FILE_DEVICE_VIDEO, 1, METHOD_BUFFERED, FILE_WRITE_ACCESS)
|
||||
|
||||
/* Sends buf (buf_size bytes) to the given handle through the
 * IOCTL_SHARED_GPU_RESOURCE_SET_METADATA control code.
 *
 * Returns true when DeviceIoControl reports success, false otherwise. */
bool vkd3d_set_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size)
{
    DWORD bytes_returned;
    BOOL succeeded;

    /* Input payload only; no output buffer is supplied. */
    succeeded = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_SET_METADATA,
            buf, buf_size, NULL, 0, &bytes_returned, NULL);

    return succeeded;
}
|
||||
|
||||
/* Reads metadata from the given handle through the
 * IOCTL_SHARED_GPU_RESOURCE_GET_METADATA control code.
 *
 * buf / buf_size describe the output buffer. If metadata_size is non-NULL it
 * receives the byte count reported by DeviceIoControl, or 0 if the call did
 * not report one (for example on failure).
 *
 * Returns true when DeviceIoControl reports success, false otherwise. */
bool vkd3d_get_shared_metadata(HANDLE handle, void *buf, uint32_t buf_size, uint32_t *metadata_size)
{
    /* Initialized so a failed call that never writes the byte count cannot
     * leak an indeterminate value into *metadata_size. */
    DWORD ret_size = 0;

    bool ret = DeviceIoControl(handle, IOCTL_SHARED_GPU_RESOURCE_GET_METADATA, NULL, 0, buf, buf_size, &ret_size, NULL);

    if (metadata_size)
        *metadata_size = ret_size;

    return ret;
}
|
||||
|
||||
/* Opens the "\\.\SharedGpuResource" device and binds the new handle to the
 * resource identified by kmt_handle via IOCTL_SHARED_GPU_RESOURCE_OPEN.
 *
 * Returns the opened handle on success, or INVALID_HANDLE_VALUE if either the
 * device could not be opened or the ioctl failed. In the latter case the
 * device handle is closed before returning. */
HANDLE vkd3d_open_kmt_handle(HANDLE kmt_handle)
{
    struct
    {
        unsigned int kmt_handle;
        /* the following parameter represents a larger sized string for a dynamically allocated struct for use when opening an object by name */
        WCHAR name[1];
    } shared_resource_open;
    DWORD ret_size = 0;

    HANDLE nt_handle = CreateFileA("\\\\.\\SharedGpuResource", GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (nt_handle == INVALID_HANDLE_VALUE)
        return nt_handle;

    /* Open by KMT handle: the name is left as an empty string. */
    shared_resource_open.kmt_handle = (ULONG_PTR)kmt_handle;
    shared_resource_open.name[0] = 0;

    /* lpBytesReturned must not be NULL when lpOverlapped is NULL, so pass a
     * local even though no output is expected. */
    if (!DeviceIoControl(nt_handle, IOCTL_SHARED_GPU_RESOURCE_OPEN, &shared_resource_open, sizeof(shared_resource_open), NULL, 0, &ret_size, NULL))
    {
        CloseHandle(nt_handle);
        return INVALID_HANDLE_VALUE;
    }

    return nt_handle;
}
|
1150
libs/vkd3d/state.c
1150
libs/vkd3d/state.c
File diff suppressed because it is too large
Load Diff
|
@ -180,7 +180,6 @@ struct d3d12_swapchain
|
|||
VkImage vk_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkImage vk_swapchain_images[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkImageView vk_swapchain_image_views[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkFramebuffer vk_framebuffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkCommandBuffer vk_cmd_buffers[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
bool vk_acquire_semaphores_signaled[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
VkSemaphore vk_acquire_semaphores[DXGI_MAX_SWAP_CHAIN_BUFFERS];
|
||||
|
@ -805,11 +804,16 @@ static BOOL d3d12_swapchain_is_present_mode_supported(struct d3d12_swapchain *sw
|
|||
return supported;
|
||||
}
|
||||
|
||||
static BOOL d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
|
||||
static bool d3d12_swapchain_has_user_images(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
return !!swapchain->vk_images[0];
|
||||
}
|
||||
|
||||
static bool d3d12_swapchain_has_user_descriptors(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
return swapchain->descriptors.pool != VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_get_user_graphics_pipeline(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
{
|
||||
struct d3d12_device *device = d3d12_swapchain_device(swapchain);
|
||||
|
@ -817,8 +821,6 @@ static HRESULT d3d12_swapchain_get_user_graphics_pipeline(struct d3d12_swapchain
|
|||
HRESULT hr;
|
||||
|
||||
key.bind_point = VK_PIPELINE_BIND_POINT_GRAPHICS;
|
||||
key.load_op = swapchain->desc.Scaling == DXGI_SCALING_NONE ?
|
||||
VK_ATTACHMENT_LOAD_OP_CLEAR : VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
key.filter = swapchain->desc.Scaling == DXGI_SCALING_NONE ? VK_FILTER_NEAREST : VK_FILTER_LINEAR;
|
||||
key.format = format;
|
||||
|
||||
|
@ -932,9 +934,6 @@ static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapc
|
|||
HRESULT hr;
|
||||
UINT i;
|
||||
|
||||
if (d3d12_swapchain_has_user_images(swapchain))
|
||||
return S_OK;
|
||||
|
||||
heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
heap_properties.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN;
|
||||
heap_properties.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN;
|
||||
|
@ -954,31 +953,38 @@ static HRESULT d3d12_swapchain_create_user_buffers(struct d3d12_swapchain *swapc
|
|||
resource_desc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN;
|
||||
resource_desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET;
|
||||
|
||||
for (i = 0; i < swapchain->desc.BufferCount; i++)
|
||||
if (!d3d12_swapchain_has_user_images(swapchain))
|
||||
{
|
||||
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
|
||||
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_RESOURCE_STATE_PRESENT, NULL, &object)))
|
||||
for (i = 0; i < swapchain->desc.BufferCount; i++)
|
||||
{
|
||||
ERR("Failed to create image for swapchain buffer");
|
||||
return hr;
|
||||
if (FAILED(hr = d3d12_resource_create_committed(d3d12_swapchain_device(swapchain),
|
||||
&resource_desc, &heap_properties, D3D12_HEAP_FLAG_NONE,
|
||||
D3D12_RESOURCE_STATE_PRESENT, NULL, NULL, &object)))
|
||||
{
|
||||
ERR("Failed to create image for swapchain buffer");
|
||||
return hr;
|
||||
}
|
||||
|
||||
swapchain->vk_images[i] = object->res.vk_image;
|
||||
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
|
||||
|
||||
vkd3d_resource_incref(swapchain->buffers[i]);
|
||||
ID3D12Resource_Release(swapchain->buffers[i]);
|
||||
|
||||
/* It is technically possible to just start presenting images without rendering to them.
|
||||
* The initial resource state for swapchain images is PRESENT.
|
||||
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
|
||||
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
|
||||
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
|
||||
}
|
||||
|
||||
swapchain->vk_images[i] = object->res.vk_image;
|
||||
swapchain->buffers[i] = (ID3D12Resource *)&object->ID3D12Resource_iface;
|
||||
|
||||
vkd3d_resource_incref(swapchain->buffers[i]);
|
||||
ID3D12Resource_Release(swapchain->buffers[i]);
|
||||
|
||||
/* It is technically possible to just start presenting images without rendering to them.
|
||||
* The initial resource state for swapchain images is PRESENT.
|
||||
* Since presentable images are dedicated allocations, we can safely queue a transition into common state
|
||||
* right away. We will also drain the queue when we release the images, so there is no risk of early delete. */
|
||||
vkd3d_enqueue_initial_transition(&swapchain->command_queue->ID3D12CommandQueue_iface, swapchain->buffers[i]);
|
||||
}
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
|
||||
return hr;
|
||||
/* If we don't have a swapchain pipeline layout yet (0x0 surface on first frame),
|
||||
* we cannot allocate any descriptors yet. We'll create the descriptors eventually
|
||||
* when we get a proper swapchain working. */
|
||||
if (!d3d12_swapchain_has_user_descriptors(swapchain) && swapchain->pipeline.vk_set_layout)
|
||||
if (FAILED(hr = d3d12_swapchain_create_user_descriptors(swapchain, vk_format)))
|
||||
return hr;
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
@ -987,16 +993,13 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
VkCommandBuffer vk_cmd_buffer, unsigned int dst_index, unsigned int src_index)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkSubpassBeginInfoKHR subpass_begin_info;
|
||||
VkSubpassEndInfoKHR subpass_end_info;
|
||||
VkRenderingAttachmentInfoKHR attachment_info;
|
||||
VkCommandBufferBeginInfo begin_info;
|
||||
VkRenderPassBeginInfo rp_info;
|
||||
VkClearValue clear_value;
|
||||
VkImageMemoryBarrier image_barrier;
|
||||
VkRenderingInfoKHR rendering_info;
|
||||
VkViewport viewport;
|
||||
VkResult vr;
|
||||
|
||||
memset(&clear_value, 0, sizeof(clear_value));
|
||||
|
||||
begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
begin_info.pNext = NULL;
|
||||
begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
|
@ -1008,39 +1011,28 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
return vr;
|
||||
}
|
||||
|
||||
rp_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||
rp_info.pNext = NULL;
|
||||
rp_info.renderPass = swapchain->pipeline.vk_render_pass;
|
||||
rp_info.framebuffer = swapchain->vk_framebuffers[dst_index];
|
||||
|
||||
rp_info.renderArea.offset.x = 0;
|
||||
rp_info.renderArea.offset.y = 0;
|
||||
rp_info.renderArea.extent.width = swapchain->vk_swapchain_width;
|
||||
rp_info.renderArea.extent.height = swapchain->vk_swapchain_height;
|
||||
|
||||
subpass_begin_info.sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO_KHR;
|
||||
subpass_begin_info.pNext = NULL;
|
||||
subpass_begin_info.contents = VK_SUBPASS_CONTENTS_INLINE;
|
||||
|
||||
subpass_end_info.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO_KHR;
|
||||
subpass_end_info.pNext = NULL;
|
||||
memset(&attachment_info, 0, sizeof(attachment_info));
|
||||
attachment_info.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR;
|
||||
attachment_info.imageView = swapchain->vk_swapchain_image_views[dst_index];
|
||||
attachment_info.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
|
||||
attachment_info.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
|
||||
if (swapchain->desc.Scaling == DXGI_SCALING_NONE)
|
||||
{
|
||||
rp_info.clearValueCount = 1;
|
||||
rp_info.pClearValues = &clear_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
rp_info.clearValueCount = 0;
|
||||
rp_info.pClearValues = NULL;
|
||||
}
|
||||
attachment_info.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
|
||||
|
||||
memset(&rendering_info, 0, sizeof(rendering_info));
|
||||
rendering_info.sType = VK_STRUCTURE_TYPE_RENDERING_INFO_KHR;
|
||||
rendering_info.renderArea.extent.width = swapchain->vk_swapchain_width;
|
||||
rendering_info.renderArea.extent.height = swapchain->vk_swapchain_height;
|
||||
rendering_info.layerCount = 1;
|
||||
rendering_info.colorAttachmentCount = 1;
|
||||
rendering_info.pColorAttachments = &attachment_info;
|
||||
|
||||
viewport.x = viewport.y = 0.0f;
|
||||
viewport.minDepth = 0.0f;
|
||||
viewport.maxDepth = 1.0f;
|
||||
|
||||
VK_CALL(vkCmdBeginRenderPass2KHR(vk_cmd_buffer, &rp_info, &subpass_begin_info));
|
||||
if (swapchain->desc.Scaling == DXGI_SCALING_NONE)
|
||||
{
|
||||
viewport.width = (float)swapchain->desc.Width;
|
||||
|
@ -1052,15 +1044,48 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
viewport.height = swapchain->vk_swapchain_height;
|
||||
}
|
||||
|
||||
VK_CALL(vkCmdSetViewport(vk_cmd_buffer, 0, 1, &viewport));
|
||||
VK_CALL(vkCmdSetScissor(vk_cmd_buffer, 0, 1, &rp_info.renderArea));
|
||||
image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
image_barrier.pNext = NULL;
|
||||
image_barrier.srcAccessMask = 0;
|
||||
image_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
image_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
image_barrier.image = swapchain->vk_swapchain_images[dst_index];
|
||||
image_barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
image_barrier.subresourceRange.baseMipLevel = 0;
|
||||
image_barrier.subresourceRange.levelCount = 1;
|
||||
image_barrier.subresourceRange.baseArrayLayer = 0;
|
||||
image_barrier.subresourceRange.layerCount = 1;
|
||||
|
||||
if (attachment_info.loadOp != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
|
||||
image_barrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(vk_cmd_buffer,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
0, 0, NULL, 0, NULL, 1, &image_barrier));
|
||||
|
||||
VK_CALL(vkCmdBeginRenderingKHR(vk_cmd_buffer, &rendering_info));
|
||||
VK_CALL(vkCmdSetViewport(vk_cmd_buffer, 0, 1, &viewport));
|
||||
VK_CALL(vkCmdSetScissor(vk_cmd_buffer, 0, 1, &rendering_info.renderArea));
|
||||
VK_CALL(vkCmdBindPipeline(vk_cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, swapchain->pipeline.vk_pipeline));
|
||||
VK_CALL(vkCmdBindDescriptorSets(vk_cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
swapchain->pipeline.vk_pipeline_layout, 0, 1, &swapchain->descriptors.sets[src_index],
|
||||
0, NULL));
|
||||
VK_CALL(vkCmdDraw(vk_cmd_buffer, 3, 1, 0, 0));
|
||||
VK_CALL(vkCmdEndRenderPass2KHR(vk_cmd_buffer, &subpass_end_info));
|
||||
VK_CALL(vkCmdEndRenderingKHR(vk_cmd_buffer));
|
||||
|
||||
image_barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
image_barrier.dstAccessMask = 0;
|
||||
image_barrier.oldLayout = image_barrier.newLayout;
|
||||
image_barrier.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
|
||||
VK_CALL(vkCmdPipelineBarrier(vk_cmd_buffer,
|
||||
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
|
||||
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
|
||||
0, 0, NULL, 0, NULL, 1, &image_barrier));
|
||||
|
||||
if ((vr = VK_CALL(vkEndCommandBuffer(vk_cmd_buffer))) < 0)
|
||||
WARN("Failed to end command buffer, vr %d.\n", vr);
|
||||
|
@ -1068,7 +1093,7 @@ static VkResult d3d12_swapchain_record_swapchain_blit(struct d3d12_swapchain *sw
|
|||
return vr;
|
||||
}
|
||||
|
||||
static void d3d12_swapchain_destroy_framebuffers(struct d3d12_swapchain *swapchain)
|
||||
static void d3d12_swapchain_destroy_views(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
|
||||
|
@ -1077,30 +1102,18 @@ static void d3d12_swapchain_destroy_framebuffers(struct d3d12_swapchain *swapcha
|
|||
for (i = 0; i < swapchain->buffer_count; i++)
|
||||
{
|
||||
VK_CALL(vkDestroyImageView(vk_device, swapchain->vk_swapchain_image_views[i], NULL));
|
||||
VK_CALL(vkDestroyFramebuffer(vk_device, swapchain->vk_framebuffers[i], NULL));
|
||||
swapchain->vk_swapchain_image_views[i] = VK_NULL_HANDLE;
|
||||
swapchain->vk_framebuffers[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_create_framebuffers(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
static HRESULT d3d12_swapchain_create_views(struct d3d12_swapchain *swapchain, VkFormat format)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkDevice vk_device = d3d12_swapchain_device(swapchain)->vk_device;
|
||||
VkImageViewCreateInfo image_view_info;
|
||||
VkFramebufferCreateInfo fb_info;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
fb_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
|
||||
fb_info.pNext = NULL;
|
||||
fb_info.flags = 0;
|
||||
fb_info.renderPass = swapchain->pipeline.vk_render_pass;
|
||||
fb_info.width = swapchain->vk_swapchain_width;
|
||||
fb_info.height = swapchain->vk_swapchain_height;
|
||||
fb_info.layers = 1;
|
||||
fb_info.attachmentCount = 1;
|
||||
|
||||
image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
|
||||
image_view_info.pNext = NULL;
|
||||
image_view_info.flags = 0;
|
||||
|
@ -1121,9 +1134,6 @@ static HRESULT d3d12_swapchain_create_framebuffers(struct d3d12_swapchain *swapc
|
|||
image_view_info.image = swapchain->vk_swapchain_images[i];
|
||||
if ((vr = VK_CALL(vkCreateImageView(vk_device, &image_view_info, NULL, &swapchain->vk_swapchain_image_views[i]))))
|
||||
return hresult_from_vk_result(vr);
|
||||
fb_info.pAttachments = &swapchain->vk_swapchain_image_views[i];
|
||||
if ((vr = VK_CALL(vkCreateFramebuffer(vk_device, &fb_info, NULL, &swapchain->vk_framebuffers[i]))))
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
|
@ -1225,8 +1235,6 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
|
|||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
|
||||
if ((vr = VK_CALL(vkGetSwapchainImagesKHR(vk_device, vk_swapchain, &image_count, NULL))) < 0)
|
||||
{
|
||||
WARN("Failed to get Vulkan swapchain images, vr %d.\n", vr);
|
||||
|
@ -1255,7 +1263,7 @@ static HRESULT d3d12_swapchain_create_buffers(struct d3d12_swapchain *swapchain,
|
|||
|
||||
if (queue_desc.Type == D3D12_COMMAND_LIST_TYPE_DIRECT)
|
||||
{
|
||||
if (FAILED(hr = d3d12_swapchain_create_framebuffers(swapchain, vk_swapchain_format)))
|
||||
if (FAILED(hr = d3d12_swapchain_create_views(swapchain, vk_swapchain_format)))
|
||||
return hr;
|
||||
}
|
||||
|
||||
|
@ -1301,25 +1309,30 @@ static VkResult d3d12_swapchain_unsignal_acquire_semaphore(struct d3d12_swapchai
|
|||
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))))
|
||||
{
|
||||
ERR("Failed to submit unsignal operation, vr %d\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
goto end;
|
||||
}
|
||||
|
||||
swapchain->vk_acquire_semaphores_signaled[frame_id] = false;
|
||||
|
||||
if (vk_fence)
|
||||
{
|
||||
if ((vr = VK_CALL(vkWaitForFences(swapchain->command_queue->device->vk_device, 1, &vk_fence, VK_TRUE, UINT64_MAX))))
|
||||
ERR("Failed to wait for fences, vr %d\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
}
|
||||
|
||||
end:
|
||||
VK_CALL(vkDestroyFence(vk_device, vk_fence, NULL));
|
||||
return vr;
|
||||
}
|
||||
|
||||
static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, BOOL destroy_user_buffers)
|
||||
static void d3d12_swapchain_destroy_resources(struct d3d12_swapchain *swapchain, bool destroy_user_buffers)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
VkQueue vk_queue;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
if (swapchain->command_queue)
|
||||
{
|
||||
|
@ -1332,7 +1345,8 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
if (swapchain->vk_acquire_semaphores_signaled[i])
|
||||
d3d12_swapchain_unsignal_acquire_semaphore(swapchain, vk_queue, i, false);
|
||||
|
||||
VK_CALL(vkQueueWaitIdle(vk_queue));
|
||||
vr = VK_CALL(vkQueueWaitIdle(vk_queue));
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
}
|
||||
|
@ -1342,18 +1356,20 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < swapchain->desc.BufferCount; ++i)
|
||||
{
|
||||
if (swapchain->buffers[i] && destroy_user_buffers)
|
||||
{
|
||||
vkd3d_resource_decref(swapchain->buffers[i]);
|
||||
swapchain->buffers[i] = NULL;
|
||||
swapchain->vk_images[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
if (destroy_user_buffers)
|
||||
{
|
||||
for (i = 0; i < swapchain->desc.BufferCount; ++i)
|
||||
{
|
||||
if (swapchain->buffers[i])
|
||||
{
|
||||
vkd3d_resource_decref(swapchain->buffers[i]);
|
||||
swapchain->buffers[i] = NULL;
|
||||
swapchain->vk_images[i] = VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_user_descriptors(swapchain);
|
||||
}
|
||||
|
||||
if (swapchain->command_queue && swapchain->command_queue->device->vk_device)
|
||||
{
|
||||
|
@ -1372,6 +1388,8 @@ static void d3d12_swapchain_destroy_buffers(struct d3d12_swapchain *swapchain, B
|
|||
VK_CALL(vkDestroyCommandPool(swapchain->command_queue->device->vk_device, swapchain->vk_cmd_pool, NULL));
|
||||
swapchain->vk_cmd_pool = VK_NULL_HANDLE;
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_views(swapchain);
|
||||
}
|
||||
|
||||
static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swapchain)
|
||||
|
@ -1391,7 +1409,7 @@ static bool d3d12_swapchain_has_nonzero_surface_size(struct d3d12_swapchain *swa
|
|||
return surface_caps.maxImageExtent.width != 0 && surface_caps.maxImageExtent.height != 0;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain)
|
||||
static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *swapchain, bool force_surface_lost)
|
||||
{
|
||||
VkPhysicalDevice vk_physical_device = d3d12_swapchain_device(swapchain)->vk_physical_device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
|
@ -1402,9 +1420,9 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
unsigned int width, height, image_count;
|
||||
VkSurfaceCapabilitiesKHR surface_caps;
|
||||
unsigned int override_image_count;
|
||||
char count_env[VKD3D_PATH_MAX];
|
||||
VkSwapchainKHR vk_swapchain;
|
||||
VkImageUsageFlags usage;
|
||||
const char *count_env;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1418,8 +1436,19 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
swapchain->vk_surface, &swapchain->desc, &vk_swapchain_format)))
|
||||
return hr;
|
||||
|
||||
vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
|
||||
swapchain->vk_surface, &surface_caps));
|
||||
if (force_surface_lost)
|
||||
{
|
||||
/* If we cannot successfully present after 2 attempts, we must assume the swapchain
|
||||
* is in an unstable state with many resizes happening async. Until things stabilize,
|
||||
* force a dummy swapchain for now so that we can make forward progress.
|
||||
* When we don't have a proper swapchain, we will attempt again next present. */
|
||||
vr = VK_ERROR_SURFACE_LOST_KHR;
|
||||
}
|
||||
else
|
||||
{
|
||||
vr = VK_CALL(vkGetPhysicalDeviceSurfaceCapabilitiesKHR(vk_physical_device,
|
||||
swapchain->vk_surface, &surface_caps));
|
||||
}
|
||||
|
||||
if (vr == VK_ERROR_SURFACE_LOST_KHR)
|
||||
{
|
||||
|
@ -1447,8 +1476,8 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
image_count = swapchain->desc.BufferCount + 1;
|
||||
image_count = max(image_count, surface_caps.minImageCount);
|
||||
|
||||
count_env = getenv("VKD3D_SWAPCHAIN_IMAGES");
|
||||
if (count_env)
|
||||
vkd3d_get_env_var("VKD3D_SWAPCHAIN_IMAGES", count_env, sizeof(count_env));
|
||||
if (strlen(count_env) > 0)
|
||||
{
|
||||
override_image_count = strtoul(count_env, NULL, 0);
|
||||
image_count = max(image_count, override_image_count);
|
||||
|
@ -1574,23 +1603,12 @@ static HRESULT d3d12_swapchain_create_vulkan_swapchain(struct d3d12_swapchain *s
|
|||
if (FAILED(hr = d3d12_swapchain_create_user_buffers(swapchain, vk_format)))
|
||||
return hr;
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
swapchain->buffer_count = 0;
|
||||
return S_OK;
|
||||
}
|
||||
}
|
||||
|
||||
static HRESULT d3d12_swapchain_recreate_vulkan_swapchain(struct d3d12_swapchain *swapchain)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
|
||||
ERR("Failed to recreate Vulkan swapchain, hr %#x.\n", hr);
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
static inline struct d3d12_swapchain *d3d12_swapchain_from_IDXGISwapChain(dxgi_swapchain_iface *iface)
|
||||
{
|
||||
return CONTAINING_RECORD(iface, struct d3d12_swapchain, IDXGISwapChain_iface);
|
||||
|
@ -1636,8 +1654,7 @@ static void d3d12_swapchain_destroy(struct d3d12_swapchain *swapchain)
|
|||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = d3d12_swapchain_procs(swapchain);
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
|
||||
d3d12_swapchain_destroy_framebuffers(swapchain);
|
||||
d3d12_swapchain_destroy_resources(swapchain, true);
|
||||
|
||||
if (swapchain->frame_latency_event)
|
||||
CloseHandle(swapchain->frame_latency_event);
|
||||
|
@ -1769,9 +1786,9 @@ static HRESULT d3d12_swapchain_set_sync_interval(struct d3d12_swapchain *swapcha
|
|||
return S_OK;
|
||||
}
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
swapchain->present_mode = present_mode;
|
||||
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
|
||||
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
|
||||
}
|
||||
|
||||
static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain, VkQueue vk_queue)
|
||||
|
@ -1789,6 +1806,11 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
if (swapchain->vk_swapchain == VK_NULL_HANDLE)
|
||||
return VK_SUCCESS;
|
||||
|
||||
/* If we know we're already suboptimal, e.g. observed in present or acquire after present,
|
||||
* just recreate the swapchain right away. */
|
||||
if (swapchain->is_suboptimal)
|
||||
return VK_ERROR_OUT_OF_DATE_KHR;
|
||||
|
||||
if (swapchain->vk_image_index == INVALID_VK_IMAGE_INDEX)
|
||||
{
|
||||
/* If we hit SUBOPTIMAL path last AcquireNextImageKHR, we will have a pending acquire we did not
|
||||
|
@ -1802,13 +1824,15 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_acquire_semaphores[swapchain->frame_id],
|
||||
VK_NULL_HANDLE, &swapchain->vk_image_index));
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr >= 0)
|
||||
{
|
||||
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
|
||||
/* If we have observed suboptimal once, guarantees that we keep observing it
|
||||
* until we have recreated the swapchain. */
|
||||
if (swapchain->is_suboptimal)
|
||||
vr = VK_SUBOPTIMAL_KHR;
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
}
|
||||
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
|
@ -1841,6 +1865,7 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
VK_TRUE, UINT64_MAX))))
|
||||
{
|
||||
ERR("Failed to wait for fence.\n");
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1869,6 +1894,7 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, swapchain->vk_blit_fences[swapchain->vk_image_index]))) < 0)
|
||||
{
|
||||
ERR("Failed to blit swapchain buffer, vr %d.\n", vr);
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1882,12 +1908,11 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->frame_id = (swapchain->frame_id + 1) % swapchain->buffer_count;
|
||||
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
|
||||
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
|
||||
/* If we have observed suboptimal once, guarantees that we keep observing it
|
||||
* until we have recreated the swapchain. */
|
||||
if (swapchain->is_suboptimal)
|
||||
if (vr == VK_SUBOPTIMAL_KHR)
|
||||
swapchain->is_suboptimal = true;
|
||||
else if (swapchain->is_suboptimal)
|
||||
vr = VK_SUBOPTIMAL_KHR;
|
||||
|
||||
/* Could get SUBOPTIMAL here. Defer acquiring if we hit that path.
|
||||
|
@ -1900,6 +1925,8 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_acquire_semaphores[swapchain->frame_id], VK_NULL_HANDLE,
|
||||
&swapchain->vk_image_index));
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
if (vr >= 0)
|
||||
{
|
||||
swapchain->vk_acquire_semaphores_signaled[swapchain->frame_id] = true;
|
||||
|
@ -1918,9 +1945,13 @@ static VkResult d3d12_swapchain_queue_present(struct d3d12_swapchain *swapchain,
|
|||
swapchain->vk_image_index = INVALID_VK_IMAGE_INDEX;
|
||||
}
|
||||
}
|
||||
|
||||
/* Not being able to successfully acquire here is okay, we'll defer the acquire to next frame. */
|
||||
vr = VK_SUCCESS;
|
||||
}
|
||||
|
||||
VKD3D_DEVICE_REPORT_BREADCRUMB_IF(swapchain->command_queue->device, vr == VK_ERROR_DEVICE_LOST);
|
||||
|
||||
return vr;
|
||||
}
|
||||
|
||||
|
@ -1959,15 +1990,17 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
|
|||
return E_FAIL;
|
||||
}
|
||||
|
||||
/* We must have some kind of forward progress here. Keep trying until we exhaust all possible avenues. */
|
||||
vr = d3d12_swapchain_queue_present(swapchain, vk_queue);
|
||||
if (vr == VK_ERROR_OUT_OF_DATE_KHR)
|
||||
if (vr < 0)
|
||||
{
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
|
||||
TRACE("Recreating Vulkan swapchain.\n");
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, FALSE);
|
||||
if (FAILED(hr = d3d12_swapchain_recreate_vulkan_swapchain(swapchain)))
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
|
||||
return hr;
|
||||
|
||||
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
|
||||
|
@ -1977,7 +2010,22 @@ static HRESULT d3d12_swapchain_present(struct d3d12_swapchain *swapchain,
|
|||
}
|
||||
|
||||
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
|
||||
ERR("Failed to present after recreating swapchain, vr %d.\n", vr);
|
||||
{
|
||||
ERR("Failed to present after recreating swapchain, vr %d. Attempting fallback swapchain.\n", vr);
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
d3d12_swapchain_destroy_resources(swapchain, false);
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, true)))
|
||||
return hr;
|
||||
|
||||
if (!(vk_queue = vkd3d_acquire_vk_queue(d3d12_swapchain_queue_iface(swapchain))))
|
||||
{
|
||||
ERR("Failed to acquire Vulkan queue.\n");
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
if ((vr = d3d12_swapchain_queue_present(swapchain, vk_queue)) < 0)
|
||||
ERR("Failed to present even after creating dummy swapchain, vr %d. This should not be possible.\n", vr);
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_release_vk_queue(d3d12_swapchain_queue_iface(swapchain));
|
||||
|
@ -2257,9 +2305,9 @@ static HRESULT d3d12_swapchain_resize_buffers(struct d3d12_swapchain *swapchain,
|
|||
&& desc->Format == new_desc.Format && desc->BufferCount == new_desc.BufferCount)
|
||||
return S_OK;
|
||||
|
||||
d3d12_swapchain_destroy_buffers(swapchain, TRUE);
|
||||
d3d12_swapchain_destroy_resources(swapchain, true);
|
||||
swapchain->desc = new_desc;
|
||||
return d3d12_swapchain_recreate_vulkan_swapchain(swapchain);
|
||||
return d3d12_swapchain_create_vulkan_swapchain(swapchain, false);
|
||||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_swapchain_ResizeBuffers(dxgi_swapchain_iface *iface,
|
||||
|
@ -2840,7 +2888,7 @@ static HRESULT d3d12_swapchain_init(struct d3d12_swapchain *swapchain, IDXGIFact
|
|||
ID3D12CommandQueue_AddRef(&queue->ID3D12CommandQueue_iface);
|
||||
d3d12_device_add_ref(queue->device);
|
||||
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain)))
|
||||
if (FAILED(hr = d3d12_swapchain_create_vulkan_swapchain(swapchain, false)))
|
||||
{
|
||||
d3d12_swapchain_destroy(swapchain);
|
||||
return hr;
|
||||
|
|
|
@ -1018,6 +1018,16 @@ HRESULT hresult_from_errno(int rc)
|
|||
|
||||
HRESULT hresult_from_vk_result(VkResult vr)
|
||||
{
|
||||
/* Wine tends to dispatch Vulkan calls to their own syscall stack.
|
||||
* Crashes are captured and return this magic VkResult.
|
||||
* Report it explicitly here so it's easier to debug when it happens. */
|
||||
if (vr == -1073741819)
|
||||
{
|
||||
ERR("Detected segfault in Wine syscall handler.\n");
|
||||
/* HACK: For ad-hoc debugging can also trigger backtrace printing here. */
|
||||
return E_POINTER;
|
||||
}
|
||||
|
||||
switch (vr)
|
||||
{
|
||||
case VK_SUCCESS:
|
||||
|
|
|
@ -165,15 +165,28 @@ static CONST_VTBL struct ID3D12RootSignatureDeserializerVtbl d3d12_root_signatur
|
|||
|
||||
static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
enum vkd3d_root_signature_version target_version)
|
||||
enum vkd3d_root_signature_version target_version,
|
||||
bool raw_payload,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
struct vkd3d_versioned_root_signature_desc desc, converted_desc;
|
||||
int ret;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc)) < 0)
|
||||
if (raw_payload)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
if ((ret = vkd3d_shader_parse_root_signature_raw(dxbc->code, dxbc->size, &desc, compatibility_hash)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &desc, compatibility_hash)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (desc.version == target_version)
|
||||
|
@ -197,15 +210,27 @@ static int vkd3d_parse_root_signature_for_version(const struct vkd3d_shader_code
|
|||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_0(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc)
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0);
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_0, false,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_1(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc)
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1);
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, false,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
int vkd3d_parse_root_signature_v_1_1_from_raw_payload(const struct vkd3d_shader_code *dxbc,
|
||||
struct vkd3d_versioned_root_signature_desc *out_desc,
|
||||
vkd3d_shader_hash_t *compatibility_hash)
|
||||
{
|
||||
return vkd3d_parse_root_signature_for_version(dxbc, out_desc, VKD3D_ROOT_SIGNATURE_VERSION_1_1, true,
|
||||
compatibility_hash);
|
||||
}
|
||||
|
||||
static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signature_deserializer *deserializer,
|
||||
|
@ -216,7 +241,7 @@ static HRESULT d3d12_root_signature_deserializer_init(struct d3d12_root_signatur
|
|||
deserializer->ID3D12RootSignatureDeserializer_iface.lpVtbl = &d3d12_root_signature_deserializer_vtbl;
|
||||
deserializer->refcount = 1;
|
||||
|
||||
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d)) < 0)
|
||||
if ((ret = vkd3d_parse_root_signature_v_1_0(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
|
||||
return hresult_from_vkd3d_result(ret);
|
||||
|
||||
return S_OK;
|
||||
|
@ -394,7 +419,7 @@ static HRESULT d3d12_versioned_root_signature_deserializer_init(struct d3d12_ver
|
|||
deserializer->ID3D12VersionedRootSignatureDeserializer_iface.lpVtbl = &d3d12_versioned_root_signature_deserializer_vtbl;
|
||||
deserializer->refcount = 1;
|
||||
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d)) < 0)
|
||||
if ((ret = vkd3d_shader_parse_root_signature(dxbc, &deserializer->desc.vkd3d, NULL)) < 0)
|
||||
{
|
||||
WARN("Failed to parse root signature, vkd3d result %d.\n", ret);
|
||||
return hresult_from_vkd3d_result(ret);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -45,6 +45,8 @@ enum vkd3d_meta_copy_mode
|
|||
#include <cs_resolve_binary_queries.h>
|
||||
#include <cs_resolve_predicate.h>
|
||||
#include <cs_resolve_query.h>
|
||||
#include <cs_execute_indirect_patch.h>
|
||||
#include <cs_execute_indirect_patch_debug_ring.h>
|
||||
#include <vs_fullscreen_layer.h>
|
||||
#include <vs_fullscreen.h>
|
||||
#include <gs_fullscreen.h>
|
||||
|
|
|
@ -49,6 +49,7 @@ VK_INSTANCE_PFN(vkGetPhysicalDeviceQueueFamilyProperties)
|
|||
VK_INSTANCE_PFN(vkGetPhysicalDeviceSparseImageFormatProperties)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceFeatures2)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceProperties2)
|
||||
VK_INSTANCE_PFN(vkGetPhysicalDeviceExternalSemaphoreProperties)
|
||||
|
||||
/* VK_EXT_debug_utils */
|
||||
VK_INSTANCE_EXT_PFN(vkCreateDebugUtilsMessengerEXT)
|
||||
|
@ -62,7 +63,6 @@ VK_DEVICE_PFN(vkAllocateDescriptorSets)
|
|||
VK_DEVICE_PFN(vkAllocateMemory)
|
||||
VK_DEVICE_PFN(vkBeginCommandBuffer)
|
||||
VK_DEVICE_PFN(vkCmdBeginQuery)
|
||||
VK_DEVICE_PFN(vkCmdBeginRenderPass)
|
||||
VK_DEVICE_PFN(vkCmdBindDescriptorSets)
|
||||
VK_DEVICE_PFN(vkCmdBindIndexBuffer)
|
||||
VK_DEVICE_PFN(vkCmdBindPipeline)
|
||||
|
@ -78,7 +78,6 @@ VK_DEVICE_PFN(vkCmdDrawIndexed)
|
|||
VK_DEVICE_PFN(vkCmdDrawIndexedIndirect)
|
||||
VK_DEVICE_PFN(vkCmdDrawIndirect)
|
||||
VK_DEVICE_PFN(vkCmdEndQuery)
|
||||
VK_DEVICE_PFN(vkCmdEndRenderPass)
|
||||
VK_DEVICE_PFN(vkCmdExecuteCommands)
|
||||
VK_DEVICE_PFN(vkCmdFillBuffer)
|
||||
VK_DEVICE_PFN(vkCmdNextSubpass)
|
||||
|
@ -114,7 +113,6 @@ VK_DEVICE_PFN(vkCreateImageView)
|
|||
VK_DEVICE_PFN(vkCreatePipelineCache)
|
||||
VK_DEVICE_PFN(vkCreatePipelineLayout)
|
||||
VK_DEVICE_PFN(vkCreateQueryPool)
|
||||
VK_DEVICE_PFN(vkCreateRenderPass)
|
||||
VK_DEVICE_PFN(vkCreateSampler)
|
||||
VK_DEVICE_PFN(vkCreateSemaphore)
|
||||
VK_DEVICE_PFN(vkCreateShaderModule)
|
||||
|
@ -132,7 +130,6 @@ VK_DEVICE_PFN(vkDestroyPipeline)
|
|||
VK_DEVICE_PFN(vkDestroyPipelineCache)
|
||||
VK_DEVICE_PFN(vkDestroyPipelineLayout)
|
||||
VK_DEVICE_PFN(vkDestroyQueryPool)
|
||||
VK_DEVICE_PFN(vkDestroyRenderPass)
|
||||
VK_DEVICE_PFN(vkDestroySampler)
|
||||
VK_DEVICE_PFN(vkDestroySemaphore)
|
||||
VK_DEVICE_PFN(vkDestroyShaderModule)
|
||||
|
@ -156,7 +153,6 @@ VK_DEVICE_PFN(vkGetImageSparseMemoryRequirements2)
|
|||
VK_DEVICE_PFN(vkGetImageSubresourceLayout)
|
||||
VK_DEVICE_PFN(vkGetPipelineCacheData)
|
||||
VK_DEVICE_PFN(vkGetQueryPoolResults)
|
||||
VK_DEVICE_PFN(vkGetRenderAreaGranularity)
|
||||
VK_DEVICE_PFN(vkInvalidateMappedMemoryRanges)
|
||||
VK_DEVICE_PFN(vkMapMemory)
|
||||
VK_DEVICE_PFN(vkMergePipelineCaches)
|
||||
|
@ -211,12 +207,6 @@ VK_DEVICE_EXT_PFN(vkCmdCopyAccelerationStructureKHR)
|
|||
VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceFragmentShadingRatesKHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetFragmentShadingRateKHR)
|
||||
|
||||
/* VK_KHR_create_renderpass2 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdBeginRenderPass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdEndRenderPass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdNextSubpass2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCreateRenderPass2KHR)
|
||||
|
||||
/* VK_KHR_bind_memory2 */
|
||||
VK_DEVICE_EXT_PFN(vkBindBufferMemory2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkBindImageMemory2KHR)
|
||||
|
@ -229,6 +219,23 @@ VK_DEVICE_EXT_PFN(vkCmdCopyImage2KHR)
|
|||
VK_DEVICE_EXT_PFN(vkCmdCopyImageToBuffer2KHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdResolveImage2KHR)
|
||||
|
||||
/* VK_KHR_maintenance4 */
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceBufferMemoryRequirementsKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceImageMemoryRequirementsKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetDeviceImageSparseMemoryRequirementsKHR)
|
||||
|
||||
#ifdef VK_KHR_external_memory_win32
|
||||
/* VK_KHR_external_memory_win32 */
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandleKHR)
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryWin32HandlePropertiesKHR)
|
||||
#endif
|
||||
|
||||
#ifdef VK_KHR_external_semaphore_win32
|
||||
/* VK_KHR_external_semaphore_win32 */
|
||||
VK_DEVICE_EXT_PFN(vkGetSemaphoreWin32HandleKHR)
|
||||
VK_DEVICE_EXT_PFN(vkImportSemaphoreWin32HandleKHR)
|
||||
#endif
|
||||
|
||||
/* VK_EXT_calibrated_timestamps */
|
||||
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
|
||||
VK_INSTANCE_EXT_PFN(vkGetPhysicalDeviceCalibrateableTimeDomainsEXT)
|
||||
|
@ -256,6 +263,9 @@ VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveTopologyEXT)
|
|||
VK_DEVICE_EXT_PFN(vkCmdSetScissorWithCountEXT)
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetViewportWithCountEXT)
|
||||
|
||||
/* VK_EXT_extended_dynamic_state2 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetPrimitiveRestartEnableEXT)
|
||||
|
||||
/* VK_EXT_external_memory_host */
|
||||
VK_DEVICE_EXT_PFN(vkGetMemoryHostPointerPropertiesEXT)
|
||||
|
||||
|
@ -279,9 +289,20 @@ VK_DEVICE_EXT_PFN(vkGetSwapchainImagesKHR)
|
|||
VK_DEVICE_EXT_PFN(vkAcquireNextImageKHR)
|
||||
VK_DEVICE_EXT_PFN(vkQueuePresentKHR)
|
||||
|
||||
/* VK_KHR_dynamic_rendering */
|
||||
VK_DEVICE_EXT_PFN(vkCmdBeginRenderingKHR)
|
||||
VK_DEVICE_EXT_PFN(vkCmdEndRenderingKHR)
|
||||
|
||||
/* VK_KHR_ray_tracing_maintenance1 */
|
||||
VK_DEVICE_EXT_PFN(vkCmdTraceRaysIndirect2KHR)
|
||||
|
||||
/* VK_AMD_buffer_marker */
|
||||
VK_DEVICE_EXT_PFN(vkCmdWriteBufferMarkerAMD)
|
||||
|
||||
/* VK_NV_device_diagnostic_checkpoints */
|
||||
VK_DEVICE_EXT_PFN(vkCmdSetCheckpointNV)
|
||||
VK_DEVICE_EXT_PFN(vkGetQueueCheckpointDataNV)
|
||||
|
||||
/* VK_NVX_binary_import */
|
||||
VK_DEVICE_EXT_PFN(vkCreateCuModuleNVX)
|
||||
VK_DEVICE_EXT_PFN(vkCreateCuFunctionNVX)
|
||||
|
@ -297,6 +318,12 @@ VK_DEVICE_EXT_PFN(vkGetImageViewAddressNVX)
|
|||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutHostMappingInfoVALVE)
|
||||
VK_DEVICE_EXT_PFN(vkGetDescriptorSetHostMappingVALVE)
|
||||
|
||||
/* VK_NV_device_generated_commands */
|
||||
VK_DEVICE_EXT_PFN(vkCreateIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkDestroyIndirectCommandsLayoutNV)
|
||||
VK_DEVICE_EXT_PFN(vkGetGeneratedCommandsMemoryRequirementsNV)
|
||||
VK_DEVICE_EXT_PFN(vkCmdExecuteGeneratedCommandsNV)
|
||||
|
||||
#undef VK_INSTANCE_PFN
|
||||
#undef VK_INSTANCE_EXT_PFN
|
||||
#undef VK_DEVICE_PFN
|
||||
|
|
|
@ -64,6 +64,11 @@ if not enable_trace
|
|||
add_project_arguments('-DVKD3D_NO_TRACE_MESSAGES', language : 'c')
|
||||
endif
|
||||
|
||||
enable_breadcrumbs = enable_trace
|
||||
if enable_breadcrumbs
|
||||
add_project_arguments('-DVKD3D_ENABLE_BREADCRUMBS', language : 'c')
|
||||
endif
|
||||
|
||||
vkd3d_external_includes = [ './subprojects/Vulkan-Headers/include', './subprojects/SPIRV-Headers/include' ]
|
||||
vkd3d_public_includes = [ './include' ] + vkd3d_external_includes
|
||||
vkd3d_private_includes = [ './include/private' ] + vkd3d_public_includes
|
||||
|
@ -78,7 +83,7 @@ idl_generator = generator(idl_compiler,
|
|||
arguments : [ '-h', '-o', '@OUTPUT@', '@INPUT@' ])
|
||||
|
||||
glsl_compiler = find_program('glslangValidator')
|
||||
glsl_args = [ '-V', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
glsl_args = [ '-V', '--target-env', 'vulkan1.1', '--vn', '@BASENAME@', '@INPUT@', '-o', '@OUTPUT@' ]
|
||||
if run_command(glsl_compiler, [ '--quiet', '--version' ], check : false).returncode() == 0
|
||||
glsl_args += [ '--quiet' ]
|
||||
endif
|
||||
|
|
|
@ -75,17 +75,35 @@ def main():
|
|||
parser.add_argument('--per-iteration', action = 'store_true', help = 'Represent ticks in terms of ticks / iteration. Cannot be used with --divider.')
|
||||
parser.add_argument('--name', nargs = '+', type = str, help = 'Only display data for certain counters.')
|
||||
parser.add_argument('--sort', type = str, default = 'none', help = 'Sorts input data according to "iterations" or "ticks".')
|
||||
parser.add_argument('--delta', type = str, help = 'Subtract iterations and timing from other profile blob.')
|
||||
parser.add_argument('profile', help = 'The profile binary blob.')
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.profile:
|
||||
raise AssertionError('Need profile folder.')
|
||||
|
||||
delta_map = {}
|
||||
if args.delta is not None:
|
||||
with open(args.delta, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
b = parse_block(block)
|
||||
delta_map[b.name] = b
|
||||
|
||||
blocks = []
|
||||
with open(args.profile, 'rb') as f:
|
||||
for block in iter(lambda: f.read(64), b''):
|
||||
if is_valid_block(block):
|
||||
blocks.append(parse_block(block))
|
||||
b = parse_block(block)
|
||||
if b.name in delta_map:
|
||||
d = delta_map[b.name]
|
||||
b = ProfileCase(ticks = b.ticks - d.ticks,
|
||||
iterations = b.iterations - d.iterations,
|
||||
name = b.name)
|
||||
if b.iterations < 0 or b.ticks < 0:
|
||||
raise AssertionError('After subtracting, iterations or ticks became negative.')
|
||||
if b.iterations > 0:
|
||||
blocks.append(b)
|
||||
|
||||
if args.divider is not None:
|
||||
if args.per_iteration:
|
||||
|
@ -114,11 +132,11 @@ def main():
|
|||
print(' Iterations:', block.iterations)
|
||||
|
||||
if args.divider is not None:
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration of {}: {:.3f}'.format(args.divider, block.ticks / 1000.0), "Kcycles")
|
||||
elif args.per_iteration:
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Time spent per iteration: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
else:
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "us")
|
||||
print(' Total time spent: {:.3f}'.format(block.ticks / 1000.0), "Kcycles")
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
|
@ -1 +1 @@
|
|||
Subproject commit 83e1a9ed8ce289cebb1c02c8167d663dc1befb24
|
||||
Subproject commit 245d25ce8c3337919dc7916d0e62e31a0d8748ab
|
|
@ -1 +1 @@
|
|||
Subproject commit b537bbb91bccdbc695cb7e5211d608f8d1c205bd
|
||||
Subproject commit 9f2fd6356c14376ab5b88518d6dd4e6787084525
|
|
@ -141,7 +141,7 @@ void test_clear_depth_stencil_view(void)
|
|||
void test_clear_render_target_view(void)
|
||||
{
|
||||
static const unsigned int array_expected_colors[] = {0xff00ff00, 0xff0000ff, 0xffff0000};
|
||||
static const struct vec4 array_colors[] =
|
||||
static const float array_colors[][4] =
|
||||
{
|
||||
{0.0f, 1.0f, 0.0f, 1.0f},
|
||||
{1.0f, 0.0f, 0.0f, 1.0f},
|
||||
|
@ -324,8 +324,7 @@ void test_clear_render_target_view(void)
|
|||
rtv_desc.Texture2DArray.ArraySize = 1;
|
||||
|
||||
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, resource,
|
||||
|
@ -355,8 +354,7 @@ void test_clear_render_target_view(void)
|
|||
rtv_desc.Texture2DMSArray.ArraySize = 1;
|
||||
|
||||
ID3D12Device_CreateRenderTargetView(device, resource, &rtv_desc, rtv_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, &array_colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtv_handle, array_colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, resource,
|
||||
|
@ -704,6 +702,7 @@ void test_clear_unordered_access_view_image(void)
|
|||
{DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04030201},
|
||||
{DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {{0}}, {0x123, 0, 0, 0}, 0x00000023},
|
||||
{DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x04030201},
|
||||
{DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {0, 0, 0, 0}, 0x00000000},
|
||||
{DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {{0}}, {1, 2, 3, 4}, 0x00c01001},
|
||||
/* Test float clears with formats. */
|
||||
{DXGI_FORMAT_R16G16_UNORM, 1, 1, 0, 0, 1, 0, {{0}},
|
||||
|
|
|
@ -1159,8 +1159,8 @@ void test_bundle_state_inheritance(void)
|
|||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_uint(context.render_target, 0, queue, command_list, 0xff00ff00, 0);
|
||||
|
||||
ID3D12CommandAllocator_Release(bundle_allocator);
|
||||
ID3D12GraphicsCommandList_Release(bundle);
|
||||
ID3D12CommandAllocator_Release(bundle_allocator);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
@ -1449,6 +1449,721 @@ void test_vbv_stride_edge_cases(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect_state(void)
|
||||
{
|
||||
static const struct vec4 values = { 1000.0f, 2000.0f, 3000.0f, 4000.0f };
|
||||
D3D12_INDIRECT_ARGUMENT_DESC indirect_argument_descs[2];
|
||||
D3D12_COMMAND_SIGNATURE_DESC command_signature_desc;
|
||||
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12CommandSignature *command_signature;
|
||||
D3D12_SO_DECLARATION_ENTRY so_entries[1];
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_ROOT_PARAMETER root_parameters[4];
|
||||
ID3D12RootSignature *root_signatures[2];
|
||||
ID3D12Resource *argument_buffer_late;
|
||||
D3D12_STREAM_OUTPUT_BUFFER_VIEW sov;
|
||||
ID3D12Resource *streamout_buffer;
|
||||
D3D12_VERTEX_BUFFER_VIEW vbvs[2];
|
||||
ID3D12Resource *argument_buffer;
|
||||
struct test_context_desc desc;
|
||||
ID3D12PipelineState *psos[2];
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv;
|
||||
ID3D12CommandQueue *queue;
|
||||
const UINT so_stride = 16;
|
||||
ID3D12Resource *vbo[3];
|
||||
ID3D12Resource *ibo[2];
|
||||
unsigned int i, j, k;
|
||||
ID3D12Resource *cbv;
|
||||
ID3D12Resource *srv;
|
||||
ID3D12Resource *uav;
|
||||
HRESULT hr;
|
||||
|
||||
static const D3D12_INPUT_ELEMENT_DESC layout_desc[] =
|
||||
{
|
||||
{"COLOR", 0, DXGI_FORMAT_R32_FLOAT, 0, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R32_FLOAT, 1, 0, D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
struct test
|
||||
{
|
||||
const D3D12_INDIRECT_ARGUMENT_DESC *indirect_arguments;
|
||||
uint32_t indirect_argument_count;
|
||||
const void *argument_buffer_data;
|
||||
size_t argument_buffer_size;
|
||||
uint32_t api_max_count;
|
||||
const struct vec4 *expected_output;
|
||||
uint32_t expected_output_count;
|
||||
uint32_t stride;
|
||||
uint32_t pso_index;
|
||||
bool needs_root_sig;
|
||||
};
|
||||
|
||||
/* Modify root parameters. */
|
||||
struct root_constant_data
|
||||
{
|
||||
float constants[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify root parameters, but with a very large root signature to test boundary conditions. */
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC root_constant_spill_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 44 + 1, .Num32BitValuesToSet = 2 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED }
|
||||
};
|
||||
|
||||
static const struct root_constant_data root_constant_spill_data[] =
|
||||
{
|
||||
{
|
||||
.constants = { 100.0f, 500.0f },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.constants = { 200.0f, 800.0f },
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 2,
|
||||
.StartIndexLocation = 1, .StartInstanceLocation = 100, }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 root_constant_spill_expected[] =
|
||||
{
|
||||
{ 1000.0f, 64.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 100.0f, 500.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4000.0f },
|
||||
{ 1001.0f, 65.0f + 200.0f, 800.0f, 4001.0f },
|
||||
};
|
||||
|
||||
/* Modify VBOs. */
|
||||
struct indirect_vbo_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view[2];
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_data indirect_vbo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { { 0, 64, 8 }, { 0, 64, 16 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 2 }
|
||||
},
|
||||
{
|
||||
/* Test indirectly binding a NULL descriptor and a stride of 0. */
|
||||
.view = { { 0, 0, 0 }, { 0, 64, 0 } },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_expected[] =
|
||||
{
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4000.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4000.0f },
|
||||
{ 1064.0f, 2128.0f, 3000.0f, 4001.0f },
|
||||
{ 1066.0f, 2132.0f, 3000.0f, 4001.0f },
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. We seem to get null descriptor instead. */
|
||||
{ 1000.0f, 2016.0f, 3000.0f, 4000.0f }, /* This is buggy on WARP and AMD. */
|
||||
};
|
||||
|
||||
/* Modify just one VBO. */
|
||||
struct indirect_vbo_one_data
|
||||
{
|
||||
D3D12_VERTEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_vbo_one_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW, .VertexBuffer = { .Slot = 0 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
/* Fill buffer locations later. */
|
||||
struct indirect_vbo_one_data indirect_vbo_one_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 64, 8 },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.indexed = { .IndexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
}
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_vbo_one_expected[] =
|
||||
{
|
||||
{ 1128.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1130.0f, 2065.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect IBO */
|
||||
struct indirect_ibo_data
|
||||
{
|
||||
D3D12_INDEX_BUFFER_VIEW view;
|
||||
D3D12_DRAW_INDEXED_ARGUMENTS indexed;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_ibo_sig[2] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_INDEX_BUFFER_VIEW },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED },
|
||||
};
|
||||
|
||||
struct indirect_ibo_data indirect_ibo_data[] =
|
||||
{
|
||||
{
|
||||
.view = { 0, 0, DXGI_FORMAT_R32_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 2, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.view = { 0, 64, DXGI_FORMAT_R16_UINT },
|
||||
.indexed = { .IndexCountPerInstance = 4, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_ibo_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1016.0f, 2080.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
{ 1017.0f, 2081.0f, 3000.0f, 4000.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f, 4000.0f },
|
||||
};
|
||||
|
||||
/* Indirect root arguments */
|
||||
struct indirect_root_descriptor_data
|
||||
{
|
||||
D3D12_GPU_VIRTUAL_ADDRESS cbv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS srv;
|
||||
D3D12_GPU_VIRTUAL_ADDRESS uav;
|
||||
D3D12_DRAW_ARGUMENTS array;
|
||||
};
|
||||
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_root_descriptor_sig[4] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_SHADER_RESOURCE_VIEW, .ShaderResourceView = { .RootParameterIndex = 2 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_UNORDERED_ACCESS_VIEW, .UnorderedAccessView = { .RootParameterIndex = 3 } },
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_root_descriptor_data indirect_root_descriptor_data[] =
|
||||
{
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
{ .array = { .VertexCountPerInstance = 1, .InstanceCount = 1 } },
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_root_descriptor_expected[] =
|
||||
{
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 64.0f, 4000.0f + 2.0f },
|
||||
{ 1000.0f, 2064.0f, 3000.0f + 128.0f, 4000.0f + 3.0f },
|
||||
};
|
||||
|
||||
/* Test packing rules.
|
||||
* 64-bit aligned values are tightly packed with 32-bit alignment when they are in indirect command buffers. */
|
||||
struct indirect_alignment_data
|
||||
{
|
||||
float value;
|
||||
uint32_t cbv_va[2];
|
||||
D3D12_DRAW_ARGUMENTS arrays;
|
||||
};
|
||||
static const D3D12_INDIRECT_ARGUMENT_DESC indirect_alignment_sig[3] =
|
||||
{
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, .Constant = {
|
||||
.RootParameterIndex = 0, .DestOffsetIn32BitValues = 1, .Num32BitValuesToSet = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW, .ConstantBufferView = { .RootParameterIndex = 1 }},
|
||||
{ .Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW },
|
||||
};
|
||||
|
||||
struct indirect_alignment_data indirect_alignment_data[] =
|
||||
{
|
||||
{
|
||||
.value = 5.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
{
|
||||
.value = 6.0f,
|
||||
.arrays = { .VertexCountPerInstance = 1, .InstanceCount = 1 }
|
||||
},
|
||||
};
|
||||
|
||||
static const struct vec4 indirect_alignment_expected[] =
|
||||
{
|
||||
{ 1000.0f, 69.0f, 3064.0f, 4000.0f },
|
||||
{ 1000.0f, 70.0f, 3128.0f, 4000.0f },
|
||||
};
|
||||
|
||||
#define DECL_TEST(t, pso_index, needs_root_sig) { t##_sig, ARRAY_SIZE(t##_sig), t##_data, sizeof(t##_data), ARRAY_SIZE(t##_data), \
|
||||
t##_expected, ARRAY_SIZE(t##_expected), sizeof(*(t##_data)), pso_index, needs_root_sig }
|
||||
const struct test tests[] =
|
||||
{
|
||||
DECL_TEST(root_constant, 0, true),
|
||||
DECL_TEST(indirect_vbo, 0, false),
|
||||
DECL_TEST(indirect_vbo_one, 0, false),
|
||||
DECL_TEST(indirect_ibo, 0, false),
|
||||
DECL_TEST(indirect_root_descriptor, 0, true),
|
||||
DECL_TEST(indirect_alignment, 0, true),
|
||||
DECL_TEST(root_constant_spill, 1, true),
|
||||
DECL_TEST(indirect_root_descriptor, 1, true),
|
||||
};
|
||||
#undef DECL_TEST
|
||||
|
||||
uint32_t ibo_data[ARRAY_SIZE(ibo)][64];
|
||||
float vbo_data[ARRAY_SIZE(vbo)][64];
|
||||
float generic_data[4096];
|
||||
|
||||
static const DWORD vs_code_small_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x33b7b302, 0x34259b9b, 0x3e8568d9, 0x5a5e0c3e, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x00000001, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x00000000, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x00000000, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x00000000,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x00000000, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD vs_code_large_cbv[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer RootCBV : register(b0)
|
||||
{
|
||||
float a;
|
||||
};
|
||||
|
||||
StructuredBuffer<float> RootSRV : register(t0);
|
||||
|
||||
cbuffer RootConstants : register(b0, space1)
|
||||
{
|
||||
// Cannot use arrays for root constants in D3D12.
|
||||
float4 pad0, pad1, pad2, pad3, pad4, pad5, pad6, pad7, pad8, pad9, pad10;
|
||||
float4 root;
|
||||
};
|
||||
|
||||
float4 main(float c0 : COLOR0, float c1 : COLOR1, uint iid : SV_InstanceID) : SV_Position
|
||||
{
|
||||
return float4(c0, c1, a, RootSRV[0] + float(iid)) + root;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x99a057e8, 0x20344569, 0x434f8a7a, 0xf9171e08, 0x00000001, 0x00000268, 0x00000003,
|
||||
0x0000002c, 0x00000098, 0x000000cc, 0x4e475349, 0x00000064, 0x00000003, 0x00000008, 0x00000050,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000101, 0x00000050, 0x00000001, 0x00000000,
|
||||
0x00000003, 0x00000001, 0x00000101, 0x00000056, 0x00000000, 0x00000008, 0x00000001, 0x00000002,
|
||||
0x00000101, 0x4f4c4f43, 0x56530052, 0x736e495f, 0x636e6174, 0x00444965, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000194, 0x00010051, 0x00000065, 0x0100086a,
|
||||
0x07000059, 0x00308e46, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000, 0x07000059,
|
||||
0x00308e46, 0x00000001, 0x00000000, 0x00000000, 0x0000000c, 0x00000001, 0x070000a2, 0x00307e46,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000004, 0x00000000, 0x0300005f, 0x00101012, 0x00000000,
|
||||
0x0300005f, 0x00101012, 0x00000001, 0x04000060, 0x00101012, 0x00000002, 0x00000008, 0x04000067,
|
||||
0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x0a0000a7, 0x00100012, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x00004001, 0x00000000, 0x00207006, 0x00000000, 0x00000000, 0x05000056,
|
||||
0x00100022, 0x00000000, 0x0010100a, 0x00000002, 0x07000000, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x09000000, 0x00102012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x0030800a, 0x00000001, 0x00000000, 0x0000000b, 0x09000000, 0x00102022, 0x00000000, 0x0010100a,
|
||||
0x00000001, 0x0030801a, 0x00000001, 0x00000000, 0x0000000b, 0x0b000000, 0x00102042, 0x00000000,
|
||||
0x0030800a, 0x00000000, 0x00000000, 0x00000000, 0x0030802a, 0x00000001, 0x00000000, 0x0000000b,
|
||||
0x09000000, 0x00102082, 0x00000000, 0x0010000a, 0x00000000, 0x0030803a, 0x00000001, 0x00000000,
|
||||
0x0000000b, 0x0100003e,
|
||||
};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_root_signature = true;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(ibo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(ibo_data[j]); i++)
|
||||
ibo_data[j][i] = j * 16 + i;
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(vbo); j++)
|
||||
for (i = 0; i < ARRAY_SIZE(vbo_data[j]); i++)
|
||||
vbo_data[j][i] = (float)(j * ARRAY_SIZE(vbo_data[j]) + i);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(generic_data); i++)
|
||||
generic_data[i] = (float)i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ibo[i] = create_upload_buffer(context.device, sizeof(ibo_data[i]), ibo_data[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
vbo[i] = create_upload_buffer(context.device, sizeof(vbo_data[i]), vbo_data[i]);
|
||||
cbv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
srv = create_upload_buffer(context.device, sizeof(generic_data), generic_data);
|
||||
uav = create_default_buffer(context.device, sizeof(generic_data),
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
indirect_vbo_data[0].view[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
indirect_vbo_data[0].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_data[1].view[0].BufferLocation = 0;
|
||||
indirect_vbo_data[1].view[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]) + 64;
|
||||
|
||||
indirect_vbo_one_data[0].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[2]);
|
||||
indirect_vbo_one_data[1].view.BufferLocation = 0;
|
||||
|
||||
indirect_ibo_data[1].view.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[1]);
|
||||
|
||||
indirect_root_descriptor_data[0].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 256;
|
||||
indirect_root_descriptor_data[0].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 8;
|
||||
indirect_root_descriptor_data[0].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 4;
|
||||
indirect_root_descriptor_data[1].cbv = ID3D12Resource_GetGPUVirtualAddress(cbv) + 512;
|
||||
indirect_root_descriptor_data[1].srv = ID3D12Resource_GetGPUVirtualAddress(srv) + 12;
|
||||
indirect_root_descriptor_data[1].uav = ID3D12Resource_GetGPUVirtualAddress(uav) + 8;
|
||||
|
||||
memcpy(indirect_alignment_data[0].cbv_va, &indirect_root_descriptor_data[0].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
memcpy(indirect_alignment_data[1].cbv_va, &indirect_root_descriptor_data[1].cbv, sizeof(D3D12_GPU_VIRTUAL_ADDRESS));
|
||||
|
||||
memset(&root_signature_desc, 0, sizeof(root_signature_desc));
|
||||
root_signature_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT |
|
||||
D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
|
||||
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
root_signature_desc.pParameters = root_parameters;
|
||||
root_signature_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
root_parameters[0].Constants.RegisterSpace = 1;
|
||||
root_parameters[0].Constants.Num32BitValues = 4;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
root_parameters[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
root_parameters[0].Constants.Num32BitValues = 48;
|
||||
hr = create_root_signature(context.device, &root_signature_desc, &root_signatures[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create root signature, hr #%x.\n", hr);
|
||||
|
||||
memset(so_entries, 0, sizeof(so_entries));
|
||||
so_entries[0].ComponentCount = 4;
|
||||
so_entries[0].SemanticName = "SV_Position";
|
||||
|
||||
memset(&pso_desc, 0, sizeof(pso_desc));
|
||||
pso_desc.VS.pShaderBytecode = vs_code_small_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_small_cbv);
|
||||
pso_desc.StreamOutput.NumStrides = 1;
|
||||
pso_desc.StreamOutput.pBufferStrides = &so_stride;
|
||||
pso_desc.StreamOutput.pSODeclaration = so_entries;
|
||||
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_entries);
|
||||
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
|
||||
pso_desc.pRootSignature = root_signatures[0];
|
||||
pso_desc.SampleDesc.Count = 1;
|
||||
pso_desc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT;
|
||||
pso_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID;
|
||||
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pso_desc.InputLayout.NumElements = ARRAY_SIZE(layout_desc);
|
||||
pso_desc.InputLayout.pInputElementDescs = layout_desc;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[0]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
pso_desc.VS.pShaderBytecode = vs_code_large_cbv;
|
||||
pso_desc.VS.BytecodeLength = sizeof(vs_code_large_cbv);
|
||||
pso_desc.pRootSignature = root_signatures[1];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc, &IID_ID3D12PipelineState, (void**)&psos[1]);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
|
||||
/* Verify sanity checks.
|
||||
* As per validation layers, there must be exactly one command in the signature.
|
||||
* It must come last. Verify that we check for this. */
|
||||
memset(&command_signature_desc, 0, sizeof(command_signature_desc));
|
||||
command_signature_desc.NumArgumentDescs = 1;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.NumArgumentDescs = 2;
|
||||
command_signature_desc.pArgumentDescs = indirect_argument_descs;
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_VERTEX_BUFFER_VIEW);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
command_signature_desc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS) + sizeof(D3D12_DRAW_INDEXED_ARGUMENTS);
|
||||
indirect_argument_descs[0].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
indirect_argument_descs[1].Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc, NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
ok(hr == E_INVALIDARG, "Unexpected hr #%x.\n", hr);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
struct vec4 expect_reset_state[2];
|
||||
const struct vec4 *expect, *v;
|
||||
uint32_t expected_output_size;
|
||||
uint32_t clear_vbo_mask;
|
||||
bool root_cbv;
|
||||
uint32_t size;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
command_signature_desc.ByteStride = tests[i].stride;
|
||||
command_signature_desc.pArgumentDescs = tests[i].indirect_arguments;
|
||||
command_signature_desc.NumArgumentDescs = tests[i].indirect_argument_count;
|
||||
command_signature_desc.NodeMask = 0;
|
||||
hr = ID3D12Device_CreateCommandSignature(context.device, &command_signature_desc,
|
||||
tests[i].needs_root_sig ? root_signatures[tests[i].pso_index] : NULL,
|
||||
&IID_ID3D12CommandSignature, (void**)&command_signature);
|
||||
|
||||
/* Updating root CBV requires push BDA path, which we don't enable on NV by default yet. */
|
||||
root_cbv = false;
|
||||
for (j = 0; j < tests[i].indirect_argument_count; j++)
|
||||
{
|
||||
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT_BUFFER_VIEW)
|
||||
{
|
||||
root_cbv = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
if (root_cbv && is_nvidia_device(context.device))
|
||||
skip("Creating indirect root CBV update failed. If the GPU is NVIDIA, try VKD3D_CONFIG=force_raw_va_cbv.\n");
|
||||
else
|
||||
skip("Failed creating command signature, skipping test.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
argument_buffer = create_upload_buffer(context.device, 256 * 1024, NULL);
|
||||
argument_buffer_late = create_default_buffer(context.device, 256 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
||||
#define UNALIGNED_ARGUMENT_BUFFER_OFFSET (64 * 1024 + 4)
|
||||
#define UNALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4)
|
||||
#define ALIGNED_COUNT_BUFFER_OFFSET (128 * 1024 + 4 * 1024)
|
||||
{
|
||||
uint8_t *ptr;
|
||||
ID3D12Resource_Map(argument_buffer, 0, NULL, (void**)&ptr);
|
||||
memcpy(ptr, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
|
||||
memcpy(ptr + UNALIGNED_ARGUMENT_BUFFER_OFFSET, tests[i].argument_buffer_data, tests[i].argument_buffer_size);
|
||||
memcpy(ptr + UNALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
|
||||
memcpy(ptr + ALIGNED_COUNT_BUFFER_OFFSET, &tests[i].api_max_count, sizeof(tests[i].api_max_count));
|
||||
ID3D12Resource_Unmap(argument_buffer, 0, NULL);
|
||||
}
|
||||
|
||||
streamout_buffer = create_default_buffer(context.device, 64 * 1024,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, root_signatures[tests[i].pso_index]);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, psos[tests[i].pso_index]);
|
||||
sov.SizeInBytes = 64 * 1024 - sizeof(struct vec4);
|
||||
sov.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer) + sizeof(struct vec4);
|
||||
sov.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(streamout_buffer);
|
||||
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sov);
|
||||
|
||||
/* Set up default rendering state. */
|
||||
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(ibo[0]);
|
||||
ibv.SizeInBytes = sizeof(ibo_data[0]);
|
||||
ibv.Format = DXGI_FORMAT_R32_UINT;
|
||||
vbvs[0].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[0]);
|
||||
vbvs[0].SizeInBytes = sizeof(vbo_data[0]);
|
||||
vbvs[0].StrideInBytes = 4;
|
||||
vbvs[1].BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo[1]);
|
||||
vbvs[1].SizeInBytes = sizeof(vbo_data[1]);
|
||||
vbvs[1].StrideInBytes = 4;
|
||||
|
||||
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_POINTLIST);
|
||||
ID3D12GraphicsCommandList_IASetVertexBuffers(command_list, 0, 2, vbvs);
|
||||
|
||||
for (j = 0; j < (tests[i].pso_index ? 12 : 1); j++)
|
||||
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(command_list, 0, 4, &values, 4 * j);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, tests[i].api_max_count,
|
||||
argument_buffer, 0, NULL, 0);
|
||||
/* Test equivalent call with indirect count. */
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer, UNALIGNED_ARGUMENT_BUFFER_OFFSET,
|
||||
argument_buffer, UNALIGNED_COUNT_BUFFER_OFFSET);
|
||||
/* Test equivalent, but now with late transition to INDIRECT. */
|
||||
ID3D12GraphicsCommandList_CopyResource(command_list, argument_buffer_late, argument_buffer);
|
||||
transition_resource_state(command_list, argument_buffer_late, D3D12_RESOURCE_STATE_COPY_DEST,
|
||||
D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT);
|
||||
ID3D12GraphicsCommandList_ExecuteIndirect(command_list, command_signature, 1024,
|
||||
argument_buffer_late, 0, argument_buffer_late, ALIGNED_COUNT_BUFFER_OFFSET);
|
||||
|
||||
/* Root descriptors which are part of the state block are cleared to NULL. Recover them here
|
||||
* since attempting to draw the next test will crash the GPU. */
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootConstantBufferView(command_list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootShaderResourceView(command_list, 2,
|
||||
ID3D12Resource_GetGPUVirtualAddress(srv));
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootUnorderedAccessView(command_list, 3,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav));
|
||||
|
||||
/* Other state is cleared to 0. */
|
||||
|
||||
ID3D12GraphicsCommandList_DrawInstanced(command_list, 2, 1, 0, 0);
|
||||
transition_resource_state(command_list, streamout_buffer, D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(streamout_buffer, DXGI_FORMAT_R32G32B32A32_FLOAT, &rb, queue, command_list);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
expected_output_size = (tests[i].expected_output_count * 3 + 2) * sizeof(struct vec4);
|
||||
size = get_readback_uint(&rb, 0, 0, 0);
|
||||
ok(size == expected_output_size, "Expected size %u, got %u.\n", expected_output_size, size);
|
||||
|
||||
for (j = 0; j < tests[i].expected_output_count; j++)
|
||||
{
|
||||
expect = &tests[i].expected_output[j];
|
||||
v = get_readback_vec4(&rb, j + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (direct count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (indirect count) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
|
||||
v = get_readback_vec4(&rb, j + 2 * tests[i].expected_output_count + 1, 0);
|
||||
ok(compare_vec4(v, expect, 0), "Element (late latch) %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
clear_vbo_mask = 0;
|
||||
expect_reset_state[0] = values;
|
||||
|
||||
/* Root constant state is cleared to zero if it's part of the signature. */
|
||||
for (j = 0; j < tests[i].indirect_argument_count; j++)
|
||||
{
|
||||
if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT)
|
||||
{
|
||||
for (k = 0; k < tests[i].indirect_arguments[j].Constant.Num32BitValuesToSet; k++)
|
||||
(&expect_reset_state[0].x)[(tests[i].indirect_arguments[j].Constant.DestOffsetIn32BitValues + k) % 4] = 0.0f;
|
||||
}
|
||||
else if (tests[i].indirect_arguments[j].Type == D3D12_INDIRECT_ARGUMENT_TYPE_VERTEX_BUFFER_VIEW)
|
||||
clear_vbo_mask |= 1u << tests[i].indirect_arguments[j].VertexBuffer.Slot;
|
||||
}
|
||||
|
||||
expect_reset_state[1] = expect_reset_state[0];
|
||||
|
||||
/* VBO/IBO state is cleared to zero if it's part of the signature.
|
||||
* A NULL IBO should be seen as an IBO which only reads index 0. */
|
||||
if (!(clear_vbo_mask & (1u << 0)))
|
||||
expect_reset_state[1].x += 1.0f;
|
||||
|
||||
if (!(clear_vbo_mask & (1u << 1)))
|
||||
{
|
||||
expect_reset_state[0].y += 64.0f;
|
||||
expect_reset_state[1].y += 65.0f;
|
||||
}
|
||||
|
||||
for (j = 0; j < 2; j++)
|
||||
{
|
||||
v = get_readback_vec4(&rb, j + 1 + 3 * tests[i].expected_output_count, 0);
|
||||
expect = &expect_reset_state[j];
|
||||
ok(compare_vec4(v, expect, 0), "Post-reset element %u failed: (%f, %f, %f, %f) != (%f, %f, %f, %f)\n",
|
||||
j, v->x, v->y, v->z, v->w, expect->x, expect->y, expect->z, expect->w);
|
||||
}
|
||||
|
||||
ID3D12CommandSignature_Release(command_signature);
|
||||
ID3D12Resource_Release(argument_buffer);
|
||||
ID3D12Resource_Release(argument_buffer_late);
|
||||
ID3D12Resource_Release(streamout_buffer);
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
ID3D12PipelineState_Release(psos[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(root_signatures); i++)
|
||||
ID3D12RootSignature_Release(root_signatures[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(vbo); i++)
|
||||
ID3D12Resource_Release(vbo[i]);
|
||||
for (i = 0; i < ARRAY_SIZE(ibo); i++)
|
||||
ID3D12Resource_Release(ibo[i]);
|
||||
ID3D12Resource_Release(cbv);
|
||||
ID3D12Resource_Release(srv);
|
||||
ID3D12Resource_Release(uav);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_execute_indirect(void)
|
||||
{
|
||||
ID3D12Resource *argument_buffer, *count_buffer, *uav;
|
||||
|
@ -2860,9 +3575,9 @@ void test_conditional_rendering(void)
|
|||
|
||||
void test_write_buffer_immediate(void)
|
||||
{
|
||||
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER parameters[2];
|
||||
D3D12_WRITEBUFFERIMMEDIATE_PARAMETER parameters[3];
|
||||
ID3D12GraphicsCommandList2 *command_list2;
|
||||
D3D12_WRITEBUFFERIMMEDIATE_MODE modes[2];
|
||||
D3D12_WRITEBUFFERIMMEDIATE_MODE modes[3];
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
|
@ -2872,7 +3587,7 @@ void test_write_buffer_immediate(void)
|
|||
unsigned int value;
|
||||
HRESULT hr;
|
||||
|
||||
static const unsigned int data_values[] = {0xdeadbeef, 0xf00baa};
|
||||
static const unsigned int data_values[] = {0xdeadbeef, 0xf00baa, 0xdeadbeef, 0xf00baa};
|
||||
|
||||
if (!init_test_context(&context, NULL))
|
||||
return;
|
||||
|
@ -2897,6 +3612,8 @@ void test_write_buffer_immediate(void)
|
|||
parameters[0].Value = 0x1020304;
|
||||
parameters[1].Dest = parameters[0].Dest + sizeof(data_values[0]);
|
||||
parameters[1].Value = 0xc0d0e0f;
|
||||
parameters[2].Dest = parameters[0].Dest + sizeof(data_values[0]) * 3;
|
||||
parameters[2].Value = 0x5060708;
|
||||
ID3D12GraphicsCommandList2_WriteBufferImmediate(command_list2, ARRAY_SIZE(parameters), parameters, NULL);
|
||||
hr = ID3D12GraphicsCommandList_Close(command_list);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
@ -2909,13 +3626,19 @@ void test_write_buffer_immediate(void)
|
|||
ok(value == parameters[0].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[0].Value);
|
||||
value = get_readback_uint(&rb, 1, 0, 0);
|
||||
ok(value == parameters[1].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[1].Value);
|
||||
value = get_readback_uint(&rb, 2, 0, 0);
|
||||
ok(value == data_values[2], "Got unexpected value %#x, expected %#x.\n", value, data_values[2]);
|
||||
value = get_readback_uint(&rb, 3, 0, 0);
|
||||
ok(value == parameters[2].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[2].Value);
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
parameters[0].Value = 0x2030405;
|
||||
parameters[1].Value = 0xb0c0d0e;
|
||||
modes[0] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_IN;
|
||||
modes[1] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT;
|
||||
parameters[2].Value = 0x708090a;
|
||||
modes[0] = D3D12_WRITEBUFFERIMMEDIATE_MODE_DEFAULT;
|
||||
modes[1] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_IN;
|
||||
modes[2] = D3D12_WRITEBUFFERIMMEDIATE_MODE_MARKER_OUT;
|
||||
ID3D12GraphicsCommandList2_WriteBufferImmediate(command_list2, ARRAY_SIZE(parameters), parameters, modes);
|
||||
hr = ID3D12GraphicsCommandList_Close(command_list);
|
||||
ok(hr == S_OK, "Got unexpected hr %#x.\n", hr);
|
||||
|
@ -2928,6 +3651,8 @@ void test_write_buffer_immediate(void)
|
|||
ok(value == parameters[0].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[0].Value);
|
||||
value = get_readback_uint(&rb, 1, 0, 0);
|
||||
ok(value == parameters[1].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[1].Value);
|
||||
value = get_readback_uint(&rb, 3, 0, 0);
|
||||
ok(value == parameters[2].Value, "Got unexpected value %#x, expected %#x.\n", value, parameters[2].Value);
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
|
||||
|
|
|
@ -554,9 +554,9 @@ void test_copy_texture_buffer(void)
|
|||
|
||||
void test_copy_buffer_to_depth_stencil(void)
|
||||
{
|
||||
ID3D12Resource *src_buffer_stencil = NULL;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
struct resource_readback rb_stencil;
|
||||
ID3D12Resource *src_buffer_stencil;
|
||||
struct resource_readback rb_depth;
|
||||
ID3D12Resource *src_buffer_depth;
|
||||
struct test_context_desc desc;
|
||||
|
|
|
@ -1533,3 +1533,461 @@ void test_stencil_export_dxil(void)
|
|||
test_stencil_export(true);
|
||||
}
|
||||
|
||||
void test_depth_stencil_layout_tracking(void)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
struct depth_stencil_resource ds;
|
||||
struct test_context_desc desc;
|
||||
ID3D12PipelineState *psos[4];
|
||||
struct test_context context;
|
||||
D3D12_DISCARD_REGION region;
|
||||
ID3D12RootSignature *rs;
|
||||
unsigned int i, j;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer C : register(b0)
|
||||
{
|
||||
float z;
|
||||
};
|
||||
|
||||
float4 main(uint vid : SV_VertexID) : SV_Position
|
||||
{
|
||||
if (vid == 0)
|
||||
return float4(-1.0, -1.0, z, 1.0);
|
||||
else if (vid == 1)
|
||||
return float4(-1.0, +3.0, z, 1.0);
|
||||
else
|
||||
return float4(+3.0, -1.0, z, 1.0);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x31be9212, 0x8e44bbde, 0x8f0a87b5, 0xb8d5783b, 0x00000001, 0x000001dc, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978,
|
||||
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003,
|
||||
0x00000000, 0x0000000f, 0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000140, 0x00010050,
|
||||
0x00000050, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x04000060, 0x00101012,
|
||||
0x00000000, 0x00000006, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x02000068, 0x00000001,
|
||||
0x0300001f, 0x0010100a, 0x00000000, 0x08000036, 0x001020b2, 0x00000000, 0x00004002, 0xbf800000,
|
||||
0xbf800000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000, 0x0020800a, 0x00000000,
|
||||
0x00000000, 0x0100003e, 0x01000012, 0x07000020, 0x00100012, 0x00000000, 0x0010100a, 0x00000000,
|
||||
0x00004001, 0x00000001, 0x0304001f, 0x0010000a, 0x00000000, 0x08000036, 0x001020b2, 0x00000000,
|
||||
0x00004002, 0xbf800000, 0x40400000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000,
|
||||
0x0020800a, 0x00000000, 0x00000000, 0x0100003e, 0x01000012, 0x08000036, 0x001020b2, 0x00000000,
|
||||
0x00004002, 0x40400000, 0xbf800000, 0x00000000, 0x3f800000, 0x06000036, 0x00102042, 0x00000000,
|
||||
0x0020800a, 0x00000000, 0x00000000, 0x0100003e, 0x01000015, 0x01000015, 0x0100003e,
|
||||
};
|
||||
|
||||
static const DWORD ps_code[] =
|
||||
{
|
||||
#if 0
|
||||
void main() {}
|
||||
#endif
|
||||
0x43425844, 0x499d4ed5, 0xbbe2842c, 0x179313ee, 0xde5cd5d9, 0x00000001, 0x00000064, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000010, 0x00000050, 0x00000004, 0x0100086a,
|
||||
0x0100003e,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
static const D3D12_SHADER_BYTECODE ps = SHADER_BYTECODE(ps_code);
|
||||
|
||||
enum draw_type
|
||||
{
|
||||
DRAW_TYPE_DRAW,
|
||||
DRAW_TYPE_TRANSITION,
|
||||
DRAW_TYPE_CLEAR,
|
||||
DRAW_TYPE_DISCARD,
|
||||
};
|
||||
|
||||
struct draw
|
||||
{
|
||||
bool depth_write;
|
||||
bool stencil_write;
|
||||
enum draw_type type;
|
||||
D3D12_RECT rect;
|
||||
float z;
|
||||
uint8_t stencil;
|
||||
};
|
||||
|
||||
static const struct draw test_full_promotion[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_promotion_no_read[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_promotion[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* Expect transition to WRITE/READ */
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* Expect transition to WRITE/WRITE */
|
||||
{ false, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_implicit_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_explicit_transition[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_TRANSITION },
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_transition_depth[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_TRANSITION },
|
||||
/* Mark depth as optimal. */
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* Promote stencil state here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_transition_stencil[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_TRANSITION },
|
||||
/* Mark stencil as optimal. */
|
||||
{ true, true, DRAW_TYPE_TRANSITION },
|
||||
/* Promote depth state here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_clear_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_full_discard_transition[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DISCARD },
|
||||
/* We should already know the attachment is optimal. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_depth[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Promote stencil here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_stencil[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Promote depth here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_discard_depth[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_DISCARD },
|
||||
/* Promote stencil here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_discard_stencil[] =
|
||||
{
|
||||
{ false, true, DRAW_TYPE_DISCARD },
|
||||
/* Promote depth here. */
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
/* This should decay the resource back to READ_ONLY. */
|
||||
{ false, false, DRAW_TYPE_TRANSITION },
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay_depth[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 128 },
|
||||
{ false, true, DRAW_TYPE_TRANSITION },
|
||||
{ false, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_decay_stencil[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.5f, 0 },
|
||||
{ true, false, DRAW_TYPE_TRANSITION },
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_no_render_pass[] =
|
||||
{
|
||||
/* Both of these will be emitted as separate clear passes, but no UNDEFINED transition. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_separate_no_render_pass[] =
|
||||
{
|
||||
/* Same as above, but separate layouts. */
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_after_discard[] =
|
||||
{
|
||||
/* Both of these will be emitted as separate clear passes, but no UNDEFINED transition. */
|
||||
{ true, true, DRAW_TYPE_DISCARD },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_sub_clear_separate_after_discard[] =
|
||||
{
|
||||
/* Same as above, but separate layouts. */
|
||||
{ true, false, DRAW_TYPE_DISCARD },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ true, false, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_DISCARD },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 512 }, 0.0f, 0 },
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 512, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_clear_in_render_pass[] =
|
||||
{
|
||||
{ true, true, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* No need to split render pass here and promote layout. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_clear_in_render_pass_promote[] =
|
||||
{
|
||||
{ false, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.5f, 128 },
|
||||
/* Need to split render pass here and promote layout. */
|
||||
{ true, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
static const struct draw test_partial_clear_in_render_pass_promote[] =
|
||||
{
|
||||
{ true, false, DRAW_TYPE_DRAW, { 0, 0, 1024, 1024 }, 0.0f, 128 },
|
||||
/* Need to split render pass here and promote layout. */
|
||||
{ false, true, DRAW_TYPE_CLEAR, { 0, 0, 1024, 1024 }, 0.0f, 0 },
|
||||
};
|
||||
|
||||
struct test
|
||||
{
|
||||
const struct draw *draws;
|
||||
unsigned int draw_count;
|
||||
};
|
||||
|
||||
/* It's also useful to test this with validation layers on, since this is mostly a test to see if we handle
|
||||
* the layout transitions correctly. */
|
||||
static const struct test tests[] =
|
||||
{
|
||||
{ test_full_promotion, ARRAY_SIZE(test_full_promotion) },
|
||||
{ test_full_promotion_no_read, ARRAY_SIZE(test_full_promotion_no_read) },
|
||||
{ test_partial_promotion, ARRAY_SIZE(test_partial_promotion) },
|
||||
{ test_full_implicit_transition, ARRAY_SIZE(test_full_implicit_transition) },
|
||||
{ test_full_explicit_transition, ARRAY_SIZE(test_full_explicit_transition) },
|
||||
{ test_full_clear_transition, ARRAY_SIZE(test_full_clear_transition) },
|
||||
{ test_full_discard_transition, ARRAY_SIZE(test_full_discard_transition) },
|
||||
{ test_partial_transition_depth, ARRAY_SIZE(test_partial_transition_depth) },
|
||||
{ test_partial_transition_stencil, ARRAY_SIZE(test_partial_transition_stencil) },
|
||||
{ test_partial_clear_depth, ARRAY_SIZE(test_partial_clear_depth) },
|
||||
{ test_partial_clear_stencil, ARRAY_SIZE(test_partial_clear_stencil) },
|
||||
{ test_partial_discard_depth, ARRAY_SIZE(test_partial_discard_depth) },
|
||||
{ test_partial_discard_stencil, ARRAY_SIZE(test_partial_discard_stencil) },
|
||||
{ test_decay, ARRAY_SIZE(test_decay) },
|
||||
{ test_decay_depth, ARRAY_SIZE(test_decay_depth) },
|
||||
{ test_decay_stencil, ARRAY_SIZE(test_decay_stencil) },
|
||||
{ test_sub_clear_no_render_pass, ARRAY_SIZE(test_sub_clear_no_render_pass) },
|
||||
{ test_sub_clear_separate_no_render_pass, ARRAY_SIZE(test_sub_clear_separate_no_render_pass) },
|
||||
{ test_sub_clear_after_discard, ARRAY_SIZE(test_sub_clear_after_discard) },
|
||||
{ test_sub_clear_separate_after_discard, ARRAY_SIZE(test_sub_clear_separate_after_discard) },
|
||||
{ test_clear_in_render_pass, ARRAY_SIZE(test_clear_in_render_pass) },
|
||||
{ test_clear_in_render_pass_promote, ARRAY_SIZE(test_clear_in_render_pass_promote) },
|
||||
{ test_partial_clear_in_render_pass_promote, ARRAY_SIZE(test_partial_clear_in_render_pass_promote) },
|
||||
};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_render_target = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
init_depth_stencil(&ds, context.device, 1024, 1024, 1, 1,
|
||||
DXGI_FORMAT_R32G8X24_TYPELESS, DXGI_FORMAT_D32_FLOAT_S8X24_UINT, NULL);
|
||||
rs = create_32bit_constants_root_signature(context.device, 0, 1, D3D12_SHADER_VISIBILITY_VERTEX);
|
||||
|
||||
init_pipeline_state_desc(&pso_desc, rs, 0, &vs, &ps, NULL);
|
||||
|
||||
pso_desc.NumRenderTargets = 0;
|
||||
pso_desc.DSVFormat = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
|
||||
pso_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilEnable = TRUE;
|
||||
pso_desc.DepthStencilState.DepthEnable = TRUE;
|
||||
pso_desc.DepthStencilState.StencilReadMask = 0xFF;
|
||||
|
||||
if (i >= 2)
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilWriteMask = 0xFF;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_REPLACE;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_REPLACE;
|
||||
}
|
||||
else
|
||||
{
|
||||
pso_desc.DepthStencilState.StencilWriteMask = 0x00;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP;
|
||||
pso_desc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP;
|
||||
}
|
||||
|
||||
pso_desc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
pso_desc.DepthStencilState.BackFace = pso_desc.DepthStencilState.FrontFace;
|
||||
|
||||
pso_desc.DepthStencilState.DepthWriteMask = (i & 1) ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO;
|
||||
pso_desc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS;
|
||||
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&psos[i]);
|
||||
ok(SUCCEEDED(hr), "Failed to create graphics pipeline state, hr %#x.\n", hr);
|
||||
}
|
||||
|
||||
/* In the tests, begin command lists from a clean slate.
|
||||
* Implementation must assume the depth-stencil image is in read-only state until proven otherwise. */
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
const D3D12_VIEWPORT vp = { 0, 0, 1024, 1024, 0, 1 };
|
||||
D3D12_RESOURCE_STATES stencil_state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
||||
D3D12_RESOURCE_STATES depth_state = D3D12_RESOURCE_STATE_DEPTH_WRITE;
|
||||
D3D12_RESOURCE_STATES new_stencil_state;
|
||||
D3D12_RESOURCE_STATES new_depth_state;
|
||||
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
/* Initialize the DS image to a known state. */
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle,
|
||||
D3D12_CLEAR_FLAG_DEPTH | D3D12_CLEAR_FLAG_STENCIL,
|
||||
1.0f, 255, 0, NULL);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 1.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 255, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
ID3D12GraphicsCommandList_Close(context.list);
|
||||
exec_command_list(context.queue, context.list);
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(context.list, 0, NULL, FALSE, &ds.dsv_handle);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(context.list, 1, &vp);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(context.list, rs);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(context.list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
for (j = 0; j < tests[i].draw_count; j++)
|
||||
{
|
||||
switch (tests[i].draws[j].type)
|
||||
{
|
||||
case DRAW_TYPE_DRAW:
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(context.list, 1, &tests[i].draws[j].rect);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, psos[tests[i].draws[j].depth_write + tests[i].draws[j].stencil_write * 2]);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRoot32BitConstants(context.list, 0, 1, &tests[i].draws[j].z, 0);
|
||||
ID3D12GraphicsCommandList_OMSetStencilRef(context.list, tests[i].draws[j].stencil);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(context.list, 3, 1, 0, 0);
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_TRANSITION:
|
||||
new_depth_state = tests[i].draws[j].depth_write ? D3D12_RESOURCE_STATE_DEPTH_WRITE :
|
||||
(D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
new_stencil_state = tests[i].draws[j].stencil_write ? D3D12_RESOURCE_STATE_DEPTH_WRITE :
|
||||
(D3D12_RESOURCE_STATE_DEPTH_READ | D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE);
|
||||
|
||||
if (new_depth_state != depth_state)
|
||||
{
|
||||
transition_sub_resource_state(context.list, ds.texture, 0, depth_state, new_depth_state);
|
||||
depth_state = new_depth_state;
|
||||
}
|
||||
|
||||
if (new_stencil_state != stencil_state)
|
||||
{
|
||||
transition_sub_resource_state(context.list, ds.texture, 1, stencil_state, new_stencil_state);
|
||||
stencil_state = new_stencil_state;
|
||||
}
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_CLEAR:
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle,
|
||||
(tests[i].draws[j].depth_write ? D3D12_CLEAR_FLAG_DEPTH : 0) |
|
||||
(tests[i].draws[j].stencil_write ? D3D12_CLEAR_FLAG_STENCIL : 0),
|
||||
tests[i].draws[j].z, tests[i].draws[j].stencil, 1, &tests[i].draws[j].rect);
|
||||
break;
|
||||
|
||||
case DRAW_TYPE_DISCARD:
|
||||
region.NumRects = 0;
|
||||
region.pRects = NULL;
|
||||
|
||||
if (tests[i].draws[j].depth_write && tests[i].draws[j].stencil_write)
|
||||
{
|
||||
region.FirstSubresource = 0;
|
||||
region.NumSubresources = 2;
|
||||
}
|
||||
else if (tests[i].draws[j].depth_write)
|
||||
{
|
||||
region.FirstSubresource = 0;
|
||||
region.NumSubresources = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
region.FirstSubresource = 1;
|
||||
region.NumSubresources = 1;
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_DiscardResource(context.list, ds.texture, &region);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Normalize the resource state back to DEPTH_WRITE. */
|
||||
if (depth_state != D3D12_RESOURCE_STATE_DEPTH_WRITE)
|
||||
transition_sub_resource_state(context.list, ds.texture, 0, depth_state, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
if (stencil_state != D3D12_RESOURCE_STATE_DEPTH_WRITE)
|
||||
transition_sub_resource_state(context.list, ds.texture, 1, stencil_state, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 0, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_COPY_SOURCE, D3D12_RESOURCE_STATE_DEPTH_WRITE);
|
||||
ID3D12GraphicsCommandList_Close(context.list);
|
||||
exec_command_list(context.queue, context.list);
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.0f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
check_sub_resource_uint8(ds.texture, 1, context.queue, context.list, 0, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
|
||||
ID3D12RootSignature_Release(rs);
|
||||
for (i = 0; i < ARRAY_SIZE(psos); i++)
|
||||
ID3D12PipelineState_Release(psos[i]);
|
||||
destroy_depth_stencil(&ds);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -4552,31 +4552,41 @@ void test_typed_srv_uav_cast(void)
|
|||
{ DXGI_FORMAT_R16_UINT, DXGI_FORMAT_R16_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16_SINT, DXGI_FORMAT_R16_FLOAT, false, false },
|
||||
|
||||
/* Special D3D11 magic. For UAVs, we can reinterpret formats as the "always supported" types R32{U,I,F}.
|
||||
* If typeless, we can cast to any R32U/I/F format.
|
||||
* If not typeless, we follow float <-> non-float ban. */
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
/* 5.3.9.5 from D3D11 functional spec. 32-bit typeless formats
|
||||
* can be viewed as R32{U,I,F}. The D3D12 validation runtime appears to be buggy
|
||||
* and also allows fully typed views even if bits per component don't match.
|
||||
* This feature is derived from legacy D3D11 jank, so assume the validation layers are
|
||||
* just buggy. */
|
||||
|
||||
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R32_FLOAT, false, true },
|
||||
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_UINT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_SINT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_FLOAT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R32_FLOAT, false, true },
|
||||
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_FLOAT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_UINT, false, false },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_SINT, false, false },
|
||||
|
||||
/* D3D12 validation does not complain about these, but it should according to D3D11 functional spec.
|
||||
* No docs for D3D12 say otherwise.
|
||||
* These tests can trip assertions in drivers since we will not emit MUTABLE at all
|
||||
* for some of these tests. */
|
||||
#if 0
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UNORM, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R8G8B8A8_UINT, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UNORM, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_UINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_UINT, DXGI_FORMAT_R32_SINT, false, true },
|
||||
{ DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R32_FLOAT, false, true },
|
||||
#endif
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
|
@ -4877,3 +4887,633 @@ void test_typed_srv_cast_clear(void)
|
|||
ID3D12DescriptorHeap_Release(heap);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_uav_3d_sliced_view(void)
|
||||
{
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
D3D12_ROOT_PARAMETER root_params[2];
|
||||
ID3D12PipelineState *pso_poison;
|
||||
ID3D12PipelineState *pso_actual;
|
||||
struct resource_readback rb[2];
|
||||
D3D12_DESCRIPTOR_RANGE range;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE h;
|
||||
uint32_t reference[16][4][4];
|
||||
struct test_context context;
|
||||
ID3D12DescriptorHeap *heap;
|
||||
ID3D12Resource *resource;
|
||||
unsigned int x, y, z;
|
||||
unsigned int i;
|
||||
|
||||
static const DWORD cs_actual_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer C : register(b0) { uint value; }
|
||||
RWTexture3D<uint> T : register(u0);
|
||||
|
||||
[numthreads(4, 4, 16)]
|
||||
void main(uint3 thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint w, h, d;
|
||||
T.GetDimensions(w, h, d);
|
||||
if (thr.z < d)
|
||||
T[thr] = value | (w << 8) | (h << 16) | (d << 24);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xf1736792, 0x8492219a, 0x6751cced, 0xf0219682, 0x00000001, 0x00000188, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000134, 0x00050050, 0x0000004d, 0x0100086a,
|
||||
0x04000059, 0x00208e46, 0x00000000, 0x00000001, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444,
|
||||
0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000004, 0x00000004, 0x00000010,
|
||||
0x8900103d, 0x80000142, 0x00111103, 0x00100072, 0x00000000, 0x00004001, 0x00000000, 0x0011ee46,
|
||||
0x00000000, 0x0600004f, 0x00100082, 0x00000000, 0x0002002a, 0x0010002a, 0x00000000, 0x0304001f,
|
||||
0x0010003a, 0x00000000, 0x0a000029, 0x00100072, 0x00000000, 0x00100246, 0x00000000, 0x00004002,
|
||||
0x00000008, 0x00000010, 0x00000018, 0x00000000, 0x0800003c, 0x00100012, 0x00000000, 0x0010000a,
|
||||
0x00000000, 0x0020800a, 0x00000000, 0x00000000, 0x0700003c, 0x00100012, 0x00000000, 0x0010001a,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x0700003c, 0x00100012, 0x00000000, 0x0010002a, 0x00000000,
|
||||
0x0010000a, 0x00000000, 0x060000a4, 0x0011e0f2, 0x00000000, 0x00020a46, 0x00100006, 0x00000000,
|
||||
0x01000015, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE cs_actual = SHADER_BYTECODE(cs_actual_dxbc);
|
||||
|
||||
static const DWORD cs_poison_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
cbuffer C : register(b0) { uint value; }
|
||||
RWTexture3D<uint> T : register(u0);
|
||||
|
||||
[numthreads(4, 4, 16)]
|
||||
void main(uint3 thr : SV_DispatchThreadID)
|
||||
{
|
||||
T[thr] = 0xdeadca7;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x4c99e486, 0x7707bd40, 0xceb3b496, 0xe22f4397, 0x00000001, 0x000000b0, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x0000005c, 0x00050050, 0x00000017, 0x0100086a,
|
||||
0x0400289c, 0x0011e000, 0x00000000, 0x00004444, 0x0200005f, 0x00020072, 0x0400009b, 0x00000004,
|
||||
0x00000004, 0x00000010, 0x090000a4, 0x0011e0f2, 0x00000000, 0x00020a46, 0x00004002, 0x0deadca7,
|
||||
0x0deadca7, 0x0deadca7, 0x0deadca7, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE cs_poison = SHADER_BYTECODE(cs_poison_dxbc);
|
||||
|
||||
static const D3D12_TEX3D_UAV slices[] =
|
||||
{
|
||||
/* Just to clear everything */
|
||||
{ 0, 0, -1u }, /* -1 means all remaining slices. */
|
||||
{ 1, 0, -1u },
|
||||
/* ... */
|
||||
|
||||
{ 0, 0, 2 },
|
||||
{ 0, 5, 3 },
|
||||
{ 0, 9, 1 },
|
||||
{ 0, 12, 4 },
|
||||
{ 0, 10, 5 },
|
||||
{ 1, 0, 2 },
|
||||
{ 1, 4, 3 },
|
||||
{ 0, 15, -1u },
|
||||
/* WSize = 0 is not allowed. Trips DEVICE_LOST. */
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(root_params, 0, sizeof(root_params));
|
||||
memset(&range, 0, sizeof(range));
|
||||
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_params);
|
||||
rs_desc.pParameters = root_params;
|
||||
|
||||
root_params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_params[0].DescriptorTable.NumDescriptorRanges = 1;
|
||||
root_params[0].DescriptorTable.pDescriptorRanges = &range;
|
||||
range.NumDescriptors = 1;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
|
||||
root_params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
|
||||
root_params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_params[1].Constants.Num32BitValues = 1;
|
||||
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
pso_actual = create_compute_pipeline_state(context.device, context.root_signature, cs_actual);
|
||||
pso_poison = create_compute_pipeline_state(context.device, context.root_signature, cs_poison);
|
||||
|
||||
resource = create_default_texture3d(context.device, 4, 4, 16, 2, DXGI_FORMAT_R32_UINT,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
memset(&uav, 0, sizeof(uav));
|
||||
uav.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D;
|
||||
uav.Format = DXGI_FORMAT_R32_UINT;
|
||||
|
||||
heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, ARRAY_SIZE(slices));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(slices); i++)
|
||||
{
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap);
|
||||
h.ptr += i * ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
uav.Texture3D = slices[i];
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav, h);
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &heap);
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
|
||||
h = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(slices); i++)
|
||||
{
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0, h);
|
||||
ID3D12GraphicsCommandList_SetComputeRoot32BitConstant(context.list, 1, i + 1, 0);
|
||||
/* First, attempt to flood the descriptor with writes. Validates robustness. */
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_poison);
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
uav_barrier(context.list, resource);
|
||||
/* Now, only write in bounds. Makes sure FirstWSlice offset works. */
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_actual);
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
uav_barrier(context.list, resource);
|
||||
|
||||
h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_texture_readback_with_command_list(resource, 0, &rb[0], context.queue, context.list);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(slices); i++)
|
||||
{
|
||||
unsigned int num_slices;
|
||||
|
||||
if (slices[i].MipSlice != 0)
|
||||
continue;
|
||||
|
||||
num_slices = min(16 - slices[i].FirstWSlice, slices[i].WSize);
|
||||
|
||||
for (z = 0; z < num_slices; z++)
|
||||
{
|
||||
for (y = 0; y < 4; y++)
|
||||
{
|
||||
for (x = 0; x < 4; x++)
|
||||
{
|
||||
uint32_t *ref = &reference[z + slices[i].FirstWSlice][y][x];
|
||||
*ref = i + 1;
|
||||
*ref |= 4 << 8;
|
||||
*ref |= 4 << 16;
|
||||
*ref |= num_slices << 24;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (z = 0; z < 16; z++)
|
||||
{
|
||||
for (y = 0; y < 4; y++)
|
||||
{
|
||||
for (x = 0; x < 4; x++)
|
||||
{
|
||||
uint32_t value;
|
||||
value = get_readback_uint(&rb[0], x, y, z);
|
||||
todo ok(value == reference[z][y][x], "Error for mip 0 at %u, %u, %u. Got %x, expected %x.\n", x, y, z, value, reference[z][y][x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reset_command_list(context.list, context.allocator);
|
||||
get_texture_readback_with_command_list(resource, 1, &rb[1], context.queue, context.list);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(slices); i++)
|
||||
{
|
||||
unsigned int num_slices;
|
||||
|
||||
if (slices[i].MipSlice != 1)
|
||||
continue;
|
||||
|
||||
num_slices = min(8 - slices[i].FirstWSlice, slices[i].WSize);
|
||||
|
||||
for (z = 0; z < num_slices; z++)
|
||||
{
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
for (x = 0; x < 2; x++)
|
||||
{
|
||||
uint32_t *ref = &reference[z + slices[i].FirstWSlice][y][x];
|
||||
*ref = i + 1;
|
||||
*ref |= 2 << 8;
|
||||
*ref |= 2 << 16;
|
||||
*ref |= num_slices << 24;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (z = 0; z < 8; z++)
|
||||
{
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
for (x = 0; x < 2; x++)
|
||||
{
|
||||
uint32_t value;
|
||||
value = get_readback_uint(&rb[1], x, y, z);
|
||||
todo ok(value == reference[z][y][x], "Error for mip 1 at %u, %u, %u. Got %x, expected %x.\n", x, y, z, value, reference[z][y][x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(rb); i++)
|
||||
release_resource_readback(&rb[i]);
|
||||
ID3D12Resource_Release(resource);
|
||||
ID3D12PipelineState_Release(pso_actual);
|
||||
ID3D12PipelineState_Release(pso_poison);
|
||||
ID3D12DescriptorHeap_Release(heap);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
/*
 * Exploratory probe of how root-descriptor SRVs behave when a shader indexes
 * them with a negative or wrapping offset.
 *
 * The enabled (#if 1) branch only reports that the test is skipped. The
 * disabled branch:
 *   - builds a root signature with a root UAV (u0) and two root SRVs (t0, t1),
 *   - uploads eight consecutive uints (0..7) and binds both SRVs 16 bytes into
 *     that buffer, so that valid data exists on both sides of the bound address,
 *   - runs a single-thread DXIL compute shader that reads at index -1,
 *     index 1u << 30, byte offset -4 and byte offset 0, and stores the four
 *     results as one uint4 into the UAV,
 *   - reads the four values back and prints them.
 *
 * Nothing is asserted with ok(); the values are only printed through skip().
 */
void test_root_descriptor_offset_sign(void)
{
    /* Exploratory test in nature. Will likely crash GPU if not on native drivers. Tweak ifdef to run it. */
#if 1
    skip("Skipping exploratory test for root descriptor over/underflow test.\n");
#else
    ID3D12RootSignature *root_signature;
    D3D12_ROOT_PARAMETER root_params[3];
    D3D12_ROOT_SIGNATURE_DESC rs_desc;
    ID3D12Resource *output_buffer;
    ID3D12Resource *input_buffer;
    struct resource_readback rb;
    struct test_context context;
    ID3D12PipelineState *pso;
    uint32_t values[4];
    unsigned int i;

    static const BYTE cs_code_dxil[] =
    {
#if 0
        RWStructuredBuffer<uint4> RW : register(u0);
        StructuredBuffer<uint> R0 : register(t0);
        ByteAddressBuffer R1 : register(t1);

        [numthreads(1, 1, 1)]
        void main()
        {
            uint a = R0[-1]; // Negative index
            uint b = R0[1u << 30]; // offset 4 GB. Does it overflow back to 0?
            uint c = R1.Load(-4); // Negative offset
            uint d = R1.Load(0);
            RW[0] = uint4(a, b, c, d);
        }
#endif
        0x44, 0x58, 0x42, 0x43, 0xac, 0xbf, 0xf4, 0x1f, 0x2f, 0x84, 0x34, 0x51, 0x10, 0xd2, 0xe1, 0x21, 0x95, 0x3b, 0xc5, 0x21, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
        0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
        0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe5, 0x8f, 0xa6, 0x7e, 0x5d, 0xa7, 0xe6, 0xd6, 0x02, 0xac, 0xbd, 0xbf, 0x6f, 0x1b, 0xee, 0xc4, 0x44, 0x58, 0x49, 0x4c,
        0xe8, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x7a, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xd0, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
        0x21, 0x0c, 0x00, 0x00, 0x71, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
        0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
        0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
        0x51, 0x18, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xaa, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0x00, 0x00, 0x00,
        0x49, 0x18, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x45, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04,
        0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x60, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0x10, 0x1a, 0xf7,
        0x0c, 0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0x21, 0xd0, 0x0c, 0x0b, 0x81, 0x02, 0x32, 0x47, 0x00, 0x06, 0x73, 0x04, 0x41, 0x31, 0x8a, 0x19, 0xc6, 0x1c, 0x42, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f,
        0x21, 0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x23, 0xb7, 0x8d, 0x0a, 0x63, 0x8c, 0x31, 0xa5, 0x50, 0xa6, 0x18, 0x43, 0xab, 0x28, 0xc0, 0x14, 0x63, 0x8c, 0x31, 0x66, 0x50, 0x1b, 0x08, 0x98, 0x89,
        0x0c, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e, 0xea, 0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1, 0x1c, 0xca,
        0x41, 0x1e, 0xf8, 0xa0, 0x1e, 0xdc, 0x61, 0x1e, 0xd2, 0xe1, 0x1c, 0xdc, 0xa1, 0x1c, 0xc8, 0x01, 0x0c, 0xd2, 0xc1, 0x1d, 0xe8, 0xc1, 0x0f, 0x50, 0x60, 0x08, 0x1e, 0x26, 0x4d, 0x11, 0x25, 0x4c,
        0xfe, 0x86, 0x4d, 0x84, 0x36, 0x0c, 0x11, 0x21, 0x49, 0x1b, 0x55, 0x14, 0x44, 0x84, 0x02, 0x43, 0x72, 0x18, 0x81, 0x30, 0x66, 0x92, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03,
        0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xb0, 0x87, 0x72, 0x18,
        0x07, 0x7a, 0x78, 0x07, 0x79, 0xe0, 0x83, 0x7a, 0x70, 0x87, 0x79, 0x48, 0x87, 0x73, 0x70, 0x87, 0x72, 0x20, 0x07, 0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x36, 0x00, 0x03, 0x3a, 0xf0, 0x03, 0x30,
        0xf0, 0x03, 0x14, 0x50, 0xaa, 0x73, 0x04, 0xa0, 0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50,
        0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78,
        0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07,
        0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0,
        0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x86, 0x3c, 0x08, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x16, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34,
        0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x90, 0x05, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
        0xc6, 0x04, 0x43, 0x32, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08, 0x65, 0x51, 0x20, 0x74, 0x46, 0x00, 0xe8, 0x16, 0x08, 0xcd, 0x19, 0x00, 0xb2, 0x33, 0x00, 0x14, 0x67, 0x00, 0x00, 0x00,
        0x79, 0x18, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
        0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x04,
        0x83, 0x98, 0x20, 0x18, 0xc5, 0x06, 0x61, 0x20, 0x26, 0x08, 0x86, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0xd1, 0x43, 0x60, 0x82, 0x60, 0x1c, 0x13, 0x04,
        0x03, 0xd9, 0x20, 0x0c, 0xcd, 0x86, 0x84, 0x50, 0x16, 0x82, 0x18, 0x18, 0xc2, 0x99, 0x20, 0x4c, 0xcd, 0x04, 0xc1, 0x48, 0x36, 0x24, 0x03, 0xb4, 0x10, 0xc3, 0x10, 0x11, 0xc0, 0x06, 0xe1, 0x91,
        0x26, 0x08, 0x96, 0x33, 0x41, 0x38, 0x96, 0x09, 0x82, 0xa1, 0x6c, 0x10, 0x06, 0x6b, 0xc3, 0x42, 0x50, 0x0b, 0x41, 0x0c, 0x4c, 0x55, 0x55, 0xd7, 0x86, 0x00, 0xdb, 0x40, 0x4c, 0x19, 0x00, 0x4c,
        0x10, 0x04, 0x80, 0x44, 0x5b, 0x58, 0x9a, 0xdb, 0x04, 0xe1, 0x62, 0x36, 0x0c, 0xc3, 0x30, 0x6c, 0x20, 0x88, 0xae, 0xf1, 0x36, 0x14, 0x1b, 0x07, 0x68, 0x5f, 0x15, 0x36, 0x36, 0xbb, 0x36, 0x97,
        0x34, 0xb2, 0x32, 0x37, 0xba, 0x29, 0x41, 0x50, 0x85, 0x0c, 0xcf, 0xc5, 0xae, 0x4c, 0x6e, 0x2e, 0xed, 0xcd, 0x6d, 0x4a, 0x40, 0x34, 0x21, 0xc3, 0x73, 0xb1, 0x0b, 0x63, 0xb3, 0x2b, 0x93, 0x9b,
        0x12, 0x18, 0x75, 0xc8, 0xf0, 0x5c, 0xe6, 0xd0, 0xc2, 0xc8, 0xca, 0xe4, 0x9a, 0xde, 0xc8, 0xca, 0xd8, 0xa6, 0x04, 0x48, 0x19, 0x32, 0x3c, 0x17, 0xb9, 0xb2, 0xb9, 0xb7, 0x3a, 0xb9, 0xb1, 0xb2,
        0xb9, 0x29, 0x41, 0x56, 0x87, 0x0c, 0xcf, 0xa5, 0xcc, 0x8d, 0x4e, 0x2e, 0x0f, 0xea, 0x2d, 0xcd, 0x8d, 0x6e, 0x6e, 0x4a, 0xf0, 0x01, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
        0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
        0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
        0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
        0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
        0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
        0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
        0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
        0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
        0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x8c, 0xc8, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x72, 0x10, 0x87, 0x73, 0x70, 0x03, 0x7b, 0x08, 0x07, 0x79, 0x60, 0x87, 0x70, 0xc8, 0x87, 0x77, 0xa8, 0x07, 0x7a,
        0x00, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x26, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0xdb, 0x40, 0x35,
        0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0xc8, 0x6d, 0x5b, 0x80, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x42, 0x44, 0x00, 0x13, 0x11, 0x02, 0xcd, 0xb0, 0x10, 0x06,
        0x40, 0x30, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x34, 0x46, 0x00, 0x4a,
        0xa0, 0x3c, 0xc8, 0x14, 0x62, 0x40, 0xc9, 0xcd, 0x00, 0xd4, 0x40, 0x01, 0x02, 0x02, 0x02, 0x22, 0x54, 0x42, 0x29, 0x06, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x80, 0x64,
        0x46, 0x52, 0x55, 0xcf, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x20, 0xda, 0xb1, 0x5c, 0x17, 0x34, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xc8, 0x86, 0x30, 0xd7, 0x15, 0x8d, 0x18, 0x20, 0x00, 0x08,
        0x82, 0xc1, 0xb2, 0x29, 0xc1, 0x81, 0x8d, 0x26, 0x04, 0xc0, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x1d, 0x33, 0x20, 0xda, 0x68, 0x42, 0x00, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0xc1, 0xf2,
        0x39, 0x46, 0xc2, 0x8c, 0x26, 0x04, 0xc0, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x2c, 0x61, 0x00, 0x21, 0x9e, 0x33, 0x9a, 0x10, 0x00, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0xd0, 0x88, 0x01, 0xb3,
        0x80, 0x01, 0x18, 0x1c, 0xc5, 0x10, 0x4c, 0x08, 0x00, 0x00, 0x00, 0x00,
    };
    static const D3D12_SHADER_BYTECODE cs_code = SHADER_BYTECODE(cs_code_dxil);

    /* Eight consecutive uints; each element's value equals its index. */
    static const uint32_t test_data[] = { 0, 1, 2, 3, 4, 5, 6, 7 };

    /* Three root descriptors. ShaderRegister stays 0 from the memset for the
     * UAV (u0) and the first SRV (t0); only the second SRV is moved to t1. */
    memset(&rs_desc, 0, sizeof(rs_desc));
    rs_desc.NumParameters = ARRAY_SIZE(root_params);
    rs_desc.pParameters = root_params;
    memset(root_params, 0, sizeof(root_params));
    root_params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
    root_params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
    root_params[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
    root_params[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
    root_params[2].Descriptor.ShaderRegister = 1;

    if (!init_compute_test_context(&context))
        return;

    /* The shader only exists as DXIL, so there is nothing to run without it. */
    if (!context_supports_dxil(&context))
    {
        skip("Context does not support DXIL.\n");
        destroy_test_context(&context);
        return;
    }

    input_buffer = create_upload_buffer(context.device, sizeof(test_data), test_data);
    /* 16 bytes: room for the single uint4 the shader stores to RW[0]. */
    output_buffer = create_default_buffer(context.device, 16,
            D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
            D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
    create_root_signature(context.device, &rs_desc, &root_signature);
    pso = create_compute_pipeline_state(context.device, root_signature, cs_code);

    ID3D12GraphicsCommandList_SetPipelineState(context.list, pso);
    ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, root_signature);
    ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0,
            ID3D12Resource_GetGPUVirtualAddress(output_buffer));
    /* Both SRVs start 16 bytes (four uints) into the upload buffer, so offset 0
     * of each view lands on test_data[4] and test_data[3] sits right below it. */
    ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 1,
            ID3D12Resource_GetGPUVirtualAddress(input_buffer) + 16);
    ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 2,
            ID3D12Resource_GetGPUVirtualAddress(input_buffer) + 16);
    ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);

    transition_resource_state(context.list, output_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
    get_buffer_readback_with_command_list(output_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);

    /* values[] follows the uint4(a, b, c, d) order written by the shader. */
    for (i = 0; i < 4; i++)
        values[i] = get_readback_uint(&rb, i, 0, 0);

    /* No expectations are checked; the observations are only printed. */
    skip("Got structured access [-1] = #%x\n", values[0]);
    skip("Got structured access [1u << 30] = #%x\n", values[1]);
    skip("Got byte address [-4] = #%x\n", values[2]);
    skip("Got byte address [0] = #%x\n", values[3]);

    /* Observed on AMD:
    test_root_descriptor_offset_sign:5262: Test skipped: Got structured access [-1] = #4b416743 <-- Garbage. Likely we accessed garbage memory way out at (4 * UINT_MAX) & UINT_MAX offset.
    test_root_descriptor_offset_sign:5263: Test skipped: Got structured access [1u << 30] = #4 <-- Suggests 32-bit uint offset.
    test_root_descriptor_offset_sign:5264: Test skipped: Got byte address [-4] = #0 <-- Suggests we hit robustness for driver generated descriptor.
    test_root_descriptor_offset_sign:5265: Test skipped: Got byte address [0] = #4
    */

    /* Observed on NV: Blue screen of death (?!?!). */

    /* Observed on Intel: All 0. Likely faulted and terminated the dispatch before we could write results. */

    ID3D12RootSignature_Release(root_signature);
    ID3D12PipelineState_Release(pso);
    ID3D12Resource_Release(input_buffer);
    ID3D12Resource_Release(output_buffer);
    release_resource_readback(&rb);
    destroy_test_context(&context);
#endif
}
|
||||
|
||||
static void test_uav_counters_null_behavior(bool use_dxil)
|
||||
{
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
ID3D12DescriptorHeap *cpu_heap;
|
||||
D3D12_ROOT_PARAMETER rs_param;
|
||||
D3D12_DESCRIPTOR_RANGE range;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12DescriptorHeap *heap;
|
||||
ID3D12Resource *resource;
|
||||
unsigned int i;
|
||||
|
||||
#if 0
|
||||
RWStructuredBuffer<uint> RWBuf[4] : register(u0);
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(int wg : SV_GroupID)
|
||||
{
|
||||
RWBuf[wg >> 2][wg & 3] = RWBuf[wg >> 2].IncrementCounter() + 64;
|
||||
}
|
||||
#endif
|
||||
static const DWORD cs_code_dxbc[] =
|
||||
{
|
||||
0x43425844, 0xb5433247, 0x4cd30f6c, 0x58100e67, 0xc179ade1, 0x00000001, 0x00000134, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e0, 0x00050051, 0x00000038, 0x0100086a,
|
||||
0x0700009e, 0x0031ee46, 0x00000000, 0x00000000, 0x00000003, 0x00000004, 0x00000000, 0x0200005f,
|
||||
0x00021012, 0x02000068, 0x00000002, 0x0400009b, 0x00000001, 0x00000001, 0x00000001, 0x0600002a,
|
||||
0x00100012, 0x00000000, 0x0002100a, 0x00004001, 0x00000002, 0x06000001, 0x00100022, 0x00000000,
|
||||
0x0002100a, 0x00004001, 0x00000003, 0x070000b2, 0x00100012, 0x00000001, 0x0421e000, 0x00000000,
|
||||
0x0010000a, 0x00000000, 0x0700001e, 0x00100042, 0x00000000, 0x0010000a, 0x00000001, 0x00004001,
|
||||
0x00000040, 0x0b0000a8, 0x0421e012, 0x00000000, 0x0010000a, 0x00000000, 0x0010001a, 0x00000000,
|
||||
0x00004001, 0x00000000, 0x0010002a, 0x00000000, 0x0100003e,
|
||||
};
|
||||
|
||||
static const BYTE cs_code_dxil[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0xc6, 0xfe, 0xe1, 0x77, 0xd8, 0x5c, 0x56, 0xc7, 0x6e, 0xf7, 0xe2, 0xf7, 0xb3, 0xb0, 0x40, 0xe0, 0x01, 0x00, 0x00, 0x00, 0x34, 0x06, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xd0, 0x00, 0x00, 0x00, 0xec, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x60, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0xc1, 0xf5, 0xe2,
|
||||
0x29, 0x0a, 0x7c, 0x68, 0x4a, 0xfa, 0x15, 0xe9, 0x1a, 0x85, 0x63, 0x21, 0x44, 0x58, 0x49, 0x4c, 0x40, 0x05, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x50, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x28, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x47, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02,
|
||||
0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90,
|
||||
0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07,
|
||||
0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84,
|
||||
0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x54, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73,
|
||||
0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40, 0xc1, 0x29, 0xc4, 0x18, 0xc8, 0x50, 0x9a, 0x23, 0x08, 0x8a, 0x81, 0x86, 0x19, 0x63, 0x11, 0x2b,
|
||||
0x0a, 0x18, 0x68, 0x8c, 0x31, 0xc6, 0x30, 0xe4, 0x06, 0x02, 0x66, 0x32, 0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71,
|
||||
0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73, 0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xa8, 0x07, 0x77, 0x98, 0x87, 0x74, 0x38, 0x07, 0x77, 0x28, 0x07, 0x72, 0x00, 0x83,
|
||||
0x74, 0x70, 0x07, 0x7a, 0xf0, 0x03, 0x14, 0x8c, 0x24, 0x88, 0x24, 0xe7, 0x08, 0x40, 0x01, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0,
|
||||
0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90,
|
||||
0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6,
|
||||
0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x12, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x71, 0x80, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x10, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x50, 0x18, 0xb4, 0x46, 0x00, 0x6a,
|
||||
0x80, 0x68, 0x81, 0xd0, 0x9c, 0x01, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
|
||||
0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
|
||||
0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
|
||||
0x53, 0x43, 0x60, 0x82, 0x30, 0x20, 0x13, 0x84, 0x21, 0x99, 0x20, 0x2c, 0xca, 0x04, 0x61, 0x59, 0x36, 0x08, 0x03, 0xb3, 0x61, 0x21, 0x94, 0x85, 0x20, 0x98, 0xc6, 0x79, 0x1c, 0x68, 0x43, 0x10,
|
||||
0x6d, 0x20, 0x00, 0x09, 0x00, 0x26, 0x08, 0x02, 0x40, 0xa2, 0x2d, 0x2c, 0xcd, 0x6d, 0x82, 0x40, 0x31, 0x1b, 0x86, 0x61, 0x18, 0x36, 0x10, 0x84, 0xc5, 0x5c, 0x1b, 0x0a, 0xaa, 0x02, 0x26, 0xac,
|
||||
0x0a, 0x1b, 0x9b, 0x5d, 0x9b, 0x4b, 0x1a, 0x59, 0x99, 0x1b, 0xdd, 0x94, 0x20, 0xa8, 0x42, 0x86, 0xe7, 0x62, 0x57, 0x26, 0x37, 0x97, 0xf6, 0xe6, 0x36, 0x25, 0x20, 0x9a, 0x90, 0xe1, 0xb9, 0xd8,
|
||||
0x85, 0xb1, 0xd9, 0x95, 0xc9, 0x4d, 0x09, 0x8c, 0x3a, 0x64, 0x78, 0x2e, 0x73, 0x68, 0x61, 0x64, 0x65, 0x72, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x02, 0xa4, 0x0c, 0x19, 0x9e, 0x8b, 0x5c, 0xd9,
|
||||
0xdc, 0x5b, 0x9d, 0xdc, 0x58, 0xd9, 0xdc, 0x94, 0x40, 0xaa, 0x43, 0x86, 0xe7, 0x52, 0xe6, 0x46, 0x27, 0x97, 0x07, 0xf5, 0x96, 0xe6, 0x46, 0x37, 0x37, 0x25, 0xc0, 0x00, 0x79, 0x18, 0x00, 0x00,
|
||||
0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6,
|
||||
0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8,
|
||||
0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11,
|
||||
0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89,
|
||||
0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37,
|
||||
0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81,
|
||||
0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c,
|
||||
0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc,
|
||||
0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20,
|
||||
0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x50, 0x0d, 0x97,
|
||||
0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x26, 0x90, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb9, 0xcf, 0x00, 0x4c, 0x04, 0xe7, 0x50,
|
||||
0xcd, 0x44, 0x44, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x05, 0x44, 0xc3, 0xe5, 0x3b, 0x8f, 0x6f, 0x44, 0x0e, 0xf5, 0x88, 0x83, 0x8f,
|
||||
0xdc, 0xb6, 0x01, 0x10, 0x0c, 0x80, 0x34, 0x00, 0x61, 0x20, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x8a,
|
||||
0x37, 0xa0, 0x08, 0x4a, 0xae, 0x18, 0x03, 0x0a, 0x30, 0xa0, 0x0c, 0x4a, 0x31, 0x80, 0x4c, 0x09, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x58, 0x48, 0x54, 0x01, 0x92,
|
||||
0x15, 0x4c, 0x30, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0xcc, 0x95, 0x10, 0x54, 0x00, 0x8d, 0x18, 0x1c, 0x00, 0x08, 0x82, 0x41, 0x92, 0x25, 0x41, 0x51, 0x41, 0x02, 0x65, 0x24, 0x3a, 0x62, 0xd0,
|
||||
0x00, 0x20, 0x08, 0x06, 0x8e, 0x96, 0x10, 0x01, 0x26, 0x40, 0x10, 0x84, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE cs_dxbc = SHADER_BYTECODE(cs_code_dxbc);
|
||||
static const D3D12_SHADER_BYTECODE cs_dxil = SHADER_BYTECODE(cs_code_dxil);
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
if (use_dxil && !context_supports_dxil(&context))
|
||||
{
|
||||
skip("Context does not support DXIL.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(&rs_param, 0, sizeof(rs_param));
|
||||
memset(&range, 0, sizeof(range));
|
||||
rs_desc.NumParameters = 1;
|
||||
rs_desc.pParameters = &rs_param;
|
||||
rs_param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
rs_param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rs_param.DescriptorTable.NumDescriptorRanges = 1;
|
||||
rs_param.DescriptorTable.pDescriptorRanges = &range;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
range.NumDescriptors = 8;
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature, use_dxil ? cs_dxil : cs_dxbc);
|
||||
|
||||
cpu_heap = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8);
|
||||
heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 8);
|
||||
resource = create_default_buffer(context.device, D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT * 9,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
memset(&uav_desc, 0, sizeof(uav_desc));
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_h, gpu_h;
|
||||
cpu_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu_heap);
|
||||
gpu_h = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap);
|
||||
cpu_h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * i;
|
||||
gpu_h.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) * i;
|
||||
|
||||
uav_desc.Buffer.NumElements = 4;
|
||||
uav_desc.Buffer.FirstElement = 4 * i;
|
||||
uav_desc.Buffer.StructureByteStride = 4;
|
||||
uav_desc.Buffer.CounterOffsetInBytes = D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT * (i + 1);
|
||||
|
||||
/* AMD drivers don't seem to clear the UAV counter if we pass in NULL, so
|
||||
* test a path which does not do that. */
|
||||
if (i < 4)
|
||||
{
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, resource, &uav_desc, cpu_h);
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, resource, &uav_desc, gpu_h);
|
||||
}
|
||||
|
||||
uav_desc.Buffer.CounterOffsetInBytes = 0;
|
||||
|
||||
/* Test writing NULL UAV counter after a non-NULL UAV counter. Makes sure that we are indeed supposed
|
||||
* to clear out UAV counters to NULL every time. */
|
||||
if ((i & 3) == 3)
|
||||
{
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, NULL, NULL, &uav_desc, cpu_h);
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, NULL, NULL, &uav_desc, gpu_h);
|
||||
}
|
||||
else if ((i & 3) >= 1)
|
||||
{
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, cpu_h);
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, gpu_h);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Test copy behavior. Make sure we correctly copy NULL counters as well. */
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, resource, NULL, &uav_desc, cpu_h);
|
||||
ID3D12Device_CopyDescriptorsSimple(context.device, 1,
|
||||
gpu_h, cpu_h, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &heap);
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 0,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap));
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 8 * 4, 1, 1);
|
||||
transition_resource_state(context.list, resource, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(resource, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
|
||||
|
||||
for (i = 0; i < 8 * 4; i++)
|
||||
{
|
||||
/* Possible behavior is very varied here:
|
||||
* NV: If UAV counter is NULL, NV makes the main descriptor robust.
|
||||
* AMD: Writing NULL uav counter does not update the counter descriptor, the atomic update will still go through.
|
||||
* Intel: Behaves as you would expect. Atomic op returns 0, writes to main descriptor behaves as you'd expect. */
|
||||
uint32_t value = get_readback_uint(&rb, i, 0, 0);
|
||||
ok(value == 0 || (value >= 64 && value < (64 + 4)), "Unexpected value %u = %u\n", i, value);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
uint32_t value = get_readback_uint(&rb, (i + 1) * (D3D12_UAV_COUNTER_PLACEMENT_ALIGNMENT / 4), 0, 0);
|
||||
if (i < 4)
|
||||
{
|
||||
/* AMD behavior: Passing NULL does not necessarily clear out UAV counter.
|
||||
* It is undefined to access UAV counter like this.
|
||||
* https://docs.microsoft.com/en-us/windows/win32/direct3d12/uav-counters
|
||||
* "If a shader attempts to access the counter of a UAV that does not have an associated counter,
|
||||
* then the debug layer will issue a warning,
|
||||
* and a GPU page fault will occur causing the apps’s device to be removed." */
|
||||
ok(value == 0 || value == 4, "Unexpected counter %u = %u.\n", i, value);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Technically undefined, but all drivers behave robustly here, we should too. */
|
||||
ok(value == 0, "Unexpected counter %u = %u.\n", i, value);
|
||||
}
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
ID3D12DescriptorHeap_Release(heap);
|
||||
ID3D12DescriptorHeap_Release(cpu_heap);
|
||||
ID3D12Resource_Release(resource);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
/* Test entry point for the DXBC variant: forwards to the shared
 * test_uav_counters_null_behavior() body with DXIL disabled. */
void test_uav_counter_null_behavior_dxbc(void)
{
    const bool use_dxil = false;

    test_uav_counters_null_behavior(use_dxil);
}
|
||||
|
||||
/* Test entry point for the DXIL variant: forwards to the shared
 * test_uav_counters_null_behavior() body with DXIL enabled. */
void test_uav_counter_null_behavior_dxil(void)
{
    const bool use_dxil = true;

    test_uav_counters_null_behavior(use_dxil);
}
|
||||
|
|
|
@ -1074,10 +1074,10 @@ void test_reset_command_allocator(void)
|
|||
command_allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&command_list2);
|
||||
ok(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);
|
||||
|
||||
ID3D12CommandAllocator_Release(command_allocator);
|
||||
ID3D12CommandAllocator_Release(command_allocator2);
|
||||
ID3D12GraphicsCommandList_Release(command_list);
|
||||
ID3D12GraphicsCommandList_Release(command_list2);
|
||||
ID3D12CommandAllocator_Release(command_allocator);
|
||||
ID3D12CommandAllocator_Release(command_allocator2);
|
||||
}
|
||||
|
||||
refcount = ID3D12Device_Release(device);
|
||||
|
|
|
@ -1633,10 +1633,35 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x49, 0x19, 0x48, 0x60, 0xf0, 0x3d, 0xc2, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x24, 0x65, 0x20, 0x81, 0xc1, 0xd7, 0x04, 0x18, 0x0e, 0x04, 0x00, 0x05, 0x00,
|
||||
0x00, 0x00, 0xc5, 0x01, 0x91, 0x8e, 0xec, 0xb7, 0x38, 0xcc, 0x9e, 0x7f, 0xc7, 0xe2, 0xba, 0xd9, 0x5c, 0x96, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
static const DWORD ps_code_3rt_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
float4 c0;
|
||||
float4 c1;
|
||||
|
||||
void main(out float4 o0 : SV_Target0, out float4 o1 : SV_Target1, out float4 o2 : SV_Target2)
|
||||
{
|
||||
o0 = c0;
|
||||
o1 = c1;
|
||||
o2 = 1.0.xxxx;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xe1e2c26b, 0x10d9607c, 0x4a0f0786, 0xc368f603, 0x00000001, 0x0000013c, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x000000a0, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x0000005c, 0x00000003, 0x00000008, 0x00000050, 0x00000000, 0x00000000, 0x00000003, 0x00000000,
|
||||
0x0000000f, 0x00000050, 0x00000001, 0x00000000, 0x00000003, 0x00000001, 0x0000000f, 0x00000050,
|
||||
0x00000002, 0x00000000, 0x00000003, 0x00000002, 0x0000000f, 0x545f5653, 0x65677261, 0xabab0074,
|
||||
0x58454853, 0x00000094, 0x00000050, 0x00000025, 0x0100086a, 0x04000059, 0x00208e46, 0x00000000,
|
||||
0x00000002, 0x03000065, 0x001020f2, 0x00000000, 0x03000065, 0x001020f2, 0x00000001, 0x03000065,
|
||||
0x001020f2, 0x00000002, 0x06000036, 0x001020f2, 0x00000000, 0x00208e46, 0x00000000, 0x00000000,
|
||||
0x06000036, 0x001020f2, 0x00000001, 0x00208e46, 0x00000000, 0x00000001, 0x08000036, 0x001020f2,
|
||||
0x00000002, 0x00004002, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x0100003e,
|
||||
};
|
||||
const D3D12_SHADER_BYTECODE ps = {
|
||||
use_dxil ? (const void*)ps_code_dxil : (const void*)ps_code_dxbc,
|
||||
use_dxil ? sizeof(ps_code_dxil) : sizeof(ps_code_dxbc)
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE ps_3rt = SHADER_BYTECODE(ps_code_3rt_dxbc);
|
||||
static const struct
|
||||
{
|
||||
struct
|
||||
|
@ -1681,6 +1706,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
init_pipeline_state_desc(&pso_desc, context.root_signature,
|
||||
context.render_target_desc.Format, NULL, &ps, NULL);
|
||||
}
|
||||
|
||||
pso_desc.BlendState.RenderTarget[0].BlendEnable = true;
|
||||
pso_desc.BlendState.RenderTarget[0].SrcBlend = D3D12_BLEND_SRC_COLOR;
|
||||
pso_desc.BlendState.RenderTarget[0].DestBlend = D3D12_BLEND_SRC1_COLOR;
|
||||
|
@ -1688,6 +1714,54 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[0].SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA;
|
||||
pso_desc.BlendState.RenderTarget[0].DestBlendAlpha = D3D12_BLEND_SRC1_ALPHA;
|
||||
pso_desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
|
||||
pso_desc.NumRenderTargets = 2;
|
||||
pso_desc.RTVFormats[1] = pso_desc.RTVFormats[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
|
||||
/* Write mask of 0 is not enough. */
|
||||
pso_desc.BlendState.IndependentBlendEnable = TRUE;
|
||||
pso_desc.BlendState.RenderTarget[1].RenderTargetWriteMask = 0;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
|
||||
/* This appears to be allowed however. */
|
||||
pso_desc.RTVFormats[1] = DXGI_FORMAT_UNKNOWN;
|
||||
pso_desc.BlendState.IndependentBlendEnable = FALSE;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
/* >2 RTs is also allowed as long as we keep using NULL format. */
|
||||
pso_desc.NumRenderTargets = 3;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
/* This is still allowed. We need to only consider RTs with IOSIG entry apparently ... */
|
||||
pso_desc.RTVFormats[2] = pso_desc.RTVFormats[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
|
||||
if (!use_dxil)
|
||||
{
|
||||
/* If we try to write to o2 however, this must fail. */
|
||||
pso_desc.PS = ps_3rt;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ok(hr == E_INVALIDARG, "Unexpected result, hr %#x.\n", hr);
|
||||
pso_desc.PS = ps;
|
||||
}
|
||||
|
||||
pso_desc.NumRenderTargets = 1;
|
||||
pso_desc.RTVFormats[2] = DXGI_FORMAT_UNKNOWN;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create pipeline, hr %#x.\n", hr);
|
||||
|
@ -1727,7 +1801,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[1] = pso_desc.BlendState.RenderTarget[0];
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -1736,7 +1810,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[1].DestBlend = D3D12_BLEND_SRC_COLOR;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -1753,7 +1827,7 @@ static void test_dual_source_blending(bool use_dxil)
|
|||
pso_desc.BlendState.RenderTarget[0].BlendOpAlpha = D3D12_BLEND_OP_ADD;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(context.device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
todo ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
ok(hr == E_INVALIDARG, "Got unexpected hr %#x.\n", hr);
|
||||
if (SUCCEEDED(hr))
|
||||
ID3D12PipelineState_Release(context.pipeline_state);
|
||||
context.pipeline_state = NULL;
|
||||
|
@ -2227,3 +2301,119 @@ void test_mismatching_pso_stages(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_pipeline_no_ps_nonzero_rts(void)
|
||||
{
|
||||
const FLOAT white[] = { 100.0f, 100.0f, 100.0f, 100.0f };
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rtv_handle;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
struct depth_stencil_resource ds;
|
||||
D3D12_INPUT_LAYOUT_DESC layout;
|
||||
D3D12_INPUT_ELEMENT_DESC elem;
|
||||
struct test_context_desc desc;
|
||||
D3D12_VERTEX_BUFFER_VIEW vbv;
|
||||
struct test_context context;
|
||||
ID3D12DescriptorHeap *rtv;
|
||||
ID3D12Resource *vbo;
|
||||
ID3D12Resource *rt;
|
||||
D3D12_VIEWPORT vp;
|
||||
D3D12_RECT sci;
|
||||
|
||||
static const FLOAT vbo_data[] =
|
||||
{
|
||||
-1.0f, -1.0f, 0.5f, 1.0f,
|
||||
+3.0f, -1.0f, 0.5f, 1.0f,
|
||||
-1.0f, +3.0f, 0.5f, 1.0f,
|
||||
};
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
float4 main(float4 a : A) : SV_Position
|
||||
{
|
||||
return a;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0xecd820c8, 0x89ee4b40, 0xb73efa73, 0x4ed91573, 0x00000001, 0x000000d4, 0x00000003,
|
||||
0x0000002c, 0x00000058, 0x0000008c, 0x4e475349, 0x00000024, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000000, 0x00000003, 0x00000000, 0x00000f0f, 0xabab0041, 0x4e47534f, 0x0000002c,
|
||||
0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003, 0x00000000, 0x0000000f,
|
||||
0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x00000040, 0x00010050, 0x00000010, 0x0100086a,
|
||||
0x0300005f, 0x001010f2, 0x00000000, 0x04000067, 0x001020f2, 0x00000000, 0x00000001, 0x05000036,
|
||||
0x001020f2, 0x00000000, 0x00101e46, 0x00000000, 0x0100003e,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
|
||||
layout.NumElements = 1;
|
||||
layout.pInputElementDescs = &elem;
|
||||
memset(&elem, 0, sizeof(elem));
|
||||
elem.Format = DXGI_FORMAT_R32G32B32A32_FLOAT;
|
||||
elem.InputSlotClass = D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA;
|
||||
elem.SemanticName = "A";
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.no_pipeline = true;
|
||||
desc.no_root_signature = true;
|
||||
desc.no_render_target = true;
|
||||
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
init_depth_stencil(&ds, context.device, 1, 1, 1, 1, DXGI_FORMAT_D32_FLOAT, DXGI_FORMAT_D32_FLOAT, NULL);
|
||||
rt = create_default_texture2d(context.device, 1, 1, 1, 1, DXGI_FORMAT_R32_FLOAT,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET,
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET);
|
||||
|
||||
rtv = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_RTV, 1);
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
rs_desc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT;
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
|
||||
init_pipeline_state_desc(&pso, context.root_signature, DXGI_FORMAT_R8G8B8A8_UNORM, &vs, NULL, &layout);
|
||||
pso.DSVFormat = DXGI_FORMAT_D32_FLOAT;
|
||||
pso.DepthStencilState.DepthEnable = TRUE;
|
||||
pso.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL;
|
||||
pso.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS;
|
||||
pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE;
|
||||
pso.PS.BytecodeLength = 0;
|
||||
pso.PS.pShaderBytecode = NULL;
|
||||
|
||||
rtv_handle = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(rtv);
|
||||
|
||||
ID3D12Device_CreateGraphicsPipelineState(context.device, &pso, &IID_ID3D12PipelineState, (void**)&context.pipeline_state);
|
||||
ID3D12Device_CreateRenderTargetView(context.device, rt, NULL, rtv_handle);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(context.list, rtv_handle, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearDepthStencilView(context.list, ds.dsv_handle, D3D12_CLEAR_FLAG_DEPTH, 1.0f, 0, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
|
||||
set_viewport(&vp, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(context.list, 1, &vp);
|
||||
set_rect(&sci, 0, 0, 1, 1);
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(context.list, 1, &sci);
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(context.list, 1, &rtv_handle, TRUE, &ds.dsv_handle);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(context.list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
||||
vbo = create_upload_buffer(context.device, sizeof(vbo_data), vbo_data);
|
||||
vbv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(vbo);
|
||||
vbv.SizeInBytes = sizeof(vbo_data);
|
||||
vbv.StrideInBytes = 16;
|
||||
ID3D12GraphicsCommandList_IASetVertexBuffers(context.list, 0, 1, &vbv);
|
||||
ID3D12GraphicsCommandList_DrawInstanced(context.list, 3, 1, 0, 0);
|
||||
|
||||
transition_resource_state(context.list, rt, D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_resource_state(context.list, ds.texture, D3D12_RESOURCE_STATE_DEPTH_WRITE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
/* Verify depth buffer was written to. */
|
||||
check_sub_resource_float(ds.texture, 0, context.queue, context.list, 0.5f, 0);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
/* Verify that the invalid R32_FLOAT RTV was just ignored. */
|
||||
check_sub_resource_float(rt, 0, context.queue, context.list, 100.0f, 0);
|
||||
|
||||
ID3D12Resource_Release(rt);
|
||||
ID3D12Resource_Release(vbo);
|
||||
ID3D12DescriptorHeap_Release(rtv);
|
||||
destroy_depth_stencil(&ds);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -125,11 +125,15 @@ void test_pipeline_library(void)
|
|||
ID3D12PipelineLibrary *pipeline_library;
|
||||
ID3D12RootSignature *root_signature;
|
||||
struct test_context context;
|
||||
ID3D12PipelineState *state3;
|
||||
ID3D12PipelineState *state2;
|
||||
ID3D12PipelineState *state;
|
||||
ULONG reference_refcount;
|
||||
size_t serialized_size;
|
||||
ID3D12Device1 *device1;
|
||||
void *serialized_data;
|
||||
ID3D12Device *device;
|
||||
ID3D12Fence *fence;
|
||||
HRESULT hr;
|
||||
|
||||
#if 0
|
||||
|
@ -242,11 +246,20 @@ void test_pipeline_library(void)
|
|||
ok(hr == S_OK, "Failed to create graphics pipeline, hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, graphics_name, state);
|
||||
ok(hr == S_OK, "Failed to store compute pipeline, hr %x.\n", hr);
|
||||
ok(hr == S_OK, "Failed to store graphics pipeline, hr %x.\n", hr);
|
||||
|
||||
/* Try to load PSO after a Store. Verify that we have a ref-count. */
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library, graphics_name, &graphics_desc,
|
||||
&IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline, hr %x.\n", hr);
|
||||
ok(state == state2, "Resulting PSOs must point to same object.\n");
|
||||
ok(get_refcount(state2) == 2, "Refcount %u != 2.\n", get_refcount(state2));
|
||||
|
||||
hr = ID3D12PipelineLibrary_StorePipeline(pipeline_library, compute_name, state);
|
||||
ok(hr == E_INVALIDARG, "Storing pipeline with already existing name succeeded, hr %x.\n", hr);
|
||||
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
|
||||
/* Test looking up pipelines in a new pipeline library */
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
|
@ -283,15 +296,66 @@ void test_pipeline_library(void)
|
|||
serialized_size, &IID_ID3D12PipelineLibrary, (void**)&pipeline_library);
|
||||
ok(hr == S_OK, "Failed to create pipeline library, hr %#x.\n");
|
||||
|
||||
/* Verify that PSO library must internally ref-count a unique PSO. */
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
ID3D12PipelineState_Release(state);
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadGraphicsPipeline(pipeline_library,
|
||||
graphics_name, &graphics_desc, &IID_ID3D12PipelineState, (void**)&state3);
|
||||
ok(hr == S_OK, "Failed to load graphics pipeline from pipeline library, hr %#x.\n", hr);
|
||||
|
||||
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
|
||||
ok(get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
|
||||
ok(get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
|
||||
ok(get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
ID3D12PipelineState_Release(state3);
|
||||
|
||||
reference_refcount = get_refcount(context.device);
|
||||
|
||||
/* Verify that PSO library must internally ref-count a unique PSO. */
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state3);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
|
||||
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
|
||||
ID3D12Device_CreateFence(context.device, 0, D3D12_FENCE_FLAG_NONE, &IID_ID3D12Fence, (void**)&fence);
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
|
||||
ID3D12PipelineState_SetPrivateDataInterface(state, &IID_ID3D12Fence, (const IUnknown *)fence);
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
|
||||
ok(state == state2 && state == state3, "Resulting PSOs must point to same object.\n");
|
||||
ok(state && get_refcount(state) == 3, "Refcount %u != 3.\n", get_refcount(state));
|
||||
ok(state2 && get_refcount(state2) == 3, "Refcount %u != 3.\n", get_refcount(state2));
|
||||
ok(state3 && get_refcount(state3) == 3, "Refcount %u != 3.\n", get_refcount(state3));
|
||||
ID3D12PipelineState_Release(state);
|
||||
ID3D12PipelineState_Release(state2);
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
ok(ID3D12PipelineState_Release(state3) == 0, "Refcount did not hit 0.\n");
|
||||
/* Releasing the last public reference does not release private data. */
|
||||
ok(get_refcount(fence) == 2, "Refcount %u != 2.\n", get_refcount(fence));
|
||||
/* Device ref count does release however ... */
|
||||
ok(get_refcount(context.device) == reference_refcount + 1, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 1);
|
||||
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
compute_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state2);
|
||||
/* Device ref count increases here again. */
|
||||
ok(get_refcount(context.device) == reference_refcount + 2, "Refcount %u != %u\n", get_refcount(context.device), reference_refcount + 2);
|
||||
ok(hr == S_OK, "Failed to load compute pipeline from pipeline library, hr %#x.\n", hr);
|
||||
ok(state == state2, "Reloading dead PSO must point to same object.\n");
|
||||
ID3D12PipelineState_Release(state2);
|
||||
|
||||
hr = ID3D12PipelineLibrary_LoadComputePipeline(pipeline_library,
|
||||
graphics_name, &compute_desc, &IID_ID3D12PipelineState, (void**)&state);
|
||||
|
@ -301,6 +365,9 @@ void test_pipeline_library(void)
|
|||
ID3D12PipelineState_Release(state);
|
||||
|
||||
ID3D12PipelineLibrary_Release(pipeline_library);
|
||||
/* This should release the fence reference. */
|
||||
ok(get_refcount(fence) == 1, "Refcount %u != 1.\n", get_refcount(fence));
|
||||
ID3D12Fence_Release(fence);
|
||||
|
||||
free(serialized_data);
|
||||
ID3D12RootSignature_Release(root_signature);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -24,8 +24,8 @@
|
|||
|
||||
void test_unbound_rtv_rendering(void)
|
||||
{
|
||||
static const struct vec4 white = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
static const struct vec4 red = { 1.0f, 0.0f, 0.0f, 1.0f };
|
||||
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE rt_handle;
|
||||
|
@ -91,8 +91,8 @@ void test_unbound_rtv_rendering(void)
|
|||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(hr == S_OK, "Failed to create state, hr %#x.\n", hr);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rt_handle, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
|
@ -120,7 +120,8 @@ void test_unbound_rtv_rendering(void)
|
|||
|
||||
void test_unknown_rtv_format(void)
|
||||
{
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const struct vec4 vec4_white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
struct vec4 expected_vec4 = {0.0f, 0.0f, 0.0f, 1.0f};
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
|
@ -185,7 +186,7 @@ void test_unknown_rtv_format(void)
|
|||
create_render_target(&context, &desc, &render_targets[1], &rtvs[2]);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(rtvs); ++i)
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, rtvs[i], white, 0, NULL);
|
||||
|
||||
/* NULL RTV */
|
||||
memset(&rtv_desc, 0, sizeof(rtv_desc));
|
||||
|
@ -212,7 +213,7 @@ void test_unknown_rtv_format(void)
|
|||
transition_resource_state(command_list, render_targets[1],
|
||||
D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &white, 0);
|
||||
check_sub_resource_vec4(context.render_target, 0, queue, command_list, &vec4_white, 0);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
expected_vec4.x = 2.0f;
|
||||
check_sub_resource_vec4(render_targets[0], 0, queue, command_list, &expected_vec4, 0);
|
||||
|
|
|
@ -2638,7 +2638,8 @@ void test_stress_suballocation_thread(void *userdata)
|
|||
{
|
||||
/* Randomly allocate heaps and place a buffer on top of it. */
|
||||
alloc_heap = rand_r(&seed) % 2 == 0;
|
||||
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 20);
|
||||
/* Ensures we sometimes hit dedicated allocation paths. (2 MiB limit). */
|
||||
alloc_size = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT * (1 + rand_r(&seed) % 40);
|
||||
keep_alive = rand_r(&seed) % 2 == 0;
|
||||
|
||||
if (buffers[i] && keep_alive)
|
||||
|
|
|
@ -1467,3 +1467,36 @@ void test_missing_bindings_root_signature(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
/* Checks the HRESULT of ID3D12Device_CreateRootSignature() when it is handed a
 * blob that is a compiled compute shader rather than a serialized root
 * signature. The test expects E_FAIL and reports the result through ok(). */
void test_root_signature_empty_blob(void)
|
||||
{
|
||||
ID3D12RootSignature *root_signature;
|
||||
struct test_context context;
|
||||
HRESULT hr;
|
||||
|
||||
/* DXBC container for the HLSL shown in the #if 0 section. Its header declares
 * three chunks, tagged ISGN, OSGN and SHEX; none of them is a root signature
 * chunk, which is what makes the blob "empty" for this test. */
static const DWORD cs_code[] =
|
||||
{
|
||||
#if 0
|
||||
RWStructuredBuffer<uint> RWBuf;
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(int wg : SV_GroupID)
|
||||
{
|
||||
RWBuf[wg] = wg;
|
||||
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x81a88c98, 0x1ab24abd, 0xfdb8fb1f, 0x7e9cb035, 0x00000001, 0x000000a8, 0x00000003,
|
||||
0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f,
|
||||
0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000054, 0x00050050, 0x00000015, 0x0100086a,
|
||||
0x0400009e, 0x0011e000, 0x00000000, 0x00000004, 0x0200005f, 0x00021012, 0x0400009b, 0x00000001,
|
||||
0x00000001, 0x00000001, 0x070000a8, 0x0011e012, 0x00000000, 0x0002100a, 0x00004001, 0x00000000,
|
||||
0x0002100a, 0x0100003e,
|
||||
};
|
||||
|
||||
/* Nothing to test if the compute test context cannot be set up. */
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
/* NOTE(review): root_signature is never released in this function; that is
 * only harmless as long as creation fails as the test expects. */
hr = ID3D12Device_CreateRootSignature(context.device, 0, cs_code, sizeof(cs_code), &IID_ID3D12RootSignature, (void **)&root_signature);
|
||||
/* Has to be E_FAIL, not E_INVALIDARG, oddly enough. */
|
||||
ok(hr == E_FAIL, "Unexpected hr #%x.\n", hr);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -5134,7 +5134,7 @@ void test_gather(void)
|
|||
{0.3f, 1.3f, 1.2f, 0.2f}, {1.3f, 2.3f, 2.2f, 1.2f}, {2.3f, 3.3f, 3.2f, 2.2f}, {3.3f, 3.3f, 3.2f, 3.2f},
|
||||
{0.3f, 1.3f, 1.3f, 0.3f}, {1.3f, 2.3f, 2.3f, 1.3f}, {2.3f, 3.3f, 3.3f, 2.3f}, {3.3f, 3.3f, 3.3f, 3.3f},
|
||||
};
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
|
@ -5171,7 +5171,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5209,7 +5209,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_offset, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5247,7 +5247,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_green, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5285,7 +5285,7 @@ void test_gather(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_po, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5321,7 +5321,7 @@ void test_gather(void)
|
|||
constants.offset_x = 0;
|
||||
constants.offset_y = 0;
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5455,7 +5455,7 @@ void test_gather_c(void)
|
|||
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
|
||||
{0.0f, 0.0f, 0.0f, 0.0f}, {0.0f, 1.0f, 1.0f, 0.0f}, {1.0f, 1.0f, 1.0f, 1.0f}, {1.0f, 1.0f, 1.0f, 1.0f},
|
||||
};
|
||||
static const struct vec4 white = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const D3D12_SUBRESOURCE_DATA resource_data = {&texture_data, sizeof(texture_data) / 4};
|
||||
static const D3D12_STATIC_SAMPLER_DESC sampler_desc =
|
||||
{
|
||||
|
@ -5511,7 +5511,7 @@ void test_gather_c(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_c, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5549,7 +5549,7 @@ void test_gather_c(void)
|
|||
context.pipeline_state = create_pipeline_state(context.device,
|
||||
context.root_signature, desc.rt_format, NULL, &ps_gather4_po_c, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -5585,7 +5585,7 @@ void test_gather_c(void)
|
|||
constants.offset_x = 0;
|
||||
constants.offset_y = 0;
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
|
@ -6281,7 +6281,7 @@ void test_multisample_array_texture(void)
|
|||
};
|
||||
static const D3D12_SHADER_BYTECODE ps = {ps_code, sizeof(ps_code)};
|
||||
static const float white[] = {1.0f, 1.0f, 1.0f, 1.0f};
|
||||
static const struct vec4 colors[] =
|
||||
static const float colors[][4] =
|
||||
{
|
||||
{1.0f, 0.0f, 0.0f, 1.0f},
|
||||
{0.0f, 1.0f, 0.0f, 1.0f},
|
||||
|
@ -6386,8 +6386,7 @@ void test_multisample_array_texture(void)
|
|||
rtv_desc.Texture2DMSArray.FirstArraySlice = i;
|
||||
rtv_desc.Texture2DMSArray.ArraySize = 1;
|
||||
ID3D12Device_CreateRenderTargetView(device, texture, &rtv_desc, cpu_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, &colors[i].x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, cpu_handle, colors[i], 0, NULL);
|
||||
}
|
||||
|
||||
transition_resource_state(command_list, texture,
|
||||
|
|
|
@ -2131,8 +2131,8 @@ void test_sv_barycentric(void)
|
|||
#define BARY_RES 128
|
||||
|
||||
static const D3D12_VIEWPORT vp = { 0, 0, BARY_RES, BARY_RES, 0, 1 };
|
||||
static const float white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
static const D3D12_RECT sci = { 0, 0, BARY_RES, BARY_RES };
|
||||
static const float white[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
static const uint8_t provoking_lut[] = {
|
||||
192, 212, 224, 244,
|
||||
128, 144, 160, 176,
|
||||
|
@ -4215,6 +4215,290 @@ void test_shader_sm64_packed(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_shader_waveop_maximal_convergence(void)
|
||||
{
|
||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||
ID3D12PipelineState *pso_nonconverged;
|
||||
ID3D12PipelineState *pso_converged;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12Resource *outputs;
|
||||
ID3D12Resource *inputs;
|
||||
unsigned int i;
|
||||
|
||||
static const uint32_t inputs_data[] =
|
||||
{
|
||||
2, 3, 1, 3,
|
||||
2, 0, 0, 1,
|
||||
0, 1, 3, 2,
|
||||
2, 1, 2, 2,
|
||||
};
|
||||
|
||||
static const uint32_t reference_converged[] =
|
||||
{
|
||||
25, 25, 25, 25,
|
||||
25, 25, 25, 25,
|
||||
25, 25, 25, 25,
|
||||
25, 25, 25, 25,
|
||||
};
|
||||
|
||||
static const uint32_t reference_nonconverged[] =
|
||||
{
|
||||
12, 9, 4, 9,
|
||||
12, 0, 0, 4,
|
||||
0, 4, 9, 12,
|
||||
12, 4, 12, 12,
|
||||
};
|
||||
|
||||
#if 0
|
||||
StructuredBuffer<uint> RO : register(t0);
|
||||
RWStructuredBuffer<uint> RW : register(u0);
|
||||
|
||||
[numthreads(16, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint v = RO[thr];
|
||||
uint result;
|
||||
while (true)
|
||||
{
|
||||
uint first = WaveReadLaneFirst(v);
|
||||
if (v == first)
|
||||
{
|
||||
result = WaveActiveSum(v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
RW[thr] = result;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Compiled with Version: dxcompiler.dll: 1.5 - 1.4.1907.0; dxil.dll: 1.4(10.0.18362.1) */
|
||||
static const BYTE reconvergence_dxil[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x27, 0x94, 0xf5, 0xbd, 0x53, 0x66, 0x70, 0xdb, 0xe2, 0x95, 0x5c, 0x8c, 0xdc, 0x10, 0x0b, 0xdf, 0x01, 0x00, 0x00, 0x00, 0xe4, 0x06, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x34, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00, 0xc8, 0x00, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x5c, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x14, 0x06, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x85, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0xfc, 0x05, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x7c, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42,
|
||||
0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50,
|
||||
0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d,
|
||||
0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42,
|
||||
0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3,
|
||||
0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x48, 0x29, 0xc6,
|
||||
0x18, 0xc6, 0xd0, 0x21, 0x73, 0xcf, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0x1f, 0x02, 0xcd, 0xb0, 0x10, 0x28, 0x48, 0x73, 0x04, 0x41, 0x31, 0xd4, 0x30, 0x63, 0x2c, 0x62, 0x45, 0x01, 0x43, 0x8d,
|
||||
0x31, 0xc6, 0x18, 0x86, 0xdc, 0x4d, 0xc3, 0xe5, 0x4f, 0xd8, 0x43, 0x48, 0xfe, 0x4a, 0x48, 0x2b, 0x31, 0xf9, 0xc8, 0x6d, 0xa3, 0x62, 0x8c, 0x31, 0x46, 0x29, 0xe0, 0x50, 0x63, 0x50, 0x1c, 0x08,
|
||||
0x98, 0x89, 0x0c, 0xc6, 0x81, 0x1d, 0xc2, 0x61, 0x1e, 0xe6, 0xc1, 0x0d, 0x66, 0x81, 0x1e, 0xe4, 0xa1, 0x1e, 0xc6, 0x81, 0x1e, 0xea, 0x41, 0x1e, 0xca, 0x81, 0x1c, 0x44, 0xa1, 0x1e, 0xcc, 0xc1,
|
||||
0x1c, 0xca, 0x41, 0x1e, 0xf8, 0xa0, 0x1e, 0xdc, 0x61, 0x1e, 0xd2, 0xe1, 0x1c, 0xdc, 0xa1, 0x1c, 0xc8, 0x01, 0x0c, 0xd2, 0xc1, 0x1d, 0xe8, 0xc1, 0x0f, 0x50, 0x30, 0x88, 0xce, 0x64, 0x06, 0xe3,
|
||||
0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b, 0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6, 0x60, 0x0e,
|
||||
0xe5, 0x20, 0x0f, 0x7c, 0x50, 0x0f, 0xee, 0x30, 0x0f, 0xe9, 0x70, 0x0e, 0xee, 0x50, 0x0e, 0xe4, 0x00, 0x06, 0xe9, 0xe0, 0x0e, 0xf4, 0xe0, 0x07, 0x28, 0x18, 0x64, 0xe7, 0x08, 0x40, 0x61, 0x0a,
|
||||
0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
|
||||
0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e,
|
||||
0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10,
|
||||
0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78,
|
||||
0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x0e, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x10, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x30, 0xe4, 0x61, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0xe3, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x27, 0x02,
|
||||
0x02, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x2c, 0x10, 0x0a, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a,
|
||||
0x25, 0x30, 0x02, 0x50, 0x08, 0xc5, 0x50, 0x18, 0x05, 0x42, 0x6b, 0x04, 0x80, 0x70, 0x81, 0x02, 0x02, 0xd1, 0x9d, 0x01, 0xa0, 0x3a, 0x03, 0x00, 0x79, 0x18, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0xc4, 0x88, 0x0c, 0x6f, 0xec, 0xed, 0x4d, 0x0c, 0x44, 0x06, 0x26, 0x26, 0xc7, 0x05, 0xa6, 0xc6, 0x05, 0x06, 0x66, 0x43, 0x10, 0x4c, 0x10, 0x86, 0x63,
|
||||
0x82, 0x30, 0x20, 0x1b, 0x84, 0x81, 0x98, 0x20, 0x0c, 0xc9, 0x06, 0x61, 0x30, 0x28, 0x8c, 0xcd, 0x4d, 0x10, 0x06, 0x65, 0xc3, 0x80, 0x24, 0xc4, 0x04, 0xa1, 0x82, 0x08, 0x4c, 0x10, 0x86, 0x65,
|
||||
0x43, 0x42, 0x2c, 0x0c, 0x41, 0x0c, 0x0d, 0x71, 0x6c, 0x08, 0x9c, 0x09, 0xc2, 0xf5, 0x4c, 0x10, 0x96, 0x66, 0xc3, 0x42, 0x40, 0x0c, 0x41, 0x0c, 0x4d, 0x14, 0x45, 0xc7, 0x86, 0x40, 0xda, 0x40,
|
||||
0x3c, 0x13, 0x00, 0x4c, 0x10, 0x04, 0x60, 0x03, 0xb0, 0x61, 0x20, 0x2c, 0x6b, 0x43, 0x70, 0x6d, 0x18, 0x86, 0x0a, 0x23, 0xd1, 0x16, 0x96, 0xe6, 0x36, 0x41, 0xc0, 0x9c, 0x09, 0xc2, 0xc0, 0x6c,
|
||||
0x18, 0xb8, 0x61, 0xd8, 0x40, 0x10, 0x9b, 0xd1, 0x6d, 0x28, 0x2a, 0x0d, 0xa0, 0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b,
|
||||
0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9,
|
||||
0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d, 0x09, 0x92, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xa9, 0x12, 0x19, 0x9e, 0x0b, 0x5d, 0x1e, 0x5c, 0x59,
|
||||
0x90, 0x9b, 0xdb, 0x1b, 0x5d, 0x18, 0x5d, 0xda, 0x9b, 0xdb, 0xdc, 0x94, 0x00, 0xab, 0x43, 0x86, 0xe7, 0x52, 0xe6, 0x46, 0x27, 0x97, 0x07, 0xf5, 0x96, 0xe6, 0x46, 0x37, 0x37, 0x25, 0xf0, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
|
||||
0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
|
||||
0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
|
||||
0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
|
||||
0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
|
||||
0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
|
||||
0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
|
||||
0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
|
||||
0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x8c, 0xcc, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x74, 0x60, 0x07, 0x37, 0x90, 0x87, 0x72, 0x98, 0x87, 0x77, 0xa8,
|
||||
0x07, 0x79, 0x18, 0x87, 0x72, 0x70, 0x83, 0x70, 0xa0, 0x07, 0x7a, 0x90, 0x87, 0x74, 0x10, 0x87, 0x7a, 0xa0, 0x87, 0x72, 0x00, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x66, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0x5b, 0x41, 0x35, 0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44,
|
||||
0xf8, 0xc8, 0x6d, 0x1b, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x42, 0x44, 0x00, 0x13, 0x11, 0x02, 0xcd, 0xb0, 0x10, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83,
|
||||
0x8f, 0xdc, 0xb6, 0x09, 0x58, 0xc3, 0xe5, 0x3b, 0x8f, 0x6f, 0x01, 0x15, 0xa1, 0x09, 0x13, 0x52, 0x11, 0xe8, 0xe3, 0x23, 0xb7, 0x6d, 0x03, 0xdb, 0x70, 0xf9, 0xce, 0xe3, 0x5b, 0x40, 0x45, 0xac,
|
||||
0x04, 0x30, 0x94, 0x40, 0x43, 0x7c, 0x48, 0x24, 0x4d, 0x3e, 0x72, 0xdb, 0x06, 0x40, 0x30, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x13, 0x04, 0x43, 0x2c,
|
||||
0x10, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a, 0xae, 0x74, 0x03, 0x0a, 0x31, 0xa0, 0xd8, 0x03, 0xca, 0x3d, 0xa0, 0x14, 0x03, 0xc8, 0x94, 0xc0, 0x08, 0x00, 0x00, 0x00,
|
||||
0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xc0, 0x58, 0x88, 0x20, 0x49, 0xcd, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x30, 0x57, 0x22, 0x4c, 0x93, 0x33, 0x62, 0x60, 0x00, 0x20, 0x08, 0x06, 0xc4, 0x96,
|
||||
0x50, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x10, 0x5d, 0x89, 0x10, 0x54, 0xa3, 0x09, 0x01, 0x30, 0x4b, 0x10, 0x8c, 0x18, 0x18, 0x00, 0x08, 0x82, 0x01, 0xc1, 0x29, 0xc1, 0x70, 0x83, 0x10, 0x80,
|
||||
0xc1, 0x2c, 0x83, 0x10, 0x04, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0x70, 0x7c, 0xcb, 0x80, 0x24, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0xe0, 0x74, 0x0b, 0x62, 0x68, 0x41, 0x14, 0x45, 0x0a, 0x02,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE reconvergence_code = SHADER_BYTECODE(reconvergence_dxil);
|
||||
|
||||
/* Compiled with Version: dxcompiler.dll: 1.6 - 1.6.2112.12 (770ac0cc1); dxil.dll: 1.6(101.6.2112.2)
|
||||
* This version of DXC seems to work around the control flow issue and explicitly hoists the wave-op out of the branch
|
||||
* to avoid convergence, most curious! */
|
||||
#if 0
|
||||
@dx.break.cond = internal constant [1 x i32] zeroinitializer
|
||||
define void @main() {
|
||||
%1 = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.break.cond, i32 0, i32 0)
|
||||
%2 = icmp eq i32 %1, 0
|
||||
%3 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 0, i1 false) ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
|
||||
%4 = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false) ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
|
||||
%5 = call i32 @dx.op.threadId.i32(i32 93, i32 0) ; ThreadId(component)
|
||||
%6 = call %dx.types.ResRet.i32 @dx.op.bufferLoad.i32(i32 68, %dx.types.Handle %4, i32 %5, i32 0) ; BufferLoad(srv,index,wot)
|
||||
%7 = extractvalue %dx.types.ResRet.i32 %6, 0
|
||||
br label %8
|
||||
|
||||
; <label>:8 ; preds = %13, %0
|
||||
%9 = call i32 @dx.op.waveReadLaneFirst.i32(i32 118, i32 %7) ; WaveReadLaneFirst(value)
|
||||
%10 = icmp eq i32 %7, %9
|
||||
br i1 %10, label %11, label %13
|
||||
|
||||
; <label>:11 ; preds = %8
|
||||
%12 = call i32 @dx.op.waveActiveOp.i32(i32 119, i32 %7, i8 0, i8 1) ; WaveActiveOp(value,op,sop)
|
||||
; Wow ... Load a constant 0 from a Private array and branch on that.
|
||||
br i1 %2, label %14, label %13
|
||||
|
||||
; <label>:13 ; preds = %11, %8
|
||||
br label %8
|
||||
|
||||
; <label>:14 ; preds = %11
|
||||
call void @dx.op.bufferStore.i32(i32 69, %dx.types.Handle %3, i32 %5, i32 0, i32 %12, i32 undef, i32 undef, i32 undef, i8 1) ; BufferStore(uav,coord0,coord1,value0,value1,value2,value3,mask)
|
||||
ret void
|
||||
}
|
||||
#endif
|
||||
static const BYTE nonconvergence_dxil[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x6a, 0xe3, 0x1a, 0x21, 0xec, 0x33, 0x8f, 0x47, 0xcb, 0xd9, 0x75, 0x47, 0x59, 0x62, 0x3f, 0x1f, 0x01, 0x00, 0x00, 0x00, 0x6c, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x04, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x78, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x96, 0xd4, 0x61, 0x94, 0x9f, 0xd5, 0x2a, 0x70, 0x1d, 0xed, 0x1f, 0xc3,
|
||||
0x53, 0x48, 0x48, 0x9e, 0x44, 0x58, 0x49, 0x4c, 0x60, 0x06, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x98, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x48, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde, 0x21, 0x0c, 0x00, 0x00, 0x8f, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91,
|
||||
0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39, 0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14,
|
||||
0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88, 0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c,
|
||||
0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06, 0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff,
|
||||
0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff, 0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06,
|
||||
0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14,
|
||||
0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c, 0x10, 0x74, 0x73, 0x04, 0x60, 0x90, 0x01, 0x80, 0xc2, 0x08, 0x40, 0x09, 0x06, 0x91, 0x32, 0x00, 0x00, 0xc8, 0xcc, 0x11, 0x20, 0xa5, 0x00, 0x00,
|
||||
0x20, 0x44, 0x89, 0xd0, 0x3d, 0xc3, 0xe5, 0x4f, 0xd8, 0x43, 0x48, 0x7e, 0x08, 0x34, 0xc3, 0x42, 0xa0, 0x60, 0xcd, 0x11, 0x04, 0xc5, 0x60, 0x00, 0x01, 0xd0, 0xc8, 0x15, 0x65, 0x00, 0x06, 0x00,
|
||||
0x00, 0x00, 0x20, 0x82, 0x37, 0x0d, 0x97, 0x3f, 0x61, 0x0f, 0x21, 0xf9, 0x2b, 0x21, 0xad, 0xc4, 0xe4, 0x23, 0xb7, 0x8d, 0x0a, 0x00, 0x00, 0x00, 0xa5, 0x90, 0x80, 0x01, 0x40, 0x73, 0x20, 0x60,
|
||||
0x26, 0x32, 0x18, 0x07, 0x76, 0x08, 0x87, 0x79, 0x98, 0x07, 0x37, 0x98, 0x05, 0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73,
|
||||
0x28, 0x07, 0x79, 0xe0, 0x83, 0x7a, 0x70, 0x87, 0x79, 0x48, 0x87, 0x73, 0x70, 0x87, 0x72, 0x20, 0x07, 0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x3f, 0x40, 0x01, 0x20, 0x3b, 0x93, 0x19, 0x8c, 0x03,
|
||||
0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc, 0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94,
|
||||
0x83, 0x3c, 0xf0, 0x41, 0x3d, 0xb8, 0xc3, 0x3c, 0xa4, 0xc3, 0x39, 0xb8, 0x43, 0x39, 0x90, 0x03, 0x18, 0xa4, 0x83, 0x3b, 0xd0, 0x83, 0x1f, 0xa0, 0x00, 0x10, 0x9e, 0x23, 0x00, 0x05, 0x02, 0x53,
|
||||
0x00, 0x00, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
|
||||
0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e,
|
||||
0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10,
|
||||
0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78,
|
||||
0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x3a, 0x0f, 0x24, 0x90, 0x21, 0x23, 0x25, 0x40, 0x00, 0x1e, 0xa6, 0x31, 0xe4, 0x21, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x63, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x27, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x80, 0x21, 0x8f, 0x01, 0x04, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x07, 0x08, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x10, 0x10, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x26, 0x20, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc8, 0x02, 0x01, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
|
||||
0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x02, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08, 0x05, 0x42, 0xba, 0x40, 0x01, 0x81, 0xa8, 0x8d, 0x00, 0xd0, 0x9d, 0x01,
|
||||
0xa0, 0x3c, 0x03, 0x40, 0x61, 0x04, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
|
||||
0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
|
||||
0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x04, 0x00, 0x99, 0x20, 0x00, 0xc9, 0x06, 0x61, 0x20, 0x26, 0x08, 0x80, 0xb2, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
|
||||
0x17, 0x44, 0x60, 0x82, 0x00, 0x2c, 0x13, 0x04, 0x80, 0xd9, 0x20, 0x0c, 0xcd, 0x86, 0x84, 0x50, 0x16, 0x82, 0x18, 0x18, 0xc2, 0xd9, 0x10, 0x3c, 0x13, 0x84, 0x2c, 0x9a, 0x20, 0x34, 0xcf, 0x86,
|
||||
0x85, 0x88, 0x16, 0x82, 0x18, 0x18, 0x49, 0x92, 0x9c, 0x0d, 0xc1, 0xb4, 0x81, 0x80, 0x28, 0x00, 0x98, 0x20, 0x14, 0x01, 0x89, 0xb6, 0xb0, 0x34, 0xb7, 0x09, 0x82, 0xe6, 0x4c, 0x10, 0x80, 0x66,
|
||||
0xc3, 0x90, 0x0d, 0xc3, 0x06, 0x82, 0xc0, 0x1a, 0x6d, 0x43, 0x61, 0x5d, 0x40, 0xb5, 0x55, 0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c,
|
||||
0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c,
|
||||
0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x50, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4,
|
||||
0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1b, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
|
||||
0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
|
||||
0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
|
||||
0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
|
||||
0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
|
||||
0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
|
||||
0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
|
||||
0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
|
||||
0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
|
||||
0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xcc, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe2, 0x20, 0x0f, 0xe5, 0x10, 0x0e, 0xeb, 0xe0, 0x06, 0xe2, 0x20, 0x0f, 0x33, 0x22, 0x88, 0x1c, 0xf0, 0xc1,
|
||||
0x0d, 0xc8, 0x41, 0x1c, 0xce, 0xc1, 0x0d, 0xec, 0x21, 0x1c, 0xe4, 0x81, 0x1d, 0xc2, 0x21, 0x1f, 0xde, 0xa1, 0x1e, 0xe8, 0x01, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x06, 0xd0, 0x0c, 0x97, 0x1f, 0x44, 0x04, 0xa0, 0xf8, 0x82, 0xd3, 0x0c, 0x76, 0x40, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0x95, 0x0e, 0x30, 0xf8, 0xc8, 0x6d, 0x9b, 0x41, 0x35,
|
||||
0x5c, 0xbe, 0xf3, 0xf8, 0x01, 0x55, 0x14, 0x44, 0xc4, 0x4e, 0x4e, 0x44, 0xf8, 0xc8, 0x6d, 0x5b, 0x81, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0x42, 0x44, 0x00, 0x13, 0x11, 0x02, 0xcd, 0xb0, 0x10, 0x26,
|
||||
0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04, 0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x0d, 0x58, 0xc3, 0xe5, 0x3b, 0x8f, 0x6f, 0x01, 0x15, 0xa1, 0x09, 0x13, 0x52, 0x11, 0xe8, 0xe3, 0x23,
|
||||
0xb7, 0x6d, 0x04, 0xdb, 0x70, 0xf9, 0xce, 0xe3, 0x5b, 0x40, 0x45, 0xac, 0x04, 0x30, 0x94, 0x40, 0x43, 0x7c, 0x48, 0x24, 0x4d, 0x3e, 0x72, 0xdb, 0x16, 0x40, 0x30, 0x00, 0xd2, 0x00, 0x00, 0x00,
|
||||
0x61, 0x20, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x13, 0x04, 0x45, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x66, 0x00, 0x4a, 0xae, 0x74, 0x03, 0x0a, 0x31, 0xa0, 0xd8, 0x03,
|
||||
0xca, 0x3d, 0xa0, 0x14, 0x03, 0x08, 0x95, 0xc0, 0x08, 0x00, 0xed, 0xa1, 0x8e, 0x40, 0x00, 0x80, 0x04, 0x48, 0x00, 0x00, 0x14, 0x00, 0x30, 0xdc, 0x10, 0x54, 0x60, 0x30, 0x62, 0x90, 0x00, 0x20,
|
||||
0x08, 0x06, 0x8e, 0xb6, 0x14, 0x96, 0x05, 0x8d, 0x18, 0x24, 0x00, 0x08, 0x82, 0x81, 0xb3, 0x31, 0xc5, 0x75, 0x45, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x60, 0x7c, 0x0c, 0x36, 0x62, 0x80, 0x00,
|
||||
0x20, 0x08, 0x06, 0xd3, 0xc6, 0x08, 0x41, 0x36, 0x9a, 0x10, 0x00, 0xb3, 0x04, 0xc1, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x18, 0x60, 0xd0, 0x04, 0xc3, 0x0d, 0x42, 0x00, 0x06, 0xb3, 0x0c, 0xc2,
|
||||
0x10, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0x32, 0x06, 0xce, 0xb0, 0x30, 0xb3, 0x0c, 0xc4, 0x90, 0xcc, 0x12, 0x04, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x00, 0x85, 0x81, 0x83, 0x18, 0x5e,
|
||||
0x40, 0x51, 0x54, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE nonconvergence_code = SHADER_BYTECODE(nonconvergence_dxil);
|
||||
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
rs_desc.pParameters = root_parameters;
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
if (!context_supports_dxil(&context))
|
||||
{
|
||||
skip("Context does not support DXIL.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
pso_converged = create_compute_pipeline_state(context.device, context.root_signature, reconvergence_code);
|
||||
pso_nonconverged = create_compute_pipeline_state(context.device, context.root_signature, nonconvergence_code);
|
||||
|
||||
inputs = create_upload_buffer(context.device, sizeof(inputs_data), inputs_data);
|
||||
outputs = create_default_buffer(context.device, sizeof(inputs_data) * 2,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_converged);
|
||||
ID3D12GraphicsCommandList_SetComputeRootShaderResourceView(context.list, 0,
|
||||
ID3D12Resource_GetGPUVirtualAddress(inputs));
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(outputs));
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(outputs) + sizeof(reference_converged));
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso_nonconverged);
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
|
||||
transition_resource_state(context.list, outputs,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(outputs, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(reference_converged); i++)
|
||||
{
|
||||
uint32_t v = get_readback_uint(&rb, i, 0, 0);
|
||||
ok(v == reference_converged[i], "Element %u, %u != %u.\n", i, v, reference_converged[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(reference_nonconverged); i++)
|
||||
{
|
||||
uint32_t v = get_readback_uint(&rb, i + 16, 0, 0);
|
||||
ok(v == reference_nonconverged[i], "Element %u, %u != %u.\n", i, v, reference_nonconverged[i]);
|
||||
}
|
||||
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(inputs);
|
||||
ID3D12Resource_Release(outputs);
|
||||
ID3D12PipelineState_Release(pso_converged);
|
||||
ID3D12PipelineState_Release(pso_nonconverged);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_shader_sm65_wave_intrinsics(void)
|
||||
{
|
||||
D3D12_FEATURE_DATA_SHADER_MODEL shader_model;
|
||||
|
@ -4550,7 +4834,7 @@ void test_shader_sm66_is_helper_lane(void)
|
|||
{
|
||||
/* Oh, hi there. */
|
||||
static const float alpha_keys[4] = { 0.75f, 2.25f, 3.25f, 3.75f };
|
||||
static const struct vec4 white = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
static const float white[] = { 1.0f, 1.0f, 1.0f, 1.0f };
|
||||
D3D12_FEATURE_DATA_SHADER_MODEL shader_model;
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
|
@ -4757,7 +5041,7 @@ void test_shader_sm66_is_helper_lane(void)
|
|||
ID3D12Device_CreateUnorderedAccessView(context.device, atomic_buffer, NULL, &uav_desc, cpu_handle);
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(command_list, 1, &heap);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, &white.x, 0, NULL);
|
||||
ID3D12GraphicsCommandList_ClearRenderTargetView(command_list, context.rtv, white, 0, NULL);
|
||||
ID3D12GraphicsCommandList_OMSetRenderTargets(command_list, 1, &context.rtv, false, NULL);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
|
@ -4804,7 +5088,7 @@ void test_shader_sm66_is_helper_lane(void)
|
|||
expected.w = 8881.0f;
|
||||
}
|
||||
else
|
||||
expected = white;
|
||||
memcpy(&expected, white, sizeof(white));
|
||||
|
||||
ok(compare_vec4(value, &expected, 0), "Mismatch pixel %u, %u, (%f %f %f %f) != (%f %f %f %f).\n",
|
||||
x, y, expected.x, expected.y, expected.z, expected.w,
|
||||
|
@ -4820,3 +5104,588 @@ void test_shader_sm66_is_helper_lane(void)
|
|||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
void test_advanced_cbv_layout(void)
|
||||
{
|
||||
/* This is extremely cursed in DXC ... D: */
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS4 features4;
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS1 features1;
|
||||
D3D12_CONSTANT_BUFFER_VIEW_DESC cbv_desc;
|
||||
D3D12_ROOT_PARAMETER root_parameters[3];
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
D3D12_DESCRIPTOR_RANGE range[1];
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12DescriptorHeap *heap;
|
||||
ID3D12Resource *cbv_buffer;
|
||||
ID3D12Resource *uav_buffer;
|
||||
uint32_t input_buffer[64];
|
||||
ID3D12PipelineState *pso;
|
||||
bool support_16bit;
|
||||
bool support_64bit;
|
||||
unsigned int i, j;
|
||||
|
||||
#if 0
|
||||
cbuffer Cbuf : register(b0)
|
||||
{
|
||||
uint64_t4 values_root[8];
|
||||
};
|
||||
|
||||
cbuffer Cbuf : register(b0, space1)
|
||||
{
|
||||
uint64_t4 values_table[8];
|
||||
};
|
||||
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
|
||||
uint pack4(uint4 v)
|
||||
{
|
||||
return v.x | (v.y << 8) | (v.z << 16) | (v.w << 24);
|
||||
}
|
||||
|
||||
[numthreads(8, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint64_t4 v = values_root[thr] + values_table[thr];
|
||||
uint4 lo = uint4(v);
|
||||
uint4 hi = uint4(v >> 32);
|
||||
RWBuf[2 * thr + 0] = pack4(lo);
|
||||
RWBuf[2 * thr + 1] = pack4(hi);
|
||||
}
|
||||
#endif
|
||||
static const BYTE cs_legacy_uint64_code[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x96, 0xca, 0xa3, 0x18, 0x56, 0xf8, 0xd5, 0xa3, 0xd8, 0xc2, 0x6f, 0x4e, 0x51, 0x02, 0xf3, 0x54, 0x01, 0x00, 0x00, 0x00, 0x88, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x81, 0x75, 0xa5, 0x06, 0x98, 0x3e, 0x10, 0x1e, 0x1e, 0xd3, 0x41, 0xf1, 0xd3, 0xdf, 0xbb, 0x4f, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x64, 0x06, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x99, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x4c, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
|
||||
0x21, 0x0c, 0x00, 0x00, 0x90, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
|
||||
0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
|
||||
0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
|
||||
0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62,
|
||||
0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
|
||||
0x31, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c,
|
||||
0x10, 0x6c, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0xa0, 0x70, 0xd4, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xcf, 0x6d, 0x54, 0xb1, 0x12, 0x93,
|
||||
0x8f, 0xe8, 0x38, 0x12, 0x8c, 0x99, 0x23, 0x40, 0x08, 0xdd, 0x33, 0x5c, 0xfe, 0x84, 0x3d, 0x84, 0xe4, 0x87, 0x40, 0x33, 0x2c, 0x04, 0x0a, 0x52, 0x21, 0xce, 0x50, 0x83, 0xd6, 0x1c, 0x41, 0x50,
|
||||
0x0c, 0x35, 0xd0, 0x18, 0x8d, 0x5c, 0x51, 0xc0, 0x50, 0x63, 0x8c, 0x31, 0x06, 0x22, 0x38, 0x10, 0x30, 0x93, 0x19, 0x8c, 0x03, 0x3b, 0x84, 0xc3, 0x3c, 0xcc, 0x83, 0x1b, 0xc8, 0xc2, 0x2d, 0xcc,
|
||||
0x02, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xd4, 0x83, 0x3c, 0x94, 0x03, 0x39, 0x88, 0x42, 0x3d, 0x98, 0x83, 0x39, 0x94, 0x83, 0x3c, 0xf0, 0x41, 0x3d, 0xb8, 0xc3, 0x3c, 0xa4, 0xc3, 0x39,
|
||||
0xb8, 0x43, 0x39, 0x90, 0x03, 0x18, 0xa4, 0x83, 0x3b, 0xd0, 0x83, 0x1f, 0xa0, 0x60, 0xd0, 0x1c, 0x46, 0x20, 0x8c, 0x44, 0xa8, 0x47, 0x70, 0x01, 0x55, 0xa0, 0xc0, 0xd2, 0x3d, 0x83, 0x0b, 0xa8,
|
||||
0xc2, 0xa7, 0x51, 0x60, 0x29, 0x03, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e,
|
||||
0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20,
|
||||
0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74,
|
||||
0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07,
|
||||
0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86,
|
||||
0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x16, 0x20, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x38, 0x40, 0x00, 0x08, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x81, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x0b, 0x04, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14,
|
||||
0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08, 0x05, 0x48, 0x50, 0x10, 0x64, 0x0a, 0x14, 0x10, 0x30, 0x80, 0xda, 0x08,
|
||||
0x00, 0xd1, 0x19, 0x00, 0xc2, 0x33, 0x00, 0xa4, 0x67, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b,
|
||||
0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81,
|
||||
0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06,
|
||||
0xc4, 0x20, 0x26, 0x08, 0x94, 0x43, 0x60, 0x82, 0x30, 0x20, 0x13, 0x84, 0xa6, 0x99, 0x20, 0x0c, 0xc9, 0x06, 0x61, 0x70, 0x36, 0x2c, 0x84, 0xb2, 0x10, 0xc4, 0xc0, 0x34, 0x4d, 0xf3, 0x6c, 0x08,
|
||||
0xa0, 0x09, 0x02, 0xf6, 0x4c, 0x10, 0x06, 0x65, 0x03, 0x42, 0x48, 0x0b, 0x41, 0x0c, 0x13, 0x30, 0x41, 0xd0, 0xa0, 0x0d, 0xc8, 0x50, 0x2d, 0x03, 0x31, 0x4c, 0xc0, 0x06, 0x81, 0xb2, 0x36, 0x10,
|
||||
0x40, 0x74, 0x01, 0x13, 0x04, 0x01, 0x20, 0xd1, 0x16, 0x96, 0xe6, 0x36, 0x41, 0x30, 0x98, 0x09, 0xc2, 0xb0, 0x6c, 0x18, 0xb8, 0x61, 0xd8, 0x40, 0x10, 0x9b, 0xd3, 0x6d, 0x28, 0x32, 0x0d, 0xc0,
|
||||
0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7,
|
||||
0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72,
|
||||
0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xab, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0xc0, 0x03, 0x00, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
|
||||
0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
|
||||
0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
|
||||
0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
|
||||
0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
|
||||
0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
|
||||
0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
|
||||
0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
|
||||
0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
|
||||
0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x46, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x26, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03,
|
||||
0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0x3a, 0x6e, 0x03, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0x58, 0x80, 0x34, 0x5c, 0xbe, 0xf3, 0xf8, 0xd3,
|
||||
0x11, 0x11, 0xc0, 0x20, 0x0e, 0x3e, 0x72, 0xdb, 0x06, 0x40, 0x30, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x4b, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00,
|
||||
0x0a, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0xca, 0xae, 0xe4, 0x4a, 0x31, 0xa0, 0x74, 0x03, 0x0a, 0xa4, 0x60, 0x2a, 0x90, 0xa0, 0x02, 0x01, 0x11, 0x2a, 0x10, 0x10, 0x10, 0x82, 0x4c, 0x01, 0x15,
|
||||
0x4c, 0x81, 0x14, 0x04, 0xa1, 0x12, 0x28, 0x02, 0x00, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xe0, 0x78, 0x8e, 0xb0, 0x6d, 0xd4, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x38, 0xdf,
|
||||
0x23, 0x74, 0x5c, 0x35, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x0e, 0x18, 0x40, 0x43, 0xd7, 0x59, 0x23, 0x06, 0x06, 0x00, 0x82, 0x60, 0x40, 0x8c, 0xc1, 0xe3, 0x55, 0x00, 0x06, 0x37, 0x62, 0x70,
|
||||
0x00, 0x20, 0x08, 0x06, 0x0b, 0x19, 0x50, 0x43, 0x30, 0x9a, 0x10, 0x00, 0xa3, 0x09, 0x42, 0x50, 0x04, 0x19, 0xec, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x68, 0x80, 0x1d, 0xc1, 0x68, 0x42,
|
||||
0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x6b, 0xb0, 0x2d, 0xc8, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0xc1, 0x88, 0xc1, 0x01, 0x80, 0x20, 0x18, 0x2c, 0x6e, 0xe0,
|
||||
0x39, 0xc7, 0x68, 0x42, 0x00, 0x8c, 0x26, 0x08, 0x41, 0x15, 0x0c, 0x54, 0xc1, 0x40, 0x11, 0x0a, 0x14, 0xa1, 0xc0, 0x11, 0x03, 0x8e, 0x18, 0x70, 0xc4, 0x80, 0x23, 0x06, 0x14, 0x02, 0x06, 0x78,
|
||||
0xc1, 0x80, 0x2a, 0xe4, 0xe0, 0x2a, 0x38, 0xb6, 0x8c, 0x34, 0xb8, 0x12, 0x82, 0xad, 0x43, 0x0d, 0xae, 0x84, 0x60, 0x47, 0x0c, 0x1a, 0x00, 0x04, 0xc1, 0x00, 0xfa, 0x83, 0x37, 0x10, 0x03, 0xaf,
|
||||
0x0f, 0x02, 0x39, 0x90, 0x03, 0x39, 0x20, 0x83, 0x7a, 0xce, 0x00, 0x2f, 0x18, 0x50, 0x41, 0x1b, 0x68, 0x05, 0xca, 0x96, 0xa4, 0x06, 0x78, 0xc1, 0x80, 0x0a, 0xe0, 0x40, 0x8b, 0x08, 0xb6, 0xaa,
|
||||
0x36, 0xc0, 0x0b, 0x06, 0x54, 0x30, 0x07, 0x5a, 0x44, 0xb0, 0x23, 0x06, 0x0d, 0x00, 0x82, 0x60, 0x00, 0xad, 0xc2, 0x1e, 0xb8, 0x81, 0x19, 0xa4, 0x42, 0xe0, 0x07, 0x7e, 0xe0, 0x07, 0x70, 0x80,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
/* -no-legacy-cbuf-layout variant. Tests CBufferLoad vs LoadLegacy() */
|
||||
static const BYTE cs_modern_uint64_code[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x18, 0x99, 0xb1, 0x23, 0x80, 0xf3, 0x05, 0xf7, 0xc0, 0x1e, 0xd6, 0x21, 0x5c, 0xf3, 0xd0, 0xf3, 0x01, 0x00, 0x00, 0x00, 0x94, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9e, 0x97, 0x67, 0xe4, 0xf8, 0x45, 0xd6, 0x79, 0xed, 0x09, 0xc1, 0x5b, 0x49, 0x81, 0xb9, 0xa3, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x70, 0x06, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x9c, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x58, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
|
||||
0x21, 0x0c, 0x00, 0x00, 0x93, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
|
||||
0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
|
||||
0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
|
||||
0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62,
|
||||
0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c,
|
||||
0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0xa0, 0x30, 0x47, 0x80, 0xd0, 0xb9, 0x67, 0xb8, 0xfc, 0x09, 0x7b, 0x08, 0xc9, 0x0f, 0x81,
|
||||
0x66, 0x58, 0x08, 0x14, 0xa0, 0x52, 0x98, 0x91, 0xc6, 0x20, 0x35, 0x47, 0x10, 0x14, 0x23, 0x8d, 0x33, 0x06, 0xa3, 0x56, 0x14, 0x30, 0xd2, 0x18, 0x63, 0x8c, 0x71, 0xe8, 0x0d, 0x04, 0xcc, 0x64,
|
||||
0x06, 0xe3, 0xc0, 0x0e, 0xe1, 0x30, 0x0f, 0xf3, 0xe0, 0x06, 0xb2, 0x70, 0x0b, 0xb3, 0x40, 0x0f, 0xf2, 0x50, 0x0f, 0xe3, 0x40, 0x0f, 0xf5, 0x20, 0x0f, 0xe5, 0x40, 0x0e, 0xa2, 0x50, 0x0f, 0xe6,
|
||||
0x60, 0x0e, 0xe5, 0x20, 0x0f, 0x7c, 0x50, 0x0f, 0xee, 0x30, 0x0f, 0xe9, 0x70, 0x0e, 0xee, 0x50, 0x0e, 0xe4, 0x00, 0x06, 0xe9, 0xe0, 0x0e, 0xf4, 0xe0, 0x07, 0x28, 0x18, 0x24, 0x87, 0x11, 0x08,
|
||||
0x23, 0x11, 0xe8, 0x11, 0x5c, 0x40, 0x15, 0x28, 0xa8, 0x64, 0xcf, 0xe0, 0x02, 0xaa, 0xf0, 0x69, 0x14, 0x54, 0xc2, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
|
||||
0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
|
||||
0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73,
|
||||
0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07,
|
||||
0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x14, 0x20, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x34, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x79, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20,
|
||||
0x0b, 0x04, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08,
|
||||
0x05, 0x48, 0x50, 0x10, 0x64, 0x0a, 0x14, 0x10, 0x30, 0x80, 0xd8, 0x08, 0x00, 0xcd, 0x19, 0x00, 0xba, 0x33, 0x00, 0x94, 0x67, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b,
|
||||
0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20,
|
||||
0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x93, 0x43, 0x60, 0x82, 0x30, 0x20, 0x13, 0x04, 0xa6, 0x99, 0x20, 0x0c, 0xc9, 0x06, 0x61, 0x70,
|
||||
0x36, 0x2c, 0x84, 0xb2, 0x10, 0xc4, 0xc0, 0x34, 0x4d, 0xf3, 0x6c, 0x08, 0xa0, 0x09, 0xc2, 0xf5, 0x4c, 0x10, 0x06, 0x65, 0x03, 0x42, 0x48, 0x0b, 0x41, 0x0c, 0x13, 0x30, 0x41, 0xc8, 0xa0, 0x0d,
|
||||
0xc8, 0x50, 0x2d, 0x03, 0x31, 0x4c, 0xc0, 0x06, 0x81, 0xb2, 0x36, 0x10, 0x40, 0x74, 0x01, 0x13, 0x04, 0x01, 0x20, 0xd1, 0x16, 0x96, 0xe6, 0x36, 0x41, 0x30, 0x98, 0x09, 0xc2, 0xb0, 0x6c, 0x18,
|
||||
0xb8, 0x61, 0xd8, 0x40, 0x10, 0x9b, 0xd3, 0x6d, 0x28, 0x32, 0x0d, 0xc0, 0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d,
|
||||
0x99, 0xdc, 0x5c, 0xda, 0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35,
|
||||
0xbd, 0x91, 0x95, 0xb1, 0x4d, 0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xab, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e,
|
||||
0xd4, 0x5b, 0x9a, 0x1b, 0xdd, 0xdc, 0x94, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88,
|
||||
0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce,
|
||||
0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48,
|
||||
0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e,
|
||||
0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b,
|
||||
0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78,
|
||||
0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1,
|
||||
0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39,
|
||||
0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70,
|
||||
0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f,
|
||||
0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x26, 0x50,
|
||||
0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0xa2, 0xe3, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84,
|
||||
0x05, 0x48, 0xc3, 0xe5, 0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x00, 0x04, 0x03, 0x20, 0x0d, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00,
|
||||
0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x34, 0x8a, 0x6e, 0x06, 0xa0, 0xe4, 0x0a, 0xa6, 0x14, 0x03, 0x4a, 0x37, 0xa0, 0x14, 0x0a, 0xa4, 0x02, 0x09, 0x2a, 0x10,
|
||||
0x10, 0xa1, 0x02, 0x01, 0x01, 0x21, 0xc8, 0x14, 0x50, 0xc1, 0x14, 0x48, 0x41, 0xd0, 0x29, 0x81, 0x22, 0x00, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xd0, 0x7c, 0x8f, 0xc0, 0x71, 0xd5,
|
||||
0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x34, 0x60, 0x00, 0x09, 0x5e, 0x67, 0x8d, 0x18, 0x24, 0x00, 0x08, 0x82, 0x41, 0x13, 0x06, 0xd1, 0xe0, 0x79, 0xd7, 0x88, 0x81, 0x01, 0x80, 0x20, 0x18, 0x10,
|
||||
0x64, 0xf0, 0x7c, 0x15, 0x3c, 0x37, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x4a, 0x19, 0x58, 0x43, 0xc0, 0x95, 0xd0, 0xed, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x28, 0x67, 0x80, 0x15, 0x81, 0x67,
|
||||
0xc2, 0x07, 0x82, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x50, 0xd2, 0x40, 0x3b, 0x02, 0x30, 0x28, 0x03, 0xdb, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x50, 0xd6, 0x80, 0x4b, 0x02, 0x31, 0x18, 0x31,
|
||||
0x40, 0x00, 0x10, 0x04, 0x03, 0x85, 0x0d, 0xba, 0x05, 0x19, 0x83, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x50, 0xda, 0xc0, 0x63, 0x0e, 0x32, 0x18, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0xc5, 0x0d,
|
||||
0xbe, 0xc6, 0x28, 0x83, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x50, 0xde, 0x00, 0x0c, 0x9c, 0xc2, 0x0c, 0x8a, 0x58, 0xa0, 0x08, 0x05, 0x8a, 0x48, 0xa0, 0x08, 0x04, 0x8e, 0x18, 0x70, 0xc4, 0x80,
|
||||
0x23, 0x06, 0x1c, 0x31, 0xa0, 0x10, 0x0f, 0x2f, 0x18, 0x50, 0x45, 0x1c, 0x5c, 0x05, 0xc7, 0x96, 0x61, 0x06, 0x57, 0x42, 0xb0, 0x75, 0xb0, 0xc1, 0x95, 0x10, 0x6c, 0x75, 0x7d, 0x70, 0x23, 0x06,
|
||||
0x0d, 0x00, 0x82, 0x60, 0xf0, 0xfc, 0x81, 0x1b, 0x84, 0x41, 0xd0, 0x07, 0x42, 0x1c, 0xc4, 0x41, 0x1c, 0x8c, 0x41, 0x41, 0x66, 0x80, 0x17, 0x0c, 0xa8, 0x80, 0x0d, 0xb4, 0x82, 0x65, 0x6b, 0x4a,
|
||||
0x03, 0xbc, 0x60, 0x40, 0x05, 0x6f, 0xa0, 0x45, 0x04, 0x5b, 0x16, 0x1b, 0xe0, 0x05, 0x03, 0x2a, 0x90, 0x03, 0x2d, 0x22, 0xd8, 0x6a, 0x54, 0x61, 0x47, 0x0c, 0x1a, 0x00, 0x04, 0xc1, 0xe0, 0x61,
|
||||
0x85, 0x3d, 0x70, 0x83, 0x40, 0x15, 0x04, 0x3f, 0xf0, 0x03, 0x3f, 0x80, 0x03, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
#if 0
|
||||
struct Vec8 { uint16_t4 lo; uint16_t4 hi; };
|
||||
|
||||
cbuffer Cbuf : register(b0)
|
||||
{
|
||||
Vec8 values_root[8];
|
||||
};
|
||||
|
||||
cbuffer Cbuf : register(b0, space1)
|
||||
{
|
||||
Vec8 values_table[8];
|
||||
};
|
||||
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
|
||||
uint pack4(uint4 v)
|
||||
{
|
||||
return v.x | (v.y << 8) | (v.z << 16) | (v.w << 24);
|
||||
}
|
||||
|
||||
[numthreads(8, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
Vec8 v_root = values_root[thr];
|
||||
Vec8 v_table = values_table[thr];
|
||||
uint4 lo = uint4(v_root.lo) + uint4(v_root.hi);
|
||||
uint4 hi = uint4(v_table.lo) + uint4(v_table.hi);
|
||||
RWBuf[2 * thr + 0] = pack4(lo);
|
||||
RWBuf[2 * thr + 1] = pack4(hi);
|
||||
}
|
||||
#endif
|
||||
/* -Tcs_6_2 test.hlsl -Qstrip_reflect -Qstrip_debug -enable-16bit-types */
|
||||
static const BYTE cs_legacy_uint16_code[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x4f, 0xae, 0x04, 0x82, 0xc1, 0xba, 0x0a, 0x10, 0xd2, 0xf7, 0x7f, 0xff, 0x9f, 0x91, 0x46, 0xcc, 0x01, 0x00, 0x00, 0x00, 0xac, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x6d, 0xd7, 0xfd, 0xb1, 0xd3, 0x39, 0xdf, 0x73, 0xd3, 0x6e, 0x62, 0xe5, 0xca, 0x4d, 0x8a, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x88, 0x06, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xa2, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x70, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
|
||||
0x21, 0x0c, 0x00, 0x00, 0x99, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
|
||||
0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
|
||||
0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
|
||||
0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff,
|
||||
0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
|
||||
0x36, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c,
|
||||
0x10, 0x74, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40,
|
||||
0xc1, 0x29, 0x0b, 0x18, 0x68, 0x8c, 0x31, 0xc6, 0x30, 0x83, 0xd2, 0x1c, 0x01, 0x74, 0xd6, 0x70, 0xf9, 0x13, 0xf6, 0x10, 0x92, 0xcf, 0x6d, 0x54, 0xb1, 0x12, 0x93, 0x8f, 0xd4, 0xba, 0xcf, 0x23,
|
||||
0x64, 0xad, 0xb5, 0xd6, 0x5a, 0xab, 0x10, 0x6c, 0xa0, 0x41, 0x6d, 0x8e, 0x20, 0x28, 0x06, 0x1a, 0x66, 0x8c, 0x47, 0x70, 0x20, 0x60, 0x26, 0x33, 0x18, 0x07, 0x76, 0x08, 0x87, 0x79, 0x98, 0x07,
|
||||
0x37, 0x90, 0x85, 0x5b, 0x98, 0x05, 0x7a, 0x90, 0x87, 0x7a, 0x18, 0x07, 0x7a, 0xa8, 0x07, 0x79, 0x28, 0x07, 0x72, 0x10, 0x85, 0x7a, 0x30, 0x07, 0x73, 0x28, 0x07, 0x79, 0xe0, 0x83, 0x7a, 0x70,
|
||||
0x87, 0x79, 0x48, 0x87, 0x73, 0x70, 0x87, 0x72, 0x20, 0x07, 0x30, 0x48, 0x07, 0x77, 0xa0, 0x07, 0x3f, 0x40, 0xc1, 0xa0, 0x39, 0x8c, 0x40, 0x2c, 0xb7, 0x48, 0x53, 0x44, 0x09, 0x93, 0xff, 0x12,
|
||||
0x02, 0x8f, 0x84, 0x5a, 0x13, 0xc1, 0x1e, 0xc1, 0x05, 0x54, 0x81, 0x82, 0x4b, 0xf8, 0x0c, 0x2e, 0xa0, 0x0a, 0x9f, 0x46, 0xc1, 0x25, 0x3d, 0x47, 0x00, 0x0a, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0,
|
||||
0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07,
|
||||
0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90,
|
||||
0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6,
|
||||
0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07,
|
||||
0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90,
|
||||
0x27, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x4f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x43, 0x1e, 0x08, 0x08, 0x80, 0x01, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5,
|
||||
0x30, 0x02, 0x50, 0x18, 0x85, 0x50, 0x80, 0x02, 0x05, 0x41, 0x6f, 0x04, 0x80, 0x78, 0x81, 0x03, 0x02, 0x22, 0x50, 0x9e, 0x01, 0xa0, 0x3d, 0x03, 0x40, 0x74, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99,
|
||||
0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84,
|
||||
0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x03, 0x32, 0x41, 0xa0, 0x22, 0x02, 0x13, 0x84,
|
||||
0x21, 0x99, 0x20, 0x3c, 0xcd, 0x04, 0x61, 0x50, 0x36, 0x08, 0xc3, 0xb3, 0x61, 0x51, 0x16, 0x46, 0x51, 0x86, 0xc6, 0x71, 0x1c, 0x68, 0x43, 0x10, 0x4d, 0x10, 0xb2, 0x67, 0x82, 0x30, 0x2c, 0x1b,
|
||||
0x10, 0x65, 0x62, 0x14, 0x65, 0xa0, 0x80, 0x09, 0xc2, 0x06, 0x6d, 0x40, 0x06, 0x8b, 0x19, 0x94, 0x81, 0x02, 0x36, 0x08, 0xd5, 0xb5, 0x81, 0x00, 0x24, 0x0c, 0x98, 0x20, 0x08, 0x00, 0x89, 0xb6,
|
||||
0xb0, 0x34, 0xb7, 0x09, 0x02, 0xe7, 0x4c, 0x10, 0x06, 0x66, 0xc3, 0xd0, 0x0d, 0xc3, 0x06, 0x42, 0xe1, 0x1e, 0x6f, 0x43, 0xa1, 0x6d, 0x40, 0xf6, 0x55, 0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23,
|
||||
0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17, 0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81,
|
||||
0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b, 0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b,
|
||||
0x12, 0x60, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1f, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00,
|
||||
0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10,
|
||||
0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03,
|
||||
0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e,
|
||||
0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b,
|
||||
0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90,
|
||||
0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e,
|
||||
0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca,
|
||||
0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82,
|
||||
0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06,
|
||||
0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x36, 0xb0, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10,
|
||||
0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x25, 0x61, 0x00, 0x02, 0xe6, 0x23, 0xb5, 0x6e, 0x04, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x0b, 0x11, 0x01, 0x4c, 0x44, 0x08, 0x34, 0xc3, 0x42, 0x98, 0x00,
|
||||
0x36, 0x5c, 0xbe, 0xf3, 0xf8, 0x11, 0x60, 0x6d, 0x54, 0x51, 0x10, 0x11, 0x3b, 0x39, 0x11, 0xe1, 0x23, 0xb7, 0x6d, 0x01, 0xd2, 0x70, 0xf9, 0xce, 0xe3, 0x4f, 0x47, 0x44, 0x00, 0x83, 0x38, 0xf8,
|
||||
0xc8, 0x6d, 0x1b, 0x00, 0xc1, 0x00, 0x48, 0x03, 0x61, 0x20, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x66, 0x00, 0x4a,
|
||||
0xae, 0xec, 0x0a, 0xa4, 0x60, 0x0a, 0x53, 0xa0, 0x74, 0x03, 0xc8, 0x94, 0x40, 0x11, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0x00, 0x5d, 0x88, 0x30, 0x4d, 0xce, 0x88, 0x41, 0x02, 0x80,
|
||||
0x20, 0x18, 0x40, 0x58, 0x22, 0x5c, 0xd4, 0x33, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x50, 0xa6, 0x0c, 0x55, 0x05, 0x8d, 0x18, 0x18, 0x00, 0x08, 0x82, 0x01, 0xd1, 0x19, 0xd6, 0x88, 0xc1, 0x01,
|
||||
0x80, 0x20, 0x18, 0x34, 0xdc, 0x22, 0x04, 0xa3, 0x09, 0x01, 0x30, 0x9a, 0x20, 0x04, 0xa3, 0x09, 0x83, 0x30, 0x9a, 0x40, 0x0c, 0x47, 0x8c, 0x38, 0x62, 0xc4, 0x11, 0x23, 0x8e, 0x18, 0x31, 0x9a,
|
||||
0x90, 0x10, 0xa3, 0x09, 0x4a, 0x31, 0x9a, 0xb0, 0x18, 0xa3, 0x09, 0xcc, 0x71, 0xc4, 0x88, 0x23, 0x46, 0x1c, 0x31, 0xe2, 0x88, 0x11, 0x46, 0x30, 0x30, 0x30, 0x82, 0x81, 0x81, 0x11, 0x0c, 0x0c,
|
||||
0x8c, 0x60, 0x60, 0x30, 0x62, 0x70, 0x00, 0x20, 0x08, 0x06, 0x4d, 0x1c, 0x80, 0x01, 0x66, 0x8d, 0x26, 0x04, 0xc0, 0x68, 0x82, 0x10, 0x8c, 0x26, 0x0c, 0xc2, 0x68, 0x02, 0x31, 0x1c, 0x31, 0xe2,
|
||||
0x88, 0x11, 0x47, 0x8c, 0x38, 0x62, 0xc4, 0x68, 0x42, 0x42, 0x8c, 0x26, 0x28, 0xc5, 0x68, 0xc2, 0x62, 0x8c, 0x26, 0x30, 0xc7, 0x11, 0x23, 0x8e, 0x18, 0x71, 0xc4, 0x88, 0x23, 0x46, 0x18, 0xc1,
|
||||
0xc0, 0xc0, 0x08, 0x06, 0x06, 0x46, 0x30, 0x30, 0x30, 0x82, 0x81, 0x81, 0x61, 0x7d, 0x70, 0x83, 0x0a, 0xb4, 0xad, 0xcc, 0x0e, 0xae, 0x84, 0x60, 0x4b, 0xbb, 0x83, 0x2b, 0x21, 0xd8, 0x8a, 0x03,
|
||||
0x55, 0xb8, 0x11, 0x03, 0x07, 0x00, 0x41, 0x30, 0x48, 0x5c, 0x21, 0x0f, 0xea, 0x20, 0x40, 0x05, 0xc1, 0x0f, 0xfc, 0xc0, 0x0f, 0xee, 0xc0, 0x14, 0x4c, 0x21, 0x85, 0x1b, 0x54, 0xc0, 0x6c, 0x2d,
|
||||
0x7d, 0x70, 0x25, 0x04, 0x5b, 0x8c, 0x1f, 0x5c, 0x09, 0xc1, 0xd6, 0x11, 0x0b, 0x3b, 0x62, 0xe0, 0x00, 0x20, 0x08, 0x06, 0x49, 0x2d, 0x80, 0x02, 0x1f, 0x04, 0xaf, 0x20, 0x94, 0x42, 0x29, 0x94,
|
||||
0x82, 0x1f, 0xb4, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const BYTE cs_modern_uint16_code[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0x39, 0x52, 0xf1, 0x80, 0x79, 0x9b, 0x59, 0xc4, 0x15, 0xb8, 0x28, 0x20, 0x4a, 0x97, 0x3b, 0x4f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x64, 0xa7, 0x4b, 0xfc, 0x17, 0xd7, 0x69, 0x59, 0x79, 0x99, 0x2b, 0x2f, 0x9d, 0x15, 0xb4, 0x44, 0x58, 0x49, 0x4c,
|
||||
0xdc, 0x06, 0x00, 0x00, 0x62, 0x00, 0x05, 0x00, 0xb7, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x02, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0xc4, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
|
||||
0x21, 0x0c, 0x00, 0x00, 0xae, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
|
||||
0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
|
||||
0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
|
||||
0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x84, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x6d, 0x30, 0x86, 0xff,
|
||||
0xff, 0xff, 0xff, 0x1f, 0x00, 0x09, 0xa8, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
|
||||
0x30, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c,
|
||||
0x10, 0x70, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40,
|
||||
0xc1, 0x29, 0x0b, 0x18, 0x68, 0x8c, 0x31, 0xc6, 0x30, 0x83, 0xd2, 0x1c, 0x01, 0x54, 0x8a, 0x35, 0xd0, 0x18, 0xc4, 0xe6, 0x08, 0x82, 0x62, 0xa0, 0x61, 0xc6, 0x70, 0xf4, 0x06, 0x02, 0x66, 0x32,
|
||||
0x83, 0x71, 0x60, 0x87, 0x70, 0x98, 0x87, 0x79, 0x70, 0x03, 0x59, 0xb8, 0x85, 0x59, 0xa0, 0x07, 0x79, 0xa8, 0x87, 0x71, 0xa0, 0x87, 0x7a, 0x90, 0x87, 0x72, 0x20, 0x07, 0x51, 0xa8, 0x07, 0x73,
|
||||
0x30, 0x87, 0x72, 0x90, 0x07, 0x3e, 0xa8, 0x07, 0x77, 0x98, 0x87, 0x74, 0x38, 0x07, 0x77, 0x28, 0x07, 0x72, 0x00, 0x83, 0x74, 0x70, 0x07, 0x7a, 0xf0, 0x03, 0x14, 0x0c, 0x92, 0xc3, 0x08, 0xc4,
|
||||
0x72, 0x8b, 0x34, 0x45, 0x94, 0x30, 0xf9, 0x2f, 0x21, 0xf0, 0x48, 0xa0, 0x34, 0x11, 0xea, 0x11, 0x5c, 0x40, 0x15, 0x28, 0xb0, 0x74, 0xcf, 0xe0, 0x02, 0xaa, 0xf0, 0x69, 0x14, 0x58, 0xca, 0x73,
|
||||
0x04, 0xa0, 0x00, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xae, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d,
|
||||
0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x78, 0xd0, 0x06, 0xe9, 0x10, 0x07,
|
||||
0x76, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x6d, 0x90, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0x60, 0x0e, 0x71, 0x60,
|
||||
0x07, 0x7a, 0x10, 0x07, 0x76, 0xd0, 0x06, 0xe6, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xee, 0x80, 0x07, 0x7a,
|
||||
0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x7a, 0x60, 0x07, 0x74, 0x30, 0xe4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xc8, 0x43, 0x00, 0x01, 0x10, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x90, 0x27, 0x01, 0x02, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x21, 0x0f, 0x03, 0x04, 0xc0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x43, 0x9e, 0x07, 0x08, 0x80, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb2, 0x40, 0x0d, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47,
|
||||
0xc6, 0x04, 0x43, 0x1a, 0x25, 0x50, 0x04, 0xc5, 0x30, 0x02, 0x50, 0x18, 0x85, 0x50, 0x80, 0x02, 0x05, 0x41, 0x6e, 0x04, 0x80, 0x76, 0x81, 0x03, 0x02, 0x22, 0x10, 0x9e, 0x01, 0x20, 0x3d, 0x03,
|
||||
0x40, 0x73, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90, 0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1,
|
||||
0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62, 0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73,
|
||||
0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1, 0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08,
|
||||
0x03, 0x32, 0x41, 0x98, 0x22, 0x02, 0x13, 0x84, 0x21, 0x99, 0x20, 0x38, 0xcd, 0x04, 0x61, 0x50, 0x36, 0x08, 0xc3, 0xb3, 0x61, 0x51, 0x16, 0x46, 0x51, 0x86, 0xc6, 0x71, 0x1c, 0x68, 0x43, 0x10,
|
||||
0x4d, 0x10, 0xb0, 0x67, 0x82, 0x30, 0x2c, 0x1b, 0x10, 0x65, 0x62, 0x14, 0x65, 0xa0, 0x80, 0x09, 0x82, 0x06, 0x6d, 0x40, 0x06, 0x8b, 0x19, 0x94, 0x81, 0x02, 0x36, 0x08, 0xd5, 0xb5, 0x81, 0x00,
|
||||
0x24, 0x0c, 0x98, 0x20, 0x08, 0x00, 0x89, 0xb6, 0xb0, 0x34, 0xb7, 0x09, 0xc2, 0xe6, 0x4c, 0x10, 0x06, 0x66, 0xc3, 0xd0, 0x0d, 0xc3, 0x06, 0x42, 0xe1, 0x1e, 0x6f, 0x43, 0xa1, 0x6d, 0x40, 0xf6,
|
||||
0x55, 0x61, 0x63, 0xb3, 0x6b, 0x73, 0x49, 0x23, 0x2b, 0x73, 0xa3, 0x9b, 0x12, 0x04, 0x55, 0xc8, 0xf0, 0x5c, 0xec, 0xca, 0xe4, 0xe6, 0xd2, 0xde, 0xdc, 0xa6, 0x04, 0x44, 0x13, 0x32, 0x3c, 0x17,
|
||||
0xbb, 0x30, 0x36, 0xbb, 0x32, 0xb9, 0x29, 0x81, 0x51, 0x87, 0x0c, 0xcf, 0x65, 0x0e, 0x2d, 0x8c, 0xac, 0x4c, 0xae, 0xe9, 0x8d, 0xac, 0x8c, 0x6d, 0x4a, 0x80, 0x94, 0x21, 0xc3, 0x73, 0x91, 0x2b,
|
||||
0x9b, 0x7b, 0xab, 0x93, 0x1b, 0x2b, 0x9b, 0x9b, 0x12, 0x60, 0x75, 0xc8, 0xf0, 0x5c, 0xca, 0xdc, 0xe8, 0xe4, 0xf2, 0xa0, 0xde, 0xd2, 0xdc, 0xe8, 0xe6, 0xa6, 0x04, 0x1f, 0x00, 0x00, 0x00, 0x00,
|
||||
0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3, 0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73,
|
||||
0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30, 0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b,
|
||||
0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07, 0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20,
|
||||
0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d, 0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61,
|
||||
0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76, 0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87,
|
||||
0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87, 0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98,
|
||||
0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61,
|
||||
0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8, 0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b,
|
||||
0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87, 0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8,
|
||||
0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00, 0x71, 0x20, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x36, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0x52, 0xeb, 0x46, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33,
|
||||
0x2c, 0x84, 0x09, 0x60, 0xc3, 0xe5, 0x3b, 0x8f, 0x1f, 0x01, 0xd6, 0x46, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x16, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xfe, 0x74, 0x44, 0x04,
|
||||
0x30, 0x88, 0x83, 0x8f, 0xdc, 0xb6, 0x01, 0x10, 0x0c, 0x80, 0x34, 0x00, 0x61, 0x20, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x34, 0x8a, 0x6e, 0x06, 0xa0, 0xe4, 0x0a, 0xa4, 0x60, 0x0a, 0x53, 0xa0, 0x74, 0x03, 0xc8, 0x94, 0x40, 0x11, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00, 0x82, 0x60, 0xf0, 0x5c, 0x87, 0x30, 0x4d, 0xce,
|
||||
0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x3c, 0x18, 0x22, 0x5c, 0xd4, 0x33, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x4f, 0x96, 0x0c, 0x55, 0x05, 0x8d, 0x18, 0x18, 0x00, 0x08, 0x82, 0x01, 0xd1, 0x19,
|
||||
0x56, 0x05, 0xd5, 0x8d, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd3, 0x39, 0x43, 0x40, 0x95, 0x70, 0xed, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x30, 0x1f, 0x54, 0x04, 0x96, 0x09, 0x19, 0x08, 0x46,
|
||||
0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x09, 0x03, 0xe9, 0x08, 0xb0, 0x32, 0xb8, 0x1d, 0x31, 0x40, 0x00, 0x10, 0x04, 0x03, 0x66, 0x0c, 0xa8, 0x24, 0xd0, 0xee, 0x18, 0x71, 0xc6, 0x88, 0x2b, 0x46,
|
||||
0x1c, 0x31, 0xa2, 0x98, 0x6f, 0x47, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x80, 0x49, 0x03, 0xed, 0x09, 0xc0, 0xa0, 0x90, 0x31, 0x80, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x60, 0xd6, 0x80, 0x8b, 0x02,
|
||||
0x31, 0x28, 0x65, 0x0c, 0x60, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x98, 0x36, 0xf0, 0xa6, 0x80, 0x0c, 0xae, 0x18, 0x71, 0xc4, 0x88, 0x1b, 0x46, 0xd8, 0xd0, 0xc0, 0xc0, 0x96, 0x06, 0x06, 0x46,
|
||||
0x34, 0x30, 0x30, 0xa2, 0x81, 0xc1, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x30, 0x75, 0x60, 0x06, 0x5c, 0xc6, 0x06, 0x23, 0x06, 0x08, 0x00, 0x82, 0x60, 0xc0, 0xd8, 0xc1, 0x19, 0x74, 0x58, 0x1b,
|
||||
0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0x73, 0x07, 0x68, 0xe0, 0x5d, 0x6e, 0x30, 0x62, 0x80, 0x00, 0x20, 0x08, 0x06, 0x0c, 0x1e, 0xa4, 0xc1, 0x67, 0xbd, 0xc1, 0x11, 0x23, 0x8e, 0x18, 0x71,
|
||||
0xc4, 0x88, 0x23, 0x46, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xd3, 0x07, 0x6e, 0x40, 0x06, 0x15, 0x1d, 0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xe3, 0x07, 0x6f, 0x50, 0x06, 0x54, 0x1d,
|
||||
0x8c, 0x18, 0x20, 0x00, 0x08, 0x82, 0x01, 0xf3, 0x07, 0x70, 0x60, 0x06, 0x93, 0x1d, 0xdc, 0x30, 0xe2, 0x86, 0x11, 0x37, 0x8c, 0xb0, 0x41, 0x81, 0x81, 0x21, 0x0a, 0x0c, 0x8c, 0x50, 0x60, 0x60,
|
||||
0x84, 0x02, 0x03, 0xab, 0xfc, 0xe0, 0x06, 0x15, 0x5c, 0x5b, 0xd6, 0x1d, 0x5c, 0x09, 0xc1, 0xd6, 0x85, 0x07, 0x57, 0x42, 0xb0, 0x25, 0x07, 0xab, 0x70, 0x23, 0x06, 0x0e, 0x00, 0x82, 0x60, 0x90,
|
||||
0xbc, 0x82, 0x1e, 0xd8, 0x41, 0x90, 0x0a, 0x82, 0x1f, 0xf8, 0x81, 0x1f, 0xe0, 0xc1, 0x29, 0x98, 0x52, 0x0a, 0x37, 0xa8, 0x80, 0xd9, 0x5a, 0xfc, 0xe0, 0x4a, 0x08, 0xb6, 0x98, 0x3f, 0xb8, 0x12,
|
||||
0x82, 0xad, 0x43, 0x16, 0x76, 0xc4, 0xc0, 0x01, 0x40, 0x10, 0x0c, 0x12, 0x5b, 0x08, 0x85, 0x3e, 0x08, 0x60, 0x41, 0x28, 0x85, 0x52, 0x28, 0x85, 0x3f, 0x70, 0x05, 0x04, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
#if 0
|
||||
/* Constant buffer at register b0 (no explicit register space).
 * Holds 8 uint4 entries; main() indexes it with the dispatch thread ID.
 * NOTE(review): the member name suggests a root-descriptor binding - confirm against the test code. */
cbuffer Cbuf : register(b0)
|
||||
{
|
||||
uint4 values_root[8];
|
||||
};
|
||||
|
||||
/* Second constant buffer: same register index b0, but in register space 1.
 * Holds 8 uint4 entries; main() indexes it with the dispatch thread ID.
 * NOTE(review): the member name suggests a descriptor-table binding - confirm against the test code. */
cbuffer Cbuf : register(b0, space1)
|
||||
{
|
||||
uint4 values_table[8];
|
||||
};
|
||||
|
||||
/* Output UAV at register u0; main() stores two packed uints per thread into it. */
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
|
||||
/* Combines the four components of v into one uint: x in bits 0-7, y shifted
 * to bit 8, z to bit 16 and w to bit 24, all OR-ed together.
 * No masking is applied, so a component larger than 0xff overlaps the
 * fields above it. */
uint pack4(uint4 v)
|
||||
{
|
||||
return v.x | (v.y << 8) | (v.z << 16) | (v.w << 24);
|
||||
}
|
||||
|
||||
/* Compute entry point, 8 threads per group along X.
 * Each thread reads one uint4 from each constant buffer and writes the
 * packed values to RWBuf[2 * thr] and RWBuf[2 * thr + 1]. */
[numthreads(8, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
/* "lo" comes from the b0/space0 buffer, "hi" from the b0/space1 buffer. */
uint4 lo = values_root[thr];
|
||||
uint4 hi = values_table[thr];
|
||||
/* Even slot receives the root value, odd slot the table value. */
RWBuf[2 * thr + 0] = pack4(lo);
|
||||
RWBuf[2 * thr + 1] = pack4(hi);
|
||||
}
|
||||
#endif
|
||||
static const BYTE cs_modern_uint32_code[] =
|
||||
{
|
||||
0x44, 0x58, 0x42, 0x43, 0xb4, 0xcf, 0x15, 0xbf, 0x17, 0x57, 0xac, 0x4d, 0x2e, 0xda, 0x48, 0x7e, 0xf8, 0x0a, 0x89, 0x23, 0x01, 0x00, 0x00, 0x00, 0x40, 0x07, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x38, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x58, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x1c, 0x01, 0x00, 0x00, 0x53, 0x46, 0x49, 0x30, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x49, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x31, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x50, 0x53, 0x56, 0x30, 0x90, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x41, 0x53, 0x48, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2b, 0x6b, 0x32, 0xc2, 0xa4, 0xa0, 0xc4, 0xd4, 0xdb, 0xee, 0xb1, 0x4d, 0xa5, 0x0d, 0x3b, 0x44, 0x58, 0x49, 0x4c,
|
||||
0x1c, 0x06, 0x00, 0x00, 0x60, 0x00, 0x05, 0x00, 0x87, 0x01, 0x00, 0x00, 0x44, 0x58, 0x49, 0x4c, 0x00, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x04, 0x06, 0x00, 0x00, 0x42, 0x43, 0xc0, 0xde,
|
||||
0x21, 0x0c, 0x00, 0x00, 0x7e, 0x01, 0x00, 0x00, 0x0b, 0x82, 0x20, 0x00, 0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x07, 0x81, 0x23, 0x91, 0x41, 0xc8, 0x04, 0x49, 0x06, 0x10, 0x32, 0x39,
|
||||
0x92, 0x01, 0x84, 0x0c, 0x25, 0x05, 0x08, 0x19, 0x1e, 0x04, 0x8b, 0x62, 0x80, 0x14, 0x45, 0x02, 0x42, 0x92, 0x0b, 0x42, 0xa4, 0x10, 0x32, 0x14, 0x38, 0x08, 0x18, 0x4b, 0x0a, 0x32, 0x52, 0x88,
|
||||
0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xa5, 0x00, 0x19, 0x32, 0x42, 0xe4, 0x48, 0x0e, 0x90, 0x91, 0x22, 0xc4, 0x50, 0x41, 0x51, 0x81, 0x8c, 0xe1, 0x83, 0xe5, 0x8a, 0x04, 0x29, 0x46, 0x06,
|
||||
0x51, 0x18, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1b, 0x8c, 0xe0, 0xff, 0xff, 0xff, 0xff, 0x07, 0x40, 0x02, 0xa8, 0x0d, 0x86, 0xf0, 0xff, 0xff, 0xff, 0xff, 0x03, 0x20, 0x01, 0xd5, 0x06, 0x62,
|
||||
0xf8, 0xff, 0xff, 0xff, 0xff, 0x01, 0x90, 0x00, 0x49, 0x18, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x13, 0x82, 0x60, 0x42, 0x20, 0x4c, 0x08, 0x06, 0x00, 0x00, 0x00, 0x00, 0x89, 0x20, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x32, 0x22, 0x48, 0x09, 0x20, 0x64, 0x85, 0x04, 0x93, 0x22, 0xa4, 0x84, 0x04, 0x93, 0x22, 0xe3, 0x84, 0xa1, 0x90, 0x14, 0x12, 0x4c, 0x8a, 0x8c, 0x0b, 0x84, 0xa4, 0x4c,
|
||||
0x10, 0x68, 0x23, 0x00, 0x25, 0x00, 0x14, 0xe6, 0x08, 0xc0, 0xa0, 0x0c, 0x63, 0x0c, 0x22, 0x73, 0x04, 0x08, 0x99, 0x7b, 0x86, 0xcb, 0x9f, 0xb0, 0x87, 0x90, 0xfc, 0x10, 0x68, 0x86, 0x85, 0x40,
|
||||
0xc1, 0x29, 0xc5, 0x18, 0x68, 0x0c, 0x4a, 0x73, 0x04, 0x41, 0x31, 0xd0, 0x30, 0x63, 0x2c, 0x62, 0x45, 0x01, 0x03, 0x8d, 0x31, 0xc6, 0x18, 0x86, 0xdc, 0x40, 0xc0, 0x4c, 0x66, 0x30, 0x0e, 0xec,
|
||||
0x10, 0x0e, 0xf3, 0x30, 0x0f, 0x6e, 0x20, 0x0b, 0xb7, 0x30, 0x0b, 0xf4, 0x20, 0x0f, 0xf5, 0x30, 0x0e, 0xf4, 0x50, 0x0f, 0xf2, 0x50, 0x0e, 0xe4, 0x20, 0x0a, 0xf5, 0x60, 0x0e, 0xe6, 0x50, 0x0e,
|
||||
0xf2, 0xc0, 0x07, 0xf5, 0xe0, 0x0e, 0xf3, 0x90, 0x0e, 0xe7, 0xe0, 0x0e, 0xe5, 0x40, 0x0e, 0x60, 0x90, 0x0e, 0xee, 0x40, 0x0f, 0x7e, 0x80, 0x82, 0x41, 0x71, 0x18, 0x81, 0x18, 0x12, 0x61, 0x1e,
|
||||
0xc1, 0x05, 0x54, 0x81, 0x02, 0x4a, 0xf5, 0x0c, 0x2e, 0xa0, 0x0a, 0x9f, 0x46, 0x01, 0xa5, 0x3b, 0x47, 0x00, 0x0a, 0x00, 0x13, 0x14, 0x72, 0xc0, 0x87, 0x74, 0x60, 0x87, 0x36, 0x68, 0x87, 0x79,
|
||||
0x68, 0x03, 0x72, 0xc0, 0x87, 0x0d, 0xaf, 0x50, 0x0e, 0x6d, 0xd0, 0x0e, 0x7a, 0x50, 0x0e, 0x6d, 0x00, 0x0f, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0xa0,
|
||||
0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x78, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x90, 0x0e, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xd0, 0x06, 0xe9, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x73,
|
||||
0x20, 0x07, 0x6d, 0x90, 0x0e, 0x76, 0x40, 0x07, 0x7a, 0x60, 0x07, 0x74, 0xd0, 0x06, 0xe6, 0x10, 0x07, 0x76, 0xa0, 0x07, 0x73, 0x20, 0x07, 0x6d, 0x60, 0x0e, 0x73, 0x20, 0x07, 0x7a, 0x30, 0x07,
|
||||
0x72, 0xd0, 0x06, 0xe6, 0x60, 0x07, 0x74, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x6d, 0xe0, 0x0e, 0x78, 0xa0, 0x07, 0x71, 0x60, 0x07, 0x7a, 0x30, 0x07, 0x72, 0xa0, 0x07, 0x76, 0x40, 0x07, 0x43, 0x9e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0x3c, 0x04, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x79, 0x12, 0x20, 0x00, 0x04, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0xf2, 0x30, 0x40, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0xe4, 0x71, 0x80, 0x00, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20,
|
||||
0x0b, 0x04, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x32, 0x1e, 0x98, 0x14, 0x19, 0x11, 0x4c, 0x90, 0x8c, 0x09, 0x26, 0x47, 0xc6, 0x04, 0x43, 0x1a, 0x25, 0x30, 0x02, 0x50, 0x0c, 0x85, 0x51, 0x08,
|
||||
0x05, 0x28, 0x50, 0x10, 0xb4, 0x46, 0x00, 0x28, 0x17, 0x08, 0xd9, 0x19, 0x00, 0xc2, 0x33, 0x00, 0x24, 0x67, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x1a, 0x03, 0x4c, 0x90,
|
||||
0x46, 0x02, 0x13, 0x44, 0x35, 0x18, 0x63, 0x0b, 0x73, 0x3b, 0x03, 0xb1, 0x2b, 0x93, 0x9b, 0x4b, 0x7b, 0x73, 0x03, 0x99, 0x71, 0xb9, 0x01, 0x41, 0xa1, 0x0b, 0x3b, 0x9b, 0x7b, 0x91, 0x2a, 0x62,
|
||||
0x2a, 0x0a, 0x9a, 0x2a, 0xfa, 0x9a, 0xb9, 0x81, 0x79, 0x31, 0x4b, 0x73, 0x0b, 0x63, 0x4b, 0xd9, 0x10, 0x04, 0x13, 0x84, 0xa1, 0x98, 0x20, 0x0c, 0xc6, 0x06, 0x61, 0x20, 0x26, 0x08, 0xc3, 0xb1,
|
||||
0x41, 0x18, 0x0c, 0x0a, 0x63, 0x73, 0x1b, 0x06, 0xc4, 0x20, 0x26, 0x08, 0x12, 0x44, 0x60, 0x82, 0x30, 0x20, 0x13, 0x84, 0x85, 0x99, 0x20, 0x0c, 0xc9, 0x06, 0x61, 0x70, 0x36, 0x2c, 0x84, 0xb2,
|
||||
0x10, 0xc4, 0xc0, 0x34, 0x4d, 0xf3, 0x6c, 0x08, 0xa0, 0x09, 0x82, 0xe5, 0x4c, 0x10, 0x06, 0x65, 0x03, 0x42, 0x48, 0x0b, 0x41, 0x0c, 0x13, 0x30, 0x41, 0xc0, 0x9e, 0x0d, 0xc8, 0x50, 0x2d, 0x03,
|
||||
0x31, 0x4c, 0xc0, 0x06, 0x81, 0xb2, 0x36, 0x10, 0x40, 0x74, 0x01, 0x13, 0x04, 0x01, 0x20, 0xd1, 0x16, 0x96, 0xe6, 0x36, 0x41, 0xc8, 0x9a, 0x09, 0xc2, 0xb0, 0x6c, 0x18, 0xb8, 0x61, 0xd8, 0x40,
|
||||
0x10, 0x9b, 0xd3, 0x6d, 0x28, 0x32, 0x0d, 0xc0, 0xbc, 0x2a, 0x6c, 0x6c, 0x76, 0x6d, 0x2e, 0x69, 0x64, 0x65, 0x6e, 0x74, 0x53, 0x82, 0xa0, 0x0a, 0x19, 0x9e, 0x8b, 0x5d, 0x99, 0xdc, 0x5c, 0xda,
|
||||
0x9b, 0xdb, 0x94, 0x80, 0x68, 0x42, 0x86, 0xe7, 0x62, 0x17, 0xc6, 0x66, 0x57, 0x26, 0x37, 0x25, 0x30, 0xea, 0x90, 0xe1, 0xb9, 0xcc, 0xa1, 0x85, 0x91, 0x95, 0xc9, 0x35, 0xbd, 0x91, 0x95, 0xb1,
|
||||
0x4d, 0x09, 0x90, 0x32, 0x64, 0x78, 0x2e, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x73, 0x53, 0x82, 0xab, 0x0e, 0x19, 0x9e, 0x4b, 0x99, 0x1b, 0x9d, 0x5c, 0x1e, 0xd4, 0x5b, 0x9a, 0x1b,
|
||||
0xdd, 0xdc, 0x94, 0xc0, 0x03, 0x00, 0x00, 0x00, 0x79, 0x18, 0x00, 0x00, 0x4c, 0x00, 0x00, 0x00, 0x33, 0x08, 0x80, 0x1c, 0xc4, 0xe1, 0x1c, 0x66, 0x14, 0x01, 0x3d, 0x88, 0x43, 0x38, 0x84, 0xc3,
|
||||
0x8c, 0x42, 0x80, 0x07, 0x79, 0x78, 0x07, 0x73, 0x98, 0x71, 0x0c, 0xe6, 0x00, 0x0f, 0xed, 0x10, 0x0e, 0xf4, 0x80, 0x0e, 0x33, 0x0c, 0x42, 0x1e, 0xc2, 0xc1, 0x1d, 0xce, 0xa1, 0x1c, 0x66, 0x30,
|
||||
0x05, 0x3d, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1b, 0xcc, 0x03, 0x3d, 0xc8, 0x43, 0x3d, 0x8c, 0x03, 0x3d, 0xcc, 0x78, 0x8c, 0x74, 0x70, 0x07, 0x7b, 0x08, 0x07, 0x79, 0x48, 0x87, 0x70, 0x70, 0x07,
|
||||
0x7a, 0x70, 0x03, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87, 0x19, 0xcc, 0x11, 0x0e, 0xec, 0x90, 0x0e, 0xe1, 0x30, 0x0f, 0x6e, 0x30, 0x0f, 0xe3, 0xf0, 0x0e, 0xf0, 0x50, 0x0e, 0x33, 0x10, 0xc4, 0x1d,
|
||||
0xde, 0x21, 0x1c, 0xd8, 0x21, 0x1d, 0xc2, 0x61, 0x1e, 0x66, 0x30, 0x89, 0x3b, 0xbc, 0x83, 0x3b, 0xd0, 0x43, 0x39, 0xb4, 0x03, 0x3c, 0xbc, 0x83, 0x3c, 0x84, 0x03, 0x3b, 0xcc, 0xf0, 0x14, 0x76,
|
||||
0x60, 0x07, 0x7b, 0x68, 0x07, 0x37, 0x68, 0x87, 0x72, 0x68, 0x07, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70, 0x60, 0x07, 0x76, 0x28, 0x07, 0x76, 0xf8, 0x05, 0x76, 0x78, 0x87, 0x77, 0x80, 0x87,
|
||||
0x5f, 0x08, 0x87, 0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98, 0x81, 0x2c, 0xee, 0xf0, 0x0e, 0xee, 0xe0, 0x0e, 0xf5, 0xc0, 0x0e, 0xec, 0x30, 0x03, 0x62, 0xc8, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c,
|
||||
0xcc, 0xa1, 0x1c, 0xe4, 0xa1, 0x1c, 0xdc, 0x61, 0x1c, 0xca, 0x21, 0x1c, 0xc4, 0x81, 0x1d, 0xca, 0x61, 0x06, 0xd6, 0x90, 0x43, 0x39, 0xc8, 0x43, 0x39, 0x98, 0x43, 0x39, 0xc8, 0x43, 0x39, 0xb8,
|
||||
0xc3, 0x38, 0x94, 0x43, 0x38, 0x88, 0x03, 0x3b, 0x94, 0xc3, 0x2f, 0xbc, 0x83, 0x3c, 0xfc, 0x82, 0x3b, 0xd4, 0x03, 0x3b, 0xb0, 0xc3, 0x0c, 0xc4, 0x21, 0x07, 0x7c, 0x70, 0x03, 0x7a, 0x28, 0x87,
|
||||
0x76, 0x80, 0x87, 0x19, 0xd1, 0x43, 0x0e, 0xf8, 0xe0, 0x06, 0xe4, 0x20, 0x0e, 0xe7, 0xe0, 0x06, 0xf6, 0x10, 0x0e, 0xf2, 0xc0, 0x0e, 0xe1, 0x90, 0x0f, 0xef, 0x50, 0x0f, 0xf4, 0x00, 0x00, 0x00,
|
||||
0x71, 0x20, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x46, 0x50, 0x0d, 0x97, 0xef, 0x3c, 0x7e, 0x40, 0x15, 0x05, 0x11, 0xb1, 0x93, 0x13, 0x11, 0x3e, 0x72, 0xdb, 0x26, 0x50, 0x0d, 0x97, 0xef, 0x3c,
|
||||
0xbe, 0x10, 0x50, 0x45, 0x41, 0x44, 0xa5, 0x03, 0x0c, 0x3e, 0x72, 0xdb, 0x36, 0x20, 0x0d, 0x97, 0xef, 0x3c, 0xbe, 0x10, 0x11, 0xc0, 0x44, 0x84, 0x40, 0x33, 0x2c, 0x84, 0x05, 0x48, 0xc3, 0xe5,
|
||||
0x3b, 0x8f, 0x3f, 0x1d, 0x11, 0x01, 0x0c, 0xe2, 0xe0, 0x23, 0xb7, 0x6d, 0x00, 0x04, 0x03, 0x20, 0x0d, 0x00, 0x00, 0x00, 0x61, 0x20, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x13, 0x04, 0x41, 0x2c,
|
||||
0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x8a, 0x6e, 0x06, 0xa0, 0xe4, 0x0a, 0xa4, 0x60, 0x4a, 0x31, 0xa0, 0x74, 0x03, 0xc8, 0x94, 0x40, 0x11, 0x00, 0x00, 0x23, 0x06, 0x09, 0x00,
|
||||
0x82, 0x60, 0xc0, 0x5c, 0x87, 0x40, 0x51, 0xce, 0x88, 0x41, 0x02, 0x80, 0x20, 0x18, 0x30, 0x18, 0x22, 0x58, 0xd5, 0x33, 0x62, 0x90, 0x00, 0x20, 0x08, 0x06, 0x4c, 0x96, 0x0c, 0x96, 0x05, 0x8d,
|
||||
0x18, 0x18, 0x00, 0x08, 0x82, 0x01, 0xc1, 0x19, 0x57, 0x05, 0xd5, 0x8d, 0x18, 0x20, 0x00, 0x08, 0x82, 0x41, 0xd2, 0x39, 0x43, 0x40, 0x95, 0x70, 0xed, 0x88, 0x01, 0x02, 0x80, 0x20, 0x18, 0x24,
|
||||
0x1f, 0x54, 0x04, 0x96, 0x09, 0x19, 0x08, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x09, 0x03, 0xe9, 0x08, 0xb0, 0x32, 0xb8, 0x1d, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0x64, 0x0c, 0xa8, 0x24,
|
||||
0xd0, 0x46, 0x0c, 0x10, 0x00, 0x04, 0xc1, 0x20, 0x21, 0x83, 0x6a, 0x41, 0xb6, 0x11, 0x03, 0x04, 0x00, 0x41, 0x30, 0x48, 0xca, 0xc0, 0x62, 0x0e, 0x6e, 0xc4, 0x00, 0x01, 0x40, 0x10, 0x0c, 0x12,
|
||||
0x33, 0xb8, 0x1a, 0xa3, 0x1b, 0x31, 0x40, 0x00, 0x10, 0x04, 0x83, 0xe4, 0x0c, 0x30, 0xa7, 0xf0, 0x2a, 0xf9, 0xae, 0x02, 0x66, 0x2b, 0xc1, 0xae, 0x84, 0x60, 0x2b, 0xc9, 0xae, 0x84, 0x60, 0x6b,
|
||||
0x5a, 0x83, 0x1b, 0x31, 0x68, 0x00, 0x10, 0x04, 0x03, 0xa7, 0x0d, 0xb6, 0x2b, 0x58, 0x03, 0xe1, 0xfb, 0xbe, 0xac, 0x14, 0x33, 0xb8, 0x0a, 0x98, 0xad, 0xe5, 0xbb, 0x12, 0x82, 0x2d, 0x06, 0x0c,
|
||||
0xae, 0x84, 0x60, 0xeb, 0x90, 0x83, 0x1d, 0x31, 0x68, 0x00, 0x10, 0x04, 0x03, 0x87, 0x0e, 0xc4, 0xc0, 0x0b, 0xe4, 0x40, 0x30, 0x03, 0x33, 0x30, 0x03, 0x30, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE cs_legacy_uint64 = SHADER_BYTECODE(cs_legacy_uint64_code);
|
||||
static const D3D12_SHADER_BYTECODE cs_modern_uint64 = SHADER_BYTECODE(cs_modern_uint64_code);
|
||||
static const D3D12_SHADER_BYTECODE cs_legacy_uint16 = SHADER_BYTECODE(cs_legacy_uint16_code);
|
||||
/* DXC miscompiles the stride for elements for 16-bit. It thinks elements are 32-bit, but loads 16-bit ;_; */
|
||||
static const D3D12_SHADER_BYTECODE cs_modern_uint16 = SHADER_BYTECODE(cs_modern_uint16_code);
|
||||
static const D3D12_SHADER_BYTECODE cs_modern_uint32 = SHADER_BYTECODE(cs_modern_uint32_code);
|
||||
|
||||
struct test
|
||||
{
|
||||
const D3D12_SHADER_BYTECODE *cs;
|
||||
bool requires_16bit;
|
||||
bool requires_64bit;
|
||||
uint32_t reference[16];
|
||||
};
|
||||
|
||||
static const struct test tests[] =
|
||||
{
|
||||
{ &cs_legacy_uint64, false, true, {
|
||||
0xc080400, 0xe0a0602, 0x1c181410, 0x1e1a1612,
|
||||
0x2c282420, 0x2e2a2622, 0x3c383430, 0x3e3a3632,
|
||||
0x4c484440, 0x4e4a4642, 0x5c585450, 0x5e5a5652,
|
||||
0x6c686460, 0x6e6a6662, 0x7c787470, 0x7e7a7672 }},
|
||||
{ &cs_modern_uint64, false, true, {
|
||||
0xc080400, 0xe0a0602, 0x1c181410, 0x1e1a1612,
|
||||
0x2c282420, 0x2e2a2622, 0x3c383430, 0x3e3a3632,
|
||||
0x4c484440, 0x4e4a4642, 0x5c585450, 0x5e5a5652,
|
||||
0x6c686460, 0x6e6a6662, 0x7c787470, 0x7e7a7672 }},
|
||||
{ &cs_legacy_uint16, true, false, {
|
||||
0x40002, 0x40002, 0xc000a, 0xc000a,
|
||||
0x140012, 0x140012, 0x1c001a, 0x1c001a,
|
||||
0x240022, 0x240022, 0x2c002a, 0x2c002a,
|
||||
0x340032, 0x340032, 0x3c003a, 0x3c003a, }},
|
||||
{ &cs_modern_uint16, true, false, {
|
||||
0x08060402, 0x08060402, 0x100e0c0a, 0x100e0c0a,
|
||||
0x18161412, 0x18161412, 0x201e1c1a, 0x201e1c1a,
|
||||
0x28262422, 0x28262422, 0x302e2c2a, 0x302e2c2a,
|
||||
0x38363432, 0x38363432, 0x403e3c3a, 0x403e3c3a, }},
|
||||
{ &cs_modern_uint32, false, false, {
|
||||
0x03020100, 0x03020100, 0x07060504, 0x07060504,
|
||||
0x0b0a0908, 0x0b0a0908, 0x0f0e0d0c, 0x0f0e0d0c,
|
||||
0x13121110, 0x13121110, 0x17161514, 0x17161514,
|
||||
0x1b1a1918, 0x1b1a1918, 0x1f1e1d1c, 0x1f1e1d1c, }},
|
||||
};
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
if (!context_supports_dxil(&context))
|
||||
{
|
||||
destroy_test_context(&context);
|
||||
skip("DXIL not supported.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(input_buffer); i++)
|
||||
input_buffer[i] = i;
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
memset(range, 0, sizeof(range));
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
|
||||
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_parameters[2].DescriptorTable.NumDescriptorRanges = ARRAY_SIZE(range);
|
||||
root_parameters[2].DescriptorTable.pDescriptorRanges = range;
|
||||
range[0].NumDescriptors = 1;
|
||||
range[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range[0].RegisterSpace = 1;
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
rs_desc.pParameters = root_parameters;
|
||||
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
|
||||
support_16bit =
|
||||
SUCCEEDED(ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS4,
|
||||
&features4, sizeof(features4))) &&
|
||||
features4.Native16BitShaderOpsSupported;
|
||||
|
||||
support_64bit =
|
||||
SUCCEEDED(ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS1,
|
||||
&features1, sizeof(features1))) &&
|
||||
features1.Int64ShaderOps;
|
||||
|
||||
heap = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
cbv_buffer = create_upload_buffer(context.device, sizeof(input_buffer), input_buffer);
|
||||
uav_buffer = create_default_buffer(context.device, 256, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
cbv_desc.SizeInBytes = sizeof(input_buffer);
|
||||
cbv_desc.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(cbv_buffer);
|
||||
ID3D12Device_CreateConstantBufferView(context.device, &cbv_desc,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(heap));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++)
|
||||
{
|
||||
vkd3d_test_set_context("Test %u", i);
|
||||
|
||||
if (tests[i].requires_16bit && !support_16bit)
|
||||
{
|
||||
skip("Test requires 16-bit, but not supported.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
if (tests[i].requires_64bit && !support_64bit)
|
||||
{
|
||||
skip("Test requires 64-bit, but not supported.\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
pso = create_compute_pipeline_state(context.device, context.root_signature, *tests[i].cs);
|
||||
ok(!!pso, "Failed to create PSO.\n");
|
||||
if (!pso)
|
||||
continue;
|
||||
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &heap);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, pso);
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0,
|
||||
ID3D12Resource_GetGPUVirtualAddress(uav_buffer));
|
||||
ID3D12GraphicsCommandList_SetComputeRootConstantBufferView(context.list, 1,
|
||||
ID3D12Resource_GetGPUVirtualAddress(cbv_buffer));
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 2,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(heap));
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
|
||||
transition_resource_state(context.list, uav_buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(uav_buffer, DXGI_FORMAT_R32_UINT, &rb, context.queue, context.list);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
transition_resource_state(context.list, uav_buffer, D3D12_RESOURCE_STATE_COPY_SOURCE,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
ID3D12PipelineState_Release(pso);
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(tests[i].reference); j++)
|
||||
{
|
||||
uint32_t ref = tests[i].reference[j];
|
||||
uint32_t v = get_readback_uint(&rb, j, 0, 0);
|
||||
ok(v == ref, "Value %u: #%x != #%x\n", j, v, ref);
|
||||
}
|
||||
release_resource_readback(&rb);
|
||||
}
|
||||
vkd3d_test_set_context(NULL);
|
||||
|
||||
ID3D12Resource_Release(cbv_buffer);
|
||||
ID3D12Resource_Release(uav_buffer);
|
||||
ID3D12DescriptorHeap_Release(heap);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
|
|
@ -65,6 +65,16 @@ void test_get_resource_tiling(void)
|
|||
/* Test buffers */
|
||||
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 1024, 1, 1, 1, 1, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 16*65536, 1, 1, 1, 16, 1, 0, 65536, 1, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test small resource behavior */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 1, 1, 1, 1, 1, 1, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 2, 2, 1, 2, 1, 2, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 4, 4, 1, 3, 1, 3, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 8, 8, 1, 4, 1, 4, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 16, 16, 1, 5, 1, 5, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 32, 32, 1, 6, 1, 6, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 64, 64, 1, 7, 1, 7, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 128, 128, 1, 8, 1, 8, 0, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 256, 256, 1, 9, 2, 9, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test various image formats */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8_UNORM, 512, 512, 1, 1, 4, 1, 1, 256, 256, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8_UNORM, 512, 512, 1, 1, 8, 1, 1, 256, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
|
@ -86,7 +96,7 @@ void test_get_resource_tiling(void)
|
|||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 128, 128, 1, 8, 1, 8, 1, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 1, 10, 21, 10, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 512, 512, 4, 3, 84, 12, 3, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 0, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE2D, DXGI_FORMAT_R8G8B8A8_UNORM, 64, 64, 1, 1, 1, 1, 0, 128, 128, 1, D3D12_TILED_RESOURCES_TIER_1 },
|
||||
/* Test 3D textures */
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8_UNORM, 64, 64, 64, 1, 4, 1, 1, 64, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
|
||||
{ D3D12_RESOURCE_DIMENSION_TEXTURE3D, DXGI_FORMAT_R8G8_UNORM, 64, 64, 64, 1, 8, 1, 1, 32, 32, 32, D3D12_TILED_RESOURCES_TIER_3 },
|
||||
|
@ -213,18 +223,10 @@ void test_get_resource_tiling(void)
|
|||
ok((packed_mip_info.NumTilesForPackedMips == 0) == (packed_mip_info.NumPackedMips == 0),
|
||||
"Unexpected packed tile count %u.\n", packed_mip_info.NumTilesForPackedMips);
|
||||
|
||||
if (packed_mip_info.NumStandardMips || !packed_mip_info.NumPackedMips)
|
||||
{
|
||||
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
|
||||
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
|
||||
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
|
||||
}
|
||||
else
|
||||
{
|
||||
ok(!tile_shape.WidthInTexels && !tile_shape.HeightInTexels && !tile_shape.DepthInTexels,
|
||||
"Unexpected tile shape (%u,%u,%u) for packed resource.\n",
|
||||
tile_shape.WidthInTexels, tile_shape.HeightInTexels, tile_shape.DepthInTexels);
|
||||
}
|
||||
/* Docs say that tile shape should be cleared to zero if there is no standard mip, but drivers don't seem to care about this. */
|
||||
ok(tile_shape.WidthInTexels == tests[i].tile_shape_w, "Unexpected tile width %u.\n", tile_shape.WidthInTexels);
|
||||
ok(tile_shape.HeightInTexels == tests[i].tile_shape_h, "Unexpected tile height %u.\n", tile_shape.HeightInTexels);
|
||||
ok(tile_shape.DepthInTexels == tests[i].tile_shape_d, "Unexpected tile depth %u.\n", tile_shape.DepthInTexels);
|
||||
|
||||
for (j = 0; j < tests[i].expected_tiling_count; j++)
|
||||
{
|
||||
|
@ -3381,3 +3383,248 @@ void test_texture_feedback_instructions_dxil(void)
|
|||
test_texture_feedback_instructions(true);
|
||||
}
|
||||
|
||||
void test_sparse_buffer_memory_lifetime(void)
|
||||
{
|
||||
/* Attempt to bind sparse memory, then free the underlying heap, but keep the sparse resource
|
||||
* alive. This should confuse drivers that attempt to track BO lifetimes. */
|
||||
D3D12_UNORDERED_ACCESS_VIEW_DESC uav_desc;
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC srv_desc;
|
||||
D3D12_FEATURE_DATA_D3D12_OPTIONS options;
|
||||
const UINT values[] = { 42, 42, 42, 42 };
|
||||
D3D12_ROOT_PARAMETER root_parameters[2];
|
||||
D3D12_TILE_REGION_SIZE region_size;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE h_gpu;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE h_cpu;
|
||||
D3D12_ROOT_SIGNATURE_DESC rs_desc;
|
||||
D3D12_DESCRIPTOR_RANGE desc_range;
|
||||
struct test_context context;
|
||||
struct resource_readback rb;
|
||||
ID3D12DescriptorHeap *cpu;
|
||||
ID3D12DescriptorHeap *gpu;
|
||||
D3D12_HEAP_DESC heap_desc;
|
||||
D3D12_RESOURCE_DESC desc;
|
||||
ID3D12Resource *sparse;
|
||||
ID3D12Resource *buffer;
|
||||
ID3D12Heap *heap_live;
|
||||
ID3D12Heap *heap;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const DWORD cs_sparse_query_dxbc[] =
|
||||
{
|
||||
#if 0
|
||||
RWStructuredBuffer<uint> RWBuf : register(u0);
|
||||
Buffer<uint> Buf : register(t0);
|
||||
|
||||
[numthreads(1, 1, 1)]
|
||||
void main(uint thr : SV_DispatchThreadID)
|
||||
{
|
||||
uint code;
|
||||
|
||||
// Sample mapped, but freed memory. See what CheckAccessFullyMapped returns.
|
||||
uint data = Buf.Load(thr, code);
|
||||
uint value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
|
||||
value |= data & 0xffffu;
|
||||
RWBuf[2 * thr + 0] = value;
|
||||
|
||||
// Sample not yet mapped memory. See what CheckAccessFullyMapped returns.
|
||||
data = Buf.Load(thr + 1024 * 1024, code);
|
||||
value = CheckAccessFullyMapped(code) ? (1u << 16) : 0u;
|
||||
value |= data & 0xffffu;
|
||||
|
||||
RWBuf[2 * thr + 1] = value;
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x8c2a40af, 0x2a9b20a6, 0xa99f0977, 0x37daacf5, 0x00000001, 0x00000280, 0x00000004,
|
||||
0x00000030, 0x00000040, 0x00000050, 0x00000270, 0x4e475349, 0x00000008, 0x00000000, 0x00000008,
|
||||
0x4e47534f, 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x00000218, 0x00050050, 0x00000086,
|
||||
0x0100086a, 0x04000858, 0x00107000, 0x00000000, 0x00004444, 0x0400009e, 0x0011e000, 0x00000000,
|
||||
0x00000004, 0x0200005f, 0x00020012, 0x02000068, 0x00000002, 0x0400009b, 0x00000001, 0x00000001,
|
||||
0x00000001, 0x8a0000df, 0x80000042, 0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001,
|
||||
0x00020006, 0x00107e46, 0x00000000, 0x050000ea, 0x00100022, 0x00000000, 0x0010000a, 0x00000001,
|
||||
0x09000037, 0x00100022, 0x00000000, 0x0010001a, 0x00000000, 0x00004001, 0x00010000, 0x00004001,
|
||||
0x00000000, 0x0b00008c, 0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000,
|
||||
0x0010000a, 0x00000000, 0x0010001a, 0x00000000, 0x06000029, 0x00100022, 0x00000000, 0x0002000a,
|
||||
0x00004001, 0x00000001, 0x090000a8, 0x0011e012, 0x00000000, 0x0010001a, 0x00000000, 0x00004001,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x1300008c, 0x00100052, 0x00000000, 0x00004002, 0x00000014,
|
||||
0x00000000, 0x0000001f, 0x00000000, 0x00004002, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
|
||||
0x00020006, 0x00004002, 0x00100000, 0x00000000, 0x00000001, 0x00000000, 0x8b0000df, 0x80000042,
|
||||
0x00111103, 0x00100012, 0x00000000, 0x00100012, 0x00000001, 0x00100006, 0x00000000, 0x00107e46,
|
||||
0x00000000, 0x050000ea, 0x00100082, 0x00000000, 0x0010000a, 0x00000001, 0x09000037, 0x00100082,
|
||||
0x00000000, 0x0010003a, 0x00000000, 0x00004001, 0x00010000, 0x00004001, 0x00000000, 0x0b00008c,
|
||||
0x00100012, 0x00000000, 0x00004001, 0x00000010, 0x00004001, 0x00000000, 0x0010000a, 0x00000000,
|
||||
0x0010003a, 0x00000000, 0x090000a8, 0x0011e012, 0x00000000, 0x0010002a, 0x00000000, 0x00004001,
|
||||
0x00000000, 0x0010000a, 0x00000000, 0x0100003e, 0x30494653, 0x00000008, 0x00000100, 0x00000000,
|
||||
};
|
||||
static const D3D12_SHADER_BYTECODE cs_sparse_query = SHADER_BYTECODE(cs_sparse_query_dxbc);
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
hr = ID3D12Device_CheckFeatureSupport(context.device, D3D12_FEATURE_D3D12_OPTIONS, &options, sizeof(options));
|
||||
ok(hr == S_OK, "Failed to check feature support, hr %#x.\n", hr);
|
||||
|
||||
if (options.TiledResourcesTier < D3D12_TILED_RESOURCES_TIER_2)
|
||||
{
|
||||
skip("Tiled resources Tier 2 not supported by device.\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
memset(&rs_desc, 0, sizeof(rs_desc));
|
||||
memset(root_parameters, 0, sizeof(root_parameters));
|
||||
memset(&desc_range, 0, sizeof(desc_range));
|
||||
rs_desc.NumParameters = ARRAY_SIZE(root_parameters);
|
||||
rs_desc.pParameters = root_parameters;
|
||||
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV;
|
||||
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
|
||||
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
|
||||
root_parameters[1].DescriptorTable.pDescriptorRanges = &desc_range;
|
||||
desc_range.NumDescriptors = 1;
|
||||
desc_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
create_root_signature(context.device, &rs_desc, &context.root_signature);
|
||||
context.pipeline_state = create_compute_pipeline_state(context.device, context.root_signature, cs_sparse_query);
|
||||
|
||||
memset(&heap_desc, 0, sizeof(heap_desc));
|
||||
heap_desc.SizeInBytes = 4 * 1024 * 1024;
|
||||
heap_desc.Properties.Type = D3D12_HEAP_TYPE_DEFAULT;
|
||||
heap_desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
heap_desc.Flags = D3D12_HEAP_FLAG_ALLOW_ONLY_BUFFERS;
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateHeap(context.device, &heap_desc, &IID_ID3D12Heap, (void**)&heap_live);
|
||||
ok(SUCCEEDED(hr), "Failed to create heap, hr #%x.\n", hr);
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.Width = 64 * 1024 * 1024;
|
||||
desc.Height = 1;
|
||||
desc.DepthOrArraySize = 1;
|
||||
desc.SampleDesc.Count = 1;
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
|
||||
desc.MipLevels = 1;
|
||||
desc.Alignment = D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT;
|
||||
desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
|
||||
desc.Flags = D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS;
|
||||
hr = ID3D12Device_CreateReservedResource(context.device, &desc, D3D12_RESOURCE_STATE_UNORDERED_ACCESS,
|
||||
NULL, &IID_ID3D12Resource, (void**)&sparse);
|
||||
ok(SUCCEEDED(hr), "Failed to create reserved resource, hr #%x.\n", hr);
|
||||
|
||||
{
|
||||
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { 0 };
|
||||
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NULL;
|
||||
const UINT offset = 0;
|
||||
const UINT count = desc.Width / (64 * 1024);
|
||||
region_size.UseBox = FALSE;
|
||||
region_size.NumTiles = desc.Width / (64 * 1024);
|
||||
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, ®ion_start_coordinate, ®ion_size,
|
||||
NULL, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
|
||||
}
|
||||
|
||||
region_size.UseBox = FALSE;
|
||||
region_size.NumTiles = 1;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
const D3D12_TILED_RESOURCE_COORDINATE region_start_coordinate = { i, 0, 0, 0 };
|
||||
const D3D12_TILE_RANGE_FLAGS range_flag = D3D12_TILE_RANGE_FLAG_NONE;
|
||||
const UINT offset = i;
|
||||
const UINT count = 1;
|
||||
|
||||
ID3D12CommandQueue_UpdateTileMappings(context.queue, sparse, 1, ®ion_start_coordinate, ®ion_size,
|
||||
i == 0 ? heap : heap_live, 1, &range_flag, &offset, &count, D3D12_TILE_MAPPING_FLAG_NONE);
|
||||
}
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
|
||||
buffer = create_default_buffer(context.device, 128 * 1024,
|
||||
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
cpu = create_cpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 1);
|
||||
gpu = create_gpu_descriptor_heap(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2);
|
||||
memset(&uav_desc, 0, sizeof(uav_desc));
|
||||
uav_desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
|
||||
uav_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
uav_desc.Buffer.NumElements = 128 * 1024 / 4;
|
||||
uav_desc.Buffer.FirstElement = 0;
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu));
|
||||
ID3D12Device_CreateUnorderedAccessView(context.device, sparse, NULL, &uav_desc,
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu));
|
||||
|
||||
memset(&srv_desc, 0, sizeof(srv_desc));
|
||||
srv_desc.Buffer.FirstElement = 0;
|
||||
srv_desc.Buffer.NumElements = 2 * 1024 * 1024;
|
||||
srv_desc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER;
|
||||
srv_desc.Format = DXGI_FORMAT_R32_UINT;
|
||||
srv_desc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING;
|
||||
|
||||
h_cpu = ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(gpu);
|
||||
h_cpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
ID3D12Device_CreateShaderResourceView(context.device, sparse, &srv_desc, h_cpu);
|
||||
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
|
||||
ID3D12GraphicsCommandList_ClearUnorderedAccessViewUint(context.list,
|
||||
ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu),
|
||||
ID3D12DescriptorHeap_GetCPUDescriptorHandleForHeapStart(cpu), sparse, values, 0, NULL);
|
||||
transition_resource_state(context.list, sparse,
|
||||
D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 0, 128 * 1024);
|
||||
transition_resource_state(context.list, buffer,
|
||||
D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
|
||||
&rb, context.queue, context.list);
|
||||
reset_command_list(context.list, context.allocator);
|
||||
ok(get_readback_uint(&rb, 0, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 0, 0, 0));
|
||||
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Heap_Release(heap);
|
||||
|
||||
/* Access a resource where we can hypothetically access the freed heap memory. */
|
||||
/* On AMD Windows native at least, if we read the freed region, we read garbage, which proves it's not required to unbind explicitly.
|
||||
* We'd read 0 in that case. */
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 0, sparse, 64 * 1024, 64 * 1024);
|
||||
|
||||
#define EXPLORE_UNDEFINED_BEHAVIOR 0
|
||||
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
/* This reads unmapped memory. */
|
||||
ID3D12GraphicsCommandList_CopyBufferRegion(context.list, buffer, 1024, sparse, 1024, 1024);
|
||||
#endif
|
||||
|
||||
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
h_gpu = ID3D12DescriptorHeap_GetGPUDescriptorHandleForHeapStart(gpu);
|
||||
h_gpu.ptr += ID3D12Device_GetDescriptorHandleIncrementSize(context.device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
ID3D12GraphicsCommandList_SetDescriptorHeaps(context.list, 1, &gpu);
|
||||
ID3D12GraphicsCommandList_SetComputeRootSignature(context.list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(context.list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SetComputeRootUnorderedAccessView(context.list, 0, ID3D12Resource_GetGPUVirtualAddress(buffer));
|
||||
ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(context.list, 1, h_gpu);
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
ID3D12GraphicsCommandList_Dispatch(context.list, 1, 1, 1);
|
||||
#endif
|
||||
|
||||
transition_resource_state(context.list, buffer, D3D12_RESOURCE_STATE_UNORDERED_ACCESS, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(buffer, DXGI_FORMAT_R32_UINT,
|
||||
&rb, context.queue, context.list);
|
||||
|
||||
#if EXPLORE_UNDEFINED_BEHAVIOR
|
||||
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 0, 0, 0));
|
||||
skip("Reading value #%x (expect 0).\n", get_readback_uint(&rb, 1, 0, 0));
|
||||
skip("Reading undefined value #%x.\n", get_readback_uint(&rb, 1024 / 4, 0, 0));
|
||||
#endif
|
||||
ok(get_readback_uint(&rb, 2048 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 2048 / 4, 0, 0));
|
||||
ok(get_readback_uint(&rb, 64 * 1024 / 4, 0, 0) == 42, "Got #%x, expected 42.\n", get_readback_uint(&rb, 64 * 1024 / 4, 0, 0));
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(buffer);
|
||||
ID3D12Resource_Release(sparse);
|
||||
ID3D12DescriptorHeap_Release(cpu);
|
||||
ID3D12DescriptorHeap_Release(gpu);
|
||||
ID3D12Heap_Release(heap_live);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
|
|
|
@ -22,6 +22,159 @@
|
|||
#define VKD3D_DBG_CHANNEL VKD3D_DBG_CHANNEL_API
|
||||
#include "d3d12_crosstest.h"
|
||||
|
||||
void test_primitive_restart_list_topology_stream_output(void)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
ID3D12Resource *counter_buffer, *so_buffer;
|
||||
ID3D12GraphicsCommandList *command_list;
|
||||
D3D12_STREAM_OUTPUT_BUFFER_VIEW sobv;
|
||||
struct test_context_desc desc;
|
||||
ID3D12Resource *index_buffer;
|
||||
struct resource_readback rb;
|
||||
struct test_context context;
|
||||
D3D12_INDEX_BUFFER_VIEW ibv;
|
||||
ID3D12CommandQueue *queue;
|
||||
const struct vec4 *data;
|
||||
ID3D12Device *device;
|
||||
uint32_t counter;
|
||||
unsigned int i;
|
||||
HRESULT hr;
|
||||
|
||||
static const D3D12_SO_DECLARATION_ENTRY so_declaration[] =
|
||||
{
|
||||
{0, "SV_Position", 0, 0, 4, 0},
|
||||
};
|
||||
static const struct vec4 expected_output[] =
|
||||
{
|
||||
/* Strip */
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
|
||||
/* List */
|
||||
{ 0.0f, 0.0f, 0.0f, 0.0f },
|
||||
{ 1.0f, 1.0f, 1.0f, 1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ 9.0f, 9.0f, 9.0f, 9.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ -1.0f, -1.0f, -1.0f, -1.0f },
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
|
||||
/* Strip */
|
||||
{ 2000.0f, 2000.0f, 2000.0f, 2000.0f },
|
||||
{ 3000.0f, 3000.0f, 3000.0f, 3000.0f },
|
||||
{ 4000.0f, 4000.0f, 4000.0f, 4000.0f },
|
||||
};
|
||||
static const uint32_t index_data[] = { 0, 1, UINT32_MAX, 9, UINT32_MAX, UINT32_MAX, 2000, 3000, 4000 };
|
||||
static const UINT strides[] = { 16 };
|
||||
|
||||
static const DWORD vs_code[] =
|
||||
{
|
||||
#if 0
|
||||
float4 main(uint vid : SV_VertexID) : SV_Position
|
||||
{
|
||||
if (vid == ~0u)
|
||||
return float4(-1, -1, -1, -1);
|
||||
else
|
||||
return float4(vid, vid, vid, vid);
|
||||
}
|
||||
#endif
|
||||
0x43425844, 0x59eaaf80, 0xf7ab5160, 0xf0ce6da4, 0x82ce289b, 0x00000001, 0x00000140, 0x00000003,
|
||||
0x0000002c, 0x00000060, 0x00000094, 0x4e475349, 0x0000002c, 0x00000001, 0x00000008, 0x00000020,
|
||||
0x00000000, 0x00000006, 0x00000001, 0x00000000, 0x00000101, 0x565f5653, 0x65747265, 0x00444978,
|
||||
0x4e47534f, 0x0000002c, 0x00000001, 0x00000008, 0x00000020, 0x00000000, 0x00000001, 0x00000003,
|
||||
0x00000000, 0x0000000f, 0x505f5653, 0x7469736f, 0x006e6f69, 0x58454853, 0x000000a4, 0x00010050,
|
||||
0x00000029, 0x0100086a, 0x04000060, 0x00101012, 0x00000000, 0x00000006, 0x04000067, 0x001020f2,
|
||||
0x00000000, 0x00000001, 0x02000068, 0x00000001, 0x07000020, 0x00100012, 0x00000000, 0x0010100a,
|
||||
0x00000000, 0x00004001, 0xffffffff, 0x0304001f, 0x0010000a, 0x00000000, 0x08000036, 0x001020f2,
|
||||
0x00000000, 0x00004002, 0xbf800000, 0xbf800000, 0xbf800000, 0xbf800000, 0x0100003e, 0x01000012,
|
||||
0x05000056, 0x001020f2, 0x00000000, 0x00101006, 0x00000000, 0x0100003e, 0x01000015, 0x0100003e,
|
||||
};
|
||||
|
||||
static const D3D12_SHADER_BYTECODE vs = SHADER_BYTECODE(vs_code);
|
||||
|
||||
memset(&desc, 0, sizeof(desc));
|
||||
desc.root_signature_flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_STREAM_OUTPUT;
|
||||
desc.no_pipeline = true;
|
||||
if (!init_test_context(&context, &desc))
|
||||
return;
|
||||
|
||||
device = context.device;
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
init_pipeline_state_desc(&pso_desc, context.root_signature, 0, &vs, NULL, NULL);
|
||||
pso_desc.StreamOutput.NumEntries = ARRAY_SIZE(so_declaration);
|
||||
pso_desc.StreamOutput.pSODeclaration = so_declaration;
|
||||
pso_desc.StreamOutput.pBufferStrides = strides;
|
||||
pso_desc.StreamOutput.NumStrides = ARRAY_SIZE(strides);
|
||||
pso_desc.StreamOutput.RasterizedStream = D3D12_SO_NO_RASTERIZED_STREAM;
|
||||
pso_desc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_0xFFFFFFFF;
|
||||
hr = ID3D12Device_CreateGraphicsPipelineState(device, &pso_desc,
|
||||
&IID_ID3D12PipelineState, (void **)&context.pipeline_state);
|
||||
ok(SUCCEEDED(hr), "Failed to create PSO, hr #%x.\n", hr);
|
||||
|
||||
counter_buffer = create_default_buffer(device, 32,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
so_buffer = create_default_buffer(device, 4096,
|
||||
D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_STREAM_OUT);
|
||||
index_buffer = create_upload_buffer(device, sizeof(index_data), index_data);
|
||||
sobv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(so_buffer);
|
||||
sobv.SizeInBytes = 4096;
|
||||
sobv.BufferFilledSizeLocation = ID3D12Resource_GetGPUVirtualAddress(counter_buffer);
|
||||
|
||||
ibv.Format = DXGI_FORMAT_R32_UINT;
|
||||
ibv.BufferLocation = ID3D12Resource_GetGPUVirtualAddress(index_buffer);
|
||||
ibv.SizeInBytes = sizeof(index_data);
|
||||
|
||||
ID3D12GraphicsCommandList_RSSetScissorRects(command_list, 1, &context.scissor_rect);
|
||||
ID3D12GraphicsCommandList_RSSetViewports(command_list, 1, &context.viewport);
|
||||
ID3D12GraphicsCommandList_SetGraphicsRootSignature(command_list, context.root_signature);
|
||||
ID3D12GraphicsCommandList_SetPipelineState(command_list, context.pipeline_state);
|
||||
ID3D12GraphicsCommandList_SOSetTargets(command_list, 0, 1, &sobv);
|
||||
ID3D12GraphicsCommandList_IASetIndexBuffer(command_list, &ibv);
|
||||
|
||||
/* Primitive restart state only applies to strip primitives. */
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
ID3D12GraphicsCommandList_IASetPrimitiveTopology(command_list, D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
|
||||
ID3D12GraphicsCommandList_DrawIndexedInstanced(command_list, ARRAY_SIZE(index_data), 1,
|
||||
0, 0, 0);
|
||||
|
||||
transition_resource_state(command_list, counter_buffer,
|
||||
D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
transition_resource_state(command_list, so_buffer,
|
||||
D3D12_RESOURCE_STATE_STREAM_OUT, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
|
||||
get_buffer_readback_with_command_list(counter_buffer, DXGI_FORMAT_R32_UINT, &rb, queue, command_list);
|
||||
counter = get_readback_uint(&rb, 0, 0, 0);
|
||||
ok(counter == sizeof(expected_output), "Got unexpected counter %u, expected %u.\n",
|
||||
counter, (unsigned int)sizeof(expected_output));
|
||||
release_resource_readback(&rb);
|
||||
reset_command_list(command_list, context.allocator);
|
||||
get_buffer_readback_with_command_list(so_buffer, DXGI_FORMAT_UNKNOWN, &rb, queue, command_list);
|
||||
for (i = 0; i < ARRAY_SIZE(expected_output); ++i)
|
||||
{
|
||||
const struct vec4 *expected = &expected_output[i];
|
||||
data = get_readback_vec4(&rb, i, 0);
|
||||
ok(compare_vec4(data, expected, 1),
|
||||
"Got {%.8e, %.8e, %.8e, %.8e}, expected {%.8e, %.8e, %.8e, %.8e}.\n",
|
||||
data->x, data->y, data->z, data->w, expected->x, expected->y, expected->z, expected->w);
|
||||
}
|
||||
release_resource_readback(&rb);
|
||||
|
||||
ID3D12Resource_Release(index_buffer);
|
||||
ID3D12Resource_Release(counter_buffer);
|
||||
ID3D12Resource_Release(so_buffer);
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
static void test_vertex_shader_stream_output(bool use_dxil)
|
||||
{
|
||||
D3D12_GRAPHICS_PIPELINE_STATE_DESC pso_desc;
|
||||
|
|
|
@ -1187,3 +1187,247 @@ void test_create_fence(void)
|
|||
ok(!refcount, "ID3D12Device has %u references left.\n", (unsigned int)refcount);
|
||||
}
|
||||
|
||||
void test_fence_wait_robustness_inner(bool shared_handles)
|
||||
{
|
||||
VKD3D_UNUSED HANDLE shared_signal = NULL;
|
||||
VKD3D_UNUSED HANDLE shared_drain = NULL;
|
||||
VKD3D_UNUSED HANDLE shared_wait = NULL;
|
||||
ID3D12CommandAllocator *allocator[2];
|
||||
ID3D12Fence *signal_fence_dup = NULL;
|
||||
D3D12_COMMAND_QUEUE_DESC queue_desc;
|
||||
ID3D12Fence *drain_fence_dup = NULL;
|
||||
ID3D12Fence *wait_fence_dup = NULL;
|
||||
ID3D12GraphicsCommandList *list[2];
|
||||
ID3D12CommandQueue *compute_queue;
|
||||
struct test_context context;
|
||||
ID3D12Fence *signal_fence;
|
||||
ID3D12Fence *drain_fence;
|
||||
ID3D12Fence *wait_fence;
|
||||
ID3D12Resource *src;
|
||||
ID3D12Resource *dst;
|
||||
unsigned int i;
|
||||
HANDLE event;
|
||||
UINT value;
|
||||
HRESULT hr;
|
||||
|
||||
if (!init_compute_test_context(&context))
|
||||
return;
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&signal_fence);
|
||||
todo_if(shared_handles) ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&wait_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
hr = ID3D12Device_CreateFence(context.device, 0,
|
||||
shared_handles ? D3D12_FENCE_FLAG_SHARED : D3D12_FENCE_FLAG_NONE,
|
||||
&IID_ID3D12Fence, (void**)&drain_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to create fence, hr #%x.\n", hr);
|
||||
|
||||
if (FAILED(hr))
|
||||
{
|
||||
skip("Failed to create fence, skipping test ...\n");
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
ID3D12Fence_Release(wait_fence);
|
||||
destroy_test_context(&context);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
if (shared_handles)
|
||||
{
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)signal_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_signal);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)wait_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_wait);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_CreateSharedHandle(context.device, (ID3D12DeviceChild*)drain_fence,
|
||||
NULL, GENERIC_ALL, NULL, &shared_drain);
|
||||
ok(SUCCEEDED(hr), "Failed to create shared handle, hr #%x.\n", hr);
|
||||
|
||||
ID3D12Fence_Release(signal_fence);
|
||||
ID3D12Fence_Release(wait_fence);
|
||||
ID3D12Fence_Release(drain_fence);
|
||||
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
|
||||
/* OpenSharedHandle takes a kernel level reference on the HANDLE. */
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_signal, &IID_ID3D12Fence, (void**)&signal_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_wait, &IID_ID3D12Fence, (void**)&wait_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
hr = ID3D12Device_OpenSharedHandle(context.device, shared_drain, &IID_ID3D12Fence, (void**)&drain_fence_dup);
|
||||
ok(SUCCEEDED(hr), "Failed to open shared handle, hr #%x.\n", hr);
|
||||
|
||||
/* Observed behavior: Closing the last reference to the kernel HANDLE object unblocks all waiters.
|
||||
* This isn't really implementable in Wine as it stands since applications are free to share
|
||||
* the HANDLE and Dupe it arbitrarily.
|
||||
* For now, assume this is not a thing, we can report TDR-like situations if this comes up in practice. */
|
||||
if (shared_signal)
|
||||
CloseHandle(shared_signal);
|
||||
if (shared_wait)
|
||||
CloseHandle(shared_wait);
|
||||
if (shared_drain)
|
||||
CloseHandle(shared_drain);
|
||||
}
|
||||
#endif
|
||||
|
||||
memset(&queue_desc, 0, sizeof(queue_desc));
|
||||
queue_desc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
|
||||
queue_desc.Priority = D3D12_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
||||
|
||||
src = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_SOURCE);
|
||||
dst = create_default_buffer(context.device, 256 * 1024 * 1024, D3D12_RESOURCE_FLAG_NONE, D3D12_RESOURCE_STATE_COPY_DEST);
|
||||
|
||||
ID3D12Device_CreateCommandQueue(context.device, &queue_desc, &IID_ID3D12CommandQueue, (void**)&compute_queue);
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ID3D12Device_CreateCommandAllocator(context.device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
&IID_ID3D12CommandAllocator, (void**)&allocator[i]);
|
||||
ID3D12Device_CreateCommandList(context.device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE, allocator[i], NULL,
|
||||
&IID_ID3D12GraphicsCommandList, (void**)&list[i]);
|
||||
}
|
||||
|
||||
/* Heavy copy action. */
|
||||
for (i = 0; i < 128; i++)
|
||||
{
|
||||
ID3D12GraphicsCommandList_CopyResource(list[0], dst, src);
|
||||
ID3D12GraphicsCommandList_CopyResource(list[1], src, dst);
|
||||
}
|
||||
|
||||
ID3D12GraphicsCommandList_Close(list[0]);
|
||||
ID3D12GraphicsCommandList_Close(list[1]);
|
||||
|
||||
/* Note on ref-count checks: The debug layers can take transient public ref-counts it seems. */
|
||||
|
||||
ID3D12CommandQueue_ExecuteCommandLists(context.queue, 1, (ID3D12CommandList * const *)&list[0]);
|
||||
ID3D12CommandQueue_Signal(context.queue, signal_fence, 1);
|
||||
/* Validate that signal/wait does not take public ref-counts. */
|
||||
value = get_refcount(signal_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
/* Try waiting for a signal that never comes. We'll be able to unblock this wait
|
||||
* when we fully release the fence. */
|
||||
ID3D12CommandQueue_Wait(compute_queue, signal_fence, UINT64_MAX);
|
||||
value = get_refcount(signal_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
ID3D12CommandQueue_Signal(compute_queue, wait_fence, 1);
|
||||
value = get_refcount(wait_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
ID3D12CommandQueue_Wait(compute_queue, wait_fence, 1);
|
||||
value = get_refcount(wait_fence);
|
||||
ok(value == 1, "Unexpected ref-count %u\n", value);
|
||||
|
||||
/* Check that we can queue up event completion.
|
||||
* Again, verify that releasing the fence unblocks all waiters ... */
|
||||
event = create_event();
|
||||
ID3D12Fence_SetEventOnCompletion(signal_fence, UINT64_MAX, event);
|
||||
|
||||
if (signal_fence_dup)
|
||||
ID3D12Fence_Release(signal_fence_dup);
|
||||
if (wait_fence_dup)
|
||||
ID3D12Fence_Release(wait_fence_dup);
|
||||
|
||||
/* The GPU copy is 32 GB worth of BW. There is literally zero chance it would have completed in this amount of time.
|
||||
* Makes sure that the fences aren't signalled when we try to free them.
|
||||
* (Sure, there is a theoretical race condition if GPU completes between this check and the release, but seriously ...). */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(signal_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(wait_fence);
|
||||
ok(value == 0, "Unexpected signal event %u.\n", value);
|
||||
|
||||
/* Test that it's valid to release fence while it's in flight.
|
||||
* If we don't cause device lost and drain_fence is waited on successfully we pass the test. */
|
||||
value = ID3D12Fence_Release(signal_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
value = ID3D12Fence_Release(wait_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
|
||||
ID3D12CommandQueue_ExecuteCommandLists(compute_queue, 1, (ID3D12CommandList * const *)&list[1]);
|
||||
ID3D12CommandQueue_Signal(compute_queue, drain_fence, 1);
|
||||
|
||||
wait_event(event, INFINITE);
|
||||
destroy_event(event);
|
||||
ID3D12Fence_SetEventOnCompletion(drain_fence, 1, NULL);
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence);
|
||||
ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
|
||||
|
||||
if (drain_fence_dup)
|
||||
{
|
||||
/* Check we observe the counter in sibling fences as well. */
|
||||
value = (UINT)ID3D12Fence_GetCompletedValue(drain_fence_dup);
|
||||
ok(value == 1, "Expected fence wait value 1, but got %u.\n", value);
|
||||
ID3D12Fence_Release(drain_fence_dup);
|
||||
}
|
||||
|
||||
value = ID3D12Fence_Release(drain_fence);
|
||||
ok(value == 0, "Unexpected fence ref-count %u.\n", value);
|
||||
|
||||
/* Early freeing of fences might signal the drain fence too early, causing GPU hang. */
|
||||
wait_queue_idle(context.device, context.queue);
|
||||
wait_queue_idle(context.device, compute_queue);
|
||||
|
||||
ID3D12CommandQueue_Release(compute_queue);
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
ID3D12CommandAllocator_Release(allocator[i]);
|
||||
ID3D12GraphicsCommandList_Release(list[i]);
|
||||
}
|
||||
ID3D12Resource_Release(dst);
|
||||
ID3D12Resource_Release(src);
|
||||
|
||||
destroy_test_context(&context);
|
||||
}
|
||||
|
||||
/* Runs the fence robustness test with fences created without the shared flag. */
void test_fence_wait_robustness(void)
{
    const bool use_shared_handles = false;

    test_fence_wait_robustness_inner(use_shared_handles);
}
|
||||
|
||||
/* Runs the fence robustness test with shared fences. Only Win32 builds run
 * it; every other build reports a skip instead. */
void test_fence_wait_robustness_shared(void)
{
#ifndef _WIN32
    skip("Shared fences not supported on native Linux build.\n");
#else
    test_fence_wait_robustness_inner(true);
#endif
}
|
|
@ -27,6 +27,10 @@ PFN_D3D12_GET_DEBUG_INTERFACE pfn_D3D12GetDebugInterface;
|
|||
const char *vkd3d_test_platform = "other";
|
||||
struct vkd3d_test_state_context vkd3d_test_state;
|
||||
|
||||
#ifdef _WIN32
|
||||
RENDERDOC_API_1_0_0 *renderdoc_api;
|
||||
#endif
|
||||
|
||||
bool compare_float(float f, float g, int ulps)
|
||||
{
|
||||
int x, y;
|
||||
|
@ -842,6 +846,9 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
|
|||
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH:
|
||||
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_ARGUMENTS);
|
||||
break;
|
||||
case D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH_RAYS:
|
||||
signature_desc.ByteStride = sizeof(D3D12_DISPATCH_RAYS_DESC);
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
@ -858,6 +865,7 @@ ID3D12CommandSignature *create_command_signature_(unsigned int line,
|
|||
|
||||
bool init_compute_test_context_(unsigned int line, struct test_context *context)
|
||||
{
|
||||
D3D12_COMMAND_LIST_TYPE command_list_type = D3D12_COMMAND_LIST_TYPE_COMPUTE;
|
||||
ID3D12Device *device;
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -870,14 +878,21 @@ bool init_compute_test_context_(unsigned int line, struct test_context *context)
|
|||
}
|
||||
device = context->device;
|
||||
|
||||
context->queue = create_command_queue_(line, device,
|
||||
D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
#ifdef _WIN32
|
||||
begin_renderdoc_capturing(device);
|
||||
/* Workaround RenderDoc bug. It expects a DIRECT command queue to exist. */
|
||||
if (renderdoc_api)
|
||||
command_list_type = D3D12_COMMAND_LIST_TYPE_DIRECT;
|
||||
#endif
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
context->queue = create_command_queue_(line, device,
|
||||
command_list_type, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, command_list_type,
|
||||
&IID_ID3D12CommandAllocator, (void **)&context->allocator);
|
||||
ok_(line)(hr == S_OK, "Failed to create command allocator, hr %#x.\n", hr);
|
||||
|
||||
hr = ID3D12Device_CreateCommandList(device, 0, D3D12_COMMAND_LIST_TYPE_COMPUTE,
|
||||
hr = ID3D12Device_CreateCommandList(device, 0, command_list_type,
|
||||
context->allocator, NULL, &IID_ID3D12GraphicsCommandList, (void **)&context->list);
|
||||
ok_(line)(hr == S_OK, "Failed to create command list, hr %#x.\n", hr);
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@
|
|||
#ifndef __VKD3D_D3D12_TEST_UTILS_H
|
||||
#define __VKD3D_D3D12_TEST_UTILS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include "renderdoc_app.h"
|
||||
#endif
|
||||
|
||||
#define SHADER_BYTECODE(code) {code,sizeof(code)}
|
||||
|
||||
#define wait_queue_idle(a, b) wait_queue_idle_(__LINE__, a, b)
|
||||
|
@ -1049,6 +1053,45 @@ static inline void create_render_target_(unsigned int line, struct test_context
|
|||
ID3D12Device_CreateRenderTargetView(context->device, *render_target, NULL, *rtv);
|
||||
}
|
||||
|
||||
/* Utility code for capturing native D3D12 tests, which is why this only covers Win32.
|
||||
* Launch the d3d12.exe test binary from RenderDoc UI.
|
||||
* For Vulkan capturing, use VKD3D_AUTO_CAPTURE_COUNTS and friends instead. */
|
||||
#ifdef _WIN32
|
||||
extern RENDERDOC_API_1_0_0 *renderdoc_api;
|
||||
|
||||
/* Starts a RenderDoc frame capture for the given device.
 *
 * While renderdoc_api is still NULL, the API is looked up through
 * RENDERDOC_GetAPI in an already loaded renderdoc.dll. If the module or the
 * entry point is missing, or the API query reports failure, renderdoc_api
 * stays NULL and no capture is started. */
static inline void begin_renderdoc_capturing(ID3D12Device *device)
{
    pRENDERDOC_GetAPI query_api;
    FARPROC entry_point;
    HANDLE module;

    if (!renderdoc_api)
    {
        /* Only an already loaded module is used; nothing is loaded here. */
        module = GetModuleHandleA("renderdoc.dll");
        entry_point = module ? GetProcAddress(module, "RENDERDOC_GetAPI") : NULL;

        if (entry_point)
        {
            /* Workaround compiler warnings about casting to function pointer. */
            memcpy(&query_api, &entry_point, sizeof(entry_point));
            if (!query_api(eRENDERDOC_API_Version_1_0_0, (void **)&renderdoc_api))
                renderdoc_api = NULL;
        }
    }

    if (renderdoc_api)
        renderdoc_api->StartFrameCapture(device, NULL);
}
|
||||
|
||||
/* Ends the RenderDoc frame capture for the given device; does nothing when
 * renderdoc_api is NULL. */
static inline void end_renderdoc_capturing(ID3D12Device *device)
{
    if (!renderdoc_api)
        return;

    renderdoc_api->EndFrameCapture(device, NULL);
}
|
||||
#endif
|
||||
|
||||
#define init_test_context(context, desc) init_test_context_(__LINE__, context, desc)
|
||||
static inline bool init_test_context_(unsigned int line, struct test_context *context,
|
||||
const struct test_context_desc *desc)
|
||||
|
@ -1066,6 +1109,10 @@ static inline bool init_test_context_(unsigned int line, struct test_context *co
|
|||
}
|
||||
device = context->device;
|
||||
|
||||
#ifdef _WIN32
|
||||
begin_renderdoc_capturing(device);
|
||||
#endif
|
||||
|
||||
context->queue = create_command_queue_(line, device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
|
||||
hr = ID3D12Device_CreateCommandAllocator(device, D3D12_COMMAND_LIST_TYPE_DIRECT,
|
||||
|
@ -1117,6 +1164,10 @@ static inline void destroy_test_context_(unsigned int line, struct test_context
|
|||
{
|
||||
ULONG refcount;
|
||||
|
||||
#ifdef _WIN32
|
||||
end_renderdoc_capturing(context->device);
|
||||
#endif
|
||||
|
||||
if (context->pipeline_state)
|
||||
ID3D12PipelineState_Release(context->pipeline_state);
|
||||
if (context->root_signature)
|
||||
|
|
|
@ -123,6 +123,8 @@ decl_test(test_tgsm);
|
|||
decl_test(test_uav_load);
|
||||
decl_test(test_cs_uav_store);
|
||||
decl_test(test_uav_counters);
|
||||
decl_test(test_uav_counter_null_behavior_dxbc);
|
||||
decl_test(test_uav_counter_null_behavior_dxil);
|
||||
decl_test(test_decrement_uav_counter);
|
||||
decl_test(test_atomic_instructions_dxbc);
|
||||
decl_test(test_atomic_instructions_dxil);
|
||||
|
@ -135,6 +137,7 @@ decl_test(test_resolve_non_issued_query_data);
|
|||
decl_test(test_resolve_query_data_in_different_command_list);
|
||||
decl_test(test_resolve_query_data_in_reordered_command_list);
|
||||
decl_test(test_execute_indirect);
|
||||
decl_test(test_execute_indirect_state);
|
||||
decl_test(test_dispatch_zero_thread_groups);
|
||||
decl_test(test_unaligned_vertex_stride);
|
||||
decl_test(test_zero_vertex_stride);
|
||||
|
@ -199,6 +202,7 @@ decl_test(test_primitive_restart);
|
|||
decl_test(test_index_buffer_edge_case_stream_output);
|
||||
decl_test(test_vertex_shader_stream_output_dxbc);
|
||||
decl_test(test_vertex_shader_stream_output_dxil);
|
||||
decl_test(test_primitive_restart_list_topology_stream_output);
|
||||
decl_test(test_read_write_subresource);
|
||||
decl_test(test_queue_wait);
|
||||
decl_test(test_graphics_compute_queue_synchronization);
|
||||
|
@ -284,6 +288,7 @@ decl_test(test_shader_sm66_64bit_atomics);
|
|||
decl_test(test_shader_sm66_is_helper_lane);
|
||||
decl_test(test_get_copyable_footprints_planar);
|
||||
decl_test(test_depth_stencil_test_no_dsv);
|
||||
decl_test(test_depth_stencil_layout_tracking);
|
||||
decl_test(test_copy_buffer_to_depth_stencil);
|
||||
decl_test(test_map_texture_validation);
|
||||
decl_test(test_read_write_subresource_2d);
|
||||
|
@ -292,6 +297,7 @@ decl_test(test_integer_blending_pipeline_state);
|
|||
decl_test(test_discard_resource_uav);
|
||||
decl_test(test_unbound_rtv_rendering);
|
||||
decl_test(test_raytracing_local_rs_static_sampler);
|
||||
decl_test(test_raytracing_local_rs_static_sampler_collection);
|
||||
decl_test(test_rayquery);
|
||||
decl_test(test_typed_srv_uav_cast);
|
||||
decl_test(test_typed_srv_cast_clear);
|
||||
|
@ -300,3 +306,18 @@ decl_test(test_mesh_shader_create_pipeline);
|
|||
decl_test(test_mesh_shader_rendering);
|
||||
decl_test(test_mesh_shader_execute_indirect);
|
||||
decl_test(test_amplification_shader);
|
||||
decl_test(test_advanced_cbv_layout);
|
||||
decl_test(test_shader_waveop_maximal_convergence);
|
||||
decl_test(test_uav_3d_sliced_view);
|
||||
decl_test(test_pipeline_no_ps_nonzero_rts);
|
||||
decl_test(test_root_descriptor_offset_sign);
|
||||
decl_test(test_raytracing_no_global_root_signature);
|
||||
decl_test(test_raytracing_missing_required_objects);
|
||||
decl_test(test_raytracing_reject_duplicate_objects);
|
||||
decl_test(test_raytracing_embedded_subobjects);
|
||||
decl_test(test_raytracing_default_association_tiebreak);
|
||||
decl_test(test_raytracing_collection_identifiers);
|
||||
decl_test(test_fence_wait_robustness);
|
||||
decl_test(test_fence_wait_robustness_shared);
|
||||
decl_test(test_root_signature_empty_blob);
|
||||
decl_test(test_sparse_buffer_memory_lifetime);
|
||||
|
|
Loading…
Reference in New Issue